{ "best_metric": null, "best_model_checkpoint": null, "epoch": 4.0, "eval_steps": 500, "global_step": 5289, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 5e-05, "loss": 2.4085, "step": 1 }, { "epoch": 0.0, "learning_rate": 4.999527231467474e-05, "loss": 2.412, "step": 2 }, { "epoch": 0.0, "learning_rate": 4.999054462934947e-05, "loss": 2.2412, "step": 3 }, { "epoch": 0.0, "learning_rate": 4.998581694402421e-05, "loss": 2.7767, "step": 4 }, { "epoch": 0.0, "learning_rate": 4.9981089258698944e-05, "loss": 2.3475, "step": 5 }, { "epoch": 0.0, "learning_rate": 4.997636157337368e-05, "loss": 2.2902, "step": 6 }, { "epoch": 0.01, "learning_rate": 4.9971633888048415e-05, "loss": 2.1618, "step": 7 }, { "epoch": 0.01, "learning_rate": 4.9966906202723154e-05, "loss": 2.0788, "step": 8 }, { "epoch": 0.01, "learning_rate": 4.9962178517397886e-05, "loss": 2.3022, "step": 9 }, { "epoch": 0.01, "learning_rate": 4.9957450832072625e-05, "loss": 2.2085, "step": 10 }, { "epoch": 0.01, "learning_rate": 4.995272314674736e-05, "loss": 2.0357, "step": 11 }, { "epoch": 0.01, "learning_rate": 4.995272314674736e-05, "loss": 2.1559, "step": 12 }, { "epoch": 0.01, "learning_rate": 4.9947995461422096e-05, "loss": 2.0379, "step": 13 }, { "epoch": 0.01, "learning_rate": 4.994326777609683e-05, "loss": 2.0144, "step": 14 }, { "epoch": 0.01, "learning_rate": 4.993854009077156e-05, "loss": 1.997, "step": 15 }, { "epoch": 0.01, "learning_rate": 4.993381240544629e-05, "loss": 2.0751, "step": 16 }, { "epoch": 0.01, "learning_rate": 4.992908472012103e-05, "loss": 1.8244, "step": 17 }, { "epoch": 0.01, "learning_rate": 4.992435703479576e-05, "loss": 1.9439, "step": 18 }, { "epoch": 0.01, "learning_rate": 4.99196293494705e-05, "loss": 1.9008, "step": 19 }, { "epoch": 0.02, "learning_rate": 4.9914901664145234e-05, "loss": 2.1112, "step": 20 }, { "epoch": 0.02, "learning_rate": 4.991017397881997e-05, "loss": 2.0481, "step": 21 }, { "epoch": 0.02, "learning_rate": 4.9905446293494705e-05, "loss": 1.9393, "step": 22 }, { "epoch": 0.02, "learning_rate": 4.9900718608169443e-05, "loss": 1.8458, "step": 23 }, { "epoch": 0.02, "learning_rate": 4.9895990922844175e-05, "loss": 1.9741, "step": 24 }, { "epoch": 0.02, "learning_rate": 4.9891263237518914e-05, "loss": 1.9648, "step": 25 }, { "epoch": 0.02, "learning_rate": 4.9886535552193646e-05, "loss": 1.9738, "step": 26 }, { "epoch": 0.02, "learning_rate": 4.9881807866868385e-05, "loss": 1.9869, "step": 27 }, { "epoch": 0.02, "learning_rate": 4.987708018154312e-05, "loss": 1.8824, "step": 28 }, { "epoch": 0.02, "learning_rate": 4.9872352496217856e-05, "loss": 1.8664, "step": 29 }, { "epoch": 0.02, "learning_rate": 4.986762481089259e-05, "loss": 1.8399, "step": 30 }, { "epoch": 0.02, "learning_rate": 4.986289712556733e-05, "loss": 2.119, "step": 31 }, { "epoch": 0.02, "learning_rate": 4.985816944024206e-05, "loss": 1.9963, "step": 32 }, { "epoch": 0.02, "learning_rate": 4.98534417549168e-05, "loss": 1.9873, "step": 33 }, { "epoch": 0.03, "learning_rate": 4.984871406959153e-05, "loss": 2.006, "step": 34 }, { "epoch": 0.03, "learning_rate": 4.984398638426627e-05, "loss": 1.9115, "step": 35 }, { "epoch": 0.03, "learning_rate": 4.9839258698941e-05, "loss": 2.0756, "step": 36 }, { "epoch": 0.03, "learning_rate": 4.983453101361574e-05, "loss": 2.0229, "step": 37 }, { "epoch": 0.03, "learning_rate": 4.982980332829047e-05, "loss": 2.0575, "step": 38 }, { "epoch": 0.03, "learning_rate": 4.982507564296521e-05, "loss": 1.9715, "step": 39 }, { "epoch": 0.03, "learning_rate": 4.982034795763994e-05, "loss": 2.018, "step": 40 }, { "epoch": 0.03, "learning_rate": 4.981562027231468e-05, "loss": 1.9759, "step": 41 }, { "epoch": 0.03, "learning_rate": 4.9810892586989414e-05, "loss": 1.9817, "step": 42 }, { "epoch": 0.03, "learning_rate": 4.9806164901664146e-05, "loss": 2.0391, "step": 43 }, { "epoch": 0.03, "learning_rate": 4.980143721633888e-05, "loss": 2.0074, "step": 44 }, { "epoch": 0.03, "learning_rate": 4.9796709531013616e-05, "loss": 1.9212, "step": 45 }, { "epoch": 0.03, "learning_rate": 4.979198184568835e-05, "loss": 1.8844, "step": 46 }, { "epoch": 0.04, "learning_rate": 4.978725416036309e-05, "loss": 1.9906, "step": 47 }, { "epoch": 0.04, "learning_rate": 4.978252647503782e-05, "loss": 2.041, "step": 48 }, { "epoch": 0.04, "learning_rate": 4.977779878971256e-05, "loss": 1.8663, "step": 49 }, { "epoch": 0.04, "learning_rate": 4.977307110438729e-05, "loss": 2.0306, "step": 50 }, { "epoch": 0.04, "learning_rate": 4.976834341906203e-05, "loss": 2.0081, "step": 51 }, { "epoch": 0.04, "learning_rate": 4.976361573373676e-05, "loss": 1.9292, "step": 52 }, { "epoch": 0.04, "learning_rate": 4.97588880484115e-05, "loss": 2.118, "step": 53 }, { "epoch": 0.04, "learning_rate": 4.975416036308623e-05, "loss": 1.9822, "step": 54 }, { "epoch": 0.04, "learning_rate": 4.974943267776097e-05, "loss": 2.0308, "step": 55 }, { "epoch": 0.04, "learning_rate": 4.97447049924357e-05, "loss": 1.96, "step": 56 }, { "epoch": 0.04, "learning_rate": 4.973997730711044e-05, "loss": 1.9603, "step": 57 }, { "epoch": 0.04, "learning_rate": 4.9735249621785174e-05, "loss": 2.0642, "step": 58 }, { "epoch": 0.04, "learning_rate": 4.973052193645991e-05, "loss": 1.8739, "step": 59 }, { "epoch": 0.05, "learning_rate": 4.9725794251134645e-05, "loss": 1.958, "step": 60 }, { "epoch": 0.05, "learning_rate": 4.9721066565809384e-05, "loss": 1.9945, "step": 61 }, { "epoch": 0.05, "learning_rate": 4.9716338880484116e-05, "loss": 1.9297, "step": 62 }, { "epoch": 0.05, "learning_rate": 4.9711611195158855e-05, "loss": 1.9928, "step": 63 }, { "epoch": 0.05, "learning_rate": 4.970688350983359e-05, "loss": 1.8398, "step": 64 }, { "epoch": 0.05, "learning_rate": 4.9702155824508325e-05, "loss": 1.998, "step": 65 }, { "epoch": 0.05, "learning_rate": 4.9697428139183064e-05, "loss": 1.9349, "step": 66 }, { "epoch": 0.05, "learning_rate": 4.9692700453857796e-05, "loss": 1.9185, "step": 67 }, { "epoch": 0.05, "learning_rate": 4.968797276853253e-05, "loss": 1.9925, "step": 68 }, { "epoch": 0.05, "learning_rate": 4.968324508320726e-05, "loss": 2.0546, "step": 69 }, { "epoch": 0.05, "learning_rate": 4.9678517397882e-05, "loss": 1.9456, "step": 70 }, { "epoch": 0.05, "learning_rate": 4.967378971255673e-05, "loss": 1.9429, "step": 71 }, { "epoch": 0.05, "learning_rate": 4.966906202723147e-05, "loss": 2.0146, "step": 72 }, { "epoch": 0.06, "learning_rate": 4.96643343419062e-05, "loss": 2.2205, "step": 73 }, { "epoch": 0.06, "learning_rate": 4.965960665658094e-05, "loss": 1.9092, "step": 74 }, { "epoch": 0.06, "learning_rate": 4.965487897125567e-05, "loss": 1.9265, "step": 75 }, { "epoch": 0.06, "learning_rate": 4.965015128593041e-05, "loss": 1.8962, "step": 76 }, { "epoch": 0.06, "learning_rate": 4.9645423600605144e-05, "loss": 2.0029, "step": 77 }, { "epoch": 0.06, "learning_rate": 4.964069591527988e-05, "loss": 1.976, "step": 78 }, { "epoch": 0.06, "learning_rate": 4.9635968229954615e-05, "loss": 1.9505, "step": 79 }, { "epoch": 0.06, "learning_rate": 4.9631240544629354e-05, "loss": 1.9207, "step": 80 }, { "epoch": 0.06, "learning_rate": 4.9626512859304086e-05, "loss": 2.0111, "step": 81 }, { "epoch": 0.06, "learning_rate": 4.9621785173978825e-05, "loss": 1.9453, "step": 82 }, { "epoch": 0.06, "learning_rate": 4.961705748865356e-05, "loss": 1.9826, "step": 83 }, { "epoch": 0.06, "learning_rate": 4.9612329803328296e-05, "loss": 1.9824, "step": 84 }, { "epoch": 0.06, "learning_rate": 4.960760211800303e-05, "loss": 1.9638, "step": 85 }, { "epoch": 0.07, "learning_rate": 4.9602874432677766e-05, "loss": 1.9017, "step": 86 }, { "epoch": 0.07, "learning_rate": 4.95981467473525e-05, "loss": 2.0813, "step": 87 }, { "epoch": 0.07, "learning_rate": 4.959341906202724e-05, "loss": 1.9586, "step": 88 }, { "epoch": 0.07, "learning_rate": 4.958869137670197e-05, "loss": 1.9559, "step": 89 }, { "epoch": 0.07, "learning_rate": 4.958396369137671e-05, "loss": 1.9739, "step": 90 }, { "epoch": 0.07, "learning_rate": 4.957923600605144e-05, "loss": 2.0028, "step": 91 }, { "epoch": 0.07, "learning_rate": 4.957450832072618e-05, "loss": 1.8794, "step": 92 }, { "epoch": 0.07, "learning_rate": 4.956978063540091e-05, "loss": 1.913, "step": 93 }, { "epoch": 0.07, "learning_rate": 4.956505295007565e-05, "loss": 2.0655, "step": 94 }, { "epoch": 0.07, "learning_rate": 4.956032526475038e-05, "loss": 2.069, "step": 95 }, { "epoch": 0.07, "learning_rate": 4.9555597579425114e-05, "loss": 2.0179, "step": 96 }, { "epoch": 0.07, "learning_rate": 4.9550869894099846e-05, "loss": 1.9857, "step": 97 }, { "epoch": 0.07, "learning_rate": 4.9546142208774585e-05, "loss": 2.0019, "step": 98 }, { "epoch": 0.07, "learning_rate": 4.954141452344932e-05, "loss": 2.005, "step": 99 }, { "epoch": 0.08, "learning_rate": 4.9536686838124056e-05, "loss": 2.0512, "step": 100 }, { "epoch": 0.08, "learning_rate": 4.953195915279879e-05, "loss": 1.9384, "step": 101 }, { "epoch": 0.08, "learning_rate": 4.952723146747353e-05, "loss": 1.9374, "step": 102 }, { "epoch": 0.08, "learning_rate": 4.952250378214826e-05, "loss": 2.0586, "step": 103 }, { "epoch": 0.08, "learning_rate": 4.9517776096823e-05, "loss": 1.9806, "step": 104 }, { "epoch": 0.08, "learning_rate": 4.951304841149773e-05, "loss": 1.99, "step": 105 }, { "epoch": 0.08, "learning_rate": 4.950832072617247e-05, "loss": 2.0087, "step": 106 }, { "epoch": 0.08, "learning_rate": 4.95035930408472e-05, "loss": 2.0059, "step": 107 }, { "epoch": 0.08, "learning_rate": 4.949886535552194e-05, "loss": 1.9861, "step": 108 }, { "epoch": 0.08, "learning_rate": 4.949413767019667e-05, "loss": 1.9849, "step": 109 }, { "epoch": 0.08, "learning_rate": 4.948940998487141e-05, "loss": 1.8764, "step": 110 }, { "epoch": 0.08, "learning_rate": 4.948468229954614e-05, "loss": 2.0079, "step": 111 }, { "epoch": 0.08, "learning_rate": 4.947995461422088e-05, "loss": 2.006, "step": 112 }, { "epoch": 0.09, "learning_rate": 4.947522692889561e-05, "loss": 1.946, "step": 113 }, { "epoch": 0.09, "learning_rate": 4.947049924357035e-05, "loss": 1.9703, "step": 114 }, { "epoch": 0.09, "learning_rate": 4.9465771558245084e-05, "loss": 1.9149, "step": 115 }, { "epoch": 0.09, "learning_rate": 4.946104387291982e-05, "loss": 2.0105, "step": 116 }, { "epoch": 0.09, "learning_rate": 4.9456316187594555e-05, "loss": 2.0989, "step": 117 }, { "epoch": 0.09, "learning_rate": 4.9451588502269294e-05, "loss": 1.9225, "step": 118 }, { "epoch": 0.09, "learning_rate": 4.9446860816944026e-05, "loss": 2.0234, "step": 119 }, { "epoch": 0.09, "learning_rate": 4.9442133131618765e-05, "loss": 2.1204, "step": 120 }, { "epoch": 0.09, "learning_rate": 4.94374054462935e-05, "loss": 2.0747, "step": 121 }, { "epoch": 0.09, "learning_rate": 4.943267776096823e-05, "loss": 2.07, "step": 122 }, { "epoch": 0.09, "learning_rate": 4.942795007564297e-05, "loss": 2.0418, "step": 123 }, { "epoch": 0.09, "learning_rate": 4.94232223903177e-05, "loss": 1.876, "step": 124 }, { "epoch": 0.09, "learning_rate": 4.941849470499244e-05, "loss": 2.1391, "step": 125 }, { "epoch": 0.1, "learning_rate": 4.941376701966717e-05, "loss": 1.9722, "step": 126 }, { "epoch": 0.1, "learning_rate": 4.940903933434191e-05, "loss": 1.9892, "step": 127 }, { "epoch": 0.1, "learning_rate": 4.940431164901664e-05, "loss": 2.0291, "step": 128 }, { "epoch": 0.1, "learning_rate": 4.939958396369138e-05, "loss": 1.8464, "step": 129 }, { "epoch": 0.1, "learning_rate": 4.939485627836611e-05, "loss": 1.9236, "step": 130 }, { "epoch": 0.1, "learning_rate": 4.939012859304085e-05, "loss": 1.8781, "step": 131 }, { "epoch": 0.1, "learning_rate": 4.9385400907715583e-05, "loss": 2.0918, "step": 132 }, { "epoch": 0.1, "learning_rate": 4.938067322239032e-05, "loss": 2.0827, "step": 133 }, { "epoch": 0.1, "learning_rate": 4.9375945537065054e-05, "loss": 2.1419, "step": 134 }, { "epoch": 0.1, "learning_rate": 4.937121785173979e-05, "loss": 2.0146, "step": 135 }, { "epoch": 0.1, "learning_rate": 4.9366490166414525e-05, "loss": 2.0217, "step": 136 }, { "epoch": 0.1, "learning_rate": 4.9361762481089264e-05, "loss": 2.138, "step": 137 }, { "epoch": 0.1, "learning_rate": 4.9357034795763996e-05, "loss": 2.098, "step": 138 }, { "epoch": 0.11, "learning_rate": 4.9352307110438735e-05, "loss": 2.1919, "step": 139 }, { "epoch": 0.11, "learning_rate": 4.934757942511347e-05, "loss": 1.9796, "step": 140 }, { "epoch": 0.11, "learning_rate": 4.9342851739788206e-05, "loss": 2.0104, "step": 141 }, { "epoch": 0.11, "learning_rate": 4.933812405446294e-05, "loss": 2.0792, "step": 142 }, { "epoch": 0.11, "learning_rate": 4.933339636913768e-05, "loss": 1.988, "step": 143 }, { "epoch": 0.11, "learning_rate": 4.932866868381241e-05, "loss": 2.0873, "step": 144 }, { "epoch": 0.11, "learning_rate": 4.932394099848715e-05, "loss": 2.4981, "step": 145 }, { "epoch": 0.11, "learning_rate": 4.931921331316188e-05, "loss": 2.1177, "step": 146 }, { "epoch": 0.11, "learning_rate": 4.931448562783662e-05, "loss": 1.9658, "step": 147 }, { "epoch": 0.11, "learning_rate": 4.930975794251135e-05, "loss": 2.1982, "step": 148 }, { "epoch": 0.11, "learning_rate": 4.930503025718608e-05, "loss": 2.0235, "step": 149 }, { "epoch": 0.11, "learning_rate": 4.9300302571860815e-05, "loss": 1.9822, "step": 150 }, { "epoch": 0.11, "learning_rate": 4.9295574886535554e-05, "loss": 2.0329, "step": 151 }, { "epoch": 0.11, "learning_rate": 4.9290847201210286e-05, "loss": 2.0897, "step": 152 }, { "epoch": 0.12, "learning_rate": 4.9286119515885024e-05, "loss": 2.0437, "step": 153 }, { "epoch": 0.12, "learning_rate": 4.9281391830559757e-05, "loss": 1.962, "step": 154 }, { "epoch": 0.12, "learning_rate": 4.9276664145234495e-05, "loss": 2.0581, "step": 155 }, { "epoch": 0.12, "learning_rate": 4.927193645990923e-05, "loss": 1.8534, "step": 156 }, { "epoch": 0.12, "learning_rate": 4.9267208774583966e-05, "loss": 2.0379, "step": 157 }, { "epoch": 0.12, "learning_rate": 4.92624810892587e-05, "loss": 2.0179, "step": 158 }, { "epoch": 0.12, "learning_rate": 4.925775340393344e-05, "loss": 1.9645, "step": 159 }, { "epoch": 0.12, "learning_rate": 4.925302571860817e-05, "loss": 2.0155, "step": 160 }, { "epoch": 0.12, "learning_rate": 4.924829803328291e-05, "loss": 1.9601, "step": 161 }, { "epoch": 0.12, "learning_rate": 4.924357034795764e-05, "loss": 1.9497, "step": 162 }, { "epoch": 0.12, "learning_rate": 4.923884266263238e-05, "loss": 2.0488, "step": 163 }, { "epoch": 0.12, "learning_rate": 4.923411497730711e-05, "loss": 1.9464, "step": 164 }, { "epoch": 0.12, "learning_rate": 4.922938729198185e-05, "loss": 1.9058, "step": 165 }, { "epoch": 0.13, "learning_rate": 4.922465960665658e-05, "loss": 2.0025, "step": 166 }, { "epoch": 0.13, "learning_rate": 4.921993192133132e-05, "loss": 2.0354, "step": 167 }, { "epoch": 0.13, "learning_rate": 4.921520423600605e-05, "loss": 1.9644, "step": 168 }, { "epoch": 0.13, "learning_rate": 4.921047655068079e-05, "loss": 2.0813, "step": 169 }, { "epoch": 0.13, "learning_rate": 4.9205748865355524e-05, "loss": 2.0769, "step": 170 }, { "epoch": 0.13, "learning_rate": 4.920102118003026e-05, "loss": 2.1057, "step": 171 }, { "epoch": 0.13, "learning_rate": 4.9196293494704995e-05, "loss": 2.0048, "step": 172 }, { "epoch": 0.13, "learning_rate": 4.9191565809379733e-05, "loss": 1.9756, "step": 173 }, { "epoch": 0.13, "learning_rate": 4.9186838124054465e-05, "loss": 1.9664, "step": 174 }, { "epoch": 0.13, "learning_rate": 4.91821104387292e-05, "loss": 1.8994, "step": 175 }, { "epoch": 0.13, "learning_rate": 4.9177382753403936e-05, "loss": 1.8496, "step": 176 }, { "epoch": 0.13, "learning_rate": 4.917265506807867e-05, "loss": 2.1344, "step": 177 }, { "epoch": 0.13, "learning_rate": 4.91679273827534e-05, "loss": 2.2317, "step": 178 }, { "epoch": 0.14, "learning_rate": 4.916319969742814e-05, "loss": 2.0201, "step": 179 }, { "epoch": 0.14, "learning_rate": 4.915847201210287e-05, "loss": 1.9307, "step": 180 }, { "epoch": 0.14, "learning_rate": 4.915374432677761e-05, "loss": 1.95, "step": 181 }, { "epoch": 0.14, "learning_rate": 4.914901664145234e-05, "loss": 2.0594, "step": 182 }, { "epoch": 0.14, "learning_rate": 4.914428895612708e-05, "loss": 1.9919, "step": 183 }, { "epoch": 0.14, "learning_rate": 4.913956127080182e-05, "loss": 2.0299, "step": 184 }, { "epoch": 0.14, "learning_rate": 4.913483358547655e-05, "loss": 2.0294, "step": 185 }, { "epoch": 0.14, "learning_rate": 4.913010590015129e-05, "loss": 2.0271, "step": 186 }, { "epoch": 0.14, "learning_rate": 4.912537821482602e-05, "loss": 1.9747, "step": 187 }, { "epoch": 0.14, "learning_rate": 4.912065052950076e-05, "loss": 2.0002, "step": 188 }, { "epoch": 0.14, "learning_rate": 4.9115922844175494e-05, "loss": 1.9998, "step": 189 }, { "epoch": 0.14, "learning_rate": 4.911119515885023e-05, "loss": 1.9361, "step": 190 }, { "epoch": 0.14, "learning_rate": 4.9106467473524965e-05, "loss": 2.0967, "step": 191 }, { "epoch": 0.15, "learning_rate": 4.9101739788199704e-05, "loss": 2.0795, "step": 192 }, { "epoch": 0.15, "learning_rate": 4.9097012102874436e-05, "loss": 1.9912, "step": 193 }, { "epoch": 0.15, "learning_rate": 4.9092284417549174e-05, "loss": 1.9971, "step": 194 }, { "epoch": 0.15, "learning_rate": 4.9087556732223906e-05, "loss": 2.0406, "step": 195 }, { "epoch": 0.15, "learning_rate": 4.9082829046898645e-05, "loss": 2.0048, "step": 196 }, { "epoch": 0.15, "learning_rate": 4.907810136157338e-05, "loss": 1.901, "step": 197 }, { "epoch": 0.15, "learning_rate": 4.9073373676248116e-05, "loss": 1.9592, "step": 198 }, { "epoch": 0.15, "learning_rate": 4.906864599092285e-05, "loss": 1.9288, "step": 199 }, { "epoch": 0.15, "learning_rate": 4.906391830559759e-05, "loss": 1.9349, "step": 200 }, { "epoch": 0.15, "learning_rate": 4.905919062027232e-05, "loss": 2.0374, "step": 201 }, { "epoch": 0.15, "learning_rate": 4.905446293494705e-05, "loss": 1.8165, "step": 202 }, { "epoch": 0.15, "learning_rate": 4.904973524962178e-05, "loss": 2.0718, "step": 203 }, { "epoch": 0.15, "learning_rate": 4.904500756429652e-05, "loss": 2.0086, "step": 204 }, { "epoch": 0.16, "learning_rate": 4.9040279878971254e-05, "loss": 1.9726, "step": 205 }, { "epoch": 0.16, "learning_rate": 4.903555219364599e-05, "loss": 1.9972, "step": 206 }, { "epoch": 0.16, "learning_rate": 4.9030824508320725e-05, "loss": 2.0363, "step": 207 }, { "epoch": 0.16, "learning_rate": 4.9026096822995464e-05, "loss": 1.9986, "step": 208 }, { "epoch": 0.16, "learning_rate": 4.9021369137670196e-05, "loss": 1.8526, "step": 209 }, { "epoch": 0.16, "learning_rate": 4.9016641452344935e-05, "loss": 1.9877, "step": 210 }, { "epoch": 0.16, "learning_rate": 4.901191376701967e-05, "loss": 1.9264, "step": 211 }, { "epoch": 0.16, "learning_rate": 4.9007186081694406e-05, "loss": 2.046, "step": 212 }, { "epoch": 0.16, "learning_rate": 4.900245839636914e-05, "loss": 2.1843, "step": 213 }, { "epoch": 0.16, "learning_rate": 4.8997730711043877e-05, "loss": 2.2355, "step": 214 }, { "epoch": 0.16, "learning_rate": 4.899300302571861e-05, "loss": 2.2393, "step": 215 }, { "epoch": 0.16, "learning_rate": 4.898827534039335e-05, "loss": 1.9779, "step": 216 }, { "epoch": 0.16, "learning_rate": 4.898354765506808e-05, "loss": 2.2316, "step": 217 }, { "epoch": 0.16, "learning_rate": 4.897881996974282e-05, "loss": 2.0092, "step": 218 }, { "epoch": 0.17, "learning_rate": 4.897409228441755e-05, "loss": 1.9382, "step": 219 }, { "epoch": 0.17, "learning_rate": 4.896936459909229e-05, "loss": 1.874, "step": 220 }, { "epoch": 0.17, "learning_rate": 4.896463691376702e-05, "loss": 1.972, "step": 221 }, { "epoch": 0.17, "learning_rate": 4.895990922844176e-05, "loss": 1.9034, "step": 222 }, { "epoch": 0.17, "learning_rate": 4.895518154311649e-05, "loss": 2.1396, "step": 223 }, { "epoch": 0.17, "learning_rate": 4.895045385779123e-05, "loss": 2.0501, "step": 224 }, { "epoch": 0.17, "learning_rate": 4.894572617246596e-05, "loss": 2.0231, "step": 225 }, { "epoch": 0.17, "learning_rate": 4.89409984871407e-05, "loss": 1.9964, "step": 226 }, { "epoch": 0.17, "learning_rate": 4.8936270801815434e-05, "loss": 2.0806, "step": 227 }, { "epoch": 0.17, "learning_rate": 4.8931543116490166e-05, "loss": 1.907, "step": 228 }, { "epoch": 0.17, "learning_rate": 4.8926815431164905e-05, "loss": 1.9331, "step": 229 }, { "epoch": 0.17, "learning_rate": 4.892208774583964e-05, "loss": 2.0212, "step": 230 }, { "epoch": 0.17, "learning_rate": 4.891736006051437e-05, "loss": 2.1189, "step": 231 }, { "epoch": 0.18, "learning_rate": 4.891263237518911e-05, "loss": 2.0265, "step": 232 }, { "epoch": 0.18, "learning_rate": 4.890790468986384e-05, "loss": 1.8529, "step": 233 }, { "epoch": 0.18, "learning_rate": 4.890317700453858e-05, "loss": 1.9881, "step": 234 }, { "epoch": 0.18, "learning_rate": 4.889844931921331e-05, "loss": 2.0176, "step": 235 }, { "epoch": 0.18, "learning_rate": 4.889372163388805e-05, "loss": 2.0067, "step": 236 }, { "epoch": 0.18, "learning_rate": 4.888899394856278e-05, "loss": 1.9262, "step": 237 }, { "epoch": 0.18, "learning_rate": 4.888426626323752e-05, "loss": 2.0144, "step": 238 }, { "epoch": 0.18, "learning_rate": 4.887953857791225e-05, "loss": 2.0123, "step": 239 }, { "epoch": 0.18, "learning_rate": 4.887481089258699e-05, "loss": 1.9921, "step": 240 }, { "epoch": 0.18, "learning_rate": 4.8870083207261723e-05, "loss": 1.9742, "step": 241 }, { "epoch": 0.18, "learning_rate": 4.886535552193646e-05, "loss": 1.9521, "step": 242 }, { "epoch": 0.18, "learning_rate": 4.88606278366112e-05, "loss": 2.0376, "step": 243 }, { "epoch": 0.18, "learning_rate": 4.885590015128593e-05, "loss": 1.9731, "step": 244 }, { "epoch": 0.19, "learning_rate": 4.885117246596067e-05, "loss": 1.9145, "step": 245 }, { "epoch": 0.19, "learning_rate": 4.8846444780635404e-05, "loss": 1.9798, "step": 246 }, { "epoch": 0.19, "learning_rate": 4.884171709531014e-05, "loss": 2.11, "step": 247 }, { "epoch": 0.19, "learning_rate": 4.8836989409984875e-05, "loss": 2.0855, "step": 248 }, { "epoch": 0.19, "learning_rate": 4.8832261724659614e-05, "loss": 1.9132, "step": 249 }, { "epoch": 0.19, "learning_rate": 4.8827534039334346e-05, "loss": 2.0821, "step": 250 }, { "epoch": 0.19, "learning_rate": 4.8822806354009085e-05, "loss": 2.0119, "step": 251 }, { "epoch": 0.19, "learning_rate": 4.881807866868382e-05, "loss": 2.1225, "step": 252 }, { "epoch": 0.19, "learning_rate": 4.8813350983358556e-05, "loss": 2.0861, "step": 253 }, { "epoch": 0.19, "learning_rate": 4.880862329803329e-05, "loss": 2.0075, "step": 254 }, { "epoch": 0.19, "learning_rate": 4.880389561270802e-05, "loss": 1.933, "step": 255 }, { "epoch": 0.19, "learning_rate": 4.879916792738275e-05, "loss": 2.0426, "step": 256 }, { "epoch": 0.19, "learning_rate": 4.879444024205749e-05, "loss": 1.8887, "step": 257 }, { "epoch": 0.2, "learning_rate": 4.878971255673222e-05, "loss": 2.0988, "step": 258 }, { "epoch": 0.2, "learning_rate": 4.878498487140696e-05, "loss": 2.1364, "step": 259 }, { "epoch": 0.2, "learning_rate": 4.8780257186081694e-05, "loss": 1.8695, "step": 260 }, { "epoch": 0.2, "learning_rate": 4.877552950075643e-05, "loss": 1.8532, "step": 261 }, { "epoch": 0.2, "learning_rate": 4.8770801815431165e-05, "loss": 2.0797, "step": 262 }, { "epoch": 0.2, "learning_rate": 4.87660741301059e-05, "loss": 1.8836, "step": 263 }, { "epoch": 0.2, "learning_rate": 4.8761346444780635e-05, "loss": 1.8538, "step": 264 }, { "epoch": 0.2, "learning_rate": 4.8756618759455374e-05, "loss": 2.0143, "step": 265 }, { "epoch": 0.2, "learning_rate": 4.8751891074130106e-05, "loss": 2.0606, "step": 266 }, { "epoch": 0.2, "learning_rate": 4.8747163388804845e-05, "loss": 1.9734, "step": 267 }, { "epoch": 0.2, "learning_rate": 4.874243570347958e-05, "loss": 2.149, "step": 268 }, { "epoch": 0.2, "learning_rate": 4.8737708018154316e-05, "loss": 1.9293, "step": 269 }, { "epoch": 0.2, "learning_rate": 4.873298033282905e-05, "loss": 2.1234, "step": 270 }, { "epoch": 0.2, "learning_rate": 4.872825264750379e-05, "loss": 2.1907, "step": 271 }, { "epoch": 0.21, "learning_rate": 4.872352496217852e-05, "loss": 2.0523, "step": 272 }, { "epoch": 0.21, "learning_rate": 4.871879727685326e-05, "loss": 2.01, "step": 273 }, { "epoch": 0.21, "learning_rate": 4.871406959152799e-05, "loss": 2.0091, "step": 274 }, { "epoch": 0.21, "learning_rate": 4.870934190620273e-05, "loss": 2.2141, "step": 275 }, { "epoch": 0.21, "learning_rate": 4.870461422087746e-05, "loss": 2.0128, "step": 276 }, { "epoch": 0.21, "learning_rate": 4.86998865355522e-05, "loss": 2.0808, "step": 277 }, { "epoch": 0.21, "learning_rate": 4.869515885022693e-05, "loss": 2.0225, "step": 278 }, { "epoch": 0.21, "learning_rate": 4.869043116490167e-05, "loss": 2.0025, "step": 279 }, { "epoch": 0.21, "learning_rate": 4.86857034795764e-05, "loss": 2.0771, "step": 280 }, { "epoch": 0.21, "learning_rate": 4.8680975794251135e-05, "loss": 2.082, "step": 281 }, { "epoch": 0.21, "learning_rate": 4.8676248108925873e-05, "loss": 2.1156, "step": 282 }, { "epoch": 0.21, "learning_rate": 4.8671520423600606e-05, "loss": 1.9408, "step": 283 }, { "epoch": 0.21, "learning_rate": 4.866679273827534e-05, "loss": 1.9722, "step": 284 }, { "epoch": 0.22, "learning_rate": 4.8662065052950076e-05, "loss": 1.9945, "step": 285 }, { "epoch": 0.22, "learning_rate": 4.865733736762481e-05, "loss": 2.12, "step": 286 }, { "epoch": 0.22, "learning_rate": 4.865260968229955e-05, "loss": 2.0671, "step": 287 }, { "epoch": 0.22, "learning_rate": 4.864788199697428e-05, "loss": 1.7592, "step": 288 }, { "epoch": 0.22, "learning_rate": 4.864315431164902e-05, "loss": 1.9844, "step": 289 }, { "epoch": 0.22, "learning_rate": 4.863842662632375e-05, "loss": 2.1166, "step": 290 }, { "epoch": 0.22, "learning_rate": 4.863369894099849e-05, "loss": 2.0838, "step": 291 }, { "epoch": 0.22, "learning_rate": 4.862897125567322e-05, "loss": 2.0535, "step": 292 }, { "epoch": 0.22, "learning_rate": 4.862424357034796e-05, "loss": 2.0544, "step": 293 }, { "epoch": 0.22, "learning_rate": 4.861951588502269e-05, "loss": 2.0789, "step": 294 }, { "epoch": 0.22, "learning_rate": 4.861478819969743e-05, "loss": 2.0141, "step": 295 }, { "epoch": 0.22, "learning_rate": 4.861006051437216e-05, "loss": 2.0381, "step": 296 }, { "epoch": 0.22, "learning_rate": 4.86053328290469e-05, "loss": 2.0571, "step": 297 }, { "epoch": 0.23, "learning_rate": 4.8600605143721634e-05, "loss": 2.011, "step": 298 }, { "epoch": 0.23, "learning_rate": 4.859587745839637e-05, "loss": 1.9378, "step": 299 }, { "epoch": 0.23, "learning_rate": 4.8591149773071105e-05, "loss": 1.9265, "step": 300 }, { "epoch": 0.23, "learning_rate": 4.8586422087745844e-05, "loss": 2.143, "step": 301 }, { "epoch": 0.23, "learning_rate": 4.8581694402420576e-05, "loss": 2.1641, "step": 302 }, { "epoch": 0.23, "learning_rate": 4.8576966717095314e-05, "loss": 2.0647, "step": 303 }, { "epoch": 0.23, "learning_rate": 4.857223903177005e-05, "loss": 1.9862, "step": 304 }, { "epoch": 0.23, "learning_rate": 4.8567511346444785e-05, "loss": 2.0067, "step": 305 }, { "epoch": 0.23, "learning_rate": 4.8562783661119524e-05, "loss": 1.9842, "step": 306 }, { "epoch": 0.23, "learning_rate": 4.8558055975794256e-05, "loss": 2.0996, "step": 307 }, { "epoch": 0.23, "learning_rate": 4.855332829046899e-05, "loss": 1.95, "step": 308 }, { "epoch": 0.23, "learning_rate": 4.854860060514372e-05, "loss": 2.0952, "step": 309 }, { "epoch": 0.23, "learning_rate": 4.854387291981846e-05, "loss": 2.0634, "step": 310 }, { "epoch": 0.24, "learning_rate": 4.853914523449319e-05, "loss": 2.0028, "step": 311 }, { "epoch": 0.24, "learning_rate": 4.853441754916793e-05, "loss": 2.0324, "step": 312 }, { "epoch": 0.24, "learning_rate": 4.852968986384266e-05, "loss": 2.0477, "step": 313 }, { "epoch": 0.24, "learning_rate": 4.85249621785174e-05, "loss": 2.0424, "step": 314 }, { "epoch": 0.24, "learning_rate": 4.852023449319213e-05, "loss": 2.0053, "step": 315 }, { "epoch": 0.24, "learning_rate": 4.851550680786687e-05, "loss": 1.9902, "step": 316 }, { "epoch": 0.24, "learning_rate": 4.8510779122541604e-05, "loss": 1.9086, "step": 317 }, { "epoch": 0.24, "learning_rate": 4.850605143721634e-05, "loss": 2.1005, "step": 318 }, { "epoch": 0.24, "learning_rate": 4.8501323751891075e-05, "loss": 2.0058, "step": 319 }, { "epoch": 0.24, "learning_rate": 4.8496596066565814e-05, "loss": 1.9474, "step": 320 }, { "epoch": 0.24, "learning_rate": 4.8491868381240546e-05, "loss": 1.98, "step": 321 }, { "epoch": 0.24, "learning_rate": 4.8487140695915285e-05, "loss": 2.0892, "step": 322 }, { "epoch": 0.24, "learning_rate": 4.848241301059002e-05, "loss": 2.0518, "step": 323 }, { "epoch": 0.25, "learning_rate": 4.8477685325264755e-05, "loss": 2.1857, "step": 324 }, { "epoch": 0.25, "learning_rate": 4.847295763993949e-05, "loss": 2.0719, "step": 325 }, { "epoch": 0.25, "learning_rate": 4.8468229954614226e-05, "loss": 2.0755, "step": 326 }, { "epoch": 0.25, "learning_rate": 4.846350226928896e-05, "loss": 1.9904, "step": 327 }, { "epoch": 0.25, "learning_rate": 4.84587745839637e-05, "loss": 2.0824, "step": 328 }, { "epoch": 0.25, "learning_rate": 4.845404689863843e-05, "loss": 1.9394, "step": 329 }, { "epoch": 0.25, "learning_rate": 4.844931921331317e-05, "loss": 2.0317, "step": 330 }, { "epoch": 0.25, "learning_rate": 4.84445915279879e-05, "loss": 1.9249, "step": 331 }, { "epoch": 0.25, "learning_rate": 4.843986384266264e-05, "loss": 2.0625, "step": 332 }, { "epoch": 0.25, "learning_rate": 4.843513615733737e-05, "loss": 2.1139, "step": 333 }, { "epoch": 0.25, "learning_rate": 4.84304084720121e-05, "loss": 2.025, "step": 334 }, { "epoch": 0.25, "learning_rate": 4.842568078668684e-05, "loss": 1.828, "step": 335 }, { "epoch": 0.25, "learning_rate": 4.8420953101361574e-05, "loss": 2.0947, "step": 336 }, { "epoch": 0.25, "learning_rate": 4.8416225416036306e-05, "loss": 2.0017, "step": 337 }, { "epoch": 0.26, "learning_rate": 4.8411497730711045e-05, "loss": 2.0254, "step": 338 }, { "epoch": 0.26, "learning_rate": 4.840677004538578e-05, "loss": 2.0, "step": 339 }, { "epoch": 0.26, "learning_rate": 4.8402042360060516e-05, "loss": 1.9429, "step": 340 }, { "epoch": 0.26, "learning_rate": 4.839731467473525e-05, "loss": 2.0546, "step": 341 }, { "epoch": 0.26, "learning_rate": 4.839258698940999e-05, "loss": 1.9702, "step": 342 }, { "epoch": 0.26, "learning_rate": 4.838785930408472e-05, "loss": 1.9797, "step": 343 }, { "epoch": 0.26, "learning_rate": 4.838313161875946e-05, "loss": 1.9039, "step": 344 }, { "epoch": 0.26, "learning_rate": 4.837840393343419e-05, "loss": 1.9548, "step": 345 }, { "epoch": 0.26, "learning_rate": 4.837367624810893e-05, "loss": 1.9396, "step": 346 }, { "epoch": 0.26, "learning_rate": 4.836894856278366e-05, "loss": 2.0506, "step": 347 }, { "epoch": 0.26, "learning_rate": 4.83642208774584e-05, "loss": 2.0167, "step": 348 }, { "epoch": 0.26, "learning_rate": 4.835949319213313e-05, "loss": 2.0363, "step": 349 }, { "epoch": 0.26, "learning_rate": 4.835476550680787e-05, "loss": 1.978, "step": 350 }, { "epoch": 0.27, "learning_rate": 4.83500378214826e-05, "loss": 2.0332, "step": 351 }, { "epoch": 0.27, "learning_rate": 4.834531013615734e-05, "loss": 1.9257, "step": 352 }, { "epoch": 0.27, "learning_rate": 4.834058245083207e-05, "loss": 1.9969, "step": 353 }, { "epoch": 0.27, "learning_rate": 4.833585476550681e-05, "loss": 1.8301, "step": 354 }, { "epoch": 0.27, "learning_rate": 4.8331127080181544e-05, "loss": 1.9831, "step": 355 }, { "epoch": 0.27, "learning_rate": 4.832639939485628e-05, "loss": 2.055, "step": 356 }, { "epoch": 0.27, "learning_rate": 4.8321671709531015e-05, "loss": 2.0513, "step": 357 }, { "epoch": 0.27, "learning_rate": 4.8316944024205754e-05, "loss": 2.089, "step": 358 }, { "epoch": 0.27, "learning_rate": 4.8312216338880486e-05, "loss": 2.0736, "step": 359 }, { "epoch": 0.27, "learning_rate": 4.8307488653555225e-05, "loss": 2.0489, "step": 360 }, { "epoch": 0.27, "learning_rate": 4.830276096822996e-05, "loss": 1.8654, "step": 361 }, { "epoch": 0.27, "learning_rate": 4.829803328290469e-05, "loss": 2.0741, "step": 362 }, { "epoch": 0.27, "learning_rate": 4.829330559757943e-05, "loss": 1.8542, "step": 363 }, { "epoch": 0.28, "learning_rate": 4.828857791225416e-05, "loss": 2.027, "step": 364 }, { "epoch": 0.28, "learning_rate": 4.82838502269289e-05, "loss": 2.035, "step": 365 }, { "epoch": 0.28, "learning_rate": 4.827912254160363e-05, "loss": 1.9873, "step": 366 }, { "epoch": 0.28, "learning_rate": 4.827439485627837e-05, "loss": 1.9848, "step": 367 }, { "epoch": 0.28, "learning_rate": 4.82696671709531e-05, "loss": 1.9613, "step": 368 }, { "epoch": 0.28, "learning_rate": 4.826493948562784e-05, "loss": 2.0728, "step": 369 }, { "epoch": 0.28, "learning_rate": 4.826021180030257e-05, "loss": 2.0528, "step": 370 }, { "epoch": 0.28, "learning_rate": 4.825548411497731e-05, "loss": 1.9249, "step": 371 }, { "epoch": 0.28, "learning_rate": 4.825075642965204e-05, "loss": 1.9564, "step": 372 }, { "epoch": 0.28, "learning_rate": 4.824602874432678e-05, "loss": 2.0631, "step": 373 }, { "epoch": 0.28, "learning_rate": 4.8241301059001514e-05, "loss": 2.0868, "step": 374 }, { "epoch": 0.28, "learning_rate": 4.823657337367625e-05, "loss": 2.0533, "step": 375 }, { "epoch": 0.28, "learning_rate": 4.8231845688350985e-05, "loss": 1.9086, "step": 376 }, { "epoch": 0.29, "learning_rate": 4.8227118003025724e-05, "loss": 2.0189, "step": 377 }, { "epoch": 0.29, "learning_rate": 4.8222390317700456e-05, "loss": 2.0063, "step": 378 }, { "epoch": 0.29, "learning_rate": 4.8217662632375195e-05, "loss": 2.0097, "step": 379 }, { "epoch": 0.29, "learning_rate": 4.821293494704993e-05, "loss": 2.0765, "step": 380 }, { "epoch": 0.29, "learning_rate": 4.8208207261724666e-05, "loss": 2.1804, "step": 381 }, { "epoch": 0.29, "learning_rate": 4.82034795763994e-05, "loss": 1.9955, "step": 382 }, { "epoch": 0.29, "learning_rate": 4.819875189107414e-05, "loss": 1.9851, "step": 383 }, { "epoch": 0.29, "learning_rate": 4.819402420574887e-05, "loss": 1.969, "step": 384 }, { "epoch": 0.29, "learning_rate": 4.818929652042361e-05, "loss": 2.124, "step": 385 }, { "epoch": 0.29, "learning_rate": 4.818456883509834e-05, "loss": 2.0895, "step": 386 }, { "epoch": 0.29, "learning_rate": 4.817984114977307e-05, "loss": 2.0718, "step": 387 }, { "epoch": 0.29, "learning_rate": 4.817511346444781e-05, "loss": 1.9524, "step": 388 }, { "epoch": 0.29, "learning_rate": 4.817038577912254e-05, "loss": 2.1038, "step": 389 }, { "epoch": 0.29, "learning_rate": 4.8165658093797275e-05, "loss": 1.9198, "step": 390 }, { "epoch": 0.3, "learning_rate": 4.8160930408472013e-05, "loss": 1.8899, "step": 391 }, { "epoch": 0.3, "learning_rate": 4.8156202723146746e-05, "loss": 2.0151, "step": 392 }, { "epoch": 0.3, "learning_rate": 4.8151475037821484e-05, "loss": 1.9639, "step": 393 }, { "epoch": 0.3, "learning_rate": 4.8146747352496216e-05, "loss": 1.9401, "step": 394 }, { "epoch": 0.3, "learning_rate": 4.8142019667170955e-05, "loss": 1.9853, "step": 395 }, { "epoch": 0.3, "learning_rate": 4.813729198184569e-05, "loss": 1.9596, "step": 396 }, { "epoch": 0.3, "learning_rate": 4.8132564296520426e-05, "loss": 2.0158, "step": 397 }, { "epoch": 0.3, "learning_rate": 4.812783661119516e-05, "loss": 1.9706, "step": 398 }, { "epoch": 0.3, "learning_rate": 4.81231089258699e-05, "loss": 1.9797, "step": 399 }, { "epoch": 0.3, "learning_rate": 4.811838124054463e-05, "loss": 1.8872, "step": 400 }, { "epoch": 0.3, "learning_rate": 4.811365355521937e-05, "loss": 2.0337, "step": 401 }, { "epoch": 0.3, "learning_rate": 4.81089258698941e-05, "loss": 1.9557, "step": 402 }, { "epoch": 0.3, "learning_rate": 4.810419818456884e-05, "loss": 2.0925, "step": 403 }, { "epoch": 0.31, "learning_rate": 4.809947049924357e-05, "loss": 1.9941, "step": 404 }, { "epoch": 0.31, "learning_rate": 4.809474281391831e-05, "loss": 1.9971, "step": 405 }, { "epoch": 0.31, "learning_rate": 4.809001512859304e-05, "loss": 1.9631, "step": 406 }, { "epoch": 0.31, "learning_rate": 4.808528744326778e-05, "loss": 1.953, "step": 407 }, { "epoch": 0.31, "learning_rate": 4.808055975794251e-05, "loss": 1.9553, "step": 408 }, { "epoch": 0.31, "learning_rate": 4.807583207261725e-05, "loss": 2.0301, "step": 409 }, { "epoch": 0.31, "learning_rate": 4.8071104387291984e-05, "loss": 2.0984, "step": 410 }, { "epoch": 0.31, "learning_rate": 4.806637670196672e-05, "loss": 2.0085, "step": 411 }, { "epoch": 0.31, "learning_rate": 4.8061649016641455e-05, "loss": 2.0306, "step": 412 }, { "epoch": 0.31, "learning_rate": 4.805692133131619e-05, "loss": 1.9533, "step": 413 }, { "epoch": 0.31, "learning_rate": 4.8052193645990925e-05, "loss": 2.021, "step": 414 }, { "epoch": 0.31, "learning_rate": 4.804746596066566e-05, "loss": 2.0063, "step": 415 }, { "epoch": 0.31, "learning_rate": 4.8042738275340396e-05, "loss": 1.9465, "step": 416 }, { "epoch": 0.32, "learning_rate": 4.803801059001513e-05, "loss": 2.1276, "step": 417 }, { "epoch": 0.32, "learning_rate": 4.803328290468986e-05, "loss": 2.0381, "step": 418 }, { "epoch": 0.32, "learning_rate": 4.80285552193646e-05, "loss": 2.0685, "step": 419 }, { "epoch": 0.32, "learning_rate": 4.802382753403933e-05, "loss": 2.0841, "step": 420 }, { "epoch": 0.32, "learning_rate": 4.801909984871407e-05, "loss": 1.9812, "step": 421 }, { "epoch": 0.32, "learning_rate": 4.80143721633888e-05, "loss": 2.1372, "step": 422 }, { "epoch": 0.32, "learning_rate": 4.800964447806354e-05, "loss": 2.0334, "step": 423 }, { "epoch": 0.32, "learning_rate": 4.800491679273828e-05, "loss": 2.0253, "step": 424 }, { "epoch": 0.32, "learning_rate": 4.800018910741301e-05, "loss": 1.9097, "step": 425 }, { "epoch": 0.32, "learning_rate": 4.799546142208775e-05, "loss": 1.9575, "step": 426 }, { "epoch": 0.32, "learning_rate": 4.799073373676248e-05, "loss": 2.187, "step": 427 }, { "epoch": 0.32, "learning_rate": 4.798600605143722e-05, "loss": 2.055, "step": 428 }, { "epoch": 0.32, "learning_rate": 4.7981278366111954e-05, "loss": 2.0625, "step": 429 }, { "epoch": 0.33, "learning_rate": 4.797655068078669e-05, "loss": 2.0664, "step": 430 }, { "epoch": 0.33, "learning_rate": 4.7971822995461425e-05, "loss": 2.0651, "step": 431 }, { "epoch": 0.33, "learning_rate": 4.7967095310136163e-05, "loss": 2.0696, "step": 432 }, { "epoch": 0.33, "learning_rate": 4.7962367624810896e-05, "loss": 2.0453, "step": 433 }, { "epoch": 0.33, "learning_rate": 4.7957639939485634e-05, "loss": 2.0658, "step": 434 }, { "epoch": 0.33, "learning_rate": 4.7952912254160366e-05, "loss": 2.1195, "step": 435 }, { "epoch": 0.33, "learning_rate": 4.7948184568835105e-05, "loss": 2.0814, "step": 436 }, { "epoch": 0.33, "learning_rate": 4.794345688350984e-05, "loss": 2.01, "step": 437 }, { "epoch": 0.33, "learning_rate": 4.7938729198184576e-05, "loss": 2.0097, "step": 438 }, { "epoch": 0.33, "learning_rate": 4.793400151285931e-05, "loss": 1.9284, "step": 439 }, { "epoch": 0.33, "learning_rate": 4.792927382753404e-05, "loss": 1.9519, "step": 440 }, { "epoch": 0.33, "learning_rate": 4.792454614220878e-05, "loss": 2.0919, "step": 441 }, { "epoch": 0.33, "learning_rate": 4.791981845688351e-05, "loss": 1.8452, "step": 442 }, { "epoch": 0.34, "learning_rate": 4.791509077155824e-05, "loss": 1.9984, "step": 443 }, { "epoch": 0.34, "learning_rate": 4.791036308623298e-05, "loss": 1.9537, "step": 444 }, { "epoch": 0.34, "learning_rate": 4.7905635400907714e-05, "loss": 2.0628, "step": 445 }, { "epoch": 0.34, "learning_rate": 4.790090771558245e-05, "loss": 2.0381, "step": 446 }, { "epoch": 0.34, "learning_rate": 4.7896180030257185e-05, "loss": 1.9629, "step": 447 }, { "epoch": 0.34, "learning_rate": 4.7891452344931924e-05, "loss": 2.1311, "step": 448 }, { "epoch": 0.34, "learning_rate": 4.7886724659606656e-05, "loss": 2.0192, "step": 449 }, { "epoch": 0.34, "learning_rate": 4.7881996974281395e-05, "loss": 2.0428, "step": 450 }, { "epoch": 0.34, "learning_rate": 4.787726928895613e-05, "loss": 1.9593, "step": 451 }, { "epoch": 0.34, "learning_rate": 4.7872541603630866e-05, "loss": 2.0618, "step": 452 }, { "epoch": 0.34, "learning_rate": 4.78678139183056e-05, "loss": 2.0374, "step": 453 }, { "epoch": 0.34, "learning_rate": 4.7863086232980337e-05, "loss": 1.9091, "step": 454 }, { "epoch": 0.34, "learning_rate": 4.785835854765507e-05, "loss": 2.0228, "step": 455 }, { "epoch": 0.34, "learning_rate": 4.785363086232981e-05, "loss": 1.9976, "step": 456 }, { "epoch": 0.35, "learning_rate": 4.784890317700454e-05, "loss": 2.0183, "step": 457 }, { "epoch": 0.35, "learning_rate": 4.784417549167928e-05, "loss": 1.9141, "step": 458 }, { "epoch": 0.35, "learning_rate": 4.783944780635401e-05, "loss": 1.9611, "step": 459 }, { "epoch": 0.35, "learning_rate": 4.783472012102875e-05, "loss": 2.1428, "step": 460 }, { "epoch": 0.35, "learning_rate": 4.782999243570348e-05, "loss": 2.0283, "step": 461 }, { "epoch": 0.35, "learning_rate": 4.782526475037822e-05, "loss": 2.0156, "step": 462 }, { "epoch": 0.35, "learning_rate": 4.782053706505295e-05, "loss": 2.0963, "step": 463 }, { "epoch": 0.35, "learning_rate": 4.781580937972769e-05, "loss": 2.0368, "step": 464 }, { "epoch": 0.35, "learning_rate": 4.781108169440242e-05, "loss": 1.9851, "step": 465 }, { "epoch": 0.35, "learning_rate": 4.780635400907716e-05, "loss": 2.0841, "step": 466 }, { "epoch": 0.35, "learning_rate": 4.7801626323751894e-05, "loss": 2.0512, "step": 467 }, { "epoch": 0.35, "learning_rate": 4.7796898638426626e-05, "loss": 2.1608, "step": 468 }, { "epoch": 0.35, "learning_rate": 4.7792170953101365e-05, "loss": 2.0457, "step": 469 }, { "epoch": 0.36, "learning_rate": 4.77874432677761e-05, "loss": 1.9606, "step": 470 }, { "epoch": 0.36, "learning_rate": 4.778271558245083e-05, "loss": 2.1006, "step": 471 }, { "epoch": 0.36, "learning_rate": 4.777798789712557e-05, "loss": 1.9714, "step": 472 }, { "epoch": 0.36, "learning_rate": 4.77732602118003e-05, "loss": 1.9414, "step": 473 }, { "epoch": 0.36, "learning_rate": 4.776853252647504e-05, "loss": 2.0564, "step": 474 }, { "epoch": 0.36, "learning_rate": 4.776380484114977e-05, "loss": 1.9741, "step": 475 }, { "epoch": 0.36, "learning_rate": 4.775907715582451e-05, "loss": 1.9889, "step": 476 }, { "epoch": 0.36, "learning_rate": 4.775434947049924e-05, "loss": 1.989, "step": 477 }, { "epoch": 0.36, "learning_rate": 4.774962178517398e-05, "loss": 1.9534, "step": 478 }, { "epoch": 0.36, "learning_rate": 4.774489409984871e-05, "loss": 2.055, "step": 479 }, { "epoch": 0.36, "learning_rate": 4.774016641452345e-05, "loss": 2.0618, "step": 480 }, { "epoch": 0.36, "learning_rate": 4.7735438729198183e-05, "loss": 1.9862, "step": 481 }, { "epoch": 0.36, "learning_rate": 4.773071104387292e-05, "loss": 2.057, "step": 482 }, { "epoch": 0.37, "learning_rate": 4.772598335854766e-05, "loss": 2.122, "step": 483 }, { "epoch": 0.37, "learning_rate": 4.772125567322239e-05, "loss": 1.9343, "step": 484 }, { "epoch": 0.37, "learning_rate": 4.771652798789713e-05, "loss": 2.0158, "step": 485 }, { "epoch": 0.37, "learning_rate": 4.7711800302571864e-05, "loss": 2.0633, "step": 486 }, { "epoch": 0.37, "learning_rate": 4.77070726172466e-05, "loss": 1.9968, "step": 487 }, { "epoch": 0.37, "learning_rate": 4.7702344931921335e-05, "loss": 1.9506, "step": 488 }, { "epoch": 0.37, "learning_rate": 4.7697617246596074e-05, "loss": 2.1298, "step": 489 }, { "epoch": 0.37, "learning_rate": 4.7692889561270806e-05, "loss": 1.9628, "step": 490 }, { "epoch": 0.37, "learning_rate": 4.7688161875945545e-05, "loss": 1.9442, "step": 491 }, { "epoch": 0.37, "learning_rate": 4.768343419062028e-05, "loss": 2.0119, "step": 492 }, { "epoch": 0.37, "learning_rate": 4.767870650529501e-05, "loss": 2.1189, "step": 493 }, { "epoch": 0.37, "learning_rate": 4.767397881996975e-05, "loss": 1.9944, "step": 494 }, { "epoch": 0.37, "learning_rate": 4.766925113464448e-05, "loss": 2.0168, "step": 495 }, { "epoch": 0.38, "learning_rate": 4.766452344931921e-05, "loss": 1.9918, "step": 496 }, { "epoch": 0.38, "learning_rate": 4.765979576399395e-05, "loss": 2.0893, "step": 497 }, { "epoch": 0.38, "learning_rate": 4.765506807866868e-05, "loss": 2.078, "step": 498 }, { "epoch": 0.38, "learning_rate": 4.765034039334342e-05, "loss": 1.9787, "step": 499 }, { "epoch": 0.38, "learning_rate": 4.7645612708018154e-05, "loss": 2.0261, "step": 500 }, { "epoch": 0.38, "learning_rate": 4.764088502269289e-05, "loss": 2.0458, "step": 501 }, { "epoch": 0.38, "learning_rate": 4.7636157337367624e-05, "loss": 2.0946, "step": 502 }, { "epoch": 0.38, "learning_rate": 4.763142965204236e-05, "loss": 2.0315, "step": 503 }, { "epoch": 0.38, "learning_rate": 4.7626701966717095e-05, "loss": 2.0422, "step": 504 }, { "epoch": 0.38, "learning_rate": 4.7621974281391834e-05, "loss": 1.9925, "step": 505 }, { "epoch": 0.38, "learning_rate": 4.7617246596066566e-05, "loss": 2.0407, "step": 506 }, { "epoch": 0.38, "learning_rate": 4.7612518910741305e-05, "loss": 2.1064, "step": 507 }, { "epoch": 0.38, "learning_rate": 4.760779122541604e-05, "loss": 2.1295, "step": 508 }, { "epoch": 0.38, "learning_rate": 4.7603063540090776e-05, "loss": 1.9745, "step": 509 }, { "epoch": 0.39, "learning_rate": 4.759833585476551e-05, "loss": 1.8469, "step": 510 }, { "epoch": 0.39, "learning_rate": 4.759360816944025e-05, "loss": 1.9734, "step": 511 }, { "epoch": 0.39, "learning_rate": 4.758888048411498e-05, "loss": 1.9218, "step": 512 }, { "epoch": 0.39, "learning_rate": 4.758415279878972e-05, "loss": 1.9402, "step": 513 }, { "epoch": 0.39, "learning_rate": 4.757942511346445e-05, "loss": 1.9133, "step": 514 }, { "epoch": 0.39, "learning_rate": 4.757469742813919e-05, "loss": 1.9429, "step": 515 }, { "epoch": 0.39, "learning_rate": 4.756996974281392e-05, "loss": 1.8812, "step": 516 }, { "epoch": 0.39, "learning_rate": 4.756524205748866e-05, "loss": 1.9573, "step": 517 }, { "epoch": 0.39, "learning_rate": 4.756051437216339e-05, "loss": 2.0669, "step": 518 }, { "epoch": 0.39, "learning_rate": 4.755578668683813e-05, "loss": 2.0122, "step": 519 }, { "epoch": 0.39, "learning_rate": 4.755105900151286e-05, "loss": 2.0268, "step": 520 }, { "epoch": 0.39, "learning_rate": 4.7546331316187595e-05, "loss": 1.991, "step": 521 }, { "epoch": 0.39, "learning_rate": 4.7541603630862333e-05, "loss": 2.0765, "step": 522 }, { "epoch": 0.4, "learning_rate": 4.7536875945537065e-05, "loss": 2.0773, "step": 523 }, { "epoch": 0.4, "learning_rate": 4.75321482602118e-05, "loss": 1.9775, "step": 524 }, { "epoch": 0.4, "learning_rate": 4.7527420574886536e-05, "loss": 1.9895, "step": 525 }, { "epoch": 0.4, "learning_rate": 4.752269288956127e-05, "loss": 2.1144, "step": 526 }, { "epoch": 0.4, "learning_rate": 4.751796520423601e-05, "loss": 2.0288, "step": 527 }, { "epoch": 0.4, "learning_rate": 4.751323751891074e-05, "loss": 2.0229, "step": 528 }, { "epoch": 0.4, "learning_rate": 4.750850983358548e-05, "loss": 1.9717, "step": 529 }, { "epoch": 0.4, "learning_rate": 4.750378214826021e-05, "loss": 2.0122, "step": 530 }, { "epoch": 0.4, "learning_rate": 4.749905446293495e-05, "loss": 1.9679, "step": 531 }, { "epoch": 0.4, "learning_rate": 4.749432677760968e-05, "loss": 2.0692, "step": 532 }, { "epoch": 0.4, "learning_rate": 4.748959909228442e-05, "loss": 2.054, "step": 533 }, { "epoch": 0.4, "learning_rate": 4.748487140695915e-05, "loss": 2.1148, "step": 534 }, { "epoch": 0.4, "learning_rate": 4.748014372163389e-05, "loss": 1.9773, "step": 535 }, { "epoch": 0.41, "learning_rate": 4.747541603630862e-05, "loss": 1.9317, "step": 536 }, { "epoch": 0.41, "learning_rate": 4.747068835098336e-05, "loss": 1.9418, "step": 537 }, { "epoch": 0.41, "learning_rate": 4.7465960665658094e-05, "loss": 1.9951, "step": 538 }, { "epoch": 0.41, "learning_rate": 4.746123298033283e-05, "loss": 1.8869, "step": 539 }, { "epoch": 0.41, "learning_rate": 4.7456505295007565e-05, "loss": 2.0852, "step": 540 }, { "epoch": 0.41, "learning_rate": 4.7451777609682304e-05, "loss": 1.9063, "step": 541 }, { "epoch": 0.41, "learning_rate": 4.7447049924357036e-05, "loss": 2.1507, "step": 542 }, { "epoch": 0.41, "learning_rate": 4.7442322239031774e-05, "loss": 2.0798, "step": 543 }, { "epoch": 0.41, "learning_rate": 4.743759455370651e-05, "loss": 1.9669, "step": 544 }, { "epoch": 0.41, "learning_rate": 4.7432866868381245e-05, "loss": 1.967, "step": 545 }, { "epoch": 0.41, "learning_rate": 4.742813918305598e-05, "loss": 2.0925, "step": 546 }, { "epoch": 0.41, "learning_rate": 4.7423411497730716e-05, "loss": 2.0742, "step": 547 }, { "epoch": 0.41, "learning_rate": 4.741868381240545e-05, "loss": 2.0493, "step": 548 }, { "epoch": 0.42, "learning_rate": 4.741395612708018e-05, "loss": 1.8355, "step": 549 }, { "epoch": 0.42, "learning_rate": 4.740922844175492e-05, "loss": 1.9607, "step": 550 }, { "epoch": 0.42, "learning_rate": 4.740450075642965e-05, "loss": 2.0426, "step": 551 }, { "epoch": 0.42, "learning_rate": 4.739977307110439e-05, "loss": 1.9696, "step": 552 }, { "epoch": 0.42, "learning_rate": 4.739504538577912e-05, "loss": 1.9734, "step": 553 }, { "epoch": 0.42, "learning_rate": 4.739031770045386e-05, "loss": 2.0136, "step": 554 }, { "epoch": 0.42, "learning_rate": 4.738559001512859e-05, "loss": 1.9956, "step": 555 }, { "epoch": 0.42, "learning_rate": 4.738086232980333e-05, "loss": 1.9623, "step": 556 }, { "epoch": 0.42, "learning_rate": 4.7376134644478064e-05, "loss": 2.0878, "step": 557 }, { "epoch": 0.42, "learning_rate": 4.73714069591528e-05, "loss": 2.0773, "step": 558 }, { "epoch": 0.42, "learning_rate": 4.7366679273827535e-05, "loss": 1.9323, "step": 559 }, { "epoch": 0.42, "learning_rate": 4.7361951588502274e-05, "loss": 2.0508, "step": 560 }, { "epoch": 0.42, "learning_rate": 4.7357223903177006e-05, "loss": 1.8543, "step": 561 }, { "epoch": 0.43, "learning_rate": 4.7352496217851745e-05, "loss": 2.0394, "step": 562 }, { "epoch": 0.43, "learning_rate": 4.7347768532526477e-05, "loss": 2.0244, "step": 563 }, { "epoch": 0.43, "learning_rate": 4.7343040847201215e-05, "loss": 2.1197, "step": 564 }, { "epoch": 0.43, "learning_rate": 4.733831316187595e-05, "loss": 2.1361, "step": 565 }, { "epoch": 0.43, "learning_rate": 4.7333585476550686e-05, "loss": 1.9381, "step": 566 }, { "epoch": 0.43, "learning_rate": 4.732885779122542e-05, "loss": 1.9249, "step": 567 }, { "epoch": 0.43, "learning_rate": 4.732413010590016e-05, "loss": 2.0023, "step": 568 }, { "epoch": 0.43, "learning_rate": 4.731940242057489e-05, "loss": 1.9195, "step": 569 }, { "epoch": 0.43, "learning_rate": 4.731467473524963e-05, "loss": 2.0467, "step": 570 }, { "epoch": 0.43, "learning_rate": 4.730994704992436e-05, "loss": 1.9787, "step": 571 }, { "epoch": 0.43, "learning_rate": 4.73052193645991e-05, "loss": 2.1199, "step": 572 }, { "epoch": 0.43, "learning_rate": 4.730049167927383e-05, "loss": 1.9617, "step": 573 }, { "epoch": 0.43, "learning_rate": 4.729576399394856e-05, "loss": 1.9769, "step": 574 }, { "epoch": 0.43, "learning_rate": 4.72910363086233e-05, "loss": 2.093, "step": 575 }, { "epoch": 0.44, "learning_rate": 4.7286308623298034e-05, "loss": 1.9938, "step": 576 }, { "epoch": 0.44, "learning_rate": 4.7281580937972766e-05, "loss": 1.8356, "step": 577 }, { "epoch": 0.44, "learning_rate": 4.7276853252647505e-05, "loss": 1.9721, "step": 578 }, { "epoch": 0.44, "learning_rate": 4.727212556732224e-05, "loss": 1.9555, "step": 579 }, { "epoch": 0.44, "learning_rate": 4.7267397881996976e-05, "loss": 2.1106, "step": 580 }, { "epoch": 0.44, "learning_rate": 4.726267019667171e-05, "loss": 2.0643, "step": 581 }, { "epoch": 0.44, "learning_rate": 4.725794251134645e-05, "loss": 2.0726, "step": 582 }, { "epoch": 0.44, "learning_rate": 4.725321482602118e-05, "loss": 2.0121, "step": 583 }, { "epoch": 0.44, "learning_rate": 4.724848714069592e-05, "loss": 1.9718, "step": 584 }, { "epoch": 0.44, "learning_rate": 4.724375945537065e-05, "loss": 1.9407, "step": 585 }, { "epoch": 0.44, "learning_rate": 4.723903177004539e-05, "loss": 2.0019, "step": 586 }, { "epoch": 0.44, "learning_rate": 4.723430408472012e-05, "loss": 2.0949, "step": 587 }, { "epoch": 0.44, "learning_rate": 4.722957639939486e-05, "loss": 2.0807, "step": 588 }, { "epoch": 0.45, "learning_rate": 4.722484871406959e-05, "loss": 2.092, "step": 589 }, { "epoch": 0.45, "learning_rate": 4.722012102874433e-05, "loss": 2.0374, "step": 590 }, { "epoch": 0.45, "learning_rate": 4.721539334341906e-05, "loss": 2.0948, "step": 591 }, { "epoch": 0.45, "learning_rate": 4.72106656580938e-05, "loss": 2.0181, "step": 592 }, { "epoch": 0.45, "learning_rate": 4.720593797276853e-05, "loss": 2.0157, "step": 593 }, { "epoch": 0.45, "learning_rate": 4.720121028744327e-05, "loss": 2.0252, "step": 594 }, { "epoch": 0.45, "learning_rate": 4.7196482602118004e-05, "loss": 1.9758, "step": 595 }, { "epoch": 0.45, "learning_rate": 4.719175491679274e-05, "loss": 1.8898, "step": 596 }, { "epoch": 0.45, "learning_rate": 4.7187027231467475e-05, "loss": 2.0704, "step": 597 }, { "epoch": 0.45, "learning_rate": 4.7182299546142214e-05, "loss": 2.0232, "step": 598 }, { "epoch": 0.45, "learning_rate": 4.7177571860816946e-05, "loss": 1.8325, "step": 599 }, { "epoch": 0.45, "learning_rate": 4.7172844175491685e-05, "loss": 1.937, "step": 600 }, { "epoch": 0.45, "learning_rate": 4.716811649016642e-05, "loss": 2.0705, "step": 601 }, { "epoch": 0.46, "learning_rate": 4.716338880484115e-05, "loss": 2.0465, "step": 602 }, { "epoch": 0.46, "learning_rate": 4.715866111951589e-05, "loss": 1.9913, "step": 603 }, { "epoch": 0.46, "learning_rate": 4.715393343419062e-05, "loss": 2.0207, "step": 604 }, { "epoch": 0.46, "learning_rate": 4.714920574886536e-05, "loss": 2.1071, "step": 605 }, { "epoch": 0.46, "learning_rate": 4.714447806354009e-05, "loss": 2.0473, "step": 606 }, { "epoch": 0.46, "learning_rate": 4.713975037821483e-05, "loss": 2.0446, "step": 607 }, { "epoch": 0.46, "learning_rate": 4.713502269288956e-05, "loss": 2.1629, "step": 608 }, { "epoch": 0.46, "learning_rate": 4.71302950075643e-05, "loss": 1.8831, "step": 609 }, { "epoch": 0.46, "learning_rate": 4.712556732223903e-05, "loss": 2.0923, "step": 610 }, { "epoch": 0.46, "learning_rate": 4.712083963691377e-05, "loss": 1.9737, "step": 611 }, { "epoch": 0.46, "learning_rate": 4.71161119515885e-05, "loss": 1.9521, "step": 612 }, { "epoch": 0.46, "learning_rate": 4.711138426626324e-05, "loss": 1.9362, "step": 613 }, { "epoch": 0.46, "learning_rate": 4.7106656580937974e-05, "loss": 2.1398, "step": 614 }, { "epoch": 0.47, "learning_rate": 4.710192889561271e-05, "loss": 1.9738, "step": 615 }, { "epoch": 0.47, "learning_rate": 4.7097201210287445e-05, "loss": 1.9791, "step": 616 }, { "epoch": 0.47, "learning_rate": 4.7092473524962184e-05, "loss": 2.079, "step": 617 }, { "epoch": 0.47, "learning_rate": 4.7087745839636916e-05, "loss": 1.8996, "step": 618 }, { "epoch": 0.47, "learning_rate": 4.7083018154311655e-05, "loss": 1.8841, "step": 619 }, { "epoch": 0.47, "learning_rate": 4.707829046898639e-05, "loss": 2.044, "step": 620 }, { "epoch": 0.47, "learning_rate": 4.7073562783661126e-05, "loss": 2.0999, "step": 621 }, { "epoch": 0.47, "learning_rate": 4.706883509833586e-05, "loss": 2.0328, "step": 622 }, { "epoch": 0.47, "learning_rate": 4.70641074130106e-05, "loss": 1.9476, "step": 623 }, { "epoch": 0.47, "learning_rate": 4.705937972768533e-05, "loss": 2.1309, "step": 624 }, { "epoch": 0.47, "learning_rate": 4.705465204236007e-05, "loss": 1.9712, "step": 625 }, { "epoch": 0.47, "learning_rate": 4.70499243570348e-05, "loss": 1.9772, "step": 626 }, { "epoch": 0.47, "learning_rate": 4.704519667170953e-05, "loss": 2.0459, "step": 627 }, { "epoch": 0.47, "learning_rate": 4.704046898638427e-05, "loss": 1.9972, "step": 628 }, { "epoch": 0.48, "learning_rate": 4.7035741301059e-05, "loss": 2.0651, "step": 629 }, { "epoch": 0.48, "learning_rate": 4.7031013615733735e-05, "loss": 2.0208, "step": 630 }, { "epoch": 0.48, "learning_rate": 4.7026285930408473e-05, "loss": 2.0012, "step": 631 }, { "epoch": 0.48, "learning_rate": 4.7021558245083205e-05, "loss": 2.0035, "step": 632 }, { "epoch": 0.48, "learning_rate": 4.7016830559757944e-05, "loss": 1.9165, "step": 633 }, { "epoch": 0.48, "learning_rate": 4.7012102874432676e-05, "loss": 2.0447, "step": 634 }, { "epoch": 0.48, "learning_rate": 4.7007375189107415e-05, "loss": 1.8521, "step": 635 }, { "epoch": 0.48, "learning_rate": 4.700264750378215e-05, "loss": 2.0587, "step": 636 }, { "epoch": 0.48, "learning_rate": 4.6997919818456886e-05, "loss": 1.9826, "step": 637 }, { "epoch": 0.48, "learning_rate": 4.699319213313162e-05, "loss": 1.9142, "step": 638 }, { "epoch": 0.48, "learning_rate": 4.698846444780636e-05, "loss": 1.9865, "step": 639 }, { "epoch": 0.48, "learning_rate": 4.698373676248109e-05, "loss": 1.9934, "step": 640 }, { "epoch": 0.48, "learning_rate": 4.697900907715583e-05, "loss": 2.0886, "step": 641 }, { "epoch": 0.49, "learning_rate": 4.697428139183056e-05, "loss": 2.0479, "step": 642 }, { "epoch": 0.49, "learning_rate": 4.69695537065053e-05, "loss": 2.06, "step": 643 }, { "epoch": 0.49, "learning_rate": 4.696482602118003e-05, "loss": 1.927, "step": 644 }, { "epoch": 0.49, "learning_rate": 4.696009833585477e-05, "loss": 1.9611, "step": 645 }, { "epoch": 0.49, "learning_rate": 4.69553706505295e-05, "loss": 1.8159, "step": 646 }, { "epoch": 0.49, "learning_rate": 4.695064296520424e-05, "loss": 2.0192, "step": 647 }, { "epoch": 0.49, "learning_rate": 4.694591527987897e-05, "loss": 2.0049, "step": 648 }, { "epoch": 0.49, "learning_rate": 4.694118759455371e-05, "loss": 1.8767, "step": 649 }, { "epoch": 0.49, "learning_rate": 4.6936459909228444e-05, "loss": 2.1358, "step": 650 }, { "epoch": 0.49, "learning_rate": 4.693173222390318e-05, "loss": 2.0347, "step": 651 }, { "epoch": 0.49, "learning_rate": 4.6927004538577914e-05, "loss": 1.8809, "step": 652 }, { "epoch": 0.49, "learning_rate": 4.692227685325265e-05, "loss": 1.9794, "step": 653 }, { "epoch": 0.49, "learning_rate": 4.6917549167927385e-05, "loss": 1.9197, "step": 654 }, { "epoch": 0.5, "learning_rate": 4.691282148260212e-05, "loss": 2.0027, "step": 655 }, { "epoch": 0.5, "learning_rate": 4.6908093797276856e-05, "loss": 2.0009, "step": 656 }, { "epoch": 0.5, "learning_rate": 4.690336611195159e-05, "loss": 2.1214, "step": 657 }, { "epoch": 0.5, "learning_rate": 4.689863842662632e-05, "loss": 1.9801, "step": 658 }, { "epoch": 0.5, "learning_rate": 4.689391074130106e-05, "loss": 2.1264, "step": 659 }, { "epoch": 0.5, "learning_rate": 4.688918305597579e-05, "loss": 2.097, "step": 660 }, { "epoch": 0.5, "learning_rate": 4.688445537065053e-05, "loss": 1.9603, "step": 661 }, { "epoch": 0.5, "learning_rate": 4.687972768532526e-05, "loss": 1.9159, "step": 662 }, { "epoch": 0.5, "learning_rate": 4.6875e-05, "loss": 1.9831, "step": 663 }, { "epoch": 0.5, "learning_rate": 4.687027231467474e-05, "loss": 1.9399, "step": 664 }, { "epoch": 0.5, "learning_rate": 4.686554462934947e-05, "loss": 1.9316, "step": 665 }, { "epoch": 0.5, "learning_rate": 4.686081694402421e-05, "loss": 2.0463, "step": 666 }, { "epoch": 0.5, "learning_rate": 4.685608925869894e-05, "loss": 2.0582, "step": 667 }, { "epoch": 0.51, "learning_rate": 4.685136157337368e-05, "loss": 2.0812, "step": 668 }, { "epoch": 0.51, "learning_rate": 4.6846633888048414e-05, "loss": 1.9375, "step": 669 }, { "epoch": 0.51, "learning_rate": 4.684190620272315e-05, "loss": 2.0762, "step": 670 }, { "epoch": 0.51, "learning_rate": 4.6837178517397885e-05, "loss": 2.1082, "step": 671 }, { "epoch": 0.51, "learning_rate": 4.6832450832072623e-05, "loss": 1.9596, "step": 672 }, { "epoch": 0.51, "learning_rate": 4.6827723146747355e-05, "loss": 2.0133, "step": 673 }, { "epoch": 0.51, "learning_rate": 4.6822995461422094e-05, "loss": 2.0128, "step": 674 }, { "epoch": 0.51, "learning_rate": 4.6818267776096826e-05, "loss": 1.9473, "step": 675 }, { "epoch": 0.51, "learning_rate": 4.6813540090771565e-05, "loss": 2.0467, "step": 676 }, { "epoch": 0.51, "learning_rate": 4.68088124054463e-05, "loss": 1.995, "step": 677 }, { "epoch": 0.51, "learning_rate": 4.6804084720121036e-05, "loss": 1.9448, "step": 678 }, { "epoch": 0.51, "learning_rate": 4.679935703479577e-05, "loss": 1.9968, "step": 679 }, { "epoch": 0.51, "learning_rate": 4.67946293494705e-05, "loss": 2.1408, "step": 680 }, { "epoch": 0.52, "learning_rate": 4.678990166414524e-05, "loss": 1.9973, "step": 681 }, { "epoch": 0.52, "learning_rate": 4.678517397881997e-05, "loss": 1.9362, "step": 682 }, { "epoch": 0.52, "learning_rate": 4.67804462934947e-05, "loss": 2.1031, "step": 683 }, { "epoch": 0.52, "learning_rate": 4.677571860816944e-05, "loss": 2.0301, "step": 684 }, { "epoch": 0.52, "learning_rate": 4.6770990922844174e-05, "loss": 1.9666, "step": 685 }, { "epoch": 0.52, "learning_rate": 4.676626323751891e-05, "loss": 2.0277, "step": 686 }, { "epoch": 0.52, "learning_rate": 4.6761535552193645e-05, "loss": 2.0901, "step": 687 }, { "epoch": 0.52, "learning_rate": 4.6756807866868384e-05, "loss": 1.8465, "step": 688 }, { "epoch": 0.52, "learning_rate": 4.6752080181543116e-05, "loss": 1.9978, "step": 689 }, { "epoch": 0.52, "learning_rate": 4.6747352496217855e-05, "loss": 1.9709, "step": 690 }, { "epoch": 0.52, "learning_rate": 4.674262481089259e-05, "loss": 2.033, "step": 691 }, { "epoch": 0.52, "learning_rate": 4.6737897125567326e-05, "loss": 1.9873, "step": 692 }, { "epoch": 0.52, "learning_rate": 4.673316944024206e-05, "loss": 2.0021, "step": 693 }, { "epoch": 0.52, "learning_rate": 4.6728441754916796e-05, "loss": 2.0018, "step": 694 }, { "epoch": 0.53, "learning_rate": 4.672371406959153e-05, "loss": 2.0492, "step": 695 }, { "epoch": 0.53, "learning_rate": 4.671898638426627e-05, "loss": 2.0383, "step": 696 }, { "epoch": 0.53, "learning_rate": 4.6714258698941e-05, "loss": 1.9482, "step": 697 }, { "epoch": 0.53, "learning_rate": 4.670953101361574e-05, "loss": 2.0221, "step": 698 }, { "epoch": 0.53, "learning_rate": 4.670480332829047e-05, "loss": 2.1207, "step": 699 }, { "epoch": 0.53, "learning_rate": 4.670007564296521e-05, "loss": 2.0252, "step": 700 }, { "epoch": 0.53, "learning_rate": 4.669534795763994e-05, "loss": 1.9724, "step": 701 }, { "epoch": 0.53, "learning_rate": 4.669062027231468e-05, "loss": 2.0711, "step": 702 }, { "epoch": 0.53, "learning_rate": 4.668589258698941e-05, "loss": 2.1257, "step": 703 }, { "epoch": 0.53, "learning_rate": 4.668116490166415e-05, "loss": 1.982, "step": 704 }, { "epoch": 0.53, "learning_rate": 4.667643721633888e-05, "loss": 2.012, "step": 705 }, { "epoch": 0.53, "learning_rate": 4.667170953101362e-05, "loss": 2.0941, "step": 706 }, { "epoch": 0.53, "learning_rate": 4.6666981845688354e-05, "loss": 1.9227, "step": 707 }, { "epoch": 0.54, "learning_rate": 4.6662254160363086e-05, "loss": 1.9917, "step": 708 }, { "epoch": 0.54, "learning_rate": 4.6657526475037825e-05, "loss": 1.87, "step": 709 }, { "epoch": 0.54, "learning_rate": 4.665279878971256e-05, "loss": 2.0099, "step": 710 }, { "epoch": 0.54, "learning_rate": 4.664807110438729e-05, "loss": 2.0871, "step": 711 }, { "epoch": 0.54, "learning_rate": 4.664334341906203e-05, "loss": 2.1319, "step": 712 }, { "epoch": 0.54, "learning_rate": 4.663861573373676e-05, "loss": 2.0312, "step": 713 }, { "epoch": 0.54, "learning_rate": 4.66338880484115e-05, "loss": 1.99, "step": 714 }, { "epoch": 0.54, "learning_rate": 4.662916036308623e-05, "loss": 1.9487, "step": 715 }, { "epoch": 0.54, "learning_rate": 4.662443267776097e-05, "loss": 1.9897, "step": 716 }, { "epoch": 0.54, "learning_rate": 4.66197049924357e-05, "loss": 2.0693, "step": 717 }, { "epoch": 0.54, "learning_rate": 4.661497730711044e-05, "loss": 2.0483, "step": 718 }, { "epoch": 0.54, "learning_rate": 4.661024962178517e-05, "loss": 1.8247, "step": 719 }, { "epoch": 0.54, "learning_rate": 4.660552193645991e-05, "loss": 1.9816, "step": 720 }, { "epoch": 0.55, "learning_rate": 4.660079425113464e-05, "loss": 2.1522, "step": 721 }, { "epoch": 0.55, "learning_rate": 4.659606656580938e-05, "loss": 2.1427, "step": 722 }, { "epoch": 0.55, "learning_rate": 4.6591338880484114e-05, "loss": 1.9673, "step": 723 }, { "epoch": 0.55, "learning_rate": 4.658661119515885e-05, "loss": 2.126, "step": 724 }, { "epoch": 0.55, "learning_rate": 4.658188350983359e-05, "loss": 1.9962, "step": 725 }, { "epoch": 0.55, "learning_rate": 4.6577155824508324e-05, "loss": 1.9532, "step": 726 }, { "epoch": 0.55, "learning_rate": 4.657242813918306e-05, "loss": 2.0535, "step": 727 }, { "epoch": 0.55, "learning_rate": 4.6567700453857795e-05, "loss": 2.149, "step": 728 }, { "epoch": 0.55, "learning_rate": 4.6562972768532534e-05, "loss": 2.1494, "step": 729 }, { "epoch": 0.55, "learning_rate": 4.6558245083207266e-05, "loss": 2.0947, "step": 730 }, { "epoch": 0.55, "learning_rate": 4.6553517397882005e-05, "loss": 2.1933, "step": 731 }, { "epoch": 0.55, "learning_rate": 4.654878971255674e-05, "loss": 1.9575, "step": 732 }, { "epoch": 0.55, "learning_rate": 4.654406202723147e-05, "loss": 1.9001, "step": 733 }, { "epoch": 0.56, "learning_rate": 4.653933434190621e-05, "loss": 2.112, "step": 734 }, { "epoch": 0.56, "learning_rate": 4.653460665658094e-05, "loss": 1.8703, "step": 735 }, { "epoch": 0.56, "learning_rate": 4.652987897125567e-05, "loss": 1.9642, "step": 736 }, { "epoch": 0.56, "learning_rate": 4.652515128593041e-05, "loss": 2.1135, "step": 737 }, { "epoch": 0.56, "learning_rate": 4.652042360060514e-05, "loss": 2.0098, "step": 738 }, { "epoch": 0.56, "learning_rate": 4.651569591527988e-05, "loss": 1.9651, "step": 739 }, { "epoch": 0.56, "learning_rate": 4.6510968229954613e-05, "loss": 2.0317, "step": 740 }, { "epoch": 0.56, "learning_rate": 4.650624054462935e-05, "loss": 1.986, "step": 741 }, { "epoch": 0.56, "learning_rate": 4.6501512859304084e-05, "loss": 2.1488, "step": 742 }, { "epoch": 0.56, "learning_rate": 4.649678517397882e-05, "loss": 1.9573, "step": 743 }, { "epoch": 0.56, "learning_rate": 4.6492057488653555e-05, "loss": 2.0016, "step": 744 }, { "epoch": 0.56, "learning_rate": 4.6487329803328294e-05, "loss": 1.9205, "step": 745 }, { "epoch": 0.56, "learning_rate": 4.6482602118003026e-05, "loss": 2.0317, "step": 746 }, { "epoch": 0.56, "learning_rate": 4.6477874432677765e-05, "loss": 2.1237, "step": 747 }, { "epoch": 0.57, "learning_rate": 4.64731467473525e-05, "loss": 2.0307, "step": 748 }, { "epoch": 0.57, "learning_rate": 4.6468419062027236e-05, "loss": 2.0611, "step": 749 }, { "epoch": 0.57, "learning_rate": 4.646369137670197e-05, "loss": 1.9167, "step": 750 }, { "epoch": 0.57, "learning_rate": 4.645896369137671e-05, "loss": 1.9563, "step": 751 }, { "epoch": 0.57, "learning_rate": 4.645423600605144e-05, "loss": 2.008, "step": 752 }, { "epoch": 0.57, "learning_rate": 4.644950832072618e-05, "loss": 2.1151, "step": 753 }, { "epoch": 0.57, "learning_rate": 4.644478063540091e-05, "loss": 1.9419, "step": 754 }, { "epoch": 0.57, "learning_rate": 4.644005295007565e-05, "loss": 2.0756, "step": 755 }, { "epoch": 0.57, "learning_rate": 4.643532526475038e-05, "loss": 2.1911, "step": 756 }, { "epoch": 0.57, "learning_rate": 4.643059757942512e-05, "loss": 1.9557, "step": 757 }, { "epoch": 0.57, "learning_rate": 4.642586989409985e-05, "loss": 2.0832, "step": 758 }, { "epoch": 0.57, "learning_rate": 4.642114220877459e-05, "loss": 1.9087, "step": 759 }, { "epoch": 0.57, "learning_rate": 4.641641452344932e-05, "loss": 2.2157, "step": 760 }, { "epoch": 0.58, "learning_rate": 4.6411686838124054e-05, "loss": 2.0399, "step": 761 }, { "epoch": 0.58, "learning_rate": 4.640695915279879e-05, "loss": 2.1366, "step": 762 }, { "epoch": 0.58, "learning_rate": 4.6402231467473525e-05, "loss": 1.9825, "step": 763 }, { "epoch": 0.58, "learning_rate": 4.639750378214826e-05, "loss": 2.0659, "step": 764 }, { "epoch": 0.58, "learning_rate": 4.6392776096822996e-05, "loss": 1.8721, "step": 765 }, { "epoch": 0.58, "learning_rate": 4.638804841149773e-05, "loss": 1.8921, "step": 766 }, { "epoch": 0.58, "learning_rate": 4.638332072617247e-05, "loss": 1.9436, "step": 767 }, { "epoch": 0.58, "learning_rate": 4.63785930408472e-05, "loss": 1.9984, "step": 768 }, { "epoch": 0.58, "learning_rate": 4.637386535552194e-05, "loss": 2.1202, "step": 769 }, { "epoch": 0.58, "learning_rate": 4.636913767019667e-05, "loss": 2.0339, "step": 770 }, { "epoch": 0.58, "learning_rate": 4.636440998487141e-05, "loss": 1.9306, "step": 771 }, { "epoch": 0.58, "learning_rate": 4.635968229954614e-05, "loss": 2.0467, "step": 772 }, { "epoch": 0.58, "learning_rate": 4.635495461422088e-05, "loss": 1.9731, "step": 773 }, { "epoch": 0.59, "learning_rate": 4.635022692889561e-05, "loss": 2.0685, "step": 774 }, { "epoch": 0.59, "learning_rate": 4.634549924357035e-05, "loss": 2.1566, "step": 775 }, { "epoch": 0.59, "learning_rate": 4.634077155824508e-05, "loss": 1.9014, "step": 776 }, { "epoch": 0.59, "learning_rate": 4.633604387291982e-05, "loss": 1.977, "step": 777 }, { "epoch": 0.59, "learning_rate": 4.6331316187594554e-05, "loss": 2.0002, "step": 778 }, { "epoch": 0.59, "learning_rate": 4.632658850226929e-05, "loss": 1.9837, "step": 779 }, { "epoch": 0.59, "learning_rate": 4.6321860816944025e-05, "loss": 2.0002, "step": 780 }, { "epoch": 0.59, "learning_rate": 4.6317133131618763e-05, "loss": 1.8637, "step": 781 }, { "epoch": 0.59, "learning_rate": 4.6312405446293495e-05, "loss": 2.0607, "step": 782 }, { "epoch": 0.59, "learning_rate": 4.6307677760968234e-05, "loss": 2.0389, "step": 783 }, { "epoch": 0.59, "learning_rate": 4.630295007564297e-05, "loss": 1.9727, "step": 784 }, { "epoch": 0.59, "learning_rate": 4.6298222390317705e-05, "loss": 2.0044, "step": 785 }, { "epoch": 0.59, "learning_rate": 4.629349470499244e-05, "loss": 2.0802, "step": 786 }, { "epoch": 0.6, "learning_rate": 4.6288767019667176e-05, "loss": 1.9076, "step": 787 }, { "epoch": 0.6, "learning_rate": 4.628403933434191e-05, "loss": 2.0007, "step": 788 }, { "epoch": 0.6, "learning_rate": 4.627931164901664e-05, "loss": 1.953, "step": 789 }, { "epoch": 0.6, "learning_rate": 4.627458396369138e-05, "loss": 1.9353, "step": 790 }, { "epoch": 0.6, "learning_rate": 4.626985627836611e-05, "loss": 2.0078, "step": 791 }, { "epoch": 0.6, "learning_rate": 4.626512859304085e-05, "loss": 2.0739, "step": 792 }, { "epoch": 0.6, "learning_rate": 4.626040090771558e-05, "loss": 2.0185, "step": 793 }, { "epoch": 0.6, "learning_rate": 4.625567322239032e-05, "loss": 1.99, "step": 794 }, { "epoch": 0.6, "learning_rate": 4.625094553706505e-05, "loss": 2.1165, "step": 795 }, { "epoch": 0.6, "learning_rate": 4.624621785173979e-05, "loss": 1.9798, "step": 796 }, { "epoch": 0.6, "learning_rate": 4.6241490166414524e-05, "loss": 2.0058, "step": 797 }, { "epoch": 0.6, "learning_rate": 4.623676248108926e-05, "loss": 2.213, "step": 798 }, { "epoch": 0.6, "learning_rate": 4.6232034795763995e-05, "loss": 1.9291, "step": 799 }, { "epoch": 0.61, "learning_rate": 4.6227307110438734e-05, "loss": 2.013, "step": 800 }, { "epoch": 0.61, "learning_rate": 4.6222579425113466e-05, "loss": 1.9955, "step": 801 }, { "epoch": 0.61, "learning_rate": 4.6217851739788204e-05, "loss": 2.1379, "step": 802 }, { "epoch": 0.61, "learning_rate": 4.6213124054462937e-05, "loss": 2.1353, "step": 803 }, { "epoch": 0.61, "learning_rate": 4.6208396369137675e-05, "loss": 1.9637, "step": 804 }, { "epoch": 0.61, "learning_rate": 4.620366868381241e-05, "loss": 1.8686, "step": 805 }, { "epoch": 0.61, "learning_rate": 4.6198940998487146e-05, "loss": 2.0922, "step": 806 }, { "epoch": 0.61, "learning_rate": 4.619421331316188e-05, "loss": 1.9728, "step": 807 }, { "epoch": 0.61, "learning_rate": 4.618948562783662e-05, "loss": 1.8828, "step": 808 }, { "epoch": 0.61, "learning_rate": 4.618475794251135e-05, "loss": 1.9962, "step": 809 }, { "epoch": 0.61, "learning_rate": 4.618003025718609e-05, "loss": 2.0978, "step": 810 }, { "epoch": 0.61, "learning_rate": 4.617530257186082e-05, "loss": 2.0119, "step": 811 }, { "epoch": 0.61, "learning_rate": 4.617057488653556e-05, "loss": 1.9771, "step": 812 }, { "epoch": 0.61, "learning_rate": 4.616584720121029e-05, "loss": 2.0655, "step": 813 }, { "epoch": 0.62, "learning_rate": 4.616111951588502e-05, "loss": 2.072, "step": 814 }, { "epoch": 0.62, "learning_rate": 4.615639183055976e-05, "loss": 2.0743, "step": 815 }, { "epoch": 0.62, "learning_rate": 4.6151664145234494e-05, "loss": 2.1449, "step": 816 }, { "epoch": 0.62, "learning_rate": 4.6146936459909226e-05, "loss": 2.1067, "step": 817 }, { "epoch": 0.62, "learning_rate": 4.6142208774583965e-05, "loss": 2.0394, "step": 818 }, { "epoch": 0.62, "learning_rate": 4.61374810892587e-05, "loss": 1.9803, "step": 819 }, { "epoch": 0.62, "learning_rate": 4.6132753403933436e-05, "loss": 2.1195, "step": 820 }, { "epoch": 0.62, "learning_rate": 4.612802571860817e-05, "loss": 2.1268, "step": 821 }, { "epoch": 0.62, "learning_rate": 4.612329803328291e-05, "loss": 2.0395, "step": 822 }, { "epoch": 0.62, "learning_rate": 4.611857034795764e-05, "loss": 2.0212, "step": 823 }, { "epoch": 0.62, "learning_rate": 4.611384266263238e-05, "loss": 2.0394, "step": 824 }, { "epoch": 0.62, "learning_rate": 4.610911497730711e-05, "loss": 2.007, "step": 825 }, { "epoch": 0.62, "learning_rate": 4.610438729198185e-05, "loss": 2.3236, "step": 826 }, { "epoch": 0.63, "learning_rate": 4.609965960665658e-05, "loss": 2.1389, "step": 827 }, { "epoch": 0.63, "learning_rate": 4.609493192133132e-05, "loss": 2.0798, "step": 828 }, { "epoch": 0.63, "learning_rate": 4.609020423600605e-05, "loss": 2.0415, "step": 829 }, { "epoch": 0.63, "learning_rate": 4.608547655068079e-05, "loss": 2.0602, "step": 830 }, { "epoch": 0.63, "learning_rate": 4.608074886535552e-05, "loss": 1.9149, "step": 831 }, { "epoch": 0.63, "learning_rate": 4.607602118003026e-05, "loss": 1.9145, "step": 832 }, { "epoch": 0.63, "learning_rate": 4.607129349470499e-05, "loss": 2.1503, "step": 833 }, { "epoch": 0.63, "learning_rate": 4.606656580937973e-05, "loss": 1.977, "step": 834 }, { "epoch": 0.63, "learning_rate": 4.6061838124054464e-05, "loss": 2.0126, "step": 835 }, { "epoch": 0.63, "learning_rate": 4.60571104387292e-05, "loss": 2.0527, "step": 836 }, { "epoch": 0.63, "learning_rate": 4.6052382753403935e-05, "loss": 1.8421, "step": 837 }, { "epoch": 0.63, "learning_rate": 4.6047655068078674e-05, "loss": 1.9348, "step": 838 }, { "epoch": 0.63, "learning_rate": 4.6042927382753406e-05, "loss": 2.0096, "step": 839 }, { "epoch": 0.64, "learning_rate": 4.6038199697428145e-05, "loss": 2.0469, "step": 840 }, { "epoch": 0.64, "learning_rate": 4.603347201210288e-05, "loss": 1.9703, "step": 841 }, { "epoch": 0.64, "learning_rate": 4.602874432677761e-05, "loss": 1.7962, "step": 842 }, { "epoch": 0.64, "learning_rate": 4.602401664145235e-05, "loss": 2.1118, "step": 843 }, { "epoch": 0.64, "learning_rate": 4.601928895612708e-05, "loss": 2.0867, "step": 844 }, { "epoch": 0.64, "learning_rate": 4.601456127080182e-05, "loss": 2.1095, "step": 845 }, { "epoch": 0.64, "learning_rate": 4.600983358547655e-05, "loss": 1.8726, "step": 846 }, { "epoch": 0.64, "learning_rate": 4.600510590015129e-05, "loss": 2.0879, "step": 847 }, { "epoch": 0.64, "learning_rate": 4.600037821482602e-05, "loss": 2.0028, "step": 848 }, { "epoch": 0.64, "learning_rate": 4.599565052950076e-05, "loss": 1.9042, "step": 849 }, { "epoch": 0.64, "learning_rate": 4.599092284417549e-05, "loss": 2.0401, "step": 850 }, { "epoch": 0.64, "learning_rate": 4.598619515885023e-05, "loss": 2.0293, "step": 851 }, { "epoch": 0.64, "learning_rate": 4.598146747352496e-05, "loss": 1.9845, "step": 852 }, { "epoch": 0.65, "learning_rate": 4.59767397881997e-05, "loss": 1.9064, "step": 853 }, { "epoch": 0.65, "learning_rate": 4.5972012102874434e-05, "loss": 2.0855, "step": 854 }, { "epoch": 0.65, "learning_rate": 4.596728441754917e-05, "loss": 1.9974, "step": 855 }, { "epoch": 0.65, "learning_rate": 4.5962556732223905e-05, "loss": 1.9134, "step": 856 }, { "epoch": 0.65, "learning_rate": 4.5957829046898644e-05, "loss": 2.0399, "step": 857 }, { "epoch": 0.65, "learning_rate": 4.5953101361573376e-05, "loss": 1.9572, "step": 858 }, { "epoch": 0.65, "learning_rate": 4.5948373676248115e-05, "loss": 1.9667, "step": 859 }, { "epoch": 0.65, "learning_rate": 4.594364599092285e-05, "loss": 2.1055, "step": 860 }, { "epoch": 0.65, "learning_rate": 4.5938918305597586e-05, "loss": 2.0484, "step": 861 }, { "epoch": 0.65, "learning_rate": 4.593419062027232e-05, "loss": 2.0998, "step": 862 }, { "epoch": 0.65, "learning_rate": 4.5929462934947057e-05, "loss": 1.941, "step": 863 }, { "epoch": 0.65, "learning_rate": 4.592473524962179e-05, "loss": 1.8648, "step": 864 }, { "epoch": 0.65, "learning_rate": 4.592000756429653e-05, "loss": 1.9865, "step": 865 }, { "epoch": 0.65, "learning_rate": 4.591527987897126e-05, "loss": 2.0504, "step": 866 }, { "epoch": 0.66, "learning_rate": 4.591055219364599e-05, "loss": 2.0295, "step": 867 }, { "epoch": 0.66, "learning_rate": 4.590582450832073e-05, "loss": 1.9713, "step": 868 }, { "epoch": 0.66, "learning_rate": 4.590109682299546e-05, "loss": 2.04, "step": 869 }, { "epoch": 0.66, "learning_rate": 4.5896369137670195e-05, "loss": 2.0269, "step": 870 }, { "epoch": 0.66, "learning_rate": 4.589164145234493e-05, "loss": 2.0053, "step": 871 }, { "epoch": 0.66, "learning_rate": 4.5886913767019665e-05, "loss": 1.9942, "step": 872 }, { "epoch": 0.66, "learning_rate": 4.5882186081694404e-05, "loss": 2.0882, "step": 873 }, { "epoch": 0.66, "learning_rate": 4.5877458396369136e-05, "loss": 2.0002, "step": 874 }, { "epoch": 0.66, "learning_rate": 4.5872730711043875e-05, "loss": 2.085, "step": 875 }, { "epoch": 0.66, "learning_rate": 4.586800302571861e-05, "loss": 1.9907, "step": 876 }, { "epoch": 0.66, "learning_rate": 4.5863275340393346e-05, "loss": 1.9498, "step": 877 }, { "epoch": 0.66, "learning_rate": 4.585854765506808e-05, "loss": 1.9665, "step": 878 }, { "epoch": 0.66, "learning_rate": 4.585381996974282e-05, "loss": 1.972, "step": 879 }, { "epoch": 0.67, "learning_rate": 4.584909228441755e-05, "loss": 2.0074, "step": 880 }, { "epoch": 0.67, "learning_rate": 4.584436459909229e-05, "loss": 2.1419, "step": 881 }, { "epoch": 0.67, "learning_rate": 4.583963691376702e-05, "loss": 1.9611, "step": 882 }, { "epoch": 0.67, "learning_rate": 4.583490922844176e-05, "loss": 2.0223, "step": 883 }, { "epoch": 0.67, "learning_rate": 4.583018154311649e-05, "loss": 2.0377, "step": 884 }, { "epoch": 0.67, "learning_rate": 4.582545385779123e-05, "loss": 1.9962, "step": 885 }, { "epoch": 0.67, "learning_rate": 4.582072617246596e-05, "loss": 2.056, "step": 886 }, { "epoch": 0.67, "learning_rate": 4.58159984871407e-05, "loss": 2.0463, "step": 887 }, { "epoch": 0.67, "learning_rate": 4.581127080181543e-05, "loss": 2.0105, "step": 888 }, { "epoch": 0.67, "learning_rate": 4.580654311649017e-05, "loss": 2.0342, "step": 889 }, { "epoch": 0.67, "learning_rate": 4.5801815431164903e-05, "loss": 1.8918, "step": 890 }, { "epoch": 0.67, "learning_rate": 4.579708774583964e-05, "loss": 2.045, "step": 891 }, { "epoch": 0.67, "learning_rate": 4.5792360060514374e-05, "loss": 2.1161, "step": 892 }, { "epoch": 0.68, "learning_rate": 4.578763237518911e-05, "loss": 2.0761, "step": 893 }, { "epoch": 0.68, "learning_rate": 4.5782904689863845e-05, "loss": 2.1443, "step": 894 }, { "epoch": 0.68, "learning_rate": 4.577817700453858e-05, "loss": 2.1203, "step": 895 }, { "epoch": 0.68, "learning_rate": 4.5773449319213316e-05, "loss": 1.9773, "step": 896 }, { "epoch": 0.68, "learning_rate": 4.576872163388805e-05, "loss": 1.9783, "step": 897 }, { "epoch": 0.68, "learning_rate": 4.576399394856278e-05, "loss": 1.9718, "step": 898 }, { "epoch": 0.68, "learning_rate": 4.575926626323752e-05, "loss": 1.999, "step": 899 }, { "epoch": 0.68, "learning_rate": 4.575453857791225e-05, "loss": 2.143, "step": 900 }, { "epoch": 0.68, "learning_rate": 4.574981089258699e-05, "loss": 2.0757, "step": 901 }, { "epoch": 0.68, "learning_rate": 4.574508320726172e-05, "loss": 2.0495, "step": 902 }, { "epoch": 0.68, "learning_rate": 4.574035552193646e-05, "loss": 1.8876, "step": 903 }, { "epoch": 0.68, "learning_rate": 4.57356278366112e-05, "loss": 2.1422, "step": 904 }, { "epoch": 0.68, "learning_rate": 4.573090015128593e-05, "loss": 2.0562, "step": 905 }, { "epoch": 0.69, "learning_rate": 4.572617246596067e-05, "loss": 1.9954, "step": 906 }, { "epoch": 0.69, "learning_rate": 4.57214447806354e-05, "loss": 2.0707, "step": 907 }, { "epoch": 0.69, "learning_rate": 4.571671709531014e-05, "loss": 1.9196, "step": 908 }, { "epoch": 0.69, "learning_rate": 4.5711989409984874e-05, "loss": 1.9723, "step": 909 }, { "epoch": 0.69, "learning_rate": 4.570726172465961e-05, "loss": 1.9629, "step": 910 }, { "epoch": 0.69, "learning_rate": 4.5702534039334344e-05, "loss": 1.995, "step": 911 }, { "epoch": 0.69, "learning_rate": 4.569780635400908e-05, "loss": 2.0749, "step": 912 }, { "epoch": 0.69, "learning_rate": 4.5693078668683815e-05, "loss": 2.0445, "step": 913 }, { "epoch": 0.69, "learning_rate": 4.5688350983358554e-05, "loss": 2.0426, "step": 914 }, { "epoch": 0.69, "learning_rate": 4.5683623298033286e-05, "loss": 1.8825, "step": 915 }, { "epoch": 0.69, "learning_rate": 4.5678895612708025e-05, "loss": 2.0573, "step": 916 }, { "epoch": 0.69, "learning_rate": 4.567416792738276e-05, "loss": 2.145, "step": 917 }, { "epoch": 0.69, "learning_rate": 4.5669440242057496e-05, "loss": 1.8955, "step": 918 }, { "epoch": 0.7, "learning_rate": 4.566471255673223e-05, "loss": 2.0371, "step": 919 }, { "epoch": 0.7, "learning_rate": 4.565998487140696e-05, "loss": 1.9649, "step": 920 }, { "epoch": 0.7, "learning_rate": 4.56552571860817e-05, "loss": 2.1807, "step": 921 }, { "epoch": 0.7, "learning_rate": 4.565052950075643e-05, "loss": 2.2127, "step": 922 }, { "epoch": 0.7, "learning_rate": 4.564580181543116e-05, "loss": 2.0379, "step": 923 }, { "epoch": 0.7, "learning_rate": 4.56410741301059e-05, "loss": 1.9888, "step": 924 }, { "epoch": 0.7, "learning_rate": 4.5636346444780634e-05, "loss": 2.1463, "step": 925 }, { "epoch": 0.7, "learning_rate": 4.563161875945537e-05, "loss": 1.9993, "step": 926 }, { "epoch": 0.7, "learning_rate": 4.5626891074130105e-05, "loss": 2.1424, "step": 927 }, { "epoch": 0.7, "learning_rate": 4.5622163388804844e-05, "loss": 2.0453, "step": 928 }, { "epoch": 0.7, "learning_rate": 4.5617435703479576e-05, "loss": 2.0636, "step": 929 }, { "epoch": 0.7, "learning_rate": 4.5612708018154315e-05, "loss": 2.0491, "step": 930 }, { "epoch": 0.7, "learning_rate": 4.560798033282905e-05, "loss": 2.0014, "step": 931 }, { "epoch": 0.7, "learning_rate": 4.5603252647503785e-05, "loss": 2.1246, "step": 932 }, { "epoch": 0.71, "learning_rate": 4.559852496217852e-05, "loss": 1.9253, "step": 933 }, { "epoch": 0.71, "learning_rate": 4.5593797276853256e-05, "loss": 2.0153, "step": 934 }, { "epoch": 0.71, "learning_rate": 4.558906959152799e-05, "loss": 1.9964, "step": 935 }, { "epoch": 0.71, "learning_rate": 4.558434190620273e-05, "loss": 2.0344, "step": 936 }, { "epoch": 0.71, "learning_rate": 4.557961422087746e-05, "loss": 2.0204, "step": 937 }, { "epoch": 0.71, "learning_rate": 4.55748865355522e-05, "loss": 1.9611, "step": 938 }, { "epoch": 0.71, "learning_rate": 4.557015885022693e-05, "loss": 2.0793, "step": 939 }, { "epoch": 0.71, "learning_rate": 4.556543116490167e-05, "loss": 2.0296, "step": 940 }, { "epoch": 0.71, "learning_rate": 4.55607034795764e-05, "loss": 2.0578, "step": 941 }, { "epoch": 0.71, "learning_rate": 4.555597579425114e-05, "loss": 1.9277, "step": 942 }, { "epoch": 0.71, "learning_rate": 4.555124810892587e-05, "loss": 2.127, "step": 943 }, { "epoch": 0.71, "learning_rate": 4.554652042360061e-05, "loss": 1.9804, "step": 944 }, { "epoch": 0.71, "learning_rate": 4.554179273827534e-05, "loss": 2.001, "step": 945 }, { "epoch": 0.72, "learning_rate": 4.553706505295008e-05, "loss": 2.117, "step": 946 }, { "epoch": 0.72, "learning_rate": 4.5532337367624814e-05, "loss": 2.1525, "step": 947 }, { "epoch": 0.72, "learning_rate": 4.5527609682299546e-05, "loss": 1.8391, "step": 948 }, { "epoch": 0.72, "learning_rate": 4.5522881996974285e-05, "loss": 1.9776, "step": 949 }, { "epoch": 0.72, "learning_rate": 4.551815431164902e-05, "loss": 1.9167, "step": 950 }, { "epoch": 0.72, "learning_rate": 4.551342662632375e-05, "loss": 2.0254, "step": 951 }, { "epoch": 0.72, "learning_rate": 4.550869894099849e-05, "loss": 2.0757, "step": 952 }, { "epoch": 0.72, "learning_rate": 4.550397125567322e-05, "loss": 1.9912, "step": 953 }, { "epoch": 0.72, "learning_rate": 4.549924357034796e-05, "loss": 1.9983, "step": 954 }, { "epoch": 0.72, "learning_rate": 4.549451588502269e-05, "loss": 1.9235, "step": 955 }, { "epoch": 0.72, "learning_rate": 4.548978819969743e-05, "loss": 1.8495, "step": 956 }, { "epoch": 0.72, "learning_rate": 4.548506051437216e-05, "loss": 2.2351, "step": 957 }, { "epoch": 0.72, "learning_rate": 4.54803328290469e-05, "loss": 1.9736, "step": 958 }, { "epoch": 0.73, "learning_rate": 4.547560514372163e-05, "loss": 1.8562, "step": 959 }, { "epoch": 0.73, "learning_rate": 4.547087745839637e-05, "loss": 1.8602, "step": 960 }, { "epoch": 0.73, "learning_rate": 4.54661497730711e-05, "loss": 2.0561, "step": 961 }, { "epoch": 0.73, "learning_rate": 4.546142208774584e-05, "loss": 2.0141, "step": 962 }, { "epoch": 0.73, "learning_rate": 4.5456694402420574e-05, "loss": 2.0357, "step": 963 }, { "epoch": 0.73, "learning_rate": 4.545196671709531e-05, "loss": 1.9976, "step": 964 }, { "epoch": 0.73, "learning_rate": 4.544723903177005e-05, "loss": 1.9946, "step": 965 }, { "epoch": 0.73, "learning_rate": 4.5442511346444784e-05, "loss": 1.9592, "step": 966 }, { "epoch": 0.73, "learning_rate": 4.543778366111952e-05, "loss": 1.9293, "step": 967 }, { "epoch": 0.73, "learning_rate": 4.5433055975794255e-05, "loss": 2.101, "step": 968 }, { "epoch": 0.73, "learning_rate": 4.5428328290468994e-05, "loss": 1.9383, "step": 969 }, { "epoch": 0.73, "learning_rate": 4.5423600605143726e-05, "loss": 2.0267, "step": 970 }, { "epoch": 0.73, "learning_rate": 4.5418872919818465e-05, "loss": 2.1624, "step": 971 }, { "epoch": 0.74, "learning_rate": 4.54141452344932e-05, "loss": 2.1052, "step": 972 }, { "epoch": 0.74, "learning_rate": 4.540941754916793e-05, "loss": 1.9751, "step": 973 }, { "epoch": 0.74, "learning_rate": 4.540468986384267e-05, "loss": 2.0394, "step": 974 }, { "epoch": 0.74, "learning_rate": 4.53999621785174e-05, "loss": 1.9521, "step": 975 }, { "epoch": 0.74, "learning_rate": 4.539523449319213e-05, "loss": 1.8484, "step": 976 }, { "epoch": 0.74, "learning_rate": 4.539050680786687e-05, "loss": 2.0445, "step": 977 }, { "epoch": 0.74, "learning_rate": 4.53857791225416e-05, "loss": 2.0686, "step": 978 }, { "epoch": 0.74, "learning_rate": 4.538105143721634e-05, "loss": 1.9952, "step": 979 }, { "epoch": 0.74, "learning_rate": 4.5376323751891073e-05, "loss": 1.973, "step": 980 }, { "epoch": 0.74, "learning_rate": 4.537159606656581e-05, "loss": 1.8808, "step": 981 }, { "epoch": 0.74, "learning_rate": 4.5366868381240544e-05, "loss": 1.9153, "step": 982 }, { "epoch": 0.74, "learning_rate": 4.536214069591528e-05, "loss": 2.0304, "step": 983 }, { "epoch": 0.74, "learning_rate": 4.5357413010590015e-05, "loss": 1.9612, "step": 984 }, { "epoch": 0.74, "learning_rate": 4.5352685325264754e-05, "loss": 1.9308, "step": 985 }, { "epoch": 0.75, "learning_rate": 4.5347957639939486e-05, "loss": 2.1722, "step": 986 }, { "epoch": 0.75, "learning_rate": 4.5343229954614225e-05, "loss": 1.9808, "step": 987 }, { "epoch": 0.75, "learning_rate": 4.533850226928896e-05, "loss": 1.978, "step": 988 }, { "epoch": 0.75, "learning_rate": 4.5333774583963696e-05, "loss": 2.0786, "step": 989 }, { "epoch": 0.75, "learning_rate": 4.532904689863843e-05, "loss": 1.9463, "step": 990 }, { "epoch": 0.75, "learning_rate": 4.532431921331317e-05, "loss": 1.8668, "step": 991 }, { "epoch": 0.75, "learning_rate": 4.53195915279879e-05, "loss": 1.9599, "step": 992 }, { "epoch": 0.75, "learning_rate": 4.531486384266264e-05, "loss": 1.9149, "step": 993 }, { "epoch": 0.75, "learning_rate": 4.531013615733737e-05, "loss": 2.0508, "step": 994 }, { "epoch": 0.75, "learning_rate": 4.530540847201211e-05, "loss": 2.0407, "step": 995 }, { "epoch": 0.75, "learning_rate": 4.530068078668684e-05, "loss": 1.9946, "step": 996 }, { "epoch": 0.75, "learning_rate": 4.529595310136158e-05, "loss": 2.0989, "step": 997 }, { "epoch": 0.75, "learning_rate": 4.529122541603631e-05, "loss": 2.0359, "step": 998 }, { "epoch": 0.76, "learning_rate": 4.528649773071105e-05, "loss": 2.1105, "step": 999 }, { "epoch": 0.76, "learning_rate": 4.528177004538578e-05, "loss": 2.059, "step": 1000 }, { "epoch": 0.76, "learning_rate": 4.5277042360060514e-05, "loss": 2.1039, "step": 1001 }, { "epoch": 0.76, "learning_rate": 4.527231467473525e-05, "loss": 2.0329, "step": 1002 }, { "epoch": 0.76, "learning_rate": 4.5267586989409985e-05, "loss": 2.0521, "step": 1003 }, { "epoch": 0.76, "learning_rate": 4.526285930408472e-05, "loss": 2.0762, "step": 1004 }, { "epoch": 0.76, "learning_rate": 4.5258131618759456e-05, "loss": 1.9445, "step": 1005 }, { "epoch": 0.76, "learning_rate": 4.525340393343419e-05, "loss": 1.9423, "step": 1006 }, { "epoch": 0.76, "learning_rate": 4.524867624810893e-05, "loss": 1.9304, "step": 1007 }, { "epoch": 0.76, "learning_rate": 4.524394856278366e-05, "loss": 2.1053, "step": 1008 }, { "epoch": 0.76, "learning_rate": 4.52392208774584e-05, "loss": 2.0303, "step": 1009 }, { "epoch": 0.76, "learning_rate": 4.523449319213313e-05, "loss": 1.8966, "step": 1010 }, { "epoch": 0.76, "learning_rate": 4.522976550680787e-05, "loss": 2.0294, "step": 1011 }, { "epoch": 0.77, "learning_rate": 4.52250378214826e-05, "loss": 2.042, "step": 1012 }, { "epoch": 0.77, "learning_rate": 4.522031013615734e-05, "loss": 2.049, "step": 1013 }, { "epoch": 0.77, "learning_rate": 4.521558245083207e-05, "loss": 1.9808, "step": 1014 }, { "epoch": 0.77, "learning_rate": 4.521085476550681e-05, "loss": 1.934, "step": 1015 }, { "epoch": 0.77, "learning_rate": 4.520612708018154e-05, "loss": 1.9934, "step": 1016 }, { "epoch": 0.77, "learning_rate": 4.520139939485628e-05, "loss": 2.0257, "step": 1017 }, { "epoch": 0.77, "learning_rate": 4.5196671709531014e-05, "loss": 2.0398, "step": 1018 }, { "epoch": 0.77, "learning_rate": 4.519194402420575e-05, "loss": 2.0448, "step": 1019 }, { "epoch": 0.77, "learning_rate": 4.5187216338880485e-05, "loss": 2.0634, "step": 1020 }, { "epoch": 0.77, "learning_rate": 4.518248865355522e-05, "loss": 1.9742, "step": 1021 }, { "epoch": 0.77, "learning_rate": 4.5177760968229955e-05, "loss": 2.0328, "step": 1022 }, { "epoch": 0.77, "learning_rate": 4.5173033282904694e-05, "loss": 1.906, "step": 1023 }, { "epoch": 0.77, "learning_rate": 4.516830559757943e-05, "loss": 2.0367, "step": 1024 }, { "epoch": 0.78, "learning_rate": 4.5163577912254165e-05, "loss": 2.0815, "step": 1025 }, { "epoch": 0.78, "learning_rate": 4.51588502269289e-05, "loss": 1.9919, "step": 1026 }, { "epoch": 0.78, "learning_rate": 4.5154122541603636e-05, "loss": 1.9553, "step": 1027 }, { "epoch": 0.78, "learning_rate": 4.514939485627837e-05, "loss": 1.9898, "step": 1028 }, { "epoch": 0.78, "learning_rate": 4.51446671709531e-05, "loss": 1.907, "step": 1029 }, { "epoch": 0.78, "learning_rate": 4.513993948562784e-05, "loss": 1.9774, "step": 1030 }, { "epoch": 0.78, "learning_rate": 4.513521180030257e-05, "loss": 1.9224, "step": 1031 }, { "epoch": 0.78, "learning_rate": 4.513048411497731e-05, "loss": 2.0185, "step": 1032 }, { "epoch": 0.78, "learning_rate": 4.512575642965204e-05, "loss": 2.099, "step": 1033 }, { "epoch": 0.78, "learning_rate": 4.512102874432678e-05, "loss": 1.9975, "step": 1034 }, { "epoch": 0.78, "learning_rate": 4.511630105900151e-05, "loss": 2.0393, "step": 1035 }, { "epoch": 0.78, "learning_rate": 4.511157337367625e-05, "loss": 2.0027, "step": 1036 }, { "epoch": 0.78, "learning_rate": 4.5106845688350984e-05, "loss": 1.9128, "step": 1037 }, { "epoch": 0.79, "learning_rate": 4.510211800302572e-05, "loss": 1.947, "step": 1038 }, { "epoch": 0.79, "learning_rate": 4.5097390317700455e-05, "loss": 1.9764, "step": 1039 }, { "epoch": 0.79, "learning_rate": 4.5092662632375193e-05, "loss": 1.934, "step": 1040 }, { "epoch": 0.79, "learning_rate": 4.5087934947049926e-05, "loss": 1.9708, "step": 1041 }, { "epoch": 0.79, "learning_rate": 4.5083207261724664e-05, "loss": 2.0312, "step": 1042 }, { "epoch": 0.79, "learning_rate": 4.5078479576399396e-05, "loss": 1.8832, "step": 1043 }, { "epoch": 0.79, "learning_rate": 4.5073751891074135e-05, "loss": 2.0414, "step": 1044 }, { "epoch": 0.79, "learning_rate": 4.506902420574887e-05, "loss": 1.9327, "step": 1045 }, { "epoch": 0.79, "learning_rate": 4.5064296520423606e-05, "loss": 2.1718, "step": 1046 }, { "epoch": 0.79, "learning_rate": 4.505956883509834e-05, "loss": 2.0347, "step": 1047 }, { "epoch": 0.79, "learning_rate": 4.505484114977308e-05, "loss": 2.1503, "step": 1048 }, { "epoch": 0.79, "learning_rate": 4.505011346444781e-05, "loss": 1.9763, "step": 1049 }, { "epoch": 0.79, "learning_rate": 4.504538577912255e-05, "loss": 1.9731, "step": 1050 }, { "epoch": 0.79, "learning_rate": 4.504065809379728e-05, "loss": 2.0236, "step": 1051 }, { "epoch": 0.8, "learning_rate": 4.503593040847202e-05, "loss": 1.9996, "step": 1052 }, { "epoch": 0.8, "learning_rate": 4.503120272314675e-05, "loss": 1.8836, "step": 1053 }, { "epoch": 0.8, "learning_rate": 4.502647503782148e-05, "loss": 2.0622, "step": 1054 }, { "epoch": 0.8, "learning_rate": 4.502174735249622e-05, "loss": 1.8668, "step": 1055 }, { "epoch": 0.8, "learning_rate": 4.5017019667170954e-05, "loss": 2.0067, "step": 1056 }, { "epoch": 0.8, "learning_rate": 4.5012291981845686e-05, "loss": 2.0207, "step": 1057 }, { "epoch": 0.8, "learning_rate": 4.5007564296520425e-05, "loss": 1.9659, "step": 1058 }, { "epoch": 0.8, "learning_rate": 4.500283661119516e-05, "loss": 1.8786, "step": 1059 }, { "epoch": 0.8, "learning_rate": 4.4998108925869896e-05, "loss": 1.9685, "step": 1060 }, { "epoch": 0.8, "learning_rate": 4.499338124054463e-05, "loss": 1.9907, "step": 1061 }, { "epoch": 0.8, "learning_rate": 4.4988653555219367e-05, "loss": 1.9875, "step": 1062 }, { "epoch": 0.8, "learning_rate": 4.49839258698941e-05, "loss": 2.1072, "step": 1063 }, { "epoch": 0.8, "learning_rate": 4.497919818456884e-05, "loss": 1.9653, "step": 1064 }, { "epoch": 0.81, "learning_rate": 4.497447049924357e-05, "loss": 2.0709, "step": 1065 }, { "epoch": 0.81, "learning_rate": 4.496974281391831e-05, "loss": 2.1283, "step": 1066 }, { "epoch": 0.81, "learning_rate": 4.496501512859304e-05, "loss": 1.9562, "step": 1067 }, { "epoch": 0.81, "learning_rate": 4.496028744326778e-05, "loss": 1.8642, "step": 1068 }, { "epoch": 0.81, "learning_rate": 4.495555975794251e-05, "loss": 2.008, "step": 1069 }, { "epoch": 0.81, "learning_rate": 4.495083207261725e-05, "loss": 2.0039, "step": 1070 }, { "epoch": 0.81, "learning_rate": 4.494610438729198e-05, "loss": 1.998, "step": 1071 }, { "epoch": 0.81, "learning_rate": 4.494137670196672e-05, "loss": 1.9448, "step": 1072 }, { "epoch": 0.81, "learning_rate": 4.493664901664145e-05, "loss": 2.0091, "step": 1073 }, { "epoch": 0.81, "learning_rate": 4.493192133131619e-05, "loss": 1.9875, "step": 1074 }, { "epoch": 0.81, "learning_rate": 4.4927193645990924e-05, "loss": 1.8782, "step": 1075 }, { "epoch": 0.81, "learning_rate": 4.492246596066566e-05, "loss": 1.9391, "step": 1076 }, { "epoch": 0.81, "learning_rate": 4.4917738275340395e-05, "loss": 1.9873, "step": 1077 }, { "epoch": 0.82, "learning_rate": 4.4913010590015134e-05, "loss": 2.0604, "step": 1078 }, { "epoch": 0.82, "learning_rate": 4.4908282904689866e-05, "loss": 1.9803, "step": 1079 }, { "epoch": 0.82, "learning_rate": 4.4903555219364605e-05, "loss": 2.0415, "step": 1080 }, { "epoch": 0.82, "learning_rate": 4.489882753403934e-05, "loss": 2.0557, "step": 1081 }, { "epoch": 0.82, "learning_rate": 4.489409984871407e-05, "loss": 2.1134, "step": 1082 }, { "epoch": 0.82, "learning_rate": 4.488937216338881e-05, "loss": 1.9881, "step": 1083 }, { "epoch": 0.82, "learning_rate": 4.488464447806354e-05, "loss": 1.9538, "step": 1084 }, { "epoch": 0.82, "learning_rate": 4.487991679273828e-05, "loss": 1.8229, "step": 1085 }, { "epoch": 0.82, "learning_rate": 4.487518910741301e-05, "loss": 2.0652, "step": 1086 }, { "epoch": 0.82, "learning_rate": 4.487046142208775e-05, "loss": 2.014, "step": 1087 }, { "epoch": 0.82, "learning_rate": 4.486573373676248e-05, "loss": 2.0786, "step": 1088 }, { "epoch": 0.82, "learning_rate": 4.486100605143722e-05, "loss": 2.0479, "step": 1089 }, { "epoch": 0.82, "learning_rate": 4.485627836611195e-05, "loss": 1.8588, "step": 1090 }, { "epoch": 0.83, "learning_rate": 4.485155068078669e-05, "loss": 2.0716, "step": 1091 }, { "epoch": 0.83, "learning_rate": 4.484682299546142e-05, "loss": 2.0001, "step": 1092 }, { "epoch": 0.83, "learning_rate": 4.484209531013616e-05, "loss": 2.0745, "step": 1093 }, { "epoch": 0.83, "learning_rate": 4.4837367624810894e-05, "loss": 1.9944, "step": 1094 }, { "epoch": 0.83, "learning_rate": 4.483263993948563e-05, "loss": 1.9736, "step": 1095 }, { "epoch": 0.83, "learning_rate": 4.4827912254160365e-05, "loss": 2.0684, "step": 1096 }, { "epoch": 0.83, "learning_rate": 4.4823184568835104e-05, "loss": 2.0382, "step": 1097 }, { "epoch": 0.83, "learning_rate": 4.4818456883509836e-05, "loss": 1.9479, "step": 1098 }, { "epoch": 0.83, "learning_rate": 4.4813729198184575e-05, "loss": 2.1523, "step": 1099 }, { "epoch": 0.83, "learning_rate": 4.480900151285931e-05, "loss": 1.9328, "step": 1100 }, { "epoch": 0.83, "learning_rate": 4.4804273827534046e-05, "loss": 2.0274, "step": 1101 }, { "epoch": 0.83, "learning_rate": 4.479954614220878e-05, "loss": 2.0468, "step": 1102 }, { "epoch": 0.83, "learning_rate": 4.4794818456883517e-05, "loss": 2.0977, "step": 1103 }, { "epoch": 0.83, "learning_rate": 4.479009077155825e-05, "loss": 1.9469, "step": 1104 }, { "epoch": 0.84, "learning_rate": 4.478536308623299e-05, "loss": 1.9386, "step": 1105 }, { "epoch": 0.84, "learning_rate": 4.478063540090772e-05, "loss": 1.9917, "step": 1106 }, { "epoch": 0.84, "learning_rate": 4.477590771558245e-05, "loss": 2.1049, "step": 1107 }, { "epoch": 0.84, "learning_rate": 4.477118003025719e-05, "loss": 1.9923, "step": 1108 }, { "epoch": 0.84, "learning_rate": 4.476645234493192e-05, "loss": 2.0673, "step": 1109 }, { "epoch": 0.84, "learning_rate": 4.4761724659606654e-05, "loss": 1.9602, "step": 1110 }, { "epoch": 0.84, "learning_rate": 4.475699697428139e-05, "loss": 1.9975, "step": 1111 }, { "epoch": 0.84, "learning_rate": 4.4752269288956125e-05, "loss": 1.9682, "step": 1112 }, { "epoch": 0.84, "learning_rate": 4.4747541603630864e-05, "loss": 2.0077, "step": 1113 }, { "epoch": 0.84, "learning_rate": 4.4742813918305596e-05, "loss": 2.1605, "step": 1114 }, { "epoch": 0.84, "learning_rate": 4.4738086232980335e-05, "loss": 2.0586, "step": 1115 }, { "epoch": 0.84, "learning_rate": 4.473335854765507e-05, "loss": 2.1005, "step": 1116 }, { "epoch": 0.84, "learning_rate": 4.4728630862329806e-05, "loss": 2.2024, "step": 1117 }, { "epoch": 0.85, "learning_rate": 4.472390317700454e-05, "loss": 2.0624, "step": 1118 }, { "epoch": 0.85, "learning_rate": 4.471917549167928e-05, "loss": 1.9187, "step": 1119 }, { "epoch": 0.85, "learning_rate": 4.471444780635401e-05, "loss": 2.1008, "step": 1120 }, { "epoch": 0.85, "learning_rate": 4.470972012102875e-05, "loss": 2.2395, "step": 1121 }, { "epoch": 0.85, "learning_rate": 4.470499243570348e-05, "loss": 2.0519, "step": 1122 }, { "epoch": 0.85, "learning_rate": 4.470026475037822e-05, "loss": 1.9914, "step": 1123 }, { "epoch": 0.85, "learning_rate": 4.469553706505295e-05, "loss": 2.0299, "step": 1124 }, { "epoch": 0.85, "learning_rate": 4.469080937972769e-05, "loss": 1.921, "step": 1125 }, { "epoch": 0.85, "learning_rate": 4.468608169440242e-05, "loss": 1.9605, "step": 1126 }, { "epoch": 0.85, "learning_rate": 4.468135400907716e-05, "loss": 1.9685, "step": 1127 }, { "epoch": 0.85, "learning_rate": 4.467662632375189e-05, "loss": 2.0456, "step": 1128 }, { "epoch": 0.85, "learning_rate": 4.467189863842663e-05, "loss": 1.8312, "step": 1129 }, { "epoch": 0.85, "learning_rate": 4.4667170953101363e-05, "loss": 2.1349, "step": 1130 }, { "epoch": 0.86, "learning_rate": 4.46624432677761e-05, "loss": 2.1713, "step": 1131 }, { "epoch": 0.86, "learning_rate": 4.4657715582450834e-05, "loss": 2.0393, "step": 1132 }, { "epoch": 0.86, "learning_rate": 4.465298789712557e-05, "loss": 2.1671, "step": 1133 }, { "epoch": 0.86, "learning_rate": 4.4648260211800305e-05, "loss": 1.9418, "step": 1134 }, { "epoch": 0.86, "learning_rate": 4.464353252647504e-05, "loss": 1.8798, "step": 1135 }, { "epoch": 0.86, "learning_rate": 4.4638804841149776e-05, "loss": 2.025, "step": 1136 }, { "epoch": 0.86, "learning_rate": 4.463407715582451e-05, "loss": 1.9963, "step": 1137 }, { "epoch": 0.86, "learning_rate": 4.462934947049924e-05, "loss": 1.9496, "step": 1138 }, { "epoch": 0.86, "learning_rate": 4.462462178517398e-05, "loss": 1.8681, "step": 1139 }, { "epoch": 0.86, "learning_rate": 4.461989409984871e-05, "loss": 1.9093, "step": 1140 }, { "epoch": 0.86, "learning_rate": 4.461516641452345e-05, "loss": 1.9739, "step": 1141 }, { "epoch": 0.86, "learning_rate": 4.461043872919818e-05, "loss": 1.9765, "step": 1142 }, { "epoch": 0.86, "learning_rate": 4.460571104387292e-05, "loss": 2.0152, "step": 1143 }, { "epoch": 0.87, "learning_rate": 4.460098335854766e-05, "loss": 1.9865, "step": 1144 }, { "epoch": 0.87, "learning_rate": 4.459625567322239e-05, "loss": 1.8557, "step": 1145 }, { "epoch": 0.87, "learning_rate": 4.459152798789713e-05, "loss": 2.0907, "step": 1146 }, { "epoch": 0.87, "learning_rate": 4.458680030257186e-05, "loss": 1.9935, "step": 1147 }, { "epoch": 0.87, "learning_rate": 4.45820726172466e-05, "loss": 1.9475, "step": 1148 }, { "epoch": 0.87, "learning_rate": 4.4577344931921334e-05, "loss": 2.0259, "step": 1149 }, { "epoch": 0.87, "learning_rate": 4.457261724659607e-05, "loss": 1.8934, "step": 1150 }, { "epoch": 0.87, "learning_rate": 4.4567889561270804e-05, "loss": 2.0507, "step": 1151 }, { "epoch": 0.87, "learning_rate": 4.456316187594554e-05, "loss": 1.9941, "step": 1152 }, { "epoch": 0.87, "learning_rate": 4.4558434190620275e-05, "loss": 1.9518, "step": 1153 }, { "epoch": 0.87, "learning_rate": 4.4553706505295014e-05, "loss": 2.0914, "step": 1154 }, { "epoch": 0.87, "learning_rate": 4.4548978819969746e-05, "loss": 1.9503, "step": 1155 }, { "epoch": 0.87, "learning_rate": 4.4544251134644485e-05, "loss": 2.0164, "step": 1156 }, { "epoch": 0.88, "learning_rate": 4.453952344931922e-05, "loss": 2.0177, "step": 1157 }, { "epoch": 0.88, "learning_rate": 4.4534795763993956e-05, "loss": 2.0289, "step": 1158 }, { "epoch": 0.88, "learning_rate": 4.453006807866869e-05, "loss": 1.9981, "step": 1159 }, { "epoch": 0.88, "learning_rate": 4.452534039334342e-05, "loss": 2.1372, "step": 1160 }, { "epoch": 0.88, "learning_rate": 4.452061270801816e-05, "loss": 2.0588, "step": 1161 }, { "epoch": 0.88, "learning_rate": 4.451588502269289e-05, "loss": 1.9849, "step": 1162 }, { "epoch": 0.88, "learning_rate": 4.451115733736762e-05, "loss": 1.9594, "step": 1163 }, { "epoch": 0.88, "learning_rate": 4.450642965204236e-05, "loss": 1.9895, "step": 1164 }, { "epoch": 0.88, "learning_rate": 4.4501701966717094e-05, "loss": 2.0632, "step": 1165 }, { "epoch": 0.88, "learning_rate": 4.449697428139183e-05, "loss": 1.9899, "step": 1166 }, { "epoch": 0.88, "learning_rate": 4.4492246596066565e-05, "loss": 1.9934, "step": 1167 }, { "epoch": 0.88, "learning_rate": 4.4487518910741304e-05, "loss": 2.0015, "step": 1168 }, { "epoch": 0.88, "learning_rate": 4.4482791225416036e-05, "loss": 1.9206, "step": 1169 }, { "epoch": 0.88, "learning_rate": 4.4478063540090775e-05, "loss": 2.0638, "step": 1170 }, { "epoch": 0.89, "learning_rate": 4.4473335854765507e-05, "loss": 2.0885, "step": 1171 }, { "epoch": 0.89, "learning_rate": 4.4468608169440245e-05, "loss": 2.1329, "step": 1172 }, { "epoch": 0.89, "learning_rate": 4.446388048411498e-05, "loss": 2.0848, "step": 1173 }, { "epoch": 0.89, "learning_rate": 4.4459152798789716e-05, "loss": 2.0424, "step": 1174 }, { "epoch": 0.89, "learning_rate": 4.445442511346445e-05, "loss": 2.0171, "step": 1175 }, { "epoch": 0.89, "learning_rate": 4.444969742813919e-05, "loss": 1.9713, "step": 1176 }, { "epoch": 0.89, "learning_rate": 4.444496974281392e-05, "loss": 1.9072, "step": 1177 }, { "epoch": 0.89, "learning_rate": 4.444024205748866e-05, "loss": 2.1101, "step": 1178 }, { "epoch": 0.89, "learning_rate": 4.443551437216339e-05, "loss": 2.1196, "step": 1179 }, { "epoch": 0.89, "learning_rate": 4.443078668683813e-05, "loss": 2.0584, "step": 1180 }, { "epoch": 0.89, "learning_rate": 4.442605900151286e-05, "loss": 2.0529, "step": 1181 }, { "epoch": 0.89, "learning_rate": 4.44213313161876e-05, "loss": 2.1246, "step": 1182 }, { "epoch": 0.89, "learning_rate": 4.441660363086233e-05, "loss": 1.9808, "step": 1183 }, { "epoch": 0.9, "learning_rate": 4.441187594553707e-05, "loss": 2.0232, "step": 1184 }, { "epoch": 0.9, "learning_rate": 4.44071482602118e-05, "loss": 2.035, "step": 1185 }, { "epoch": 0.9, "learning_rate": 4.440242057488654e-05, "loss": 1.9754, "step": 1186 }, { "epoch": 0.9, "learning_rate": 4.4397692889561274e-05, "loss": 1.9198, "step": 1187 }, { "epoch": 0.9, "learning_rate": 4.4392965204236006e-05, "loss": 2.0763, "step": 1188 }, { "epoch": 0.9, "learning_rate": 4.4388237518910745e-05, "loss": 1.9196, "step": 1189 }, { "epoch": 0.9, "learning_rate": 4.438350983358548e-05, "loss": 1.9901, "step": 1190 }, { "epoch": 0.9, "learning_rate": 4.437878214826021e-05, "loss": 1.9, "step": 1191 }, { "epoch": 0.9, "learning_rate": 4.437405446293495e-05, "loss": 2.1058, "step": 1192 }, { "epoch": 0.9, "learning_rate": 4.436932677760968e-05, "loss": 2.0535, "step": 1193 }, { "epoch": 0.9, "learning_rate": 4.436459909228442e-05, "loss": 1.9386, "step": 1194 }, { "epoch": 0.9, "learning_rate": 4.435987140695915e-05, "loss": 2.0213, "step": 1195 }, { "epoch": 0.9, "learning_rate": 4.435514372163389e-05, "loss": 1.8508, "step": 1196 }, { "epoch": 0.91, "learning_rate": 4.435041603630862e-05, "loss": 1.9433, "step": 1197 }, { "epoch": 0.91, "learning_rate": 4.434568835098336e-05, "loss": 2.101, "step": 1198 }, { "epoch": 0.91, "learning_rate": 4.434096066565809e-05, "loss": 2.024, "step": 1199 }, { "epoch": 0.91, "learning_rate": 4.433623298033283e-05, "loss": 2.0493, "step": 1200 }, { "epoch": 0.91, "learning_rate": 4.433150529500756e-05, "loss": 1.9834, "step": 1201 }, { "epoch": 0.91, "learning_rate": 4.43267776096823e-05, "loss": 1.9577, "step": 1202 }, { "epoch": 0.91, "learning_rate": 4.4322049924357034e-05, "loss": 1.9897, "step": 1203 }, { "epoch": 0.91, "learning_rate": 4.431732223903177e-05, "loss": 2.0251, "step": 1204 }, { "epoch": 0.91, "learning_rate": 4.431259455370651e-05, "loss": 1.9344, "step": 1205 }, { "epoch": 0.91, "learning_rate": 4.4307866868381244e-05, "loss": 2.041, "step": 1206 }, { "epoch": 0.91, "learning_rate": 4.430313918305598e-05, "loss": 2.0699, "step": 1207 }, { "epoch": 0.91, "learning_rate": 4.4298411497730715e-05, "loss": 1.8758, "step": 1208 }, { "epoch": 0.91, "learning_rate": 4.4293683812405454e-05, "loss": 1.9562, "step": 1209 }, { "epoch": 0.92, "learning_rate": 4.4288956127080186e-05, "loss": 2.1196, "step": 1210 }, { "epoch": 0.92, "learning_rate": 4.4284228441754924e-05, "loss": 2.0394, "step": 1211 }, { "epoch": 0.92, "learning_rate": 4.4279500756429657e-05, "loss": 2.0055, "step": 1212 }, { "epoch": 0.92, "learning_rate": 4.427477307110439e-05, "loss": 2.0228, "step": 1213 }, { "epoch": 0.92, "learning_rate": 4.427004538577913e-05, "loss": 1.9039, "step": 1214 }, { "epoch": 0.92, "learning_rate": 4.426531770045386e-05, "loss": 2.0931, "step": 1215 }, { "epoch": 0.92, "learning_rate": 4.426059001512859e-05, "loss": 1.9317, "step": 1216 }, { "epoch": 0.92, "learning_rate": 4.425586232980333e-05, "loss": 2.0344, "step": 1217 }, { "epoch": 0.92, "learning_rate": 4.425113464447806e-05, "loss": 1.9156, "step": 1218 }, { "epoch": 0.92, "learning_rate": 4.42464069591528e-05, "loss": 1.9891, "step": 1219 }, { "epoch": 0.92, "learning_rate": 4.424167927382753e-05, "loss": 2.1506, "step": 1220 }, { "epoch": 0.92, "learning_rate": 4.423695158850227e-05, "loss": 2.144, "step": 1221 }, { "epoch": 0.92, "learning_rate": 4.4232223903177004e-05, "loss": 2.1246, "step": 1222 }, { "epoch": 0.92, "learning_rate": 4.422749621785174e-05, "loss": 1.9525, "step": 1223 }, { "epoch": 0.93, "learning_rate": 4.4222768532526475e-05, "loss": 1.9795, "step": 1224 }, { "epoch": 0.93, "learning_rate": 4.4218040847201214e-05, "loss": 2.0335, "step": 1225 }, { "epoch": 0.93, "learning_rate": 4.4213313161875946e-05, "loss": 2.075, "step": 1226 }, { "epoch": 0.93, "learning_rate": 4.4208585476550685e-05, "loss": 2.0182, "step": 1227 }, { "epoch": 0.93, "learning_rate": 4.420385779122542e-05, "loss": 2.0052, "step": 1228 }, { "epoch": 0.93, "learning_rate": 4.4199130105900156e-05, "loss": 1.9375, "step": 1229 }, { "epoch": 0.93, "learning_rate": 4.419440242057489e-05, "loss": 1.9774, "step": 1230 }, { "epoch": 0.93, "learning_rate": 4.418967473524963e-05, "loss": 1.9338, "step": 1231 }, { "epoch": 0.93, "learning_rate": 4.418494704992436e-05, "loss": 2.0199, "step": 1232 }, { "epoch": 0.93, "learning_rate": 4.41802193645991e-05, "loss": 2.0348, "step": 1233 }, { "epoch": 0.93, "learning_rate": 4.417549167927383e-05, "loss": 2.0631, "step": 1234 }, { "epoch": 0.93, "learning_rate": 4.417076399394857e-05, "loss": 1.989, "step": 1235 }, { "epoch": 0.93, "learning_rate": 4.41660363086233e-05, "loss": 2.03, "step": 1236 }, { "epoch": 0.94, "learning_rate": 4.416130862329804e-05, "loss": 2.082, "step": 1237 }, { "epoch": 0.94, "learning_rate": 4.415658093797277e-05, "loss": 1.9478, "step": 1238 }, { "epoch": 0.94, "learning_rate": 4.415185325264751e-05, "loss": 2.0342, "step": 1239 }, { "epoch": 0.94, "learning_rate": 4.414712556732224e-05, "loss": 2.031, "step": 1240 }, { "epoch": 0.94, "learning_rate": 4.4142397881996974e-05, "loss": 2.1617, "step": 1241 }, { "epoch": 0.94, "learning_rate": 4.413767019667171e-05, "loss": 1.9573, "step": 1242 }, { "epoch": 0.94, "learning_rate": 4.4132942511346445e-05, "loss": 1.9324, "step": 1243 }, { "epoch": 0.94, "learning_rate": 4.412821482602118e-05, "loss": 2.0879, "step": 1244 }, { "epoch": 0.94, "learning_rate": 4.4123487140695916e-05, "loss": 2.064, "step": 1245 }, { "epoch": 0.94, "learning_rate": 4.411875945537065e-05, "loss": 2.0583, "step": 1246 }, { "epoch": 0.94, "learning_rate": 4.411403177004539e-05, "loss": 1.9675, "step": 1247 }, { "epoch": 0.94, "learning_rate": 4.410930408472012e-05, "loss": 2.0356, "step": 1248 }, { "epoch": 0.94, "learning_rate": 4.410457639939486e-05, "loss": 1.9396, "step": 1249 }, { "epoch": 0.95, "learning_rate": 4.409984871406959e-05, "loss": 2.121, "step": 1250 }, { "epoch": 0.95, "learning_rate": 4.409512102874433e-05, "loss": 1.9724, "step": 1251 }, { "epoch": 0.95, "learning_rate": 4.409039334341906e-05, "loss": 1.9831, "step": 1252 }, { "epoch": 0.95, "learning_rate": 4.40856656580938e-05, "loss": 1.9637, "step": 1253 }, { "epoch": 0.95, "learning_rate": 4.408093797276853e-05, "loss": 2.1036, "step": 1254 }, { "epoch": 0.95, "learning_rate": 4.407621028744327e-05, "loss": 2.0063, "step": 1255 }, { "epoch": 0.95, "learning_rate": 4.4071482602118e-05, "loss": 2.0036, "step": 1256 }, { "epoch": 0.95, "learning_rate": 4.406675491679274e-05, "loss": 1.9162, "step": 1257 }, { "epoch": 0.95, "learning_rate": 4.4062027231467474e-05, "loss": 2.0343, "step": 1258 }, { "epoch": 0.95, "learning_rate": 4.405729954614221e-05, "loss": 1.955, "step": 1259 }, { "epoch": 0.95, "learning_rate": 4.4052571860816944e-05, "loss": 1.893, "step": 1260 }, { "epoch": 0.95, "learning_rate": 4.404784417549168e-05, "loss": 2.0366, "step": 1261 }, { "epoch": 0.95, "learning_rate": 4.4043116490166415e-05, "loss": 1.8854, "step": 1262 }, { "epoch": 0.96, "learning_rate": 4.4038388804841154e-05, "loss": 1.9901, "step": 1263 }, { "epoch": 0.96, "learning_rate": 4.403366111951589e-05, "loss": 1.99, "step": 1264 }, { "epoch": 0.96, "learning_rate": 4.4028933434190625e-05, "loss": 2.0581, "step": 1265 }, { "epoch": 0.96, "learning_rate": 4.402420574886536e-05, "loss": 2.0881, "step": 1266 }, { "epoch": 0.96, "learning_rate": 4.4019478063540096e-05, "loss": 1.965, "step": 1267 }, { "epoch": 0.96, "learning_rate": 4.401475037821483e-05, "loss": 1.9453, "step": 1268 }, { "epoch": 0.96, "learning_rate": 4.401002269288956e-05, "loss": 1.8581, "step": 1269 }, { "epoch": 0.96, "learning_rate": 4.40052950075643e-05, "loss": 2.0035, "step": 1270 }, { "epoch": 0.96, "learning_rate": 4.400056732223903e-05, "loss": 1.9703, "step": 1271 }, { "epoch": 0.96, "learning_rate": 4.399583963691377e-05, "loss": 1.9211, "step": 1272 }, { "epoch": 0.96, "learning_rate": 4.39911119515885e-05, "loss": 2.043, "step": 1273 }, { "epoch": 0.96, "learning_rate": 4.398638426626324e-05, "loss": 2.0588, "step": 1274 }, { "epoch": 0.96, "learning_rate": 4.398165658093797e-05, "loss": 1.925, "step": 1275 }, { "epoch": 0.97, "learning_rate": 4.397692889561271e-05, "loss": 2.014, "step": 1276 }, { "epoch": 0.97, "learning_rate": 4.3972201210287444e-05, "loss": 2.0509, "step": 1277 }, { "epoch": 0.97, "learning_rate": 4.396747352496218e-05, "loss": 2.0194, "step": 1278 }, { "epoch": 0.97, "learning_rate": 4.3962745839636915e-05, "loss": 1.9255, "step": 1279 }, { "epoch": 0.97, "learning_rate": 4.3958018154311653e-05, "loss": 1.9734, "step": 1280 }, { "epoch": 0.97, "learning_rate": 4.3953290468986385e-05, "loss": 2.1426, "step": 1281 }, { "epoch": 0.97, "learning_rate": 4.3948562783661124e-05, "loss": 1.9921, "step": 1282 }, { "epoch": 0.97, "learning_rate": 4.3943835098335856e-05, "loss": 2.0806, "step": 1283 }, { "epoch": 0.97, "learning_rate": 4.3939107413010595e-05, "loss": 2.013, "step": 1284 }, { "epoch": 0.97, "learning_rate": 4.393437972768533e-05, "loss": 1.9267, "step": 1285 }, { "epoch": 0.97, "learning_rate": 4.3929652042360066e-05, "loss": 2.0773, "step": 1286 }, { "epoch": 0.97, "learning_rate": 4.39249243570348e-05, "loss": 1.9651, "step": 1287 }, { "epoch": 0.97, "learning_rate": 4.392019667170954e-05, "loss": 2.0362, "step": 1288 }, { "epoch": 0.97, "learning_rate": 4.391546898638427e-05, "loss": 2.1191, "step": 1289 }, { "epoch": 0.98, "learning_rate": 4.391074130105901e-05, "loss": 1.9121, "step": 1290 }, { "epoch": 0.98, "learning_rate": 4.390601361573374e-05, "loss": 1.9734, "step": 1291 }, { "epoch": 0.98, "learning_rate": 4.390128593040848e-05, "loss": 2.085, "step": 1292 }, { "epoch": 0.98, "learning_rate": 4.389655824508321e-05, "loss": 2.0894, "step": 1293 }, { "epoch": 0.98, "learning_rate": 4.389183055975794e-05, "loss": 1.9982, "step": 1294 }, { "epoch": 0.98, "learning_rate": 4.388710287443268e-05, "loss": 1.9522, "step": 1295 }, { "epoch": 0.98, "learning_rate": 4.3882375189107414e-05, "loss": 1.982, "step": 1296 }, { "epoch": 0.98, "learning_rate": 4.3877647503782146e-05, "loss": 2.0059, "step": 1297 }, { "epoch": 0.98, "learning_rate": 4.3872919818456885e-05, "loss": 1.9535, "step": 1298 }, { "epoch": 0.98, "learning_rate": 4.386819213313162e-05, "loss": 2.0226, "step": 1299 }, { "epoch": 0.98, "learning_rate": 4.3863464447806356e-05, "loss": 1.9611, "step": 1300 }, { "epoch": 0.98, "learning_rate": 4.385873676248109e-05, "loss": 1.9974, "step": 1301 }, { "epoch": 0.98, "learning_rate": 4.3854009077155826e-05, "loss": 1.9908, "step": 1302 }, { "epoch": 0.99, "learning_rate": 4.384928139183056e-05, "loss": 1.9709, "step": 1303 }, { "epoch": 0.99, "learning_rate": 4.38445537065053e-05, "loss": 1.9916, "step": 1304 }, { "epoch": 0.99, "learning_rate": 4.383982602118003e-05, "loss": 2.1279, "step": 1305 }, { "epoch": 0.99, "learning_rate": 4.383509833585477e-05, "loss": 1.9637, "step": 1306 }, { "epoch": 0.99, "learning_rate": 4.38303706505295e-05, "loss": 2.0203, "step": 1307 }, { "epoch": 0.99, "learning_rate": 4.382564296520424e-05, "loss": 1.9688, "step": 1308 }, { "epoch": 0.99, "learning_rate": 4.382091527987897e-05, "loss": 2.0334, "step": 1309 }, { "epoch": 0.99, "learning_rate": 4.381618759455371e-05, "loss": 1.928, "step": 1310 }, { "epoch": 0.99, "learning_rate": 4.381145990922844e-05, "loss": 2.0224, "step": 1311 }, { "epoch": 0.99, "learning_rate": 4.380673222390318e-05, "loss": 1.9813, "step": 1312 }, { "epoch": 0.99, "learning_rate": 4.380200453857791e-05, "loss": 1.9812, "step": 1313 }, { "epoch": 0.99, "learning_rate": 4.379727685325265e-05, "loss": 1.8933, "step": 1314 }, { "epoch": 0.99, "learning_rate": 4.3792549167927384e-05, "loss": 1.9376, "step": 1315 }, { "epoch": 1.0, "learning_rate": 4.378782148260212e-05, "loss": 2.0272, "step": 1316 }, { "epoch": 1.0, "learning_rate": 4.3783093797276855e-05, "loss": 1.9023, "step": 1317 }, { "epoch": 1.0, "learning_rate": 4.3778366111951594e-05, "loss": 2.0011, "step": 1318 }, { "epoch": 1.0, "learning_rate": 4.3773638426626326e-05, "loss": 1.9494, "step": 1319 }, { "epoch": 1.0, "learning_rate": 4.3768910741301065e-05, "loss": 1.9097, "step": 1320 }, { "epoch": 1.0, "learning_rate": 4.3764183055975797e-05, "loss": 2.0886, "step": 1321 }, { "epoch": 1.0, "learning_rate": 4.375945537065053e-05, "loss": 2.1207, "step": 1322 }, { "epoch": 1.0, "learning_rate": 4.375472768532527e-05, "loss": 1.3393, "step": 1323 }, { "epoch": 1.0, "learning_rate": 4.375e-05, "loss": 1.1356, "step": 1324 }, { "epoch": 1.0, "learning_rate": 4.374527231467474e-05, "loss": 0.9773, "step": 1325 }, { "epoch": 1.0, "learning_rate": 4.374054462934947e-05, "loss": 0.9422, "step": 1326 }, { "epoch": 1.0, "learning_rate": 4.373581694402421e-05, "loss": 0.8269, "step": 1327 }, { "epoch": 1.0, "learning_rate": 4.373108925869894e-05, "loss": 0.7969, "step": 1328 }, { "epoch": 1.01, "learning_rate": 4.372636157337368e-05, "loss": 0.9238, "step": 1329 }, { "epoch": 1.01, "learning_rate": 4.372163388804841e-05, "loss": 0.953, "step": 1330 }, { "epoch": 1.01, "learning_rate": 4.371690620272315e-05, "loss": 0.7952, "step": 1331 }, { "epoch": 1.01, "learning_rate": 4.371217851739788e-05, "loss": 0.8696, "step": 1332 }, { "epoch": 1.01, "learning_rate": 4.370745083207262e-05, "loss": 0.9264, "step": 1333 }, { "epoch": 1.01, "learning_rate": 4.3702723146747354e-05, "loss": 0.8891, "step": 1334 }, { "epoch": 1.01, "learning_rate": 4.369799546142209e-05, "loss": 0.8797, "step": 1335 }, { "epoch": 1.01, "learning_rate": 4.3693267776096825e-05, "loss": 0.9582, "step": 1336 }, { "epoch": 1.01, "learning_rate": 4.3688540090771564e-05, "loss": 0.8601, "step": 1337 }, { "epoch": 1.01, "learning_rate": 4.3683812405446296e-05, "loss": 0.9197, "step": 1338 }, { "epoch": 1.01, "learning_rate": 4.3679084720121035e-05, "loss": 0.9299, "step": 1339 }, { "epoch": 1.01, "learning_rate": 4.367435703479577e-05, "loss": 0.8906, "step": 1340 }, { "epoch": 1.01, "learning_rate": 4.3669629349470506e-05, "loss": 0.8472, "step": 1341 }, { "epoch": 1.01, "learning_rate": 4.366490166414524e-05, "loss": 0.8138, "step": 1342 }, { "epoch": 1.02, "learning_rate": 4.3660173978819976e-05, "loss": 0.932, "step": 1343 }, { "epoch": 1.02, "learning_rate": 4.365544629349471e-05, "loss": 0.9696, "step": 1344 }, { "epoch": 1.02, "learning_rate": 4.365071860816945e-05, "loss": 0.8577, "step": 1345 }, { "epoch": 1.02, "learning_rate": 4.364599092284418e-05, "loss": 0.8216, "step": 1346 }, { "epoch": 1.02, "learning_rate": 4.364126323751891e-05, "loss": 0.7752, "step": 1347 }, { "epoch": 1.02, "learning_rate": 4.363653555219365e-05, "loss": 0.8389, "step": 1348 }, { "epoch": 1.02, "learning_rate": 4.363180786686838e-05, "loss": 0.9212, "step": 1349 }, { "epoch": 1.02, "learning_rate": 4.3627080181543114e-05, "loss": 0.9975, "step": 1350 }, { "epoch": 1.02, "learning_rate": 4.362235249621785e-05, "loss": 0.9531, "step": 1351 }, { "epoch": 1.02, "learning_rate": 4.3617624810892585e-05, "loss": 0.8531, "step": 1352 }, { "epoch": 1.02, "learning_rate": 4.3612897125567324e-05, "loss": 0.8911, "step": 1353 }, { "epoch": 1.02, "learning_rate": 4.3608169440242056e-05, "loss": 0.7328, "step": 1354 }, { "epoch": 1.02, "learning_rate": 4.3603441754916795e-05, "loss": 1.0347, "step": 1355 }, { "epoch": 1.03, "learning_rate": 4.359871406959153e-05, "loss": 0.8704, "step": 1356 }, { "epoch": 1.03, "learning_rate": 4.3593986384266266e-05, "loss": 0.8498, "step": 1357 }, { "epoch": 1.03, "learning_rate": 4.3589258698941e-05, "loss": 0.8885, "step": 1358 }, { "epoch": 1.03, "learning_rate": 4.358453101361574e-05, "loss": 0.8241, "step": 1359 }, { "epoch": 1.03, "learning_rate": 4.357980332829047e-05, "loss": 1.0142, "step": 1360 }, { "epoch": 1.03, "learning_rate": 4.357507564296521e-05, "loss": 0.857, "step": 1361 }, { "epoch": 1.03, "learning_rate": 4.357034795763994e-05, "loss": 0.8673, "step": 1362 }, { "epoch": 1.03, "learning_rate": 4.356562027231468e-05, "loss": 0.8446, "step": 1363 }, { "epoch": 1.03, "learning_rate": 4.356089258698941e-05, "loss": 0.803, "step": 1364 }, { "epoch": 1.03, "learning_rate": 4.355616490166415e-05, "loss": 0.8994, "step": 1365 }, { "epoch": 1.03, "learning_rate": 4.355143721633888e-05, "loss": 0.7737, "step": 1366 }, { "epoch": 1.03, "learning_rate": 4.354670953101362e-05, "loss": 0.8799, "step": 1367 }, { "epoch": 1.03, "learning_rate": 4.354198184568835e-05, "loss": 0.9156, "step": 1368 }, { "epoch": 1.04, "learning_rate": 4.353725416036309e-05, "loss": 0.9886, "step": 1369 }, { "epoch": 1.04, "learning_rate": 4.353252647503782e-05, "loss": 0.8198, "step": 1370 }, { "epoch": 1.04, "learning_rate": 4.352779878971256e-05, "loss": 0.9066, "step": 1371 }, { "epoch": 1.04, "learning_rate": 4.3523071104387294e-05, "loss": 0.939, "step": 1372 }, { "epoch": 1.04, "learning_rate": 4.351834341906203e-05, "loss": 0.7408, "step": 1373 }, { "epoch": 1.04, "learning_rate": 4.3513615733736765e-05, "loss": 0.7962, "step": 1374 }, { "epoch": 1.04, "learning_rate": 4.35088880484115e-05, "loss": 0.7533, "step": 1375 }, { "epoch": 1.04, "learning_rate": 4.3504160363086236e-05, "loss": 0.8055, "step": 1376 }, { "epoch": 1.04, "learning_rate": 4.349943267776097e-05, "loss": 0.9506, "step": 1377 }, { "epoch": 1.04, "learning_rate": 4.34947049924357e-05, "loss": 0.8286, "step": 1378 }, { "epoch": 1.04, "learning_rate": 4.348997730711044e-05, "loss": 0.7459, "step": 1379 }, { "epoch": 1.04, "learning_rate": 4.348524962178517e-05, "loss": 1.015, "step": 1380 }, { "epoch": 1.04, "learning_rate": 4.348052193645991e-05, "loss": 0.8253, "step": 1381 }, { "epoch": 1.05, "learning_rate": 4.347579425113464e-05, "loss": 0.8598, "step": 1382 }, { "epoch": 1.05, "learning_rate": 4.347106656580938e-05, "loss": 0.8823, "step": 1383 }, { "epoch": 1.05, "learning_rate": 4.346633888048411e-05, "loss": 0.7623, "step": 1384 }, { "epoch": 1.05, "learning_rate": 4.346161119515885e-05, "loss": 0.7531, "step": 1385 }, { "epoch": 1.05, "learning_rate": 4.345688350983359e-05, "loss": 0.8568, "step": 1386 }, { "epoch": 1.05, "learning_rate": 4.345215582450832e-05, "loss": 0.9092, "step": 1387 }, { "epoch": 1.05, "learning_rate": 4.344742813918306e-05, "loss": 1.021, "step": 1388 }, { "epoch": 1.05, "learning_rate": 4.3442700453857793e-05, "loss": 0.7631, "step": 1389 }, { "epoch": 1.05, "learning_rate": 4.343797276853253e-05, "loss": 0.9103, "step": 1390 }, { "epoch": 1.05, "learning_rate": 4.3433245083207264e-05, "loss": 0.8605, "step": 1391 }, { "epoch": 1.05, "learning_rate": 4.3428517397882e-05, "loss": 0.89, "step": 1392 }, { "epoch": 1.05, "learning_rate": 4.3423789712556735e-05, "loss": 0.9027, "step": 1393 }, { "epoch": 1.05, "learning_rate": 4.3419062027231474e-05, "loss": 0.9731, "step": 1394 }, { "epoch": 1.06, "learning_rate": 4.3414334341906206e-05, "loss": 0.878, "step": 1395 }, { "epoch": 1.06, "learning_rate": 4.3409606656580945e-05, "loss": 0.9168, "step": 1396 }, { "epoch": 1.06, "learning_rate": 4.340487897125568e-05, "loss": 0.7587, "step": 1397 }, { "epoch": 1.06, "learning_rate": 4.3400151285930416e-05, "loss": 0.9048, "step": 1398 }, { "epoch": 1.06, "learning_rate": 4.339542360060515e-05, "loss": 0.8379, "step": 1399 }, { "epoch": 1.06, "learning_rate": 4.339069591527988e-05, "loss": 0.7602, "step": 1400 }, { "epoch": 1.06, "learning_rate": 4.338596822995462e-05, "loss": 0.9473, "step": 1401 }, { "epoch": 1.06, "learning_rate": 4.338124054462935e-05, "loss": 0.9421, "step": 1402 }, { "epoch": 1.06, "learning_rate": 4.337651285930408e-05, "loss": 0.9071, "step": 1403 }, { "epoch": 1.06, "learning_rate": 4.337178517397882e-05, "loss": 0.8867, "step": 1404 }, { "epoch": 1.06, "learning_rate": 4.3367057488653554e-05, "loss": 0.9294, "step": 1405 }, { "epoch": 1.06, "learning_rate": 4.336232980332829e-05, "loss": 0.9821, "step": 1406 }, { "epoch": 1.06, "learning_rate": 4.3357602118003025e-05, "loss": 0.9136, "step": 1407 }, { "epoch": 1.06, "learning_rate": 4.3352874432677764e-05, "loss": 0.8982, "step": 1408 }, { "epoch": 1.07, "learning_rate": 4.3348146747352496e-05, "loss": 0.8049, "step": 1409 }, { "epoch": 1.07, "learning_rate": 4.3343419062027234e-05, "loss": 0.8598, "step": 1410 }, { "epoch": 1.07, "learning_rate": 4.3338691376701967e-05, "loss": 0.7809, "step": 1411 }, { "epoch": 1.07, "learning_rate": 4.3333963691376705e-05, "loss": 0.8908, "step": 1412 }, { "epoch": 1.07, "learning_rate": 4.332923600605144e-05, "loss": 0.8368, "step": 1413 }, { "epoch": 1.07, "learning_rate": 4.3324508320726176e-05, "loss": 0.8914, "step": 1414 }, { "epoch": 1.07, "learning_rate": 4.331978063540091e-05, "loss": 0.8165, "step": 1415 }, { "epoch": 1.07, "learning_rate": 4.331505295007565e-05, "loss": 0.8113, "step": 1416 }, { "epoch": 1.07, "learning_rate": 4.331032526475038e-05, "loss": 0.7264, "step": 1417 }, { "epoch": 1.07, "learning_rate": 4.330559757942512e-05, "loss": 0.785, "step": 1418 }, { "epoch": 1.07, "learning_rate": 4.330086989409985e-05, "loss": 0.9649, "step": 1419 }, { "epoch": 1.07, "learning_rate": 4.329614220877459e-05, "loss": 0.7819, "step": 1420 }, { "epoch": 1.07, "learning_rate": 4.329141452344932e-05, "loss": 0.7547, "step": 1421 }, { "epoch": 1.08, "learning_rate": 4.328668683812406e-05, "loss": 1.0119, "step": 1422 }, { "epoch": 1.08, "learning_rate": 4.328195915279879e-05, "loss": 0.806, "step": 1423 }, { "epoch": 1.08, "learning_rate": 4.327723146747353e-05, "loss": 0.901, "step": 1424 }, { "epoch": 1.08, "learning_rate": 4.327250378214826e-05, "loss": 0.9185, "step": 1425 }, { "epoch": 1.08, "learning_rate": 4.3267776096823e-05, "loss": 0.7922, "step": 1426 }, { "epoch": 1.08, "learning_rate": 4.3263048411497734e-05, "loss": 0.8117, "step": 1427 }, { "epoch": 1.08, "learning_rate": 4.3258320726172466e-05, "loss": 0.7479, "step": 1428 }, { "epoch": 1.08, "learning_rate": 4.3253593040847205e-05, "loss": 0.8465, "step": 1429 }, { "epoch": 1.08, "learning_rate": 4.324886535552194e-05, "loss": 0.7933, "step": 1430 }, { "epoch": 1.08, "learning_rate": 4.324413767019667e-05, "loss": 0.6972, "step": 1431 }, { "epoch": 1.08, "learning_rate": 4.323940998487141e-05, "loss": 0.9483, "step": 1432 }, { "epoch": 1.08, "learning_rate": 4.323468229954614e-05, "loss": 0.7825, "step": 1433 }, { "epoch": 1.08, "learning_rate": 4.322995461422088e-05, "loss": 0.8995, "step": 1434 }, { "epoch": 1.09, "learning_rate": 4.322522692889561e-05, "loss": 0.8487, "step": 1435 }, { "epoch": 1.09, "learning_rate": 4.322049924357035e-05, "loss": 0.8471, "step": 1436 }, { "epoch": 1.09, "learning_rate": 4.321577155824508e-05, "loss": 0.845, "step": 1437 }, { "epoch": 1.09, "learning_rate": 4.321104387291982e-05, "loss": 0.8214, "step": 1438 }, { "epoch": 1.09, "learning_rate": 4.320631618759455e-05, "loss": 0.8422, "step": 1439 }, { "epoch": 1.09, "learning_rate": 4.320158850226929e-05, "loss": 0.8415, "step": 1440 }, { "epoch": 1.09, "learning_rate": 4.319686081694402e-05, "loss": 0.9191, "step": 1441 }, { "epoch": 1.09, "learning_rate": 4.319213313161876e-05, "loss": 0.8765, "step": 1442 }, { "epoch": 1.09, "learning_rate": 4.3187405446293494e-05, "loss": 0.8604, "step": 1443 }, { "epoch": 1.09, "learning_rate": 4.318267776096823e-05, "loss": 0.7824, "step": 1444 }, { "epoch": 1.09, "learning_rate": 4.317795007564297e-05, "loss": 0.8785, "step": 1445 }, { "epoch": 1.09, "learning_rate": 4.3173222390317704e-05, "loss": 0.917, "step": 1446 }, { "epoch": 1.09, "learning_rate": 4.316849470499244e-05, "loss": 0.8887, "step": 1447 }, { "epoch": 1.1, "learning_rate": 4.3163767019667175e-05, "loss": 0.8847, "step": 1448 }, { "epoch": 1.1, "learning_rate": 4.3159039334341914e-05, "loss": 0.9176, "step": 1449 }, { "epoch": 1.1, "learning_rate": 4.3154311649016646e-05, "loss": 0.766, "step": 1450 }, { "epoch": 1.1, "learning_rate": 4.3149583963691384e-05, "loss": 0.8859, "step": 1451 }, { "epoch": 1.1, "learning_rate": 4.3144856278366116e-05, "loss": 0.9017, "step": 1452 }, { "epoch": 1.1, "learning_rate": 4.314012859304085e-05, "loss": 0.8623, "step": 1453 }, { "epoch": 1.1, "learning_rate": 4.313540090771559e-05, "loss": 0.9411, "step": 1454 }, { "epoch": 1.1, "learning_rate": 4.313067322239032e-05, "loss": 0.8118, "step": 1455 }, { "epoch": 1.1, "learning_rate": 4.312594553706505e-05, "loss": 0.8643, "step": 1456 }, { "epoch": 1.1, "learning_rate": 4.312121785173979e-05, "loss": 0.8134, "step": 1457 }, { "epoch": 1.1, "learning_rate": 4.311649016641452e-05, "loss": 0.8205, "step": 1458 }, { "epoch": 1.1, "learning_rate": 4.311176248108926e-05, "loss": 0.9299, "step": 1459 }, { "epoch": 1.1, "learning_rate": 4.310703479576399e-05, "loss": 0.8439, "step": 1460 }, { "epoch": 1.1, "learning_rate": 4.310230711043873e-05, "loss": 0.8312, "step": 1461 }, { "epoch": 1.11, "learning_rate": 4.3097579425113464e-05, "loss": 0.7662, "step": 1462 }, { "epoch": 1.11, "learning_rate": 4.30928517397882e-05, "loss": 0.844, "step": 1463 }, { "epoch": 1.11, "learning_rate": 4.3088124054462935e-05, "loss": 0.7337, "step": 1464 }, { "epoch": 1.11, "learning_rate": 4.3083396369137674e-05, "loss": 0.8433, "step": 1465 }, { "epoch": 1.11, "learning_rate": 4.3078668683812406e-05, "loss": 0.7738, "step": 1466 }, { "epoch": 1.11, "learning_rate": 4.3073940998487145e-05, "loss": 0.7428, "step": 1467 }, { "epoch": 1.11, "learning_rate": 4.306921331316188e-05, "loss": 0.8651, "step": 1468 }, { "epoch": 1.11, "learning_rate": 4.3064485627836616e-05, "loss": 0.8101, "step": 1469 }, { "epoch": 1.11, "learning_rate": 4.305975794251135e-05, "loss": 0.7759, "step": 1470 }, { "epoch": 1.11, "learning_rate": 4.3055030257186087e-05, "loss": 0.8624, "step": 1471 }, { "epoch": 1.11, "learning_rate": 4.305030257186082e-05, "loss": 0.8136, "step": 1472 }, { "epoch": 1.11, "learning_rate": 4.304557488653556e-05, "loss": 0.6794, "step": 1473 }, { "epoch": 1.11, "learning_rate": 4.304084720121029e-05, "loss": 0.8695, "step": 1474 }, { "epoch": 1.12, "learning_rate": 4.303611951588503e-05, "loss": 0.9442, "step": 1475 }, { "epoch": 1.12, "learning_rate": 4.303139183055976e-05, "loss": 0.8728, "step": 1476 }, { "epoch": 1.12, "learning_rate": 4.30266641452345e-05, "loss": 0.8692, "step": 1477 }, { "epoch": 1.12, "learning_rate": 4.302193645990923e-05, "loss": 0.7741, "step": 1478 }, { "epoch": 1.12, "learning_rate": 4.301720877458397e-05, "loss": 0.855, "step": 1479 }, { "epoch": 1.12, "learning_rate": 4.30124810892587e-05, "loss": 0.8181, "step": 1480 }, { "epoch": 1.12, "learning_rate": 4.3007753403933434e-05, "loss": 0.7867, "step": 1481 }, { "epoch": 1.12, "learning_rate": 4.300302571860817e-05, "loss": 0.8124, "step": 1482 }, { "epoch": 1.12, "learning_rate": 4.2998298033282905e-05, "loss": 0.8579, "step": 1483 }, { "epoch": 1.12, "learning_rate": 4.299357034795764e-05, "loss": 0.8431, "step": 1484 }, { "epoch": 1.12, "learning_rate": 4.2988842662632376e-05, "loss": 0.8276, "step": 1485 }, { "epoch": 1.12, "learning_rate": 4.298411497730711e-05, "loss": 0.8774, "step": 1486 }, { "epoch": 1.12, "learning_rate": 4.297938729198185e-05, "loss": 0.8593, "step": 1487 }, { "epoch": 1.13, "learning_rate": 4.297465960665658e-05, "loss": 0.8257, "step": 1488 }, { "epoch": 1.13, "learning_rate": 4.296993192133132e-05, "loss": 1.0633, "step": 1489 }, { "epoch": 1.13, "learning_rate": 4.296520423600605e-05, "loss": 0.8891, "step": 1490 }, { "epoch": 1.13, "learning_rate": 4.296047655068079e-05, "loss": 0.8403, "step": 1491 }, { "epoch": 1.13, "learning_rate": 4.295574886535552e-05, "loss": 0.8653, "step": 1492 }, { "epoch": 1.13, "learning_rate": 4.295102118003026e-05, "loss": 0.8859, "step": 1493 }, { "epoch": 1.13, "learning_rate": 4.294629349470499e-05, "loss": 0.9025, "step": 1494 }, { "epoch": 1.13, "learning_rate": 4.294156580937973e-05, "loss": 0.8531, "step": 1495 }, { "epoch": 1.13, "learning_rate": 4.293683812405446e-05, "loss": 0.9311, "step": 1496 }, { "epoch": 1.13, "learning_rate": 4.29321104387292e-05, "loss": 0.8676, "step": 1497 }, { "epoch": 1.13, "learning_rate": 4.2927382753403934e-05, "loss": 0.7758, "step": 1498 }, { "epoch": 1.13, "learning_rate": 4.292265506807867e-05, "loss": 0.9471, "step": 1499 }, { "epoch": 1.13, "learning_rate": 4.2917927382753404e-05, "loss": 0.9487, "step": 1500 }, { "epoch": 1.14, "learning_rate": 4.291319969742814e-05, "loss": 0.7755, "step": 1501 }, { "epoch": 1.14, "learning_rate": 4.2908472012102875e-05, "loss": 0.9102, "step": 1502 }, { "epoch": 1.14, "learning_rate": 4.2903744326777614e-05, "loss": 0.9549, "step": 1503 }, { "epoch": 1.14, "learning_rate": 4.2899016641452346e-05, "loss": 0.8819, "step": 1504 }, { "epoch": 1.14, "learning_rate": 4.2894288956127085e-05, "loss": 0.8046, "step": 1505 }, { "epoch": 1.14, "learning_rate": 4.288956127080182e-05, "loss": 0.9226, "step": 1506 }, { "epoch": 1.14, "learning_rate": 4.2884833585476556e-05, "loss": 0.9341, "step": 1507 }, { "epoch": 1.14, "learning_rate": 4.288010590015129e-05, "loss": 0.8795, "step": 1508 }, { "epoch": 1.14, "learning_rate": 4.287537821482602e-05, "loss": 0.8067, "step": 1509 }, { "epoch": 1.14, "learning_rate": 4.287065052950076e-05, "loss": 0.813, "step": 1510 }, { "epoch": 1.14, "learning_rate": 4.286592284417549e-05, "loss": 0.788, "step": 1511 }, { "epoch": 1.14, "learning_rate": 4.286119515885023e-05, "loss": 0.7952, "step": 1512 }, { "epoch": 1.14, "learning_rate": 4.285646747352496e-05, "loss": 0.8335, "step": 1513 }, { "epoch": 1.15, "learning_rate": 4.28517397881997e-05, "loss": 1.1374, "step": 1514 }, { "epoch": 1.15, "learning_rate": 4.284701210287443e-05, "loss": 0.9379, "step": 1515 }, { "epoch": 1.15, "learning_rate": 4.284228441754917e-05, "loss": 0.8506, "step": 1516 }, { "epoch": 1.15, "learning_rate": 4.2837556732223904e-05, "loss": 0.8109, "step": 1517 }, { "epoch": 1.15, "learning_rate": 4.283282904689864e-05, "loss": 0.8367, "step": 1518 }, { "epoch": 1.15, "learning_rate": 4.2828101361573375e-05, "loss": 0.8209, "step": 1519 }, { "epoch": 1.15, "learning_rate": 4.282337367624811e-05, "loss": 0.9535, "step": 1520 }, { "epoch": 1.15, "learning_rate": 4.2818645990922845e-05, "loss": 0.8507, "step": 1521 }, { "epoch": 1.15, "learning_rate": 4.2813918305597584e-05, "loss": 0.803, "step": 1522 }, { "epoch": 1.15, "learning_rate": 4.2809190620272316e-05, "loss": 0.8218, "step": 1523 }, { "epoch": 1.15, "learning_rate": 4.2804462934947055e-05, "loss": 0.9586, "step": 1524 }, { "epoch": 1.15, "learning_rate": 4.279973524962179e-05, "loss": 0.7745, "step": 1525 }, { "epoch": 1.15, "learning_rate": 4.2795007564296526e-05, "loss": 0.8738, "step": 1526 }, { "epoch": 1.15, "learning_rate": 4.279027987897126e-05, "loss": 0.8883, "step": 1527 }, { "epoch": 1.16, "learning_rate": 4.2785552193646e-05, "loss": 0.8628, "step": 1528 }, { "epoch": 1.16, "learning_rate": 4.278082450832073e-05, "loss": 0.8503, "step": 1529 }, { "epoch": 1.16, "learning_rate": 4.277609682299547e-05, "loss": 0.8357, "step": 1530 }, { "epoch": 1.16, "learning_rate": 4.27713691376702e-05, "loss": 0.9079, "step": 1531 }, { "epoch": 1.16, "learning_rate": 4.276664145234494e-05, "loss": 0.887, "step": 1532 }, { "epoch": 1.16, "learning_rate": 4.276191376701967e-05, "loss": 0.7841, "step": 1533 }, { "epoch": 1.16, "learning_rate": 4.27571860816944e-05, "loss": 0.8119, "step": 1534 }, { "epoch": 1.16, "learning_rate": 4.275245839636914e-05, "loss": 0.8907, "step": 1535 }, { "epoch": 1.16, "learning_rate": 4.2747730711043874e-05, "loss": 0.9019, "step": 1536 }, { "epoch": 1.16, "learning_rate": 4.2743003025718606e-05, "loss": 0.8948, "step": 1537 }, { "epoch": 1.16, "learning_rate": 4.2738275340393345e-05, "loss": 0.9036, "step": 1538 }, { "epoch": 1.16, "learning_rate": 4.273354765506808e-05, "loss": 0.8619, "step": 1539 }, { "epoch": 1.16, "learning_rate": 4.2728819969742816e-05, "loss": 0.9542, "step": 1540 }, { "epoch": 1.17, "learning_rate": 4.272409228441755e-05, "loss": 0.8055, "step": 1541 }, { "epoch": 1.17, "learning_rate": 4.2719364599092286e-05, "loss": 0.922, "step": 1542 }, { "epoch": 1.17, "learning_rate": 4.271463691376702e-05, "loss": 0.8581, "step": 1543 }, { "epoch": 1.17, "learning_rate": 4.270990922844176e-05, "loss": 0.8089, "step": 1544 }, { "epoch": 1.17, "learning_rate": 4.270518154311649e-05, "loss": 0.7825, "step": 1545 }, { "epoch": 1.17, "learning_rate": 4.270045385779123e-05, "loss": 0.815, "step": 1546 }, { "epoch": 1.17, "learning_rate": 4.269572617246596e-05, "loss": 0.8674, "step": 1547 }, { "epoch": 1.17, "learning_rate": 4.26909984871407e-05, "loss": 0.8736, "step": 1548 }, { "epoch": 1.17, "learning_rate": 4.268627080181543e-05, "loss": 0.8222, "step": 1549 }, { "epoch": 1.17, "learning_rate": 4.268154311649017e-05, "loss": 0.9471, "step": 1550 }, { "epoch": 1.17, "learning_rate": 4.26768154311649e-05, "loss": 0.8222, "step": 1551 }, { "epoch": 1.17, "learning_rate": 4.267208774583964e-05, "loss": 0.842, "step": 1552 }, { "epoch": 1.17, "learning_rate": 4.266736006051437e-05, "loss": 0.8088, "step": 1553 }, { "epoch": 1.18, "learning_rate": 4.266263237518911e-05, "loss": 0.8122, "step": 1554 }, { "epoch": 1.18, "learning_rate": 4.2657904689863844e-05, "loss": 0.9702, "step": 1555 }, { "epoch": 1.18, "learning_rate": 4.265317700453858e-05, "loss": 0.8656, "step": 1556 }, { "epoch": 1.18, "learning_rate": 4.2648449319213315e-05, "loss": 0.8777, "step": 1557 }, { "epoch": 1.18, "learning_rate": 4.2643721633888054e-05, "loss": 0.95, "step": 1558 }, { "epoch": 1.18, "learning_rate": 4.2638993948562786e-05, "loss": 0.7306, "step": 1559 }, { "epoch": 1.18, "learning_rate": 4.2634266263237524e-05, "loss": 0.7995, "step": 1560 }, { "epoch": 1.18, "learning_rate": 4.2629538577912257e-05, "loss": 0.7926, "step": 1561 }, { "epoch": 1.18, "learning_rate": 4.262481089258699e-05, "loss": 0.8442, "step": 1562 }, { "epoch": 1.18, "learning_rate": 4.262008320726172e-05, "loss": 0.9257, "step": 1563 }, { "epoch": 1.18, "learning_rate": 4.261535552193646e-05, "loss": 0.8428, "step": 1564 }, { "epoch": 1.18, "learning_rate": 4.26106278366112e-05, "loss": 0.7743, "step": 1565 }, { "epoch": 1.18, "learning_rate": 4.260590015128593e-05, "loss": 0.7869, "step": 1566 }, { "epoch": 1.19, "learning_rate": 4.260117246596067e-05, "loss": 0.8993, "step": 1567 }, { "epoch": 1.19, "learning_rate": 4.25964447806354e-05, "loss": 0.995, "step": 1568 }, { "epoch": 1.19, "learning_rate": 4.259171709531014e-05, "loss": 0.8416, "step": 1569 }, { "epoch": 1.19, "learning_rate": 4.258698940998487e-05, "loss": 0.8034, "step": 1570 }, { "epoch": 1.19, "learning_rate": 4.258226172465961e-05, "loss": 0.9244, "step": 1571 }, { "epoch": 1.19, "learning_rate": 4.257753403933434e-05, "loss": 0.8548, "step": 1572 }, { "epoch": 1.19, "learning_rate": 4.257280635400908e-05, "loss": 0.8532, "step": 1573 }, { "epoch": 1.19, "learning_rate": 4.2568078668683814e-05, "loss": 0.8529, "step": 1574 }, { "epoch": 1.19, "learning_rate": 4.256335098335855e-05, "loss": 0.7952, "step": 1575 }, { "epoch": 1.19, "learning_rate": 4.2558623298033285e-05, "loss": 0.8328, "step": 1576 }, { "epoch": 1.19, "learning_rate": 4.2553895612708024e-05, "loss": 0.8527, "step": 1577 }, { "epoch": 1.19, "learning_rate": 4.2549167927382756e-05, "loss": 0.8824, "step": 1578 }, { "epoch": 1.19, "learning_rate": 4.2544440242057495e-05, "loss": 0.7864, "step": 1579 }, { "epoch": 1.19, "learning_rate": 4.253971255673223e-05, "loss": 0.7832, "step": 1580 }, { "epoch": 1.2, "learning_rate": 4.2534984871406965e-05, "loss": 0.8859, "step": 1581 }, { "epoch": 1.2, "learning_rate": 4.25302571860817e-05, "loss": 0.8357, "step": 1582 }, { "epoch": 1.2, "learning_rate": 4.2525529500756436e-05, "loss": 0.8737, "step": 1583 }, { "epoch": 1.2, "learning_rate": 4.252080181543117e-05, "loss": 0.755, "step": 1584 }, { "epoch": 1.2, "learning_rate": 4.251607413010591e-05, "loss": 0.8423, "step": 1585 }, { "epoch": 1.2, "learning_rate": 4.251134644478064e-05, "loss": 0.735, "step": 1586 }, { "epoch": 1.2, "learning_rate": 4.250661875945537e-05, "loss": 0.8226, "step": 1587 }, { "epoch": 1.2, "learning_rate": 4.250189107413011e-05, "loss": 0.998, "step": 1588 }, { "epoch": 1.2, "learning_rate": 4.249716338880484e-05, "loss": 0.9116, "step": 1589 }, { "epoch": 1.2, "learning_rate": 4.2492435703479574e-05, "loss": 0.9074, "step": 1590 }, { "epoch": 1.2, "learning_rate": 4.248770801815431e-05, "loss": 0.7349, "step": 1591 }, { "epoch": 1.2, "learning_rate": 4.2482980332829045e-05, "loss": 0.9172, "step": 1592 }, { "epoch": 1.2, "learning_rate": 4.2478252647503784e-05, "loss": 0.9511, "step": 1593 }, { "epoch": 1.21, "learning_rate": 4.2473524962178516e-05, "loss": 0.8758, "step": 1594 }, { "epoch": 1.21, "learning_rate": 4.2468797276853255e-05, "loss": 0.8271, "step": 1595 }, { "epoch": 1.21, "learning_rate": 4.246406959152799e-05, "loss": 0.7903, "step": 1596 }, { "epoch": 1.21, "learning_rate": 4.2459341906202726e-05, "loss": 0.9691, "step": 1597 }, { "epoch": 1.21, "learning_rate": 4.245461422087746e-05, "loss": 0.9107, "step": 1598 }, { "epoch": 1.21, "learning_rate": 4.24498865355522e-05, "loss": 0.9051, "step": 1599 }, { "epoch": 1.21, "learning_rate": 4.244515885022693e-05, "loss": 0.8284, "step": 1600 }, { "epoch": 1.21, "learning_rate": 4.244043116490167e-05, "loss": 0.9758, "step": 1601 }, { "epoch": 1.21, "learning_rate": 4.24357034795764e-05, "loss": 0.7196, "step": 1602 }, { "epoch": 1.21, "learning_rate": 4.243097579425114e-05, "loss": 0.9545, "step": 1603 }, { "epoch": 1.21, "learning_rate": 4.242624810892587e-05, "loss": 0.9131, "step": 1604 }, { "epoch": 1.21, "learning_rate": 4.242152042360061e-05, "loss": 0.7639, "step": 1605 }, { "epoch": 1.21, "learning_rate": 4.241679273827534e-05, "loss": 0.7627, "step": 1606 }, { "epoch": 1.22, "learning_rate": 4.241206505295008e-05, "loss": 0.8159, "step": 1607 }, { "epoch": 1.22, "learning_rate": 4.240733736762481e-05, "loss": 0.9316, "step": 1608 }, { "epoch": 1.22, "learning_rate": 4.240260968229955e-05, "loss": 0.848, "step": 1609 }, { "epoch": 1.22, "learning_rate": 4.239788199697428e-05, "loss": 0.8091, "step": 1610 }, { "epoch": 1.22, "learning_rate": 4.239315431164902e-05, "loss": 0.9495, "step": 1611 }, { "epoch": 1.22, "learning_rate": 4.2388426626323754e-05, "loss": 0.8154, "step": 1612 }, { "epoch": 1.22, "learning_rate": 4.238369894099849e-05, "loss": 0.9062, "step": 1613 }, { "epoch": 1.22, "learning_rate": 4.2378971255673225e-05, "loss": 0.9265, "step": 1614 }, { "epoch": 1.22, "learning_rate": 4.237424357034796e-05, "loss": 0.954, "step": 1615 }, { "epoch": 1.22, "learning_rate": 4.236951588502269e-05, "loss": 0.9568, "step": 1616 }, { "epoch": 1.22, "learning_rate": 4.236478819969743e-05, "loss": 0.9024, "step": 1617 }, { "epoch": 1.22, "learning_rate": 4.236006051437216e-05, "loss": 0.8591, "step": 1618 }, { "epoch": 1.22, "learning_rate": 4.23553328290469e-05, "loss": 0.9678, "step": 1619 }, { "epoch": 1.23, "learning_rate": 4.235060514372163e-05, "loss": 0.8106, "step": 1620 }, { "epoch": 1.23, "learning_rate": 4.234587745839637e-05, "loss": 0.8483, "step": 1621 }, { "epoch": 1.23, "learning_rate": 4.23411497730711e-05, "loss": 0.8708, "step": 1622 }, { "epoch": 1.23, "learning_rate": 4.233642208774584e-05, "loss": 0.7964, "step": 1623 }, { "epoch": 1.23, "learning_rate": 4.233169440242057e-05, "loss": 0.8709, "step": 1624 }, { "epoch": 1.23, "learning_rate": 4.232696671709531e-05, "loss": 0.8516, "step": 1625 }, { "epoch": 1.23, "learning_rate": 4.232223903177005e-05, "loss": 0.9645, "step": 1626 }, { "epoch": 1.23, "learning_rate": 4.231751134644478e-05, "loss": 0.815, "step": 1627 }, { "epoch": 1.23, "learning_rate": 4.231278366111952e-05, "loss": 0.8976, "step": 1628 }, { "epoch": 1.23, "learning_rate": 4.2308055975794253e-05, "loss": 0.9141, "step": 1629 }, { "epoch": 1.23, "learning_rate": 4.230332829046899e-05, "loss": 0.7924, "step": 1630 }, { "epoch": 1.23, "learning_rate": 4.2298600605143724e-05, "loss": 0.8386, "step": 1631 }, { "epoch": 1.23, "learning_rate": 4.229387291981846e-05, "loss": 1.0109, "step": 1632 }, { "epoch": 1.24, "learning_rate": 4.2289145234493195e-05, "loss": 0.8562, "step": 1633 }, { "epoch": 1.24, "learning_rate": 4.2284417549167934e-05, "loss": 0.729, "step": 1634 }, { "epoch": 1.24, "learning_rate": 4.2279689863842666e-05, "loss": 0.8945, "step": 1635 }, { "epoch": 1.24, "learning_rate": 4.2274962178517405e-05, "loss": 0.8084, "step": 1636 }, { "epoch": 1.24, "learning_rate": 4.227023449319214e-05, "loss": 0.826, "step": 1637 }, { "epoch": 1.24, "learning_rate": 4.2265506807866876e-05, "loss": 0.9356, "step": 1638 }, { "epoch": 1.24, "learning_rate": 4.226077912254161e-05, "loss": 0.8225, "step": 1639 }, { "epoch": 1.24, "learning_rate": 4.225605143721634e-05, "loss": 0.7311, "step": 1640 }, { "epoch": 1.24, "learning_rate": 4.225132375189108e-05, "loss": 0.9405, "step": 1641 }, { "epoch": 1.24, "learning_rate": 4.224659606656581e-05, "loss": 0.8766, "step": 1642 }, { "epoch": 1.24, "learning_rate": 4.224186838124054e-05, "loss": 0.788, "step": 1643 }, { "epoch": 1.24, "learning_rate": 4.223714069591528e-05, "loss": 0.9193, "step": 1644 }, { "epoch": 1.24, "learning_rate": 4.2232413010590014e-05, "loss": 0.9221, "step": 1645 }, { "epoch": 1.24, "learning_rate": 4.222768532526475e-05, "loss": 0.7396, "step": 1646 }, { "epoch": 1.25, "learning_rate": 4.2222957639939485e-05, "loss": 0.9786, "step": 1647 }, { "epoch": 1.25, "learning_rate": 4.2218229954614224e-05, "loss": 0.8411, "step": 1648 }, { "epoch": 1.25, "learning_rate": 4.2213502269288956e-05, "loss": 0.8706, "step": 1649 }, { "epoch": 1.25, "learning_rate": 4.2208774583963694e-05, "loss": 0.9231, "step": 1650 }, { "epoch": 1.25, "learning_rate": 4.2204046898638426e-05, "loss": 1.0235, "step": 1651 }, { "epoch": 1.25, "learning_rate": 4.2199319213313165e-05, "loss": 0.9003, "step": 1652 }, { "epoch": 1.25, "learning_rate": 4.21945915279879e-05, "loss": 0.8166, "step": 1653 }, { "epoch": 1.25, "learning_rate": 4.2189863842662636e-05, "loss": 0.9454, "step": 1654 }, { "epoch": 1.25, "learning_rate": 4.218513615733737e-05, "loss": 0.9311, "step": 1655 }, { "epoch": 1.25, "learning_rate": 4.218040847201211e-05, "loss": 0.8229, "step": 1656 }, { "epoch": 1.25, "learning_rate": 4.217568078668684e-05, "loss": 0.8721, "step": 1657 }, { "epoch": 1.25, "learning_rate": 4.217095310136158e-05, "loss": 0.7638, "step": 1658 }, { "epoch": 1.25, "learning_rate": 4.216622541603631e-05, "loss": 0.6939, "step": 1659 }, { "epoch": 1.26, "learning_rate": 4.216149773071105e-05, "loss": 0.777, "step": 1660 }, { "epoch": 1.26, "learning_rate": 4.215677004538578e-05, "loss": 0.8443, "step": 1661 }, { "epoch": 1.26, "learning_rate": 4.215204236006052e-05, "loss": 0.7513, "step": 1662 }, { "epoch": 1.26, "learning_rate": 4.214731467473525e-05, "loss": 0.8369, "step": 1663 }, { "epoch": 1.26, "learning_rate": 4.214258698940999e-05, "loss": 0.9308, "step": 1664 }, { "epoch": 1.26, "learning_rate": 4.213785930408472e-05, "loss": 0.8457, "step": 1665 }, { "epoch": 1.26, "learning_rate": 4.213313161875946e-05, "loss": 0.8494, "step": 1666 }, { "epoch": 1.26, "learning_rate": 4.2128403933434194e-05, "loss": 0.9063, "step": 1667 }, { "epoch": 1.26, "learning_rate": 4.2123676248108926e-05, "loss": 0.8056, "step": 1668 }, { "epoch": 1.26, "learning_rate": 4.211894856278366e-05, "loss": 0.9441, "step": 1669 }, { "epoch": 1.26, "learning_rate": 4.2114220877458397e-05, "loss": 0.887, "step": 1670 }, { "epoch": 1.26, "learning_rate": 4.210949319213313e-05, "loss": 0.9082, "step": 1671 }, { "epoch": 1.26, "learning_rate": 4.210476550680787e-05, "loss": 0.783, "step": 1672 }, { "epoch": 1.27, "learning_rate": 4.21000378214826e-05, "loss": 0.7935, "step": 1673 }, { "epoch": 1.27, "learning_rate": 4.209531013615734e-05, "loss": 0.8344, "step": 1674 }, { "epoch": 1.27, "learning_rate": 4.209058245083207e-05, "loss": 1.0467, "step": 1675 }, { "epoch": 1.27, "learning_rate": 4.208585476550681e-05, "loss": 0.9096, "step": 1676 }, { "epoch": 1.27, "learning_rate": 4.208112708018154e-05, "loss": 0.9086, "step": 1677 }, { "epoch": 1.27, "learning_rate": 4.207639939485628e-05, "loss": 0.9431, "step": 1678 }, { "epoch": 1.27, "learning_rate": 4.207167170953101e-05, "loss": 0.9967, "step": 1679 }, { "epoch": 1.27, "learning_rate": 4.206694402420575e-05, "loss": 1.0193, "step": 1680 }, { "epoch": 1.27, "learning_rate": 4.206221633888048e-05, "loss": 0.8848, "step": 1681 }, { "epoch": 1.27, "learning_rate": 4.205748865355522e-05, "loss": 0.7985, "step": 1682 }, { "epoch": 1.27, "learning_rate": 4.2052760968229954e-05, "loss": 0.8762, "step": 1683 }, { "epoch": 1.27, "learning_rate": 4.204803328290469e-05, "loss": 0.9228, "step": 1684 }, { "epoch": 1.27, "learning_rate": 4.204330559757943e-05, "loss": 0.8014, "step": 1685 }, { "epoch": 1.28, "learning_rate": 4.2038577912254164e-05, "loss": 0.9721, "step": 1686 }, { "epoch": 1.28, "learning_rate": 4.20338502269289e-05, "loss": 0.8018, "step": 1687 }, { "epoch": 1.28, "learning_rate": 4.2029122541603635e-05, "loss": 0.9443, "step": 1688 }, { "epoch": 1.28, "learning_rate": 4.2024394856278373e-05, "loss": 0.9255, "step": 1689 }, { "epoch": 1.28, "learning_rate": 4.2019667170953106e-05, "loss": 0.8462, "step": 1690 }, { "epoch": 1.28, "learning_rate": 4.2014939485627844e-05, "loss": 0.8646, "step": 1691 }, { "epoch": 1.28, "learning_rate": 4.2010211800302576e-05, "loss": 0.7905, "step": 1692 }, { "epoch": 1.28, "learning_rate": 4.200548411497731e-05, "loss": 0.7606, "step": 1693 }, { "epoch": 1.28, "learning_rate": 4.200075642965205e-05, "loss": 0.9109, "step": 1694 }, { "epoch": 1.28, "learning_rate": 4.199602874432678e-05, "loss": 0.8445, "step": 1695 }, { "epoch": 1.28, "learning_rate": 4.199130105900151e-05, "loss": 0.8244, "step": 1696 }, { "epoch": 1.28, "learning_rate": 4.198657337367625e-05, "loss": 0.9446, "step": 1697 }, { "epoch": 1.28, "learning_rate": 4.198184568835098e-05, "loss": 0.9808, "step": 1698 }, { "epoch": 1.28, "learning_rate": 4.197711800302572e-05, "loss": 0.7856, "step": 1699 }, { "epoch": 1.29, "learning_rate": 4.197239031770045e-05, "loss": 0.8073, "step": 1700 }, { "epoch": 1.29, "learning_rate": 4.196766263237519e-05, "loss": 1.0676, "step": 1701 }, { "epoch": 1.29, "learning_rate": 4.1962934947049924e-05, "loss": 0.9421, "step": 1702 }, { "epoch": 1.29, "learning_rate": 4.195820726172466e-05, "loss": 0.9531, "step": 1703 }, { "epoch": 1.29, "learning_rate": 4.1953479576399395e-05, "loss": 0.8569, "step": 1704 }, { "epoch": 1.29, "learning_rate": 4.1948751891074134e-05, "loss": 0.9385, "step": 1705 }, { "epoch": 1.29, "learning_rate": 4.1944024205748866e-05, "loss": 0.9515, "step": 1706 }, { "epoch": 1.29, "learning_rate": 4.1939296520423605e-05, "loss": 0.7696, "step": 1707 }, { "epoch": 1.29, "learning_rate": 4.193456883509834e-05, "loss": 0.8316, "step": 1708 }, { "epoch": 1.29, "learning_rate": 4.1929841149773076e-05, "loss": 0.8938, "step": 1709 }, { "epoch": 1.29, "learning_rate": 4.192511346444781e-05, "loss": 0.8477, "step": 1710 }, { "epoch": 1.29, "learning_rate": 4.1920385779122547e-05, "loss": 0.8863, "step": 1711 }, { "epoch": 1.29, "learning_rate": 4.191565809379728e-05, "loss": 0.7614, "step": 1712 }, { "epoch": 1.3, "learning_rate": 4.191093040847202e-05, "loss": 0.9538, "step": 1713 }, { "epoch": 1.3, "learning_rate": 4.190620272314675e-05, "loss": 0.8395, "step": 1714 }, { "epoch": 1.3, "learning_rate": 4.190147503782149e-05, "loss": 0.7395, "step": 1715 }, { "epoch": 1.3, "learning_rate": 4.189674735249622e-05, "loss": 0.8881, "step": 1716 }, { "epoch": 1.3, "learning_rate": 4.189201966717096e-05, "loss": 0.8481, "step": 1717 }, { "epoch": 1.3, "learning_rate": 4.188729198184569e-05, "loss": 0.9458, "step": 1718 }, { "epoch": 1.3, "learning_rate": 4.188256429652043e-05, "loss": 0.889, "step": 1719 }, { "epoch": 1.3, "learning_rate": 4.187783661119516e-05, "loss": 0.8094, "step": 1720 }, { "epoch": 1.3, "learning_rate": 4.1873108925869894e-05, "loss": 0.8775, "step": 1721 }, { "epoch": 1.3, "learning_rate": 4.1868381240544626e-05, "loss": 0.8589, "step": 1722 }, { "epoch": 1.3, "learning_rate": 4.1863653555219365e-05, "loss": 0.7877, "step": 1723 }, { "epoch": 1.3, "learning_rate": 4.18589258698941e-05, "loss": 0.876, "step": 1724 }, { "epoch": 1.3, "learning_rate": 4.1854198184568836e-05, "loss": 0.7768, "step": 1725 }, { "epoch": 1.31, "learning_rate": 4.184947049924357e-05, "loss": 0.9494, "step": 1726 }, { "epoch": 1.31, "learning_rate": 4.184474281391831e-05, "loss": 0.8285, "step": 1727 }, { "epoch": 1.31, "learning_rate": 4.184001512859304e-05, "loss": 0.9739, "step": 1728 }, { "epoch": 1.31, "learning_rate": 4.183528744326778e-05, "loss": 0.8771, "step": 1729 }, { "epoch": 1.31, "learning_rate": 4.183055975794251e-05, "loss": 0.88, "step": 1730 }, { "epoch": 1.31, "learning_rate": 4.182583207261725e-05, "loss": 0.8839, "step": 1731 }, { "epoch": 1.31, "learning_rate": 4.182110438729198e-05, "loss": 0.7774, "step": 1732 }, { "epoch": 1.31, "learning_rate": 4.181637670196672e-05, "loss": 0.8566, "step": 1733 }, { "epoch": 1.31, "learning_rate": 4.181164901664145e-05, "loss": 0.9113, "step": 1734 }, { "epoch": 1.31, "learning_rate": 4.180692133131619e-05, "loss": 0.9548, "step": 1735 }, { "epoch": 1.31, "learning_rate": 4.180219364599092e-05, "loss": 0.8681, "step": 1736 }, { "epoch": 1.31, "learning_rate": 4.179746596066566e-05, "loss": 0.8832, "step": 1737 }, { "epoch": 1.31, "learning_rate": 4.1792738275340393e-05, "loss": 0.8257, "step": 1738 }, { "epoch": 1.32, "learning_rate": 4.178801059001513e-05, "loss": 1.0088, "step": 1739 }, { "epoch": 1.32, "learning_rate": 4.1783282904689864e-05, "loss": 0.8771, "step": 1740 }, { "epoch": 1.32, "learning_rate": 4.17785552193646e-05, "loss": 0.8337, "step": 1741 }, { "epoch": 1.32, "learning_rate": 4.1773827534039335e-05, "loss": 0.8956, "step": 1742 }, { "epoch": 1.32, "learning_rate": 4.1769099848714074e-05, "loss": 0.7947, "step": 1743 }, { "epoch": 1.32, "learning_rate": 4.1764372163388806e-05, "loss": 0.8388, "step": 1744 }, { "epoch": 1.32, "learning_rate": 4.1759644478063545e-05, "loss": 0.8279, "step": 1745 }, { "epoch": 1.32, "learning_rate": 4.175491679273828e-05, "loss": 0.7525, "step": 1746 }, { "epoch": 1.32, "learning_rate": 4.1750189107413016e-05, "loss": 0.8997, "step": 1747 }, { "epoch": 1.32, "learning_rate": 4.174546142208775e-05, "loss": 0.8499, "step": 1748 }, { "epoch": 1.32, "learning_rate": 4.174073373676248e-05, "loss": 0.9435, "step": 1749 }, { "epoch": 1.32, "learning_rate": 4.173600605143722e-05, "loss": 0.9114, "step": 1750 }, { "epoch": 1.32, "learning_rate": 4.173127836611195e-05, "loss": 1.0919, "step": 1751 }, { "epoch": 1.33, "learning_rate": 4.172655068078669e-05, "loss": 0.7866, "step": 1752 }, { "epoch": 1.33, "learning_rate": 4.172182299546142e-05, "loss": 0.8788, "step": 1753 }, { "epoch": 1.33, "learning_rate": 4.171709531013616e-05, "loss": 0.7924, "step": 1754 }, { "epoch": 1.33, "learning_rate": 4.171236762481089e-05, "loss": 0.8005, "step": 1755 }, { "epoch": 1.33, "learning_rate": 4.170763993948563e-05, "loss": 0.9331, "step": 1756 }, { "epoch": 1.33, "learning_rate": 4.1702912254160364e-05, "loss": 0.8057, "step": 1757 }, { "epoch": 1.33, "learning_rate": 4.16981845688351e-05, "loss": 0.8805, "step": 1758 }, { "epoch": 1.33, "learning_rate": 4.1693456883509834e-05, "loss": 0.9478, "step": 1759 }, { "epoch": 1.33, "learning_rate": 4.168872919818457e-05, "loss": 0.9375, "step": 1760 }, { "epoch": 1.33, "learning_rate": 4.1684001512859305e-05, "loss": 0.9896, "step": 1761 }, { "epoch": 1.33, "learning_rate": 4.1679273827534044e-05, "loss": 0.8581, "step": 1762 }, { "epoch": 1.33, "learning_rate": 4.1674546142208776e-05, "loss": 0.7759, "step": 1763 }, { "epoch": 1.33, "learning_rate": 4.1669818456883515e-05, "loss": 0.9268, "step": 1764 }, { "epoch": 1.33, "learning_rate": 4.166509077155825e-05, "loss": 0.7976, "step": 1765 }, { "epoch": 1.34, "learning_rate": 4.1660363086232986e-05, "loss": 0.9293, "step": 1766 }, { "epoch": 1.34, "learning_rate": 4.165563540090772e-05, "loss": 0.9192, "step": 1767 }, { "epoch": 1.34, "learning_rate": 4.165090771558246e-05, "loss": 0.671, "step": 1768 }, { "epoch": 1.34, "learning_rate": 4.164618003025719e-05, "loss": 0.8176, "step": 1769 }, { "epoch": 1.34, "learning_rate": 4.164145234493193e-05, "loss": 0.9001, "step": 1770 }, { "epoch": 1.34, "learning_rate": 4.163672465960666e-05, "loss": 0.9391, "step": 1771 }, { "epoch": 1.34, "learning_rate": 4.16319969742814e-05, "loss": 0.789, "step": 1772 }, { "epoch": 1.34, "learning_rate": 4.162726928895613e-05, "loss": 0.8756, "step": 1773 }, { "epoch": 1.34, "learning_rate": 4.162254160363086e-05, "loss": 0.8055, "step": 1774 }, { "epoch": 1.34, "learning_rate": 4.1617813918305595e-05, "loss": 0.7769, "step": 1775 }, { "epoch": 1.34, "learning_rate": 4.1613086232980334e-05, "loss": 0.7872, "step": 1776 }, { "epoch": 1.34, "learning_rate": 4.1608358547655066e-05, "loss": 0.8305, "step": 1777 }, { "epoch": 1.34, "learning_rate": 4.1603630862329805e-05, "loss": 0.9093, "step": 1778 }, { "epoch": 1.35, "learning_rate": 4.159890317700454e-05, "loss": 0.9944, "step": 1779 }, { "epoch": 1.35, "learning_rate": 4.1594175491679275e-05, "loss": 0.8518, "step": 1780 }, { "epoch": 1.35, "learning_rate": 4.158944780635401e-05, "loss": 0.8338, "step": 1781 }, { "epoch": 1.35, "learning_rate": 4.1584720121028746e-05, "loss": 0.8605, "step": 1782 }, { "epoch": 1.35, "learning_rate": 4.157999243570348e-05, "loss": 0.9801, "step": 1783 }, { "epoch": 1.35, "learning_rate": 4.157526475037822e-05, "loss": 0.7776, "step": 1784 }, { "epoch": 1.35, "learning_rate": 4.157053706505295e-05, "loss": 0.8093, "step": 1785 }, { "epoch": 1.35, "learning_rate": 4.156580937972769e-05, "loss": 0.8692, "step": 1786 }, { "epoch": 1.35, "learning_rate": 4.156108169440242e-05, "loss": 0.9492, "step": 1787 }, { "epoch": 1.35, "learning_rate": 4.155635400907716e-05, "loss": 0.8049, "step": 1788 }, { "epoch": 1.35, "learning_rate": 4.155162632375189e-05, "loss": 0.7929, "step": 1789 }, { "epoch": 1.35, "learning_rate": 4.154689863842663e-05, "loss": 1.0079, "step": 1790 }, { "epoch": 1.35, "learning_rate": 4.154217095310136e-05, "loss": 0.9516, "step": 1791 }, { "epoch": 1.36, "learning_rate": 4.15374432677761e-05, "loss": 0.8518, "step": 1792 }, { "epoch": 1.36, "learning_rate": 4.153271558245083e-05, "loss": 0.8686, "step": 1793 }, { "epoch": 1.36, "learning_rate": 4.152798789712557e-05, "loss": 0.8162, "step": 1794 }, { "epoch": 1.36, "learning_rate": 4.1523260211800304e-05, "loss": 0.8069, "step": 1795 }, { "epoch": 1.36, "learning_rate": 4.151853252647504e-05, "loss": 0.9564, "step": 1796 }, { "epoch": 1.36, "learning_rate": 4.1513804841149775e-05, "loss": 0.9981, "step": 1797 }, { "epoch": 1.36, "learning_rate": 4.1509077155824514e-05, "loss": 0.7983, "step": 1798 }, { "epoch": 1.36, "learning_rate": 4.1504349470499246e-05, "loss": 0.9547, "step": 1799 }, { "epoch": 1.36, "learning_rate": 4.149962178517398e-05, "loss": 0.915, "step": 1800 }, { "epoch": 1.36, "learning_rate": 4.1494894099848716e-05, "loss": 0.8171, "step": 1801 }, { "epoch": 1.36, "learning_rate": 4.149016641452345e-05, "loss": 0.9512, "step": 1802 }, { "epoch": 1.36, "learning_rate": 4.148543872919818e-05, "loss": 0.8199, "step": 1803 }, { "epoch": 1.36, "learning_rate": 4.148071104387292e-05, "loss": 0.7785, "step": 1804 }, { "epoch": 1.37, "learning_rate": 4.147598335854766e-05, "loss": 0.8781, "step": 1805 }, { "epoch": 1.37, "learning_rate": 4.147125567322239e-05, "loss": 0.8464, "step": 1806 }, { "epoch": 1.37, "learning_rate": 4.146652798789713e-05, "loss": 0.8284, "step": 1807 }, { "epoch": 1.37, "learning_rate": 4.146180030257186e-05, "loss": 0.9802, "step": 1808 }, { "epoch": 1.37, "learning_rate": 4.14570726172466e-05, "loss": 0.8728, "step": 1809 }, { "epoch": 1.37, "learning_rate": 4.145234493192133e-05, "loss": 0.8611, "step": 1810 }, { "epoch": 1.37, "learning_rate": 4.144761724659607e-05, "loss": 0.981, "step": 1811 }, { "epoch": 1.37, "learning_rate": 4.14428895612708e-05, "loss": 0.7918, "step": 1812 }, { "epoch": 1.37, "learning_rate": 4.143816187594554e-05, "loss": 0.8416, "step": 1813 }, { "epoch": 1.37, "learning_rate": 4.1433434190620274e-05, "loss": 0.9431, "step": 1814 }, { "epoch": 1.37, "learning_rate": 4.142870650529501e-05, "loss": 0.8068, "step": 1815 }, { "epoch": 1.37, "learning_rate": 4.1423978819969745e-05, "loss": 0.8667, "step": 1816 }, { "epoch": 1.37, "learning_rate": 4.1419251134644484e-05, "loss": 1.0549, "step": 1817 }, { "epoch": 1.37, "learning_rate": 4.1414523449319216e-05, "loss": 0.8903, "step": 1818 }, { "epoch": 1.38, "learning_rate": 4.1409795763993955e-05, "loss": 0.8751, "step": 1819 }, { "epoch": 1.38, "learning_rate": 4.1405068078668687e-05, "loss": 0.8569, "step": 1820 }, { "epoch": 1.38, "learning_rate": 4.1400340393343425e-05, "loss": 0.832, "step": 1821 }, { "epoch": 1.38, "learning_rate": 4.139561270801816e-05, "loss": 0.8446, "step": 1822 }, { "epoch": 1.38, "learning_rate": 4.1390885022692896e-05, "loss": 0.8825, "step": 1823 }, { "epoch": 1.38, "learning_rate": 4.138615733736763e-05, "loss": 0.883, "step": 1824 }, { "epoch": 1.38, "learning_rate": 4.138142965204237e-05, "loss": 0.9703, "step": 1825 }, { "epoch": 1.38, "learning_rate": 4.13767019667171e-05, "loss": 0.8655, "step": 1826 }, { "epoch": 1.38, "learning_rate": 4.137197428139183e-05, "loss": 0.8109, "step": 1827 }, { "epoch": 1.38, "learning_rate": 4.1367246596066563e-05, "loss": 0.7901, "step": 1828 }, { "epoch": 1.38, "learning_rate": 4.13625189107413e-05, "loss": 0.8347, "step": 1829 }, { "epoch": 1.38, "learning_rate": 4.1357791225416034e-05, "loss": 0.9142, "step": 1830 }, { "epoch": 1.38, "learning_rate": 4.135306354009077e-05, "loss": 0.841, "step": 1831 }, { "epoch": 1.39, "learning_rate": 4.1348335854765505e-05, "loss": 0.9231, "step": 1832 }, { "epoch": 1.39, "learning_rate": 4.1343608169440244e-05, "loss": 0.8984, "step": 1833 }, { "epoch": 1.39, "learning_rate": 4.1338880484114976e-05, "loss": 0.9682, "step": 1834 }, { "epoch": 1.39, "learning_rate": 4.1334152798789715e-05, "loss": 0.7914, "step": 1835 }, { "epoch": 1.39, "learning_rate": 4.132942511346445e-05, "loss": 0.842, "step": 1836 }, { "epoch": 1.39, "learning_rate": 4.1324697428139186e-05, "loss": 0.8639, "step": 1837 }, { "epoch": 1.39, "learning_rate": 4.131996974281392e-05, "loss": 1.0008, "step": 1838 }, { "epoch": 1.39, "learning_rate": 4.131524205748866e-05, "loss": 0.8679, "step": 1839 }, { "epoch": 1.39, "learning_rate": 4.131051437216339e-05, "loss": 0.8558, "step": 1840 }, { "epoch": 1.39, "learning_rate": 4.130578668683813e-05, "loss": 0.8417, "step": 1841 }, { "epoch": 1.39, "learning_rate": 4.130105900151286e-05, "loss": 0.8867, "step": 1842 }, { "epoch": 1.39, "learning_rate": 4.12963313161876e-05, "loss": 0.8845, "step": 1843 }, { "epoch": 1.39, "learning_rate": 4.129160363086233e-05, "loss": 0.872, "step": 1844 }, { "epoch": 1.4, "learning_rate": 4.128687594553707e-05, "loss": 0.8287, "step": 1845 }, { "epoch": 1.4, "learning_rate": 4.12821482602118e-05, "loss": 0.8556, "step": 1846 }, { "epoch": 1.4, "learning_rate": 4.127742057488654e-05, "loss": 0.9239, "step": 1847 }, { "epoch": 1.4, "learning_rate": 4.127269288956127e-05, "loss": 0.8612, "step": 1848 }, { "epoch": 1.4, "learning_rate": 4.126796520423601e-05, "loss": 0.8538, "step": 1849 }, { "epoch": 1.4, "learning_rate": 4.126323751891074e-05, "loss": 0.9497, "step": 1850 }, { "epoch": 1.4, "learning_rate": 4.125850983358548e-05, "loss": 0.8242, "step": 1851 }, { "epoch": 1.4, "learning_rate": 4.1253782148260214e-05, "loss": 0.8888, "step": 1852 }, { "epoch": 1.4, "learning_rate": 4.1249054462934946e-05, "loss": 0.9758, "step": 1853 }, { "epoch": 1.4, "learning_rate": 4.1244326777609685e-05, "loss": 0.8584, "step": 1854 }, { "epoch": 1.4, "learning_rate": 4.123959909228442e-05, "loss": 0.8553, "step": 1855 }, { "epoch": 1.4, "learning_rate": 4.123487140695915e-05, "loss": 0.913, "step": 1856 }, { "epoch": 1.4, "learning_rate": 4.123014372163389e-05, "loss": 0.9715, "step": 1857 }, { "epoch": 1.41, "learning_rate": 4.122541603630862e-05, "loss": 0.9554, "step": 1858 }, { "epoch": 1.41, "learning_rate": 4.122068835098336e-05, "loss": 0.9292, "step": 1859 }, { "epoch": 1.41, "learning_rate": 4.121596066565809e-05, "loss": 0.8513, "step": 1860 }, { "epoch": 1.41, "learning_rate": 4.121123298033283e-05, "loss": 0.861, "step": 1861 }, { "epoch": 1.41, "learning_rate": 4.120650529500756e-05, "loss": 0.8856, "step": 1862 }, { "epoch": 1.41, "learning_rate": 4.12017776096823e-05, "loss": 0.9274, "step": 1863 }, { "epoch": 1.41, "learning_rate": 4.119704992435703e-05, "loss": 0.9182, "step": 1864 }, { "epoch": 1.41, "learning_rate": 4.119232223903177e-05, "loss": 0.8619, "step": 1865 }, { "epoch": 1.41, "learning_rate": 4.118759455370651e-05, "loss": 0.7559, "step": 1866 }, { "epoch": 1.41, "learning_rate": 4.118286686838124e-05, "loss": 0.8942, "step": 1867 }, { "epoch": 1.41, "learning_rate": 4.117813918305598e-05, "loss": 0.9301, "step": 1868 }, { "epoch": 1.41, "learning_rate": 4.117341149773071e-05, "loss": 0.8775, "step": 1869 }, { "epoch": 1.41, "learning_rate": 4.116868381240545e-05, "loss": 0.8295, "step": 1870 }, { "epoch": 1.42, "learning_rate": 4.1163956127080184e-05, "loss": 0.9126, "step": 1871 }, { "epoch": 1.42, "learning_rate": 4.115922844175492e-05, "loss": 0.9221, "step": 1872 }, { "epoch": 1.42, "learning_rate": 4.1154500756429655e-05, "loss": 0.9363, "step": 1873 }, { "epoch": 1.42, "learning_rate": 4.1149773071104394e-05, "loss": 0.8388, "step": 1874 }, { "epoch": 1.42, "learning_rate": 4.1145045385779126e-05, "loss": 0.8394, "step": 1875 }, { "epoch": 1.42, "learning_rate": 4.1140317700453865e-05, "loss": 0.801, "step": 1876 }, { "epoch": 1.42, "learning_rate": 4.11355900151286e-05, "loss": 0.9158, "step": 1877 }, { "epoch": 1.42, "learning_rate": 4.1130862329803336e-05, "loss": 0.9618, "step": 1878 }, { "epoch": 1.42, "learning_rate": 4.112613464447807e-05, "loss": 0.8671, "step": 1879 }, { "epoch": 1.42, "learning_rate": 4.11214069591528e-05, "loss": 0.9781, "step": 1880 }, { "epoch": 1.42, "learning_rate": 4.111667927382753e-05, "loss": 0.946, "step": 1881 }, { "epoch": 1.42, "learning_rate": 4.111195158850227e-05, "loss": 1.0592, "step": 1882 }, { "epoch": 1.42, "learning_rate": 4.1107223903177e-05, "loss": 0.7477, "step": 1883 }, { "epoch": 1.42, "learning_rate": 4.110249621785174e-05, "loss": 0.8558, "step": 1884 }, { "epoch": 1.43, "learning_rate": 4.1097768532526474e-05, "loss": 0.866, "step": 1885 }, { "epoch": 1.43, "learning_rate": 4.109304084720121e-05, "loss": 0.9976, "step": 1886 }, { "epoch": 1.43, "learning_rate": 4.1088313161875945e-05, "loss": 0.749, "step": 1887 }, { "epoch": 1.43, "learning_rate": 4.1083585476550683e-05, "loss": 0.8294, "step": 1888 }, { "epoch": 1.43, "learning_rate": 4.1078857791225416e-05, "loss": 0.85, "step": 1889 }, { "epoch": 1.43, "learning_rate": 4.1074130105900154e-05, "loss": 0.8707, "step": 1890 }, { "epoch": 1.43, "learning_rate": 4.1069402420574886e-05, "loss": 0.799, "step": 1891 }, { "epoch": 1.43, "learning_rate": 4.1064674735249625e-05, "loss": 0.8383, "step": 1892 }, { "epoch": 1.43, "learning_rate": 4.105994704992436e-05, "loss": 0.9088, "step": 1893 }, { "epoch": 1.43, "learning_rate": 4.1055219364599096e-05, "loss": 0.8848, "step": 1894 }, { "epoch": 1.43, "learning_rate": 4.105049167927383e-05, "loss": 0.8608, "step": 1895 }, { "epoch": 1.43, "learning_rate": 4.104576399394857e-05, "loss": 0.854, "step": 1896 }, { "epoch": 1.43, "learning_rate": 4.10410363086233e-05, "loss": 0.8757, "step": 1897 }, { "epoch": 1.44, "learning_rate": 4.103630862329804e-05, "loss": 0.7727, "step": 1898 }, { "epoch": 1.44, "learning_rate": 4.103158093797277e-05, "loss": 0.7965, "step": 1899 }, { "epoch": 1.44, "learning_rate": 4.102685325264751e-05, "loss": 0.8643, "step": 1900 }, { "epoch": 1.44, "learning_rate": 4.102212556732224e-05, "loss": 0.9119, "step": 1901 }, { "epoch": 1.44, "learning_rate": 4.101739788199698e-05, "loss": 0.9681, "step": 1902 }, { "epoch": 1.44, "learning_rate": 4.101267019667171e-05, "loss": 0.8877, "step": 1903 }, { "epoch": 1.44, "learning_rate": 4.100794251134645e-05, "loss": 0.8928, "step": 1904 }, { "epoch": 1.44, "learning_rate": 4.100321482602118e-05, "loss": 0.937, "step": 1905 }, { "epoch": 1.44, "learning_rate": 4.0998487140695915e-05, "loss": 0.8523, "step": 1906 }, { "epoch": 1.44, "learning_rate": 4.0993759455370654e-05, "loss": 0.8269, "step": 1907 }, { "epoch": 1.44, "learning_rate": 4.0989031770045386e-05, "loss": 0.9149, "step": 1908 }, { "epoch": 1.44, "learning_rate": 4.098430408472012e-05, "loss": 0.9423, "step": 1909 }, { "epoch": 1.44, "learning_rate": 4.0979576399394857e-05, "loss": 0.9137, "step": 1910 }, { "epoch": 1.45, "learning_rate": 4.097484871406959e-05, "loss": 1.0429, "step": 1911 }, { "epoch": 1.45, "learning_rate": 4.097012102874433e-05, "loss": 0.7896, "step": 1912 }, { "epoch": 1.45, "learning_rate": 4.096539334341906e-05, "loss": 0.8795, "step": 1913 }, { "epoch": 1.45, "learning_rate": 4.09606656580938e-05, "loss": 0.7256, "step": 1914 }, { "epoch": 1.45, "learning_rate": 4.095593797276853e-05, "loss": 0.7359, "step": 1915 }, { "epoch": 1.45, "learning_rate": 4.095121028744327e-05, "loss": 0.9324, "step": 1916 }, { "epoch": 1.45, "learning_rate": 4.0946482602118e-05, "loss": 0.8182, "step": 1917 }, { "epoch": 1.45, "learning_rate": 4.094175491679274e-05, "loss": 0.8664, "step": 1918 }, { "epoch": 1.45, "learning_rate": 4.093702723146747e-05, "loss": 0.9025, "step": 1919 }, { "epoch": 1.45, "learning_rate": 4.093229954614221e-05, "loss": 0.7802, "step": 1920 }, { "epoch": 1.45, "learning_rate": 4.092757186081694e-05, "loss": 0.8502, "step": 1921 }, { "epoch": 1.45, "learning_rate": 4.092284417549168e-05, "loss": 0.9403, "step": 1922 }, { "epoch": 1.45, "learning_rate": 4.0918116490166414e-05, "loss": 0.95, "step": 1923 }, { "epoch": 1.46, "learning_rate": 4.091338880484115e-05, "loss": 0.7992, "step": 1924 }, { "epoch": 1.46, "learning_rate": 4.090866111951589e-05, "loss": 0.7147, "step": 1925 }, { "epoch": 1.46, "learning_rate": 4.0903933434190624e-05, "loss": 0.9009, "step": 1926 }, { "epoch": 1.46, "learning_rate": 4.089920574886536e-05, "loss": 0.8046, "step": 1927 }, { "epoch": 1.46, "learning_rate": 4.0894478063540095e-05, "loss": 0.8512, "step": 1928 }, { "epoch": 1.46, "learning_rate": 4.0889750378214833e-05, "loss": 0.8408, "step": 1929 }, { "epoch": 1.46, "learning_rate": 4.0885022692889565e-05, "loss": 0.8978, "step": 1930 }, { "epoch": 1.46, "learning_rate": 4.0880295007564304e-05, "loss": 0.7724, "step": 1931 }, { "epoch": 1.46, "learning_rate": 4.0875567322239036e-05, "loss": 0.8847, "step": 1932 }, { "epoch": 1.46, "learning_rate": 4.087083963691377e-05, "loss": 0.8122, "step": 1933 }, { "epoch": 1.46, "learning_rate": 4.08661119515885e-05, "loss": 0.8767, "step": 1934 }, { "epoch": 1.46, "learning_rate": 4.086138426626324e-05, "loss": 0.752, "step": 1935 }, { "epoch": 1.46, "learning_rate": 4.085665658093797e-05, "loss": 1.0225, "step": 1936 }, { "epoch": 1.46, "learning_rate": 4.085192889561271e-05, "loss": 0.8419, "step": 1937 }, { "epoch": 1.47, "learning_rate": 4.084720121028744e-05, "loss": 0.9642, "step": 1938 }, { "epoch": 1.47, "learning_rate": 4.084247352496218e-05, "loss": 0.7735, "step": 1939 }, { "epoch": 1.47, "learning_rate": 4.083774583963691e-05, "loss": 0.876, "step": 1940 }, { "epoch": 1.47, "learning_rate": 4.083301815431165e-05, "loss": 0.9292, "step": 1941 }, { "epoch": 1.47, "learning_rate": 4.0828290468986384e-05, "loss": 0.825, "step": 1942 }, { "epoch": 1.47, "learning_rate": 4.082356278366112e-05, "loss": 0.8153, "step": 1943 }, { "epoch": 1.47, "learning_rate": 4.0818835098335855e-05, "loss": 0.92, "step": 1944 }, { "epoch": 1.47, "learning_rate": 4.0814107413010594e-05, "loss": 0.8701, "step": 1945 }, { "epoch": 1.47, "learning_rate": 4.0809379727685326e-05, "loss": 0.881, "step": 1946 }, { "epoch": 1.47, "learning_rate": 4.0804652042360065e-05, "loss": 0.9003, "step": 1947 }, { "epoch": 1.47, "learning_rate": 4.07999243570348e-05, "loss": 0.9212, "step": 1948 }, { "epoch": 1.47, "learning_rate": 4.0795196671709536e-05, "loss": 0.8815, "step": 1949 }, { "epoch": 1.47, "learning_rate": 4.079046898638427e-05, "loss": 0.9883, "step": 1950 }, { "epoch": 1.48, "learning_rate": 4.0785741301059006e-05, "loss": 0.9339, "step": 1951 }, { "epoch": 1.48, "learning_rate": 4.078101361573374e-05, "loss": 0.8815, "step": 1952 }, { "epoch": 1.48, "learning_rate": 4.077628593040848e-05, "loss": 0.9142, "step": 1953 }, { "epoch": 1.48, "learning_rate": 4.077155824508321e-05, "loss": 0.7895, "step": 1954 }, { "epoch": 1.48, "learning_rate": 4.076683055975795e-05, "loss": 0.8466, "step": 1955 }, { "epoch": 1.48, "learning_rate": 4.076210287443268e-05, "loss": 0.8297, "step": 1956 }, { "epoch": 1.48, "learning_rate": 4.075737518910742e-05, "loss": 0.8639, "step": 1957 }, { "epoch": 1.48, "learning_rate": 4.075264750378215e-05, "loss": 0.8963, "step": 1958 }, { "epoch": 1.48, "learning_rate": 4.074791981845688e-05, "loss": 0.8327, "step": 1959 }, { "epoch": 1.48, "learning_rate": 4.074319213313162e-05, "loss": 0.6733, "step": 1960 }, { "epoch": 1.48, "learning_rate": 4.0738464447806354e-05, "loss": 0.9623, "step": 1961 }, { "epoch": 1.48, "learning_rate": 4.0733736762481086e-05, "loss": 0.9113, "step": 1962 }, { "epoch": 1.48, "learning_rate": 4.0729009077155825e-05, "loss": 0.8064, "step": 1963 }, { "epoch": 1.49, "learning_rate": 4.072428139183056e-05, "loss": 0.8044, "step": 1964 }, { "epoch": 1.49, "learning_rate": 4.0719553706505296e-05, "loss": 0.7314, "step": 1965 }, { "epoch": 1.49, "learning_rate": 4.071482602118003e-05, "loss": 0.853, "step": 1966 }, { "epoch": 1.49, "learning_rate": 4.071009833585477e-05, "loss": 0.8627, "step": 1967 }, { "epoch": 1.49, "learning_rate": 4.07053706505295e-05, "loss": 0.8834, "step": 1968 }, { "epoch": 1.49, "learning_rate": 4.070064296520424e-05, "loss": 0.8495, "step": 1969 }, { "epoch": 1.49, "learning_rate": 4.069591527987897e-05, "loss": 0.8901, "step": 1970 }, { "epoch": 1.49, "learning_rate": 4.069118759455371e-05, "loss": 0.8028, "step": 1971 }, { "epoch": 1.49, "learning_rate": 4.068645990922844e-05, "loss": 0.914, "step": 1972 }, { "epoch": 1.49, "learning_rate": 4.068173222390318e-05, "loss": 0.8204, "step": 1973 }, { "epoch": 1.49, "learning_rate": 4.067700453857791e-05, "loss": 0.8222, "step": 1974 }, { "epoch": 1.49, "learning_rate": 4.067227685325265e-05, "loss": 0.9275, "step": 1975 }, { "epoch": 1.49, "learning_rate": 4.066754916792738e-05, "loss": 0.772, "step": 1976 }, { "epoch": 1.5, "learning_rate": 4.066282148260212e-05, "loss": 0.9144, "step": 1977 }, { "epoch": 1.5, "learning_rate": 4.0658093797276853e-05, "loss": 0.8894, "step": 1978 }, { "epoch": 1.5, "learning_rate": 4.065336611195159e-05, "loss": 0.9927, "step": 1979 }, { "epoch": 1.5, "learning_rate": 4.0648638426626324e-05, "loss": 0.8588, "step": 1980 }, { "epoch": 1.5, "learning_rate": 4.064391074130106e-05, "loss": 0.8232, "step": 1981 }, { "epoch": 1.5, "learning_rate": 4.0639183055975795e-05, "loss": 0.8826, "step": 1982 }, { "epoch": 1.5, "learning_rate": 4.0634455370650534e-05, "loss": 0.9019, "step": 1983 }, { "epoch": 1.5, "learning_rate": 4.0629727685325266e-05, "loss": 0.816, "step": 1984 }, { "epoch": 1.5, "learning_rate": 4.0625000000000005e-05, "loss": 0.9024, "step": 1985 }, { "epoch": 1.5, "learning_rate": 4.062027231467474e-05, "loss": 0.9572, "step": 1986 }, { "epoch": 1.5, "learning_rate": 4.061554462934947e-05, "loss": 0.9466, "step": 1987 }, { "epoch": 1.5, "learning_rate": 4.061081694402421e-05, "loss": 0.9585, "step": 1988 }, { "epoch": 1.5, "learning_rate": 4.060608925869894e-05, "loss": 0.8959, "step": 1989 }, { "epoch": 1.51, "learning_rate": 4.060136157337368e-05, "loss": 0.8561, "step": 1990 }, { "epoch": 1.51, "learning_rate": 4.059663388804841e-05, "loss": 1.0081, "step": 1991 }, { "epoch": 1.51, "learning_rate": 4.059190620272315e-05, "loss": 0.8836, "step": 1992 }, { "epoch": 1.51, "learning_rate": 4.058717851739788e-05, "loss": 0.868, "step": 1993 }, { "epoch": 1.51, "learning_rate": 4.058245083207262e-05, "loss": 0.8922, "step": 1994 }, { "epoch": 1.51, "learning_rate": 4.057772314674735e-05, "loss": 0.8808, "step": 1995 }, { "epoch": 1.51, "learning_rate": 4.057299546142209e-05, "loss": 0.8516, "step": 1996 }, { "epoch": 1.51, "learning_rate": 4.0568267776096824e-05, "loss": 0.9618, "step": 1997 }, { "epoch": 1.51, "learning_rate": 4.056354009077156e-05, "loss": 0.8685, "step": 1998 }, { "epoch": 1.51, "learning_rate": 4.0558812405446294e-05, "loss": 0.86, "step": 1999 }, { "epoch": 1.51, "learning_rate": 4.055408472012103e-05, "loss": 0.8958, "step": 2000 }, { "epoch": 1.51, "learning_rate": 4.0549357034795765e-05, "loss": 0.7754, "step": 2001 }, { "epoch": 1.51, "learning_rate": 4.0544629349470504e-05, "loss": 0.8493, "step": 2002 }, { "epoch": 1.51, "learning_rate": 4.0539901664145236e-05, "loss": 1.0867, "step": 2003 }, { "epoch": 1.52, "learning_rate": 4.0535173978819975e-05, "loss": 0.8159, "step": 2004 }, { "epoch": 1.52, "learning_rate": 4.053044629349471e-05, "loss": 0.9025, "step": 2005 }, { "epoch": 1.52, "learning_rate": 4.0525718608169446e-05, "loss": 0.8709, "step": 2006 }, { "epoch": 1.52, "learning_rate": 4.052099092284418e-05, "loss": 1.0326, "step": 2007 }, { "epoch": 1.52, "learning_rate": 4.051626323751892e-05, "loss": 0.888, "step": 2008 }, { "epoch": 1.52, "learning_rate": 4.051153555219365e-05, "loss": 0.7679, "step": 2009 }, { "epoch": 1.52, "learning_rate": 4.050680786686839e-05, "loss": 1.0027, "step": 2010 }, { "epoch": 1.52, "learning_rate": 4.050208018154312e-05, "loss": 0.9306, "step": 2011 }, { "epoch": 1.52, "learning_rate": 4.049735249621785e-05, "loss": 0.9187, "step": 2012 }, { "epoch": 1.52, "learning_rate": 4.049262481089259e-05, "loss": 0.8281, "step": 2013 }, { "epoch": 1.52, "learning_rate": 4.048789712556732e-05, "loss": 0.8903, "step": 2014 }, { "epoch": 1.52, "learning_rate": 4.0483169440242055e-05, "loss": 0.9241, "step": 2015 }, { "epoch": 1.52, "learning_rate": 4.0478441754916794e-05, "loss": 0.7207, "step": 2016 }, { "epoch": 1.53, "learning_rate": 4.0473714069591526e-05, "loss": 0.8928, "step": 2017 }, { "epoch": 1.53, "learning_rate": 4.0468986384266265e-05, "loss": 0.8489, "step": 2018 }, { "epoch": 1.53, "learning_rate": 4.0464258698940997e-05, "loss": 0.9054, "step": 2019 }, { "epoch": 1.53, "learning_rate": 4.0459531013615735e-05, "loss": 0.7678, "step": 2020 }, { "epoch": 1.53, "learning_rate": 4.045480332829047e-05, "loss": 0.9356, "step": 2021 }, { "epoch": 1.53, "learning_rate": 4.0450075642965206e-05, "loss": 0.8705, "step": 2022 }, { "epoch": 1.53, "learning_rate": 4.044534795763994e-05, "loss": 0.8134, "step": 2023 }, { "epoch": 1.53, "learning_rate": 4.044062027231468e-05, "loss": 0.8827, "step": 2024 }, { "epoch": 1.53, "learning_rate": 4.043589258698941e-05, "loss": 0.9587, "step": 2025 }, { "epoch": 1.53, "learning_rate": 4.043116490166415e-05, "loss": 0.8241, "step": 2026 }, { "epoch": 1.53, "learning_rate": 4.042643721633888e-05, "loss": 0.7205, "step": 2027 }, { "epoch": 1.53, "learning_rate": 4.042170953101362e-05, "loss": 0.7309, "step": 2028 }, { "epoch": 1.53, "learning_rate": 4.041698184568835e-05, "loss": 0.889, "step": 2029 }, { "epoch": 1.54, "learning_rate": 4.041225416036309e-05, "loss": 0.7811, "step": 2030 }, { "epoch": 1.54, "learning_rate": 4.040752647503782e-05, "loss": 0.9398, "step": 2031 }, { "epoch": 1.54, "learning_rate": 4.040279878971256e-05, "loss": 0.7926, "step": 2032 }, { "epoch": 1.54, "learning_rate": 4.039807110438729e-05, "loss": 0.8775, "step": 2033 }, { "epoch": 1.54, "learning_rate": 4.039334341906203e-05, "loss": 0.8397, "step": 2034 }, { "epoch": 1.54, "learning_rate": 4.0388615733736764e-05, "loss": 1.0397, "step": 2035 }, { "epoch": 1.54, "learning_rate": 4.03838880484115e-05, "loss": 0.78, "step": 2036 }, { "epoch": 1.54, "learning_rate": 4.0379160363086235e-05, "loss": 0.8616, "step": 2037 }, { "epoch": 1.54, "learning_rate": 4.0374432677760973e-05, "loss": 0.9214, "step": 2038 }, { "epoch": 1.54, "learning_rate": 4.0369704992435706e-05, "loss": 0.9142, "step": 2039 }, { "epoch": 1.54, "learning_rate": 4.036497730711044e-05, "loss": 0.7836, "step": 2040 }, { "epoch": 1.54, "learning_rate": 4.0360249621785176e-05, "loss": 0.8948, "step": 2041 }, { "epoch": 1.54, "learning_rate": 4.035552193645991e-05, "loss": 0.8195, "step": 2042 }, { "epoch": 1.55, "learning_rate": 4.035079425113464e-05, "loss": 0.9667, "step": 2043 }, { "epoch": 1.55, "learning_rate": 4.034606656580938e-05, "loss": 0.8691, "step": 2044 }, { "epoch": 1.55, "learning_rate": 4.034133888048411e-05, "loss": 0.8193, "step": 2045 }, { "epoch": 1.55, "learning_rate": 4.033661119515885e-05, "loss": 0.8698, "step": 2046 }, { "epoch": 1.55, "learning_rate": 4.033188350983359e-05, "loss": 0.8872, "step": 2047 }, { "epoch": 1.55, "learning_rate": 4.032715582450832e-05, "loss": 0.723, "step": 2048 }, { "epoch": 1.55, "learning_rate": 4.032242813918306e-05, "loss": 0.8475, "step": 2049 }, { "epoch": 1.55, "learning_rate": 4.031770045385779e-05, "loss": 0.836, "step": 2050 }, { "epoch": 1.55, "learning_rate": 4.031297276853253e-05, "loss": 0.9289, "step": 2051 }, { "epoch": 1.55, "learning_rate": 4.030824508320726e-05, "loss": 0.8425, "step": 2052 }, { "epoch": 1.55, "learning_rate": 4.0303517397882e-05, "loss": 0.8796, "step": 2053 }, { "epoch": 1.55, "learning_rate": 4.0298789712556734e-05, "loss": 0.8289, "step": 2054 }, { "epoch": 1.55, "learning_rate": 4.029406202723147e-05, "loss": 0.9753, "step": 2055 }, { "epoch": 1.55, "learning_rate": 4.0289334341906205e-05, "loss": 0.8651, "step": 2056 }, { "epoch": 1.56, "learning_rate": 4.0284606656580944e-05, "loss": 0.7431, "step": 2057 }, { "epoch": 1.56, "learning_rate": 4.0279878971255676e-05, "loss": 0.7826, "step": 2058 }, { "epoch": 1.56, "learning_rate": 4.0275151285930414e-05, "loss": 0.9014, "step": 2059 }, { "epoch": 1.56, "learning_rate": 4.0270423600605147e-05, "loss": 0.8655, "step": 2060 }, { "epoch": 1.56, "learning_rate": 4.0265695915279885e-05, "loss": 0.8732, "step": 2061 }, { "epoch": 1.56, "learning_rate": 4.026096822995462e-05, "loss": 0.8887, "step": 2062 }, { "epoch": 1.56, "learning_rate": 4.0256240544629356e-05, "loss": 0.8817, "step": 2063 }, { "epoch": 1.56, "learning_rate": 4.025151285930409e-05, "loss": 0.8432, "step": 2064 }, { "epoch": 1.56, "learning_rate": 4.024678517397882e-05, "loss": 0.9028, "step": 2065 }, { "epoch": 1.56, "learning_rate": 4.024205748865356e-05, "loss": 0.8547, "step": 2066 }, { "epoch": 1.56, "learning_rate": 4.023732980332829e-05, "loss": 0.9719, "step": 2067 }, { "epoch": 1.56, "learning_rate": 4.023260211800302e-05, "loss": 0.9328, "step": 2068 }, { "epoch": 1.56, "learning_rate": 4.022787443267776e-05, "loss": 0.8597, "step": 2069 }, { "epoch": 1.57, "learning_rate": 4.0223146747352494e-05, "loss": 0.922, "step": 2070 }, { "epoch": 1.57, "learning_rate": 4.021841906202723e-05, "loss": 0.9165, "step": 2071 }, { "epoch": 1.57, "learning_rate": 4.0213691376701965e-05, "loss": 0.8787, "step": 2072 }, { "epoch": 1.57, "learning_rate": 4.0208963691376704e-05, "loss": 0.8421, "step": 2073 }, { "epoch": 1.57, "learning_rate": 4.0204236006051436e-05, "loss": 0.9681, "step": 2074 }, { "epoch": 1.57, "learning_rate": 4.0199508320726175e-05, "loss": 0.8434, "step": 2075 }, { "epoch": 1.57, "learning_rate": 4.019478063540091e-05, "loss": 0.7603, "step": 2076 }, { "epoch": 1.57, "learning_rate": 4.0190052950075646e-05, "loss": 0.7594, "step": 2077 }, { "epoch": 1.57, "learning_rate": 4.018532526475038e-05, "loss": 0.8929, "step": 2078 }, { "epoch": 1.57, "learning_rate": 4.018059757942512e-05, "loss": 0.832, "step": 2079 }, { "epoch": 1.57, "learning_rate": 4.017586989409985e-05, "loss": 0.8337, "step": 2080 }, { "epoch": 1.57, "learning_rate": 4.017114220877459e-05, "loss": 0.848, "step": 2081 }, { "epoch": 1.57, "learning_rate": 4.016641452344932e-05, "loss": 0.7674, "step": 2082 }, { "epoch": 1.58, "learning_rate": 4.016168683812406e-05, "loss": 0.8739, "step": 2083 }, { "epoch": 1.58, "learning_rate": 4.015695915279879e-05, "loss": 0.869, "step": 2084 }, { "epoch": 1.58, "learning_rate": 4.015223146747353e-05, "loss": 0.8409, "step": 2085 }, { "epoch": 1.58, "learning_rate": 4.014750378214826e-05, "loss": 0.8541, "step": 2086 }, { "epoch": 1.58, "learning_rate": 4.0142776096823e-05, "loss": 0.8415, "step": 2087 }, { "epoch": 1.58, "learning_rate": 4.013804841149773e-05, "loss": 0.9411, "step": 2088 }, { "epoch": 1.58, "learning_rate": 4.013332072617247e-05, "loss": 0.9232, "step": 2089 }, { "epoch": 1.58, "learning_rate": 4.01285930408472e-05, "loss": 0.9037, "step": 2090 }, { "epoch": 1.58, "learning_rate": 4.012386535552194e-05, "loss": 0.8619, "step": 2091 }, { "epoch": 1.58, "learning_rate": 4.0119137670196674e-05, "loss": 0.7936, "step": 2092 }, { "epoch": 1.58, "learning_rate": 4.0114409984871406e-05, "loss": 0.9573, "step": 2093 }, { "epoch": 1.58, "learning_rate": 4.0109682299546145e-05, "loss": 0.8611, "step": 2094 }, { "epoch": 1.58, "learning_rate": 4.010495461422088e-05, "loss": 0.8914, "step": 2095 }, { "epoch": 1.59, "learning_rate": 4.010022692889561e-05, "loss": 0.902, "step": 2096 }, { "epoch": 1.59, "learning_rate": 4.009549924357035e-05, "loss": 0.9043, "step": 2097 }, { "epoch": 1.59, "learning_rate": 4.009077155824508e-05, "loss": 0.9003, "step": 2098 }, { "epoch": 1.59, "learning_rate": 4.008604387291982e-05, "loss": 1.1208, "step": 2099 }, { "epoch": 1.59, "learning_rate": 4.008131618759455e-05, "loss": 0.9452, "step": 2100 }, { "epoch": 1.59, "learning_rate": 4.007658850226929e-05, "loss": 0.8714, "step": 2101 }, { "epoch": 1.59, "learning_rate": 4.007186081694402e-05, "loss": 1.0427, "step": 2102 }, { "epoch": 1.59, "learning_rate": 4.006713313161876e-05, "loss": 0.8495, "step": 2103 }, { "epoch": 1.59, "learning_rate": 4.006240544629349e-05, "loss": 0.9662, "step": 2104 }, { "epoch": 1.59, "learning_rate": 4.005767776096823e-05, "loss": 0.8646, "step": 2105 }, { "epoch": 1.59, "learning_rate": 4.005295007564297e-05, "loss": 0.8651, "step": 2106 }, { "epoch": 1.59, "learning_rate": 4.00482223903177e-05, "loss": 0.9112, "step": 2107 }, { "epoch": 1.59, "learning_rate": 4.004349470499244e-05, "loss": 0.878, "step": 2108 }, { "epoch": 1.6, "learning_rate": 4.003876701966717e-05, "loss": 0.9714, "step": 2109 }, { "epoch": 1.6, "learning_rate": 4.003403933434191e-05, "loss": 0.9255, "step": 2110 }, { "epoch": 1.6, "learning_rate": 4.0029311649016644e-05, "loss": 0.9052, "step": 2111 }, { "epoch": 1.6, "learning_rate": 4.002458396369138e-05, "loss": 0.8659, "step": 2112 }, { "epoch": 1.6, "learning_rate": 4.0019856278366115e-05, "loss": 0.8954, "step": 2113 }, { "epoch": 1.6, "learning_rate": 4.0015128593040854e-05, "loss": 0.8912, "step": 2114 }, { "epoch": 1.6, "learning_rate": 4.0010400907715586e-05, "loss": 0.8858, "step": 2115 }, { "epoch": 1.6, "learning_rate": 4.0005673222390325e-05, "loss": 0.932, "step": 2116 }, { "epoch": 1.6, "learning_rate": 4.000094553706506e-05, "loss": 0.8145, "step": 2117 }, { "epoch": 1.6, "learning_rate": 3.999621785173979e-05, "loss": 0.8868, "step": 2118 }, { "epoch": 1.6, "learning_rate": 3.999149016641453e-05, "loss": 0.9773, "step": 2119 }, { "epoch": 1.6, "learning_rate": 3.998676248108926e-05, "loss": 0.9682, "step": 2120 }, { "epoch": 1.6, "learning_rate": 3.998203479576399e-05, "loss": 0.9856, "step": 2121 }, { "epoch": 1.6, "learning_rate": 3.997730711043873e-05, "loss": 0.8498, "step": 2122 }, { "epoch": 1.61, "learning_rate": 3.997257942511346e-05, "loss": 0.7674, "step": 2123 }, { "epoch": 1.61, "learning_rate": 3.99678517397882e-05, "loss": 0.9605, "step": 2124 }, { "epoch": 1.61, "learning_rate": 3.9963124054462934e-05, "loss": 0.9943, "step": 2125 }, { "epoch": 1.61, "learning_rate": 3.995839636913767e-05, "loss": 0.7984, "step": 2126 }, { "epoch": 1.61, "learning_rate": 3.9953668683812405e-05, "loss": 0.7934, "step": 2127 }, { "epoch": 1.61, "learning_rate": 3.9948940998487143e-05, "loss": 0.8784, "step": 2128 }, { "epoch": 1.61, "learning_rate": 3.9944213313161875e-05, "loss": 0.8796, "step": 2129 }, { "epoch": 1.61, "learning_rate": 3.9939485627836614e-05, "loss": 0.7721, "step": 2130 }, { "epoch": 1.61, "learning_rate": 3.9934757942511346e-05, "loss": 0.9686, "step": 2131 }, { "epoch": 1.61, "learning_rate": 3.9930030257186085e-05, "loss": 0.8049, "step": 2132 }, { "epoch": 1.61, "learning_rate": 3.992530257186082e-05, "loss": 0.8209, "step": 2133 }, { "epoch": 1.61, "learning_rate": 3.9920574886535556e-05, "loss": 0.7988, "step": 2134 }, { "epoch": 1.61, "learning_rate": 3.991584720121029e-05, "loss": 1.0067, "step": 2135 }, { "epoch": 1.62, "learning_rate": 3.991111951588503e-05, "loss": 0.9827, "step": 2136 }, { "epoch": 1.62, "learning_rate": 3.990639183055976e-05, "loss": 0.839, "step": 2137 }, { "epoch": 1.62, "learning_rate": 3.99016641452345e-05, "loss": 0.9259, "step": 2138 }, { "epoch": 1.62, "learning_rate": 3.989693645990923e-05, "loss": 0.7611, "step": 2139 }, { "epoch": 1.62, "learning_rate": 3.989220877458397e-05, "loss": 0.7979, "step": 2140 }, { "epoch": 1.62, "learning_rate": 3.98874810892587e-05, "loss": 0.7124, "step": 2141 }, { "epoch": 1.62, "learning_rate": 3.988275340393344e-05, "loss": 0.7372, "step": 2142 }, { "epoch": 1.62, "learning_rate": 3.987802571860817e-05, "loss": 0.9213, "step": 2143 }, { "epoch": 1.62, "learning_rate": 3.987329803328291e-05, "loss": 0.8475, "step": 2144 }, { "epoch": 1.62, "learning_rate": 3.986857034795764e-05, "loss": 0.9251, "step": 2145 }, { "epoch": 1.62, "learning_rate": 3.9863842662632375e-05, "loss": 0.9744, "step": 2146 }, { "epoch": 1.62, "learning_rate": 3.9859114977307114e-05, "loss": 0.9254, "step": 2147 }, { "epoch": 1.62, "learning_rate": 3.9854387291981846e-05, "loss": 0.8635, "step": 2148 }, { "epoch": 1.63, "learning_rate": 3.984965960665658e-05, "loss": 0.9679, "step": 2149 }, { "epoch": 1.63, "learning_rate": 3.9844931921331316e-05, "loss": 0.9023, "step": 2150 }, { "epoch": 1.63, "learning_rate": 3.984020423600605e-05, "loss": 1.0302, "step": 2151 }, { "epoch": 1.63, "learning_rate": 3.983547655068079e-05, "loss": 0.9645, "step": 2152 }, { "epoch": 1.63, "learning_rate": 3.983074886535552e-05, "loss": 0.8523, "step": 2153 }, { "epoch": 1.63, "learning_rate": 3.982602118003026e-05, "loss": 0.8524, "step": 2154 }, { "epoch": 1.63, "learning_rate": 3.982129349470499e-05, "loss": 0.8858, "step": 2155 }, { "epoch": 1.63, "learning_rate": 3.981656580937973e-05, "loss": 0.8843, "step": 2156 }, { "epoch": 1.63, "learning_rate": 3.981183812405446e-05, "loss": 0.8974, "step": 2157 }, { "epoch": 1.63, "learning_rate": 3.98071104387292e-05, "loss": 0.9414, "step": 2158 }, { "epoch": 1.63, "learning_rate": 3.980238275340393e-05, "loss": 0.7591, "step": 2159 }, { "epoch": 1.63, "learning_rate": 3.979765506807867e-05, "loss": 0.9019, "step": 2160 }, { "epoch": 1.63, "learning_rate": 3.97929273827534e-05, "loss": 0.791, "step": 2161 }, { "epoch": 1.64, "learning_rate": 3.978819969742814e-05, "loss": 0.9155, "step": 2162 }, { "epoch": 1.64, "learning_rate": 3.9783472012102874e-05, "loss": 0.7904, "step": 2163 }, { "epoch": 1.64, "learning_rate": 3.977874432677761e-05, "loss": 0.8167, "step": 2164 }, { "epoch": 1.64, "learning_rate": 3.9774016641452345e-05, "loss": 0.8157, "step": 2165 }, { "epoch": 1.64, "learning_rate": 3.9769288956127084e-05, "loss": 1.0124, "step": 2166 }, { "epoch": 1.64, "learning_rate": 3.976456127080182e-05, "loss": 0.7555, "step": 2167 }, { "epoch": 1.64, "learning_rate": 3.9759833585476555e-05, "loss": 0.802, "step": 2168 }, { "epoch": 1.64, "learning_rate": 3.975510590015129e-05, "loss": 0.8698, "step": 2169 }, { "epoch": 1.64, "learning_rate": 3.9750378214826025e-05, "loss": 0.853, "step": 2170 }, { "epoch": 1.64, "learning_rate": 3.974565052950076e-05, "loss": 0.867, "step": 2171 }, { "epoch": 1.64, "learning_rate": 3.9740922844175496e-05, "loss": 0.9911, "step": 2172 }, { "epoch": 1.64, "learning_rate": 3.973619515885023e-05, "loss": 0.8023, "step": 2173 }, { "epoch": 1.64, "learning_rate": 3.973146747352496e-05, "loss": 0.957, "step": 2174 }, { "epoch": 1.64, "learning_rate": 3.97267397881997e-05, "loss": 0.9008, "step": 2175 }, { "epoch": 1.65, "learning_rate": 3.972201210287443e-05, "loss": 0.9984, "step": 2176 }, { "epoch": 1.65, "learning_rate": 3.971728441754917e-05, "loss": 0.7942, "step": 2177 }, { "epoch": 1.65, "learning_rate": 3.97125567322239e-05, "loss": 0.7996, "step": 2178 }, { "epoch": 1.65, "learning_rate": 3.970782904689864e-05, "loss": 0.8009, "step": 2179 }, { "epoch": 1.65, "learning_rate": 3.970310136157337e-05, "loss": 0.9026, "step": 2180 }, { "epoch": 1.65, "learning_rate": 3.969837367624811e-05, "loss": 0.8435, "step": 2181 }, { "epoch": 1.65, "learning_rate": 3.9693645990922844e-05, "loss": 1.113, "step": 2182 }, { "epoch": 1.65, "learning_rate": 3.968891830559758e-05, "loss": 0.7831, "step": 2183 }, { "epoch": 1.65, "learning_rate": 3.9684190620272315e-05, "loss": 0.8861, "step": 2184 }, { "epoch": 1.65, "learning_rate": 3.9679462934947054e-05, "loss": 0.9109, "step": 2185 }, { "epoch": 1.65, "learning_rate": 3.9674735249621786e-05, "loss": 0.9598, "step": 2186 }, { "epoch": 1.65, "learning_rate": 3.9670007564296525e-05, "loss": 0.9367, "step": 2187 }, { "epoch": 1.65, "learning_rate": 3.966527987897126e-05, "loss": 0.8579, "step": 2188 }, { "epoch": 1.66, "learning_rate": 3.9660552193645996e-05, "loss": 0.938, "step": 2189 }, { "epoch": 1.66, "learning_rate": 3.965582450832073e-05, "loss": 0.8299, "step": 2190 }, { "epoch": 1.66, "learning_rate": 3.9651096822995466e-05, "loss": 1.0076, "step": 2191 }, { "epoch": 1.66, "learning_rate": 3.96463691376702e-05, "loss": 0.9475, "step": 2192 }, { "epoch": 1.66, "learning_rate": 3.964164145234494e-05, "loss": 0.9665, "step": 2193 }, { "epoch": 1.66, "learning_rate": 3.963691376701967e-05, "loss": 0.8982, "step": 2194 }, { "epoch": 1.66, "learning_rate": 3.963218608169441e-05, "loss": 0.8808, "step": 2195 }, { "epoch": 1.66, "learning_rate": 3.962745839636914e-05, "loss": 0.887, "step": 2196 }, { "epoch": 1.66, "learning_rate": 3.962273071104388e-05, "loss": 0.835, "step": 2197 }, { "epoch": 1.66, "learning_rate": 3.961800302571861e-05, "loss": 1.0036, "step": 2198 }, { "epoch": 1.66, "learning_rate": 3.961327534039334e-05, "loss": 0.9091, "step": 2199 }, { "epoch": 1.66, "learning_rate": 3.960854765506808e-05, "loss": 0.9475, "step": 2200 }, { "epoch": 1.66, "learning_rate": 3.9603819969742814e-05, "loss": 0.8709, "step": 2201 }, { "epoch": 1.67, "learning_rate": 3.9599092284417546e-05, "loss": 0.9693, "step": 2202 }, { "epoch": 1.67, "learning_rate": 3.9594364599092285e-05, "loss": 0.9573, "step": 2203 }, { "epoch": 1.67, "learning_rate": 3.958963691376702e-05, "loss": 0.8179, "step": 2204 }, { "epoch": 1.67, "learning_rate": 3.9584909228441756e-05, "loss": 0.8547, "step": 2205 }, { "epoch": 1.67, "learning_rate": 3.958018154311649e-05, "loss": 0.9651, "step": 2206 }, { "epoch": 1.67, "learning_rate": 3.957545385779123e-05, "loss": 0.8468, "step": 2207 }, { "epoch": 1.67, "learning_rate": 3.957072617246596e-05, "loss": 0.7702, "step": 2208 }, { "epoch": 1.67, "learning_rate": 3.95659984871407e-05, "loss": 0.9069, "step": 2209 }, { "epoch": 1.67, "learning_rate": 3.956127080181543e-05, "loss": 0.969, "step": 2210 }, { "epoch": 1.67, "learning_rate": 3.955654311649017e-05, "loss": 0.9476, "step": 2211 }, { "epoch": 1.67, "learning_rate": 3.95518154311649e-05, "loss": 1.0313, "step": 2212 }, { "epoch": 1.67, "learning_rate": 3.954708774583964e-05, "loss": 0.9287, "step": 2213 }, { "epoch": 1.67, "learning_rate": 3.954236006051437e-05, "loss": 0.8581, "step": 2214 }, { "epoch": 1.68, "learning_rate": 3.953763237518911e-05, "loss": 0.8319, "step": 2215 }, { "epoch": 1.68, "learning_rate": 3.953290468986384e-05, "loss": 0.9595, "step": 2216 }, { "epoch": 1.68, "learning_rate": 3.952817700453858e-05, "loss": 0.8777, "step": 2217 }, { "epoch": 1.68, "learning_rate": 3.952344931921331e-05, "loss": 0.824, "step": 2218 }, { "epoch": 1.68, "learning_rate": 3.951872163388805e-05, "loss": 0.8014, "step": 2219 }, { "epoch": 1.68, "learning_rate": 3.9513993948562784e-05, "loss": 0.8735, "step": 2220 }, { "epoch": 1.68, "learning_rate": 3.950926626323752e-05, "loss": 0.9224, "step": 2221 }, { "epoch": 1.68, "learning_rate": 3.9504538577912255e-05, "loss": 0.8889, "step": 2222 }, { "epoch": 1.68, "learning_rate": 3.9499810892586994e-05, "loss": 0.871, "step": 2223 }, { "epoch": 1.68, "learning_rate": 3.9495083207261726e-05, "loss": 0.849, "step": 2224 }, { "epoch": 1.68, "learning_rate": 3.9490355521936465e-05, "loss": 0.8659, "step": 2225 }, { "epoch": 1.68, "learning_rate": 3.94856278366112e-05, "loss": 0.9019, "step": 2226 }, { "epoch": 1.68, "learning_rate": 3.948090015128593e-05, "loss": 0.8529, "step": 2227 }, { "epoch": 1.69, "learning_rate": 3.947617246596067e-05, "loss": 0.832, "step": 2228 }, { "epoch": 1.69, "learning_rate": 3.94714447806354e-05, "loss": 0.9185, "step": 2229 }, { "epoch": 1.69, "learning_rate": 3.946671709531014e-05, "loss": 0.9047, "step": 2230 }, { "epoch": 1.69, "learning_rate": 3.946198940998487e-05, "loss": 0.8973, "step": 2231 }, { "epoch": 1.69, "learning_rate": 3.945726172465961e-05, "loss": 0.835, "step": 2232 }, { "epoch": 1.69, "learning_rate": 3.945253403933434e-05, "loss": 0.8896, "step": 2233 }, { "epoch": 1.69, "learning_rate": 3.944780635400908e-05, "loss": 0.7897, "step": 2234 }, { "epoch": 1.69, "learning_rate": 3.944307866868381e-05, "loss": 0.9194, "step": 2235 }, { "epoch": 1.69, "learning_rate": 3.943835098335855e-05, "loss": 0.9242, "step": 2236 }, { "epoch": 1.69, "learning_rate": 3.9433623298033283e-05, "loss": 0.8781, "step": 2237 }, { "epoch": 1.69, "learning_rate": 3.942889561270802e-05, "loss": 0.8574, "step": 2238 }, { "epoch": 1.69, "learning_rate": 3.9424167927382754e-05, "loss": 1.0155, "step": 2239 }, { "epoch": 1.69, "learning_rate": 3.941944024205749e-05, "loss": 1.0315, "step": 2240 }, { "epoch": 1.69, "learning_rate": 3.9414712556732225e-05, "loss": 0.8786, "step": 2241 }, { "epoch": 1.7, "learning_rate": 3.9409984871406964e-05, "loss": 0.927, "step": 2242 }, { "epoch": 1.7, "learning_rate": 3.9405257186081696e-05, "loss": 0.9213, "step": 2243 }, { "epoch": 1.7, "learning_rate": 3.9400529500756435e-05, "loss": 0.9205, "step": 2244 }, { "epoch": 1.7, "learning_rate": 3.939580181543117e-05, "loss": 0.8692, "step": 2245 }, { "epoch": 1.7, "learning_rate": 3.9391074130105906e-05, "loss": 0.8017, "step": 2246 }, { "epoch": 1.7, "learning_rate": 3.938634644478064e-05, "loss": 0.9434, "step": 2247 }, { "epoch": 1.7, "learning_rate": 3.938161875945538e-05, "loss": 0.9414, "step": 2248 }, { "epoch": 1.7, "learning_rate": 3.937689107413011e-05, "loss": 0.9016, "step": 2249 }, { "epoch": 1.7, "learning_rate": 3.937216338880485e-05, "loss": 0.9244, "step": 2250 }, { "epoch": 1.7, "learning_rate": 3.936743570347958e-05, "loss": 0.8794, "step": 2251 }, { "epoch": 1.7, "learning_rate": 3.936270801815431e-05, "loss": 0.9421, "step": 2252 }, { "epoch": 1.7, "learning_rate": 3.935798033282905e-05, "loss": 0.8737, "step": 2253 }, { "epoch": 1.7, "learning_rate": 3.935325264750378e-05, "loss": 0.882, "step": 2254 }, { "epoch": 1.71, "learning_rate": 3.9348524962178515e-05, "loss": 0.813, "step": 2255 }, { "epoch": 1.71, "learning_rate": 3.9343797276853254e-05, "loss": 0.9108, "step": 2256 }, { "epoch": 1.71, "learning_rate": 3.9339069591527986e-05, "loss": 0.9284, "step": 2257 }, { "epoch": 1.71, "learning_rate": 3.9334341906202724e-05, "loss": 0.8494, "step": 2258 }, { "epoch": 1.71, "learning_rate": 3.9329614220877457e-05, "loss": 0.8387, "step": 2259 }, { "epoch": 1.71, "learning_rate": 3.9324886535552195e-05, "loss": 0.844, "step": 2260 }, { "epoch": 1.71, "learning_rate": 3.932015885022693e-05, "loss": 0.9059, "step": 2261 }, { "epoch": 1.71, "learning_rate": 3.9315431164901666e-05, "loss": 0.8379, "step": 2262 }, { "epoch": 1.71, "learning_rate": 3.93107034795764e-05, "loss": 0.9846, "step": 2263 }, { "epoch": 1.71, "learning_rate": 3.930597579425114e-05, "loss": 0.9601, "step": 2264 }, { "epoch": 1.71, "learning_rate": 3.930124810892587e-05, "loss": 0.9223, "step": 2265 }, { "epoch": 1.71, "learning_rate": 3.929652042360061e-05, "loss": 0.8725, "step": 2266 }, { "epoch": 1.71, "learning_rate": 3.929179273827534e-05, "loss": 0.8749, "step": 2267 }, { "epoch": 1.72, "learning_rate": 3.928706505295008e-05, "loss": 0.8076, "step": 2268 }, { "epoch": 1.72, "learning_rate": 3.928233736762481e-05, "loss": 1.0135, "step": 2269 }, { "epoch": 1.72, "learning_rate": 3.927760968229955e-05, "loss": 0.9563, "step": 2270 }, { "epoch": 1.72, "learning_rate": 3.927288199697428e-05, "loss": 1.0128, "step": 2271 }, { "epoch": 1.72, "learning_rate": 3.926815431164902e-05, "loss": 0.9578, "step": 2272 }, { "epoch": 1.72, "learning_rate": 3.926342662632375e-05, "loss": 0.9611, "step": 2273 }, { "epoch": 1.72, "learning_rate": 3.925869894099849e-05, "loss": 0.8329, "step": 2274 }, { "epoch": 1.72, "learning_rate": 3.9253971255673224e-05, "loss": 0.8855, "step": 2275 }, { "epoch": 1.72, "learning_rate": 3.924924357034796e-05, "loss": 0.8165, "step": 2276 }, { "epoch": 1.72, "learning_rate": 3.9244515885022695e-05, "loss": 0.8454, "step": 2277 }, { "epoch": 1.72, "learning_rate": 3.9239788199697433e-05, "loss": 0.8214, "step": 2278 }, { "epoch": 1.72, "learning_rate": 3.9235060514372165e-05, "loss": 0.896, "step": 2279 }, { "epoch": 1.72, "learning_rate": 3.92303328290469e-05, "loss": 0.8224, "step": 2280 }, { "epoch": 1.73, "learning_rate": 3.9225605143721636e-05, "loss": 0.9742, "step": 2281 }, { "epoch": 1.73, "learning_rate": 3.922087745839637e-05, "loss": 0.8543, "step": 2282 }, { "epoch": 1.73, "learning_rate": 3.92161497730711e-05, "loss": 0.9206, "step": 2283 }, { "epoch": 1.73, "learning_rate": 3.921142208774584e-05, "loss": 0.8165, "step": 2284 }, { "epoch": 1.73, "learning_rate": 3.920669440242057e-05, "loss": 0.838, "step": 2285 }, { "epoch": 1.73, "learning_rate": 3.920196671709531e-05, "loss": 0.9124, "step": 2286 }, { "epoch": 1.73, "learning_rate": 3.919723903177005e-05, "loss": 0.9111, "step": 2287 }, { "epoch": 1.73, "learning_rate": 3.919251134644478e-05, "loss": 0.7956, "step": 2288 }, { "epoch": 1.73, "learning_rate": 3.918778366111952e-05, "loss": 1.0138, "step": 2289 }, { "epoch": 1.73, "learning_rate": 3.918305597579425e-05, "loss": 0.9031, "step": 2290 }, { "epoch": 1.73, "learning_rate": 3.917832829046899e-05, "loss": 0.7912, "step": 2291 }, { "epoch": 1.73, "learning_rate": 3.917360060514372e-05, "loss": 0.8956, "step": 2292 }, { "epoch": 1.73, "learning_rate": 3.916887291981846e-05, "loss": 0.9126, "step": 2293 }, { "epoch": 1.73, "learning_rate": 3.9164145234493194e-05, "loss": 0.8896, "step": 2294 }, { "epoch": 1.74, "learning_rate": 3.915941754916793e-05, "loss": 0.845, "step": 2295 }, { "epoch": 1.74, "learning_rate": 3.9154689863842665e-05, "loss": 0.8711, "step": 2296 }, { "epoch": 1.74, "learning_rate": 3.9149962178517404e-05, "loss": 0.8625, "step": 2297 }, { "epoch": 1.74, "learning_rate": 3.9145234493192136e-05, "loss": 0.894, "step": 2298 }, { "epoch": 1.74, "learning_rate": 3.9140506807866874e-05, "loss": 0.8482, "step": 2299 }, { "epoch": 1.74, "learning_rate": 3.9135779122541606e-05, "loss": 0.9069, "step": 2300 }, { "epoch": 1.74, "learning_rate": 3.9131051437216345e-05, "loss": 0.8675, "step": 2301 }, { "epoch": 1.74, "learning_rate": 3.912632375189108e-05, "loss": 0.8388, "step": 2302 }, { "epoch": 1.74, "learning_rate": 3.9121596066565816e-05, "loss": 0.8515, "step": 2303 }, { "epoch": 1.74, "learning_rate": 3.911686838124055e-05, "loss": 0.9053, "step": 2304 }, { "epoch": 1.74, "learning_rate": 3.911214069591528e-05, "loss": 0.886, "step": 2305 }, { "epoch": 1.74, "learning_rate": 3.910741301059002e-05, "loss": 0.855, "step": 2306 }, { "epoch": 1.74, "learning_rate": 3.910268532526475e-05, "loss": 0.9229, "step": 2307 }, { "epoch": 1.75, "learning_rate": 3.909795763993948e-05, "loss": 0.8656, "step": 2308 }, { "epoch": 1.75, "learning_rate": 3.909322995461422e-05, "loss": 0.895, "step": 2309 }, { "epoch": 1.75, "learning_rate": 3.9088502269288954e-05, "loss": 0.7788, "step": 2310 }, { "epoch": 1.75, "learning_rate": 3.908377458396369e-05, "loss": 1.0032, "step": 2311 }, { "epoch": 1.75, "learning_rate": 3.9079046898638425e-05, "loss": 0.9324, "step": 2312 }, { "epoch": 1.75, "learning_rate": 3.9074319213313164e-05, "loss": 0.8226, "step": 2313 }, { "epoch": 1.75, "learning_rate": 3.9069591527987896e-05, "loss": 0.9094, "step": 2314 }, { "epoch": 1.75, "learning_rate": 3.9064863842662635e-05, "loss": 0.7102, "step": 2315 }, { "epoch": 1.75, "learning_rate": 3.906013615733737e-05, "loss": 0.9703, "step": 2316 }, { "epoch": 1.75, "learning_rate": 3.9055408472012106e-05, "loss": 0.9486, "step": 2317 }, { "epoch": 1.75, "learning_rate": 3.905068078668684e-05, "loss": 0.9755, "step": 2318 }, { "epoch": 1.75, "learning_rate": 3.9045953101361577e-05, "loss": 0.9066, "step": 2319 }, { "epoch": 1.75, "learning_rate": 3.904122541603631e-05, "loss": 0.9264, "step": 2320 }, { "epoch": 1.76, "learning_rate": 3.903649773071105e-05, "loss": 0.801, "step": 2321 }, { "epoch": 1.76, "learning_rate": 3.903177004538578e-05, "loss": 0.8293, "step": 2322 }, { "epoch": 1.76, "learning_rate": 3.902704236006052e-05, "loss": 0.9073, "step": 2323 }, { "epoch": 1.76, "learning_rate": 3.902231467473525e-05, "loss": 0.8397, "step": 2324 }, { "epoch": 1.76, "learning_rate": 3.901758698940999e-05, "loss": 0.715, "step": 2325 }, { "epoch": 1.76, "learning_rate": 3.901285930408472e-05, "loss": 0.9946, "step": 2326 }, { "epoch": 1.76, "learning_rate": 3.900813161875946e-05, "loss": 0.8213, "step": 2327 }, { "epoch": 1.76, "learning_rate": 3.900340393343419e-05, "loss": 0.8184, "step": 2328 }, { "epoch": 1.76, "learning_rate": 3.899867624810893e-05, "loss": 0.839, "step": 2329 }, { "epoch": 1.76, "learning_rate": 3.899394856278366e-05, "loss": 0.8934, "step": 2330 }, { "epoch": 1.76, "learning_rate": 3.89892208774584e-05, "loss": 0.8306, "step": 2331 }, { "epoch": 1.76, "learning_rate": 3.8984493192133134e-05, "loss": 0.8846, "step": 2332 }, { "epoch": 1.76, "learning_rate": 3.8979765506807866e-05, "loss": 0.844, "step": 2333 }, { "epoch": 1.77, "learning_rate": 3.8975037821482605e-05, "loss": 0.9179, "step": 2334 }, { "epoch": 1.77, "learning_rate": 3.897031013615734e-05, "loss": 0.9633, "step": 2335 }, { "epoch": 1.77, "learning_rate": 3.896558245083207e-05, "loss": 0.8701, "step": 2336 }, { "epoch": 1.77, "learning_rate": 3.896085476550681e-05, "loss": 0.9482, "step": 2337 }, { "epoch": 1.77, "learning_rate": 3.895612708018154e-05, "loss": 0.8473, "step": 2338 }, { "epoch": 1.77, "learning_rate": 3.895139939485628e-05, "loss": 0.9663, "step": 2339 }, { "epoch": 1.77, "learning_rate": 3.894667170953101e-05, "loss": 0.7868, "step": 2340 }, { "epoch": 1.77, "learning_rate": 3.894194402420575e-05, "loss": 0.9282, "step": 2341 }, { "epoch": 1.77, "learning_rate": 3.893721633888048e-05, "loss": 0.9044, "step": 2342 }, { "epoch": 1.77, "learning_rate": 3.893248865355522e-05, "loss": 0.9798, "step": 2343 }, { "epoch": 1.77, "learning_rate": 3.892776096822995e-05, "loss": 0.9039, "step": 2344 }, { "epoch": 1.77, "learning_rate": 3.892303328290469e-05, "loss": 0.8795, "step": 2345 }, { "epoch": 1.77, "learning_rate": 3.891830559757943e-05, "loss": 0.9601, "step": 2346 }, { "epoch": 1.78, "learning_rate": 3.891357791225416e-05, "loss": 0.8604, "step": 2347 }, { "epoch": 1.78, "learning_rate": 3.89088502269289e-05, "loss": 0.9203, "step": 2348 }, { "epoch": 1.78, "learning_rate": 3.890412254160363e-05, "loss": 0.9531, "step": 2349 }, { "epoch": 1.78, "learning_rate": 3.889939485627837e-05, "loss": 0.9767, "step": 2350 }, { "epoch": 1.78, "learning_rate": 3.8894667170953104e-05, "loss": 0.7706, "step": 2351 }, { "epoch": 1.78, "learning_rate": 3.888993948562784e-05, "loss": 0.8947, "step": 2352 }, { "epoch": 1.78, "learning_rate": 3.8885211800302575e-05, "loss": 0.9588, "step": 2353 }, { "epoch": 1.78, "learning_rate": 3.8880484114977314e-05, "loss": 0.8734, "step": 2354 }, { "epoch": 1.78, "learning_rate": 3.8875756429652046e-05, "loss": 0.8957, "step": 2355 }, { "epoch": 1.78, "learning_rate": 3.8871028744326785e-05, "loss": 0.863, "step": 2356 }, { "epoch": 1.78, "learning_rate": 3.886630105900152e-05, "loss": 0.8891, "step": 2357 }, { "epoch": 1.78, "learning_rate": 3.886157337367625e-05, "loss": 0.8193, "step": 2358 }, { "epoch": 1.78, "learning_rate": 3.885684568835099e-05, "loss": 0.9471, "step": 2359 }, { "epoch": 1.78, "learning_rate": 3.885211800302572e-05, "loss": 1.0082, "step": 2360 }, { "epoch": 1.79, "learning_rate": 3.884739031770045e-05, "loss": 0.9694, "step": 2361 }, { "epoch": 1.79, "learning_rate": 3.884266263237519e-05, "loss": 0.9028, "step": 2362 }, { "epoch": 1.79, "learning_rate": 3.883793494704992e-05, "loss": 0.8685, "step": 2363 }, { "epoch": 1.79, "learning_rate": 3.883320726172466e-05, "loss": 0.8862, "step": 2364 }, { "epoch": 1.79, "learning_rate": 3.8828479576399394e-05, "loss": 0.8535, "step": 2365 }, { "epoch": 1.79, "learning_rate": 3.882375189107413e-05, "loss": 0.898, "step": 2366 }, { "epoch": 1.79, "learning_rate": 3.8819024205748864e-05, "loss": 1.0786, "step": 2367 }, { "epoch": 1.79, "learning_rate": 3.88142965204236e-05, "loss": 0.9449, "step": 2368 }, { "epoch": 1.79, "learning_rate": 3.8809568835098335e-05, "loss": 0.8705, "step": 2369 }, { "epoch": 1.79, "learning_rate": 3.8804841149773074e-05, "loss": 0.9165, "step": 2370 }, { "epoch": 1.79, "learning_rate": 3.8800113464447806e-05, "loss": 0.8647, "step": 2371 }, { "epoch": 1.79, "learning_rate": 3.8795385779122545e-05, "loss": 0.8112, "step": 2372 }, { "epoch": 1.79, "learning_rate": 3.879065809379728e-05, "loss": 0.9507, "step": 2373 }, { "epoch": 1.8, "learning_rate": 3.8785930408472016e-05, "loss": 0.9207, "step": 2374 }, { "epoch": 1.8, "learning_rate": 3.878120272314675e-05, "loss": 0.8497, "step": 2375 }, { "epoch": 1.8, "learning_rate": 3.877647503782149e-05, "loss": 0.8733, "step": 2376 }, { "epoch": 1.8, "learning_rate": 3.877174735249622e-05, "loss": 0.9038, "step": 2377 }, { "epoch": 1.8, "learning_rate": 3.876701966717096e-05, "loss": 0.8416, "step": 2378 }, { "epoch": 1.8, "learning_rate": 3.876229198184569e-05, "loss": 0.898, "step": 2379 }, { "epoch": 1.8, "learning_rate": 3.875756429652043e-05, "loss": 1.0418, "step": 2380 }, { "epoch": 1.8, "learning_rate": 3.875283661119516e-05, "loss": 0.873, "step": 2381 }, { "epoch": 1.8, "learning_rate": 3.87481089258699e-05, "loss": 0.912, "step": 2382 }, { "epoch": 1.8, "learning_rate": 3.874338124054463e-05, "loss": 0.8446, "step": 2383 }, { "epoch": 1.8, "learning_rate": 3.873865355521937e-05, "loss": 0.8407, "step": 2384 }, { "epoch": 1.8, "learning_rate": 3.87339258698941e-05, "loss": 0.7955, "step": 2385 }, { "epoch": 1.8, "learning_rate": 3.8729198184568835e-05, "loss": 0.8848, "step": 2386 }, { "epoch": 1.81, "learning_rate": 3.8724470499243573e-05, "loss": 0.9486, "step": 2387 }, { "epoch": 1.81, "learning_rate": 3.8719742813918306e-05, "loss": 0.8909, "step": 2388 }, { "epoch": 1.81, "learning_rate": 3.871501512859304e-05, "loss": 0.8682, "step": 2389 }, { "epoch": 1.81, "learning_rate": 3.8710287443267776e-05, "loss": 0.8373, "step": 2390 }, { "epoch": 1.81, "learning_rate": 3.870555975794251e-05, "loss": 0.8412, "step": 2391 }, { "epoch": 1.81, "learning_rate": 3.870083207261725e-05, "loss": 0.8946, "step": 2392 }, { "epoch": 1.81, "learning_rate": 3.869610438729198e-05, "loss": 0.895, "step": 2393 }, { "epoch": 1.81, "learning_rate": 3.869137670196672e-05, "loss": 0.9033, "step": 2394 }, { "epoch": 1.81, "learning_rate": 3.868664901664145e-05, "loss": 0.8957, "step": 2395 }, { "epoch": 1.81, "learning_rate": 3.868192133131619e-05, "loss": 0.8317, "step": 2396 }, { "epoch": 1.81, "learning_rate": 3.867719364599092e-05, "loss": 0.9714, "step": 2397 }, { "epoch": 1.81, "learning_rate": 3.867246596066566e-05, "loss": 0.9066, "step": 2398 }, { "epoch": 1.81, "learning_rate": 3.866773827534039e-05, "loss": 0.8508, "step": 2399 }, { "epoch": 1.82, "learning_rate": 3.866301059001513e-05, "loss": 0.8759, "step": 2400 }, { "epoch": 1.82, "learning_rate": 3.865828290468986e-05, "loss": 0.8482, "step": 2401 }, { "epoch": 1.82, "learning_rate": 3.86535552193646e-05, "loss": 0.848, "step": 2402 }, { "epoch": 1.82, "learning_rate": 3.8648827534039334e-05, "loss": 0.8707, "step": 2403 }, { "epoch": 1.82, "learning_rate": 3.864409984871407e-05, "loss": 0.9242, "step": 2404 }, { "epoch": 1.82, "learning_rate": 3.8639372163388805e-05, "loss": 0.8377, "step": 2405 }, { "epoch": 1.82, "learning_rate": 3.8634644478063544e-05, "loss": 1.0221, "step": 2406 }, { "epoch": 1.82, "learning_rate": 3.862991679273828e-05, "loss": 0.9849, "step": 2407 }, { "epoch": 1.82, "learning_rate": 3.8625189107413014e-05, "loss": 0.7771, "step": 2408 }, { "epoch": 1.82, "learning_rate": 3.862046142208775e-05, "loss": 0.812, "step": 2409 }, { "epoch": 1.82, "learning_rate": 3.8615733736762485e-05, "loss": 0.951, "step": 2410 }, { "epoch": 1.82, "learning_rate": 3.861100605143722e-05, "loss": 0.8975, "step": 2411 }, { "epoch": 1.82, "learning_rate": 3.8606278366111956e-05, "loss": 0.8283, "step": 2412 }, { "epoch": 1.82, "learning_rate": 3.860155068078669e-05, "loss": 0.8697, "step": 2413 }, { "epoch": 1.83, "learning_rate": 3.859682299546142e-05, "loss": 0.8296, "step": 2414 }, { "epoch": 1.83, "learning_rate": 3.859209531013616e-05, "loss": 0.8851, "step": 2415 }, { "epoch": 1.83, "learning_rate": 3.858736762481089e-05, "loss": 0.8644, "step": 2416 }, { "epoch": 1.83, "learning_rate": 3.858263993948563e-05, "loss": 1.0253, "step": 2417 }, { "epoch": 1.83, "learning_rate": 3.857791225416036e-05, "loss": 1.0253, "step": 2418 }, { "epoch": 1.83, "learning_rate": 3.85731845688351e-05, "loss": 0.8448, "step": 2419 }, { "epoch": 1.83, "learning_rate": 3.856845688350983e-05, "loss": 0.9019, "step": 2420 }, { "epoch": 1.83, "learning_rate": 3.856372919818457e-05, "loss": 1.022, "step": 2421 }, { "epoch": 1.83, "learning_rate": 3.8559001512859304e-05, "loss": 0.8698, "step": 2422 }, { "epoch": 1.83, "learning_rate": 3.855427382753404e-05, "loss": 0.9729, "step": 2423 }, { "epoch": 1.83, "learning_rate": 3.8549546142208775e-05, "loss": 0.9221, "step": 2424 }, { "epoch": 1.83, "learning_rate": 3.8544818456883514e-05, "loss": 0.8646, "step": 2425 }, { "epoch": 1.83, "learning_rate": 3.8540090771558246e-05, "loss": 0.8858, "step": 2426 }, { "epoch": 1.84, "learning_rate": 3.8535363086232985e-05, "loss": 1.0001, "step": 2427 }, { "epoch": 1.84, "learning_rate": 3.853063540090772e-05, "loss": 0.8518, "step": 2428 }, { "epoch": 1.84, "learning_rate": 3.8525907715582455e-05, "loss": 0.8688, "step": 2429 }, { "epoch": 1.84, "learning_rate": 3.852118003025719e-05, "loss": 0.8571, "step": 2430 }, { "epoch": 1.84, "learning_rate": 3.8516452344931926e-05, "loss": 0.982, "step": 2431 }, { "epoch": 1.84, "learning_rate": 3.851172465960666e-05, "loss": 0.8598, "step": 2432 }, { "epoch": 1.84, "learning_rate": 3.85069969742814e-05, "loss": 0.8122, "step": 2433 }, { "epoch": 1.84, "learning_rate": 3.850226928895613e-05, "loss": 1.0525, "step": 2434 }, { "epoch": 1.84, "learning_rate": 3.849754160363087e-05, "loss": 0.8173, "step": 2435 }, { "epoch": 1.84, "learning_rate": 3.84928139183056e-05, "loss": 0.9339, "step": 2436 }, { "epoch": 1.84, "learning_rate": 3.848808623298034e-05, "loss": 0.8039, "step": 2437 }, { "epoch": 1.84, "learning_rate": 3.848335854765507e-05, "loss": 0.9341, "step": 2438 }, { "epoch": 1.84, "learning_rate": 3.84786308623298e-05, "loss": 0.9125, "step": 2439 }, { "epoch": 1.85, "learning_rate": 3.847390317700454e-05, "loss": 0.8651, "step": 2440 }, { "epoch": 1.85, "learning_rate": 3.8469175491679274e-05, "loss": 0.9425, "step": 2441 }, { "epoch": 1.85, "learning_rate": 3.8464447806354006e-05, "loss": 1.0273, "step": 2442 }, { "epoch": 1.85, "learning_rate": 3.8459720121028745e-05, "loss": 0.9794, "step": 2443 }, { "epoch": 1.85, "learning_rate": 3.845499243570348e-05, "loss": 0.809, "step": 2444 }, { "epoch": 1.85, "learning_rate": 3.8450264750378216e-05, "loss": 0.9284, "step": 2445 }, { "epoch": 1.85, "learning_rate": 3.844553706505295e-05, "loss": 0.7846, "step": 2446 }, { "epoch": 1.85, "learning_rate": 3.844080937972769e-05, "loss": 0.992, "step": 2447 }, { "epoch": 1.85, "learning_rate": 3.843608169440242e-05, "loss": 0.9588, "step": 2448 }, { "epoch": 1.85, "learning_rate": 3.843135400907716e-05, "loss": 1.0544, "step": 2449 }, { "epoch": 1.85, "learning_rate": 3.842662632375189e-05, "loss": 0.8294, "step": 2450 }, { "epoch": 1.85, "learning_rate": 3.842189863842663e-05, "loss": 0.8198, "step": 2451 }, { "epoch": 1.85, "learning_rate": 3.841717095310136e-05, "loss": 0.8269, "step": 2452 }, { "epoch": 1.86, "learning_rate": 3.84124432677761e-05, "loss": 0.8087, "step": 2453 }, { "epoch": 1.86, "learning_rate": 3.840771558245083e-05, "loss": 0.8568, "step": 2454 }, { "epoch": 1.86, "learning_rate": 3.840298789712557e-05, "loss": 0.8684, "step": 2455 }, { "epoch": 1.86, "learning_rate": 3.83982602118003e-05, "loss": 0.8012, "step": 2456 }, { "epoch": 1.86, "learning_rate": 3.839353252647504e-05, "loss": 0.9238, "step": 2457 }, { "epoch": 1.86, "learning_rate": 3.838880484114977e-05, "loss": 0.8651, "step": 2458 }, { "epoch": 1.86, "learning_rate": 3.838407715582451e-05, "loss": 0.8608, "step": 2459 }, { "epoch": 1.86, "learning_rate": 3.8379349470499244e-05, "loss": 0.9271, "step": 2460 }, { "epoch": 1.86, "learning_rate": 3.837462178517398e-05, "loss": 0.7744, "step": 2461 }, { "epoch": 1.86, "learning_rate": 3.8369894099848715e-05, "loss": 0.91, "step": 2462 }, { "epoch": 1.86, "learning_rate": 3.8365166414523454e-05, "loss": 0.9263, "step": 2463 }, { "epoch": 1.86, "learning_rate": 3.8360438729198186e-05, "loss": 0.9002, "step": 2464 }, { "epoch": 1.86, "learning_rate": 3.8355711043872925e-05, "loss": 0.9796, "step": 2465 }, { "epoch": 1.87, "learning_rate": 3.835098335854766e-05, "loss": 0.8395, "step": 2466 }, { "epoch": 1.87, "learning_rate": 3.834625567322239e-05, "loss": 0.8268, "step": 2467 }, { "epoch": 1.87, "learning_rate": 3.834152798789713e-05, "loss": 0.8788, "step": 2468 }, { "epoch": 1.87, "learning_rate": 3.833680030257186e-05, "loss": 0.7903, "step": 2469 }, { "epoch": 1.87, "learning_rate": 3.83320726172466e-05, "loss": 0.8489, "step": 2470 }, { "epoch": 1.87, "learning_rate": 3.832734493192133e-05, "loss": 1.023, "step": 2471 }, { "epoch": 1.87, "learning_rate": 3.832261724659607e-05, "loss": 0.8853, "step": 2472 }, { "epoch": 1.87, "learning_rate": 3.83178895612708e-05, "loss": 0.8803, "step": 2473 }, { "epoch": 1.87, "learning_rate": 3.831316187594554e-05, "loss": 0.9594, "step": 2474 }, { "epoch": 1.87, "learning_rate": 3.830843419062027e-05, "loss": 0.8684, "step": 2475 }, { "epoch": 1.87, "learning_rate": 3.830370650529501e-05, "loss": 0.8732, "step": 2476 }, { "epoch": 1.87, "learning_rate": 3.829897881996974e-05, "loss": 0.975, "step": 2477 }, { "epoch": 1.87, "learning_rate": 3.829425113464448e-05, "loss": 0.8149, "step": 2478 }, { "epoch": 1.87, "learning_rate": 3.8289523449319214e-05, "loss": 0.9037, "step": 2479 }, { "epoch": 1.88, "learning_rate": 3.828479576399395e-05, "loss": 0.8342, "step": 2480 }, { "epoch": 1.88, "learning_rate": 3.8280068078668685e-05, "loss": 0.9767, "step": 2481 }, { "epoch": 1.88, "learning_rate": 3.8275340393343424e-05, "loss": 0.8852, "step": 2482 }, { "epoch": 1.88, "learning_rate": 3.8270612708018156e-05, "loss": 0.8204, "step": 2483 }, { "epoch": 1.88, "learning_rate": 3.8265885022692895e-05, "loss": 0.9626, "step": 2484 }, { "epoch": 1.88, "learning_rate": 3.826115733736763e-05, "loss": 0.9323, "step": 2485 }, { "epoch": 1.88, "learning_rate": 3.8256429652042366e-05, "loss": 0.9184, "step": 2486 }, { "epoch": 1.88, "learning_rate": 3.82517019667171e-05, "loss": 0.8563, "step": 2487 }, { "epoch": 1.88, "learning_rate": 3.824697428139184e-05, "loss": 0.7796, "step": 2488 }, { "epoch": 1.88, "learning_rate": 3.824224659606657e-05, "loss": 0.8487, "step": 2489 }, { "epoch": 1.88, "learning_rate": 3.823751891074131e-05, "loss": 0.9392, "step": 2490 }, { "epoch": 1.88, "learning_rate": 3.823279122541604e-05, "loss": 0.8732, "step": 2491 }, { "epoch": 1.88, "learning_rate": 3.822806354009077e-05, "loss": 0.8334, "step": 2492 }, { "epoch": 1.89, "learning_rate": 3.822333585476551e-05, "loss": 0.9112, "step": 2493 }, { "epoch": 1.89, "learning_rate": 3.821860816944024e-05, "loss": 0.8823, "step": 2494 }, { "epoch": 1.89, "learning_rate": 3.8213880484114975e-05, "loss": 0.7964, "step": 2495 }, { "epoch": 1.89, "learning_rate": 3.8209152798789713e-05, "loss": 0.8245, "step": 2496 }, { "epoch": 1.89, "learning_rate": 3.8204425113464446e-05, "loss": 0.8324, "step": 2497 }, { "epoch": 1.89, "learning_rate": 3.8199697428139184e-05, "loss": 0.8999, "step": 2498 }, { "epoch": 1.89, "learning_rate": 3.8194969742813916e-05, "loss": 1.0321, "step": 2499 }, { "epoch": 1.89, "learning_rate": 3.8190242057488655e-05, "loss": 0.9642, "step": 2500 }, { "epoch": 1.89, "learning_rate": 3.818551437216339e-05, "loss": 1.0115, "step": 2501 }, { "epoch": 1.89, "learning_rate": 3.8180786686838126e-05, "loss": 0.9873, "step": 2502 }, { "epoch": 1.89, "learning_rate": 3.817605900151286e-05, "loss": 0.9753, "step": 2503 }, { "epoch": 1.89, "learning_rate": 3.81713313161876e-05, "loss": 0.9715, "step": 2504 }, { "epoch": 1.89, "learning_rate": 3.816660363086233e-05, "loss": 0.9719, "step": 2505 }, { "epoch": 1.9, "learning_rate": 3.816187594553707e-05, "loss": 0.8762, "step": 2506 }, { "epoch": 1.9, "learning_rate": 3.81571482602118e-05, "loss": 0.9852, "step": 2507 }, { "epoch": 1.9, "learning_rate": 3.815242057488654e-05, "loss": 0.8357, "step": 2508 }, { "epoch": 1.9, "learning_rate": 3.814769288956127e-05, "loss": 0.9927, "step": 2509 }, { "epoch": 1.9, "learning_rate": 3.814296520423601e-05, "loss": 0.887, "step": 2510 }, { "epoch": 1.9, "learning_rate": 3.813823751891074e-05, "loss": 0.9168, "step": 2511 }, { "epoch": 1.9, "learning_rate": 3.813350983358548e-05, "loss": 0.9584, "step": 2512 }, { "epoch": 1.9, "learning_rate": 3.812878214826021e-05, "loss": 0.9114, "step": 2513 }, { "epoch": 1.9, "learning_rate": 3.812405446293495e-05, "loss": 0.8167, "step": 2514 }, { "epoch": 1.9, "learning_rate": 3.8119326777609684e-05, "loss": 0.8853, "step": 2515 }, { "epoch": 1.9, "learning_rate": 3.811459909228442e-05, "loss": 0.9575, "step": 2516 }, { "epoch": 1.9, "learning_rate": 3.8109871406959154e-05, "loss": 0.9043, "step": 2517 }, { "epoch": 1.9, "learning_rate": 3.810514372163389e-05, "loss": 0.8134, "step": 2518 }, { "epoch": 1.91, "learning_rate": 3.8100416036308625e-05, "loss": 0.9244, "step": 2519 }, { "epoch": 1.91, "learning_rate": 3.809568835098336e-05, "loss": 0.9625, "step": 2520 }, { "epoch": 1.91, "learning_rate": 3.8090960665658096e-05, "loss": 0.7795, "step": 2521 }, { "epoch": 1.91, "learning_rate": 3.808623298033283e-05, "loss": 0.8382, "step": 2522 }, { "epoch": 1.91, "learning_rate": 3.808150529500756e-05, "loss": 0.8309, "step": 2523 }, { "epoch": 1.91, "learning_rate": 3.80767776096823e-05, "loss": 0.8953, "step": 2524 }, { "epoch": 1.91, "learning_rate": 3.807204992435703e-05, "loss": 0.6905, "step": 2525 }, { "epoch": 1.91, "learning_rate": 3.806732223903177e-05, "loss": 0.8702, "step": 2526 }, { "epoch": 1.91, "learning_rate": 3.806259455370651e-05, "loss": 0.914, "step": 2527 }, { "epoch": 1.91, "learning_rate": 3.805786686838124e-05, "loss": 0.8983, "step": 2528 }, { "epoch": 1.91, "learning_rate": 3.805313918305598e-05, "loss": 0.9944, "step": 2529 }, { "epoch": 1.91, "learning_rate": 3.804841149773071e-05, "loss": 0.8786, "step": 2530 }, { "epoch": 1.91, "learning_rate": 3.804368381240545e-05, "loss": 0.879, "step": 2531 }, { "epoch": 1.91, "learning_rate": 3.803895612708018e-05, "loss": 0.9395, "step": 2532 }, { "epoch": 1.92, "learning_rate": 3.803422844175492e-05, "loss": 0.9653, "step": 2533 }, { "epoch": 1.92, "learning_rate": 3.8029500756429654e-05, "loss": 0.8039, "step": 2534 }, { "epoch": 1.92, "learning_rate": 3.802477307110439e-05, "loss": 0.9099, "step": 2535 }, { "epoch": 1.92, "learning_rate": 3.8020045385779125e-05, "loss": 1.0041, "step": 2536 }, { "epoch": 1.92, "learning_rate": 3.8015317700453863e-05, "loss": 0.9962, "step": 2537 }, { "epoch": 1.92, "learning_rate": 3.8010590015128596e-05, "loss": 0.8482, "step": 2538 }, { "epoch": 1.92, "learning_rate": 3.8005862329803334e-05, "loss": 0.8659, "step": 2539 }, { "epoch": 1.92, "learning_rate": 3.8001134644478066e-05, "loss": 1.0008, "step": 2540 }, { "epoch": 1.92, "learning_rate": 3.7996406959152805e-05, "loss": 0.7769, "step": 2541 }, { "epoch": 1.92, "learning_rate": 3.799167927382754e-05, "loss": 0.9848, "step": 2542 }, { "epoch": 1.92, "learning_rate": 3.7986951588502276e-05, "loss": 0.884, "step": 2543 }, { "epoch": 1.92, "learning_rate": 3.798222390317701e-05, "loss": 0.8367, "step": 2544 }, { "epoch": 1.92, "learning_rate": 3.797749621785174e-05, "loss": 0.8446, "step": 2545 }, { "epoch": 1.93, "learning_rate": 3.797276853252648e-05, "loss": 0.8366, "step": 2546 }, { "epoch": 1.93, "learning_rate": 3.796804084720121e-05, "loss": 0.8998, "step": 2547 }, { "epoch": 1.93, "learning_rate": 3.796331316187594e-05, "loss": 0.8534, "step": 2548 }, { "epoch": 1.93, "learning_rate": 3.795858547655068e-05, "loss": 0.8518, "step": 2549 }, { "epoch": 1.93, "learning_rate": 3.7953857791225414e-05, "loss": 0.9408, "step": 2550 }, { "epoch": 1.93, "learning_rate": 3.794913010590015e-05, "loss": 0.8578, "step": 2551 }, { "epoch": 1.93, "learning_rate": 3.7944402420574885e-05, "loss": 0.9664, "step": 2552 }, { "epoch": 1.93, "learning_rate": 3.7939674735249624e-05, "loss": 0.9353, "step": 2553 }, { "epoch": 1.93, "learning_rate": 3.7934947049924356e-05, "loss": 0.7891, "step": 2554 }, { "epoch": 1.93, "learning_rate": 3.7930219364599095e-05, "loss": 1.0583, "step": 2555 }, { "epoch": 1.93, "learning_rate": 3.792549167927383e-05, "loss": 0.9855, "step": 2556 }, { "epoch": 1.93, "learning_rate": 3.7920763993948566e-05, "loss": 0.924, "step": 2557 }, { "epoch": 1.93, "learning_rate": 3.79160363086233e-05, "loss": 0.9281, "step": 2558 }, { "epoch": 1.94, "learning_rate": 3.7911308623298037e-05, "loss": 0.8857, "step": 2559 }, { "epoch": 1.94, "learning_rate": 3.790658093797277e-05, "loss": 0.8914, "step": 2560 }, { "epoch": 1.94, "learning_rate": 3.790185325264751e-05, "loss": 0.9533, "step": 2561 }, { "epoch": 1.94, "learning_rate": 3.789712556732224e-05, "loss": 0.8589, "step": 2562 }, { "epoch": 1.94, "learning_rate": 3.789239788199698e-05, "loss": 0.8606, "step": 2563 }, { "epoch": 1.94, "learning_rate": 3.788767019667171e-05, "loss": 0.8953, "step": 2564 }, { "epoch": 1.94, "learning_rate": 3.788294251134645e-05, "loss": 0.9327, "step": 2565 }, { "epoch": 1.94, "learning_rate": 3.787821482602118e-05, "loss": 0.9071, "step": 2566 }, { "epoch": 1.94, "learning_rate": 3.787348714069592e-05, "loss": 0.8489, "step": 2567 }, { "epoch": 1.94, "learning_rate": 3.786875945537065e-05, "loss": 0.9125, "step": 2568 }, { "epoch": 1.94, "learning_rate": 3.786403177004539e-05, "loss": 0.9674, "step": 2569 }, { "epoch": 1.94, "learning_rate": 3.785930408472012e-05, "loss": 0.8997, "step": 2570 }, { "epoch": 1.94, "learning_rate": 3.785457639939486e-05, "loss": 0.9253, "step": 2571 }, { "epoch": 1.95, "learning_rate": 3.7849848714069594e-05, "loss": 0.8791, "step": 2572 }, { "epoch": 1.95, "learning_rate": 3.7845121028744326e-05, "loss": 0.8969, "step": 2573 }, { "epoch": 1.95, "learning_rate": 3.7840393343419065e-05, "loss": 0.898, "step": 2574 }, { "epoch": 1.95, "learning_rate": 3.78356656580938e-05, "loss": 0.8551, "step": 2575 }, { "epoch": 1.95, "learning_rate": 3.783093797276853e-05, "loss": 0.8829, "step": 2576 }, { "epoch": 1.95, "learning_rate": 3.782621028744327e-05, "loss": 0.8835, "step": 2577 }, { "epoch": 1.95, "learning_rate": 3.7821482602118e-05, "loss": 0.8119, "step": 2578 }, { "epoch": 1.95, "learning_rate": 3.781675491679274e-05, "loss": 0.9587, "step": 2579 }, { "epoch": 1.95, "learning_rate": 3.781202723146747e-05, "loss": 0.969, "step": 2580 }, { "epoch": 1.95, "learning_rate": 3.780729954614221e-05, "loss": 0.8306, "step": 2581 }, { "epoch": 1.95, "learning_rate": 3.780257186081694e-05, "loss": 0.9675, "step": 2582 }, { "epoch": 1.95, "learning_rate": 3.779784417549168e-05, "loss": 0.9041, "step": 2583 }, { "epoch": 1.95, "learning_rate": 3.779311649016641e-05, "loss": 0.8593, "step": 2584 }, { "epoch": 1.96, "learning_rate": 3.778838880484115e-05, "loss": 0.9429, "step": 2585 }, { "epoch": 1.96, "learning_rate": 3.778366111951589e-05, "loss": 0.9883, "step": 2586 }, { "epoch": 1.96, "learning_rate": 3.777893343419062e-05, "loss": 0.8948, "step": 2587 }, { "epoch": 1.96, "learning_rate": 3.777420574886536e-05, "loss": 0.8622, "step": 2588 }, { "epoch": 1.96, "learning_rate": 3.776947806354009e-05, "loss": 0.8066, "step": 2589 }, { "epoch": 1.96, "learning_rate": 3.776475037821483e-05, "loss": 0.8887, "step": 2590 }, { "epoch": 1.96, "learning_rate": 3.7760022692889564e-05, "loss": 0.8134, "step": 2591 }, { "epoch": 1.96, "learning_rate": 3.77552950075643e-05, "loss": 0.8332, "step": 2592 }, { "epoch": 1.96, "learning_rate": 3.7750567322239035e-05, "loss": 0.8395, "step": 2593 }, { "epoch": 1.96, "learning_rate": 3.7745839636913774e-05, "loss": 0.873, "step": 2594 }, { "epoch": 1.96, "learning_rate": 3.7741111951588506e-05, "loss": 0.9161, "step": 2595 }, { "epoch": 1.96, "learning_rate": 3.7736384266263245e-05, "loss": 0.8624, "step": 2596 }, { "epoch": 1.96, "learning_rate": 3.773165658093798e-05, "loss": 0.8045, "step": 2597 }, { "epoch": 1.96, "learning_rate": 3.772692889561271e-05, "loss": 0.8501, "step": 2598 }, { "epoch": 1.97, "learning_rate": 3.772220121028745e-05, "loss": 0.9476, "step": 2599 }, { "epoch": 1.97, "learning_rate": 3.771747352496218e-05, "loss": 0.902, "step": 2600 }, { "epoch": 1.97, "learning_rate": 3.771274583963691e-05, "loss": 0.8581, "step": 2601 }, { "epoch": 1.97, "learning_rate": 3.770801815431165e-05, "loss": 0.9091, "step": 2602 }, { "epoch": 1.97, "learning_rate": 3.770329046898638e-05, "loss": 0.9239, "step": 2603 }, { "epoch": 1.97, "learning_rate": 3.769856278366112e-05, "loss": 0.9306, "step": 2604 }, { "epoch": 1.97, "learning_rate": 3.7693835098335854e-05, "loss": 0.9677, "step": 2605 }, { "epoch": 1.97, "learning_rate": 3.768910741301059e-05, "loss": 0.888, "step": 2606 }, { "epoch": 1.97, "learning_rate": 3.7684379727685324e-05, "loss": 0.8035, "step": 2607 }, { "epoch": 1.97, "learning_rate": 3.767965204236006e-05, "loss": 0.8222, "step": 2608 }, { "epoch": 1.97, "learning_rate": 3.7674924357034795e-05, "loss": 0.8738, "step": 2609 }, { "epoch": 1.97, "learning_rate": 3.7670196671709534e-05, "loss": 0.7585, "step": 2610 }, { "epoch": 1.97, "learning_rate": 3.7665468986384266e-05, "loss": 0.8723, "step": 2611 }, { "epoch": 1.98, "learning_rate": 3.7660741301059005e-05, "loss": 0.9532, "step": 2612 }, { "epoch": 1.98, "learning_rate": 3.765601361573374e-05, "loss": 0.9143, "step": 2613 }, { "epoch": 1.98, "learning_rate": 3.7651285930408476e-05, "loss": 0.94, "step": 2614 }, { "epoch": 1.98, "learning_rate": 3.764655824508321e-05, "loss": 0.8629, "step": 2615 }, { "epoch": 1.98, "learning_rate": 3.764183055975795e-05, "loss": 0.9791, "step": 2616 }, { "epoch": 1.98, "learning_rate": 3.763710287443268e-05, "loss": 0.8596, "step": 2617 }, { "epoch": 1.98, "learning_rate": 3.763237518910742e-05, "loss": 0.853, "step": 2618 }, { "epoch": 1.98, "learning_rate": 3.762764750378215e-05, "loss": 0.8764, "step": 2619 }, { "epoch": 1.98, "learning_rate": 3.762291981845689e-05, "loss": 0.9504, "step": 2620 }, { "epoch": 1.98, "learning_rate": 3.761819213313162e-05, "loss": 0.9352, "step": 2621 }, { "epoch": 1.98, "learning_rate": 3.761346444780636e-05, "loss": 0.9188, "step": 2622 }, { "epoch": 1.98, "learning_rate": 3.760873676248109e-05, "loss": 0.9154, "step": 2623 }, { "epoch": 1.98, "learning_rate": 3.760400907715583e-05, "loss": 0.8201, "step": 2624 }, { "epoch": 1.99, "learning_rate": 3.759928139183056e-05, "loss": 0.9663, "step": 2625 }, { "epoch": 1.99, "learning_rate": 3.7594553706505295e-05, "loss": 0.8462, "step": 2626 }, { "epoch": 1.99, "learning_rate": 3.758982602118003e-05, "loss": 0.8624, "step": 2627 }, { "epoch": 1.99, "learning_rate": 3.7585098335854765e-05, "loss": 1.0373, "step": 2628 }, { "epoch": 1.99, "learning_rate": 3.75803706505295e-05, "loss": 0.887, "step": 2629 }, { "epoch": 1.99, "learning_rate": 3.7575642965204236e-05, "loss": 0.9287, "step": 2630 }, { "epoch": 1.99, "learning_rate": 3.757091527987897e-05, "loss": 0.9733, "step": 2631 }, { "epoch": 1.99, "learning_rate": 3.756618759455371e-05, "loss": 0.8364, "step": 2632 }, { "epoch": 1.99, "learning_rate": 3.756145990922844e-05, "loss": 0.8541, "step": 2633 }, { "epoch": 1.99, "learning_rate": 3.755673222390318e-05, "loss": 0.8262, "step": 2634 }, { "epoch": 1.99, "learning_rate": 3.755200453857791e-05, "loss": 0.8964, "step": 2635 }, { "epoch": 1.99, "learning_rate": 3.754727685325265e-05, "loss": 0.7654, "step": 2636 }, { "epoch": 1.99, "learning_rate": 3.754254916792738e-05, "loss": 0.9505, "step": 2637 }, { "epoch": 2.0, "learning_rate": 3.753782148260212e-05, "loss": 1.006, "step": 2638 }, { "epoch": 2.0, "learning_rate": 3.753309379727685e-05, "loss": 0.8699, "step": 2639 }, { "epoch": 2.0, "learning_rate": 3.752836611195159e-05, "loss": 0.9399, "step": 2640 }, { "epoch": 2.0, "learning_rate": 3.752363842662632e-05, "loss": 0.8242, "step": 2641 }, { "epoch": 2.0, "learning_rate": 3.751891074130106e-05, "loss": 1.0122, "step": 2642 }, { "epoch": 2.0, "learning_rate": 3.7514183055975794e-05, "loss": 0.9879, "step": 2643 }, { "epoch": 2.0, "learning_rate": 3.750945537065053e-05, "loss": 0.8652, "step": 2644 }, { "epoch": 2.0, "learning_rate": 3.7504727685325265e-05, "loss": 0.6463, "step": 2645 }, { "epoch": 2.0, "learning_rate": 3.7500000000000003e-05, "loss": 0.2642, "step": 2646 }, { "epoch": 2.0, "learning_rate": 3.749527231467474e-05, "loss": 0.2791, "step": 2647 }, { "epoch": 2.0, "learning_rate": 3.7490544629349474e-05, "loss": 0.2656, "step": 2648 }, { "epoch": 2.0, "learning_rate": 3.748581694402421e-05, "loss": 0.2777, "step": 2649 }, { "epoch": 2.0, "learning_rate": 3.7481089258698945e-05, "loss": 0.2345, "step": 2650 }, { "epoch": 2.0, "learning_rate": 3.747636157337368e-05, "loss": 0.2687, "step": 2651 }, { "epoch": 2.01, "learning_rate": 3.7471633888048416e-05, "loss": 0.283, "step": 2652 }, { "epoch": 2.01, "learning_rate": 3.746690620272315e-05, "loss": 0.2413, "step": 2653 }, { "epoch": 2.01, "learning_rate": 3.746217851739788e-05, "loss": 0.2339, "step": 2654 }, { "epoch": 2.01, "learning_rate": 3.745745083207262e-05, "loss": 0.2605, "step": 2655 }, { "epoch": 2.01, "learning_rate": 3.745272314674735e-05, "loss": 0.2916, "step": 2656 }, { "epoch": 2.01, "learning_rate": 3.744799546142209e-05, "loss": 0.2188, "step": 2657 }, { "epoch": 2.01, "learning_rate": 3.744326777609682e-05, "loss": 0.2208, "step": 2658 }, { "epoch": 2.01, "learning_rate": 3.743854009077156e-05, "loss": 0.2583, "step": 2659 }, { "epoch": 2.01, "learning_rate": 3.743381240544629e-05, "loss": 0.3086, "step": 2660 }, { "epoch": 2.01, "learning_rate": 3.742908472012103e-05, "loss": 0.2537, "step": 2661 }, { "epoch": 2.01, "learning_rate": 3.7424357034795764e-05, "loss": 0.2863, "step": 2662 }, { "epoch": 2.01, "learning_rate": 3.74196293494705e-05, "loss": 0.2327, "step": 2663 }, { "epoch": 2.01, "learning_rate": 3.7414901664145235e-05, "loss": 0.2863, "step": 2664 }, { "epoch": 2.02, "learning_rate": 3.7410173978819974e-05, "loss": 0.2675, "step": 2665 }, { "epoch": 2.02, "learning_rate": 3.7405446293494706e-05, "loss": 0.2337, "step": 2666 }, { "epoch": 2.02, "learning_rate": 3.7400718608169444e-05, "loss": 0.2477, "step": 2667 }, { "epoch": 2.02, "learning_rate": 3.7395990922844177e-05, "loss": 0.2361, "step": 2668 }, { "epoch": 2.02, "learning_rate": 3.7391263237518915e-05, "loss": 0.2534, "step": 2669 }, { "epoch": 2.02, "learning_rate": 3.738653555219365e-05, "loss": 0.233, "step": 2670 }, { "epoch": 2.02, "learning_rate": 3.7381807866868386e-05, "loss": 0.1913, "step": 2671 }, { "epoch": 2.02, "learning_rate": 3.737708018154312e-05, "loss": 0.2104, "step": 2672 }, { "epoch": 2.02, "learning_rate": 3.737235249621786e-05, "loss": 0.2621, "step": 2673 }, { "epoch": 2.02, "learning_rate": 3.736762481089259e-05, "loss": 0.2314, "step": 2674 }, { "epoch": 2.02, "learning_rate": 3.736289712556733e-05, "loss": 0.2897, "step": 2675 }, { "epoch": 2.02, "learning_rate": 3.735816944024206e-05, "loss": 0.2124, "step": 2676 }, { "epoch": 2.02, "learning_rate": 3.73534417549168e-05, "loss": 0.2044, "step": 2677 }, { "epoch": 2.03, "learning_rate": 3.734871406959153e-05, "loss": 0.2211, "step": 2678 }, { "epoch": 2.03, "learning_rate": 3.734398638426626e-05, "loss": 0.2896, "step": 2679 }, { "epoch": 2.03, "learning_rate": 3.7339258698941e-05, "loss": 0.2022, "step": 2680 }, { "epoch": 2.03, "learning_rate": 3.7334531013615734e-05, "loss": 0.2613, "step": 2681 }, { "epoch": 2.03, "learning_rate": 3.7329803328290466e-05, "loss": 0.2896, "step": 2682 }, { "epoch": 2.03, "learning_rate": 3.7325075642965205e-05, "loss": 0.2243, "step": 2683 }, { "epoch": 2.03, "learning_rate": 3.732034795763994e-05, "loss": 0.2314, "step": 2684 }, { "epoch": 2.03, "learning_rate": 3.7315620272314676e-05, "loss": 0.2944, "step": 2685 }, { "epoch": 2.03, "learning_rate": 3.731089258698941e-05, "loss": 0.2411, "step": 2686 }, { "epoch": 2.03, "learning_rate": 3.730616490166415e-05, "loss": 0.2105, "step": 2687 }, { "epoch": 2.03, "learning_rate": 3.730143721633888e-05, "loss": 0.2415, "step": 2688 }, { "epoch": 2.03, "learning_rate": 3.729670953101362e-05, "loss": 0.2254, "step": 2689 }, { "epoch": 2.03, "learning_rate": 3.729198184568835e-05, "loss": 0.2075, "step": 2690 }, { "epoch": 2.04, "learning_rate": 3.728725416036309e-05, "loss": 0.2243, "step": 2691 }, { "epoch": 2.04, "learning_rate": 3.728252647503782e-05, "loss": 0.2349, "step": 2692 }, { "epoch": 2.04, "learning_rate": 3.727779878971256e-05, "loss": 0.2158, "step": 2693 }, { "epoch": 2.04, "learning_rate": 3.727307110438729e-05, "loss": 0.2504, "step": 2694 }, { "epoch": 2.04, "learning_rate": 3.726834341906203e-05, "loss": 0.2041, "step": 2695 }, { "epoch": 2.04, "learning_rate": 3.726361573373676e-05, "loss": 0.2722, "step": 2696 }, { "epoch": 2.04, "learning_rate": 3.72588880484115e-05, "loss": 0.2318, "step": 2697 }, { "epoch": 2.04, "learning_rate": 3.725416036308623e-05, "loss": 0.2924, "step": 2698 }, { "epoch": 2.04, "learning_rate": 3.724943267776097e-05, "loss": 0.2528, "step": 2699 }, { "epoch": 2.04, "learning_rate": 3.7244704992435704e-05, "loss": 0.2876, "step": 2700 }, { "epoch": 2.04, "learning_rate": 3.723997730711044e-05, "loss": 0.2407, "step": 2701 }, { "epoch": 2.04, "learning_rate": 3.7235249621785175e-05, "loss": 0.2678, "step": 2702 }, { "epoch": 2.04, "learning_rate": 3.7230521936459914e-05, "loss": 0.2925, "step": 2703 }, { "epoch": 2.04, "learning_rate": 3.7225794251134646e-05, "loss": 0.2488, "step": 2704 }, { "epoch": 2.05, "learning_rate": 3.7221066565809385e-05, "loss": 0.2844, "step": 2705 }, { "epoch": 2.05, "learning_rate": 3.721633888048412e-05, "loss": 0.2642, "step": 2706 }, { "epoch": 2.05, "learning_rate": 3.721161119515885e-05, "loss": 0.2678, "step": 2707 }, { "epoch": 2.05, "learning_rate": 3.720688350983359e-05, "loss": 0.2248, "step": 2708 }, { "epoch": 2.05, "learning_rate": 3.720215582450832e-05, "loss": 0.2552, "step": 2709 }, { "epoch": 2.05, "learning_rate": 3.719742813918306e-05, "loss": 0.2574, "step": 2710 }, { "epoch": 2.05, "learning_rate": 3.719270045385779e-05, "loss": 0.2593, "step": 2711 }, { "epoch": 2.05, "learning_rate": 3.718797276853253e-05, "loss": 0.3019, "step": 2712 }, { "epoch": 2.05, "learning_rate": 3.718324508320726e-05, "loss": 0.2727, "step": 2713 }, { "epoch": 2.05, "learning_rate": 3.7178517397882e-05, "loss": 0.2565, "step": 2714 }, { "epoch": 2.05, "learning_rate": 3.717378971255673e-05, "loss": 0.2768, "step": 2715 }, { "epoch": 2.05, "learning_rate": 3.716906202723147e-05, "loss": 0.2561, "step": 2716 }, { "epoch": 2.05, "learning_rate": 3.71643343419062e-05, "loss": 0.1999, "step": 2717 }, { "epoch": 2.06, "learning_rate": 3.715960665658094e-05, "loss": 0.1942, "step": 2718 }, { "epoch": 2.06, "learning_rate": 3.7154878971255674e-05, "loss": 0.2613, "step": 2719 }, { "epoch": 2.06, "learning_rate": 3.715015128593041e-05, "loss": 0.2854, "step": 2720 }, { "epoch": 2.06, "learning_rate": 3.7145423600605145e-05, "loss": 0.2517, "step": 2721 }, { "epoch": 2.06, "learning_rate": 3.7140695915279884e-05, "loss": 0.2872, "step": 2722 }, { "epoch": 2.06, "learning_rate": 3.7135968229954616e-05, "loss": 0.2546, "step": 2723 }, { "epoch": 2.06, "learning_rate": 3.7131240544629355e-05, "loss": 0.2101, "step": 2724 }, { "epoch": 2.06, "learning_rate": 3.712651285930409e-05, "loss": 0.2735, "step": 2725 }, { "epoch": 2.06, "learning_rate": 3.7121785173978826e-05, "loss": 0.2575, "step": 2726 }, { "epoch": 2.06, "learning_rate": 3.711705748865356e-05, "loss": 0.2323, "step": 2727 }, { "epoch": 2.06, "learning_rate": 3.71123298033283e-05, "loss": 0.3217, "step": 2728 }, { "epoch": 2.06, "learning_rate": 3.710760211800303e-05, "loss": 0.28, "step": 2729 }, { "epoch": 2.06, "learning_rate": 3.710287443267777e-05, "loss": 0.256, "step": 2730 }, { "epoch": 2.07, "learning_rate": 3.70981467473525e-05, "loss": 0.2208, "step": 2731 }, { "epoch": 2.07, "learning_rate": 3.709341906202723e-05, "loss": 0.2698, "step": 2732 }, { "epoch": 2.07, "learning_rate": 3.708869137670197e-05, "loss": 0.244, "step": 2733 }, { "epoch": 2.07, "learning_rate": 3.70839636913767e-05, "loss": 0.2184, "step": 2734 }, { "epoch": 2.07, "learning_rate": 3.7079236006051435e-05, "loss": 0.2843, "step": 2735 }, { "epoch": 2.07, "learning_rate": 3.7074508320726173e-05, "loss": 0.2907, "step": 2736 }, { "epoch": 2.07, "learning_rate": 3.7069780635400905e-05, "loss": 0.2353, "step": 2737 }, { "epoch": 2.07, "learning_rate": 3.7065052950075644e-05, "loss": 0.2434, "step": 2738 }, { "epoch": 2.07, "learning_rate": 3.7060325264750376e-05, "loss": 0.2586, "step": 2739 }, { "epoch": 2.07, "learning_rate": 3.7055597579425115e-05, "loss": 0.2578, "step": 2740 }, { "epoch": 2.07, "learning_rate": 3.705086989409985e-05, "loss": 0.2875, "step": 2741 }, { "epoch": 2.07, "learning_rate": 3.7046142208774586e-05, "loss": 0.2484, "step": 2742 }, { "epoch": 2.07, "learning_rate": 3.704141452344932e-05, "loss": 0.2456, "step": 2743 }, { "epoch": 2.08, "learning_rate": 3.703668683812406e-05, "loss": 0.2544, "step": 2744 }, { "epoch": 2.08, "learning_rate": 3.703195915279879e-05, "loss": 0.2664, "step": 2745 }, { "epoch": 2.08, "learning_rate": 3.702723146747353e-05, "loss": 0.2514, "step": 2746 }, { "epoch": 2.08, "learning_rate": 3.702250378214826e-05, "loss": 0.2577, "step": 2747 }, { "epoch": 2.08, "learning_rate": 3.7017776096823e-05, "loss": 0.2732, "step": 2748 }, { "epoch": 2.08, "learning_rate": 3.701304841149773e-05, "loss": 0.283, "step": 2749 }, { "epoch": 2.08, "learning_rate": 3.700832072617247e-05, "loss": 0.2741, "step": 2750 }, { "epoch": 2.08, "learning_rate": 3.70035930408472e-05, "loss": 0.2542, "step": 2751 }, { "epoch": 2.08, "learning_rate": 3.699886535552194e-05, "loss": 0.275, "step": 2752 }, { "epoch": 2.08, "learning_rate": 3.699413767019667e-05, "loss": 0.2492, "step": 2753 }, { "epoch": 2.08, "learning_rate": 3.698940998487141e-05, "loss": 0.2255, "step": 2754 }, { "epoch": 2.08, "learning_rate": 3.6984682299546144e-05, "loss": 0.2332, "step": 2755 }, { "epoch": 2.08, "learning_rate": 3.697995461422088e-05, "loss": 0.3261, "step": 2756 }, { "epoch": 2.09, "learning_rate": 3.6975226928895614e-05, "loss": 0.2309, "step": 2757 }, { "epoch": 2.09, "learning_rate": 3.697049924357035e-05, "loss": 0.2845, "step": 2758 }, { "epoch": 2.09, "learning_rate": 3.6965771558245085e-05, "loss": 0.2816, "step": 2759 }, { "epoch": 2.09, "learning_rate": 3.696104387291982e-05, "loss": 0.2781, "step": 2760 }, { "epoch": 2.09, "learning_rate": 3.6956316187594556e-05, "loss": 0.2943, "step": 2761 }, { "epoch": 2.09, "learning_rate": 3.695158850226929e-05, "loss": 0.2698, "step": 2762 }, { "epoch": 2.09, "learning_rate": 3.694686081694402e-05, "loss": 0.271, "step": 2763 }, { "epoch": 2.09, "learning_rate": 3.694213313161876e-05, "loss": 0.302, "step": 2764 }, { "epoch": 2.09, "learning_rate": 3.693740544629349e-05, "loss": 0.3105, "step": 2765 }, { "epoch": 2.09, "learning_rate": 3.693267776096823e-05, "loss": 0.2694, "step": 2766 }, { "epoch": 2.09, "learning_rate": 3.692795007564297e-05, "loss": 0.2445, "step": 2767 }, { "epoch": 2.09, "learning_rate": 3.69232223903177e-05, "loss": 0.2371, "step": 2768 }, { "epoch": 2.09, "learning_rate": 3.691849470499244e-05, "loss": 0.2385, "step": 2769 }, { "epoch": 2.09, "learning_rate": 3.691376701966717e-05, "loss": 0.3081, "step": 2770 }, { "epoch": 2.1, "learning_rate": 3.690903933434191e-05, "loss": 0.245, "step": 2771 }, { "epoch": 2.1, "learning_rate": 3.690431164901664e-05, "loss": 0.2843, "step": 2772 }, { "epoch": 2.1, "learning_rate": 3.689958396369138e-05, "loss": 0.2426, "step": 2773 }, { "epoch": 2.1, "learning_rate": 3.6894856278366114e-05, "loss": 0.2427, "step": 2774 }, { "epoch": 2.1, "learning_rate": 3.689012859304085e-05, "loss": 0.2574, "step": 2775 }, { "epoch": 2.1, "learning_rate": 3.6885400907715585e-05, "loss": 0.2348, "step": 2776 }, { "epoch": 2.1, "learning_rate": 3.688067322239032e-05, "loss": 0.2487, "step": 2777 }, { "epoch": 2.1, "learning_rate": 3.6875945537065055e-05, "loss": 0.2651, "step": 2778 }, { "epoch": 2.1, "learning_rate": 3.6871217851739794e-05, "loss": 0.261, "step": 2779 }, { "epoch": 2.1, "learning_rate": 3.6866490166414526e-05, "loss": 0.2659, "step": 2780 }, { "epoch": 2.1, "learning_rate": 3.6861762481089265e-05, "loss": 0.2524, "step": 2781 }, { "epoch": 2.1, "learning_rate": 3.6857034795764e-05, "loss": 0.2175, "step": 2782 }, { "epoch": 2.1, "learning_rate": 3.6852307110438736e-05, "loss": 0.2337, "step": 2783 }, { "epoch": 2.11, "learning_rate": 3.684757942511347e-05, "loss": 0.2429, "step": 2784 }, { "epoch": 2.11, "learning_rate": 3.68428517397882e-05, "loss": 0.2785, "step": 2785 }, { "epoch": 2.11, "learning_rate": 3.683812405446294e-05, "loss": 0.239, "step": 2786 }, { "epoch": 2.11, "learning_rate": 3.683339636913767e-05, "loss": 0.2044, "step": 2787 }, { "epoch": 2.11, "learning_rate": 3.68286686838124e-05, "loss": 0.2619, "step": 2788 }, { "epoch": 2.11, "learning_rate": 3.682394099848714e-05, "loss": 0.2904, "step": 2789 }, { "epoch": 2.11, "learning_rate": 3.6819213313161874e-05, "loss": 0.2389, "step": 2790 }, { "epoch": 2.11, "learning_rate": 3.681448562783661e-05, "loss": 0.2592, "step": 2791 }, { "epoch": 2.11, "learning_rate": 3.6809757942511345e-05, "loss": 0.239, "step": 2792 }, { "epoch": 2.11, "learning_rate": 3.6805030257186084e-05, "loss": 0.2952, "step": 2793 }, { "epoch": 2.11, "learning_rate": 3.6800302571860816e-05, "loss": 0.2828, "step": 2794 }, { "epoch": 2.11, "learning_rate": 3.6795574886535555e-05, "loss": 0.2629, "step": 2795 }, { "epoch": 2.11, "learning_rate": 3.679084720121029e-05, "loss": 0.1872, "step": 2796 }, { "epoch": 2.12, "learning_rate": 3.6786119515885026e-05, "loss": 0.2885, "step": 2797 }, { "epoch": 2.12, "learning_rate": 3.678139183055976e-05, "loss": 0.2457, "step": 2798 }, { "epoch": 2.12, "learning_rate": 3.6776664145234496e-05, "loss": 0.2956, "step": 2799 }, { "epoch": 2.12, "learning_rate": 3.677193645990923e-05, "loss": 0.312, "step": 2800 }, { "epoch": 2.12, "learning_rate": 3.676720877458397e-05, "loss": 0.2742, "step": 2801 }, { "epoch": 2.12, "learning_rate": 3.67624810892587e-05, "loss": 0.2343, "step": 2802 }, { "epoch": 2.12, "learning_rate": 3.675775340393344e-05, "loss": 0.2495, "step": 2803 }, { "epoch": 2.12, "learning_rate": 3.675302571860817e-05, "loss": 0.257, "step": 2804 }, { "epoch": 2.12, "learning_rate": 3.674829803328291e-05, "loss": 0.2251, "step": 2805 }, { "epoch": 2.12, "learning_rate": 3.674357034795764e-05, "loss": 0.2134, "step": 2806 }, { "epoch": 2.12, "learning_rate": 3.673884266263238e-05, "loss": 0.2624, "step": 2807 }, { "epoch": 2.12, "learning_rate": 3.673411497730711e-05, "loss": 0.2779, "step": 2808 }, { "epoch": 2.12, "learning_rate": 3.672938729198185e-05, "loss": 0.2446, "step": 2809 }, { "epoch": 2.13, "learning_rate": 3.672465960665658e-05, "loss": 0.2504, "step": 2810 }, { "epoch": 2.13, "learning_rate": 3.671993192133132e-05, "loss": 0.2592, "step": 2811 }, { "epoch": 2.13, "learning_rate": 3.6715204236006054e-05, "loss": 0.2447, "step": 2812 }, { "epoch": 2.13, "learning_rate": 3.6710476550680786e-05, "loss": 0.2626, "step": 2813 }, { "epoch": 2.13, "learning_rate": 3.6705748865355525e-05, "loss": 0.279, "step": 2814 }, { "epoch": 2.13, "learning_rate": 3.670102118003026e-05, "loss": 0.2851, "step": 2815 }, { "epoch": 2.13, "learning_rate": 3.669629349470499e-05, "loss": 0.2726, "step": 2816 }, { "epoch": 2.13, "learning_rate": 3.669156580937973e-05, "loss": 0.2581, "step": 2817 }, { "epoch": 2.13, "learning_rate": 3.668683812405446e-05, "loss": 0.2982, "step": 2818 }, { "epoch": 2.13, "learning_rate": 3.66821104387292e-05, "loss": 0.3168, "step": 2819 }, { "epoch": 2.13, "learning_rate": 3.667738275340393e-05, "loss": 0.2944, "step": 2820 }, { "epoch": 2.13, "learning_rate": 3.667265506807867e-05, "loss": 0.2579, "step": 2821 }, { "epoch": 2.13, "learning_rate": 3.66679273827534e-05, "loss": 0.2299, "step": 2822 }, { "epoch": 2.13, "learning_rate": 3.666319969742814e-05, "loss": 0.2388, "step": 2823 }, { "epoch": 2.14, "learning_rate": 3.665847201210287e-05, "loss": 0.2128, "step": 2824 }, { "epoch": 2.14, "learning_rate": 3.665374432677761e-05, "loss": 0.3137, "step": 2825 }, { "epoch": 2.14, "learning_rate": 3.664901664145234e-05, "loss": 0.2794, "step": 2826 }, { "epoch": 2.14, "learning_rate": 3.664428895612708e-05, "loss": 0.2448, "step": 2827 }, { "epoch": 2.14, "learning_rate": 3.663956127080182e-05, "loss": 0.2626, "step": 2828 }, { "epoch": 2.14, "learning_rate": 3.663483358547655e-05, "loss": 0.2139, "step": 2829 }, { "epoch": 2.14, "learning_rate": 3.663010590015129e-05, "loss": 0.2377, "step": 2830 }, { "epoch": 2.14, "learning_rate": 3.6625378214826024e-05, "loss": 0.2433, "step": 2831 }, { "epoch": 2.14, "learning_rate": 3.662065052950076e-05, "loss": 0.2593, "step": 2832 }, { "epoch": 2.14, "learning_rate": 3.6615922844175495e-05, "loss": 0.3356, "step": 2833 }, { "epoch": 2.14, "learning_rate": 3.6611195158850234e-05, "loss": 0.264, "step": 2834 }, { "epoch": 2.14, "learning_rate": 3.6606467473524966e-05, "loss": 0.2756, "step": 2835 }, { "epoch": 2.14, "learning_rate": 3.6601739788199705e-05, "loss": 0.2502, "step": 2836 }, { "epoch": 2.15, "learning_rate": 3.659701210287444e-05, "loss": 0.2292, "step": 2837 }, { "epoch": 2.15, "learning_rate": 3.659228441754917e-05, "loss": 0.2893, "step": 2838 }, { "epoch": 2.15, "learning_rate": 3.658755673222391e-05, "loss": 0.3022, "step": 2839 }, { "epoch": 2.15, "learning_rate": 3.658282904689864e-05, "loss": 0.2701, "step": 2840 }, { "epoch": 2.15, "learning_rate": 3.657810136157337e-05, "loss": 0.2187, "step": 2841 }, { "epoch": 2.15, "learning_rate": 3.657337367624811e-05, "loss": 0.2614, "step": 2842 }, { "epoch": 2.15, "learning_rate": 3.656864599092284e-05, "loss": 0.2533, "step": 2843 }, { "epoch": 2.15, "learning_rate": 3.656391830559758e-05, "loss": 0.2332, "step": 2844 }, { "epoch": 2.15, "learning_rate": 3.6559190620272313e-05, "loss": 0.2459, "step": 2845 }, { "epoch": 2.15, "learning_rate": 3.655446293494705e-05, "loss": 0.2852, "step": 2846 }, { "epoch": 2.15, "learning_rate": 3.6549735249621784e-05, "loss": 0.3307, "step": 2847 }, { "epoch": 2.15, "learning_rate": 3.654500756429652e-05, "loss": 0.2791, "step": 2848 }, { "epoch": 2.15, "learning_rate": 3.6540279878971255e-05, "loss": 0.2654, "step": 2849 }, { "epoch": 2.16, "learning_rate": 3.6535552193645994e-05, "loss": 0.229, "step": 2850 }, { "epoch": 2.16, "learning_rate": 3.6530824508320726e-05, "loss": 0.2797, "step": 2851 }, { "epoch": 2.16, "learning_rate": 3.6526096822995465e-05, "loss": 0.2379, "step": 2852 }, { "epoch": 2.16, "learning_rate": 3.65213691376702e-05, "loss": 0.2707, "step": 2853 }, { "epoch": 2.16, "learning_rate": 3.6516641452344936e-05, "loss": 0.2518, "step": 2854 }, { "epoch": 2.16, "learning_rate": 3.651191376701967e-05, "loss": 0.2569, "step": 2855 }, { "epoch": 2.16, "learning_rate": 3.650718608169441e-05, "loss": 0.2755, "step": 2856 }, { "epoch": 2.16, "learning_rate": 3.650245839636914e-05, "loss": 0.2432, "step": 2857 }, { "epoch": 2.16, "learning_rate": 3.649773071104388e-05, "loss": 0.2544, "step": 2858 }, { "epoch": 2.16, "learning_rate": 3.649300302571861e-05, "loss": 0.2838, "step": 2859 }, { "epoch": 2.16, "learning_rate": 3.648827534039335e-05, "loss": 0.2909, "step": 2860 }, { "epoch": 2.16, "learning_rate": 3.648354765506808e-05, "loss": 0.2449, "step": 2861 }, { "epoch": 2.16, "learning_rate": 3.647881996974282e-05, "loss": 0.2407, "step": 2862 }, { "epoch": 2.17, "learning_rate": 3.647409228441755e-05, "loss": 0.279, "step": 2863 }, { "epoch": 2.17, "learning_rate": 3.646936459909229e-05, "loss": 0.2757, "step": 2864 }, { "epoch": 2.17, "learning_rate": 3.646463691376702e-05, "loss": 0.2465, "step": 2865 }, { "epoch": 2.17, "learning_rate": 3.6459909228441754e-05, "loss": 0.2472, "step": 2866 }, { "epoch": 2.17, "learning_rate": 3.645518154311649e-05, "loss": 0.2336, "step": 2867 }, { "epoch": 2.17, "learning_rate": 3.6450453857791225e-05, "loss": 0.2852, "step": 2868 }, { "epoch": 2.17, "learning_rate": 3.644572617246596e-05, "loss": 0.2621, "step": 2869 }, { "epoch": 2.17, "learning_rate": 3.6440998487140696e-05, "loss": 0.3297, "step": 2870 }, { "epoch": 2.17, "learning_rate": 3.643627080181543e-05, "loss": 0.3096, "step": 2871 }, { "epoch": 2.17, "learning_rate": 3.643154311649017e-05, "loss": 0.2645, "step": 2872 }, { "epoch": 2.17, "learning_rate": 3.64268154311649e-05, "loss": 0.2349, "step": 2873 }, { "epoch": 2.17, "learning_rate": 3.642208774583964e-05, "loss": 0.2793, "step": 2874 }, { "epoch": 2.17, "learning_rate": 3.641736006051437e-05, "loss": 0.2485, "step": 2875 }, { "epoch": 2.18, "learning_rate": 3.641263237518911e-05, "loss": 0.2494, "step": 2876 }, { "epoch": 2.18, "learning_rate": 3.640790468986384e-05, "loss": 0.269, "step": 2877 }, { "epoch": 2.18, "learning_rate": 3.640317700453858e-05, "loss": 0.2717, "step": 2878 }, { "epoch": 2.18, "learning_rate": 3.639844931921331e-05, "loss": 0.2658, "step": 2879 }, { "epoch": 2.18, "learning_rate": 3.639372163388805e-05, "loss": 0.2768, "step": 2880 }, { "epoch": 2.18, "learning_rate": 3.638899394856278e-05, "loss": 0.2559, "step": 2881 }, { "epoch": 2.18, "learning_rate": 3.638426626323752e-05, "loss": 0.2812, "step": 2882 }, { "epoch": 2.18, "learning_rate": 3.6379538577912254e-05, "loss": 0.2582, "step": 2883 }, { "epoch": 2.18, "learning_rate": 3.637481089258699e-05, "loss": 0.3224, "step": 2884 }, { "epoch": 2.18, "learning_rate": 3.6370083207261725e-05, "loss": 0.236, "step": 2885 }, { "epoch": 2.18, "learning_rate": 3.6365355521936463e-05, "loss": 0.2514, "step": 2886 }, { "epoch": 2.18, "learning_rate": 3.63606278366112e-05, "loss": 0.2564, "step": 2887 }, { "epoch": 2.18, "learning_rate": 3.6355900151285934e-05, "loss": 0.2945, "step": 2888 }, { "epoch": 2.18, "learning_rate": 3.635117246596067e-05, "loss": 0.2447, "step": 2889 }, { "epoch": 2.19, "learning_rate": 3.6346444780635405e-05, "loss": 0.2953, "step": 2890 }, { "epoch": 2.19, "learning_rate": 3.634171709531014e-05, "loss": 0.2647, "step": 2891 }, { "epoch": 2.19, "learning_rate": 3.6336989409984876e-05, "loss": 0.2629, "step": 2892 }, { "epoch": 2.19, "learning_rate": 3.633226172465961e-05, "loss": 0.2632, "step": 2893 }, { "epoch": 2.19, "learning_rate": 3.632753403933434e-05, "loss": 0.2734, "step": 2894 }, { "epoch": 2.19, "learning_rate": 3.632280635400908e-05, "loss": 0.304, "step": 2895 }, { "epoch": 2.19, "learning_rate": 3.631807866868381e-05, "loss": 0.2398, "step": 2896 }, { "epoch": 2.19, "learning_rate": 3.631335098335855e-05, "loss": 0.2429, "step": 2897 }, { "epoch": 2.19, "learning_rate": 3.630862329803328e-05, "loss": 0.2249, "step": 2898 }, { "epoch": 2.19, "learning_rate": 3.630389561270802e-05, "loss": 0.292, "step": 2899 }, { "epoch": 2.19, "learning_rate": 3.629916792738275e-05, "loss": 0.307, "step": 2900 }, { "epoch": 2.19, "learning_rate": 3.629444024205749e-05, "loss": 0.2732, "step": 2901 }, { "epoch": 2.19, "learning_rate": 3.6289712556732224e-05, "loss": 0.2733, "step": 2902 }, { "epoch": 2.2, "learning_rate": 3.628498487140696e-05, "loss": 0.2887, "step": 2903 }, { "epoch": 2.2, "learning_rate": 3.6280257186081695e-05, "loss": 0.3089, "step": 2904 }, { "epoch": 2.2, "learning_rate": 3.6275529500756434e-05, "loss": 0.2429, "step": 2905 }, { "epoch": 2.2, "learning_rate": 3.6270801815431166e-05, "loss": 0.27, "step": 2906 }, { "epoch": 2.2, "learning_rate": 3.6266074130105904e-05, "loss": 0.3267, "step": 2907 }, { "epoch": 2.2, "learning_rate": 3.6261346444780636e-05, "loss": 0.2715, "step": 2908 }, { "epoch": 2.2, "learning_rate": 3.6256618759455375e-05, "loss": 0.2749, "step": 2909 }, { "epoch": 2.2, "learning_rate": 3.625189107413011e-05, "loss": 0.2495, "step": 2910 }, { "epoch": 2.2, "learning_rate": 3.6247163388804846e-05, "loss": 0.2498, "step": 2911 }, { "epoch": 2.2, "learning_rate": 3.624243570347958e-05, "loss": 0.2655, "step": 2912 }, { "epoch": 2.2, "learning_rate": 3.623770801815432e-05, "loss": 0.3053, "step": 2913 }, { "epoch": 2.2, "learning_rate": 3.623298033282905e-05, "loss": 0.2555, "step": 2914 }, { "epoch": 2.2, "learning_rate": 3.622825264750379e-05, "loss": 0.278, "step": 2915 }, { "epoch": 2.21, "learning_rate": 3.622352496217852e-05, "loss": 0.3009, "step": 2916 }, { "epoch": 2.21, "learning_rate": 3.621879727685326e-05, "loss": 0.2592, "step": 2917 }, { "epoch": 2.21, "learning_rate": 3.621406959152799e-05, "loss": 0.2373, "step": 2918 }, { "epoch": 2.21, "learning_rate": 3.620934190620272e-05, "loss": 0.2418, "step": 2919 }, { "epoch": 2.21, "learning_rate": 3.620461422087746e-05, "loss": 0.2941, "step": 2920 }, { "epoch": 2.21, "learning_rate": 3.6199886535552194e-05, "loss": 0.236, "step": 2921 }, { "epoch": 2.21, "learning_rate": 3.6195158850226926e-05, "loss": 0.2764, "step": 2922 }, { "epoch": 2.21, "learning_rate": 3.6190431164901665e-05, "loss": 0.2967, "step": 2923 }, { "epoch": 2.21, "learning_rate": 3.61857034795764e-05, "loss": 0.2175, "step": 2924 }, { "epoch": 2.21, "learning_rate": 3.6180975794251136e-05, "loss": 0.2735, "step": 2925 }, { "epoch": 2.21, "learning_rate": 3.617624810892587e-05, "loss": 0.3096, "step": 2926 }, { "epoch": 2.21, "learning_rate": 3.6171520423600607e-05, "loss": 0.2541, "step": 2927 }, { "epoch": 2.21, "learning_rate": 3.616679273827534e-05, "loss": 0.2466, "step": 2928 }, { "epoch": 2.22, "learning_rate": 3.616206505295008e-05, "loss": 0.2807, "step": 2929 }, { "epoch": 2.22, "learning_rate": 3.615733736762481e-05, "loss": 0.2752, "step": 2930 }, { "epoch": 2.22, "learning_rate": 3.615260968229955e-05, "loss": 0.2675, "step": 2931 }, { "epoch": 2.22, "learning_rate": 3.614788199697428e-05, "loss": 0.2945, "step": 2932 }, { "epoch": 2.22, "learning_rate": 3.614315431164902e-05, "loss": 0.2714, "step": 2933 }, { "epoch": 2.22, "learning_rate": 3.613842662632375e-05, "loss": 0.3129, "step": 2934 }, { "epoch": 2.22, "learning_rate": 3.613369894099849e-05, "loss": 0.2161, "step": 2935 }, { "epoch": 2.22, "learning_rate": 3.612897125567322e-05, "loss": 0.2718, "step": 2936 }, { "epoch": 2.22, "learning_rate": 3.612424357034796e-05, "loss": 0.2491, "step": 2937 }, { "epoch": 2.22, "learning_rate": 3.611951588502269e-05, "loss": 0.2761, "step": 2938 }, { "epoch": 2.22, "learning_rate": 3.611478819969743e-05, "loss": 0.3039, "step": 2939 }, { "epoch": 2.22, "learning_rate": 3.6110060514372164e-05, "loss": 0.3056, "step": 2940 }, { "epoch": 2.22, "learning_rate": 3.61053328290469e-05, "loss": 0.2628, "step": 2941 }, { "epoch": 2.22, "learning_rate": 3.6100605143721635e-05, "loss": 0.2657, "step": 2942 }, { "epoch": 2.23, "learning_rate": 3.6095877458396374e-05, "loss": 0.2787, "step": 2943 }, { "epoch": 2.23, "learning_rate": 3.6091149773071106e-05, "loss": 0.2741, "step": 2944 }, { "epoch": 2.23, "learning_rate": 3.6086422087745845e-05, "loss": 0.2263, "step": 2945 }, { "epoch": 2.23, "learning_rate": 3.608169440242058e-05, "loss": 0.2229, "step": 2946 }, { "epoch": 2.23, "learning_rate": 3.607696671709531e-05, "loss": 0.3325, "step": 2947 }, { "epoch": 2.23, "learning_rate": 3.607223903177005e-05, "loss": 0.2832, "step": 2948 }, { "epoch": 2.23, "learning_rate": 3.606751134644478e-05, "loss": 0.2684, "step": 2949 }, { "epoch": 2.23, "learning_rate": 3.606278366111952e-05, "loss": 0.2694, "step": 2950 }, { "epoch": 2.23, "learning_rate": 3.605805597579425e-05, "loss": 0.251, "step": 2951 }, { "epoch": 2.23, "learning_rate": 3.605332829046899e-05, "loss": 0.2743, "step": 2952 }, { "epoch": 2.23, "learning_rate": 3.604860060514372e-05, "loss": 0.2717, "step": 2953 }, { "epoch": 2.23, "learning_rate": 3.604387291981846e-05, "loss": 0.2337, "step": 2954 }, { "epoch": 2.23, "learning_rate": 3.603914523449319e-05, "loss": 0.2692, "step": 2955 }, { "epoch": 2.24, "learning_rate": 3.603441754916793e-05, "loss": 0.2429, "step": 2956 }, { "epoch": 2.24, "learning_rate": 3.602968986384266e-05, "loss": 0.2307, "step": 2957 }, { "epoch": 2.24, "learning_rate": 3.60249621785174e-05, "loss": 0.3274, "step": 2958 }, { "epoch": 2.24, "learning_rate": 3.6020234493192134e-05, "loss": 0.2627, "step": 2959 }, { "epoch": 2.24, "learning_rate": 3.601550680786687e-05, "loss": 0.2751, "step": 2960 }, { "epoch": 2.24, "learning_rate": 3.6010779122541605e-05, "loss": 0.2745, "step": 2961 }, { "epoch": 2.24, "learning_rate": 3.6006051437216344e-05, "loss": 0.2634, "step": 2962 }, { "epoch": 2.24, "learning_rate": 3.6001323751891076e-05, "loss": 0.2853, "step": 2963 }, { "epoch": 2.24, "learning_rate": 3.5996596066565815e-05, "loss": 0.3089, "step": 2964 }, { "epoch": 2.24, "learning_rate": 3.599186838124055e-05, "loss": 0.2745, "step": 2965 }, { "epoch": 2.24, "learning_rate": 3.5987140695915286e-05, "loss": 0.2461, "step": 2966 }, { "epoch": 2.24, "learning_rate": 3.598241301059002e-05, "loss": 0.2355, "step": 2967 }, { "epoch": 2.24, "learning_rate": 3.5977685325264757e-05, "loss": 0.272, "step": 2968 }, { "epoch": 2.25, "learning_rate": 3.597295763993949e-05, "loss": 0.2651, "step": 2969 }, { "epoch": 2.25, "learning_rate": 3.596822995461423e-05, "loss": 0.2179, "step": 2970 }, { "epoch": 2.25, "learning_rate": 3.596350226928896e-05, "loss": 0.2707, "step": 2971 }, { "epoch": 2.25, "learning_rate": 3.595877458396369e-05, "loss": 0.2562, "step": 2972 }, { "epoch": 2.25, "learning_rate": 3.595404689863843e-05, "loss": 0.3022, "step": 2973 }, { "epoch": 2.25, "learning_rate": 3.594931921331316e-05, "loss": 0.2911, "step": 2974 }, { "epoch": 2.25, "learning_rate": 3.5944591527987895e-05, "loss": 0.2958, "step": 2975 }, { "epoch": 2.25, "learning_rate": 3.593986384266263e-05, "loss": 0.2821, "step": 2976 }, { "epoch": 2.25, "learning_rate": 3.5935136157337365e-05, "loss": 0.28, "step": 2977 }, { "epoch": 2.25, "learning_rate": 3.5930408472012104e-05, "loss": 0.2929, "step": 2978 }, { "epoch": 2.25, "learning_rate": 3.5925680786686836e-05, "loss": 0.2377, "step": 2979 }, { "epoch": 2.25, "learning_rate": 3.5920953101361575e-05, "loss": 0.2388, "step": 2980 }, { "epoch": 2.25, "learning_rate": 3.591622541603631e-05, "loss": 0.3139, "step": 2981 }, { "epoch": 2.26, "learning_rate": 3.5911497730711046e-05, "loss": 0.2913, "step": 2982 }, { "epoch": 2.26, "learning_rate": 3.590677004538578e-05, "loss": 0.2123, "step": 2983 }, { "epoch": 2.26, "learning_rate": 3.590204236006052e-05, "loss": 0.2189, "step": 2984 }, { "epoch": 2.26, "learning_rate": 3.589731467473525e-05, "loss": 0.2883, "step": 2985 }, { "epoch": 2.26, "learning_rate": 3.589258698940999e-05, "loss": 0.2752, "step": 2986 }, { "epoch": 2.26, "learning_rate": 3.588785930408472e-05, "loss": 0.2626, "step": 2987 }, { "epoch": 2.26, "learning_rate": 3.588313161875946e-05, "loss": 0.3082, "step": 2988 }, { "epoch": 2.26, "learning_rate": 3.587840393343419e-05, "loss": 0.2411, "step": 2989 }, { "epoch": 2.26, "learning_rate": 3.587367624810893e-05, "loss": 0.3206, "step": 2990 }, { "epoch": 2.26, "learning_rate": 3.586894856278366e-05, "loss": 0.2397, "step": 2991 }, { "epoch": 2.26, "learning_rate": 3.58642208774584e-05, "loss": 0.3057, "step": 2992 }, { "epoch": 2.26, "learning_rate": 3.585949319213313e-05, "loss": 0.295, "step": 2993 }, { "epoch": 2.26, "learning_rate": 3.585476550680787e-05, "loss": 0.2302, "step": 2994 }, { "epoch": 2.27, "learning_rate": 3.5850037821482603e-05, "loss": 0.2655, "step": 2995 }, { "epoch": 2.27, "learning_rate": 3.584531013615734e-05, "loss": 0.262, "step": 2996 }, { "epoch": 2.27, "learning_rate": 3.5840582450832074e-05, "loss": 0.2157, "step": 2997 }, { "epoch": 2.27, "learning_rate": 3.583585476550681e-05, "loss": 0.2324, "step": 2998 }, { "epoch": 2.27, "learning_rate": 3.5831127080181545e-05, "loss": 0.2633, "step": 2999 }, { "epoch": 2.27, "learning_rate": 3.582639939485628e-05, "loss": 0.2324, "step": 3000 }, { "epoch": 2.27, "learning_rate": 3.5821671709531016e-05, "loss": 0.2983, "step": 3001 }, { "epoch": 2.27, "learning_rate": 3.581694402420575e-05, "loss": 0.2683, "step": 3002 }, { "epoch": 2.27, "learning_rate": 3.581221633888048e-05, "loss": 0.3642, "step": 3003 }, { "epoch": 2.27, "learning_rate": 3.580748865355522e-05, "loss": 0.2594, "step": 3004 }, { "epoch": 2.27, "learning_rate": 3.580276096822995e-05, "loss": 0.2576, "step": 3005 }, { "epoch": 2.27, "learning_rate": 3.579803328290469e-05, "loss": 0.2872, "step": 3006 }, { "epoch": 2.27, "learning_rate": 3.579330559757943e-05, "loss": 0.272, "step": 3007 }, { "epoch": 2.27, "learning_rate": 3.578857791225416e-05, "loss": 0.3169, "step": 3008 }, { "epoch": 2.28, "learning_rate": 3.57838502269289e-05, "loss": 0.2688, "step": 3009 }, { "epoch": 2.28, "learning_rate": 3.577912254160363e-05, "loss": 0.3303, "step": 3010 }, { "epoch": 2.28, "learning_rate": 3.577439485627837e-05, "loss": 0.2623, "step": 3011 }, { "epoch": 2.28, "learning_rate": 3.57696671709531e-05, "loss": 0.238, "step": 3012 }, { "epoch": 2.28, "learning_rate": 3.576493948562784e-05, "loss": 0.2411, "step": 3013 }, { "epoch": 2.28, "learning_rate": 3.5760211800302574e-05, "loss": 0.2757, "step": 3014 }, { "epoch": 2.28, "learning_rate": 3.575548411497731e-05, "loss": 0.3047, "step": 3015 }, { "epoch": 2.28, "learning_rate": 3.5750756429652044e-05, "loss": 0.3214, "step": 3016 }, { "epoch": 2.28, "learning_rate": 3.574602874432678e-05, "loss": 0.2689, "step": 3017 }, { "epoch": 2.28, "learning_rate": 3.5741301059001515e-05, "loss": 0.2482, "step": 3018 }, { "epoch": 2.28, "learning_rate": 3.5736573373676254e-05, "loss": 0.2524, "step": 3019 }, { "epoch": 2.28, "learning_rate": 3.5731845688350986e-05, "loss": 0.2939, "step": 3020 }, { "epoch": 2.28, "learning_rate": 3.5727118003025725e-05, "loss": 0.2464, "step": 3021 }, { "epoch": 2.29, "learning_rate": 3.572239031770046e-05, "loss": 0.2464, "step": 3022 }, { "epoch": 2.29, "learning_rate": 3.5717662632375196e-05, "loss": 0.2923, "step": 3023 }, { "epoch": 2.29, "learning_rate": 3.571293494704993e-05, "loss": 0.3078, "step": 3024 }, { "epoch": 2.29, "learning_rate": 3.570820726172466e-05, "loss": 0.2647, "step": 3025 }, { "epoch": 2.29, "learning_rate": 3.57034795763994e-05, "loss": 0.2622, "step": 3026 }, { "epoch": 2.29, "learning_rate": 3.569875189107413e-05, "loss": 0.216, "step": 3027 }, { "epoch": 2.29, "learning_rate": 3.569402420574886e-05, "loss": 0.336, "step": 3028 }, { "epoch": 2.29, "learning_rate": 3.56892965204236e-05, "loss": 0.281, "step": 3029 }, { "epoch": 2.29, "learning_rate": 3.5684568835098334e-05, "loss": 0.261, "step": 3030 }, { "epoch": 2.29, "learning_rate": 3.567984114977307e-05, "loss": 0.2385, "step": 3031 }, { "epoch": 2.29, "learning_rate": 3.5675113464447805e-05, "loss": 0.2734, "step": 3032 }, { "epoch": 2.29, "learning_rate": 3.5670385779122544e-05, "loss": 0.2842, "step": 3033 }, { "epoch": 2.29, "learning_rate": 3.5665658093797276e-05, "loss": 0.2832, "step": 3034 }, { "epoch": 2.3, "learning_rate": 3.5660930408472015e-05, "loss": 0.2715, "step": 3035 }, { "epoch": 2.3, "learning_rate": 3.565620272314675e-05, "loss": 0.2558, "step": 3036 }, { "epoch": 2.3, "learning_rate": 3.5651475037821485e-05, "loss": 0.2653, "step": 3037 }, { "epoch": 2.3, "learning_rate": 3.564674735249622e-05, "loss": 0.2806, "step": 3038 }, { "epoch": 2.3, "learning_rate": 3.5642019667170956e-05, "loss": 0.2866, "step": 3039 }, { "epoch": 2.3, "learning_rate": 3.563729198184569e-05, "loss": 0.2525, "step": 3040 }, { "epoch": 2.3, "learning_rate": 3.563256429652043e-05, "loss": 0.2912, "step": 3041 }, { "epoch": 2.3, "learning_rate": 3.562783661119516e-05, "loss": 0.2895, "step": 3042 }, { "epoch": 2.3, "learning_rate": 3.56231089258699e-05, "loss": 0.2923, "step": 3043 }, { "epoch": 2.3, "learning_rate": 3.561838124054463e-05, "loss": 0.2506, "step": 3044 }, { "epoch": 2.3, "learning_rate": 3.561365355521937e-05, "loss": 0.2767, "step": 3045 }, { "epoch": 2.3, "learning_rate": 3.56089258698941e-05, "loss": 0.2881, "step": 3046 }, { "epoch": 2.3, "learning_rate": 3.560419818456884e-05, "loss": 0.3086, "step": 3047 }, { "epoch": 2.31, "learning_rate": 3.559947049924357e-05, "loss": 0.2891, "step": 3048 }, { "epoch": 2.31, "learning_rate": 3.559474281391831e-05, "loss": 0.24, "step": 3049 }, { "epoch": 2.31, "learning_rate": 3.559001512859304e-05, "loss": 0.4076, "step": 3050 }, { "epoch": 2.31, "learning_rate": 3.558528744326778e-05, "loss": 0.309, "step": 3051 }, { "epoch": 2.31, "learning_rate": 3.5580559757942514e-05, "loss": 0.2599, "step": 3052 }, { "epoch": 2.31, "learning_rate": 3.5575832072617246e-05, "loss": 0.2222, "step": 3053 }, { "epoch": 2.31, "learning_rate": 3.5571104387291985e-05, "loss": 0.2518, "step": 3054 }, { "epoch": 2.31, "learning_rate": 3.556637670196672e-05, "loss": 0.2579, "step": 3055 }, { "epoch": 2.31, "learning_rate": 3.556164901664145e-05, "loss": 0.3135, "step": 3056 }, { "epoch": 2.31, "learning_rate": 3.555692133131619e-05, "loss": 0.2626, "step": 3057 }, { "epoch": 2.31, "learning_rate": 3.555219364599092e-05, "loss": 0.3095, "step": 3058 }, { "epoch": 2.31, "learning_rate": 3.554746596066566e-05, "loss": 0.2544, "step": 3059 }, { "epoch": 2.31, "learning_rate": 3.554273827534039e-05, "loss": 0.2281, "step": 3060 }, { "epoch": 2.31, "learning_rate": 3.553801059001513e-05, "loss": 0.2653, "step": 3061 }, { "epoch": 2.32, "learning_rate": 3.553328290468986e-05, "loss": 0.2777, "step": 3062 }, { "epoch": 2.32, "learning_rate": 3.55285552193646e-05, "loss": 0.2558, "step": 3063 }, { "epoch": 2.32, "learning_rate": 3.552382753403933e-05, "loss": 0.3037, "step": 3064 }, { "epoch": 2.32, "learning_rate": 3.551909984871407e-05, "loss": 0.2546, "step": 3065 }, { "epoch": 2.32, "learning_rate": 3.55143721633888e-05, "loss": 0.2805, "step": 3066 }, { "epoch": 2.32, "learning_rate": 3.550964447806354e-05, "loss": 0.333, "step": 3067 }, { "epoch": 2.32, "learning_rate": 3.550491679273828e-05, "loss": 0.2777, "step": 3068 }, { "epoch": 2.32, "learning_rate": 3.550018910741301e-05, "loss": 0.3305, "step": 3069 }, { "epoch": 2.32, "learning_rate": 3.549546142208775e-05, "loss": 0.2771, "step": 3070 }, { "epoch": 2.32, "learning_rate": 3.5490733736762484e-05, "loss": 0.2773, "step": 3071 }, { "epoch": 2.32, "learning_rate": 3.548600605143722e-05, "loss": 0.252, "step": 3072 }, { "epoch": 2.32, "learning_rate": 3.5481278366111955e-05, "loss": 0.2395, "step": 3073 }, { "epoch": 2.32, "learning_rate": 3.5476550680786694e-05, "loss": 0.3641, "step": 3074 }, { "epoch": 2.33, "learning_rate": 3.5471822995461426e-05, "loss": 0.2856, "step": 3075 }, { "epoch": 2.33, "learning_rate": 3.5467095310136165e-05, "loss": 0.2664, "step": 3076 }, { "epoch": 2.33, "learning_rate": 3.5462367624810897e-05, "loss": 0.2223, "step": 3077 }, { "epoch": 2.33, "learning_rate": 3.545763993948563e-05, "loss": 0.2937, "step": 3078 }, { "epoch": 2.33, "learning_rate": 3.545291225416037e-05, "loss": 0.2876, "step": 3079 }, { "epoch": 2.33, "learning_rate": 3.54481845688351e-05, "loss": 0.2765, "step": 3080 }, { "epoch": 2.33, "learning_rate": 3.544345688350983e-05, "loss": 0.3236, "step": 3081 }, { "epoch": 2.33, "learning_rate": 3.543872919818457e-05, "loss": 0.2807, "step": 3082 }, { "epoch": 2.33, "learning_rate": 3.54340015128593e-05, "loss": 0.2539, "step": 3083 }, { "epoch": 2.33, "learning_rate": 3.542927382753404e-05, "loss": 0.2709, "step": 3084 }, { "epoch": 2.33, "learning_rate": 3.5424546142208773e-05, "loss": 0.2461, "step": 3085 }, { "epoch": 2.33, "learning_rate": 3.541981845688351e-05, "loss": 0.2945, "step": 3086 }, { "epoch": 2.33, "learning_rate": 3.5415090771558244e-05, "loss": 0.2368, "step": 3087 }, { "epoch": 2.34, "learning_rate": 3.541036308623298e-05, "loss": 0.2564, "step": 3088 }, { "epoch": 2.34, "learning_rate": 3.5405635400907715e-05, "loss": 0.262, "step": 3089 }, { "epoch": 2.34, "learning_rate": 3.5400907715582454e-05, "loss": 0.2886, "step": 3090 }, { "epoch": 2.34, "learning_rate": 3.5396180030257186e-05, "loss": 0.2591, "step": 3091 }, { "epoch": 2.34, "learning_rate": 3.5391452344931925e-05, "loss": 0.2613, "step": 3092 }, { "epoch": 2.34, "learning_rate": 3.538672465960666e-05, "loss": 0.2816, "step": 3093 }, { "epoch": 2.34, "learning_rate": 3.5381996974281396e-05, "loss": 0.343, "step": 3094 }, { "epoch": 2.34, "learning_rate": 3.537726928895613e-05, "loss": 0.2496, "step": 3095 }, { "epoch": 2.34, "learning_rate": 3.537254160363087e-05, "loss": 0.277, "step": 3096 }, { "epoch": 2.34, "learning_rate": 3.53678139183056e-05, "loss": 0.2814, "step": 3097 }, { "epoch": 2.34, "learning_rate": 3.536308623298034e-05, "loss": 0.2779, "step": 3098 }, { "epoch": 2.34, "learning_rate": 3.535835854765507e-05, "loss": 0.2858, "step": 3099 }, { "epoch": 2.34, "learning_rate": 3.535363086232981e-05, "loss": 0.2825, "step": 3100 }, { "epoch": 2.35, "learning_rate": 3.534890317700454e-05, "loss": 0.3011, "step": 3101 }, { "epoch": 2.35, "learning_rate": 3.534417549167928e-05, "loss": 0.2669, "step": 3102 }, { "epoch": 2.35, "learning_rate": 3.533944780635401e-05, "loss": 0.2876, "step": 3103 }, { "epoch": 2.35, "learning_rate": 3.533472012102875e-05, "loss": 0.2549, "step": 3104 }, { "epoch": 2.35, "learning_rate": 3.532999243570348e-05, "loss": 0.2593, "step": 3105 }, { "epoch": 2.35, "learning_rate": 3.5325264750378214e-05, "loss": 0.2641, "step": 3106 }, { "epoch": 2.35, "learning_rate": 3.532053706505295e-05, "loss": 0.2464, "step": 3107 }, { "epoch": 2.35, "learning_rate": 3.5315809379727685e-05, "loss": 0.2546, "step": 3108 }, { "epoch": 2.35, "learning_rate": 3.531108169440242e-05, "loss": 0.2633, "step": 3109 }, { "epoch": 2.35, "learning_rate": 3.5306354009077156e-05, "loss": 0.2978, "step": 3110 }, { "epoch": 2.35, "learning_rate": 3.530162632375189e-05, "loss": 0.2233, "step": 3111 }, { "epoch": 2.35, "learning_rate": 3.529689863842663e-05, "loss": 0.3115, "step": 3112 }, { "epoch": 2.35, "learning_rate": 3.529217095310136e-05, "loss": 0.2668, "step": 3113 }, { "epoch": 2.36, "learning_rate": 3.52874432677761e-05, "loss": 0.2648, "step": 3114 }, { "epoch": 2.36, "learning_rate": 3.528271558245083e-05, "loss": 0.2671, "step": 3115 }, { "epoch": 2.36, "learning_rate": 3.527798789712557e-05, "loss": 0.3128, "step": 3116 }, { "epoch": 2.36, "learning_rate": 3.52732602118003e-05, "loss": 0.2942, "step": 3117 }, { "epoch": 2.36, "learning_rate": 3.526853252647504e-05, "loss": 0.2396, "step": 3118 }, { "epoch": 2.36, "learning_rate": 3.526380484114977e-05, "loss": 0.2912, "step": 3119 }, { "epoch": 2.36, "learning_rate": 3.525907715582451e-05, "loss": 0.2679, "step": 3120 }, { "epoch": 2.36, "learning_rate": 3.525434947049924e-05, "loss": 0.3025, "step": 3121 }, { "epoch": 2.36, "learning_rate": 3.524962178517398e-05, "loss": 0.2542, "step": 3122 }, { "epoch": 2.36, "learning_rate": 3.5244894099848714e-05, "loss": 0.2765, "step": 3123 }, { "epoch": 2.36, "learning_rate": 3.524016641452345e-05, "loss": 0.259, "step": 3124 }, { "epoch": 2.36, "learning_rate": 3.5235438729198185e-05, "loss": 0.232, "step": 3125 }, { "epoch": 2.36, "learning_rate": 3.523071104387292e-05, "loss": 0.2403, "step": 3126 }, { "epoch": 2.36, "learning_rate": 3.522598335854766e-05, "loss": 0.2456, "step": 3127 }, { "epoch": 2.37, "learning_rate": 3.5221255673222394e-05, "loss": 0.2664, "step": 3128 }, { "epoch": 2.37, "learning_rate": 3.521652798789713e-05, "loss": 0.2911, "step": 3129 }, { "epoch": 2.37, "learning_rate": 3.5211800302571865e-05, "loss": 0.27, "step": 3130 }, { "epoch": 2.37, "learning_rate": 3.52070726172466e-05, "loss": 0.3046, "step": 3131 }, { "epoch": 2.37, "learning_rate": 3.5202344931921336e-05, "loss": 0.2677, "step": 3132 }, { "epoch": 2.37, "learning_rate": 3.519761724659607e-05, "loss": 0.2951, "step": 3133 }, { "epoch": 2.37, "learning_rate": 3.51928895612708e-05, "loss": 0.2486, "step": 3134 }, { "epoch": 2.37, "learning_rate": 3.518816187594554e-05, "loss": 0.29, "step": 3135 }, { "epoch": 2.37, "learning_rate": 3.518343419062027e-05, "loss": 0.2739, "step": 3136 }, { "epoch": 2.37, "learning_rate": 3.517870650529501e-05, "loss": 0.254, "step": 3137 }, { "epoch": 2.37, "learning_rate": 3.517397881996974e-05, "loss": 0.3151, "step": 3138 }, { "epoch": 2.37, "learning_rate": 3.516925113464448e-05, "loss": 0.2726, "step": 3139 }, { "epoch": 2.37, "learning_rate": 3.516452344931921e-05, "loss": 0.2619, "step": 3140 }, { "epoch": 2.38, "learning_rate": 3.515979576399395e-05, "loss": 0.2496, "step": 3141 }, { "epoch": 2.38, "learning_rate": 3.5155068078668684e-05, "loss": 0.2564, "step": 3142 }, { "epoch": 2.38, "learning_rate": 3.515034039334342e-05, "loss": 0.2466, "step": 3143 }, { "epoch": 2.38, "learning_rate": 3.5145612708018155e-05, "loss": 0.2581, "step": 3144 }, { "epoch": 2.38, "learning_rate": 3.5140885022692893e-05, "loss": 0.255, "step": 3145 }, { "epoch": 2.38, "learning_rate": 3.5136157337367626e-05, "loss": 0.224, "step": 3146 }, { "epoch": 2.38, "learning_rate": 3.5131429652042364e-05, "loss": 0.2965, "step": 3147 }, { "epoch": 2.38, "learning_rate": 3.5126701966717096e-05, "loss": 0.28, "step": 3148 }, { "epoch": 2.38, "learning_rate": 3.5121974281391835e-05, "loss": 0.2324, "step": 3149 }, { "epoch": 2.38, "learning_rate": 3.511724659606657e-05, "loss": 0.2589, "step": 3150 }, { "epoch": 2.38, "learning_rate": 3.5112518910741306e-05, "loss": 0.2474, "step": 3151 }, { "epoch": 2.38, "learning_rate": 3.510779122541604e-05, "loss": 0.2788, "step": 3152 }, { "epoch": 2.38, "learning_rate": 3.510306354009078e-05, "loss": 0.2377, "step": 3153 }, { "epoch": 2.39, "learning_rate": 3.509833585476551e-05, "loss": 0.2978, "step": 3154 }, { "epoch": 2.39, "learning_rate": 3.509360816944025e-05, "loss": 0.2691, "step": 3155 }, { "epoch": 2.39, "learning_rate": 3.508888048411498e-05, "loss": 0.2944, "step": 3156 }, { "epoch": 2.39, "learning_rate": 3.508415279878972e-05, "loss": 0.2483, "step": 3157 }, { "epoch": 2.39, "learning_rate": 3.507942511346445e-05, "loss": 0.2723, "step": 3158 }, { "epoch": 2.39, "learning_rate": 3.507469742813918e-05, "loss": 0.2184, "step": 3159 }, { "epoch": 2.39, "learning_rate": 3.506996974281392e-05, "loss": 0.2679, "step": 3160 }, { "epoch": 2.39, "learning_rate": 3.5065242057488654e-05, "loss": 0.3087, "step": 3161 }, { "epoch": 2.39, "learning_rate": 3.5060514372163386e-05, "loss": 0.2938, "step": 3162 }, { "epoch": 2.39, "learning_rate": 3.5055786686838125e-05, "loss": 0.2586, "step": 3163 }, { "epoch": 2.39, "learning_rate": 3.505105900151286e-05, "loss": 0.2672, "step": 3164 }, { "epoch": 2.39, "learning_rate": 3.5046331316187596e-05, "loss": 0.2712, "step": 3165 }, { "epoch": 2.39, "learning_rate": 3.504160363086233e-05, "loss": 0.2151, "step": 3166 }, { "epoch": 2.4, "learning_rate": 3.5036875945537067e-05, "loss": 0.2905, "step": 3167 }, { "epoch": 2.4, "learning_rate": 3.50321482602118e-05, "loss": 0.275, "step": 3168 }, { "epoch": 2.4, "learning_rate": 3.502742057488654e-05, "loss": 0.2317, "step": 3169 }, { "epoch": 2.4, "learning_rate": 3.502269288956127e-05, "loss": 0.2669, "step": 3170 }, { "epoch": 2.4, "learning_rate": 3.501796520423601e-05, "loss": 0.3015, "step": 3171 }, { "epoch": 2.4, "learning_rate": 3.501323751891074e-05, "loss": 0.2528, "step": 3172 }, { "epoch": 2.4, "learning_rate": 3.500850983358548e-05, "loss": 0.2416, "step": 3173 }, { "epoch": 2.4, "learning_rate": 3.500378214826021e-05, "loss": 0.283, "step": 3174 }, { "epoch": 2.4, "learning_rate": 3.499905446293495e-05, "loss": 0.2143, "step": 3175 }, { "epoch": 2.4, "learning_rate": 3.499432677760968e-05, "loss": 0.3041, "step": 3176 }, { "epoch": 2.4, "learning_rate": 3.498959909228442e-05, "loss": 0.2494, "step": 3177 }, { "epoch": 2.4, "learning_rate": 3.498487140695915e-05, "loss": 0.2402, "step": 3178 }, { "epoch": 2.4, "learning_rate": 3.498014372163389e-05, "loss": 0.2632, "step": 3179 }, { "epoch": 2.4, "learning_rate": 3.4975416036308624e-05, "loss": 0.2742, "step": 3180 }, { "epoch": 2.41, "learning_rate": 3.497068835098336e-05, "loss": 0.2999, "step": 3181 }, { "epoch": 2.41, "learning_rate": 3.4965960665658095e-05, "loss": 0.2552, "step": 3182 }, { "epoch": 2.41, "learning_rate": 3.4961232980332834e-05, "loss": 0.2507, "step": 3183 }, { "epoch": 2.41, "learning_rate": 3.4956505295007566e-05, "loss": 0.2823, "step": 3184 }, { "epoch": 2.41, "learning_rate": 3.4951777609682305e-05, "loss": 0.2219, "step": 3185 }, { "epoch": 2.41, "learning_rate": 3.494704992435704e-05, "loss": 0.2443, "step": 3186 }, { "epoch": 2.41, "learning_rate": 3.494232223903177e-05, "loss": 0.2283, "step": 3187 }, { "epoch": 2.41, "learning_rate": 3.493759455370651e-05, "loss": 0.3149, "step": 3188 }, { "epoch": 2.41, "learning_rate": 3.493286686838124e-05, "loss": 0.2912, "step": 3189 }, { "epoch": 2.41, "learning_rate": 3.492813918305598e-05, "loss": 0.2653, "step": 3190 }, { "epoch": 2.41, "learning_rate": 3.492341149773071e-05, "loss": 0.2822, "step": 3191 }, { "epoch": 2.41, "learning_rate": 3.491868381240545e-05, "loss": 0.2957, "step": 3192 }, { "epoch": 2.41, "learning_rate": 3.491395612708018e-05, "loss": 0.2609, "step": 3193 }, { "epoch": 2.42, "learning_rate": 3.490922844175492e-05, "loss": 0.2672, "step": 3194 }, { "epoch": 2.42, "learning_rate": 3.490450075642965e-05, "loss": 0.2896, "step": 3195 }, { "epoch": 2.42, "learning_rate": 3.489977307110439e-05, "loss": 0.312, "step": 3196 }, { "epoch": 2.42, "learning_rate": 3.489504538577912e-05, "loss": 0.2666, "step": 3197 }, { "epoch": 2.42, "learning_rate": 3.489031770045386e-05, "loss": 0.3118, "step": 3198 }, { "epoch": 2.42, "learning_rate": 3.4885590015128594e-05, "loss": 0.2617, "step": 3199 }, { "epoch": 2.42, "learning_rate": 3.488086232980333e-05, "loss": 0.2823, "step": 3200 }, { "epoch": 2.42, "learning_rate": 3.4876134644478065e-05, "loss": 0.3045, "step": 3201 }, { "epoch": 2.42, "learning_rate": 3.4871406959152804e-05, "loss": 0.2436, "step": 3202 }, { "epoch": 2.42, "learning_rate": 3.4866679273827536e-05, "loss": 0.2789, "step": 3203 }, { "epoch": 2.42, "learning_rate": 3.4861951588502275e-05, "loss": 0.272, "step": 3204 }, { "epoch": 2.42, "learning_rate": 3.485722390317701e-05, "loss": 0.2825, "step": 3205 }, { "epoch": 2.42, "learning_rate": 3.4852496217851746e-05, "loss": 0.306, "step": 3206 }, { "epoch": 2.43, "learning_rate": 3.484776853252648e-05, "loss": 0.2707, "step": 3207 }, { "epoch": 2.43, "learning_rate": 3.4843040847201216e-05, "loss": 0.2678, "step": 3208 }, { "epoch": 2.43, "learning_rate": 3.483831316187595e-05, "loss": 0.2416, "step": 3209 }, { "epoch": 2.43, "learning_rate": 3.483358547655069e-05, "loss": 0.2872, "step": 3210 }, { "epoch": 2.43, "learning_rate": 3.482885779122542e-05, "loss": 0.3126, "step": 3211 }, { "epoch": 2.43, "learning_rate": 3.482413010590015e-05, "loss": 0.2593, "step": 3212 }, { "epoch": 2.43, "learning_rate": 3.481940242057489e-05, "loss": 0.2992, "step": 3213 }, { "epoch": 2.43, "learning_rate": 3.481467473524962e-05, "loss": 0.2775, "step": 3214 }, { "epoch": 2.43, "learning_rate": 3.4809947049924354e-05, "loss": 0.2501, "step": 3215 }, { "epoch": 2.43, "learning_rate": 3.480521936459909e-05, "loss": 0.2738, "step": 3216 }, { "epoch": 2.43, "learning_rate": 3.4800491679273825e-05, "loss": 0.2975, "step": 3217 }, { "epoch": 2.43, "learning_rate": 3.4795763993948564e-05, "loss": 0.347, "step": 3218 }, { "epoch": 2.43, "learning_rate": 3.4791036308623296e-05, "loss": 0.2766, "step": 3219 }, { "epoch": 2.44, "learning_rate": 3.4786308623298035e-05, "loss": 0.291, "step": 3220 }, { "epoch": 2.44, "learning_rate": 3.478158093797277e-05, "loss": 0.2856, "step": 3221 }, { "epoch": 2.44, "learning_rate": 3.4776853252647506e-05, "loss": 0.2321, "step": 3222 }, { "epoch": 2.44, "learning_rate": 3.477212556732224e-05, "loss": 0.2915, "step": 3223 }, { "epoch": 2.44, "learning_rate": 3.476739788199698e-05, "loss": 0.2923, "step": 3224 }, { "epoch": 2.44, "learning_rate": 3.476267019667171e-05, "loss": 0.2787, "step": 3225 }, { "epoch": 2.44, "learning_rate": 3.475794251134645e-05, "loss": 0.2497, "step": 3226 }, { "epoch": 2.44, "learning_rate": 3.475321482602118e-05, "loss": 0.256, "step": 3227 }, { "epoch": 2.44, "learning_rate": 3.474848714069592e-05, "loss": 0.3188, "step": 3228 }, { "epoch": 2.44, "learning_rate": 3.474375945537065e-05, "loss": 0.2701, "step": 3229 }, { "epoch": 2.44, "learning_rate": 3.473903177004539e-05, "loss": 0.2759, "step": 3230 }, { "epoch": 2.44, "learning_rate": 3.473430408472012e-05, "loss": 0.3698, "step": 3231 }, { "epoch": 2.44, "learning_rate": 3.472957639939486e-05, "loss": 0.3248, "step": 3232 }, { "epoch": 2.45, "learning_rate": 3.472484871406959e-05, "loss": 0.2402, "step": 3233 }, { "epoch": 2.45, "learning_rate": 3.472012102874433e-05, "loss": 0.2618, "step": 3234 }, { "epoch": 2.45, "learning_rate": 3.4715393343419063e-05, "loss": 0.2464, "step": 3235 }, { "epoch": 2.45, "learning_rate": 3.47106656580938e-05, "loss": 0.2388, "step": 3236 }, { "epoch": 2.45, "learning_rate": 3.4705937972768534e-05, "loss": 0.2734, "step": 3237 }, { "epoch": 2.45, "learning_rate": 3.470121028744327e-05, "loss": 0.2867, "step": 3238 }, { "epoch": 2.45, "learning_rate": 3.4696482602118005e-05, "loss": 0.2892, "step": 3239 }, { "epoch": 2.45, "learning_rate": 3.469175491679274e-05, "loss": 0.265, "step": 3240 }, { "epoch": 2.45, "learning_rate": 3.468702723146747e-05, "loss": 0.3099, "step": 3241 }, { "epoch": 2.45, "learning_rate": 3.468229954614221e-05, "loss": 0.2561, "step": 3242 }, { "epoch": 2.45, "learning_rate": 3.467757186081694e-05, "loss": 0.2723, "step": 3243 }, { "epoch": 2.45, "learning_rate": 3.467284417549168e-05, "loss": 0.281, "step": 3244 }, { "epoch": 2.45, "learning_rate": 3.466811649016641e-05, "loss": 0.259, "step": 3245 }, { "epoch": 2.45, "learning_rate": 3.466338880484115e-05, "loss": 0.2697, "step": 3246 }, { "epoch": 2.46, "learning_rate": 3.465866111951589e-05, "loss": 0.2638, "step": 3247 }, { "epoch": 2.46, "learning_rate": 3.465393343419062e-05, "loss": 0.2919, "step": 3248 }, { "epoch": 2.46, "learning_rate": 3.464920574886536e-05, "loss": 0.2492, "step": 3249 }, { "epoch": 2.46, "learning_rate": 3.464447806354009e-05, "loss": 0.265, "step": 3250 }, { "epoch": 2.46, "learning_rate": 3.463975037821483e-05, "loss": 0.2666, "step": 3251 }, { "epoch": 2.46, "learning_rate": 3.463502269288956e-05, "loss": 0.2554, "step": 3252 }, { "epoch": 2.46, "learning_rate": 3.46302950075643e-05, "loss": 0.2571, "step": 3253 }, { "epoch": 2.46, "learning_rate": 3.4625567322239034e-05, "loss": 0.2694, "step": 3254 }, { "epoch": 2.46, "learning_rate": 3.462083963691377e-05, "loss": 0.2611, "step": 3255 }, { "epoch": 2.46, "learning_rate": 3.4616111951588504e-05, "loss": 0.2932, "step": 3256 }, { "epoch": 2.46, "learning_rate": 3.461138426626324e-05, "loss": 0.2593, "step": 3257 }, { "epoch": 2.46, "learning_rate": 3.4606656580937975e-05, "loss": 0.2723, "step": 3258 }, { "epoch": 2.46, "learning_rate": 3.4601928895612714e-05, "loss": 0.2643, "step": 3259 }, { "epoch": 2.47, "learning_rate": 3.4597201210287446e-05, "loss": 0.2502, "step": 3260 }, { "epoch": 2.47, "learning_rate": 3.4592473524962185e-05, "loss": 0.2476, "step": 3261 }, { "epoch": 2.47, "learning_rate": 3.458774583963692e-05, "loss": 0.316, "step": 3262 }, { "epoch": 2.47, "learning_rate": 3.4583018154311656e-05, "loss": 0.2971, "step": 3263 }, { "epoch": 2.47, "learning_rate": 3.457829046898639e-05, "loss": 0.3174, "step": 3264 }, { "epoch": 2.47, "learning_rate": 3.457356278366112e-05, "loss": 0.2817, "step": 3265 }, { "epoch": 2.47, "learning_rate": 3.456883509833586e-05, "loss": 0.2412, "step": 3266 }, { "epoch": 2.47, "learning_rate": 3.456410741301059e-05, "loss": 0.2779, "step": 3267 }, { "epoch": 2.47, "learning_rate": 3.455937972768532e-05, "loss": 0.2495, "step": 3268 }, { "epoch": 2.47, "learning_rate": 3.455465204236006e-05, "loss": 0.2683, "step": 3269 }, { "epoch": 2.47, "learning_rate": 3.4549924357034794e-05, "loss": 0.3031, "step": 3270 }, { "epoch": 2.47, "learning_rate": 3.454519667170953e-05, "loss": 0.2926, "step": 3271 }, { "epoch": 2.47, "learning_rate": 3.4540468986384265e-05, "loss": 0.2624, "step": 3272 }, { "epoch": 2.48, "learning_rate": 3.4535741301059004e-05, "loss": 0.2834, "step": 3273 }, { "epoch": 2.48, "learning_rate": 3.4531013615733736e-05, "loss": 0.2689, "step": 3274 }, { "epoch": 2.48, "learning_rate": 3.4526285930408475e-05, "loss": 0.2758, "step": 3275 }, { "epoch": 2.48, "learning_rate": 3.4521558245083207e-05, "loss": 0.2835, "step": 3276 }, { "epoch": 2.48, "learning_rate": 3.4516830559757945e-05, "loss": 0.2713, "step": 3277 }, { "epoch": 2.48, "learning_rate": 3.451210287443268e-05, "loss": 0.2752, "step": 3278 }, { "epoch": 2.48, "learning_rate": 3.4507375189107416e-05, "loss": 0.286, "step": 3279 }, { "epoch": 2.48, "learning_rate": 3.450264750378215e-05, "loss": 0.2983, "step": 3280 }, { "epoch": 2.48, "learning_rate": 3.449791981845689e-05, "loss": 0.2985, "step": 3281 }, { "epoch": 2.48, "learning_rate": 3.449319213313162e-05, "loss": 0.2943, "step": 3282 }, { "epoch": 2.48, "learning_rate": 3.448846444780636e-05, "loss": 0.2576, "step": 3283 }, { "epoch": 2.48, "learning_rate": 3.448373676248109e-05, "loss": 0.2935, "step": 3284 }, { "epoch": 2.48, "learning_rate": 3.447900907715583e-05, "loss": 0.2858, "step": 3285 }, { "epoch": 2.49, "learning_rate": 3.447428139183056e-05, "loss": 0.284, "step": 3286 }, { "epoch": 2.49, "learning_rate": 3.44695537065053e-05, "loss": 0.2753, "step": 3287 }, { "epoch": 2.49, "learning_rate": 3.446482602118003e-05, "loss": 0.2627, "step": 3288 }, { "epoch": 2.49, "learning_rate": 3.446009833585477e-05, "loss": 0.2945, "step": 3289 }, { "epoch": 2.49, "learning_rate": 3.44553706505295e-05, "loss": 0.3036, "step": 3290 }, { "epoch": 2.49, "learning_rate": 3.445064296520424e-05, "loss": 0.2683, "step": 3291 }, { "epoch": 2.49, "learning_rate": 3.4445915279878974e-05, "loss": 0.3225, "step": 3292 }, { "epoch": 2.49, "learning_rate": 3.4441187594553706e-05, "loss": 0.2688, "step": 3293 }, { "epoch": 2.49, "learning_rate": 3.443645990922844e-05, "loss": 0.2946, "step": 3294 }, { "epoch": 2.49, "learning_rate": 3.443173222390318e-05, "loss": 0.2816, "step": 3295 }, { "epoch": 2.49, "learning_rate": 3.442700453857791e-05, "loss": 0.2735, "step": 3296 }, { "epoch": 2.49, "learning_rate": 3.442227685325265e-05, "loss": 0.2892, "step": 3297 }, { "epoch": 2.49, "learning_rate": 3.441754916792738e-05, "loss": 0.2696, "step": 3298 }, { "epoch": 2.49, "learning_rate": 3.441282148260212e-05, "loss": 0.2933, "step": 3299 }, { "epoch": 2.5, "learning_rate": 3.440809379727685e-05, "loss": 0.2291, "step": 3300 }, { "epoch": 2.5, "learning_rate": 3.440336611195159e-05, "loss": 0.2782, "step": 3301 }, { "epoch": 2.5, "learning_rate": 3.439863842662632e-05, "loss": 0.2499, "step": 3302 }, { "epoch": 2.5, "learning_rate": 3.439391074130106e-05, "loss": 0.2834, "step": 3303 }, { "epoch": 2.5, "learning_rate": 3.438918305597579e-05, "loss": 0.2554, "step": 3304 }, { "epoch": 2.5, "learning_rate": 3.438445537065053e-05, "loss": 0.277, "step": 3305 }, { "epoch": 2.5, "learning_rate": 3.437972768532526e-05, "loss": 0.2573, "step": 3306 }, { "epoch": 2.5, "learning_rate": 3.4375e-05, "loss": 0.2466, "step": 3307 }, { "epoch": 2.5, "learning_rate": 3.437027231467474e-05, "loss": 0.2469, "step": 3308 }, { "epoch": 2.5, "learning_rate": 3.436554462934947e-05, "loss": 0.2887, "step": 3309 }, { "epoch": 2.5, "learning_rate": 3.436081694402421e-05, "loss": 0.2373, "step": 3310 }, { "epoch": 2.5, "learning_rate": 3.4356089258698944e-05, "loss": 0.3025, "step": 3311 }, { "epoch": 2.5, "learning_rate": 3.435136157337368e-05, "loss": 0.2516, "step": 3312 }, { "epoch": 2.51, "learning_rate": 3.4346633888048415e-05, "loss": 0.2789, "step": 3313 }, { "epoch": 2.51, "learning_rate": 3.4341906202723154e-05, "loss": 0.2754, "step": 3314 }, { "epoch": 2.51, "learning_rate": 3.4337178517397886e-05, "loss": 0.3028, "step": 3315 }, { "epoch": 2.51, "learning_rate": 3.4332450832072624e-05, "loss": 0.3073, "step": 3316 }, { "epoch": 2.51, "learning_rate": 3.4327723146747357e-05, "loss": 0.2916, "step": 3317 }, { "epoch": 2.51, "learning_rate": 3.432299546142209e-05, "loss": 0.2853, "step": 3318 }, { "epoch": 2.51, "learning_rate": 3.431826777609683e-05, "loss": 0.3111, "step": 3319 }, { "epoch": 2.51, "learning_rate": 3.431354009077156e-05, "loss": 0.2222, "step": 3320 }, { "epoch": 2.51, "learning_rate": 3.430881240544629e-05, "loss": 0.2523, "step": 3321 }, { "epoch": 2.51, "learning_rate": 3.430408472012103e-05, "loss": 0.308, "step": 3322 }, { "epoch": 2.51, "learning_rate": 3.429935703479576e-05, "loss": 0.3219, "step": 3323 }, { "epoch": 2.51, "learning_rate": 3.42946293494705e-05, "loss": 0.2615, "step": 3324 }, { "epoch": 2.51, "learning_rate": 3.428990166414523e-05, "loss": 0.3042, "step": 3325 }, { "epoch": 2.52, "learning_rate": 3.428517397881997e-05, "loss": 0.2535, "step": 3326 }, { "epoch": 2.52, "learning_rate": 3.4280446293494704e-05, "loss": 0.2567, "step": 3327 }, { "epoch": 2.52, "learning_rate": 3.427571860816944e-05, "loss": 0.2694, "step": 3328 }, { "epoch": 2.52, "learning_rate": 3.4270990922844175e-05, "loss": 0.2644, "step": 3329 }, { "epoch": 2.52, "learning_rate": 3.4266263237518914e-05, "loss": 0.2861, "step": 3330 }, { "epoch": 2.52, "learning_rate": 3.4261535552193646e-05, "loss": 0.2723, "step": 3331 }, { "epoch": 2.52, "learning_rate": 3.4256807866868385e-05, "loss": 0.2501, "step": 3332 }, { "epoch": 2.52, "learning_rate": 3.425208018154312e-05, "loss": 0.304, "step": 3333 }, { "epoch": 2.52, "learning_rate": 3.4247352496217856e-05, "loss": 0.2658, "step": 3334 }, { "epoch": 2.52, "learning_rate": 3.424262481089259e-05, "loss": 0.246, "step": 3335 }, { "epoch": 2.52, "learning_rate": 3.423789712556733e-05, "loss": 0.3094, "step": 3336 }, { "epoch": 2.52, "learning_rate": 3.423316944024206e-05, "loss": 0.3336, "step": 3337 }, { "epoch": 2.52, "learning_rate": 3.42284417549168e-05, "loss": 0.244, "step": 3338 }, { "epoch": 2.53, "learning_rate": 3.422371406959153e-05, "loss": 0.3024, "step": 3339 }, { "epoch": 2.53, "learning_rate": 3.421898638426627e-05, "loss": 0.328, "step": 3340 }, { "epoch": 2.53, "learning_rate": 3.4214258698941e-05, "loss": 0.2616, "step": 3341 }, { "epoch": 2.53, "learning_rate": 3.420953101361574e-05, "loss": 0.3112, "step": 3342 }, { "epoch": 2.53, "learning_rate": 3.420480332829047e-05, "loss": 0.2408, "step": 3343 }, { "epoch": 2.53, "learning_rate": 3.420007564296521e-05, "loss": 0.294, "step": 3344 }, { "epoch": 2.53, "learning_rate": 3.419534795763994e-05, "loss": 0.2579, "step": 3345 }, { "epoch": 2.53, "learning_rate": 3.4190620272314674e-05, "loss": 0.2817, "step": 3346 }, { "epoch": 2.53, "learning_rate": 3.4185892586989406e-05, "loss": 0.2125, "step": 3347 }, { "epoch": 2.53, "learning_rate": 3.4181164901664145e-05, "loss": 0.2642, "step": 3348 }, { "epoch": 2.53, "learning_rate": 3.417643721633888e-05, "loss": 0.3077, "step": 3349 }, { "epoch": 2.53, "learning_rate": 3.4171709531013616e-05, "loss": 0.2644, "step": 3350 }, { "epoch": 2.53, "learning_rate": 3.416698184568835e-05, "loss": 0.3123, "step": 3351 }, { "epoch": 2.54, "learning_rate": 3.416225416036309e-05, "loss": 0.3246, "step": 3352 }, { "epoch": 2.54, "learning_rate": 3.415752647503782e-05, "loss": 0.2681, "step": 3353 }, { "epoch": 2.54, "learning_rate": 3.415279878971256e-05, "loss": 0.2872, "step": 3354 }, { "epoch": 2.54, "learning_rate": 3.414807110438729e-05, "loss": 0.3027, "step": 3355 }, { "epoch": 2.54, "learning_rate": 3.414334341906203e-05, "loss": 0.3095, "step": 3356 }, { "epoch": 2.54, "learning_rate": 3.413861573373676e-05, "loss": 0.2956, "step": 3357 }, { "epoch": 2.54, "learning_rate": 3.41338880484115e-05, "loss": 0.2621, "step": 3358 }, { "epoch": 2.54, "learning_rate": 3.412916036308623e-05, "loss": 0.2879, "step": 3359 }, { "epoch": 2.54, "learning_rate": 3.412443267776097e-05, "loss": 0.2785, "step": 3360 }, { "epoch": 2.54, "learning_rate": 3.41197049924357e-05, "loss": 0.2778, "step": 3361 }, { "epoch": 2.54, "learning_rate": 3.411497730711044e-05, "loss": 0.3125, "step": 3362 }, { "epoch": 2.54, "learning_rate": 3.4110249621785174e-05, "loss": 0.2511, "step": 3363 }, { "epoch": 2.54, "learning_rate": 3.410552193645991e-05, "loss": 0.331, "step": 3364 }, { "epoch": 2.54, "learning_rate": 3.4100794251134644e-05, "loss": 0.2546, "step": 3365 }, { "epoch": 2.55, "learning_rate": 3.409606656580938e-05, "loss": 0.2808, "step": 3366 }, { "epoch": 2.55, "learning_rate": 3.4091338880484115e-05, "loss": 0.2685, "step": 3367 }, { "epoch": 2.55, "learning_rate": 3.4086611195158854e-05, "loss": 0.2997, "step": 3368 }, { "epoch": 2.55, "learning_rate": 3.408188350983359e-05, "loss": 0.2507, "step": 3369 }, { "epoch": 2.55, "learning_rate": 3.4077155824508325e-05, "loss": 0.2837, "step": 3370 }, { "epoch": 2.55, "learning_rate": 3.407242813918306e-05, "loss": 0.3038, "step": 3371 }, { "epoch": 2.55, "learning_rate": 3.4067700453857796e-05, "loss": 0.2796, "step": 3372 }, { "epoch": 2.55, "learning_rate": 3.406297276853253e-05, "loss": 0.2732, "step": 3373 }, { "epoch": 2.55, "learning_rate": 3.405824508320726e-05, "loss": 0.2871, "step": 3374 }, { "epoch": 2.55, "learning_rate": 3.4053517397882e-05, "loss": 0.2451, "step": 3375 }, { "epoch": 2.55, "learning_rate": 3.404878971255673e-05, "loss": 0.2573, "step": 3376 }, { "epoch": 2.55, "learning_rate": 3.404406202723147e-05, "loss": 0.2738, "step": 3377 }, { "epoch": 2.55, "learning_rate": 3.40393343419062e-05, "loss": 0.2494, "step": 3378 }, { "epoch": 2.56, "learning_rate": 3.403460665658094e-05, "loss": 0.2651, "step": 3379 }, { "epoch": 2.56, "learning_rate": 3.402987897125567e-05, "loss": 0.2854, "step": 3380 }, { "epoch": 2.56, "learning_rate": 3.402515128593041e-05, "loss": 0.3245, "step": 3381 }, { "epoch": 2.56, "learning_rate": 3.4020423600605144e-05, "loss": 0.2788, "step": 3382 }, { "epoch": 2.56, "learning_rate": 3.401569591527988e-05, "loss": 0.3214, "step": 3383 }, { "epoch": 2.56, "learning_rate": 3.4010968229954615e-05, "loss": 0.2972, "step": 3384 }, { "epoch": 2.56, "learning_rate": 3.4006240544629353e-05, "loss": 0.2658, "step": 3385 }, { "epoch": 2.56, "learning_rate": 3.4001512859304085e-05, "loss": 0.2198, "step": 3386 }, { "epoch": 2.56, "learning_rate": 3.3996785173978824e-05, "loss": 0.2855, "step": 3387 }, { "epoch": 2.56, "learning_rate": 3.3992057488653556e-05, "loss": 0.3037, "step": 3388 }, { "epoch": 2.56, "learning_rate": 3.3987329803328295e-05, "loss": 0.267, "step": 3389 }, { "epoch": 2.56, "learning_rate": 3.398260211800303e-05, "loss": 0.2223, "step": 3390 }, { "epoch": 2.56, "learning_rate": 3.3977874432677766e-05, "loss": 0.2627, "step": 3391 }, { "epoch": 2.57, "learning_rate": 3.39731467473525e-05, "loss": 0.29, "step": 3392 }, { "epoch": 2.57, "learning_rate": 3.396841906202724e-05, "loss": 0.2713, "step": 3393 }, { "epoch": 2.57, "learning_rate": 3.396369137670197e-05, "loss": 0.2535, "step": 3394 }, { "epoch": 2.57, "learning_rate": 3.395896369137671e-05, "loss": 0.2943, "step": 3395 }, { "epoch": 2.57, "learning_rate": 3.395423600605144e-05, "loss": 0.2234, "step": 3396 }, { "epoch": 2.57, "learning_rate": 3.394950832072618e-05, "loss": 0.2728, "step": 3397 }, { "epoch": 2.57, "learning_rate": 3.394478063540091e-05, "loss": 0.3008, "step": 3398 }, { "epoch": 2.57, "learning_rate": 3.394005295007564e-05, "loss": 0.2705, "step": 3399 }, { "epoch": 2.57, "learning_rate": 3.3935325264750375e-05, "loss": 0.3052, "step": 3400 }, { "epoch": 2.57, "learning_rate": 3.3930597579425114e-05, "loss": 0.414, "step": 3401 }, { "epoch": 2.57, "learning_rate": 3.3925869894099846e-05, "loss": 0.3519, "step": 3402 }, { "epoch": 2.57, "learning_rate": 3.3921142208774585e-05, "loss": 0.3175, "step": 3403 }, { "epoch": 2.57, "learning_rate": 3.391641452344932e-05, "loss": 0.2971, "step": 3404 }, { "epoch": 2.58, "learning_rate": 3.3911686838124056e-05, "loss": 0.2916, "step": 3405 }, { "epoch": 2.58, "learning_rate": 3.390695915279879e-05, "loss": 0.2994, "step": 3406 }, { "epoch": 2.58, "learning_rate": 3.3902231467473526e-05, "loss": 0.2827, "step": 3407 }, { "epoch": 2.58, "learning_rate": 3.389750378214826e-05, "loss": 0.2559, "step": 3408 }, { "epoch": 2.58, "learning_rate": 3.3892776096823e-05, "loss": 0.2848, "step": 3409 }, { "epoch": 2.58, "learning_rate": 3.388804841149773e-05, "loss": 0.2452, "step": 3410 }, { "epoch": 2.58, "learning_rate": 3.388332072617247e-05, "loss": 0.2652, "step": 3411 }, { "epoch": 2.58, "learning_rate": 3.38785930408472e-05, "loss": 0.2676, "step": 3412 }, { "epoch": 2.58, "learning_rate": 3.387386535552194e-05, "loss": 0.2782, "step": 3413 }, { "epoch": 2.58, "learning_rate": 3.386913767019667e-05, "loss": 0.3443, "step": 3414 }, { "epoch": 2.58, "learning_rate": 3.386440998487141e-05, "loss": 0.2682, "step": 3415 }, { "epoch": 2.58, "learning_rate": 3.385968229954614e-05, "loss": 0.3629, "step": 3416 }, { "epoch": 2.58, "learning_rate": 3.385495461422088e-05, "loss": 0.2678, "step": 3417 }, { "epoch": 2.58, "learning_rate": 3.385022692889561e-05, "loss": 0.2559, "step": 3418 }, { "epoch": 2.59, "learning_rate": 3.384549924357035e-05, "loss": 0.2517, "step": 3419 }, { "epoch": 2.59, "learning_rate": 3.3840771558245084e-05, "loss": 0.2893, "step": 3420 }, { "epoch": 2.59, "learning_rate": 3.383604387291982e-05, "loss": 0.277, "step": 3421 }, { "epoch": 2.59, "learning_rate": 3.3831316187594555e-05, "loss": 0.2751, "step": 3422 }, { "epoch": 2.59, "learning_rate": 3.3826588502269294e-05, "loss": 0.2683, "step": 3423 }, { "epoch": 2.59, "learning_rate": 3.3821860816944026e-05, "loss": 0.2815, "step": 3424 }, { "epoch": 2.59, "learning_rate": 3.3817133131618765e-05, "loss": 0.283, "step": 3425 }, { "epoch": 2.59, "learning_rate": 3.3812405446293497e-05, "loss": 0.2657, "step": 3426 }, { "epoch": 2.59, "learning_rate": 3.380767776096823e-05, "loss": 0.3034, "step": 3427 }, { "epoch": 2.59, "learning_rate": 3.380295007564297e-05, "loss": 0.2597, "step": 3428 }, { "epoch": 2.59, "learning_rate": 3.37982223903177e-05, "loss": 0.2558, "step": 3429 }, { "epoch": 2.59, "learning_rate": 3.379349470499244e-05, "loss": 0.2983, "step": 3430 }, { "epoch": 2.59, "learning_rate": 3.378876701966717e-05, "loss": 0.2252, "step": 3431 }, { "epoch": 2.6, "learning_rate": 3.378403933434191e-05, "loss": 0.2593, "step": 3432 }, { "epoch": 2.6, "learning_rate": 3.377931164901664e-05, "loss": 0.3102, "step": 3433 }, { "epoch": 2.6, "learning_rate": 3.377458396369138e-05, "loss": 0.2533, "step": 3434 }, { "epoch": 2.6, "learning_rate": 3.376985627836611e-05, "loss": 0.2535, "step": 3435 }, { "epoch": 2.6, "learning_rate": 3.376512859304085e-05, "loss": 0.3076, "step": 3436 }, { "epoch": 2.6, "learning_rate": 3.376040090771558e-05, "loss": 0.2422, "step": 3437 }, { "epoch": 2.6, "learning_rate": 3.375567322239032e-05, "loss": 0.2948, "step": 3438 }, { "epoch": 2.6, "learning_rate": 3.3750945537065054e-05, "loss": 0.2827, "step": 3439 }, { "epoch": 2.6, "learning_rate": 3.374621785173979e-05, "loss": 0.2469, "step": 3440 }, { "epoch": 2.6, "learning_rate": 3.3741490166414525e-05, "loss": 0.3106, "step": 3441 }, { "epoch": 2.6, "learning_rate": 3.3736762481089264e-05, "loss": 0.2891, "step": 3442 }, { "epoch": 2.6, "learning_rate": 3.3732034795763996e-05, "loss": 0.2545, "step": 3443 }, { "epoch": 2.6, "learning_rate": 3.3727307110438735e-05, "loss": 0.2963, "step": 3444 }, { "epoch": 2.61, "learning_rate": 3.372257942511347e-05, "loss": 0.2444, "step": 3445 }, { "epoch": 2.61, "learning_rate": 3.3717851739788206e-05, "loss": 0.2781, "step": 3446 }, { "epoch": 2.61, "learning_rate": 3.371312405446294e-05, "loss": 0.3104, "step": 3447 }, { "epoch": 2.61, "learning_rate": 3.3708396369137676e-05, "loss": 0.2794, "step": 3448 }, { "epoch": 2.61, "learning_rate": 3.370366868381241e-05, "loss": 0.2886, "step": 3449 }, { "epoch": 2.61, "learning_rate": 3.369894099848715e-05, "loss": 0.3208, "step": 3450 }, { "epoch": 2.61, "learning_rate": 3.369421331316188e-05, "loss": 0.2714, "step": 3451 }, { "epoch": 2.61, "learning_rate": 3.368948562783661e-05, "loss": 0.2596, "step": 3452 }, { "epoch": 2.61, "learning_rate": 3.3684757942511344e-05, "loss": 0.2287, "step": 3453 }, { "epoch": 2.61, "learning_rate": 3.368003025718608e-05, "loss": 0.2879, "step": 3454 }, { "epoch": 2.61, "learning_rate": 3.3675302571860814e-05, "loss": 0.3039, "step": 3455 }, { "epoch": 2.61, "learning_rate": 3.367057488653555e-05, "loss": 0.2477, "step": 3456 }, { "epoch": 2.61, "learning_rate": 3.3665847201210285e-05, "loss": 0.3168, "step": 3457 }, { "epoch": 2.62, "learning_rate": 3.3661119515885024e-05, "loss": 0.2612, "step": 3458 }, { "epoch": 2.62, "learning_rate": 3.3656391830559756e-05, "loss": 0.297, "step": 3459 }, { "epoch": 2.62, "learning_rate": 3.3651664145234495e-05, "loss": 0.3138, "step": 3460 }, { "epoch": 2.62, "learning_rate": 3.364693645990923e-05, "loss": 0.2261, "step": 3461 }, { "epoch": 2.62, "learning_rate": 3.3642208774583966e-05, "loss": 0.269, "step": 3462 }, { "epoch": 2.62, "learning_rate": 3.36374810892587e-05, "loss": 0.2798, "step": 3463 }, { "epoch": 2.62, "learning_rate": 3.363275340393344e-05, "loss": 0.263, "step": 3464 }, { "epoch": 2.62, "learning_rate": 3.362802571860817e-05, "loss": 0.3136, "step": 3465 }, { "epoch": 2.62, "learning_rate": 3.362329803328291e-05, "loss": 0.2386, "step": 3466 }, { "epoch": 2.62, "learning_rate": 3.361857034795764e-05, "loss": 0.2924, "step": 3467 }, { "epoch": 2.62, "learning_rate": 3.361384266263238e-05, "loss": 0.2349, "step": 3468 }, { "epoch": 2.62, "learning_rate": 3.360911497730711e-05, "loss": 0.3269, "step": 3469 }, { "epoch": 2.62, "learning_rate": 3.360438729198185e-05, "loss": 0.3092, "step": 3470 }, { "epoch": 2.63, "learning_rate": 3.359965960665658e-05, "loss": 0.2714, "step": 3471 }, { "epoch": 2.63, "learning_rate": 3.359493192133132e-05, "loss": 0.288, "step": 3472 }, { "epoch": 2.63, "learning_rate": 3.359020423600605e-05, "loss": 0.3036, "step": 3473 }, { "epoch": 2.63, "learning_rate": 3.358547655068079e-05, "loss": 0.3068, "step": 3474 }, { "epoch": 2.63, "learning_rate": 3.358074886535552e-05, "loss": 0.3168, "step": 3475 }, { "epoch": 2.63, "learning_rate": 3.357602118003026e-05, "loss": 0.3102, "step": 3476 }, { "epoch": 2.63, "learning_rate": 3.3571293494704994e-05, "loss": 0.2878, "step": 3477 }, { "epoch": 2.63, "learning_rate": 3.356656580937973e-05, "loss": 0.3015, "step": 3478 }, { "epoch": 2.63, "learning_rate": 3.3561838124054465e-05, "loss": 0.2636, "step": 3479 }, { "epoch": 2.63, "learning_rate": 3.35571104387292e-05, "loss": 0.2488, "step": 3480 }, { "epoch": 2.63, "learning_rate": 3.355238275340393e-05, "loss": 0.2755, "step": 3481 }, { "epoch": 2.63, "learning_rate": 3.354765506807867e-05, "loss": 0.2688, "step": 3482 }, { "epoch": 2.63, "learning_rate": 3.35429273827534e-05, "loss": 0.2407, "step": 3483 }, { "epoch": 2.63, "learning_rate": 3.353819969742814e-05, "loss": 0.2291, "step": 3484 }, { "epoch": 2.64, "learning_rate": 3.353347201210287e-05, "loss": 0.2868, "step": 3485 }, { "epoch": 2.64, "learning_rate": 3.352874432677761e-05, "loss": 0.2715, "step": 3486 }, { "epoch": 2.64, "learning_rate": 3.352401664145234e-05, "loss": 0.2491, "step": 3487 }, { "epoch": 2.64, "learning_rate": 3.351928895612708e-05, "loss": 0.2484, "step": 3488 }, { "epoch": 2.64, "learning_rate": 3.351456127080182e-05, "loss": 0.3249, "step": 3489 }, { "epoch": 2.64, "learning_rate": 3.350983358547655e-05, "loss": 0.2606, "step": 3490 }, { "epoch": 2.64, "learning_rate": 3.350510590015129e-05, "loss": 0.2559, "step": 3491 }, { "epoch": 2.64, "learning_rate": 3.350037821482602e-05, "loss": 0.2787, "step": 3492 }, { "epoch": 2.64, "learning_rate": 3.349565052950076e-05, "loss": 0.3028, "step": 3493 }, { "epoch": 2.64, "learning_rate": 3.3490922844175493e-05, "loss": 0.2653, "step": 3494 }, { "epoch": 2.64, "learning_rate": 3.348619515885023e-05, "loss": 0.2439, "step": 3495 }, { "epoch": 2.64, "learning_rate": 3.3481467473524964e-05, "loss": 0.2906, "step": 3496 }, { "epoch": 2.64, "learning_rate": 3.34767397881997e-05, "loss": 0.3111, "step": 3497 }, { "epoch": 2.65, "learning_rate": 3.3472012102874435e-05, "loss": 0.2791, "step": 3498 }, { "epoch": 2.65, "learning_rate": 3.3467284417549174e-05, "loss": 0.3213, "step": 3499 }, { "epoch": 2.65, "learning_rate": 3.3462556732223906e-05, "loss": 0.2422, "step": 3500 }, { "epoch": 2.65, "learning_rate": 3.3457829046898645e-05, "loss": 0.3145, "step": 3501 }, { "epoch": 2.65, "learning_rate": 3.345310136157338e-05, "loss": 0.2804, "step": 3502 }, { "epoch": 2.65, "learning_rate": 3.3448373676248116e-05, "loss": 0.2789, "step": 3503 }, { "epoch": 2.65, "learning_rate": 3.344364599092285e-05, "loss": 0.3353, "step": 3504 }, { "epoch": 2.65, "learning_rate": 3.343891830559758e-05, "loss": 0.3051, "step": 3505 }, { "epoch": 2.65, "learning_rate": 3.343419062027231e-05, "loss": 0.2676, "step": 3506 }, { "epoch": 2.65, "learning_rate": 3.342946293494705e-05, "loss": 0.2563, "step": 3507 }, { "epoch": 2.65, "learning_rate": 3.342473524962178e-05, "loss": 0.3013, "step": 3508 }, { "epoch": 2.65, "learning_rate": 3.342000756429652e-05, "loss": 0.2848, "step": 3509 }, { "epoch": 2.65, "learning_rate": 3.3415279878971254e-05, "loss": 0.2707, "step": 3510 }, { "epoch": 2.66, "learning_rate": 3.341055219364599e-05, "loss": 0.2481, "step": 3511 }, { "epoch": 2.66, "learning_rate": 3.3405824508320725e-05, "loss": 0.3128, "step": 3512 }, { "epoch": 2.66, "learning_rate": 3.3401096822995464e-05, "loss": 0.2528, "step": 3513 }, { "epoch": 2.66, "learning_rate": 3.3396369137670196e-05, "loss": 0.2498, "step": 3514 }, { "epoch": 2.66, "learning_rate": 3.3391641452344934e-05, "loss": 0.2568, "step": 3515 }, { "epoch": 2.66, "learning_rate": 3.3386913767019667e-05, "loss": 0.2525, "step": 3516 }, { "epoch": 2.66, "learning_rate": 3.3382186081694405e-05, "loss": 0.3106, "step": 3517 }, { "epoch": 2.66, "learning_rate": 3.337745839636914e-05, "loss": 0.2697, "step": 3518 }, { "epoch": 2.66, "learning_rate": 3.3372730711043876e-05, "loss": 0.2906, "step": 3519 }, { "epoch": 2.66, "learning_rate": 3.336800302571861e-05, "loss": 0.2738, "step": 3520 }, { "epoch": 2.66, "learning_rate": 3.336327534039335e-05, "loss": 0.2566, "step": 3521 }, { "epoch": 2.66, "learning_rate": 3.335854765506808e-05, "loss": 0.2956, "step": 3522 }, { "epoch": 2.66, "learning_rate": 3.335381996974282e-05, "loss": 0.3084, "step": 3523 }, { "epoch": 2.67, "learning_rate": 3.334909228441755e-05, "loss": 0.2547, "step": 3524 }, { "epoch": 2.67, "learning_rate": 3.334436459909229e-05, "loss": 0.2978, "step": 3525 }, { "epoch": 2.67, "learning_rate": 3.333963691376702e-05, "loss": 0.3062, "step": 3526 }, { "epoch": 2.67, "learning_rate": 3.333490922844176e-05, "loss": 0.2896, "step": 3527 }, { "epoch": 2.67, "learning_rate": 3.333018154311649e-05, "loss": 0.2748, "step": 3528 }, { "epoch": 2.67, "learning_rate": 3.332545385779123e-05, "loss": 0.3086, "step": 3529 }, { "epoch": 2.67, "learning_rate": 3.332072617246596e-05, "loss": 0.3056, "step": 3530 }, { "epoch": 2.67, "learning_rate": 3.33159984871407e-05, "loss": 0.2711, "step": 3531 }, { "epoch": 2.67, "learning_rate": 3.3311270801815434e-05, "loss": 0.282, "step": 3532 }, { "epoch": 2.67, "learning_rate": 3.3306543116490166e-05, "loss": 0.2737, "step": 3533 }, { "epoch": 2.67, "learning_rate": 3.33018154311649e-05, "loss": 0.3109, "step": 3534 }, { "epoch": 2.67, "learning_rate": 3.329708774583964e-05, "loss": 0.2426, "step": 3535 }, { "epoch": 2.67, "learning_rate": 3.329236006051437e-05, "loss": 0.2806, "step": 3536 }, { "epoch": 2.67, "learning_rate": 3.328763237518911e-05, "loss": 0.2675, "step": 3537 }, { "epoch": 2.68, "learning_rate": 3.328290468986384e-05, "loss": 0.2809, "step": 3538 }, { "epoch": 2.68, "learning_rate": 3.327817700453858e-05, "loss": 0.2803, "step": 3539 }, { "epoch": 2.68, "learning_rate": 3.327344931921331e-05, "loss": 0.2905, "step": 3540 }, { "epoch": 2.68, "learning_rate": 3.326872163388805e-05, "loss": 0.3435, "step": 3541 }, { "epoch": 2.68, "learning_rate": 3.326399394856278e-05, "loss": 0.2646, "step": 3542 }, { "epoch": 2.68, "learning_rate": 3.325926626323752e-05, "loss": 0.286, "step": 3543 }, { "epoch": 2.68, "learning_rate": 3.325453857791225e-05, "loss": 0.2973, "step": 3544 }, { "epoch": 2.68, "learning_rate": 3.324981089258699e-05, "loss": 0.2899, "step": 3545 }, { "epoch": 2.68, "learning_rate": 3.324508320726172e-05, "loss": 0.2386, "step": 3546 }, { "epoch": 2.68, "learning_rate": 3.324035552193646e-05, "loss": 0.2757, "step": 3547 }, { "epoch": 2.68, "learning_rate": 3.32356278366112e-05, "loss": 0.3077, "step": 3548 }, { "epoch": 2.68, "learning_rate": 3.323090015128593e-05, "loss": 0.2805, "step": 3549 }, { "epoch": 2.68, "learning_rate": 3.322617246596067e-05, "loss": 0.2795, "step": 3550 }, { "epoch": 2.69, "learning_rate": 3.3221444780635404e-05, "loss": 0.2828, "step": 3551 }, { "epoch": 2.69, "learning_rate": 3.321671709531014e-05, "loss": 0.242, "step": 3552 }, { "epoch": 2.69, "learning_rate": 3.3211989409984875e-05, "loss": 0.2927, "step": 3553 }, { "epoch": 2.69, "learning_rate": 3.3207261724659614e-05, "loss": 0.3064, "step": 3554 }, { "epoch": 2.69, "learning_rate": 3.3202534039334346e-05, "loss": 0.3094, "step": 3555 }, { "epoch": 2.69, "learning_rate": 3.3197806354009084e-05, "loss": 0.2559, "step": 3556 }, { "epoch": 2.69, "learning_rate": 3.3193078668683816e-05, "loss": 0.2359, "step": 3557 }, { "epoch": 2.69, "learning_rate": 3.318835098335855e-05, "loss": 0.2766, "step": 3558 }, { "epoch": 2.69, "learning_rate": 3.318362329803328e-05, "loss": 0.2482, "step": 3559 }, { "epoch": 2.69, "learning_rate": 3.317889561270802e-05, "loss": 0.3022, "step": 3560 }, { "epoch": 2.69, "learning_rate": 3.317416792738275e-05, "loss": 0.2788, "step": 3561 }, { "epoch": 2.69, "learning_rate": 3.316944024205749e-05, "loss": 0.2562, "step": 3562 }, { "epoch": 2.69, "learning_rate": 3.316471255673222e-05, "loss": 0.2775, "step": 3563 }, { "epoch": 2.7, "learning_rate": 3.315998487140696e-05, "loss": 0.2997, "step": 3564 }, { "epoch": 2.7, "learning_rate": 3.315525718608169e-05, "loss": 0.2415, "step": 3565 }, { "epoch": 2.7, "learning_rate": 3.315052950075643e-05, "loss": 0.2895, "step": 3566 }, { "epoch": 2.7, "learning_rate": 3.3145801815431164e-05, "loss": 0.2789, "step": 3567 }, { "epoch": 2.7, "learning_rate": 3.31410741301059e-05, "loss": 0.2806, "step": 3568 }, { "epoch": 2.7, "learning_rate": 3.3136346444780635e-05, "loss": 0.3507, "step": 3569 }, { "epoch": 2.7, "learning_rate": 3.3131618759455374e-05, "loss": 0.3135, "step": 3570 }, { "epoch": 2.7, "learning_rate": 3.3126891074130106e-05, "loss": 0.2527, "step": 3571 }, { "epoch": 2.7, "learning_rate": 3.3122163388804845e-05, "loss": 0.2927, "step": 3572 }, { "epoch": 2.7, "learning_rate": 3.311743570347958e-05, "loss": 0.3153, "step": 3573 }, { "epoch": 2.7, "learning_rate": 3.3112708018154316e-05, "loss": 0.2733, "step": 3574 }, { "epoch": 2.7, "learning_rate": 3.310798033282905e-05, "loss": 0.3249, "step": 3575 }, { "epoch": 2.7, "learning_rate": 3.3103252647503787e-05, "loss": 0.3073, "step": 3576 }, { "epoch": 2.71, "learning_rate": 3.309852496217852e-05, "loss": 0.2609, "step": 3577 }, { "epoch": 2.71, "learning_rate": 3.309379727685326e-05, "loss": 0.2982, "step": 3578 }, { "epoch": 2.71, "learning_rate": 3.308906959152799e-05, "loss": 0.2861, "step": 3579 }, { "epoch": 2.71, "learning_rate": 3.308434190620273e-05, "loss": 0.2314, "step": 3580 }, { "epoch": 2.71, "learning_rate": 3.307961422087746e-05, "loss": 0.2656, "step": 3581 }, { "epoch": 2.71, "learning_rate": 3.30748865355522e-05, "loss": 0.2479, "step": 3582 }, { "epoch": 2.71, "learning_rate": 3.307015885022693e-05, "loss": 0.3222, "step": 3583 }, { "epoch": 2.71, "learning_rate": 3.306543116490167e-05, "loss": 0.2818, "step": 3584 }, { "epoch": 2.71, "learning_rate": 3.30607034795764e-05, "loss": 0.3246, "step": 3585 }, { "epoch": 2.71, "learning_rate": 3.3055975794251134e-05, "loss": 0.272, "step": 3586 }, { "epoch": 2.71, "learning_rate": 3.3051248108925866e-05, "loss": 0.2761, "step": 3587 }, { "epoch": 2.71, "learning_rate": 3.3046520423600605e-05, "loss": 0.2844, "step": 3588 }, { "epoch": 2.71, "learning_rate": 3.304179273827534e-05, "loss": 0.2806, "step": 3589 }, { "epoch": 2.72, "learning_rate": 3.3037065052950076e-05, "loss": 0.2541, "step": 3590 }, { "epoch": 2.72, "learning_rate": 3.303233736762481e-05, "loss": 0.2806, "step": 3591 }, { "epoch": 2.72, "learning_rate": 3.302760968229955e-05, "loss": 0.2909, "step": 3592 }, { "epoch": 2.72, "learning_rate": 3.302288199697428e-05, "loss": 0.3154, "step": 3593 }, { "epoch": 2.72, "learning_rate": 3.301815431164902e-05, "loss": 0.2689, "step": 3594 }, { "epoch": 2.72, "learning_rate": 3.301342662632375e-05, "loss": 0.2528, "step": 3595 }, { "epoch": 2.72, "learning_rate": 3.300869894099849e-05, "loss": 0.2677, "step": 3596 }, { "epoch": 2.72, "learning_rate": 3.300397125567322e-05, "loss": 0.3406, "step": 3597 }, { "epoch": 2.72, "learning_rate": 3.299924357034796e-05, "loss": 0.2492, "step": 3598 }, { "epoch": 2.72, "learning_rate": 3.299451588502269e-05, "loss": 0.3108, "step": 3599 }, { "epoch": 2.72, "learning_rate": 3.298978819969743e-05, "loss": 0.2899, "step": 3600 }, { "epoch": 2.72, "learning_rate": 3.298506051437216e-05, "loss": 0.2422, "step": 3601 }, { "epoch": 2.72, "learning_rate": 3.29803328290469e-05, "loss": 0.2949, "step": 3602 }, { "epoch": 2.72, "learning_rate": 3.2975605143721634e-05, "loss": 0.2755, "step": 3603 }, { "epoch": 2.73, "learning_rate": 3.297087745839637e-05, "loss": 0.2648, "step": 3604 }, { "epoch": 2.73, "learning_rate": 3.2966149773071104e-05, "loss": 0.2839, "step": 3605 }, { "epoch": 2.73, "learning_rate": 3.296142208774584e-05, "loss": 0.3272, "step": 3606 }, { "epoch": 2.73, "learning_rate": 3.2956694402420575e-05, "loss": 0.302, "step": 3607 }, { "epoch": 2.73, "learning_rate": 3.2951966717095314e-05, "loss": 0.2577, "step": 3608 }, { "epoch": 2.73, "learning_rate": 3.294723903177005e-05, "loss": 0.362, "step": 3609 }, { "epoch": 2.73, "learning_rate": 3.2942511346444785e-05, "loss": 0.2733, "step": 3610 }, { "epoch": 2.73, "learning_rate": 3.293778366111952e-05, "loss": 0.2547, "step": 3611 }, { "epoch": 2.73, "learning_rate": 3.293305597579425e-05, "loss": 0.3294, "step": 3612 }, { "epoch": 2.73, "learning_rate": 3.292832829046899e-05, "loss": 0.2698, "step": 3613 }, { "epoch": 2.73, "learning_rate": 3.292360060514372e-05, "loss": 0.2942, "step": 3614 }, { "epoch": 2.73, "learning_rate": 3.291887291981846e-05, "loss": 0.3246, "step": 3615 }, { "epoch": 2.73, "learning_rate": 3.291414523449319e-05, "loss": 0.2544, "step": 3616 }, { "epoch": 2.74, "learning_rate": 3.290941754916793e-05, "loss": 0.2515, "step": 3617 }, { "epoch": 2.74, "learning_rate": 3.290468986384266e-05, "loss": 0.289, "step": 3618 }, { "epoch": 2.74, "learning_rate": 3.28999621785174e-05, "loss": 0.2915, "step": 3619 }, { "epoch": 2.74, "learning_rate": 3.289523449319213e-05, "loss": 0.2702, "step": 3620 }, { "epoch": 2.74, "learning_rate": 3.289050680786687e-05, "loss": 0.2508, "step": 3621 }, { "epoch": 2.74, "learning_rate": 3.2885779122541604e-05, "loss": 0.2509, "step": 3622 }, { "epoch": 2.74, "learning_rate": 3.288105143721634e-05, "loss": 0.2368, "step": 3623 }, { "epoch": 2.74, "learning_rate": 3.2876323751891075e-05, "loss": 0.2589, "step": 3624 }, { "epoch": 2.74, "learning_rate": 3.287159606656581e-05, "loss": 0.2718, "step": 3625 }, { "epoch": 2.74, "learning_rate": 3.2866868381240545e-05, "loss": 0.306, "step": 3626 }, { "epoch": 2.74, "learning_rate": 3.2862140695915284e-05, "loss": 0.2819, "step": 3627 }, { "epoch": 2.74, "learning_rate": 3.2857413010590016e-05, "loss": 0.3178, "step": 3628 }, { "epoch": 2.74, "learning_rate": 3.2852685325264755e-05, "loss": 0.2672, "step": 3629 }, { "epoch": 2.75, "learning_rate": 3.284795763993949e-05, "loss": 0.2688, "step": 3630 }, { "epoch": 2.75, "learning_rate": 3.2843229954614226e-05, "loss": 0.2703, "step": 3631 }, { "epoch": 2.75, "learning_rate": 3.283850226928896e-05, "loss": 0.2406, "step": 3632 }, { "epoch": 2.75, "learning_rate": 3.28337745839637e-05, "loss": 0.2534, "step": 3633 }, { "epoch": 2.75, "learning_rate": 3.282904689863843e-05, "loss": 0.2632, "step": 3634 }, { "epoch": 2.75, "learning_rate": 3.282431921331317e-05, "loss": 0.3051, "step": 3635 }, { "epoch": 2.75, "learning_rate": 3.28195915279879e-05, "loss": 0.3132, "step": 3636 }, { "epoch": 2.75, "learning_rate": 3.281486384266264e-05, "loss": 0.2683, "step": 3637 }, { "epoch": 2.75, "learning_rate": 3.281013615733737e-05, "loss": 0.2788, "step": 3638 }, { "epoch": 2.75, "learning_rate": 3.28054084720121e-05, "loss": 0.3085, "step": 3639 }, { "epoch": 2.75, "learning_rate": 3.2800680786686835e-05, "loss": 0.3037, "step": 3640 }, { "epoch": 2.75, "learning_rate": 3.2795953101361574e-05, "loss": 0.2814, "step": 3641 }, { "epoch": 2.75, "learning_rate": 3.2791225416036306e-05, "loss": 0.2875, "step": 3642 }, { "epoch": 2.76, "learning_rate": 3.2786497730711045e-05, "loss": 0.2565, "step": 3643 }, { "epoch": 2.76, "learning_rate": 3.278177004538578e-05, "loss": 0.2436, "step": 3644 }, { "epoch": 2.76, "learning_rate": 3.2777042360060516e-05, "loss": 0.2582, "step": 3645 }, { "epoch": 2.76, "learning_rate": 3.277231467473525e-05, "loss": 0.2942, "step": 3646 }, { "epoch": 2.76, "learning_rate": 3.2767586989409986e-05, "loss": 0.2174, "step": 3647 }, { "epoch": 2.76, "learning_rate": 3.276285930408472e-05, "loss": 0.2345, "step": 3648 }, { "epoch": 2.76, "learning_rate": 3.275813161875946e-05, "loss": 0.2784, "step": 3649 }, { "epoch": 2.76, "learning_rate": 3.275340393343419e-05, "loss": 0.2666, "step": 3650 }, { "epoch": 2.76, "learning_rate": 3.274867624810893e-05, "loss": 0.2792, "step": 3651 }, { "epoch": 2.76, "learning_rate": 3.274394856278366e-05, "loss": 0.2971, "step": 3652 }, { "epoch": 2.76, "learning_rate": 3.27392208774584e-05, "loss": 0.2829, "step": 3653 }, { "epoch": 2.76, "learning_rate": 3.273449319213313e-05, "loss": 0.2831, "step": 3654 }, { "epoch": 2.76, "learning_rate": 3.272976550680787e-05, "loss": 0.2855, "step": 3655 }, { "epoch": 2.76, "learning_rate": 3.27250378214826e-05, "loss": 0.2731, "step": 3656 }, { "epoch": 2.77, "learning_rate": 3.272031013615734e-05, "loss": 0.3351, "step": 3657 }, { "epoch": 2.77, "learning_rate": 3.271558245083207e-05, "loss": 0.2716, "step": 3658 }, { "epoch": 2.77, "learning_rate": 3.271085476550681e-05, "loss": 0.2619, "step": 3659 }, { "epoch": 2.77, "learning_rate": 3.2706127080181544e-05, "loss": 0.355, "step": 3660 }, { "epoch": 2.77, "learning_rate": 3.270139939485628e-05, "loss": 0.2773, "step": 3661 }, { "epoch": 2.77, "learning_rate": 3.2696671709531015e-05, "loss": 0.308, "step": 3662 }, { "epoch": 2.77, "learning_rate": 3.2691944024205754e-05, "loss": 0.3027, "step": 3663 }, { "epoch": 2.77, "learning_rate": 3.2687216338880486e-05, "loss": 0.3016, "step": 3664 }, { "epoch": 2.77, "learning_rate": 3.268248865355522e-05, "loss": 0.2942, "step": 3665 }, { "epoch": 2.77, "learning_rate": 3.2677760968229957e-05, "loss": 0.2559, "step": 3666 }, { "epoch": 2.77, "learning_rate": 3.267303328290469e-05, "loss": 0.2802, "step": 3667 }, { "epoch": 2.77, "learning_rate": 3.266830559757943e-05, "loss": 0.3143, "step": 3668 }, { "epoch": 2.77, "learning_rate": 3.266357791225416e-05, "loss": 0.2678, "step": 3669 }, { "epoch": 2.78, "learning_rate": 3.26588502269289e-05, "loss": 0.28, "step": 3670 }, { "epoch": 2.78, "learning_rate": 3.265412254160363e-05, "loss": 0.2966, "step": 3671 }, { "epoch": 2.78, "learning_rate": 3.264939485627837e-05, "loss": 0.2721, "step": 3672 }, { "epoch": 2.78, "learning_rate": 3.26446671709531e-05, "loss": 0.2595, "step": 3673 }, { "epoch": 2.78, "learning_rate": 3.263993948562784e-05, "loss": 0.2692, "step": 3674 }, { "epoch": 2.78, "learning_rate": 3.263521180030257e-05, "loss": 0.2966, "step": 3675 }, { "epoch": 2.78, "learning_rate": 3.263048411497731e-05, "loss": 0.2258, "step": 3676 }, { "epoch": 2.78, "learning_rate": 3.262575642965204e-05, "loss": 0.2579, "step": 3677 }, { "epoch": 2.78, "learning_rate": 3.262102874432678e-05, "loss": 0.3029, "step": 3678 }, { "epoch": 2.78, "learning_rate": 3.2616301059001514e-05, "loss": 0.2777, "step": 3679 }, { "epoch": 2.78, "learning_rate": 3.261157337367625e-05, "loss": 0.2953, "step": 3680 }, { "epoch": 2.78, "learning_rate": 3.2606845688350985e-05, "loss": 0.288, "step": 3681 }, { "epoch": 2.78, "learning_rate": 3.2602118003025724e-05, "loss": 0.3052, "step": 3682 }, { "epoch": 2.79, "learning_rate": 3.2597390317700456e-05, "loss": 0.3111, "step": 3683 }, { "epoch": 2.79, "learning_rate": 3.2592662632375195e-05, "loss": 0.2743, "step": 3684 }, { "epoch": 2.79, "learning_rate": 3.258793494704993e-05, "loss": 0.3096, "step": 3685 }, { "epoch": 2.79, "learning_rate": 3.2583207261724665e-05, "loss": 0.2327, "step": 3686 }, { "epoch": 2.79, "learning_rate": 3.25784795763994e-05, "loss": 0.2383, "step": 3687 }, { "epoch": 2.79, "learning_rate": 3.2573751891074136e-05, "loss": 0.2971, "step": 3688 }, { "epoch": 2.79, "learning_rate": 3.256902420574887e-05, "loss": 0.2907, "step": 3689 }, { "epoch": 2.79, "learning_rate": 3.256429652042361e-05, "loss": 0.2597, "step": 3690 }, { "epoch": 2.79, "learning_rate": 3.255956883509834e-05, "loss": 0.2874, "step": 3691 }, { "epoch": 2.79, "learning_rate": 3.255484114977307e-05, "loss": 0.3437, "step": 3692 }, { "epoch": 2.79, "learning_rate": 3.2550113464447803e-05, "loss": 0.3165, "step": 3693 }, { "epoch": 2.79, "learning_rate": 3.254538577912254e-05, "loss": 0.2551, "step": 3694 }, { "epoch": 2.79, "learning_rate": 3.2540658093797274e-05, "loss": 0.2751, "step": 3695 }, { "epoch": 2.8, "learning_rate": 3.253593040847201e-05, "loss": 0.264, "step": 3696 }, { "epoch": 2.8, "learning_rate": 3.2531202723146745e-05, "loss": 0.2439, "step": 3697 }, { "epoch": 2.8, "learning_rate": 3.2526475037821484e-05, "loss": 0.2653, "step": 3698 }, { "epoch": 2.8, "learning_rate": 3.2521747352496216e-05, "loss": 0.3125, "step": 3699 }, { "epoch": 2.8, "learning_rate": 3.2517019667170955e-05, "loss": 0.3439, "step": 3700 }, { "epoch": 2.8, "learning_rate": 3.251229198184569e-05, "loss": 0.2994, "step": 3701 }, { "epoch": 2.8, "learning_rate": 3.2507564296520426e-05, "loss": 0.2823, "step": 3702 }, { "epoch": 2.8, "learning_rate": 3.250283661119516e-05, "loss": 0.2682, "step": 3703 }, { "epoch": 2.8, "learning_rate": 3.24981089258699e-05, "loss": 0.2762, "step": 3704 }, { "epoch": 2.8, "learning_rate": 3.249338124054463e-05, "loss": 0.276, "step": 3705 }, { "epoch": 2.8, "learning_rate": 3.248865355521937e-05, "loss": 0.2689, "step": 3706 }, { "epoch": 2.8, "learning_rate": 3.24839258698941e-05, "loss": 0.2807, "step": 3707 }, { "epoch": 2.8, "learning_rate": 3.247919818456884e-05, "loss": 0.2699, "step": 3708 }, { "epoch": 2.81, "learning_rate": 3.247447049924357e-05, "loss": 0.2795, "step": 3709 }, { "epoch": 2.81, "learning_rate": 3.246974281391831e-05, "loss": 0.3053, "step": 3710 }, { "epoch": 2.81, "learning_rate": 3.246501512859304e-05, "loss": 0.2575, "step": 3711 }, { "epoch": 2.81, "learning_rate": 3.246028744326778e-05, "loss": 0.2904, "step": 3712 }, { "epoch": 2.81, "learning_rate": 3.245555975794251e-05, "loss": 0.2408, "step": 3713 }, { "epoch": 2.81, "learning_rate": 3.245083207261725e-05, "loss": 0.2546, "step": 3714 }, { "epoch": 2.81, "learning_rate": 3.244610438729198e-05, "loss": 0.3368, "step": 3715 }, { "epoch": 2.81, "learning_rate": 3.244137670196672e-05, "loss": 0.3014, "step": 3716 }, { "epoch": 2.81, "learning_rate": 3.2436649016641454e-05, "loss": 0.2913, "step": 3717 }, { "epoch": 2.81, "learning_rate": 3.2431921331316186e-05, "loss": 0.2663, "step": 3718 }, { "epoch": 2.81, "learning_rate": 3.2427193645990925e-05, "loss": 0.2785, "step": 3719 }, { "epoch": 2.81, "learning_rate": 3.242246596066566e-05, "loss": 0.2751, "step": 3720 }, { "epoch": 2.81, "learning_rate": 3.241773827534039e-05, "loss": 0.3026, "step": 3721 }, { "epoch": 2.81, "learning_rate": 3.241301059001513e-05, "loss": 0.2843, "step": 3722 }, { "epoch": 2.82, "learning_rate": 3.240828290468986e-05, "loss": 0.241, "step": 3723 }, { "epoch": 2.82, "learning_rate": 3.24035552193646e-05, "loss": 0.3097, "step": 3724 }, { "epoch": 2.82, "learning_rate": 3.239882753403933e-05, "loss": 0.2872, "step": 3725 }, { "epoch": 2.82, "learning_rate": 3.239409984871407e-05, "loss": 0.3163, "step": 3726 }, { "epoch": 2.82, "learning_rate": 3.23893721633888e-05, "loss": 0.2614, "step": 3727 }, { "epoch": 2.82, "learning_rate": 3.238464447806354e-05, "loss": 0.2641, "step": 3728 }, { "epoch": 2.82, "learning_rate": 3.237991679273828e-05, "loss": 0.2686, "step": 3729 }, { "epoch": 2.82, "learning_rate": 3.237518910741301e-05, "loss": 0.2643, "step": 3730 }, { "epoch": 2.82, "learning_rate": 3.237046142208775e-05, "loss": 0.2702, "step": 3731 }, { "epoch": 2.82, "learning_rate": 3.236573373676248e-05, "loss": 0.2895, "step": 3732 }, { "epoch": 2.82, "learning_rate": 3.236100605143722e-05, "loss": 0.3041, "step": 3733 }, { "epoch": 2.82, "learning_rate": 3.2356278366111953e-05, "loss": 0.3267, "step": 3734 }, { "epoch": 2.82, "learning_rate": 3.235155068078669e-05, "loss": 0.2499, "step": 3735 }, { "epoch": 2.83, "learning_rate": 3.2346822995461424e-05, "loss": 0.2766, "step": 3736 }, { "epoch": 2.83, "learning_rate": 3.234209531013616e-05, "loss": 0.2698, "step": 3737 }, { "epoch": 2.83, "learning_rate": 3.2337367624810895e-05, "loss": 0.345, "step": 3738 }, { "epoch": 2.83, "learning_rate": 3.2332639939485634e-05, "loss": 0.2824, "step": 3739 }, { "epoch": 2.83, "learning_rate": 3.2327912254160366e-05, "loss": 0.2737, "step": 3740 }, { "epoch": 2.83, "learning_rate": 3.2323184568835105e-05, "loss": 0.2909, "step": 3741 }, { "epoch": 2.83, "learning_rate": 3.231845688350984e-05, "loss": 0.2524, "step": 3742 }, { "epoch": 2.83, "learning_rate": 3.2313729198184576e-05, "loss": 0.2907, "step": 3743 }, { "epoch": 2.83, "learning_rate": 3.230900151285931e-05, "loss": 0.301, "step": 3744 }, { "epoch": 2.83, "learning_rate": 3.230427382753404e-05, "loss": 0.2487, "step": 3745 }, { "epoch": 2.83, "learning_rate": 3.229954614220877e-05, "loss": 0.2768, "step": 3746 }, { "epoch": 2.83, "learning_rate": 3.229481845688351e-05, "loss": 0.2639, "step": 3747 }, { "epoch": 2.83, "learning_rate": 3.229009077155824e-05, "loss": 0.308, "step": 3748 }, { "epoch": 2.84, "learning_rate": 3.228536308623298e-05, "loss": 0.2502, "step": 3749 }, { "epoch": 2.84, "learning_rate": 3.2280635400907714e-05, "loss": 0.3054, "step": 3750 }, { "epoch": 2.84, "learning_rate": 3.227590771558245e-05, "loss": 0.2442, "step": 3751 }, { "epoch": 2.84, "learning_rate": 3.2271180030257185e-05, "loss": 0.2999, "step": 3752 }, { "epoch": 2.84, "learning_rate": 3.2266452344931924e-05, "loss": 0.2658, "step": 3753 }, { "epoch": 2.84, "learning_rate": 3.2261724659606656e-05, "loss": 0.3336, "step": 3754 }, { "epoch": 2.84, "learning_rate": 3.2256996974281394e-05, "loss": 0.2831, "step": 3755 }, { "epoch": 2.84, "learning_rate": 3.2252269288956126e-05, "loss": 0.2616, "step": 3756 }, { "epoch": 2.84, "learning_rate": 3.2247541603630865e-05, "loss": 0.2921, "step": 3757 }, { "epoch": 2.84, "learning_rate": 3.22428139183056e-05, "loss": 0.261, "step": 3758 }, { "epoch": 2.84, "learning_rate": 3.2238086232980336e-05, "loss": 0.2606, "step": 3759 }, { "epoch": 2.84, "learning_rate": 3.223335854765507e-05, "loss": 0.2759, "step": 3760 }, { "epoch": 2.84, "learning_rate": 3.222863086232981e-05, "loss": 0.2651, "step": 3761 }, { "epoch": 2.85, "learning_rate": 3.222390317700454e-05, "loss": 0.2946, "step": 3762 }, { "epoch": 2.85, "learning_rate": 3.221917549167928e-05, "loss": 0.3133, "step": 3763 }, { "epoch": 2.85, "learning_rate": 3.221444780635401e-05, "loss": 0.2983, "step": 3764 }, { "epoch": 2.85, "learning_rate": 3.220972012102875e-05, "loss": 0.2358, "step": 3765 }, { "epoch": 2.85, "learning_rate": 3.220499243570348e-05, "loss": 0.2576, "step": 3766 }, { "epoch": 2.85, "learning_rate": 3.220026475037822e-05, "loss": 0.3063, "step": 3767 }, { "epoch": 2.85, "learning_rate": 3.219553706505295e-05, "loss": 0.3093, "step": 3768 }, { "epoch": 2.85, "learning_rate": 3.219080937972769e-05, "loss": 0.3209, "step": 3769 }, { "epoch": 2.85, "learning_rate": 3.218608169440242e-05, "loss": 0.2617, "step": 3770 }, { "epoch": 2.85, "learning_rate": 3.2181354009077155e-05, "loss": 0.285, "step": 3771 }, { "epoch": 2.85, "learning_rate": 3.2176626323751894e-05, "loss": 0.2711, "step": 3772 }, { "epoch": 2.85, "learning_rate": 3.2171898638426626e-05, "loss": 0.2571, "step": 3773 }, { "epoch": 2.85, "learning_rate": 3.216717095310136e-05, "loss": 0.3146, "step": 3774 }, { "epoch": 2.85, "learning_rate": 3.2162443267776097e-05, "loss": 0.2949, "step": 3775 }, { "epoch": 2.86, "learning_rate": 3.215771558245083e-05, "loss": 0.3022, "step": 3776 }, { "epoch": 2.86, "learning_rate": 3.215298789712557e-05, "loss": 0.2961, "step": 3777 }, { "epoch": 2.86, "learning_rate": 3.21482602118003e-05, "loss": 0.2853, "step": 3778 }, { "epoch": 2.86, "learning_rate": 3.214353252647504e-05, "loss": 0.2563, "step": 3779 }, { "epoch": 2.86, "learning_rate": 3.213880484114977e-05, "loss": 0.2685, "step": 3780 }, { "epoch": 2.86, "learning_rate": 3.213407715582451e-05, "loss": 0.2903, "step": 3781 }, { "epoch": 2.86, "learning_rate": 3.212934947049924e-05, "loss": 0.2942, "step": 3782 }, { "epoch": 2.86, "learning_rate": 3.212462178517398e-05, "loss": 0.2951, "step": 3783 }, { "epoch": 2.86, "learning_rate": 3.211989409984871e-05, "loss": 0.3334, "step": 3784 }, { "epoch": 2.86, "learning_rate": 3.211516641452345e-05, "loss": 0.2916, "step": 3785 }, { "epoch": 2.86, "learning_rate": 3.211043872919818e-05, "loss": 0.286, "step": 3786 }, { "epoch": 2.86, "learning_rate": 3.210571104387292e-05, "loss": 0.249, "step": 3787 }, { "epoch": 2.86, "learning_rate": 3.210098335854766e-05, "loss": 0.2981, "step": 3788 }, { "epoch": 2.87, "learning_rate": 3.209625567322239e-05, "loss": 0.268, "step": 3789 }, { "epoch": 2.87, "learning_rate": 3.209152798789713e-05, "loss": 0.3108, "step": 3790 }, { "epoch": 2.87, "learning_rate": 3.2086800302571864e-05, "loss": 0.3138, "step": 3791 }, { "epoch": 2.87, "learning_rate": 3.20820726172466e-05, "loss": 0.2681, "step": 3792 }, { "epoch": 2.87, "learning_rate": 3.2077344931921335e-05, "loss": 0.2663, "step": 3793 }, { "epoch": 2.87, "learning_rate": 3.2072617246596073e-05, "loss": 0.2669, "step": 3794 }, { "epoch": 2.87, "learning_rate": 3.2067889561270806e-05, "loss": 0.2403, "step": 3795 }, { "epoch": 2.87, "learning_rate": 3.2063161875945544e-05, "loss": 0.2849, "step": 3796 }, { "epoch": 2.87, "learning_rate": 3.2058434190620276e-05, "loss": 0.2785, "step": 3797 }, { "epoch": 2.87, "learning_rate": 3.205370650529501e-05, "loss": 0.254, "step": 3798 }, { "epoch": 2.87, "learning_rate": 3.204897881996974e-05, "loss": 0.2611, "step": 3799 }, { "epoch": 2.87, "learning_rate": 3.204425113464448e-05, "loss": 0.2609, "step": 3800 }, { "epoch": 2.87, "learning_rate": 3.203952344931921e-05, "loss": 0.2642, "step": 3801 }, { "epoch": 2.88, "learning_rate": 3.203479576399395e-05, "loss": 0.2987, "step": 3802 }, { "epoch": 2.88, "learning_rate": 3.203006807866868e-05, "loss": 0.2898, "step": 3803 }, { "epoch": 2.88, "learning_rate": 3.202534039334342e-05, "loss": 0.2928, "step": 3804 }, { "epoch": 2.88, "learning_rate": 3.202061270801815e-05, "loss": 0.3077, "step": 3805 }, { "epoch": 2.88, "learning_rate": 3.201588502269289e-05, "loss": 0.2754, "step": 3806 }, { "epoch": 2.88, "learning_rate": 3.2011157337367624e-05, "loss": 0.3463, "step": 3807 }, { "epoch": 2.88, "learning_rate": 3.200642965204236e-05, "loss": 0.2883, "step": 3808 }, { "epoch": 2.88, "learning_rate": 3.2001701966717095e-05, "loss": 0.2537, "step": 3809 }, { "epoch": 2.88, "learning_rate": 3.1996974281391834e-05, "loss": 0.2904, "step": 3810 }, { "epoch": 2.88, "learning_rate": 3.1992246596066566e-05, "loss": 0.2793, "step": 3811 }, { "epoch": 2.88, "learning_rate": 3.1987518910741305e-05, "loss": 0.2959, "step": 3812 }, { "epoch": 2.88, "learning_rate": 3.198279122541604e-05, "loss": 0.2902, "step": 3813 }, { "epoch": 2.88, "learning_rate": 3.1978063540090776e-05, "loss": 0.2944, "step": 3814 }, { "epoch": 2.89, "learning_rate": 3.197333585476551e-05, "loss": 0.3028, "step": 3815 }, { "epoch": 2.89, "learning_rate": 3.1968608169440247e-05, "loss": 0.301, "step": 3816 }, { "epoch": 2.89, "learning_rate": 3.196388048411498e-05, "loss": 0.3193, "step": 3817 }, { "epoch": 2.89, "learning_rate": 3.195915279878972e-05, "loss": 0.2852, "step": 3818 }, { "epoch": 2.89, "learning_rate": 3.195442511346445e-05, "loss": 0.259, "step": 3819 }, { "epoch": 2.89, "learning_rate": 3.194969742813919e-05, "loss": 0.2627, "step": 3820 }, { "epoch": 2.89, "learning_rate": 3.194496974281392e-05, "loss": 0.2559, "step": 3821 }, { "epoch": 2.89, "learning_rate": 3.194024205748866e-05, "loss": 0.3083, "step": 3822 }, { "epoch": 2.89, "learning_rate": 3.193551437216339e-05, "loss": 0.2617, "step": 3823 }, { "epoch": 2.89, "learning_rate": 3.193078668683812e-05, "loss": 0.242, "step": 3824 }, { "epoch": 2.89, "learning_rate": 3.192605900151286e-05, "loss": 0.2899, "step": 3825 }, { "epoch": 2.89, "learning_rate": 3.1921331316187594e-05, "loss": 0.3157, "step": 3826 }, { "epoch": 2.89, "learning_rate": 3.1916603630862326e-05, "loss": 0.2334, "step": 3827 }, { "epoch": 2.9, "learning_rate": 3.1911875945537065e-05, "loss": 0.2851, "step": 3828 }, { "epoch": 2.9, "learning_rate": 3.19071482602118e-05, "loss": 0.2446, "step": 3829 }, { "epoch": 2.9, "learning_rate": 3.1902420574886536e-05, "loss": 0.317, "step": 3830 }, { "epoch": 2.9, "learning_rate": 3.189769288956127e-05, "loss": 0.2915, "step": 3831 }, { "epoch": 2.9, "learning_rate": 3.189296520423601e-05, "loss": 0.2845, "step": 3832 }, { "epoch": 2.9, "learning_rate": 3.188823751891074e-05, "loss": 0.2603, "step": 3833 }, { "epoch": 2.9, "learning_rate": 3.188350983358548e-05, "loss": 0.293, "step": 3834 }, { "epoch": 2.9, "learning_rate": 3.187878214826021e-05, "loss": 0.2962, "step": 3835 }, { "epoch": 2.9, "learning_rate": 3.187405446293495e-05, "loss": 0.274, "step": 3836 }, { "epoch": 2.9, "learning_rate": 3.186932677760968e-05, "loss": 0.3088, "step": 3837 }, { "epoch": 2.9, "learning_rate": 3.186459909228442e-05, "loss": 0.3055, "step": 3838 }, { "epoch": 2.9, "learning_rate": 3.185987140695915e-05, "loss": 0.2813, "step": 3839 }, { "epoch": 2.9, "learning_rate": 3.185514372163389e-05, "loss": 0.2852, "step": 3840 }, { "epoch": 2.9, "learning_rate": 3.185041603630862e-05, "loss": 0.2654, "step": 3841 }, { "epoch": 2.91, "learning_rate": 3.184568835098336e-05, "loss": 0.3376, "step": 3842 }, { "epoch": 2.91, "learning_rate": 3.1840960665658093e-05, "loss": 0.2863, "step": 3843 }, { "epoch": 2.91, "learning_rate": 3.183623298033283e-05, "loss": 0.2575, "step": 3844 }, { "epoch": 2.91, "learning_rate": 3.1831505295007564e-05, "loss": 0.2725, "step": 3845 }, { "epoch": 2.91, "learning_rate": 3.18267776096823e-05, "loss": 0.2508, "step": 3846 }, { "epoch": 2.91, "learning_rate": 3.1822049924357035e-05, "loss": 0.2858, "step": 3847 }, { "epoch": 2.91, "learning_rate": 3.1817322239031774e-05, "loss": 0.3387, "step": 3848 }, { "epoch": 2.91, "learning_rate": 3.181259455370651e-05, "loss": 0.285, "step": 3849 }, { "epoch": 2.91, "learning_rate": 3.1807866868381245e-05, "loss": 0.2854, "step": 3850 }, { "epoch": 2.91, "learning_rate": 3.180313918305598e-05, "loss": 0.2866, "step": 3851 }, { "epoch": 2.91, "learning_rate": 3.179841149773071e-05, "loss": 0.2791, "step": 3852 }, { "epoch": 2.91, "learning_rate": 3.179368381240545e-05, "loss": 0.2901, "step": 3853 }, { "epoch": 2.91, "learning_rate": 3.178895612708018e-05, "loss": 0.2848, "step": 3854 }, { "epoch": 2.92, "learning_rate": 3.178422844175492e-05, "loss": 0.3223, "step": 3855 }, { "epoch": 2.92, "learning_rate": 3.177950075642965e-05, "loss": 0.3193, "step": 3856 }, { "epoch": 2.92, "learning_rate": 3.177477307110439e-05, "loss": 0.2607, "step": 3857 }, { "epoch": 2.92, "learning_rate": 3.177004538577912e-05, "loss": 0.2647, "step": 3858 }, { "epoch": 2.92, "learning_rate": 3.176531770045386e-05, "loss": 0.2753, "step": 3859 }, { "epoch": 2.92, "learning_rate": 3.176059001512859e-05, "loss": 0.2926, "step": 3860 }, { "epoch": 2.92, "learning_rate": 3.175586232980333e-05, "loss": 0.2905, "step": 3861 }, { "epoch": 2.92, "learning_rate": 3.1751134644478064e-05, "loss": 0.2779, "step": 3862 }, { "epoch": 2.92, "learning_rate": 3.17464069591528e-05, "loss": 0.2576, "step": 3863 }, { "epoch": 2.92, "learning_rate": 3.1741679273827534e-05, "loss": 0.2933, "step": 3864 }, { "epoch": 2.92, "learning_rate": 3.173695158850227e-05, "loss": 0.324, "step": 3865 }, { "epoch": 2.92, "learning_rate": 3.1732223903177005e-05, "loss": 0.3019, "step": 3866 }, { "epoch": 2.92, "learning_rate": 3.1727496217851744e-05, "loss": 0.3011, "step": 3867 }, { "epoch": 2.93, "learning_rate": 3.1722768532526476e-05, "loss": 0.2816, "step": 3868 }, { "epoch": 2.93, "learning_rate": 3.1718040847201215e-05, "loss": 0.3229, "step": 3869 }, { "epoch": 2.93, "learning_rate": 3.171331316187595e-05, "loss": 0.2683, "step": 3870 }, { "epoch": 2.93, "learning_rate": 3.1708585476550686e-05, "loss": 0.2568, "step": 3871 }, { "epoch": 2.93, "learning_rate": 3.170385779122542e-05, "loss": 0.3055, "step": 3872 }, { "epoch": 2.93, "learning_rate": 3.169913010590016e-05, "loss": 0.2359, "step": 3873 }, { "epoch": 2.93, "learning_rate": 3.169440242057489e-05, "loss": 0.3171, "step": 3874 }, { "epoch": 2.93, "learning_rate": 3.168967473524963e-05, "loss": 0.2705, "step": 3875 }, { "epoch": 2.93, "learning_rate": 3.168494704992436e-05, "loss": 0.3116, "step": 3876 }, { "epoch": 2.93, "learning_rate": 3.168021936459909e-05, "loss": 0.309, "step": 3877 }, { "epoch": 2.93, "learning_rate": 3.167549167927383e-05, "loss": 0.2858, "step": 3878 }, { "epoch": 2.93, "learning_rate": 3.167076399394856e-05, "loss": 0.279, "step": 3879 }, { "epoch": 2.93, "learning_rate": 3.1666036308623295e-05, "loss": 0.2878, "step": 3880 }, { "epoch": 2.94, "learning_rate": 3.1661308623298034e-05, "loss": 0.331, "step": 3881 }, { "epoch": 2.94, "learning_rate": 3.1656580937972766e-05, "loss": 0.271, "step": 3882 }, { "epoch": 2.94, "learning_rate": 3.1651853252647505e-05, "loss": 0.2794, "step": 3883 }, { "epoch": 2.94, "learning_rate": 3.164712556732224e-05, "loss": 0.2743, "step": 3884 }, { "epoch": 2.94, "learning_rate": 3.1642397881996975e-05, "loss": 0.2768, "step": 3885 }, { "epoch": 2.94, "learning_rate": 3.163767019667171e-05, "loss": 0.3396, "step": 3886 }, { "epoch": 2.94, "learning_rate": 3.1632942511346446e-05, "loss": 0.2703, "step": 3887 }, { "epoch": 2.94, "learning_rate": 3.162821482602118e-05, "loss": 0.2621, "step": 3888 }, { "epoch": 2.94, "learning_rate": 3.162348714069592e-05, "loss": 0.2749, "step": 3889 }, { "epoch": 2.94, "learning_rate": 3.161875945537065e-05, "loss": 0.2664, "step": 3890 }, { "epoch": 2.94, "learning_rate": 3.161403177004539e-05, "loss": 0.2693, "step": 3891 }, { "epoch": 2.94, "learning_rate": 3.160930408472012e-05, "loss": 0.3094, "step": 3892 }, { "epoch": 2.94, "learning_rate": 3.160457639939486e-05, "loss": 0.2628, "step": 3893 }, { "epoch": 2.94, "learning_rate": 3.159984871406959e-05, "loss": 0.2859, "step": 3894 }, { "epoch": 2.95, "learning_rate": 3.159512102874433e-05, "loss": 0.2498, "step": 3895 }, { "epoch": 2.95, "learning_rate": 3.159039334341906e-05, "loss": 0.2648, "step": 3896 }, { "epoch": 2.95, "learning_rate": 3.15856656580938e-05, "loss": 0.2626, "step": 3897 }, { "epoch": 2.95, "learning_rate": 3.158093797276853e-05, "loss": 0.2626, "step": 3898 }, { "epoch": 2.95, "learning_rate": 3.157621028744327e-05, "loss": 0.2823, "step": 3899 }, { "epoch": 2.95, "learning_rate": 3.1571482602118004e-05, "loss": 0.2875, "step": 3900 }, { "epoch": 2.95, "learning_rate": 3.156675491679274e-05, "loss": 0.2645, "step": 3901 }, { "epoch": 2.95, "learning_rate": 3.1562027231467475e-05, "loss": 0.2387, "step": 3902 }, { "epoch": 2.95, "learning_rate": 3.1557299546142214e-05, "loss": 0.2871, "step": 3903 }, { "epoch": 2.95, "learning_rate": 3.1552571860816946e-05, "loss": 0.2629, "step": 3904 }, { "epoch": 2.95, "learning_rate": 3.154784417549168e-05, "loss": 0.2904, "step": 3905 }, { "epoch": 2.95, "learning_rate": 3.1543116490166416e-05, "loss": 0.2927, "step": 3906 }, { "epoch": 2.95, "learning_rate": 3.153838880484115e-05, "loss": 0.2911, "step": 3907 }, { "epoch": 2.96, "learning_rate": 3.153366111951589e-05, "loss": 0.2603, "step": 3908 }, { "epoch": 2.96, "learning_rate": 3.152893343419062e-05, "loss": 0.3057, "step": 3909 }, { "epoch": 2.96, "learning_rate": 3.152420574886536e-05, "loss": 0.2596, "step": 3910 }, { "epoch": 2.96, "learning_rate": 3.151947806354009e-05, "loss": 0.3245, "step": 3911 }, { "epoch": 2.96, "learning_rate": 3.151475037821483e-05, "loss": 0.331, "step": 3912 }, { "epoch": 2.96, "learning_rate": 3.151002269288956e-05, "loss": 0.2783, "step": 3913 }, { "epoch": 2.96, "learning_rate": 3.15052950075643e-05, "loss": 0.288, "step": 3914 }, { "epoch": 2.96, "learning_rate": 3.150056732223903e-05, "loss": 0.2839, "step": 3915 }, { "epoch": 2.96, "learning_rate": 3.149583963691377e-05, "loss": 0.2516, "step": 3916 }, { "epoch": 2.96, "learning_rate": 3.14911119515885e-05, "loss": 0.2451, "step": 3917 }, { "epoch": 2.96, "learning_rate": 3.148638426626324e-05, "loss": 0.2436, "step": 3918 }, { "epoch": 2.96, "learning_rate": 3.1481656580937974e-05, "loss": 0.2616, "step": 3919 }, { "epoch": 2.96, "learning_rate": 3.147692889561271e-05, "loss": 0.283, "step": 3920 }, { "epoch": 2.97, "learning_rate": 3.1472201210287445e-05, "loss": 0.3107, "step": 3921 }, { "epoch": 2.97, "learning_rate": 3.1467473524962184e-05, "loss": 0.2777, "step": 3922 }, { "epoch": 2.97, "learning_rate": 3.1462745839636916e-05, "loss": 0.2874, "step": 3923 }, { "epoch": 2.97, "learning_rate": 3.1458018154311655e-05, "loss": 0.2929, "step": 3924 }, { "epoch": 2.97, "learning_rate": 3.1453290468986387e-05, "loss": 0.2687, "step": 3925 }, { "epoch": 2.97, "learning_rate": 3.1448562783661125e-05, "loss": 0.2761, "step": 3926 }, { "epoch": 2.97, "learning_rate": 3.144383509833586e-05, "loss": 0.2549, "step": 3927 }, { "epoch": 2.97, "learning_rate": 3.1439107413010596e-05, "loss": 0.2485, "step": 3928 }, { "epoch": 2.97, "learning_rate": 3.143437972768533e-05, "loss": 0.3061, "step": 3929 }, { "epoch": 2.97, "learning_rate": 3.142965204236006e-05, "loss": 0.2946, "step": 3930 }, { "epoch": 2.97, "learning_rate": 3.14249243570348e-05, "loss": 0.275, "step": 3931 }, { "epoch": 2.97, "learning_rate": 3.142019667170953e-05, "loss": 0.2895, "step": 3932 }, { "epoch": 2.97, "learning_rate": 3.141546898638426e-05, "loss": 0.3085, "step": 3933 }, { "epoch": 2.98, "learning_rate": 3.1410741301059e-05, "loss": 0.2572, "step": 3934 }, { "epoch": 2.98, "learning_rate": 3.1406013615733734e-05, "loss": 0.2634, "step": 3935 }, { "epoch": 2.98, "learning_rate": 3.140128593040847e-05, "loss": 0.326, "step": 3936 }, { "epoch": 2.98, "learning_rate": 3.1396558245083205e-05, "loss": 0.285, "step": 3937 }, { "epoch": 2.98, "learning_rate": 3.1391830559757944e-05, "loss": 0.2885, "step": 3938 }, { "epoch": 2.98, "learning_rate": 3.1387102874432676e-05, "loss": 0.2491, "step": 3939 }, { "epoch": 2.98, "learning_rate": 3.1382375189107415e-05, "loss": 0.2789, "step": 3940 }, { "epoch": 2.98, "learning_rate": 3.137764750378215e-05, "loss": 0.3107, "step": 3941 }, { "epoch": 2.98, "learning_rate": 3.1372919818456886e-05, "loss": 0.3228, "step": 3942 }, { "epoch": 2.98, "learning_rate": 3.136819213313162e-05, "loss": 0.3204, "step": 3943 }, { "epoch": 2.98, "learning_rate": 3.136346444780636e-05, "loss": 0.2941, "step": 3944 }, { "epoch": 2.98, "learning_rate": 3.135873676248109e-05, "loss": 0.2544, "step": 3945 }, { "epoch": 2.98, "learning_rate": 3.135400907715583e-05, "loss": 0.3144, "step": 3946 }, { "epoch": 2.99, "learning_rate": 3.134928139183056e-05, "loss": 0.3119, "step": 3947 }, { "epoch": 2.99, "learning_rate": 3.13445537065053e-05, "loss": 0.2462, "step": 3948 }, { "epoch": 2.99, "learning_rate": 3.133982602118003e-05, "loss": 0.2995, "step": 3949 }, { "epoch": 2.99, "learning_rate": 3.133509833585477e-05, "loss": 0.3033, "step": 3950 }, { "epoch": 2.99, "learning_rate": 3.13303706505295e-05, "loss": 0.2512, "step": 3951 }, { "epoch": 2.99, "learning_rate": 3.132564296520424e-05, "loss": 0.3702, "step": 3952 }, { "epoch": 2.99, "learning_rate": 3.132091527987897e-05, "loss": 0.2882, "step": 3953 }, { "epoch": 2.99, "learning_rate": 3.131618759455371e-05, "loss": 0.2686, "step": 3954 }, { "epoch": 2.99, "learning_rate": 3.131145990922844e-05, "loss": 0.2786, "step": 3955 }, { "epoch": 2.99, "learning_rate": 3.130673222390318e-05, "loss": 0.2927, "step": 3956 }, { "epoch": 2.99, "learning_rate": 3.1302004538577914e-05, "loss": 0.2921, "step": 3957 }, { "epoch": 2.99, "learning_rate": 3.1297276853252646e-05, "loss": 0.2736, "step": 3958 }, { "epoch": 2.99, "learning_rate": 3.1292549167927385e-05, "loss": 0.2553, "step": 3959 }, { "epoch": 2.99, "learning_rate": 3.128782148260212e-05, "loss": 0.258, "step": 3960 }, { "epoch": 3.0, "learning_rate": 3.128309379727685e-05, "loss": 0.2985, "step": 3961 }, { "epoch": 3.0, "learning_rate": 3.127836611195159e-05, "loss": 0.2827, "step": 3962 }, { "epoch": 3.0, "learning_rate": 3.127363842662632e-05, "loss": 0.2906, "step": 3963 }, { "epoch": 3.0, "learning_rate": 3.126891074130106e-05, "loss": 0.2911, "step": 3964 }, { "epoch": 3.0, "learning_rate": 3.126418305597579e-05, "loss": 0.2498, "step": 3965 }, { "epoch": 3.0, "learning_rate": 3.125945537065053e-05, "loss": 0.3225, "step": 3966 }, { "epoch": 3.0, "learning_rate": 3.125472768532526e-05, "loss": 0.2241, "step": 3967 }, { "epoch": 3.0, "learning_rate": 3.125e-05, "loss": 0.1596, "step": 3968 }, { "epoch": 3.0, "learning_rate": 3.124527231467474e-05, "loss": 0.125, "step": 3969 }, { "epoch": 3.0, "learning_rate": 3.124054462934947e-05, "loss": 0.1135, "step": 3970 }, { "epoch": 3.0, "learning_rate": 3.123581694402421e-05, "loss": 0.1149, "step": 3971 }, { "epoch": 3.0, "learning_rate": 3.123108925869894e-05, "loss": 0.1435, "step": 3972 }, { "epoch": 3.0, "learning_rate": 3.122636157337368e-05, "loss": 0.1178, "step": 3973 }, { "epoch": 3.01, "learning_rate": 3.122163388804841e-05, "loss": 0.1529, "step": 3974 }, { "epoch": 3.01, "learning_rate": 3.121690620272315e-05, "loss": 0.1819, "step": 3975 }, { "epoch": 3.01, "learning_rate": 3.1212178517397884e-05, "loss": 0.1347, "step": 3976 }, { "epoch": 3.01, "learning_rate": 3.120745083207262e-05, "loss": 0.1363, "step": 3977 }, { "epoch": 3.01, "learning_rate": 3.1202723146747355e-05, "loss": 0.1192, "step": 3978 }, { "epoch": 3.01, "learning_rate": 3.1197995461422094e-05, "loss": 0.1259, "step": 3979 }, { "epoch": 3.01, "learning_rate": 3.1193267776096826e-05, "loss": 0.1672, "step": 3980 }, { "epoch": 3.01, "learning_rate": 3.1188540090771565e-05, "loss": 0.1483, "step": 3981 }, { "epoch": 3.01, "learning_rate": 3.11838124054463e-05, "loss": 0.1281, "step": 3982 }, { "epoch": 3.01, "learning_rate": 3.117908472012103e-05, "loss": 0.1283, "step": 3983 }, { "epoch": 3.01, "learning_rate": 3.117435703479577e-05, "loss": 0.1334, "step": 3984 }, { "epoch": 3.01, "learning_rate": 3.11696293494705e-05, "loss": 0.1304, "step": 3985 }, { "epoch": 3.01, "learning_rate": 3.116490166414523e-05, "loss": 0.1258, "step": 3986 }, { "epoch": 3.02, "learning_rate": 3.116017397881997e-05, "loss": 0.1168, "step": 3987 }, { "epoch": 3.02, "learning_rate": 3.11554462934947e-05, "loss": 0.1222, "step": 3988 }, { "epoch": 3.02, "learning_rate": 3.115071860816944e-05, "loss": 0.1614, "step": 3989 }, { "epoch": 3.02, "learning_rate": 3.1145990922844174e-05, "loss": 0.1417, "step": 3990 }, { "epoch": 3.02, "learning_rate": 3.114126323751891e-05, "loss": 0.1195, "step": 3991 }, { "epoch": 3.02, "learning_rate": 3.1136535552193645e-05, "loss": 0.1341, "step": 3992 }, { "epoch": 3.02, "learning_rate": 3.1131807866868383e-05, "loss": 0.173, "step": 3993 }, { "epoch": 3.02, "learning_rate": 3.1127080181543116e-05, "loss": 0.1593, "step": 3994 }, { "epoch": 3.02, "learning_rate": 3.1122352496217854e-05, "loss": 0.1328, "step": 3995 }, { "epoch": 3.02, "learning_rate": 3.1117624810892586e-05, "loss": 0.1204, "step": 3996 }, { "epoch": 3.02, "learning_rate": 3.1112897125567325e-05, "loss": 0.1444, "step": 3997 }, { "epoch": 3.02, "learning_rate": 3.110816944024206e-05, "loss": 0.1369, "step": 3998 }, { "epoch": 3.02, "learning_rate": 3.1103441754916796e-05, "loss": 0.153, "step": 3999 }, { "epoch": 3.03, "learning_rate": 3.109871406959153e-05, "loss": 0.1313, "step": 4000 }, { "epoch": 3.03, "learning_rate": 3.109398638426627e-05, "loss": 0.145, "step": 4001 }, { "epoch": 3.03, "learning_rate": 3.1089258698941e-05, "loss": 0.1524, "step": 4002 }, { "epoch": 3.03, "learning_rate": 3.108453101361574e-05, "loss": 0.1493, "step": 4003 }, { "epoch": 3.03, "learning_rate": 3.107980332829047e-05, "loss": 0.1798, "step": 4004 }, { "epoch": 3.03, "learning_rate": 3.107507564296521e-05, "loss": 0.1464, "step": 4005 }, { "epoch": 3.03, "learning_rate": 3.107034795763994e-05, "loss": 0.121, "step": 4006 }, { "epoch": 3.03, "learning_rate": 3.106562027231468e-05, "loss": 0.1412, "step": 4007 }, { "epoch": 3.03, "learning_rate": 3.106089258698941e-05, "loss": 0.1482, "step": 4008 }, { "epoch": 3.03, "learning_rate": 3.105616490166415e-05, "loss": 0.1408, "step": 4009 }, { "epoch": 3.03, "learning_rate": 3.105143721633888e-05, "loss": 0.1403, "step": 4010 }, { "epoch": 3.03, "learning_rate": 3.1046709531013615e-05, "loss": 0.141, "step": 4011 }, { "epoch": 3.03, "learning_rate": 3.1041981845688354e-05, "loss": 0.1362, "step": 4012 }, { "epoch": 3.03, "learning_rate": 3.1037254160363086e-05, "loss": 0.1258, "step": 4013 }, { "epoch": 3.04, "learning_rate": 3.103252647503782e-05, "loss": 0.139, "step": 4014 }, { "epoch": 3.04, "learning_rate": 3.1027798789712557e-05, "loss": 0.1629, "step": 4015 }, { "epoch": 3.04, "learning_rate": 3.102307110438729e-05, "loss": 0.1366, "step": 4016 }, { "epoch": 3.04, "learning_rate": 3.101834341906203e-05, "loss": 0.145, "step": 4017 }, { "epoch": 3.04, "learning_rate": 3.101361573373676e-05, "loss": 0.1228, "step": 4018 }, { "epoch": 3.04, "learning_rate": 3.10088880484115e-05, "loss": 0.1353, "step": 4019 }, { "epoch": 3.04, "learning_rate": 3.100416036308623e-05, "loss": 0.1179, "step": 4020 }, { "epoch": 3.04, "learning_rate": 3.099943267776097e-05, "loss": 0.1462, "step": 4021 }, { "epoch": 3.04, "learning_rate": 3.09947049924357e-05, "loss": 0.1585, "step": 4022 }, { "epoch": 3.04, "learning_rate": 3.098997730711044e-05, "loss": 0.1425, "step": 4023 }, { "epoch": 3.04, "learning_rate": 3.098524962178517e-05, "loss": 0.1392, "step": 4024 }, { "epoch": 3.04, "learning_rate": 3.098052193645991e-05, "loss": 0.1365, "step": 4025 }, { "epoch": 3.04, "learning_rate": 3.097579425113464e-05, "loss": 0.1454, "step": 4026 }, { "epoch": 3.05, "learning_rate": 3.097106656580938e-05, "loss": 0.1553, "step": 4027 }, { "epoch": 3.05, "learning_rate": 3.0966338880484114e-05, "loss": 0.1386, "step": 4028 }, { "epoch": 3.05, "learning_rate": 3.096161119515885e-05, "loss": 0.1417, "step": 4029 }, { "epoch": 3.05, "learning_rate": 3.095688350983359e-05, "loss": 0.1605, "step": 4030 }, { "epoch": 3.05, "learning_rate": 3.0952155824508324e-05, "loss": 0.1488, "step": 4031 }, { "epoch": 3.05, "learning_rate": 3.094742813918306e-05, "loss": 0.1305, "step": 4032 }, { "epoch": 3.05, "learning_rate": 3.0942700453857795e-05, "loss": 0.124, "step": 4033 }, { "epoch": 3.05, "learning_rate": 3.0937972768532533e-05, "loss": 0.1296, "step": 4034 }, { "epoch": 3.05, "learning_rate": 3.0933245083207265e-05, "loss": 0.1308, "step": 4035 }, { "epoch": 3.05, "learning_rate": 3.0928517397882e-05, "loss": 0.1391, "step": 4036 }, { "epoch": 3.05, "learning_rate": 3.0923789712556736e-05, "loss": 0.1413, "step": 4037 }, { "epoch": 3.05, "learning_rate": 3.091906202723147e-05, "loss": 0.1326, "step": 4038 }, { "epoch": 3.05, "learning_rate": 3.09143343419062e-05, "loss": 0.1331, "step": 4039 }, { "epoch": 3.06, "learning_rate": 3.090960665658094e-05, "loss": 0.1143, "step": 4040 }, { "epoch": 3.06, "learning_rate": 3.090487897125567e-05, "loss": 0.1395, "step": 4041 }, { "epoch": 3.06, "learning_rate": 3.090015128593041e-05, "loss": 0.1414, "step": 4042 }, { "epoch": 3.06, "learning_rate": 3.089542360060514e-05, "loss": 0.143, "step": 4043 }, { "epoch": 3.06, "learning_rate": 3.089069591527988e-05, "loss": 0.1786, "step": 4044 }, { "epoch": 3.06, "learning_rate": 3.088596822995461e-05, "loss": 0.1577, "step": 4045 }, { "epoch": 3.06, "learning_rate": 3.088124054462935e-05, "loss": 0.1461, "step": 4046 }, { "epoch": 3.06, "learning_rate": 3.0876512859304084e-05, "loss": 0.1565, "step": 4047 }, { "epoch": 3.06, "learning_rate": 3.087178517397882e-05, "loss": 0.124, "step": 4048 }, { "epoch": 3.06, "learning_rate": 3.0867057488653555e-05, "loss": 0.1344, "step": 4049 }, { "epoch": 3.06, "learning_rate": 3.0862329803328294e-05, "loss": 0.1242, "step": 4050 }, { "epoch": 3.06, "learning_rate": 3.0857602118003026e-05, "loss": 0.1473, "step": 4051 }, { "epoch": 3.06, "learning_rate": 3.0852874432677765e-05, "loss": 0.1327, "step": 4052 }, { "epoch": 3.07, "learning_rate": 3.08481467473525e-05, "loss": 0.1278, "step": 4053 }, { "epoch": 3.07, "learning_rate": 3.0843419062027236e-05, "loss": 0.1881, "step": 4054 }, { "epoch": 3.07, "learning_rate": 3.083869137670197e-05, "loss": 0.1361, "step": 4055 }, { "epoch": 3.07, "learning_rate": 3.0833963691376706e-05, "loss": 0.1596, "step": 4056 }, { "epoch": 3.07, "learning_rate": 3.082923600605144e-05, "loss": 0.1499, "step": 4057 }, { "epoch": 3.07, "learning_rate": 3.082450832072618e-05, "loss": 0.1542, "step": 4058 }, { "epoch": 3.07, "learning_rate": 3.081978063540091e-05, "loss": 0.1304, "step": 4059 }, { "epoch": 3.07, "learning_rate": 3.081505295007565e-05, "loss": 0.1679, "step": 4060 }, { "epoch": 3.07, "learning_rate": 3.081032526475038e-05, "loss": 0.1403, "step": 4061 }, { "epoch": 3.07, "learning_rate": 3.080559757942512e-05, "loss": 0.1525, "step": 4062 }, { "epoch": 3.07, "learning_rate": 3.080086989409985e-05, "loss": 0.1277, "step": 4063 }, { "epoch": 3.07, "learning_rate": 3.079614220877458e-05, "loss": 0.1583, "step": 4064 }, { "epoch": 3.07, "learning_rate": 3.079141452344932e-05, "loss": 0.1456, "step": 4065 }, { "epoch": 3.08, "learning_rate": 3.0786686838124054e-05, "loss": 0.1305, "step": 4066 }, { "epoch": 3.08, "learning_rate": 3.0781959152798786e-05, "loss": 0.1313, "step": 4067 }, { "epoch": 3.08, "learning_rate": 3.0777231467473525e-05, "loss": 0.1525, "step": 4068 }, { "epoch": 3.08, "learning_rate": 3.077250378214826e-05, "loss": 0.143, "step": 4069 }, { "epoch": 3.08, "learning_rate": 3.0767776096822996e-05, "loss": 0.1411, "step": 4070 }, { "epoch": 3.08, "learning_rate": 3.076304841149773e-05, "loss": 0.1437, "step": 4071 }, { "epoch": 3.08, "learning_rate": 3.075832072617247e-05, "loss": 0.1557, "step": 4072 }, { "epoch": 3.08, "learning_rate": 3.07535930408472e-05, "loss": 0.1527, "step": 4073 }, { "epoch": 3.08, "learning_rate": 3.074886535552194e-05, "loss": 0.1248, "step": 4074 }, { "epoch": 3.08, "learning_rate": 3.074413767019667e-05, "loss": 0.1437, "step": 4075 }, { "epoch": 3.08, "learning_rate": 3.073940998487141e-05, "loss": 0.1454, "step": 4076 }, { "epoch": 3.08, "learning_rate": 3.073468229954614e-05, "loss": 0.1384, "step": 4077 }, { "epoch": 3.08, "learning_rate": 3.072995461422088e-05, "loss": 0.1384, "step": 4078 }, { "epoch": 3.08, "learning_rate": 3.072522692889561e-05, "loss": 0.1457, "step": 4079 }, { "epoch": 3.09, "learning_rate": 3.072049924357035e-05, "loss": 0.1488, "step": 4080 }, { "epoch": 3.09, "learning_rate": 3.071577155824508e-05, "loss": 0.1569, "step": 4081 }, { "epoch": 3.09, "learning_rate": 3.071104387291982e-05, "loss": 0.1542, "step": 4082 }, { "epoch": 3.09, "learning_rate": 3.070631618759455e-05, "loss": 0.1239, "step": 4083 }, { "epoch": 3.09, "learning_rate": 3.070158850226929e-05, "loss": 0.1587, "step": 4084 }, { "epoch": 3.09, "learning_rate": 3.0696860816944024e-05, "loss": 0.1437, "step": 4085 }, { "epoch": 3.09, "learning_rate": 3.069213313161876e-05, "loss": 0.1494, "step": 4086 }, { "epoch": 3.09, "learning_rate": 3.0687405446293495e-05, "loss": 0.1436, "step": 4087 }, { "epoch": 3.09, "learning_rate": 3.0682677760968234e-05, "loss": 0.1531, "step": 4088 }, { "epoch": 3.09, "learning_rate": 3.0677950075642966e-05, "loss": 0.1274, "step": 4089 }, { "epoch": 3.09, "learning_rate": 3.0673222390317705e-05, "loss": 0.1545, "step": 4090 }, { "epoch": 3.09, "learning_rate": 3.066849470499244e-05, "loss": 0.1412, "step": 4091 }, { "epoch": 3.09, "learning_rate": 3.066376701966717e-05, "loss": 0.1503, "step": 4092 }, { "epoch": 3.1, "learning_rate": 3.065903933434191e-05, "loss": 0.1337, "step": 4093 }, { "epoch": 3.1, "learning_rate": 3.065431164901664e-05, "loss": 0.138, "step": 4094 }, { "epoch": 3.1, "learning_rate": 3.064958396369138e-05, "loss": 0.1283, "step": 4095 }, { "epoch": 3.1, "learning_rate": 3.064485627836611e-05, "loss": 0.1419, "step": 4096 }, { "epoch": 3.1, "learning_rate": 3.064012859304085e-05, "loss": 0.1452, "step": 4097 }, { "epoch": 3.1, "learning_rate": 3.063540090771558e-05, "loss": 0.1615, "step": 4098 }, { "epoch": 3.1, "learning_rate": 3.063067322239032e-05, "loss": 0.155, "step": 4099 }, { "epoch": 3.1, "learning_rate": 3.062594553706505e-05, "loss": 0.1473, "step": 4100 }, { "epoch": 3.1, "learning_rate": 3.062121785173979e-05, "loss": 0.1376, "step": 4101 }, { "epoch": 3.1, "learning_rate": 3.0616490166414523e-05, "loss": 0.1374, "step": 4102 }, { "epoch": 3.1, "learning_rate": 3.061176248108926e-05, "loss": 0.1587, "step": 4103 }, { "epoch": 3.1, "learning_rate": 3.0607034795763994e-05, "loss": 0.1179, "step": 4104 }, { "epoch": 3.1, "learning_rate": 3.060230711043873e-05, "loss": 0.1748, "step": 4105 }, { "epoch": 3.11, "learning_rate": 3.0597579425113465e-05, "loss": 0.1546, "step": 4106 }, { "epoch": 3.11, "learning_rate": 3.0592851739788204e-05, "loss": 0.1381, "step": 4107 }, { "epoch": 3.11, "learning_rate": 3.0588124054462936e-05, "loss": 0.1566, "step": 4108 }, { "epoch": 3.11, "learning_rate": 3.0583396369137675e-05, "loss": 0.1767, "step": 4109 }, { "epoch": 3.11, "learning_rate": 3.057866868381241e-05, "loss": 0.1501, "step": 4110 }, { "epoch": 3.11, "learning_rate": 3.0573940998487146e-05, "loss": 0.1206, "step": 4111 }, { "epoch": 3.11, "learning_rate": 3.056921331316188e-05, "loss": 0.1351, "step": 4112 }, { "epoch": 3.11, "learning_rate": 3.056448562783662e-05, "loss": 0.1429, "step": 4113 }, { "epoch": 3.11, "learning_rate": 3.055975794251135e-05, "loss": 0.13, "step": 4114 }, { "epoch": 3.11, "learning_rate": 3.055503025718609e-05, "loss": 0.1105, "step": 4115 }, { "epoch": 3.11, "learning_rate": 3.055030257186082e-05, "loss": 0.1571, "step": 4116 }, { "epoch": 3.11, "learning_rate": 3.054557488653555e-05, "loss": 0.1211, "step": 4117 }, { "epoch": 3.11, "learning_rate": 3.054084720121029e-05, "loss": 0.1324, "step": 4118 }, { "epoch": 3.12, "learning_rate": 3.053611951588502e-05, "loss": 0.1647, "step": 4119 }, { "epoch": 3.12, "learning_rate": 3.0531391830559755e-05, "loss": 0.1373, "step": 4120 }, { "epoch": 3.12, "learning_rate": 3.0526664145234494e-05, "loss": 0.1475, "step": 4121 }, { "epoch": 3.12, "learning_rate": 3.0521936459909226e-05, "loss": 0.1545, "step": 4122 }, { "epoch": 3.12, "learning_rate": 3.0517208774583968e-05, "loss": 0.1354, "step": 4123 }, { "epoch": 3.12, "learning_rate": 3.05124810892587e-05, "loss": 0.1364, "step": 4124 }, { "epoch": 3.12, "learning_rate": 3.0507753403933435e-05, "loss": 0.1333, "step": 4125 }, { "epoch": 3.12, "learning_rate": 3.050302571860817e-05, "loss": 0.1326, "step": 4126 }, { "epoch": 3.12, "learning_rate": 3.0498298033282906e-05, "loss": 0.1319, "step": 4127 }, { "epoch": 3.12, "learning_rate": 3.049357034795764e-05, "loss": 0.1407, "step": 4128 }, { "epoch": 3.12, "learning_rate": 3.0488842662632377e-05, "loss": 0.1424, "step": 4129 }, { "epoch": 3.12, "learning_rate": 3.048411497730711e-05, "loss": 0.139, "step": 4130 }, { "epoch": 3.12, "learning_rate": 3.0479387291981848e-05, "loss": 0.15, "step": 4131 }, { "epoch": 3.12, "learning_rate": 3.047465960665658e-05, "loss": 0.1435, "step": 4132 }, { "epoch": 3.13, "learning_rate": 3.046993192133132e-05, "loss": 0.1352, "step": 4133 }, { "epoch": 3.13, "learning_rate": 3.046520423600605e-05, "loss": 0.1424, "step": 4134 }, { "epoch": 3.13, "learning_rate": 3.046047655068079e-05, "loss": 0.1523, "step": 4135 }, { "epoch": 3.13, "learning_rate": 3.0455748865355522e-05, "loss": 0.1491, "step": 4136 }, { "epoch": 3.13, "learning_rate": 3.045102118003026e-05, "loss": 0.14, "step": 4137 }, { "epoch": 3.13, "learning_rate": 3.0446293494704993e-05, "loss": 0.1433, "step": 4138 }, { "epoch": 3.13, "learning_rate": 3.0441565809379728e-05, "loss": 0.1069, "step": 4139 }, { "epoch": 3.13, "learning_rate": 3.043683812405446e-05, "loss": 0.1346, "step": 4140 }, { "epoch": 3.13, "learning_rate": 3.04321104387292e-05, "loss": 0.1662, "step": 4141 }, { "epoch": 3.13, "learning_rate": 3.042738275340393e-05, "loss": 0.1579, "step": 4142 }, { "epoch": 3.13, "learning_rate": 3.042265506807867e-05, "loss": 0.1461, "step": 4143 }, { "epoch": 3.13, "learning_rate": 3.0417927382753402e-05, "loss": 0.1305, "step": 4144 }, { "epoch": 3.13, "learning_rate": 3.041319969742814e-05, "loss": 0.1608, "step": 4145 }, { "epoch": 3.14, "learning_rate": 3.0408472012102873e-05, "loss": 0.1259, "step": 4146 }, { "epoch": 3.14, "learning_rate": 3.0403744326777612e-05, "loss": 0.1681, "step": 4147 }, { "epoch": 3.14, "learning_rate": 3.0399016641452344e-05, "loss": 0.1521, "step": 4148 }, { "epoch": 3.14, "learning_rate": 3.0394288956127083e-05, "loss": 0.1315, "step": 4149 }, { "epoch": 3.14, "learning_rate": 3.0389561270801818e-05, "loss": 0.1583, "step": 4150 }, { "epoch": 3.14, "learning_rate": 3.0384833585476554e-05, "loss": 0.1541, "step": 4151 }, { "epoch": 3.14, "learning_rate": 3.038010590015129e-05, "loss": 0.1513, "step": 4152 }, { "epoch": 3.14, "learning_rate": 3.037537821482602e-05, "loss": 0.1292, "step": 4153 }, { "epoch": 3.14, "learning_rate": 3.037065052950076e-05, "loss": 0.1486, "step": 4154 }, { "epoch": 3.14, "learning_rate": 3.0365922844175492e-05, "loss": 0.1383, "step": 4155 }, { "epoch": 3.14, "learning_rate": 3.036119515885023e-05, "loss": 0.127, "step": 4156 }, { "epoch": 3.14, "learning_rate": 3.0356467473524963e-05, "loss": 0.1475, "step": 4157 }, { "epoch": 3.14, "learning_rate": 3.0351739788199702e-05, "loss": 0.1477, "step": 4158 }, { "epoch": 3.15, "learning_rate": 3.0347012102874434e-05, "loss": 0.1489, "step": 4159 }, { "epoch": 3.15, "learning_rate": 3.0342284417549173e-05, "loss": 0.131, "step": 4160 }, { "epoch": 3.15, "learning_rate": 3.0337556732223905e-05, "loss": 0.1503, "step": 4161 }, { "epoch": 3.15, "learning_rate": 3.0332829046898644e-05, "loss": 0.154, "step": 4162 }, { "epoch": 3.15, "learning_rate": 3.0328101361573376e-05, "loss": 0.1416, "step": 4163 }, { "epoch": 3.15, "learning_rate": 3.032337367624811e-05, "loss": 0.1136, "step": 4164 }, { "epoch": 3.15, "learning_rate": 3.0318645990922847e-05, "loss": 0.174, "step": 4165 }, { "epoch": 3.15, "learning_rate": 3.0313918305597582e-05, "loss": 0.1484, "step": 4166 }, { "epoch": 3.15, "learning_rate": 3.0309190620272314e-05, "loss": 0.121, "step": 4167 }, { "epoch": 3.15, "learning_rate": 3.0304462934947053e-05, "loss": 0.1419, "step": 4168 }, { "epoch": 3.15, "learning_rate": 3.0299735249621785e-05, "loss": 0.1405, "step": 4169 }, { "epoch": 3.15, "learning_rate": 3.0295007564296524e-05, "loss": 0.1372, "step": 4170 }, { "epoch": 3.15, "learning_rate": 3.0290279878971256e-05, "loss": 0.1396, "step": 4171 }, { "epoch": 3.16, "learning_rate": 3.0285552193645995e-05, "loss": 0.1358, "step": 4172 }, { "epoch": 3.16, "learning_rate": 3.0280824508320727e-05, "loss": 0.1542, "step": 4173 }, { "epoch": 3.16, "learning_rate": 3.0276096822995466e-05, "loss": 0.1452, "step": 4174 }, { "epoch": 3.16, "learning_rate": 3.0271369137670198e-05, "loss": 0.1481, "step": 4175 }, { "epoch": 3.16, "learning_rate": 3.0266641452344936e-05, "loss": 0.1311, "step": 4176 }, { "epoch": 3.16, "learning_rate": 3.026191376701967e-05, "loss": 0.141, "step": 4177 }, { "epoch": 3.16, "learning_rate": 3.0257186081694404e-05, "loss": 0.1489, "step": 4178 }, { "epoch": 3.16, "learning_rate": 3.025245839636914e-05, "loss": 0.1757, "step": 4179 }, { "epoch": 3.16, "learning_rate": 3.0247730711043875e-05, "loss": 0.1708, "step": 4180 }, { "epoch": 3.16, "learning_rate": 3.0243003025718607e-05, "loss": 0.1273, "step": 4181 }, { "epoch": 3.16, "learning_rate": 3.0238275340393346e-05, "loss": 0.1546, "step": 4182 }, { "epoch": 3.16, "learning_rate": 3.0233547655068078e-05, "loss": 0.144, "step": 4183 }, { "epoch": 3.16, "learning_rate": 3.0228819969742817e-05, "loss": 0.1443, "step": 4184 }, { "epoch": 3.17, "learning_rate": 3.022409228441755e-05, "loss": 0.1584, "step": 4185 }, { "epoch": 3.17, "learning_rate": 3.0219364599092288e-05, "loss": 0.1625, "step": 4186 }, { "epoch": 3.17, "learning_rate": 3.021463691376702e-05, "loss": 0.1403, "step": 4187 }, { "epoch": 3.17, "learning_rate": 3.020990922844176e-05, "loss": 0.1593, "step": 4188 }, { "epoch": 3.17, "learning_rate": 3.020518154311649e-05, "loss": 0.127, "step": 4189 }, { "epoch": 3.17, "learning_rate": 3.020045385779123e-05, "loss": 0.1308, "step": 4190 }, { "epoch": 3.17, "learning_rate": 3.019572617246596e-05, "loss": 0.1339, "step": 4191 }, { "epoch": 3.17, "learning_rate": 3.0190998487140697e-05, "loss": 0.1564, "step": 4192 }, { "epoch": 3.17, "learning_rate": 3.018627080181543e-05, "loss": 0.1774, "step": 4193 }, { "epoch": 3.17, "learning_rate": 3.0181543116490168e-05, "loss": 0.1538, "step": 4194 }, { "epoch": 3.17, "learning_rate": 3.01768154311649e-05, "loss": 0.1314, "step": 4195 }, { "epoch": 3.17, "learning_rate": 3.017208774583964e-05, "loss": 0.1555, "step": 4196 }, { "epoch": 3.17, "learning_rate": 3.016736006051437e-05, "loss": 0.1317, "step": 4197 }, { "epoch": 3.17, "learning_rate": 3.016263237518911e-05, "loss": 0.1374, "step": 4198 }, { "epoch": 3.18, "learning_rate": 3.015790468986384e-05, "loss": 0.1374, "step": 4199 }, { "epoch": 3.18, "learning_rate": 3.015317700453858e-05, "loss": 0.1363, "step": 4200 }, { "epoch": 3.18, "learning_rate": 3.0148449319213312e-05, "loss": 0.1774, "step": 4201 }, { "epoch": 3.18, "learning_rate": 3.014372163388805e-05, "loss": 0.1408, "step": 4202 }, { "epoch": 3.18, "learning_rate": 3.0138993948562783e-05, "loss": 0.1626, "step": 4203 }, { "epoch": 3.18, "learning_rate": 3.0134266263237522e-05, "loss": 0.1562, "step": 4204 }, { "epoch": 3.18, "learning_rate": 3.0129538577912254e-05, "loss": 0.1274, "step": 4205 }, { "epoch": 3.18, "learning_rate": 3.012481089258699e-05, "loss": 0.1324, "step": 4206 }, { "epoch": 3.18, "learning_rate": 3.0120083207261722e-05, "loss": 0.1857, "step": 4207 }, { "epoch": 3.18, "learning_rate": 3.011535552193646e-05, "loss": 0.1298, "step": 4208 }, { "epoch": 3.18, "learning_rate": 3.01106278366112e-05, "loss": 0.134, "step": 4209 }, { "epoch": 3.18, "learning_rate": 3.010590015128593e-05, "loss": 0.1342, "step": 4210 }, { "epoch": 3.18, "learning_rate": 3.010117246596067e-05, "loss": 0.1499, "step": 4211 }, { "epoch": 3.19, "learning_rate": 3.0096444780635402e-05, "loss": 0.1604, "step": 4212 }, { "epoch": 3.19, "learning_rate": 3.009171709531014e-05, "loss": 0.1556, "step": 4213 }, { "epoch": 3.19, "learning_rate": 3.0086989409984873e-05, "loss": 0.1224, "step": 4214 }, { "epoch": 3.19, "learning_rate": 3.0082261724659612e-05, "loss": 0.1527, "step": 4215 }, { "epoch": 3.19, "learning_rate": 3.0077534039334344e-05, "loss": 0.1353, "step": 4216 }, { "epoch": 3.19, "learning_rate": 3.007280635400908e-05, "loss": 0.1603, "step": 4217 }, { "epoch": 3.19, "learning_rate": 3.0068078668683815e-05, "loss": 0.1784, "step": 4218 }, { "epoch": 3.19, "learning_rate": 3.006335098335855e-05, "loss": 0.1511, "step": 4219 }, { "epoch": 3.19, "learning_rate": 3.0058623298033283e-05, "loss": 0.1365, "step": 4220 }, { "epoch": 3.19, "learning_rate": 3.005389561270802e-05, "loss": 0.1487, "step": 4221 }, { "epoch": 3.19, "learning_rate": 3.0049167927382753e-05, "loss": 0.131, "step": 4222 }, { "epoch": 3.19, "learning_rate": 3.0044440242057492e-05, "loss": 0.1328, "step": 4223 }, { "epoch": 3.19, "learning_rate": 3.0039712556732224e-05, "loss": 0.1523, "step": 4224 }, { "epoch": 3.2, "learning_rate": 3.0034984871406963e-05, "loss": 0.1596, "step": 4225 }, { "epoch": 3.2, "learning_rate": 3.0030257186081695e-05, "loss": 0.1466, "step": 4226 }, { "epoch": 3.2, "learning_rate": 3.0025529500756434e-05, "loss": 0.1708, "step": 4227 }, { "epoch": 3.2, "learning_rate": 3.0020801815431166e-05, "loss": 0.1381, "step": 4228 }, { "epoch": 3.2, "learning_rate": 3.0016074130105905e-05, "loss": 0.1342, "step": 4229 }, { "epoch": 3.2, "learning_rate": 3.0011346444780637e-05, "loss": 0.1424, "step": 4230 }, { "epoch": 3.2, "learning_rate": 3.0006618759455372e-05, "loss": 0.1429, "step": 4231 }, { "epoch": 3.2, "learning_rate": 3.0001891074130108e-05, "loss": 0.1764, "step": 4232 }, { "epoch": 3.2, "learning_rate": 2.9997163388804843e-05, "loss": 0.1347, "step": 4233 }, { "epoch": 3.2, "learning_rate": 2.9992435703479575e-05, "loss": 0.1535, "step": 4234 }, { "epoch": 3.2, "learning_rate": 2.9987708018154314e-05, "loss": 0.1533, "step": 4235 }, { "epoch": 3.2, "learning_rate": 2.9982980332829046e-05, "loss": 0.1362, "step": 4236 }, { "epoch": 3.2, "learning_rate": 2.9978252647503785e-05, "loss": 0.1268, "step": 4237 }, { "epoch": 3.21, "learning_rate": 2.9973524962178517e-05, "loss": 0.1894, "step": 4238 }, { "epoch": 3.21, "learning_rate": 2.9968797276853256e-05, "loss": 0.1538, "step": 4239 }, { "epoch": 3.21, "learning_rate": 2.9964069591527988e-05, "loss": 0.1566, "step": 4240 }, { "epoch": 3.21, "learning_rate": 2.9959341906202727e-05, "loss": 0.1387, "step": 4241 }, { "epoch": 3.21, "learning_rate": 2.995461422087746e-05, "loss": 0.1472, "step": 4242 }, { "epoch": 3.21, "learning_rate": 2.9949886535552198e-05, "loss": 0.1459, "step": 4243 }, { "epoch": 3.21, "learning_rate": 2.994515885022693e-05, "loss": 0.1483, "step": 4244 }, { "epoch": 3.21, "learning_rate": 2.9940431164901665e-05, "loss": 0.1514, "step": 4245 }, { "epoch": 3.21, "learning_rate": 2.9935703479576397e-05, "loss": 0.1361, "step": 4246 }, { "epoch": 3.21, "learning_rate": 2.9930975794251136e-05, "loss": 0.1449, "step": 4247 }, { "epoch": 3.21, "learning_rate": 2.992624810892587e-05, "loss": 0.151, "step": 4248 }, { "epoch": 3.21, "learning_rate": 2.9921520423600607e-05, "loss": 0.123, "step": 4249 }, { "epoch": 3.21, "learning_rate": 2.991679273827534e-05, "loss": 0.145, "step": 4250 }, { "epoch": 3.21, "learning_rate": 2.9912065052950078e-05, "loss": 0.1715, "step": 4251 }, { "epoch": 3.22, "learning_rate": 2.990733736762481e-05, "loss": 0.1858, "step": 4252 }, { "epoch": 3.22, "learning_rate": 2.990260968229955e-05, "loss": 0.145, "step": 4253 }, { "epoch": 3.22, "learning_rate": 2.989788199697428e-05, "loss": 0.1479, "step": 4254 }, { "epoch": 3.22, "learning_rate": 2.989315431164902e-05, "loss": 0.1372, "step": 4255 }, { "epoch": 3.22, "learning_rate": 2.9888426626323752e-05, "loss": 0.1475, "step": 4256 }, { "epoch": 3.22, "learning_rate": 2.988369894099849e-05, "loss": 0.1594, "step": 4257 }, { "epoch": 3.22, "learning_rate": 2.9878971255673223e-05, "loss": 0.1258, "step": 4258 }, { "epoch": 3.22, "learning_rate": 2.9874243570347958e-05, "loss": 0.1587, "step": 4259 }, { "epoch": 3.22, "learning_rate": 2.986951588502269e-05, "loss": 0.1756, "step": 4260 }, { "epoch": 3.22, "learning_rate": 2.986478819969743e-05, "loss": 0.1528, "step": 4261 }, { "epoch": 3.22, "learning_rate": 2.986006051437216e-05, "loss": 0.1507, "step": 4262 }, { "epoch": 3.22, "learning_rate": 2.98553328290469e-05, "loss": 0.1382, "step": 4263 }, { "epoch": 3.22, "learning_rate": 2.9850605143721632e-05, "loss": 0.1649, "step": 4264 }, { "epoch": 3.23, "learning_rate": 2.984587745839637e-05, "loss": 0.1225, "step": 4265 }, { "epoch": 3.23, "learning_rate": 2.9841149773071103e-05, "loss": 0.1317, "step": 4266 }, { "epoch": 3.23, "learning_rate": 2.9836422087745842e-05, "loss": 0.1517, "step": 4267 }, { "epoch": 3.23, "learning_rate": 2.9831694402420574e-05, "loss": 0.1411, "step": 4268 }, { "epoch": 3.23, "learning_rate": 2.9826966717095313e-05, "loss": 0.1314, "step": 4269 }, { "epoch": 3.23, "learning_rate": 2.9822239031770048e-05, "loss": 0.138, "step": 4270 }, { "epoch": 3.23, "learning_rate": 2.9817511346444784e-05, "loss": 0.1572, "step": 4271 }, { "epoch": 3.23, "learning_rate": 2.981278366111952e-05, "loss": 0.1481, "step": 4272 }, { "epoch": 3.23, "learning_rate": 2.980805597579425e-05, "loss": 0.1473, "step": 4273 }, { "epoch": 3.23, "learning_rate": 2.980332829046899e-05, "loss": 0.1499, "step": 4274 }, { "epoch": 3.23, "learning_rate": 2.9798600605143722e-05, "loss": 0.1396, "step": 4275 }, { "epoch": 3.23, "learning_rate": 2.979387291981846e-05, "loss": 0.1433, "step": 4276 }, { "epoch": 3.23, "learning_rate": 2.9789145234493193e-05, "loss": 0.1414, "step": 4277 }, { "epoch": 3.24, "learning_rate": 2.9784417549167932e-05, "loss": 0.1408, "step": 4278 }, { "epoch": 3.24, "learning_rate": 2.9779689863842664e-05, "loss": 0.1432, "step": 4279 }, { "epoch": 3.24, "learning_rate": 2.9774962178517403e-05, "loss": 0.1361, "step": 4280 }, { "epoch": 3.24, "learning_rate": 2.9770234493192135e-05, "loss": 0.1296, "step": 4281 }, { "epoch": 3.24, "learning_rate": 2.9765506807866874e-05, "loss": 0.1674, "step": 4282 }, { "epoch": 3.24, "learning_rate": 2.9760779122541606e-05, "loss": 0.1215, "step": 4283 }, { "epoch": 3.24, "learning_rate": 2.975605143721634e-05, "loss": 0.1628, "step": 4284 }, { "epoch": 3.24, "learning_rate": 2.9751323751891076e-05, "loss": 0.1503, "step": 4285 }, { "epoch": 3.24, "learning_rate": 2.9746596066565812e-05, "loss": 0.1524, "step": 4286 }, { "epoch": 3.24, "learning_rate": 2.9741868381240544e-05, "loss": 0.1624, "step": 4287 }, { "epoch": 3.24, "learning_rate": 2.9737140695915283e-05, "loss": 0.1376, "step": 4288 }, { "epoch": 3.24, "learning_rate": 2.9732413010590015e-05, "loss": 0.1187, "step": 4289 }, { "epoch": 3.24, "learning_rate": 2.9727685325264754e-05, "loss": 0.1609, "step": 4290 }, { "epoch": 3.25, "learning_rate": 2.9722957639939486e-05, "loss": 0.1426, "step": 4291 }, { "epoch": 3.25, "learning_rate": 2.9718229954614225e-05, "loss": 0.1651, "step": 4292 }, { "epoch": 3.25, "learning_rate": 2.9713502269288957e-05, "loss": 0.1177, "step": 4293 }, { "epoch": 3.25, "learning_rate": 2.9708774583963696e-05, "loss": 0.128, "step": 4294 }, { "epoch": 3.25, "learning_rate": 2.9704046898638428e-05, "loss": 0.1753, "step": 4295 }, { "epoch": 3.25, "learning_rate": 2.9699319213313166e-05, "loss": 0.1249, "step": 4296 }, { "epoch": 3.25, "learning_rate": 2.96945915279879e-05, "loss": 0.1454, "step": 4297 }, { "epoch": 3.25, "learning_rate": 2.9689863842662634e-05, "loss": 0.1422, "step": 4298 }, { "epoch": 3.25, "learning_rate": 2.9685136157337366e-05, "loss": 0.1498, "step": 4299 }, { "epoch": 3.25, "learning_rate": 2.9680408472012105e-05, "loss": 0.1687, "step": 4300 }, { "epoch": 3.25, "learning_rate": 2.9675680786686837e-05, "loss": 0.1264, "step": 4301 }, { "epoch": 3.25, "learning_rate": 2.9670953101361576e-05, "loss": 0.1607, "step": 4302 }, { "epoch": 3.25, "learning_rate": 2.9666225416036308e-05, "loss": 0.14, "step": 4303 }, { "epoch": 3.26, "learning_rate": 2.9661497730711047e-05, "loss": 0.1507, "step": 4304 }, { "epoch": 3.26, "learning_rate": 2.965677004538578e-05, "loss": 0.1335, "step": 4305 }, { "epoch": 3.26, "learning_rate": 2.9652042360060517e-05, "loss": 0.1457, "step": 4306 }, { "epoch": 3.26, "learning_rate": 2.964731467473525e-05, "loss": 0.1375, "step": 4307 }, { "epoch": 3.26, "learning_rate": 2.964258698940999e-05, "loss": 0.1706, "step": 4308 }, { "epoch": 3.26, "learning_rate": 2.963785930408472e-05, "loss": 0.1445, "step": 4309 }, { "epoch": 3.26, "learning_rate": 2.963313161875946e-05, "loss": 0.132, "step": 4310 }, { "epoch": 3.26, "learning_rate": 2.962840393343419e-05, "loss": 0.1441, "step": 4311 }, { "epoch": 3.26, "learning_rate": 2.9623676248108927e-05, "loss": 0.1446, "step": 4312 }, { "epoch": 3.26, "learning_rate": 2.961894856278366e-05, "loss": 0.1706, "step": 4313 }, { "epoch": 3.26, "learning_rate": 2.9614220877458398e-05, "loss": 0.1419, "step": 4314 }, { "epoch": 3.26, "learning_rate": 2.960949319213313e-05, "loss": 0.1455, "step": 4315 }, { "epoch": 3.26, "learning_rate": 2.960476550680787e-05, "loss": 0.1398, "step": 4316 }, { "epoch": 3.26, "learning_rate": 2.96000378214826e-05, "loss": 0.1615, "step": 4317 }, { "epoch": 3.27, "learning_rate": 2.959531013615734e-05, "loss": 0.1368, "step": 4318 }, { "epoch": 3.27, "learning_rate": 2.959058245083207e-05, "loss": 0.1636, "step": 4319 }, { "epoch": 3.27, "learning_rate": 2.958585476550681e-05, "loss": 0.1658, "step": 4320 }, { "epoch": 3.27, "learning_rate": 2.9581127080181542e-05, "loss": 0.1318, "step": 4321 }, { "epoch": 3.27, "learning_rate": 2.957639939485628e-05, "loss": 0.1281, "step": 4322 }, { "epoch": 3.27, "learning_rate": 2.9571671709531013e-05, "loss": 0.1586, "step": 4323 }, { "epoch": 3.27, "learning_rate": 2.9566944024205752e-05, "loss": 0.1489, "step": 4324 }, { "epoch": 3.27, "learning_rate": 2.9562216338880484e-05, "loss": 0.1753, "step": 4325 }, { "epoch": 3.27, "learning_rate": 2.955748865355522e-05, "loss": 0.141, "step": 4326 }, { "epoch": 3.27, "learning_rate": 2.9552760968229952e-05, "loss": 0.1523, "step": 4327 }, { "epoch": 3.27, "learning_rate": 2.954803328290469e-05, "loss": 0.1527, "step": 4328 }, { "epoch": 3.27, "learning_rate": 2.954330559757943e-05, "loss": 0.1252, "step": 4329 }, { "epoch": 3.27, "learning_rate": 2.953857791225416e-05, "loss": 0.1246, "step": 4330 }, { "epoch": 3.28, "learning_rate": 2.95338502269289e-05, "loss": 0.1417, "step": 4331 }, { "epoch": 3.28, "learning_rate": 2.9529122541603632e-05, "loss": 0.1505, "step": 4332 }, { "epoch": 3.28, "learning_rate": 2.952439485627837e-05, "loss": 0.1489, "step": 4333 }, { "epoch": 3.28, "learning_rate": 2.9519667170953103e-05, "loss": 0.1441, "step": 4334 }, { "epoch": 3.28, "learning_rate": 2.9514939485627842e-05, "loss": 0.1349, "step": 4335 }, { "epoch": 3.28, "learning_rate": 2.9510211800302574e-05, "loss": 0.1435, "step": 4336 }, { "epoch": 3.28, "learning_rate": 2.950548411497731e-05, "loss": 0.1521, "step": 4337 }, { "epoch": 3.28, "learning_rate": 2.9500756429652045e-05, "loss": 0.1433, "step": 4338 }, { "epoch": 3.28, "learning_rate": 2.949602874432678e-05, "loss": 0.1492, "step": 4339 }, { "epoch": 3.28, "learning_rate": 2.9491301059001513e-05, "loss": 0.1212, "step": 4340 }, { "epoch": 3.28, "learning_rate": 2.948657337367625e-05, "loss": 0.1521, "step": 4341 }, { "epoch": 3.28, "learning_rate": 2.9481845688350983e-05, "loss": 0.1511, "step": 4342 }, { "epoch": 3.28, "learning_rate": 2.9477118003025722e-05, "loss": 0.157, "step": 4343 }, { "epoch": 3.29, "learning_rate": 2.9472390317700454e-05, "loss": 0.1286, "step": 4344 }, { "epoch": 3.29, "learning_rate": 2.9467662632375193e-05, "loss": 0.1492, "step": 4345 }, { "epoch": 3.29, "learning_rate": 2.9462934947049925e-05, "loss": 0.1407, "step": 4346 }, { "epoch": 3.29, "learning_rate": 2.9458207261724664e-05, "loss": 0.1336, "step": 4347 }, { "epoch": 3.29, "learning_rate": 2.9453479576399396e-05, "loss": 0.1449, "step": 4348 }, { "epoch": 3.29, "learning_rate": 2.9448751891074135e-05, "loss": 0.154, "step": 4349 }, { "epoch": 3.29, "learning_rate": 2.9444024205748867e-05, "loss": 0.1354, "step": 4350 }, { "epoch": 3.29, "learning_rate": 2.9439296520423602e-05, "loss": 0.1557, "step": 4351 }, { "epoch": 3.29, "learning_rate": 2.9434568835098335e-05, "loss": 0.1494, "step": 4352 }, { "epoch": 3.29, "learning_rate": 2.9429841149773073e-05, "loss": 0.1569, "step": 4353 }, { "epoch": 3.29, "learning_rate": 2.9425113464447805e-05, "loss": 0.1581, "step": 4354 }, { "epoch": 3.29, "learning_rate": 2.9420385779122544e-05, "loss": 0.1634, "step": 4355 }, { "epoch": 3.29, "learning_rate": 2.9415658093797276e-05, "loss": 0.1485, "step": 4356 }, { "epoch": 3.3, "learning_rate": 2.9410930408472015e-05, "loss": 0.1332, "step": 4357 }, { "epoch": 3.3, "learning_rate": 2.9406202723146747e-05, "loss": 0.1415, "step": 4358 }, { "epoch": 3.3, "learning_rate": 2.9401475037821486e-05, "loss": 0.1628, "step": 4359 }, { "epoch": 3.3, "learning_rate": 2.9396747352496218e-05, "loss": 0.1515, "step": 4360 }, { "epoch": 3.3, "learning_rate": 2.9392019667170957e-05, "loss": 0.1963, "step": 4361 }, { "epoch": 3.3, "learning_rate": 2.938729198184569e-05, "loss": 0.1367, "step": 4362 }, { "epoch": 3.3, "learning_rate": 2.9382564296520428e-05, "loss": 0.1381, "step": 4363 }, { "epoch": 3.3, "learning_rate": 2.937783661119516e-05, "loss": 0.1377, "step": 4364 }, { "epoch": 3.3, "learning_rate": 2.9373108925869895e-05, "loss": 0.1453, "step": 4365 }, { "epoch": 3.3, "learning_rate": 2.9368381240544627e-05, "loss": 0.1556, "step": 4366 }, { "epoch": 3.3, "learning_rate": 2.9363653555219366e-05, "loss": 0.1625, "step": 4367 }, { "epoch": 3.3, "learning_rate": 2.9358925869894098e-05, "loss": 0.1355, "step": 4368 }, { "epoch": 3.3, "learning_rate": 2.9354198184568837e-05, "loss": 0.1423, "step": 4369 }, { "epoch": 3.3, "learning_rate": 2.934947049924357e-05, "loss": 0.1507, "step": 4370 }, { "epoch": 3.31, "learning_rate": 2.9344742813918308e-05, "loss": 0.141, "step": 4371 }, { "epoch": 3.31, "learning_rate": 2.934001512859304e-05, "loss": 0.1629, "step": 4372 }, { "epoch": 3.31, "learning_rate": 2.933528744326778e-05, "loss": 0.1413, "step": 4373 }, { "epoch": 3.31, "learning_rate": 2.933055975794251e-05, "loss": 0.1426, "step": 4374 }, { "epoch": 3.31, "learning_rate": 2.932583207261725e-05, "loss": 0.1548, "step": 4375 }, { "epoch": 3.31, "learning_rate": 2.9321104387291982e-05, "loss": 0.1381, "step": 4376 }, { "epoch": 3.31, "learning_rate": 2.931637670196672e-05, "loss": 0.1576, "step": 4377 }, { "epoch": 3.31, "learning_rate": 2.9311649016641453e-05, "loss": 0.1515, "step": 4378 }, { "epoch": 3.31, "learning_rate": 2.9306921331316188e-05, "loss": 0.1578, "step": 4379 }, { "epoch": 3.31, "learning_rate": 2.930219364599092e-05, "loss": 0.1367, "step": 4380 }, { "epoch": 3.31, "learning_rate": 2.929746596066566e-05, "loss": 0.1474, "step": 4381 }, { "epoch": 3.31, "learning_rate": 2.929273827534039e-05, "loss": 0.138, "step": 4382 }, { "epoch": 3.31, "learning_rate": 2.928801059001513e-05, "loss": 0.1316, "step": 4383 }, { "epoch": 3.32, "learning_rate": 2.9283282904689862e-05, "loss": 0.1183, "step": 4384 }, { "epoch": 3.32, "learning_rate": 2.92785552193646e-05, "loss": 0.1484, "step": 4385 }, { "epoch": 3.32, "learning_rate": 2.9273827534039333e-05, "loss": 0.1533, "step": 4386 }, { "epoch": 3.32, "learning_rate": 2.9269099848714072e-05, "loss": 0.1476, "step": 4387 }, { "epoch": 3.32, "learning_rate": 2.9264372163388804e-05, "loss": 0.1601, "step": 4388 }, { "epoch": 3.32, "learning_rate": 2.9259644478063543e-05, "loss": 0.1433, "step": 4389 }, { "epoch": 3.32, "learning_rate": 2.9254916792738278e-05, "loss": 0.162, "step": 4390 }, { "epoch": 3.32, "learning_rate": 2.9250189107413014e-05, "loss": 0.1429, "step": 4391 }, { "epoch": 3.32, "learning_rate": 2.924546142208775e-05, "loss": 0.1449, "step": 4392 }, { "epoch": 3.32, "learning_rate": 2.924073373676248e-05, "loss": 0.1404, "step": 4393 }, { "epoch": 3.32, "learning_rate": 2.923600605143722e-05, "loss": 0.1349, "step": 4394 }, { "epoch": 3.32, "learning_rate": 2.9231278366111952e-05, "loss": 0.1567, "step": 4395 }, { "epoch": 3.32, "learning_rate": 2.922655068078669e-05, "loss": 0.1844, "step": 4396 }, { "epoch": 3.33, "learning_rate": 2.9221822995461423e-05, "loss": 0.1449, "step": 4397 }, { "epoch": 3.33, "learning_rate": 2.9217095310136162e-05, "loss": 0.1399, "step": 4398 }, { "epoch": 3.33, "learning_rate": 2.9212367624810894e-05, "loss": 0.1626, "step": 4399 }, { "epoch": 3.33, "learning_rate": 2.9207639939485633e-05, "loss": 0.1331, "step": 4400 }, { "epoch": 3.33, "learning_rate": 2.9202912254160365e-05, "loss": 0.1299, "step": 4401 }, { "epoch": 3.33, "learning_rate": 2.9198184568835103e-05, "loss": 0.1566, "step": 4402 }, { "epoch": 3.33, "learning_rate": 2.9193456883509836e-05, "loss": 0.1492, "step": 4403 }, { "epoch": 3.33, "learning_rate": 2.918872919818457e-05, "loss": 0.1637, "step": 4404 }, { "epoch": 3.33, "learning_rate": 2.9184001512859303e-05, "loss": 0.1414, "step": 4405 }, { "epoch": 3.33, "learning_rate": 2.9179273827534042e-05, "loss": 0.1256, "step": 4406 }, { "epoch": 3.33, "learning_rate": 2.9174546142208774e-05, "loss": 0.1504, "step": 4407 }, { "epoch": 3.33, "learning_rate": 2.9169818456883513e-05, "loss": 0.1246, "step": 4408 }, { "epoch": 3.33, "learning_rate": 2.9165090771558245e-05, "loss": 0.1342, "step": 4409 }, { "epoch": 3.34, "learning_rate": 2.9160363086232984e-05, "loss": 0.1362, "step": 4410 }, { "epoch": 3.34, "learning_rate": 2.9155635400907716e-05, "loss": 0.1382, "step": 4411 }, { "epoch": 3.34, "learning_rate": 2.9150907715582455e-05, "loss": 0.1434, "step": 4412 }, { "epoch": 3.34, "learning_rate": 2.9146180030257187e-05, "loss": 0.1499, "step": 4413 }, { "epoch": 3.34, "learning_rate": 2.9141452344931925e-05, "loss": 0.1527, "step": 4414 }, { "epoch": 3.34, "learning_rate": 2.9136724659606658e-05, "loss": 0.1549, "step": 4415 }, { "epoch": 3.34, "learning_rate": 2.9131996974281396e-05, "loss": 0.1582, "step": 4416 }, { "epoch": 3.34, "learning_rate": 2.912726928895613e-05, "loss": 0.1347, "step": 4417 }, { "epoch": 3.34, "learning_rate": 2.9122541603630864e-05, "loss": 0.141, "step": 4418 }, { "epoch": 3.34, "learning_rate": 2.9117813918305596e-05, "loss": 0.1486, "step": 4419 }, { "epoch": 3.34, "learning_rate": 2.9113086232980335e-05, "loss": 0.1537, "step": 4420 }, { "epoch": 3.34, "learning_rate": 2.9108358547655067e-05, "loss": 0.1707, "step": 4421 }, { "epoch": 3.34, "learning_rate": 2.9103630862329806e-05, "loss": 0.1363, "step": 4422 }, { "epoch": 3.35, "learning_rate": 2.9098903177004538e-05, "loss": 0.1409, "step": 4423 }, { "epoch": 3.35, "learning_rate": 2.9094175491679277e-05, "loss": 0.1447, "step": 4424 }, { "epoch": 3.35, "learning_rate": 2.908944780635401e-05, "loss": 0.1455, "step": 4425 }, { "epoch": 3.35, "learning_rate": 2.9084720121028747e-05, "loss": 0.1335, "step": 4426 }, { "epoch": 3.35, "learning_rate": 2.907999243570348e-05, "loss": 0.1377, "step": 4427 }, { "epoch": 3.35, "learning_rate": 2.907526475037822e-05, "loss": 0.1542, "step": 4428 }, { "epoch": 3.35, "learning_rate": 2.907053706505295e-05, "loss": 0.1489, "step": 4429 }, { "epoch": 3.35, "learning_rate": 2.906580937972769e-05, "loss": 0.1704, "step": 4430 }, { "epoch": 3.35, "learning_rate": 2.906108169440242e-05, "loss": 0.1457, "step": 4431 }, { "epoch": 3.35, "learning_rate": 2.9056354009077157e-05, "loss": 0.1379, "step": 4432 }, { "epoch": 3.35, "learning_rate": 2.905162632375189e-05, "loss": 0.1887, "step": 4433 }, { "epoch": 3.35, "learning_rate": 2.9046898638426628e-05, "loss": 0.1288, "step": 4434 }, { "epoch": 3.35, "learning_rate": 2.904217095310136e-05, "loss": 0.144, "step": 4435 }, { "epoch": 3.35, "learning_rate": 2.90374432677761e-05, "loss": 0.1654, "step": 4436 }, { "epoch": 3.36, "learning_rate": 2.903271558245083e-05, "loss": 0.1586, "step": 4437 }, { "epoch": 3.36, "learning_rate": 2.902798789712557e-05, "loss": 0.1422, "step": 4438 }, { "epoch": 3.36, "learning_rate": 2.90232602118003e-05, "loss": 0.147, "step": 4439 }, { "epoch": 3.36, "learning_rate": 2.901853252647504e-05, "loss": 0.132, "step": 4440 }, { "epoch": 3.36, "learning_rate": 2.9013804841149772e-05, "loss": 0.1571, "step": 4441 }, { "epoch": 3.36, "learning_rate": 2.900907715582451e-05, "loss": 0.1494, "step": 4442 }, { "epoch": 3.36, "learning_rate": 2.9004349470499243e-05, "loss": 0.1189, "step": 4443 }, { "epoch": 3.36, "learning_rate": 2.899962178517398e-05, "loss": 0.1351, "step": 4444 }, { "epoch": 3.36, "learning_rate": 2.8994894099848714e-05, "loss": 0.152, "step": 4445 }, { "epoch": 3.36, "learning_rate": 2.899016641452345e-05, "loss": 0.1494, "step": 4446 }, { "epoch": 3.36, "learning_rate": 2.898543872919818e-05, "loss": 0.1385, "step": 4447 }, { "epoch": 3.36, "learning_rate": 2.898071104387292e-05, "loss": 0.1555, "step": 4448 }, { "epoch": 3.36, "learning_rate": 2.897598335854766e-05, "loss": 0.1633, "step": 4449 }, { "epoch": 3.37, "learning_rate": 2.897125567322239e-05, "loss": 0.1478, "step": 4450 }, { "epoch": 3.37, "learning_rate": 2.896652798789713e-05, "loss": 0.1439, "step": 4451 }, { "epoch": 3.37, "learning_rate": 2.8961800302571862e-05, "loss": 0.1467, "step": 4452 }, { "epoch": 3.37, "learning_rate": 2.89570726172466e-05, "loss": 0.1386, "step": 4453 }, { "epoch": 3.37, "learning_rate": 2.8952344931921333e-05, "loss": 0.1363, "step": 4454 }, { "epoch": 3.37, "learning_rate": 2.8947617246596072e-05, "loss": 0.138, "step": 4455 }, { "epoch": 3.37, "learning_rate": 2.8942889561270804e-05, "loss": 0.1347, "step": 4456 }, { "epoch": 3.37, "learning_rate": 2.893816187594554e-05, "loss": 0.1691, "step": 4457 }, { "epoch": 3.37, "learning_rate": 2.893343419062027e-05, "loss": 0.1396, "step": 4458 }, { "epoch": 3.37, "learning_rate": 2.892870650529501e-05, "loss": 0.1479, "step": 4459 }, { "epoch": 3.37, "learning_rate": 2.8923978819969743e-05, "loss": 0.1417, "step": 4460 }, { "epoch": 3.37, "learning_rate": 2.891925113464448e-05, "loss": 0.165, "step": 4461 }, { "epoch": 3.37, "learning_rate": 2.8914523449319213e-05, "loss": 0.143, "step": 4462 }, { "epoch": 3.38, "learning_rate": 2.8909795763993952e-05, "loss": 0.1545, "step": 4463 }, { "epoch": 3.38, "learning_rate": 2.8905068078668684e-05, "loss": 0.1616, "step": 4464 }, { "epoch": 3.38, "learning_rate": 2.8900340393343423e-05, "loss": 0.1487, "step": 4465 }, { "epoch": 3.38, "learning_rate": 2.8895612708018155e-05, "loss": 0.1585, "step": 4466 }, { "epoch": 3.38, "learning_rate": 2.8890885022692894e-05, "loss": 0.1516, "step": 4467 }, { "epoch": 3.38, "learning_rate": 2.8886157337367626e-05, "loss": 0.1681, "step": 4468 }, { "epoch": 3.38, "learning_rate": 2.8881429652042365e-05, "loss": 0.1302, "step": 4469 }, { "epoch": 3.38, "learning_rate": 2.8876701966717097e-05, "loss": 0.1497, "step": 4470 }, { "epoch": 3.38, "learning_rate": 2.8871974281391832e-05, "loss": 0.1989, "step": 4471 }, { "epoch": 3.38, "learning_rate": 2.8867246596066564e-05, "loss": 0.1407, "step": 4472 }, { "epoch": 3.38, "learning_rate": 2.8862518910741303e-05, "loss": 0.154, "step": 4473 }, { "epoch": 3.38, "learning_rate": 2.8857791225416035e-05, "loss": 0.1436, "step": 4474 }, { "epoch": 3.38, "learning_rate": 2.8853063540090774e-05, "loss": 0.1567, "step": 4475 }, { "epoch": 3.39, "learning_rate": 2.8848335854765506e-05, "loss": 0.1556, "step": 4476 }, { "epoch": 3.39, "learning_rate": 2.8843608169440245e-05, "loss": 0.1295, "step": 4477 }, { "epoch": 3.39, "learning_rate": 2.8838880484114977e-05, "loss": 0.1389, "step": 4478 }, { "epoch": 3.39, "learning_rate": 2.8834152798789716e-05, "loss": 0.1455, "step": 4479 }, { "epoch": 3.39, "learning_rate": 2.8829425113464448e-05, "loss": 0.1528, "step": 4480 }, { "epoch": 3.39, "learning_rate": 2.8824697428139187e-05, "loss": 0.1555, "step": 4481 }, { "epoch": 3.39, "learning_rate": 2.881996974281392e-05, "loss": 0.1404, "step": 4482 }, { "epoch": 3.39, "learning_rate": 2.8815242057488658e-05, "loss": 0.1548, "step": 4483 }, { "epoch": 3.39, "learning_rate": 2.881051437216339e-05, "loss": 0.1343, "step": 4484 }, { "epoch": 3.39, "learning_rate": 2.8805786686838125e-05, "loss": 0.143, "step": 4485 }, { "epoch": 3.39, "learning_rate": 2.8801059001512857e-05, "loss": 0.1436, "step": 4486 }, { "epoch": 3.39, "learning_rate": 2.8796331316187596e-05, "loss": 0.1588, "step": 4487 }, { "epoch": 3.39, "learning_rate": 2.8791603630862328e-05, "loss": 0.1689, "step": 4488 }, { "epoch": 3.39, "learning_rate": 2.8786875945537067e-05, "loss": 0.1452, "step": 4489 }, { "epoch": 3.4, "learning_rate": 2.87821482602118e-05, "loss": 0.1597, "step": 4490 }, { "epoch": 3.4, "learning_rate": 2.8777420574886538e-05, "loss": 0.1315, "step": 4491 }, { "epoch": 3.4, "learning_rate": 2.877269288956127e-05, "loss": 0.1499, "step": 4492 }, { "epoch": 3.4, "learning_rate": 2.876796520423601e-05, "loss": 0.1314, "step": 4493 }, { "epoch": 3.4, "learning_rate": 2.876323751891074e-05, "loss": 0.1466, "step": 4494 }, { "epoch": 3.4, "learning_rate": 2.875850983358548e-05, "loss": 0.1386, "step": 4495 }, { "epoch": 3.4, "learning_rate": 2.8753782148260212e-05, "loss": 0.1486, "step": 4496 }, { "epoch": 3.4, "learning_rate": 2.8749054462934947e-05, "loss": 0.1376, "step": 4497 }, { "epoch": 3.4, "learning_rate": 2.8744326777609683e-05, "loss": 0.1317, "step": 4498 }, { "epoch": 3.4, "learning_rate": 2.8739599092284418e-05, "loss": 0.1554, "step": 4499 }, { "epoch": 3.4, "learning_rate": 2.873487140695915e-05, "loss": 0.1512, "step": 4500 }, { "epoch": 3.4, "learning_rate": 2.873014372163389e-05, "loss": 0.1347, "step": 4501 }, { "epoch": 3.4, "learning_rate": 2.872541603630862e-05, "loss": 0.1457, "step": 4502 }, { "epoch": 3.41, "learning_rate": 2.872068835098336e-05, "loss": 0.1427, "step": 4503 }, { "epoch": 3.41, "learning_rate": 2.8715960665658092e-05, "loss": 0.1616, "step": 4504 }, { "epoch": 3.41, "learning_rate": 2.871123298033283e-05, "loss": 0.1432, "step": 4505 }, { "epoch": 3.41, "learning_rate": 2.8706505295007563e-05, "loss": 0.1395, "step": 4506 }, { "epoch": 3.41, "learning_rate": 2.8701777609682302e-05, "loss": 0.1847, "step": 4507 }, { "epoch": 3.41, "learning_rate": 2.8697049924357034e-05, "loss": 0.1664, "step": 4508 }, { "epoch": 3.41, "learning_rate": 2.8692322239031773e-05, "loss": 0.1348, "step": 4509 }, { "epoch": 3.41, "learning_rate": 2.8687594553706508e-05, "loss": 0.1234, "step": 4510 }, { "epoch": 3.41, "learning_rate": 2.868286686838124e-05, "loss": 0.1598, "step": 4511 }, { "epoch": 3.41, "learning_rate": 2.867813918305598e-05, "loss": 0.1456, "step": 4512 }, { "epoch": 3.41, "learning_rate": 2.867341149773071e-05, "loss": 0.1356, "step": 4513 }, { "epoch": 3.41, "learning_rate": 2.866868381240545e-05, "loss": 0.1411, "step": 4514 }, { "epoch": 3.41, "learning_rate": 2.8663956127080182e-05, "loss": 0.1453, "step": 4515 }, { "epoch": 3.42, "learning_rate": 2.865922844175492e-05, "loss": 0.1557, "step": 4516 }, { "epoch": 3.42, "learning_rate": 2.8654500756429653e-05, "loss": 0.1491, "step": 4517 }, { "epoch": 3.42, "learning_rate": 2.864977307110439e-05, "loss": 0.1642, "step": 4518 }, { "epoch": 3.42, "learning_rate": 2.8645045385779124e-05, "loss": 0.148, "step": 4519 }, { "epoch": 3.42, "learning_rate": 2.8640317700453863e-05, "loss": 0.1293, "step": 4520 }, { "epoch": 3.42, "learning_rate": 2.8635590015128595e-05, "loss": 0.1394, "step": 4521 }, { "epoch": 3.42, "learning_rate": 2.8630862329803333e-05, "loss": 0.1566, "step": 4522 }, { "epoch": 3.42, "learning_rate": 2.8626134644478066e-05, "loss": 0.1432, "step": 4523 }, { "epoch": 3.42, "learning_rate": 2.86214069591528e-05, "loss": 0.1468, "step": 4524 }, { "epoch": 3.42, "learning_rate": 2.8616679273827533e-05, "loss": 0.1581, "step": 4525 }, { "epoch": 3.42, "learning_rate": 2.8611951588502272e-05, "loss": 0.1427, "step": 4526 }, { "epoch": 3.42, "learning_rate": 2.8607223903177004e-05, "loss": 0.1718, "step": 4527 }, { "epoch": 3.42, "learning_rate": 2.8602496217851743e-05, "loss": 0.1494, "step": 4528 }, { "epoch": 3.43, "learning_rate": 2.8597768532526475e-05, "loss": 0.1568, "step": 4529 }, { "epoch": 3.43, "learning_rate": 2.8593040847201214e-05, "loss": 0.1396, "step": 4530 }, { "epoch": 3.43, "learning_rate": 2.8588313161875946e-05, "loss": 0.1344, "step": 4531 }, { "epoch": 3.43, "learning_rate": 2.8583585476550685e-05, "loss": 0.1453, "step": 4532 }, { "epoch": 3.43, "learning_rate": 2.8578857791225417e-05, "loss": 0.1543, "step": 4533 }, { "epoch": 3.43, "learning_rate": 2.8574130105900155e-05, "loss": 0.1332, "step": 4534 }, { "epoch": 3.43, "learning_rate": 2.8569402420574888e-05, "loss": 0.1568, "step": 4535 }, { "epoch": 3.43, "learning_rate": 2.8564674735249626e-05, "loss": 0.1559, "step": 4536 }, { "epoch": 3.43, "learning_rate": 2.855994704992436e-05, "loss": 0.1536, "step": 4537 }, { "epoch": 3.43, "learning_rate": 2.8555219364599094e-05, "loss": 0.1493, "step": 4538 }, { "epoch": 3.43, "learning_rate": 2.8550491679273826e-05, "loss": 0.1595, "step": 4539 }, { "epoch": 3.43, "learning_rate": 2.8545763993948565e-05, "loss": 0.1442, "step": 4540 }, { "epoch": 3.43, "learning_rate": 2.8541036308623297e-05, "loss": 0.1471, "step": 4541 }, { "epoch": 3.44, "learning_rate": 2.8536308623298036e-05, "loss": 0.1592, "step": 4542 }, { "epoch": 3.44, "learning_rate": 2.8531580937972768e-05, "loss": 0.1425, "step": 4543 }, { "epoch": 3.44, "learning_rate": 2.8526853252647507e-05, "loss": 0.1306, "step": 4544 }, { "epoch": 3.44, "learning_rate": 2.852212556732224e-05, "loss": 0.1445, "step": 4545 }, { "epoch": 3.44, "learning_rate": 2.8517397881996977e-05, "loss": 0.1272, "step": 4546 }, { "epoch": 3.44, "learning_rate": 2.851267019667171e-05, "loss": 0.1571, "step": 4547 }, { "epoch": 3.44, "learning_rate": 2.850794251134645e-05, "loss": 0.1662, "step": 4548 }, { "epoch": 3.44, "learning_rate": 2.850321482602118e-05, "loss": 0.1529, "step": 4549 }, { "epoch": 3.44, "learning_rate": 2.8498487140695916e-05, "loss": 0.1446, "step": 4550 }, { "epoch": 3.44, "learning_rate": 2.849375945537065e-05, "loss": 0.15, "step": 4551 }, { "epoch": 3.44, "learning_rate": 2.8489031770045387e-05, "loss": 0.1453, "step": 4552 }, { "epoch": 3.44, "learning_rate": 2.848430408472012e-05, "loss": 0.1424, "step": 4553 }, { "epoch": 3.44, "learning_rate": 2.8479576399394858e-05, "loss": 0.1572, "step": 4554 }, { "epoch": 3.44, "learning_rate": 2.847484871406959e-05, "loss": 0.1872, "step": 4555 }, { "epoch": 3.45, "learning_rate": 2.847012102874433e-05, "loss": 0.1307, "step": 4556 }, { "epoch": 3.45, "learning_rate": 2.846539334341906e-05, "loss": 0.1655, "step": 4557 }, { "epoch": 3.45, "learning_rate": 2.84606656580938e-05, "loss": 0.1452, "step": 4558 }, { "epoch": 3.45, "learning_rate": 2.845593797276853e-05, "loss": 0.152, "step": 4559 }, { "epoch": 3.45, "learning_rate": 2.845121028744327e-05, "loss": 0.1601, "step": 4560 }, { "epoch": 3.45, "learning_rate": 2.8446482602118002e-05, "loss": 0.1529, "step": 4561 }, { "epoch": 3.45, "learning_rate": 2.844175491679274e-05, "loss": 0.1266, "step": 4562 }, { "epoch": 3.45, "learning_rate": 2.8437027231467473e-05, "loss": 0.1789, "step": 4563 }, { "epoch": 3.45, "learning_rate": 2.843229954614221e-05, "loss": 0.1243, "step": 4564 }, { "epoch": 3.45, "learning_rate": 2.8427571860816944e-05, "loss": 0.1466, "step": 4565 }, { "epoch": 3.45, "learning_rate": 2.842284417549168e-05, "loss": 0.1415, "step": 4566 }, { "epoch": 3.45, "learning_rate": 2.841811649016641e-05, "loss": 0.1538, "step": 4567 }, { "epoch": 3.45, "learning_rate": 2.841338880484115e-05, "loss": 0.1349, "step": 4568 }, { "epoch": 3.46, "learning_rate": 2.840866111951589e-05, "loss": 0.1355, "step": 4569 }, { "epoch": 3.46, "learning_rate": 2.840393343419062e-05, "loss": 0.1438, "step": 4570 }, { "epoch": 3.46, "learning_rate": 2.839920574886536e-05, "loss": 0.158, "step": 4571 }, { "epoch": 3.46, "learning_rate": 2.8394478063540092e-05, "loss": 0.138, "step": 4572 }, { "epoch": 3.46, "learning_rate": 2.838975037821483e-05, "loss": 0.155, "step": 4573 }, { "epoch": 3.46, "learning_rate": 2.8385022692889563e-05, "loss": 0.1431, "step": 4574 }, { "epoch": 3.46, "learning_rate": 2.8380295007564302e-05, "loss": 0.1461, "step": 4575 }, { "epoch": 3.46, "learning_rate": 2.8375567322239034e-05, "loss": 0.1679, "step": 4576 }, { "epoch": 3.46, "learning_rate": 2.837083963691377e-05, "loss": 0.1401, "step": 4577 }, { "epoch": 3.46, "learning_rate": 2.83661119515885e-05, "loss": 0.1612, "step": 4578 }, { "epoch": 3.46, "learning_rate": 2.836138426626324e-05, "loss": 0.1634, "step": 4579 }, { "epoch": 3.46, "learning_rate": 2.8356656580937972e-05, "loss": 0.1536, "step": 4580 }, { "epoch": 3.46, "learning_rate": 2.835192889561271e-05, "loss": 0.154, "step": 4581 }, { "epoch": 3.47, "learning_rate": 2.8347201210287443e-05, "loss": 0.1424, "step": 4582 }, { "epoch": 3.47, "learning_rate": 2.8342473524962182e-05, "loss": 0.1503, "step": 4583 }, { "epoch": 3.47, "learning_rate": 2.8337745839636914e-05, "loss": 0.1422, "step": 4584 }, { "epoch": 3.47, "learning_rate": 2.8333018154311653e-05, "loss": 0.1437, "step": 4585 }, { "epoch": 3.47, "learning_rate": 2.8328290468986385e-05, "loss": 0.1446, "step": 4586 }, { "epoch": 3.47, "learning_rate": 2.8323562783661124e-05, "loss": 0.1498, "step": 4587 }, { "epoch": 3.47, "learning_rate": 2.8318835098335856e-05, "loss": 0.145, "step": 4588 }, { "epoch": 3.47, "learning_rate": 2.8314107413010595e-05, "loss": 0.1431, "step": 4589 }, { "epoch": 3.47, "learning_rate": 2.8309379727685327e-05, "loss": 0.1644, "step": 4590 }, { "epoch": 3.47, "learning_rate": 2.8304652042360062e-05, "loss": 0.1355, "step": 4591 }, { "epoch": 3.47, "learning_rate": 2.8299924357034794e-05, "loss": 0.1317, "step": 4592 }, { "epoch": 3.47, "learning_rate": 2.8295196671709533e-05, "loss": 0.1376, "step": 4593 }, { "epoch": 3.47, "learning_rate": 2.8290468986384265e-05, "loss": 0.1418, "step": 4594 }, { "epoch": 3.48, "learning_rate": 2.8285741301059004e-05, "loss": 0.1667, "step": 4595 }, { "epoch": 3.48, "learning_rate": 2.8281013615733736e-05, "loss": 0.1418, "step": 4596 }, { "epoch": 3.48, "learning_rate": 2.8276285930408475e-05, "loss": 0.1478, "step": 4597 }, { "epoch": 3.48, "learning_rate": 2.8271558245083207e-05, "loss": 0.1457, "step": 4598 }, { "epoch": 3.48, "learning_rate": 2.8266830559757946e-05, "loss": 0.1675, "step": 4599 }, { "epoch": 3.48, "learning_rate": 2.8262102874432678e-05, "loss": 0.1567, "step": 4600 }, { "epoch": 3.48, "learning_rate": 2.8257375189107417e-05, "loss": 0.1268, "step": 4601 }, { "epoch": 3.48, "learning_rate": 2.825264750378215e-05, "loss": 0.1471, "step": 4602 }, { "epoch": 3.48, "learning_rate": 2.8247919818456884e-05, "loss": 0.1591, "step": 4603 }, { "epoch": 3.48, "learning_rate": 2.824319213313162e-05, "loss": 0.161, "step": 4604 }, { "epoch": 3.48, "learning_rate": 2.8238464447806355e-05, "loss": 0.1434, "step": 4605 }, { "epoch": 3.48, "learning_rate": 2.8233736762481087e-05, "loss": 0.1556, "step": 4606 }, { "epoch": 3.48, "learning_rate": 2.8229009077155826e-05, "loss": 0.1606, "step": 4607 }, { "epoch": 3.48, "learning_rate": 2.8224281391830558e-05, "loss": 0.1557, "step": 4608 }, { "epoch": 3.49, "learning_rate": 2.8219553706505297e-05, "loss": 0.1578, "step": 4609 }, { "epoch": 3.49, "learning_rate": 2.821482602118003e-05, "loss": 0.14, "step": 4610 }, { "epoch": 3.49, "learning_rate": 2.8210098335854768e-05, "loss": 0.1275, "step": 4611 }, { "epoch": 3.49, "learning_rate": 2.82053706505295e-05, "loss": 0.1313, "step": 4612 }, { "epoch": 3.49, "learning_rate": 2.820064296520424e-05, "loss": 0.1792, "step": 4613 }, { "epoch": 3.49, "learning_rate": 2.819591527987897e-05, "loss": 0.139, "step": 4614 }, { "epoch": 3.49, "learning_rate": 2.819118759455371e-05, "loss": 0.1762, "step": 4615 }, { "epoch": 3.49, "learning_rate": 2.8186459909228442e-05, "loss": 0.1634, "step": 4616 }, { "epoch": 3.49, "learning_rate": 2.8181732223903177e-05, "loss": 0.1371, "step": 4617 }, { "epoch": 3.49, "learning_rate": 2.8177004538577913e-05, "loss": 0.1216, "step": 4618 }, { "epoch": 3.49, "learning_rate": 2.8172276853252648e-05, "loss": 0.1389, "step": 4619 }, { "epoch": 3.49, "learning_rate": 2.816754916792738e-05, "loss": 0.1235, "step": 4620 }, { "epoch": 3.49, "learning_rate": 2.816282148260212e-05, "loss": 0.147, "step": 4621 }, { "epoch": 3.5, "learning_rate": 2.815809379727685e-05, "loss": 0.1588, "step": 4622 }, { "epoch": 3.5, "learning_rate": 2.815336611195159e-05, "loss": 0.1448, "step": 4623 }, { "epoch": 3.5, "learning_rate": 2.8148638426626322e-05, "loss": 0.1542, "step": 4624 }, { "epoch": 3.5, "learning_rate": 2.814391074130106e-05, "loss": 0.146, "step": 4625 }, { "epoch": 3.5, "learning_rate": 2.8139183055975793e-05, "loss": 0.1493, "step": 4626 }, { "epoch": 3.5, "learning_rate": 2.8134455370650532e-05, "loss": 0.157, "step": 4627 }, { "epoch": 3.5, "learning_rate": 2.8129727685325264e-05, "loss": 0.1579, "step": 4628 }, { "epoch": 3.5, "learning_rate": 2.8125000000000003e-05, "loss": 0.1504, "step": 4629 }, { "epoch": 3.5, "learning_rate": 2.8120272314674738e-05, "loss": 0.1458, "step": 4630 }, { "epoch": 3.5, "learning_rate": 2.811554462934947e-05, "loss": 0.1669, "step": 4631 }, { "epoch": 3.5, "learning_rate": 2.811081694402421e-05, "loss": 0.147, "step": 4632 }, { "epoch": 3.5, "learning_rate": 2.810608925869894e-05, "loss": 0.155, "step": 4633 }, { "epoch": 3.5, "learning_rate": 2.810136157337368e-05, "loss": 0.1311, "step": 4634 }, { "epoch": 3.51, "learning_rate": 2.8096633888048412e-05, "loss": 0.1683, "step": 4635 }, { "epoch": 3.51, "learning_rate": 2.809190620272315e-05, "loss": 0.1639, "step": 4636 }, { "epoch": 3.51, "learning_rate": 2.8087178517397883e-05, "loss": 0.1303, "step": 4637 }, { "epoch": 3.51, "learning_rate": 2.808245083207262e-05, "loss": 0.1568, "step": 4638 }, { "epoch": 3.51, "learning_rate": 2.8077723146747354e-05, "loss": 0.1426, "step": 4639 }, { "epoch": 3.51, "learning_rate": 2.8072995461422093e-05, "loss": 0.1352, "step": 4640 }, { "epoch": 3.51, "learning_rate": 2.8068267776096825e-05, "loss": 0.139, "step": 4641 }, { "epoch": 3.51, "learning_rate": 2.8063540090771563e-05, "loss": 0.1742, "step": 4642 }, { "epoch": 3.51, "learning_rate": 2.8058812405446295e-05, "loss": 0.1619, "step": 4643 }, { "epoch": 3.51, "learning_rate": 2.805408472012103e-05, "loss": 0.1297, "step": 4644 }, { "epoch": 3.51, "learning_rate": 2.8049357034795763e-05, "loss": 0.1379, "step": 4645 }, { "epoch": 3.51, "learning_rate": 2.8044629349470502e-05, "loss": 0.1508, "step": 4646 }, { "epoch": 3.51, "learning_rate": 2.8039901664145234e-05, "loss": 0.1598, "step": 4647 }, { "epoch": 3.52, "learning_rate": 2.8035173978819973e-05, "loss": 0.1389, "step": 4648 }, { "epoch": 3.52, "learning_rate": 2.8030446293494705e-05, "loss": 0.1564, "step": 4649 }, { "epoch": 3.52, "learning_rate": 2.8025718608169444e-05, "loss": 0.1638, "step": 4650 }, { "epoch": 3.52, "learning_rate": 2.8020990922844176e-05, "loss": 0.1357, "step": 4651 }, { "epoch": 3.52, "learning_rate": 2.8016263237518915e-05, "loss": 0.1431, "step": 4652 }, { "epoch": 3.52, "learning_rate": 2.8011535552193647e-05, "loss": 0.1476, "step": 4653 }, { "epoch": 3.52, "learning_rate": 2.8006807866868385e-05, "loss": 0.1435, "step": 4654 }, { "epoch": 3.52, "learning_rate": 2.8002080181543117e-05, "loss": 0.1742, "step": 4655 }, { "epoch": 3.52, "learning_rate": 2.7997352496217853e-05, "loss": 0.1493, "step": 4656 }, { "epoch": 3.52, "learning_rate": 2.799262481089259e-05, "loss": 0.1476, "step": 4657 }, { "epoch": 3.52, "learning_rate": 2.7987897125567324e-05, "loss": 0.1584, "step": 4658 }, { "epoch": 3.52, "learning_rate": 2.7983169440242056e-05, "loss": 0.1654, "step": 4659 }, { "epoch": 3.52, "learning_rate": 2.7978441754916795e-05, "loss": 0.1378, "step": 4660 }, { "epoch": 3.53, "learning_rate": 2.7973714069591527e-05, "loss": 0.136, "step": 4661 }, { "epoch": 3.53, "learning_rate": 2.7968986384266266e-05, "loss": 0.1505, "step": 4662 }, { "epoch": 3.53, "learning_rate": 2.7964258698940998e-05, "loss": 0.1143, "step": 4663 }, { "epoch": 3.53, "learning_rate": 2.7959531013615737e-05, "loss": 0.1465, "step": 4664 }, { "epoch": 3.53, "learning_rate": 2.795480332829047e-05, "loss": 0.1403, "step": 4665 }, { "epoch": 3.53, "learning_rate": 2.7950075642965207e-05, "loss": 0.1427, "step": 4666 }, { "epoch": 3.53, "learning_rate": 2.794534795763994e-05, "loss": 0.15, "step": 4667 }, { "epoch": 3.53, "learning_rate": 2.7940620272314678e-05, "loss": 0.1501, "step": 4668 }, { "epoch": 3.53, "learning_rate": 2.793589258698941e-05, "loss": 0.129, "step": 4669 }, { "epoch": 3.53, "learning_rate": 2.7931164901664146e-05, "loss": 0.1492, "step": 4670 }, { "epoch": 3.53, "learning_rate": 2.792643721633888e-05, "loss": 0.1313, "step": 4671 }, { "epoch": 3.53, "learning_rate": 2.7921709531013617e-05, "loss": 0.1361, "step": 4672 }, { "epoch": 3.53, "learning_rate": 2.791698184568835e-05, "loss": 0.1705, "step": 4673 }, { "epoch": 3.53, "learning_rate": 2.7912254160363088e-05, "loss": 0.14, "step": 4674 }, { "epoch": 3.54, "learning_rate": 2.790752647503782e-05, "loss": 0.1825, "step": 4675 }, { "epoch": 3.54, "learning_rate": 2.790279878971256e-05, "loss": 0.1472, "step": 4676 }, { "epoch": 3.54, "learning_rate": 2.789807110438729e-05, "loss": 0.1413, "step": 4677 }, { "epoch": 3.54, "learning_rate": 2.789334341906203e-05, "loss": 0.134, "step": 4678 }, { "epoch": 3.54, "learning_rate": 2.788861573373676e-05, "loss": 0.1541, "step": 4679 }, { "epoch": 3.54, "learning_rate": 2.78838880484115e-05, "loss": 0.1304, "step": 4680 }, { "epoch": 3.54, "learning_rate": 2.7879160363086232e-05, "loss": 0.1362, "step": 4681 }, { "epoch": 3.54, "learning_rate": 2.787443267776097e-05, "loss": 0.1427, "step": 4682 }, { "epoch": 3.54, "learning_rate": 2.7869704992435703e-05, "loss": 0.1517, "step": 4683 }, { "epoch": 3.54, "learning_rate": 2.786497730711044e-05, "loss": 0.12, "step": 4684 }, { "epoch": 3.54, "learning_rate": 2.7860249621785174e-05, "loss": 0.1549, "step": 4685 }, { "epoch": 3.54, "learning_rate": 2.785552193645991e-05, "loss": 0.1262, "step": 4686 }, { "epoch": 3.54, "learning_rate": 2.785079425113464e-05, "loss": 0.177, "step": 4687 }, { "epoch": 3.55, "learning_rate": 2.784606656580938e-05, "loss": 0.1276, "step": 4688 }, { "epoch": 3.55, "learning_rate": 2.7841338880484113e-05, "loss": 0.1453, "step": 4689 }, { "epoch": 3.55, "learning_rate": 2.783661119515885e-05, "loss": 0.1599, "step": 4690 }, { "epoch": 3.55, "learning_rate": 2.783188350983359e-05, "loss": 0.1473, "step": 4691 }, { "epoch": 3.55, "learning_rate": 2.7827155824508322e-05, "loss": 0.136, "step": 4692 }, { "epoch": 3.55, "learning_rate": 2.782242813918306e-05, "loss": 0.1522, "step": 4693 }, { "epoch": 3.55, "learning_rate": 2.7817700453857793e-05, "loss": 0.1572, "step": 4694 }, { "epoch": 3.55, "learning_rate": 2.7812972768532532e-05, "loss": 0.1518, "step": 4695 }, { "epoch": 3.55, "learning_rate": 2.7808245083207264e-05, "loss": 0.1401, "step": 4696 }, { "epoch": 3.55, "learning_rate": 2.7803517397882e-05, "loss": 0.1665, "step": 4697 }, { "epoch": 3.55, "learning_rate": 2.779878971255673e-05, "loss": 0.1211, "step": 4698 }, { "epoch": 3.55, "learning_rate": 2.779406202723147e-05, "loss": 0.1502, "step": 4699 }, { "epoch": 3.55, "learning_rate": 2.7789334341906202e-05, "loss": 0.1331, "step": 4700 }, { "epoch": 3.56, "learning_rate": 2.778460665658094e-05, "loss": 0.1382, "step": 4701 }, { "epoch": 3.56, "learning_rate": 2.7779878971255673e-05, "loss": 0.1275, "step": 4702 }, { "epoch": 3.56, "learning_rate": 2.7775151285930412e-05, "loss": 0.1487, "step": 4703 }, { "epoch": 3.56, "learning_rate": 2.7770423600605144e-05, "loss": 0.1748, "step": 4704 }, { "epoch": 3.56, "learning_rate": 2.7765695915279883e-05, "loss": 0.1339, "step": 4705 }, { "epoch": 3.56, "learning_rate": 2.7760968229954615e-05, "loss": 0.1672, "step": 4706 }, { "epoch": 3.56, "learning_rate": 2.7756240544629354e-05, "loss": 0.1643, "step": 4707 }, { "epoch": 3.56, "learning_rate": 2.7751512859304086e-05, "loss": 0.1345, "step": 4708 }, { "epoch": 3.56, "learning_rate": 2.774678517397882e-05, "loss": 0.1498, "step": 4709 }, { "epoch": 3.56, "learning_rate": 2.7742057488653557e-05, "loss": 0.1323, "step": 4710 }, { "epoch": 3.56, "learning_rate": 2.7737329803328292e-05, "loss": 0.1827, "step": 4711 }, { "epoch": 3.56, "learning_rate": 2.7732602118003024e-05, "loss": 0.1616, "step": 4712 }, { "epoch": 3.56, "learning_rate": 2.7727874432677763e-05, "loss": 0.161, "step": 4713 }, { "epoch": 3.57, "learning_rate": 2.7723146747352495e-05, "loss": 0.1356, "step": 4714 }, { "epoch": 3.57, "learning_rate": 2.7718419062027234e-05, "loss": 0.1397, "step": 4715 }, { "epoch": 3.57, "learning_rate": 2.7713691376701966e-05, "loss": 0.1325, "step": 4716 }, { "epoch": 3.57, "learning_rate": 2.7708963691376705e-05, "loss": 0.1502, "step": 4717 }, { "epoch": 3.57, "learning_rate": 2.7704236006051437e-05, "loss": 0.1636, "step": 4718 }, { "epoch": 3.57, "learning_rate": 2.7699508320726176e-05, "loss": 0.13, "step": 4719 }, { "epoch": 3.57, "learning_rate": 2.7694780635400908e-05, "loss": 0.1424, "step": 4720 }, { "epoch": 3.57, "learning_rate": 2.7690052950075647e-05, "loss": 0.1549, "step": 4721 }, { "epoch": 3.57, "learning_rate": 2.768532526475038e-05, "loss": 0.1438, "step": 4722 }, { "epoch": 3.57, "learning_rate": 2.7680597579425114e-05, "loss": 0.1678, "step": 4723 }, { "epoch": 3.57, "learning_rate": 2.767586989409985e-05, "loss": 0.161, "step": 4724 }, { "epoch": 3.57, "learning_rate": 2.7671142208774585e-05, "loss": 0.1419, "step": 4725 }, { "epoch": 3.57, "learning_rate": 2.7666414523449317e-05, "loss": 0.1233, "step": 4726 }, { "epoch": 3.57, "learning_rate": 2.7661686838124056e-05, "loss": 0.1295, "step": 4727 }, { "epoch": 3.58, "learning_rate": 2.7656959152798788e-05, "loss": 0.1573, "step": 4728 }, { "epoch": 3.58, "learning_rate": 2.7652231467473527e-05, "loss": 0.1655, "step": 4729 }, { "epoch": 3.58, "learning_rate": 2.764750378214826e-05, "loss": 0.1376, "step": 4730 }, { "epoch": 3.58, "learning_rate": 2.7642776096822998e-05, "loss": 0.1494, "step": 4731 }, { "epoch": 3.58, "learning_rate": 2.763804841149773e-05, "loss": 0.1519, "step": 4732 }, { "epoch": 3.58, "learning_rate": 2.763332072617247e-05, "loss": 0.148, "step": 4733 }, { "epoch": 3.58, "learning_rate": 2.76285930408472e-05, "loss": 0.1454, "step": 4734 }, { "epoch": 3.58, "learning_rate": 2.762386535552194e-05, "loss": 0.1382, "step": 4735 }, { "epoch": 3.58, "learning_rate": 2.7619137670196672e-05, "loss": 0.1492, "step": 4736 }, { "epoch": 3.58, "learning_rate": 2.7614409984871407e-05, "loss": 0.1411, "step": 4737 }, { "epoch": 3.58, "learning_rate": 2.7609682299546143e-05, "loss": 0.1689, "step": 4738 }, { "epoch": 3.58, "learning_rate": 2.7604954614220878e-05, "loss": 0.1454, "step": 4739 }, { "epoch": 3.58, "learning_rate": 2.760022692889561e-05, "loss": 0.1534, "step": 4740 }, { "epoch": 3.59, "learning_rate": 2.759549924357035e-05, "loss": 0.1501, "step": 4741 }, { "epoch": 3.59, "learning_rate": 2.759077155824508e-05, "loss": 0.1534, "step": 4742 }, { "epoch": 3.59, "learning_rate": 2.758604387291982e-05, "loss": 0.1355, "step": 4743 }, { "epoch": 3.59, "learning_rate": 2.7581316187594552e-05, "loss": 0.1348, "step": 4744 }, { "epoch": 3.59, "learning_rate": 2.757658850226929e-05, "loss": 0.1497, "step": 4745 }, { "epoch": 3.59, "learning_rate": 2.7571860816944023e-05, "loss": 0.1487, "step": 4746 }, { "epoch": 3.59, "learning_rate": 2.756713313161876e-05, "loss": 0.151, "step": 4747 }, { "epoch": 3.59, "learning_rate": 2.7562405446293494e-05, "loss": 0.168, "step": 4748 }, { "epoch": 3.59, "learning_rate": 2.7557677760968233e-05, "loss": 0.145, "step": 4749 }, { "epoch": 3.59, "learning_rate": 2.7552950075642968e-05, "loss": 0.1739, "step": 4750 }, { "epoch": 3.59, "learning_rate": 2.75482223903177e-05, "loss": 0.1558, "step": 4751 }, { "epoch": 3.59, "learning_rate": 2.754349470499244e-05, "loss": 0.1608, "step": 4752 }, { "epoch": 3.59, "learning_rate": 2.753876701966717e-05, "loss": 0.1446, "step": 4753 }, { "epoch": 3.6, "learning_rate": 2.753403933434191e-05, "loss": 0.1646, "step": 4754 }, { "epoch": 3.6, "learning_rate": 2.7529311649016642e-05, "loss": 0.1399, "step": 4755 }, { "epoch": 3.6, "learning_rate": 2.752458396369138e-05, "loss": 0.1414, "step": 4756 }, { "epoch": 3.6, "learning_rate": 2.7519856278366113e-05, "loss": 0.1822, "step": 4757 }, { "epoch": 3.6, "learning_rate": 2.751512859304085e-05, "loss": 0.143, "step": 4758 }, { "epoch": 3.6, "learning_rate": 2.7510400907715584e-05, "loss": 0.1456, "step": 4759 }, { "epoch": 3.6, "learning_rate": 2.7505673222390323e-05, "loss": 0.1676, "step": 4760 }, { "epoch": 3.6, "learning_rate": 2.7500945537065055e-05, "loss": 0.1936, "step": 4761 }, { "epoch": 3.6, "learning_rate": 2.749621785173979e-05, "loss": 0.1521, "step": 4762 }, { "epoch": 3.6, "learning_rate": 2.7491490166414525e-05, "loss": 0.1773, "step": 4763 }, { "epoch": 3.6, "learning_rate": 2.748676248108926e-05, "loss": 0.1499, "step": 4764 }, { "epoch": 3.6, "learning_rate": 2.7482034795763993e-05, "loss": 0.1547, "step": 4765 }, { "epoch": 3.6, "learning_rate": 2.7477307110438732e-05, "loss": 0.1375, "step": 4766 }, { "epoch": 3.61, "learning_rate": 2.7472579425113464e-05, "loss": 0.1495, "step": 4767 }, { "epoch": 3.61, "learning_rate": 2.7467851739788203e-05, "loss": 0.1448, "step": 4768 }, { "epoch": 3.61, "learning_rate": 2.7463124054462935e-05, "loss": 0.1481, "step": 4769 }, { "epoch": 3.61, "learning_rate": 2.7458396369137674e-05, "loss": 0.1378, "step": 4770 }, { "epoch": 3.61, "learning_rate": 2.7453668683812406e-05, "loss": 0.1571, "step": 4771 }, { "epoch": 3.61, "learning_rate": 2.7448940998487144e-05, "loss": 0.1357, "step": 4772 }, { "epoch": 3.61, "learning_rate": 2.7444213313161877e-05, "loss": 0.1398, "step": 4773 }, { "epoch": 3.61, "learning_rate": 2.7439485627836615e-05, "loss": 0.1689, "step": 4774 }, { "epoch": 3.61, "learning_rate": 2.7434757942511347e-05, "loss": 0.1414, "step": 4775 }, { "epoch": 3.61, "learning_rate": 2.7430030257186083e-05, "loss": 0.1491, "step": 4776 }, { "epoch": 3.61, "learning_rate": 2.742530257186082e-05, "loss": 0.1458, "step": 4777 }, { "epoch": 3.61, "learning_rate": 2.7420574886535554e-05, "loss": 0.1518, "step": 4778 }, { "epoch": 3.61, "learning_rate": 2.7415847201210286e-05, "loss": 0.1527, "step": 4779 }, { "epoch": 3.62, "learning_rate": 2.7411119515885025e-05, "loss": 0.1564, "step": 4780 }, { "epoch": 3.62, "learning_rate": 2.7406391830559757e-05, "loss": 0.1595, "step": 4781 }, { "epoch": 3.62, "learning_rate": 2.7401664145234496e-05, "loss": 0.124, "step": 4782 }, { "epoch": 3.62, "learning_rate": 2.7396936459909228e-05, "loss": 0.1463, "step": 4783 }, { "epoch": 3.62, "learning_rate": 2.7392208774583966e-05, "loss": 0.1366, "step": 4784 }, { "epoch": 3.62, "learning_rate": 2.73874810892587e-05, "loss": 0.1526, "step": 4785 }, { "epoch": 3.62, "learning_rate": 2.7382753403933437e-05, "loss": 0.148, "step": 4786 }, { "epoch": 3.62, "learning_rate": 2.737802571860817e-05, "loss": 0.1637, "step": 4787 }, { "epoch": 3.62, "learning_rate": 2.7373298033282908e-05, "loss": 0.1533, "step": 4788 }, { "epoch": 3.62, "learning_rate": 2.736857034795764e-05, "loss": 0.1514, "step": 4789 }, { "epoch": 3.62, "learning_rate": 2.7363842662632376e-05, "loss": 0.1315, "step": 4790 }, { "epoch": 3.62, "learning_rate": 2.735911497730711e-05, "loss": 0.1435, "step": 4791 }, { "epoch": 3.62, "learning_rate": 2.7354387291981847e-05, "loss": 0.1533, "step": 4792 }, { "epoch": 3.62, "learning_rate": 2.734965960665658e-05, "loss": 0.148, "step": 4793 }, { "epoch": 3.63, "learning_rate": 2.7344931921331318e-05, "loss": 0.1386, "step": 4794 }, { "epoch": 3.63, "learning_rate": 2.734020423600605e-05, "loss": 0.1438, "step": 4795 }, { "epoch": 3.63, "learning_rate": 2.733547655068079e-05, "loss": 0.1346, "step": 4796 }, { "epoch": 3.63, "learning_rate": 2.733074886535552e-05, "loss": 0.1298, "step": 4797 }, { "epoch": 3.63, "learning_rate": 2.732602118003026e-05, "loss": 0.1643, "step": 4798 }, { "epoch": 3.63, "learning_rate": 2.732129349470499e-05, "loss": 0.1446, "step": 4799 }, { "epoch": 3.63, "learning_rate": 2.731656580937973e-05, "loss": 0.1316, "step": 4800 }, { "epoch": 3.63, "learning_rate": 2.7311838124054462e-05, "loss": 0.1426, "step": 4801 }, { "epoch": 3.63, "learning_rate": 2.73071104387292e-05, "loss": 0.1639, "step": 4802 }, { "epoch": 3.63, "learning_rate": 2.7302382753403933e-05, "loss": 0.1551, "step": 4803 }, { "epoch": 3.63, "learning_rate": 2.729765506807867e-05, "loss": 0.1393, "step": 4804 }, { "epoch": 3.63, "learning_rate": 2.7292927382753404e-05, "loss": 0.154, "step": 4805 }, { "epoch": 3.63, "learning_rate": 2.728819969742814e-05, "loss": 0.1676, "step": 4806 }, { "epoch": 3.64, "learning_rate": 2.728347201210287e-05, "loss": 0.137, "step": 4807 }, { "epoch": 3.64, "learning_rate": 2.727874432677761e-05, "loss": 0.1428, "step": 4808 }, { "epoch": 3.64, "learning_rate": 2.7274016641452342e-05, "loss": 0.1261, "step": 4809 }, { "epoch": 3.64, "learning_rate": 2.726928895612708e-05, "loss": 0.1552, "step": 4810 }, { "epoch": 3.64, "learning_rate": 2.726456127080182e-05, "loss": 0.1432, "step": 4811 }, { "epoch": 3.64, "learning_rate": 2.7259833585476552e-05, "loss": 0.162, "step": 4812 }, { "epoch": 3.64, "learning_rate": 2.725510590015129e-05, "loss": 0.1195, "step": 4813 }, { "epoch": 3.64, "learning_rate": 2.7250378214826023e-05, "loss": 0.1546, "step": 4814 }, { "epoch": 3.64, "learning_rate": 2.724565052950076e-05, "loss": 0.1356, "step": 4815 }, { "epoch": 3.64, "learning_rate": 2.7240922844175494e-05, "loss": 0.1387, "step": 4816 }, { "epoch": 3.64, "learning_rate": 2.723619515885023e-05, "loss": 0.1342, "step": 4817 }, { "epoch": 3.64, "learning_rate": 2.723146747352496e-05, "loss": 0.1348, "step": 4818 }, { "epoch": 3.64, "learning_rate": 2.72267397881997e-05, "loss": 0.1467, "step": 4819 }, { "epoch": 3.65, "learning_rate": 2.7222012102874432e-05, "loss": 0.1384, "step": 4820 }, { "epoch": 3.65, "learning_rate": 2.721728441754917e-05, "loss": 0.152, "step": 4821 }, { "epoch": 3.65, "learning_rate": 2.7212556732223903e-05, "loss": 0.1835, "step": 4822 }, { "epoch": 3.65, "learning_rate": 2.7207829046898642e-05, "loss": 0.1411, "step": 4823 }, { "epoch": 3.65, "learning_rate": 2.7203101361573374e-05, "loss": 0.1333, "step": 4824 }, { "epoch": 3.65, "learning_rate": 2.7198373676248113e-05, "loss": 0.1788, "step": 4825 }, { "epoch": 3.65, "learning_rate": 2.7193645990922845e-05, "loss": 0.1356, "step": 4826 }, { "epoch": 3.65, "learning_rate": 2.7188918305597584e-05, "loss": 0.1313, "step": 4827 }, { "epoch": 3.65, "learning_rate": 2.7184190620272316e-05, "loss": 0.1565, "step": 4828 }, { "epoch": 3.65, "learning_rate": 2.717946293494705e-05, "loss": 0.1581, "step": 4829 }, { "epoch": 3.65, "learning_rate": 2.7174735249621787e-05, "loss": 0.1884, "step": 4830 }, { "epoch": 3.65, "learning_rate": 2.7170007564296522e-05, "loss": 0.122, "step": 4831 }, { "epoch": 3.65, "learning_rate": 2.7165279878971254e-05, "loss": 0.1596, "step": 4832 }, { "epoch": 3.66, "learning_rate": 2.7160552193645993e-05, "loss": 0.155, "step": 4833 }, { "epoch": 3.66, "learning_rate": 2.7155824508320725e-05, "loss": 0.1572, "step": 4834 }, { "epoch": 3.66, "learning_rate": 2.7151096822995464e-05, "loss": 0.1534, "step": 4835 }, { "epoch": 3.66, "learning_rate": 2.7146369137670196e-05, "loss": 0.1547, "step": 4836 }, { "epoch": 3.66, "learning_rate": 2.7141641452344935e-05, "loss": 0.1536, "step": 4837 }, { "epoch": 3.66, "learning_rate": 2.7136913767019667e-05, "loss": 0.136, "step": 4838 }, { "epoch": 3.66, "learning_rate": 2.7132186081694406e-05, "loss": 0.1349, "step": 4839 }, { "epoch": 3.66, "learning_rate": 2.7127458396369138e-05, "loss": 0.1692, "step": 4840 }, { "epoch": 3.66, "learning_rate": 2.7122730711043877e-05, "loss": 0.139, "step": 4841 }, { "epoch": 3.66, "learning_rate": 2.711800302571861e-05, "loss": 0.1356, "step": 4842 }, { "epoch": 3.66, "learning_rate": 2.7113275340393344e-05, "loss": 0.1379, "step": 4843 }, { "epoch": 3.66, "learning_rate": 2.710854765506808e-05, "loss": 0.145, "step": 4844 }, { "epoch": 3.66, "learning_rate": 2.7103819969742815e-05, "loss": 0.1412, "step": 4845 }, { "epoch": 3.66, "learning_rate": 2.7099092284417547e-05, "loss": 0.1519, "step": 4846 }, { "epoch": 3.67, "learning_rate": 2.7094364599092286e-05, "loss": 0.1448, "step": 4847 }, { "epoch": 3.67, "learning_rate": 2.7089636913767018e-05, "loss": 0.1662, "step": 4848 }, { "epoch": 3.67, "learning_rate": 2.7084909228441757e-05, "loss": 0.1338, "step": 4849 }, { "epoch": 3.67, "learning_rate": 2.708018154311649e-05, "loss": 0.1421, "step": 4850 }, { "epoch": 3.67, "learning_rate": 2.7075453857791228e-05, "loss": 0.1449, "step": 4851 }, { "epoch": 3.67, "learning_rate": 2.707072617246596e-05, "loss": 0.1364, "step": 4852 }, { "epoch": 3.67, "learning_rate": 2.70659984871407e-05, "loss": 0.1308, "step": 4853 }, { "epoch": 3.67, "learning_rate": 2.706127080181543e-05, "loss": 0.1534, "step": 4854 }, { "epoch": 3.67, "learning_rate": 2.705654311649017e-05, "loss": 0.1384, "step": 4855 }, { "epoch": 3.67, "learning_rate": 2.7051815431164902e-05, "loss": 0.1626, "step": 4856 }, { "epoch": 3.67, "learning_rate": 2.7047087745839637e-05, "loss": 0.1415, "step": 4857 }, { "epoch": 3.67, "learning_rate": 2.7042360060514373e-05, "loss": 0.1609, "step": 4858 }, { "epoch": 3.67, "learning_rate": 2.7037632375189108e-05, "loss": 0.1555, "step": 4859 }, { "epoch": 3.68, "learning_rate": 2.703290468986384e-05, "loss": 0.1712, "step": 4860 }, { "epoch": 3.68, "learning_rate": 2.702817700453858e-05, "loss": 0.1426, "step": 4861 }, { "epoch": 3.68, "learning_rate": 2.702344931921331e-05, "loss": 0.1407, "step": 4862 }, { "epoch": 3.68, "learning_rate": 2.701872163388805e-05, "loss": 0.1464, "step": 4863 }, { "epoch": 3.68, "learning_rate": 2.7013993948562782e-05, "loss": 0.1427, "step": 4864 }, { "epoch": 3.68, "learning_rate": 2.700926626323752e-05, "loss": 0.1489, "step": 4865 }, { "epoch": 3.68, "learning_rate": 2.7004538577912253e-05, "loss": 0.1546, "step": 4866 }, { "epoch": 3.68, "learning_rate": 2.699981089258699e-05, "loss": 0.1703, "step": 4867 }, { "epoch": 3.68, "learning_rate": 2.6995083207261724e-05, "loss": 0.1451, "step": 4868 }, { "epoch": 3.68, "learning_rate": 2.6990355521936463e-05, "loss": 0.1525, "step": 4869 }, { "epoch": 3.68, "learning_rate": 2.6985627836611198e-05, "loss": 0.145, "step": 4870 }, { "epoch": 3.68, "learning_rate": 2.698090015128593e-05, "loss": 0.1507, "step": 4871 }, { "epoch": 3.68, "learning_rate": 2.697617246596067e-05, "loss": 0.1381, "step": 4872 }, { "epoch": 3.69, "learning_rate": 2.69714447806354e-05, "loss": 0.1485, "step": 4873 }, { "epoch": 3.69, "learning_rate": 2.696671709531014e-05, "loss": 0.1476, "step": 4874 }, { "epoch": 3.69, "learning_rate": 2.6961989409984872e-05, "loss": 0.1378, "step": 4875 }, { "epoch": 3.69, "learning_rate": 2.695726172465961e-05, "loss": 0.1348, "step": 4876 }, { "epoch": 3.69, "learning_rate": 2.6952534039334343e-05, "loss": 0.1428, "step": 4877 }, { "epoch": 3.69, "learning_rate": 2.694780635400908e-05, "loss": 0.1201, "step": 4878 }, { "epoch": 3.69, "learning_rate": 2.6943078668683814e-05, "loss": 0.1665, "step": 4879 }, { "epoch": 3.69, "learning_rate": 2.6938350983358552e-05, "loss": 0.149, "step": 4880 }, { "epoch": 3.69, "learning_rate": 2.6933623298033285e-05, "loss": 0.1612, "step": 4881 }, { "epoch": 3.69, "learning_rate": 2.692889561270802e-05, "loss": 0.1372, "step": 4882 }, { "epoch": 3.69, "learning_rate": 2.6924167927382755e-05, "loss": 0.1419, "step": 4883 }, { "epoch": 3.69, "learning_rate": 2.691944024205749e-05, "loss": 0.1311, "step": 4884 }, { "epoch": 3.69, "learning_rate": 2.6914712556732223e-05, "loss": 0.1598, "step": 4885 }, { "epoch": 3.7, "learning_rate": 2.6909984871406962e-05, "loss": 0.1586, "step": 4886 }, { "epoch": 3.7, "learning_rate": 2.6905257186081694e-05, "loss": 0.146, "step": 4887 }, { "epoch": 3.7, "learning_rate": 2.6900529500756433e-05, "loss": 0.1295, "step": 4888 }, { "epoch": 3.7, "learning_rate": 2.6895801815431165e-05, "loss": 0.1635, "step": 4889 }, { "epoch": 3.7, "learning_rate": 2.6891074130105904e-05, "loss": 0.166, "step": 4890 }, { "epoch": 3.7, "learning_rate": 2.6886346444780636e-05, "loss": 0.1735, "step": 4891 }, { "epoch": 3.7, "learning_rate": 2.6881618759455374e-05, "loss": 0.1678, "step": 4892 }, { "epoch": 3.7, "learning_rate": 2.6876891074130107e-05, "loss": 0.1611, "step": 4893 }, { "epoch": 3.7, "learning_rate": 2.6872163388804845e-05, "loss": 0.1364, "step": 4894 }, { "epoch": 3.7, "learning_rate": 2.6867435703479577e-05, "loss": 0.1541, "step": 4895 }, { "epoch": 3.7, "learning_rate": 2.6862708018154313e-05, "loss": 0.1745, "step": 4896 }, { "epoch": 3.7, "learning_rate": 2.6857980332829048e-05, "loss": 0.1353, "step": 4897 }, { "epoch": 3.7, "learning_rate": 2.6853252647503784e-05, "loss": 0.1483, "step": 4898 }, { "epoch": 3.71, "learning_rate": 2.6848524962178516e-05, "loss": 0.1693, "step": 4899 }, { "epoch": 3.71, "learning_rate": 2.6843797276853255e-05, "loss": 0.1564, "step": 4900 }, { "epoch": 3.71, "learning_rate": 2.6839069591527987e-05, "loss": 0.1339, "step": 4901 }, { "epoch": 3.71, "learning_rate": 2.6834341906202726e-05, "loss": 0.138, "step": 4902 }, { "epoch": 3.71, "learning_rate": 2.6829614220877458e-05, "loss": 0.1571, "step": 4903 }, { "epoch": 3.71, "learning_rate": 2.6824886535552196e-05, "loss": 0.1414, "step": 4904 }, { "epoch": 3.71, "learning_rate": 2.682015885022693e-05, "loss": 0.1465, "step": 4905 }, { "epoch": 3.71, "learning_rate": 2.6815431164901667e-05, "loss": 0.1349, "step": 4906 }, { "epoch": 3.71, "learning_rate": 2.68107034795764e-05, "loss": 0.1557, "step": 4907 }, { "epoch": 3.71, "learning_rate": 2.6805975794251138e-05, "loss": 0.1344, "step": 4908 }, { "epoch": 3.71, "learning_rate": 2.680124810892587e-05, "loss": 0.1476, "step": 4909 }, { "epoch": 3.71, "learning_rate": 2.6796520423600606e-05, "loss": 0.1836, "step": 4910 }, { "epoch": 3.71, "learning_rate": 2.679179273827534e-05, "loss": 0.1381, "step": 4911 }, { "epoch": 3.71, "learning_rate": 2.6787065052950077e-05, "loss": 0.1381, "step": 4912 }, { "epoch": 3.72, "learning_rate": 2.678233736762481e-05, "loss": 0.1404, "step": 4913 }, { "epoch": 3.72, "learning_rate": 2.6777609682299548e-05, "loss": 0.1638, "step": 4914 }, { "epoch": 3.72, "learning_rate": 2.677288199697428e-05, "loss": 0.1825, "step": 4915 }, { "epoch": 3.72, "learning_rate": 2.676815431164902e-05, "loss": 0.1844, "step": 4916 }, { "epoch": 3.72, "learning_rate": 2.676342662632375e-05, "loss": 0.1593, "step": 4917 }, { "epoch": 3.72, "learning_rate": 2.675869894099849e-05, "loss": 0.175, "step": 4918 }, { "epoch": 3.72, "learning_rate": 2.675397125567322e-05, "loss": 0.1353, "step": 4919 }, { "epoch": 3.72, "learning_rate": 2.674924357034796e-05, "loss": 0.144, "step": 4920 }, { "epoch": 3.72, "learning_rate": 2.6744515885022692e-05, "loss": 0.1378, "step": 4921 }, { "epoch": 3.72, "learning_rate": 2.673978819969743e-05, "loss": 0.1435, "step": 4922 }, { "epoch": 3.72, "learning_rate": 2.6735060514372163e-05, "loss": 0.1431, "step": 4923 }, { "epoch": 3.72, "learning_rate": 2.67303328290469e-05, "loss": 0.1278, "step": 4924 }, { "epoch": 3.72, "learning_rate": 2.6725605143721634e-05, "loss": 0.1466, "step": 4925 }, { "epoch": 3.73, "learning_rate": 2.672087745839637e-05, "loss": 0.1592, "step": 4926 }, { "epoch": 3.73, "learning_rate": 2.672087745839637e-05, "loss": 0.3025, "step": 4927 }, { "epoch": 3.73, "learning_rate": 2.67161497730711e-05, "loss": 0.1809, "step": 4928 }, { "epoch": 3.73, "learning_rate": 2.671142208774584e-05, "loss": 0.163, "step": 4929 }, { "epoch": 3.73, "learning_rate": 2.6706694402420572e-05, "loss": 0.1812, "step": 4930 }, { "epoch": 3.73, "learning_rate": 2.670196671709531e-05, "loss": 0.1625, "step": 4931 }, { "epoch": 3.73, "learning_rate": 2.669723903177005e-05, "loss": 0.1502, "step": 4932 }, { "epoch": 3.73, "learning_rate": 2.6692511346444782e-05, "loss": 0.1584, "step": 4933 }, { "epoch": 3.73, "learning_rate": 2.668778366111952e-05, "loss": 0.145, "step": 4934 }, { "epoch": 3.73, "learning_rate": 2.6683055975794253e-05, "loss": 0.154, "step": 4935 }, { "epoch": 3.73, "learning_rate": 2.667832829046899e-05, "loss": 0.1426, "step": 4936 }, { "epoch": 3.73, "learning_rate": 2.6673600605143724e-05, "loss": 0.1348, "step": 4937 }, { "epoch": 3.73, "learning_rate": 2.666887291981846e-05, "loss": 0.161, "step": 4938 }, { "epoch": 3.74, "learning_rate": 2.666414523449319e-05, "loss": 0.1611, "step": 4939 }, { "epoch": 3.74, "learning_rate": 2.665941754916793e-05, "loss": 0.1865, "step": 4940 }, { "epoch": 3.74, "learning_rate": 2.6654689863842662e-05, "loss": 0.1241, "step": 4941 }, { "epoch": 3.74, "learning_rate": 2.66499621785174e-05, "loss": 0.1349, "step": 4942 }, { "epoch": 3.74, "learning_rate": 2.6645234493192133e-05, "loss": 0.1459, "step": 4943 }, { "epoch": 3.74, "learning_rate": 2.6640506807866872e-05, "loss": 0.1738, "step": 4944 }, { "epoch": 3.74, "learning_rate": 2.6635779122541604e-05, "loss": 0.148, "step": 4945 }, { "epoch": 3.74, "learning_rate": 2.6631051437216343e-05, "loss": 0.132, "step": 4946 }, { "epoch": 3.74, "learning_rate": 2.6626323751891075e-05, "loss": 0.1549, "step": 4947 }, { "epoch": 3.74, "learning_rate": 2.6621596066565814e-05, "loss": 0.1453, "step": 4948 }, { "epoch": 3.74, "learning_rate": 2.6616868381240546e-05, "loss": 0.1461, "step": 4949 }, { "epoch": 3.74, "learning_rate": 2.661214069591528e-05, "loss": 0.1373, "step": 4950 }, { "epoch": 3.74, "learning_rate": 2.6607413010590017e-05, "loss": 0.1301, "step": 4951 }, { "epoch": 3.75, "learning_rate": 2.6602685325264752e-05, "loss": 0.1422, "step": 4952 }, { "epoch": 3.75, "learning_rate": 2.6597957639939484e-05, "loss": 0.1459, "step": 4953 }, { "epoch": 3.75, "learning_rate": 2.6593229954614223e-05, "loss": 0.1466, "step": 4954 }, { "epoch": 3.75, "learning_rate": 2.6588502269288955e-05, "loss": 0.1839, "step": 4955 }, { "epoch": 3.75, "learning_rate": 2.6583774583963694e-05, "loss": 0.1567, "step": 4956 }, { "epoch": 3.75, "learning_rate": 2.6579046898638426e-05, "loss": 0.1525, "step": 4957 }, { "epoch": 3.75, "learning_rate": 2.6574319213313165e-05, "loss": 0.147, "step": 4958 }, { "epoch": 3.75, "learning_rate": 2.6569591527987897e-05, "loss": 0.1495, "step": 4959 }, { "epoch": 3.75, "learning_rate": 2.6564863842662636e-05, "loss": 0.168, "step": 4960 }, { "epoch": 3.75, "learning_rate": 2.6560136157337368e-05, "loss": 0.1413, "step": 4961 }, { "epoch": 3.75, "learning_rate": 2.6555408472012107e-05, "loss": 0.1574, "step": 4962 }, { "epoch": 3.75, "learning_rate": 2.655068078668684e-05, "loss": 0.1615, "step": 4963 }, { "epoch": 3.75, "learning_rate": 2.6545953101361574e-05, "loss": 0.1377, "step": 4964 }, { "epoch": 3.75, "learning_rate": 2.654122541603631e-05, "loss": 0.1495, "step": 4965 }, { "epoch": 3.76, "learning_rate": 2.6536497730711045e-05, "loss": 0.1531, "step": 4966 }, { "epoch": 3.76, "learning_rate": 2.6531770045385777e-05, "loss": 0.145, "step": 4967 }, { "epoch": 3.76, "learning_rate": 2.6527042360060516e-05, "loss": 0.14, "step": 4968 }, { "epoch": 3.76, "learning_rate": 2.6522314674735248e-05, "loss": 0.1541, "step": 4969 }, { "epoch": 3.76, "learning_rate": 2.6517586989409987e-05, "loss": 0.1503, "step": 4970 }, { "epoch": 3.76, "learning_rate": 2.651285930408472e-05, "loss": 0.1337, "step": 4971 }, { "epoch": 3.76, "learning_rate": 2.6508131618759458e-05, "loss": 0.1428, "step": 4972 }, { "epoch": 3.76, "learning_rate": 2.650340393343419e-05, "loss": 0.151, "step": 4973 }, { "epoch": 3.76, "learning_rate": 2.649867624810893e-05, "loss": 0.1368, "step": 4974 }, { "epoch": 3.76, "learning_rate": 2.649394856278366e-05, "loss": 0.1367, "step": 4975 }, { "epoch": 3.76, "learning_rate": 2.64892208774584e-05, "loss": 0.1841, "step": 4976 }, { "epoch": 3.76, "learning_rate": 2.6484493192133132e-05, "loss": 0.1432, "step": 4977 }, { "epoch": 3.76, "learning_rate": 2.6479765506807867e-05, "loss": 0.1686, "step": 4978 }, { "epoch": 3.77, "learning_rate": 2.6475037821482603e-05, "loss": 0.1416, "step": 4979 }, { "epoch": 3.77, "learning_rate": 2.6470310136157338e-05, "loss": 0.1578, "step": 4980 }, { "epoch": 3.77, "learning_rate": 2.646558245083207e-05, "loss": 0.1626, "step": 4981 }, { "epoch": 3.77, "learning_rate": 2.646085476550681e-05, "loss": 0.1557, "step": 4982 }, { "epoch": 3.77, "learning_rate": 2.645612708018154e-05, "loss": 0.1491, "step": 4983 }, { "epoch": 3.77, "learning_rate": 2.645139939485628e-05, "loss": 0.1667, "step": 4984 }, { "epoch": 3.77, "learning_rate": 2.6446671709531012e-05, "loss": 0.1578, "step": 4985 }, { "epoch": 3.77, "learning_rate": 2.644194402420575e-05, "loss": 0.1532, "step": 4986 }, { "epoch": 3.77, "learning_rate": 2.6437216338880483e-05, "loss": 0.1406, "step": 4987 }, { "epoch": 3.77, "learning_rate": 2.643248865355522e-05, "loss": 0.1655, "step": 4988 }, { "epoch": 3.77, "learning_rate": 2.6427760968229954e-05, "loss": 0.1692, "step": 4989 }, { "epoch": 3.77, "learning_rate": 2.6423033282904693e-05, "loss": 0.171, "step": 4990 }, { "epoch": 3.77, "learning_rate": 2.6418305597579428e-05, "loss": 0.1708, "step": 4991 }, { "epoch": 3.78, "learning_rate": 2.641357791225416e-05, "loss": 0.1521, "step": 4992 }, { "epoch": 3.78, "learning_rate": 2.64088502269289e-05, "loss": 0.1472, "step": 4993 }, { "epoch": 3.78, "learning_rate": 2.640412254160363e-05, "loss": 0.1319, "step": 4994 }, { "epoch": 3.78, "learning_rate": 2.639939485627837e-05, "loss": 0.1369, "step": 4995 }, { "epoch": 3.78, "learning_rate": 2.6394667170953102e-05, "loss": 0.1599, "step": 4996 }, { "epoch": 3.78, "learning_rate": 2.638993948562784e-05, "loss": 0.1736, "step": 4997 }, { "epoch": 3.78, "learning_rate": 2.6385211800302573e-05, "loss": 0.1408, "step": 4998 }, { "epoch": 3.78, "learning_rate": 2.638048411497731e-05, "loss": 0.1312, "step": 4999 }, { "epoch": 3.78, "learning_rate": 2.6375756429652044e-05, "loss": 0.1483, "step": 5000 }, { "epoch": 3.78, "learning_rate": 2.6371028744326782e-05, "loss": 0.1633, "step": 5001 }, { "epoch": 3.78, "learning_rate": 2.6366301059001515e-05, "loss": 0.1408, "step": 5002 }, { "epoch": 3.78, "learning_rate": 2.636157337367625e-05, "loss": 0.1352, "step": 5003 }, { "epoch": 3.78, "learning_rate": 2.6356845688350985e-05, "loss": 0.1189, "step": 5004 }, { "epoch": 3.79, "learning_rate": 2.635211800302572e-05, "loss": 0.1509, "step": 5005 }, { "epoch": 3.79, "learning_rate": 2.6347390317700453e-05, "loss": 0.1578, "step": 5006 }, { "epoch": 3.79, "learning_rate": 2.6342662632375192e-05, "loss": 0.1538, "step": 5007 }, { "epoch": 3.79, "learning_rate": 2.6337934947049924e-05, "loss": 0.141, "step": 5008 }, { "epoch": 3.79, "learning_rate": 2.6333207261724663e-05, "loss": 0.1599, "step": 5009 }, { "epoch": 3.79, "learning_rate": 2.6328479576399395e-05, "loss": 0.131, "step": 5010 }, { "epoch": 3.79, "learning_rate": 2.6323751891074134e-05, "loss": 0.1545, "step": 5011 }, { "epoch": 3.79, "learning_rate": 2.6319024205748866e-05, "loss": 0.1452, "step": 5012 }, { "epoch": 3.79, "learning_rate": 2.6314296520423604e-05, "loss": 0.1557, "step": 5013 }, { "epoch": 3.79, "learning_rate": 2.6309568835098336e-05, "loss": 0.147, "step": 5014 }, { "epoch": 3.79, "learning_rate": 2.6304841149773075e-05, "loss": 0.1561, "step": 5015 }, { "epoch": 3.79, "learning_rate": 2.6300113464447807e-05, "loss": 0.148, "step": 5016 }, { "epoch": 3.79, "learning_rate": 2.6295385779122543e-05, "loss": 0.145, "step": 5017 }, { "epoch": 3.8, "learning_rate": 2.6290658093797278e-05, "loss": 0.1668, "step": 5018 }, { "epoch": 3.8, "learning_rate": 2.6285930408472014e-05, "loss": 0.1383, "step": 5019 }, { "epoch": 3.8, "learning_rate": 2.6281202723146746e-05, "loss": 0.1489, "step": 5020 }, { "epoch": 3.8, "learning_rate": 2.6276475037821485e-05, "loss": 0.1731, "step": 5021 }, { "epoch": 3.8, "learning_rate": 2.6271747352496217e-05, "loss": 0.1433, "step": 5022 }, { "epoch": 3.8, "learning_rate": 2.6267019667170956e-05, "loss": 0.136, "step": 5023 }, { "epoch": 3.8, "learning_rate": 2.6262291981845688e-05, "loss": 0.1366, "step": 5024 }, { "epoch": 3.8, "learning_rate": 2.6257564296520426e-05, "loss": 0.1445, "step": 5025 }, { "epoch": 3.8, "learning_rate": 2.625283661119516e-05, "loss": 0.1385, "step": 5026 }, { "epoch": 3.8, "learning_rate": 2.6248108925869897e-05, "loss": 0.1769, "step": 5027 }, { "epoch": 3.8, "learning_rate": 2.624338124054463e-05, "loss": 0.1384, "step": 5028 }, { "epoch": 3.8, "learning_rate": 2.6238653555219368e-05, "loss": 0.1372, "step": 5029 }, { "epoch": 3.8, "learning_rate": 2.62339258698941e-05, "loss": 0.1356, "step": 5030 }, { "epoch": 3.8, "learning_rate": 2.6229198184568836e-05, "loss": 0.1285, "step": 5031 }, { "epoch": 3.81, "learning_rate": 2.622447049924357e-05, "loss": 0.1554, "step": 5032 }, { "epoch": 3.81, "learning_rate": 2.6219742813918307e-05, "loss": 0.1523, "step": 5033 }, { "epoch": 3.81, "learning_rate": 2.621501512859304e-05, "loss": 0.1331, "step": 5034 }, { "epoch": 3.81, "learning_rate": 2.6210287443267777e-05, "loss": 0.1851, "step": 5035 }, { "epoch": 3.81, "learning_rate": 2.620555975794251e-05, "loss": 0.1326, "step": 5036 }, { "epoch": 3.81, "learning_rate": 2.620083207261725e-05, "loss": 0.1715, "step": 5037 }, { "epoch": 3.81, "learning_rate": 2.619610438729198e-05, "loss": 0.1515, "step": 5038 }, { "epoch": 3.81, "learning_rate": 2.619137670196672e-05, "loss": 0.1339, "step": 5039 }, { "epoch": 3.81, "learning_rate": 2.618664901664145e-05, "loss": 0.1433, "step": 5040 }, { "epoch": 3.81, "learning_rate": 2.618192133131619e-05, "loss": 0.1603, "step": 5041 }, { "epoch": 3.81, "learning_rate": 2.6177193645990922e-05, "loss": 0.142, "step": 5042 }, { "epoch": 3.81, "learning_rate": 2.617246596066566e-05, "loss": 0.1488, "step": 5043 }, { "epoch": 3.81, "learning_rate": 2.6167738275340393e-05, "loss": 0.1606, "step": 5044 }, { "epoch": 3.82, "learning_rate": 2.616301059001513e-05, "loss": 0.1502, "step": 5045 }, { "epoch": 3.82, "learning_rate": 2.6158282904689864e-05, "loss": 0.153, "step": 5046 }, { "epoch": 3.82, "learning_rate": 2.61535552193646e-05, "loss": 0.1403, "step": 5047 }, { "epoch": 3.82, "learning_rate": 2.614882753403933e-05, "loss": 0.1521, "step": 5048 }, { "epoch": 3.82, "learning_rate": 2.614409984871407e-05, "loss": 0.1393, "step": 5049 }, { "epoch": 3.82, "learning_rate": 2.6139372163388802e-05, "loss": 0.1445, "step": 5050 }, { "epoch": 3.82, "learning_rate": 2.613464447806354e-05, "loss": 0.1542, "step": 5051 }, { "epoch": 3.82, "learning_rate": 2.612991679273828e-05, "loss": 0.1352, "step": 5052 }, { "epoch": 3.82, "learning_rate": 2.6125189107413012e-05, "loss": 0.125, "step": 5053 }, { "epoch": 3.82, "learning_rate": 2.612046142208775e-05, "loss": 0.156, "step": 5054 }, { "epoch": 3.82, "learning_rate": 2.6115733736762483e-05, "loss": 0.1469, "step": 5055 }, { "epoch": 3.82, "learning_rate": 2.611100605143722e-05, "loss": 0.1389, "step": 5056 }, { "epoch": 3.82, "learning_rate": 2.6106278366111954e-05, "loss": 0.1731, "step": 5057 }, { "epoch": 3.83, "learning_rate": 2.610155068078669e-05, "loss": 0.1454, "step": 5058 }, { "epoch": 3.83, "learning_rate": 2.609682299546142e-05, "loss": 0.131, "step": 5059 }, { "epoch": 3.83, "learning_rate": 2.609209531013616e-05, "loss": 0.1281, "step": 5060 }, { "epoch": 3.83, "learning_rate": 2.6087367624810892e-05, "loss": 0.1671, "step": 5061 }, { "epoch": 3.83, "learning_rate": 2.608263993948563e-05, "loss": 0.1489, "step": 5062 }, { "epoch": 3.83, "learning_rate": 2.6077912254160363e-05, "loss": 0.1573, "step": 5063 }, { "epoch": 3.83, "learning_rate": 2.6073184568835102e-05, "loss": 0.1552, "step": 5064 }, { "epoch": 3.83, "learning_rate": 2.6068456883509834e-05, "loss": 0.1641, "step": 5065 }, { "epoch": 3.83, "learning_rate": 2.6063729198184573e-05, "loss": 0.1517, "step": 5066 }, { "epoch": 3.83, "learning_rate": 2.6059001512859305e-05, "loss": 0.1458, "step": 5067 }, { "epoch": 3.83, "learning_rate": 2.6054273827534044e-05, "loss": 0.1873, "step": 5068 }, { "epoch": 3.83, "learning_rate": 2.6049546142208776e-05, "loss": 0.1639, "step": 5069 }, { "epoch": 3.83, "learning_rate": 2.604481845688351e-05, "loss": 0.1424, "step": 5070 }, { "epoch": 3.84, "learning_rate": 2.6040090771558247e-05, "loss": 0.1631, "step": 5071 }, { "epoch": 3.84, "learning_rate": 2.6035363086232982e-05, "loss": 0.1463, "step": 5072 }, { "epoch": 3.84, "learning_rate": 2.6030635400907714e-05, "loss": 0.1338, "step": 5073 }, { "epoch": 3.84, "learning_rate": 2.6025907715582453e-05, "loss": 0.1565, "step": 5074 }, { "epoch": 3.84, "learning_rate": 2.6021180030257185e-05, "loss": 0.1505, "step": 5075 }, { "epoch": 3.84, "learning_rate": 2.6016452344931924e-05, "loss": 0.1713, "step": 5076 }, { "epoch": 3.84, "learning_rate": 2.6011724659606656e-05, "loss": 0.1343, "step": 5077 }, { "epoch": 3.84, "learning_rate": 2.6006996974281395e-05, "loss": 0.1326, "step": 5078 }, { "epoch": 3.84, "learning_rate": 2.6002269288956127e-05, "loss": 0.1556, "step": 5079 }, { "epoch": 3.84, "learning_rate": 2.5997541603630866e-05, "loss": 0.1432, "step": 5080 }, { "epoch": 3.84, "learning_rate": 2.5992813918305598e-05, "loss": 0.1444, "step": 5081 }, { "epoch": 3.84, "learning_rate": 2.5988086232980337e-05, "loss": 0.1235, "step": 5082 }, { "epoch": 3.84, "learning_rate": 2.598335854765507e-05, "loss": 0.1463, "step": 5083 }, { "epoch": 3.84, "learning_rate": 2.5978630862329804e-05, "loss": 0.1615, "step": 5084 }, { "epoch": 3.85, "learning_rate": 2.597390317700454e-05, "loss": 0.159, "step": 5085 }, { "epoch": 3.85, "learning_rate": 2.5969175491679275e-05, "loss": 0.1551, "step": 5086 }, { "epoch": 3.85, "learning_rate": 2.5964447806354007e-05, "loss": 0.155, "step": 5087 }, { "epoch": 3.85, "learning_rate": 2.5959720121028746e-05, "loss": 0.135, "step": 5088 }, { "epoch": 3.85, "learning_rate": 2.5954992435703478e-05, "loss": 0.1648, "step": 5089 }, { "epoch": 3.85, "learning_rate": 2.5950264750378217e-05, "loss": 0.1681, "step": 5090 }, { "epoch": 3.85, "learning_rate": 2.594553706505295e-05, "loss": 0.1527, "step": 5091 }, { "epoch": 3.85, "learning_rate": 2.5940809379727688e-05, "loss": 0.1638, "step": 5092 }, { "epoch": 3.85, "learning_rate": 2.593608169440242e-05, "loss": 0.136, "step": 5093 }, { "epoch": 3.85, "learning_rate": 2.593135400907716e-05, "loss": 0.1563, "step": 5094 }, { "epoch": 3.85, "learning_rate": 2.592662632375189e-05, "loss": 0.1534, "step": 5095 }, { "epoch": 3.85, "learning_rate": 2.592189863842663e-05, "loss": 0.1479, "step": 5096 }, { "epoch": 3.85, "learning_rate": 2.591717095310136e-05, "loss": 0.1729, "step": 5097 }, { "epoch": 3.86, "learning_rate": 2.5912443267776097e-05, "loss": 0.1423, "step": 5098 }, { "epoch": 3.86, "learning_rate": 2.5907715582450833e-05, "loss": 0.1467, "step": 5099 }, { "epoch": 3.86, "learning_rate": 2.5902987897125568e-05, "loss": 0.1577, "step": 5100 }, { "epoch": 3.86, "learning_rate": 2.58982602118003e-05, "loss": 0.1592, "step": 5101 }, { "epoch": 3.86, "learning_rate": 2.589353252647504e-05, "loss": 0.1456, "step": 5102 }, { "epoch": 3.86, "learning_rate": 2.588880484114977e-05, "loss": 0.1507, "step": 5103 }, { "epoch": 3.86, "learning_rate": 2.588407715582451e-05, "loss": 0.1413, "step": 5104 }, { "epoch": 3.86, "learning_rate": 2.5879349470499242e-05, "loss": 0.1237, "step": 5105 }, { "epoch": 3.86, "learning_rate": 2.587462178517398e-05, "loss": 0.1335, "step": 5106 }, { "epoch": 3.86, "learning_rate": 2.5869894099848713e-05, "loss": 0.1253, "step": 5107 }, { "epoch": 3.86, "learning_rate": 2.586516641452345e-05, "loss": 0.1573, "step": 5108 }, { "epoch": 3.86, "learning_rate": 2.5860438729198184e-05, "loss": 0.1376, "step": 5109 }, { "epoch": 3.86, "learning_rate": 2.5855711043872922e-05, "loss": 0.1372, "step": 5110 }, { "epoch": 3.87, "learning_rate": 2.5850983358547658e-05, "loss": 0.1637, "step": 5111 }, { "epoch": 3.87, "learning_rate": 2.584625567322239e-05, "loss": 0.1324, "step": 5112 }, { "epoch": 3.87, "learning_rate": 2.584152798789713e-05, "loss": 0.152, "step": 5113 }, { "epoch": 3.87, "learning_rate": 2.583680030257186e-05, "loss": 0.1829, "step": 5114 }, { "epoch": 3.87, "learning_rate": 2.58320726172466e-05, "loss": 0.1242, "step": 5115 }, { "epoch": 3.87, "learning_rate": 2.5827344931921332e-05, "loss": 0.1475, "step": 5116 }, { "epoch": 3.87, "learning_rate": 2.582261724659607e-05, "loss": 0.1277, "step": 5117 }, { "epoch": 3.87, "learning_rate": 2.5817889561270803e-05, "loss": 0.1475, "step": 5118 }, { "epoch": 3.87, "learning_rate": 2.581316187594554e-05, "loss": 0.1366, "step": 5119 }, { "epoch": 3.87, "learning_rate": 2.5808434190620274e-05, "loss": 0.135, "step": 5120 }, { "epoch": 3.87, "learning_rate": 2.5803706505295012e-05, "loss": 0.1718, "step": 5121 }, { "epoch": 3.87, "learning_rate": 2.5798978819969744e-05, "loss": 0.1379, "step": 5122 }, { "epoch": 3.87, "learning_rate": 2.579425113464448e-05, "loss": 0.1682, "step": 5123 }, { "epoch": 3.88, "learning_rate": 2.5789523449319215e-05, "loss": 0.1493, "step": 5124 }, { "epoch": 3.88, "learning_rate": 2.578479576399395e-05, "loss": 0.1345, "step": 5125 }, { "epoch": 3.88, "learning_rate": 2.5780068078668683e-05, "loss": 0.1724, "step": 5126 }, { "epoch": 3.88, "learning_rate": 2.5775340393343422e-05, "loss": 0.1399, "step": 5127 }, { "epoch": 3.88, "learning_rate": 2.5770612708018154e-05, "loss": 0.1733, "step": 5128 }, { "epoch": 3.88, "learning_rate": 2.5765885022692893e-05, "loss": 0.1725, "step": 5129 }, { "epoch": 3.88, "learning_rate": 2.5761157337367625e-05, "loss": 0.1392, "step": 5130 }, { "epoch": 3.88, "learning_rate": 2.5756429652042363e-05, "loss": 0.1565, "step": 5131 }, { "epoch": 3.88, "learning_rate": 2.5751701966717096e-05, "loss": 0.151, "step": 5132 }, { "epoch": 3.88, "learning_rate": 2.5746974281391834e-05, "loss": 0.1485, "step": 5133 }, { "epoch": 3.88, "learning_rate": 2.5742246596066566e-05, "loss": 0.1421, "step": 5134 }, { "epoch": 3.88, "learning_rate": 2.5737518910741305e-05, "loss": 0.1469, "step": 5135 }, { "epoch": 3.88, "learning_rate": 2.5732791225416037e-05, "loss": 0.1278, "step": 5136 }, { "epoch": 3.89, "learning_rate": 2.5728063540090773e-05, "loss": 0.134, "step": 5137 }, { "epoch": 3.89, "learning_rate": 2.5723335854765508e-05, "loss": 0.155, "step": 5138 }, { "epoch": 3.89, "learning_rate": 2.5718608169440244e-05, "loss": 0.1532, "step": 5139 }, { "epoch": 3.89, "learning_rate": 2.5713880484114976e-05, "loss": 0.1542, "step": 5140 }, { "epoch": 3.89, "learning_rate": 2.5709152798789715e-05, "loss": 0.1422, "step": 5141 }, { "epoch": 3.89, "learning_rate": 2.5704425113464447e-05, "loss": 0.1362, "step": 5142 }, { "epoch": 3.89, "learning_rate": 2.5699697428139185e-05, "loss": 0.1312, "step": 5143 }, { "epoch": 3.89, "learning_rate": 2.5694969742813918e-05, "loss": 0.1531, "step": 5144 }, { "epoch": 3.89, "learning_rate": 2.5690242057488656e-05, "loss": 0.1347, "step": 5145 }, { "epoch": 3.89, "learning_rate": 2.568551437216339e-05, "loss": 0.1572, "step": 5146 }, { "epoch": 3.89, "learning_rate": 2.5680786686838127e-05, "loss": 0.1619, "step": 5147 }, { "epoch": 3.89, "learning_rate": 2.567605900151286e-05, "loss": 0.1435, "step": 5148 }, { "epoch": 3.89, "learning_rate": 2.5671331316187598e-05, "loss": 0.1168, "step": 5149 }, { "epoch": 3.89, "learning_rate": 2.566660363086233e-05, "loss": 0.1555, "step": 5150 }, { "epoch": 3.9, "learning_rate": 2.5661875945537066e-05, "loss": 0.1318, "step": 5151 }, { "epoch": 3.9, "learning_rate": 2.56571482602118e-05, "loss": 0.163, "step": 5152 }, { "epoch": 3.9, "learning_rate": 2.5652420574886537e-05, "loss": 0.1446, "step": 5153 }, { "epoch": 3.9, "learning_rate": 2.564769288956127e-05, "loss": 0.1531, "step": 5154 }, { "epoch": 3.9, "learning_rate": 2.5642965204236007e-05, "loss": 0.1334, "step": 5155 }, { "epoch": 3.9, "learning_rate": 2.563823751891074e-05, "loss": 0.1384, "step": 5156 }, { "epoch": 3.9, "learning_rate": 2.563350983358548e-05, "loss": 0.1364, "step": 5157 }, { "epoch": 3.9, "learning_rate": 2.562878214826021e-05, "loss": 0.1585, "step": 5158 }, { "epoch": 3.9, "learning_rate": 2.562405446293495e-05, "loss": 0.1344, "step": 5159 }, { "epoch": 3.9, "learning_rate": 2.561932677760968e-05, "loss": 0.1317, "step": 5160 }, { "epoch": 3.9, "learning_rate": 2.561459909228442e-05, "loss": 0.1338, "step": 5161 }, { "epoch": 3.9, "learning_rate": 2.5609871406959152e-05, "loss": 0.1511, "step": 5162 }, { "epoch": 3.9, "learning_rate": 2.560514372163389e-05, "loss": 0.1313, "step": 5163 }, { "epoch": 3.91, "learning_rate": 2.5600416036308623e-05, "loss": 0.1552, "step": 5164 }, { "epoch": 3.91, "learning_rate": 2.559568835098336e-05, "loss": 0.1556, "step": 5165 }, { "epoch": 3.91, "learning_rate": 2.5590960665658094e-05, "loss": 0.1504, "step": 5166 }, { "epoch": 3.91, "learning_rate": 2.558623298033283e-05, "loss": 0.1287, "step": 5167 }, { "epoch": 3.91, "learning_rate": 2.558150529500756e-05, "loss": 0.1306, "step": 5168 }, { "epoch": 3.91, "learning_rate": 2.55767776096823e-05, "loss": 0.1292, "step": 5169 }, { "epoch": 3.91, "learning_rate": 2.5572049924357032e-05, "loss": 0.1398, "step": 5170 }, { "epoch": 3.91, "learning_rate": 2.556732223903177e-05, "loss": 0.1641, "step": 5171 }, { "epoch": 3.91, "learning_rate": 2.556259455370651e-05, "loss": 0.1615, "step": 5172 }, { "epoch": 3.91, "learning_rate": 2.5557866868381242e-05, "loss": 0.1381, "step": 5173 }, { "epoch": 3.91, "learning_rate": 2.555313918305598e-05, "loss": 0.1219, "step": 5174 }, { "epoch": 3.91, "learning_rate": 2.5548411497730713e-05, "loss": 0.1437, "step": 5175 }, { "epoch": 3.91, "learning_rate": 2.554368381240545e-05, "loss": 0.1528, "step": 5176 }, { "epoch": 3.92, "learning_rate": 2.5538956127080184e-05, "loss": 0.1438, "step": 5177 }, { "epoch": 3.92, "learning_rate": 2.553422844175492e-05, "loss": 0.128, "step": 5178 }, { "epoch": 3.92, "learning_rate": 2.552950075642965e-05, "loss": 0.1455, "step": 5179 }, { "epoch": 3.92, "learning_rate": 2.552477307110439e-05, "loss": 0.1526, "step": 5180 }, { "epoch": 3.92, "learning_rate": 2.5520045385779122e-05, "loss": 0.1386, "step": 5181 }, { "epoch": 3.92, "learning_rate": 2.551531770045386e-05, "loss": 0.1704, "step": 5182 }, { "epoch": 3.92, "learning_rate": 2.5510590015128593e-05, "loss": 0.1526, "step": 5183 }, { "epoch": 3.92, "learning_rate": 2.5505862329803332e-05, "loss": 0.1551, "step": 5184 }, { "epoch": 3.92, "learning_rate": 2.5501134644478064e-05, "loss": 0.1295, "step": 5185 }, { "epoch": 3.92, "learning_rate": 2.5496406959152803e-05, "loss": 0.143, "step": 5186 }, { "epoch": 3.92, "learning_rate": 2.5491679273827535e-05, "loss": 0.1712, "step": 5187 }, { "epoch": 3.92, "learning_rate": 2.5486951588502274e-05, "loss": 0.1427, "step": 5188 }, { "epoch": 3.92, "learning_rate": 2.5482223903177006e-05, "loss": 0.1461, "step": 5189 }, { "epoch": 3.93, "learning_rate": 2.547749621785174e-05, "loss": 0.1381, "step": 5190 }, { "epoch": 3.93, "learning_rate": 2.5472768532526477e-05, "loss": 0.1439, "step": 5191 }, { "epoch": 3.93, "learning_rate": 2.5468040847201212e-05, "loss": 0.1316, "step": 5192 }, { "epoch": 3.93, "learning_rate": 2.5463313161875944e-05, "loss": 0.164, "step": 5193 }, { "epoch": 3.93, "learning_rate": 2.5458585476550683e-05, "loss": 0.1377, "step": 5194 }, { "epoch": 3.93, "learning_rate": 2.5453857791225415e-05, "loss": 0.1395, "step": 5195 }, { "epoch": 3.93, "learning_rate": 2.5449130105900154e-05, "loss": 0.1533, "step": 5196 }, { "epoch": 3.93, "learning_rate": 2.5444402420574886e-05, "loss": 0.1466, "step": 5197 }, { "epoch": 3.93, "learning_rate": 2.5439674735249625e-05, "loss": 0.1444, "step": 5198 }, { "epoch": 3.93, "learning_rate": 2.5434947049924357e-05, "loss": 0.1504, "step": 5199 }, { "epoch": 3.93, "learning_rate": 2.5430219364599096e-05, "loss": 0.1732, "step": 5200 }, { "epoch": 3.93, "learning_rate": 2.5425491679273828e-05, "loss": 0.1485, "step": 5201 }, { "epoch": 3.93, "learning_rate": 2.5420763993948567e-05, "loss": 0.1768, "step": 5202 }, { "epoch": 3.93, "learning_rate": 2.54160363086233e-05, "loss": 0.1445, "step": 5203 }, { "epoch": 3.94, "learning_rate": 2.5411308623298034e-05, "loss": 0.1495, "step": 5204 }, { "epoch": 3.94, "learning_rate": 2.540658093797277e-05, "loss": 0.1597, "step": 5205 }, { "epoch": 3.94, "learning_rate": 2.5401853252647505e-05, "loss": 0.1554, "step": 5206 }, { "epoch": 3.94, "learning_rate": 2.5397125567322237e-05, "loss": 0.1347, "step": 5207 }, { "epoch": 3.94, "learning_rate": 2.5392397881996976e-05, "loss": 0.1499, "step": 5208 }, { "epoch": 3.94, "learning_rate": 2.5387670196671708e-05, "loss": 0.1398, "step": 5209 }, { "epoch": 3.94, "learning_rate": 2.5382942511346447e-05, "loss": 0.1577, "step": 5210 }, { "epoch": 3.94, "learning_rate": 2.537821482602118e-05, "loss": 0.1283, "step": 5211 }, { "epoch": 3.94, "learning_rate": 2.5373487140695918e-05, "loss": 0.1261, "step": 5212 }, { "epoch": 3.94, "learning_rate": 2.536875945537065e-05, "loss": 0.1549, "step": 5213 }, { "epoch": 3.94, "learning_rate": 2.536403177004539e-05, "loss": 0.1408, "step": 5214 }, { "epoch": 3.94, "learning_rate": 2.535930408472012e-05, "loss": 0.1284, "step": 5215 }, { "epoch": 3.94, "learning_rate": 2.535457639939486e-05, "loss": 0.134, "step": 5216 }, { "epoch": 3.95, "learning_rate": 2.534984871406959e-05, "loss": 0.1602, "step": 5217 }, { "epoch": 3.95, "learning_rate": 2.5345121028744327e-05, "loss": 0.1265, "step": 5218 }, { "epoch": 3.95, "learning_rate": 2.5340393343419063e-05, "loss": 0.1412, "step": 5219 }, { "epoch": 3.95, "learning_rate": 2.5335665658093798e-05, "loss": 0.1745, "step": 5220 }, { "epoch": 3.95, "learning_rate": 2.533093797276853e-05, "loss": 0.1568, "step": 5221 }, { "epoch": 3.95, "learning_rate": 2.532621028744327e-05, "loss": 0.1552, "step": 5222 }, { "epoch": 3.95, "learning_rate": 2.5321482602118e-05, "loss": 0.1413, "step": 5223 }, { "epoch": 3.95, "learning_rate": 2.531675491679274e-05, "loss": 0.1498, "step": 5224 }, { "epoch": 3.95, "learning_rate": 2.5312027231467472e-05, "loss": 0.1351, "step": 5225 }, { "epoch": 3.95, "learning_rate": 2.530729954614221e-05, "loss": 0.1516, "step": 5226 }, { "epoch": 3.95, "learning_rate": 2.5302571860816943e-05, "loss": 0.1474, "step": 5227 }, { "epoch": 3.95, "learning_rate": 2.529784417549168e-05, "loss": 0.1527, "step": 5228 }, { "epoch": 3.95, "learning_rate": 2.5293116490166414e-05, "loss": 0.1646, "step": 5229 }, { "epoch": 3.96, "learning_rate": 2.5288388804841152e-05, "loss": 0.1502, "step": 5230 }, { "epoch": 3.96, "learning_rate": 2.5283661119515888e-05, "loss": 0.1421, "step": 5231 }, { "epoch": 3.96, "learning_rate": 2.527893343419062e-05, "loss": 0.1644, "step": 5232 }, { "epoch": 3.96, "learning_rate": 2.527420574886536e-05, "loss": 0.1466, "step": 5233 }, { "epoch": 3.96, "learning_rate": 2.526947806354009e-05, "loss": 0.1483, "step": 5234 }, { "epoch": 3.96, "learning_rate": 2.526475037821483e-05, "loss": 0.152, "step": 5235 }, { "epoch": 3.96, "learning_rate": 2.5260022692889562e-05, "loss": 0.154, "step": 5236 }, { "epoch": 3.96, "learning_rate": 2.52552950075643e-05, "loss": 0.1797, "step": 5237 }, { "epoch": 3.96, "learning_rate": 2.5250567322239033e-05, "loss": 0.1673, "step": 5238 }, { "epoch": 3.96, "learning_rate": 2.524583963691377e-05, "loss": 0.1699, "step": 5239 }, { "epoch": 3.96, "learning_rate": 2.5241111951588504e-05, "loss": 0.141, "step": 5240 }, { "epoch": 3.96, "learning_rate": 2.5236384266263242e-05, "loss": 0.1272, "step": 5241 }, { "epoch": 3.96, "learning_rate": 2.5231656580937974e-05, "loss": 0.1502, "step": 5242 }, { "epoch": 3.97, "learning_rate": 2.522692889561271e-05, "loss": 0.1644, "step": 5243 }, { "epoch": 3.97, "learning_rate": 2.5222201210287445e-05, "loss": 0.1565, "step": 5244 }, { "epoch": 3.97, "learning_rate": 2.521747352496218e-05, "loss": 0.1444, "step": 5245 }, { "epoch": 3.97, "learning_rate": 2.5212745839636913e-05, "loss": 0.1607, "step": 5246 }, { "epoch": 3.97, "learning_rate": 2.520801815431165e-05, "loss": 0.1663, "step": 5247 }, { "epoch": 3.97, "learning_rate": 2.5203290468986384e-05, "loss": 0.1549, "step": 5248 }, { "epoch": 3.97, "learning_rate": 2.5198562783661123e-05, "loss": 0.1427, "step": 5249 }, { "epoch": 3.97, "learning_rate": 2.5193835098335855e-05, "loss": 0.1488, "step": 5250 }, { "epoch": 3.97, "learning_rate": 2.5189107413010593e-05, "loss": 0.1489, "step": 5251 }, { "epoch": 3.97, "learning_rate": 2.5184379727685326e-05, "loss": 0.1335, "step": 5252 }, { "epoch": 3.97, "learning_rate": 2.5179652042360064e-05, "loss": 0.1486, "step": 5253 }, { "epoch": 3.97, "learning_rate": 2.5174924357034796e-05, "loss": 0.1431, "step": 5254 }, { "epoch": 3.97, "learning_rate": 2.5170196671709535e-05, "loss": 0.1535, "step": 5255 }, { "epoch": 3.98, "learning_rate": 2.5165468986384267e-05, "loss": 0.1519, "step": 5256 }, { "epoch": 3.98, "learning_rate": 2.5160741301059003e-05, "loss": 0.1436, "step": 5257 }, { "epoch": 3.98, "learning_rate": 2.5156013615733738e-05, "loss": 0.1371, "step": 5258 }, { "epoch": 3.98, "learning_rate": 2.5151285930408474e-05, "loss": 0.1703, "step": 5259 }, { "epoch": 3.98, "learning_rate": 2.5146558245083206e-05, "loss": 0.1505, "step": 5260 }, { "epoch": 3.98, "learning_rate": 2.5141830559757945e-05, "loss": 0.1408, "step": 5261 }, { "epoch": 3.98, "learning_rate": 2.5137102874432677e-05, "loss": 0.1504, "step": 5262 }, { "epoch": 3.98, "learning_rate": 2.5132375189107415e-05, "loss": 0.145, "step": 5263 }, { "epoch": 3.98, "learning_rate": 2.5127647503782148e-05, "loss": 0.1327, "step": 5264 }, { "epoch": 3.98, "learning_rate": 2.5122919818456886e-05, "loss": 0.1252, "step": 5265 }, { "epoch": 3.98, "learning_rate": 2.511819213313162e-05, "loss": 0.1433, "step": 5266 }, { "epoch": 3.98, "learning_rate": 2.5113464447806357e-05, "loss": 0.151, "step": 5267 }, { "epoch": 3.98, "learning_rate": 2.510873676248109e-05, "loss": 0.1406, "step": 5268 }, { "epoch": 3.98, "learning_rate": 2.5104009077155828e-05, "loss": 0.1465, "step": 5269 }, { "epoch": 3.99, "learning_rate": 2.509928139183056e-05, "loss": 0.1394, "step": 5270 }, { "epoch": 3.99, "learning_rate": 2.5094553706505296e-05, "loss": 0.164, "step": 5271 }, { "epoch": 3.99, "learning_rate": 2.508982602118003e-05, "loss": 0.1248, "step": 5272 }, { "epoch": 3.99, "learning_rate": 2.5085098335854767e-05, "loss": 0.1958, "step": 5273 }, { "epoch": 3.99, "learning_rate": 2.50803706505295e-05, "loss": 0.1354, "step": 5274 }, { "epoch": 3.99, "learning_rate": 2.5075642965204237e-05, "loss": 0.1502, "step": 5275 }, { "epoch": 3.99, "learning_rate": 2.507091527987897e-05, "loss": 0.1542, "step": 5276 }, { "epoch": 3.99, "learning_rate": 2.506618759455371e-05, "loss": 0.1298, "step": 5277 }, { "epoch": 3.99, "learning_rate": 2.506145990922844e-05, "loss": 0.1394, "step": 5278 }, { "epoch": 3.99, "learning_rate": 2.505673222390318e-05, "loss": 0.1631, "step": 5279 }, { "epoch": 3.99, "learning_rate": 2.505200453857791e-05, "loss": 0.145, "step": 5280 }, { "epoch": 3.99, "learning_rate": 2.504727685325265e-05, "loss": 0.1428, "step": 5281 }, { "epoch": 3.99, "learning_rate": 2.5042549167927382e-05, "loss": 0.1409, "step": 5282 }, { "epoch": 4.0, "learning_rate": 2.503782148260212e-05, "loss": 0.1504, "step": 5283 }, { "epoch": 4.0, "learning_rate": 2.5033093797276853e-05, "loss": 0.1305, "step": 5284 }, { "epoch": 4.0, "learning_rate": 2.502836611195159e-05, "loss": 0.1509, "step": 5285 }, { "epoch": 4.0, "learning_rate": 2.5023638426626324e-05, "loss": 0.1599, "step": 5286 }, { "epoch": 4.0, "learning_rate": 2.501891074130106e-05, "loss": 0.1491, "step": 5287 }, { "epoch": 4.0, "learning_rate": 2.501418305597579e-05, "loss": 0.1474, "step": 5288 }, { "epoch": 4.0, "learning_rate": 2.500945537065053e-05, "loss": 0.1404, "step": 5289 } ], "logging_steps": 1, "max_steps": 10576, "num_train_epochs": 8, "save_steps": 32, "total_flos": 1.8430546283824742e+18, "trial_name": null, "trial_params": null }