{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9999210671718368, "eval_steps": 500, "global_step": 3167, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.0416666666666667e-07, "loss": 2.8156, "step": 1 }, { "epoch": 0.0, "learning_rate": 2.0833333333333333e-07, "loss": 2.4465, "step": 2 }, { "epoch": 0.0, "learning_rate": 3.125e-07, "loss": 2.9714, "step": 3 }, { "epoch": 0.0, "learning_rate": 4.1666666666666667e-07, "loss": 2.7171, "step": 4 }, { "epoch": 0.0, "learning_rate": 5.208333333333334e-07, "loss": 2.6355, "step": 5 }, { "epoch": 0.0, "learning_rate": 6.25e-07, "loss": 2.2848, "step": 6 }, { "epoch": 0.0, "learning_rate": 7.291666666666667e-07, "loss": 2.3252, "step": 7 }, { "epoch": 0.0, "learning_rate": 8.333333333333333e-07, "loss": 1.9412, "step": 8 }, { "epoch": 0.0, "learning_rate": 9.375000000000001e-07, "loss": 2.0705, "step": 9 }, { "epoch": 0.0, "learning_rate": 1.0416666666666667e-06, "loss": 2.4618, "step": 10 }, { "epoch": 0.0, "learning_rate": 1.1458333333333333e-06, "loss": 1.9924, "step": 11 }, { "epoch": 0.0, "learning_rate": 1.25e-06, "loss": 2.625, "step": 12 }, { "epoch": 0.0, "learning_rate": 1.3541666666666667e-06, "loss": 2.131, "step": 13 }, { "epoch": 0.0, "learning_rate": 1.4583333333333335e-06, "loss": 2.4284, "step": 14 }, { "epoch": 0.0, "learning_rate": 1.5625e-06, "loss": 1.9733, "step": 15 }, { "epoch": 0.01, "learning_rate": 1.6666666666666667e-06, "loss": 2.2577, "step": 16 }, { "epoch": 0.01, "learning_rate": 1.7708333333333337e-06, "loss": 1.4548, "step": 17 }, { "epoch": 0.01, "learning_rate": 1.8750000000000003e-06, "loss": 1.7473, "step": 18 }, { "epoch": 0.01, "learning_rate": 1.9791666666666666e-06, "loss": 1.4739, "step": 19 }, { "epoch": 0.01, "learning_rate": 2.0833333333333334e-06, "loss": 1.7394, "step": 20 }, { "epoch": 0.01, "learning_rate": 2.1875000000000002e-06, "loss": 1.7946, "step": 21 }, { "epoch": 0.01, "learning_rate": 2.2916666666666666e-06, "loss": 1.672, "step": 22 }, { "epoch": 0.01, "learning_rate": 2.395833333333334e-06, "loss": 1.9767, "step": 23 }, { "epoch": 0.01, "learning_rate": 2.5e-06, "loss": 1.5437, "step": 24 }, { "epoch": 0.01, "learning_rate": 2.604166666666667e-06, "loss": 1.4698, "step": 25 }, { "epoch": 0.01, "learning_rate": 2.7083333333333334e-06, "loss": 1.7753, "step": 26 }, { "epoch": 0.01, "learning_rate": 2.8125e-06, "loss": 1.5218, "step": 27 }, { "epoch": 0.01, "learning_rate": 2.916666666666667e-06, "loss": 1.9874, "step": 28 }, { "epoch": 0.01, "learning_rate": 3.0208333333333334e-06, "loss": 1.5165, "step": 29 }, { "epoch": 0.01, "learning_rate": 3.125e-06, "loss": 2.0646, "step": 30 }, { "epoch": 0.01, "learning_rate": 3.229166666666667e-06, "loss": 1.0516, "step": 31 }, { "epoch": 0.01, "learning_rate": 3.3333333333333333e-06, "loss": 1.7042, "step": 32 }, { "epoch": 0.01, "learning_rate": 3.4375e-06, "loss": 2.17, "step": 33 }, { "epoch": 0.01, "learning_rate": 3.5416666666666673e-06, "loss": 1.8702, "step": 34 }, { "epoch": 0.01, "learning_rate": 3.6458333333333333e-06, "loss": 1.6258, "step": 35 }, { "epoch": 0.01, "learning_rate": 3.7500000000000005e-06, "loss": 1.5007, "step": 36 }, { "epoch": 0.01, "learning_rate": 3.854166666666667e-06, "loss": 1.2916, "step": 37 }, { "epoch": 0.01, "learning_rate": 3.958333333333333e-06, "loss": 1.6389, "step": 38 }, { "epoch": 0.01, "learning_rate": 4.0625000000000005e-06, "loss": 1.1242, "step": 39 }, { "epoch": 0.01, "learning_rate": 4.166666666666667e-06, "loss": 1.5837, "step": 40 }, { "epoch": 0.01, "learning_rate": 4.270833333333333e-06, "loss": 1.474, "step": 41 }, { "epoch": 0.01, "learning_rate": 4.3750000000000005e-06, "loss": 1.4258, "step": 42 }, { "epoch": 0.01, "learning_rate": 4.479166666666667e-06, "loss": 1.6018, "step": 43 }, { "epoch": 0.01, "learning_rate": 4.583333333333333e-06, "loss": 1.7465, "step": 44 }, { "epoch": 0.01, "learning_rate": 4.6875000000000004e-06, "loss": 1.632, "step": 45 }, { "epoch": 0.01, "learning_rate": 4.791666666666668e-06, "loss": 1.2199, "step": 46 }, { "epoch": 0.01, "learning_rate": 4.895833333333333e-06, "loss": 1.4677, "step": 47 }, { "epoch": 0.02, "learning_rate": 5e-06, "loss": 1.7694, "step": 48 }, { "epoch": 0.02, "learning_rate": 5.104166666666667e-06, "loss": 1.5253, "step": 49 }, { "epoch": 0.02, "learning_rate": 5.208333333333334e-06, "loss": 1.5144, "step": 50 }, { "epoch": 0.02, "learning_rate": 5.3125e-06, "loss": 1.4301, "step": 51 }, { "epoch": 0.02, "learning_rate": 5.416666666666667e-06, "loss": 1.6318, "step": 52 }, { "epoch": 0.02, "learning_rate": 5.520833333333334e-06, "loss": 1.6151, "step": 53 }, { "epoch": 0.02, "learning_rate": 5.625e-06, "loss": 1.5326, "step": 54 }, { "epoch": 0.02, "learning_rate": 5.729166666666667e-06, "loss": 1.7309, "step": 55 }, { "epoch": 0.02, "learning_rate": 5.833333333333334e-06, "loss": 1.8575, "step": 56 }, { "epoch": 0.02, "learning_rate": 5.9375e-06, "loss": 1.628, "step": 57 }, { "epoch": 0.02, "learning_rate": 6.041666666666667e-06, "loss": 1.7362, "step": 58 }, { "epoch": 0.02, "learning_rate": 6.145833333333334e-06, "loss": 1.4693, "step": 59 }, { "epoch": 0.02, "learning_rate": 6.25e-06, "loss": 1.6701, "step": 60 }, { "epoch": 0.02, "learning_rate": 6.354166666666667e-06, "loss": 1.2851, "step": 61 }, { "epoch": 0.02, "learning_rate": 6.458333333333334e-06, "loss": 1.7457, "step": 62 }, { "epoch": 0.02, "learning_rate": 6.5625e-06, "loss": 1.8074, "step": 63 }, { "epoch": 0.02, "learning_rate": 6.666666666666667e-06, "loss": 1.6153, "step": 64 }, { "epoch": 0.02, "learning_rate": 6.770833333333334e-06, "loss": 1.2354, "step": 65 }, { "epoch": 0.02, "learning_rate": 6.875e-06, "loss": 1.4022, "step": 66 }, { "epoch": 0.02, "learning_rate": 6.979166666666667e-06, "loss": 1.3848, "step": 67 }, { "epoch": 0.02, "learning_rate": 7.083333333333335e-06, "loss": 1.3943, "step": 68 }, { "epoch": 0.02, "learning_rate": 7.1875e-06, "loss": 1.5819, "step": 69 }, { "epoch": 0.02, "learning_rate": 7.291666666666667e-06, "loss": 1.0114, "step": 70 }, { "epoch": 0.02, "learning_rate": 7.395833333333335e-06, "loss": 1.217, "step": 71 }, { "epoch": 0.02, "learning_rate": 7.500000000000001e-06, "loss": 1.3366, "step": 72 }, { "epoch": 0.02, "learning_rate": 7.6041666666666666e-06, "loss": 1.4169, "step": 73 }, { "epoch": 0.02, "learning_rate": 7.708333333333334e-06, "loss": 1.4355, "step": 74 }, { "epoch": 0.02, "learning_rate": 7.8125e-06, "loss": 1.3917, "step": 75 }, { "epoch": 0.02, "learning_rate": 7.916666666666667e-06, "loss": 1.1946, "step": 76 }, { "epoch": 0.02, "learning_rate": 8.020833333333335e-06, "loss": 1.3275, "step": 77 }, { "epoch": 0.02, "learning_rate": 8.125000000000001e-06, "loss": 1.0985, "step": 78 }, { "epoch": 0.02, "learning_rate": 8.229166666666667e-06, "loss": 1.5995, "step": 79 }, { "epoch": 0.03, "learning_rate": 8.333333333333334e-06, "loss": 1.5353, "step": 80 }, { "epoch": 0.03, "learning_rate": 8.4375e-06, "loss": 1.4408, "step": 81 }, { "epoch": 0.03, "learning_rate": 8.541666666666666e-06, "loss": 1.3122, "step": 82 }, { "epoch": 0.03, "learning_rate": 8.645833333333335e-06, "loss": 1.2063, "step": 83 }, { "epoch": 0.03, "learning_rate": 8.750000000000001e-06, "loss": 1.44, "step": 84 }, { "epoch": 0.03, "learning_rate": 8.854166666666667e-06, "loss": 1.6565, "step": 85 }, { "epoch": 0.03, "learning_rate": 8.958333333333334e-06, "loss": 1.2278, "step": 86 }, { "epoch": 0.03, "learning_rate": 9.0625e-06, "loss": 1.5107, "step": 87 }, { "epoch": 0.03, "learning_rate": 9.166666666666666e-06, "loss": 1.661, "step": 88 }, { "epoch": 0.03, "learning_rate": 9.270833333333334e-06, "loss": 1.2179, "step": 89 }, { "epoch": 0.03, "learning_rate": 9.375000000000001e-06, "loss": 1.603, "step": 90 }, { "epoch": 0.03, "learning_rate": 9.479166666666667e-06, "loss": 1.3143, "step": 91 }, { "epoch": 0.03, "learning_rate": 9.583333333333335e-06, "loss": 1.8711, "step": 92 }, { "epoch": 0.03, "learning_rate": 9.6875e-06, "loss": 1.2736, "step": 93 }, { "epoch": 0.03, "learning_rate": 9.791666666666666e-06, "loss": 1.6296, "step": 94 }, { "epoch": 0.03, "learning_rate": 9.895833333333334e-06, "loss": 1.3508, "step": 95 }, { "epoch": 0.03, "learning_rate": 1e-05, "loss": 1.3732, "step": 96 }, { "epoch": 0.03, "learning_rate": 9.999997383744929e-06, "loss": 1.244, "step": 97 }, { "epoch": 0.03, "learning_rate": 9.999989534982451e-06, "loss": 1.7695, "step": 98 }, { "epoch": 0.03, "learning_rate": 9.99997645372078e-06, "loss": 1.0348, "step": 99 }, { "epoch": 0.03, "learning_rate": 9.999958139973607e-06, "loss": 1.9336, "step": 100 }, { "epoch": 0.03, "learning_rate": 9.999934593760096e-06, "loss": 1.6912, "step": 101 }, { "epoch": 0.03, "learning_rate": 9.999905815104888e-06, "loss": 1.5225, "step": 102 }, { "epoch": 0.03, "learning_rate": 9.999871804038102e-06, "loss": 1.4217, "step": 103 }, { "epoch": 0.03, "learning_rate": 9.999832560595329e-06, "loss": 1.8426, "step": 104 }, { "epoch": 0.03, "learning_rate": 9.999788084817637e-06, "loss": 1.1125, "step": 105 }, { "epoch": 0.03, "learning_rate": 9.999738376751569e-06, "loss": 1.1937, "step": 106 }, { "epoch": 0.03, "learning_rate": 9.999683436449147e-06, "loss": 1.9472, "step": 107 }, { "epoch": 0.03, "learning_rate": 9.999623263967868e-06, "loss": 1.5342, "step": 108 }, { "epoch": 0.03, "learning_rate": 9.999557859370696e-06, "loss": 1.5897, "step": 109 }, { "epoch": 0.03, "learning_rate": 9.999487222726084e-06, "loss": 1.7838, "step": 110 }, { "epoch": 0.04, "learning_rate": 9.999411354107949e-06, "loss": 1.7522, "step": 111 }, { "epoch": 0.04, "learning_rate": 9.999330253595689e-06, "loss": 1.6945, "step": 112 }, { "epoch": 0.04, "learning_rate": 9.999243921274177e-06, "loss": 1.2105, "step": 113 }, { "epoch": 0.04, "learning_rate": 9.99915235723376e-06, "loss": 1.3681, "step": 114 }, { "epoch": 0.04, "learning_rate": 9.999055561570255e-06, "loss": 1.6845, "step": 115 }, { "epoch": 0.04, "learning_rate": 9.998953534384965e-06, "loss": 1.5039, "step": 116 }, { "epoch": 0.04, "learning_rate": 9.998846275784658e-06, "loss": 1.457, "step": 117 }, { "epoch": 0.04, "learning_rate": 9.998733785881583e-06, "loss": 1.2317, "step": 118 }, { "epoch": 0.04, "learning_rate": 9.998616064793458e-06, "loss": 1.4213, "step": 119 }, { "epoch": 0.04, "learning_rate": 9.998493112643481e-06, "loss": 1.2127, "step": 120 }, { "epoch": 0.04, "learning_rate": 9.998364929560322e-06, "loss": 1.982, "step": 121 }, { "epoch": 0.04, "learning_rate": 9.99823151567812e-06, "loss": 1.1237, "step": 122 }, { "epoch": 0.04, "learning_rate": 9.9980928711365e-06, "loss": 1.2189, "step": 123 }, { "epoch": 0.04, "learning_rate": 9.997948996080547e-06, "loss": 1.3361, "step": 124 }, { "epoch": 0.04, "learning_rate": 9.997799890660833e-06, "loss": 1.3134, "step": 125 }, { "epoch": 0.04, "learning_rate": 9.997645555033391e-06, "loss": 1.8981, "step": 126 }, { "epoch": 0.04, "learning_rate": 9.997485989359737e-06, "loss": 1.3199, "step": 127 }, { "epoch": 0.04, "learning_rate": 9.997321193806857e-06, "loss": 1.7517, "step": 128 }, { "epoch": 0.04, "learning_rate": 9.997151168547208e-06, "loss": 1.4371, "step": 129 }, { "epoch": 0.04, "learning_rate": 9.996975913758722e-06, "loss": 1.3717, "step": 130 }, { "epoch": 0.04, "learning_rate": 9.996795429624806e-06, "loss": 1.3614, "step": 131 }, { "epoch": 0.04, "learning_rate": 9.996609716334335e-06, "loss": 1.6848, "step": 132 }, { "epoch": 0.04, "learning_rate": 9.996418774081658e-06, "loss": 1.6411, "step": 133 }, { "epoch": 0.04, "learning_rate": 9.996222603066597e-06, "loss": 1.6645, "step": 134 }, { "epoch": 0.04, "learning_rate": 9.996021203494446e-06, "loss": 1.1424, "step": 135 }, { "epoch": 0.04, "learning_rate": 9.99581457557597e-06, "loss": 1.4565, "step": 136 }, { "epoch": 0.04, "learning_rate": 9.995602719527404e-06, "loss": 1.813, "step": 137 }, { "epoch": 0.04, "learning_rate": 9.995385635570459e-06, "loss": 1.9485, "step": 138 }, { "epoch": 0.04, "learning_rate": 9.995163323932308e-06, "loss": 1.3796, "step": 139 }, { "epoch": 0.04, "learning_rate": 9.994935784845608e-06, "loss": 1.4265, "step": 140 }, { "epoch": 0.04, "learning_rate": 9.994703018548472e-06, "loss": 1.4919, "step": 141 }, { "epoch": 0.04, "learning_rate": 9.994465025284496e-06, "loss": 1.5355, "step": 142 }, { "epoch": 0.05, "learning_rate": 9.994221805302737e-06, "loss": 1.4055, "step": 143 }, { "epoch": 0.05, "learning_rate": 9.993973358857726e-06, "loss": 1.0806, "step": 144 }, { "epoch": 0.05, "learning_rate": 9.993719686209464e-06, "loss": 1.4864, "step": 145 }, { "epoch": 0.05, "learning_rate": 9.993460787623419e-06, "loss": 1.2083, "step": 146 }, { "epoch": 0.05, "learning_rate": 9.993196663370531e-06, "loss": 1.3557, "step": 147 }, { "epoch": 0.05, "learning_rate": 9.992927313727201e-06, "loss": 1.2833, "step": 148 }, { "epoch": 0.05, "learning_rate": 9.992652738975308e-06, "loss": 1.9396, "step": 149 }, { "epoch": 0.05, "learning_rate": 9.992372939402196e-06, "loss": 1.4485, "step": 150 }, { "epoch": 0.05, "learning_rate": 9.992087915300674e-06, "loss": 1.2039, "step": 151 }, { "epoch": 0.05, "learning_rate": 9.99179766696902e-06, "loss": 1.5156, "step": 152 }, { "epoch": 0.05, "learning_rate": 9.99150219471098e-06, "loss": 1.443, "step": 153 }, { "epoch": 0.05, "learning_rate": 9.991201498835766e-06, "loss": 1.3472, "step": 154 }, { "epoch": 0.05, "learning_rate": 9.990895579658057e-06, "loss": 1.3371, "step": 155 }, { "epoch": 0.05, "learning_rate": 9.990584437498001e-06, "loss": 1.7594, "step": 156 }, { "epoch": 0.05, "learning_rate": 9.990268072681204e-06, "loss": 1.2515, "step": 157 }, { "epoch": 0.05, "learning_rate": 9.989946485538743e-06, "loss": 1.1579, "step": 158 }, { "epoch": 0.05, "learning_rate": 9.989619676407164e-06, "loss": 1.4017, "step": 159 }, { "epoch": 0.05, "learning_rate": 9.989287645628469e-06, "loss": 1.4496, "step": 160 }, { "epoch": 0.05, "learning_rate": 9.988950393550132e-06, "loss": 1.2079, "step": 161 }, { "epoch": 0.05, "learning_rate": 9.988607920525087e-06, "loss": 1.4018, "step": 162 }, { "epoch": 0.05, "learning_rate": 9.98826022691173e-06, "loss": 1.2119, "step": 163 }, { "epoch": 0.05, "learning_rate": 9.987907313073925e-06, "loss": 1.4256, "step": 164 }, { "epoch": 0.05, "learning_rate": 9.987549179381e-06, "loss": 1.1194, "step": 165 }, { "epoch": 0.05, "learning_rate": 9.987185826207736e-06, "loss": 1.4015, "step": 166 }, { "epoch": 0.05, "learning_rate": 9.986817253934391e-06, "loss": 1.2421, "step": 167 }, { "epoch": 0.05, "learning_rate": 9.98644346294667e-06, "loss": 1.4741, "step": 168 }, { "epoch": 0.05, "learning_rate": 9.986064453635748e-06, "loss": 1.2293, "step": 169 }, { "epoch": 0.05, "learning_rate": 9.985680226398261e-06, "loss": 1.3491, "step": 170 }, { "epoch": 0.05, "learning_rate": 9.985290781636302e-06, "loss": 1.721, "step": 171 }, { "epoch": 0.05, "learning_rate": 9.984896119757425e-06, "loss": 1.3223, "step": 172 }, { "epoch": 0.05, "learning_rate": 9.984496241174646e-06, "loss": 1.2363, "step": 173 }, { "epoch": 0.05, "learning_rate": 9.984091146306436e-06, "loss": 1.3128, "step": 174 }, { "epoch": 0.06, "learning_rate": 9.983680835576731e-06, "loss": 1.2395, "step": 175 }, { "epoch": 0.06, "learning_rate": 9.98326530941492e-06, "loss": 1.553, "step": 176 }, { "epoch": 0.06, "learning_rate": 9.982844568255853e-06, "loss": 1.6084, "step": 177 }, { "epoch": 0.06, "learning_rate": 9.982418612539837e-06, "loss": 1.8462, "step": 178 }, { "epoch": 0.06, "learning_rate": 9.981987442712634e-06, "loss": 1.3552, "step": 179 }, { "epoch": 0.06, "learning_rate": 9.981551059225464e-06, "loss": 1.4207, "step": 180 }, { "epoch": 0.06, "learning_rate": 9.981109462535004e-06, "loss": 1.2276, "step": 181 }, { "epoch": 0.06, "learning_rate": 9.980662653103385e-06, "loss": 1.3328, "step": 182 }, { "epoch": 0.06, "learning_rate": 9.980210631398197e-06, "loss": 1.5115, "step": 183 }, { "epoch": 0.06, "learning_rate": 9.979753397892477e-06, "loss": 1.3171, "step": 184 }, { "epoch": 0.06, "learning_rate": 9.979290953064723e-06, "loss": 1.4083, "step": 185 }, { "epoch": 0.06, "learning_rate": 9.978823297398885e-06, "loss": 1.0534, "step": 186 }, { "epoch": 0.06, "learning_rate": 9.978350431384367e-06, "loss": 1.1786, "step": 187 }, { "epoch": 0.06, "learning_rate": 9.977872355516021e-06, "loss": 1.697, "step": 188 }, { "epoch": 0.06, "learning_rate": 9.977389070294155e-06, "loss": 1.5591, "step": 189 }, { "epoch": 0.06, "learning_rate": 9.97690057622453e-06, "loss": 1.4832, "step": 190 }, { "epoch": 0.06, "learning_rate": 9.976406873818355e-06, "loss": 1.5999, "step": 191 }, { "epoch": 0.06, "learning_rate": 9.975907963592288e-06, "loss": 1.2896, "step": 192 }, { "epoch": 0.06, "learning_rate": 9.975403846068444e-06, "loss": 1.9716, "step": 193 }, { "epoch": 0.06, "learning_rate": 9.97489452177438e-06, "loss": 1.198, "step": 194 }, { "epoch": 0.06, "learning_rate": 9.974379991243107e-06, "loss": 1.4041, "step": 195 }, { "epoch": 0.06, "learning_rate": 9.973860255013079e-06, "loss": 1.3128, "step": 196 }, { "epoch": 0.06, "learning_rate": 9.973335313628203e-06, "loss": 1.2192, "step": 197 }, { "epoch": 0.06, "learning_rate": 9.972805167637833e-06, "loss": 1.4471, "step": 198 }, { "epoch": 0.06, "learning_rate": 9.972269817596766e-06, "loss": 1.5188, "step": 199 }, { "epoch": 0.06, "learning_rate": 9.971729264065246e-06, "loss": 1.2533, "step": 200 }, { "epoch": 0.06, "learning_rate": 9.971183507608967e-06, "loss": 1.2823, "step": 201 }, { "epoch": 0.06, "learning_rate": 9.970632548799058e-06, "loss": 1.2434, "step": 202 }, { "epoch": 0.06, "learning_rate": 9.970076388212104e-06, "loss": 1.5793, "step": 203 }, { "epoch": 0.06, "learning_rate": 9.969515026430126e-06, "loss": 1.7272, "step": 204 }, { "epoch": 0.06, "learning_rate": 9.968948464040592e-06, "loss": 1.5328, "step": 205 }, { "epoch": 0.07, "learning_rate": 9.968376701636408e-06, "loss": 1.7449, "step": 206 }, { "epoch": 0.07, "learning_rate": 9.967799739815925e-06, "loss": 1.6415, "step": 207 }, { "epoch": 0.07, "learning_rate": 9.967217579182937e-06, "loss": 0.894, "step": 208 }, { "epoch": 0.07, "learning_rate": 9.966630220346674e-06, "loss": 1.4592, "step": 209 }, { "epoch": 0.07, "learning_rate": 9.966037663921809e-06, "loss": 1.5997, "step": 210 }, { "epoch": 0.07, "learning_rate": 9.965439910528454e-06, "loss": 2.313, "step": 211 }, { "epoch": 0.07, "learning_rate": 9.964836960792159e-06, "loss": 1.413, "step": 212 }, { "epoch": 0.07, "learning_rate": 9.96422881534391e-06, "loss": 1.6127, "step": 213 }, { "epoch": 0.07, "learning_rate": 9.963615474820136e-06, "loss": 1.2929, "step": 214 }, { "epoch": 0.07, "learning_rate": 9.962996939862696e-06, "loss": 1.271, "step": 215 }, { "epoch": 0.07, "learning_rate": 9.96237321111889e-06, "loss": 1.154, "step": 216 }, { "epoch": 0.07, "learning_rate": 9.961744289241452e-06, "loss": 1.4936, "step": 217 }, { "epoch": 0.07, "learning_rate": 9.961110174888547e-06, "loss": 1.7374, "step": 218 }, { "epoch": 0.07, "learning_rate": 9.960470868723779e-06, "loss": 1.6729, "step": 219 }, { "epoch": 0.07, "learning_rate": 9.959826371416185e-06, "loss": 1.0433, "step": 220 }, { "epoch": 0.07, "learning_rate": 9.959176683640228e-06, "loss": 1.4341, "step": 221 }, { "epoch": 0.07, "learning_rate": 9.958521806075811e-06, "loss": 1.2874, "step": 222 }, { "epoch": 0.07, "learning_rate": 9.957861739408265e-06, "loss": 1.329, "step": 223 }, { "epoch": 0.07, "learning_rate": 9.957196484328351e-06, "loss": 1.4164, "step": 224 }, { "epoch": 0.07, "learning_rate": 9.956526041532258e-06, "loss": 1.2297, "step": 225 }, { "epoch": 0.07, "learning_rate": 9.955850411721606e-06, "loss": 1.4721, "step": 226 }, { "epoch": 0.07, "learning_rate": 9.955169595603444e-06, "loss": 1.2726, "step": 227 }, { "epoch": 0.07, "learning_rate": 9.954483593890245e-06, "loss": 1.4484, "step": 228 }, { "epoch": 0.07, "learning_rate": 9.953792407299915e-06, "loss": 1.8528, "step": 229 }, { "epoch": 0.07, "learning_rate": 9.953096036555782e-06, "loss": 1.0662, "step": 230 }, { "epoch": 0.07, "learning_rate": 9.952394482386597e-06, "loss": 1.6212, "step": 231 }, { "epoch": 0.07, "learning_rate": 9.951687745526538e-06, "loss": 1.6131, "step": 232 }, { "epoch": 0.07, "learning_rate": 9.950975826715205e-06, "loss": 1.1192, "step": 233 }, { "epoch": 0.07, "learning_rate": 9.950258726697628e-06, "loss": 1.4461, "step": 234 }, { "epoch": 0.07, "learning_rate": 9.94953644622425e-06, "loss": 1.3163, "step": 235 }, { "epoch": 0.07, "learning_rate": 9.948808986050938e-06, "loss": 1.8161, "step": 236 }, { "epoch": 0.07, "learning_rate": 9.948076346938983e-06, "loss": 1.2215, "step": 237 }, { "epoch": 0.08, "learning_rate": 9.94733852965509e-06, "loss": 1.4455, "step": 238 }, { "epoch": 0.08, "learning_rate": 9.94659553497139e-06, "loss": 1.5311, "step": 239 }, { "epoch": 0.08, "learning_rate": 9.945847363665428e-06, "loss": 1.095, "step": 240 }, { "epoch": 0.08, "learning_rate": 9.945094016520166e-06, "loss": 1.2005, "step": 241 }, { "epoch": 0.08, "learning_rate": 9.94433549432398e-06, "loss": 1.5973, "step": 242 }, { "epoch": 0.08, "learning_rate": 9.94357179787067e-06, "loss": 1.384, "step": 243 }, { "epoch": 0.08, "learning_rate": 9.942802927959444e-06, "loss": 1.4147, "step": 244 }, { "epoch": 0.08, "learning_rate": 9.942028885394926e-06, "loss": 1.9484, "step": 245 }, { "epoch": 0.08, "learning_rate": 9.941249670987152e-06, "loss": 1.4233, "step": 246 }, { "epoch": 0.08, "learning_rate": 9.940465285551573e-06, "loss": 1.5731, "step": 247 }, { "epoch": 0.08, "learning_rate": 9.939675729909049e-06, "loss": 1.6541, "step": 248 }, { "epoch": 0.08, "learning_rate": 9.938881004885852e-06, "loss": 1.5975, "step": 249 }, { "epoch": 0.08, "learning_rate": 9.938081111313662e-06, "loss": 1.1806, "step": 250 }, { "epoch": 0.08, "learning_rate": 9.937276050029572e-06, "loss": 1.5556, "step": 251 }, { "epoch": 0.08, "learning_rate": 9.93646582187608e-06, "loss": 1.4953, "step": 252 }, { "epoch": 0.08, "learning_rate": 9.93565042770109e-06, "loss": 2.0472, "step": 253 }, { "epoch": 0.08, "learning_rate": 9.934829868357911e-06, "loss": 1.1668, "step": 254 }, { "epoch": 0.08, "learning_rate": 9.934004144705265e-06, "loss": 1.3443, "step": 255 }, { "epoch": 0.08, "learning_rate": 9.933173257607271e-06, "loss": 1.2298, "step": 256 }, { "epoch": 0.08, "learning_rate": 9.932337207933454e-06, "loss": 1.4703, "step": 257 }, { "epoch": 0.08, "learning_rate": 9.931495996558743e-06, "loss": 1.5138, "step": 258 }, { "epoch": 0.08, "learning_rate": 9.930649624363466e-06, "loss": 1.2004, "step": 259 }, { "epoch": 0.08, "learning_rate": 9.929798092233354e-06, "loss": 1.6395, "step": 260 }, { "epoch": 0.08, "learning_rate": 9.928941401059538e-06, "loss": 1.0097, "step": 261 }, { "epoch": 0.08, "learning_rate": 9.928079551738542e-06, "loss": 1.1854, "step": 262 }, { "epoch": 0.08, "learning_rate": 9.9272125451723e-06, "loss": 1.3284, "step": 263 }, { "epoch": 0.08, "learning_rate": 9.926340382268134e-06, "loss": 1.2096, "step": 264 }, { "epoch": 0.08, "learning_rate": 9.92546306393876e-06, "loss": 1.3788, "step": 265 }, { "epoch": 0.08, "learning_rate": 9.9245805911023e-06, "loss": 1.2902, "step": 266 }, { "epoch": 0.08, "learning_rate": 9.923692964682257e-06, "loss": 1.3819, "step": 267 }, { "epoch": 0.08, "learning_rate": 9.922800185607539e-06, "loss": 1.3693, "step": 268 }, { "epoch": 0.08, "learning_rate": 9.92190225481244e-06, "loss": 1.0808, "step": 269 }, { "epoch": 0.09, "learning_rate": 9.920999173236645e-06, "loss": 1.1558, "step": 270 }, { "epoch": 0.09, "learning_rate": 9.92009094182523e-06, "loss": 1.5496, "step": 271 }, { "epoch": 0.09, "learning_rate": 9.919177561528666e-06, "loss": 0.9341, "step": 272 }, { "epoch": 0.09, "learning_rate": 9.9182590333028e-06, "loss": 1.0191, "step": 273 }, { "epoch": 0.09, "learning_rate": 9.917335358108879e-06, "loss": 0.8892, "step": 274 }, { "epoch": 0.09, "learning_rate": 9.91640653691353e-06, "loss": 1.4763, "step": 275 }, { "epoch": 0.09, "learning_rate": 9.915472570688765e-06, "loss": 1.5355, "step": 276 }, { "epoch": 0.09, "learning_rate": 9.914533460411982e-06, "loss": 1.2176, "step": 277 }, { "epoch": 0.09, "learning_rate": 9.913589207065962e-06, "loss": 1.3623, "step": 278 }, { "epoch": 0.09, "learning_rate": 9.912639811638866e-06, "loss": 1.053, "step": 279 }, { "epoch": 0.09, "learning_rate": 9.911685275124242e-06, "loss": 1.4685, "step": 280 }, { "epoch": 0.09, "learning_rate": 9.910725598521014e-06, "loss": 1.2594, "step": 281 }, { "epoch": 0.09, "learning_rate": 9.909760782833484e-06, "loss": 1.6666, "step": 282 }, { "epoch": 0.09, "learning_rate": 9.908790829071332e-06, "loss": 1.1781, "step": 283 }, { "epoch": 0.09, "learning_rate": 9.90781573824962e-06, "loss": 1.4465, "step": 284 }, { "epoch": 0.09, "learning_rate": 9.90683551138878e-06, "loss": 1.5444, "step": 285 }, { "epoch": 0.09, "learning_rate": 9.905850149514621e-06, "loss": 1.321, "step": 286 }, { "epoch": 0.09, "learning_rate": 9.904859653658329e-06, "loss": 1.4823, "step": 287 }, { "epoch": 0.09, "learning_rate": 9.903864024856458e-06, "loss": 1.513, "step": 288 }, { "epoch": 0.09, "learning_rate": 9.902863264150936e-06, "loss": 1.5943, "step": 289 }, { "epoch": 0.09, "learning_rate": 9.901857372589061e-06, "loss": 1.2345, "step": 290 }, { "epoch": 0.09, "learning_rate": 9.900846351223502e-06, "loss": 1.4216, "step": 291 }, { "epoch": 0.09, "learning_rate": 9.899830201112291e-06, "loss": 1.4385, "step": 292 }, { "epoch": 0.09, "learning_rate": 9.898808923318834e-06, "loss": 1.4965, "step": 293 }, { "epoch": 0.09, "learning_rate": 9.897782518911902e-06, "loss": 1.3126, "step": 294 }, { "epoch": 0.09, "learning_rate": 9.896750988965626e-06, "loss": 1.2311, "step": 295 }, { "epoch": 0.09, "learning_rate": 9.895714334559506e-06, "loss": 1.1718, "step": 296 }, { "epoch": 0.09, "learning_rate": 9.894672556778402e-06, "loss": 1.4838, "step": 297 }, { "epoch": 0.09, "learning_rate": 9.893625656712536e-06, "loss": 1.4921, "step": 298 }, { "epoch": 0.09, "learning_rate": 9.892573635457492e-06, "loss": 1.0637, "step": 299 }, { "epoch": 0.09, "learning_rate": 9.891516494114214e-06, "loss": 1.8705, "step": 300 }, { "epoch": 0.1, "learning_rate": 9.890454233789e-06, "loss": 1.2186, "step": 301 }, { "epoch": 0.1, "learning_rate": 9.889386855593508e-06, "loss": 1.3971, "step": 302 }, { "epoch": 0.1, "learning_rate": 9.888314360644753e-06, "loss": 1.5063, "step": 303 }, { "epoch": 0.1, "learning_rate": 9.887236750065101e-06, "loss": 1.4085, "step": 304 }, { "epoch": 0.1, "learning_rate": 9.886154024982276e-06, "loss": 1.2738, "step": 305 }, { "epoch": 0.1, "learning_rate": 9.88506618652935e-06, "loss": 1.469, "step": 306 }, { "epoch": 0.1, "learning_rate": 9.883973235844749e-06, "loss": 1.6119, "step": 307 }, { "epoch": 0.1, "learning_rate": 9.882875174072247e-06, "loss": 1.6577, "step": 308 }, { "epoch": 0.1, "learning_rate": 9.88177200236097e-06, "loss": 1.3504, "step": 309 }, { "epoch": 0.1, "learning_rate": 9.880663721865388e-06, "loss": 1.2823, "step": 310 }, { "epoch": 0.1, "learning_rate": 9.879550333745319e-06, "loss": 1.4268, "step": 311 }, { "epoch": 0.1, "learning_rate": 9.878431839165925e-06, "loss": 1.4209, "step": 312 }, { "epoch": 0.1, "learning_rate": 9.877308239297715e-06, "loss": 1.5912, "step": 313 }, { "epoch": 0.1, "learning_rate": 9.876179535316538e-06, "loss": 1.3655, "step": 314 }, { "epoch": 0.1, "learning_rate": 9.875045728403583e-06, "loss": 1.7096, "step": 315 }, { "epoch": 0.1, "learning_rate": 9.873906819745385e-06, "loss": 1.1574, "step": 316 }, { "epoch": 0.1, "learning_rate": 9.872762810533809e-06, "loss": 1.1941, "step": 317 }, { "epoch": 0.1, "learning_rate": 9.871613701966067e-06, "loss": 1.1846, "step": 318 }, { "epoch": 0.1, "learning_rate": 9.870459495244703e-06, "loss": 1.3102, "step": 319 }, { "epoch": 0.1, "learning_rate": 9.869300191577596e-06, "loss": 1.2098, "step": 320 }, { "epoch": 0.1, "learning_rate": 9.868135792177957e-06, "loss": 1.2865, "step": 321 }, { "epoch": 0.1, "learning_rate": 9.866966298264337e-06, "loss": 1.5749, "step": 322 }, { "epoch": 0.1, "learning_rate": 9.865791711060613e-06, "loss": 1.0152, "step": 323 }, { "epoch": 0.1, "learning_rate": 9.86461203179599e-06, "loss": 1.3686, "step": 324 }, { "epoch": 0.1, "learning_rate": 9.863427261705005e-06, "loss": 1.4783, "step": 325 }, { "epoch": 0.1, "learning_rate": 9.862237402027525e-06, "loss": 1.5631, "step": 326 }, { "epoch": 0.1, "learning_rate": 9.86104245400874e-06, "loss": 1.3502, "step": 327 }, { "epoch": 0.1, "learning_rate": 9.859842418899163e-06, "loss": 1.6332, "step": 328 }, { "epoch": 0.1, "learning_rate": 9.858637297954636e-06, "loss": 1.2913, "step": 329 }, { "epoch": 0.1, "learning_rate": 9.85742709243632e-06, "loss": 1.03, "step": 330 }, { "epoch": 0.1, "learning_rate": 9.856211803610695e-06, "loss": 1.0409, "step": 331 }, { "epoch": 0.1, "learning_rate": 9.854991432749566e-06, "loss": 0.8449, "step": 332 }, { "epoch": 0.11, "learning_rate": 9.853765981130055e-06, "loss": 1.5968, "step": 333 }, { "epoch": 0.11, "learning_rate": 9.852535450034593e-06, "loss": 1.3538, "step": 334 }, { "epoch": 0.11, "learning_rate": 9.851299840750942e-06, "loss": 1.7195, "step": 335 }, { "epoch": 0.11, "learning_rate": 9.850059154572163e-06, "loss": 1.6458, "step": 336 }, { "epoch": 0.11, "learning_rate": 9.848813392796639e-06, "loss": 1.498, "step": 337 }, { "epoch": 0.11, "learning_rate": 9.847562556728061e-06, "loss": 1.407, "step": 338 }, { "epoch": 0.11, "learning_rate": 9.846306647675434e-06, "loss": 1.1718, "step": 339 }, { "epoch": 0.11, "learning_rate": 9.845045666953066e-06, "loss": 1.3469, "step": 340 }, { "epoch": 0.11, "learning_rate": 9.84377961588058e-06, "loss": 1.3747, "step": 341 }, { "epoch": 0.11, "learning_rate": 9.842508495782898e-06, "loss": 1.4172, "step": 342 }, { "epoch": 0.11, "learning_rate": 9.841232307990252e-06, "loss": 1.2162, "step": 343 }, { "epoch": 0.11, "learning_rate": 9.839951053838173e-06, "loss": 1.2288, "step": 344 }, { "epoch": 0.11, "learning_rate": 9.838664734667496e-06, "loss": 1.3375, "step": 345 }, { "epoch": 0.11, "learning_rate": 9.837373351824357e-06, "loss": 1.3939, "step": 346 }, { "epoch": 0.11, "learning_rate": 9.83607690666019e-06, "loss": 1.6265, "step": 347 }, { "epoch": 0.11, "learning_rate": 9.834775400531733e-06, "loss": 1.5192, "step": 348 }, { "epoch": 0.11, "learning_rate": 9.833468834801006e-06, "loss": 0.9759, "step": 349 }, { "epoch": 0.11, "learning_rate": 9.83215721083534e-06, "loss": 1.6061, "step": 350 }, { "epoch": 0.11, "learning_rate": 9.830840530007348e-06, "loss": 1.5276, "step": 351 }, { "epoch": 0.11, "learning_rate": 9.829518793694941e-06, "loss": 1.7397, "step": 352 }, { "epoch": 0.11, "learning_rate": 9.828192003281318e-06, "loss": 0.9104, "step": 353 }, { "epoch": 0.11, "learning_rate": 9.826860160154967e-06, "loss": 1.4673, "step": 354 }, { "epoch": 0.11, "learning_rate": 9.825523265709667e-06, "loss": 1.3211, "step": 355 }, { "epoch": 0.11, "learning_rate": 9.82418132134448e-06, "loss": 1.251, "step": 356 }, { "epoch": 0.11, "learning_rate": 9.82283432846375e-06, "loss": 1.4301, "step": 357 }, { "epoch": 0.11, "learning_rate": 9.821482288477113e-06, "loss": 1.2287, "step": 358 }, { "epoch": 0.11, "learning_rate": 9.820125202799479e-06, "loss": 1.2045, "step": 359 }, { "epoch": 0.11, "learning_rate": 9.818763072851041e-06, "loss": 1.6211, "step": 360 }, { "epoch": 0.11, "learning_rate": 9.817395900057269e-06, "loss": 1.3515, "step": 361 }, { "epoch": 0.11, "learning_rate": 9.816023685848917e-06, "loss": 1.9171, "step": 362 }, { "epoch": 0.11, "learning_rate": 9.814646431662005e-06, "loss": 1.3811, "step": 363 }, { "epoch": 0.11, "learning_rate": 9.813264138937835e-06, "loss": 1.1943, "step": 364 }, { "epoch": 0.12, "learning_rate": 9.811876809122977e-06, "loss": 1.2716, "step": 365 }, { "epoch": 0.12, "learning_rate": 9.810484443669276e-06, "loss": 1.5299, "step": 366 }, { "epoch": 0.12, "learning_rate": 9.809087044033846e-06, "loss": 1.3194, "step": 367 }, { "epoch": 0.12, "learning_rate": 9.807684611679068e-06, "loss": 1.4912, "step": 368 }, { "epoch": 0.12, "learning_rate": 9.80627714807259e-06, "loss": 1.2558, "step": 369 }, { "epoch": 0.12, "learning_rate": 9.804864654687325e-06, "loss": 1.038, "step": 370 }, { "epoch": 0.12, "learning_rate": 9.80344713300145e-06, "loss": 1.4475, "step": 371 }, { "epoch": 0.12, "learning_rate": 9.802024584498407e-06, "loss": 1.4185, "step": 372 }, { "epoch": 0.12, "learning_rate": 9.800597010666892e-06, "loss": 1.9354, "step": 373 }, { "epoch": 0.12, "learning_rate": 9.799164413000865e-06, "loss": 1.3171, "step": 374 }, { "epoch": 0.12, "learning_rate": 9.797726792999544e-06, "loss": 1.2637, "step": 375 }, { "epoch": 0.12, "learning_rate": 9.796284152167401e-06, "loss": 1.2377, "step": 376 }, { "epoch": 0.12, "learning_rate": 9.794836492014163e-06, "loss": 1.1815, "step": 377 }, { "epoch": 0.12, "learning_rate": 9.793383814054807e-06, "loss": 1.5768, "step": 378 }, { "epoch": 0.12, "learning_rate": 9.791926119809564e-06, "loss": 1.6812, "step": 379 }, { "epoch": 0.12, "learning_rate": 9.790463410803916e-06, "loss": 1.5797, "step": 380 }, { "epoch": 0.12, "learning_rate": 9.788995688568589e-06, "loss": 1.6017, "step": 381 }, { "epoch": 0.12, "learning_rate": 9.787522954639558e-06, "loss": 1.188, "step": 382 }, { "epoch": 0.12, "learning_rate": 9.786045210558041e-06, "loss": 1.6014, "step": 383 }, { "epoch": 0.12, "learning_rate": 9.784562457870504e-06, "loss": 1.185, "step": 384 }, { "epoch": 0.12, "learning_rate": 9.783074698128645e-06, "loss": 1.2544, "step": 385 }, { "epoch": 0.12, "learning_rate": 9.78158193288941e-06, "loss": 1.0323, "step": 386 }, { "epoch": 0.12, "learning_rate": 9.780084163714983e-06, "loss": 1.2487, "step": 387 }, { "epoch": 0.12, "learning_rate": 9.778581392172781e-06, "loss": 1.3204, "step": 388 }, { "epoch": 0.12, "learning_rate": 9.777073619835456e-06, "loss": 1.5898, "step": 389 }, { "epoch": 0.12, "learning_rate": 9.775560848280897e-06, "loss": 1.9197, "step": 390 }, { "epoch": 0.12, "learning_rate": 9.77404307909222e-06, "loss": 1.6193, "step": 391 }, { "epoch": 0.12, "learning_rate": 9.772520313857777e-06, "loss": 1.481, "step": 392 }, { "epoch": 0.12, "learning_rate": 9.770992554171141e-06, "loss": 1.0387, "step": 393 }, { "epoch": 0.12, "learning_rate": 9.769459801631119e-06, "loss": 1.6711, "step": 394 }, { "epoch": 0.12, "learning_rate": 9.767922057841739e-06, "loss": 1.029, "step": 395 }, { "epoch": 0.13, "learning_rate": 9.766379324412251e-06, "loss": 0.9272, "step": 396 }, { "epoch": 0.13, "learning_rate": 9.76483160295713e-06, "loss": 1.3655, "step": 397 }, { "epoch": 0.13, "learning_rate": 9.763278895096068e-06, "loss": 1.5329, "step": 398 }, { "epoch": 0.13, "learning_rate": 9.76172120245398e-06, "loss": 1.8201, "step": 399 }, { "epoch": 0.13, "learning_rate": 9.760158526660994e-06, "loss": 1.5653, "step": 400 }, { "epoch": 0.13, "learning_rate": 9.758590869352451e-06, "loss": 1.2675, "step": 401 }, { "epoch": 0.13, "learning_rate": 9.75701823216891e-06, "loss": 1.1503, "step": 402 }, { "epoch": 0.13, "learning_rate": 9.755440616756135e-06, "loss": 1.6752, "step": 403 }, { "epoch": 0.13, "learning_rate": 9.75385802476511e-06, "loss": 1.0798, "step": 404 }, { "epoch": 0.13, "learning_rate": 9.752270457852016e-06, "loss": 1.1644, "step": 405 }, { "epoch": 0.13, "learning_rate": 9.750677917678246e-06, "loss": 1.1514, "step": 406 }, { "epoch": 0.13, "learning_rate": 9.749080405910399e-06, "loss": 1.3072, "step": 407 }, { "epoch": 0.13, "learning_rate": 9.74747792422027e-06, "loss": 1.0297, "step": 408 }, { "epoch": 0.13, "learning_rate": 9.745870474284863e-06, "loss": 1.6977, "step": 409 }, { "epoch": 0.13, "learning_rate": 9.744258057786373e-06, "loss": 1.1711, "step": 410 }, { "epoch": 0.13, "learning_rate": 9.742640676412202e-06, "loss": 1.0465, "step": 411 }, { "epoch": 0.13, "learning_rate": 9.741018331854942e-06, "loss": 1.4472, "step": 412 }, { "epoch": 0.13, "learning_rate": 9.73939102581238e-06, "loss": 1.4795, "step": 413 }, { "epoch": 0.13, "learning_rate": 9.737758759987492e-06, "loss": 1.7931, "step": 414 }, { "epoch": 0.13, "learning_rate": 9.73612153608845e-06, "loss": 1.2965, "step": 415 }, { "epoch": 0.13, "learning_rate": 9.734479355828613e-06, "loss": 1.5149, "step": 416 }, { "epoch": 0.13, "learning_rate": 9.732832220926522e-06, "loss": 1.4118, "step": 417 }, { "epoch": 0.13, "learning_rate": 9.731180133105913e-06, "loss": 1.327, "step": 418 }, { "epoch": 0.13, "learning_rate": 9.729523094095694e-06, "loss": 1.5002, "step": 419 }, { "epoch": 0.13, "learning_rate": 9.727861105629962e-06, "loss": 1.402, "step": 420 }, { "epoch": 0.13, "learning_rate": 9.726194169447993e-06, "loss": 1.0462, "step": 421 }, { "epoch": 0.13, "learning_rate": 9.724522287294235e-06, "loss": 1.2308, "step": 422 }, { "epoch": 0.13, "learning_rate": 9.722845460918317e-06, "loss": 1.2217, "step": 423 }, { "epoch": 0.13, "learning_rate": 9.721163692075044e-06, "loss": 2.0577, "step": 424 }, { "epoch": 0.13, "learning_rate": 9.71947698252439e-06, "loss": 1.3103, "step": 425 }, { "epoch": 0.13, "learning_rate": 9.717785334031498e-06, "loss": 1.2974, "step": 426 }, { "epoch": 0.13, "learning_rate": 9.716088748366681e-06, "loss": 1.3743, "step": 427 }, { "epoch": 0.14, "learning_rate": 9.714387227305422e-06, "loss": 1.1029, "step": 428 }, { "epoch": 0.14, "learning_rate": 9.712680772628365e-06, "loss": 1.374, "step": 429 }, { "epoch": 0.14, "learning_rate": 9.710969386121317e-06, "loss": 1.4277, "step": 430 }, { "epoch": 0.14, "learning_rate": 9.70925306957525e-06, "loss": 1.1446, "step": 431 }, { "epoch": 0.14, "learning_rate": 9.707531824786292e-06, "loss": 1.4186, "step": 432 }, { "epoch": 0.14, "learning_rate": 9.705805653555726e-06, "loss": 1.7625, "step": 433 }, { "epoch": 0.14, "learning_rate": 9.70407455769e-06, "loss": 1.6411, "step": 434 }, { "epoch": 0.14, "learning_rate": 9.702338539000703e-06, "loss": 1.4779, "step": 435 }, { "epoch": 0.14, "learning_rate": 9.700597599304586e-06, "loss": 0.9176, "step": 436 }, { "epoch": 0.14, "learning_rate": 9.698851740423543e-06, "loss": 1.3166, "step": 437 }, { "epoch": 0.14, "learning_rate": 9.697100964184623e-06, "loss": 1.597, "step": 438 }, { "epoch": 0.14, "learning_rate": 9.69534527242001e-06, "loss": 1.3852, "step": 439 }, { "epoch": 0.14, "learning_rate": 9.693584666967049e-06, "loss": 1.1767, "step": 440 }, { "epoch": 0.14, "learning_rate": 9.691819149668209e-06, "loss": 1.4242, "step": 441 }, { "epoch": 0.14, "learning_rate": 9.69004872237111e-06, "loss": 1.3366, "step": 442 }, { "epoch": 0.14, "learning_rate": 9.688273386928509e-06, "loss": 1.1642, "step": 443 }, { "epoch": 0.14, "learning_rate": 9.686493145198295e-06, "loss": 1.29, "step": 444 }, { "epoch": 0.14, "learning_rate": 9.684707999043497e-06, "loss": 1.2501, "step": 445 }, { "epoch": 0.14, "learning_rate": 9.682917950332274e-06, "loss": 1.393, "step": 446 }, { "epoch": 0.14, "learning_rate": 9.681123000937915e-06, "loss": 1.3397, "step": 447 }, { "epoch": 0.14, "learning_rate": 9.679323152738838e-06, "loss": 1.4668, "step": 448 }, { "epoch": 0.14, "learning_rate": 9.67751840761859e-06, "loss": 1.5252, "step": 449 }, { "epoch": 0.14, "learning_rate": 9.675708767465836e-06, "loss": 1.3509, "step": 450 }, { "epoch": 0.14, "learning_rate": 9.673894234174372e-06, "loss": 1.0928, "step": 451 }, { "epoch": 0.14, "learning_rate": 9.672074809643108e-06, "loss": 1.4051, "step": 452 }, { "epoch": 0.14, "learning_rate": 9.67025049577608e-06, "loss": 1.1205, "step": 453 }, { "epoch": 0.14, "learning_rate": 9.66842129448243e-06, "loss": 1.4112, "step": 454 }, { "epoch": 0.14, "learning_rate": 9.666587207676425e-06, "loss": 1.1538, "step": 455 }, { "epoch": 0.14, "learning_rate": 9.664748237277441e-06, "loss": 1.1349, "step": 456 }, { "epoch": 0.14, "learning_rate": 9.662904385209962e-06, "loss": 1.4425, "step": 457 }, { "epoch": 0.14, "learning_rate": 9.661055653403582e-06, "loss": 1.5416, "step": 458 }, { "epoch": 0.14, "learning_rate": 9.659202043793005e-06, "loss": 0.9379, "step": 459 }, { "epoch": 0.15, "learning_rate": 9.657343558318038e-06, "loss": 1.4177, "step": 460 }, { "epoch": 0.15, "learning_rate": 9.655480198923587e-06, "loss": 1.5073, "step": 461 }, { "epoch": 0.15, "learning_rate": 9.653611967559663e-06, "loss": 1.6127, "step": 462 }, { "epoch": 0.15, "learning_rate": 9.651738866181372e-06, "loss": 1.6609, "step": 463 }, { "epoch": 0.15, "learning_rate": 9.649860896748924e-06, "loss": 1.0624, "step": 464 }, { "epoch": 0.15, "learning_rate": 9.64797806122761e-06, "loss": 1.3559, "step": 465 }, { "epoch": 0.15, "learning_rate": 9.646090361587828e-06, "loss": 1.4724, "step": 466 }, { "epoch": 0.15, "learning_rate": 9.644197799805053e-06, "loss": 1.4788, "step": 467 }, { "epoch": 0.15, "learning_rate": 9.64230037785986e-06, "loss": 1.1775, "step": 468 }, { "epoch": 0.15, "learning_rate": 9.640398097737905e-06, "loss": 1.2293, "step": 469 }, { "epoch": 0.15, "learning_rate": 9.638490961429924e-06, "loss": 1.4077, "step": 470 }, { "epoch": 0.15, "learning_rate": 9.636578970931743e-06, "loss": 1.2626, "step": 471 }, { "epoch": 0.15, "learning_rate": 9.63466212824426e-06, "loss": 1.3983, "step": 472 }, { "epoch": 0.15, "learning_rate": 9.632740435373457e-06, "loss": 1.7914, "step": 473 }, { "epoch": 0.15, "learning_rate": 9.630813894330391e-06, "loss": 1.3749, "step": 474 }, { "epoch": 0.15, "learning_rate": 9.628882507131188e-06, "loss": 1.3001, "step": 475 }, { "epoch": 0.15, "learning_rate": 9.626946275797052e-06, "loss": 1.3154, "step": 476 }, { "epoch": 0.15, "learning_rate": 9.625005202354249e-06, "loss": 1.3957, "step": 477 }, { "epoch": 0.15, "learning_rate": 9.62305928883412e-06, "loss": 1.2626, "step": 478 }, { "epoch": 0.15, "learning_rate": 9.621108537273065e-06, "loss": 1.3528, "step": 479 }, { "epoch": 0.15, "learning_rate": 9.619152949712551e-06, "loss": 1.0725, "step": 480 }, { "epoch": 0.15, "learning_rate": 9.617192528199104e-06, "loss": 1.1208, "step": 481 }, { "epoch": 0.15, "learning_rate": 9.615227274784306e-06, "loss": 1.1647, "step": 482 }, { "epoch": 0.15, "learning_rate": 9.613257191524806e-06, "loss": 1.9667, "step": 483 }, { "epoch": 0.15, "learning_rate": 9.611282280482292e-06, "loss": 1.3022, "step": 484 }, { "epoch": 0.15, "learning_rate": 9.609302543723517e-06, "loss": 1.4315, "step": 485 }, { "epoch": 0.15, "learning_rate": 9.60731798332028e-06, "loss": 1.3224, "step": 486 }, { "epoch": 0.15, "learning_rate": 9.605328601349424e-06, "loss": 1.256, "step": 487 }, { "epoch": 0.15, "learning_rate": 9.603334399892845e-06, "loss": 1.6579, "step": 488 }, { "epoch": 0.15, "learning_rate": 9.601335381037475e-06, "loss": 0.9037, "step": 489 }, { "epoch": 0.15, "learning_rate": 9.599331546875295e-06, "loss": 1.253, "step": 490 }, { "epoch": 0.16, "learning_rate": 9.59732289950332e-06, "loss": 1.075, "step": 491 }, { "epoch": 0.16, "learning_rate": 9.595309441023604e-06, "loss": 1.31, "step": 492 }, { "epoch": 0.16, "learning_rate": 9.593291173543233e-06, "loss": 1.595, "step": 493 }, { "epoch": 0.16, "learning_rate": 9.59126809917433e-06, "loss": 1.1034, "step": 494 }, { "epoch": 0.16, "learning_rate": 9.58924022003405e-06, "loss": 1.7467, "step": 495 }, { "epoch": 0.16, "learning_rate": 9.587207538244567e-06, "loss": 1.139, "step": 496 }, { "epoch": 0.16, "learning_rate": 9.585170055933088e-06, "loss": 1.0684, "step": 497 }, { "epoch": 0.16, "learning_rate": 9.583127775231842e-06, "loss": 1.4177, "step": 498 }, { "epoch": 0.16, "learning_rate": 9.581080698278082e-06, "loss": 1.4122, "step": 499 }, { "epoch": 0.16, "learning_rate": 9.579028827214078e-06, "loss": 1.1153, "step": 500 }, { "epoch": 0.16, "learning_rate": 9.576972164187115e-06, "loss": 1.4279, "step": 501 }, { "epoch": 0.16, "learning_rate": 9.574910711349497e-06, "loss": 1.164, "step": 502 }, { "epoch": 0.16, "learning_rate": 9.572844470858537e-06, "loss": 1.5008, "step": 503 }, { "epoch": 0.16, "learning_rate": 9.570773444876562e-06, "loss": 1.4278, "step": 504 }, { "epoch": 0.16, "learning_rate": 9.568697635570903e-06, "loss": 1.2893, "step": 505 }, { "epoch": 0.16, "learning_rate": 9.566617045113899e-06, "loss": 1.2966, "step": 506 }, { "epoch": 0.16, "learning_rate": 9.564531675682893e-06, "loss": 1.0979, "step": 507 }, { "epoch": 0.16, "learning_rate": 9.562441529460226e-06, "loss": 1.2221, "step": 508 }, { "epoch": 0.16, "learning_rate": 9.560346608633244e-06, "loss": 0.9885, "step": 509 }, { "epoch": 0.16, "learning_rate": 9.558246915394285e-06, "loss": 1.1408, "step": 510 }, { "epoch": 0.16, "learning_rate": 9.55614245194068e-06, "loss": 1.3938, "step": 511 }, { "epoch": 0.16, "learning_rate": 9.554033220474754e-06, "loss": 1.637, "step": 512 }, { "epoch": 0.16, "learning_rate": 9.551919223203825e-06, "loss": 1.7259, "step": 513 }, { "epoch": 0.16, "learning_rate": 9.549800462340193e-06, "loss": 1.7796, "step": 514 }, { "epoch": 0.16, "learning_rate": 9.547676940101147e-06, "loss": 1.4244, "step": 515 }, { "epoch": 0.16, "learning_rate": 9.545548658708958e-06, "loss": 1.24, "step": 516 }, { "epoch": 0.16, "learning_rate": 9.543415620390875e-06, "loss": 0.9891, "step": 517 }, { "epoch": 0.16, "learning_rate": 9.541277827379127e-06, "loss": 1.3187, "step": 518 }, { "epoch": 0.16, "learning_rate": 9.539135281910919e-06, "loss": 1.4677, "step": 519 }, { "epoch": 0.16, "learning_rate": 9.53698798622843e-06, "loss": 1.1842, "step": 520 }, { "epoch": 0.16, "learning_rate": 9.534835942578805e-06, "loss": 1.6846, "step": 521 }, { "epoch": 0.16, "learning_rate": 9.532679153214171e-06, "loss": 1.2163, "step": 522 }, { "epoch": 0.17, "learning_rate": 9.530517620391604e-06, "loss": 1.4298, "step": 523 }, { "epoch": 0.17, "learning_rate": 9.528351346373157e-06, "loss": 1.0741, "step": 524 }, { "epoch": 0.17, "learning_rate": 9.526180333425838e-06, "loss": 1.1803, "step": 525 }, { "epoch": 0.17, "learning_rate": 9.52400458382162e-06, "loss": 1.3704, "step": 526 }, { "epoch": 0.17, "learning_rate": 9.521824099837422e-06, "loss": 1.2647, "step": 527 }, { "epoch": 0.17, "learning_rate": 9.519638883755134e-06, "loss": 1.3773, "step": 528 }, { "epoch": 0.17, "learning_rate": 9.517448937861582e-06, "loss": 1.0656, "step": 529 }, { "epoch": 0.17, "learning_rate": 9.515254264448553e-06, "loss": 1.2379, "step": 530 }, { "epoch": 0.17, "learning_rate": 9.513054865812774e-06, "loss": 1.1354, "step": 531 }, { "epoch": 0.17, "learning_rate": 9.510850744255922e-06, "loss": 1.1479, "step": 532 }, { "epoch": 0.17, "learning_rate": 9.508641902084615e-06, "loss": 1.7936, "step": 533 }, { "epoch": 0.17, "learning_rate": 9.50642834161041e-06, "loss": 1.0603, "step": 534 }, { "epoch": 0.17, "learning_rate": 9.504210065149804e-06, "loss": 1.452, "step": 535 }, { "epoch": 0.17, "learning_rate": 9.501987075024223e-06, "loss": 1.4475, "step": 536 }, { "epoch": 0.17, "learning_rate": 9.499759373560039e-06, "loss": 1.4137, "step": 537 }, { "epoch": 0.17, "learning_rate": 9.49752696308854e-06, "loss": 1.19, "step": 538 }, { "epoch": 0.17, "learning_rate": 9.495289845945947e-06, "loss": 1.3164, "step": 539 }, { "epoch": 0.17, "learning_rate": 9.493048024473413e-06, "loss": 1.5386, "step": 540 }, { "epoch": 0.17, "learning_rate": 9.490801501017003e-06, "loss": 1.2867, "step": 541 }, { "epoch": 0.17, "learning_rate": 9.488550277927713e-06, "loss": 1.571, "step": 542 }, { "epoch": 0.17, "learning_rate": 9.48629435756145e-06, "loss": 0.9207, "step": 543 }, { "epoch": 0.17, "learning_rate": 9.484033742279039e-06, "loss": 1.6259, "step": 544 }, { "epoch": 0.17, "learning_rate": 9.481768434446223e-06, "loss": 1.7835, "step": 545 }, { "epoch": 0.17, "learning_rate": 9.479498436433646e-06, "loss": 1.1856, "step": 546 }, { "epoch": 0.17, "learning_rate": 9.477223750616865e-06, "loss": 1.125, "step": 547 }, { "epoch": 0.17, "learning_rate": 9.474944379376347e-06, "loss": 1.7961, "step": 548 }, { "epoch": 0.17, "learning_rate": 9.472660325097458e-06, "loss": 1.1005, "step": 549 }, { "epoch": 0.17, "learning_rate": 9.470371590170462e-06, "loss": 1.4358, "step": 550 }, { "epoch": 0.17, "learning_rate": 9.46807817699053e-06, "loss": 1.1656, "step": 551 }, { "epoch": 0.17, "learning_rate": 9.46578008795772e-06, "loss": 1.4007, "step": 552 }, { "epoch": 0.17, "learning_rate": 9.463477325476988e-06, "loss": 0.9368, "step": 553 }, { "epoch": 0.17, "learning_rate": 9.46116989195818e-06, "loss": 0.9594, "step": 554 }, { "epoch": 0.18, "learning_rate": 9.458857789816027e-06, "loss": 1.4429, "step": 555 }, { "epoch": 0.18, "learning_rate": 9.456541021470151e-06, "loss": 1.1359, "step": 556 }, { "epoch": 0.18, "learning_rate": 9.454219589345056e-06, "loss": 1.179, "step": 557 }, { "epoch": 0.18, "learning_rate": 9.451893495870124e-06, "loss": 1.3066, "step": 558 }, { "epoch": 0.18, "learning_rate": 9.449562743479615e-06, "loss": 1.151, "step": 559 }, { "epoch": 0.18, "learning_rate": 9.447227334612667e-06, "loss": 1.405, "step": 560 }, { "epoch": 0.18, "learning_rate": 9.444887271713292e-06, "loss": 1.4528, "step": 561 }, { "epoch": 0.18, "learning_rate": 9.44254255723037e-06, "loss": 1.9917, "step": 562 }, { "epoch": 0.18, "learning_rate": 9.440193193617648e-06, "loss": 1.6714, "step": 563 }, { "epoch": 0.18, "learning_rate": 9.437839183333742e-06, "loss": 1.5063, "step": 564 }, { "epoch": 0.18, "learning_rate": 9.435480528842125e-06, "loss": 1.3741, "step": 565 }, { "epoch": 0.18, "learning_rate": 9.433117232611135e-06, "loss": 1.5061, "step": 566 }, { "epoch": 0.18, "learning_rate": 9.43074929711397e-06, "loss": 1.441, "step": 567 }, { "epoch": 0.18, "learning_rate": 9.428376724828674e-06, "loss": 1.2903, "step": 568 }, { "epoch": 0.18, "learning_rate": 9.425999518238152e-06, "loss": 1.2948, "step": 569 }, { "epoch": 0.18, "learning_rate": 9.423617679830155e-06, "loss": 1.3361, "step": 570 }, { "epoch": 0.18, "learning_rate": 9.42123121209728e-06, "loss": 1.0408, "step": 571 }, { "epoch": 0.18, "learning_rate": 9.418840117536973e-06, "loss": 1.3857, "step": 572 }, { "epoch": 0.18, "learning_rate": 9.416444398651519e-06, "loss": 1.4453, "step": 573 }, { "epoch": 0.18, "learning_rate": 9.41404405794804e-06, "loss": 1.5379, "step": 574 }, { "epoch": 0.18, "learning_rate": 9.4116390979385e-06, "loss": 1.5005, "step": 575 }, { "epoch": 0.18, "learning_rate": 9.409229521139691e-06, "loss": 1.2209, "step": 576 }, { "epoch": 0.18, "learning_rate": 9.406815330073244e-06, "loss": 1.3107, "step": 577 }, { "epoch": 0.18, "learning_rate": 9.404396527265616e-06, "loss": 1.3747, "step": 578 }, { "epoch": 0.18, "learning_rate": 9.401973115248082e-06, "loss": 1.1613, "step": 579 }, { "epoch": 0.18, "learning_rate": 9.399545096556755e-06, "loss": 1.1665, "step": 580 }, { "epoch": 0.18, "learning_rate": 9.397112473732559e-06, "loss": 1.7011, "step": 581 }, { "epoch": 0.18, "learning_rate": 9.394675249321238e-06, "loss": 1.5762, "step": 582 }, { "epoch": 0.18, "learning_rate": 9.392233425873351e-06, "loss": 1.0861, "step": 583 }, { "epoch": 0.18, "learning_rate": 9.389787005944275e-06, "loss": 1.2904, "step": 584 }, { "epoch": 0.18, "learning_rate": 9.387335992094188e-06, "loss": 1.7364, "step": 585 }, { "epoch": 0.19, "learning_rate": 9.384880386888086e-06, "loss": 1.9022, "step": 586 }, { "epoch": 0.19, "learning_rate": 9.382420192895765e-06, "loss": 1.4516, "step": 587 }, { "epoch": 0.19, "learning_rate": 9.379955412691818e-06, "loss": 1.4485, "step": 588 }, { "epoch": 0.19, "learning_rate": 9.377486048855647e-06, "loss": 1.6947, "step": 589 }, { "epoch": 0.19, "learning_rate": 9.375012103971443e-06, "loss": 1.4093, "step": 590 }, { "epoch": 0.19, "learning_rate": 9.372533580628198e-06, "loss": 1.3568, "step": 591 }, { "epoch": 0.19, "learning_rate": 9.37005048141969e-06, "loss": 1.1786, "step": 592 }, { "epoch": 0.19, "learning_rate": 9.367562808944485e-06, "loss": 1.3929, "step": 593 }, { "epoch": 0.19, "learning_rate": 9.365070565805941e-06, "loss": 1.5558, "step": 594 }, { "epoch": 0.19, "learning_rate": 9.362573754612195e-06, "loss": 1.1875, "step": 595 }, { "epoch": 0.19, "learning_rate": 9.360072377976161e-06, "loss": 1.113, "step": 596 }, { "epoch": 0.19, "learning_rate": 9.35756643851554e-06, "loss": 1.8038, "step": 597 }, { "epoch": 0.19, "learning_rate": 9.3550559388528e-06, "loss": 1.4524, "step": 598 }, { "epoch": 0.19, "learning_rate": 9.352540881615185e-06, "loss": 1.0176, "step": 599 }, { "epoch": 0.19, "learning_rate": 9.350021269434705e-06, "loss": 1.6289, "step": 600 }, { "epoch": 0.19, "learning_rate": 9.347497104948143e-06, "loss": 0.9572, "step": 601 }, { "epoch": 0.19, "learning_rate": 9.344968390797038e-06, "loss": 1.4596, "step": 602 }, { "epoch": 0.19, "learning_rate": 9.342435129627698e-06, "loss": 1.344, "step": 603 }, { "epoch": 0.19, "learning_rate": 9.339897324091186e-06, "loss": 1.3237, "step": 604 }, { "epoch": 0.19, "learning_rate": 9.337354976843319e-06, "loss": 1.1307, "step": 605 }, { "epoch": 0.19, "learning_rate": 9.334808090544669e-06, "loss": 1.365, "step": 606 }, { "epoch": 0.19, "learning_rate": 9.332256667860556e-06, "loss": 1.4672, "step": 607 }, { "epoch": 0.19, "learning_rate": 9.329700711461051e-06, "loss": 1.2486, "step": 608 }, { "epoch": 0.19, "learning_rate": 9.32714022402097e-06, "loss": 1.2233, "step": 609 }, { "epoch": 0.19, "learning_rate": 9.324575208219864e-06, "loss": 1.319, "step": 610 }, { "epoch": 0.19, "learning_rate": 9.322005666742029e-06, "loss": 1.6989, "step": 611 }, { "epoch": 0.19, "learning_rate": 9.319431602276495e-06, "loss": 1.2686, "step": 612 }, { "epoch": 0.19, "learning_rate": 9.316853017517025e-06, "loss": 1.0994, "step": 613 }, { "epoch": 0.19, "learning_rate": 9.314269915162115e-06, "loss": 1.3523, "step": 614 }, { "epoch": 0.19, "learning_rate": 9.311682297914986e-06, "loss": 1.5664, "step": 615 }, { "epoch": 0.19, "learning_rate": 9.309090168483584e-06, "loss": 1.2536, "step": 616 }, { "epoch": 0.19, "learning_rate": 9.306493529580578e-06, "loss": 1.4, "step": 617 }, { "epoch": 0.2, "learning_rate": 9.303892383923359e-06, "loss": 1.3589, "step": 618 }, { "epoch": 0.2, "learning_rate": 9.301286734234026e-06, "loss": 1.412, "step": 619 }, { "epoch": 0.2, "learning_rate": 9.298676583239398e-06, "loss": 1.3712, "step": 620 }, { "epoch": 0.2, "learning_rate": 9.296061933671005e-06, "loss": 1.471, "step": 621 }, { "epoch": 0.2, "learning_rate": 9.293442788265083e-06, "loss": 1.738, "step": 622 }, { "epoch": 0.2, "learning_rate": 9.290819149762574e-06, "loss": 1.2583, "step": 623 }, { "epoch": 0.2, "learning_rate": 9.288191020909119e-06, "loss": 1.2871, "step": 624 }, { "epoch": 0.2, "learning_rate": 9.285558404455059e-06, "loss": 1.0463, "step": 625 }, { "epoch": 0.2, "learning_rate": 9.282921303155434e-06, "loss": 1.3396, "step": 626 }, { "epoch": 0.2, "learning_rate": 9.280279719769978e-06, "loss": 1.5236, "step": 627 }, { "epoch": 0.2, "learning_rate": 9.27763365706311e-06, "loss": 1.6921, "step": 628 }, { "epoch": 0.2, "learning_rate": 9.274983117803942e-06, "loss": 1.2664, "step": 629 }, { "epoch": 0.2, "learning_rate": 9.272328104766269e-06, "loss": 1.1016, "step": 630 }, { "epoch": 0.2, "learning_rate": 9.269668620728564e-06, "loss": 1.7983, "step": 631 }, { "epoch": 0.2, "learning_rate": 9.267004668473986e-06, "loss": 1.1491, "step": 632 }, { "epoch": 0.2, "learning_rate": 9.264336250790366e-06, "loss": 1.3782, "step": 633 }, { "epoch": 0.2, "learning_rate": 9.261663370470207e-06, "loss": 1.2729, "step": 634 }, { "epoch": 0.2, "learning_rate": 9.258986030310687e-06, "loss": 1.2368, "step": 635 }, { "epoch": 0.2, "learning_rate": 9.256304233113642e-06, "loss": 1.0855, "step": 636 }, { "epoch": 0.2, "learning_rate": 9.253617981685586e-06, "loss": 1.6789, "step": 637 }, { "epoch": 0.2, "learning_rate": 9.250927278837678e-06, "loss": 1.3779, "step": 638 }, { "epoch": 0.2, "learning_rate": 9.24823212738575e-06, "loss": 0.9676, "step": 639 }, { "epoch": 0.2, "learning_rate": 9.245532530150282e-06, "loss": 0.8761, "step": 640 }, { "epoch": 0.2, "learning_rate": 9.242828489956409e-06, "loss": 1.7097, "step": 641 }, { "epoch": 0.2, "learning_rate": 9.240120009633913e-06, "loss": 1.4223, "step": 642 }, { "epoch": 0.2, "learning_rate": 9.237407092017222e-06, "loss": 1.2088, "step": 643 }, { "epoch": 0.2, "learning_rate": 9.234689739945414e-06, "loss": 1.1797, "step": 644 }, { "epoch": 0.2, "learning_rate": 9.231967956262203e-06, "loss": 1.0868, "step": 645 }, { "epoch": 0.2, "learning_rate": 9.229241743815938e-06, "loss": 1.1434, "step": 646 }, { "epoch": 0.2, "learning_rate": 9.22651110545961e-06, "loss": 1.1415, "step": 647 }, { "epoch": 0.2, "learning_rate": 9.223776044050833e-06, "loss": 1.216, "step": 648 }, { "epoch": 0.2, "learning_rate": 9.221036562451858e-06, "loss": 1.7046, "step": 649 }, { "epoch": 0.21, "learning_rate": 9.218292663529555e-06, "loss": 1.3021, "step": 650 }, { "epoch": 0.21, "learning_rate": 9.215544350155423e-06, "loss": 1.4081, "step": 651 }, { "epoch": 0.21, "learning_rate": 9.212791625205575e-06, "loss": 1.2438, "step": 652 }, { "epoch": 0.21, "learning_rate": 9.210034491560744e-06, "loss": 1.2055, "step": 653 }, { "epoch": 0.21, "learning_rate": 9.207272952106277e-06, "loss": 1.4279, "step": 654 }, { "epoch": 0.21, "learning_rate": 9.204507009732131e-06, "loss": 1.2392, "step": 655 }, { "epoch": 0.21, "learning_rate": 9.201736667332868e-06, "loss": 1.0773, "step": 656 }, { "epoch": 0.21, "learning_rate": 9.198961927807657e-06, "loss": 1.2236, "step": 657 }, { "epoch": 0.21, "learning_rate": 9.196182794060272e-06, "loss": 1.3711, "step": 658 }, { "epoch": 0.21, "learning_rate": 9.193399268999076e-06, "loss": 0.9318, "step": 659 }, { "epoch": 0.21, "learning_rate": 9.19061135553704e-06, "loss": 1.3451, "step": 660 }, { "epoch": 0.21, "learning_rate": 9.187819056591719e-06, "loss": 1.2067, "step": 661 }, { "epoch": 0.21, "learning_rate": 9.185022375085257e-06, "loss": 1.5883, "step": 662 }, { "epoch": 0.21, "learning_rate": 9.18222131394439e-06, "loss": 1.1624, "step": 663 }, { "epoch": 0.21, "learning_rate": 9.179415876100433e-06, "loss": 1.373, "step": 664 }, { "epoch": 0.21, "learning_rate": 9.176606064489281e-06, "loss": 1.3713, "step": 665 }, { "epoch": 0.21, "learning_rate": 9.17379188205141e-06, "loss": 1.4919, "step": 666 }, { "epoch": 0.21, "learning_rate": 9.170973331731867e-06, "loss": 1.3279, "step": 667 }, { "epoch": 0.21, "learning_rate": 9.16815041648027e-06, "loss": 1.5104, "step": 668 }, { "epoch": 0.21, "learning_rate": 9.165323139250805e-06, "loss": 1.1754, "step": 669 }, { "epoch": 0.21, "learning_rate": 9.162491503002226e-06, "loss": 1.3752, "step": 670 }, { "epoch": 0.21, "learning_rate": 9.159655510697843e-06, "loss": 1.2283, "step": 671 }, { "epoch": 0.21, "learning_rate": 9.156815165305528e-06, "loss": 1.0705, "step": 672 }, { "epoch": 0.21, "learning_rate": 9.15397046979771e-06, "loss": 1.6615, "step": 673 }, { "epoch": 0.21, "learning_rate": 9.151121427151368e-06, "loss": 1.4852, "step": 674 }, { "epoch": 0.21, "learning_rate": 9.148268040348029e-06, "loss": 1.3204, "step": 675 }, { "epoch": 0.21, "learning_rate": 9.145410312373772e-06, "loss": 1.3391, "step": 676 }, { "epoch": 0.21, "learning_rate": 9.142548246219212e-06, "loss": 1.1739, "step": 677 }, { "epoch": 0.21, "learning_rate": 9.139681844879506e-06, "loss": 1.4066, "step": 678 }, { "epoch": 0.21, "learning_rate": 9.136811111354353e-06, "loss": 1.4231, "step": 679 }, { "epoch": 0.21, "learning_rate": 9.133936048647978e-06, "loss": 1.2918, "step": 680 }, { "epoch": 0.22, "learning_rate": 9.131056659769142e-06, "loss": 1.1455, "step": 681 }, { "epoch": 0.22, "learning_rate": 9.12817294773113e-06, "loss": 1.3621, "step": 682 }, { "epoch": 0.22, "learning_rate": 9.125284915551751e-06, "loss": 1.2156, "step": 683 }, { "epoch": 0.22, "learning_rate": 9.12239256625334e-06, "loss": 1.2849, "step": 684 }, { "epoch": 0.22, "learning_rate": 9.119495902862745e-06, "loss": 1.8127, "step": 685 }, { "epoch": 0.22, "learning_rate": 9.116594928411331e-06, "loss": 1.0382, "step": 686 }, { "epoch": 0.22, "learning_rate": 9.113689645934971e-06, "loss": 1.5898, "step": 687 }, { "epoch": 0.22, "learning_rate": 9.110780058474052e-06, "loss": 1.353, "step": 688 }, { "epoch": 0.22, "learning_rate": 9.10786616907346e-06, "loss": 1.091, "step": 689 }, { "epoch": 0.22, "learning_rate": 9.104947980782589e-06, "loss": 1.4505, "step": 690 }, { "epoch": 0.22, "learning_rate": 9.102025496655326e-06, "loss": 1.3498, "step": 691 }, { "epoch": 0.22, "learning_rate": 9.099098719750062e-06, "loss": 1.5062, "step": 692 }, { "epoch": 0.22, "learning_rate": 9.096167653129668e-06, "loss": 1.1948, "step": 693 }, { "epoch": 0.22, "learning_rate": 9.093232299861516e-06, "loss": 1.2804, "step": 694 }, { "epoch": 0.22, "learning_rate": 9.09029266301746e-06, "loss": 1.0856, "step": 695 }, { "epoch": 0.22, "learning_rate": 9.08734874567383e-06, "loss": 1.4789, "step": 696 }, { "epoch": 0.22, "learning_rate": 9.084400550911448e-06, "loss": 1.5506, "step": 697 }, { "epoch": 0.22, "learning_rate": 9.0814480818156e-06, "loss": 1.5661, "step": 698 }, { "epoch": 0.22, "learning_rate": 9.078491341476057e-06, "loss": 1.41, "step": 699 }, { "epoch": 0.22, "learning_rate": 9.075530332987048e-06, "loss": 1.0601, "step": 700 }, { "epoch": 0.22, "learning_rate": 9.072565059447279e-06, "loss": 1.3169, "step": 701 }, { "epoch": 0.22, "learning_rate": 9.06959552395991e-06, "loss": 1.7157, "step": 702 }, { "epoch": 0.22, "learning_rate": 9.066621729632573e-06, "loss": 1.1322, "step": 703 }, { "epoch": 0.22, "learning_rate": 9.063643679577342e-06, "loss": 0.8676, "step": 704 }, { "epoch": 0.22, "learning_rate": 9.060661376910757e-06, "loss": 1.1496, "step": 705 }, { "epoch": 0.22, "learning_rate": 9.057674824753804e-06, "loss": 1.3833, "step": 706 }, { "epoch": 0.22, "learning_rate": 9.054684026231912e-06, "loss": 1.316, "step": 707 }, { "epoch": 0.22, "learning_rate": 9.051688984474962e-06, "loss": 1.2432, "step": 708 }, { "epoch": 0.22, "learning_rate": 9.04868970261727e-06, "loss": 1.1252, "step": 709 }, { "epoch": 0.22, "learning_rate": 9.04568618379759e-06, "loss": 1.4438, "step": 710 }, { "epoch": 0.22, "learning_rate": 9.04267843115911e-06, "loss": 1.4966, "step": 711 }, { "epoch": 0.22, "learning_rate": 9.03966644784945e-06, "loss": 1.1717, "step": 712 }, { "epoch": 0.23, "learning_rate": 9.036650237020657e-06, "loss": 1.3689, "step": 713 }, { "epoch": 0.23, "learning_rate": 9.033629801829201e-06, "loss": 1.2295, "step": 714 }, { "epoch": 0.23, "learning_rate": 9.030605145435974e-06, "loss": 1.0145, "step": 715 }, { "epoch": 0.23, "learning_rate": 9.027576271006285e-06, "loss": 1.3251, "step": 716 }, { "epoch": 0.23, "learning_rate": 9.024543181709857e-06, "loss": 1.6536, "step": 717 }, { "epoch": 0.23, "learning_rate": 9.021505880720825e-06, "loss": 1.2347, "step": 718 }, { "epoch": 0.23, "learning_rate": 9.018464371217729e-06, "loss": 1.3882, "step": 719 }, { "epoch": 0.23, "learning_rate": 9.015418656383516e-06, "loss": 1.5248, "step": 720 }, { "epoch": 0.23, "learning_rate": 9.012368739405532e-06, "loss": 1.3325, "step": 721 }, { "epoch": 0.23, "learning_rate": 9.009314623475523e-06, "loss": 1.3675, "step": 722 }, { "epoch": 0.23, "learning_rate": 9.006256311789625e-06, "loss": 1.3685, "step": 723 }, { "epoch": 0.23, "learning_rate": 9.003193807548369e-06, "loss": 1.1252, "step": 724 }, { "epoch": 0.23, "learning_rate": 9.000127113956673e-06, "loss": 1.115, "step": 725 }, { "epoch": 0.23, "learning_rate": 8.997056234223836e-06, "loss": 1.2954, "step": 726 }, { "epoch": 0.23, "learning_rate": 8.99398117156354e-06, "loss": 1.4616, "step": 727 }, { "epoch": 0.23, "learning_rate": 8.990901929193845e-06, "loss": 1.4958, "step": 728 }, { "epoch": 0.23, "learning_rate": 8.987818510337185e-06, "loss": 1.2546, "step": 729 }, { "epoch": 0.23, "learning_rate": 8.984730918220364e-06, "loss": 1.3689, "step": 730 }, { "epoch": 0.23, "learning_rate": 8.98163915607455e-06, "loss": 1.0953, "step": 731 }, { "epoch": 0.23, "learning_rate": 8.978543227135283e-06, "loss": 1.3372, "step": 732 }, { "epoch": 0.23, "learning_rate": 8.975443134642458e-06, "loss": 1.3517, "step": 733 }, { "epoch": 0.23, "learning_rate": 8.972338881840326e-06, "loss": 1.6203, "step": 734 }, { "epoch": 0.23, "learning_rate": 8.969230471977494e-06, "loss": 1.7264, "step": 735 }, { "epoch": 0.23, "learning_rate": 8.966117908306923e-06, "loss": 1.4803, "step": 736 }, { "epoch": 0.23, "learning_rate": 8.963001194085912e-06, "loss": 1.6747, "step": 737 }, { "epoch": 0.23, "learning_rate": 8.959880332576112e-06, "loss": 1.5569, "step": 738 }, { "epoch": 0.23, "learning_rate": 8.956755327043511e-06, "loss": 1.3577, "step": 739 }, { "epoch": 0.23, "learning_rate": 8.953626180758431e-06, "loss": 1.0094, "step": 740 }, { "epoch": 0.23, "learning_rate": 8.950492896995533e-06, "loss": 1.7848, "step": 741 }, { "epoch": 0.23, "learning_rate": 8.947355479033802e-06, "loss": 1.1051, "step": 742 }, { "epoch": 0.23, "learning_rate": 8.944213930156555e-06, "loss": 1.6571, "step": 743 }, { "epoch": 0.23, "learning_rate": 8.941068253651428e-06, "loss": 1.2689, "step": 744 }, { "epoch": 0.24, "learning_rate": 8.937918452810377e-06, "loss": 1.6516, "step": 745 }, { "epoch": 0.24, "learning_rate": 8.934764530929677e-06, "loss": 1.2054, "step": 746 }, { "epoch": 0.24, "learning_rate": 8.93160649130991e-06, "loss": 1.3028, "step": 747 }, { "epoch": 0.24, "learning_rate": 8.928444337255974e-06, "loss": 1.6169, "step": 748 }, { "epoch": 0.24, "learning_rate": 8.925278072077072e-06, "loss": 1.5165, "step": 749 }, { "epoch": 0.24, "learning_rate": 8.922107699086701e-06, "loss": 1.1472, "step": 750 }, { "epoch": 0.24, "learning_rate": 8.918933221602666e-06, "loss": 1.3853, "step": 751 }, { "epoch": 0.24, "learning_rate": 8.915754642947065e-06, "loss": 1.043, "step": 752 }, { "epoch": 0.24, "learning_rate": 8.912571966446286e-06, "loss": 1.0857, "step": 753 }, { "epoch": 0.24, "learning_rate": 8.909385195431005e-06, "loss": 1.2717, "step": 754 }, { "epoch": 0.24, "learning_rate": 8.906194333236185e-06, "loss": 1.4493, "step": 755 }, { "epoch": 0.24, "learning_rate": 8.902999383201071e-06, "loss": 1.6234, "step": 756 }, { "epoch": 0.24, "learning_rate": 8.899800348669184e-06, "loss": 1.1493, "step": 757 }, { "epoch": 0.24, "learning_rate": 8.89659723298832e-06, "loss": 1.7766, "step": 758 }, { "epoch": 0.24, "learning_rate": 8.893390039510548e-06, "loss": 1.2116, "step": 759 }, { "epoch": 0.24, "learning_rate": 8.890178771592198e-06, "loss": 1.2421, "step": 760 }, { "epoch": 0.24, "learning_rate": 8.886963432593872e-06, "loss": 1.7509, "step": 761 }, { "epoch": 0.24, "learning_rate": 8.883744025880429e-06, "loss": 0.9725, "step": 762 }, { "epoch": 0.24, "learning_rate": 8.880520554820982e-06, "loss": 1.1628, "step": 763 }, { "epoch": 0.24, "learning_rate": 8.877293022788903e-06, "loss": 1.467, "step": 764 }, { "epoch": 0.24, "learning_rate": 8.874061433161807e-06, "loss": 1.3762, "step": 765 }, { "epoch": 0.24, "learning_rate": 8.870825789321563e-06, "loss": 0.9027, "step": 766 }, { "epoch": 0.24, "learning_rate": 8.867586094654276e-06, "loss": 1.1662, "step": 767 }, { "epoch": 0.24, "learning_rate": 8.864342352550297e-06, "loss": 1.3838, "step": 768 }, { "epoch": 0.24, "learning_rate": 8.861094566404206e-06, "loss": 0.9631, "step": 769 }, { "epoch": 0.24, "learning_rate": 8.857842739614814e-06, "loss": 1.3127, "step": 770 }, { "epoch": 0.24, "learning_rate": 8.85458687558517e-06, "loss": 1.3322, "step": 771 }, { "epoch": 0.24, "learning_rate": 8.85132697772254e-06, "loss": 1.0889, "step": 772 }, { "epoch": 0.24, "learning_rate": 8.848063049438416e-06, "loss": 1.2277, "step": 773 }, { "epoch": 0.24, "learning_rate": 8.844795094148503e-06, "loss": 1.1015, "step": 774 }, { "epoch": 0.24, "learning_rate": 8.841523115272723e-06, "loss": 1.396, "step": 775 }, { "epoch": 0.25, "learning_rate": 8.83824711623521e-06, "loss": 1.3484, "step": 776 }, { "epoch": 0.25, "learning_rate": 8.834967100464302e-06, "loss": 1.3169, "step": 777 }, { "epoch": 0.25, "learning_rate": 8.831683071392542e-06, "loss": 1.5049, "step": 778 }, { "epoch": 0.25, "learning_rate": 8.828395032456676e-06, "loss": 1.4656, "step": 779 }, { "epoch": 0.25, "learning_rate": 8.82510298709764e-06, "loss": 1.7388, "step": 780 }, { "epoch": 0.25, "learning_rate": 8.821806938760568e-06, "loss": 1.5469, "step": 781 }, { "epoch": 0.25, "learning_rate": 8.81850689089478e-06, "loss": 1.4955, "step": 782 }, { "epoch": 0.25, "learning_rate": 8.815202846953786e-06, "loss": 1.6364, "step": 783 }, { "epoch": 0.25, "learning_rate": 8.81189481039527e-06, "loss": 1.6018, "step": 784 }, { "epoch": 0.25, "learning_rate": 8.808582784681102e-06, "loss": 1.1738, "step": 785 }, { "epoch": 0.25, "learning_rate": 8.805266773277324e-06, "loss": 0.9949, "step": 786 }, { "epoch": 0.25, "learning_rate": 8.801946779654145e-06, "loss": 1.1411, "step": 787 }, { "epoch": 0.25, "learning_rate": 8.79862280728595e-06, "loss": 1.3452, "step": 788 }, { "epoch": 0.25, "learning_rate": 8.795294859651279e-06, "loss": 1.5209, "step": 789 }, { "epoch": 0.25, "learning_rate": 8.791962940232837e-06, "loss": 1.4234, "step": 790 }, { "epoch": 0.25, "learning_rate": 8.788627052517486e-06, "loss": 1.05, "step": 791 }, { "epoch": 0.25, "learning_rate": 8.785287199996239e-06, "loss": 1.1589, "step": 792 }, { "epoch": 0.25, "learning_rate": 8.781943386164256e-06, "loss": 1.7273, "step": 793 }, { "epoch": 0.25, "learning_rate": 8.778595614520847e-06, "loss": 1.3783, "step": 794 }, { "epoch": 0.25, "learning_rate": 8.77524388856946e-06, "loss": 1.0055, "step": 795 }, { "epoch": 0.25, "learning_rate": 8.771888211817687e-06, "loss": 1.5255, "step": 796 }, { "epoch": 0.25, "learning_rate": 8.768528587777247e-06, "loss": 1.623, "step": 797 }, { "epoch": 0.25, "learning_rate": 8.765165019963992e-06, "loss": 1.198, "step": 798 }, { "epoch": 0.25, "learning_rate": 8.761797511897907e-06, "loss": 1.1745, "step": 799 }, { "epoch": 0.25, "learning_rate": 8.758426067103094e-06, "loss": 1.2036, "step": 800 }, { "epoch": 0.25, "learning_rate": 8.755050689107776e-06, "loss": 1.2471, "step": 801 }, { "epoch": 0.25, "learning_rate": 8.751671381444293e-06, "loss": 1.4777, "step": 802 }, { "epoch": 0.25, "learning_rate": 8.748288147649097e-06, "loss": 1.0307, "step": 803 }, { "epoch": 0.25, "learning_rate": 8.744900991262753e-06, "loss": 1.1512, "step": 804 }, { "epoch": 0.25, "learning_rate": 8.741509915829921e-06, "loss": 1.2735, "step": 805 }, { "epoch": 0.25, "learning_rate": 8.738114924899372e-06, "loss": 1.4987, "step": 806 }, { "epoch": 0.25, "learning_rate": 8.73471602202397e-06, "loss": 1.401, "step": 807 }, { "epoch": 0.26, "learning_rate": 8.731313210760675e-06, "loss": 1.326, "step": 808 }, { "epoch": 0.26, "learning_rate": 8.727906494670534e-06, "loss": 1.2413, "step": 809 }, { "epoch": 0.26, "learning_rate": 8.724495877318684e-06, "loss": 1.459, "step": 810 }, { "epoch": 0.26, "learning_rate": 8.72108136227434e-06, "loss": 1.0958, "step": 811 }, { "epoch": 0.26, "learning_rate": 8.717662953110802e-06, "loss": 1.3368, "step": 812 }, { "epoch": 0.26, "learning_rate": 8.714240653405442e-06, "loss": 1.3881, "step": 813 }, { "epoch": 0.26, "learning_rate": 8.7108144667397e-06, "loss": 1.2932, "step": 814 }, { "epoch": 0.26, "learning_rate": 8.707384396699094e-06, "loss": 1.4916, "step": 815 }, { "epoch": 0.26, "learning_rate": 8.703950446873194e-06, "loss": 1.5342, "step": 816 }, { "epoch": 0.26, "learning_rate": 8.700512620855635e-06, "loss": 1.3804, "step": 817 }, { "epoch": 0.26, "learning_rate": 8.697070922244111e-06, "loss": 1.7151, "step": 818 }, { "epoch": 0.26, "learning_rate": 8.693625354640367e-06, "loss": 1.5779, "step": 819 }, { "epoch": 0.26, "learning_rate": 8.690175921650196e-06, "loss": 1.4846, "step": 820 }, { "epoch": 0.26, "learning_rate": 8.686722626883434e-06, "loss": 1.3494, "step": 821 }, { "epoch": 0.26, "learning_rate": 8.683265473953965e-06, "loss": 1.4012, "step": 822 }, { "epoch": 0.26, "learning_rate": 8.679804466479706e-06, "loss": 1.5649, "step": 823 }, { "epoch": 0.26, "learning_rate": 8.676339608082607e-06, "loss": 1.1432, "step": 824 }, { "epoch": 0.26, "learning_rate": 8.672870902388648e-06, "loss": 1.035, "step": 825 }, { "epoch": 0.26, "learning_rate": 8.669398353027838e-06, "loss": 1.1877, "step": 826 }, { "epoch": 0.26, "learning_rate": 8.665921963634207e-06, "loss": 1.4222, "step": 827 }, { "epoch": 0.26, "learning_rate": 8.662441737845805e-06, "loss": 1.3603, "step": 828 }, { "epoch": 0.26, "learning_rate": 8.658957679304693e-06, "loss": 1.2569, "step": 829 }, { "epoch": 0.26, "learning_rate": 8.655469791656946e-06, "loss": 1.3043, "step": 830 }, { "epoch": 0.26, "learning_rate": 8.651978078552646e-06, "loss": 1.6043, "step": 831 }, { "epoch": 0.26, "learning_rate": 8.648482543645877e-06, "loss": 1.4126, "step": 832 }, { "epoch": 0.26, "learning_rate": 8.644983190594725e-06, "loss": 1.9686, "step": 833 }, { "epoch": 0.26, "learning_rate": 8.641480023061269e-06, "loss": 1.209, "step": 834 }, { "epoch": 0.26, "learning_rate": 8.63797304471158e-06, "loss": 1.2622, "step": 835 }, { "epoch": 0.26, "learning_rate": 8.634462259215719e-06, "loss": 1.3831, "step": 836 }, { "epoch": 0.26, "learning_rate": 8.630947670247731e-06, "loss": 1.3946, "step": 837 }, { "epoch": 0.26, "learning_rate": 8.627429281485639e-06, "loss": 1.9398, "step": 838 }, { "epoch": 0.26, "learning_rate": 8.623907096611444e-06, "loss": 1.6617, "step": 839 }, { "epoch": 0.27, "learning_rate": 8.62038111931112e-06, "loss": 1.492, "step": 840 }, { "epoch": 0.27, "learning_rate": 8.616851353274611e-06, "loss": 1.3223, "step": 841 }, { "epoch": 0.27, "learning_rate": 8.613317802195821e-06, "loss": 0.8342, "step": 842 }, { "epoch": 0.27, "learning_rate": 8.609780469772623e-06, "loss": 1.4176, "step": 843 }, { "epoch": 0.27, "learning_rate": 8.606239359706838e-06, "loss": 1.3321, "step": 844 }, { "epoch": 0.27, "learning_rate": 8.602694475704247e-06, "loss": 1.4117, "step": 845 }, { "epoch": 0.27, "learning_rate": 8.599145821474579e-06, "loss": 1.0299, "step": 846 }, { "epoch": 0.27, "learning_rate": 8.595593400731505e-06, "loss": 1.0768, "step": 847 }, { "epoch": 0.27, "learning_rate": 8.592037217192642e-06, "loss": 1.1725, "step": 848 }, { "epoch": 0.27, "learning_rate": 8.588477274579545e-06, "loss": 1.1186, "step": 849 }, { "epoch": 0.27, "learning_rate": 8.5849135766177e-06, "loss": 1.5728, "step": 850 }, { "epoch": 0.27, "learning_rate": 8.581346127036522e-06, "loss": 1.464, "step": 851 }, { "epoch": 0.27, "learning_rate": 8.577774929569357e-06, "loss": 1.0676, "step": 852 }, { "epoch": 0.27, "learning_rate": 8.57419998795347e-06, "loss": 1.5588, "step": 853 }, { "epoch": 0.27, "learning_rate": 8.570621305930045e-06, "loss": 1.6186, "step": 854 }, { "epoch": 0.27, "learning_rate": 8.56703888724418e-06, "loss": 1.2022, "step": 855 }, { "epoch": 0.27, "learning_rate": 8.56345273564488e-06, "loss": 1.899, "step": 856 }, { "epoch": 0.27, "learning_rate": 8.559862854885063e-06, "loss": 1.2866, "step": 857 }, { "epoch": 0.27, "learning_rate": 8.556269248721547e-06, "loss": 1.3787, "step": 858 }, { "epoch": 0.27, "learning_rate": 8.552671920915045e-06, "loss": 1.0334, "step": 859 }, { "epoch": 0.27, "learning_rate": 8.549070875230173e-06, "loss": 1.2226, "step": 860 }, { "epoch": 0.27, "learning_rate": 8.545466115435427e-06, "loss": 1.1062, "step": 861 }, { "epoch": 0.27, "learning_rate": 8.5418576453032e-06, "loss": 1.5001, "step": 862 }, { "epoch": 0.27, "learning_rate": 8.538245468609762e-06, "loss": 1.2385, "step": 863 }, { "epoch": 0.27, "learning_rate": 8.534629589135261e-06, "loss": 1.0781, "step": 864 }, { "epoch": 0.27, "learning_rate": 8.531010010663726e-06, "loss": 1.2116, "step": 865 }, { "epoch": 0.27, "learning_rate": 8.527386736983048e-06, "loss": 1.7235, "step": 866 }, { "epoch": 0.27, "learning_rate": 8.523759771884995e-06, "loss": 1.6934, "step": 867 }, { "epoch": 0.27, "learning_rate": 8.520129119165192e-06, "loss": 0.9474, "step": 868 }, { "epoch": 0.27, "learning_rate": 8.516494782623124e-06, "loss": 1.4997, "step": 869 }, { "epoch": 0.27, "learning_rate": 8.51285676606213e-06, "loss": 1.0513, "step": 870 }, { "epoch": 0.28, "learning_rate": 8.509215073289405e-06, "loss": 1.1502, "step": 871 }, { "epoch": 0.28, "learning_rate": 8.505569708115985e-06, "loss": 1.7849, "step": 872 }, { "epoch": 0.28, "learning_rate": 8.501920674356755e-06, "loss": 1.4357, "step": 873 }, { "epoch": 0.28, "learning_rate": 8.498267975830434e-06, "loss": 1.6868, "step": 874 }, { "epoch": 0.28, "learning_rate": 8.494611616359579e-06, "loss": 1.2642, "step": 875 }, { "epoch": 0.28, "learning_rate": 8.490951599770575e-06, "loss": 1.1491, "step": 876 }, { "epoch": 0.28, "learning_rate": 8.487287929893642e-06, "loss": 1.2737, "step": 877 }, { "epoch": 0.28, "learning_rate": 8.483620610562815e-06, "loss": 1.2017, "step": 878 }, { "epoch": 0.28, "learning_rate": 8.479949645615952e-06, "loss": 0.9613, "step": 879 }, { "epoch": 0.28, "learning_rate": 8.476275038894723e-06, "loss": 1.5784, "step": 880 }, { "epoch": 0.28, "learning_rate": 8.472596794244614e-06, "loss": 1.4138, "step": 881 }, { "epoch": 0.28, "learning_rate": 8.468914915514913e-06, "loss": 1.302, "step": 882 }, { "epoch": 0.28, "learning_rate": 8.465229406558717e-06, "loss": 1.3564, "step": 883 }, { "epoch": 0.28, "learning_rate": 8.461540271232917e-06, "loss": 1.0112, "step": 884 }, { "epoch": 0.28, "learning_rate": 8.4578475133982e-06, "loss": 1.082, "step": 885 }, { "epoch": 0.28, "learning_rate": 8.454151136919044e-06, "loss": 1.2428, "step": 886 }, { "epoch": 0.28, "learning_rate": 8.450451145663716e-06, "loss": 1.229, "step": 887 }, { "epoch": 0.28, "learning_rate": 8.446747543504263e-06, "loss": 1.1593, "step": 888 }, { "epoch": 0.28, "learning_rate": 8.443040334316514e-06, "loss": 1.1187, "step": 889 }, { "epoch": 0.28, "learning_rate": 8.43932952198007e-06, "loss": 1.6537, "step": 890 }, { "epoch": 0.28, "learning_rate": 8.435615110378302e-06, "loss": 1.4007, "step": 891 }, { "epoch": 0.28, "learning_rate": 8.43189710339835e-06, "loss": 1.1812, "step": 892 }, { "epoch": 0.28, "learning_rate": 8.42817550493112e-06, "loss": 1.079, "step": 893 }, { "epoch": 0.28, "learning_rate": 8.424450318871266e-06, "loss": 1.5176, "step": 894 }, { "epoch": 0.28, "learning_rate": 8.420721549117207e-06, "loss": 1.4131, "step": 895 }, { "epoch": 0.28, "learning_rate": 8.416989199571106e-06, "loss": 1.0438, "step": 896 }, { "epoch": 0.28, "learning_rate": 8.413253274138875e-06, "loss": 1.6125, "step": 897 }, { "epoch": 0.28, "learning_rate": 8.40951377673017e-06, "loss": 1.1027, "step": 898 }, { "epoch": 0.28, "learning_rate": 8.405770711258378e-06, "loss": 1.4662, "step": 899 }, { "epoch": 0.28, "learning_rate": 8.402024081640628e-06, "loss": 1.2609, "step": 900 }, { "epoch": 0.28, "learning_rate": 8.398273891797775e-06, "loss": 1.3485, "step": 901 }, { "epoch": 0.28, "learning_rate": 8.394520145654399e-06, "loss": 0.9759, "step": 902 }, { "epoch": 0.29, "learning_rate": 8.390762847138803e-06, "loss": 0.9808, "step": 903 }, { "epoch": 0.29, "learning_rate": 8.387002000183008e-06, "loss": 1.134, "step": 904 }, { "epoch": 0.29, "learning_rate": 8.383237608722751e-06, "loss": 1.1114, "step": 905 }, { "epoch": 0.29, "learning_rate": 8.379469676697471e-06, "loss": 1.0743, "step": 906 }, { "epoch": 0.29, "learning_rate": 8.375698208050318e-06, "loss": 1.1027, "step": 907 }, { "epoch": 0.29, "learning_rate": 8.37192320672814e-06, "loss": 0.8942, "step": 908 }, { "epoch": 0.29, "learning_rate": 8.368144676681487e-06, "loss": 1.2962, "step": 909 }, { "epoch": 0.29, "learning_rate": 8.364362621864595e-06, "loss": 1.1648, "step": 910 }, { "epoch": 0.29, "learning_rate": 8.360577046235395e-06, "loss": 1.6299, "step": 911 }, { "epoch": 0.29, "learning_rate": 8.356787953755497e-06, "loss": 1.4763, "step": 912 }, { "epoch": 0.29, "learning_rate": 8.352995348390197e-06, "loss": 1.4032, "step": 913 }, { "epoch": 0.29, "learning_rate": 8.34919923410846e-06, "loss": 1.4139, "step": 914 }, { "epoch": 0.29, "learning_rate": 8.34539961488293e-06, "loss": 1.5268, "step": 915 }, { "epoch": 0.29, "learning_rate": 8.341596494689917e-06, "loss": 1.6139, "step": 916 }, { "epoch": 0.29, "learning_rate": 8.337789877509393e-06, "loss": 1.4215, "step": 917 }, { "epoch": 0.29, "learning_rate": 8.33397976732499e-06, "loss": 1.143, "step": 918 }, { "epoch": 0.29, "learning_rate": 8.330166168123994e-06, "loss": 0.9746, "step": 919 }, { "epoch": 0.29, "learning_rate": 8.326349083897349e-06, "loss": 1.0901, "step": 920 }, { "epoch": 0.29, "learning_rate": 8.32252851863964e-06, "loss": 1.1437, "step": 921 }, { "epoch": 0.29, "learning_rate": 8.318704476349093e-06, "loss": 1.2317, "step": 922 }, { "epoch": 0.29, "learning_rate": 8.31487696102758e-06, "loss": 1.1888, "step": 923 }, { "epoch": 0.29, "learning_rate": 8.311045976680602e-06, "loss": 1.1006, "step": 924 }, { "epoch": 0.29, "learning_rate": 8.30721152731729e-06, "loss": 1.1529, "step": 925 }, { "epoch": 0.29, "learning_rate": 8.303373616950408e-06, "loss": 1.2178, "step": 926 }, { "epoch": 0.29, "learning_rate": 8.299532249596332e-06, "loss": 1.5345, "step": 927 }, { "epoch": 0.29, "learning_rate": 8.295687429275063e-06, "loss": 1.5397, "step": 928 }, { "epoch": 0.29, "learning_rate": 8.291839160010213e-06, "loss": 1.2732, "step": 929 }, { "epoch": 0.29, "learning_rate": 8.287987445829004e-06, "loss": 0.9928, "step": 930 }, { "epoch": 0.29, "learning_rate": 8.284132290762262e-06, "loss": 1.06, "step": 931 }, { "epoch": 0.29, "learning_rate": 8.280273698844415e-06, "loss": 1.161, "step": 932 }, { "epoch": 0.29, "learning_rate": 8.276411674113486e-06, "loss": 1.0148, "step": 933 }, { "epoch": 0.29, "learning_rate": 8.272546220611095e-06, "loss": 1.3726, "step": 934 }, { "epoch": 0.3, "learning_rate": 8.268677342382442e-06, "loss": 1.3816, "step": 935 }, { "epoch": 0.3, "learning_rate": 8.264805043476319e-06, "loss": 0.9232, "step": 936 }, { "epoch": 0.3, "learning_rate": 8.260929327945096e-06, "loss": 1.324, "step": 937 }, { "epoch": 0.3, "learning_rate": 8.257050199844714e-06, "loss": 1.0364, "step": 938 }, { "epoch": 0.3, "learning_rate": 8.253167663234691e-06, "loss": 1.0539, "step": 939 }, { "epoch": 0.3, "learning_rate": 8.24928172217811e-06, "loss": 1.3444, "step": 940 }, { "epoch": 0.3, "learning_rate": 8.24539238074161e-06, "loss": 1.4647, "step": 941 }, { "epoch": 0.3, "learning_rate": 8.2414996429954e-06, "loss": 1.5505, "step": 942 }, { "epoch": 0.3, "learning_rate": 8.23760351301324e-06, "loss": 0.915, "step": 943 }, { "epoch": 0.3, "learning_rate": 8.233703994872435e-06, "loss": 1.0501, "step": 944 }, { "epoch": 0.3, "learning_rate": 8.229801092653837e-06, "loss": 1.3183, "step": 945 }, { "epoch": 0.3, "learning_rate": 8.225894810441843e-06, "loss": 1.2577, "step": 946 }, { "epoch": 0.3, "learning_rate": 8.221985152324385e-06, "loss": 1.2466, "step": 947 }, { "epoch": 0.3, "learning_rate": 8.218072122392929e-06, "loss": 1.3209, "step": 948 }, { "epoch": 0.3, "learning_rate": 8.21415572474247e-06, "loss": 1.259, "step": 949 }, { "epoch": 0.3, "learning_rate": 8.210235963471521e-06, "loss": 1.3139, "step": 950 }, { "epoch": 0.3, "learning_rate": 8.206312842682124e-06, "loss": 1.5792, "step": 951 }, { "epoch": 0.3, "learning_rate": 8.202386366479833e-06, "loss": 1.1179, "step": 952 }, { "epoch": 0.3, "learning_rate": 8.198456538973711e-06, "loss": 1.2902, "step": 953 }, { "epoch": 0.3, "learning_rate": 8.194523364276331e-06, "loss": 1.6296, "step": 954 }, { "epoch": 0.3, "learning_rate": 8.190586846503773e-06, "loss": 1.3536, "step": 955 }, { "epoch": 0.3, "learning_rate": 8.186646989775605e-06, "loss": 1.167, "step": 956 }, { "epoch": 0.3, "learning_rate": 8.182703798214897e-06, "loss": 1.443, "step": 957 }, { "epoch": 0.3, "learning_rate": 8.178757275948206e-06, "loss": 1.3735, "step": 958 }, { "epoch": 0.3, "learning_rate": 8.174807427105578e-06, "loss": 1.1216, "step": 959 }, { "epoch": 0.3, "learning_rate": 8.170854255820536e-06, "loss": 1.2264, "step": 960 }, { "epoch": 0.3, "learning_rate": 8.166897766230082e-06, "loss": 1.1574, "step": 961 }, { "epoch": 0.3, "learning_rate": 8.16293796247469e-06, "loss": 1.3644, "step": 962 }, { "epoch": 0.3, "learning_rate": 8.158974848698304e-06, "loss": 1.0945, "step": 963 }, { "epoch": 0.3, "learning_rate": 8.155008429048332e-06, "loss": 0.9514, "step": 964 }, { "epoch": 0.3, "learning_rate": 8.151038707675635e-06, "loss": 1.2166, "step": 965 }, { "epoch": 0.3, "learning_rate": 8.147065688734539e-06, "loss": 0.8857, "step": 966 }, { "epoch": 0.31, "learning_rate": 8.143089376382814e-06, "loss": 1.4203, "step": 967 }, { "epoch": 0.31, "learning_rate": 8.13910977478168e-06, "loss": 1.3708, "step": 968 }, { "epoch": 0.31, "learning_rate": 8.135126888095799e-06, "loss": 0.9595, "step": 969 }, { "epoch": 0.31, "learning_rate": 8.131140720493267e-06, "loss": 1.4632, "step": 970 }, { "epoch": 0.31, "learning_rate": 8.127151276145619e-06, "loss": 1.4857, "step": 971 }, { "epoch": 0.31, "learning_rate": 8.123158559227819e-06, "loss": 1.2952, "step": 972 }, { "epoch": 0.31, "learning_rate": 8.119162573918247e-06, "loss": 1.2434, "step": 973 }, { "epoch": 0.31, "learning_rate": 8.115163324398715e-06, "loss": 1.0379, "step": 974 }, { "epoch": 0.31, "learning_rate": 8.111160814854444e-06, "loss": 1.429, "step": 975 }, { "epoch": 0.31, "learning_rate": 8.107155049474067e-06, "loss": 1.2637, "step": 976 }, { "epoch": 0.31, "learning_rate": 8.103146032449627e-06, "loss": 1.2949, "step": 977 }, { "epoch": 0.31, "learning_rate": 8.099133767976569e-06, "loss": 2.0464, "step": 978 }, { "epoch": 0.31, "learning_rate": 8.095118260253737e-06, "loss": 1.4209, "step": 979 }, { "epoch": 0.31, "learning_rate": 8.091099513483363e-06, "loss": 1.3829, "step": 980 }, { "epoch": 0.31, "learning_rate": 8.087077531871078e-06, "loss": 1.2983, "step": 981 }, { "epoch": 0.31, "learning_rate": 8.083052319625893e-06, "loss": 1.5948, "step": 982 }, { "epoch": 0.31, "learning_rate": 8.0790238809602e-06, "loss": 1.0692, "step": 983 }, { "epoch": 0.31, "learning_rate": 8.07499222008977e-06, "loss": 1.0565, "step": 984 }, { "epoch": 0.31, "learning_rate": 8.07095734123374e-06, "loss": 1.6621, "step": 985 }, { "epoch": 0.31, "learning_rate": 8.066919248614625e-06, "loss": 1.4062, "step": 986 }, { "epoch": 0.31, "learning_rate": 8.062877946458294e-06, "loss": 1.4228, "step": 987 }, { "epoch": 0.31, "learning_rate": 8.058833438993976e-06, "loss": 1.3731, "step": 988 }, { "epoch": 0.31, "learning_rate": 8.054785730454261e-06, "loss": 1.3574, "step": 989 }, { "epoch": 0.31, "learning_rate": 8.050734825075079e-06, "loss": 1.0965, "step": 990 }, { "epoch": 0.31, "learning_rate": 8.046680727095715e-06, "loss": 1.3037, "step": 991 }, { "epoch": 0.31, "learning_rate": 8.042623440758788e-06, "loss": 1.4105, "step": 992 }, { "epoch": 0.31, "learning_rate": 8.038562970310257e-06, "loss": 0.9217, "step": 993 }, { "epoch": 0.31, "learning_rate": 8.034499319999415e-06, "loss": 1.2483, "step": 994 }, { "epoch": 0.31, "learning_rate": 8.030432494078876e-06, "loss": 1.246, "step": 995 }, { "epoch": 0.31, "learning_rate": 8.026362496804586e-06, "loss": 1.3679, "step": 996 }, { "epoch": 0.31, "learning_rate": 8.0222893324358e-06, "loss": 1.455, "step": 997 }, { "epoch": 0.32, "learning_rate": 8.018213005235097e-06, "loss": 1.0752, "step": 998 }, { "epoch": 0.32, "learning_rate": 8.014133519468361e-06, "loss": 1.0471, "step": 999 }, { "epoch": 0.32, "learning_rate": 8.010050879404783e-06, "loss": 1.3474, "step": 1000 }, { "epoch": 0.32, "learning_rate": 8.005965089316851e-06, "loss": 1.4941, "step": 1001 }, { "epoch": 0.32, "learning_rate": 8.001876153480354e-06, "loss": 1.2537, "step": 1002 }, { "epoch": 0.32, "learning_rate": 7.997784076174374e-06, "loss": 1.2127, "step": 1003 }, { "epoch": 0.32, "learning_rate": 7.993688861681274e-06, "loss": 1.5791, "step": 1004 }, { "epoch": 0.32, "learning_rate": 7.98959051428671e-06, "loss": 1.5993, "step": 1005 }, { "epoch": 0.32, "learning_rate": 7.985489038279606e-06, "loss": 1.2063, "step": 1006 }, { "epoch": 0.32, "learning_rate": 7.981384437952164e-06, "loss": 1.4555, "step": 1007 }, { "epoch": 0.32, "learning_rate": 7.977276717599862e-06, "loss": 1.6055, "step": 1008 }, { "epoch": 0.32, "learning_rate": 7.973165881521435e-06, "loss": 1.3582, "step": 1009 }, { "epoch": 0.32, "learning_rate": 7.96905193401888e-06, "loss": 1.4849, "step": 1010 }, { "epoch": 0.32, "learning_rate": 7.96493487939745e-06, "loss": 0.7368, "step": 1011 }, { "epoch": 0.32, "learning_rate": 7.960814721965656e-06, "loss": 1.3427, "step": 1012 }, { "epoch": 0.32, "learning_rate": 7.956691466035247e-06, "loss": 1.1562, "step": 1013 }, { "epoch": 0.32, "learning_rate": 7.952565115921223e-06, "loss": 1.149, "step": 1014 }, { "epoch": 0.32, "learning_rate": 7.948435675941812e-06, "loss": 1.4668, "step": 1015 }, { "epoch": 0.32, "learning_rate": 7.944303150418487e-06, "loss": 1.278, "step": 1016 }, { "epoch": 0.32, "learning_rate": 7.940167543675938e-06, "loss": 1.3078, "step": 1017 }, { "epoch": 0.32, "learning_rate": 7.936028860042092e-06, "loss": 1.3827, "step": 1018 }, { "epoch": 0.32, "learning_rate": 7.931887103848086e-06, "loss": 1.569, "step": 1019 }, { "epoch": 0.32, "learning_rate": 7.927742279428279e-06, "loss": 1.6362, "step": 1020 }, { "epoch": 0.32, "learning_rate": 7.923594391120237e-06, "loss": 1.2056, "step": 1021 }, { "epoch": 0.32, "learning_rate": 7.919443443264732e-06, "loss": 1.0538, "step": 1022 }, { "epoch": 0.32, "learning_rate": 7.91528944020574e-06, "loss": 1.2099, "step": 1023 }, { "epoch": 0.32, "learning_rate": 7.911132386290436e-06, "loss": 1.3575, "step": 1024 }, { "epoch": 0.32, "learning_rate": 7.906972285869182e-06, "loss": 1.3353, "step": 1025 }, { "epoch": 0.32, "learning_rate": 7.902809143295533e-06, "loss": 1.1676, "step": 1026 }, { "epoch": 0.32, "learning_rate": 7.898642962926229e-06, "loss": 1.4753, "step": 1027 }, { "epoch": 0.32, "learning_rate": 7.894473749121182e-06, "loss": 0.9784, "step": 1028 }, { "epoch": 0.32, "learning_rate": 7.890301506243484e-06, "loss": 1.3852, "step": 1029 }, { "epoch": 0.33, "learning_rate": 7.886126238659395e-06, "loss": 1.3017, "step": 1030 }, { "epoch": 0.33, "learning_rate": 7.881947950738342e-06, "loss": 1.3667, "step": 1031 }, { "epoch": 0.33, "learning_rate": 7.877766646852912e-06, "loss": 1.2069, "step": 1032 }, { "epoch": 0.33, "learning_rate": 7.873582331378848e-06, "loss": 0.9396, "step": 1033 }, { "epoch": 0.33, "learning_rate": 7.869395008695043e-06, "loss": 1.3177, "step": 1034 }, { "epoch": 0.33, "learning_rate": 7.865204683183541e-06, "loss": 1.0191, "step": 1035 }, { "epoch": 0.33, "learning_rate": 7.861011359229524e-06, "loss": 1.5017, "step": 1036 }, { "epoch": 0.33, "learning_rate": 7.856815041221316e-06, "loss": 1.1735, "step": 1037 }, { "epoch": 0.33, "learning_rate": 7.852615733550373e-06, "loss": 1.0848, "step": 1038 }, { "epoch": 0.33, "learning_rate": 7.848413440611275e-06, "loss": 1.4649, "step": 1039 }, { "epoch": 0.33, "learning_rate": 7.844208166801731e-06, "loss": 1.1507, "step": 1040 }, { "epoch": 0.33, "learning_rate": 7.839999916522573e-06, "loss": 1.5175, "step": 1041 }, { "epoch": 0.33, "learning_rate": 7.83578869417774e-06, "loss": 1.6031, "step": 1042 }, { "epoch": 0.33, "learning_rate": 7.831574504174287e-06, "loss": 1.1513, "step": 1043 }, { "epoch": 0.33, "learning_rate": 7.827357350922368e-06, "loss": 1.0546, "step": 1044 }, { "epoch": 0.33, "learning_rate": 7.823137238835245e-06, "loss": 1.1289, "step": 1045 }, { "epoch": 0.33, "learning_rate": 7.818914172329276e-06, "loss": 1.6404, "step": 1046 }, { "epoch": 0.33, "learning_rate": 7.814688155823903e-06, "loss": 1.4769, "step": 1047 }, { "epoch": 0.33, "learning_rate": 7.810459193741667e-06, "loss": 1.2313, "step": 1048 }, { "epoch": 0.33, "learning_rate": 7.806227290508183e-06, "loss": 1.0294, "step": 1049 }, { "epoch": 0.33, "learning_rate": 7.801992450552146e-06, "loss": 1.2874, "step": 1050 }, { "epoch": 0.33, "learning_rate": 7.797754678305325e-06, "loss": 1.1688, "step": 1051 }, { "epoch": 0.33, "learning_rate": 7.793513978202557e-06, "loss": 1.3762, "step": 1052 }, { "epoch": 0.33, "learning_rate": 7.789270354681741e-06, "loss": 1.3011, "step": 1053 }, { "epoch": 0.33, "learning_rate": 7.785023812183842e-06, "loss": 1.2125, "step": 1054 }, { "epoch": 0.33, "learning_rate": 7.780774355152874e-06, "loss": 1.3684, "step": 1055 }, { "epoch": 0.33, "learning_rate": 7.7765219880359e-06, "loss": 1.0069, "step": 1056 }, { "epoch": 0.33, "learning_rate": 7.772266715283032e-06, "loss": 1.3618, "step": 1057 }, { "epoch": 0.33, "learning_rate": 7.768008541347423e-06, "loss": 1.0286, "step": 1058 }, { "epoch": 0.33, "learning_rate": 7.76374747068526e-06, "loss": 1.5114, "step": 1059 }, { "epoch": 0.33, "learning_rate": 7.759483507755761e-06, "loss": 1.2938, "step": 1060 }, { "epoch": 0.33, "learning_rate": 7.755216657021174e-06, "loss": 1.4213, "step": 1061 }, { "epoch": 0.34, "learning_rate": 7.750946922946766e-06, "loss": 0.9586, "step": 1062 }, { "epoch": 0.34, "learning_rate": 7.746674310000818e-06, "loss": 1.2144, "step": 1063 }, { "epoch": 0.34, "learning_rate": 7.742398822654636e-06, "loss": 1.2866, "step": 1064 }, { "epoch": 0.34, "learning_rate": 7.73812046538252e-06, "loss": 1.2257, "step": 1065 }, { "epoch": 0.34, "learning_rate": 7.733839242661784e-06, "loss": 1.2237, "step": 1066 }, { "epoch": 0.34, "learning_rate": 7.729555158972732e-06, "loss": 1.2699, "step": 1067 }, { "epoch": 0.34, "learning_rate": 7.725268218798669e-06, "loss": 1.2079, "step": 1068 }, { "epoch": 0.34, "learning_rate": 7.720978426625887e-06, "loss": 1.1287, "step": 1069 }, { "epoch": 0.34, "learning_rate": 7.71668578694366e-06, "loss": 0.7684, "step": 1070 }, { "epoch": 0.34, "learning_rate": 7.712390304244247e-06, "loss": 1.236, "step": 1071 }, { "epoch": 0.34, "learning_rate": 7.708091983022876e-06, "loss": 1.156, "step": 1072 }, { "epoch": 0.34, "learning_rate": 7.70379082777775e-06, "loss": 1.4165, "step": 1073 }, { "epoch": 0.34, "learning_rate": 7.699486843010039e-06, "loss": 1.4821, "step": 1074 }, { "epoch": 0.34, "learning_rate": 7.695180033223869e-06, "loss": 1.0382, "step": 1075 }, { "epoch": 0.34, "learning_rate": 7.690870402926325e-06, "loss": 1.5538, "step": 1076 }, { "epoch": 0.34, "learning_rate": 7.686557956627448e-06, "loss": 1.3773, "step": 1077 }, { "epoch": 0.34, "learning_rate": 7.682242698840217e-06, "loss": 1.7329, "step": 1078 }, { "epoch": 0.34, "learning_rate": 7.67792463408056e-06, "loss": 1.3464, "step": 1079 }, { "epoch": 0.34, "learning_rate": 7.673603766867339e-06, "loss": 1.184, "step": 1080 }, { "epoch": 0.34, "learning_rate": 7.669280101722353e-06, "loss": 1.2348, "step": 1081 }, { "epoch": 0.34, "learning_rate": 7.664953643170328e-06, "loss": 1.0944, "step": 1082 }, { "epoch": 0.34, "learning_rate": 7.660624395738905e-06, "loss": 1.1745, "step": 1083 }, { "epoch": 0.34, "learning_rate": 7.656292363958654e-06, "loss": 1.4242, "step": 1084 }, { "epoch": 0.34, "learning_rate": 7.651957552363056e-06, "loss": 1.3101, "step": 1085 }, { "epoch": 0.34, "learning_rate": 7.647619965488499e-06, "loss": 1.3458, "step": 1086 }, { "epoch": 0.34, "learning_rate": 7.643279607874276e-06, "loss": 0.9796, "step": 1087 }, { "epoch": 0.34, "learning_rate": 7.638936484062583e-06, "loss": 1.3243, "step": 1088 }, { "epoch": 0.34, "learning_rate": 7.634590598598503e-06, "loss": 1.2803, "step": 1089 }, { "epoch": 0.34, "learning_rate": 7.630241956030017e-06, "loss": 1.2697, "step": 1090 }, { "epoch": 0.34, "learning_rate": 7.625890560907987e-06, "loss": 1.1837, "step": 1091 }, { "epoch": 0.34, "learning_rate": 7.621536417786159e-06, "loss": 1.6359, "step": 1092 }, { "epoch": 0.35, "learning_rate": 7.61717953122115e-06, "loss": 1.4926, "step": 1093 }, { "epoch": 0.35, "learning_rate": 7.612819905772452e-06, "loss": 1.2669, "step": 1094 }, { "epoch": 0.35, "learning_rate": 7.608457546002423e-06, "loss": 1.3459, "step": 1095 }, { "epoch": 0.35, "learning_rate": 7.604092456476279e-06, "loss": 1.5344, "step": 1096 }, { "epoch": 0.35, "learning_rate": 7.599724641762096e-06, "loss": 1.1849, "step": 1097 }, { "epoch": 0.35, "learning_rate": 7.595354106430801e-06, "loss": 1.5993, "step": 1098 }, { "epoch": 0.35, "learning_rate": 7.590980855056169e-06, "loss": 1.2259, "step": 1099 }, { "epoch": 0.35, "learning_rate": 7.586604892214815e-06, "loss": 1.1677, "step": 1100 }, { "epoch": 0.35, "learning_rate": 7.582226222486193e-06, "loss": 1.3454, "step": 1101 }, { "epoch": 0.35, "learning_rate": 7.577844850452591e-06, "loss": 1.2434, "step": 1102 }, { "epoch": 0.35, "learning_rate": 7.573460780699122e-06, "loss": 1.2341, "step": 1103 }, { "epoch": 0.35, "learning_rate": 7.569074017813725e-06, "loss": 1.3944, "step": 1104 }, { "epoch": 0.35, "learning_rate": 7.564684566387159e-06, "loss": 0.8512, "step": 1105 }, { "epoch": 0.35, "learning_rate": 7.560292431012988e-06, "loss": 1.2153, "step": 1106 }, { "epoch": 0.35, "learning_rate": 7.555897616287594e-06, "loss": 1.4419, "step": 1107 }, { "epoch": 0.35, "learning_rate": 7.551500126810159e-06, "loss": 1.3453, "step": 1108 }, { "epoch": 0.35, "learning_rate": 7.547099967182665e-06, "loss": 1.0429, "step": 1109 }, { "epoch": 0.35, "learning_rate": 7.542697142009889e-06, "loss": 1.0607, "step": 1110 }, { "epoch": 0.35, "learning_rate": 7.538291655899394e-06, "loss": 0.9574, "step": 1111 }, { "epoch": 0.35, "learning_rate": 7.533883513461532e-06, "loss": 1.3013, "step": 1112 }, { "epoch": 0.35, "learning_rate": 7.529472719309432e-06, "loss": 0.8825, "step": 1113 }, { "epoch": 0.35, "learning_rate": 7.525059278058999e-06, "loss": 1.2702, "step": 1114 }, { "epoch": 0.35, "learning_rate": 7.520643194328909e-06, "loss": 1.3096, "step": 1115 }, { "epoch": 0.35, "learning_rate": 7.516224472740603e-06, "loss": 1.0633, "step": 1116 }, { "epoch": 0.35, "learning_rate": 7.5118031179182794e-06, "loss": 1.2488, "step": 1117 }, { "epoch": 0.35, "learning_rate": 7.507379134488898e-06, "loss": 1.196, "step": 1118 }, { "epoch": 0.35, "learning_rate": 7.502952527082166e-06, "loss": 1.3743, "step": 1119 }, { "epoch": 0.35, "learning_rate": 7.498523300330535e-06, "loss": 0.9808, "step": 1120 }, { "epoch": 0.35, "learning_rate": 7.4940914588692024e-06, "loss": 1.3021, "step": 1121 }, { "epoch": 0.35, "learning_rate": 7.489657007336098e-06, "loss": 1.2716, "step": 1122 }, { "epoch": 0.35, "learning_rate": 7.485219950371883e-06, "loss": 1.2222, "step": 1123 }, { "epoch": 0.35, "learning_rate": 7.480780292619949e-06, "loss": 1.1261, "step": 1124 }, { "epoch": 0.36, "learning_rate": 7.476338038726405e-06, "loss": 1.1952, "step": 1125 }, { "epoch": 0.36, "learning_rate": 7.471893193340081e-06, "loss": 1.4024, "step": 1126 }, { "epoch": 0.36, "learning_rate": 7.467445761112513e-06, "loss": 1.3599, "step": 1127 }, { "epoch": 0.36, "learning_rate": 7.462995746697951e-06, "loss": 1.4214, "step": 1128 }, { "epoch": 0.36, "learning_rate": 7.458543154753343e-06, "loss": 1.2028, "step": 1129 }, { "epoch": 0.36, "learning_rate": 7.454087989938335e-06, "loss": 1.4441, "step": 1130 }, { "epoch": 0.36, "learning_rate": 7.449630256915269e-06, "loss": 1.5727, "step": 1131 }, { "epoch": 0.36, "learning_rate": 7.445169960349167e-06, "loss": 1.7437, "step": 1132 }, { "epoch": 0.36, "learning_rate": 7.440707104907741e-06, "loss": 1.1295, "step": 1133 }, { "epoch": 0.36, "learning_rate": 7.4362416952613785e-06, "loss": 1.1225, "step": 1134 }, { "epoch": 0.36, "learning_rate": 7.43177373608314e-06, "loss": 1.5656, "step": 1135 }, { "epoch": 0.36, "learning_rate": 7.427303232048752e-06, "loss": 1.3214, "step": 1136 }, { "epoch": 0.36, "learning_rate": 7.422830187836606e-06, "loss": 1.3162, "step": 1137 }, { "epoch": 0.36, "learning_rate": 7.418354608127754e-06, "loss": 1.158, "step": 1138 }, { "epoch": 0.36, "learning_rate": 7.413876497605899e-06, "loss": 1.5441, "step": 1139 }, { "epoch": 0.36, "learning_rate": 7.4093958609573916e-06, "loss": 1.4048, "step": 1140 }, { "epoch": 0.36, "learning_rate": 7.4049127028712265e-06, "loss": 1.0678, "step": 1141 }, { "epoch": 0.36, "learning_rate": 7.400427028039039e-06, "loss": 1.0785, "step": 1142 }, { "epoch": 0.36, "learning_rate": 7.395938841155097e-06, "loss": 1.2715, "step": 1143 }, { "epoch": 0.36, "learning_rate": 7.391448146916298e-06, "loss": 1.284, "step": 1144 }, { "epoch": 0.36, "learning_rate": 7.386954950022159e-06, "loss": 1.3952, "step": 1145 }, { "epoch": 0.36, "learning_rate": 7.382459255174824e-06, "loss": 1.4593, "step": 1146 }, { "epoch": 0.36, "learning_rate": 7.377961067079043e-06, "loss": 1.3557, "step": 1147 }, { "epoch": 0.36, "learning_rate": 7.373460390442182e-06, "loss": 1.3152, "step": 1148 }, { "epoch": 0.36, "learning_rate": 7.368957229974209e-06, "loss": 1.249, "step": 1149 }, { "epoch": 0.36, "learning_rate": 7.3644515903876865e-06, "loss": 1.172, "step": 1150 }, { "epoch": 0.36, "learning_rate": 7.359943476397777e-06, "loss": 1.1469, "step": 1151 }, { "epoch": 0.36, "learning_rate": 7.355432892722232e-06, "loss": 1.4988, "step": 1152 }, { "epoch": 0.36, "learning_rate": 7.350919844081387e-06, "loss": 1.2337, "step": 1153 }, { "epoch": 0.36, "learning_rate": 7.346404335198156e-06, "loss": 1.2668, "step": 1154 }, { "epoch": 0.36, "learning_rate": 7.341886370798026e-06, "loss": 1.5358, "step": 1155 }, { "epoch": 0.36, "learning_rate": 7.337365955609058e-06, "loss": 1.2219, "step": 1156 }, { "epoch": 0.37, "learning_rate": 7.332843094361876e-06, "loss": 1.5932, "step": 1157 }, { "epoch": 0.37, "learning_rate": 7.328317791789663e-06, "loss": 1.1999, "step": 1158 }, { "epoch": 0.37, "learning_rate": 7.323790052628157e-06, "loss": 1.0699, "step": 1159 }, { "epoch": 0.37, "learning_rate": 7.3192598816156456e-06, "loss": 1.5599, "step": 1160 }, { "epoch": 0.37, "learning_rate": 7.314727283492964e-06, "loss": 1.7206, "step": 1161 }, { "epoch": 0.37, "learning_rate": 7.3101922630034825e-06, "loss": 1.8959, "step": 1162 }, { "epoch": 0.37, "learning_rate": 7.305654824893112e-06, "loss": 0.9824, "step": 1163 }, { "epoch": 0.37, "learning_rate": 7.30111497391029e-06, "loss": 1.215, "step": 1164 }, { "epoch": 0.37, "learning_rate": 7.296572714805979e-06, "loss": 1.1578, "step": 1165 }, { "epoch": 0.37, "learning_rate": 7.292028052333662e-06, "loss": 1.4665, "step": 1166 }, { "epoch": 0.37, "learning_rate": 7.287480991249337e-06, "loss": 1.2958, "step": 1167 }, { "epoch": 0.37, "learning_rate": 7.282931536311515e-06, "loss": 1.2545, "step": 1168 }, { "epoch": 0.37, "learning_rate": 7.278379692281209e-06, "loss": 1.1195, "step": 1169 }, { "epoch": 0.37, "learning_rate": 7.273825463921932e-06, "loss": 1.3824, "step": 1170 }, { "epoch": 0.37, "learning_rate": 7.269268855999694e-06, "loss": 1.345, "step": 1171 }, { "epoch": 0.37, "learning_rate": 7.264709873282994e-06, "loss": 1.3002, "step": 1172 }, { "epoch": 0.37, "learning_rate": 7.260148520542818e-06, "loss": 1.0398, "step": 1173 }, { "epoch": 0.37, "learning_rate": 7.255584802552629e-06, "loss": 1.3737, "step": 1174 }, { "epoch": 0.37, "learning_rate": 7.251018724088367e-06, "loss": 1.0654, "step": 1175 }, { "epoch": 0.37, "learning_rate": 7.246450289928445e-06, "loss": 1.7459, "step": 1176 }, { "epoch": 0.37, "learning_rate": 7.241879504853736e-06, "loss": 1.344, "step": 1177 }, { "epoch": 0.37, "learning_rate": 7.2373063736475765e-06, "loss": 1.1038, "step": 1178 }, { "epoch": 0.37, "learning_rate": 7.232730901095759e-06, "loss": 1.3498, "step": 1179 }, { "epoch": 0.37, "learning_rate": 7.228153091986523e-06, "loss": 1.0657, "step": 1180 }, { "epoch": 0.37, "learning_rate": 7.223572951110556e-06, "loss": 1.3118, "step": 1181 }, { "epoch": 0.37, "learning_rate": 7.2189904832609845e-06, "loss": 1.2404, "step": 1182 }, { "epoch": 0.37, "learning_rate": 7.21440569323337e-06, "loss": 1.7662, "step": 1183 }, { "epoch": 0.37, "learning_rate": 7.2098185858257055e-06, "loss": 1.2422, "step": 1184 }, { "epoch": 0.37, "learning_rate": 7.205229165838407e-06, "loss": 1.0525, "step": 1185 }, { "epoch": 0.37, "learning_rate": 7.200637438074312e-06, "loss": 1.2277, "step": 1186 }, { "epoch": 0.37, "learning_rate": 7.196043407338674e-06, "loss": 0.994, "step": 1187 }, { "epoch": 0.38, "learning_rate": 7.1914470784391554e-06, "loss": 1.0496, "step": 1188 }, { "epoch": 0.38, "learning_rate": 7.186848456185822e-06, "loss": 1.3563, "step": 1189 }, { "epoch": 0.38, "learning_rate": 7.182247545391143e-06, "loss": 1.1375, "step": 1190 }, { "epoch": 0.38, "learning_rate": 7.17764435086998e-06, "loss": 1.1226, "step": 1191 }, { "epoch": 0.38, "learning_rate": 7.1730388774395865e-06, "loss": 1.422, "step": 1192 }, { "epoch": 0.38, "learning_rate": 7.168431129919599e-06, "loss": 1.0466, "step": 1193 }, { "epoch": 0.38, "learning_rate": 7.163821113132034e-06, "loss": 1.2012, "step": 1194 }, { "epoch": 0.38, "learning_rate": 7.159208831901284e-06, "loss": 1.6263, "step": 1195 }, { "epoch": 0.38, "learning_rate": 7.154594291054111e-06, "loss": 1.1268, "step": 1196 }, { "epoch": 0.38, "learning_rate": 7.1499774954196425e-06, "loss": 1.1078, "step": 1197 }, { "epoch": 0.38, "learning_rate": 7.145358449829363e-06, "loss": 1.4369, "step": 1198 }, { "epoch": 0.38, "learning_rate": 7.1407371591171115e-06, "loss": 1.6077, "step": 1199 }, { "epoch": 0.38, "learning_rate": 7.13611362811908e-06, "loss": 1.7012, "step": 1200 }, { "epoch": 0.38, "learning_rate": 7.131487861673804e-06, "loss": 1.5958, "step": 1201 }, { "epoch": 0.38, "learning_rate": 7.1268598646221554e-06, "loss": 1.2357, "step": 1202 }, { "epoch": 0.38, "learning_rate": 7.122229641807343e-06, "loss": 1.3705, "step": 1203 }, { "epoch": 0.38, "learning_rate": 7.117597198074907e-06, "loss": 1.1067, "step": 1204 }, { "epoch": 0.38, "learning_rate": 7.112962538272705e-06, "loss": 1.2512, "step": 1205 }, { "epoch": 0.38, "learning_rate": 7.10832566725092e-06, "loss": 1.3684, "step": 1206 }, { "epoch": 0.38, "learning_rate": 7.103686589862048e-06, "loss": 1.3415, "step": 1207 }, { "epoch": 0.38, "learning_rate": 7.099045310960891e-06, "loss": 1.1419, "step": 1208 }, { "epoch": 0.38, "learning_rate": 7.094401835404558e-06, "loss": 1.1338, "step": 1209 }, { "epoch": 0.38, "learning_rate": 7.089756168052455e-06, "loss": 1.2972, "step": 1210 }, { "epoch": 0.38, "learning_rate": 7.085108313766281e-06, "loss": 1.1154, "step": 1211 }, { "epoch": 0.38, "learning_rate": 7.080458277410027e-06, "loss": 0.9941, "step": 1212 }, { "epoch": 0.38, "learning_rate": 7.075806063849966e-06, "loss": 0.8557, "step": 1213 }, { "epoch": 0.38, "learning_rate": 7.071151677954646e-06, "loss": 0.8786, "step": 1214 }, { "epoch": 0.38, "learning_rate": 7.0664951245948955e-06, "loss": 0.8439, "step": 1215 }, { "epoch": 0.38, "learning_rate": 7.061836408643803e-06, "loss": 1.4392, "step": 1216 }, { "epoch": 0.38, "learning_rate": 7.057175534976726e-06, "loss": 1.1409, "step": 1217 }, { "epoch": 0.38, "learning_rate": 7.052512508471276e-06, "loss": 1.1115, "step": 1218 }, { "epoch": 0.38, "learning_rate": 7.047847334007324e-06, "loss": 1.1761, "step": 1219 }, { "epoch": 0.39, "learning_rate": 7.043180016466982e-06, "loss": 0.9638, "step": 1220 }, { "epoch": 0.39, "learning_rate": 7.038510560734609e-06, "loss": 0.9455, "step": 1221 }, { "epoch": 0.39, "learning_rate": 7.033838971696796e-06, "loss": 1.028, "step": 1222 }, { "epoch": 0.39, "learning_rate": 7.0291652542423725e-06, "loss": 1.5057, "step": 1223 }, { "epoch": 0.39, "learning_rate": 7.0244894132623946e-06, "loss": 1.3619, "step": 1224 }, { "epoch": 0.39, "learning_rate": 7.0198114536501385e-06, "loss": 1.4313, "step": 1225 }, { "epoch": 0.39, "learning_rate": 7.015131380301099e-06, "loss": 1.2701, "step": 1226 }, { "epoch": 0.39, "learning_rate": 7.0104491981129804e-06, "loss": 1.2694, "step": 1227 }, { "epoch": 0.39, "learning_rate": 7.0057649119856975e-06, "loss": 1.2121, "step": 1228 }, { "epoch": 0.39, "learning_rate": 7.001078526821365e-06, "loss": 1.2022, "step": 1229 }, { "epoch": 0.39, "learning_rate": 6.996390047524295e-06, "loss": 0.8189, "step": 1230 }, { "epoch": 0.39, "learning_rate": 6.991699479000989e-06, "loss": 1.2254, "step": 1231 }, { "epoch": 0.39, "learning_rate": 6.9870068261601385e-06, "loss": 1.234, "step": 1232 }, { "epoch": 0.39, "learning_rate": 6.982312093912612e-06, "loss": 1.205, "step": 1233 }, { "epoch": 0.39, "learning_rate": 6.977615287171457e-06, "loss": 1.3607, "step": 1234 }, { "epoch": 0.39, "learning_rate": 6.972916410851895e-06, "loss": 1.3562, "step": 1235 }, { "epoch": 0.39, "learning_rate": 6.9682154698713036e-06, "loss": 1.0245, "step": 1236 }, { "epoch": 0.39, "learning_rate": 6.963512469149229e-06, "loss": 1.3329, "step": 1237 }, { "epoch": 0.39, "learning_rate": 6.958807413607373e-06, "loss": 1.4099, "step": 1238 }, { "epoch": 0.39, "learning_rate": 6.954100308169587e-06, "loss": 1.4027, "step": 1239 }, { "epoch": 0.39, "learning_rate": 6.949391157761861e-06, "loss": 1.0285, "step": 1240 }, { "epoch": 0.39, "learning_rate": 6.944679967312334e-06, "loss": 1.2659, "step": 1241 }, { "epoch": 0.39, "learning_rate": 6.939966741751276e-06, "loss": 1.4628, "step": 1242 }, { "epoch": 0.39, "learning_rate": 6.9352514860110876e-06, "loss": 1.7882, "step": 1243 }, { "epoch": 0.39, "learning_rate": 6.930534205026291e-06, "loss": 1.5241, "step": 1244 }, { "epoch": 0.39, "learning_rate": 6.925814903733533e-06, "loss": 1.2188, "step": 1245 }, { "epoch": 0.39, "learning_rate": 6.921093587071571e-06, "loss": 1.6094, "step": 1246 }, { "epoch": 0.39, "learning_rate": 6.9163702599812735e-06, "loss": 1.4496, "step": 1247 }, { "epoch": 0.39, "learning_rate": 6.911644927405611e-06, "loss": 1.3882, "step": 1248 }, { "epoch": 0.39, "learning_rate": 6.906917594289653e-06, "loss": 0.9768, "step": 1249 }, { "epoch": 0.39, "learning_rate": 6.902188265580563e-06, "loss": 1.3009, "step": 1250 }, { "epoch": 0.39, "learning_rate": 6.897456946227594e-06, "loss": 1.3315, "step": 1251 }, { "epoch": 0.4, "learning_rate": 6.892723641182082e-06, "loss": 1.349, "step": 1252 }, { "epoch": 0.4, "learning_rate": 6.887988355397439e-06, "loss": 1.1741, "step": 1253 }, { "epoch": 0.4, "learning_rate": 6.883251093829153e-06, "loss": 1.5384, "step": 1254 }, { "epoch": 0.4, "learning_rate": 6.878511861434775e-06, "loss": 1.4665, "step": 1255 }, { "epoch": 0.4, "learning_rate": 6.873770663173923e-06, "loss": 1.8337, "step": 1256 }, { "epoch": 0.4, "learning_rate": 6.869027504008271e-06, "loss": 1.2314, "step": 1257 }, { "epoch": 0.4, "learning_rate": 6.864282388901544e-06, "loss": 1.0066, "step": 1258 }, { "epoch": 0.4, "learning_rate": 6.859535322819515e-06, "loss": 1.516, "step": 1259 }, { "epoch": 0.4, "learning_rate": 6.854786310729997e-06, "loss": 1.428, "step": 1260 }, { "epoch": 0.4, "learning_rate": 6.850035357602843e-06, "loss": 1.0436, "step": 1261 }, { "epoch": 0.4, "learning_rate": 6.8452824684099325e-06, "loss": 0.9877, "step": 1262 }, { "epoch": 0.4, "learning_rate": 6.840527648125176e-06, "loss": 1.3024, "step": 1263 }, { "epoch": 0.4, "learning_rate": 6.835770901724501e-06, "loss": 1.115, "step": 1264 }, { "epoch": 0.4, "learning_rate": 6.831012234185854e-06, "loss": 1.5939, "step": 1265 }, { "epoch": 0.4, "learning_rate": 6.826251650489189e-06, "loss": 1.1392, "step": 1266 }, { "epoch": 0.4, "learning_rate": 6.821489155616467e-06, "loss": 1.4116, "step": 1267 }, { "epoch": 0.4, "learning_rate": 6.816724754551647e-06, "loss": 1.1516, "step": 1268 }, { "epoch": 0.4, "learning_rate": 6.8119584522806845e-06, "loss": 1.1456, "step": 1269 }, { "epoch": 0.4, "learning_rate": 6.807190253791529e-06, "loss": 1.5794, "step": 1270 }, { "epoch": 0.4, "learning_rate": 6.802420164074104e-06, "loss": 1.3512, "step": 1271 }, { "epoch": 0.4, "learning_rate": 6.797648188120321e-06, "loss": 1.6698, "step": 1272 }, { "epoch": 0.4, "learning_rate": 6.792874330924061e-06, "loss": 1.2183, "step": 1273 }, { "epoch": 0.4, "learning_rate": 6.7880985974811776e-06, "loss": 1.679, "step": 1274 }, { "epoch": 0.4, "learning_rate": 6.783320992789484e-06, "loss": 1.252, "step": 1275 }, { "epoch": 0.4, "learning_rate": 6.778541521848754e-06, "loss": 1.1311, "step": 1276 }, { "epoch": 0.4, "learning_rate": 6.773760189660712e-06, "loss": 1.3132, "step": 1277 }, { "epoch": 0.4, "learning_rate": 6.7689770012290324e-06, "loss": 1.8635, "step": 1278 }, { "epoch": 0.4, "learning_rate": 6.764191961559332e-06, "loss": 1.105, "step": 1279 }, { "epoch": 0.4, "learning_rate": 6.759405075659165e-06, "loss": 1.3943, "step": 1280 }, { "epoch": 0.4, "learning_rate": 6.754616348538019e-06, "loss": 1.0336, "step": 1281 }, { "epoch": 0.4, "learning_rate": 6.749825785207302e-06, "loss": 1.205, "step": 1282 }, { "epoch": 0.41, "learning_rate": 6.745033390680352e-06, "loss": 1.1346, "step": 1283 }, { "epoch": 0.41, "learning_rate": 6.740239169972418e-06, "loss": 1.3702, "step": 1284 }, { "epoch": 0.41, "learning_rate": 6.73544312810066e-06, "loss": 1.3034, "step": 1285 }, { "epoch": 0.41, "learning_rate": 6.73064527008415e-06, "loss": 1.3648, "step": 1286 }, { "epoch": 0.41, "learning_rate": 6.725845600943854e-06, "loss": 1.6119, "step": 1287 }, { "epoch": 0.41, "learning_rate": 6.721044125702634e-06, "loss": 1.1142, "step": 1288 }, { "epoch": 0.41, "learning_rate": 6.7162408493852446e-06, "loss": 1.21, "step": 1289 }, { "epoch": 0.41, "learning_rate": 6.711435777018325e-06, "loss": 1.346, "step": 1290 }, { "epoch": 0.41, "learning_rate": 6.706628913630391e-06, "loss": 1.3947, "step": 1291 }, { "epoch": 0.41, "learning_rate": 6.70182026425184e-06, "loss": 1.2201, "step": 1292 }, { "epoch": 0.41, "learning_rate": 6.697009833914926e-06, "loss": 1.1434, "step": 1293 }, { "epoch": 0.41, "learning_rate": 6.692197627653779e-06, "loss": 0.9151, "step": 1294 }, { "epoch": 0.41, "learning_rate": 6.687383650504382e-06, "loss": 1.5171, "step": 1295 }, { "epoch": 0.41, "learning_rate": 6.6825679075045715e-06, "loss": 1.0201, "step": 1296 }, { "epoch": 0.41, "learning_rate": 6.677750403694031e-06, "loss": 1.3174, "step": 1297 }, { "epoch": 0.41, "learning_rate": 6.6729311441142906e-06, "loss": 1.4508, "step": 1298 }, { "epoch": 0.41, "learning_rate": 6.668110133808714e-06, "loss": 1.1166, "step": 1299 }, { "epoch": 0.41, "learning_rate": 6.663287377822496e-06, "loss": 1.2301, "step": 1300 }, { "epoch": 0.41, "learning_rate": 6.658462881202664e-06, "loss": 1.575, "step": 1301 }, { "epoch": 0.41, "learning_rate": 6.6536366489980615e-06, "loss": 1.5923, "step": 1302 }, { "epoch": 0.41, "learning_rate": 6.648808686259352e-06, "loss": 1.1223, "step": 1303 }, { "epoch": 0.41, "learning_rate": 6.643978998039007e-06, "loss": 1.1523, "step": 1304 }, { "epoch": 0.41, "learning_rate": 6.639147589391305e-06, "loss": 1.5004, "step": 1305 }, { "epoch": 0.41, "learning_rate": 6.634314465372325e-06, "loss": 1.4808, "step": 1306 }, { "epoch": 0.41, "learning_rate": 6.629479631039941e-06, "loss": 1.2885, "step": 1307 }, { "epoch": 0.41, "learning_rate": 6.624643091453817e-06, "loss": 1.2985, "step": 1308 }, { "epoch": 0.41, "learning_rate": 6.619804851675403e-06, "loss": 1.4676, "step": 1309 }, { "epoch": 0.41, "learning_rate": 6.614964916767924e-06, "loss": 1.5712, "step": 1310 }, { "epoch": 0.41, "learning_rate": 6.610123291796384e-06, "loss": 1.1557, "step": 1311 }, { "epoch": 0.41, "learning_rate": 6.605279981827551e-06, "loss": 1.5325, "step": 1312 }, { "epoch": 0.41, "learning_rate": 6.600434991929962e-06, "loss": 1.3264, "step": 1313 }, { "epoch": 0.41, "learning_rate": 6.595588327173907e-06, "loss": 1.1372, "step": 1314 }, { "epoch": 0.42, "learning_rate": 6.59073999263143e-06, "loss": 1.2892, "step": 1315 }, { "epoch": 0.42, "learning_rate": 6.585889993376322e-06, "loss": 1.6304, "step": 1316 }, { "epoch": 0.42, "learning_rate": 6.58103833448412e-06, "loss": 1.1044, "step": 1317 }, { "epoch": 0.42, "learning_rate": 6.576185021032093e-06, "loss": 1.5182, "step": 1318 }, { "epoch": 0.42, "learning_rate": 6.571330058099243e-06, "loss": 1.7091, "step": 1319 }, { "epoch": 0.42, "learning_rate": 6.566473450766301e-06, "loss": 1.2624, "step": 1320 }, { "epoch": 0.42, "learning_rate": 6.561615204115714e-06, "loss": 1.3108, "step": 1321 }, { "epoch": 0.42, "learning_rate": 6.556755323231646e-06, "loss": 1.4568, "step": 1322 }, { "epoch": 0.42, "learning_rate": 6.551893813199975e-06, "loss": 1.3282, "step": 1323 }, { "epoch": 0.42, "learning_rate": 6.547030679108281e-06, "loss": 1.34, "step": 1324 }, { "epoch": 0.42, "learning_rate": 6.5421659260458415e-06, "loss": 1.4971, "step": 1325 }, { "epoch": 0.42, "learning_rate": 6.537299559103632e-06, "loss": 1.5678, "step": 1326 }, { "epoch": 0.42, "learning_rate": 6.532431583374314e-06, "loss": 1.5129, "step": 1327 }, { "epoch": 0.42, "learning_rate": 6.527562003952236e-06, "loss": 1.0107, "step": 1328 }, { "epoch": 0.42, "learning_rate": 6.522690825933422e-06, "loss": 1.4173, "step": 1329 }, { "epoch": 0.42, "learning_rate": 6.517818054415569e-06, "loss": 1.4142, "step": 1330 }, { "epoch": 0.42, "learning_rate": 6.512943694498043e-06, "loss": 1.3022, "step": 1331 }, { "epoch": 0.42, "learning_rate": 6.5080677512818706e-06, "loss": 1.3486, "step": 1332 }, { "epoch": 0.42, "learning_rate": 6.503190229869737e-06, "loss": 1.0588, "step": 1333 }, { "epoch": 0.42, "learning_rate": 6.498311135365979e-06, "loss": 1.3442, "step": 1334 }, { "epoch": 0.42, "learning_rate": 6.4934304728765766e-06, "loss": 1.3034, "step": 1335 }, { "epoch": 0.42, "learning_rate": 6.488548247509156e-06, "loss": 1.5053, "step": 1336 }, { "epoch": 0.42, "learning_rate": 6.483664464372973e-06, "loss": 1.3669, "step": 1337 }, { "epoch": 0.42, "learning_rate": 6.478779128578919e-06, "loss": 1.4864, "step": 1338 }, { "epoch": 0.42, "learning_rate": 6.473892245239506e-06, "loss": 1.0252, "step": 1339 }, { "epoch": 0.42, "learning_rate": 6.469003819468867e-06, "loss": 1.3773, "step": 1340 }, { "epoch": 0.42, "learning_rate": 6.464113856382752e-06, "loss": 1.7232, "step": 1341 }, { "epoch": 0.42, "learning_rate": 6.459222361098516e-06, "loss": 1.4334, "step": 1342 }, { "epoch": 0.42, "learning_rate": 6.454329338735119e-06, "loss": 1.4965, "step": 1343 }, { "epoch": 0.42, "learning_rate": 6.4494347944131185e-06, "loss": 1.4829, "step": 1344 }, { "epoch": 0.42, "learning_rate": 6.4445387332546635e-06, "loss": 1.1056, "step": 1345 }, { "epoch": 0.42, "learning_rate": 6.439641160383494e-06, "loss": 1.2063, "step": 1346 }, { "epoch": 0.43, "learning_rate": 6.434742080924929e-06, "loss": 1.314, "step": 1347 }, { "epoch": 0.43, "learning_rate": 6.429841500005867e-06, "loss": 1.1591, "step": 1348 }, { "epoch": 0.43, "learning_rate": 6.424939422754774e-06, "loss": 1.0826, "step": 1349 }, { "epoch": 0.43, "learning_rate": 6.420035854301684e-06, "loss": 1.4405, "step": 1350 }, { "epoch": 0.43, "learning_rate": 6.4151307997781906e-06, "loss": 0.9949, "step": 1351 }, { "epoch": 0.43, "learning_rate": 6.410224264317444e-06, "loss": 1.1201, "step": 1352 }, { "epoch": 0.43, "learning_rate": 6.405316253054146e-06, "loss": 1.3291, "step": 1353 }, { "epoch": 0.43, "learning_rate": 6.4004067711245366e-06, "loss": 1.5491, "step": 1354 }, { "epoch": 0.43, "learning_rate": 6.395495823666399e-06, "loss": 1.8679, "step": 1355 }, { "epoch": 0.43, "learning_rate": 6.390583415819052e-06, "loss": 1.19, "step": 1356 }, { "epoch": 0.43, "learning_rate": 6.385669552723338e-06, "loss": 0.9439, "step": 1357 }, { "epoch": 0.43, "learning_rate": 6.380754239521626e-06, "loss": 0.9192, "step": 1358 }, { "epoch": 0.43, "learning_rate": 6.375837481357803e-06, "loss": 1.0997, "step": 1359 }, { "epoch": 0.43, "learning_rate": 6.3709192833772626e-06, "loss": 1.5597, "step": 1360 }, { "epoch": 0.43, "learning_rate": 6.365999650726911e-06, "loss": 1.0669, "step": 1361 }, { "epoch": 0.43, "learning_rate": 6.361078588555154e-06, "loss": 1.4105, "step": 1362 }, { "epoch": 0.43, "learning_rate": 6.356156102011892e-06, "loss": 1.3653, "step": 1363 }, { "epoch": 0.43, "learning_rate": 6.351232196248519e-06, "loss": 1.0192, "step": 1364 }, { "epoch": 0.43, "learning_rate": 6.3463068764179105e-06, "loss": 1.808, "step": 1365 }, { "epoch": 0.43, "learning_rate": 6.341380147674424e-06, "loss": 1.4879, "step": 1366 }, { "epoch": 0.43, "learning_rate": 6.336452015173892e-06, "loss": 1.2105, "step": 1367 }, { "epoch": 0.43, "learning_rate": 6.331522484073615e-06, "loss": 1.1491, "step": 1368 }, { "epoch": 0.43, "learning_rate": 6.326591559532356e-06, "loss": 1.0165, "step": 1369 }, { "epoch": 0.43, "learning_rate": 6.32165924671034e-06, "loss": 1.1037, "step": 1370 }, { "epoch": 0.43, "learning_rate": 6.31672555076924e-06, "loss": 1.1809, "step": 1371 }, { "epoch": 0.43, "learning_rate": 6.31179047687218e-06, "loss": 1.6316, "step": 1372 }, { "epoch": 0.43, "learning_rate": 6.306854030183723e-06, "loss": 1.2034, "step": 1373 }, { "epoch": 0.43, "learning_rate": 6.301916215869875e-06, "loss": 1.4062, "step": 1374 }, { "epoch": 0.43, "learning_rate": 6.296977039098065e-06, "loss": 1.1801, "step": 1375 }, { "epoch": 0.43, "learning_rate": 6.29203650503715e-06, "loss": 1.476, "step": 1376 }, { "epoch": 0.43, "learning_rate": 6.2870946188574126e-06, "loss": 1.4176, "step": 1377 }, { "epoch": 0.44, "learning_rate": 6.282151385730545e-06, "loss": 1.0918, "step": 1378 }, { "epoch": 0.44, "learning_rate": 6.2772068108296516e-06, "loss": 1.2566, "step": 1379 }, { "epoch": 0.44, "learning_rate": 6.272260899329239e-06, "loss": 1.2785, "step": 1380 }, { "epoch": 0.44, "learning_rate": 6.267313656405214e-06, "loss": 1.0595, "step": 1381 }, { "epoch": 0.44, "learning_rate": 6.262365087234878e-06, "loss": 1.0538, "step": 1382 }, { "epoch": 0.44, "learning_rate": 6.257415196996915e-06, "loss": 1.2234, "step": 1383 }, { "epoch": 0.44, "learning_rate": 6.252463990871398e-06, "loss": 1.1039, "step": 1384 }, { "epoch": 0.44, "learning_rate": 6.247511474039772e-06, "loss": 1.1476, "step": 1385 }, { "epoch": 0.44, "learning_rate": 6.242557651684861e-06, "loss": 1.1879, "step": 1386 }, { "epoch": 0.44, "learning_rate": 6.237602528990844e-06, "loss": 1.1008, "step": 1387 }, { "epoch": 0.44, "learning_rate": 6.23264611114327e-06, "loss": 1.3229, "step": 1388 }, { "epoch": 0.44, "learning_rate": 6.227688403329039e-06, "loss": 1.0469, "step": 1389 }, { "epoch": 0.44, "learning_rate": 6.222729410736405e-06, "loss": 1.2614, "step": 1390 }, { "epoch": 0.44, "learning_rate": 6.2177691385549595e-06, "loss": 1.6941, "step": 1391 }, { "epoch": 0.44, "learning_rate": 6.212807591975642e-06, "loss": 1.4509, "step": 1392 }, { "epoch": 0.44, "learning_rate": 6.207844776190719e-06, "loss": 1.2454, "step": 1393 }, { "epoch": 0.44, "learning_rate": 6.202880696393787e-06, "loss": 1.7576, "step": 1394 }, { "epoch": 0.44, "learning_rate": 6.197915357779764e-06, "loss": 1.4981, "step": 1395 }, { "epoch": 0.44, "learning_rate": 6.192948765544891e-06, "loss": 1.0968, "step": 1396 }, { "epoch": 0.44, "learning_rate": 6.187980924886714e-06, "loss": 1.2406, "step": 1397 }, { "epoch": 0.44, "learning_rate": 6.183011841004089e-06, "loss": 1.2792, "step": 1398 }, { "epoch": 0.44, "learning_rate": 6.178041519097171e-06, "loss": 1.0237, "step": 1399 }, { "epoch": 0.44, "learning_rate": 6.173069964367414e-06, "loss": 1.3444, "step": 1400 }, { "epoch": 0.44, "learning_rate": 6.1680971820175594e-06, "loss": 1.4464, "step": 1401 }, { "epoch": 0.44, "learning_rate": 6.163123177251634e-06, "loss": 1.2807, "step": 1402 }, { "epoch": 0.44, "learning_rate": 6.158147955274944e-06, "loss": 1.1411, "step": 1403 }, { "epoch": 0.44, "learning_rate": 6.1531715212940705e-06, "loss": 1.0212, "step": 1404 }, { "epoch": 0.44, "learning_rate": 6.1481938805168575e-06, "loss": 1.6647, "step": 1405 }, { "epoch": 0.44, "learning_rate": 6.143215038152421e-06, "loss": 1.358, "step": 1406 }, { "epoch": 0.44, "learning_rate": 6.1382349994111265e-06, "loss": 1.1404, "step": 1407 }, { "epoch": 0.44, "learning_rate": 6.133253769504597e-06, "loss": 1.279, "step": 1408 }, { "epoch": 0.44, "learning_rate": 6.128271353645698e-06, "loss": 1.3439, "step": 1409 }, { "epoch": 0.45, "learning_rate": 6.123287757048538e-06, "loss": 1.1206, "step": 1410 }, { "epoch": 0.45, "learning_rate": 6.118302984928462e-06, "loss": 1.2248, "step": 1411 }, { "epoch": 0.45, "learning_rate": 6.1133170425020425e-06, "loss": 1.0412, "step": 1412 }, { "epoch": 0.45, "learning_rate": 6.10832993498708e-06, "loss": 1.1767, "step": 1413 }, { "epoch": 0.45, "learning_rate": 6.103341667602592e-06, "loss": 1.2122, "step": 1414 }, { "epoch": 0.45, "learning_rate": 6.098352245568811e-06, "loss": 1.2052, "step": 1415 }, { "epoch": 0.45, "learning_rate": 6.0933616741071756e-06, "loss": 1.0663, "step": 1416 }, { "epoch": 0.45, "learning_rate": 6.08836995844033e-06, "loss": 1.1478, "step": 1417 }, { "epoch": 0.45, "learning_rate": 6.083377103792114e-06, "loss": 1.1638, "step": 1418 }, { "epoch": 0.45, "learning_rate": 6.078383115387564e-06, "loss": 1.5171, "step": 1419 }, { "epoch": 0.45, "learning_rate": 6.073387998452893e-06, "loss": 1.679, "step": 1420 }, { "epoch": 0.45, "learning_rate": 6.068391758215505e-06, "loss": 1.5834, "step": 1421 }, { "epoch": 0.45, "learning_rate": 6.063394399903974e-06, "loss": 1.3824, "step": 1422 }, { "epoch": 0.45, "learning_rate": 6.058395928748047e-06, "loss": 1.6578, "step": 1423 }, { "epoch": 0.45, "learning_rate": 6.053396349978632e-06, "loss": 1.5284, "step": 1424 }, { "epoch": 0.45, "learning_rate": 6.0483956688278e-06, "loss": 1.4592, "step": 1425 }, { "epoch": 0.45, "learning_rate": 6.043393890528774e-06, "loss": 1.3385, "step": 1426 }, { "epoch": 0.45, "learning_rate": 6.038391020315924e-06, "loss": 1.2565, "step": 1427 }, { "epoch": 0.45, "learning_rate": 6.033387063424765e-06, "loss": 1.3018, "step": 1428 }, { "epoch": 0.45, "learning_rate": 6.028382025091949e-06, "loss": 1.1305, "step": 1429 }, { "epoch": 0.45, "learning_rate": 6.023375910555256e-06, "loss": 1.6388, "step": 1430 }, { "epoch": 0.45, "learning_rate": 6.018368725053595e-06, "loss": 1.0227, "step": 1431 }, { "epoch": 0.45, "learning_rate": 6.013360473826999e-06, "loss": 1.2433, "step": 1432 }, { "epoch": 0.45, "learning_rate": 6.0083511621166115e-06, "loss": 1.4821, "step": 1433 }, { "epoch": 0.45, "learning_rate": 6.003340795164685e-06, "loss": 1.4368, "step": 1434 }, { "epoch": 0.45, "learning_rate": 5.9983293782145825e-06, "loss": 1.1806, "step": 1435 }, { "epoch": 0.45, "learning_rate": 5.99331691651076e-06, "loss": 1.1984, "step": 1436 }, { "epoch": 0.45, "learning_rate": 5.988303415298768e-06, "loss": 1.2458, "step": 1437 }, { "epoch": 0.45, "learning_rate": 5.983288879825249e-06, "loss": 0.926, "step": 1438 }, { "epoch": 0.45, "learning_rate": 5.97827331533792e-06, "loss": 1.451, "step": 1439 }, { "epoch": 0.45, "learning_rate": 5.973256727085584e-06, "loss": 1.1584, "step": 1440 }, { "epoch": 0.45, "learning_rate": 5.968239120318105e-06, "loss": 1.3234, "step": 1441 }, { "epoch": 0.46, "learning_rate": 5.963220500286426e-06, "loss": 1.4154, "step": 1442 }, { "epoch": 0.46, "learning_rate": 5.9582008722425375e-06, "loss": 1.1951, "step": 1443 }, { "epoch": 0.46, "learning_rate": 5.953180241439492e-06, "loss": 1.1867, "step": 1444 }, { "epoch": 0.46, "learning_rate": 5.948158613131391e-06, "loss": 1.5974, "step": 1445 }, { "epoch": 0.46, "learning_rate": 5.943135992573377e-06, "loss": 1.1011, "step": 1446 }, { "epoch": 0.46, "learning_rate": 5.9381123850216335e-06, "loss": 1.1746, "step": 1447 }, { "epoch": 0.46, "learning_rate": 5.933087795733376e-06, "loss": 1.1545, "step": 1448 }, { "epoch": 0.46, "learning_rate": 5.928062229966847e-06, "loss": 1.101, "step": 1449 }, { "epoch": 0.46, "learning_rate": 5.9230356929813116e-06, "loss": 1.1145, "step": 1450 }, { "epoch": 0.46, "learning_rate": 5.91800819003705e-06, "loss": 1.3089, "step": 1451 }, { "epoch": 0.46, "learning_rate": 5.912979726395357e-06, "loss": 1.5455, "step": 1452 }, { "epoch": 0.46, "learning_rate": 5.907950307318526e-06, "loss": 1.0597, "step": 1453 }, { "epoch": 0.46, "learning_rate": 5.902919938069857e-06, "loss": 1.5131, "step": 1454 }, { "epoch": 0.46, "learning_rate": 5.897888623913641e-06, "loss": 1.0892, "step": 1455 }, { "epoch": 0.46, "learning_rate": 5.892856370115159e-06, "loss": 1.0027, "step": 1456 }, { "epoch": 0.46, "learning_rate": 5.887823181940673e-06, "loss": 1.3567, "step": 1457 }, { "epoch": 0.46, "learning_rate": 5.882789064657427e-06, "loss": 1.4318, "step": 1458 }, { "epoch": 0.46, "learning_rate": 5.877754023533634e-06, "loss": 1.0102, "step": 1459 }, { "epoch": 0.46, "learning_rate": 5.872718063838474e-06, "loss": 1.2899, "step": 1460 }, { "epoch": 0.46, "learning_rate": 5.867681190842089e-06, "loss": 1.1747, "step": 1461 }, { "epoch": 0.46, "learning_rate": 5.862643409815577e-06, "loss": 0.9992, "step": 1462 }, { "epoch": 0.46, "learning_rate": 5.857604726030988e-06, "loss": 0.9406, "step": 1463 }, { "epoch": 0.46, "learning_rate": 5.852565144761313e-06, "loss": 1.1714, "step": 1464 }, { "epoch": 0.46, "learning_rate": 5.8475246712804845e-06, "loss": 1.2739, "step": 1465 }, { "epoch": 0.46, "learning_rate": 5.842483310863367e-06, "loss": 1.5551, "step": 1466 }, { "epoch": 0.46, "learning_rate": 5.837441068785757e-06, "loss": 1.1571, "step": 1467 }, { "epoch": 0.46, "learning_rate": 5.832397950324368e-06, "loss": 1.4672, "step": 1468 }, { "epoch": 0.46, "learning_rate": 5.827353960756836e-06, "loss": 1.3617, "step": 1469 }, { "epoch": 0.46, "learning_rate": 5.8223091053617055e-06, "loss": 1.3219, "step": 1470 }, { "epoch": 0.46, "learning_rate": 5.817263389418427e-06, "loss": 0.9431, "step": 1471 }, { "epoch": 0.46, "learning_rate": 5.8122168182073535e-06, "loss": 1.6984, "step": 1472 }, { "epoch": 0.47, "learning_rate": 5.807169397009733e-06, "loss": 1.4235, "step": 1473 }, { "epoch": 0.47, "learning_rate": 5.8021211311077e-06, "loss": 1.0216, "step": 1474 }, { "epoch": 0.47, "learning_rate": 5.7970720257842775e-06, "loss": 1.2339, "step": 1475 }, { "epoch": 0.47, "learning_rate": 5.792022086323361e-06, "loss": 1.1873, "step": 1476 }, { "epoch": 0.47, "learning_rate": 5.786971318009725e-06, "loss": 1.2571, "step": 1477 }, { "epoch": 0.47, "learning_rate": 5.7819197261290085e-06, "loss": 1.473, "step": 1478 }, { "epoch": 0.47, "learning_rate": 5.776867315967711e-06, "loss": 1.5893, "step": 1479 }, { "epoch": 0.47, "learning_rate": 5.7718140928131925e-06, "loss": 1.3979, "step": 1480 }, { "epoch": 0.47, "learning_rate": 5.76676006195366e-06, "loss": 0.8098, "step": 1481 }, { "epoch": 0.47, "learning_rate": 5.761705228678166e-06, "loss": 1.272, "step": 1482 }, { "epoch": 0.47, "learning_rate": 5.756649598276606e-06, "loss": 1.3856, "step": 1483 }, { "epoch": 0.47, "learning_rate": 5.751593176039705e-06, "loss": 1.5671, "step": 1484 }, { "epoch": 0.47, "learning_rate": 5.746535967259021e-06, "loss": 1.063, "step": 1485 }, { "epoch": 0.47, "learning_rate": 5.741477977226934e-06, "loss": 1.297, "step": 1486 }, { "epoch": 0.47, "learning_rate": 5.736419211236638e-06, "loss": 1.1043, "step": 1487 }, { "epoch": 0.47, "learning_rate": 5.731359674582143e-06, "loss": 1.1393, "step": 1488 }, { "epoch": 0.47, "learning_rate": 5.726299372558266e-06, "loss": 1.1642, "step": 1489 }, { "epoch": 0.47, "learning_rate": 5.7212383104606206e-06, "loss": 1.6094, "step": 1490 }, { "epoch": 0.47, "learning_rate": 5.716176493585622e-06, "loss": 1.3658, "step": 1491 }, { "epoch": 0.47, "learning_rate": 5.711113927230468e-06, "loss": 1.2604, "step": 1492 }, { "epoch": 0.47, "learning_rate": 5.706050616693148e-06, "loss": 1.8983, "step": 1493 }, { "epoch": 0.47, "learning_rate": 5.700986567272424e-06, "loss": 0.8591, "step": 1494 }, { "epoch": 0.47, "learning_rate": 5.695921784267836e-06, "loss": 1.3428, "step": 1495 }, { "epoch": 0.47, "learning_rate": 5.690856272979688e-06, "loss": 1.3506, "step": 1496 }, { "epoch": 0.47, "learning_rate": 5.68579003870905e-06, "loss": 1.1548, "step": 1497 }, { "epoch": 0.47, "learning_rate": 5.6807230867577444e-06, "loss": 1.199, "step": 1498 }, { "epoch": 0.47, "learning_rate": 5.675655422428346e-06, "loss": 1.5007, "step": 1499 }, { "epoch": 0.47, "learning_rate": 5.670587051024179e-06, "loss": 1.2224, "step": 1500 }, { "epoch": 0.47, "learning_rate": 5.665517977849303e-06, "loss": 1.2555, "step": 1501 }, { "epoch": 0.47, "learning_rate": 5.660448208208513e-06, "loss": 1.592, "step": 1502 }, { "epoch": 0.47, "learning_rate": 5.655377747407331e-06, "loss": 1.2267, "step": 1503 }, { "epoch": 0.47, "learning_rate": 5.650306600752009e-06, "loss": 1.5012, "step": 1504 }, { "epoch": 0.48, "learning_rate": 5.645234773549508e-06, "loss": 1.3715, "step": 1505 }, { "epoch": 0.48, "learning_rate": 5.640162271107509e-06, "loss": 1.7403, "step": 1506 }, { "epoch": 0.48, "learning_rate": 5.635089098734394e-06, "loss": 1.1928, "step": 1507 }, { "epoch": 0.48, "learning_rate": 5.630015261739249e-06, "loss": 1.2158, "step": 1508 }, { "epoch": 0.48, "learning_rate": 5.624940765431854e-06, "loss": 1.6485, "step": 1509 }, { "epoch": 0.48, "learning_rate": 5.6198656151226805e-06, "loss": 1.1834, "step": 1510 }, { "epoch": 0.48, "learning_rate": 5.614789816122882e-06, "loss": 1.2665, "step": 1511 }, { "epoch": 0.48, "learning_rate": 5.609713373744295e-06, "loss": 1.3218, "step": 1512 }, { "epoch": 0.48, "learning_rate": 5.604636293299426e-06, "loss": 1.2883, "step": 1513 }, { "epoch": 0.48, "learning_rate": 5.599558580101447e-06, "loss": 1.3307, "step": 1514 }, { "epoch": 0.48, "learning_rate": 5.5944802394642e-06, "loss": 1.4174, "step": 1515 }, { "epoch": 0.48, "learning_rate": 5.5894012767021736e-06, "loss": 1.1703, "step": 1516 }, { "epoch": 0.48, "learning_rate": 5.584321697130517e-06, "loss": 1.2699, "step": 1517 }, { "epoch": 0.48, "learning_rate": 5.5792415060650186e-06, "loss": 1.3823, "step": 1518 }, { "epoch": 0.48, "learning_rate": 5.574160708822109e-06, "loss": 1.3291, "step": 1519 }, { "epoch": 0.48, "learning_rate": 5.5690793107188534e-06, "loss": 1.151, "step": 1520 }, { "epoch": 0.48, "learning_rate": 5.563997317072943e-06, "loss": 1.287, "step": 1521 }, { "epoch": 0.48, "learning_rate": 5.558914733202697e-06, "loss": 1.4825, "step": 1522 }, { "epoch": 0.48, "learning_rate": 5.553831564427049e-06, "loss": 0.9587, "step": 1523 }, { "epoch": 0.48, "learning_rate": 5.548747816065545e-06, "loss": 1.1934, "step": 1524 }, { "epoch": 0.48, "learning_rate": 5.543663493438338e-06, "loss": 1.3444, "step": 1525 }, { "epoch": 0.48, "learning_rate": 5.538578601866182e-06, "loss": 1.1861, "step": 1526 }, { "epoch": 0.48, "learning_rate": 5.5334931466704265e-06, "loss": 1.4941, "step": 1527 }, { "epoch": 0.48, "learning_rate": 5.528407133173013e-06, "loss": 1.2717, "step": 1528 }, { "epoch": 0.48, "learning_rate": 5.52332056669646e-06, "loss": 1.6751, "step": 1529 }, { "epoch": 0.48, "learning_rate": 5.518233452563871e-06, "loss": 1.1484, "step": 1530 }, { "epoch": 0.48, "learning_rate": 5.5131457960989244e-06, "loss": 1.2241, "step": 1531 }, { "epoch": 0.48, "learning_rate": 5.508057602625862e-06, "loss": 1.3396, "step": 1532 }, { "epoch": 0.48, "learning_rate": 5.502968877469487e-06, "loss": 1.0409, "step": 1533 }, { "epoch": 0.48, "learning_rate": 5.4978796259551605e-06, "loss": 1.222, "step": 1534 }, { "epoch": 0.48, "learning_rate": 5.4927898534087956e-06, "loss": 1.1193, "step": 1535 }, { "epoch": 0.48, "learning_rate": 5.4876995651568485e-06, "loss": 1.2283, "step": 1536 }, { "epoch": 0.49, "learning_rate": 5.48260876652632e-06, "loss": 1.226, "step": 1537 }, { "epoch": 0.49, "learning_rate": 5.477517462844735e-06, "loss": 1.264, "step": 1538 }, { "epoch": 0.49, "learning_rate": 5.472425659440157e-06, "loss": 1.0656, "step": 1539 }, { "epoch": 0.49, "learning_rate": 5.467333361641169e-06, "loss": 1.3261, "step": 1540 }, { "epoch": 0.49, "learning_rate": 5.462240574776869e-06, "loss": 1.1295, "step": 1541 }, { "epoch": 0.49, "learning_rate": 5.457147304176871e-06, "loss": 1.1806, "step": 1542 }, { "epoch": 0.49, "learning_rate": 5.45205355517129e-06, "loss": 1.0718, "step": 1543 }, { "epoch": 0.49, "learning_rate": 5.446959333090746e-06, "loss": 0.9181, "step": 1544 }, { "epoch": 0.49, "learning_rate": 5.441864643266355e-06, "loss": 1.391, "step": 1545 }, { "epoch": 0.49, "learning_rate": 5.436769491029719e-06, "loss": 1.4487, "step": 1546 }, { "epoch": 0.49, "learning_rate": 5.431673881712922e-06, "loss": 1.1772, "step": 1547 }, { "epoch": 0.49, "learning_rate": 5.426577820648533e-06, "loss": 1.395, "step": 1548 }, { "epoch": 0.49, "learning_rate": 5.421481313169589e-06, "loss": 1.5403, "step": 1549 }, { "epoch": 0.49, "learning_rate": 5.416384364609596e-06, "loss": 1.2438, "step": 1550 }, { "epoch": 0.49, "learning_rate": 5.41128698030252e-06, "loss": 1.6462, "step": 1551 }, { "epoch": 0.49, "learning_rate": 5.406189165582787e-06, "loss": 0.8655, "step": 1552 }, { "epoch": 0.49, "learning_rate": 5.401090925785266e-06, "loss": 1.0978, "step": 1553 }, { "epoch": 0.49, "learning_rate": 5.395992266245277e-06, "loss": 1.1258, "step": 1554 }, { "epoch": 0.49, "learning_rate": 5.3908931922985785e-06, "loss": 1.2378, "step": 1555 }, { "epoch": 0.49, "learning_rate": 5.385793709281362e-06, "loss": 1.4291, "step": 1556 }, { "epoch": 0.49, "learning_rate": 5.380693822530247e-06, "loss": 1.2495, "step": 1557 }, { "epoch": 0.49, "learning_rate": 5.375593537382272e-06, "loss": 1.1388, "step": 1558 }, { "epoch": 0.49, "learning_rate": 5.370492859174898e-06, "loss": 1.7383, "step": 1559 }, { "epoch": 0.49, "learning_rate": 5.3653917932459975e-06, "loss": 1.4529, "step": 1560 }, { "epoch": 0.49, "learning_rate": 5.360290344933842e-06, "loss": 1.4194, "step": 1561 }, { "epoch": 0.49, "learning_rate": 5.35518851957711e-06, "loss": 1.2722, "step": 1562 }, { "epoch": 0.49, "learning_rate": 5.350086322514873e-06, "loss": 0.8837, "step": 1563 }, { "epoch": 0.49, "learning_rate": 5.3449837590865885e-06, "loss": 1.0356, "step": 1564 }, { "epoch": 0.49, "learning_rate": 5.339880834632099e-06, "loss": 1.1945, "step": 1565 }, { "epoch": 0.49, "learning_rate": 5.334777554491628e-06, "loss": 1.4518, "step": 1566 }, { "epoch": 0.49, "learning_rate": 5.329673924005766e-06, "loss": 1.3984, "step": 1567 }, { "epoch": 0.5, "learning_rate": 5.324569948515476e-06, "loss": 1.2594, "step": 1568 }, { "epoch": 0.5, "learning_rate": 5.319465633362074e-06, "loss": 1.4722, "step": 1569 }, { "epoch": 0.5, "learning_rate": 5.314360983887239e-06, "loss": 1.1598, "step": 1570 }, { "epoch": 0.5, "learning_rate": 5.309256005432997e-06, "loss": 1.1224, "step": 1571 }, { "epoch": 0.5, "learning_rate": 5.304150703341716e-06, "loss": 1.2645, "step": 1572 }, { "epoch": 0.5, "learning_rate": 5.299045082956109e-06, "loss": 1.6295, "step": 1573 }, { "epoch": 0.5, "learning_rate": 5.293939149619216e-06, "loss": 1.0591, "step": 1574 }, { "epoch": 0.5, "learning_rate": 5.288832908674406e-06, "loss": 1.1486, "step": 1575 }, { "epoch": 0.5, "learning_rate": 5.2837263654653715e-06, "loss": 1.2927, "step": 1576 }, { "epoch": 0.5, "learning_rate": 5.278619525336118e-06, "loss": 1.4466, "step": 1577 }, { "epoch": 0.5, "learning_rate": 5.273512393630968e-06, "loss": 1.5711, "step": 1578 }, { "epoch": 0.5, "learning_rate": 5.268404975694542e-06, "loss": 1.0547, "step": 1579 }, { "epoch": 0.5, "learning_rate": 5.263297276871766e-06, "loss": 1.0147, "step": 1580 }, { "epoch": 0.5, "learning_rate": 5.258189302507854e-06, "loss": 1.2879, "step": 1581 }, { "epoch": 0.5, "learning_rate": 5.2530810579483135e-06, "loss": 1.3662, "step": 1582 }, { "epoch": 0.5, "learning_rate": 5.2479725485389335e-06, "loss": 1.2092, "step": 1583 }, { "epoch": 0.5, "learning_rate": 5.242863779625776e-06, "loss": 1.0865, "step": 1584 }, { "epoch": 0.5, "learning_rate": 5.237754756555183e-06, "loss": 1.1544, "step": 1585 }, { "epoch": 0.5, "learning_rate": 5.232645484673754e-06, "loss": 1.0165, "step": 1586 }, { "epoch": 0.5, "learning_rate": 5.227535969328353e-06, "loss": 1.4268, "step": 1587 }, { "epoch": 0.5, "learning_rate": 5.222426215866099e-06, "loss": 1.3433, "step": 1588 }, { "epoch": 0.5, "learning_rate": 5.217316229634357e-06, "loss": 1.0759, "step": 1589 }, { "epoch": 0.5, "learning_rate": 5.212206015980742e-06, "loss": 1.5693, "step": 1590 }, { "epoch": 0.5, "learning_rate": 5.207095580253101e-06, "loss": 1.445, "step": 1591 }, { "epoch": 0.5, "learning_rate": 5.201984927799513e-06, "loss": 1.0204, "step": 1592 }, { "epoch": 0.5, "learning_rate": 5.196874063968289e-06, "loss": 1.3672, "step": 1593 }, { "epoch": 0.5, "learning_rate": 5.191762994107958e-06, "loss": 1.3207, "step": 1594 }, { "epoch": 0.5, "learning_rate": 5.186651723567264e-06, "loss": 1.3637, "step": 1595 }, { "epoch": 0.5, "learning_rate": 5.181540257695164e-06, "loss": 1.2853, "step": 1596 }, { "epoch": 0.5, "learning_rate": 5.176428601840815e-06, "loss": 1.4525, "step": 1597 }, { "epoch": 0.5, "learning_rate": 5.171316761353576e-06, "loss": 0.9537, "step": 1598 }, { "epoch": 0.5, "learning_rate": 5.166204741583e-06, "loss": 1.0279, "step": 1599 }, { "epoch": 0.51, "learning_rate": 5.1610925478788236e-06, "loss": 1.2408, "step": 1600 }, { "epoch": 0.51, "learning_rate": 5.155980185590969e-06, "loss": 1.1886, "step": 1601 }, { "epoch": 0.51, "learning_rate": 5.150867660069534e-06, "loss": 1.1356, "step": 1602 }, { "epoch": 0.51, "learning_rate": 5.145754976664786e-06, "loss": 1.1249, "step": 1603 }, { "epoch": 0.51, "learning_rate": 5.14064214072716e-06, "loss": 1.0975, "step": 1604 }, { "epoch": 0.51, "learning_rate": 5.135529157607249e-06, "loss": 1.3444, "step": 1605 }, { "epoch": 0.51, "learning_rate": 5.130416032655797e-06, "loss": 1.3483, "step": 1606 }, { "epoch": 0.51, "learning_rate": 5.1253027712237055e-06, "loss": 1.3357, "step": 1607 }, { "epoch": 0.51, "learning_rate": 5.120189378662009e-06, "loss": 1.135, "step": 1608 }, { "epoch": 0.51, "learning_rate": 5.115075860321882e-06, "loss": 1.2381, "step": 1609 }, { "epoch": 0.51, "learning_rate": 5.109962221554635e-06, "loss": 1.1758, "step": 1610 }, { "epoch": 0.51, "learning_rate": 5.1048484677116995e-06, "loss": 1.2127, "step": 1611 }, { "epoch": 0.51, "learning_rate": 5.099734604144629e-06, "loss": 1.1462, "step": 1612 }, { "epoch": 0.51, "learning_rate": 5.094620636205096e-06, "loss": 0.9182, "step": 1613 }, { "epoch": 0.51, "learning_rate": 5.089506569244873e-06, "loss": 1.4728, "step": 1614 }, { "epoch": 0.51, "learning_rate": 5.084392408615843e-06, "loss": 1.4714, "step": 1615 }, { "epoch": 0.51, "learning_rate": 5.079278159669988e-06, "loss": 1.069, "step": 1616 }, { "epoch": 0.51, "learning_rate": 5.074163827759377e-06, "loss": 0.886, "step": 1617 }, { "epoch": 0.51, "learning_rate": 5.06904941823617e-06, "loss": 1.313, "step": 1618 }, { "epoch": 0.51, "learning_rate": 5.0639349364526066e-06, "loss": 1.7596, "step": 1619 }, { "epoch": 0.51, "learning_rate": 5.058820387761003e-06, "loss": 1.3654, "step": 1620 }, { "epoch": 0.51, "learning_rate": 5.053705777513745e-06, "loss": 1.1645, "step": 1621 }, { "epoch": 0.51, "learning_rate": 5.048591111063279e-06, "loss": 1.0323, "step": 1622 }, { "epoch": 0.51, "learning_rate": 5.04347639376212e-06, "loss": 1.443, "step": 1623 }, { "epoch": 0.51, "learning_rate": 5.038361630962825e-06, "loss": 1.521, "step": 1624 }, { "epoch": 0.51, "learning_rate": 5.033246828018006e-06, "loss": 0.9371, "step": 1625 }, { "epoch": 0.51, "learning_rate": 5.028131990280312e-06, "loss": 1.1297, "step": 1626 }, { "epoch": 0.51, "learning_rate": 5.023017123102435e-06, "loss": 1.4095, "step": 1627 }, { "epoch": 0.51, "learning_rate": 5.017902231837093e-06, "loss": 1.3238, "step": 1628 }, { "epoch": 0.51, "learning_rate": 5.012787321837027e-06, "loss": 1.5147, "step": 1629 }, { "epoch": 0.51, "learning_rate": 5.007672398455004e-06, "loss": 1.0651, "step": 1630 }, { "epoch": 0.51, "learning_rate": 5.002557467043799e-06, "loss": 0.8969, "step": 1631 }, { "epoch": 0.52, "learning_rate": 4.997442532956202e-06, "loss": 1.6167, "step": 1632 }, { "epoch": 0.52, "learning_rate": 4.992327601544998e-06, "loss": 1.2817, "step": 1633 }, { "epoch": 0.52, "learning_rate": 4.987212678162974e-06, "loss": 1.375, "step": 1634 }, { "epoch": 0.52, "learning_rate": 4.982097768162909e-06, "loss": 1.6615, "step": 1635 }, { "epoch": 0.52, "learning_rate": 4.9769828768975645e-06, "loss": 1.2619, "step": 1636 }, { "epoch": 0.52, "learning_rate": 4.9718680097196876e-06, "loss": 1.5016, "step": 1637 }, { "epoch": 0.52, "learning_rate": 4.966753171981996e-06, "loss": 1.0024, "step": 1638 }, { "epoch": 0.52, "learning_rate": 4.961638369037177e-06, "loss": 1.204, "step": 1639 }, { "epoch": 0.52, "learning_rate": 4.956523606237881e-06, "loss": 1.5816, "step": 1640 }, { "epoch": 0.52, "learning_rate": 4.951408888936722e-06, "loss": 1.0857, "step": 1641 }, { "epoch": 0.52, "learning_rate": 4.946294222486258e-06, "loss": 1.0476, "step": 1642 }, { "epoch": 0.52, "learning_rate": 4.941179612238998e-06, "loss": 1.0575, "step": 1643 }, { "epoch": 0.52, "learning_rate": 4.936065063547394e-06, "loss": 1.0318, "step": 1644 }, { "epoch": 0.52, "learning_rate": 4.930950581763831e-06, "loss": 1.6016, "step": 1645 }, { "epoch": 0.52, "learning_rate": 4.925836172240623e-06, "loss": 1.1366, "step": 1646 }, { "epoch": 0.52, "learning_rate": 4.920721840330013e-06, "loss": 0.9655, "step": 1647 }, { "epoch": 0.52, "learning_rate": 4.915607591384157e-06, "loss": 1.3922, "step": 1648 }, { "epoch": 0.52, "learning_rate": 4.910493430755128e-06, "loss": 1.284, "step": 1649 }, { "epoch": 0.52, "learning_rate": 4.905379363794907e-06, "loss": 1.4518, "step": 1650 }, { "epoch": 0.52, "learning_rate": 4.900265395855371e-06, "loss": 1.3542, "step": 1651 }, { "epoch": 0.52, "learning_rate": 4.895151532288302e-06, "loss": 1.2943, "step": 1652 }, { "epoch": 0.52, "learning_rate": 4.890037778445367e-06, "loss": 1.0706, "step": 1653 }, { "epoch": 0.52, "learning_rate": 4.884924139678119e-06, "loss": 1.7859, "step": 1654 }, { "epoch": 0.52, "learning_rate": 4.879810621337993e-06, "loss": 1.0257, "step": 1655 }, { "epoch": 0.52, "learning_rate": 4.874697228776295e-06, "loss": 1.4565, "step": 1656 }, { "epoch": 0.52, "learning_rate": 4.869583967344203e-06, "loss": 1.2422, "step": 1657 }, { "epoch": 0.52, "learning_rate": 4.864470842392752e-06, "loss": 1.0397, "step": 1658 }, { "epoch": 0.52, "learning_rate": 4.8593578592728405e-06, "loss": 1.441, "step": 1659 }, { "epoch": 0.52, "learning_rate": 4.854245023335216e-06, "loss": 1.1884, "step": 1660 }, { "epoch": 0.52, "learning_rate": 4.849132339930469e-06, "loss": 1.3941, "step": 1661 }, { "epoch": 0.52, "learning_rate": 4.844019814409033e-06, "loss": 1.452, "step": 1662 }, { "epoch": 0.53, "learning_rate": 4.838907452121179e-06, "loss": 0.9724, "step": 1663 }, { "epoch": 0.53, "learning_rate": 4.8337952584170025e-06, "loss": 1.7718, "step": 1664 }, { "epoch": 0.53, "learning_rate": 4.828683238646425e-06, "loss": 1.589, "step": 1665 }, { "epoch": 0.53, "learning_rate": 4.823571398159186e-06, "loss": 1.0235, "step": 1666 }, { "epoch": 0.53, "learning_rate": 4.8184597423048365e-06, "loss": 1.1077, "step": 1667 }, { "epoch": 0.53, "learning_rate": 4.813348276432736e-06, "loss": 1.1334, "step": 1668 }, { "epoch": 0.53, "learning_rate": 4.808237005892043e-06, "loss": 0.975, "step": 1669 }, { "epoch": 0.53, "learning_rate": 4.803125936031711e-06, "loss": 1.4362, "step": 1670 }, { "epoch": 0.53, "learning_rate": 4.798015072200489e-06, "loss": 1.2278, "step": 1671 }, { "epoch": 0.53, "learning_rate": 4.792904419746902e-06, "loss": 1.2654, "step": 1672 }, { "epoch": 0.53, "learning_rate": 4.78779398401926e-06, "loss": 1.834, "step": 1673 }, { "epoch": 0.53, "learning_rate": 4.782683770365644e-06, "loss": 1.4711, "step": 1674 }, { "epoch": 0.53, "learning_rate": 4.777573784133903e-06, "loss": 1.1922, "step": 1675 }, { "epoch": 0.53, "learning_rate": 4.772464030671649e-06, "loss": 1.2088, "step": 1676 }, { "epoch": 0.53, "learning_rate": 4.767354515326247e-06, "loss": 1.1203, "step": 1677 }, { "epoch": 0.53, "learning_rate": 4.7622452434448175e-06, "loss": 1.5457, "step": 1678 }, { "epoch": 0.53, "learning_rate": 4.757136220374224e-06, "loss": 1.0978, "step": 1679 }, { "epoch": 0.53, "learning_rate": 4.752027451461067e-06, "loss": 1.3203, "step": 1680 }, { "epoch": 0.53, "learning_rate": 4.746918942051686e-06, "loss": 1.0658, "step": 1681 }, { "epoch": 0.53, "learning_rate": 4.7418106974921475e-06, "loss": 1.2836, "step": 1682 }, { "epoch": 0.53, "learning_rate": 4.736702723128236e-06, "loss": 1.2449, "step": 1683 }, { "epoch": 0.53, "learning_rate": 4.731595024305459e-06, "loss": 1.1262, "step": 1684 }, { "epoch": 0.53, "learning_rate": 4.726487606369033e-06, "loss": 1.4463, "step": 1685 }, { "epoch": 0.53, "learning_rate": 4.721380474663883e-06, "loss": 0.9101, "step": 1686 }, { "epoch": 0.53, "learning_rate": 4.71627363453463e-06, "loss": 1.0162, "step": 1687 }, { "epoch": 0.53, "learning_rate": 4.711167091325595e-06, "loss": 1.1092, "step": 1688 }, { "epoch": 0.53, "learning_rate": 4.706060850380785e-06, "loss": 1.0095, "step": 1689 }, { "epoch": 0.53, "learning_rate": 4.700954917043892e-06, "loss": 0.9652, "step": 1690 }, { "epoch": 0.53, "learning_rate": 4.695849296658284e-06, "loss": 1.353, "step": 1691 }, { "epoch": 0.53, "learning_rate": 4.690743994567004e-06, "loss": 1.4712, "step": 1692 }, { "epoch": 0.53, "learning_rate": 4.685639016112764e-06, "loss": 0.9778, "step": 1693 }, { "epoch": 0.53, "learning_rate": 4.680534366637929e-06, "loss": 1.7658, "step": 1694 }, { "epoch": 0.54, "learning_rate": 4.675430051484527e-06, "loss": 1.7759, "step": 1695 }, { "epoch": 0.54, "learning_rate": 4.670326075994235e-06, "loss": 1.1705, "step": 1696 }, { "epoch": 0.54, "learning_rate": 4.665222445508374e-06, "loss": 1.2678, "step": 1697 }, { "epoch": 0.54, "learning_rate": 4.660119165367902e-06, "loss": 1.2255, "step": 1698 }, { "epoch": 0.54, "learning_rate": 4.655016240913413e-06, "loss": 1.1293, "step": 1699 }, { "epoch": 0.54, "learning_rate": 4.6499136774851285e-06, "loss": 1.296, "step": 1700 }, { "epoch": 0.54, "learning_rate": 4.64481148042289e-06, "loss": 0.976, "step": 1701 }, { "epoch": 0.54, "learning_rate": 4.639709655066158e-06, "loss": 1.5172, "step": 1702 }, { "epoch": 0.54, "learning_rate": 4.634608206754003e-06, "loss": 1.2122, "step": 1703 }, { "epoch": 0.54, "learning_rate": 4.629507140825104e-06, "loss": 1.4552, "step": 1704 }, { "epoch": 0.54, "learning_rate": 4.62440646261773e-06, "loss": 1.0942, "step": 1705 }, { "epoch": 0.54, "learning_rate": 4.619306177469756e-06, "loss": 1.1498, "step": 1706 }, { "epoch": 0.54, "learning_rate": 4.61420629071864e-06, "loss": 1.7185, "step": 1707 }, { "epoch": 0.54, "learning_rate": 4.609106807701422e-06, "loss": 1.4175, "step": 1708 }, { "epoch": 0.54, "learning_rate": 4.604007733754724e-06, "loss": 0.9553, "step": 1709 }, { "epoch": 0.54, "learning_rate": 4.598909074214735e-06, "loss": 0.903, "step": 1710 }, { "epoch": 0.54, "learning_rate": 4.593810834417215e-06, "loss": 1.1137, "step": 1711 }, { "epoch": 0.54, "learning_rate": 4.58871301969748e-06, "loss": 1.2397, "step": 1712 }, { "epoch": 0.54, "learning_rate": 4.583615635390404e-06, "loss": 1.2129, "step": 1713 }, { "epoch": 0.54, "learning_rate": 4.578518686830412e-06, "loss": 1.1302, "step": 1714 }, { "epoch": 0.54, "learning_rate": 4.57342217935147e-06, "loss": 1.2711, "step": 1715 }, { "epoch": 0.54, "learning_rate": 4.568326118287081e-06, "loss": 0.9225, "step": 1716 }, { "epoch": 0.54, "learning_rate": 4.563230508970285e-06, "loss": 1.1182, "step": 1717 }, { "epoch": 0.54, "learning_rate": 4.558135356733647e-06, "loss": 1.3617, "step": 1718 }, { "epoch": 0.54, "learning_rate": 4.5530406669092545e-06, "loss": 1.2577, "step": 1719 }, { "epoch": 0.54, "learning_rate": 4.547946444828711e-06, "loss": 0.861, "step": 1720 }, { "epoch": 0.54, "learning_rate": 4.5428526958231316e-06, "loss": 1.0538, "step": 1721 }, { "epoch": 0.54, "learning_rate": 4.537759425223132e-06, "loss": 1.1726, "step": 1722 }, { "epoch": 0.54, "learning_rate": 4.532666638358833e-06, "loss": 1.3766, "step": 1723 }, { "epoch": 0.54, "learning_rate": 4.527574340559844e-06, "loss": 0.8715, "step": 1724 }, { "epoch": 0.54, "learning_rate": 4.522482537155267e-06, "loss": 1.0509, "step": 1725 }, { "epoch": 0.54, "learning_rate": 4.517391233473684e-06, "loss": 1.4224, "step": 1726 }, { "epoch": 0.55, "learning_rate": 4.512300434843152e-06, "loss": 1.0548, "step": 1727 }, { "epoch": 0.55, "learning_rate": 4.507210146591207e-06, "loss": 0.9314, "step": 1728 }, { "epoch": 0.55, "learning_rate": 4.502120374044841e-06, "loss": 1.2891, "step": 1729 }, { "epoch": 0.55, "learning_rate": 4.497031122530515e-06, "loss": 1.0307, "step": 1730 }, { "epoch": 0.55, "learning_rate": 4.491942397374141e-06, "loss": 1.6062, "step": 1731 }, { "epoch": 0.55, "learning_rate": 4.486854203901076e-06, "loss": 1.0267, "step": 1732 }, { "epoch": 0.55, "learning_rate": 4.4817665474361295e-06, "loss": 0.9257, "step": 1733 }, { "epoch": 0.55, "learning_rate": 4.476679433303542e-06, "loss": 1.4941, "step": 1734 }, { "epoch": 0.55, "learning_rate": 4.4715928668269894e-06, "loss": 2.2027, "step": 1735 }, { "epoch": 0.55, "learning_rate": 4.4665068533295734e-06, "loss": 1.2855, "step": 1736 }, { "epoch": 0.55, "learning_rate": 4.461421398133819e-06, "loss": 1.3124, "step": 1737 }, { "epoch": 0.55, "learning_rate": 4.456336506561663e-06, "loss": 1.4074, "step": 1738 }, { "epoch": 0.55, "learning_rate": 4.451252183934456e-06, "loss": 1.4085, "step": 1739 }, { "epoch": 0.55, "learning_rate": 4.446168435572953e-06, "loss": 1.2545, "step": 1740 }, { "epoch": 0.55, "learning_rate": 4.441085266797304e-06, "loss": 1.1816, "step": 1741 }, { "epoch": 0.55, "learning_rate": 4.436002682927058e-06, "loss": 1.093, "step": 1742 }, { "epoch": 0.55, "learning_rate": 4.430920689281148e-06, "loss": 1.7274, "step": 1743 }, { "epoch": 0.55, "learning_rate": 4.425839291177892e-06, "loss": 1.5056, "step": 1744 }, { "epoch": 0.55, "learning_rate": 4.420758493934982e-06, "loss": 1.1533, "step": 1745 }, { "epoch": 0.55, "learning_rate": 4.415678302869484e-06, "loss": 1.1023, "step": 1746 }, { "epoch": 0.55, "learning_rate": 4.410598723297826e-06, "loss": 1.1048, "step": 1747 }, { "epoch": 0.55, "learning_rate": 4.405519760535803e-06, "loss": 1.1197, "step": 1748 }, { "epoch": 0.55, "learning_rate": 4.400441419898555e-06, "loss": 1.5362, "step": 1749 }, { "epoch": 0.55, "learning_rate": 4.395363706700577e-06, "loss": 1.1023, "step": 1750 }, { "epoch": 0.55, "learning_rate": 4.390286626255707e-06, "loss": 1.0397, "step": 1751 }, { "epoch": 0.55, "learning_rate": 4.385210183877119e-06, "loss": 1.4357, "step": 1752 }, { "epoch": 0.55, "learning_rate": 4.380134384877321e-06, "loss": 1.2703, "step": 1753 }, { "epoch": 0.55, "learning_rate": 4.375059234568147e-06, "loss": 1.2433, "step": 1754 }, { "epoch": 0.55, "learning_rate": 4.3699847382607515e-06, "loss": 1.1841, "step": 1755 }, { "epoch": 0.55, "learning_rate": 4.364910901265607e-06, "loss": 1.1683, "step": 1756 }, { "epoch": 0.55, "learning_rate": 4.359837728892491e-06, "loss": 1.1879, "step": 1757 }, { "epoch": 0.56, "learning_rate": 4.354765226450491e-06, "loss": 1.4104, "step": 1758 }, { "epoch": 0.56, "learning_rate": 4.349693399247994e-06, "loss": 1.0282, "step": 1759 }, { "epoch": 0.56, "learning_rate": 4.34462225259267e-06, "loss": 1.3551, "step": 1760 }, { "epoch": 0.56, "learning_rate": 4.33955179179149e-06, "loss": 1.0407, "step": 1761 }, { "epoch": 0.56, "learning_rate": 4.334482022150699e-06, "loss": 1.2877, "step": 1762 }, { "epoch": 0.56, "learning_rate": 4.329412948975822e-06, "loss": 1.2974, "step": 1763 }, { "epoch": 0.56, "learning_rate": 4.3243445775716546e-06, "loss": 1.0748, "step": 1764 }, { "epoch": 0.56, "learning_rate": 4.319276913242257e-06, "loss": 1.8272, "step": 1765 }, { "epoch": 0.56, "learning_rate": 4.314209961290952e-06, "loss": 1.1385, "step": 1766 }, { "epoch": 0.56, "learning_rate": 4.3091437270203126e-06, "loss": 1.4905, "step": 1767 }, { "epoch": 0.56, "learning_rate": 4.3040782157321646e-06, "loss": 1.1573, "step": 1768 }, { "epoch": 0.56, "learning_rate": 4.299013432727576e-06, "loss": 1.2123, "step": 1769 }, { "epoch": 0.56, "learning_rate": 4.293949383306854e-06, "loss": 1.0767, "step": 1770 }, { "epoch": 0.56, "learning_rate": 4.288886072769534e-06, "loss": 1.2831, "step": 1771 }, { "epoch": 0.56, "learning_rate": 4.283823506414381e-06, "loss": 1.2786, "step": 1772 }, { "epoch": 0.56, "learning_rate": 4.278761689539381e-06, "loss": 1.404, "step": 1773 }, { "epoch": 0.56, "learning_rate": 4.273700627441736e-06, "loss": 1.1243, "step": 1774 }, { "epoch": 0.56, "learning_rate": 4.268640325417858e-06, "loss": 1.1712, "step": 1775 }, { "epoch": 0.56, "learning_rate": 4.263580788763364e-06, "loss": 1.0509, "step": 1776 }, { "epoch": 0.56, "learning_rate": 4.258522022773068e-06, "loss": 1.0069, "step": 1777 }, { "epoch": 0.56, "learning_rate": 4.253464032740979e-06, "loss": 2.1048, "step": 1778 }, { "epoch": 0.56, "learning_rate": 4.248406823960295e-06, "loss": 1.7414, "step": 1779 }, { "epoch": 0.56, "learning_rate": 4.243350401723395e-06, "loss": 1.3471, "step": 1780 }, { "epoch": 0.56, "learning_rate": 4.2382947713218364e-06, "loss": 0.97, "step": 1781 }, { "epoch": 0.56, "learning_rate": 4.233239938046343e-06, "loss": 1.6504, "step": 1782 }, { "epoch": 0.56, "learning_rate": 4.22818590718681e-06, "loss": 1.1305, "step": 1783 }, { "epoch": 0.56, "learning_rate": 4.22313268403229e-06, "loss": 1.2761, "step": 1784 }, { "epoch": 0.56, "learning_rate": 4.218080273870993e-06, "loss": 1.0972, "step": 1785 }, { "epoch": 0.56, "learning_rate": 4.213028681990277e-06, "loss": 0.9006, "step": 1786 }, { "epoch": 0.56, "learning_rate": 4.2079779136766404e-06, "loss": 1.3292, "step": 1787 }, { "epoch": 0.56, "learning_rate": 4.202927974215725e-06, "loss": 1.2059, "step": 1788 }, { "epoch": 0.56, "learning_rate": 4.1978788688923e-06, "loss": 1.0511, "step": 1789 }, { "epoch": 0.57, "learning_rate": 4.192830602990268e-06, "loss": 0.9995, "step": 1790 }, { "epoch": 0.57, "learning_rate": 4.1877831817926465e-06, "loss": 1.2879, "step": 1791 }, { "epoch": 0.57, "learning_rate": 4.182736610581576e-06, "loss": 1.2858, "step": 1792 }, { "epoch": 0.57, "learning_rate": 4.177690894638298e-06, "loss": 1.236, "step": 1793 }, { "epoch": 0.57, "learning_rate": 4.1726460392431665e-06, "loss": 1.1889, "step": 1794 }, { "epoch": 0.57, "learning_rate": 4.167602049675634e-06, "loss": 1.0719, "step": 1795 }, { "epoch": 0.57, "learning_rate": 4.162558931214246e-06, "loss": 1.2383, "step": 1796 }, { "epoch": 0.57, "learning_rate": 4.157516689136634e-06, "loss": 1.089, "step": 1797 }, { "epoch": 0.57, "learning_rate": 4.152475328719517e-06, "loss": 1.2381, "step": 1798 }, { "epoch": 0.57, "learning_rate": 4.147434855238688e-06, "loss": 1.2682, "step": 1799 }, { "epoch": 0.57, "learning_rate": 4.142395273969013e-06, "loss": 1.5787, "step": 1800 }, { "epoch": 0.57, "learning_rate": 4.137356590184423e-06, "loss": 1.2836, "step": 1801 }, { "epoch": 0.57, "learning_rate": 4.132318809157912e-06, "loss": 1.1395, "step": 1802 }, { "epoch": 0.57, "learning_rate": 4.127281936161529e-06, "loss": 1.2458, "step": 1803 }, { "epoch": 0.57, "learning_rate": 4.122245976466369e-06, "loss": 1.1391, "step": 1804 }, { "epoch": 0.57, "learning_rate": 4.1172109353425745e-06, "loss": 1.3855, "step": 1805 }, { "epoch": 0.57, "learning_rate": 4.1121768180593285e-06, "loss": 1.2391, "step": 1806 }, { "epoch": 0.57, "learning_rate": 4.107143629884843e-06, "loss": 1.3479, "step": 1807 }, { "epoch": 0.57, "learning_rate": 4.10211137608636e-06, "loss": 0.9971, "step": 1808 }, { "epoch": 0.57, "learning_rate": 4.097080061930144e-06, "loss": 1.456, "step": 1809 }, { "epoch": 0.57, "learning_rate": 4.0920496926814755e-06, "loss": 1.2538, "step": 1810 }, { "epoch": 0.57, "learning_rate": 4.0870202736046445e-06, "loss": 1.2711, "step": 1811 }, { "epoch": 0.57, "learning_rate": 4.0819918099629506e-06, "loss": 1.5137, "step": 1812 }, { "epoch": 0.57, "learning_rate": 4.07696430701869e-06, "loss": 1.0965, "step": 1813 }, { "epoch": 0.57, "learning_rate": 4.071937770033155e-06, "loss": 1.4047, "step": 1814 }, { "epoch": 0.57, "learning_rate": 4.066912204266626e-06, "loss": 1.2257, "step": 1815 }, { "epoch": 0.57, "learning_rate": 4.061887614978368e-06, "loss": 1.0082, "step": 1816 }, { "epoch": 0.57, "learning_rate": 4.056864007426625e-06, "loss": 1.2449, "step": 1817 }, { "epoch": 0.57, "learning_rate": 4.05184138686861e-06, "loss": 1.348, "step": 1818 }, { "epoch": 0.57, "learning_rate": 4.046819758560509e-06, "loss": 1.0337, "step": 1819 }, { "epoch": 0.57, "learning_rate": 4.041799127757463e-06, "loss": 1.2572, "step": 1820 }, { "epoch": 0.57, "learning_rate": 4.036779499713575e-06, "loss": 1.2702, "step": 1821 }, { "epoch": 0.58, "learning_rate": 4.0317608796818956e-06, "loss": 1.455, "step": 1822 }, { "epoch": 0.58, "learning_rate": 4.026743272914419e-06, "loss": 1.3874, "step": 1823 }, { "epoch": 0.58, "learning_rate": 4.021726684662081e-06, "loss": 1.3552, "step": 1824 }, { "epoch": 0.58, "learning_rate": 4.016711120174754e-06, "loss": 1.0404, "step": 1825 }, { "epoch": 0.58, "learning_rate": 4.0116965847012335e-06, "loss": 0.8019, "step": 1826 }, { "epoch": 0.58, "learning_rate": 4.006683083489243e-06, "loss": 1.156, "step": 1827 }, { "epoch": 0.58, "learning_rate": 4.001670621785419e-06, "loss": 1.0007, "step": 1828 }, { "epoch": 0.58, "learning_rate": 3.996659204835316e-06, "loss": 1.3032, "step": 1829 }, { "epoch": 0.58, "learning_rate": 3.991648837883391e-06, "loss": 1.2603, "step": 1830 }, { "epoch": 0.58, "learning_rate": 3.986639526173003e-06, "loss": 1.6929, "step": 1831 }, { "epoch": 0.58, "learning_rate": 3.981631274946406e-06, "loss": 1.1019, "step": 1832 }, { "epoch": 0.58, "learning_rate": 3.976624089444746e-06, "loss": 0.9448, "step": 1833 }, { "epoch": 0.58, "learning_rate": 3.971617974908053e-06, "loss": 1.3052, "step": 1834 }, { "epoch": 0.58, "learning_rate": 3.966612936575235e-06, "loss": 1.2827, "step": 1835 }, { "epoch": 0.58, "learning_rate": 3.961608979684077e-06, "loss": 1.0904, "step": 1836 }, { "epoch": 0.58, "learning_rate": 3.956606109471227e-06, "loss": 1.2534, "step": 1837 }, { "epoch": 0.58, "learning_rate": 3.951604331172201e-06, "loss": 1.2961, "step": 1838 }, { "epoch": 0.58, "learning_rate": 3.94660365002137e-06, "loss": 1.535, "step": 1839 }, { "epoch": 0.58, "learning_rate": 3.941604071251956e-06, "loss": 1.3385, "step": 1840 }, { "epoch": 0.58, "learning_rate": 3.936605600096027e-06, "loss": 0.8874, "step": 1841 }, { "epoch": 0.58, "learning_rate": 3.931608241784496e-06, "loss": 1.226, "step": 1842 }, { "epoch": 0.58, "learning_rate": 3.926612001547108e-06, "loss": 1.5879, "step": 1843 }, { "epoch": 0.58, "learning_rate": 3.921616884612438e-06, "loss": 1.212, "step": 1844 }, { "epoch": 0.58, "learning_rate": 3.9166228962078856e-06, "loss": 1.1998, "step": 1845 }, { "epoch": 0.58, "learning_rate": 3.91163004155967e-06, "loss": 1.4584, "step": 1846 }, { "epoch": 0.58, "learning_rate": 3.906638325892826e-06, "loss": 1.1404, "step": 1847 }, { "epoch": 0.58, "learning_rate": 3.901647754431192e-06, "loss": 1.4275, "step": 1848 }, { "epoch": 0.58, "learning_rate": 3.89665833239741e-06, "loss": 0.9031, "step": 1849 }, { "epoch": 0.58, "learning_rate": 3.891670065012921e-06, "loss": 1.1706, "step": 1850 }, { "epoch": 0.58, "learning_rate": 3.886682957497958e-06, "loss": 1.3982, "step": 1851 }, { "epoch": 0.58, "learning_rate": 3.881697015071539e-06, "loss": 0.977, "step": 1852 }, { "epoch": 0.59, "learning_rate": 3.8767122429514635e-06, "loss": 1.4194, "step": 1853 }, { "epoch": 0.59, "learning_rate": 3.871728646354303e-06, "loss": 1.6231, "step": 1854 }, { "epoch": 0.59, "learning_rate": 3.866746230495404e-06, "loss": 1.1851, "step": 1855 }, { "epoch": 0.59, "learning_rate": 3.861765000588874e-06, "loss": 1.163, "step": 1856 }, { "epoch": 0.59, "learning_rate": 3.85678496184758e-06, "loss": 1.2967, "step": 1857 }, { "epoch": 0.59, "learning_rate": 3.851806119483145e-06, "loss": 1.3567, "step": 1858 }, { "epoch": 0.59, "learning_rate": 3.846828478705933e-06, "loss": 1.4895, "step": 1859 }, { "epoch": 0.59, "learning_rate": 3.841852044725058e-06, "loss": 1.2719, "step": 1860 }, { "epoch": 0.59, "learning_rate": 3.836876822748368e-06, "loss": 1.2758, "step": 1861 }, { "epoch": 0.59, "learning_rate": 3.831902817982442e-06, "loss": 1.0667, "step": 1862 }, { "epoch": 0.59, "learning_rate": 3.8269300356325874e-06, "loss": 1.1053, "step": 1863 }, { "epoch": 0.59, "learning_rate": 3.82195848090283e-06, "loss": 1.25, "step": 1864 }, { "epoch": 0.59, "learning_rate": 3.816988158995912e-06, "loss": 0.9229, "step": 1865 }, { "epoch": 0.59, "learning_rate": 3.8120190751132868e-06, "loss": 1.1889, "step": 1866 }, { "epoch": 0.59, "learning_rate": 3.80705123445511e-06, "loss": 1.1042, "step": 1867 }, { "epoch": 0.59, "learning_rate": 3.8020846422202357e-06, "loss": 1.1533, "step": 1868 }, { "epoch": 0.59, "learning_rate": 3.7971193036062137e-06, "loss": 0.9173, "step": 1869 }, { "epoch": 0.59, "learning_rate": 3.7921552238092834e-06, "loss": 1.2899, "step": 1870 }, { "epoch": 0.59, "learning_rate": 3.7871924080243593e-06, "loss": 0.9672, "step": 1871 }, { "epoch": 0.59, "learning_rate": 3.782230861445041e-06, "loss": 1.1414, "step": 1872 }, { "epoch": 0.59, "learning_rate": 3.777270589263597e-06, "loss": 1.4374, "step": 1873 }, { "epoch": 0.59, "learning_rate": 3.7723115966709623e-06, "loss": 1.2031, "step": 1874 }, { "epoch": 0.59, "learning_rate": 3.7673538888567316e-06, "loss": 1.1229, "step": 1875 }, { "epoch": 0.59, "learning_rate": 3.7623974710091577e-06, "loss": 1.4759, "step": 1876 }, { "epoch": 0.59, "learning_rate": 3.7574423483151413e-06, "loss": 1.2386, "step": 1877 }, { "epoch": 0.59, "learning_rate": 3.7524885259602277e-06, "loss": 1.3049, "step": 1878 }, { "epoch": 0.59, "learning_rate": 3.7475360091286027e-06, "loss": 1.3568, "step": 1879 }, { "epoch": 0.59, "learning_rate": 3.7425848030030857e-06, "loss": 0.9942, "step": 1880 }, { "epoch": 0.59, "learning_rate": 3.7376349127651257e-06, "loss": 1.4272, "step": 1881 }, { "epoch": 0.59, "learning_rate": 3.7326863435947878e-06, "loss": 1.1744, "step": 1882 }, { "epoch": 0.59, "learning_rate": 3.727739100670763e-06, "loss": 1.0667, "step": 1883 }, { "epoch": 0.59, "learning_rate": 3.7227931891703505e-06, "loss": 1.2892, "step": 1884 }, { "epoch": 0.6, "learning_rate": 3.7178486142694557e-06, "loss": 1.2618, "step": 1885 }, { "epoch": 0.6, "learning_rate": 3.7129053811425887e-06, "loss": 1.3694, "step": 1886 }, { "epoch": 0.6, "learning_rate": 3.7079634949628514e-06, "loss": 0.8225, "step": 1887 }, { "epoch": 0.6, "learning_rate": 3.7030229609019374e-06, "loss": 1.328, "step": 1888 }, { "epoch": 0.6, "learning_rate": 3.6980837841301265e-06, "loss": 0.9666, "step": 1889 }, { "epoch": 0.6, "learning_rate": 3.6931459698162764e-06, "loss": 1.2805, "step": 1890 }, { "epoch": 0.6, "learning_rate": 3.6882095231278207e-06, "loss": 0.9269, "step": 1891 }, { "epoch": 0.6, "learning_rate": 3.6832744492307626e-06, "loss": 1.3354, "step": 1892 }, { "epoch": 0.6, "learning_rate": 3.6783407532896627e-06, "loss": 0.9498, "step": 1893 }, { "epoch": 0.6, "learning_rate": 3.6734084404676456e-06, "loss": 1.3196, "step": 1894 }, { "epoch": 0.6, "learning_rate": 3.6684775159263876e-06, "loss": 1.3671, "step": 1895 }, { "epoch": 0.6, "learning_rate": 3.6635479848261097e-06, "loss": 1.0985, "step": 1896 }, { "epoch": 0.6, "learning_rate": 3.6586198523255775e-06, "loss": 1.0519, "step": 1897 }, { "epoch": 0.6, "learning_rate": 3.653693123582091e-06, "loss": 1.4362, "step": 1898 }, { "epoch": 0.6, "learning_rate": 3.6487678037514824e-06, "loss": 1.1924, "step": 1899 }, { "epoch": 0.6, "learning_rate": 3.6438438979881085e-06, "loss": 1.6432, "step": 1900 }, { "epoch": 0.6, "learning_rate": 3.6389214114448467e-06, "loss": 1.0735, "step": 1901 }, { "epoch": 0.6, "learning_rate": 3.634000349273089e-06, "loss": 1.2016, "step": 1902 }, { "epoch": 0.6, "learning_rate": 3.62908071662274e-06, "loss": 1.0183, "step": 1903 }, { "epoch": 0.6, "learning_rate": 3.6241625186421998e-06, "loss": 1.3116, "step": 1904 }, { "epoch": 0.6, "learning_rate": 3.619245760478375e-06, "loss": 0.9918, "step": 1905 }, { "epoch": 0.6, "learning_rate": 3.6143304472766633e-06, "loss": 1.3333, "step": 1906 }, { "epoch": 0.6, "learning_rate": 3.6094165841809497e-06, "loss": 1.1933, "step": 1907 }, { "epoch": 0.6, "learning_rate": 3.604504176333602e-06, "loss": 1.2316, "step": 1908 }, { "epoch": 0.6, "learning_rate": 3.5995932288754655e-06, "loss": 1.129, "step": 1909 }, { "epoch": 0.6, "learning_rate": 3.5946837469458556e-06, "loss": 0.9217, "step": 1910 }, { "epoch": 0.6, "learning_rate": 3.5897757356825558e-06, "loss": 1.4951, "step": 1911 }, { "epoch": 0.6, "learning_rate": 3.58486920022181e-06, "loss": 1.1017, "step": 1912 }, { "epoch": 0.6, "learning_rate": 3.579964145698318e-06, "loss": 0.8728, "step": 1913 }, { "epoch": 0.6, "learning_rate": 3.5750605772452287e-06, "loss": 1.158, "step": 1914 }, { "epoch": 0.6, "learning_rate": 3.5701584999941347e-06, "loss": 0.967, "step": 1915 }, { "epoch": 0.6, "learning_rate": 3.565257919075071e-06, "loss": 0.8151, "step": 1916 }, { "epoch": 0.61, "learning_rate": 3.5603588396165067e-06, "loss": 1.0486, "step": 1917 }, { "epoch": 0.61, "learning_rate": 3.5554612667453377e-06, "loss": 1.3153, "step": 1918 }, { "epoch": 0.61, "learning_rate": 3.5505652055868835e-06, "loss": 1.2144, "step": 1919 }, { "epoch": 0.61, "learning_rate": 3.5456706612648816e-06, "loss": 1.0039, "step": 1920 }, { "epoch": 0.61, "learning_rate": 3.540777638901485e-06, "loss": 1.4455, "step": 1921 }, { "epoch": 0.61, "learning_rate": 3.5358861436172487e-06, "loss": 1.0235, "step": 1922 }, { "epoch": 0.61, "learning_rate": 3.530996180531133e-06, "loss": 1.2289, "step": 1923 }, { "epoch": 0.61, "learning_rate": 3.5261077547604954e-06, "loss": 1.128, "step": 1924 }, { "epoch": 0.61, "learning_rate": 3.5212208714210836e-06, "loss": 1.2207, "step": 1925 }, { "epoch": 0.61, "learning_rate": 3.5163355356270292e-06, "loss": 1.2874, "step": 1926 }, { "epoch": 0.61, "learning_rate": 3.5114517524908463e-06, "loss": 1.0981, "step": 1927 }, { "epoch": 0.61, "learning_rate": 3.5065695271234247e-06, "loss": 1.2082, "step": 1928 }, { "epoch": 0.61, "learning_rate": 3.501688864634023e-06, "loss": 1.4302, "step": 1929 }, { "epoch": 0.61, "learning_rate": 3.496809770130265e-06, "loss": 1.07, "step": 1930 }, { "epoch": 0.61, "learning_rate": 3.491932248718131e-06, "loss": 1.305, "step": 1931 }, { "epoch": 0.61, "learning_rate": 3.487056305501959e-06, "loss": 1.0127, "step": 1932 }, { "epoch": 0.61, "learning_rate": 3.482181945584432e-06, "loss": 1.2917, "step": 1933 }, { "epoch": 0.61, "learning_rate": 3.4773091740665787e-06, "loss": 1.6215, "step": 1934 }, { "epoch": 0.61, "learning_rate": 3.472437996047764e-06, "loss": 1.283, "step": 1935 }, { "epoch": 0.61, "learning_rate": 3.467568416625687e-06, "loss": 1.0094, "step": 1936 }, { "epoch": 0.61, "learning_rate": 3.4627004408963695e-06, "loss": 0.9598, "step": 1937 }, { "epoch": 0.61, "learning_rate": 3.4578340739541593e-06, "loss": 1.5037, "step": 1938 }, { "epoch": 0.61, "learning_rate": 3.4529693208917215e-06, "loss": 0.9095, "step": 1939 }, { "epoch": 0.61, "learning_rate": 3.4481061868000267e-06, "loss": 1.4402, "step": 1940 }, { "epoch": 0.61, "learning_rate": 3.443244676768355e-06, "loss": 0.9727, "step": 1941 }, { "epoch": 0.61, "learning_rate": 3.438384795884288e-06, "loss": 1.2594, "step": 1942 }, { "epoch": 0.61, "learning_rate": 3.4335265492337005e-06, "loss": 1.2377, "step": 1943 }, { "epoch": 0.61, "learning_rate": 3.4286699419007573e-06, "loss": 1.038, "step": 1944 }, { "epoch": 0.61, "learning_rate": 3.4238149789679076e-06, "loss": 1.244, "step": 1945 }, { "epoch": 0.61, "learning_rate": 3.4189616655158803e-06, "loss": 1.0445, "step": 1946 }, { "epoch": 0.61, "learning_rate": 3.4141100066236786e-06, "loss": 1.0028, "step": 1947 }, { "epoch": 0.62, "learning_rate": 3.409260007368573e-06, "loss": 1.0038, "step": 1948 }, { "epoch": 0.62, "learning_rate": 3.4044116728260955e-06, "loss": 1.1794, "step": 1949 }, { "epoch": 0.62, "learning_rate": 3.399565008070039e-06, "loss": 1.2325, "step": 1950 }, { "epoch": 0.62, "learning_rate": 3.3947200181724494e-06, "loss": 1.0393, "step": 1951 }, { "epoch": 0.62, "learning_rate": 3.3898767082036175e-06, "loss": 1.4026, "step": 1952 }, { "epoch": 0.62, "learning_rate": 3.3850350832320774e-06, "loss": 1.2579, "step": 1953 }, { "epoch": 0.62, "learning_rate": 3.3801951483245987e-06, "loss": 1.2215, "step": 1954 }, { "epoch": 0.62, "learning_rate": 3.3753569085461837e-06, "loss": 1.0896, "step": 1955 }, { "epoch": 0.62, "learning_rate": 3.3705203689600597e-06, "loss": 1.4355, "step": 1956 }, { "epoch": 0.62, "learning_rate": 3.3656855346276756e-06, "loss": 1.347, "step": 1957 }, { "epoch": 0.62, "learning_rate": 3.3608524106086976e-06, "loss": 1.1714, "step": 1958 }, { "epoch": 0.62, "learning_rate": 3.3560210019609953e-06, "loss": 1.1696, "step": 1959 }, { "epoch": 0.62, "learning_rate": 3.3511913137406493e-06, "loss": 1.0784, "step": 1960 }, { "epoch": 0.62, "learning_rate": 3.3463633510019393e-06, "loss": 1.1635, "step": 1961 }, { "epoch": 0.62, "learning_rate": 3.3415371187973374e-06, "loss": 1.0978, "step": 1962 }, { "epoch": 0.62, "learning_rate": 3.3367126221775047e-06, "loss": 1.1018, "step": 1963 }, { "epoch": 0.62, "learning_rate": 3.331889866191288e-06, "loss": 1.4938, "step": 1964 }, { "epoch": 0.62, "learning_rate": 3.3270688558857107e-06, "loss": 1.4213, "step": 1965 }, { "epoch": 0.62, "learning_rate": 3.322249596305969e-06, "loss": 0.9343, "step": 1966 }, { "epoch": 0.62, "learning_rate": 3.3174320924954297e-06, "loss": 1.2414, "step": 1967 }, { "epoch": 0.62, "learning_rate": 3.3126163494956186e-06, "loss": 1.4493, "step": 1968 }, { "epoch": 0.62, "learning_rate": 3.307802372346223e-06, "loss": 1.246, "step": 1969 }, { "epoch": 0.62, "learning_rate": 3.302990166085076e-06, "loss": 1.5453, "step": 1970 }, { "epoch": 0.62, "learning_rate": 3.2981797357481638e-06, "loss": 1.0254, "step": 1971 }, { "epoch": 0.62, "learning_rate": 3.29337108636961e-06, "loss": 1.0915, "step": 1972 }, { "epoch": 0.62, "learning_rate": 3.288564222981677e-06, "loss": 1.2181, "step": 1973 }, { "epoch": 0.62, "learning_rate": 3.283759150614757e-06, "loss": 1.4196, "step": 1974 }, { "epoch": 0.62, "learning_rate": 3.2789558742973675e-06, "loss": 1.2638, "step": 1975 }, { "epoch": 0.62, "learning_rate": 3.2741543990561475e-06, "loss": 1.5104, "step": 1976 }, { "epoch": 0.62, "learning_rate": 3.26935472991585e-06, "loss": 1.2463, "step": 1977 }, { "epoch": 0.62, "learning_rate": 3.2645568718993393e-06, "loss": 1.0407, "step": 1978 }, { "epoch": 0.62, "learning_rate": 3.259760830027583e-06, "loss": 0.6985, "step": 1979 }, { "epoch": 0.63, "learning_rate": 3.254966609319651e-06, "loss": 1.081, "step": 1980 }, { "epoch": 0.63, "learning_rate": 3.2501742147926997e-06, "loss": 0.9543, "step": 1981 }, { "epoch": 0.63, "learning_rate": 3.245383651461983e-06, "loss": 1.1102, "step": 1982 }, { "epoch": 0.63, "learning_rate": 3.240594924340835e-06, "loss": 1.2458, "step": 1983 }, { "epoch": 0.63, "learning_rate": 3.2358080384406686e-06, "loss": 1.3444, "step": 1984 }, { "epoch": 0.63, "learning_rate": 3.2310229987709692e-06, "loss": 0.8688, "step": 1985 }, { "epoch": 0.63, "learning_rate": 3.22623981033929e-06, "loss": 1.2598, "step": 1986 }, { "epoch": 0.63, "learning_rate": 3.2214584781512482e-06, "loss": 1.1462, "step": 1987 }, { "epoch": 0.63, "learning_rate": 3.216679007210517e-06, "loss": 1.2958, "step": 1988 }, { "epoch": 0.63, "learning_rate": 3.2119014025188233e-06, "loss": 1.0683, "step": 1989 }, { "epoch": 0.63, "learning_rate": 3.207125669075939e-06, "loss": 1.205, "step": 1990 }, { "epoch": 0.63, "learning_rate": 3.202351811879682e-06, "loss": 1.0046, "step": 1991 }, { "epoch": 0.63, "learning_rate": 3.1975798359258984e-06, "loss": 0.8936, "step": 1992 }, { "epoch": 0.63, "learning_rate": 3.192809746208474e-06, "loss": 1.1067, "step": 1993 }, { "epoch": 0.63, "learning_rate": 3.188041547719316e-06, "loss": 1.2035, "step": 1994 }, { "epoch": 0.63, "learning_rate": 3.1832752454483553e-06, "loss": 1.3163, "step": 1995 }, { "epoch": 0.63, "learning_rate": 3.1785108443835353e-06, "loss": 1.2119, "step": 1996 }, { "epoch": 0.63, "learning_rate": 3.1737483495108123e-06, "loss": 1.3959, "step": 1997 }, { "epoch": 0.63, "learning_rate": 3.1689877658141465e-06, "loss": 1.101, "step": 1998 }, { "epoch": 0.63, "learning_rate": 3.1642290982754988e-06, "loss": 1.2983, "step": 1999 }, { "epoch": 0.63, "learning_rate": 3.159472351874824e-06, "loss": 1.3647, "step": 2000 }, { "epoch": 0.63, "learning_rate": 3.1547175315900675e-06, "loss": 1.159, "step": 2001 }, { "epoch": 0.63, "learning_rate": 3.1499646423971595e-06, "loss": 0.9406, "step": 2002 }, { "epoch": 0.63, "learning_rate": 3.145213689270005e-06, "loss": 1.2438, "step": 2003 }, { "epoch": 0.63, "learning_rate": 3.140464677180487e-06, "loss": 1.0818, "step": 2004 }, { "epoch": 0.63, "learning_rate": 3.1357176110984578e-06, "loss": 1.2694, "step": 2005 }, { "epoch": 0.63, "learning_rate": 3.1309724959917304e-06, "loss": 1.1656, "step": 2006 }, { "epoch": 0.63, "learning_rate": 3.1262293368260777e-06, "loss": 0.9701, "step": 2007 }, { "epoch": 0.63, "learning_rate": 3.1214881385652264e-06, "loss": 1.6568, "step": 2008 }, { "epoch": 0.63, "learning_rate": 3.1167489061708486e-06, "loss": 1.1239, "step": 2009 }, { "epoch": 0.63, "learning_rate": 3.112011644602561e-06, "loss": 1.3447, "step": 2010 }, { "epoch": 0.63, "learning_rate": 3.107276358817918e-06, "loss": 1.2229, "step": 2011 }, { "epoch": 0.64, "learning_rate": 3.1025430537724065e-06, "loss": 1.0809, "step": 2012 }, { "epoch": 0.64, "learning_rate": 3.097811734419439e-06, "loss": 1.3699, "step": 2013 }, { "epoch": 0.64, "learning_rate": 3.09308240571035e-06, "loss": 1.1782, "step": 2014 }, { "epoch": 0.64, "learning_rate": 3.088355072594391e-06, "loss": 1.8982, "step": 2015 }, { "epoch": 0.64, "learning_rate": 3.083629740018728e-06, "loss": 1.2553, "step": 2016 }, { "epoch": 0.64, "learning_rate": 3.0789064129284297e-06, "loss": 1.4008, "step": 2017 }, { "epoch": 0.64, "learning_rate": 3.0741850962664675e-06, "loss": 1.3111, "step": 2018 }, { "epoch": 0.64, "learning_rate": 3.06946579497371e-06, "loss": 1.3903, "step": 2019 }, { "epoch": 0.64, "learning_rate": 3.0647485139889145e-06, "loss": 0.9073, "step": 2020 }, { "epoch": 0.64, "learning_rate": 3.060033258248726e-06, "loss": 1.1551, "step": 2021 }, { "epoch": 0.64, "learning_rate": 3.0553200326876675e-06, "loss": 1.4826, "step": 2022 }, { "epoch": 0.64, "learning_rate": 3.050608842238141e-06, "loss": 1.3483, "step": 2023 }, { "epoch": 0.64, "learning_rate": 3.045899691830416e-06, "loss": 0.8081, "step": 2024 }, { "epoch": 0.64, "learning_rate": 3.041192586392627e-06, "loss": 1.0493, "step": 2025 }, { "epoch": 0.64, "learning_rate": 3.0364875308507713e-06, "loss": 1.0717, "step": 2026 }, { "epoch": 0.64, "learning_rate": 3.031784530128698e-06, "loss": 1.3219, "step": 2027 }, { "epoch": 0.64, "learning_rate": 3.0270835891481065e-06, "loss": 1.1837, "step": 2028 }, { "epoch": 0.64, "learning_rate": 3.022384712828543e-06, "loss": 1.37, "step": 2029 }, { "epoch": 0.64, "learning_rate": 3.0176879060873887e-06, "loss": 1.0197, "step": 2030 }, { "epoch": 0.64, "learning_rate": 3.012993173839863e-06, "loss": 1.2272, "step": 2031 }, { "epoch": 0.64, "learning_rate": 3.0083005209990112e-06, "loss": 1.1516, "step": 2032 }, { "epoch": 0.64, "learning_rate": 3.0036099524757056e-06, "loss": 1.1543, "step": 2033 }, { "epoch": 0.64, "learning_rate": 2.9989214731786347e-06, "loss": 1.2288, "step": 2034 }, { "epoch": 0.64, "learning_rate": 2.9942350880143033e-06, "loss": 1.3815, "step": 2035 }, { "epoch": 0.64, "learning_rate": 2.98955080188702e-06, "loss": 1.5085, "step": 2036 }, { "epoch": 0.64, "learning_rate": 2.984868619698902e-06, "loss": 1.1459, "step": 2037 }, { "epoch": 0.64, "learning_rate": 2.9801885463498627e-06, "loss": 1.113, "step": 2038 }, { "epoch": 0.64, "learning_rate": 2.9755105867376067e-06, "loss": 1.1355, "step": 2039 }, { "epoch": 0.64, "learning_rate": 2.9708347457576288e-06, "loss": 1.5716, "step": 2040 }, { "epoch": 0.64, "learning_rate": 2.966161028303206e-06, "loss": 1.5506, "step": 2041 }, { "epoch": 0.64, "learning_rate": 2.9614894392653935e-06, "loss": 1.6332, "step": 2042 }, { "epoch": 0.65, "learning_rate": 2.9568199835330187e-06, "loss": 1.4168, "step": 2043 }, { "epoch": 0.65, "learning_rate": 2.9521526659926756e-06, "loss": 1.2374, "step": 2044 }, { "epoch": 0.65, "learning_rate": 2.9474874915287232e-06, "loss": 1.4185, "step": 2045 }, { "epoch": 0.65, "learning_rate": 2.942824465023276e-06, "loss": 1.2339, "step": 2046 }, { "epoch": 0.65, "learning_rate": 2.9381635913562e-06, "loss": 0.7605, "step": 2047 }, { "epoch": 0.65, "learning_rate": 2.933504875405107e-06, "loss": 1.4504, "step": 2048 }, { "epoch": 0.65, "learning_rate": 2.928848322045355e-06, "loss": 1.5816, "step": 2049 }, { "epoch": 0.65, "learning_rate": 2.924193936150036e-06, "loss": 1.0779, "step": 2050 }, { "epoch": 0.65, "learning_rate": 2.919541722589974e-06, "loss": 1.2636, "step": 2051 }, { "epoch": 0.65, "learning_rate": 2.91489168623372e-06, "loss": 1.5837, "step": 2052 }, { "epoch": 0.65, "learning_rate": 2.9102438319475457e-06, "loss": 0.9879, "step": 2053 }, { "epoch": 0.65, "learning_rate": 2.9055981645954433e-06, "loss": 1.2848, "step": 2054 }, { "epoch": 0.65, "learning_rate": 2.900954689039109e-06, "loss": 1.4401, "step": 2055 }, { "epoch": 0.65, "learning_rate": 2.8963134101379527e-06, "loss": 1.3508, "step": 2056 }, { "epoch": 0.65, "learning_rate": 2.89167433274908e-06, "loss": 1.8778, "step": 2057 }, { "epoch": 0.65, "learning_rate": 2.887037461727297e-06, "loss": 1.0763, "step": 2058 }, { "epoch": 0.65, "learning_rate": 2.882402801925095e-06, "loss": 1.1106, "step": 2059 }, { "epoch": 0.65, "learning_rate": 2.877770358192658e-06, "loss": 1.1903, "step": 2060 }, { "epoch": 0.65, "learning_rate": 2.8731401353778454e-06, "loss": 1.5669, "step": 2061 }, { "epoch": 0.65, "learning_rate": 2.8685121383261982e-06, "loss": 1.3327, "step": 2062 }, { "epoch": 0.65, "learning_rate": 2.8638863718809206e-06, "loss": 1.1496, "step": 2063 }, { "epoch": 0.65, "learning_rate": 2.8592628408828906e-06, "loss": 1.2838, "step": 2064 }, { "epoch": 0.65, "learning_rate": 2.854641550170638e-06, "loss": 1.1749, "step": 2065 }, { "epoch": 0.65, "learning_rate": 2.8500225045803587e-06, "loss": 1.5487, "step": 2066 }, { "epoch": 0.65, "learning_rate": 2.8454057089458896e-06, "loss": 1.2978, "step": 2067 }, { "epoch": 0.65, "learning_rate": 2.8407911680987176e-06, "loss": 1.0172, "step": 2068 }, { "epoch": 0.65, "learning_rate": 2.8361788868679673e-06, "loss": 1.2395, "step": 2069 }, { "epoch": 0.65, "learning_rate": 2.8315688700804034e-06, "loss": 1.1736, "step": 2070 }, { "epoch": 0.65, "learning_rate": 2.8269611225604144e-06, "loss": 0.8919, "step": 2071 }, { "epoch": 0.65, "learning_rate": 2.8223556491300215e-06, "loss": 1.1508, "step": 2072 }, { "epoch": 0.65, "learning_rate": 2.8177524546088575e-06, "loss": 0.9926, "step": 2073 }, { "epoch": 0.65, "learning_rate": 2.813151543814179e-06, "loss": 1.0405, "step": 2074 }, { "epoch": 0.66, "learning_rate": 2.8085529215608475e-06, "loss": 0.9403, "step": 2075 }, { "epoch": 0.66, "learning_rate": 2.803956592661327e-06, "loss": 0.9373, "step": 2076 }, { "epoch": 0.66, "learning_rate": 2.79936256192569e-06, "loss": 1.0198, "step": 2077 }, { "epoch": 0.66, "learning_rate": 2.794770834161594e-06, "loss": 1.1795, "step": 2078 }, { "epoch": 0.66, "learning_rate": 2.7901814141742966e-06, "loss": 1.274, "step": 2079 }, { "epoch": 0.66, "learning_rate": 2.7855943067666325e-06, "loss": 1.1214, "step": 2080 }, { "epoch": 0.66, "learning_rate": 2.781009516739017e-06, "loss": 1.1647, "step": 2081 }, { "epoch": 0.66, "learning_rate": 2.776427048889446e-06, "loss": 1.3853, "step": 2082 }, { "epoch": 0.66, "learning_rate": 2.7718469080134795e-06, "loss": 1.2518, "step": 2083 }, { "epoch": 0.66, "learning_rate": 2.767269098904243e-06, "loss": 1.645, "step": 2084 }, { "epoch": 0.66, "learning_rate": 2.762693626352425e-06, "loss": 1.23, "step": 2085 }, { "epoch": 0.66, "learning_rate": 2.7581204951462646e-06, "loss": 1.3495, "step": 2086 }, { "epoch": 0.66, "learning_rate": 2.753549710071557e-06, "loss": 1.2106, "step": 2087 }, { "epoch": 0.66, "learning_rate": 2.748981275911633e-06, "loss": 1.0713, "step": 2088 }, { "epoch": 0.66, "learning_rate": 2.744415197447373e-06, "loss": 1.4268, "step": 2089 }, { "epoch": 0.66, "learning_rate": 2.7398514794571853e-06, "loss": 0.9597, "step": 2090 }, { "epoch": 0.66, "learning_rate": 2.7352901267170073e-06, "loss": 0.9372, "step": 2091 }, { "epoch": 0.66, "learning_rate": 2.730731144000308e-06, "loss": 1.0643, "step": 2092 }, { "epoch": 0.66, "learning_rate": 2.7261745360780712e-06, "loss": 1.2639, "step": 2093 }, { "epoch": 0.66, "learning_rate": 2.721620307718793e-06, "loss": 1.4495, "step": 2094 }, { "epoch": 0.66, "learning_rate": 2.717068463688487e-06, "loss": 1.1723, "step": 2095 }, { "epoch": 0.66, "learning_rate": 2.7125190087506637e-06, "loss": 1.6213, "step": 2096 }, { "epoch": 0.66, "learning_rate": 2.7079719476663415e-06, "loss": 0.9526, "step": 2097 }, { "epoch": 0.66, "learning_rate": 2.703427285194023e-06, "loss": 1.51, "step": 2098 }, { "epoch": 0.66, "learning_rate": 2.6988850260897124e-06, "loss": 1.0042, "step": 2099 }, { "epoch": 0.66, "learning_rate": 2.694345175106888e-06, "loss": 1.2466, "step": 2100 }, { "epoch": 0.66, "learning_rate": 2.6898077369965205e-06, "loss": 1.545, "step": 2101 }, { "epoch": 0.66, "learning_rate": 2.6852727165070386e-06, "loss": 0.9459, "step": 2102 }, { "epoch": 0.66, "learning_rate": 2.6807401183843574e-06, "loss": 0.9875, "step": 2103 }, { "epoch": 0.66, "learning_rate": 2.676209947371845e-06, "loss": 1.0125, "step": 2104 }, { "epoch": 0.66, "learning_rate": 2.67168220821034e-06, "loss": 1.633, "step": 2105 }, { "epoch": 0.66, "learning_rate": 2.6671569056381253e-06, "loss": 1.3164, "step": 2106 }, { "epoch": 0.67, "learning_rate": 2.6626340443909438e-06, "loss": 1.3381, "step": 2107 }, { "epoch": 0.67, "learning_rate": 2.658113629201975e-06, "loss": 1.1186, "step": 2108 }, { "epoch": 0.67, "learning_rate": 2.653595664801847e-06, "loss": 1.2783, "step": 2109 }, { "epoch": 0.67, "learning_rate": 2.6490801559186135e-06, "loss": 1.1854, "step": 2110 }, { "epoch": 0.67, "learning_rate": 2.644567107277769e-06, "loss": 1.1673, "step": 2111 }, { "epoch": 0.67, "learning_rate": 2.640056523602223e-06, "loss": 1.166, "step": 2112 }, { "epoch": 0.67, "learning_rate": 2.6355484096123173e-06, "loss": 1.2865, "step": 2113 }, { "epoch": 0.67, "learning_rate": 2.6310427700257937e-06, "loss": 0.9074, "step": 2114 }, { "epoch": 0.67, "learning_rate": 2.62653960955782e-06, "loss": 0.9333, "step": 2115 }, { "epoch": 0.67, "learning_rate": 2.622038932920957e-06, "loss": 1.3108, "step": 2116 }, { "epoch": 0.67, "learning_rate": 2.617540744825179e-06, "loss": 0.9822, "step": 2117 }, { "epoch": 0.67, "learning_rate": 2.6130450499778416e-06, "loss": 1.2313, "step": 2118 }, { "epoch": 0.67, "learning_rate": 2.6085518530837047e-06, "loss": 0.9406, "step": 2119 }, { "epoch": 0.67, "learning_rate": 2.604061158844903e-06, "loss": 0.8964, "step": 2120 }, { "epoch": 0.67, "learning_rate": 2.599572971960962e-06, "loss": 0.9975, "step": 2121 }, { "epoch": 0.67, "learning_rate": 2.595087297128773e-06, "loss": 1.4573, "step": 2122 }, { "epoch": 0.67, "learning_rate": 2.5906041390426097e-06, "loss": 1.0212, "step": 2123 }, { "epoch": 0.67, "learning_rate": 2.586123502394103e-06, "loss": 1.4553, "step": 2124 }, { "epoch": 0.67, "learning_rate": 2.5816453918722483e-06, "loss": 1.1133, "step": 2125 }, { "epoch": 0.67, "learning_rate": 2.5771698121633946e-06, "loss": 1.2831, "step": 2126 }, { "epoch": 0.67, "learning_rate": 2.572696767951251e-06, "loss": 1.1767, "step": 2127 }, { "epoch": 0.67, "learning_rate": 2.568226263916862e-06, "loss": 1.3885, "step": 2128 }, { "epoch": 0.67, "learning_rate": 2.5637583047386228e-06, "loss": 1.3546, "step": 2129 }, { "epoch": 0.67, "learning_rate": 2.559292895092259e-06, "loss": 1.1875, "step": 2130 }, { "epoch": 0.67, "learning_rate": 2.554830039650834e-06, "loss": 1.3329, "step": 2131 }, { "epoch": 0.67, "learning_rate": 2.550369743084732e-06, "loss": 1.0292, "step": 2132 }, { "epoch": 0.67, "learning_rate": 2.545912010061665e-06, "loss": 1.3354, "step": 2133 }, { "epoch": 0.67, "learning_rate": 2.5414568452466566e-06, "loss": 1.3084, "step": 2134 }, { "epoch": 0.67, "learning_rate": 2.537004253302051e-06, "loss": 1.2285, "step": 2135 }, { "epoch": 0.67, "learning_rate": 2.532554238887488e-06, "loss": 1.3742, "step": 2136 }, { "epoch": 0.67, "learning_rate": 2.528106806659922e-06, "loss": 1.2167, "step": 2137 }, { "epoch": 0.68, "learning_rate": 2.523661961273596e-06, "loss": 1.2093, "step": 2138 }, { "epoch": 0.68, "learning_rate": 2.5192197073800526e-06, "loss": 1.2047, "step": 2139 }, { "epoch": 0.68, "learning_rate": 2.5147800496281173e-06, "loss": 1.1712, "step": 2140 }, { "epoch": 0.68, "learning_rate": 2.510342992663904e-06, "loss": 1.2942, "step": 2141 }, { "epoch": 0.68, "learning_rate": 2.5059085411307984e-06, "loss": 1.2321, "step": 2142 }, { "epoch": 0.68, "learning_rate": 2.5014766996694656e-06, "loss": 1.0438, "step": 2143 }, { "epoch": 0.68, "learning_rate": 2.497047472917834e-06, "loss": 1.516, "step": 2144 }, { "epoch": 0.68, "learning_rate": 2.492620865511102e-06, "loss": 0.8345, "step": 2145 }, { "epoch": 0.68, "learning_rate": 2.4881968820817214e-06, "loss": 1.1114, "step": 2146 }, { "epoch": 0.68, "learning_rate": 2.4837755272594e-06, "loss": 1.2842, "step": 2147 }, { "epoch": 0.68, "learning_rate": 2.479356805671092e-06, "loss": 0.7552, "step": 2148 }, { "epoch": 0.68, "learning_rate": 2.474940721941003e-06, "loss": 0.9262, "step": 2149 }, { "epoch": 0.68, "learning_rate": 2.4705272806905688e-06, "loss": 1.3642, "step": 2150 }, { "epoch": 0.68, "learning_rate": 2.4661164865384697e-06, "loss": 1.1125, "step": 2151 }, { "epoch": 0.68, "learning_rate": 2.4617083441006058e-06, "loss": 1.0448, "step": 2152 }, { "epoch": 0.68, "learning_rate": 2.457302857990112e-06, "loss": 1.1162, "step": 2153 }, { "epoch": 0.68, "learning_rate": 2.4529000328173334e-06, "loss": 1.1207, "step": 2154 }, { "epoch": 0.68, "learning_rate": 2.4484998731898406e-06, "loss": 1.0855, "step": 2155 }, { "epoch": 0.68, "learning_rate": 2.444102383712407e-06, "loss": 1.5888, "step": 2156 }, { "epoch": 0.68, "learning_rate": 2.439707568987014e-06, "loss": 1.4881, "step": 2157 }, { "epoch": 0.68, "learning_rate": 2.4353154336128425e-06, "loss": 1.1306, "step": 2158 }, { "epoch": 0.68, "learning_rate": 2.430925982186275e-06, "loss": 1.0462, "step": 2159 }, { "epoch": 0.68, "learning_rate": 2.426539219300878e-06, "loss": 1.2042, "step": 2160 }, { "epoch": 0.68, "learning_rate": 2.4221551495474104e-06, "loss": 1.1981, "step": 2161 }, { "epoch": 0.68, "learning_rate": 2.4177737775138072e-06, "loss": 1.1939, "step": 2162 }, { "epoch": 0.68, "learning_rate": 2.413395107785186e-06, "loss": 1.0309, "step": 2163 }, { "epoch": 0.68, "learning_rate": 2.409019144943831e-06, "loss": 1.2042, "step": 2164 }, { "epoch": 0.68, "learning_rate": 2.404645893569199e-06, "loss": 1.5766, "step": 2165 }, { "epoch": 0.68, "learning_rate": 2.400275358237905e-06, "loss": 1.4045, "step": 2166 }, { "epoch": 0.68, "learning_rate": 2.395907543523721e-06, "loss": 1.2412, "step": 2167 }, { "epoch": 0.68, "learning_rate": 2.391542453997578e-06, "loss": 1.2733, "step": 2168 }, { "epoch": 0.68, "learning_rate": 2.3871800942275484e-06, "loss": 1.4967, "step": 2169 }, { "epoch": 0.69, "learning_rate": 2.38282046877885e-06, "loss": 0.9357, "step": 2170 }, { "epoch": 0.69, "learning_rate": 2.3784635822138424e-06, "loss": 1.3104, "step": 2171 }, { "epoch": 0.69, "learning_rate": 2.3741094390920127e-06, "loss": 1.3114, "step": 2172 }, { "epoch": 0.69, "learning_rate": 2.3697580439699846e-06, "loss": 1.1294, "step": 2173 }, { "epoch": 0.69, "learning_rate": 2.3654094014014995e-06, "loss": 1.084, "step": 2174 }, { "epoch": 0.69, "learning_rate": 2.361063515937419e-06, "loss": 1.2247, "step": 2175 }, { "epoch": 0.69, "learning_rate": 2.3567203921257248e-06, "loss": 1.0511, "step": 2176 }, { "epoch": 0.69, "learning_rate": 2.3523800345115015e-06, "loss": 1.6354, "step": 2177 }, { "epoch": 0.69, "learning_rate": 2.348042447636945e-06, "loss": 1.1396, "step": 2178 }, { "epoch": 0.69, "learning_rate": 2.343707636041348e-06, "loss": 1.661, "step": 2179 }, { "epoch": 0.69, "learning_rate": 2.3393756042610967e-06, "loss": 1.2203, "step": 2180 }, { "epoch": 0.69, "learning_rate": 2.3350463568296757e-06, "loss": 0.9733, "step": 2181 }, { "epoch": 0.69, "learning_rate": 2.3307198982776468e-06, "loss": 1.0107, "step": 2182 }, { "epoch": 0.69, "learning_rate": 2.3263962331326615e-06, "loss": 0.8616, "step": 2183 }, { "epoch": 0.69, "learning_rate": 2.322075365919443e-06, "loss": 1.6109, "step": 2184 }, { "epoch": 0.69, "learning_rate": 2.317757301159785e-06, "loss": 1.3015, "step": 2185 }, { "epoch": 0.69, "learning_rate": 2.3134420433725546e-06, "loss": 1.285, "step": 2186 }, { "epoch": 0.69, "learning_rate": 2.309129597073675e-06, "loss": 0.9577, "step": 2187 }, { "epoch": 0.69, "learning_rate": 2.3048199667761335e-06, "loss": 1.0521, "step": 2188 }, { "epoch": 0.69, "learning_rate": 2.300513156989962e-06, "loss": 1.2557, "step": 2189 }, { "epoch": 0.69, "learning_rate": 2.2962091722222512e-06, "loss": 1.2183, "step": 2190 }, { "epoch": 0.69, "learning_rate": 2.291908016977127e-06, "loss": 1.2211, "step": 2191 }, { "epoch": 0.69, "learning_rate": 2.287609695755757e-06, "loss": 1.2336, "step": 2192 }, { "epoch": 0.69, "learning_rate": 2.2833142130563413e-06, "loss": 0.8239, "step": 2193 }, { "epoch": 0.69, "learning_rate": 2.2790215733741157e-06, "loss": 1.1492, "step": 2194 }, { "epoch": 0.69, "learning_rate": 2.2747317812013313e-06, "loss": 1.2718, "step": 2195 }, { "epoch": 0.69, "learning_rate": 2.2704448410272696e-06, "loss": 0.8943, "step": 2196 }, { "epoch": 0.69, "learning_rate": 2.266160757338217e-06, "loss": 1.0773, "step": 2197 }, { "epoch": 0.69, "learning_rate": 2.261879534617481e-06, "loss": 1.2116, "step": 2198 }, { "epoch": 0.69, "learning_rate": 2.2576011773453647e-06, "loss": 0.9118, "step": 2199 }, { "epoch": 0.69, "learning_rate": 2.2533256899991825e-06, "loss": 1.2827, "step": 2200 }, { "epoch": 0.69, "learning_rate": 2.2490530770532374e-06, "loss": 1.0844, "step": 2201 }, { "epoch": 0.7, "learning_rate": 2.244783342978829e-06, "loss": 1.2038, "step": 2202 }, { "epoch": 0.7, "learning_rate": 2.2405164922442403e-06, "loss": 1.1055, "step": 2203 }, { "epoch": 0.7, "learning_rate": 2.2362525293147423e-06, "loss": 1.4454, "step": 2204 }, { "epoch": 0.7, "learning_rate": 2.2319914586525776e-06, "loss": 1.0219, "step": 2205 }, { "epoch": 0.7, "learning_rate": 2.2277332847169697e-06, "loss": 1.2456, "step": 2206 }, { "epoch": 0.7, "learning_rate": 2.2234780119641013e-06, "loss": 0.905, "step": 2207 }, { "epoch": 0.7, "learning_rate": 2.219225644847129e-06, "loss": 1.0939, "step": 2208 }, { "epoch": 0.7, "learning_rate": 2.2149761878161587e-06, "loss": 1.3952, "step": 2209 }, { "epoch": 0.7, "learning_rate": 2.2107296453182596e-06, "loss": 1.0626, "step": 2210 }, { "epoch": 0.7, "learning_rate": 2.2064860217974442e-06, "loss": 1.0669, "step": 2211 }, { "epoch": 0.7, "learning_rate": 2.2022453216946782e-06, "loss": 1.5224, "step": 2212 }, { "epoch": 0.7, "learning_rate": 2.1980075494478557e-06, "loss": 0.8538, "step": 2213 }, { "epoch": 0.7, "learning_rate": 2.193772709491819e-06, "loss": 1.2673, "step": 2214 }, { "epoch": 0.7, "learning_rate": 2.1895408062583336e-06, "loss": 0.8111, "step": 2215 }, { "epoch": 0.7, "learning_rate": 2.185311844176098e-06, "loss": 1.2382, "step": 2216 }, { "epoch": 0.7, "learning_rate": 2.1810858276707257e-06, "loss": 1.0241, "step": 2217 }, { "epoch": 0.7, "learning_rate": 2.1768627611647566e-06, "loss": 1.3142, "step": 2218 }, { "epoch": 0.7, "learning_rate": 2.1726426490776333e-06, "loss": 1.2712, "step": 2219 }, { "epoch": 0.7, "learning_rate": 2.1684254958257158e-06, "loss": 1.5881, "step": 2220 }, { "epoch": 0.7, "learning_rate": 2.164211305822259e-06, "loss": 1.2209, "step": 2221 }, { "epoch": 0.7, "learning_rate": 2.1600000834774264e-06, "loss": 1.0427, "step": 2222 }, { "epoch": 0.7, "learning_rate": 2.1557918331982685e-06, "loss": 0.8392, "step": 2223 }, { "epoch": 0.7, "learning_rate": 2.1515865593887275e-06, "loss": 1.4423, "step": 2224 }, { "epoch": 0.7, "learning_rate": 2.147384266449629e-06, "loss": 1.1601, "step": 2225 }, { "epoch": 0.7, "learning_rate": 2.143184958778685e-06, "loss": 1.3754, "step": 2226 }, { "epoch": 0.7, "learning_rate": 2.1389886407704765e-06, "loss": 1.1615, "step": 2227 }, { "epoch": 0.7, "learning_rate": 2.134795316816461e-06, "loss": 1.1037, "step": 2228 }, { "epoch": 0.7, "learning_rate": 2.1306049913049577e-06, "loss": 1.0455, "step": 2229 }, { "epoch": 0.7, "learning_rate": 2.126417668621154e-06, "loss": 1.3493, "step": 2230 }, { "epoch": 0.7, "learning_rate": 2.1222333531470885e-06, "loss": 1.2982, "step": 2231 }, { "epoch": 0.7, "learning_rate": 2.118052049261659e-06, "loss": 1.1746, "step": 2232 }, { "epoch": 0.71, "learning_rate": 2.1138737613406057e-06, "loss": 1.0411, "step": 2233 }, { "epoch": 0.71, "learning_rate": 2.1096984937565197e-06, "loss": 1.3577, "step": 2234 }, { "epoch": 0.71, "learning_rate": 2.1055262508788205e-06, "loss": 1.3947, "step": 2235 }, { "epoch": 0.71, "learning_rate": 2.101357037073774e-06, "loss": 1.1624, "step": 2236 }, { "epoch": 0.71, "learning_rate": 2.097190856704467e-06, "loss": 1.0629, "step": 2237 }, { "epoch": 0.71, "learning_rate": 2.0930277141308192e-06, "loss": 1.9269, "step": 2238 }, { "epoch": 0.71, "learning_rate": 2.0888676137095648e-06, "loss": 1.0303, "step": 2239 }, { "epoch": 0.71, "learning_rate": 2.0847105597942607e-06, "loss": 1.5178, "step": 2240 }, { "epoch": 0.71, "learning_rate": 2.0805565567352686e-06, "loss": 1.225, "step": 2241 }, { "epoch": 0.71, "learning_rate": 2.0764056088797646e-06, "loss": 1.0907, "step": 2242 }, { "epoch": 0.71, "learning_rate": 2.072257720571721e-06, "loss": 1.2737, "step": 2243 }, { "epoch": 0.71, "learning_rate": 2.0681128961519136e-06, "loss": 1.0934, "step": 2244 }, { "epoch": 0.71, "learning_rate": 2.0639711399579092e-06, "loss": 1.3377, "step": 2245 }, { "epoch": 0.71, "learning_rate": 2.0598324563240635e-06, "loss": 1.7001, "step": 2246 }, { "epoch": 0.71, "learning_rate": 2.0556968495815156e-06, "loss": 1.0796, "step": 2247 }, { "epoch": 0.71, "learning_rate": 2.0515643240581896e-06, "loss": 1.1609, "step": 2248 }, { "epoch": 0.71, "learning_rate": 2.0474348840787787e-06, "loss": 1.0119, "step": 2249 }, { "epoch": 0.71, "learning_rate": 2.0433085339647535e-06, "loss": 1.1198, "step": 2250 }, { "epoch": 0.71, "learning_rate": 2.0391852780343447e-06, "loss": 1.2979, "step": 2251 }, { "epoch": 0.71, "learning_rate": 2.0350651206025508e-06, "loss": 1.3486, "step": 2252 }, { "epoch": 0.71, "learning_rate": 2.0309480659811213e-06, "loss": 1.1669, "step": 2253 }, { "epoch": 0.71, "learning_rate": 2.0268341184785674e-06, "loss": 0.9349, "step": 2254 }, { "epoch": 0.71, "learning_rate": 2.0227232824001376e-06, "loss": 1.137, "step": 2255 }, { "epoch": 0.71, "learning_rate": 2.018615562047837e-06, "loss": 0.9366, "step": 2256 }, { "epoch": 0.71, "learning_rate": 2.0145109617203962e-06, "loss": 1.5436, "step": 2257 }, { "epoch": 0.71, "learning_rate": 2.0104094857132922e-06, "loss": 1.1923, "step": 2258 }, { "epoch": 0.71, "learning_rate": 2.0063111383187255e-06, "loss": 1.3059, "step": 2259 }, { "epoch": 0.71, "learning_rate": 2.002215923825628e-06, "loss": 1.3038, "step": 2260 }, { "epoch": 0.71, "learning_rate": 1.9981238465196457e-06, "loss": 1.4478, "step": 2261 }, { "epoch": 0.71, "learning_rate": 1.994034910683151e-06, "loss": 1.5301, "step": 2262 }, { "epoch": 0.71, "learning_rate": 1.989949120595218e-06, "loss": 0.7965, "step": 2263 }, { "epoch": 0.71, "learning_rate": 1.9858664805316397e-06, "loss": 0.9932, "step": 2264 }, { "epoch": 0.72, "learning_rate": 1.9817869947649045e-06, "loss": 1.0852, "step": 2265 }, { "epoch": 0.72, "learning_rate": 1.977710667564201e-06, "loss": 1.1512, "step": 2266 }, { "epoch": 0.72, "learning_rate": 1.973637503195417e-06, "loss": 1.4381, "step": 2267 }, { "epoch": 0.72, "learning_rate": 1.9695675059211262e-06, "loss": 1.1199, "step": 2268 }, { "epoch": 0.72, "learning_rate": 1.9655006800005866e-06, "loss": 1.3771, "step": 2269 }, { "epoch": 0.72, "learning_rate": 1.961437029689744e-06, "loss": 1.341, "step": 2270 }, { "epoch": 0.72, "learning_rate": 1.9573765592412124e-06, "loss": 1.1374, "step": 2271 }, { "epoch": 0.72, "learning_rate": 1.953319272904286e-06, "loss": 1.218, "step": 2272 }, { "epoch": 0.72, "learning_rate": 1.949265174924922e-06, "loss": 1.2973, "step": 2273 }, { "epoch": 0.72, "learning_rate": 1.9452142695457404e-06, "loss": 0.9506, "step": 2274 }, { "epoch": 0.72, "learning_rate": 1.941166561006025e-06, "loss": 1.1869, "step": 2275 }, { "epoch": 0.72, "learning_rate": 1.9371220535417067e-06, "loss": 1.3025, "step": 2276 }, { "epoch": 0.72, "learning_rate": 1.933080751385376e-06, "loss": 0.848, "step": 2277 }, { "epoch": 0.72, "learning_rate": 1.9290426587662608e-06, "loss": 1.5226, "step": 2278 }, { "epoch": 0.72, "learning_rate": 1.9250077799102323e-06, "loss": 1.1907, "step": 2279 }, { "epoch": 0.72, "learning_rate": 1.9209761190398023e-06, "loss": 1.1628, "step": 2280 }, { "epoch": 0.72, "learning_rate": 1.916947680374109e-06, "loss": 1.02, "step": 2281 }, { "epoch": 0.72, "learning_rate": 1.9129224681289245e-06, "loss": 1.736, "step": 2282 }, { "epoch": 0.72, "learning_rate": 1.90890048651664e-06, "loss": 0.9385, "step": 2283 }, { "epoch": 0.72, "learning_rate": 1.904881739746266e-06, "loss": 1.4371, "step": 2284 }, { "epoch": 0.72, "learning_rate": 1.9008662320234327e-06, "loss": 1.265, "step": 2285 }, { "epoch": 0.72, "learning_rate": 1.8968539675503738e-06, "loss": 0.9343, "step": 2286 }, { "epoch": 0.72, "learning_rate": 1.892844950525935e-06, "loss": 1.307, "step": 2287 }, { "epoch": 0.72, "learning_rate": 1.8888391851455573e-06, "loss": 1.185, "step": 2288 }, { "epoch": 0.72, "learning_rate": 1.8848366756012866e-06, "loss": 1.6139, "step": 2289 }, { "epoch": 0.72, "learning_rate": 1.8808374260817546e-06, "loss": 1.3563, "step": 2290 }, { "epoch": 0.72, "learning_rate": 1.8768414407721846e-06, "loss": 0.8658, "step": 2291 }, { "epoch": 0.72, "learning_rate": 1.8728487238543813e-06, "loss": 1.1133, "step": 2292 }, { "epoch": 0.72, "learning_rate": 1.8688592795067345e-06, "loss": 1.201, "step": 2293 }, { "epoch": 0.72, "learning_rate": 1.864873111904203e-06, "loss": 1.1945, "step": 2294 }, { "epoch": 0.72, "learning_rate": 1.860890225218322e-06, "loss": 0.9428, "step": 2295 }, { "epoch": 0.72, "learning_rate": 1.8569106236171868e-06, "loss": 0.8245, "step": 2296 }, { "epoch": 0.73, "learning_rate": 1.852934311265463e-06, "loss": 1.3434, "step": 2297 }, { "epoch": 0.73, "learning_rate": 1.8489612923243655e-06, "loss": 0.9771, "step": 2298 }, { "epoch": 0.73, "learning_rate": 1.8449915709516704e-06, "loss": 1.0132, "step": 2299 }, { "epoch": 0.73, "learning_rate": 1.841025151301697e-06, "loss": 1.111, "step": 2300 }, { "epoch": 0.73, "learning_rate": 1.8370620375253112e-06, "loss": 1.7159, "step": 2301 }, { "epoch": 0.73, "learning_rate": 1.8331022337699189e-06, "loss": 1.0792, "step": 2302 }, { "epoch": 0.73, "learning_rate": 1.8291457441794659e-06, "loss": 0.9959, "step": 2303 }, { "epoch": 0.73, "learning_rate": 1.825192572894423e-06, "loss": 1.263, "step": 2304 }, { "epoch": 0.73, "learning_rate": 1.8212427240517954e-06, "loss": 1.1389, "step": 2305 }, { "epoch": 0.73, "learning_rate": 1.8172962017851043e-06, "loss": 0.9992, "step": 2306 }, { "epoch": 0.73, "learning_rate": 1.813353010224397e-06, "loss": 1.4533, "step": 2307 }, { "epoch": 0.73, "learning_rate": 1.8094131534962278e-06, "loss": 0.9953, "step": 2308 }, { "epoch": 0.73, "learning_rate": 1.805476635723668e-06, "loss": 1.2614, "step": 2309 }, { "epoch": 0.73, "learning_rate": 1.8015434610262888e-06, "loss": 1.3423, "step": 2310 }, { "epoch": 0.73, "learning_rate": 1.7976136335201699e-06, "loss": 1.0807, "step": 2311 }, { "epoch": 0.73, "learning_rate": 1.7936871573178777e-06, "loss": 0.9413, "step": 2312 }, { "epoch": 0.73, "learning_rate": 1.7897640365284812e-06, "loss": 1.225, "step": 2313 }, { "epoch": 0.73, "learning_rate": 1.7858442752575322e-06, "loss": 1.3377, "step": 2314 }, { "epoch": 0.73, "learning_rate": 1.781927877607072e-06, "loss": 1.4477, "step": 2315 }, { "epoch": 0.73, "learning_rate": 1.7780148476756148e-06, "loss": 1.248, "step": 2316 }, { "epoch": 0.73, "learning_rate": 1.7741051895581585e-06, "loss": 1.1334, "step": 2317 }, { "epoch": 0.73, "learning_rate": 1.770198907346164e-06, "loss": 1.2396, "step": 2318 }, { "epoch": 0.73, "learning_rate": 1.7662960051275673e-06, "loss": 1.1161, "step": 2319 }, { "epoch": 0.73, "learning_rate": 1.76239648698676e-06, "loss": 1.4141, "step": 2320 }, { "epoch": 0.73, "learning_rate": 1.7585003570045995e-06, "loss": 1.1247, "step": 2321 }, { "epoch": 0.73, "learning_rate": 1.7546076192583915e-06, "loss": 1.1345, "step": 2322 }, { "epoch": 0.73, "learning_rate": 1.750718277821895e-06, "loss": 1.1986, "step": 2323 }, { "epoch": 0.73, "learning_rate": 1.7468323367653107e-06, "loss": 1.1781, "step": 2324 }, { "epoch": 0.73, "learning_rate": 1.7429498001552881e-06, "loss": 1.5507, "step": 2325 }, { "epoch": 0.73, "learning_rate": 1.7390706720549054e-06, "loss": 1.066, "step": 2326 }, { "epoch": 0.73, "learning_rate": 1.735194956523682e-06, "loss": 0.9698, "step": 2327 }, { "epoch": 0.74, "learning_rate": 1.731322657617559e-06, "loss": 1.2834, "step": 2328 }, { "epoch": 0.74, "learning_rate": 1.727453779388908e-06, "loss": 1.4057, "step": 2329 }, { "epoch": 0.74, "learning_rate": 1.7235883258865144e-06, "loss": 1.2089, "step": 2330 }, { "epoch": 0.74, "learning_rate": 1.7197263011555864e-06, "loss": 1.1729, "step": 2331 }, { "epoch": 0.74, "learning_rate": 1.715867709237738e-06, "loss": 0.6677, "step": 2332 }, { "epoch": 0.74, "learning_rate": 1.712012554170998e-06, "loss": 1.0393, "step": 2333 }, { "epoch": 0.74, "learning_rate": 1.7081608399897876e-06, "loss": 1.1931, "step": 2334 }, { "epoch": 0.74, "learning_rate": 1.7043125707249387e-06, "loss": 1.0212, "step": 2335 }, { "epoch": 0.74, "learning_rate": 1.7004677504036688e-06, "loss": 1.4092, "step": 2336 }, { "epoch": 0.74, "learning_rate": 1.6966263830495939e-06, "loss": 1.2814, "step": 2337 }, { "epoch": 0.74, "learning_rate": 1.6927884726827092e-06, "loss": 0.8879, "step": 2338 }, { "epoch": 0.74, "learning_rate": 1.6889540233193996e-06, "loss": 1.0383, "step": 2339 }, { "epoch": 0.74, "learning_rate": 1.6851230389724204e-06, "loss": 1.0104, "step": 2340 }, { "epoch": 0.74, "learning_rate": 1.6812955236509076e-06, "loss": 1.2941, "step": 2341 }, { "epoch": 0.74, "learning_rate": 1.6774714813603609e-06, "loss": 1.2239, "step": 2342 }, { "epoch": 0.74, "learning_rate": 1.6736509161026521e-06, "loss": 1.3029, "step": 2343 }, { "epoch": 0.74, "learning_rate": 1.6698338318760054e-06, "loss": 0.7759, "step": 2344 }, { "epoch": 0.74, "learning_rate": 1.666020232675014e-06, "loss": 1.3685, "step": 2345 }, { "epoch": 0.74, "learning_rate": 1.6622101224906096e-06, "loss": 1.1516, "step": 2346 }, { "epoch": 0.74, "learning_rate": 1.658403505310085e-06, "loss": 1.02, "step": 2347 }, { "epoch": 0.74, "learning_rate": 1.6546003851170706e-06, "loss": 1.622, "step": 2348 }, { "epoch": 0.74, "learning_rate": 1.6508007658915414e-06, "loss": 0.967, "step": 2349 }, { "epoch": 0.74, "learning_rate": 1.6470046516098047e-06, "loss": 1.1007, "step": 2350 }, { "epoch": 0.74, "learning_rate": 1.643212046244504e-06, "loss": 1.2609, "step": 2351 }, { "epoch": 0.74, "learning_rate": 1.6394229537646055e-06, "loss": 0.9441, "step": 2352 }, { "epoch": 0.74, "learning_rate": 1.6356373781354058e-06, "loss": 1.228, "step": 2353 }, { "epoch": 0.74, "learning_rate": 1.6318553233185136e-06, "loss": 1.5295, "step": 2354 }, { "epoch": 0.74, "learning_rate": 1.6280767932718604e-06, "loss": 1.2647, "step": 2355 }, { "epoch": 0.74, "learning_rate": 1.6243017919496845e-06, "loss": 1.4253, "step": 2356 }, { "epoch": 0.74, "learning_rate": 1.6205303233025316e-06, "loss": 1.1672, "step": 2357 }, { "epoch": 0.74, "learning_rate": 1.6167623912772506e-06, "loss": 1.3176, "step": 2358 }, { "epoch": 0.74, "learning_rate": 1.612997999816992e-06, "loss": 1.0779, "step": 2359 }, { "epoch": 0.75, "learning_rate": 1.6092371528611971e-06, "loss": 1.1816, "step": 2360 }, { "epoch": 0.75, "learning_rate": 1.6054798543456025e-06, "loss": 1.2526, "step": 2361 }, { "epoch": 0.75, "learning_rate": 1.6017261082022256e-06, "loss": 1.4592, "step": 2362 }, { "epoch": 0.75, "learning_rate": 1.5979759183593725e-06, "loss": 1.4246, "step": 2363 }, { "epoch": 0.75, "learning_rate": 1.5942292887416234e-06, "loss": 1.2012, "step": 2364 }, { "epoch": 0.75, "learning_rate": 1.590486223269831e-06, "loss": 1.1433, "step": 2365 }, { "epoch": 0.75, "learning_rate": 1.5867467258611252e-06, "loss": 1.407, "step": 2366 }, { "epoch": 0.75, "learning_rate": 1.5830108004288957e-06, "loss": 1.3044, "step": 2367 }, { "epoch": 0.75, "learning_rate": 1.579278450882794e-06, "loss": 1.4251, "step": 2368 }, { "epoch": 0.75, "learning_rate": 1.5755496811287351e-06, "loss": 1.2573, "step": 2369 }, { "epoch": 0.75, "learning_rate": 1.5718244950688815e-06, "loss": 1.3887, "step": 2370 }, { "epoch": 0.75, "learning_rate": 1.5681028966016505e-06, "loss": 0.8514, "step": 2371 }, { "epoch": 0.75, "learning_rate": 1.5643848896216984e-06, "loss": 0.9258, "step": 2372 }, { "epoch": 0.75, "learning_rate": 1.5606704780199322e-06, "loss": 0.9595, "step": 2373 }, { "epoch": 0.75, "learning_rate": 1.5569596656834878e-06, "loss": 0.935, "step": 2374 }, { "epoch": 0.75, "learning_rate": 1.5532524564957374e-06, "loss": 1.3169, "step": 2375 }, { "epoch": 0.75, "learning_rate": 1.5495488543362858e-06, "loss": 1.0594, "step": 2376 }, { "epoch": 0.75, "learning_rate": 1.5458488630809564e-06, "loss": 1.085, "step": 2377 }, { "epoch": 0.75, "learning_rate": 1.5421524866018017e-06, "loss": 1.2956, "step": 2378 }, { "epoch": 0.75, "learning_rate": 1.5384597287670849e-06, "loss": 1.2328, "step": 2379 }, { "epoch": 0.75, "learning_rate": 1.5347705934412833e-06, "loss": 1.4608, "step": 2380 }, { "epoch": 0.75, "learning_rate": 1.5310850844850872e-06, "loss": 0.88, "step": 2381 }, { "epoch": 0.75, "learning_rate": 1.5274032057553883e-06, "loss": 0.8614, "step": 2382 }, { "epoch": 0.75, "learning_rate": 1.5237249611052779e-06, "loss": 1.1221, "step": 2383 }, { "epoch": 0.75, "learning_rate": 1.5200503543840506e-06, "loss": 1.0023, "step": 2384 }, { "epoch": 0.75, "learning_rate": 1.516379389437186e-06, "loss": 1.3832, "step": 2385 }, { "epoch": 0.75, "learning_rate": 1.512712070106359e-06, "loss": 0.9293, "step": 2386 }, { "epoch": 0.75, "learning_rate": 1.5090484002294247e-06, "loss": 1.2573, "step": 2387 }, { "epoch": 0.75, "learning_rate": 1.505388383640423e-06, "loss": 1.3202, "step": 2388 }, { "epoch": 0.75, "learning_rate": 1.5017320241695682e-06, "loss": 0.9882, "step": 2389 }, { "epoch": 0.75, "learning_rate": 1.4980793256432474e-06, "loss": 1.5165, "step": 2390 }, { "epoch": 0.75, "learning_rate": 1.4944302918840158e-06, "loss": 1.4918, "step": 2391 }, { "epoch": 0.76, "learning_rate": 1.4907849267105973e-06, "loss": 0.9679, "step": 2392 }, { "epoch": 0.76, "learning_rate": 1.487143233937871e-06, "loss": 1.4478, "step": 2393 }, { "epoch": 0.76, "learning_rate": 1.4835052173768789e-06, "loss": 1.4294, "step": 2394 }, { "epoch": 0.76, "learning_rate": 1.4798708808348095e-06, "loss": 0.8946, "step": 2395 }, { "epoch": 0.76, "learning_rate": 1.4762402281150063e-06, "loss": 1.0525, "step": 2396 }, { "epoch": 0.76, "learning_rate": 1.4726132630169521e-06, "loss": 1.2527, "step": 2397 }, { "epoch": 0.76, "learning_rate": 1.4689899893362764e-06, "loss": 1.4035, "step": 2398 }, { "epoch": 0.76, "learning_rate": 1.4653704108647387e-06, "loss": 0.957, "step": 2399 }, { "epoch": 0.76, "learning_rate": 1.4617545313902404e-06, "loss": 1.2119, "step": 2400 }, { "epoch": 0.76, "learning_rate": 1.4581423546968004e-06, "loss": 1.1227, "step": 2401 }, { "epoch": 0.76, "learning_rate": 1.4545338845645734e-06, "loss": 0.972, "step": 2402 }, { "epoch": 0.76, "learning_rate": 1.4509291247698282e-06, "loss": 1.0911, "step": 2403 }, { "epoch": 0.76, "learning_rate": 1.447328079084956e-06, "loss": 1.2655, "step": 2404 }, { "epoch": 0.76, "learning_rate": 1.4437307512784543e-06, "loss": 1.3944, "step": 2405 }, { "epoch": 0.76, "learning_rate": 1.4401371451149387e-06, "loss": 1.5642, "step": 2406 }, { "epoch": 0.76, "learning_rate": 1.4365472643551214e-06, "loss": 1.2578, "step": 2407 }, { "epoch": 0.76, "learning_rate": 1.432961112755823e-06, "loss": 1.3164, "step": 2408 }, { "epoch": 0.76, "learning_rate": 1.4293786940699556e-06, "loss": 1.102, "step": 2409 }, { "epoch": 0.76, "learning_rate": 1.4258000120465299e-06, "loss": 0.8391, "step": 2410 }, { "epoch": 0.76, "learning_rate": 1.4222250704306434e-06, "loss": 1.0308, "step": 2411 }, { "epoch": 0.76, "learning_rate": 1.4186538729634797e-06, "loss": 1.2844, "step": 2412 }, { "epoch": 0.76, "learning_rate": 1.415086423382302e-06, "loss": 1.2747, "step": 2413 }, { "epoch": 0.76, "learning_rate": 1.4115227254204571e-06, "loss": 1.525, "step": 2414 }, { "epoch": 0.76, "learning_rate": 1.4079627828073583e-06, "loss": 1.149, "step": 2415 }, { "epoch": 0.76, "learning_rate": 1.404406599268497e-06, "loss": 1.1915, "step": 2416 }, { "epoch": 0.76, "learning_rate": 1.4008541785254226e-06, "loss": 1.355, "step": 2417 }, { "epoch": 0.76, "learning_rate": 1.3973055242957545e-06, "loss": 1.094, "step": 2418 }, { "epoch": 0.76, "learning_rate": 1.393760640293162e-06, "loss": 1.2303, "step": 2419 }, { "epoch": 0.76, "learning_rate": 1.390219530227378e-06, "loss": 1.2611, "step": 2420 }, { "epoch": 0.76, "learning_rate": 1.3866821978041772e-06, "loss": 1.2621, "step": 2421 }, { "epoch": 0.76, "learning_rate": 1.3831486467253908e-06, "loss": 1.1767, "step": 2422 }, { "epoch": 0.77, "learning_rate": 1.37961888068888e-06, "loss": 1.4391, "step": 2423 }, { "epoch": 0.77, "learning_rate": 1.3760929033885573e-06, "loss": 0.7842, "step": 2424 }, { "epoch": 0.77, "learning_rate": 1.372570718514362e-06, "loss": 1.0568, "step": 2425 }, { "epoch": 0.77, "learning_rate": 1.3690523297522706e-06, "loss": 0.8196, "step": 2426 }, { "epoch": 0.77, "learning_rate": 1.3655377407842813e-06, "loss": 1.0059, "step": 2427 }, { "epoch": 0.77, "learning_rate": 1.3620269552884214e-06, "loss": 1.1711, "step": 2428 }, { "epoch": 0.77, "learning_rate": 1.358519976938732e-06, "loss": 0.8693, "step": 2429 }, { "epoch": 0.77, "learning_rate": 1.3550168094052763e-06, "loss": 0.999, "step": 2430 }, { "epoch": 0.77, "learning_rate": 1.3515174563541228e-06, "loss": 1.3998, "step": 2431 }, { "epoch": 0.77, "learning_rate": 1.348021921447355e-06, "loss": 0.8766, "step": 2432 }, { "epoch": 0.77, "learning_rate": 1.3445302083430556e-06, "loss": 1.546, "step": 2433 }, { "epoch": 0.77, "learning_rate": 1.3410423206953094e-06, "loss": 0.9272, "step": 2434 }, { "epoch": 0.77, "learning_rate": 1.3375582621541965e-06, "loss": 1.2377, "step": 2435 }, { "epoch": 0.77, "learning_rate": 1.3340780363657946e-06, "loss": 0.9725, "step": 2436 }, { "epoch": 0.77, "learning_rate": 1.3306016469721629e-06, "loss": 1.1543, "step": 2437 }, { "epoch": 0.77, "learning_rate": 1.3271290976113542e-06, "loss": 1.298, "step": 2438 }, { "epoch": 0.77, "learning_rate": 1.3236603919173946e-06, "loss": 1.3227, "step": 2439 }, { "epoch": 0.77, "learning_rate": 1.3201955335202954e-06, "loss": 1.3086, "step": 2440 }, { "epoch": 0.77, "learning_rate": 1.316734526046034e-06, "loss": 1.2313, "step": 2441 }, { "epoch": 0.77, "learning_rate": 1.3132773731165655e-06, "loss": 1.0102, "step": 2442 }, { "epoch": 0.77, "learning_rate": 1.3098240783498045e-06, "loss": 0.9851, "step": 2443 }, { "epoch": 0.77, "learning_rate": 1.3063746453596349e-06, "loss": 1.3057, "step": 2444 }, { "epoch": 0.77, "learning_rate": 1.3029290777558894e-06, "loss": 1.1776, "step": 2445 }, { "epoch": 0.77, "learning_rate": 1.2994873791443669e-06, "loss": 0.9672, "step": 2446 }, { "epoch": 0.77, "learning_rate": 1.296049553126808e-06, "loss": 1.4507, "step": 2447 }, { "epoch": 0.77, "learning_rate": 1.292615603300908e-06, "loss": 1.1078, "step": 2448 }, { "epoch": 0.77, "learning_rate": 1.2891855332602987e-06, "loss": 1.1702, "step": 2449 }, { "epoch": 0.77, "learning_rate": 1.2857593465945595e-06, "loss": 0.9325, "step": 2450 }, { "epoch": 0.77, "learning_rate": 1.2823370468891977e-06, "loss": 1.3002, "step": 2451 }, { "epoch": 0.77, "learning_rate": 1.2789186377256606e-06, "loss": 1.4489, "step": 2452 }, { "epoch": 0.77, "learning_rate": 1.275504122681317e-06, "loss": 1.3288, "step": 2453 }, { "epoch": 0.77, "learning_rate": 1.2720935053294665e-06, "loss": 1.3943, "step": 2454 }, { "epoch": 0.78, "learning_rate": 1.2686867892393263e-06, "loss": 1.2098, "step": 2455 }, { "epoch": 0.78, "learning_rate": 1.2652839779760312e-06, "loss": 1.2944, "step": 2456 }, { "epoch": 0.78, "learning_rate": 1.261885075100629e-06, "loss": 1.3486, "step": 2457 }, { "epoch": 0.78, "learning_rate": 1.2584900841700803e-06, "loss": 1.1121, "step": 2458 }, { "epoch": 0.78, "learning_rate": 1.2550990087372489e-06, "loss": 1.1784, "step": 2459 }, { "epoch": 0.78, "learning_rate": 1.2517118523509035e-06, "loss": 1.3664, "step": 2460 }, { "epoch": 0.78, "learning_rate": 1.248328618555708e-06, "loss": 1.4388, "step": 2461 }, { "epoch": 0.78, "learning_rate": 1.2449493108922256e-06, "loss": 1.1874, "step": 2462 }, { "epoch": 0.78, "learning_rate": 1.2415739328969084e-06, "loss": 1.0656, "step": 2463 }, { "epoch": 0.78, "learning_rate": 1.2382024881020937e-06, "loss": 1.4465, "step": 2464 }, { "epoch": 0.78, "learning_rate": 1.2348349800360087e-06, "loss": 1.3424, "step": 2465 }, { "epoch": 0.78, "learning_rate": 1.2314714122227562e-06, "loss": 1.0436, "step": 2466 }, { "epoch": 0.78, "learning_rate": 1.2281117881823147e-06, "loss": 1.1112, "step": 2467 }, { "epoch": 0.78, "learning_rate": 1.2247561114305405e-06, "loss": 1.1635, "step": 2468 }, { "epoch": 0.78, "learning_rate": 1.221404385479154e-06, "loss": 1.2075, "step": 2469 }, { "epoch": 0.78, "learning_rate": 1.218056613835746e-06, "loss": 1.0219, "step": 2470 }, { "epoch": 0.78, "learning_rate": 1.2147128000037621e-06, "loss": 1.2968, "step": 2471 }, { "epoch": 0.78, "learning_rate": 1.2113729474825148e-06, "loss": 1.1518, "step": 2472 }, { "epoch": 0.78, "learning_rate": 1.2080370597671643e-06, "loss": 1.1632, "step": 2473 }, { "epoch": 0.78, "learning_rate": 1.204705140348722e-06, "loss": 1.4078, "step": 2474 }, { "epoch": 0.78, "learning_rate": 1.2013771927140523e-06, "loss": 1.232, "step": 2475 }, { "epoch": 0.78, "learning_rate": 1.1980532203458556e-06, "loss": 1.0032, "step": 2476 }, { "epoch": 0.78, "learning_rate": 1.1947332267226786e-06, "loss": 1.3196, "step": 2477 }, { "epoch": 0.78, "learning_rate": 1.1914172153189003e-06, "loss": 1.1583, "step": 2478 }, { "epoch": 0.78, "learning_rate": 1.1881051896047313e-06, "loss": 1.0774, "step": 2479 }, { "epoch": 0.78, "learning_rate": 1.1847971530462165e-06, "loss": 1.0929, "step": 2480 }, { "epoch": 0.78, "learning_rate": 1.1814931091052217e-06, "loss": 1.4762, "step": 2481 }, { "epoch": 0.78, "learning_rate": 1.1781930612394333e-06, "loss": 1.1655, "step": 2482 }, { "epoch": 0.78, "learning_rate": 1.1748970129023617e-06, "loss": 1.1742, "step": 2483 }, { "epoch": 0.78, "learning_rate": 1.1716049675433255e-06, "loss": 1.1033, "step": 2484 }, { "epoch": 0.78, "learning_rate": 1.168316928607459e-06, "loss": 1.1318, "step": 2485 }, { "epoch": 0.78, "learning_rate": 1.165032899535699e-06, "loss": 1.2242, "step": 2486 }, { "epoch": 0.79, "learning_rate": 1.1617528837647919e-06, "loss": 1.3275, "step": 2487 }, { "epoch": 0.79, "learning_rate": 1.1584768847272788e-06, "loss": 1.0643, "step": 2488 }, { "epoch": 0.79, "learning_rate": 1.155204905851498e-06, "loss": 0.915, "step": 2489 }, { "epoch": 0.79, "learning_rate": 1.151936950561585e-06, "loss": 1.6091, "step": 2490 }, { "epoch": 0.79, "learning_rate": 1.1486730222774605e-06, "loss": 1.0606, "step": 2491 }, { "epoch": 0.79, "learning_rate": 1.1454131244148308e-06, "loss": 1.1483, "step": 2492 }, { "epoch": 0.79, "learning_rate": 1.1421572603851876e-06, "loss": 1.6881, "step": 2493 }, { "epoch": 0.79, "learning_rate": 1.1389054335957972e-06, "loss": 1.1135, "step": 2494 }, { "epoch": 0.79, "learning_rate": 1.135657647449705e-06, "loss": 1.0804, "step": 2495 }, { "epoch": 0.79, "learning_rate": 1.1324139053457234e-06, "loss": 1.4794, "step": 2496 }, { "epoch": 0.79, "learning_rate": 1.1291742106784382e-06, "loss": 1.1913, "step": 2497 }, { "epoch": 0.79, "learning_rate": 1.1259385668381927e-06, "loss": 1.5505, "step": 2498 }, { "epoch": 0.79, "learning_rate": 1.1227069772111004e-06, "loss": 1.4089, "step": 2499 }, { "epoch": 0.79, "learning_rate": 1.1194794451790197e-06, "loss": 1.2274, "step": 2500 }, { "epoch": 0.79, "learning_rate": 1.1162559741195733e-06, "loss": 1.656, "step": 2501 }, { "epoch": 0.79, "learning_rate": 1.113036567406129e-06, "loss": 1.1327, "step": 2502 }, { "epoch": 0.79, "learning_rate": 1.1098212284078037e-06, "loss": 1.0288, "step": 2503 }, { "epoch": 0.79, "learning_rate": 1.1066099604894536e-06, "loss": 0.9104, "step": 2504 }, { "epoch": 0.79, "learning_rate": 1.1034027670116809e-06, "loss": 1.0002, "step": 2505 }, { "epoch": 0.79, "learning_rate": 1.100199651330816e-06, "loss": 1.1312, "step": 2506 }, { "epoch": 0.79, "learning_rate": 1.0970006167989299e-06, "loss": 1.0932, "step": 2507 }, { "epoch": 0.79, "learning_rate": 1.0938056667638154e-06, "loss": 1.0678, "step": 2508 }, { "epoch": 0.79, "learning_rate": 1.0906148045689968e-06, "loss": 1.0942, "step": 2509 }, { "epoch": 0.79, "learning_rate": 1.0874280335537163e-06, "loss": 1.3277, "step": 2510 }, { "epoch": 0.79, "learning_rate": 1.084245357052937e-06, "loss": 1.0664, "step": 2511 }, { "epoch": 0.79, "learning_rate": 1.0810667783973344e-06, "loss": 1.6466, "step": 2512 }, { "epoch": 0.79, "learning_rate": 1.0778923009133007e-06, "loss": 1.3701, "step": 2513 }, { "epoch": 0.79, "learning_rate": 1.07472192792293e-06, "loss": 1.5514, "step": 2514 }, { "epoch": 0.79, "learning_rate": 1.071555662744026e-06, "loss": 1.1623, "step": 2515 }, { "epoch": 0.79, "learning_rate": 1.0683935086900904e-06, "loss": 1.2644, "step": 2516 }, { "epoch": 0.79, "learning_rate": 1.0652354690703253e-06, "loss": 1.2306, "step": 2517 }, { "epoch": 0.8, "learning_rate": 1.062081547189624e-06, "loss": 1.0859, "step": 2518 }, { "epoch": 0.8, "learning_rate": 1.0589317463485739e-06, "loss": 1.3857, "step": 2519 }, { "epoch": 0.8, "learning_rate": 1.0557860698434452e-06, "loss": 1.3196, "step": 2520 }, { "epoch": 0.8, "learning_rate": 1.0526445209662001e-06, "loss": 1.6497, "step": 2521 }, { "epoch": 0.8, "learning_rate": 1.049507103004469e-06, "loss": 0.9961, "step": 2522 }, { "epoch": 0.8, "learning_rate": 1.0463738192415707e-06, "loss": 0.9908, "step": 2523 }, { "epoch": 0.8, "learning_rate": 1.043244672956491e-06, "loss": 1.2362, "step": 2524 }, { "epoch": 0.8, "learning_rate": 1.0401196674238895e-06, "loss": 1.2951, "step": 2525 }, { "epoch": 0.8, "learning_rate": 1.0369988059140885e-06, "loss": 0.7956, "step": 2526 }, { "epoch": 0.8, "learning_rate": 1.0338820916930787e-06, "loss": 0.7748, "step": 2527 }, { "epoch": 0.8, "learning_rate": 1.0307695280225056e-06, "loss": 1.1807, "step": 2528 }, { "epoch": 0.8, "learning_rate": 1.0276611181596751e-06, "loss": 1.5742, "step": 2529 }, { "epoch": 0.8, "learning_rate": 1.0245568653575422e-06, "loss": 1.1169, "step": 2530 }, { "epoch": 0.8, "learning_rate": 1.0214567728647169e-06, "loss": 0.8599, "step": 2531 }, { "epoch": 0.8, "learning_rate": 1.0183608439254506e-06, "loss": 1.1792, "step": 2532 }, { "epoch": 0.8, "learning_rate": 1.0152690817796384e-06, "loss": 1.3896, "step": 2533 }, { "epoch": 0.8, "learning_rate": 1.0121814896628157e-06, "loss": 1.4991, "step": 2534 }, { "epoch": 0.8, "learning_rate": 1.009098070806156e-06, "loss": 1.4454, "step": 2535 }, { "epoch": 0.8, "learning_rate": 1.0060188284364603e-06, "loss": 1.3454, "step": 2536 }, { "epoch": 0.8, "learning_rate": 1.0029437657761653e-06, "loss": 1.3012, "step": 2537 }, { "epoch": 0.8, "learning_rate": 9.998728860433277e-07, "loss": 1.4535, "step": 2538 }, { "epoch": 0.8, "learning_rate": 9.968061924516316e-07, "loss": 1.1844, "step": 2539 }, { "epoch": 0.8, "learning_rate": 9.937436882103752e-07, "loss": 1.1257, "step": 2540 }, { "epoch": 0.8, "learning_rate": 9.906853765244783e-07, "loss": 1.1475, "step": 2541 }, { "epoch": 0.8, "learning_rate": 9.876312605944676e-07, "loss": 0.6925, "step": 2542 }, { "epoch": 0.8, "learning_rate": 9.845813436164863e-07, "loss": 1.106, "step": 2543 }, { "epoch": 0.8, "learning_rate": 9.815356287822725e-07, "loss": 1.0523, "step": 2544 }, { "epoch": 0.8, "learning_rate": 9.784941192791774e-07, "loss": 1.6344, "step": 2545 }, { "epoch": 0.8, "learning_rate": 9.754568182901436e-07, "loss": 1.2805, "step": 2546 }, { "epoch": 0.8, "learning_rate": 9.724237289937167e-07, "loss": 0.8743, "step": 2547 }, { "epoch": 0.8, "learning_rate": 9.693948545640263e-07, "loss": 1.0456, "step": 2548 }, { "epoch": 0.8, "learning_rate": 9.663701981708e-07, "loss": 1.4261, "step": 2549 }, { "epoch": 0.81, "learning_rate": 9.633497629793431e-07, "loss": 1.1279, "step": 2550 }, { "epoch": 0.81, "learning_rate": 9.603335521505509e-07, "loss": 1.1531, "step": 2551 }, { "epoch": 0.81, "learning_rate": 9.573215688408899e-07, "loss": 1.0669, "step": 2552 }, { "epoch": 0.81, "learning_rate": 9.543138162024106e-07, "loss": 1.0635, "step": 2553 }, { "epoch": 0.81, "learning_rate": 9.51310297382731e-07, "loss": 1.5675, "step": 2554 }, { "epoch": 0.81, "learning_rate": 9.483110155250386e-07, "loss": 0.8431, "step": 2555 }, { "epoch": 0.81, "learning_rate": 9.453159737680878e-07, "loss": 1.2164, "step": 2556 }, { "epoch": 0.81, "learning_rate": 9.423251752461976e-07, "loss": 1.0644, "step": 2557 }, { "epoch": 0.81, "learning_rate": 9.393386230892432e-07, "loss": 1.2696, "step": 2558 }, { "epoch": 0.81, "learning_rate": 9.363563204226583e-07, "loss": 1.2945, "step": 2559 }, { "epoch": 0.81, "learning_rate": 9.333782703674283e-07, "loss": 1.2768, "step": 2560 }, { "epoch": 0.81, "learning_rate": 9.304044760400893e-07, "loss": 1.0558, "step": 2561 }, { "epoch": 0.81, "learning_rate": 9.274349405527216e-07, "loss": 1.6388, "step": 2562 }, { "epoch": 0.81, "learning_rate": 9.244696670129522e-07, "loss": 1.0952, "step": 2563 }, { "epoch": 0.81, "learning_rate": 9.215086585239452e-07, "loss": 1.2049, "step": 2564 }, { "epoch": 0.81, "learning_rate": 9.18551918184401e-07, "loss": 1.2987, "step": 2565 }, { "epoch": 0.81, "learning_rate": 9.155994490885539e-07, "loss": 0.9099, "step": 2566 }, { "epoch": 0.81, "learning_rate": 9.126512543261712e-07, "loss": 1.3746, "step": 2567 }, { "epoch": 0.81, "learning_rate": 9.097073369825421e-07, "loss": 1.6353, "step": 2568 }, { "epoch": 0.81, "learning_rate": 9.067677001384845e-07, "loss": 1.2233, "step": 2569 }, { "epoch": 0.81, "learning_rate": 9.038323468703319e-07, "loss": 1.2297, "step": 2570 }, { "epoch": 0.81, "learning_rate": 9.009012802499401e-07, "loss": 1.4292, "step": 2571 }, { "epoch": 0.81, "learning_rate": 8.979745033446746e-07, "loss": 1.1514, "step": 2572 }, { "epoch": 0.81, "learning_rate": 8.950520192174128e-07, "loss": 1.6367, "step": 2573 }, { "epoch": 0.81, "learning_rate": 8.92133830926542e-07, "loss": 1.1663, "step": 2574 }, { "epoch": 0.81, "learning_rate": 8.892199415259501e-07, "loss": 1.2574, "step": 2575 }, { "epoch": 0.81, "learning_rate": 8.863103540650303e-07, "loss": 1.3806, "step": 2576 }, { "epoch": 0.81, "learning_rate": 8.83405071588671e-07, "loss": 1.3492, "step": 2577 }, { "epoch": 0.81, "learning_rate": 8.805040971372552e-07, "loss": 1.4684, "step": 2578 }, { "epoch": 0.81, "learning_rate": 8.776074337466605e-07, "loss": 1.3724, "step": 2579 }, { "epoch": 0.81, "learning_rate": 8.747150844482499e-07, "loss": 1.13, "step": 2580 }, { "epoch": 0.81, "learning_rate": 8.718270522688721e-07, "loss": 1.2635, "step": 2581 }, { "epoch": 0.82, "learning_rate": 8.6894334023086e-07, "loss": 1.2137, "step": 2582 }, { "epoch": 0.82, "learning_rate": 8.660639513520225e-07, "loss": 1.0762, "step": 2583 }, { "epoch": 0.82, "learning_rate": 8.631888886456485e-07, "loss": 1.6265, "step": 2584 }, { "epoch": 0.82, "learning_rate": 8.603181551204937e-07, "loss": 1.3128, "step": 2585 }, { "epoch": 0.82, "learning_rate": 8.574517537807897e-07, "loss": 1.0217, "step": 2586 }, { "epoch": 0.82, "learning_rate": 8.545896876262288e-07, "loss": 1.0533, "step": 2587 }, { "epoch": 0.82, "learning_rate": 8.517319596519707e-07, "loss": 1.1178, "step": 2588 }, { "epoch": 0.82, "learning_rate": 8.488785728486338e-07, "loss": 1.2354, "step": 2589 }, { "epoch": 0.82, "learning_rate": 8.460295302022914e-07, "loss": 0.9817, "step": 2590 }, { "epoch": 0.82, "learning_rate": 8.431848346944726e-07, "loss": 1.3711, "step": 2591 }, { "epoch": 0.82, "learning_rate": 8.403444893021589e-07, "loss": 1.4345, "step": 2592 }, { "epoch": 0.82, "learning_rate": 8.375084969977748e-07, "loss": 1.4568, "step": 2593 }, { "epoch": 0.82, "learning_rate": 8.346768607491951e-07, "loss": 0.9195, "step": 2594 }, { "epoch": 0.82, "learning_rate": 8.318495835197299e-07, "loss": 1.4317, "step": 2595 }, { "epoch": 0.82, "learning_rate": 8.29026668268133e-07, "loss": 1.241, "step": 2596 }, { "epoch": 0.82, "learning_rate": 8.262081179485893e-07, "loss": 1.5492, "step": 2597 }, { "epoch": 0.82, "learning_rate": 8.233939355107185e-07, "loss": 1.0236, "step": 2598 }, { "epoch": 0.82, "learning_rate": 8.205841238995682e-07, "loss": 0.9934, "step": 2599 }, { "epoch": 0.82, "learning_rate": 8.177786860556114e-07, "loss": 1.2024, "step": 2600 }, { "epoch": 0.82, "learning_rate": 8.149776249147435e-07, "loss": 1.3978, "step": 2601 }, { "epoch": 0.82, "learning_rate": 8.121809434082834e-07, "loss": 1.2708, "step": 2602 }, { "epoch": 0.82, "learning_rate": 8.093886444629606e-07, "loss": 1.6003, "step": 2603 }, { "epoch": 0.82, "learning_rate": 8.066007310009244e-07, "loss": 1.5201, "step": 2604 }, { "epoch": 0.82, "learning_rate": 8.038172059397298e-07, "loss": 1.1589, "step": 2605 }, { "epoch": 0.82, "learning_rate": 8.010380721923439e-07, "loss": 1.0064, "step": 2606 }, { "epoch": 0.82, "learning_rate": 7.982633326671329e-07, "loss": 1.1158, "step": 2607 }, { "epoch": 0.82, "learning_rate": 7.954929902678704e-07, "loss": 1.5178, "step": 2608 }, { "epoch": 0.82, "learning_rate": 7.927270478937227e-07, "loss": 1.4379, "step": 2609 }, { "epoch": 0.82, "learning_rate": 7.899655084392571e-07, "loss": 1.343, "step": 2610 }, { "epoch": 0.82, "learning_rate": 7.872083747944259e-07, "loss": 1.0024, "step": 2611 }, { "epoch": 0.82, "learning_rate": 7.844556498445788e-07, "loss": 1.2372, "step": 2612 }, { "epoch": 0.83, "learning_rate": 7.817073364704458e-07, "loss": 1.2072, "step": 2613 }, { "epoch": 0.83, "learning_rate": 7.789634375481441e-07, "loss": 1.4992, "step": 2614 }, { "epoch": 0.83, "learning_rate": 7.76223955949168e-07, "loss": 1.2142, "step": 2615 }, { "epoch": 0.83, "learning_rate": 7.734888945403918e-07, "loss": 0.7856, "step": 2616 }, { "epoch": 0.83, "learning_rate": 7.707582561840615e-07, "loss": 1.2255, "step": 2617 }, { "epoch": 0.83, "learning_rate": 7.680320437377981e-07, "loss": 1.0762, "step": 2618 }, { "epoch": 0.83, "learning_rate": 7.653102600545848e-07, "loss": 0.887, "step": 2619 }, { "epoch": 0.83, "learning_rate": 7.625929079827776e-07, "loss": 0.9012, "step": 2620 }, { "epoch": 0.83, "learning_rate": 7.598799903660887e-07, "loss": 1.3086, "step": 2621 }, { "epoch": 0.83, "learning_rate": 7.571715100435923e-07, "loss": 0.7991, "step": 2622 }, { "epoch": 0.83, "learning_rate": 7.544674698497178e-07, "loss": 1.233, "step": 2623 }, { "epoch": 0.83, "learning_rate": 7.517678726142503e-07, "loss": 1.0909, "step": 2624 }, { "epoch": 0.83, "learning_rate": 7.490727211623222e-07, "loss": 1.6354, "step": 2625 }, { "epoch": 0.83, "learning_rate": 7.463820183144166e-07, "loss": 1.0064, "step": 2626 }, { "epoch": 0.83, "learning_rate": 7.436957668863576e-07, "loss": 1.2424, "step": 2627 }, { "epoch": 0.83, "learning_rate": 7.410139696893153e-07, "loss": 0.8533, "step": 2628 }, { "epoch": 0.83, "learning_rate": 7.383366295297928e-07, "loss": 1.0727, "step": 2629 }, { "epoch": 0.83, "learning_rate": 7.356637492096353e-07, "loss": 1.3979, "step": 2630 }, { "epoch": 0.83, "learning_rate": 7.329953315260141e-07, "loss": 1.0578, "step": 2631 }, { "epoch": 0.83, "learning_rate": 7.303313792714378e-07, "loss": 1.0337, "step": 2632 }, { "epoch": 0.83, "learning_rate": 7.276718952337336e-07, "loss": 0.9757, "step": 2633 }, { "epoch": 0.83, "learning_rate": 7.250168821960596e-07, "loss": 1.2025, "step": 2634 }, { "epoch": 0.83, "learning_rate": 7.223663429368905e-07, "loss": 1.5048, "step": 2635 }, { "epoch": 0.83, "learning_rate": 7.197202802300235e-07, "loss": 1.7901, "step": 2636 }, { "epoch": 0.83, "learning_rate": 7.170786968445659e-07, "loss": 1.3726, "step": 2637 }, { "epoch": 0.83, "learning_rate": 7.144415955449418e-07, "loss": 1.5566, "step": 2638 }, { "epoch": 0.83, "learning_rate": 7.118089790908822e-07, "loss": 0.8296, "step": 2639 }, { "epoch": 0.83, "learning_rate": 7.09180850237427e-07, "loss": 1.669, "step": 2640 }, { "epoch": 0.83, "learning_rate": 7.065572117349162e-07, "loss": 1.1205, "step": 2641 }, { "epoch": 0.83, "learning_rate": 7.039380663289947e-07, "loss": 1.4217, "step": 2642 }, { "epoch": 0.83, "learning_rate": 7.013234167606031e-07, "loss": 1.0335, "step": 2643 }, { "epoch": 0.83, "learning_rate": 6.987132657659768e-07, "loss": 1.3056, "step": 2644 }, { "epoch": 0.84, "learning_rate": 6.961076160766433e-07, "loss": 0.7552, "step": 2645 }, { "epoch": 0.84, "learning_rate": 6.935064704194227e-07, "loss": 0.8134, "step": 2646 }, { "epoch": 0.84, "learning_rate": 6.909098315164164e-07, "loss": 1.1936, "step": 2647 }, { "epoch": 0.84, "learning_rate": 6.883177020850157e-07, "loss": 1.3941, "step": 2648 }, { "epoch": 0.84, "learning_rate": 6.857300848378857e-07, "loss": 1.449, "step": 2649 }, { "epoch": 0.84, "learning_rate": 6.831469824829756e-07, "loss": 1.2409, "step": 2650 }, { "epoch": 0.84, "learning_rate": 6.805683977235061e-07, "loss": 1.3894, "step": 2651 }, { "epoch": 0.84, "learning_rate": 6.779943332579725e-07, "loss": 0.8018, "step": 2652 }, { "epoch": 0.84, "learning_rate": 6.754247917801376e-07, "loss": 1.4645, "step": 2653 }, { "epoch": 0.84, "learning_rate": 6.728597759790317e-07, "loss": 1.0636, "step": 2654 }, { "epoch": 0.84, "learning_rate": 6.702992885389487e-07, "loss": 0.9723, "step": 2655 }, { "epoch": 0.84, "learning_rate": 6.67743332139445e-07, "loss": 1.1601, "step": 2656 }, { "epoch": 0.84, "learning_rate": 6.651919094553327e-07, "loss": 1.4274, "step": 2657 }, { "epoch": 0.84, "learning_rate": 6.626450231566827e-07, "loss": 1.2592, "step": 2658 }, { "epoch": 0.84, "learning_rate": 6.601026759088147e-07, "loss": 1.3118, "step": 2659 }, { "epoch": 0.84, "learning_rate": 6.575648703723025e-07, "loss": 1.0256, "step": 2660 }, { "epoch": 0.84, "learning_rate": 6.550316092029619e-07, "loss": 1.1734, "step": 2661 }, { "epoch": 0.84, "learning_rate": 6.525028950518592e-07, "loss": 0.9969, "step": 2662 }, { "epoch": 0.84, "learning_rate": 6.499787305652966e-07, "loss": 0.9503, "step": 2663 }, { "epoch": 0.84, "learning_rate": 6.474591183848167e-07, "loss": 1.1486, "step": 2664 }, { "epoch": 0.84, "learning_rate": 6.449440611472013e-07, "loss": 1.4199, "step": 2665 }, { "epoch": 0.84, "learning_rate": 6.424335614844612e-07, "loss": 1.2161, "step": 2666 }, { "epoch": 0.84, "learning_rate": 6.399276220238387e-07, "loss": 1.0451, "step": 2667 }, { "epoch": 0.84, "learning_rate": 6.374262453878072e-07, "loss": 1.2655, "step": 2668 }, { "epoch": 0.84, "learning_rate": 6.349294341940593e-07, "loss": 1.0696, "step": 2669 }, { "epoch": 0.84, "learning_rate": 6.324371910555155e-07, "loss": 0.9255, "step": 2670 }, { "epoch": 0.84, "learning_rate": 6.299495185803123e-07, "loss": 1.0883, "step": 2671 }, { "epoch": 0.84, "learning_rate": 6.274664193718028e-07, "loss": 1.1656, "step": 2672 }, { "epoch": 0.84, "learning_rate": 6.249878960285572e-07, "loss": 1.4998, "step": 2673 }, { "epoch": 0.84, "learning_rate": 6.22513951144354e-07, "loss": 1.0526, "step": 2674 }, { "epoch": 0.84, "learning_rate": 6.200445873081828e-07, "loss": 0.814, "step": 2675 }, { "epoch": 0.84, "learning_rate": 6.175798071042377e-07, "loss": 1.6332, "step": 2676 }, { "epoch": 0.85, "learning_rate": 6.151196131119142e-07, "loss": 1.3827, "step": 2677 }, { "epoch": 0.85, "learning_rate": 6.126640079058128e-07, "loss": 1.3257, "step": 2678 }, { "epoch": 0.85, "learning_rate": 6.102129940557266e-07, "loss": 1.07, "step": 2679 }, { "epoch": 0.85, "learning_rate": 6.077665741266498e-07, "loss": 1.3251, "step": 2680 }, { "epoch": 0.85, "learning_rate": 6.053247506787641e-07, "loss": 1.1748, "step": 2681 }, { "epoch": 0.85, "learning_rate": 6.028875262674422e-07, "loss": 1.1279, "step": 2682 }, { "epoch": 0.85, "learning_rate": 6.004549034432456e-07, "loss": 1.022, "step": 2683 }, { "epoch": 0.85, "learning_rate": 5.980268847519171e-07, "loss": 1.4961, "step": 2684 }, { "epoch": 0.85, "learning_rate": 5.956034727343862e-07, "loss": 0.8757, "step": 2685 }, { "epoch": 0.85, "learning_rate": 5.931846699267558e-07, "loss": 1.3287, "step": 2686 }, { "epoch": 0.85, "learning_rate": 5.907704788603091e-07, "loss": 1.3226, "step": 2687 }, { "epoch": 0.85, "learning_rate": 5.883609020615027e-07, "loss": 1.1705, "step": 2688 }, { "epoch": 0.85, "learning_rate": 5.859559420519622e-07, "loss": 1.4463, "step": 2689 }, { "epoch": 0.85, "learning_rate": 5.835556013484828e-07, "loss": 1.1338, "step": 2690 }, { "epoch": 0.85, "learning_rate": 5.811598824630282e-07, "loss": 1.1235, "step": 2691 }, { "epoch": 0.85, "learning_rate": 5.787687879027199e-07, "loss": 1.1521, "step": 2692 }, { "epoch": 0.85, "learning_rate": 5.763823201698465e-07, "loss": 1.0806, "step": 2693 }, { "epoch": 0.85, "learning_rate": 5.740004817618483e-07, "loss": 0.9825, "step": 2694 }, { "epoch": 0.85, "learning_rate": 5.716232751713269e-07, "loss": 0.9527, "step": 2695 }, { "epoch": 0.85, "learning_rate": 5.692507028860311e-07, "loss": 0.832, "step": 2696 }, { "epoch": 0.85, "learning_rate": 5.66882767388865e-07, "loss": 0.9988, "step": 2697 }, { "epoch": 0.85, "learning_rate": 5.645194711578766e-07, "loss": 1.3175, "step": 2698 }, { "epoch": 0.85, "learning_rate": 5.621608166662607e-07, "loss": 1.1813, "step": 2699 }, { "epoch": 0.85, "learning_rate": 5.59806806382353e-07, "loss": 1.0226, "step": 2700 }, { "epoch": 0.85, "learning_rate": 5.574574427696311e-07, "loss": 1.0255, "step": 2701 }, { "epoch": 0.85, "learning_rate": 5.55112728286708e-07, "loss": 0.962, "step": 2702 }, { "epoch": 0.85, "learning_rate": 5.527726653873333e-07, "loss": 1.0139, "step": 2703 }, { "epoch": 0.85, "learning_rate": 5.504372565203863e-07, "loss": 1.2044, "step": 2704 }, { "epoch": 0.85, "learning_rate": 5.481065041298783e-07, "loss": 1.2445, "step": 2705 }, { "epoch": 0.85, "learning_rate": 5.457804106549453e-07, "loss": 1.8073, "step": 2706 }, { "epoch": 0.85, "learning_rate": 5.434589785298499e-07, "loss": 0.9274, "step": 2707 }, { "epoch": 0.86, "learning_rate": 5.411422101839742e-07, "loss": 0.9669, "step": 2708 }, { "epoch": 0.86, "learning_rate": 5.388301080418234e-07, "loss": 0.9656, "step": 2709 }, { "epoch": 0.86, "learning_rate": 5.36522674523014e-07, "loss": 1.3918, "step": 2710 }, { "epoch": 0.86, "learning_rate": 5.34219912042282e-07, "loss": 1.6329, "step": 2711 }, { "epoch": 0.86, "learning_rate": 5.319218230094708e-07, "loss": 1.2656, "step": 2712 }, { "epoch": 0.86, "learning_rate": 5.296284098295384e-07, "loss": 1.3379, "step": 2713 }, { "epoch": 0.86, "learning_rate": 5.273396749025439e-07, "loss": 1.5828, "step": 2714 }, { "epoch": 0.86, "learning_rate": 5.25055620623654e-07, "loss": 1.1476, "step": 2715 }, { "epoch": 0.86, "learning_rate": 5.227762493831357e-07, "loss": 1.7023, "step": 2716 }, { "epoch": 0.86, "learning_rate": 5.205015635663568e-07, "loss": 1.3366, "step": 2717 }, { "epoch": 0.86, "learning_rate": 5.182315655537784e-07, "loss": 1.4207, "step": 2718 }, { "epoch": 0.86, "learning_rate": 5.159662577209607e-07, "loss": 1.0596, "step": 2719 }, { "epoch": 0.86, "learning_rate": 5.137056424385512e-07, "loss": 1.5967, "step": 2720 }, { "epoch": 0.86, "learning_rate": 5.114497220722886e-07, "loss": 0.7993, "step": 2721 }, { "epoch": 0.86, "learning_rate": 5.091984989829979e-07, "loss": 1.1116, "step": 2722 }, { "epoch": 0.86, "learning_rate": 5.0695197552659e-07, "loss": 1.3998, "step": 2723 }, { "epoch": 0.86, "learning_rate": 5.047101540540538e-07, "loss": 1.1392, "step": 2724 }, { "epoch": 0.86, "learning_rate": 5.024730369114628e-07, "loss": 1.4137, "step": 2725 }, { "epoch": 0.86, "learning_rate": 5.002406264399623e-07, "loss": 1.0393, "step": 2726 }, { "epoch": 0.86, "learning_rate": 4.980129249757765e-07, "loss": 1.3221, "step": 2727 }, { "epoch": 0.86, "learning_rate": 4.957899348501977e-07, "loss": 1.0195, "step": 2728 }, { "epoch": 0.86, "learning_rate": 4.935716583895911e-07, "loss": 1.4966, "step": 2729 }, { "epoch": 0.86, "learning_rate": 4.913580979153853e-07, "loss": 1.2591, "step": 2730 }, { "epoch": 0.86, "learning_rate": 4.891492557440796e-07, "loss": 1.2113, "step": 2731 }, { "epoch": 0.86, "learning_rate": 4.869451341872278e-07, "loss": 1.3923, "step": 2732 }, { "epoch": 0.86, "learning_rate": 4.847457355514496e-07, "loss": 1.1392, "step": 2733 }, { "epoch": 0.86, "learning_rate": 4.825510621384193e-07, "loss": 1.2803, "step": 2734 }, { "epoch": 0.86, "learning_rate": 4.803611162448679e-07, "loss": 1.2117, "step": 2735 }, { "epoch": 0.86, "learning_rate": 4.781759001625775e-07, "loss": 0.9173, "step": 2736 }, { "epoch": 0.86, "learning_rate": 4.759954161783825e-07, "loss": 1.3542, "step": 2737 }, { "epoch": 0.86, "learning_rate": 4.738196665741618e-07, "loss": 1.5853, "step": 2738 }, { "epoch": 0.86, "learning_rate": 4.716486536268439e-07, "loss": 0.9471, "step": 2739 }, { "epoch": 0.87, "learning_rate": 4.694823796083958e-07, "loss": 0.9707, "step": 2740 }, { "epoch": 0.87, "learning_rate": 4.6732084678583035e-07, "loss": 1.1214, "step": 2741 }, { "epoch": 0.87, "learning_rate": 4.651640574211946e-07, "loss": 1.2242, "step": 2742 }, { "epoch": 0.87, "learning_rate": 4.6301201377157323e-07, "loss": 1.297, "step": 2743 }, { "epoch": 0.87, "learning_rate": 4.6086471808908296e-07, "loss": 1.1337, "step": 2744 }, { "epoch": 0.87, "learning_rate": 4.587221726208757e-07, "loss": 1.4213, "step": 2745 }, { "epoch": 0.87, "learning_rate": 4.5658437960912684e-07, "loss": 1.2962, "step": 2746 }, { "epoch": 0.87, "learning_rate": 4.544513412910434e-07, "loss": 1.2082, "step": 2747 }, { "epoch": 0.87, "learning_rate": 4.523230598988526e-07, "loss": 1.1665, "step": 2748 }, { "epoch": 0.87, "learning_rate": 4.501995376598073e-07, "loss": 1.0407, "step": 2749 }, { "epoch": 0.87, "learning_rate": 4.480807767961753e-07, "loss": 1.1038, "step": 2750 }, { "epoch": 0.87, "learning_rate": 4.459667795252465e-07, "loss": 1.2021, "step": 2751 }, { "epoch": 0.87, "learning_rate": 4.43857548059321e-07, "loss": 1.5058, "step": 2752 }, { "epoch": 0.87, "learning_rate": 4.417530846057172e-07, "loss": 0.827, "step": 2753 }, { "epoch": 0.87, "learning_rate": 4.396533913667561e-07, "loss": 1.6036, "step": 2754 }, { "epoch": 0.87, "learning_rate": 4.37558470539774e-07, "loss": 1.2622, "step": 2755 }, { "epoch": 0.87, "learning_rate": 4.3546832431710815e-07, "loss": 1.148, "step": 2756 }, { "epoch": 0.87, "learning_rate": 4.333829548861024e-07, "loss": 1.1814, "step": 2757 }, { "epoch": 0.87, "learning_rate": 4.313023644290981e-07, "loss": 1.3412, "step": 2758 }, { "epoch": 0.87, "learning_rate": 4.292265551234398e-07, "loss": 1.3309, "step": 2759 }, { "epoch": 0.87, "learning_rate": 4.271555291414636e-07, "loss": 1.2148, "step": 2760 }, { "epoch": 0.87, "learning_rate": 4.2508928865050427e-07, "loss": 1.1958, "step": 2761 }, { "epoch": 0.87, "learning_rate": 4.2302783581288643e-07, "loss": 1.6366, "step": 2762 }, { "epoch": 0.87, "learning_rate": 4.209711727859234e-07, "loss": 1.418, "step": 2763 }, { "epoch": 0.87, "learning_rate": 4.18919301721919e-07, "loss": 1.2679, "step": 2764 }, { "epoch": 0.87, "learning_rate": 4.168722247681589e-07, "loss": 1.2219, "step": 2765 }, { "epoch": 0.87, "learning_rate": 4.1482994406691346e-07, "loss": 1.4274, "step": 2766 }, { "epoch": 0.87, "learning_rate": 4.127924617554352e-07, "loss": 1.1987, "step": 2767 }, { "epoch": 0.87, "learning_rate": 4.107597799659513e-07, "loss": 0.9905, "step": 2768 }, { "epoch": 0.87, "learning_rate": 4.087319008256691e-07, "loss": 1.0282, "step": 2769 }, { "epoch": 0.87, "learning_rate": 4.067088264567676e-07, "loss": 1.0862, "step": 2770 }, { "epoch": 0.87, "learning_rate": 4.046905589763972e-07, "loss": 1.0844, "step": 2771 }, { "epoch": 0.88, "learning_rate": 4.026771004966812e-07, "loss": 0.88, "step": 2772 }, { "epoch": 0.88, "learning_rate": 4.0066845312470517e-07, "loss": 1.2051, "step": 2773 }, { "epoch": 0.88, "learning_rate": 3.986646189625254e-07, "loss": 1.132, "step": 2774 }, { "epoch": 0.88, "learning_rate": 3.966656001071573e-07, "loss": 1.0051, "step": 2775 }, { "epoch": 0.88, "learning_rate": 3.946713986505768e-07, "loss": 0.8415, "step": 2776 }, { "epoch": 0.88, "learning_rate": 3.926820166797218e-07, "loss": 1.1076, "step": 2777 }, { "epoch": 0.88, "learning_rate": 3.9069745627648316e-07, "loss": 1.3329, "step": 2778 }, { "epoch": 0.88, "learning_rate": 3.8871771951770885e-07, "loss": 0.99, "step": 2779 }, { "epoch": 0.88, "learning_rate": 3.8674280847519676e-07, "loss": 1.316, "step": 2780 }, { "epoch": 0.88, "learning_rate": 3.847727252156935e-07, "loss": 1.0053, "step": 2781 }, { "epoch": 0.88, "learning_rate": 3.828074718008978e-07, "loss": 1.3877, "step": 2782 }, { "epoch": 0.88, "learning_rate": 3.8084705028745007e-07, "loss": 1.2829, "step": 2783 }, { "epoch": 0.88, "learning_rate": 3.788914627269358e-07, "loss": 1.0596, "step": 2784 }, { "epoch": 0.88, "learning_rate": 3.7694071116588073e-07, "loss": 1.0699, "step": 2785 }, { "epoch": 0.88, "learning_rate": 3.749947976457513e-07, "loss": 0.8764, "step": 2786 }, { "epoch": 0.88, "learning_rate": 3.730537242029497e-07, "loss": 1.2743, "step": 2787 }, { "epoch": 0.88, "learning_rate": 3.7111749286881325e-07, "loss": 1.0532, "step": 2788 }, { "epoch": 0.88, "learning_rate": 3.6918610566961056e-07, "loss": 1.4215, "step": 2789 }, { "epoch": 0.88, "learning_rate": 3.672595646265437e-07, "loss": 0.9761, "step": 2790 }, { "epoch": 0.88, "learning_rate": 3.65337871755741e-07, "loss": 1.3617, "step": 2791 }, { "epoch": 0.88, "learning_rate": 3.634210290682594e-07, "loss": 1.3422, "step": 2792 }, { "epoch": 0.88, "learning_rate": 3.61509038570077e-07, "loss": 1.107, "step": 2793 }, { "epoch": 0.88, "learning_rate": 3.596019022620967e-07, "loss": 1.1631, "step": 2794 }, { "epoch": 0.88, "learning_rate": 3.5769962214013963e-07, "loss": 1.0722, "step": 2795 }, { "epoch": 0.88, "learning_rate": 3.5580220019494737e-07, "loss": 1.6214, "step": 2796 }, { "epoch": 0.88, "learning_rate": 3.539096384121743e-07, "loss": 1.6432, "step": 2797 }, { "epoch": 0.88, "learning_rate": 3.520219387723911e-07, "loss": 1.0459, "step": 2798 }, { "epoch": 0.88, "learning_rate": 3.5013910325107815e-07, "loss": 1.2581, "step": 2799 }, { "epoch": 0.88, "learning_rate": 3.482611338186281e-07, "loss": 1.078, "step": 2800 }, { "epoch": 0.88, "learning_rate": 3.4638803244033794e-07, "loss": 1.3518, "step": 2801 }, { "epoch": 0.88, "learning_rate": 3.445198010764145e-07, "loss": 1.2666, "step": 2802 }, { "epoch": 0.88, "learning_rate": 3.426564416819633e-07, "loss": 1.3697, "step": 2803 }, { "epoch": 0.89, "learning_rate": 3.407979562069957e-07, "loss": 1.1798, "step": 2804 }, { "epoch": 0.89, "learning_rate": 3.3894434659641875e-07, "loss": 0.9262, "step": 2805 }, { "epoch": 0.89, "learning_rate": 3.3709561479004006e-07, "loss": 1.0714, "step": 2806 }, { "epoch": 0.89, "learning_rate": 3.352517627225599e-07, "loss": 1.7714, "step": 2807 }, { "epoch": 0.89, "learning_rate": 3.3341279232357583e-07, "loss": 1.0975, "step": 2808 }, { "epoch": 0.89, "learning_rate": 3.3157870551757066e-07, "loss": 1.0989, "step": 2809 }, { "epoch": 0.89, "learning_rate": 3.2974950422392237e-07, "loss": 0.9587, "step": 2810 }, { "epoch": 0.89, "learning_rate": 3.2792519035689195e-07, "loss": 1.1501, "step": 2811 }, { "epoch": 0.89, "learning_rate": 3.2610576582562947e-07, "loss": 1.6036, "step": 2812 }, { "epoch": 0.89, "learning_rate": 3.2429123253416515e-07, "loss": 1.3471, "step": 2813 }, { "epoch": 0.89, "learning_rate": 3.224815923814123e-07, "loss": 1.2162, "step": 2814 }, { "epoch": 0.89, "learning_rate": 3.206768472611627e-07, "loss": 0.9524, "step": 2815 }, { "epoch": 0.89, "learning_rate": 3.188769990620866e-07, "loss": 1.0695, "step": 2816 }, { "epoch": 0.89, "learning_rate": 3.170820496677268e-07, "loss": 0.9603, "step": 2817 }, { "epoch": 0.89, "learning_rate": 3.1529200095650414e-07, "loss": 1.7772, "step": 2818 }, { "epoch": 0.89, "learning_rate": 3.135068548017062e-07, "loss": 1.5623, "step": 2819 }, { "epoch": 0.89, "learning_rate": 3.117266130714941e-07, "loss": 1.4154, "step": 2820 }, { "epoch": 0.89, "learning_rate": 3.0995127762889154e-07, "loss": 1.3539, "step": 2821 }, { "epoch": 0.89, "learning_rate": 3.0818085033179293e-07, "loss": 1.391, "step": 2822 }, { "epoch": 0.89, "learning_rate": 3.0641533303295225e-07, "loss": 1.5469, "step": 2823 }, { "epoch": 0.89, "learning_rate": 3.0465472757998936e-07, "loss": 0.984, "step": 2824 }, { "epoch": 0.89, "learning_rate": 3.0289903581537884e-07, "loss": 1.1741, "step": 2825 }, { "epoch": 0.89, "learning_rate": 3.011482595764581e-07, "loss": 1.0969, "step": 2826 }, { "epoch": 0.89, "learning_rate": 2.994024006954155e-07, "loss": 1.4022, "step": 2827 }, { "epoch": 0.89, "learning_rate": 2.976614609992984e-07, "loss": 1.1365, "step": 2828 }, { "epoch": 0.89, "learning_rate": 2.9592544231000163e-07, "loss": 1.0451, "step": 2829 }, { "epoch": 0.89, "learning_rate": 2.9419434644427424e-07, "loss": 1.0716, "step": 2830 }, { "epoch": 0.89, "learning_rate": 2.924681752137098e-07, "loss": 1.2151, "step": 2831 }, { "epoch": 0.89, "learning_rate": 2.907469304247512e-07, "loss": 1.2518, "step": 2832 }, { "epoch": 0.89, "learning_rate": 2.890306138786836e-07, "loss": 1.239, "step": 2833 }, { "epoch": 0.89, "learning_rate": 2.873192273716369e-07, "loss": 0.8341, "step": 2834 }, { "epoch": 0.9, "learning_rate": 2.85612772694579e-07, "loss": 1.1089, "step": 2835 }, { "epoch": 0.9, "learning_rate": 2.8391125163332033e-07, "loss": 1.6971, "step": 2836 }, { "epoch": 0.9, "learning_rate": 2.822146659685038e-07, "loss": 1.1266, "step": 2837 }, { "epoch": 0.9, "learning_rate": 2.805230174756113e-07, "loss": 1.1206, "step": 2838 }, { "epoch": 0.9, "learning_rate": 2.7883630792495566e-07, "loss": 2.3043, "step": 2839 }, { "epoch": 0.9, "learning_rate": 2.771545390816827e-07, "loss": 1.191, "step": 2840 }, { "epoch": 0.9, "learning_rate": 2.7547771270576616e-07, "loss": 0.9288, "step": 2841 }, { "epoch": 0.9, "learning_rate": 2.7380583055200915e-07, "loss": 1.1885, "step": 2842 }, { "epoch": 0.9, "learning_rate": 2.7213889437003816e-07, "loss": 1.1251, "step": 2843 }, { "epoch": 0.9, "learning_rate": 2.7047690590430664e-07, "loss": 1.227, "step": 2844 }, { "epoch": 0.9, "learning_rate": 2.688198668940878e-07, "loss": 1.043, "step": 2845 }, { "epoch": 0.9, "learning_rate": 2.671677790734778e-07, "loss": 1.2331, "step": 2846 }, { "epoch": 0.9, "learning_rate": 2.6552064417138814e-07, "loss": 1.4159, "step": 2847 }, { "epoch": 0.9, "learning_rate": 2.63878463911551e-07, "loss": 1.1998, "step": 2848 }, { "epoch": 0.9, "learning_rate": 2.6224124001250903e-07, "loss": 1.3111, "step": 2849 }, { "epoch": 0.9, "learning_rate": 2.6060897418762165e-07, "loss": 1.1194, "step": 2850 }, { "epoch": 0.9, "learning_rate": 2.5898166814505796e-07, "loss": 1.2133, "step": 2851 }, { "epoch": 0.9, "learning_rate": 2.5735932358779804e-07, "loss": 1.0037, "step": 2852 }, { "epoch": 0.9, "learning_rate": 2.557419422136276e-07, "loss": 1.1619, "step": 2853 }, { "epoch": 0.9, "learning_rate": 2.5412952571513995e-07, "loss": 1.4279, "step": 2854 }, { "epoch": 0.9, "learning_rate": 2.52522075779732e-07, "loss": 1.2277, "step": 2855 }, { "epoch": 0.9, "learning_rate": 2.5091959408960363e-07, "loss": 1.2246, "step": 2856 }, { "epoch": 0.9, "learning_rate": 2.4932208232175456e-07, "loss": 1.1306, "step": 2857 }, { "epoch": 0.9, "learning_rate": 2.4772954214798527e-07, "loss": 1.5332, "step": 2858 }, { "epoch": 0.9, "learning_rate": 2.4614197523489094e-07, "loss": 0.9712, "step": 2859 }, { "epoch": 0.9, "learning_rate": 2.4455938324386476e-07, "loss": 1.0894, "step": 2860 }, { "epoch": 0.9, "learning_rate": 2.4298176783109197e-07, "loss": 1.1669, "step": 2861 }, { "epoch": 0.9, "learning_rate": 2.4140913064754966e-07, "loss": 1.3793, "step": 2862 }, { "epoch": 0.9, "learning_rate": 2.398414733390075e-07, "loss": 0.8263, "step": 2863 }, { "epoch": 0.9, "learning_rate": 2.3827879754602046e-07, "loss": 0.7067, "step": 2864 }, { "epoch": 0.9, "learning_rate": 2.3672110490393196e-07, "loss": 1.4442, "step": 2865 }, { "epoch": 0.9, "learning_rate": 2.3516839704287198e-07, "loss": 1.0651, "step": 2866 }, { "epoch": 0.91, "learning_rate": 2.3362067558775014e-07, "loss": 1.1949, "step": 2867 }, { "epoch": 0.91, "learning_rate": 2.3207794215826252e-07, "loss": 1.2781, "step": 2868 }, { "epoch": 0.91, "learning_rate": 2.3054019836888153e-07, "loss": 0.9943, "step": 2869 }, { "epoch": 0.91, "learning_rate": 2.290074458288588e-07, "loss": 1.2114, "step": 2870 }, { "epoch": 0.91, "learning_rate": 2.274796861422246e-07, "loss": 1.2809, "step": 2871 }, { "epoch": 0.91, "learning_rate": 2.2595692090778e-07, "loss": 1.676, "step": 2872 }, { "epoch": 0.91, "learning_rate": 2.2443915171910414e-07, "loss": 1.328, "step": 2873 }, { "epoch": 0.91, "learning_rate": 2.2292638016454427e-07, "loss": 1.2026, "step": 2874 }, { "epoch": 0.91, "learning_rate": 2.2141860782722014e-07, "loss": 0.7147, "step": 2875 }, { "epoch": 0.91, "learning_rate": 2.199158362850179e-07, "loss": 1.1235, "step": 2876 }, { "epoch": 0.91, "learning_rate": 2.184180671105901e-07, "loss": 1.3642, "step": 2877 }, { "epoch": 0.91, "learning_rate": 2.169253018713563e-07, "loss": 1.2025, "step": 2878 }, { "epoch": 0.91, "learning_rate": 2.1543754212949853e-07, "loss": 1.0308, "step": 2879 }, { "epoch": 0.91, "learning_rate": 2.1395478944195912e-07, "loss": 1.1187, "step": 2880 }, { "epoch": 0.91, "learning_rate": 2.12477045360443e-07, "loss": 1.4205, "step": 2881 }, { "epoch": 0.91, "learning_rate": 2.1100431143141198e-07, "loss": 1.2201, "step": 2882 }, { "epoch": 0.91, "learning_rate": 2.095365891960849e-07, "loss": 1.4288, "step": 2883 }, { "epoch": 0.91, "learning_rate": 2.0807388019043585e-07, "loss": 1.2422, "step": 2884 }, { "epoch": 0.91, "learning_rate": 2.0661618594519428e-07, "loss": 1.1008, "step": 2885 }, { "epoch": 0.91, "learning_rate": 2.0516350798583828e-07, "loss": 1.2629, "step": 2886 }, { "epoch": 0.91, "learning_rate": 2.0371584783259956e-07, "loss": 0.771, "step": 2887 }, { "epoch": 0.91, "learning_rate": 2.0227320700045571e-07, "loss": 1.1467, "step": 2888 }, { "epoch": 0.91, "learning_rate": 2.0083558699913576e-07, "loss": 0.893, "step": 2889 }, { "epoch": 0.91, "learning_rate": 1.9940298933310954e-07, "loss": 1.4488, "step": 2890 }, { "epoch": 0.91, "learning_rate": 1.9797541550159504e-07, "loss": 1.1371, "step": 2891 }, { "epoch": 0.91, "learning_rate": 1.965528669985506e-07, "loss": 0.9628, "step": 2892 }, { "epoch": 0.91, "learning_rate": 1.951353453126764e-07, "loss": 1.349, "step": 2893 }, { "epoch": 0.91, "learning_rate": 1.937228519274109e-07, "loss": 1.4286, "step": 2894 }, { "epoch": 0.91, "learning_rate": 1.9231538832093278e-07, "loss": 1.0246, "step": 2895 }, { "epoch": 0.91, "learning_rate": 1.909129559661538e-07, "loss": 1.441, "step": 2896 }, { "epoch": 0.91, "learning_rate": 1.8951555633072438e-07, "loss": 0.957, "step": 2897 }, { "epoch": 0.91, "learning_rate": 1.8812319087702368e-07, "loss": 1.0669, "step": 2898 }, { "epoch": 0.92, "learning_rate": 1.8673586106216723e-07, "loss": 1.2154, "step": 2899 }, { "epoch": 0.92, "learning_rate": 1.8535356833799646e-07, "loss": 1.1441, "step": 2900 }, { "epoch": 0.92, "learning_rate": 1.8397631415108474e-07, "loss": 1.2061, "step": 2901 }, { "epoch": 0.92, "learning_rate": 1.8260409994273086e-07, "loss": 1.0569, "step": 2902 }, { "epoch": 0.92, "learning_rate": 1.8123692714896056e-07, "loss": 0.8535, "step": 2903 }, { "epoch": 0.92, "learning_rate": 1.7987479720052215e-07, "loss": 1.171, "step": 2904 }, { "epoch": 0.92, "learning_rate": 1.7851771152288754e-07, "loss": 1.2417, "step": 2905 }, { "epoch": 0.92, "learning_rate": 1.7716567153624963e-07, "loss": 1.0653, "step": 2906 }, { "epoch": 0.92, "learning_rate": 1.758186786555216e-07, "loss": 1.1656, "step": 2907 }, { "epoch": 0.92, "learning_rate": 1.7447673429033361e-07, "loss": 0.965, "step": 2908 }, { "epoch": 0.92, "learning_rate": 1.7313983984503347e-07, "loss": 1.2567, "step": 2909 }, { "epoch": 0.92, "learning_rate": 1.718079967186831e-07, "loss": 1.0252, "step": 2910 }, { "epoch": 0.92, "learning_rate": 1.7048120630506038e-07, "loss": 1.1312, "step": 2911 }, { "epoch": 0.92, "learning_rate": 1.69159469992653e-07, "loss": 0.9461, "step": 2912 }, { "epoch": 0.92, "learning_rate": 1.6784278916466112e-07, "loss": 1.1712, "step": 2913 }, { "epoch": 0.92, "learning_rate": 1.6653116519899426e-07, "loss": 1.403, "step": 2914 }, { "epoch": 0.92, "learning_rate": 1.652245994682694e-07, "loss": 0.8427, "step": 2915 }, { "epoch": 0.92, "learning_rate": 1.639230933398095e-07, "loss": 1.1921, "step": 2916 }, { "epoch": 0.92, "learning_rate": 1.6262664817564444e-07, "loss": 1.5892, "step": 2917 }, { "epoch": 0.92, "learning_rate": 1.6133526533250566e-07, "loss": 1.4172, "step": 2918 }, { "epoch": 0.92, "learning_rate": 1.6004894616182932e-07, "loss": 0.934, "step": 2919 }, { "epoch": 0.92, "learning_rate": 1.5876769200974972e-07, "loss": 1.451, "step": 2920 }, { "epoch": 0.92, "learning_rate": 1.574915042171027e-07, "loss": 1.3397, "step": 2921 }, { "epoch": 0.92, "learning_rate": 1.5622038411942042e-07, "loss": 1.2028, "step": 2922 }, { "epoch": 0.92, "learning_rate": 1.5495433304693385e-07, "loss": 1.3911, "step": 2923 }, { "epoch": 0.92, "learning_rate": 1.5369335232456707e-07, "loss": 1.0937, "step": 2924 }, { "epoch": 0.92, "learning_rate": 1.5243744327193998e-07, "loss": 1.0885, "step": 2925 }, { "epoch": 0.92, "learning_rate": 1.5118660720336297e-07, "loss": 1.2987, "step": 2926 }, { "epoch": 0.92, "learning_rate": 1.4994084542783894e-07, "loss": 1.3652, "step": 2927 }, { "epoch": 0.92, "learning_rate": 1.4870015924905945e-07, "loss": 1.3591, "step": 2928 }, { "epoch": 0.92, "learning_rate": 1.4746454996540705e-07, "loss": 1.1311, "step": 2929 }, { "epoch": 0.93, "learning_rate": 1.4623401886994738e-07, "loss": 1.1719, "step": 2930 }, { "epoch": 0.93, "learning_rate": 1.4500856725043423e-07, "loss": 1.0476, "step": 2931 }, { "epoch": 0.93, "learning_rate": 1.437881963893051e-07, "loss": 1.3598, "step": 2932 }, { "epoch": 0.93, "learning_rate": 1.425729075636817e-07, "loss": 1.1348, "step": 2933 }, { "epoch": 0.93, "learning_rate": 1.4136270204536451e-07, "loss": 1.0516, "step": 2934 }, { "epoch": 0.93, "learning_rate": 1.401575811008371e-07, "loss": 1.1478, "step": 2935 }, { "epoch": 0.93, "learning_rate": 1.3895754599126065e-07, "loss": 1.2535, "step": 2936 }, { "epoch": 0.93, "learning_rate": 1.3776259797247504e-07, "loss": 1.4604, "step": 2937 }, { "epoch": 0.93, "learning_rate": 1.3657273829499496e-07, "loss": 1.1531, "step": 2938 }, { "epoch": 0.93, "learning_rate": 1.3538796820401158e-07, "loss": 1.1325, "step": 2939 }, { "epoch": 0.93, "learning_rate": 1.342082889393881e-07, "loss": 1.0293, "step": 2940 }, { "epoch": 0.93, "learning_rate": 1.330337017356631e-07, "loss": 1.3196, "step": 2941 }, { "epoch": 0.93, "learning_rate": 1.3186420782204278e-07, "loss": 1.0763, "step": 2942 }, { "epoch": 0.93, "learning_rate": 1.3069980842240592e-07, "loss": 1.203, "step": 2943 }, { "epoch": 0.93, "learning_rate": 1.2954050475529778e-07, "loss": 1.1639, "step": 2944 }, { "epoch": 0.93, "learning_rate": 1.2838629803393343e-07, "loss": 0.7174, "step": 2945 }, { "epoch": 0.93, "learning_rate": 1.2723718946619112e-07, "loss": 1.3374, "step": 2946 }, { "epoch": 0.93, "learning_rate": 1.260931802546167e-07, "loss": 1.1897, "step": 2947 }, { "epoch": 0.93, "learning_rate": 1.2495427159641694e-07, "loss": 1.0549, "step": 2948 }, { "epoch": 0.93, "learning_rate": 1.2382046468346286e-07, "loss": 1.085, "step": 2949 }, { "epoch": 0.93, "learning_rate": 1.2269176070228484e-07, "loss": 1.1494, "step": 2950 }, { "epoch": 0.93, "learning_rate": 1.2156816083407463e-07, "loss": 1.2808, "step": 2951 }, { "epoch": 0.93, "learning_rate": 1.2044966625468224e-07, "loss": 0.9199, "step": 2952 }, { "epoch": 0.93, "learning_rate": 1.1933627813461358e-07, "loss": 1.0302, "step": 2953 }, { "epoch": 0.93, "learning_rate": 1.1822799763903103e-07, "loss": 1.0, "step": 2954 }, { "epoch": 0.93, "learning_rate": 1.1712482592775409e-07, "loss": 1.8445, "step": 2955 }, { "epoch": 0.93, "learning_rate": 1.160267641552526e-07, "loss": 1.0613, "step": 2956 }, { "epoch": 0.93, "learning_rate": 1.149338134706518e-07, "loss": 1.1714, "step": 2957 }, { "epoch": 0.93, "learning_rate": 1.1384597501772509e-07, "loss": 0.9754, "step": 2958 }, { "epoch": 0.93, "learning_rate": 1.1276324993489962e-07, "loss": 0.8852, "step": 2959 }, { "epoch": 0.93, "learning_rate": 1.1168563935524845e-07, "loss": 1.0869, "step": 2960 }, { "epoch": 0.93, "learning_rate": 1.106131444064923e-07, "loss": 1.4772, "step": 2961 }, { "epoch": 0.94, "learning_rate": 1.095457662110011e-07, "loss": 0.9569, "step": 2962 }, { "epoch": 0.94, "learning_rate": 1.0848350588578748e-07, "loss": 1.4763, "step": 2963 }, { "epoch": 0.94, "learning_rate": 1.0742636454250832e-07, "loss": 1.0702, "step": 2964 }, { "epoch": 0.94, "learning_rate": 1.0637434328746532e-07, "loss": 1.0155, "step": 2965 }, { "epoch": 0.94, "learning_rate": 1.053274432215995e-07, "loss": 1.1266, "step": 2966 }, { "epoch": 0.94, "learning_rate": 1.0428566544049557e-07, "loss": 1.3418, "step": 2967 }, { "epoch": 0.94, "learning_rate": 1.032490110343748e-07, "loss": 1.4808, "step": 2968 }, { "epoch": 0.94, "learning_rate": 1.0221748108809882e-07, "loss": 1.2672, "step": 2969 }, { "epoch": 0.94, "learning_rate": 1.0119107668116578e-07, "loss": 1.3415, "step": 2970 }, { "epoch": 0.94, "learning_rate": 1.0016979888770928e-07, "loss": 1.1016, "step": 2971 }, { "epoch": 0.94, "learning_rate": 9.915364877649991e-08, "loss": 1.0266, "step": 2972 }, { "epoch": 0.94, "learning_rate": 9.814262741093927e-08, "loss": 1.2328, "step": 2973 }, { "epoch": 0.94, "learning_rate": 9.713673584906435e-08, "loss": 1.1566, "step": 2974 }, { "epoch": 0.94, "learning_rate": 9.613597514354255e-08, "loss": 0.8504, "step": 2975 }, { "epoch": 0.94, "learning_rate": 9.514034634167169e-08, "loss": 1.09, "step": 2976 }, { "epoch": 0.94, "learning_rate": 9.41498504853794e-08, "loss": 1.1036, "step": 2977 }, { "epoch": 0.94, "learning_rate": 9.316448861122207e-08, "loss": 1.16, "step": 2978 }, { "epoch": 0.94, "learning_rate": 9.218426175038153e-08, "loss": 1.6032, "step": 2979 }, { "epoch": 0.94, "learning_rate": 9.120917092866887e-08, "loss": 0.886, "step": 2980 }, { "epoch": 0.94, "learning_rate": 9.023921716651729e-08, "loss": 1.5319, "step": 2981 }, { "epoch": 0.94, "learning_rate": 8.927440147898703e-08, "loss": 1.1673, "step": 2982 }, { "epoch": 0.94, "learning_rate": 8.831472487575765e-08, "loss": 1.1731, "step": 2983 }, { "epoch": 0.94, "learning_rate": 8.736018836113414e-08, "loss": 1.1856, "step": 2984 }, { "epoch": 0.94, "learning_rate": 8.64107929340402e-08, "loss": 1.3163, "step": 2985 }, { "epoch": 0.94, "learning_rate": 8.546653958801998e-08, "loss": 1.0206, "step": 2986 }, { "epoch": 0.94, "learning_rate": 8.452742931123692e-08, "loss": 1.2677, "step": 2987 }, { "epoch": 0.94, "learning_rate": 8.35934630864721e-08, "loss": 1.5573, "step": 2988 }, { "epoch": 0.94, "learning_rate": 8.2664641891122e-08, "loss": 1.156, "step": 2989 }, { "epoch": 0.94, "learning_rate": 8.174096669720077e-08, "loss": 1.1389, "step": 2990 }, { "epoch": 0.94, "learning_rate": 8.082243847133575e-08, "loss": 1.1871, "step": 2991 }, { "epoch": 0.94, "learning_rate": 7.990905817476969e-08, "loss": 1.1432, "step": 2992 }, { "epoch": 0.94, "learning_rate": 7.900082676335574e-08, "loss": 1.0755, "step": 2993 }, { "epoch": 0.95, "learning_rate": 7.809774518756085e-08, "loss": 0.9741, "step": 2994 }, { "epoch": 0.95, "learning_rate": 7.719981439246127e-08, "loss": 1.5591, "step": 2995 }, { "epoch": 0.95, "learning_rate": 7.630703531774364e-08, "loss": 1.141, "step": 2996 }, { "epoch": 0.95, "learning_rate": 7.541940889770228e-08, "loss": 1.111, "step": 2997 }, { "epoch": 0.95, "learning_rate": 7.45369360612408e-08, "loss": 1.1215, "step": 2998 }, { "epoch": 0.95, "learning_rate": 7.365961773186824e-08, "loss": 1.3896, "step": 2999 }, { "epoch": 0.95, "learning_rate": 7.278745482770078e-08, "loss": 1.4013, "step": 3000 }, { "epoch": 0.95, "learning_rate": 7.192044826145772e-08, "loss": 1.3268, "step": 3001 }, { "epoch": 0.95, "learning_rate": 7.105859894046441e-08, "loss": 1.4153, "step": 3002 }, { "epoch": 0.95, "learning_rate": 7.02019077666466e-08, "loss": 1.3871, "step": 3003 }, { "epoch": 0.95, "learning_rate": 6.935037563653435e-08, "loss": 1.3619, "step": 3004 }, { "epoch": 0.95, "learning_rate": 6.850400344125763e-08, "loss": 1.0777, "step": 3005 }, { "epoch": 0.95, "learning_rate": 6.766279206654625e-08, "loss": 1.0801, "step": 3006 }, { "epoch": 0.95, "learning_rate": 6.682674239272991e-08, "loss": 1.1237, "step": 3007 }, { "epoch": 0.95, "learning_rate": 6.599585529473596e-08, "loss": 1.3163, "step": 3008 }, { "epoch": 0.95, "learning_rate": 6.51701316420894e-08, "loss": 1.2371, "step": 3009 }, { "epoch": 0.95, "learning_rate": 6.434957229891237e-08, "loss": 1.0817, "step": 3010 }, { "epoch": 0.95, "learning_rate": 6.353417812392127e-08, "loss": 0.8773, "step": 3011 }, { "epoch": 0.95, "learning_rate": 6.272394997042797e-08, "loss": 1.1416, "step": 3012 }, { "epoch": 0.95, "learning_rate": 6.191888868633811e-08, "loss": 1.1706, "step": 3013 }, { "epoch": 0.95, "learning_rate": 6.111899511414943e-08, "loss": 1.0912, "step": 3014 }, { "epoch": 0.95, "learning_rate": 6.032427009095232e-08, "loss": 1.1759, "step": 3015 }, { "epoch": 0.95, "learning_rate": 5.953471444842873e-08, "loss": 1.4084, "step": 3016 }, { "epoch": 0.95, "learning_rate": 5.8750329012849386e-08, "loss": 1.1768, "step": 3017 }, { "epoch": 0.95, "learning_rate": 5.797111460507599e-08, "loss": 1.3319, "step": 3018 }, { "epoch": 0.95, "learning_rate": 5.7197072040557356e-08, "loss": 1.3624, "step": 3019 }, { "epoch": 0.95, "learning_rate": 5.642820212933109e-08, "loss": 1.3174, "step": 3020 }, { "epoch": 0.95, "learning_rate": 5.5664505676020774e-08, "loss": 1.3171, "step": 3021 }, { "epoch": 0.95, "learning_rate": 5.4905983479836555e-08, "loss": 1.3798, "step": 3022 }, { "epoch": 0.95, "learning_rate": 5.4152636334572904e-08, "loss": 1.1795, "step": 3023 }, { "epoch": 0.95, "learning_rate": 5.34044650286103e-08, "loss": 0.8507, "step": 3024 }, { "epoch": 0.96, "learning_rate": 5.26614703449102e-08, "loss": 1.2162, "step": 3025 }, { "epoch": 0.96, "learning_rate": 5.192365306101843e-08, "loss": 1.2751, "step": 3026 }, { "epoch": 0.96, "learning_rate": 5.1191013949062896e-08, "loss": 1.3268, "step": 3027 }, { "epoch": 0.96, "learning_rate": 5.0463553775751407e-08, "loss": 1.1642, "step": 3028 }, { "epoch": 0.96, "learning_rate": 4.9741273302372775e-08, "loss": 1.31, "step": 3029 }, { "epoch": 0.96, "learning_rate": 4.902417328479514e-08, "loss": 1.6522, "step": 3030 }, { "epoch": 0.96, "learning_rate": 4.8312254473464323e-08, "loss": 1.2375, "step": 3031 }, { "epoch": 0.96, "learning_rate": 4.7605517613404926e-08, "loss": 1.2022, "step": 3032 }, { "epoch": 0.96, "learning_rate": 4.6903963444219215e-08, "loss": 1.3069, "step": 3033 }, { "epoch": 0.96, "learning_rate": 4.62075927000849e-08, "loss": 1.4542, "step": 3034 }, { "epoch": 0.96, "learning_rate": 4.5516406109754604e-08, "loss": 1.2736, "step": 3035 }, { "epoch": 0.96, "learning_rate": 4.483040439655806e-08, "loss": 1.3236, "step": 3036 }, { "epoch": 0.96, "learning_rate": 4.414958827839544e-08, "loss": 1.0782, "step": 3037 }, { "epoch": 0.96, "learning_rate": 4.347395846774405e-08, "loss": 1.3884, "step": 3038 }, { "epoch": 0.96, "learning_rate": 4.2803515671649976e-08, "loss": 1.3802, "step": 3039 }, { "epoch": 0.96, "learning_rate": 4.213826059173531e-08, "loss": 1.3323, "step": 3040 }, { "epoch": 0.96, "learning_rate": 4.147819392418872e-08, "loss": 1.469, "step": 3041 }, { "epoch": 0.96, "learning_rate": 4.0823316359772105e-08, "loss": 1.0502, "step": 3042 }, { "epoch": 0.96, "learning_rate": 4.017362858381613e-08, "loss": 1.2401, "step": 3043 }, { "epoch": 0.96, "learning_rate": 3.952913127622082e-08, "loss": 1.4691, "step": 3044 }, { "epoch": 0.96, "learning_rate": 3.8889825111453336e-08, "loss": 1.1805, "step": 3045 }, { "epoch": 0.96, "learning_rate": 3.8255710758549055e-08, "loss": 0.9139, "step": 3046 }, { "epoch": 0.96, "learning_rate": 3.762678888110993e-08, "loss": 1.3074, "step": 3047 }, { "epoch": 0.96, "learning_rate": 3.700306013730448e-08, "loss": 1.0255, "step": 3048 }, { "epoch": 0.96, "learning_rate": 3.638452517986501e-08, "loss": 1.1675, "step": 3049 }, { "epoch": 0.96, "learning_rate": 3.577118465609042e-08, "loss": 1.0045, "step": 3050 }, { "epoch": 0.96, "learning_rate": 3.516303920784281e-08, "loss": 1.0166, "step": 3051 }, { "epoch": 0.96, "learning_rate": 3.4560089471546986e-08, "loss": 1.3311, "step": 3052 }, { "epoch": 0.96, "learning_rate": 3.396233607819155e-08, "loss": 1.2998, "step": 3053 }, { "epoch": 0.96, "learning_rate": 3.336977965332722e-08, "loss": 1.204, "step": 3054 }, { "epoch": 0.96, "learning_rate": 3.2782420817064065e-08, "loss": 0.9887, "step": 3055 }, { "epoch": 0.96, "learning_rate": 3.220026018407541e-08, "loss": 1.1944, "step": 3056 }, { "epoch": 0.97, "learning_rate": 3.1623298363593347e-08, "loss": 0.9776, "step": 3057 }, { "epoch": 0.97, "learning_rate": 3.105153595940935e-08, "loss": 1.2018, "step": 3058 }, { "epoch": 0.97, "learning_rate": 3.048497356987423e-08, "loss": 1.2923, "step": 3059 }, { "epoch": 0.97, "learning_rate": 2.992361178789649e-08, "loss": 1.28, "step": 3060 }, { "epoch": 0.97, "learning_rate": 2.936745120094231e-08, "loss": 1.4301, "step": 3061 }, { "epoch": 0.97, "learning_rate": 2.8816492391035013e-08, "loss": 1.8475, "step": 3062 }, { "epoch": 0.97, "learning_rate": 2.827073593475449e-08, "loss": 1.125, "step": 3063 }, { "epoch": 0.97, "learning_rate": 2.7730182403234997e-08, "loss": 1.2872, "step": 3064 }, { "epoch": 0.97, "learning_rate": 2.7194832362167355e-08, "loss": 1.2937, "step": 3065 }, { "epoch": 0.97, "learning_rate": 2.6664686371796754e-08, "loss": 1.1259, "step": 3066 }, { "epoch": 0.97, "learning_rate": 2.6139744986922178e-08, "loss": 1.2559, "step": 3067 }, { "epoch": 0.97, "learning_rate": 2.5620008756895297e-08, "loss": 1.2491, "step": 3068 }, { "epoch": 0.97, "learning_rate": 2.510547822562104e-08, "loss": 0.8702, "step": 3069 }, { "epoch": 0.97, "learning_rate": 2.4596153931557564e-08, "loss": 1.4585, "step": 3070 }, { "epoch": 0.97, "learning_rate": 2.409203640771296e-08, "loss": 1.4392, "step": 3071 }, { "epoch": 0.97, "learning_rate": 2.3593126181646886e-08, "loss": 1.4755, "step": 3072 }, { "epoch": 0.97, "learning_rate": 2.309942377547114e-08, "loss": 1.2545, "step": 3073 }, { "epoch": 0.97, "learning_rate": 2.2610929705845774e-08, "loss": 1.3951, "step": 3074 }, { "epoch": 0.97, "learning_rate": 2.2127644483980194e-08, "loss": 1.2999, "step": 3075 }, { "epoch": 0.97, "learning_rate": 2.1649568615634276e-08, "loss": 1.0148, "step": 3076 }, { "epoch": 0.97, "learning_rate": 2.117670260111504e-08, "loss": 1.3494, "step": 3077 }, { "epoch": 0.97, "learning_rate": 2.07090469352772e-08, "loss": 1.3054, "step": 3078 }, { "epoch": 0.97, "learning_rate": 2.0246602107524273e-08, "loss": 1.1812, "step": 3079 }, { "epoch": 0.97, "learning_rate": 1.9789368601804694e-08, "loss": 1.1973, "step": 3080 }, { "epoch": 0.97, "learning_rate": 1.933734689661515e-08, "loss": 1.2914, "step": 3081 }, { "epoch": 0.97, "learning_rate": 1.889053746499725e-08, "loss": 1.0989, "step": 3082 }, { "epoch": 0.97, "learning_rate": 1.8448940774537518e-08, "loss": 1.3251, "step": 3083 }, { "epoch": 0.97, "learning_rate": 1.8012557287367394e-08, "loss": 1.3025, "step": 3084 }, { "epoch": 0.97, "learning_rate": 1.7581387460164358e-08, "loss": 1.2073, "step": 3085 }, { "epoch": 0.97, "learning_rate": 1.7155431744147467e-08, "loss": 1.0492, "step": 3086 }, { "epoch": 0.97, "learning_rate": 1.6734690585080148e-08, "loss": 1.1317, "step": 3087 }, { "epoch": 0.97, "learning_rate": 1.631916442326964e-08, "loss": 1.283, "step": 3088 }, { "epoch": 0.98, "learning_rate": 1.590885369356476e-08, "loss": 1.4798, "step": 3089 }, { "epoch": 0.98, "learning_rate": 1.550375882535593e-08, "loss": 1.0602, "step": 3090 }, { "epoch": 0.98, "learning_rate": 1.5103880242576252e-08, "loss": 1.0824, "step": 3091 }, { "epoch": 0.98, "learning_rate": 1.4709218363699317e-08, "loss": 1.052, "step": 3092 }, { "epoch": 0.98, "learning_rate": 1.431977360173975e-08, "loss": 1.4479, "step": 3093 }, { "epoch": 0.98, "learning_rate": 1.3935546364252095e-08, "loss": 1.3241, "step": 3094 }, { "epoch": 0.98, "learning_rate": 1.355653705333082e-08, "loss": 1.1483, "step": 3095 }, { "epoch": 0.98, "learning_rate": 1.3182746065610874e-08, "loss": 1.1047, "step": 3096 }, { "epoch": 0.98, "learning_rate": 1.2814173792263796e-08, "loss": 1.3092, "step": 3097 }, { "epoch": 0.98, "learning_rate": 1.2450820619002158e-08, "loss": 1.0988, "step": 3098 }, { "epoch": 0.98, "learning_rate": 1.2092686926075126e-08, "loss": 1.382, "step": 3099 }, { "epoch": 0.98, "learning_rate": 1.1739773088271234e-08, "loss": 1.3062, "step": 3100 }, { "epoch": 0.98, "learning_rate": 1.1392079474915052e-08, "loss": 1.211, "step": 3101 }, { "epoch": 0.98, "learning_rate": 1.1049606449868855e-08, "loss": 1.233, "step": 3102 }, { "epoch": 0.98, "learning_rate": 1.0712354371530954e-08, "loss": 1.3538, "step": 3103 }, { "epoch": 0.98, "learning_rate": 1.038032359283625e-08, "loss": 1.4221, "step": 3104 }, { "epoch": 0.98, "learning_rate": 1.0053514461256242e-08, "loss": 1.1463, "step": 3105 }, { "epoch": 0.98, "learning_rate": 9.731927318797353e-09, "loss": 1.1145, "step": 3106 }, { "epoch": 0.98, "learning_rate": 9.415562502000375e-09, "loss": 1.0563, "step": 3107 }, { "epoch": 0.98, "learning_rate": 9.104420341942699e-09, "loss": 1.1193, "step": 3108 }, { "epoch": 0.98, "learning_rate": 8.798501164234419e-09, "loss": 1.14, "step": 3109 }, { "epoch": 0.98, "learning_rate": 8.49780528902111e-09, "loss": 1.1509, "step": 3110 }, { "epoch": 0.98, "learning_rate": 8.202333030981057e-09, "loss": 1.1406, "step": 3111 }, { "epoch": 0.98, "learning_rate": 7.912084699327471e-09, "loss": 1.0562, "step": 3112 }, { "epoch": 0.98, "learning_rate": 7.627060597805158e-09, "loss": 0.8968, "step": 3113 }, { "epoch": 0.98, "learning_rate": 7.3472610246921875e-09, "loss": 1.1777, "step": 3114 }, { "epoch": 0.98, "learning_rate": 7.072686272799889e-09, "loss": 1.1451, "step": 3115 }, { "epoch": 0.98, "learning_rate": 6.803336629471191e-09, "loss": 1.2935, "step": 3116 }, { "epoch": 0.98, "learning_rate": 6.5392123765811724e-09, "loss": 0.8414, "step": 3117 }, { "epoch": 0.98, "learning_rate": 6.2803137905359526e-09, "loss": 1.4191, "step": 3118 }, { "epoch": 0.98, "learning_rate": 6.026641142273804e-09, "loss": 1.1961, "step": 3119 }, { "epoch": 0.99, "learning_rate": 5.778194697264039e-09, "loss": 1.2523, "step": 3120 }, { "epoch": 0.99, "learning_rate": 5.534974715505348e-09, "loss": 1.1725, "step": 3121 }, { "epoch": 0.99, "learning_rate": 5.296981451528571e-09, "loss": 1.3012, "step": 3122 }, { "epoch": 0.99, "learning_rate": 5.064215154393925e-09, "loss": 1.1478, "step": 3123 }, { "epoch": 0.99, "learning_rate": 4.836676067692114e-09, "loss": 1.4044, "step": 3124 }, { "epoch": 0.99, "learning_rate": 4.614364429543217e-09, "loss": 0.924, "step": 3125 }, { "epoch": 0.99, "learning_rate": 4.397280472596688e-09, "loss": 1.04, "step": 3126 }, { "epoch": 0.99, "learning_rate": 4.185424424030804e-09, "loss": 1.4065, "step": 3127 }, { "epoch": 0.99, "learning_rate": 3.978796505554328e-09, "loss": 1.2936, "step": 3128 }, { "epoch": 0.99, "learning_rate": 3.7773969334031765e-09, "loss": 1.5067, "step": 3129 }, { "epoch": 0.99, "learning_rate": 3.5812259183426457e-09, "loss": 1.2202, "step": 3130 }, { "epoch": 0.99, "learning_rate": 3.390283665666294e-09, "loss": 1.1658, "step": 3131 }, { "epoch": 0.99, "learning_rate": 3.2045703751948378e-09, "loss": 1.1235, "step": 3132 }, { "epoch": 0.99, "learning_rate": 3.0240862412783678e-09, "loss": 1.2253, "step": 3133 }, { "epoch": 0.99, "learning_rate": 2.8488314527935767e-09, "loss": 0.9054, "step": 3134 }, { "epoch": 0.99, "learning_rate": 2.678806193144867e-09, "loss": 1.0089, "step": 3135 }, { "epoch": 0.99, "learning_rate": 2.5140106402637975e-09, "loss": 1.1502, "step": 3136 }, { "epoch": 0.99, "learning_rate": 2.354444966610192e-09, "loss": 1.3905, "step": 3137 }, { "epoch": 0.99, "learning_rate": 2.20010933916881e-09, "loss": 1.1996, "step": 3138 }, { "epoch": 0.99, "learning_rate": 2.0510039194532317e-09, "loss": 1.1578, "step": 3139 }, { "epoch": 0.99, "learning_rate": 1.907128863501417e-09, "loss": 1.0545, "step": 3140 }, { "epoch": 0.99, "learning_rate": 1.7684843218795933e-09, "loss": 1.2033, "step": 3141 }, { "epoch": 0.99, "learning_rate": 1.635070439679476e-09, "loss": 1.2088, "step": 3142 }, { "epoch": 0.99, "learning_rate": 1.506887356518827e-09, "loss": 1.5176, "step": 3143 }, { "epoch": 0.99, "learning_rate": 1.3839352065420087e-09, "loss": 1.3232, "step": 3144 }, { "epoch": 0.99, "learning_rate": 1.2662141184177634e-09, "loss": 1.1333, "step": 3145 }, { "epoch": 0.99, "learning_rate": 1.1537242153425443e-09, "loss": 1.3245, "step": 3146 }, { "epoch": 0.99, "learning_rate": 1.0464656150360742e-09, "loss": 0.9689, "step": 3147 }, { "epoch": 0.99, "learning_rate": 9.444384297457864e-10, "loss": 0.9165, "step": 3148 }, { "epoch": 0.99, "learning_rate": 8.476427662423847e-10, "loss": 1.1259, "step": 3149 }, { "epoch": 0.99, "learning_rate": 7.560787258237279e-10, "loss": 0.9758, "step": 3150 }, { "epoch": 0.99, "learning_rate": 6.697464043109447e-10, "loss": 1.1252, "step": 3151 }, { "epoch": 1.0, "learning_rate": 5.886458920512095e-10, "loss": 1.0621, "step": 3152 }, { "epoch": 1.0, "learning_rate": 5.127772739166314e-10, "loss": 1.3284, "step": 3153 }, { "epoch": 1.0, "learning_rate": 4.4214062930370004e-10, "loss": 0.9363, "step": 3154 }, { "epoch": 1.0, "learning_rate": 3.7673603213328467e-10, "loss": 1.1363, "step": 3155 }, { "epoch": 1.0, "learning_rate": 3.165635508528553e-10, "loss": 1.2464, "step": 3156 }, { "epoch": 1.0, "learning_rate": 2.6162324843148624e-10, "loss": 1.3518, "step": 3157 }, { "epoch": 1.0, "learning_rate": 2.119151823648524e-10, "loss": 1.4769, "step": 3158 }, { "epoch": 1.0, "learning_rate": 1.674394046730088e-10, "loss": 1.0983, "step": 3159 }, { "epoch": 1.0, "learning_rate": 1.281959618998352e-10, "loss": 0.6495, "step": 3160 }, { "epoch": 1.0, "learning_rate": 9.418489511303641e-11, "loss": 0.9112, "step": 3161 }, { "epoch": 1.0, "learning_rate": 6.540623990525242e-11, "loss": 1.1316, "step": 3162 }, { "epoch": 1.0, "learning_rate": 4.1860026394613394e-11, "loss": 1.2253, "step": 3163 }, { "epoch": 1.0, "learning_rate": 2.3546279220854063e-11, "loss": 0.9211, "step": 3164 }, { "epoch": 1.0, "learning_rate": 1.0465017550309598e-11, "loss": 1.0583, "step": 3165 }, { "epoch": 1.0, "learning_rate": 2.616255072029894e-12, "loss": 1.3668, "step": 3166 }, { "epoch": 1.0, "learning_rate": 0.0, "loss": 0.9766, "step": 3167 }, { "epoch": 1.0, "step": 3167, "total_flos": 5.244656580003103e+17, "train_loss": 1.2813219355791572, "train_runtime": 10063.7221, "train_samples_per_second": 10.07, "train_steps_per_second": 0.315 } ], "logging_steps": 1.0, "max_steps": 3167, "num_train_epochs": 1, "save_steps": 1000, "total_flos": 5.244656580003103e+17, "trial_name": null, "trial_params": null }