{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.999892507793185, "global_step": 4651, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.4285714285714285e-05, "loss": 7.0898, "step": 1 }, { "epoch": 0.0, "learning_rate": 2.857142857142857e-05, "loss": 6.8672, "step": 2 }, { "epoch": 0.0, "learning_rate": 4.2857142857142856e-05, "loss": 6.8906, "step": 3 }, { "epoch": 0.0, "learning_rate": 5.714285714285714e-05, "loss": 6.668, "step": 4 }, { "epoch": 0.0, "learning_rate": 7.142857142857142e-05, "loss": 5.8945, "step": 5 }, { "epoch": 0.0, "learning_rate": 8.571428571428571e-05, "loss": 5.7773, "step": 6 }, { "epoch": 0.0, "learning_rate": 0.0001, "loss": 5.4805, "step": 7 }, { "epoch": 0.0, "learning_rate": 0.00011428571428571428, "loss": 5.3086, "step": 8 }, { "epoch": 0.0, "learning_rate": 0.00012857142857142855, "loss": 5.168, "step": 9 }, { "epoch": 0.0, "learning_rate": 0.00014285714285714284, "loss": 4.9727, "step": 10 }, { "epoch": 0.0, "learning_rate": 0.00015714285714285713, "loss": 4.8711, "step": 11 }, { "epoch": 0.0, "learning_rate": 0.00017142857142857143, "loss": 4.8633, "step": 12 }, { "epoch": 0.0, "learning_rate": 0.00018571428571428572, "loss": 4.6055, "step": 13 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 4.4727, "step": 14 }, { "epoch": 0.0, "learning_rate": 0.00021428571428571427, "loss": 4.3711, "step": 15 }, { "epoch": 0.0, "learning_rate": 0.00022857142857142857, "loss": 4.248, "step": 16 }, { "epoch": 0.0, "learning_rate": 0.00024285714285714286, "loss": 4.127, "step": 17 }, { "epoch": 0.0, "learning_rate": 0.0002571428571428571, "loss": 4.1289, "step": 18 }, { "epoch": 0.0, "learning_rate": 0.0002714285714285714, "loss": 3.9277, "step": 19 }, { "epoch": 0.0, "learning_rate": 0.0002857142857142857, "loss": 3.8789, "step": 20 }, { "epoch": 0.0, "learning_rate": 0.0003, "loss": 3.8105, "step": 21 }, { "epoch": 0.0, "learning_rate": 0.00031428571428571427, "loss": 3.834, "step": 22 }, { "epoch": 0.0, "learning_rate": 0.00032857142857142856, "loss": 3.8418, "step": 23 }, { "epoch": 0.01, "learning_rate": 0.00034285714285714285, "loss": 3.7285, "step": 24 }, { "epoch": 0.01, "learning_rate": 0.00035714285714285714, "loss": 3.6172, "step": 25 }, { "epoch": 0.01, "learning_rate": 0.00037142857142857143, "loss": 3.5898, "step": 26 }, { "epoch": 0.01, "learning_rate": 0.0003857142857142857, "loss": 3.4688, "step": 27 }, { "epoch": 0.01, "learning_rate": 0.0004, "loss": 3.6074, "step": 28 }, { "epoch": 0.01, "learning_rate": 0.0004142857142857143, "loss": 3.4336, "step": 29 }, { "epoch": 0.01, "learning_rate": 0.00042857142857142855, "loss": 3.4648, "step": 30 }, { "epoch": 0.01, "learning_rate": 0.00044285714285714284, "loss": 3.4277, "step": 31 }, { "epoch": 0.01, "learning_rate": 0.00045714285714285713, "loss": 3.3926, "step": 32 }, { "epoch": 0.01, "learning_rate": 0.0004714285714285714, "loss": 3.2578, "step": 33 }, { "epoch": 0.01, "learning_rate": 0.0004857142857142857, "loss": 3.2852, "step": 34 }, { "epoch": 0.01, "learning_rate": 0.0005, "loss": 3.4043, "step": 35 }, { "epoch": 0.01, "learning_rate": 0.0005142857142857142, "loss": 3.3125, "step": 36 }, { "epoch": 0.01, "learning_rate": 0.0005285714285714286, "loss": 3.3008, "step": 37 }, { "epoch": 0.01, "learning_rate": 0.0005428571428571428, "loss": 3.0801, "step": 38 }, { "epoch": 0.01, "learning_rate": 0.0005571428571428572, "loss": 3.2988, "step": 39 }, { "epoch": 0.01, "learning_rate": 0.0005714285714285714, "loss": 3.0898, "step": 40 }, { "epoch": 0.01, "learning_rate": 0.0005857142857142858, "loss": 3.1523, "step": 41 }, { "epoch": 0.01, "learning_rate": 0.0006, "loss": 3.2734, "step": 42 }, { "epoch": 0.01, "learning_rate": 0.0006142857142857143, "loss": 3.1133, "step": 43 }, { "epoch": 0.01, "learning_rate": 0.0006285714285714285, "loss": 3.3008, "step": 44 }, { "epoch": 0.01, "learning_rate": 0.0006428571428571429, "loss": 3.166, "step": 45 }, { "epoch": 0.01, "learning_rate": 0.0006571428571428571, "loss": 3.1973, "step": 46 }, { "epoch": 0.01, "learning_rate": 0.0006714285714285714, "loss": 3.0312, "step": 47 }, { "epoch": 0.01, "learning_rate": 0.0006857142857142857, "loss": 3.2461, "step": 48 }, { "epoch": 0.01, "learning_rate": 0.0007, "loss": 3.1133, "step": 49 }, { "epoch": 0.01, "learning_rate": 0.0007142857142857143, "loss": 3.1543, "step": 50 }, { "epoch": 0.01, "learning_rate": 0.0007285714285714286, "loss": 3.0352, "step": 51 }, { "epoch": 0.01, "learning_rate": 0.0007428571428571429, "loss": 3.1816, "step": 52 }, { "epoch": 0.01, "learning_rate": 0.0007571428571428572, "loss": 3.2793, "step": 53 }, { "epoch": 0.01, "learning_rate": 0.0007714285714285715, "loss": 3.2031, "step": 54 }, { "epoch": 0.01, "learning_rate": 0.0007857142857142857, "loss": 3.084, "step": 55 }, { "epoch": 0.01, "learning_rate": 0.0008, "loss": 3.0137, "step": 56 }, { "epoch": 0.01, "learning_rate": 0.0008142857142857143, "loss": 3.0566, "step": 57 }, { "epoch": 0.01, "learning_rate": 0.0008285714285714286, "loss": 3.1152, "step": 58 }, { "epoch": 0.01, "learning_rate": 0.0008428571428571429, "loss": 3.1602, "step": 59 }, { "epoch": 0.01, "learning_rate": 0.0008571428571428571, "loss": 3.1445, "step": 60 }, { "epoch": 0.01, "learning_rate": 0.0008714285714285715, "loss": 3.1367, "step": 61 }, { "epoch": 0.01, "learning_rate": 0.0008857142857142857, "loss": 3.1895, "step": 62 }, { "epoch": 0.01, "learning_rate": 0.0009000000000000001, "loss": 3.0781, "step": 63 }, { "epoch": 0.01, "learning_rate": 0.0009142857142857143, "loss": 3.0664, "step": 64 }, { "epoch": 0.01, "learning_rate": 0.0009285714285714287, "loss": 3.082, "step": 65 }, { "epoch": 0.01, "learning_rate": 0.0009428571428571429, "loss": 3.0996, "step": 66 }, { "epoch": 0.01, "learning_rate": 0.0009571428571428573, "loss": 2.8984, "step": 67 }, { "epoch": 0.01, "learning_rate": 0.0009714285714285714, "loss": 3.0117, "step": 68 }, { "epoch": 0.01, "learning_rate": 0.0009857142857142857, "loss": 3.0254, "step": 69 }, { "epoch": 0.02, "learning_rate": 0.001, "loss": 2.9844, "step": 70 }, { "epoch": 0.02, "learning_rate": 0.0010142857142857143, "loss": 2.8633, "step": 71 }, { "epoch": 0.02, "learning_rate": 0.0010285714285714284, "loss": 2.9941, "step": 72 }, { "epoch": 0.02, "learning_rate": 0.001042857142857143, "loss": 2.9844, "step": 73 }, { "epoch": 0.02, "learning_rate": 0.0010571428571428572, "loss": 2.7793, "step": 74 }, { "epoch": 0.02, "learning_rate": 0.0010714285714285715, "loss": 2.9492, "step": 75 }, { "epoch": 0.02, "learning_rate": 0.0010857142857142856, "loss": 2.9336, "step": 76 }, { "epoch": 0.02, "learning_rate": 0.0011, "loss": 2.873, "step": 77 }, { "epoch": 0.02, "learning_rate": 0.0011142857142857144, "loss": 2.8633, "step": 78 }, { "epoch": 0.02, "learning_rate": 0.0011285714285714287, "loss": 2.9453, "step": 79 }, { "epoch": 0.02, "learning_rate": 0.0011428571428571427, "loss": 2.8516, "step": 80 }, { "epoch": 0.02, "learning_rate": 0.0011571428571428572, "loss": 2.8574, "step": 81 }, { "epoch": 0.02, "learning_rate": 0.0011714285714285715, "loss": 2.8359, "step": 82 }, { "epoch": 0.02, "learning_rate": 0.0011857142857142858, "loss": 2.8008, "step": 83 }, { "epoch": 0.02, "learning_rate": 0.0012, "loss": 2.8145, "step": 84 }, { "epoch": 0.02, "learning_rate": 0.0012142857142857142, "loss": 2.9785, "step": 85 }, { "epoch": 0.02, "learning_rate": 0.0012285714285714287, "loss": 2.7695, "step": 86 }, { "epoch": 0.02, "learning_rate": 0.001242857142857143, "loss": 2.752, "step": 87 }, { "epoch": 0.02, "learning_rate": 0.001257142857142857, "loss": 2.6719, "step": 88 }, { "epoch": 0.02, "learning_rate": 0.0012714285714285714, "loss": 2.7031, "step": 89 }, { "epoch": 0.02, "learning_rate": 0.0012857142857142859, "loss": 2.8613, "step": 90 }, { "epoch": 0.02, "learning_rate": 0.0013000000000000002, "loss": 2.709, "step": 91 }, { "epoch": 0.02, "learning_rate": 0.0013142857142857142, "loss": 2.7188, "step": 92 }, { "epoch": 0.02, "learning_rate": 0.0013285714285714285, "loss": 2.7793, "step": 93 }, { "epoch": 0.02, "learning_rate": 0.0013428571428571428, "loss": 2.6387, "step": 94 }, { "epoch": 0.02, "learning_rate": 0.0013571428571428573, "loss": 2.7891, "step": 95 }, { "epoch": 0.02, "learning_rate": 0.0013714285714285714, "loss": 2.709, "step": 96 }, { "epoch": 0.02, "learning_rate": 0.0013857142857142857, "loss": 2.6973, "step": 97 }, { "epoch": 0.02, "learning_rate": 0.0014, "loss": 2.625, "step": 98 }, { "epoch": 0.02, "learning_rate": 0.0014142857142857145, "loss": 2.5742, "step": 99 }, { "epoch": 0.02, "learning_rate": 0.0014285714285714286, "loss": 2.6641, "step": 100 }, { "epoch": 0.02, "learning_rate": 0.0014428571428571429, "loss": 2.6484, "step": 101 }, { "epoch": 0.02, "learning_rate": 0.0014571428571428572, "loss": 2.6348, "step": 102 }, { "epoch": 0.02, "learning_rate": 0.0014714285714285717, "loss": 2.6211, "step": 103 }, { "epoch": 0.02, "learning_rate": 0.0014857142857142857, "loss": 2.6895, "step": 104 }, { "epoch": 0.02, "learning_rate": 0.0015, "loss": 2.4727, "step": 105 }, { "epoch": 0.02, "learning_rate": 0.0015142857142857143, "loss": 2.5762, "step": 106 }, { "epoch": 0.02, "learning_rate": 0.0015285714285714284, "loss": 2.6094, "step": 107 }, { "epoch": 0.02, "learning_rate": 0.001542857142857143, "loss": 2.8281, "step": 108 }, { "epoch": 0.02, "learning_rate": 0.0015571428571428572, "loss": 2.6797, "step": 109 }, { "epoch": 0.02, "learning_rate": 0.0015714285714285715, "loss": 2.5762, "step": 110 }, { "epoch": 0.02, "learning_rate": 0.0015857142857142856, "loss": 2.6113, "step": 111 }, { "epoch": 0.02, "learning_rate": 0.0016, "loss": 2.5449, "step": 112 }, { "epoch": 0.02, "learning_rate": 0.0016142857142857144, "loss": 2.6621, "step": 113 }, { "epoch": 0.02, "learning_rate": 0.0016285714285714287, "loss": 2.875, "step": 114 }, { "epoch": 0.02, "learning_rate": 0.0016428571428571427, "loss": 2.6855, "step": 115 }, { "epoch": 0.02, "learning_rate": 0.0016571428571428572, "loss": 2.6367, "step": 116 }, { "epoch": 0.03, "learning_rate": 0.0016714285714285715, "loss": 2.627, "step": 117 }, { "epoch": 0.03, "learning_rate": 0.0016857142857142858, "loss": 2.6289, "step": 118 }, { "epoch": 0.03, "learning_rate": 0.0017, "loss": 2.5586, "step": 119 }, { "epoch": 0.03, "learning_rate": 0.0017142857142857142, "loss": 2.6543, "step": 120 }, { "epoch": 0.03, "learning_rate": 0.0017285714285714287, "loss": 2.6113, "step": 121 }, { "epoch": 0.03, "learning_rate": 0.001742857142857143, "loss": 2.7363, "step": 122 }, { "epoch": 0.03, "learning_rate": 0.001757142857142857, "loss": 2.6934, "step": 123 }, { "epoch": 0.03, "learning_rate": 0.0017714285714285714, "loss": 2.6777, "step": 124 }, { "epoch": 0.03, "learning_rate": 0.0017857142857142859, "loss": 2.6504, "step": 125 }, { "epoch": 0.03, "learning_rate": 0.0018000000000000002, "loss": 2.7246, "step": 126 }, { "epoch": 0.03, "learning_rate": 0.0018142857142857142, "loss": 2.6348, "step": 127 }, { "epoch": 0.03, "learning_rate": 0.0018285714285714285, "loss": 2.6582, "step": 128 }, { "epoch": 0.03, "learning_rate": 0.0018428571428571428, "loss": 2.6992, "step": 129 }, { "epoch": 0.03, "learning_rate": 0.0018571428571428573, "loss": 2.8379, "step": 130 }, { "epoch": 0.03, "learning_rate": 0.0018714285714285714, "loss": 2.6152, "step": 131 }, { "epoch": 0.03, "learning_rate": 0.0018857142857142857, "loss": 2.6289, "step": 132 }, { "epoch": 0.03, "learning_rate": 0.0019, "loss": 2.5469, "step": 133 }, { "epoch": 0.03, "learning_rate": 0.0019142857142857145, "loss": 2.6113, "step": 134 }, { "epoch": 0.03, "learning_rate": 0.0019285714285714286, "loss": 2.7461, "step": 135 }, { "epoch": 0.03, "learning_rate": 0.0019428571428571429, "loss": 2.6543, "step": 136 }, { "epoch": 0.03, "learning_rate": 0.001957142857142857, "loss": 2.7148, "step": 137 }, { "epoch": 0.03, "learning_rate": 0.0019714285714285715, "loss": 2.6074, "step": 138 }, { "epoch": 0.03, "learning_rate": 0.001985714285714286, "loss": 2.6289, "step": 139 }, { "epoch": 0.03, "learning_rate": 0.002, "loss": 2.4961, "step": 140 }, { "epoch": 0.03, "learning_rate": 0.001999999757493112, "loss": 2.6699, "step": 141 }, { "epoch": 0.03, "learning_rate": 0.001999999029972567, "loss": 2.6309, "step": 142 }, { "epoch": 0.03, "learning_rate": 0.0019999978174387164, "loss": 2.6172, "step": 143 }, { "epoch": 0.03, "learning_rate": 0.001999996119892149, "loss": 2.584, "step": 144 }, { "epoch": 0.03, "learning_rate": 0.0019999939373336884, "loss": 2.5801, "step": 145 }, { "epoch": 0.03, "learning_rate": 0.001999991269764393, "loss": 2.6172, "step": 146 }, { "epoch": 0.03, "learning_rate": 0.0019999881171855563, "loss": 2.5352, "step": 147 }, { "epoch": 0.03, "learning_rate": 0.0019999844795987073, "loss": 2.5684, "step": 148 }, { "epoch": 0.03, "learning_rate": 0.0019999803570056106, "loss": 2.6016, "step": 149 }, { "epoch": 0.03, "learning_rate": 0.001999975749408266, "loss": 2.5508, "step": 150 }, { "epoch": 0.03, "learning_rate": 0.0019999706568089074, "loss": 2.5156, "step": 151 }, { "epoch": 0.03, "learning_rate": 0.0019999650792100056, "loss": 2.6973, "step": 152 }, { "epoch": 0.03, "learning_rate": 0.0019999590166142655, "loss": 2.5039, "step": 153 }, { "epoch": 0.03, "learning_rate": 0.001999952469024627, "loss": 2.5176, "step": 154 }, { "epoch": 0.03, "learning_rate": 0.001999945436444267, "loss": 2.6836, "step": 155 }, { "epoch": 0.03, "learning_rate": 0.0019999379188765953, "loss": 2.6719, "step": 156 }, { "epoch": 0.03, "learning_rate": 0.0019999299163252584, "loss": 2.5723, "step": 157 }, { "epoch": 0.03, "learning_rate": 0.0019999214287941378, "loss": 2.6211, "step": 158 }, { "epoch": 0.03, "learning_rate": 0.0019999124562873494, "loss": 2.6113, "step": 159 }, { "epoch": 0.03, "learning_rate": 0.0019999029988092463, "loss": 2.6914, "step": 160 }, { "epoch": 0.03, "learning_rate": 0.0019998930563644144, "loss": 2.4746, "step": 161 }, { "epoch": 0.03, "learning_rate": 0.001999882628957676, "loss": 2.6719, "step": 162 }, { "epoch": 0.04, "learning_rate": 0.0019998717165940886, "loss": 2.6113, "step": 163 }, { "epoch": 0.04, "learning_rate": 0.0019998603192789453, "loss": 2.6895, "step": 164 }, { "epoch": 0.04, "learning_rate": 0.001999848437017774, "loss": 2.5137, "step": 165 }, { "epoch": 0.04, "learning_rate": 0.0019998360698163372, "loss": 2.5742, "step": 166 }, { "epoch": 0.04, "learning_rate": 0.0019998232176806335, "loss": 2.6328, "step": 167 }, { "epoch": 0.04, "learning_rate": 0.001999809880616896, "loss": 2.6016, "step": 168 }, { "epoch": 0.04, "learning_rate": 0.001999796058631594, "loss": 2.5547, "step": 169 }, { "epoch": 0.04, "learning_rate": 0.0019997817517314305, "loss": 2.582, "step": 170 }, { "epoch": 0.04, "learning_rate": 0.0019997669599233453, "loss": 2.5195, "step": 171 }, { "epoch": 0.04, "learning_rate": 0.0019997516832145124, "loss": 2.6152, "step": 172 }, { "epoch": 0.04, "learning_rate": 0.001999735921612341, "loss": 2.584, "step": 173 }, { "epoch": 0.04, "learning_rate": 0.0019997196751244764, "loss": 2.5547, "step": 174 }, { "epoch": 0.04, "learning_rate": 0.001999702943758797, "loss": 2.7598, "step": 175 }, { "epoch": 0.04, "learning_rate": 0.001999685727523419, "loss": 2.625, "step": 176 }, { "epoch": 0.04, "learning_rate": 0.001999668026426692, "loss": 2.6094, "step": 177 }, { "epoch": 0.04, "learning_rate": 0.0019996498404772013, "loss": 2.498, "step": 178 }, { "epoch": 0.04, "learning_rate": 0.001999631169683768, "loss": 2.5078, "step": 179 }, { "epoch": 0.04, "learning_rate": 0.0019996120140554465, "loss": 2.5527, "step": 180 }, { "epoch": 0.04, "learning_rate": 0.0019995923736015283, "loss": 2.7148, "step": 181 }, { "epoch": 0.04, "learning_rate": 0.0019995722483315387, "loss": 2.6367, "step": 182 }, { "epoch": 0.04, "learning_rate": 0.0019995516382552395, "loss": 2.5117, "step": 183 }, { "epoch": 0.04, "learning_rate": 0.001999530543382627, "loss": 2.5469, "step": 184 }, { "epoch": 0.04, "learning_rate": 0.0019995089637239315, "loss": 2.5078, "step": 185 }, { "epoch": 0.04, "learning_rate": 0.0019994868992896197, "loss": 2.5723, "step": 186 }, { "epoch": 0.04, "learning_rate": 0.0019994643500903936, "loss": 2.4883, "step": 187 }, { "epoch": 0.04, "learning_rate": 0.00199944131613719, "loss": 2.5801, "step": 188 }, { "epoch": 0.04, "learning_rate": 0.0019994177974411807, "loss": 2.4609, "step": 189 }, { "epoch": 0.04, "learning_rate": 0.0019993937940137713, "loss": 2.5234, "step": 190 }, { "epoch": 0.04, "learning_rate": 0.0019993693058666055, "loss": 2.6113, "step": 191 }, { "epoch": 0.04, "learning_rate": 0.001999344333011559, "loss": 2.4316, "step": 192 }, { "epoch": 0.04, "learning_rate": 0.001999318875460745, "loss": 2.5762, "step": 193 }, { "epoch": 0.04, "learning_rate": 0.0019992929332265106, "loss": 2.5352, "step": 194 }, { "epoch": 0.04, "learning_rate": 0.001999266506321438, "loss": 2.5508, "step": 195 }, { "epoch": 0.04, "learning_rate": 0.001999239594758344, "loss": 2.5273, "step": 196 }, { "epoch": 0.04, "learning_rate": 0.001999212198550282, "loss": 2.4707, "step": 197 }, { "epoch": 0.04, "learning_rate": 0.001999184317710539, "loss": 2.5059, "step": 198 }, { "epoch": 0.04, "learning_rate": 0.0019991559522526377, "loss": 2.666, "step": 199 }, { "epoch": 0.04, "learning_rate": 0.001999127102190336, "loss": 2.541, "step": 200 }, { "epoch": 0.04, "learning_rate": 0.0019990977675376264, "loss": 2.707, "step": 201 }, { "epoch": 0.04, "learning_rate": 0.001999067948308736, "loss": 2.5449, "step": 202 }, { "epoch": 0.04, "learning_rate": 0.001999037644518129, "loss": 2.582, "step": 203 }, { "epoch": 0.04, "learning_rate": 0.0019990068561805017, "loss": 2.5234, "step": 204 }, { "epoch": 0.04, "learning_rate": 0.0019989755833107873, "loss": 2.4893, "step": 205 }, { "epoch": 0.04, "learning_rate": 0.001998943825924154, "loss": 2.4453, "step": 206 }, { "epoch": 0.04, "learning_rate": 0.001998911584036005, "loss": 2.5332, "step": 207 }, { "epoch": 0.04, "learning_rate": 0.001998878857661976, "loss": 2.4512, "step": 208 }, { "epoch": 0.04, "learning_rate": 0.0019988456468179416, "loss": 2.4766, "step": 209 }, { "epoch": 0.05, "learning_rate": 0.0019988119515200096, "loss": 2.5488, "step": 210 }, { "epoch": 0.05, "learning_rate": 0.001998777771784521, "loss": 2.457, "step": 211 }, { "epoch": 0.05, "learning_rate": 0.001998743107628055, "loss": 2.582, "step": 212 }, { "epoch": 0.05, "learning_rate": 0.001998707959067424, "loss": 2.5469, "step": 213 }, { "epoch": 0.05, "learning_rate": 0.0019986723261196755, "loss": 2.5098, "step": 214 }, { "epoch": 0.05, "learning_rate": 0.001998636208802091, "loss": 2.4629, "step": 215 }, { "epoch": 0.05, "learning_rate": 0.001998599607132189, "loss": 2.5039, "step": 216 }, { "epoch": 0.05, "learning_rate": 0.001998562521127721, "loss": 2.6875, "step": 217 }, { "epoch": 0.05, "learning_rate": 0.0019985249508066754, "loss": 2.5039, "step": 218 }, { "epoch": 0.05, "learning_rate": 0.001998486896187273, "loss": 2.6426, "step": 219 }, { "epoch": 0.05, "learning_rate": 0.0019984483572879717, "loss": 2.5117, "step": 220 }, { "epoch": 0.05, "learning_rate": 0.001998409334127463, "loss": 2.6152, "step": 221 }, { "epoch": 0.05, "learning_rate": 0.001998369826724674, "loss": 2.5586, "step": 222 }, { "epoch": 0.05, "learning_rate": 0.0019983298350987654, "loss": 2.5254, "step": 223 }, { "epoch": 0.05, "learning_rate": 0.001998289359269135, "loss": 2.5547, "step": 224 }, { "epoch": 0.05, "learning_rate": 0.0019982483992554137, "loss": 2.5938, "step": 225 }, { "epoch": 0.05, "learning_rate": 0.001998206955077467, "loss": 2.623, "step": 226 }, { "epoch": 0.05, "learning_rate": 0.001998165026755396, "loss": 2.4648, "step": 227 }, { "epoch": 0.05, "learning_rate": 0.001998122614309538, "loss": 2.459, "step": 228 }, { "epoch": 0.05, "learning_rate": 0.001998079717760462, "loss": 2.6074, "step": 229 }, { "epoch": 0.05, "learning_rate": 0.001998036337128974, "loss": 2.5371, "step": 230 }, { "epoch": 0.05, "learning_rate": 0.0019979924724361138, "loss": 2.7422, "step": 231 }, { "epoch": 0.05, "learning_rate": 0.0019979481237031574, "loss": 2.4492, "step": 232 }, { "epoch": 0.05, "learning_rate": 0.001997903290951613, "loss": 2.6055, "step": 233 }, { "epoch": 0.05, "learning_rate": 0.0019978579742032264, "loss": 2.543, "step": 234 }, { "epoch": 0.05, "learning_rate": 0.0019978121734799768, "loss": 2.5645, "step": 235 }, { "epoch": 0.05, "learning_rate": 0.001997765888804077, "loss": 2.4688, "step": 236 }, { "epoch": 0.05, "learning_rate": 0.0019977191201979772, "loss": 2.6055, "step": 237 }, { "epoch": 0.05, "learning_rate": 0.00199767186768436, "loss": 2.5469, "step": 238 }, { "epoch": 0.05, "learning_rate": 0.0019976241312861438, "loss": 2.6895, "step": 239 }, { "epoch": 0.05, "learning_rate": 0.001997575911026481, "loss": 2.3398, "step": 240 }, { "epoch": 0.05, "learning_rate": 0.0019975272069287595, "loss": 2.5664, "step": 241 }, { "epoch": 0.05, "learning_rate": 0.001997478019016601, "loss": 2.3535, "step": 242 }, { "epoch": 0.05, "learning_rate": 0.001997428347313863, "loss": 2.4883, "step": 243 }, { "epoch": 0.05, "learning_rate": 0.0019973781918446363, "loss": 2.4199, "step": 244 }, { "epoch": 0.05, "learning_rate": 0.0019973275526332475, "loss": 2.5195, "step": 245 }, { "epoch": 0.05, "learning_rate": 0.0019972764297042566, "loss": 2.6016, "step": 246 }, { "epoch": 0.05, "learning_rate": 0.00199722482308246, "loss": 2.4316, "step": 247 }, { "epoch": 0.05, "learning_rate": 0.001997172732792887, "loss": 2.5801, "step": 248 }, { "epoch": 0.05, "learning_rate": 0.0019971201588608015, "loss": 2.5938, "step": 249 }, { "epoch": 0.05, "learning_rate": 0.0019970671013117038, "loss": 2.5645, "step": 250 }, { "epoch": 0.05, "learning_rate": 0.001997013560171327, "loss": 2.5605, "step": 251 }, { "epoch": 0.05, "learning_rate": 0.001996959535465639, "loss": 2.4668, "step": 252 }, { "epoch": 0.05, "learning_rate": 0.001996905027220843, "loss": 2.5254, "step": 253 }, { "epoch": 0.05, "learning_rate": 0.0019968500354633763, "loss": 2.5938, "step": 254 }, { "epoch": 0.05, "learning_rate": 0.0019967945602199096, "loss": 2.4492, "step": 255 }, { "epoch": 0.06, "learning_rate": 0.0019967386015173503, "loss": 2.4727, "step": 256 }, { "epoch": 0.06, "learning_rate": 0.0019966821593828394, "loss": 2.4727, "step": 257 }, { "epoch": 0.06, "learning_rate": 0.001996625233843751, "loss": 2.5742, "step": 258 }, { "epoch": 0.06, "learning_rate": 0.001996567824927695, "loss": 2.4453, "step": 259 }, { "epoch": 0.06, "learning_rate": 0.0019965099326625163, "loss": 2.5762, "step": 260 }, { "epoch": 0.06, "learning_rate": 0.001996451557076293, "loss": 2.6074, "step": 261 }, { "epoch": 0.06, "learning_rate": 0.0019963926981973372, "loss": 2.3574, "step": 262 }, { "epoch": 0.06, "learning_rate": 0.0019963333560541977, "loss": 2.4785, "step": 263 }, { "epoch": 0.06, "learning_rate": 0.0019962735306756556, "loss": 2.459, "step": 264 }, { "epoch": 0.06, "learning_rate": 0.0019962132220907266, "loss": 2.4121, "step": 265 }, { "epoch": 0.06, "learning_rate": 0.0019961524303286626, "loss": 2.6016, "step": 266 }, { "epoch": 0.06, "learning_rate": 0.0019960911554189466, "loss": 2.5781, "step": 267 }, { "epoch": 0.06, "learning_rate": 0.001996029397391299, "loss": 2.4668, "step": 268 }, { "epoch": 0.06, "learning_rate": 0.0019959671562756734, "loss": 2.5742, "step": 269 }, { "epoch": 0.06, "learning_rate": 0.0019959044321022565, "loss": 2.6309, "step": 270 }, { "epoch": 0.06, "learning_rate": 0.001995841224901471, "loss": 2.6367, "step": 271 }, { "epoch": 0.06, "learning_rate": 0.001995777534703974, "loss": 2.4004, "step": 272 }, { "epoch": 0.06, "learning_rate": 0.0019957133615406547, "loss": 2.4375, "step": 273 }, { "epoch": 0.06, "learning_rate": 0.0019956487054426392, "loss": 2.5293, "step": 274 }, { "epoch": 0.06, "learning_rate": 0.0019955835664412862, "loss": 2.5469, "step": 275 }, { "epoch": 0.06, "learning_rate": 0.0019955179445681885, "loss": 2.5176, "step": 276 }, { "epoch": 0.06, "learning_rate": 0.001995451839855174, "loss": 2.543, "step": 277 }, { "epoch": 0.06, "learning_rate": 0.0019953852523343046, "loss": 2.5273, "step": 278 }, { "epoch": 0.06, "learning_rate": 0.0019953181820378764, "loss": 2.5508, "step": 279 }, { "epoch": 0.06, "learning_rate": 0.0019952506289984185, "loss": 2.5254, "step": 280 }, { "epoch": 0.06, "learning_rate": 0.0019951825932486957, "loss": 2.5273, "step": 281 }, { "epoch": 0.06, "learning_rate": 0.001995114074821706, "loss": 2.5742, "step": 282 }, { "epoch": 0.06, "learning_rate": 0.001995045073750682, "loss": 2.3828, "step": 283 }, { "epoch": 0.06, "learning_rate": 0.0019949755900690903, "loss": 2.5879, "step": 284 }, { "epoch": 0.06, "learning_rate": 0.001994905623810631, "loss": 2.4844, "step": 285 }, { "epoch": 0.06, "learning_rate": 0.0019948351750092392, "loss": 2.457, "step": 286 }, { "epoch": 0.06, "learning_rate": 0.001994764243699083, "loss": 2.6211, "step": 287 }, { "epoch": 0.06, "learning_rate": 0.0019946928299145656, "loss": 2.4062, "step": 288 }, { "epoch": 0.06, "learning_rate": 0.0019946209336903233, "loss": 2.3887, "step": 289 }, { "epoch": 0.06, "learning_rate": 0.0019945485550612266, "loss": 2.4199, "step": 290 }, { "epoch": 0.06, "learning_rate": 0.001994475694062381, "loss": 2.5254, "step": 291 }, { "epoch": 0.06, "learning_rate": 0.001994402350729124, "loss": 2.4961, "step": 292 }, { "epoch": 0.06, "learning_rate": 0.001994328525097029, "loss": 2.6113, "step": 293 }, { "epoch": 0.06, "learning_rate": 0.0019942542172019014, "loss": 2.584, "step": 294 }, { "epoch": 0.06, "learning_rate": 0.0019941794270797826, "loss": 2.4609, "step": 295 }, { "epoch": 0.06, "learning_rate": 0.0019941041547669467, "loss": 2.5195, "step": 296 }, { "epoch": 0.06, "learning_rate": 0.001994028400299901, "loss": 2.5996, "step": 297 }, { "epoch": 0.06, "learning_rate": 0.0019939521637153883, "loss": 2.4922, "step": 298 }, { "epoch": 0.06, "learning_rate": 0.001993875445050384, "loss": 2.5977, "step": 299 }, { "epoch": 0.06, "learning_rate": 0.001993798244342098, "loss": 2.4922, "step": 300 }, { "epoch": 0.06, "learning_rate": 0.0019937205616279738, "loss": 2.6113, "step": 301 }, { "epoch": 0.06, "learning_rate": 0.001993642396945688, "loss": 2.4434, "step": 302 }, { "epoch": 0.07, "learning_rate": 0.001993563750333152, "loss": 2.6289, "step": 303 }, { "epoch": 0.07, "learning_rate": 0.00199348462182851, "loss": 2.6465, "step": 304 }, { "epoch": 0.07, "learning_rate": 0.001993405011470141, "loss": 2.5977, "step": 305 }, { "epoch": 0.07, "learning_rate": 0.001993324919296657, "loss": 2.5352, "step": 306 }, { "epoch": 0.07, "learning_rate": 0.0019932443453469033, "loss": 2.6445, "step": 307 }, { "epoch": 0.07, "learning_rate": 0.00199316328965996, "loss": 2.4805, "step": 308 }, { "epoch": 0.07, "learning_rate": 0.00199308175227514, "loss": 2.4453, "step": 309 }, { "epoch": 0.07, "learning_rate": 0.0019929997332319904, "loss": 2.6445, "step": 310 }, { "epoch": 0.07, "learning_rate": 0.001992917232570291, "loss": 2.3691, "step": 311 }, { "epoch": 0.07, "learning_rate": 0.0019928342503300555, "loss": 2.5986, "step": 312 }, { "epoch": 0.07, "learning_rate": 0.0019927507865515324, "loss": 2.5742, "step": 313 }, { "epoch": 0.07, "learning_rate": 0.0019926668412752024, "loss": 2.5098, "step": 314 }, { "epoch": 0.07, "learning_rate": 0.0019925824145417795, "loss": 2.4688, "step": 315 }, { "epoch": 0.07, "learning_rate": 0.0019924975063922126, "loss": 2.457, "step": 316 }, { "epoch": 0.07, "learning_rate": 0.0019924121168676833, "loss": 2.5957, "step": 317 }, { "epoch": 0.07, "learning_rate": 0.0019923262460096063, "loss": 2.5176, "step": 318 }, { "epoch": 0.07, "learning_rate": 0.00199223989385963, "loss": 2.3809, "step": 319 }, { "epoch": 0.07, "learning_rate": 0.001992153060459637, "loss": 2.4844, "step": 320 }, { "epoch": 0.07, "learning_rate": 0.0019920657458517422, "loss": 2.4824, "step": 321 }, { "epoch": 0.07, "learning_rate": 0.0019919779500782945, "loss": 2.4297, "step": 322 }, { "epoch": 0.07, "learning_rate": 0.0019918896731818767, "loss": 2.5215, "step": 323 }, { "epoch": 0.07, "learning_rate": 0.001991800915205303, "loss": 2.4219, "step": 324 }, { "epoch": 0.07, "learning_rate": 0.0019917116761916233, "loss": 2.4766, "step": 325 }, { "epoch": 0.07, "learning_rate": 0.0019916219561841196, "loss": 2.3184, "step": 326 }, { "epoch": 0.07, "learning_rate": 0.001991531755226307, "loss": 2.4805, "step": 327 }, { "epoch": 0.07, "learning_rate": 0.001991441073361934, "loss": 2.418, "step": 328 }, { "epoch": 0.07, "learning_rate": 0.0019913499106349835, "loss": 2.5098, "step": 329 }, { "epoch": 0.07, "learning_rate": 0.00199125826708967, "loss": 2.4199, "step": 330 }, { "epoch": 0.07, "learning_rate": 0.001991166142770442, "loss": 2.4277, "step": 331 }, { "epoch": 0.07, "learning_rate": 0.001991073537721981, "loss": 2.3965, "step": 332 }, { "epoch": 0.07, "learning_rate": 0.0019909804519892015, "loss": 2.5332, "step": 333 }, { "epoch": 0.07, "learning_rate": 0.0019908868856172517, "loss": 2.5332, "step": 334 }, { "epoch": 0.07, "learning_rate": 0.0019907928386515127, "loss": 2.5391, "step": 335 }, { "epoch": 0.07, "learning_rate": 0.0019906983111375983, "loss": 2.6035, "step": 336 }, { "epoch": 0.07, "learning_rate": 0.001990603303121356, "loss": 2.5312, "step": 337 }, { "epoch": 0.07, "learning_rate": 0.001990507814648865, "loss": 2.4805, "step": 338 }, { "epoch": 0.07, "learning_rate": 0.00199041184576644, "loss": 2.4336, "step": 339 }, { "epoch": 0.07, "learning_rate": 0.0019903153965206265, "loss": 2.5703, "step": 340 }, { "epoch": 0.07, "learning_rate": 0.001990218466958203, "loss": 2.5137, "step": 341 }, { "epoch": 0.07, "learning_rate": 0.001990121057126183, "loss": 2.5, "step": 342 }, { "epoch": 0.07, "learning_rate": 0.001990023167071811, "loss": 2.4473, "step": 343 }, { "epoch": 0.07, "learning_rate": 0.0019899247968425647, "loss": 2.5684, "step": 344 }, { "epoch": 0.07, "learning_rate": 0.0019898259464861557, "loss": 2.5039, "step": 345 }, { "epoch": 0.07, "learning_rate": 0.0019897266160505273, "loss": 2.3535, "step": 346 }, { "epoch": 0.07, "learning_rate": 0.001989626805583856, "loss": 2.4766, "step": 347 }, { "epoch": 0.07, "learning_rate": 0.0019895265151345515, "loss": 2.416, "step": 348 }, { "epoch": 0.08, "learning_rate": 0.0019894257447512563, "loss": 2.6348, "step": 349 }, { "epoch": 0.08, "learning_rate": 0.0019893244944828446, "loss": 2.5898, "step": 350 }, { "epoch": 0.08, "learning_rate": 0.0019892227643784257, "loss": 2.3496, "step": 351 }, { "epoch": 0.08, "learning_rate": 0.0019891205544873385, "loss": 2.2715, "step": 352 }, { "epoch": 0.08, "learning_rate": 0.001989017864859157, "loss": 2.3633, "step": 353 }, { "epoch": 0.08, "learning_rate": 0.001988914695543687, "loss": 2.3984, "step": 354 }, { "epoch": 0.08, "learning_rate": 0.0019888110465909664, "loss": 2.4492, "step": 355 }, { "epoch": 0.08, "learning_rate": 0.0019887069180512676, "loss": 2.4609, "step": 356 }, { "epoch": 0.08, "learning_rate": 0.0019886023099750932, "loss": 2.4355, "step": 357 }, { "epoch": 0.08, "learning_rate": 0.0019884972224131804, "loss": 2.5234, "step": 358 }, { "epoch": 0.08, "learning_rate": 0.001988391655416498, "loss": 2.2861, "step": 359 }, { "epoch": 0.08, "learning_rate": 0.001988285609036247, "loss": 2.4336, "step": 360 }, { "epoch": 0.08, "learning_rate": 0.0019881790833238613, "loss": 2.4062, "step": 361 }, { "epoch": 0.08, "learning_rate": 0.0019880720783310084, "loss": 2.4395, "step": 362 }, { "epoch": 0.08, "learning_rate": 0.001987964594109586, "loss": 2.3906, "step": 363 }, { "epoch": 0.08, "learning_rate": 0.0019878566307117256, "loss": 2.4883, "step": 364 }, { "epoch": 0.08, "learning_rate": 0.0019877481881897916, "loss": 2.4277, "step": 365 }, { "epoch": 0.08, "learning_rate": 0.0019876392665963793, "loss": 2.3691, "step": 366 }, { "epoch": 0.08, "learning_rate": 0.0019875298659843177, "loss": 2.3516, "step": 367 }, { "epoch": 0.08, "learning_rate": 0.0019874199864066675, "loss": 2.5273, "step": 368 }, { "epoch": 0.08, "learning_rate": 0.001987309627916722, "loss": 2.5156, "step": 369 }, { "epoch": 0.08, "learning_rate": 0.0019871987905680064, "loss": 2.6074, "step": 370 }, { "epoch": 0.08, "learning_rate": 0.001987087474414278, "loss": 2.418, "step": 371 }, { "epoch": 0.08, "learning_rate": 0.001986975679509527, "loss": 2.3066, "step": 372 }, { "epoch": 0.08, "learning_rate": 0.001986863405907976, "loss": 2.4648, "step": 373 }, { "epoch": 0.08, "learning_rate": 0.001986750653664078, "loss": 2.5234, "step": 374 }, { "epoch": 0.08, "learning_rate": 0.0019866374228325206, "loss": 2.418, "step": 375 }, { "epoch": 0.08, "learning_rate": 0.0019865237134682216, "loss": 2.3535, "step": 376 }, { "epoch": 0.08, "learning_rate": 0.0019864095256263317, "loss": 2.5586, "step": 377 }, { "epoch": 0.08, "learning_rate": 0.0019862948593622337, "loss": 2.3809, "step": 378 }, { "epoch": 0.08, "learning_rate": 0.0019861797147315424, "loss": 2.4141, "step": 379 }, { "epoch": 0.08, "learning_rate": 0.0019860640917901047, "loss": 2.4961, "step": 380 }, { "epoch": 0.08, "learning_rate": 0.001985947990593999, "loss": 2.5234, "step": 381 }, { "epoch": 0.08, "learning_rate": 0.0019858314111995353, "loss": 2.5332, "step": 382 }, { "epoch": 0.08, "learning_rate": 0.0019857143536632575, "loss": 2.4395, "step": 383 }, { "epoch": 0.08, "learning_rate": 0.00198559681804194, "loss": 2.4141, "step": 384 }, { "epoch": 0.08, "learning_rate": 0.0019854788043925883, "loss": 2.375, "step": 385 }, { "epoch": 0.08, "learning_rate": 0.001985360312772441, "loss": 2.459, "step": 386 }, { "epoch": 0.08, "learning_rate": 0.0019852413432389685, "loss": 2.4492, "step": 387 }, { "epoch": 0.08, "learning_rate": 0.0019851218958498724, "loss": 2.3477, "step": 388 }, { "epoch": 0.08, "learning_rate": 0.0019850019706630865, "loss": 2.5117, "step": 389 }, { "epoch": 0.08, "learning_rate": 0.0019848815677367755, "loss": 2.4121, "step": 390 }, { "epoch": 0.08, "learning_rate": 0.0019847606871293377, "loss": 2.5059, "step": 391 }, { "epoch": 0.08, "learning_rate": 0.0019846393288994006, "loss": 2.5, "step": 392 }, { "epoch": 0.08, "learning_rate": 0.0019845174931058256, "loss": 2.5117, "step": 393 }, { "epoch": 0.08, "learning_rate": 0.0019843951798077042, "loss": 2.3652, "step": 394 }, { "epoch": 0.08, "learning_rate": 0.00198427238906436, "loss": 2.2344, "step": 395 }, { "epoch": 0.09, "learning_rate": 0.0019841491209353487, "loss": 2.4316, "step": 396 }, { "epoch": 0.09, "learning_rate": 0.0019840253754804566, "loss": 2.46, "step": 397 }, { "epoch": 0.09, "learning_rate": 0.0019839011527597017, "loss": 2.377, "step": 398 }, { "epoch": 0.09, "learning_rate": 0.0019837764528333342, "loss": 2.418, "step": 399 }, { "epoch": 0.09, "learning_rate": 0.0019836512757618356, "loss": 2.4004, "step": 400 }, { "epoch": 0.09, "learning_rate": 0.0019835256216059177, "loss": 2.4062, "step": 401 }, { "epoch": 0.09, "learning_rate": 0.0019833994904265248, "loss": 2.6426, "step": 402 }, { "epoch": 0.09, "learning_rate": 0.0019832728822848328, "loss": 2.6191, "step": 403 }, { "epoch": 0.09, "learning_rate": 0.0019831457972422474, "loss": 2.4062, "step": 404 }, { "epoch": 0.09, "learning_rate": 0.001983018235360407, "loss": 2.4883, "step": 405 }, { "epoch": 0.09, "learning_rate": 0.0019828901967011816, "loss": 2.4648, "step": 406 }, { "epoch": 0.09, "learning_rate": 0.001982761681326671, "loss": 2.418, "step": 407 }, { "epoch": 0.09, "learning_rate": 0.0019826326892992065, "loss": 2.4277, "step": 408 }, { "epoch": 0.09, "learning_rate": 0.0019825032206813517, "loss": 2.4844, "step": 409 }, { "epoch": 0.09, "learning_rate": 0.001982373275535901, "loss": 2.2812, "step": 410 }, { "epoch": 0.09, "learning_rate": 0.0019822428539258787, "loss": 2.5137, "step": 411 }, { "epoch": 0.09, "learning_rate": 0.0019821119559145415, "loss": 2.4199, "step": 412 }, { "epoch": 0.09, "learning_rate": 0.0019819805815653765, "loss": 2.2734, "step": 413 }, { "epoch": 0.09, "learning_rate": 0.001981848730942103, "loss": 2.4922, "step": 414 }, { "epoch": 0.09, "learning_rate": 0.001981716404108669, "loss": 2.5879, "step": 415 }, { "epoch": 0.09, "learning_rate": 0.001981583601129255, "loss": 2.4648, "step": 416 }, { "epoch": 0.09, "learning_rate": 0.0019814503220682736, "loss": 2.502, "step": 417 }, { "epoch": 0.09, "learning_rate": 0.001981316566990366, "loss": 2.3672, "step": 418 }, { "epoch": 0.09, "learning_rate": 0.0019811823359604055, "loss": 2.4434, "step": 419 }, { "epoch": 0.09, "learning_rate": 0.0019810476290434953, "loss": 2.3516, "step": 420 }, { "epoch": 0.09, "learning_rate": 0.0019809124463049713, "loss": 2.4805, "step": 421 }, { "epoch": 0.09, "learning_rate": 0.001980776787810398, "loss": 2.29, "step": 422 }, { "epoch": 0.09, "learning_rate": 0.001980640653625572, "loss": 2.3965, "step": 423 }, { "epoch": 0.09, "learning_rate": 0.0019805040438165204, "loss": 2.5508, "step": 424 }, { "epoch": 0.09, "learning_rate": 0.0019803669584495007, "loss": 2.415, "step": 425 }, { "epoch": 0.09, "learning_rate": 0.0019802293975910014, "loss": 2.5254, "step": 426 }, { "epoch": 0.09, "learning_rate": 0.0019800913613077413, "loss": 2.4805, "step": 427 }, { "epoch": 0.09, "learning_rate": 0.0019799528496666696, "loss": 2.4668, "step": 428 }, { "epoch": 0.09, "learning_rate": 0.0019798138627349663, "loss": 2.2988, "step": 429 }, { "epoch": 0.09, "learning_rate": 0.0019796744005800425, "loss": 2.4512, "step": 430 }, { "epoch": 0.09, "learning_rate": 0.0019795344632695393, "loss": 2.4375, "step": 431 }, { "epoch": 0.09, "learning_rate": 0.0019793940508713273, "loss": 2.584, "step": 432 }, { "epoch": 0.09, "learning_rate": 0.0019792531634535097, "loss": 2.3438, "step": 433 }, { "epoch": 0.09, "learning_rate": 0.001979111801084418, "loss": 2.4863, "step": 434 }, { "epoch": 0.09, "learning_rate": 0.001978969963832615, "loss": 2.2344, "step": 435 }, { "epoch": 0.09, "learning_rate": 0.001978827651766894, "loss": 2.4434, "step": 436 }, { "epoch": 0.09, "learning_rate": 0.001978684864956278, "loss": 2.4453, "step": 437 }, { "epoch": 0.09, "learning_rate": 0.001978541603470021, "loss": 2.4531, "step": 438 }, { "epoch": 0.09, "learning_rate": 0.0019783978673776066, "loss": 2.4102, "step": 439 }, { "epoch": 0.09, "learning_rate": 0.0019782536567487484, "loss": 2.4023, "step": 440 }, { "epoch": 0.09, "learning_rate": 0.0019781089716533907, "loss": 2.4062, "step": 441 }, { "epoch": 0.1, "learning_rate": 0.0019779638121617082, "loss": 2.4131, "step": 442 }, { "epoch": 0.1, "learning_rate": 0.001977818178344105, "loss": 2.4912, "step": 443 }, { "epoch": 0.1, "learning_rate": 0.0019776720702712153, "loss": 2.3301, "step": 444 }, { "epoch": 0.1, "learning_rate": 0.0019775254880139035, "loss": 2.4961, "step": 445 }, { "epoch": 0.1, "learning_rate": 0.0019773784316432647, "loss": 2.2988, "step": 446 }, { "epoch": 0.1, "learning_rate": 0.0019772309012306226, "loss": 2.3926, "step": 447 }, { "epoch": 0.1, "learning_rate": 0.0019770828968475314, "loss": 2.3496, "step": 448 }, { "epoch": 0.1, "learning_rate": 0.001976934418565776, "loss": 2.3828, "step": 449 }, { "epoch": 0.1, "learning_rate": 0.0019767854664573693, "loss": 2.4951, "step": 450 }, { "epoch": 0.1, "learning_rate": 0.0019766360405945563, "loss": 2.5176, "step": 451 }, { "epoch": 0.1, "learning_rate": 0.00197648614104981, "loss": 2.4414, "step": 452 }, { "epoch": 0.1, "learning_rate": 0.001976335767895834, "loss": 2.2861, "step": 453 }, { "epoch": 0.1, "learning_rate": 0.0019761849212055604, "loss": 2.374, "step": 454 }, { "epoch": 0.1, "learning_rate": 0.0019760336010521532, "loss": 2.2363, "step": 455 }, { "epoch": 0.1, "learning_rate": 0.001975881807509004, "loss": 2.3027, "step": 456 }, { "epoch": 0.1, "learning_rate": 0.0019757295406497353, "loss": 2.4043, "step": 457 }, { "epoch": 0.1, "learning_rate": 0.001975576800548198, "loss": 2.418, "step": 458 }, { "epoch": 0.1, "learning_rate": 0.001975423587278474, "loss": 2.4385, "step": 459 }, { "epoch": 0.1, "learning_rate": 0.001975269900914873, "loss": 2.3213, "step": 460 }, { "epoch": 0.1, "learning_rate": 0.001975115741531935, "loss": 2.4629, "step": 461 }, { "epoch": 0.1, "learning_rate": 0.0019749611092044306, "loss": 2.3027, "step": 462 }, { "epoch": 0.1, "learning_rate": 0.0019748060040073573, "loss": 2.4375, "step": 463 }, { "epoch": 0.1, "learning_rate": 0.0019746504260159435, "loss": 2.4102, "step": 464 }, { "epoch": 0.1, "learning_rate": 0.001974494375305647, "loss": 2.3516, "step": 465 }, { "epoch": 0.1, "learning_rate": 0.0019743378519521543, "loss": 2.3496, "step": 466 }, { "epoch": 0.1, "learning_rate": 0.001974180856031382, "loss": 2.4414, "step": 467 }, { "epoch": 0.1, "learning_rate": 0.001974023387619475, "loss": 2.4238, "step": 468 }, { "epoch": 0.1, "learning_rate": 0.0019738654467928063, "loss": 2.5, "step": 469 }, { "epoch": 0.1, "learning_rate": 0.0019737070336279814, "loss": 2.2861, "step": 470 }, { "epoch": 0.1, "learning_rate": 0.0019735481482018318, "loss": 2.4492, "step": 471 }, { "epoch": 0.1, "learning_rate": 0.001973388790591419, "loss": 2.2988, "step": 472 }, { "epoch": 0.1, "learning_rate": 0.0019732289608740345, "loss": 2.4277, "step": 473 }, { "epoch": 0.1, "learning_rate": 0.001973068659127197, "loss": 2.498, "step": 474 }, { "epoch": 0.1, "learning_rate": 0.0019729078854286556, "loss": 2.4941, "step": 475 }, { "epoch": 0.1, "learning_rate": 0.0019727466398563875, "loss": 2.3594, "step": 476 }, { "epoch": 0.1, "learning_rate": 0.001972584922488599, "loss": 2.3809, "step": 477 }, { "epoch": 0.1, "learning_rate": 0.0019724227334037255, "loss": 2.373, "step": 478 }, { "epoch": 0.1, "learning_rate": 0.0019722600726804307, "loss": 2.4902, "step": 479 }, { "epoch": 0.1, "learning_rate": 0.0019720969403976074, "loss": 2.3467, "step": 480 }, { "epoch": 0.1, "learning_rate": 0.001971933336634377, "loss": 2.5098, "step": 481 }, { "epoch": 0.1, "learning_rate": 0.00197176926147009, "loss": 2.3848, "step": 482 }, { "epoch": 0.1, "learning_rate": 0.001971604714984324, "loss": 2.5312, "step": 483 }, { "epoch": 0.1, "learning_rate": 0.001971439697256887, "loss": 2.3438, "step": 484 }, { "epoch": 0.1, "learning_rate": 0.001971274208367815, "loss": 2.3164, "step": 485 }, { "epoch": 0.1, "learning_rate": 0.0019711082483973726, "loss": 2.3926, "step": 486 }, { "epoch": 0.1, "learning_rate": 0.001970941817426052, "loss": 2.4551, "step": 487 }, { "epoch": 0.1, "learning_rate": 0.001970774915534575, "loss": 2.4424, "step": 488 }, { "epoch": 0.11, "learning_rate": 0.001970607542803891, "loss": 2.4473, "step": 489 }, { "epoch": 0.11, "learning_rate": 0.0019704396993151787, "loss": 2.4141, "step": 490 }, { "epoch": 0.11, "learning_rate": 0.0019702713851498434, "loss": 2.541, "step": 491 }, { "epoch": 0.11, "learning_rate": 0.0019701026003895207, "loss": 2.457, "step": 492 }, { "epoch": 0.11, "learning_rate": 0.0019699333451160734, "loss": 2.3047, "step": 493 }, { "epoch": 0.11, "learning_rate": 0.001969763619411592, "loss": 2.3486, "step": 494 }, { "epoch": 0.11, "learning_rate": 0.001969593423358397, "loss": 2.5117, "step": 495 }, { "epoch": 0.11, "learning_rate": 0.001969422757039035, "loss": 2.3535, "step": 496 }, { "epoch": 0.11, "learning_rate": 0.001969251620536281, "loss": 2.4824, "step": 497 }, { "epoch": 0.11, "learning_rate": 0.00196908001393314, "loss": 2.4258, "step": 498 }, { "epoch": 0.11, "learning_rate": 0.001968907937312842, "loss": 2.3359, "step": 499 }, { "epoch": 0.11, "learning_rate": 0.001968735390758848, "loss": 2.5312, "step": 500 }, { "epoch": 0.11, "learning_rate": 0.001968562374354845, "loss": 2.4766, "step": 501 }, { "epoch": 0.11, "learning_rate": 0.0019683888881847473, "loss": 2.3008, "step": 502 }, { "epoch": 0.11, "learning_rate": 0.001968214932332699, "loss": 2.2949, "step": 503 }, { "epoch": 0.11, "learning_rate": 0.0019680405068830714, "loss": 2.3271, "step": 504 }, { "epoch": 0.11, "learning_rate": 0.001967865611920463, "loss": 2.249, "step": 505 }, { "epoch": 0.11, "learning_rate": 0.0019676902475296997, "loss": 2.2363, "step": 506 }, { "epoch": 0.11, "learning_rate": 0.0019675144137958358, "loss": 2.4961, "step": 507 }, { "epoch": 0.11, "learning_rate": 0.0019673381108041535, "loss": 2.3145, "step": 508 }, { "epoch": 0.11, "learning_rate": 0.001967161338640162, "loss": 2.3301, "step": 509 }, { "epoch": 0.11, "learning_rate": 0.0019669840973895987, "loss": 2.3008, "step": 510 }, { "epoch": 0.11, "learning_rate": 0.001966806387138427, "loss": 2.4453, "step": 511 }, { "epoch": 0.11, "learning_rate": 0.0019666282079728397, "loss": 2.3574, "step": 512 }, { "epoch": 0.11, "learning_rate": 0.001966449559979256, "loss": 2.4297, "step": 513 }, { "epoch": 0.11, "learning_rate": 0.001966270443244322, "loss": 2.3828, "step": 514 }, { "epoch": 0.11, "learning_rate": 0.0019660908578549123, "loss": 2.4277, "step": 515 }, { "epoch": 0.11, "learning_rate": 0.0019659108038981286, "loss": 2.4082, "step": 516 }, { "epoch": 0.11, "learning_rate": 0.001965730281461299, "loss": 2.3594, "step": 517 }, { "epoch": 0.11, "learning_rate": 0.0019655492906319794, "loss": 2.4473, "step": 518 }, { "epoch": 0.11, "learning_rate": 0.0019653678314979534, "loss": 2.2402, "step": 519 }, { "epoch": 0.11, "learning_rate": 0.001965185904147231, "loss": 2.4688, "step": 520 }, { "epoch": 0.11, "learning_rate": 0.001965003508668049, "loss": 2.4082, "step": 521 }, { "epoch": 0.11, "learning_rate": 0.0019648206451488718, "loss": 2.2334, "step": 522 }, { "epoch": 0.11, "learning_rate": 0.001964637313678391, "loss": 2.4297, "step": 523 }, { "epoch": 0.11, "learning_rate": 0.0019644535143455246, "loss": 2.4707, "step": 524 }, { "epoch": 0.11, "learning_rate": 0.0019642692472394184, "loss": 2.2227, "step": 525 }, { "epoch": 0.11, "learning_rate": 0.001964084512449444, "loss": 2.3496, "step": 526 }, { "epoch": 0.11, "learning_rate": 0.0019638993100652004, "loss": 2.3223, "step": 527 }, { "epoch": 0.11, "learning_rate": 0.001963713640176513, "loss": 2.3613, "step": 528 }, { "epoch": 0.11, "learning_rate": 0.001963527502873435, "loss": 2.4609, "step": 529 }, { "epoch": 0.11, "learning_rate": 0.0019633408982462452, "loss": 2.25, "step": 530 }, { "epoch": 0.11, "learning_rate": 0.0019631538263854493, "loss": 2.5234, "step": 531 }, { "epoch": 0.11, "learning_rate": 0.0019629662873817794, "loss": 2.375, "step": 532 }, { "epoch": 0.11, "learning_rate": 0.001962778281326195, "loss": 2.3809, "step": 533 }, { "epoch": 0.11, "learning_rate": 0.0019625898083098815, "loss": 2.4482, "step": 534 }, { "epoch": 0.12, "learning_rate": 0.0019624008684242513, "loss": 2.4961, "step": 535 }, { "epoch": 0.12, "learning_rate": 0.001962211461760942, "loss": 2.3379, "step": 536 }, { "epoch": 0.12, "learning_rate": 0.0019620215884118193, "loss": 2.4102, "step": 537 }, { "epoch": 0.12, "learning_rate": 0.0019618312484689735, "loss": 2.4043, "step": 538 }, { "epoch": 0.12, "learning_rate": 0.001961640442024723, "loss": 2.3086, "step": 539 }, { "epoch": 0.12, "learning_rate": 0.001961449169171611, "loss": 2.4219, "step": 540 }, { "epoch": 0.12, "learning_rate": 0.001961257430002408, "loss": 2.3525, "step": 541 }, { "epoch": 0.12, "learning_rate": 0.001961065224610109, "loss": 2.4297, "step": 542 }, { "epoch": 0.12, "learning_rate": 0.0019608725530879377, "loss": 2.3086, "step": 543 }, { "epoch": 0.12, "learning_rate": 0.001960679415529341, "loss": 2.377, "step": 544 }, { "epoch": 0.12, "learning_rate": 0.001960485812027995, "loss": 2.25, "step": 545 }, { "epoch": 0.12, "learning_rate": 0.001960291742677798, "loss": 2.2559, "step": 546 }, { "epoch": 0.12, "learning_rate": 0.001960097207572878, "loss": 2.3867, "step": 547 }, { "epoch": 0.12, "learning_rate": 0.0019599022068075866, "loss": 2.3369, "step": 548 }, { "epoch": 0.12, "learning_rate": 0.001959706740476502, "loss": 2.4609, "step": 549 }, { "epoch": 0.12, "learning_rate": 0.0019595108086744276, "loss": 2.4219, "step": 550 }, { "epoch": 0.12, "learning_rate": 0.0019593144114963936, "loss": 2.4062, "step": 551 }, { "epoch": 0.12, "learning_rate": 0.001959117549037655, "loss": 2.2324, "step": 552 }, { "epoch": 0.12, "learning_rate": 0.001958920221393693, "loss": 2.2217, "step": 553 }, { "epoch": 0.12, "learning_rate": 0.001958722428660214, "loss": 2.4668, "step": 554 }, { "epoch": 0.12, "learning_rate": 0.0019585241709331506, "loss": 2.4814, "step": 555 }, { "epoch": 0.12, "learning_rate": 0.00195832544830866, "loss": 2.4492, "step": 556 }, { "epoch": 0.12, "learning_rate": 0.0019581262608831256, "loss": 2.3809, "step": 557 }, { "epoch": 0.12, "learning_rate": 0.001957926608753156, "loss": 2.4375, "step": 558 }, { "epoch": 0.12, "learning_rate": 0.0019577264920155853, "loss": 2.4102, "step": 559 }, { "epoch": 0.12, "learning_rate": 0.0019575259107674735, "loss": 2.5332, "step": 560 }, { "epoch": 0.12, "learning_rate": 0.001957324865106104, "loss": 2.2227, "step": 561 }, { "epoch": 0.12, "learning_rate": 0.0019571233551289877, "loss": 2.2158, "step": 562 }, { "epoch": 0.12, "learning_rate": 0.0019569213809338595, "loss": 2.2969, "step": 563 }, { "epoch": 0.12, "learning_rate": 0.0019567189426186794, "loss": 2.3867, "step": 564 }, { "epoch": 0.12, "learning_rate": 0.001956516040281633, "loss": 2.4258, "step": 565 }, { "epoch": 0.12, "learning_rate": 0.0019563126740211306, "loss": 2.4512, "step": 566 }, { "epoch": 0.12, "learning_rate": 0.0019561088439358078, "loss": 2.3418, "step": 567 }, { "epoch": 0.12, "learning_rate": 0.001955904550124525, "loss": 2.4014, "step": 568 }, { "epoch": 0.12, "learning_rate": 0.0019556997926863675, "loss": 2.3145, "step": 569 }, { "epoch": 0.12, "learning_rate": 0.001955494571720645, "loss": 2.4902, "step": 570 }, { "epoch": 0.12, "learning_rate": 0.0019552888873268933, "loss": 2.2832, "step": 571 }, { "epoch": 0.12, "learning_rate": 0.0019550827396048716, "loss": 2.2773, "step": 572 }, { "epoch": 0.12, "learning_rate": 0.0019548761286545644, "loss": 2.3887, "step": 573 }, { "epoch": 0.12, "learning_rate": 0.0019546690545761813, "loss": 2.3262, "step": 574 }, { "epoch": 0.12, "learning_rate": 0.001954461517470156, "loss": 2.5469, "step": 575 }, { "epoch": 0.12, "learning_rate": 0.001954253517437146, "loss": 2.293, "step": 576 }, { "epoch": 0.12, "learning_rate": 0.0019540450545780354, "loss": 2.6523, "step": 577 }, { "epoch": 0.12, "learning_rate": 0.0019538361289939306, "loss": 2.4258, "step": 578 }, { "epoch": 0.12, "learning_rate": 0.0019536267407861637, "loss": 2.4131, "step": 579 }, { "epoch": 0.12, "learning_rate": 0.001953416890056291, "loss": 2.3945, "step": 580 }, { "epoch": 0.12, "learning_rate": 0.001953206576906093, "loss": 2.3672, "step": 581 }, { "epoch": 0.13, "learning_rate": 0.0019529958014375746, "loss": 2.3633, "step": 582 }, { "epoch": 0.13, "learning_rate": 0.0019527845637529644, "loss": 2.4531, "step": 583 }, { "epoch": 0.13, "learning_rate": 0.0019525728639547158, "loss": 2.4043, "step": 584 }, { "epoch": 0.13, "learning_rate": 0.0019523607021455062, "loss": 2.4785, "step": 585 }, { "epoch": 0.13, "learning_rate": 0.0019521480784282371, "loss": 2.3906, "step": 586 }, { "epoch": 0.13, "learning_rate": 0.0019519349929060334, "loss": 2.3281, "step": 587 }, { "epoch": 0.13, "learning_rate": 0.001951721445682245, "loss": 2.5391, "step": 588 }, { "epoch": 0.13, "learning_rate": 0.001951507436860445, "loss": 2.4512, "step": 589 }, { "epoch": 0.13, "learning_rate": 0.0019512929665444307, "loss": 2.5293, "step": 590 }, { "epoch": 0.13, "learning_rate": 0.0019510780348382234, "loss": 2.5352, "step": 591 }, { "epoch": 0.13, "learning_rate": 0.0019508626418460679, "loss": 2.416, "step": 592 }, { "epoch": 0.13, "learning_rate": 0.0019506467876724322, "loss": 2.2969, "step": 593 }, { "epoch": 0.13, "learning_rate": 0.001950430472422009, "loss": 2.375, "step": 594 }, { "epoch": 0.13, "learning_rate": 0.001950213696199714, "loss": 2.4707, "step": 595 }, { "epoch": 0.13, "learning_rate": 0.0019499964591106872, "loss": 2.4336, "step": 596 }, { "epoch": 0.13, "learning_rate": 0.001949778761260291, "loss": 2.2754, "step": 597 }, { "epoch": 0.13, "learning_rate": 0.001949560602754112, "loss": 2.4102, "step": 598 }, { "epoch": 0.13, "learning_rate": 0.00194934198369796, "loss": 2.2383, "step": 599 }, { "epoch": 0.13, "learning_rate": 0.0019491229041978682, "loss": 2.3994, "step": 600 }, { "epoch": 0.13, "learning_rate": 0.0019489033643600938, "loss": 2.2422, "step": 601 }, { "epoch": 0.13, "learning_rate": 0.0019486833642911154, "loss": 2.2822, "step": 602 }, { "epoch": 0.13, "learning_rate": 0.0019484629040976374, "loss": 2.3828, "step": 603 }, { "epoch": 0.13, "learning_rate": 0.0019482419838865856, "loss": 2.3887, "step": 604 }, { "epoch": 0.13, "learning_rate": 0.0019480206037651086, "loss": 2.3398, "step": 605 }, { "epoch": 0.13, "learning_rate": 0.0019477987638405801, "loss": 2.4102, "step": 606 }, { "epoch": 0.13, "learning_rate": 0.0019475764642205944, "loss": 2.4043, "step": 607 }, { "epoch": 0.13, "learning_rate": 0.0019473537050129703, "loss": 2.1895, "step": 608 }, { "epoch": 0.13, "learning_rate": 0.0019471304863257495, "loss": 2.5312, "step": 609 }, { "epoch": 0.13, "learning_rate": 0.0019469068082671953, "loss": 2.3838, "step": 610 }, { "epoch": 0.13, "learning_rate": 0.0019466826709457954, "loss": 2.3203, "step": 611 }, { "epoch": 0.13, "learning_rate": 0.0019464580744702588, "loss": 2.2236, "step": 612 }, { "epoch": 0.13, "learning_rate": 0.0019462330189495185, "loss": 2.3613, "step": 613 }, { "epoch": 0.13, "learning_rate": 0.0019460075044927293, "loss": 2.3584, "step": 614 }, { "epoch": 0.13, "learning_rate": 0.0019457815312092686, "loss": 2.4824, "step": 615 }, { "epoch": 0.13, "learning_rate": 0.001945555099208737, "loss": 2.3809, "step": 616 }, { "epoch": 0.13, "learning_rate": 0.0019453282086009565, "loss": 2.2568, "step": 617 }, { "epoch": 0.13, "learning_rate": 0.0019451008594959728, "loss": 2.3965, "step": 618 }, { "epoch": 0.13, "learning_rate": 0.0019448730520040534, "loss": 2.5898, "step": 619 }, { "epoch": 0.13, "learning_rate": 0.0019446447862356876, "loss": 2.3652, "step": 620 }, { "epoch": 0.13, "learning_rate": 0.0019444160623015873, "loss": 2.3613, "step": 621 }, { "epoch": 0.13, "learning_rate": 0.0019441868803126874, "loss": 2.4258, "step": 622 }, { "epoch": 0.13, "learning_rate": 0.0019439572403801438, "loss": 2.3145, "step": 623 }, { "epoch": 0.13, "learning_rate": 0.0019437271426153353, "loss": 2.4473, "step": 624 }, { "epoch": 0.13, "learning_rate": 0.0019434965871298624, "loss": 2.375, "step": 625 }, { "epoch": 0.13, "learning_rate": 0.0019432655740355478, "loss": 2.3691, "step": 626 }, { "epoch": 0.13, "learning_rate": 0.001943034103444436, "loss": 2.4668, "step": 627 }, { "epoch": 0.14, "learning_rate": 0.0019428021754687931, "loss": 2.4648, "step": 628 }, { "epoch": 0.14, "learning_rate": 0.0019425697902211078, "loss": 2.2363, "step": 629 }, { "epoch": 0.14, "learning_rate": 0.00194233694781409, "loss": 2.3301, "step": 630 }, { "epoch": 0.14, "learning_rate": 0.0019421036483606713, "loss": 2.3574, "step": 631 }, { "epoch": 0.14, "learning_rate": 0.0019418698919740054, "loss": 2.4434, "step": 632 }, { "epoch": 0.14, "learning_rate": 0.0019416356787674673, "loss": 2.3496, "step": 633 }, { "epoch": 0.14, "learning_rate": 0.0019414010088546535, "loss": 2.3145, "step": 634 }, { "epoch": 0.14, "learning_rate": 0.0019411658823493823, "loss": 2.3857, "step": 635 }, { "epoch": 0.14, "learning_rate": 0.0019409302993656933, "loss": 2.4082, "step": 636 }, { "epoch": 0.14, "learning_rate": 0.0019406942600178473, "loss": 2.4062, "step": 637 }, { "epoch": 0.14, "learning_rate": 0.0019404577644203268, "loss": 2.4355, "step": 638 }, { "epoch": 0.14, "learning_rate": 0.0019402208126878353, "loss": 2.4434, "step": 639 }, { "epoch": 0.14, "learning_rate": 0.0019399834049352977, "loss": 2.4014, "step": 640 }, { "epoch": 0.14, "learning_rate": 0.00193974554127786, "loss": 2.3438, "step": 641 }, { "epoch": 0.14, "learning_rate": 0.0019395072218308898, "loss": 2.3359, "step": 642 }, { "epoch": 0.14, "learning_rate": 0.0019392684467099746, "loss": 2.3809, "step": 643 }, { "epoch": 0.14, "learning_rate": 0.0019390292160309242, "loss": 2.4014, "step": 644 }, { "epoch": 0.14, "learning_rate": 0.0019387895299097684, "loss": 2.4219, "step": 645 }, { "epoch": 0.14, "learning_rate": 0.0019385493884627584, "loss": 2.3965, "step": 646 }, { "epoch": 0.14, "learning_rate": 0.001938308791806366, "loss": 2.3633, "step": 647 }, { "epoch": 0.14, "learning_rate": 0.0019380677400572842, "loss": 2.2773, "step": 648 }, { "epoch": 0.14, "learning_rate": 0.0019378262333324262, "loss": 2.3555, "step": 649 }, { "epoch": 0.14, "learning_rate": 0.0019375842717489259, "loss": 2.3711, "step": 650 }, { "epoch": 0.14, "learning_rate": 0.0019373418554241382, "loss": 2.3262, "step": 651 }, { "epoch": 0.14, "learning_rate": 0.0019370989844756385, "loss": 2.248, "step": 652 }, { "epoch": 0.14, "learning_rate": 0.0019368556590212225, "loss": 2.3047, "step": 653 }, { "epoch": 0.14, "learning_rate": 0.0019366118791789063, "loss": 2.3281, "step": 654 }, { "epoch": 0.14, "learning_rate": 0.0019363676450669263, "loss": 2.3555, "step": 655 }, { "epoch": 0.14, "learning_rate": 0.0019361229568037397, "loss": 2.2949, "step": 656 }, { "epoch": 0.14, "learning_rate": 0.0019358778145080237, "loss": 2.3457, "step": 657 }, { "epoch": 0.14, "learning_rate": 0.0019356322182986754, "loss": 2.3262, "step": 658 }, { "epoch": 0.14, "learning_rate": 0.0019353861682948126, "loss": 2.3125, "step": 659 }, { "epoch": 0.14, "learning_rate": 0.0019351396646157728, "loss": 2.3574, "step": 660 }, { "epoch": 0.14, "learning_rate": 0.0019348927073811139, "loss": 2.3711, "step": 661 }, { "epoch": 0.14, "learning_rate": 0.0019346452967106134, "loss": 2.3213, "step": 662 }, { "epoch": 0.14, "learning_rate": 0.0019343974327242686, "loss": 2.4121, "step": 663 }, { "epoch": 0.14, "learning_rate": 0.0019341491155422978, "loss": 2.3672, "step": 664 }, { "epoch": 0.14, "learning_rate": 0.0019339003452851372, "loss": 2.4092, "step": 665 }, { "epoch": 0.14, "learning_rate": 0.0019336511220734444, "loss": 2.4238, "step": 666 }, { "epoch": 0.14, "learning_rate": 0.001933401446028096, "loss": 2.4482, "step": 667 }, { "epoch": 0.14, "learning_rate": 0.0019331513172701883, "loss": 2.3398, "step": 668 }, { "epoch": 0.14, "learning_rate": 0.0019329007359210372, "loss": 2.3047, "step": 669 }, { "epoch": 0.14, "learning_rate": 0.001932649702102178, "loss": 2.3027, "step": 670 }, { "epoch": 0.14, "learning_rate": 0.0019323982159353656, "loss": 2.4355, "step": 671 }, { "epoch": 0.14, "learning_rate": 0.0019321462775425747, "loss": 2.2383, "step": 672 }, { "epoch": 0.14, "learning_rate": 0.0019318938870459984, "loss": 2.3027, "step": 673 }, { "epoch": 0.14, "learning_rate": 0.0019316410445680498, "loss": 2.3496, "step": 674 }, { "epoch": 0.15, "learning_rate": 0.0019313877502313604, "loss": 2.3164, "step": 675 }, { "epoch": 0.15, "learning_rate": 0.001931134004158782, "loss": 2.293, "step": 676 }, { "epoch": 0.15, "learning_rate": 0.001930879806473385, "loss": 2.3809, "step": 677 }, { "epoch": 0.15, "learning_rate": 0.0019306251572984588, "loss": 2.375, "step": 678 }, { "epoch": 0.15, "learning_rate": 0.001930370056757511, "loss": 2.2676, "step": 679 }, { "epoch": 0.15, "learning_rate": 0.00193011450497427, "loss": 2.3984, "step": 680 }, { "epoch": 0.15, "learning_rate": 0.0019298585020726814, "loss": 2.3477, "step": 681 }, { "epoch": 0.15, "learning_rate": 0.0019296020481769098, "loss": 2.4004, "step": 682 }, { "epoch": 0.15, "learning_rate": 0.0019293451434113393, "loss": 2.4648, "step": 683 }, { "epoch": 0.15, "learning_rate": 0.0019290877879005724, "loss": 2.375, "step": 684 }, { "epoch": 0.15, "learning_rate": 0.0019288299817694296, "loss": 2.4824, "step": 685 }, { "epoch": 0.15, "learning_rate": 0.0019285717251429507, "loss": 2.4082, "step": 686 }, { "epoch": 0.15, "learning_rate": 0.0019283130181463935, "loss": 2.2852, "step": 687 }, { "epoch": 0.15, "learning_rate": 0.001928053860905235, "loss": 2.4297, "step": 688 }, { "epoch": 0.15, "learning_rate": 0.0019277942535451693, "loss": 2.3223, "step": 689 }, { "epoch": 0.15, "learning_rate": 0.0019275341961921097, "loss": 2.4062, "step": 690 }, { "epoch": 0.15, "learning_rate": 0.0019272736889721883, "loss": 2.4805, "step": 691 }, { "epoch": 0.15, "learning_rate": 0.001927012732011754, "loss": 2.3535, "step": 692 }, { "epoch": 0.15, "learning_rate": 0.0019267513254373747, "loss": 2.3184, "step": 693 }, { "epoch": 0.15, "learning_rate": 0.0019264894693758361, "loss": 2.46, "step": 694 }, { "epoch": 0.15, "learning_rate": 0.0019262271639541421, "loss": 2.4258, "step": 695 }, { "epoch": 0.15, "learning_rate": 0.0019259644092995147, "loss": 2.293, "step": 696 }, { "epoch": 0.15, "learning_rate": 0.0019257012055393932, "loss": 2.3418, "step": 697 }, { "epoch": 0.15, "learning_rate": 0.001925437552801435, "loss": 2.3301, "step": 698 }, { "epoch": 0.15, "learning_rate": 0.0019251734512135154, "loss": 2.3467, "step": 699 }, { "epoch": 0.15, "learning_rate": 0.0019249089009037272, "loss": 2.3301, "step": 700 }, { "epoch": 0.15, "learning_rate": 0.0019246439020003813, "loss": 2.3223, "step": 701 }, { "epoch": 0.15, "learning_rate": 0.0019243784546320055, "loss": 2.3379, "step": 702 }, { "epoch": 0.15, "learning_rate": 0.0019241125589273456, "loss": 2.2695, "step": 703 }, { "epoch": 0.15, "learning_rate": 0.0019238462150153646, "loss": 2.4668, "step": 704 }, { "epoch": 0.15, "learning_rate": 0.001923579423025243, "loss": 2.3887, "step": 705 }, { "epoch": 0.15, "learning_rate": 0.0019233121830863782, "loss": 2.2354, "step": 706 }, { "epoch": 0.15, "learning_rate": 0.001923044495328386, "loss": 2.3965, "step": 707 }, { "epoch": 0.15, "learning_rate": 0.0019227763598810982, "loss": 2.4629, "step": 708 }, { "epoch": 0.15, "learning_rate": 0.0019225077768745642, "loss": 2.3818, "step": 709 }, { "epoch": 0.15, "learning_rate": 0.0019222387464390507, "loss": 2.3281, "step": 710 }, { "epoch": 0.15, "learning_rate": 0.0019219692687050408, "loss": 2.2402, "step": 711 }, { "epoch": 0.15, "learning_rate": 0.001921699343803235, "loss": 2.4385, "step": 712 }, { "epoch": 0.15, "learning_rate": 0.0019214289718645508, "loss": 2.207, "step": 713 }, { "epoch": 0.15, "learning_rate": 0.001921158153020122, "loss": 2.4512, "step": 714 }, { "epoch": 0.15, "learning_rate": 0.0019208868874013, "loss": 2.4336, "step": 715 }, { "epoch": 0.15, "learning_rate": 0.001920615175139652, "loss": 2.3154, "step": 716 }, { "epoch": 0.15, "learning_rate": 0.001920343016366962, "loss": 2.3486, "step": 717 }, { "epoch": 0.15, "learning_rate": 0.0019200704112152308, "loss": 2.293, "step": 718 }, { "epoch": 0.15, "learning_rate": 0.001919797359816676, "loss": 2.3438, "step": 719 }, { "epoch": 0.15, "learning_rate": 0.0019195238623037315, "loss": 2.2734, "step": 720 }, { "epoch": 0.16, "learning_rate": 0.0019192499188090465, "loss": 2.3398, "step": 721 }, { "epoch": 0.16, "learning_rate": 0.001918975529465488, "loss": 2.2969, "step": 722 }, { "epoch": 0.16, "learning_rate": 0.0019187006944061385, "loss": 2.4277, "step": 723 }, { "epoch": 0.16, "learning_rate": 0.0019184254137642968, "loss": 2.3145, "step": 724 }, { "epoch": 0.16, "learning_rate": 0.0019181496876734777, "loss": 2.2842, "step": 725 }, { "epoch": 0.16, "learning_rate": 0.0019178735162674121, "loss": 2.4434, "step": 726 }, { "epoch": 0.16, "learning_rate": 0.001917596899680047, "loss": 2.3369, "step": 727 }, { "epoch": 0.16, "learning_rate": 0.0019173198380455455, "loss": 2.2773, "step": 728 }, { "epoch": 0.16, "learning_rate": 0.0019170423314982862, "loss": 2.2383, "step": 729 }, { "epoch": 0.16, "learning_rate": 0.001916764380172863, "loss": 2.2979, "step": 730 }, { "epoch": 0.16, "learning_rate": 0.0019164859842040873, "loss": 2.3867, "step": 731 }, { "epoch": 0.16, "learning_rate": 0.0019162071437269837, "loss": 2.3887, "step": 732 }, { "epoch": 0.16, "learning_rate": 0.0019159278588767945, "loss": 2.2725, "step": 733 }, { "epoch": 0.16, "learning_rate": 0.0019156481297889765, "loss": 2.3213, "step": 734 }, { "epoch": 0.16, "learning_rate": 0.0019153679565992025, "loss": 2.3262, "step": 735 }, { "epoch": 0.16, "learning_rate": 0.0019150873394433595, "loss": 2.373, "step": 736 }, { "epoch": 0.16, "learning_rate": 0.0019148062784575513, "loss": 2.3984, "step": 737 }, { "epoch": 0.16, "learning_rate": 0.0019145247737780962, "loss": 2.3447, "step": 738 }, { "epoch": 0.16, "learning_rate": 0.0019142428255415277, "loss": 2.5039, "step": 739 }, { "epoch": 0.16, "learning_rate": 0.0019139604338845953, "loss": 2.3086, "step": 740 }, { "epoch": 0.16, "learning_rate": 0.001913677598944262, "loss": 2.3809, "step": 741 }, { "epoch": 0.16, "learning_rate": 0.0019133943208577068, "loss": 2.248, "step": 742 }, { "epoch": 0.16, "learning_rate": 0.001913110599762324, "loss": 2.4004, "step": 743 }, { "epoch": 0.16, "learning_rate": 0.0019128264357957212, "loss": 2.4766, "step": 744 }, { "epoch": 0.16, "learning_rate": 0.001912541829095723, "loss": 2.3125, "step": 745 }, { "epoch": 0.16, "learning_rate": 0.001912256779800367, "loss": 2.3652, "step": 746 }, { "epoch": 0.16, "learning_rate": 0.001911971288047906, "loss": 2.3418, "step": 747 }, { "epoch": 0.16, "learning_rate": 0.0019116853539768077, "loss": 2.457, "step": 748 }, { "epoch": 0.16, "learning_rate": 0.0019113989777257538, "loss": 2.502, "step": 749 }, { "epoch": 0.16, "learning_rate": 0.0019111121594336407, "loss": 2.3457, "step": 750 }, { "epoch": 0.16, "learning_rate": 0.0019108248992395795, "loss": 2.3906, "step": 751 }, { "epoch": 0.16, "learning_rate": 0.001910537197282895, "loss": 2.332, "step": 752 }, { "epoch": 0.16, "learning_rate": 0.0019102490537031272, "loss": 2.3223, "step": 753 }, { "epoch": 0.16, "learning_rate": 0.001909960468640029, "loss": 2.3516, "step": 754 }, { "epoch": 0.16, "learning_rate": 0.0019096714422335683, "loss": 2.2715, "step": 755 }, { "epoch": 0.16, "learning_rate": 0.0019093819746239273, "loss": 2.3418, "step": 756 }, { "epoch": 0.16, "learning_rate": 0.0019090920659515015, "loss": 2.4648, "step": 757 }, { "epoch": 0.16, "learning_rate": 0.0019088017163569006, "loss": 2.4805, "step": 758 }, { "epoch": 0.16, "learning_rate": 0.0019085109259809481, "loss": 2.4629, "step": 759 }, { "epoch": 0.16, "learning_rate": 0.0019082196949646816, "loss": 2.3359, "step": 760 }, { "epoch": 0.16, "learning_rate": 0.0019079280234493518, "loss": 2.1777, "step": 761 }, { "epoch": 0.16, "learning_rate": 0.0019076359115764235, "loss": 2.4824, "step": 762 }, { "epoch": 0.16, "learning_rate": 0.001907343359487575, "loss": 2.3496, "step": 763 }, { "epoch": 0.16, "learning_rate": 0.0019070503673246982, "loss": 2.3301, "step": 764 }, { "epoch": 0.16, "learning_rate": 0.0019067569352298984, "loss": 2.3281, "step": 765 }, { "epoch": 0.16, "learning_rate": 0.0019064630633454937, "loss": 2.4473, "step": 766 }, { "epoch": 0.16, "learning_rate": 0.001906168751814017, "loss": 2.3164, "step": 767 }, { "epoch": 0.17, "learning_rate": 0.0019058740007782125, "loss": 2.4062, "step": 768 }, { "epoch": 0.17, "learning_rate": 0.0019055788103810388, "loss": 2.2803, "step": 769 }, { "epoch": 0.17, "learning_rate": 0.0019052831807656674, "loss": 2.377, "step": 770 }, { "epoch": 0.17, "learning_rate": 0.0019049871120754827, "loss": 2.4189, "step": 771 }, { "epoch": 0.17, "learning_rate": 0.001904690604454082, "loss": 2.332, "step": 772 }, { "epoch": 0.17, "learning_rate": 0.001904393658045276, "loss": 2.5625, "step": 773 }, { "epoch": 0.17, "learning_rate": 0.001904096272993087, "loss": 2.4531, "step": 774 }, { "epoch": 0.17, "learning_rate": 0.0019037984494417516, "loss": 2.3867, "step": 775 }, { "epoch": 0.17, "learning_rate": 0.001903500187535718, "loss": 2.3613, "step": 776 }, { "epoch": 0.17, "learning_rate": 0.0019032014874196476, "loss": 2.1582, "step": 777 }, { "epoch": 0.17, "learning_rate": 0.0019029023492384135, "loss": 2.4492, "step": 778 }, { "epoch": 0.17, "learning_rate": 0.0019026027731371023, "loss": 2.3281, "step": 779 }, { "epoch": 0.17, "learning_rate": 0.0019023027592610124, "loss": 2.377, "step": 780 }, { "epoch": 0.17, "learning_rate": 0.0019020023077556548, "loss": 2.2266, "step": 781 }, { "epoch": 0.17, "learning_rate": 0.001901701418766752, "loss": 2.2969, "step": 782 }, { "epoch": 0.17, "learning_rate": 0.0019014000924402403, "loss": 2.293, "step": 783 }, { "epoch": 0.17, "learning_rate": 0.0019010983289222665, "loss": 2.3398, "step": 784 }, { "epoch": 0.17, "learning_rate": 0.0019007961283591904, "loss": 2.5215, "step": 785 }, { "epoch": 0.17, "learning_rate": 0.001900493490897583, "loss": 2.4004, "step": 786 }, { "epoch": 0.17, "learning_rate": 0.0019001904166842279, "loss": 2.459, "step": 787 }, { "epoch": 0.17, "learning_rate": 0.0018998869058661203, "loss": 2.3027, "step": 788 }, { "epoch": 0.17, "learning_rate": 0.0018995829585904667, "loss": 2.4492, "step": 789 }, { "epoch": 0.17, "learning_rate": 0.0018992785750046863, "loss": 2.3398, "step": 790 }, { "epoch": 0.17, "learning_rate": 0.0018989737552564093, "loss": 2.3027, "step": 791 }, { "epoch": 0.17, "learning_rate": 0.001898668499493477, "loss": 2.4082, "step": 792 }, { "epoch": 0.17, "learning_rate": 0.0018983628078639433, "loss": 2.4746, "step": 793 }, { "epoch": 0.17, "learning_rate": 0.001898056680516072, "loss": 2.3516, "step": 794 }, { "epoch": 0.17, "learning_rate": 0.00189775011759834, "loss": 2.3486, "step": 795 }, { "epoch": 0.17, "learning_rate": 0.0018974431192594339, "loss": 2.3105, "step": 796 }, { "epoch": 0.17, "learning_rate": 0.0018971356856482525, "loss": 2.373, "step": 797 }, { "epoch": 0.17, "learning_rate": 0.001896827816913905, "loss": 2.4141, "step": 798 }, { "epoch": 0.17, "learning_rate": 0.001896519513205712, "loss": 2.2607, "step": 799 }, { "epoch": 0.17, "learning_rate": 0.0018962107746732053, "loss": 2.3359, "step": 800 }, { "epoch": 0.17, "learning_rate": 0.0018959016014661275, "loss": 2.2617, "step": 801 }, { "epoch": 0.17, "learning_rate": 0.0018955919937344312, "loss": 2.3486, "step": 802 }, { "epoch": 0.17, "learning_rate": 0.001895281951628281, "loss": 2.3516, "step": 803 }, { "epoch": 0.17, "learning_rate": 0.0018949714752980514, "loss": 2.3066, "step": 804 }, { "epoch": 0.17, "learning_rate": 0.0018946605648943273, "loss": 2.4766, "step": 805 }, { "epoch": 0.17, "learning_rate": 0.0018943492205679053, "loss": 2.251, "step": 806 }, { "epoch": 0.17, "learning_rate": 0.0018940374424697914, "loss": 2.3984, "step": 807 }, { "epoch": 0.17, "learning_rate": 0.001893725230751202, "loss": 2.2686, "step": 808 }, { "epoch": 0.17, "learning_rate": 0.0018934125855635644, "loss": 2.3984, "step": 809 }, { "epoch": 0.17, "learning_rate": 0.0018930995070585157, "loss": 2.3555, "step": 810 }, { "epoch": 0.17, "learning_rate": 0.001892785995387903, "loss": 2.2383, "step": 811 }, { "epoch": 0.17, "learning_rate": 0.0018924720507037846, "loss": 2.3389, "step": 812 }, { "epoch": 0.17, "learning_rate": 0.001892157673158427, "loss": 2.3242, "step": 813 }, { "epoch": 0.17, "learning_rate": 0.001891842862904308, "loss": 2.3047, "step": 814 }, { "epoch": 0.18, "learning_rate": 0.0018915276200941154, "loss": 2.4531, "step": 815 }, { "epoch": 0.18, "learning_rate": 0.001891211944880746, "loss": 2.3594, "step": 816 }, { "epoch": 0.18, "learning_rate": 0.001890895837417306, "loss": 2.2119, "step": 817 }, { "epoch": 0.18, "learning_rate": 0.0018905792978571125, "loss": 2.375, "step": 818 }, { "epoch": 0.18, "learning_rate": 0.0018902623263536919, "loss": 2.3555, "step": 819 }, { "epoch": 0.18, "learning_rate": 0.0018899449230607788, "loss": 2.2666, "step": 820 }, { "epoch": 0.18, "learning_rate": 0.001889627088132319, "loss": 2.2393, "step": 821 }, { "epoch": 0.18, "learning_rate": 0.0018893088217224662, "loss": 2.416, "step": 822 }, { "epoch": 0.18, "learning_rate": 0.0018889901239855847, "loss": 2.2979, "step": 823 }, { "epoch": 0.18, "learning_rate": 0.0018886709950762467, "loss": 2.3262, "step": 824 }, { "epoch": 0.18, "learning_rate": 0.001888351435149234, "loss": 2.293, "step": 825 }, { "epoch": 0.18, "learning_rate": 0.0018880314443595384, "loss": 2.3008, "step": 826 }, { "epoch": 0.18, "learning_rate": 0.001887711022862359, "loss": 2.4346, "step": 827 }, { "epoch": 0.18, "learning_rate": 0.0018873901708131048, "loss": 2.2949, "step": 828 }, { "epoch": 0.18, "learning_rate": 0.0018870688883673937, "loss": 2.2334, "step": 829 }, { "epoch": 0.18, "learning_rate": 0.0018867471756810522, "loss": 2.2715, "step": 830 }, { "epoch": 0.18, "learning_rate": 0.0018864250329101151, "loss": 2.374, "step": 831 }, { "epoch": 0.18, "learning_rate": 0.001886102460210826, "loss": 2.373, "step": 832 }, { "epoch": 0.18, "learning_rate": 0.0018857794577396376, "loss": 2.334, "step": 833 }, { "epoch": 0.18, "learning_rate": 0.0018854560256532098, "loss": 2.3809, "step": 834 }, { "epoch": 0.18, "learning_rate": 0.0018851321641084125, "loss": 2.2686, "step": 835 }, { "epoch": 0.18, "learning_rate": 0.0018848078732623223, "loss": 2.3105, "step": 836 }, { "epoch": 0.18, "learning_rate": 0.0018844831532722252, "loss": 2.3799, "step": 837 }, { "epoch": 0.18, "learning_rate": 0.0018841580042956144, "loss": 2.4258, "step": 838 }, { "epoch": 0.18, "learning_rate": 0.0018838324264901921, "loss": 2.3105, "step": 839 }, { "epoch": 0.18, "learning_rate": 0.001883506420013868, "loss": 2.3809, "step": 840 }, { "epoch": 0.18, "learning_rate": 0.001883179985024759, "loss": 2.3818, "step": 841 }, { "epoch": 0.18, "learning_rate": 0.0018828531216811913, "loss": 2.2871, "step": 842 }, { "epoch": 0.18, "learning_rate": 0.0018825258301416983, "loss": 2.3955, "step": 843 }, { "epoch": 0.18, "learning_rate": 0.00188219811056502, "loss": 2.498, "step": 844 }, { "epoch": 0.18, "learning_rate": 0.001881869963110106, "loss": 2.2275, "step": 845 }, { "epoch": 0.18, "learning_rate": 0.0018815413879361112, "loss": 2.373, "step": 846 }, { "epoch": 0.18, "learning_rate": 0.0018812123852024, "loss": 2.3086, "step": 847 }, { "epoch": 0.18, "learning_rate": 0.001880882955068543, "loss": 2.3066, "step": 848 }, { "epoch": 0.18, "learning_rate": 0.0018805530976943182, "loss": 2.3213, "step": 849 }, { "epoch": 0.18, "learning_rate": 0.001880222813239711, "loss": 2.3223, "step": 850 }, { "epoch": 0.18, "learning_rate": 0.001879892101864914, "loss": 2.293, "step": 851 }, { "epoch": 0.18, "learning_rate": 0.0018795609637303268, "loss": 2.3555, "step": 852 }, { "epoch": 0.18, "learning_rate": 0.001879229398996556, "loss": 2.3125, "step": 853 }, { "epoch": 0.18, "learning_rate": 0.0018788974078244145, "loss": 2.3691, "step": 854 }, { "epoch": 0.18, "learning_rate": 0.0018785649903749234, "loss": 2.4043, "step": 855 }, { "epoch": 0.18, "learning_rate": 0.0018782321468093094, "loss": 2.502, "step": 856 }, { "epoch": 0.18, "learning_rate": 0.001877898877289006, "loss": 2.3184, "step": 857 }, { "epoch": 0.18, "learning_rate": 0.0018775651819756538, "loss": 2.3047, "step": 858 }, { "epoch": 0.18, "learning_rate": 0.0018772310610310995, "loss": 2.3691, "step": 859 }, { "epoch": 0.18, "learning_rate": 0.0018768965146173965, "loss": 2.3027, "step": 860 }, { "epoch": 0.19, "learning_rate": 0.0018765615428968042, "loss": 2.2861, "step": 861 }, { "epoch": 0.19, "learning_rate": 0.0018762261460317886, "loss": 2.4082, "step": 862 }, { "epoch": 0.19, "learning_rate": 0.001875890324185022, "loss": 2.4062, "step": 863 }, { "epoch": 0.19, "learning_rate": 0.0018755540775193822, "loss": 2.375, "step": 864 }, { "epoch": 0.19, "learning_rate": 0.0018752174061979542, "loss": 2.3418, "step": 865 }, { "epoch": 0.19, "learning_rate": 0.001874880310384027, "loss": 2.457, "step": 866 }, { "epoch": 0.19, "learning_rate": 0.001874542790241098, "loss": 2.2773, "step": 867 }, { "epoch": 0.19, "learning_rate": 0.0018742048459328682, "loss": 2.2793, "step": 868 }, { "epoch": 0.19, "learning_rate": 0.001873866477623246, "loss": 2.375, "step": 869 }, { "epoch": 0.19, "learning_rate": 0.001873527685476344, "loss": 2.3721, "step": 870 }, { "epoch": 0.19, "learning_rate": 0.001873188469656481, "loss": 2.3477, "step": 871 }, { "epoch": 0.19, "learning_rate": 0.0018728488303281826, "loss": 2.375, "step": 872 }, { "epoch": 0.19, "learning_rate": 0.0018725087676561768, "loss": 2.5645, "step": 873 }, { "epoch": 0.19, "learning_rate": 0.0018721682818054002, "loss": 2.4512, "step": 874 }, { "epoch": 0.19, "learning_rate": 0.001871827372940992, "loss": 2.3594, "step": 875 }, { "epoch": 0.19, "learning_rate": 0.0018714860412282984, "loss": 2.2852, "step": 876 }, { "epoch": 0.19, "learning_rate": 0.0018711442868328697, "loss": 2.3418, "step": 877 }, { "epoch": 0.19, "learning_rate": 0.0018708021099204616, "loss": 2.2109, "step": 878 }, { "epoch": 0.19, "learning_rate": 0.0018704595106570343, "loss": 2.3848, "step": 879 }, { "epoch": 0.19, "learning_rate": 0.0018701164892087534, "loss": 2.3652, "step": 880 }, { "epoch": 0.19, "learning_rate": 0.0018697730457419893, "loss": 2.3135, "step": 881 }, { "epoch": 0.19, "learning_rate": 0.0018694291804233164, "loss": 2.3252, "step": 882 }, { "epoch": 0.19, "learning_rate": 0.0018690848934195144, "loss": 2.4453, "step": 883 }, { "epoch": 0.19, "learning_rate": 0.001868740184897567, "loss": 2.4199, "step": 884 }, { "epoch": 0.19, "learning_rate": 0.0018683950550246627, "loss": 2.5117, "step": 885 }, { "epoch": 0.19, "learning_rate": 0.0018680495039681939, "loss": 2.4492, "step": 886 }, { "epoch": 0.19, "learning_rate": 0.0018677035318957582, "loss": 2.2852, "step": 887 }, { "epoch": 0.19, "learning_rate": 0.001867357138975157, "loss": 2.4785, "step": 888 }, { "epoch": 0.19, "learning_rate": 0.0018670103253743946, "loss": 2.416, "step": 889 }, { "epoch": 0.19, "learning_rate": 0.0018666630912616813, "loss": 2.2217, "step": 890 }, { "epoch": 0.19, "learning_rate": 0.0018663154368054298, "loss": 2.3672, "step": 891 }, { "epoch": 0.19, "learning_rate": 0.001865967362174258, "loss": 2.2559, "step": 892 }, { "epoch": 0.19, "learning_rate": 0.0018656188675369864, "loss": 2.2539, "step": 893 }, { "epoch": 0.19, "learning_rate": 0.0018652699530626398, "loss": 2.4395, "step": 894 }, { "epoch": 0.19, "learning_rate": 0.0018649206189204463, "loss": 2.2451, "step": 895 }, { "epoch": 0.19, "learning_rate": 0.0018645708652798383, "loss": 2.293, "step": 896 }, { "epoch": 0.19, "learning_rate": 0.0018642206923104507, "loss": 2.2539, "step": 897 }, { "epoch": 0.19, "learning_rate": 0.0018638701001821225, "loss": 2.2773, "step": 898 }, { "epoch": 0.19, "learning_rate": 0.0018635190890648951, "loss": 2.4492, "step": 899 }, { "epoch": 0.19, "learning_rate": 0.0018631676591290148, "loss": 2.4258, "step": 900 }, { "epoch": 0.19, "learning_rate": 0.0018628158105449288, "loss": 2.3389, "step": 901 }, { "epoch": 0.19, "learning_rate": 0.0018624635434832896, "loss": 2.2656, "step": 902 }, { "epoch": 0.19, "learning_rate": 0.0018621108581149506, "loss": 2.2168, "step": 903 }, { "epoch": 0.19, "learning_rate": 0.0018617577546109695, "loss": 2.2266, "step": 904 }, { "epoch": 0.19, "learning_rate": 0.0018614042331426067, "loss": 2.3711, "step": 905 }, { "epoch": 0.19, "learning_rate": 0.0018610502938813244, "loss": 2.418, "step": 906 }, { "epoch": 0.19, "learning_rate": 0.0018606959369987883, "loss": 2.1934, "step": 907 }, { "epoch": 0.2, "learning_rate": 0.0018603411626668663, "loss": 2.332, "step": 908 }, { "epoch": 0.2, "learning_rate": 0.001859985971057629, "loss": 2.208, "step": 909 }, { "epoch": 0.2, "learning_rate": 0.0018596303623433488, "loss": 2.4434, "step": 910 }, { "epoch": 0.2, "learning_rate": 0.0018592743366965016, "loss": 2.3613, "step": 911 }, { "epoch": 0.2, "learning_rate": 0.0018589178942897638, "loss": 2.1494, "step": 912 }, { "epoch": 0.2, "learning_rate": 0.0018585610352960154, "loss": 2.3262, "step": 913 }, { "epoch": 0.2, "learning_rate": 0.0018582037598883381, "loss": 2.3262, "step": 914 }, { "epoch": 0.2, "learning_rate": 0.001857846068240015, "loss": 2.3242, "step": 915 }, { "epoch": 0.2, "learning_rate": 0.0018574879605245318, "loss": 2.4414, "step": 916 }, { "epoch": 0.2, "learning_rate": 0.0018571294369155754, "loss": 2.4473, "step": 917 }, { "epoch": 0.2, "learning_rate": 0.0018567704975870347, "loss": 2.3018, "step": 918 }, { "epoch": 0.2, "learning_rate": 0.0018564111427130001, "loss": 2.5352, "step": 919 }, { "epoch": 0.2, "learning_rate": 0.0018560513724677644, "loss": 2.3125, "step": 920 }, { "epoch": 0.2, "learning_rate": 0.0018556911870258204, "loss": 2.3145, "step": 921 }, { "epoch": 0.2, "learning_rate": 0.001855330586561863, "loss": 2.4062, "step": 922 }, { "epoch": 0.2, "learning_rate": 0.001854969571250789, "loss": 2.3613, "step": 923 }, { "epoch": 0.2, "learning_rate": 0.0018546081412676952, "loss": 2.2812, "step": 924 }, { "epoch": 0.2, "learning_rate": 0.0018542462967878804, "loss": 2.3594, "step": 925 }, { "epoch": 0.2, "learning_rate": 0.0018538840379868438, "loss": 2.4082, "step": 926 }, { "epoch": 0.2, "learning_rate": 0.0018535213650402865, "loss": 2.3125, "step": 927 }, { "epoch": 0.2, "learning_rate": 0.0018531582781241092, "loss": 2.2373, "step": 928 }, { "epoch": 0.2, "learning_rate": 0.0018527947774144148, "loss": 2.2441, "step": 929 }, { "epoch": 0.2, "learning_rate": 0.0018524308630875055, "loss": 2.4297, "step": 930 }, { "epoch": 0.2, "learning_rate": 0.001852066535319885, "loss": 2.3799, "step": 931 }, { "epoch": 0.2, "learning_rate": 0.0018517017942882576, "loss": 2.1895, "step": 932 }, { "epoch": 0.2, "learning_rate": 0.0018513366401695275, "loss": 2.291, "step": 933 }, { "epoch": 0.2, "learning_rate": 0.001850971073140799, "loss": 2.3184, "step": 934 }, { "epoch": 0.2, "learning_rate": 0.001850605093379378, "loss": 2.252, "step": 935 }, { "epoch": 0.2, "learning_rate": 0.0018502387010627687, "loss": 2.377, "step": 936 }, { "epoch": 0.2, "learning_rate": 0.0018498718963686777, "loss": 2.2803, "step": 937 }, { "epoch": 0.2, "learning_rate": 0.0018495046794750093, "loss": 2.2021, "step": 938 }, { "epoch": 0.2, "learning_rate": 0.001849137050559869, "loss": 2.4082, "step": 939 }, { "epoch": 0.2, "learning_rate": 0.0018487690098015622, "loss": 2.1709, "step": 940 }, { "epoch": 0.2, "learning_rate": 0.0018484005573785934, "loss": 2.3008, "step": 941 }, { "epoch": 0.2, "learning_rate": 0.0018480316934696673, "loss": 2.2998, "step": 942 }, { "epoch": 0.2, "learning_rate": 0.001847662418253688, "loss": 2.1885, "step": 943 }, { "epoch": 0.2, "learning_rate": 0.0018472927319097587, "loss": 2.3281, "step": 944 }, { "epoch": 0.2, "learning_rate": 0.0018469226346171826, "loss": 2.3613, "step": 945 }, { "epoch": 0.2, "learning_rate": 0.001846552126555462, "loss": 2.3887, "step": 946 }, { "epoch": 0.2, "learning_rate": 0.0018461812079042987, "loss": 2.2539, "step": 947 }, { "epoch": 0.2, "learning_rate": 0.001845809878843593, "loss": 2.418, "step": 948 }, { "epoch": 0.2, "learning_rate": 0.0018454381395534448, "loss": 2.3613, "step": 949 }, { "epoch": 0.2, "learning_rate": 0.0018450659902141525, "loss": 2.2549, "step": 950 }, { "epoch": 0.2, "learning_rate": 0.0018446934310062137, "loss": 2.4199, "step": 951 }, { "epoch": 0.2, "learning_rate": 0.001844320462110325, "loss": 2.3418, "step": 952 }, { "epoch": 0.2, "learning_rate": 0.0018439470837073816, "loss": 2.3965, "step": 953 }, { "epoch": 0.21, "learning_rate": 0.0018435732959784766, "loss": 2.1816, "step": 954 }, { "epoch": 0.21, "learning_rate": 0.0018431990991049025, "loss": 2.3643, "step": 955 }, { "epoch": 0.21, "learning_rate": 0.00184282449326815, "loss": 2.2598, "step": 956 }, { "epoch": 0.21, "learning_rate": 0.001842449478649908, "loss": 2.334, "step": 957 }, { "epoch": 0.21, "learning_rate": 0.0018420740554320639, "loss": 2.2627, "step": 958 }, { "epoch": 0.21, "learning_rate": 0.0018416982237967029, "loss": 2.418, "step": 959 }, { "epoch": 0.21, "learning_rate": 0.0018413219839261085, "loss": 2.4609, "step": 960 }, { "epoch": 0.21, "learning_rate": 0.0018409453360027626, "loss": 2.4121, "step": 961 }, { "epoch": 0.21, "learning_rate": 0.0018405682802093443, "loss": 2.2295, "step": 962 }, { "epoch": 0.21, "learning_rate": 0.0018401908167287309, "loss": 2.2129, "step": 963 }, { "epoch": 0.21, "learning_rate": 0.0018398129457439976, "loss": 2.3418, "step": 964 }, { "epoch": 0.21, "learning_rate": 0.0018394346674384164, "loss": 2.3906, "step": 965 }, { "epoch": 0.21, "learning_rate": 0.0018390559819954585, "loss": 2.377, "step": 966 }, { "epoch": 0.21, "learning_rate": 0.0018386768895987907, "loss": 2.252, "step": 967 }, { "epoch": 0.21, "learning_rate": 0.0018382973904322783, "loss": 2.3066, "step": 968 }, { "epoch": 0.21, "learning_rate": 0.0018379174846799838, "loss": 2.4531, "step": 969 }, { "epoch": 0.21, "learning_rate": 0.0018375371725261665, "loss": 2.2363, "step": 970 }, { "epoch": 0.21, "learning_rate": 0.0018371564541552832, "loss": 2.2539, "step": 971 }, { "epoch": 0.21, "learning_rate": 0.0018367753297519873, "loss": 2.3486, "step": 972 }, { "epoch": 0.21, "learning_rate": 0.0018363937995011298, "loss": 2.3447, "step": 973 }, { "epoch": 0.21, "learning_rate": 0.0018360118635877578, "loss": 2.4062, "step": 974 }, { "epoch": 0.21, "learning_rate": 0.0018356295221971155, "loss": 2.2607, "step": 975 }, { "epoch": 0.21, "learning_rate": 0.0018352467755146438, "loss": 2.4785, "step": 976 }, { "epoch": 0.21, "learning_rate": 0.00183486362372598, "loss": 2.3135, "step": 977 }, { "epoch": 0.21, "learning_rate": 0.0018344800670169585, "loss": 2.4062, "step": 978 }, { "epoch": 0.21, "learning_rate": 0.001834096105573609, "loss": 2.2832, "step": 979 }, { "epoch": 0.21, "learning_rate": 0.0018337117395821583, "loss": 2.4141, "step": 980 }, { "epoch": 0.21, "learning_rate": 0.001833326969229029, "loss": 2.2637, "step": 981 }, { "epoch": 0.21, "learning_rate": 0.0018329417947008408, "loss": 2.3418, "step": 982 }, { "epoch": 0.21, "learning_rate": 0.0018325562161844076, "loss": 2.2393, "step": 983 }, { "epoch": 0.21, "learning_rate": 0.0018321702338667408, "loss": 2.1924, "step": 984 }, { "epoch": 0.21, "learning_rate": 0.001831783847935047, "loss": 2.3398, "step": 985 }, { "epoch": 0.21, "learning_rate": 0.001831397058576729, "loss": 2.332, "step": 986 }, { "epoch": 0.21, "learning_rate": 0.0018310098659793845, "loss": 2.248, "step": 987 }, { "epoch": 0.21, "learning_rate": 0.001830622270330808, "loss": 2.3496, "step": 988 }, { "epoch": 0.21, "learning_rate": 0.0018302342718189877, "loss": 2.3945, "step": 989 }, { "epoch": 0.21, "learning_rate": 0.001829845870632109, "loss": 2.3691, "step": 990 }, { "epoch": 0.21, "learning_rate": 0.0018294570669585518, "loss": 2.2725, "step": 991 }, { "epoch": 0.21, "learning_rate": 0.0018290678609868907, "loss": 2.3418, "step": 992 }, { "epoch": 0.21, "learning_rate": 0.0018286782529058963, "loss": 2.3574, "step": 993 }, { "epoch": 0.21, "learning_rate": 0.001828288242904534, "loss": 2.3887, "step": 994 }, { "epoch": 0.21, "learning_rate": 0.001827897831171964, "loss": 2.2246, "step": 995 }, { "epoch": 0.21, "learning_rate": 0.001827507017897541, "loss": 2.4307, "step": 996 }, { "epoch": 0.21, "learning_rate": 0.0018271158032708153, "loss": 2.3457, "step": 997 }, { "epoch": 0.21, "learning_rate": 0.0018267241874815312, "loss": 2.3018, "step": 998 }, { "epoch": 0.21, "learning_rate": 0.0018263321707196276, "loss": 2.2871, "step": 999 }, { "epoch": 0.21, "learning_rate": 0.0018259397531752383, "loss": 2.375, "step": 1000 }, { "epoch": 0.22, "learning_rate": 0.0018255469350386907, "loss": 2.2881, "step": 1001 }, { "epoch": 0.22, "learning_rate": 0.0018251537165005077, "loss": 2.3145, "step": 1002 }, { "epoch": 0.22, "learning_rate": 0.0018247600977514052, "loss": 2.2031, "step": 1003 }, { "epoch": 0.22, "learning_rate": 0.001824366078982294, "loss": 2.418, "step": 1004 }, { "epoch": 0.22, "learning_rate": 0.0018239716603842783, "loss": 2.2402, "step": 1005 }, { "epoch": 0.22, "learning_rate": 0.0018235768421486572, "loss": 2.373, "step": 1006 }, { "epoch": 0.22, "learning_rate": 0.001823181624466922, "loss": 2.2441, "step": 1007 }, { "epoch": 0.22, "learning_rate": 0.00182278600753076, "loss": 2.3008, "step": 1008 }, { "epoch": 0.22, "learning_rate": 0.0018223899915320496, "loss": 2.3555, "step": 1009 }, { "epoch": 0.22, "learning_rate": 0.0018219935766628646, "loss": 2.25, "step": 1010 }, { "epoch": 0.22, "learning_rate": 0.0018215967631154718, "loss": 2.3037, "step": 1011 }, { "epoch": 0.22, "learning_rate": 0.001821199551082331, "loss": 2.2217, "step": 1012 }, { "epoch": 0.22, "learning_rate": 0.0018208019407560956, "loss": 2.5488, "step": 1013 }, { "epoch": 0.22, "learning_rate": 0.001820403932329612, "loss": 2.2578, "step": 1014 }, { "epoch": 0.22, "learning_rate": 0.0018200055259959198, "loss": 2.4873, "step": 1015 }, { "epoch": 0.22, "learning_rate": 0.0018196067219482517, "loss": 2.3223, "step": 1016 }, { "epoch": 0.22, "learning_rate": 0.001819207520380033, "loss": 2.375, "step": 1017 }, { "epoch": 0.22, "learning_rate": 0.001818807921484882, "loss": 2.4248, "step": 1018 }, { "epoch": 0.22, "learning_rate": 0.0018184079254566097, "loss": 2.4014, "step": 1019 }, { "epoch": 0.22, "learning_rate": 0.0018180075324892198, "loss": 2.3086, "step": 1020 }, { "epoch": 0.22, "learning_rate": 0.0018176067427769082, "loss": 2.166, "step": 1021 }, { "epoch": 0.22, "learning_rate": 0.0018172055565140633, "loss": 2.4297, "step": 1022 }, { "epoch": 0.22, "learning_rate": 0.0018168039738952664, "loss": 2.2842, "step": 1023 }, { "epoch": 0.22, "learning_rate": 0.0018164019951152901, "loss": 2.2627, "step": 1024 }, { "epoch": 0.22, "learning_rate": 0.0018159996203691003, "loss": 2.3809, "step": 1025 }, { "epoch": 0.22, "learning_rate": 0.0018155968498518537, "loss": 2.4023, "step": 1026 }, { "epoch": 0.22, "learning_rate": 0.0018151936837588996, "loss": 2.4307, "step": 1027 }, { "epoch": 0.22, "learning_rate": 0.0018147901222857793, "loss": 2.3066, "step": 1028 }, { "epoch": 0.22, "learning_rate": 0.0018143861656282256, "loss": 2.334, "step": 1029 }, { "epoch": 0.22, "learning_rate": 0.0018139818139821632, "loss": 2.541, "step": 1030 }, { "epoch": 0.22, "learning_rate": 0.0018135770675437085, "loss": 2.3105, "step": 1031 }, { "epoch": 0.22, "learning_rate": 0.0018131719265091681, "loss": 2.291, "step": 1032 }, { "epoch": 0.22, "learning_rate": 0.0018127663910750419, "loss": 2.2441, "step": 1033 }, { "epoch": 0.22, "learning_rate": 0.0018123604614380196, "loss": 2.3047, "step": 1034 }, { "epoch": 0.22, "learning_rate": 0.001811954137794983, "loss": 2.4121, "step": 1035 }, { "epoch": 0.22, "learning_rate": 0.0018115474203430047, "loss": 2.2988, "step": 1036 }, { "epoch": 0.22, "learning_rate": 0.001811140309279348, "loss": 2.2832, "step": 1037 }, { "epoch": 0.22, "learning_rate": 0.0018107328048014674, "loss": 2.416, "step": 1038 }, { "epoch": 0.22, "learning_rate": 0.0018103249071070087, "loss": 2.332, "step": 1039 }, { "epoch": 0.22, "learning_rate": 0.0018099166163938075, "loss": 2.4355, "step": 1040 }, { "epoch": 0.22, "learning_rate": 0.00180950793285989, "loss": 2.3506, "step": 1041 }, { "epoch": 0.22, "learning_rate": 0.001809098856703474, "loss": 2.2188, "step": 1042 }, { "epoch": 0.22, "learning_rate": 0.001808689388122967, "loss": 2.3945, "step": 1043 }, { "epoch": 0.22, "learning_rate": 0.0018082795273169662, "loss": 2.2793, "step": 1044 }, { "epoch": 0.22, "learning_rate": 0.0018078692744842608, "loss": 2.2539, "step": 1045 }, { "epoch": 0.22, "learning_rate": 0.0018074586298238288, "loss": 2.3184, "step": 1046 }, { "epoch": 0.23, "learning_rate": 0.0018070475935348376, "loss": 2.2598, "step": 1047 }, { "epoch": 0.23, "learning_rate": 0.0018066361658166464, "loss": 2.1367, "step": 1048 }, { "epoch": 0.23, "learning_rate": 0.0018062243468688031, "loss": 2.3301, "step": 1049 }, { "epoch": 0.23, "learning_rate": 0.0018058121368910455, "loss": 2.3086, "step": 1050 }, { "epoch": 0.23, "learning_rate": 0.001805399536083301, "loss": 2.4707, "step": 1051 }, { "epoch": 0.23, "learning_rate": 0.0018049865446456872, "loss": 2.3203, "step": 1052 }, { "epoch": 0.23, "learning_rate": 0.0018045731627785097, "loss": 2.252, "step": 1053 }, { "epoch": 0.23, "learning_rate": 0.0018041593906822655, "loss": 2.2588, "step": 1054 }, { "epoch": 0.23, "learning_rate": 0.0018037452285576388, "loss": 2.3691, "step": 1055 }, { "epoch": 0.23, "learning_rate": 0.0018033306766055044, "loss": 2.334, "step": 1056 }, { "epoch": 0.23, "learning_rate": 0.0018029157350269256, "loss": 2.4375, "step": 1057 }, { "epoch": 0.23, "learning_rate": 0.0018025004040231553, "loss": 2.3447, "step": 1058 }, { "epoch": 0.23, "learning_rate": 0.0018020846837956336, "loss": 2.377, "step": 1059 }, { "epoch": 0.23, "learning_rate": 0.0018016685745459915, "loss": 2.4199, "step": 1060 }, { "epoch": 0.23, "learning_rate": 0.0018012520764760476, "loss": 2.3096, "step": 1061 }, { "epoch": 0.23, "learning_rate": 0.0018008351897878087, "loss": 2.3535, "step": 1062 }, { "epoch": 0.23, "learning_rate": 0.0018004179146834712, "loss": 2.4492, "step": 1063 }, { "epoch": 0.23, "learning_rate": 0.001800000251365419, "loss": 2.5371, "step": 1064 }, { "epoch": 0.23, "learning_rate": 0.0017995822000362244, "loss": 2.4023, "step": 1065 }, { "epoch": 0.23, "learning_rate": 0.0017991637608986487, "loss": 2.375, "step": 1066 }, { "epoch": 0.23, "learning_rate": 0.0017987449341556397, "loss": 2.2725, "step": 1067 }, { "epoch": 0.23, "learning_rate": 0.0017983257200103347, "loss": 2.4619, "step": 1068 }, { "epoch": 0.23, "learning_rate": 0.0017979061186660583, "loss": 2.3008, "step": 1069 }, { "epoch": 0.23, "learning_rate": 0.0017974861303263227, "loss": 2.3369, "step": 1070 }, { "epoch": 0.23, "learning_rate": 0.0017970657551948284, "loss": 2.2988, "step": 1071 }, { "epoch": 0.23, "learning_rate": 0.001796644993475463, "loss": 2.3311, "step": 1072 }, { "epoch": 0.23, "learning_rate": 0.0017962238453723017, "loss": 2.4014, "step": 1073 }, { "epoch": 0.23, "learning_rate": 0.001795802311089607, "loss": 2.2578, "step": 1074 }, { "epoch": 0.23, "learning_rate": 0.0017953803908318288, "loss": 2.3223, "step": 1075 }, { "epoch": 0.23, "learning_rate": 0.0017949580848036044, "loss": 2.4102, "step": 1076 }, { "epoch": 0.23, "learning_rate": 0.0017945353932097578, "loss": 2.2178, "step": 1077 }, { "epoch": 0.23, "learning_rate": 0.001794112316255301, "loss": 2.2803, "step": 1078 }, { "epoch": 0.23, "learning_rate": 0.0017936888541454313, "loss": 2.4258, "step": 1079 }, { "epoch": 0.23, "learning_rate": 0.0017932650070855338, "loss": 2.4473, "step": 1080 }, { "epoch": 0.23, "learning_rate": 0.0017928407752811806, "loss": 2.2803, "step": 1081 }, { "epoch": 0.23, "learning_rate": 0.0017924161589381296, "loss": 2.1543, "step": 1082 }, { "epoch": 0.23, "learning_rate": 0.001791991158262326, "loss": 2.502, "step": 1083 }, { "epoch": 0.23, "learning_rate": 0.0017915657734599, "loss": 2.2686, "step": 1084 }, { "epoch": 0.23, "learning_rate": 0.0017911400047371702, "loss": 2.3848, "step": 1085 }, { "epoch": 0.23, "learning_rate": 0.0017907138523006396, "loss": 2.2832, "step": 1086 }, { "epoch": 0.23, "learning_rate": 0.001790287316356998, "loss": 2.2441, "step": 1087 }, { "epoch": 0.23, "learning_rate": 0.001789860397113122, "loss": 2.4473, "step": 1088 }, { "epoch": 0.23, "learning_rate": 0.0017894330947760726, "loss": 2.375, "step": 1089 }, { "epoch": 0.23, "learning_rate": 0.0017890054095530976, "loss": 2.252, "step": 1090 }, { "epoch": 0.23, "learning_rate": 0.0017885773416516297, "loss": 2.4062, "step": 1091 }, { "epoch": 0.23, "learning_rate": 0.0017881488912792885, "loss": 2.3154, "step": 1092 }, { "epoch": 0.23, "learning_rate": 0.001787720058643878, "loss": 2.2773, "step": 1093 }, { "epoch": 0.24, "learning_rate": 0.0017872908439533876, "loss": 2.3086, "step": 1094 }, { "epoch": 0.24, "learning_rate": 0.0017868612474159927, "loss": 2.4941, "step": 1095 }, { "epoch": 0.24, "learning_rate": 0.0017864312692400536, "loss": 2.332, "step": 1096 }, { "epoch": 0.24, "learning_rate": 0.0017860009096341155, "loss": 2.2734, "step": 1097 }, { "epoch": 0.24, "learning_rate": 0.0017855701688069088, "loss": 2.4629, "step": 1098 }, { "epoch": 0.24, "learning_rate": 0.0017851390469673485, "loss": 2.3555, "step": 1099 }, { "epoch": 0.24, "learning_rate": 0.0017847075443245348, "loss": 2.2061, "step": 1100 }, { "epoch": 0.24, "learning_rate": 0.0017842756610877526, "loss": 2.3564, "step": 1101 }, { "epoch": 0.24, "learning_rate": 0.0017838433974664712, "loss": 2.2246, "step": 1102 }, { "epoch": 0.24, "learning_rate": 0.0017834107536703438, "loss": 2.502, "step": 1103 }, { "epoch": 0.24, "learning_rate": 0.0017829777299092095, "loss": 2.4102, "step": 1104 }, { "epoch": 0.24, "learning_rate": 0.0017825443263930906, "loss": 2.2754, "step": 1105 }, { "epoch": 0.24, "learning_rate": 0.001782110543332193, "loss": 2.3926, "step": 1106 }, { "epoch": 0.24, "learning_rate": 0.0017816763809369084, "loss": 2.3213, "step": 1107 }, { "epoch": 0.24, "learning_rate": 0.001781241839417811, "loss": 2.4082, "step": 1108 }, { "epoch": 0.24, "learning_rate": 0.0017808069189856593, "loss": 2.2793, "step": 1109 }, { "epoch": 0.24, "learning_rate": 0.0017803716198513961, "loss": 2.3711, "step": 1110 }, { "epoch": 0.24, "learning_rate": 0.0017799359422261474, "loss": 2.3389, "step": 1111 }, { "epoch": 0.24, "learning_rate": 0.0017794998863212232, "loss": 2.3447, "step": 1112 }, { "epoch": 0.24, "learning_rate": 0.0017790634523481158, "loss": 2.2891, "step": 1113 }, { "epoch": 0.24, "learning_rate": 0.001778626640518502, "loss": 2.1904, "step": 1114 }, { "epoch": 0.24, "learning_rate": 0.0017781894510442418, "loss": 2.3135, "step": 1115 }, { "epoch": 0.24, "learning_rate": 0.0017777518841373778, "loss": 2.4688, "step": 1116 }, { "epoch": 0.24, "learning_rate": 0.0017773139400101363, "loss": 2.2695, "step": 1117 }, { "epoch": 0.24, "learning_rate": 0.0017768756188749264, "loss": 2.4688, "step": 1118 }, { "epoch": 0.24, "learning_rate": 0.0017764369209443394, "loss": 2.2129, "step": 1119 }, { "epoch": 0.24, "learning_rate": 0.0017759978464311496, "loss": 2.3594, "step": 1120 }, { "epoch": 0.24, "learning_rate": 0.001775558395548315, "loss": 2.2842, "step": 1121 }, { "epoch": 0.24, "learning_rate": 0.001775118568508975, "loss": 2.1865, "step": 1122 }, { "epoch": 0.24, "learning_rate": 0.0017746783655264516, "loss": 2.2949, "step": 1123 }, { "epoch": 0.24, "learning_rate": 0.0017742377868142493, "loss": 2.3008, "step": 1124 }, { "epoch": 0.24, "learning_rate": 0.001773796832586055, "loss": 2.3965, "step": 1125 }, { "epoch": 0.24, "learning_rate": 0.0017733555030557376, "loss": 2.2783, "step": 1126 }, { "epoch": 0.24, "learning_rate": 0.0017729137984373478, "loss": 2.3291, "step": 1127 }, { "epoch": 0.24, "learning_rate": 0.001772471718945119, "loss": 2.3066, "step": 1128 }, { "epoch": 0.24, "learning_rate": 0.0017720292647934648, "loss": 2.2539, "step": 1129 }, { "epoch": 0.24, "learning_rate": 0.0017715864361969827, "loss": 2.1436, "step": 1130 }, { "epoch": 0.24, "learning_rate": 0.00177114323337045, "loss": 2.2529, "step": 1131 }, { "epoch": 0.24, "learning_rate": 0.001770699656528826, "loss": 2.2334, "step": 1132 }, { "epoch": 0.24, "learning_rate": 0.0017702557058872522, "loss": 2.2549, "step": 1133 }, { "epoch": 0.24, "learning_rate": 0.0017698113816610505, "loss": 2.4785, "step": 1134 }, { "epoch": 0.24, "learning_rate": 0.0017693666840657239, "loss": 2.2031, "step": 1135 }, { "epoch": 0.24, "learning_rate": 0.0017689216133169575, "loss": 2.252, "step": 1136 }, { "epoch": 0.24, "learning_rate": 0.0017684761696306164, "loss": 2.3809, "step": 1137 }, { "epoch": 0.24, "learning_rate": 0.001768030353222747, "loss": 2.2832, "step": 1138 }, { "epoch": 0.24, "learning_rate": 0.001767584164309576, "loss": 2.4609, "step": 1139 }, { "epoch": 0.25, "learning_rate": 0.0017671376031075119, "loss": 2.2959, "step": 1140 }, { "epoch": 0.25, "learning_rate": 0.0017666906698331427, "loss": 2.21, "step": 1141 }, { "epoch": 0.25, "learning_rate": 0.001766243364703237, "loss": 2.3223, "step": 1142 }, { "epoch": 0.25, "learning_rate": 0.001765795687934744, "loss": 2.1396, "step": 1143 }, { "epoch": 0.25, "learning_rate": 0.0017653476397447934, "loss": 2.3242, "step": 1144 }, { "epoch": 0.25, "learning_rate": 0.0017648992203506946, "loss": 2.2988, "step": 1145 }, { "epoch": 0.25, "learning_rate": 0.0017644504299699371, "loss": 2.2568, "step": 1146 }, { "epoch": 0.25, "learning_rate": 0.0017640012688201906, "loss": 2.1914, "step": 1147 }, { "epoch": 0.25, "learning_rate": 0.0017635517371193039, "loss": 2.2988, "step": 1148 }, { "epoch": 0.25, "learning_rate": 0.0017631018350853069, "loss": 2.1582, "step": 1149 }, { "epoch": 0.25, "learning_rate": 0.0017626515629364076, "loss": 2.3027, "step": 1150 }, { "epoch": 0.25, "learning_rate": 0.0017622009208909945, "loss": 2.334, "step": 1151 }, { "epoch": 0.25, "learning_rate": 0.0017617499091676352, "loss": 2.3076, "step": 1152 }, { "epoch": 0.25, "learning_rate": 0.0017612985279850766, "loss": 2.416, "step": 1153 }, { "epoch": 0.25, "learning_rate": 0.0017608467775622446, "loss": 2.3691, "step": 1154 }, { "epoch": 0.25, "learning_rate": 0.0017603946581182446, "loss": 2.208, "step": 1155 }, { "epoch": 0.25, "learning_rate": 0.0017599421698723606, "loss": 2.4023, "step": 1156 }, { "epoch": 0.25, "learning_rate": 0.0017594893130440557, "loss": 2.2588, "step": 1157 }, { "epoch": 0.25, "learning_rate": 0.001759036087852972, "loss": 2.3564, "step": 1158 }, { "epoch": 0.25, "learning_rate": 0.0017585824945189293, "loss": 2.3926, "step": 1159 }, { "epoch": 0.25, "learning_rate": 0.0017581285332619273, "loss": 2.3809, "step": 1160 }, { "epoch": 0.25, "learning_rate": 0.0017576742043021426, "loss": 2.3887, "step": 1161 }, { "epoch": 0.25, "learning_rate": 0.001757219507859932, "loss": 2.4023, "step": 1162 }, { "epoch": 0.25, "learning_rate": 0.001756764444155829, "loss": 2.3965, "step": 1163 }, { "epoch": 0.25, "learning_rate": 0.0017563090134105458, "loss": 2.1768, "step": 1164 }, { "epoch": 0.25, "learning_rate": 0.0017558532158449727, "loss": 2.3184, "step": 1165 }, { "epoch": 0.25, "learning_rate": 0.0017553970516801778, "loss": 2.293, "step": 1166 }, { "epoch": 0.25, "learning_rate": 0.001754940521137407, "loss": 2.3125, "step": 1167 }, { "epoch": 0.25, "learning_rate": 0.0017544836244380835, "loss": 2.3711, "step": 1168 }, { "epoch": 0.25, "learning_rate": 0.0017540263618038092, "loss": 2.3643, "step": 1169 }, { "epoch": 0.25, "learning_rate": 0.0017535687334563623, "loss": 2.3828, "step": 1170 }, { "epoch": 0.25, "learning_rate": 0.0017531107396176991, "loss": 2.3555, "step": 1171 }, { "epoch": 0.25, "learning_rate": 0.0017526523805099525, "loss": 2.3945, "step": 1172 }, { "epoch": 0.25, "learning_rate": 0.0017521936563554333, "loss": 2.4102, "step": 1173 }, { "epoch": 0.25, "learning_rate": 0.0017517345673766292, "loss": 2.3379, "step": 1174 }, { "epoch": 0.25, "learning_rate": 0.0017512751137962045, "loss": 2.2949, "step": 1175 }, { "epoch": 0.25, "learning_rate": 0.0017508152958370003, "loss": 2.1787, "step": 1176 }, { "epoch": 0.25, "learning_rate": 0.001750355113722035, "loss": 2.293, "step": 1177 }, { "epoch": 0.25, "learning_rate": 0.0017498945676745028, "loss": 2.3203, "step": 1178 }, { "epoch": 0.25, "learning_rate": 0.0017494336579177757, "loss": 2.4102, "step": 1179 }, { "epoch": 0.25, "learning_rate": 0.0017489723846754002, "loss": 2.2393, "step": 1180 }, { "epoch": 0.25, "learning_rate": 0.001748510748171101, "loss": 2.3975, "step": 1181 }, { "epoch": 0.25, "learning_rate": 0.001748048748628778, "loss": 2.4961, "step": 1182 }, { "epoch": 0.25, "learning_rate": 0.0017475863862725072, "loss": 2.3115, "step": 1183 }, { "epoch": 0.25, "learning_rate": 0.0017471236613265408, "loss": 2.3594, "step": 1184 }, { "epoch": 0.25, "learning_rate": 0.0017466605740153068, "loss": 2.2402, "step": 1185 }, { "epoch": 0.25, "learning_rate": 0.0017461971245634085, "loss": 2.4043, "step": 1186 }, { "epoch": 0.26, "learning_rate": 0.0017457333131956259, "loss": 2.4375, "step": 1187 }, { "epoch": 0.26, "learning_rate": 0.0017452691401369136, "loss": 2.3203, "step": 1188 }, { "epoch": 0.26, "learning_rate": 0.001744804605612402, "loss": 2.3125, "step": 1189 }, { "epoch": 0.26, "learning_rate": 0.0017443397098473965, "loss": 2.2637, "step": 1190 }, { "epoch": 0.26, "learning_rate": 0.0017438744530673784, "loss": 2.2988, "step": 1191 }, { "epoch": 0.26, "learning_rate": 0.0017434088354980034, "loss": 2.1973, "step": 1192 }, { "epoch": 0.26, "learning_rate": 0.0017429428573651022, "loss": 2.3633, "step": 1193 }, { "epoch": 0.26, "learning_rate": 0.0017424765188946809, "loss": 2.2246, "step": 1194 }, { "epoch": 0.26, "learning_rate": 0.0017420098203129201, "loss": 2.3291, "step": 1195 }, { "epoch": 0.26, "learning_rate": 0.001741542761846175, "loss": 2.4004, "step": 1196 }, { "epoch": 0.26, "learning_rate": 0.0017410753437209754, "loss": 2.3301, "step": 1197 }, { "epoch": 0.26, "learning_rate": 0.0017406075661640253, "loss": 2.1426, "step": 1198 }, { "epoch": 0.26, "learning_rate": 0.0017401394294022036, "loss": 2.2627, "step": 1199 }, { "epoch": 0.26, "learning_rate": 0.001739670933662563, "loss": 2.5, "step": 1200 }, { "epoch": 0.26, "learning_rate": 0.0017392020791723302, "loss": 2.2578, "step": 1201 }, { "epoch": 0.26, "learning_rate": 0.001738732866158906, "loss": 2.3242, "step": 1202 }, { "epoch": 0.26, "learning_rate": 0.0017382632948498658, "loss": 2.3438, "step": 1203 }, { "epoch": 0.26, "learning_rate": 0.0017377933654729575, "loss": 2.3311, "step": 1204 }, { "epoch": 0.26, "learning_rate": 0.0017373230782561037, "loss": 2.375, "step": 1205 }, { "epoch": 0.26, "learning_rate": 0.0017368524334273995, "loss": 2.3066, "step": 1206 }, { "epoch": 0.26, "learning_rate": 0.0017363814312151154, "loss": 2.3047, "step": 1207 }, { "epoch": 0.26, "learning_rate": 0.0017359100718476927, "loss": 2.3496, "step": 1208 }, { "epoch": 0.26, "learning_rate": 0.0017354383555537479, "loss": 2.3242, "step": 1209 }, { "epoch": 0.26, "learning_rate": 0.0017349662825620698, "loss": 2.2705, "step": 1210 }, { "epoch": 0.26, "learning_rate": 0.0017344938531016199, "loss": 2.3574, "step": 1211 }, { "epoch": 0.26, "learning_rate": 0.0017340210674015334, "loss": 2.2832, "step": 1212 }, { "epoch": 0.26, "learning_rate": 0.0017335479256911182, "loss": 2.165, "step": 1213 }, { "epoch": 0.26, "learning_rate": 0.0017330744281998536, "loss": 2.377, "step": 1214 }, { "epoch": 0.26, "learning_rate": 0.001732600575157393, "loss": 2.2979, "step": 1215 }, { "epoch": 0.26, "learning_rate": 0.0017321263667935617, "loss": 2.332, "step": 1216 }, { "epoch": 0.26, "learning_rate": 0.0017316518033383573, "loss": 2.3418, "step": 1217 }, { "epoch": 0.26, "learning_rate": 0.0017311768850219495, "loss": 2.2988, "step": 1218 }, { "epoch": 0.26, "learning_rate": 0.0017307016120746799, "loss": 2.3633, "step": 1219 }, { "epoch": 0.26, "learning_rate": 0.0017302259847270629, "loss": 2.3242, "step": 1220 }, { "epoch": 0.26, "learning_rate": 0.0017297500032097842, "loss": 2.2188, "step": 1221 }, { "epoch": 0.26, "learning_rate": 0.0017292736677537012, "loss": 2.2197, "step": 1222 }, { "epoch": 0.26, "learning_rate": 0.0017287969785898434, "loss": 2.3047, "step": 1223 }, { "epoch": 0.26, "learning_rate": 0.0017283199359494118, "loss": 2.3691, "step": 1224 }, { "epoch": 0.26, "learning_rate": 0.001727842540063778, "loss": 2.3535, "step": 1225 }, { "epoch": 0.26, "learning_rate": 0.001727364791164486, "loss": 2.2041, "step": 1226 }, { "epoch": 0.26, "learning_rate": 0.0017268866894832506, "loss": 2.3281, "step": 1227 }, { "epoch": 0.26, "learning_rate": 0.0017264082352519577, "loss": 2.3496, "step": 1228 }, { "epoch": 0.26, "learning_rate": 0.0017259294287026643, "loss": 2.3789, "step": 1229 }, { "epoch": 0.26, "learning_rate": 0.0017254502700675975, "loss": 2.2266, "step": 1230 }, { "epoch": 0.26, "learning_rate": 0.0017249707595791569, "loss": 2.2656, "step": 1231 }, { "epoch": 0.26, "learning_rate": 0.0017244908974699109, "loss": 2.3887, "step": 1232 }, { "epoch": 0.27, "learning_rate": 0.0017240106839725995, "loss": 2.2734, "step": 1233 }, { "epoch": 0.27, "learning_rate": 0.001723530119320133, "loss": 2.3496, "step": 1234 }, { "epoch": 0.27, "learning_rate": 0.0017230492037455913, "loss": 2.2617, "step": 1235 }, { "epoch": 0.27, "learning_rate": 0.001722567937482226, "loss": 2.2891, "step": 1236 }, { "epoch": 0.27, "learning_rate": 0.001722086320763457, "loss": 2.3154, "step": 1237 }, { "epoch": 0.27, "learning_rate": 0.0017216043538228756, "loss": 2.2656, "step": 1238 }, { "epoch": 0.27, "learning_rate": 0.001721122036894242, "loss": 2.3008, "step": 1239 }, { "epoch": 0.27, "learning_rate": 0.0017206393702114868, "loss": 2.3652, "step": 1240 }, { "epoch": 0.27, "learning_rate": 0.0017201563540087102, "loss": 2.1396, "step": 1241 }, { "epoch": 0.27, "learning_rate": 0.0017196729885201813, "loss": 2.375, "step": 1242 }, { "epoch": 0.27, "learning_rate": 0.0017191892739803392, "loss": 2.1738, "step": 1243 }, { "epoch": 0.27, "learning_rate": 0.0017187052106237921, "loss": 2.457, "step": 1244 }, { "epoch": 0.27, "learning_rate": 0.0017182207986853177, "loss": 2.292, "step": 1245 }, { "epoch": 0.27, "learning_rate": 0.0017177360383998615, "loss": 2.3809, "step": 1246 }, { "epoch": 0.27, "learning_rate": 0.0017172509300025398, "loss": 2.2871, "step": 1247 }, { "epoch": 0.27, "learning_rate": 0.001716765473728637, "loss": 2.3066, "step": 1248 }, { "epoch": 0.27, "learning_rate": 0.0017162796698136056, "loss": 2.3359, "step": 1249 }, { "epoch": 0.27, "learning_rate": 0.001715793518493067, "loss": 2.3145, "step": 1250 }, { "epoch": 0.27, "learning_rate": 0.001715307020002812, "loss": 2.2832, "step": 1251 }, { "epoch": 0.27, "learning_rate": 0.0017148201745787984, "loss": 2.4102, "step": 1252 }, { "epoch": 0.27, "learning_rate": 0.0017143329824571532, "loss": 2.4453, "step": 1253 }, { "epoch": 0.27, "learning_rate": 0.0017138454438741712, "loss": 2.2891, "step": 1254 }, { "epoch": 0.27, "learning_rate": 0.0017133575590663158, "loss": 2.2773, "step": 1255 }, { "epoch": 0.27, "learning_rate": 0.0017128693282702173, "loss": 2.2432, "step": 1256 }, { "epoch": 0.27, "learning_rate": 0.0017123807517226745, "loss": 2.2217, "step": 1257 }, { "epoch": 0.27, "learning_rate": 0.0017118918296606537, "loss": 2.3623, "step": 1258 }, { "epoch": 0.27, "learning_rate": 0.0017114025623212888, "loss": 2.3613, "step": 1259 }, { "epoch": 0.27, "learning_rate": 0.0017109129499418816, "loss": 2.2969, "step": 1260 }, { "epoch": 0.27, "learning_rate": 0.0017104229927599004, "loss": 2.2832, "step": 1261 }, { "epoch": 0.27, "learning_rate": 0.0017099326910129814, "loss": 2.4248, "step": 1262 }, { "epoch": 0.27, "learning_rate": 0.0017094420449389278, "loss": 2.21, "step": 1263 }, { "epoch": 0.27, "learning_rate": 0.0017089510547757091, "loss": 2.3926, "step": 1264 }, { "epoch": 0.27, "learning_rate": 0.0017084597207614634, "loss": 2.2822, "step": 1265 }, { "epoch": 0.27, "learning_rate": 0.0017079680431344934, "loss": 2.3564, "step": 1266 }, { "epoch": 0.27, "learning_rate": 0.0017074760221332701, "loss": 2.3105, "step": 1267 }, { "epoch": 0.27, "learning_rate": 0.0017069836579964304, "loss": 2.2061, "step": 1268 }, { "epoch": 0.27, "learning_rate": 0.0017064909509627772, "loss": 2.459, "step": 1269 }, { "epoch": 0.27, "learning_rate": 0.0017059979012712812, "loss": 2.2598, "step": 1270 }, { "epoch": 0.27, "learning_rate": 0.001705504509161077, "loss": 2.3594, "step": 1271 }, { "epoch": 0.27, "learning_rate": 0.0017050107748714676, "loss": 2.1768, "step": 1272 }, { "epoch": 0.27, "learning_rate": 0.0017045166986419207, "loss": 2.332, "step": 1273 }, { "epoch": 0.27, "learning_rate": 0.0017040222807120698, "loss": 2.1309, "step": 1274 }, { "epoch": 0.27, "learning_rate": 0.0017035275213217146, "loss": 2.3594, "step": 1275 }, { "epoch": 0.27, "learning_rate": 0.00170303242071082, "loss": 2.4238, "step": 1276 }, { "epoch": 0.27, "learning_rate": 0.0017025369791195172, "loss": 2.3574, "step": 1277 }, { "epoch": 0.27, "learning_rate": 0.0017020411967881015, "loss": 2.3643, "step": 1278 }, { "epoch": 0.27, "learning_rate": 0.0017015450739570346, "loss": 2.3574, "step": 1279 }, { "epoch": 0.28, "learning_rate": 0.0017010486108669427, "loss": 2.3027, "step": 1280 }, { "epoch": 0.28, "learning_rate": 0.0017005518077586174, "loss": 2.418, "step": 1281 }, { "epoch": 0.28, "learning_rate": 0.0017000546648730146, "loss": 2.3359, "step": 1282 }, { "epoch": 0.28, "learning_rate": 0.0016995571824512563, "loss": 2.3066, "step": 1283 }, { "epoch": 0.28, "learning_rate": 0.0016990593607346277, "loss": 2.3633, "step": 1284 }, { "epoch": 0.28, "learning_rate": 0.0016985611999645793, "loss": 2.2695, "step": 1285 }, { "epoch": 0.28, "learning_rate": 0.0016980627003827257, "loss": 2.3379, "step": 1286 }, { "epoch": 0.28, "learning_rate": 0.0016975638622308466, "loss": 2.3965, "step": 1287 }, { "epoch": 0.28, "learning_rate": 0.0016970646857508852, "loss": 2.3145, "step": 1288 }, { "epoch": 0.28, "learning_rate": 0.0016965651711849485, "loss": 2.418, "step": 1289 }, { "epoch": 0.28, "learning_rate": 0.0016960653187753087, "loss": 2.377, "step": 1290 }, { "epoch": 0.28, "learning_rate": 0.0016955651287644006, "loss": 2.3398, "step": 1291 }, { "epoch": 0.28, "learning_rate": 0.0016950646013948233, "loss": 2.3145, "step": 1292 }, { "epoch": 0.28, "learning_rate": 0.0016945637369093394, "loss": 2.252, "step": 1293 }, { "epoch": 0.28, "learning_rate": 0.0016940625355508754, "loss": 2.2832, "step": 1294 }, { "epoch": 0.28, "learning_rate": 0.0016935609975625206, "loss": 2.3477, "step": 1295 }, { "epoch": 0.28, "learning_rate": 0.0016930591231875281, "loss": 2.3418, "step": 1296 }, { "epoch": 0.28, "learning_rate": 0.0016925569126693134, "loss": 2.3047, "step": 1297 }, { "epoch": 0.28, "learning_rate": 0.001692054366251456, "loss": 2.3438, "step": 1298 }, { "epoch": 0.28, "learning_rate": 0.0016915514841776976, "loss": 2.2959, "step": 1299 }, { "epoch": 0.28, "learning_rate": 0.001691048266691943, "loss": 2.2461, "step": 1300 }, { "epoch": 0.28, "learning_rate": 0.0016905447140382595, "loss": 2.1885, "step": 1301 }, { "epoch": 0.28, "learning_rate": 0.0016900408264608772, "loss": 2.4062, "step": 1302 }, { "epoch": 0.28, "learning_rate": 0.0016895366042041884, "loss": 2.3652, "step": 1303 }, { "epoch": 0.28, "learning_rate": 0.0016890320475127481, "loss": 2.373, "step": 1304 }, { "epoch": 0.28, "learning_rate": 0.001688527156631273, "loss": 2.3203, "step": 1305 }, { "epoch": 0.28, "learning_rate": 0.0016880219318046422, "loss": 2.3135, "step": 1306 }, { "epoch": 0.28, "learning_rate": 0.0016875163732778967, "loss": 2.2207, "step": 1307 }, { "epoch": 0.28, "learning_rate": 0.0016870104812962393, "loss": 2.2441, "step": 1308 }, { "epoch": 0.28, "learning_rate": 0.0016865042561050349, "loss": 2.375, "step": 1309 }, { "epoch": 0.28, "learning_rate": 0.0016859976979498091, "loss": 2.2373, "step": 1310 }, { "epoch": 0.28, "learning_rate": 0.0016854908070762498, "loss": 2.3262, "step": 1311 }, { "epoch": 0.28, "learning_rate": 0.0016849835837302062, "loss": 2.2471, "step": 1312 }, { "epoch": 0.28, "learning_rate": 0.001684476028157689, "loss": 2.2871, "step": 1313 }, { "epoch": 0.28, "learning_rate": 0.0016839681406048688, "loss": 2.4863, "step": 1314 }, { "epoch": 0.28, "learning_rate": 0.0016834599213180787, "loss": 2.2627, "step": 1315 }, { "epoch": 0.28, "learning_rate": 0.0016829513705438118, "loss": 2.3848, "step": 1316 }, { "epoch": 0.28, "learning_rate": 0.0016824424885287222, "loss": 2.2891, "step": 1317 }, { "epoch": 0.28, "learning_rate": 0.0016819332755196247, "loss": 2.2891, "step": 1318 }, { "epoch": 0.28, "learning_rate": 0.0016814237317634949, "loss": 2.2539, "step": 1319 }, { "epoch": 0.28, "learning_rate": 0.001680913857507468, "loss": 2.3613, "step": 1320 }, { "epoch": 0.28, "learning_rate": 0.0016804036529988406, "loss": 2.291, "step": 1321 }, { "epoch": 0.28, "learning_rate": 0.0016798931184850683, "loss": 2.3848, "step": 1322 }, { "epoch": 0.28, "learning_rate": 0.0016793822542137682, "loss": 2.3945, "step": 1323 }, { "epoch": 0.28, "learning_rate": 0.0016788710604327157, "loss": 2.2432, "step": 1324 }, { "epoch": 0.28, "learning_rate": 0.0016783595373898474, "loss": 2.1543, "step": 1325 }, { "epoch": 0.29, "learning_rate": 0.0016778476853332588, "loss": 2.2295, "step": 1326 }, { "epoch": 0.29, "learning_rate": 0.001677335504511205, "loss": 2.2207, "step": 1327 }, { "epoch": 0.29, "learning_rate": 0.0016768229951721013, "loss": 2.2783, "step": 1328 }, { "epoch": 0.29, "learning_rate": 0.001676310157564521, "loss": 2.3262, "step": 1329 }, { "epoch": 0.29, "learning_rate": 0.0016757969919371982, "loss": 2.3594, "step": 1330 }, { "epoch": 0.29, "learning_rate": 0.0016752834985390246, "loss": 2.2646, "step": 1331 }, { "epoch": 0.29, "learning_rate": 0.001674769677619052, "loss": 2.1826, "step": 1332 }, { "epoch": 0.29, "learning_rate": 0.0016742555294264906, "loss": 2.2461, "step": 1333 }, { "epoch": 0.29, "learning_rate": 0.0016737410542107093, "loss": 2.3574, "step": 1334 }, { "epoch": 0.29, "learning_rate": 0.0016732262522212357, "loss": 2.2725, "step": 1335 }, { "epoch": 0.29, "learning_rate": 0.001672711123707756, "loss": 2.334, "step": 1336 }, { "epoch": 0.29, "learning_rate": 0.0016721956689201144, "loss": 2.2812, "step": 1337 }, { "epoch": 0.29, "learning_rate": 0.0016716798881083134, "loss": 2.2832, "step": 1338 }, { "epoch": 0.29, "learning_rate": 0.0016711637815225138, "loss": 2.293, "step": 1339 }, { "epoch": 0.29, "learning_rate": 0.001670647349413035, "loss": 2.2041, "step": 1340 }, { "epoch": 0.29, "learning_rate": 0.001670130592030353, "loss": 2.3877, "step": 1341 }, { "epoch": 0.29, "learning_rate": 0.0016696135096251029, "loss": 2.3223, "step": 1342 }, { "epoch": 0.29, "learning_rate": 0.001669096102448076, "loss": 2.2803, "step": 1343 }, { "epoch": 0.29, "learning_rate": 0.0016685783707502228, "loss": 2.3594, "step": 1344 }, { "epoch": 0.29, "learning_rate": 0.0016680603147826499, "loss": 2.4102, "step": 1345 }, { "epoch": 0.29, "learning_rate": 0.0016675419347966212, "loss": 2.3848, "step": 1346 }, { "epoch": 0.29, "learning_rate": 0.0016670232310435585, "loss": 2.2285, "step": 1347 }, { "epoch": 0.29, "learning_rate": 0.00166650420377504, "loss": 2.3721, "step": 1348 }, { "epoch": 0.29, "learning_rate": 0.001665984853242802, "loss": 2.4512, "step": 1349 }, { "epoch": 0.29, "learning_rate": 0.0016654651796987354, "loss": 2.2764, "step": 1350 }, { "epoch": 0.29, "learning_rate": 0.00166494518339489, "loss": 2.3672, "step": 1351 }, { "epoch": 0.29, "learning_rate": 0.0016644248645834706, "loss": 2.4082, "step": 1352 }, { "epoch": 0.29, "learning_rate": 0.0016639042235168388, "loss": 2.3535, "step": 1353 }, { "epoch": 0.29, "learning_rate": 0.0016633832604475136, "loss": 2.123, "step": 1354 }, { "epoch": 0.29, "learning_rate": 0.0016628619756281684, "loss": 2.2852, "step": 1355 }, { "epoch": 0.29, "learning_rate": 0.0016623403693116342, "loss": 2.2617, "step": 1356 }, { "epoch": 0.29, "learning_rate": 0.0016618184417508965, "loss": 2.2236, "step": 1357 }, { "epoch": 0.29, "learning_rate": 0.0016612961931990978, "loss": 2.3066, "step": 1358 }, { "epoch": 0.29, "learning_rate": 0.001660773623909536, "loss": 2.3398, "step": 1359 }, { "epoch": 0.29, "learning_rate": 0.0016602507341356638, "loss": 2.3281, "step": 1360 }, { "epoch": 0.29, "learning_rate": 0.0016597275241310907, "loss": 2.3281, "step": 1361 }, { "epoch": 0.29, "learning_rate": 0.0016592039941495803, "loss": 2.3594, "step": 1362 }, { "epoch": 0.29, "learning_rate": 0.0016586801444450519, "loss": 2.2842, "step": 1363 }, { "epoch": 0.29, "learning_rate": 0.0016581559752715798, "loss": 2.3418, "step": 1364 }, { "epoch": 0.29, "learning_rate": 0.0016576314868833933, "loss": 2.2939, "step": 1365 }, { "epoch": 0.29, "learning_rate": 0.0016571066795348765, "loss": 2.3311, "step": 1366 }, { "epoch": 0.29, "learning_rate": 0.001656581553480568, "loss": 2.335, "step": 1367 }, { "epoch": 0.29, "learning_rate": 0.0016560561089751622, "loss": 2.3262, "step": 1368 }, { "epoch": 0.29, "learning_rate": 0.0016555303462735056, "loss": 2.3613, "step": 1369 }, { "epoch": 0.29, "learning_rate": 0.0016550042656306011, "loss": 2.4004, "step": 1370 }, { "epoch": 0.29, "learning_rate": 0.0016544778673016044, "loss": 2.3027, "step": 1371 }, { "epoch": 0.29, "learning_rate": 0.0016539511515418265, "loss": 2.3398, "step": 1372 }, { "epoch": 0.3, "learning_rate": 0.0016534241186067318, "loss": 2.2988, "step": 1373 }, { "epoch": 0.3, "learning_rate": 0.0016528967687519382, "loss": 2.292, "step": 1374 }, { "epoch": 0.3, "learning_rate": 0.0016523691022332183, "loss": 2.3262, "step": 1375 }, { "epoch": 0.3, "learning_rate": 0.0016518411193064968, "loss": 2.293, "step": 1376 }, { "epoch": 0.3, "learning_rate": 0.0016513128202278531, "loss": 2.3633, "step": 1377 }, { "epoch": 0.3, "learning_rate": 0.0016507842052535199, "loss": 2.4082, "step": 1378 }, { "epoch": 0.3, "learning_rate": 0.0016502552746398816, "loss": 2.2939, "step": 1379 }, { "epoch": 0.3, "learning_rate": 0.001649726028643478, "loss": 2.3174, "step": 1380 }, { "epoch": 0.3, "learning_rate": 0.0016491964675210006, "loss": 2.3594, "step": 1381 }, { "epoch": 0.3, "learning_rate": 0.0016486665915292928, "loss": 2.3828, "step": 1382 }, { "epoch": 0.3, "learning_rate": 0.001648136400925353, "loss": 2.2607, "step": 1383 }, { "epoch": 0.3, "learning_rate": 0.0016476058959663303, "loss": 2.291, "step": 1384 }, { "epoch": 0.3, "learning_rate": 0.001647075076909527, "loss": 2.4414, "step": 1385 }, { "epoch": 0.3, "learning_rate": 0.0016465439440123974, "loss": 2.5391, "step": 1386 }, { "epoch": 0.3, "learning_rate": 0.001646012497532549, "loss": 2.2227, "step": 1387 }, { "epoch": 0.3, "learning_rate": 0.00164548073772774, "loss": 2.1562, "step": 1388 }, { "epoch": 0.3, "learning_rate": 0.0016449486648558813, "loss": 2.3047, "step": 1389 }, { "epoch": 0.3, "learning_rate": 0.001644416279175036, "loss": 2.2217, "step": 1390 }, { "epoch": 0.3, "learning_rate": 0.0016438835809434182, "loss": 2.3535, "step": 1391 }, { "epoch": 0.3, "learning_rate": 0.0016433505704193938, "loss": 2.3555, "step": 1392 }, { "epoch": 0.3, "learning_rate": 0.0016428172478614802, "loss": 2.3574, "step": 1393 }, { "epoch": 0.3, "learning_rate": 0.0016422836135283469, "loss": 2.4355, "step": 1394 }, { "epoch": 0.3, "learning_rate": 0.0016417496676788127, "loss": 2.4297, "step": 1395 }, { "epoch": 0.3, "learning_rate": 0.0016412154105718492, "loss": 2.2148, "step": 1396 }, { "epoch": 0.3, "learning_rate": 0.0016406808424665794, "loss": 2.3105, "step": 1397 }, { "epoch": 0.3, "learning_rate": 0.0016401459636222744, "loss": 2.2939, "step": 1398 }, { "epoch": 0.3, "learning_rate": 0.0016396107742983595, "loss": 2.1514, "step": 1399 }, { "epoch": 0.3, "learning_rate": 0.0016390752747544078, "loss": 2.4375, "step": 1400 }, { "epoch": 0.3, "learning_rate": 0.0016385394652501445, "loss": 2.2441, "step": 1401 }, { "epoch": 0.3, "learning_rate": 0.0016380033460454448, "loss": 2.4102, "step": 1402 }, { "epoch": 0.3, "learning_rate": 0.0016374669174003327, "loss": 2.3164, "step": 1403 }, { "epoch": 0.3, "learning_rate": 0.0016369301795749846, "loss": 2.1562, "step": 1404 }, { "epoch": 0.3, "learning_rate": 0.0016363931328297256, "loss": 2.377, "step": 1405 }, { "epoch": 0.3, "learning_rate": 0.0016358557774250305, "loss": 2.3008, "step": 1406 }, { "epoch": 0.3, "learning_rate": 0.0016353181136215245, "loss": 2.2803, "step": 1407 }, { "epoch": 0.3, "learning_rate": 0.0016347801416799813, "loss": 2.4619, "step": 1408 }, { "epoch": 0.3, "learning_rate": 0.001634241861861325, "loss": 2.2988, "step": 1409 }, { "epoch": 0.3, "learning_rate": 0.001633703274426629, "loss": 2.3281, "step": 1410 }, { "epoch": 0.3, "learning_rate": 0.0016331643796371152, "loss": 2.291, "step": 1411 }, { "epoch": 0.3, "learning_rate": 0.001632625177754155, "loss": 2.3135, "step": 1412 }, { "epoch": 0.3, "learning_rate": 0.0016320856690392694, "loss": 2.2832, "step": 1413 }, { "epoch": 0.3, "learning_rate": 0.001631545853754127, "loss": 2.3398, "step": 1414 }, { "epoch": 0.3, "learning_rate": 0.0016310057321605456, "loss": 2.3633, "step": 1415 }, { "epoch": 0.3, "learning_rate": 0.0016304653045204915, "loss": 2.3203, "step": 1416 }, { "epoch": 0.3, "learning_rate": 0.0016299245710960802, "loss": 2.3672, "step": 1417 }, { "epoch": 0.3, "learning_rate": 0.0016293835321495745, "loss": 2.3594, "step": 1418 }, { "epoch": 0.31, "learning_rate": 0.0016288421879433854, "loss": 2.4141, "step": 1419 }, { "epoch": 0.31, "learning_rate": 0.0016283005387400726, "loss": 2.168, "step": 1420 }, { "epoch": 0.31, "learning_rate": 0.0016277585848023435, "loss": 2.2109, "step": 1421 }, { "epoch": 0.31, "learning_rate": 0.0016272163263930533, "loss": 2.1592, "step": 1422 }, { "epoch": 0.31, "learning_rate": 0.0016266737637752045, "loss": 2.3545, "step": 1423 }, { "epoch": 0.31, "learning_rate": 0.0016261308972119475, "loss": 2.3457, "step": 1424 }, { "epoch": 0.31, "learning_rate": 0.0016255877269665802, "loss": 2.1572, "step": 1425 }, { "epoch": 0.31, "learning_rate": 0.0016250442533025477, "loss": 2.2549, "step": 1426 }, { "epoch": 0.31, "learning_rate": 0.0016245004764834422, "loss": 2.3496, "step": 1427 }, { "epoch": 0.31, "learning_rate": 0.0016239563967730027, "loss": 2.1836, "step": 1428 }, { "epoch": 0.31, "learning_rate": 0.0016234120144351155, "loss": 2.2988, "step": 1429 }, { "epoch": 0.31, "learning_rate": 0.0016228673297338137, "loss": 2.1562, "step": 1430 }, { "epoch": 0.31, "learning_rate": 0.0016223223429332764, "loss": 2.4746, "step": 1431 }, { "epoch": 0.31, "learning_rate": 0.0016217770542978306, "loss": 2.3701, "step": 1432 }, { "epoch": 0.31, "learning_rate": 0.001621231464091948, "loss": 2.3799, "step": 1433 }, { "epoch": 0.31, "learning_rate": 0.0016206855725802475, "loss": 2.5293, "step": 1434 }, { "epoch": 0.31, "learning_rate": 0.0016201393800274939, "loss": 2.1611, "step": 1435 }, { "epoch": 0.31, "learning_rate": 0.0016195928866985987, "loss": 2.3535, "step": 1436 }, { "epoch": 0.31, "learning_rate": 0.0016190460928586182, "loss": 2.4023, "step": 1437 }, { "epoch": 0.31, "learning_rate": 0.001618498998772755, "loss": 2.2637, "step": 1438 }, { "epoch": 0.31, "learning_rate": 0.0016179516047063575, "loss": 2.3594, "step": 1439 }, { "epoch": 0.31, "learning_rate": 0.001617403910924919, "loss": 2.3398, "step": 1440 }, { "epoch": 0.31, "learning_rate": 0.0016168559176940787, "loss": 2.0938, "step": 1441 }, { "epoch": 0.31, "learning_rate": 0.001616307625279621, "loss": 2.3076, "step": 1442 }, { "epoch": 0.31, "learning_rate": 0.0016157590339474753, "loss": 2.2695, "step": 1443 }, { "epoch": 0.31, "learning_rate": 0.0016152101439637155, "loss": 2.3252, "step": 1444 }, { "epoch": 0.31, "learning_rate": 0.0016146609555945613, "loss": 2.4453, "step": 1445 }, { "epoch": 0.31, "learning_rate": 0.0016141114691063762, "loss": 2.4199, "step": 1446 }, { "epoch": 0.31, "learning_rate": 0.0016135616847656694, "loss": 2.2988, "step": 1447 }, { "epoch": 0.31, "learning_rate": 0.001613011602839093, "loss": 2.3027, "step": 1448 }, { "epoch": 0.31, "learning_rate": 0.001612461223593445, "loss": 2.2354, "step": 1449 }, { "epoch": 0.31, "learning_rate": 0.0016119105472956665, "loss": 2.3906, "step": 1450 }, { "epoch": 0.31, "learning_rate": 0.0016113595742128432, "loss": 2.2461, "step": 1451 }, { "epoch": 0.31, "learning_rate": 0.0016108083046122053, "loss": 2.3555, "step": 1452 }, { "epoch": 0.31, "learning_rate": 0.001610256738761125, "loss": 2.1631, "step": 1453 }, { "epoch": 0.31, "learning_rate": 0.0016097048769271197, "loss": 2.334, "step": 1454 }, { "epoch": 0.31, "learning_rate": 0.0016091527193778507, "loss": 2.2432, "step": 1455 }, { "epoch": 0.31, "learning_rate": 0.001608600266381121, "loss": 2.3828, "step": 1456 }, { "epoch": 0.31, "learning_rate": 0.0016080475182048786, "loss": 2.2617, "step": 1457 }, { "epoch": 0.31, "learning_rate": 0.0016074944751172135, "loss": 2.3887, "step": 1458 }, { "epoch": 0.31, "learning_rate": 0.00160694113738636, "loss": 2.3154, "step": 1459 }, { "epoch": 0.31, "learning_rate": 0.0016063875052806937, "loss": 2.334, "step": 1460 }, { "epoch": 0.31, "learning_rate": 0.0016058335790687342, "loss": 2.1973, "step": 1461 }, { "epoch": 0.31, "learning_rate": 0.0016052793590191432, "loss": 2.2559, "step": 1462 }, { "epoch": 0.31, "learning_rate": 0.0016047248454007252, "loss": 2.293, "step": 1463 }, { "epoch": 0.31, "learning_rate": 0.001604170038482427, "loss": 2.3066, "step": 1464 }, { "epoch": 0.31, "learning_rate": 0.0016036149385333372, "loss": 2.2852, "step": 1465 }, { "epoch": 0.32, "learning_rate": 0.0016030595458226872, "loss": 2.3359, "step": 1466 }, { "epoch": 0.32, "learning_rate": 0.00160250386061985, "loss": 2.1914, "step": 1467 }, { "epoch": 0.32, "learning_rate": 0.0016019478831943408, "loss": 2.2354, "step": 1468 }, { "epoch": 0.32, "learning_rate": 0.0016013916138158163, "loss": 2.1914, "step": 1469 }, { "epoch": 0.32, "learning_rate": 0.0016008350527540744, "loss": 2.292, "step": 1470 }, { "epoch": 0.32, "learning_rate": 0.0016002782002790554, "loss": 2.3857, "step": 1471 }, { "epoch": 0.32, "learning_rate": 0.0015997210566608402, "loss": 2.0469, "step": 1472 }, { "epoch": 0.32, "learning_rate": 0.001599163622169651, "loss": 2.2822, "step": 1473 }, { "epoch": 0.32, "learning_rate": 0.0015986058970758514, "loss": 2.165, "step": 1474 }, { "epoch": 0.32, "learning_rate": 0.0015980478816499458, "loss": 2.3115, "step": 1475 }, { "epoch": 0.32, "learning_rate": 0.001597489576162579, "loss": 2.3418, "step": 1476 }, { "epoch": 0.32, "learning_rate": 0.001596930980884537, "loss": 2.4268, "step": 1477 }, { "epoch": 0.32, "learning_rate": 0.0015963720960867466, "loss": 2.2793, "step": 1478 }, { "epoch": 0.32, "learning_rate": 0.0015958129220402745, "loss": 1.9375, "step": 1479 }, { "epoch": 0.32, "learning_rate": 0.0015952534590163272, "loss": 2.3379, "step": 1480 }, { "epoch": 0.32, "learning_rate": 0.0015946937072862531, "loss": 2.2578, "step": 1481 }, { "epoch": 0.32, "learning_rate": 0.0015941336671215383, "loss": 2.291, "step": 1482 }, { "epoch": 0.32, "learning_rate": 0.0015935733387938105, "loss": 2.373, "step": 1483 }, { "epoch": 0.32, "learning_rate": 0.0015930127225748368, "loss": 2.2793, "step": 1484 }, { "epoch": 0.32, "learning_rate": 0.0015924518187365236, "loss": 2.3711, "step": 1485 }, { "epoch": 0.32, "learning_rate": 0.0015918906275509171, "loss": 2.3848, "step": 1486 }, { "epoch": 0.32, "learning_rate": 0.0015913291492902029, "loss": 2.3652, "step": 1487 }, { "epoch": 0.32, "learning_rate": 0.0015907673842267052, "loss": 2.4336, "step": 1488 }, { "epoch": 0.32, "learning_rate": 0.0015902053326328882, "loss": 2.1182, "step": 1489 }, { "epoch": 0.32, "learning_rate": 0.0015896429947813546, "loss": 2.334, "step": 1490 }, { "epoch": 0.32, "learning_rate": 0.0015890803709448461, "loss": 2.3018, "step": 1491 }, { "epoch": 0.32, "learning_rate": 0.0015885174613962426, "loss": 2.4688, "step": 1492 }, { "epoch": 0.32, "learning_rate": 0.0015879542664085633, "loss": 2.3438, "step": 1493 }, { "epoch": 0.32, "learning_rate": 0.0015873907862549654, "loss": 2.2529, "step": 1494 }, { "epoch": 0.32, "learning_rate": 0.001586827021208745, "loss": 2.2598, "step": 1495 }, { "epoch": 0.32, "learning_rate": 0.001586262971543335, "loss": 2.3086, "step": 1496 }, { "epoch": 0.32, "learning_rate": 0.0015856986375323086, "loss": 2.4336, "step": 1497 }, { "epoch": 0.32, "learning_rate": 0.0015851340194493742, "loss": 2.2891, "step": 1498 }, { "epoch": 0.32, "learning_rate": 0.00158456911756838, "loss": 2.4062, "step": 1499 }, { "epoch": 0.32, "learning_rate": 0.001584003932163311, "loss": 2.3281, "step": 1500 }, { "epoch": 0.32, "learning_rate": 0.00158343846350829, "loss": 2.4707, "step": 1501 }, { "epoch": 0.32, "learning_rate": 0.0015828727118775774, "loss": 2.3906, "step": 1502 }, { "epoch": 0.32, "learning_rate": 0.0015823066775455703, "loss": 2.21, "step": 1503 }, { "epoch": 0.32, "learning_rate": 0.0015817403607868027, "loss": 2.2441, "step": 1504 }, { "epoch": 0.32, "learning_rate": 0.0015811737618759468, "loss": 2.2402, "step": 1505 }, { "epoch": 0.32, "learning_rate": 0.0015806068810878102, "loss": 2.3145, "step": 1506 }, { "epoch": 0.32, "learning_rate": 0.0015800397186973383, "loss": 2.2354, "step": 1507 }, { "epoch": 0.32, "learning_rate": 0.0015794722749796124, "loss": 2.2822, "step": 1508 }, { "epoch": 0.32, "learning_rate": 0.0015789045502098507, "loss": 2.2734, "step": 1509 }, { "epoch": 0.32, "learning_rate": 0.001578336544663408, "loss": 2.3535, "step": 1510 }, { "epoch": 0.32, "learning_rate": 0.0015777682586157736, "loss": 2.207, "step": 1511 }, { "epoch": 0.33, "learning_rate": 0.001577199692342575, "loss": 2.293, "step": 1512 }, { "epoch": 0.33, "learning_rate": 0.0015766308461195747, "loss": 2.2617, "step": 1513 }, { "epoch": 0.33, "learning_rate": 0.0015760617202226705, "loss": 2.2559, "step": 1514 }, { "epoch": 0.33, "learning_rate": 0.0015754923149278969, "loss": 2.334, "step": 1515 }, { "epoch": 0.33, "learning_rate": 0.0015749226305114227, "loss": 2.208, "step": 1516 }, { "epoch": 0.33, "learning_rate": 0.0015743526672495527, "loss": 2.2783, "step": 1517 }, { "epoch": 0.33, "learning_rate": 0.0015737824254187275, "loss": 2.0928, "step": 1518 }, { "epoch": 0.33, "learning_rate": 0.0015732119052955214, "loss": 2.2891, "step": 1519 }, { "epoch": 0.33, "learning_rate": 0.0015726411071566454, "loss": 2.3613, "step": 1520 }, { "epoch": 0.33, "learning_rate": 0.001572070031278944, "loss": 2.3984, "step": 1521 }, { "epoch": 0.33, "learning_rate": 0.0015714986779393966, "loss": 2.3926, "step": 1522 }, { "epoch": 0.33, "learning_rate": 0.0015709270474151182, "loss": 2.167, "step": 1523 }, { "epoch": 0.33, "learning_rate": 0.0015703551399833572, "loss": 2.2314, "step": 1524 }, { "epoch": 0.33, "learning_rate": 0.0015697829559214959, "loss": 2.2578, "step": 1525 }, { "epoch": 0.33, "learning_rate": 0.0015692104955070524, "loss": 2.2617, "step": 1526 }, { "epoch": 0.33, "learning_rate": 0.0015686377590176772, "loss": 2.3223, "step": 1527 }, { "epoch": 0.33, "learning_rate": 0.0015680647467311557, "loss": 2.4434, "step": 1528 }, { "epoch": 0.33, "learning_rate": 0.001567491458925407, "loss": 2.249, "step": 1529 }, { "epoch": 0.33, "learning_rate": 0.001566917895878483, "loss": 2.2617, "step": 1530 }, { "epoch": 0.33, "learning_rate": 0.0015663440578685703, "loss": 2.4219, "step": 1531 }, { "epoch": 0.33, "learning_rate": 0.0015657699451739877, "loss": 2.2871, "step": 1532 }, { "epoch": 0.33, "learning_rate": 0.001565195558073188, "loss": 2.3574, "step": 1533 }, { "epoch": 0.33, "learning_rate": 0.0015646208968447567, "loss": 2.4316, "step": 1534 }, { "epoch": 0.33, "learning_rate": 0.001564045961767413, "loss": 2.3408, "step": 1535 }, { "epoch": 0.33, "learning_rate": 0.0015634707531200075, "loss": 2.1416, "step": 1536 }, { "epoch": 0.33, "learning_rate": 0.001562895271181525, "loss": 2.3418, "step": 1537 }, { "epoch": 0.33, "learning_rate": 0.0015623195162310815, "loss": 2.3105, "step": 1538 }, { "epoch": 0.33, "learning_rate": 0.0015617434885479267, "loss": 2.3389, "step": 1539 }, { "epoch": 0.33, "learning_rate": 0.0015611671884114419, "loss": 2.0625, "step": 1540 }, { "epoch": 0.33, "learning_rate": 0.0015605906161011399, "loss": 2.3203, "step": 1541 }, { "epoch": 0.33, "learning_rate": 0.001560013771896667, "loss": 2.1953, "step": 1542 }, { "epoch": 0.33, "learning_rate": 0.0015594366560778004, "loss": 2.2461, "step": 1543 }, { "epoch": 0.33, "learning_rate": 0.001558859268924449, "loss": 2.1865, "step": 1544 }, { "epoch": 0.33, "learning_rate": 0.0015582816107166538, "loss": 2.3594, "step": 1545 }, { "epoch": 0.33, "learning_rate": 0.0015577036817345869, "loss": 2.2383, "step": 1546 }, { "epoch": 0.33, "learning_rate": 0.0015571254822585514, "loss": 2.3184, "step": 1547 }, { "epoch": 0.33, "learning_rate": 0.0015565470125689829, "loss": 2.2695, "step": 1548 }, { "epoch": 0.33, "learning_rate": 0.0015559682729464463, "loss": 2.2539, "step": 1549 }, { "epoch": 0.33, "learning_rate": 0.0015553892636716387, "loss": 2.2666, "step": 1550 }, { "epoch": 0.33, "learning_rate": 0.0015548099850253875, "loss": 2.3809, "step": 1551 }, { "epoch": 0.33, "learning_rate": 0.0015542304372886508, "loss": 2.375, "step": 1552 }, { "epoch": 0.33, "learning_rate": 0.0015536506207425177, "loss": 2.3574, "step": 1553 }, { "epoch": 0.33, "learning_rate": 0.0015530705356682061, "loss": 2.332, "step": 1554 }, { "epoch": 0.33, "learning_rate": 0.0015524901823470666, "loss": 2.4414, "step": 1555 }, { "epoch": 0.33, "learning_rate": 0.0015519095610605773, "loss": 2.3008, "step": 1556 }, { "epoch": 0.33, "learning_rate": 0.0015513286720903485, "loss": 2.3164, "step": 1557 }, { "epoch": 0.33, "learning_rate": 0.0015507475157181187, "loss": 2.3105, "step": 1558 }, { "epoch": 0.34, "learning_rate": 0.0015501660922257572, "loss": 2.3047, "step": 1559 }, { "epoch": 0.34, "learning_rate": 0.0015495844018952622, "loss": 2.3965, "step": 1560 }, { "epoch": 0.34, "learning_rate": 0.0015490024450087617, "loss": 2.334, "step": 1561 }, { "epoch": 0.34, "learning_rate": 0.0015484202218485122, "loss": 2.2617, "step": 1562 }, { "epoch": 0.34, "learning_rate": 0.0015478377326969008, "loss": 2.3223, "step": 1563 }, { "epoch": 0.34, "learning_rate": 0.001547254977836442, "loss": 2.291, "step": 1564 }, { "epoch": 0.34, "learning_rate": 0.0015466719575497807, "loss": 2.3145, "step": 1565 }, { "epoch": 0.34, "learning_rate": 0.0015460886721196893, "loss": 2.1895, "step": 1566 }, { "epoch": 0.34, "learning_rate": 0.001545505121829069, "loss": 2.1533, "step": 1567 }, { "epoch": 0.34, "learning_rate": 0.00154492130696095, "loss": 2.377, "step": 1568 }, { "epoch": 0.34, "learning_rate": 0.0015443372277984912, "loss": 2.2441, "step": 1569 }, { "epoch": 0.34, "learning_rate": 0.001543752884624978, "loss": 2.3652, "step": 1570 }, { "epoch": 0.34, "learning_rate": 0.0015431682777238259, "loss": 2.207, "step": 1571 }, { "epoch": 0.34, "learning_rate": 0.0015425834073785761, "loss": 2.2734, "step": 1572 }, { "epoch": 0.34, "learning_rate": 0.0015419982738728996, "loss": 2.2939, "step": 1573 }, { "epoch": 0.34, "learning_rate": 0.0015414128774905943, "loss": 2.3066, "step": 1574 }, { "epoch": 0.34, "learning_rate": 0.001540827218515585, "loss": 2.3125, "step": 1575 }, { "epoch": 0.34, "learning_rate": 0.0015402412972319243, "loss": 2.4238, "step": 1576 }, { "epoch": 0.34, "learning_rate": 0.001539655113923793, "loss": 2.4219, "step": 1577 }, { "epoch": 0.34, "learning_rate": 0.001539068668875497, "loss": 2.2471, "step": 1578 }, { "epoch": 0.34, "learning_rate": 0.001538481962371471, "loss": 2.4629, "step": 1579 }, { "epoch": 0.34, "learning_rate": 0.0015378949946962754, "loss": 2.4043, "step": 1580 }, { "epoch": 0.34, "learning_rate": 0.0015373077661345975, "loss": 2.3096, "step": 1581 }, { "epoch": 0.34, "learning_rate": 0.001536720276971252, "loss": 2.1914, "step": 1582 }, { "epoch": 0.34, "learning_rate": 0.0015361325274911779, "loss": 2.3271, "step": 1583 }, { "epoch": 0.34, "learning_rate": 0.0015355445179794428, "loss": 2.418, "step": 1584 }, { "epoch": 0.34, "learning_rate": 0.0015349562487212394, "loss": 2.2754, "step": 1585 }, { "epoch": 0.34, "learning_rate": 0.0015343677200018858, "loss": 2.2754, "step": 1586 }, { "epoch": 0.34, "learning_rate": 0.001533778932106827, "loss": 2.3789, "step": 1587 }, { "epoch": 0.34, "learning_rate": 0.0015331898853216328, "loss": 2.1875, "step": 1588 }, { "epoch": 0.34, "learning_rate": 0.0015326005799319998, "loss": 2.1875, "step": 1589 }, { "epoch": 0.34, "learning_rate": 0.0015320110162237485, "loss": 2.0781, "step": 1590 }, { "epoch": 0.34, "learning_rate": 0.0015314211944828255, "loss": 2.3975, "step": 1591 }, { "epoch": 0.34, "learning_rate": 0.001530831114995303, "loss": 2.3535, "step": 1592 }, { "epoch": 0.34, "learning_rate": 0.0015302407780473772, "loss": 2.3643, "step": 1593 }, { "epoch": 0.34, "learning_rate": 0.0015296501839253697, "loss": 2.2715, "step": 1594 }, { "epoch": 0.34, "learning_rate": 0.001529059332915727, "loss": 2.3906, "step": 1595 }, { "epoch": 0.34, "learning_rate": 0.0015284682253050197, "loss": 2.4688, "step": 1596 }, { "epoch": 0.34, "learning_rate": 0.0015278768613799431, "loss": 2.333, "step": 1597 }, { "epoch": 0.34, "learning_rate": 0.0015272852414273173, "loss": 2.3457, "step": 1598 }, { "epoch": 0.34, "learning_rate": 0.001526693365734086, "loss": 2.4121, "step": 1599 }, { "epoch": 0.34, "learning_rate": 0.0015261012345873166, "loss": 2.3262, "step": 1600 }, { "epoch": 0.34, "learning_rate": 0.001525508848274201, "loss": 2.2109, "step": 1601 }, { "epoch": 0.34, "learning_rate": 0.0015249162070820554, "loss": 2.2285, "step": 1602 }, { "epoch": 0.34, "learning_rate": 0.0015243233112983182, "loss": 2.2988, "step": 1603 }, { "epoch": 0.34, "learning_rate": 0.0015237301612105521, "loss": 2.3066, "step": 1604 }, { "epoch": 0.35, "learning_rate": 0.0015231367571064433, "loss": 2.2314, "step": 1605 }, { "epoch": 0.35, "learning_rate": 0.001522543099273801, "loss": 2.2754, "step": 1606 }, { "epoch": 0.35, "learning_rate": 0.001521949188000557, "loss": 2.3477, "step": 1607 }, { "epoch": 0.35, "learning_rate": 0.0015213550235747672, "loss": 2.5, "step": 1608 }, { "epoch": 0.35, "learning_rate": 0.001520760606284609, "loss": 2.2393, "step": 1609 }, { "epoch": 0.35, "learning_rate": 0.001520165936418383, "loss": 2.3359, "step": 1610 }, { "epoch": 0.35, "learning_rate": 0.0015195710142645123, "loss": 2.2666, "step": 1611 }, { "epoch": 0.35, "learning_rate": 0.0015189758401115423, "loss": 2.2598, "step": 1612 }, { "epoch": 0.35, "learning_rate": 0.001518380414248141, "loss": 2.2559, "step": 1613 }, { "epoch": 0.35, "learning_rate": 0.0015177847369630979, "loss": 2.3008, "step": 1614 }, { "epoch": 0.35, "learning_rate": 0.0015171888085453245, "loss": 2.2422, "step": 1615 }, { "epoch": 0.35, "learning_rate": 0.0015165926292838546, "loss": 2.3516, "step": 1616 }, { "epoch": 0.35, "learning_rate": 0.001515996199467843, "loss": 2.2461, "step": 1617 }, { "epoch": 0.35, "learning_rate": 0.0015153995193865667, "loss": 2.292, "step": 1618 }, { "epoch": 0.35, "learning_rate": 0.001514802589329424, "loss": 2.4434, "step": 1619 }, { "epoch": 0.35, "learning_rate": 0.0015142054095859336, "loss": 2.2793, "step": 1620 }, { "epoch": 0.35, "learning_rate": 0.0015136079804457358, "loss": 2.3193, "step": 1621 }, { "epoch": 0.35, "learning_rate": 0.0015130103021985927, "loss": 2.4004, "step": 1622 }, { "epoch": 0.35, "learning_rate": 0.0015124123751343863, "loss": 2.1826, "step": 1623 }, { "epoch": 0.35, "learning_rate": 0.0015118141995431192, "loss": 2.373, "step": 1624 }, { "epoch": 0.35, "learning_rate": 0.0015112157757149146, "loss": 2.3096, "step": 1625 }, { "epoch": 0.35, "learning_rate": 0.0015106171039400169, "loss": 2.2646, "step": 1626 }, { "epoch": 0.35, "learning_rate": 0.0015100181845087897, "loss": 2.373, "step": 1627 }, { "epoch": 0.35, "learning_rate": 0.001509419017711717, "loss": 2.3418, "step": 1628 }, { "epoch": 0.35, "learning_rate": 0.0015088196038394036, "loss": 2.2188, "step": 1629 }, { "epoch": 0.35, "learning_rate": 0.0015082199431825734, "loss": 2.292, "step": 1630 }, { "epoch": 0.35, "learning_rate": 0.0015076200360320694, "loss": 2.2383, "step": 1631 }, { "epoch": 0.35, "learning_rate": 0.0015070198826788552, "loss": 2.2783, "step": 1632 }, { "epoch": 0.35, "learning_rate": 0.0015064194834140138, "loss": 2.3125, "step": 1633 }, { "epoch": 0.35, "learning_rate": 0.0015058188385287466, "loss": 2.2441, "step": 1634 }, { "epoch": 0.35, "learning_rate": 0.001505217948314375, "loss": 2.2197, "step": 1635 }, { "epoch": 0.35, "learning_rate": 0.0015046168130623389, "loss": 2.2168, "step": 1636 }, { "epoch": 0.35, "learning_rate": 0.0015040154330641972, "loss": 2.2412, "step": 1637 }, { "epoch": 0.35, "learning_rate": 0.0015034138086116272, "loss": 2.3223, "step": 1638 }, { "epoch": 0.35, "learning_rate": 0.0015028119399964255, "loss": 2.2793, "step": 1639 }, { "epoch": 0.35, "learning_rate": 0.0015022098275105065, "loss": 2.208, "step": 1640 }, { "epoch": 0.35, "learning_rate": 0.001501607471445903, "loss": 2.2881, "step": 1641 }, { "epoch": 0.35, "learning_rate": 0.0015010048720947658, "loss": 2.3955, "step": 1642 }, { "epoch": 0.35, "learning_rate": 0.001500402029749364, "loss": 2.2891, "step": 1643 }, { "epoch": 0.35, "learning_rate": 0.0014997989447020853, "loss": 2.3008, "step": 1644 }, { "epoch": 0.35, "learning_rate": 0.0014991956172454328, "loss": 2.1494, "step": 1645 }, { "epoch": 0.35, "learning_rate": 0.0014985920476720295, "loss": 2.1416, "step": 1646 }, { "epoch": 0.35, "learning_rate": 0.001497988236274615, "loss": 2.3145, "step": 1647 }, { "epoch": 0.35, "learning_rate": 0.0014973841833460456, "loss": 2.4375, "step": 1648 }, { "epoch": 0.35, "learning_rate": 0.0014967798891792957, "loss": 2.2031, "step": 1649 }, { "epoch": 0.35, "learning_rate": 0.001496175354067456, "loss": 2.3867, "step": 1650 }, { "epoch": 0.35, "learning_rate": 0.001495570578303735, "loss": 2.2881, "step": 1651 }, { "epoch": 0.36, "learning_rate": 0.0014949655621814566, "loss": 2.2412, "step": 1652 }, { "epoch": 0.36, "learning_rate": 0.0014943603059940623, "loss": 2.3066, "step": 1653 }, { "epoch": 0.36, "learning_rate": 0.0014937548100351094, "loss": 2.2432, "step": 1654 }, { "epoch": 0.36, "learning_rate": 0.0014931490745982718, "loss": 2.1338, "step": 1655 }, { "epoch": 0.36, "learning_rate": 0.0014925430999773402, "loss": 2.3672, "step": 1656 }, { "epoch": 0.36, "learning_rate": 0.00149193688646622, "loss": 2.292, "step": 1657 }, { "epoch": 0.36, "learning_rate": 0.001491330434358933, "loss": 2.2803, "step": 1658 }, { "epoch": 0.36, "learning_rate": 0.0014907237439496172, "loss": 2.3594, "step": 1659 }, { "epoch": 0.36, "learning_rate": 0.0014901168155325255, "loss": 2.3828, "step": 1660 }, { "epoch": 0.36, "learning_rate": 0.0014895096494020274, "loss": 2.2939, "step": 1661 }, { "epoch": 0.36, "learning_rate": 0.0014889022458526053, "loss": 2.2441, "step": 1662 }, { "epoch": 0.36, "learning_rate": 0.0014882946051788595, "loss": 2.3223, "step": 1663 }, { "epoch": 0.36, "learning_rate": 0.001487686727675504, "loss": 2.3975, "step": 1664 }, { "epoch": 0.36, "learning_rate": 0.001487078613637367, "loss": 2.3525, "step": 1665 }, { "epoch": 0.36, "learning_rate": 0.0014864702633593928, "loss": 2.2783, "step": 1666 }, { "epoch": 0.36, "learning_rate": 0.0014858616771366397, "loss": 2.2969, "step": 1667 }, { "epoch": 0.36, "learning_rate": 0.0014852528552642802, "loss": 2.3145, "step": 1668 }, { "epoch": 0.36, "learning_rate": 0.0014846437980376016, "loss": 2.3242, "step": 1669 }, { "epoch": 0.36, "learning_rate": 0.0014840345057520045, "loss": 2.2949, "step": 1670 }, { "epoch": 0.36, "learning_rate": 0.0014834249787030044, "loss": 2.2031, "step": 1671 }, { "epoch": 0.36, "learning_rate": 0.0014828152171862303, "loss": 2.3184, "step": 1672 }, { "epoch": 0.36, "learning_rate": 0.0014822052214974247, "loss": 2.2373, "step": 1673 }, { "epoch": 0.36, "learning_rate": 0.0014815949919324443, "loss": 2.3096, "step": 1674 }, { "epoch": 0.36, "learning_rate": 0.0014809845287872588, "loss": 2.2852, "step": 1675 }, { "epoch": 0.36, "learning_rate": 0.0014803738323579507, "loss": 2.2598, "step": 1676 }, { "epoch": 0.36, "learning_rate": 0.001479762902940717, "loss": 2.3789, "step": 1677 }, { "epoch": 0.36, "learning_rate": 0.001479151740831866, "loss": 2.3135, "step": 1678 }, { "epoch": 0.36, "learning_rate": 0.0014785403463278203, "loss": 2.3008, "step": 1679 }, { "epoch": 0.36, "learning_rate": 0.0014779287197251147, "loss": 2.2129, "step": 1680 }, { "epoch": 0.36, "learning_rate": 0.0014773168613203965, "loss": 2.2842, "step": 1681 }, { "epoch": 0.36, "learning_rate": 0.0014767047714104248, "loss": 2.3477, "step": 1682 }, { "epoch": 0.36, "learning_rate": 0.0014760924502920728, "loss": 2.332, "step": 1683 }, { "epoch": 0.36, "learning_rate": 0.0014754798982623237, "loss": 2.3613, "step": 1684 }, { "epoch": 0.36, "learning_rate": 0.0014748671156182742, "loss": 2.3105, "step": 1685 }, { "epoch": 0.36, "learning_rate": 0.0014742541026571319, "loss": 2.1426, "step": 1686 }, { "epoch": 0.36, "learning_rate": 0.001473640859676217, "loss": 2.2812, "step": 1687 }, { "epoch": 0.36, "learning_rate": 0.0014730273869729606, "loss": 2.2861, "step": 1688 }, { "epoch": 0.36, "learning_rate": 0.0014724136848449053, "loss": 2.2617, "step": 1689 }, { "epoch": 0.36, "learning_rate": 0.001471799753589705, "loss": 2.3594, "step": 1690 }, { "epoch": 0.36, "learning_rate": 0.001471185593505125, "loss": 2.2266, "step": 1691 }, { "epoch": 0.36, "learning_rate": 0.0014705712048890417, "loss": 2.3457, "step": 1692 }, { "epoch": 0.36, "learning_rate": 0.0014699565880394415, "loss": 2.3223, "step": 1693 }, { "epoch": 0.36, "learning_rate": 0.001469341743254422, "loss": 2.4219, "step": 1694 }, { "epoch": 0.36, "learning_rate": 0.001468726670832192, "loss": 2.1914, "step": 1695 }, { "epoch": 0.36, "learning_rate": 0.00146811137107107, "loss": 2.2461, "step": 1696 }, { "epoch": 0.36, "learning_rate": 0.0014674958442694838, "loss": 2.3203, "step": 1697 }, { "epoch": 0.37, "learning_rate": 0.0014668800907259739, "loss": 2.3271, "step": 1698 }, { "epoch": 0.37, "learning_rate": 0.0014662641107391883, "loss": 2.1738, "step": 1699 }, { "epoch": 0.37, "learning_rate": 0.001465647904607886, "loss": 2.3691, "step": 1700 }, { "epoch": 0.37, "learning_rate": 0.0014650314726309356, "loss": 2.2793, "step": 1701 }, { "epoch": 0.37, "learning_rate": 0.0014644148151073148, "loss": 2.375, "step": 1702 }, { "epoch": 0.37, "learning_rate": 0.0014637979323361113, "loss": 2.248, "step": 1703 }, { "epoch": 0.37, "learning_rate": 0.0014631808246165217, "loss": 2.0879, "step": 1704 }, { "epoch": 0.37, "learning_rate": 0.0014625634922478517, "loss": 2.2949, "step": 1705 }, { "epoch": 0.37, "learning_rate": 0.0014619459355295157, "loss": 2.1523, "step": 1706 }, { "epoch": 0.37, "learning_rate": 0.0014613281547610376, "loss": 2.3457, "step": 1707 }, { "epoch": 0.37, "learning_rate": 0.0014607101502420493, "loss": 2.2656, "step": 1708 }, { "epoch": 0.37, "learning_rate": 0.0014600919222722921, "loss": 2.2627, "step": 1709 }, { "epoch": 0.37, "learning_rate": 0.0014594734711516142, "loss": 2.2295, "step": 1710 }, { "epoch": 0.37, "learning_rate": 0.0014588547971799734, "loss": 2.4043, "step": 1711 }, { "epoch": 0.37, "learning_rate": 0.0014582359006574356, "loss": 2.2598, "step": 1712 }, { "epoch": 0.37, "learning_rate": 0.001457616781884173, "loss": 2.252, "step": 1713 }, { "epoch": 0.37, "learning_rate": 0.0014569974411604676, "loss": 2.2803, "step": 1714 }, { "epoch": 0.37, "learning_rate": 0.0014563778787867077, "loss": 2.4004, "step": 1715 }, { "epoch": 0.37, "learning_rate": 0.00145575809506339, "loss": 2.3047, "step": 1716 }, { "epoch": 0.37, "learning_rate": 0.001455138090291118, "loss": 2.3496, "step": 1717 }, { "epoch": 0.37, "learning_rate": 0.0014545178647706024, "loss": 2.3408, "step": 1718 }, { "epoch": 0.37, "learning_rate": 0.0014538974188026612, "loss": 2.3887, "step": 1719 }, { "epoch": 0.37, "learning_rate": 0.0014532767526882194, "loss": 2.2793, "step": 1720 }, { "epoch": 0.37, "learning_rate": 0.0014526558667283082, "loss": 2.2246, "step": 1721 }, { "epoch": 0.37, "learning_rate": 0.0014520347612240663, "loss": 2.2363, "step": 1722 }, { "epoch": 0.37, "learning_rate": 0.0014514134364767384, "loss": 2.3633, "step": 1723 }, { "epoch": 0.37, "learning_rate": 0.001450791892787675, "loss": 2.0947, "step": 1724 }, { "epoch": 0.37, "learning_rate": 0.0014501701304583342, "loss": 2.252, "step": 1725 }, { "epoch": 0.37, "learning_rate": 0.0014495481497902786, "loss": 2.1758, "step": 1726 }, { "epoch": 0.37, "learning_rate": 0.0014489259510851778, "loss": 2.3789, "step": 1727 }, { "epoch": 0.37, "learning_rate": 0.0014483035346448065, "loss": 2.3701, "step": 1728 }, { "epoch": 0.37, "learning_rate": 0.0014476809007710454, "loss": 2.1953, "step": 1729 }, { "epoch": 0.37, "learning_rate": 0.0014470580497658804, "loss": 2.2783, "step": 1730 }, { "epoch": 0.37, "learning_rate": 0.0014464349819314029, "loss": 2.3105, "step": 1731 }, { "epoch": 0.37, "learning_rate": 0.001445811697569809, "loss": 2.2012, "step": 1732 }, { "epoch": 0.37, "learning_rate": 0.0014451881969834014, "loss": 2.252, "step": 1733 }, { "epoch": 0.37, "learning_rate": 0.0014445644804745848, "loss": 2.1084, "step": 1734 }, { "epoch": 0.37, "learning_rate": 0.0014439405483458714, "loss": 2.293, "step": 1735 }, { "epoch": 0.37, "learning_rate": 0.0014433164008998767, "loss": 2.2549, "step": 1736 }, { "epoch": 0.37, "learning_rate": 0.0014426920384393205, "loss": 2.1807, "step": 1737 }, { "epoch": 0.37, "learning_rate": 0.0014420674612670274, "loss": 2.1328, "step": 1738 }, { "epoch": 0.37, "learning_rate": 0.0014414426696859259, "loss": 2.3262, "step": 1739 }, { "epoch": 0.37, "learning_rate": 0.001440817663999049, "loss": 2.4092, "step": 1740 }, { "epoch": 0.37, "learning_rate": 0.0014401924445095323, "loss": 2.208, "step": 1741 }, { "epoch": 0.37, "learning_rate": 0.001439567011520616, "loss": 2.3643, "step": 1742 }, { "epoch": 0.37, "learning_rate": 0.0014389413653356443, "loss": 2.3447, "step": 1743 }, { "epoch": 0.37, "learning_rate": 0.0014383155062580636, "loss": 2.2646, "step": 1744 }, { "epoch": 0.38, "learning_rate": 0.0014376894345914243, "loss": 2.2227, "step": 1745 }, { "epoch": 0.38, "learning_rate": 0.0014370631506393798, "loss": 2.2793, "step": 1746 }, { "epoch": 0.38, "learning_rate": 0.0014364366547056872, "loss": 2.2324, "step": 1747 }, { "epoch": 0.38, "learning_rate": 0.0014358099470942042, "loss": 2.3184, "step": 1748 }, { "epoch": 0.38, "learning_rate": 0.0014351830281088937, "loss": 2.3711, "step": 1749 }, { "epoch": 0.38, "learning_rate": 0.0014345558980538198, "loss": 2.3398, "step": 1750 }, { "epoch": 0.38, "learning_rate": 0.0014339285572331493, "loss": 2.3428, "step": 1751 }, { "epoch": 0.38, "learning_rate": 0.0014333010059511505, "loss": 2.2773, "step": 1752 }, { "epoch": 0.38, "learning_rate": 0.0014326732445121952, "loss": 2.3672, "step": 1753 }, { "epoch": 0.38, "learning_rate": 0.0014320452732207562, "loss": 2.252, "step": 1754 }, { "epoch": 0.38, "learning_rate": 0.001431417092381408, "loss": 2.2363, "step": 1755 }, { "epoch": 0.38, "learning_rate": 0.0014307887022988268, "loss": 2.2002, "step": 1756 }, { "epoch": 0.38, "learning_rate": 0.0014301601032777912, "loss": 2.3926, "step": 1757 }, { "epoch": 0.38, "learning_rate": 0.0014295312956231795, "loss": 2.252, "step": 1758 }, { "epoch": 0.38, "learning_rate": 0.001428902279639973, "loss": 2.1924, "step": 1759 }, { "epoch": 0.38, "learning_rate": 0.0014282730556332522, "loss": 2.2598, "step": 1760 }, { "epoch": 0.38, "learning_rate": 0.0014276436239081996, "loss": 2.3516, "step": 1761 }, { "epoch": 0.38, "learning_rate": 0.0014270139847700986, "loss": 2.1875, "step": 1762 }, { "epoch": 0.38, "learning_rate": 0.0014263841385243326, "loss": 2.1934, "step": 1763 }, { "epoch": 0.38, "learning_rate": 0.001425754085476386, "loss": 2.2197, "step": 1764 }, { "epoch": 0.38, "learning_rate": 0.0014251238259318428, "loss": 2.3965, "step": 1765 }, { "epoch": 0.38, "learning_rate": 0.0014244933601963879, "loss": 2.3135, "step": 1766 }, { "epoch": 0.38, "learning_rate": 0.0014238626885758057, "loss": 2.2031, "step": 1767 }, { "epoch": 0.38, "learning_rate": 0.0014232318113759808, "loss": 2.0117, "step": 1768 }, { "epoch": 0.38, "learning_rate": 0.001422600728902897, "loss": 2.1357, "step": 1769 }, { "epoch": 0.38, "learning_rate": 0.0014219694414626383, "loss": 2.1807, "step": 1770 }, { "epoch": 0.38, "learning_rate": 0.0014213379493613878, "loss": 2.3359, "step": 1771 }, { "epoch": 0.38, "learning_rate": 0.0014207062529054276, "loss": 2.2568, "step": 1772 }, { "epoch": 0.38, "learning_rate": 0.0014200743524011394, "loss": 2.2871, "step": 1773 }, { "epoch": 0.38, "learning_rate": 0.0014194422481550036, "loss": 2.3291, "step": 1774 }, { "epoch": 0.38, "learning_rate": 0.0014188099404735994, "loss": 2.2471, "step": 1775 }, { "epoch": 0.38, "learning_rate": 0.0014181774296636047, "loss": 2.1357, "step": 1776 }, { "epoch": 0.38, "learning_rate": 0.0014175447160317965, "loss": 2.2461, "step": 1777 }, { "epoch": 0.38, "learning_rate": 0.001416911799885049, "loss": 2.2324, "step": 1778 }, { "epoch": 0.38, "learning_rate": 0.001416278681530335, "loss": 2.2744, "step": 1779 }, { "epoch": 0.38, "learning_rate": 0.0014156453612747262, "loss": 2.1211, "step": 1780 }, { "epoch": 0.38, "learning_rate": 0.0014150118394253921, "loss": 2.2832, "step": 1781 }, { "epoch": 0.38, "learning_rate": 0.0014143781162895984, "loss": 2.127, "step": 1782 }, { "epoch": 0.38, "learning_rate": 0.0014137441921747104, "loss": 2.3623, "step": 1783 }, { "epoch": 0.38, "learning_rate": 0.0014131100673881894, "loss": 2.1465, "step": 1784 }, { "epoch": 0.38, "learning_rate": 0.0014124757422375946, "loss": 2.2607, "step": 1785 }, { "epoch": 0.38, "learning_rate": 0.0014118412170305837, "loss": 2.2344, "step": 1786 }, { "epoch": 0.38, "learning_rate": 0.0014112064920749083, "loss": 2.2734, "step": 1787 }, { "epoch": 0.38, "learning_rate": 0.0014105715676784208, "loss": 2.291, "step": 1788 }, { "epoch": 0.38, "learning_rate": 0.0014099364441490664, "loss": 2.2812, "step": 1789 }, { "epoch": 0.38, "learning_rate": 0.0014093011217948895, "loss": 2.3867, "step": 1790 }, { "epoch": 0.39, "learning_rate": 0.0014086656009240307, "loss": 2.2402, "step": 1791 }, { "epoch": 0.39, "learning_rate": 0.0014080298818447256, "loss": 2.2988, "step": 1792 }, { "epoch": 0.39, "learning_rate": 0.001407393964865307, "loss": 2.3936, "step": 1793 }, { "epoch": 0.39, "learning_rate": 0.0014067578502942038, "loss": 2.1201, "step": 1794 }, { "epoch": 0.39, "learning_rate": 0.00140612153843994, "loss": 2.1533, "step": 1795 }, { "epoch": 0.39, "learning_rate": 0.0014054850296111353, "loss": 2.2266, "step": 1796 }, { "epoch": 0.39, "learning_rate": 0.0014048483241165053, "loss": 2.1055, "step": 1797 }, { "epoch": 0.39, "learning_rate": 0.0014042114222648614, "loss": 2.1689, "step": 1798 }, { "epoch": 0.39, "learning_rate": 0.0014035743243651097, "loss": 2.375, "step": 1799 }, { "epoch": 0.39, "learning_rate": 0.001402937030726251, "loss": 2.3574, "step": 1800 }, { "epoch": 0.39, "learning_rate": 0.0014022995416573818, "loss": 2.2959, "step": 1801 }, { "epoch": 0.39, "learning_rate": 0.0014016618574676934, "loss": 2.3398, "step": 1802 }, { "epoch": 0.39, "learning_rate": 0.0014010239784664708, "loss": 2.3906, "step": 1803 }, { "epoch": 0.39, "learning_rate": 0.0014003859049630943, "loss": 2.291, "step": 1804 }, { "epoch": 0.39, "learning_rate": 0.0013997476372670386, "loss": 2.1484, "step": 1805 }, { "epoch": 0.39, "learning_rate": 0.001399109175687872, "loss": 2.3408, "step": 1806 }, { "epoch": 0.39, "learning_rate": 0.0013984705205352574, "loss": 2.1855, "step": 1807 }, { "epoch": 0.39, "learning_rate": 0.0013978316721189512, "loss": 2.3223, "step": 1808 }, { "epoch": 0.39, "learning_rate": 0.0013971926307488039, "loss": 2.2314, "step": 1809 }, { "epoch": 0.39, "learning_rate": 0.001396553396734759, "loss": 2.2178, "step": 1810 }, { "epoch": 0.39, "learning_rate": 0.001395913970386854, "loss": 2.3486, "step": 1811 }, { "epoch": 0.39, "learning_rate": 0.0013952743520152199, "loss": 2.2246, "step": 1812 }, { "epoch": 0.39, "learning_rate": 0.0013946345419300794, "loss": 2.1797, "step": 1813 }, { "epoch": 0.39, "learning_rate": 0.00139399454044175, "loss": 2.2773, "step": 1814 }, { "epoch": 0.39, "learning_rate": 0.0013933543478606412, "loss": 2.165, "step": 1815 }, { "epoch": 0.39, "learning_rate": 0.0013927139644972551, "loss": 2.3477, "step": 1816 }, { "epoch": 0.39, "learning_rate": 0.0013920733906621862, "loss": 2.4424, "step": 1817 }, { "epoch": 0.39, "learning_rate": 0.0013914326266661222, "loss": 2.2012, "step": 1818 }, { "epoch": 0.39, "learning_rate": 0.0013907916728198417, "loss": 2.1953, "step": 1819 }, { "epoch": 0.39, "learning_rate": 0.001390150529434217, "loss": 2.3467, "step": 1820 }, { "epoch": 0.39, "learning_rate": 0.0013895091968202107, "loss": 2.2607, "step": 1821 }, { "epoch": 0.39, "learning_rate": 0.0013888676752888786, "loss": 2.165, "step": 1822 }, { "epoch": 0.39, "learning_rate": 0.0013882259651513671, "loss": 2.2402, "step": 1823 }, { "epoch": 0.39, "learning_rate": 0.0013875840667189143, "loss": 2.167, "step": 1824 }, { "epoch": 0.39, "learning_rate": 0.0013869419803028502, "loss": 2.0596, "step": 1825 }, { "epoch": 0.39, "learning_rate": 0.0013862997062145954, "loss": 2.3945, "step": 1826 }, { "epoch": 0.39, "learning_rate": 0.0013856572447656617, "loss": 2.21, "step": 1827 }, { "epoch": 0.39, "learning_rate": 0.0013850145962676517, "loss": 2.1885, "step": 1828 }, { "epoch": 0.39, "learning_rate": 0.0013843717610322588, "loss": 2.2441, "step": 1829 }, { "epoch": 0.39, "learning_rate": 0.0013837287393712666, "loss": 2.3066, "step": 1830 }, { "epoch": 0.39, "learning_rate": 0.0013830855315965502, "loss": 2.2979, "step": 1831 }, { "epoch": 0.39, "learning_rate": 0.0013824421380200739, "loss": 2.2871, "step": 1832 }, { "epoch": 0.39, "learning_rate": 0.001381798558953892, "loss": 2.2041, "step": 1833 }, { "epoch": 0.39, "learning_rate": 0.0013811547947101496, "loss": 2.2725, "step": 1834 }, { "epoch": 0.39, "learning_rate": 0.001380510845601081, "loss": 2.2109, "step": 1835 }, { "epoch": 0.39, "learning_rate": 0.0013798667119390112, "loss": 2.3828, "step": 1836 }, { "epoch": 0.39, "learning_rate": 0.0013792223940363529, "loss": 2.2832, "step": 1837 }, { "epoch": 0.4, "learning_rate": 0.0013785778922056095, "loss": 2.3574, "step": 1838 }, { "epoch": 0.4, "learning_rate": 0.0013779332067593738, "loss": 2.1885, "step": 1839 }, { "epoch": 0.4, "learning_rate": 0.001377288338010326, "loss": 2.2715, "step": 1840 }, { "epoch": 0.4, "learning_rate": 0.0013766432862712375, "loss": 2.373, "step": 1841 }, { "epoch": 0.4, "learning_rate": 0.0013759980518549663, "loss": 2.2314, "step": 1842 }, { "epoch": 0.4, "learning_rate": 0.001375352635074461, "loss": 2.2236, "step": 1843 }, { "epoch": 0.4, "learning_rate": 0.0013747070362427568, "loss": 2.2637, "step": 1844 }, { "epoch": 0.4, "learning_rate": 0.001374061255672978, "loss": 2.4004, "step": 1845 }, { "epoch": 0.4, "learning_rate": 0.0013734152936783375, "loss": 2.4727, "step": 1846 }, { "epoch": 0.4, "learning_rate": 0.0013727691505721357, "loss": 2.0674, "step": 1847 }, { "epoch": 0.4, "learning_rate": 0.0013721228266677609, "loss": 2.2139, "step": 1848 }, { "epoch": 0.4, "learning_rate": 0.001371476322278689, "loss": 2.3154, "step": 1849 }, { "epoch": 0.4, "learning_rate": 0.0013708296377184835, "loss": 2.1963, "step": 1850 }, { "epoch": 0.4, "learning_rate": 0.0013701827733007953, "loss": 2.376, "step": 1851 }, { "epoch": 0.4, "learning_rate": 0.0013695357293393626, "loss": 2.2734, "step": 1852 }, { "epoch": 0.4, "learning_rate": 0.001368888506148011, "loss": 2.2451, "step": 1853 }, { "epoch": 0.4, "learning_rate": 0.001368241104040652, "loss": 2.2041, "step": 1854 }, { "epoch": 0.4, "learning_rate": 0.001367593523331285, "loss": 2.3457, "step": 1855 }, { "epoch": 0.4, "learning_rate": 0.0013669457643339955, "loss": 2.2178, "step": 1856 }, { "epoch": 0.4, "learning_rate": 0.0013662978273629553, "loss": 2.2539, "step": 1857 }, { "epoch": 0.4, "learning_rate": 0.0013656497127324232, "loss": 2.3516, "step": 1858 }, { "epoch": 0.4, "learning_rate": 0.0013650014207567433, "loss": 2.25, "step": 1859 }, { "epoch": 0.4, "learning_rate": 0.0013643529517503464, "loss": 2.1582, "step": 1860 }, { "epoch": 0.4, "learning_rate": 0.0013637043060277486, "loss": 2.2246, "step": 1861 }, { "epoch": 0.4, "learning_rate": 0.001363055483903552, "loss": 2.4199, "step": 1862 }, { "epoch": 0.4, "learning_rate": 0.001362406485692445, "loss": 2.2334, "step": 1863 }, { "epoch": 0.4, "learning_rate": 0.0013617573117092, "loss": 2.3555, "step": 1864 }, { "epoch": 0.4, "learning_rate": 0.0013611079622686752, "loss": 2.2715, "step": 1865 }, { "epoch": 0.4, "learning_rate": 0.0013604584376858144, "loss": 2.2266, "step": 1866 }, { "epoch": 0.4, "learning_rate": 0.001359808738275646, "loss": 2.3115, "step": 1867 }, { "epoch": 0.4, "learning_rate": 0.001359158864353283, "loss": 2.2646, "step": 1868 }, { "epoch": 0.4, "learning_rate": 0.0013585088162339231, "loss": 2.1865, "step": 1869 }, { "epoch": 0.4, "learning_rate": 0.0013578585942328489, "loss": 2.2422, "step": 1870 }, { "epoch": 0.4, "learning_rate": 0.001357208198665427, "loss": 2.2402, "step": 1871 }, { "epoch": 0.4, "learning_rate": 0.0013565576298471076, "loss": 2.2617, "step": 1872 }, { "epoch": 0.4, "learning_rate": 0.0013559068880934265, "loss": 2.1621, "step": 1873 }, { "epoch": 0.4, "learning_rate": 0.0013552559737200016, "loss": 2.252, "step": 1874 }, { "epoch": 0.4, "learning_rate": 0.0013546048870425357, "loss": 2.2002, "step": 1875 }, { "epoch": 0.4, "learning_rate": 0.0013539536283768147, "loss": 2.2754, "step": 1876 }, { "epoch": 0.4, "learning_rate": 0.0013533021980387083, "loss": 2.2178, "step": 1877 }, { "epoch": 0.4, "learning_rate": 0.0013526505963441689, "loss": 2.2861, "step": 1878 }, { "epoch": 0.4, "learning_rate": 0.0013519988236092321, "loss": 2.1582, "step": 1879 }, { "epoch": 0.4, "learning_rate": 0.0013513468801500173, "loss": 2.3926, "step": 1880 }, { "epoch": 0.4, "learning_rate": 0.0013506947662827256, "loss": 2.1133, "step": 1881 }, { "epoch": 0.4, "learning_rate": 0.0013500424823236412, "loss": 2.0146, "step": 1882 }, { "epoch": 0.4, "learning_rate": 0.0013493900285891306, "loss": 2.2676, "step": 1883 }, { "epoch": 0.41, "learning_rate": 0.0013487374053956437, "loss": 2.168, "step": 1884 }, { "epoch": 0.41, "learning_rate": 0.0013480846130597111, "loss": 2.1074, "step": 1885 }, { "epoch": 0.41, "learning_rate": 0.0013474316518979458, "loss": 2.2725, "step": 1886 }, { "epoch": 0.41, "learning_rate": 0.0013467785222270434, "loss": 2.3184, "step": 1887 }, { "epoch": 0.41, "learning_rate": 0.0013461252243637811, "loss": 2.1973, "step": 1888 }, { "epoch": 0.41, "learning_rate": 0.0013454717586250167, "loss": 2.2637, "step": 1889 }, { "epoch": 0.41, "learning_rate": 0.0013448181253276903, "loss": 2.1875, "step": 1890 }, { "epoch": 0.41, "learning_rate": 0.0013441643247888233, "loss": 2.2871, "step": 1891 }, { "epoch": 0.41, "learning_rate": 0.001343510357325518, "loss": 2.2012, "step": 1892 }, { "epoch": 0.41, "learning_rate": 0.0013428562232549565, "loss": 2.2803, "step": 1893 }, { "epoch": 0.41, "learning_rate": 0.0013422019228944045, "loss": 2.3281, "step": 1894 }, { "epoch": 0.41, "learning_rate": 0.0013415474565612057, "loss": 2.2441, "step": 1895 }, { "epoch": 0.41, "learning_rate": 0.0013408928245727857, "loss": 2.3525, "step": 1896 }, { "epoch": 0.41, "learning_rate": 0.0013402380272466497, "loss": 2.2734, "step": 1897 }, { "epoch": 0.41, "learning_rate": 0.0013395830649003836, "loss": 2.1631, "step": 1898 }, { "epoch": 0.41, "learning_rate": 0.001338927937851653, "loss": 2.2451, "step": 1899 }, { "epoch": 0.41, "learning_rate": 0.0013382726464182038, "loss": 2.2012, "step": 1900 }, { "epoch": 0.41, "learning_rate": 0.0013376171909178613, "loss": 2.2451, "step": 1901 }, { "epoch": 0.41, "learning_rate": 0.0013369615716685304, "loss": 2.2119, "step": 1902 }, { "epoch": 0.41, "learning_rate": 0.0013363057889881954, "loss": 2.1064, "step": 1903 }, { "epoch": 0.41, "learning_rate": 0.00133564984319492, "loss": 2.3037, "step": 1904 }, { "epoch": 0.41, "learning_rate": 0.001334993734606847, "loss": 2.1719, "step": 1905 }, { "epoch": 0.41, "learning_rate": 0.0013343374635421978, "loss": 2.0967, "step": 1906 }, { "epoch": 0.41, "learning_rate": 0.0013336810303192733, "loss": 2.1494, "step": 1907 }, { "epoch": 0.41, "learning_rate": 0.0013330244352564528, "loss": 2.293, "step": 1908 }, { "epoch": 0.41, "learning_rate": 0.0013323676786721932, "loss": 2.2949, "step": 1909 }, { "epoch": 0.41, "learning_rate": 0.0013317107608850313, "loss": 2.127, "step": 1910 }, { "epoch": 0.41, "learning_rate": 0.0013310536822135806, "loss": 2.2217, "step": 1911 }, { "epoch": 0.41, "learning_rate": 0.001330396442976534, "loss": 2.3389, "step": 1912 }, { "epoch": 0.41, "learning_rate": 0.0013297390434926608, "loss": 2.3711, "step": 1913 }, { "epoch": 0.41, "learning_rate": 0.0013290814840808095, "loss": 2.1699, "step": 1914 }, { "epoch": 0.41, "learning_rate": 0.0013284237650599052, "loss": 2.3115, "step": 1915 }, { "epoch": 0.41, "learning_rate": 0.0013277658867489506, "loss": 2.3516, "step": 1916 }, { "epoch": 0.41, "learning_rate": 0.0013271078494670257, "loss": 2.2891, "step": 1917 }, { "epoch": 0.41, "learning_rate": 0.001326449653533288, "loss": 2.2988, "step": 1918 }, { "epoch": 0.41, "learning_rate": 0.0013257912992669712, "loss": 2.335, "step": 1919 }, { "epoch": 0.41, "learning_rate": 0.0013251327869873864, "loss": 2.3711, "step": 1920 }, { "epoch": 0.41, "learning_rate": 0.0013244741170139209, "loss": 2.0742, "step": 1921 }, { "epoch": 0.41, "learning_rate": 0.0013238152896660393, "loss": 2.3213, "step": 1922 }, { "epoch": 0.41, "learning_rate": 0.0013231563052632813, "loss": 2.1592, "step": 1923 }, { "epoch": 0.41, "learning_rate": 0.0013224971641252635, "loss": 2.2139, "step": 1924 }, { "epoch": 0.41, "learning_rate": 0.0013218378665716787, "loss": 2.3057, "step": 1925 }, { "epoch": 0.41, "learning_rate": 0.0013211784129222954, "loss": 2.2031, "step": 1926 }, { "epoch": 0.41, "learning_rate": 0.001320518803496957, "loss": 2.416, "step": 1927 }, { "epoch": 0.41, "learning_rate": 0.0013198590386155843, "loss": 2.291, "step": 1928 }, { "epoch": 0.41, "learning_rate": 0.001319199118598171, "loss": 2.2578, "step": 1929 }, { "epoch": 0.41, "learning_rate": 0.0013185390437647883, "loss": 2.165, "step": 1930 }, { "epoch": 0.42, "learning_rate": 0.0013178788144355815, "loss": 2.3193, "step": 1931 }, { "epoch": 0.42, "learning_rate": 0.0013172184309307709, "loss": 2.2363, "step": 1932 }, { "epoch": 0.42, "learning_rate": 0.0013165578935706512, "loss": 2.2109, "step": 1933 }, { "epoch": 0.42, "learning_rate": 0.0013158972026755926, "loss": 2.5781, "step": 1934 }, { "epoch": 0.42, "learning_rate": 0.0013152363585660386, "loss": 2.2559, "step": 1935 }, { "epoch": 0.42, "learning_rate": 0.001314575361562509, "loss": 2.1963, "step": 1936 }, { "epoch": 0.42, "learning_rate": 0.0013139142119855953, "loss": 2.1367, "step": 1937 }, { "epoch": 0.42, "learning_rate": 0.0013132529101559643, "loss": 2.2598, "step": 1938 }, { "epoch": 0.42, "learning_rate": 0.001312591456394357, "loss": 2.2871, "step": 1939 }, { "epoch": 0.42, "learning_rate": 0.001311929851021587, "loss": 2.1445, "step": 1940 }, { "epoch": 0.42, "learning_rate": 0.0013112680943585424, "loss": 2.4238, "step": 1941 }, { "epoch": 0.42, "learning_rate": 0.0013106061867261842, "loss": 2.1387, "step": 1942 }, { "epoch": 0.42, "learning_rate": 0.001309944128445547, "loss": 2.2939, "step": 1943 }, { "epoch": 0.42, "learning_rate": 0.0013092819198377373, "loss": 2.332, "step": 1944 }, { "epoch": 0.42, "learning_rate": 0.0013086195612239365, "loss": 2.3125, "step": 1945 }, { "epoch": 0.42, "learning_rate": 0.001307957052925397, "loss": 2.3086, "step": 1946 }, { "epoch": 0.42, "learning_rate": 0.0013072943952634447, "loss": 2.2383, "step": 1947 }, { "epoch": 0.42, "learning_rate": 0.0013066315885594774, "loss": 2.3457, "step": 1948 }, { "epoch": 0.42, "learning_rate": 0.0013059686331349657, "loss": 2.0928, "step": 1949 }, { "epoch": 0.42, "learning_rate": 0.0013053055293114522, "loss": 2.3984, "step": 1950 }, { "epoch": 0.42, "learning_rate": 0.001304642277410551, "loss": 2.3193, "step": 1951 }, { "epoch": 0.42, "learning_rate": 0.0013039788777539489, "loss": 2.3691, "step": 1952 }, { "epoch": 0.42, "learning_rate": 0.0013033153306634038, "loss": 2.2676, "step": 1953 }, { "epoch": 0.42, "learning_rate": 0.0013026516364607447, "loss": 2.2744, "step": 1954 }, { "epoch": 0.42, "learning_rate": 0.001301987795467873, "loss": 2.1729, "step": 1955 }, { "epoch": 0.42, "learning_rate": 0.0013013238080067607, "loss": 2.0986, "step": 1956 }, { "epoch": 0.42, "learning_rate": 0.0013006596743994504, "loss": 2.375, "step": 1957 }, { "epoch": 0.42, "learning_rate": 0.0012999953949680563, "loss": 2.2285, "step": 1958 }, { "epoch": 0.42, "learning_rate": 0.001299330970034763, "loss": 2.4141, "step": 1959 }, { "epoch": 0.42, "learning_rate": 0.0012986663999218263, "loss": 2.2178, "step": 1960 }, { "epoch": 0.42, "learning_rate": 0.001298001684951571, "loss": 2.3301, "step": 1961 }, { "epoch": 0.42, "learning_rate": 0.0012973368254463934, "loss": 2.2832, "step": 1962 }, { "epoch": 0.42, "learning_rate": 0.0012966718217287596, "loss": 2.1104, "step": 1963 }, { "epoch": 0.42, "learning_rate": 0.0012960066741212054, "loss": 2.4199, "step": 1964 }, { "epoch": 0.42, "learning_rate": 0.0012953413829463365, "loss": 2.3066, "step": 1965 }, { "epoch": 0.42, "learning_rate": 0.0012946759485268288, "loss": 2.2871, "step": 1966 }, { "epoch": 0.42, "learning_rate": 0.0012940103711854267, "loss": 2.2432, "step": 1967 }, { "epoch": 0.42, "learning_rate": 0.0012933446512449446, "loss": 2.2646, "step": 1968 }, { "epoch": 0.42, "learning_rate": 0.0012926787890282655, "loss": 2.2734, "step": 1969 }, { "epoch": 0.42, "learning_rate": 0.0012920127848583419, "loss": 2.166, "step": 1970 }, { "epoch": 0.42, "learning_rate": 0.001291346639058195, "loss": 2.4043, "step": 1971 }, { "epoch": 0.42, "learning_rate": 0.0012906803519509148, "loss": 2.3145, "step": 1972 }, { "epoch": 0.42, "learning_rate": 0.0012900139238596598, "loss": 2.2441, "step": 1973 }, { "epoch": 0.42, "learning_rate": 0.0012893473551076568, "loss": 2.2822, "step": 1974 }, { "epoch": 0.42, "learning_rate": 0.0012886806460182003, "loss": 2.125, "step": 1975 }, { "epoch": 0.42, "learning_rate": 0.0012880137969146542, "loss": 2.0918, "step": 1976 }, { "epoch": 0.43, "learning_rate": 0.0012873468081204488, "loss": 2.3467, "step": 1977 }, { "epoch": 0.43, "learning_rate": 0.001286679679959083, "loss": 2.2871, "step": 1978 }, { "epoch": 0.43, "learning_rate": 0.0012860124127541236, "loss": 2.2559, "step": 1979 }, { "epoch": 0.43, "learning_rate": 0.001285345006829204, "loss": 2.2793, "step": 1980 }, { "epoch": 0.43, "learning_rate": 0.0012846774625080253, "loss": 2.1953, "step": 1981 }, { "epoch": 0.43, "learning_rate": 0.001284009780114356, "loss": 2.1816, "step": 1982 }, { "epoch": 0.43, "learning_rate": 0.0012833419599720304, "loss": 2.2148, "step": 1983 }, { "epoch": 0.43, "learning_rate": 0.0012826740024049518, "loss": 2.2803, "step": 1984 }, { "epoch": 0.43, "learning_rate": 0.0012820059077370877, "loss": 2.166, "step": 1985 }, { "epoch": 0.43, "learning_rate": 0.0012813376762924734, "loss": 2.2012, "step": 1986 }, { "epoch": 0.43, "learning_rate": 0.0012806693083952112, "loss": 2.3262, "step": 1987 }, { "epoch": 0.43, "learning_rate": 0.0012800008043694676, "loss": 2.1074, "step": 1988 }, { "epoch": 0.43, "learning_rate": 0.0012793321645394767, "loss": 2.2656, "step": 1989 }, { "epoch": 0.43, "learning_rate": 0.0012786633892295383, "loss": 2.1992, "step": 1990 }, { "epoch": 0.43, "learning_rate": 0.0012779944787640173, "loss": 2.3301, "step": 1991 }, { "epoch": 0.43, "learning_rate": 0.0012773254334673449, "loss": 2.333, "step": 1992 }, { "epoch": 0.43, "learning_rate": 0.0012766562536640166, "loss": 2.2812, "step": 1993 }, { "epoch": 0.43, "learning_rate": 0.0012759869396785945, "loss": 2.2236, "step": 1994 }, { "epoch": 0.43, "learning_rate": 0.001275317491835705, "loss": 2.2773, "step": 1995 }, { "epoch": 0.43, "learning_rate": 0.0012746479104600388, "loss": 2.3359, "step": 1996 }, { "epoch": 0.43, "learning_rate": 0.001273978195876353, "loss": 2.293, "step": 1997 }, { "epoch": 0.43, "learning_rate": 0.001273308348409468, "loss": 2.1777, "step": 1998 }, { "epoch": 0.43, "learning_rate": 0.0012726383683842688, "loss": 2.3066, "step": 1999 }, { "epoch": 0.43, "learning_rate": 0.0012719682561257058, "loss": 2.1211, "step": 2000 }, { "epoch": 0.43, "learning_rate": 0.0012712980119587917, "loss": 2.2051, "step": 2001 }, { "epoch": 0.43, "learning_rate": 0.0012706276362086048, "loss": 2.2139, "step": 2002 }, { "epoch": 0.43, "learning_rate": 0.001269957129200286, "loss": 2.2236, "step": 2003 }, { "epoch": 0.43, "learning_rate": 0.0012692864912590412, "loss": 2.3613, "step": 2004 }, { "epoch": 0.43, "learning_rate": 0.0012686157227101383, "loss": 2.2461, "step": 2005 }, { "epoch": 0.43, "learning_rate": 0.0012679448238789096, "loss": 2.1885, "step": 2006 }, { "epoch": 0.43, "learning_rate": 0.0012672737950907502, "loss": 2.2783, "step": 2007 }, { "epoch": 0.43, "learning_rate": 0.0012666026366711187, "loss": 2.3486, "step": 2008 }, { "epoch": 0.43, "learning_rate": 0.0012659313489455357, "loss": 2.1113, "step": 2009 }, { "epoch": 0.43, "learning_rate": 0.001265259932239585, "loss": 2.2373, "step": 2010 }, { "epoch": 0.43, "learning_rate": 0.0012645883868789135, "loss": 2.2354, "step": 2011 }, { "epoch": 0.43, "learning_rate": 0.0012639167131892294, "loss": 2.332, "step": 2012 }, { "epoch": 0.43, "learning_rate": 0.0012632449114963035, "loss": 2.168, "step": 2013 }, { "epoch": 0.43, "learning_rate": 0.0012625729821259695, "loss": 2.332, "step": 2014 }, { "epoch": 0.43, "learning_rate": 0.0012619009254041223, "loss": 2.2314, "step": 2015 }, { "epoch": 0.43, "learning_rate": 0.0012612287416567183, "loss": 2.2793, "step": 2016 }, { "epoch": 0.43, "learning_rate": 0.001260556431209776, "loss": 2.2568, "step": 2017 }, { "epoch": 0.43, "learning_rate": 0.0012598839943893751, "loss": 2.1318, "step": 2018 }, { "epoch": 0.43, "learning_rate": 0.0012592114315216575, "loss": 2.3662, "step": 2019 }, { "epoch": 0.43, "learning_rate": 0.0012585387429328244, "loss": 2.2236, "step": 2020 }, { "epoch": 0.43, "learning_rate": 0.0012578659289491396, "loss": 2.0635, "step": 2021 }, { "epoch": 0.43, "learning_rate": 0.0012571929898969273, "loss": 2.1201, "step": 2022 }, { "epoch": 0.43, "learning_rate": 0.0012565199261025716, "loss": 2.2383, "step": 2023 }, { "epoch": 0.44, "learning_rate": 0.0012558467378925183, "loss": 2.2139, "step": 2024 }, { "epoch": 0.44, "learning_rate": 0.0012551734255932727, "loss": 2.4492, "step": 2025 }, { "epoch": 0.44, "learning_rate": 0.0012544999895314005, "loss": 2.3633, "step": 2026 }, { "epoch": 0.44, "learning_rate": 0.0012538264300335277, "loss": 2.1885, "step": 2027 }, { "epoch": 0.44, "learning_rate": 0.0012531527474263396, "loss": 2.1475, "step": 2028 }, { "epoch": 0.44, "learning_rate": 0.0012524789420365818, "loss": 2.2441, "step": 2029 }, { "epoch": 0.44, "learning_rate": 0.001251805014191059, "loss": 2.2539, "step": 2030 }, { "epoch": 0.44, "learning_rate": 0.0012511309642166356, "loss": 2.2275, "step": 2031 }, { "epoch": 0.44, "learning_rate": 0.0012504567924402354, "loss": 2.3164, "step": 2032 }, { "epoch": 0.44, "learning_rate": 0.0012497824991888405, "loss": 2.2695, "step": 2033 }, { "epoch": 0.44, "learning_rate": 0.0012491080847894923, "loss": 2.4375, "step": 2034 }, { "epoch": 0.44, "learning_rate": 0.0012484335495692922, "loss": 2.1523, "step": 2035 }, { "epoch": 0.44, "learning_rate": 0.001247758893855398, "loss": 2.1484, "step": 2036 }, { "epoch": 0.44, "learning_rate": 0.0012470841179750272, "loss": 2.127, "step": 2037 }, { "epoch": 0.44, "learning_rate": 0.0012464092222554553, "loss": 2.1738, "step": 2038 }, { "epoch": 0.44, "learning_rate": 0.0012457342070240165, "loss": 2.208, "step": 2039 }, { "epoch": 0.44, "learning_rate": 0.0012450590726081022, "loss": 2.1611, "step": 2040 }, { "epoch": 0.44, "learning_rate": 0.0012443838193351615, "loss": 2.2256, "step": 2041 }, { "epoch": 0.44, "learning_rate": 0.0012437084475327026, "loss": 2.3535, "step": 2042 }, { "epoch": 0.44, "learning_rate": 0.0012430329575282891, "loss": 2.1475, "step": 2043 }, { "epoch": 0.44, "learning_rate": 0.0012423573496495432, "loss": 2.3477, "step": 2044 }, { "epoch": 0.44, "learning_rate": 0.0012416816242241445, "loss": 2.3574, "step": 2045 }, { "epoch": 0.44, "learning_rate": 0.0012410057815798285, "loss": 2.1621, "step": 2046 }, { "epoch": 0.44, "learning_rate": 0.0012403298220443886, "loss": 2.3691, "step": 2047 }, { "epoch": 0.44, "learning_rate": 0.0012396537459456742, "loss": 2.3965, "step": 2048 }, { "epoch": 0.44, "learning_rate": 0.001238977553611592, "loss": 2.1807, "step": 2049 }, { "epoch": 0.44, "learning_rate": 0.001238301245370104, "loss": 2.2461, "step": 2050 }, { "epoch": 0.44, "learning_rate": 0.0012376248215492296, "loss": 2.2881, "step": 2051 }, { "epoch": 0.44, "learning_rate": 0.0012369482824770434, "loss": 2.3389, "step": 2052 }, { "epoch": 0.44, "learning_rate": 0.001236271628481676, "loss": 2.2988, "step": 2053 }, { "epoch": 0.44, "learning_rate": 0.0012355948598913136, "loss": 2.2314, "step": 2054 }, { "epoch": 0.44, "learning_rate": 0.001234917977034199, "loss": 2.1582, "step": 2055 }, { "epoch": 0.44, "learning_rate": 0.00123424098023863, "loss": 2.209, "step": 2056 }, { "epoch": 0.44, "learning_rate": 0.0012335638698329583, "loss": 2.2266, "step": 2057 }, { "epoch": 0.44, "learning_rate": 0.0012328866461455924, "loss": 2.1689, "step": 2058 }, { "epoch": 0.44, "learning_rate": 0.0012322093095049952, "loss": 2.1777, "step": 2059 }, { "epoch": 0.44, "learning_rate": 0.0012315318602396836, "loss": 2.2012, "step": 2060 }, { "epoch": 0.44, "learning_rate": 0.001230854298678231, "loss": 2.1338, "step": 2061 }, { "epoch": 0.44, "learning_rate": 0.0012301766251492628, "loss": 2.2158, "step": 2062 }, { "epoch": 0.44, "learning_rate": 0.001229498839981461, "loss": 2.293, "step": 2063 }, { "epoch": 0.44, "learning_rate": 0.0012288209435035605, "loss": 2.1396, "step": 2064 }, { "epoch": 0.44, "learning_rate": 0.0012281429360443499, "loss": 2.1885, "step": 2065 }, { "epoch": 0.44, "learning_rate": 0.001227464817932673, "loss": 2.2402, "step": 2066 }, { "epoch": 0.44, "learning_rate": 0.0012267865894974258, "loss": 2.2246, "step": 2067 }, { "epoch": 0.44, "learning_rate": 0.0012261082510675583, "loss": 1.9902, "step": 2068 }, { "epoch": 0.44, "learning_rate": 0.0012254298029720748, "loss": 2.2734, "step": 2069 }, { "epoch": 0.45, "learning_rate": 0.001224751245540031, "loss": 2.3516, "step": 2070 }, { "epoch": 0.45, "learning_rate": 0.0012240725791005374, "loss": 2.3301, "step": 2071 }, { "epoch": 0.45, "learning_rate": 0.0012233938039827562, "loss": 2.1992, "step": 2072 }, { "epoch": 0.45, "learning_rate": 0.001222714920515903, "loss": 2.207, "step": 2073 }, { "epoch": 0.45, "learning_rate": 0.0012220359290292448, "loss": 2.2002, "step": 2074 }, { "epoch": 0.45, "learning_rate": 0.0012213568298521027, "loss": 2.1562, "step": 2075 }, { "epoch": 0.45, "learning_rate": 0.0012206776233138488, "loss": 2.252, "step": 2076 }, { "epoch": 0.45, "learning_rate": 0.0012199983097439077, "loss": 2.251, "step": 2077 }, { "epoch": 0.45, "learning_rate": 0.0012193188894717556, "loss": 2.2197, "step": 2078 }, { "epoch": 0.45, "learning_rate": 0.001218639362826921, "loss": 2.1113, "step": 2079 }, { "epoch": 0.45, "learning_rate": 0.001217959730138984, "loss": 2.3438, "step": 2080 }, { "epoch": 0.45, "learning_rate": 0.001217279991737575, "loss": 2.3018, "step": 2081 }, { "epoch": 0.45, "learning_rate": 0.0012166001479523769, "loss": 2.0273, "step": 2082 }, { "epoch": 0.45, "learning_rate": 0.0012159201991131234, "loss": 2.2578, "step": 2083 }, { "epoch": 0.45, "learning_rate": 0.0012152401455495989, "loss": 2.4609, "step": 2084 }, { "epoch": 0.45, "learning_rate": 0.001214559987591639, "loss": 2.375, "step": 2085 }, { "epoch": 0.45, "learning_rate": 0.001213879725569129, "loss": 2.2012, "step": 2086 }, { "epoch": 0.45, "learning_rate": 0.0012131993598120062, "loss": 2.3223, "step": 2087 }, { "epoch": 0.45, "learning_rate": 0.001212518890650257, "loss": 2.2109, "step": 2088 }, { "epoch": 0.45, "learning_rate": 0.0012118383184139179, "loss": 2.2334, "step": 2089 }, { "epoch": 0.45, "learning_rate": 0.0012111576434330766, "loss": 2.2051, "step": 2090 }, { "epoch": 0.45, "learning_rate": 0.0012104768660378693, "loss": 2.2725, "step": 2091 }, { "epoch": 0.45, "learning_rate": 0.0012097959865584825, "loss": 2.3477, "step": 2092 }, { "epoch": 0.45, "learning_rate": 0.0012091150053251524, "loss": 2.2441, "step": 2093 }, { "epoch": 0.45, "learning_rate": 0.001208433922668164, "loss": 2.208, "step": 2094 }, { "epoch": 0.45, "learning_rate": 0.0012077527389178517, "loss": 2.2959, "step": 2095 }, { "epoch": 0.45, "learning_rate": 0.0012070714544045993, "loss": 2.0459, "step": 2096 }, { "epoch": 0.45, "learning_rate": 0.0012063900694588392, "loss": 2.1992, "step": 2097 }, { "epoch": 0.45, "learning_rate": 0.001205708584411052, "loss": 2.3447, "step": 2098 }, { "epoch": 0.45, "learning_rate": 0.0012050269995917677, "loss": 2.2656, "step": 2099 }, { "epoch": 0.45, "learning_rate": 0.0012043453153315644, "loss": 2.2217, "step": 2100 }, { "epoch": 0.45, "learning_rate": 0.0012036635319610683, "loss": 2.3398, "step": 2101 }, { "epoch": 0.45, "learning_rate": 0.0012029816498109534, "loss": 2.1455, "step": 2102 }, { "epoch": 0.45, "learning_rate": 0.0012022996692119424, "loss": 2.3428, "step": 2103 }, { "epoch": 0.45, "learning_rate": 0.0012016175904948048, "loss": 2.0479, "step": 2104 }, { "epoch": 0.45, "learning_rate": 0.0012009354139903585, "loss": 2.3496, "step": 2105 }, { "epoch": 0.45, "learning_rate": 0.0012002531400294688, "loss": 2.3379, "step": 2106 }, { "epoch": 0.45, "learning_rate": 0.0011995707689430473, "loss": 2.1826, "step": 2107 }, { "epoch": 0.45, "learning_rate": 0.0011988883010620537, "loss": 2.251, "step": 2108 }, { "epoch": 0.45, "learning_rate": 0.0011982057367174942, "loss": 2.1113, "step": 2109 }, { "epoch": 0.45, "learning_rate": 0.0011975230762404224, "loss": 2.377, "step": 2110 }, { "epoch": 0.45, "learning_rate": 0.0011968403199619378, "loss": 2.2676, "step": 2111 }, { "epoch": 0.45, "learning_rate": 0.0011961574682131859, "loss": 2.3584, "step": 2112 }, { "epoch": 0.45, "learning_rate": 0.00119547452132536, "loss": 2.2197, "step": 2113 }, { "epoch": 0.45, "learning_rate": 0.0011947914796296985, "loss": 2.3262, "step": 2114 }, { "epoch": 0.45, "learning_rate": 0.0011941083434574861, "loss": 2.3887, "step": 2115 }, { "epoch": 0.45, "learning_rate": 0.001193425113140053, "loss": 2.1104, "step": 2116 }, { "epoch": 0.46, "learning_rate": 0.001192741789008776, "loss": 2.1221, "step": 2117 }, { "epoch": 0.46, "learning_rate": 0.001192058371395076, "loss": 2.3066, "step": 2118 }, { "epoch": 0.46, "learning_rate": 0.00119137486063042, "loss": 2.1758, "step": 2119 }, { "epoch": 0.46, "learning_rate": 0.0011906912570463204, "loss": 2.1934, "step": 2120 }, { "epoch": 0.46, "learning_rate": 0.0011900075609743346, "loss": 2.2334, "step": 2121 }, { "epoch": 0.46, "learning_rate": 0.001189323772746064, "loss": 2.2227, "step": 2122 }, { "epoch": 0.46, "learning_rate": 0.0011886398926931557, "loss": 2.1621, "step": 2123 }, { "epoch": 0.46, "learning_rate": 0.0011879559211473009, "loss": 2.2295, "step": 2124 }, { "epoch": 0.46, "learning_rate": 0.0011872718584402349, "loss": 2.2559, "step": 2125 }, { "epoch": 0.46, "learning_rate": 0.0011865877049037378, "loss": 2.3164, "step": 2126 }, { "epoch": 0.46, "learning_rate": 0.001185903460869634, "loss": 2.3447, "step": 2127 }, { "epoch": 0.46, "learning_rate": 0.0011852191266697905, "loss": 2.2344, "step": 2128 }, { "epoch": 0.46, "learning_rate": 0.001184534702636119, "loss": 2.2363, "step": 2129 }, { "epoch": 0.46, "learning_rate": 0.0011838501891005748, "loss": 2.3574, "step": 2130 }, { "epoch": 0.46, "learning_rate": 0.0011831655863951563, "loss": 2.0234, "step": 2131 }, { "epoch": 0.46, "learning_rate": 0.001182480894851905, "loss": 2.1182, "step": 2132 }, { "epoch": 0.46, "learning_rate": 0.0011817961148029065, "loss": 2.1885, "step": 2133 }, { "epoch": 0.46, "learning_rate": 0.0011811112465802873, "loss": 2.3711, "step": 2134 }, { "epoch": 0.46, "learning_rate": 0.0011804262905162192, "loss": 2.2861, "step": 2135 }, { "epoch": 0.46, "learning_rate": 0.0011797412469429145, "loss": 2.2344, "step": 2136 }, { "epoch": 0.46, "learning_rate": 0.0011790561161926288, "loss": 2.3291, "step": 2137 }, { "epoch": 0.46, "learning_rate": 0.0011783708985976605, "loss": 2.25, "step": 2138 }, { "epoch": 0.46, "learning_rate": 0.0011776855944903492, "loss": 2.3652, "step": 2139 }, { "epoch": 0.46, "learning_rate": 0.0011770002042030766, "loss": 2.2588, "step": 2140 }, { "epoch": 0.46, "learning_rate": 0.001176314728068267, "loss": 2.0986, "step": 2141 }, { "epoch": 0.46, "learning_rate": 0.0011756291664183859, "loss": 2.1152, "step": 2142 }, { "epoch": 0.46, "learning_rate": 0.0011749435195859393, "loss": 2.2637, "step": 2143 }, { "epoch": 0.46, "learning_rate": 0.0011742577879034755, "loss": 2.1289, "step": 2144 }, { "epoch": 0.46, "learning_rate": 0.0011735719717035843, "loss": 2.1113, "step": 2145 }, { "epoch": 0.46, "learning_rate": 0.001172886071318896, "loss": 2.2305, "step": 2146 }, { "epoch": 0.46, "learning_rate": 0.0011722000870820813, "loss": 2.2109, "step": 2147 }, { "epoch": 0.46, "learning_rate": 0.0011715140193258524, "loss": 2.291, "step": 2148 }, { "epoch": 0.46, "learning_rate": 0.0011708278683829616, "loss": 2.2832, "step": 2149 }, { "epoch": 0.46, "learning_rate": 0.001170141634586201, "loss": 2.2451, "step": 2150 }, { "epoch": 0.46, "learning_rate": 0.0011694553182684043, "loss": 2.0439, "step": 2151 }, { "epoch": 0.46, "learning_rate": 0.0011687689197624437, "loss": 2.2402, "step": 2152 }, { "epoch": 0.46, "learning_rate": 0.001168082439401232, "loss": 2.2256, "step": 2153 }, { "epoch": 0.46, "learning_rate": 0.001167395877517722, "loss": 2.1924, "step": 2154 }, { "epoch": 0.46, "learning_rate": 0.0011667092344449053, "loss": 2.2275, "step": 2155 }, { "epoch": 0.46, "learning_rate": 0.0011660225105158135, "loss": 2.1494, "step": 2156 }, { "epoch": 0.46, "learning_rate": 0.001165335706063517, "loss": 2.1338, "step": 2157 }, { "epoch": 0.46, "learning_rate": 0.0011646488214211255, "loss": 2.3789, "step": 2158 }, { "epoch": 0.46, "learning_rate": 0.001163961856921788, "loss": 2.3613, "step": 2159 }, { "epoch": 0.46, "learning_rate": 0.0011632748128986906, "loss": 2.21, "step": 2160 }, { "epoch": 0.46, "learning_rate": 0.0011625876896850598, "loss": 2.1816, "step": 2161 }, { "epoch": 0.46, "learning_rate": 0.0011619004876141602, "loss": 2.3652, "step": 2162 }, { "epoch": 0.47, "learning_rate": 0.0011612132070192936, "loss": 1.9531, "step": 2163 }, { "epoch": 0.47, "learning_rate": 0.001160525848233801, "loss": 2.2695, "step": 2164 }, { "epoch": 0.47, "learning_rate": 0.0011598384115910606, "loss": 2.2812, "step": 2165 }, { "epoch": 0.47, "learning_rate": 0.0011591508974244887, "loss": 2.083, "step": 2166 }, { "epoch": 0.47, "learning_rate": 0.0011584633060675391, "loss": 2.3359, "step": 2167 }, { "epoch": 0.47, "learning_rate": 0.0011577756378537032, "loss": 2.3193, "step": 2168 }, { "epoch": 0.47, "learning_rate": 0.0011570878931165096, "loss": 2.1904, "step": 2169 }, { "epoch": 0.47, "learning_rate": 0.001156400072189524, "loss": 2.3311, "step": 2170 }, { "epoch": 0.47, "learning_rate": 0.0011557121754063487, "loss": 2.2666, "step": 2171 }, { "epoch": 0.47, "learning_rate": 0.0011550242031006235, "loss": 2.1982, "step": 2172 }, { "epoch": 0.47, "learning_rate": 0.0011543361556060243, "loss": 2.2109, "step": 2173 }, { "epoch": 0.47, "learning_rate": 0.0011536480332562634, "loss": 2.2598, "step": 2174 }, { "epoch": 0.47, "learning_rate": 0.0011529598363850897, "loss": 2.2129, "step": 2175 }, { "epoch": 0.47, "learning_rate": 0.0011522715653262887, "loss": 2.2793, "step": 2176 }, { "epoch": 0.47, "learning_rate": 0.0011515832204136804, "loss": 2.1797, "step": 2177 }, { "epoch": 0.47, "learning_rate": 0.0011508948019811225, "loss": 2.2041, "step": 2178 }, { "epoch": 0.47, "learning_rate": 0.0011502063103625072, "loss": 2.3008, "step": 2179 }, { "epoch": 0.47, "learning_rate": 0.0011495177458917618, "loss": 2.1641, "step": 2180 }, { "epoch": 0.47, "learning_rate": 0.00114882910890285, "loss": 2.2393, "step": 2181 }, { "epoch": 0.47, "learning_rate": 0.0011481403997297699, "loss": 2.2979, "step": 2182 }, { "epoch": 0.47, "learning_rate": 0.0011474516187065561, "loss": 2.1592, "step": 2183 }, { "epoch": 0.47, "learning_rate": 0.0011467627661672752, "loss": 2.2295, "step": 2184 }, { "epoch": 0.47, "learning_rate": 0.0011460738424460314, "loss": 2.1309, "step": 2185 }, { "epoch": 0.47, "learning_rate": 0.0011453848478769617, "loss": 2.1797, "step": 2186 }, { "epoch": 0.47, "learning_rate": 0.0011446957827942381, "loss": 2.2031, "step": 2187 }, { "epoch": 0.47, "learning_rate": 0.0011440066475320667, "loss": 2.1865, "step": 2188 }, { "epoch": 0.47, "learning_rate": 0.001143317442424687, "loss": 2.4707, "step": 2189 }, { "epoch": 0.47, "learning_rate": 0.0011426281678063742, "loss": 2.2686, "step": 2190 }, { "epoch": 0.47, "learning_rate": 0.0011419388240114347, "loss": 2.291, "step": 2191 }, { "epoch": 0.47, "learning_rate": 0.0011412494113742105, "loss": 2.1455, "step": 2192 }, { "epoch": 0.47, "learning_rate": 0.001140559930229076, "loss": 2.2559, "step": 2193 }, { "epoch": 0.47, "learning_rate": 0.0011398703809104391, "loss": 2.2441, "step": 2194 }, { "epoch": 0.47, "learning_rate": 0.0011391807637527406, "loss": 2.1934, "step": 2195 }, { "epoch": 0.47, "learning_rate": 0.0011384910790904544, "loss": 2.1035, "step": 2196 }, { "epoch": 0.47, "learning_rate": 0.0011378013272580873, "loss": 2.0303, "step": 2197 }, { "epoch": 0.47, "learning_rate": 0.001137111508590178, "loss": 2.2637, "step": 2198 }, { "epoch": 0.47, "learning_rate": 0.0011364216234212984, "loss": 2.1934, "step": 2199 }, { "epoch": 0.47, "learning_rate": 0.001135731672086052, "loss": 2.3164, "step": 2200 }, { "epoch": 0.47, "learning_rate": 0.0011350416549190752, "loss": 2.2285, "step": 2201 }, { "epoch": 0.47, "learning_rate": 0.001134351572255035, "loss": 2.1396, "step": 2202 }, { "epoch": 0.47, "learning_rate": 0.0011336614244286318, "loss": 2.4023, "step": 2203 }, { "epoch": 0.47, "learning_rate": 0.0011329712117745968, "loss": 2.1982, "step": 2204 }, { "epoch": 0.47, "learning_rate": 0.001132280934627692, "loss": 2.2852, "step": 2205 }, { "epoch": 0.47, "learning_rate": 0.0011315905933227115, "loss": 2.373, "step": 2206 }, { "epoch": 0.47, "learning_rate": 0.0011309001881944808, "loss": 2.3027, "step": 2207 }, { "epoch": 0.47, "learning_rate": 0.0011302097195778555, "loss": 2.1318, "step": 2208 }, { "epoch": 0.47, "learning_rate": 0.001129519187807723, "loss": 2.1016, "step": 2209 }, { "epoch": 0.48, "learning_rate": 0.0011288285932189994, "loss": 2.0146, "step": 2210 }, { "epoch": 0.48, "learning_rate": 0.0011281379361466341, "loss": 2.2559, "step": 2211 }, { "epoch": 0.48, "learning_rate": 0.0011274472169256044, "loss": 2.1963, "step": 2212 }, { "epoch": 0.48, "learning_rate": 0.0011267564358909189, "loss": 2.3125, "step": 2213 }, { "epoch": 0.48, "learning_rate": 0.0011260655933776158, "loss": 2.2803, "step": 2214 }, { "epoch": 0.48, "learning_rate": 0.0011253746897207633, "loss": 2.2881, "step": 2215 }, { "epoch": 0.48, "learning_rate": 0.0011246837252554592, "loss": 2.3027, "step": 2216 }, { "epoch": 0.48, "learning_rate": 0.0011239927003168309, "loss": 2.1475, "step": 2217 }, { "epoch": 0.48, "learning_rate": 0.0011233016152400349, "loss": 2.2432, "step": 2218 }, { "epoch": 0.48, "learning_rate": 0.0011226104703602566, "loss": 2.3066, "step": 2219 }, { "epoch": 0.48, "learning_rate": 0.0011219192660127115, "loss": 2.2646, "step": 2220 }, { "epoch": 0.48, "learning_rate": 0.0011212280025326429, "loss": 2.1914, "step": 2221 }, { "epoch": 0.48, "learning_rate": 0.0011205366802553229, "loss": 2.2344, "step": 2222 }, { "epoch": 0.48, "learning_rate": 0.0011198452995160528, "loss": 2.2266, "step": 2223 }, { "epoch": 0.48, "learning_rate": 0.0011191538606501617, "loss": 2.3379, "step": 2224 }, { "epoch": 0.48, "learning_rate": 0.0011184623639930065, "loss": 2.0918, "step": 2225 }, { "epoch": 0.48, "learning_rate": 0.001117770809879973, "loss": 2.0479, "step": 2226 }, { "epoch": 0.48, "learning_rate": 0.001117079198646474, "loss": 2.0928, "step": 2227 }, { "epoch": 0.48, "learning_rate": 0.0011163875306279515, "loss": 2.1934, "step": 2228 }, { "epoch": 0.48, "learning_rate": 0.0011156958061598725, "loss": 2.0547, "step": 2229 }, { "epoch": 0.48, "learning_rate": 0.0011150040255777343, "loss": 2.3555, "step": 2230 }, { "epoch": 0.48, "learning_rate": 0.0011143121892170597, "loss": 2.2002, "step": 2231 }, { "epoch": 0.48, "learning_rate": 0.0011136202974133982, "loss": 2.168, "step": 2232 }, { "epoch": 0.48, "learning_rate": 0.0011129283505023273, "loss": 2.1455, "step": 2233 }, { "epoch": 0.48, "learning_rate": 0.001112236348819451, "loss": 2.3125, "step": 2234 }, { "epoch": 0.48, "learning_rate": 0.0011115442927003994, "loss": 2.2295, "step": 2235 }, { "epoch": 0.48, "learning_rate": 0.0011108521824808291, "loss": 2.2852, "step": 2236 }, { "epoch": 0.48, "learning_rate": 0.0011101600184964234, "loss": 2.2334, "step": 2237 }, { "epoch": 0.48, "learning_rate": 0.0011094678010828913, "loss": 2.248, "step": 2238 }, { "epoch": 0.48, "learning_rate": 0.0011087755305759675, "loss": 2.2529, "step": 2239 }, { "epoch": 0.48, "learning_rate": 0.0011080832073114128, "loss": 2.2842, "step": 2240 }, { "epoch": 0.48, "learning_rate": 0.001107390831625014, "loss": 2.207, "step": 2241 }, { "epoch": 0.48, "learning_rate": 0.0011066984038525826, "loss": 2.1377, "step": 2242 }, { "epoch": 0.48, "learning_rate": 0.0011060059243299554, "loss": 2.0527, "step": 2243 }, { "epoch": 0.48, "learning_rate": 0.0011053133933929943, "loss": 2.1768, "step": 2244 }, { "epoch": 0.48, "learning_rate": 0.0011046208113775872, "loss": 2.3398, "step": 2245 }, { "epoch": 0.48, "learning_rate": 0.0011039281786196454, "loss": 2.3672, "step": 2246 }, { "epoch": 0.48, "learning_rate": 0.0011032354954551046, "loss": 2.1523, "step": 2247 }, { "epoch": 0.48, "learning_rate": 0.0011025427622199271, "loss": 2.1504, "step": 2248 }, { "epoch": 0.48, "learning_rate": 0.001101849979250097, "loss": 2.1738, "step": 2249 }, { "epoch": 0.48, "learning_rate": 0.0011011571468816242, "loss": 2.2109, "step": 2250 }, { "epoch": 0.48, "learning_rate": 0.0011004642654505415, "loss": 2.3076, "step": 2251 }, { "epoch": 0.48, "learning_rate": 0.001099771335292906, "loss": 2.2363, "step": 2252 }, { "epoch": 0.48, "learning_rate": 0.0010990783567447987, "loss": 2.2168, "step": 2253 }, { "epoch": 0.48, "learning_rate": 0.0010983853301423238, "loss": 2.252, "step": 2254 }, { "epoch": 0.48, "learning_rate": 0.001097692255821608, "loss": 2.2471, "step": 2255 }, { "epoch": 0.49, "learning_rate": 0.0010969991341188024, "loss": 2.2305, "step": 2256 }, { "epoch": 0.49, "learning_rate": 0.0010963059653700806, "loss": 2.252, "step": 2257 }, { "epoch": 0.49, "learning_rate": 0.001095612749911639, "loss": 2.1836, "step": 2258 }, { "epoch": 0.49, "learning_rate": 0.0010949194880796966, "loss": 2.3496, "step": 2259 }, { "epoch": 0.49, "learning_rate": 0.0010942261802104952, "loss": 2.3477, "step": 2260 }, { "epoch": 0.49, "learning_rate": 0.001093532826640298, "loss": 2.3262, "step": 2261 }, { "epoch": 0.49, "learning_rate": 0.001092839427705392, "loss": 2.1162, "step": 2262 }, { "epoch": 0.49, "learning_rate": 0.001092145983742084, "loss": 2.2617, "step": 2263 }, { "epoch": 0.49, "learning_rate": 0.0010914524950867046, "loss": 2.207, "step": 2264 }, { "epoch": 0.49, "learning_rate": 0.0010907589620756056, "loss": 2.0928, "step": 2265 }, { "epoch": 0.49, "learning_rate": 0.0010900653850451597, "loss": 2.2402, "step": 2266 }, { "epoch": 0.49, "learning_rate": 0.001089371764331761, "loss": 2.0508, "step": 2267 }, { "epoch": 0.49, "learning_rate": 0.0010886781002718258, "loss": 2.1553, "step": 2268 }, { "epoch": 0.49, "learning_rate": 0.0010879843932017905, "loss": 2.3379, "step": 2269 }, { "epoch": 0.49, "learning_rate": 0.0010872906434581124, "loss": 2.1953, "step": 2270 }, { "epoch": 0.49, "learning_rate": 0.0010865968513772695, "loss": 2.2451, "step": 2271 }, { "epoch": 0.49, "learning_rate": 0.001085903017295761, "loss": 2.2422, "step": 2272 }, { "epoch": 0.49, "learning_rate": 0.0010852091415501058, "loss": 2.0654, "step": 2273 }, { "epoch": 0.49, "learning_rate": 0.001084515224476843, "loss": 2.3555, "step": 2274 }, { "epoch": 0.49, "learning_rate": 0.0010838212664125322, "loss": 2.2383, "step": 2275 }, { "epoch": 0.49, "learning_rate": 0.0010831272676937525, "loss": 2.2969, "step": 2276 }, { "epoch": 0.49, "learning_rate": 0.0010824332286571029, "loss": 2.0977, "step": 2277 }, { "epoch": 0.49, "learning_rate": 0.0010817391496392018, "loss": 2.2441, "step": 2278 }, { "epoch": 0.49, "learning_rate": 0.001081045030976687, "loss": 2.4434, "step": 2279 }, { "epoch": 0.49, "learning_rate": 0.001080350873006216, "loss": 2.167, "step": 2280 }, { "epoch": 0.49, "learning_rate": 0.0010796566760644646, "loss": 2.209, "step": 2281 }, { "epoch": 0.49, "learning_rate": 0.001078962440488128, "loss": 2.127, "step": 2282 }, { "epoch": 0.49, "learning_rate": 0.0010782681666139202, "loss": 2.4043, "step": 2283 }, { "epoch": 0.49, "learning_rate": 0.001077573854778573, "loss": 2.1611, "step": 2284 }, { "epoch": 0.49, "learning_rate": 0.0010768795053188378, "loss": 2.1631, "step": 2285 }, { "epoch": 0.49, "learning_rate": 0.001076185118571484, "loss": 2.2266, "step": 2286 }, { "epoch": 0.49, "learning_rate": 0.0010754906948732977, "loss": 2.1816, "step": 2287 }, { "epoch": 0.49, "learning_rate": 0.0010747962345610841, "loss": 2.2754, "step": 2288 }, { "epoch": 0.49, "learning_rate": 0.0010741017379716671, "loss": 2.1133, "step": 2289 }, { "epoch": 0.49, "learning_rate": 0.0010734072054418861, "loss": 2.3076, "step": 2290 }, { "epoch": 0.49, "learning_rate": 0.0010727126373085993, "loss": 2.208, "step": 2291 }, { "epoch": 0.49, "learning_rate": 0.0010720180339086817, "loss": 2.3613, "step": 2292 }, { "epoch": 0.49, "learning_rate": 0.0010713233955790258, "loss": 2.1426, "step": 2293 }, { "epoch": 0.49, "learning_rate": 0.0010706287226565404, "loss": 2.3633, "step": 2294 }, { "epoch": 0.49, "learning_rate": 0.0010699340154781513, "loss": 2.1582, "step": 2295 }, { "epoch": 0.49, "learning_rate": 0.0010692392743808018, "loss": 2.3135, "step": 2296 }, { "epoch": 0.49, "learning_rate": 0.0010685444997014502, "loss": 2.2334, "step": 2297 }, { "epoch": 0.49, "learning_rate": 0.0010678496917770719, "loss": 2.2549, "step": 2298 }, { "epoch": 0.49, "learning_rate": 0.0010671548509446585, "loss": 2.1885, "step": 2299 }, { "epoch": 0.49, "learning_rate": 0.0010664599775412173, "loss": 2.2529, "step": 2300 }, { "epoch": 0.49, "learning_rate": 0.0010657650719037716, "loss": 2.3418, "step": 2301 }, { "epoch": 0.49, "learning_rate": 0.00106507013436936, "loss": 2.2188, "step": 2302 }, { "epoch": 0.5, "learning_rate": 0.001064375165275037, "loss": 2.1865, "step": 2303 }, { "epoch": 0.5, "learning_rate": 0.0010636801649578718, "loss": 2.2275, "step": 2304 }, { "epoch": 0.5, "learning_rate": 0.0010629851337549494, "loss": 2.3154, "step": 2305 }, { "epoch": 0.5, "learning_rate": 0.0010622900720033694, "loss": 2.1318, "step": 2306 }, { "epoch": 0.5, "learning_rate": 0.001061594980040247, "loss": 2.2285, "step": 2307 }, { "epoch": 0.5, "learning_rate": 0.0010608998582027102, "loss": 2.1475, "step": 2308 }, { "epoch": 0.5, "learning_rate": 0.0010602047068279032, "loss": 2.2061, "step": 2309 }, { "epoch": 0.5, "learning_rate": 0.0010595095262529845, "loss": 2.2256, "step": 2310 }, { "epoch": 0.5, "learning_rate": 0.0010588143168151257, "loss": 2.2021, "step": 2311 }, { "epoch": 0.5, "learning_rate": 0.0010581190788515127, "loss": 2.1523, "step": 2312 }, { "epoch": 0.5, "learning_rate": 0.001057423812699346, "loss": 2.1221, "step": 2313 }, { "epoch": 0.5, "learning_rate": 0.0010567285186958394, "loss": 2.0596, "step": 2314 }, { "epoch": 0.5, "learning_rate": 0.0010560331971782196, "loss": 2.0664, "step": 2315 }, { "epoch": 0.5, "learning_rate": 0.001055337848483727, "loss": 2.2158, "step": 2316 }, { "epoch": 0.5, "learning_rate": 0.001054642472949616, "loss": 2.3057, "step": 2317 }, { "epoch": 0.5, "learning_rate": 0.0010539470709131527, "loss": 2.3711, "step": 2318 }, { "epoch": 0.5, "learning_rate": 0.0010532516427116168, "loss": 2.0918, "step": 2319 }, { "epoch": 0.5, "learning_rate": 0.0010525561886823008, "loss": 2.124, "step": 2320 }, { "epoch": 0.5, "learning_rate": 0.0010518607091625093, "loss": 2.1738, "step": 2321 }, { "epoch": 0.5, "learning_rate": 0.0010511652044895591, "loss": 2.2637, "step": 2322 }, { "epoch": 0.5, "learning_rate": 0.0010504696750007804, "loss": 2.123, "step": 2323 }, { "epoch": 0.5, "learning_rate": 0.0010497741210335138, "loss": 2.3027, "step": 2324 }, { "epoch": 0.5, "learning_rate": 0.0010490785429251128, "loss": 2.2207, "step": 2325 }, { "epoch": 0.5, "learning_rate": 0.0010483829410129425, "loss": 2.2412, "step": 2326 }, { "epoch": 0.5, "learning_rate": 0.0010476873156343796, "loss": 2.3711, "step": 2327 }, { "epoch": 0.5, "learning_rate": 0.0010469916671268114, "loss": 2.3203, "step": 2328 }, { "epoch": 0.5, "learning_rate": 0.0010462959958276372, "loss": 2.1025, "step": 2329 }, { "epoch": 0.5, "learning_rate": 0.0010456003020742673, "loss": 2.3047, "step": 2330 }, { "epoch": 0.5, "learning_rate": 0.0010449045862041231, "loss": 2.3252, "step": 2331 }, { "epoch": 0.5, "learning_rate": 0.0010442088485546354, "loss": 2.1387, "step": 2332 }, { "epoch": 0.5, "learning_rate": 0.0010435130894632476, "loss": 2.2617, "step": 2333 }, { "epoch": 0.5, "learning_rate": 0.0010428173092674122, "loss": 2.2061, "step": 2334 }, { "epoch": 0.5, "learning_rate": 0.0010421215083045916, "loss": 2.3262, "step": 2335 }, { "epoch": 0.5, "learning_rate": 0.0010414256869122594, "loss": 2.292, "step": 2336 }, { "epoch": 0.5, "learning_rate": 0.0010407298454278983, "loss": 2.2041, "step": 2337 }, { "epoch": 0.5, "learning_rate": 0.001040033984189001, "loss": 2.3379, "step": 2338 }, { "epoch": 0.5, "learning_rate": 0.00103933810353307, "loss": 2.207, "step": 2339 }, { "epoch": 0.5, "learning_rate": 0.0010386422037976168, "loss": 2.3047, "step": 2340 }, { "epoch": 0.5, "learning_rate": 0.0010379462853201626, "loss": 2.2734, "step": 2341 }, { "epoch": 0.5, "learning_rate": 0.0010372503484382374, "loss": 2.208, "step": 2342 }, { "epoch": 0.5, "learning_rate": 0.0010365543934893798, "loss": 2.3691, "step": 2343 }, { "epoch": 0.5, "learning_rate": 0.0010358584208111378, "loss": 2.1904, "step": 2344 }, { "epoch": 0.5, "learning_rate": 0.0010351624307410679, "loss": 2.1807, "step": 2345 }, { "epoch": 0.5, "learning_rate": 0.0010344664236167345, "loss": 2.1895, "step": 2346 }, { "epoch": 0.5, "learning_rate": 0.001033770399775711, "loss": 2.1104, "step": 2347 }, { "epoch": 0.5, "learning_rate": 0.0010330743595555788, "loss": 2.209, "step": 2348 }, { "epoch": 0.5, "learning_rate": 0.001032378303293926, "loss": 2.0547, "step": 2349 }, { "epoch": 0.51, "learning_rate": 0.0010316822313283503, "loss": 2.1963, "step": 2350 }, { "epoch": 0.51, "learning_rate": 0.001030986143996456, "loss": 2.2148, "step": 2351 }, { "epoch": 0.51, "learning_rate": 0.0010302900416358552, "loss": 2.1416, "step": 2352 }, { "epoch": 0.51, "learning_rate": 0.0010295939245841665, "loss": 2.2793, "step": 2353 }, { "epoch": 0.51, "learning_rate": 0.0010288977931790168, "loss": 2.2061, "step": 2354 }, { "epoch": 0.51, "learning_rate": 0.0010282016477580398, "loss": 2.1855, "step": 2355 }, { "epoch": 0.51, "learning_rate": 0.0010275054886588748, "loss": 2.2949, "step": 2356 }, { "epoch": 0.51, "learning_rate": 0.0010268093162191687, "loss": 2.1875, "step": 2357 }, { "epoch": 0.51, "learning_rate": 0.0010261131307765755, "loss": 2.1699, "step": 2358 }, { "epoch": 0.51, "learning_rate": 0.0010254169326687538, "loss": 2.1055, "step": 2359 }, { "epoch": 0.51, "learning_rate": 0.0010247207222333697, "loss": 2.1592, "step": 2360 }, { "epoch": 0.51, "learning_rate": 0.0010240244998080945, "loss": 2.4023, "step": 2361 }, { "epoch": 0.51, "learning_rate": 0.0010233282657306061, "loss": 2.2529, "step": 2362 }, { "epoch": 0.51, "learning_rate": 0.0010226320203385877, "loss": 2.2422, "step": 2363 }, { "epoch": 0.51, "learning_rate": 0.0010219357639697274, "loss": 2.1162, "step": 2364 }, { "epoch": 0.51, "learning_rate": 0.0010212394969617196, "loss": 2.3271, "step": 2365 }, { "epoch": 0.51, "learning_rate": 0.001020543219652263, "loss": 2.2969, "step": 2366 }, { "epoch": 0.51, "learning_rate": 0.001019846932379062, "loss": 2.1875, "step": 2367 }, { "epoch": 0.51, "learning_rate": 0.0010191506354798254, "loss": 2.1953, "step": 2368 }, { "epoch": 0.51, "learning_rate": 0.0010184543292922669, "loss": 2.2852, "step": 2369 }, { "epoch": 0.51, "learning_rate": 0.001017758014154104, "loss": 2.375, "step": 2370 }, { "epoch": 0.51, "learning_rate": 0.0010170616904030601, "loss": 2.1191, "step": 2371 }, { "epoch": 0.51, "learning_rate": 0.0010163653583768614, "loss": 2.3711, "step": 2372 }, { "epoch": 0.51, "learning_rate": 0.0010156690184132382, "loss": 2.2285, "step": 2373 }, { "epoch": 0.51, "learning_rate": 0.001014972670849925, "loss": 2.2305, "step": 2374 }, { "epoch": 0.51, "learning_rate": 0.0010142763160246604, "loss": 2.2773, "step": 2375 }, { "epoch": 0.51, "learning_rate": 0.001013579954275186, "loss": 2.1865, "step": 2376 }, { "epoch": 0.51, "learning_rate": 0.0010128835859392464, "loss": 2.1543, "step": 2377 }, { "epoch": 0.51, "learning_rate": 0.0010121872113545902, "loss": 2.0625, "step": 2378 }, { "epoch": 0.51, "learning_rate": 0.0010114908308589692, "loss": 2.1523, "step": 2379 }, { "epoch": 0.51, "learning_rate": 0.0010107944447901363, "loss": 2.3438, "step": 2380 }, { "epoch": 0.51, "learning_rate": 0.0010100980534858493, "loss": 2.2676, "step": 2381 }, { "epoch": 0.51, "learning_rate": 0.0010094016572838671, "loss": 2.292, "step": 2382 }, { "epoch": 0.51, "learning_rate": 0.0010087052565219514, "loss": 2.2227, "step": 2383 }, { "epoch": 0.51, "learning_rate": 0.0010080088515378666, "loss": 2.2051, "step": 2384 }, { "epoch": 0.51, "learning_rate": 0.0010073124426693786, "loss": 2.2461, "step": 2385 }, { "epoch": 0.51, "learning_rate": 0.0010066160302542552, "loss": 2.2002, "step": 2386 }, { "epoch": 0.51, "learning_rate": 0.0010059196146302658, "loss": 2.1533, "step": 2387 }, { "epoch": 0.51, "learning_rate": 0.0010052231961351816, "loss": 2.1807, "step": 2388 }, { "epoch": 0.51, "learning_rate": 0.0010045267751067758, "loss": 2.2578, "step": 2389 }, { "epoch": 0.51, "learning_rate": 0.0010038303518828213, "loss": 2.1035, "step": 2390 }, { "epoch": 0.51, "learning_rate": 0.0010031339268010934, "loss": 2.1641, "step": 2391 }, { "epoch": 0.51, "learning_rate": 0.001002437500199368, "loss": 2.1152, "step": 2392 }, { "epoch": 0.51, "learning_rate": 0.0010017410724154215, "loss": 2.3184, "step": 2393 }, { "epoch": 0.51, "learning_rate": 0.0010010446437870304, "loss": 2.2793, "step": 2394 }, { "epoch": 0.51, "learning_rate": 0.0010003482146519731, "loss": 2.2744, "step": 2395 }, { "epoch": 0.52, "learning_rate": 0.0009996517853480271, "loss": 2.1582, "step": 2396 }, { "epoch": 0.52, "learning_rate": 0.0009989553562129694, "loss": 2.3066, "step": 2397 }, { "epoch": 0.52, "learning_rate": 0.0009982589275845785, "loss": 2.3301, "step": 2398 }, { "epoch": 0.52, "learning_rate": 0.000997562499800632, "loss": 2.1602, "step": 2399 }, { "epoch": 0.52, "learning_rate": 0.000996866073198907, "loss": 2.2305, "step": 2400 }, { "epoch": 0.52, "learning_rate": 0.000996169648117179, "loss": 2.0068, "step": 2401 }, { "epoch": 0.52, "learning_rate": 0.0009954732248932242, "loss": 2.1201, "step": 2402 }, { "epoch": 0.52, "learning_rate": 0.0009947768038648184, "loss": 2.0645, "step": 2403 }, { "epoch": 0.52, "learning_rate": 0.0009940803853697347, "loss": 2.1221, "step": 2404 }, { "epoch": 0.52, "learning_rate": 0.000993383969745745, "loss": 2.1807, "step": 2405 }, { "epoch": 0.52, "learning_rate": 0.0009926875573306217, "loss": 2.1191, "step": 2406 }, { "epoch": 0.52, "learning_rate": 0.0009919911484621334, "loss": 2.1543, "step": 2407 }, { "epoch": 0.52, "learning_rate": 0.0009912947434780484, "loss": 2.1582, "step": 2408 }, { "epoch": 0.52, "learning_rate": 0.0009905983427161333, "loss": 2.0635, "step": 2409 }, { "epoch": 0.52, "learning_rate": 0.0009899019465141512, "loss": 2.1543, "step": 2410 }, { "epoch": 0.52, "learning_rate": 0.000989205555209864, "loss": 2.2656, "step": 2411 }, { "epoch": 0.52, "learning_rate": 0.000988509169141031, "loss": 2.2637, "step": 2412 }, { "epoch": 0.52, "learning_rate": 0.0009878127886454096, "loss": 2.3291, "step": 2413 }, { "epoch": 0.52, "learning_rate": 0.0009871164140607535, "loss": 2.4062, "step": 2414 }, { "epoch": 0.52, "learning_rate": 0.0009864200457248143, "loss": 2.3086, "step": 2415 }, { "epoch": 0.52, "learning_rate": 0.0009857236839753397, "loss": 2.1455, "step": 2416 }, { "epoch": 0.52, "learning_rate": 0.0009850273291500751, "loss": 2.1357, "step": 2417 }, { "epoch": 0.52, "learning_rate": 0.000984330981586762, "loss": 2.1299, "step": 2418 }, { "epoch": 0.52, "learning_rate": 0.000983634641623139, "loss": 2.0908, "step": 2419 }, { "epoch": 0.52, "learning_rate": 0.0009829383095969401, "loss": 2.2324, "step": 2420 }, { "epoch": 0.52, "learning_rate": 0.000982241985845896, "loss": 2.2812, "step": 2421 }, { "epoch": 0.52, "learning_rate": 0.0009815456707077334, "loss": 2.2559, "step": 2422 }, { "epoch": 0.52, "learning_rate": 0.000980849364520175, "loss": 2.1924, "step": 2423 }, { "epoch": 0.52, "learning_rate": 0.000980153067620938, "loss": 2.2705, "step": 2424 }, { "epoch": 0.52, "learning_rate": 0.0009794567803477372, "loss": 2.2568, "step": 2425 }, { "epoch": 0.52, "learning_rate": 0.0009787605030382807, "loss": 2.0811, "step": 2426 }, { "epoch": 0.52, "learning_rate": 0.0009780642360302729, "loss": 2.0098, "step": 2427 }, { "epoch": 0.52, "learning_rate": 0.0009773679796614124, "loss": 2.1807, "step": 2428 }, { "epoch": 0.52, "learning_rate": 0.0009766717342693937, "loss": 2.1943, "step": 2429 }, { "epoch": 0.52, "learning_rate": 0.0009759755001919057, "loss": 2.1035, "step": 2430 }, { "epoch": 0.52, "learning_rate": 0.0009752792777666309, "loss": 2.166, "step": 2431 }, { "epoch": 0.52, "learning_rate": 0.0009745830673312465, "loss": 2.2812, "step": 2432 }, { "epoch": 0.52, "learning_rate": 0.0009738868692234247, "loss": 2.2627, "step": 2433 }, { "epoch": 0.52, "learning_rate": 0.0009731906837808312, "loss": 2.3018, "step": 2434 }, { "epoch": 0.52, "learning_rate": 0.0009724945113411252, "loss": 2.1582, "step": 2435 }, { "epoch": 0.52, "learning_rate": 0.0009717983522419606, "loss": 2.1855, "step": 2436 }, { "epoch": 0.52, "learning_rate": 0.0009711022068209832, "loss": 2.1797, "step": 2437 }, { "epoch": 0.52, "learning_rate": 0.0009704060754158336, "loss": 2.2842, "step": 2438 }, { "epoch": 0.52, "learning_rate": 0.000969709958364145, "loss": 2.126, "step": 2439 }, { "epoch": 0.52, "learning_rate": 0.0009690138560035441, "loss": 2.3408, "step": 2440 }, { "epoch": 0.52, "learning_rate": 0.00096831776867165, "loss": 2.1582, "step": 2441 }, { "epoch": 0.52, "learning_rate": 0.0009676216967060742, "loss": 2.2012, "step": 2442 }, { "epoch": 0.53, "learning_rate": 0.0009669256404444214, "loss": 2.2363, "step": 2443 }, { "epoch": 0.53, "learning_rate": 0.000966229600224289, "loss": 2.3105, "step": 2444 }, { "epoch": 0.53, "learning_rate": 0.0009655335763832653, "loss": 2.208, "step": 2445 }, { "epoch": 0.53, "learning_rate": 0.0009648375692589324, "loss": 2.1494, "step": 2446 }, { "epoch": 0.53, "learning_rate": 0.0009641415791888624, "loss": 2.1963, "step": 2447 }, { "epoch": 0.53, "learning_rate": 0.0009634456065106205, "loss": 2.249, "step": 2448 }, { "epoch": 0.53, "learning_rate": 0.0009627496515617628, "loss": 2.1895, "step": 2449 }, { "epoch": 0.53, "learning_rate": 0.0009620537146798374, "loss": 2.1719, "step": 2450 }, { "epoch": 0.53, "learning_rate": 0.0009613577962023831, "loss": 2.2773, "step": 2451 }, { "epoch": 0.53, "learning_rate": 0.0009606618964669303, "loss": 2.1387, "step": 2452 }, { "epoch": 0.53, "learning_rate": 0.0009599660158109991, "loss": 2.2129, "step": 2453 }, { "epoch": 0.53, "learning_rate": 0.0009592701545721021, "loss": 2.125, "step": 2454 }, { "epoch": 0.53, "learning_rate": 0.0009585743130877409, "loss": 2.2061, "step": 2455 }, { "epoch": 0.53, "learning_rate": 0.0009578784916954085, "loss": 2.1211, "step": 2456 }, { "epoch": 0.53, "learning_rate": 0.0009571826907325882, "loss": 2.1006, "step": 2457 }, { "epoch": 0.53, "learning_rate": 0.0009564869105367526, "loss": 2.2617, "step": 2458 }, { "epoch": 0.53, "learning_rate": 0.0009557911514453646, "loss": 2.2812, "step": 2459 }, { "epoch": 0.53, "learning_rate": 0.000955095413795877, "loss": 2.1084, "step": 2460 }, { "epoch": 0.53, "learning_rate": 0.0009543996979257327, "loss": 2.2217, "step": 2461 }, { "epoch": 0.53, "learning_rate": 0.0009537040041723633, "loss": 2.2207, "step": 2462 }, { "epoch": 0.53, "learning_rate": 0.000953008332873189, "loss": 2.1719, "step": 2463 }, { "epoch": 0.53, "learning_rate": 0.0009523126843656207, "loss": 2.1484, "step": 2464 }, { "epoch": 0.53, "learning_rate": 0.0009516170589870575, "loss": 2.2402, "step": 2465 }, { "epoch": 0.53, "learning_rate": 0.0009509214570748871, "loss": 2.2051, "step": 2466 }, { "epoch": 0.53, "learning_rate": 0.0009502258789664865, "loss": 2.2539, "step": 2467 }, { "epoch": 0.53, "learning_rate": 0.0009495303249992199, "loss": 2.1533, "step": 2468 }, { "epoch": 0.53, "learning_rate": 0.0009488347955104409, "loss": 2.1416, "step": 2469 }, { "epoch": 0.53, "learning_rate": 0.0009481392908374909, "loss": 2.1924, "step": 2470 }, { "epoch": 0.53, "learning_rate": 0.0009474438113176994, "loss": 2.2598, "step": 2471 }, { "epoch": 0.53, "learning_rate": 0.0009467483572883832, "loss": 2.2002, "step": 2472 }, { "epoch": 0.53, "learning_rate": 0.0009460529290868476, "loss": 2.2119, "step": 2473 }, { "epoch": 0.53, "learning_rate": 0.0009453575270503842, "loss": 2.1338, "step": 2474 }, { "epoch": 0.53, "learning_rate": 0.0009446621515162731, "loss": 2.2227, "step": 2475 }, { "epoch": 0.53, "learning_rate": 0.0009439668028217806, "loss": 1.9668, "step": 2476 }, { "epoch": 0.53, "learning_rate": 0.0009432714813041607, "loss": 2.2607, "step": 2477 }, { "epoch": 0.53, "learning_rate": 0.0009425761873006541, "loss": 2.2207, "step": 2478 }, { "epoch": 0.53, "learning_rate": 0.0009418809211484874, "loss": 2.1797, "step": 2479 }, { "epoch": 0.53, "learning_rate": 0.0009411856831848745, "loss": 2.3789, "step": 2480 }, { "epoch": 0.53, "learning_rate": 0.0009404904737470155, "loss": 2.1914, "step": 2481 }, { "epoch": 0.53, "learning_rate": 0.0009397952931720966, "loss": 2.249, "step": 2482 }, { "epoch": 0.53, "learning_rate": 0.0009391001417972902, "loss": 2.1621, "step": 2483 }, { "epoch": 0.53, "learning_rate": 0.0009384050199597534, "loss": 2.1709, "step": 2484 }, { "epoch": 0.53, "learning_rate": 0.0009377099279966307, "loss": 2.2246, "step": 2485 }, { "epoch": 0.53, "learning_rate": 0.0009370148662450507, "loss": 2.3018, "step": 2486 }, { "epoch": 0.53, "learning_rate": 0.0009363198350421282, "loss": 2.1641, "step": 2487 }, { "epoch": 0.53, "learning_rate": 0.0009356248347249632, "loss": 2.1465, "step": 2488 }, { "epoch": 0.54, "learning_rate": 0.0009349298656306404, "loss": 2.2959, "step": 2489 }, { "epoch": 0.54, "learning_rate": 0.0009342349280962287, "loss": 2.2676, "step": 2490 }, { "epoch": 0.54, "learning_rate": 0.0009335400224587826, "loss": 2.2207, "step": 2491 }, { "epoch": 0.54, "learning_rate": 0.0009328451490553417, "loss": 2.127, "step": 2492 }, { "epoch": 0.54, "learning_rate": 0.0009321503082229282, "loss": 2.2188, "step": 2493 }, { "epoch": 0.54, "learning_rate": 0.0009314555002985502, "loss": 2.2051, "step": 2494 }, { "epoch": 0.54, "learning_rate": 0.0009307607256191984, "loss": 2.1025, "step": 2495 }, { "epoch": 0.54, "learning_rate": 0.0009300659845218488, "loss": 2.292, "step": 2496 }, { "epoch": 0.54, "learning_rate": 0.0009293712773434598, "loss": 2.248, "step": 2497 }, { "epoch": 0.54, "learning_rate": 0.0009286766044209742, "loss": 2.4043, "step": 2498 }, { "epoch": 0.54, "learning_rate": 0.0009279819660913184, "loss": 2.127, "step": 2499 }, { "epoch": 0.54, "learning_rate": 0.000927287362691401, "loss": 2.2158, "step": 2500 }, { "epoch": 0.54, "learning_rate": 0.0009265927945581139, "loss": 2.1992, "step": 2501 }, { "epoch": 0.54, "learning_rate": 0.000925898262028333, "loss": 2.1777, "step": 2502 }, { "epoch": 0.54, "learning_rate": 0.0009252037654389157, "loss": 2.1406, "step": 2503 }, { "epoch": 0.54, "learning_rate": 0.0009245093051267023, "loss": 2.2705, "step": 2504 }, { "epoch": 0.54, "learning_rate": 0.0009238148814285165, "loss": 2.1162, "step": 2505 }, { "epoch": 0.54, "learning_rate": 0.0009231204946811623, "loss": 2.2246, "step": 2506 }, { "epoch": 0.54, "learning_rate": 0.000922426145221427, "loss": 2.2266, "step": 2507 }, { "epoch": 0.54, "learning_rate": 0.0009217318333860799, "loss": 2.0479, "step": 2508 }, { "epoch": 0.54, "learning_rate": 0.000921037559511872, "loss": 2.1777, "step": 2509 }, { "epoch": 0.54, "learning_rate": 0.0009203433239355357, "loss": 2.25, "step": 2510 }, { "epoch": 0.54, "learning_rate": 0.0009196491269937842, "loss": 2.3672, "step": 2511 }, { "epoch": 0.54, "learning_rate": 0.000918954969023313, "loss": 2.1455, "step": 2512 }, { "epoch": 0.54, "learning_rate": 0.0009182608503607984, "loss": 2.1367, "step": 2513 }, { "epoch": 0.54, "learning_rate": 0.0009175667713428971, "loss": 2.2461, "step": 2514 }, { "epoch": 0.54, "learning_rate": 0.0009168727323062478, "loss": 2.2246, "step": 2515 }, { "epoch": 0.54, "learning_rate": 0.0009161787335874679, "loss": 2.3232, "step": 2516 }, { "epoch": 0.54, "learning_rate": 0.0009154847755231572, "loss": 2.2373, "step": 2517 }, { "epoch": 0.54, "learning_rate": 0.0009147908584498942, "loss": 2.2227, "step": 2518 }, { "epoch": 0.54, "learning_rate": 0.0009140969827042391, "loss": 2.2383, "step": 2519 }, { "epoch": 0.54, "learning_rate": 0.0009134031486227309, "loss": 2.3291, "step": 2520 }, { "epoch": 0.54, "learning_rate": 0.0009127093565418881, "loss": 2.167, "step": 2521 }, { "epoch": 0.54, "learning_rate": 0.0009120156067982097, "loss": 2.2598, "step": 2522 }, { "epoch": 0.54, "learning_rate": 0.0009113218997281743, "loss": 2.1152, "step": 2523 }, { "epoch": 0.54, "learning_rate": 0.000910628235668239, "loss": 2.1943, "step": 2524 }, { "epoch": 0.54, "learning_rate": 0.0009099346149548405, "loss": 2.1143, "step": 2525 }, { "epoch": 0.54, "learning_rate": 0.0009092410379243947, "loss": 2.3496, "step": 2526 }, { "epoch": 0.54, "learning_rate": 0.0009085475049132956, "loss": 2.2334, "step": 2527 }, { "epoch": 0.54, "learning_rate": 0.0009078540162579162, "loss": 2.2754, "step": 2528 }, { "epoch": 0.54, "learning_rate": 0.0009071605722946084, "loss": 2.2715, "step": 2529 }, { "epoch": 0.54, "learning_rate": 0.000906467173359702, "loss": 2.2139, "step": 2530 }, { "epoch": 0.54, "learning_rate": 0.0009057738197895053, "loss": 2.1953, "step": 2531 }, { "epoch": 0.54, "learning_rate": 0.0009050805119203034, "loss": 2.1533, "step": 2532 }, { "epoch": 0.54, "learning_rate": 0.0009043872500883611, "loss": 2.1436, "step": 2533 }, { "epoch": 0.54, "learning_rate": 0.0009036940346299194, "loss": 2.1436, "step": 2534 }, { "epoch": 0.54, "learning_rate": 0.0009030008658811975, "loss": 2.2998, "step": 2535 }, { "epoch": 0.55, "learning_rate": 0.0009023077441783925, "loss": 2.0488, "step": 2536 }, { "epoch": 0.55, "learning_rate": 0.0009016146698576768, "loss": 2.1602, "step": 2537 }, { "epoch": 0.55, "learning_rate": 0.0009009216432552014, "loss": 2.207, "step": 2538 }, { "epoch": 0.55, "learning_rate": 0.0009002286647070939, "loss": 2.25, "step": 2539 }, { "epoch": 0.55, "learning_rate": 0.0008995357345494587, "loss": 2.167, "step": 2540 }, { "epoch": 0.55, "learning_rate": 0.0008988428531183759, "loss": 2.1357, "step": 2541 }, { "epoch": 0.55, "learning_rate": 0.0008981500207499031, "loss": 2.2168, "step": 2542 }, { "epoch": 0.55, "learning_rate": 0.000897457237780073, "loss": 2.2148, "step": 2543 }, { "epoch": 0.55, "learning_rate": 0.0008967645045448953, "loss": 2.1572, "step": 2544 }, { "epoch": 0.55, "learning_rate": 0.0008960718213803549, "loss": 2.2402, "step": 2545 }, { "epoch": 0.55, "learning_rate": 0.0008953791886224128, "loss": 2.2725, "step": 2546 }, { "epoch": 0.55, "learning_rate": 0.0008946866066070056, "loss": 2.2207, "step": 2547 }, { "epoch": 0.55, "learning_rate": 0.0008939940756700448, "loss": 2.3379, "step": 2548 }, { "epoch": 0.55, "learning_rate": 0.0008933015961474174, "loss": 2.1357, "step": 2549 }, { "epoch": 0.55, "learning_rate": 0.000892609168374986, "loss": 2.2754, "step": 2550 }, { "epoch": 0.55, "learning_rate": 0.000891916792688587, "loss": 2.1279, "step": 2551 }, { "epoch": 0.55, "learning_rate": 0.0008912244694240328, "loss": 2.2139, "step": 2552 }, { "epoch": 0.55, "learning_rate": 0.000890532198917109, "loss": 2.0605, "step": 2553 }, { "epoch": 0.55, "learning_rate": 0.0008898399815035769, "loss": 2.1699, "step": 2554 }, { "epoch": 0.55, "learning_rate": 0.0008891478175191709, "loss": 2.0469, "step": 2555 }, { "epoch": 0.55, "learning_rate": 0.0008884557072996006, "loss": 2.1885, "step": 2556 }, { "epoch": 0.55, "learning_rate": 0.0008877636511805492, "loss": 2.1553, "step": 2557 }, { "epoch": 0.55, "learning_rate": 0.0008870716494976729, "loss": 2.2363, "step": 2558 }, { "epoch": 0.55, "learning_rate": 0.000886379702586602, "loss": 2.1797, "step": 2559 }, { "epoch": 0.55, "learning_rate": 0.0008856878107829405, "loss": 2.2227, "step": 2560 }, { "epoch": 0.55, "learning_rate": 0.0008849959744222657, "loss": 2.2031, "step": 2561 }, { "epoch": 0.55, "learning_rate": 0.0008843041938401273, "loss": 2.0439, "step": 2562 }, { "epoch": 0.55, "learning_rate": 0.0008836124693720491, "loss": 2.2207, "step": 2563 }, { "epoch": 0.55, "learning_rate": 0.000882920801353526, "loss": 2.252, "step": 2564 }, { "epoch": 0.55, "learning_rate": 0.0008822291901200275, "loss": 2.2197, "step": 2565 }, { "epoch": 0.55, "learning_rate": 0.0008815376360069937, "loss": 2.3242, "step": 2566 }, { "epoch": 0.55, "learning_rate": 0.0008808461393498385, "loss": 2.1514, "step": 2567 }, { "epoch": 0.55, "learning_rate": 0.0008801547004839475, "loss": 2.0732, "step": 2568 }, { "epoch": 0.55, "learning_rate": 0.0008794633197446771, "loss": 2.2549, "step": 2569 }, { "epoch": 0.55, "learning_rate": 0.0008787719974673572, "loss": 1.9951, "step": 2570 }, { "epoch": 0.55, "learning_rate": 0.0008780807339872886, "loss": 2.0156, "step": 2571 }, { "epoch": 0.55, "learning_rate": 0.0008773895296397433, "loss": 2.1621, "step": 2572 }, { "epoch": 0.55, "learning_rate": 0.0008766983847599655, "loss": 2.1221, "step": 2573 }, { "epoch": 0.55, "learning_rate": 0.0008760072996831694, "loss": 2.2227, "step": 2574 }, { "epoch": 0.55, "learning_rate": 0.000875316274744541, "loss": 2.2236, "step": 2575 }, { "epoch": 0.55, "learning_rate": 0.0008746253102792367, "loss": 2.2129, "step": 2576 }, { "epoch": 0.55, "learning_rate": 0.0008739344066223842, "loss": 2.1367, "step": 2577 }, { "epoch": 0.55, "learning_rate": 0.0008732435641090813, "loss": 2.0664, "step": 2578 }, { "epoch": 0.55, "learning_rate": 0.000872552783074396, "loss": 2.1611, "step": 2579 }, { "epoch": 0.55, "learning_rate": 0.0008718620638533661, "loss": 2.043, "step": 2580 }, { "epoch": 0.55, "learning_rate": 0.0008711714067810006, "loss": 2.2422, "step": 2581 }, { "epoch": 0.56, "learning_rate": 0.0008704808121922774, "loss": 2.3525, "step": 2582 }, { "epoch": 0.56, "learning_rate": 0.0008697902804221442, "loss": 2.2207, "step": 2583 }, { "epoch": 0.56, "learning_rate": 0.0008690998118055193, "loss": 2.2236, "step": 2584 }, { "epoch": 0.56, "learning_rate": 0.0008684094066772887, "loss": 2.1201, "step": 2585 }, { "epoch": 0.56, "learning_rate": 0.0008677190653723083, "loss": 1.9961, "step": 2586 }, { "epoch": 0.56, "learning_rate": 0.0008670287882254035, "loss": 2.2969, "step": 2587 }, { "epoch": 0.56, "learning_rate": 0.0008663385755713683, "loss": 2.124, "step": 2588 }, { "epoch": 0.56, "learning_rate": 0.0008656484277449653, "loss": 2.251, "step": 2589 }, { "epoch": 0.56, "learning_rate": 0.0008649583450809253, "loss": 2.1553, "step": 2590 }, { "epoch": 0.56, "learning_rate": 0.0008642683279139481, "loss": 2.0195, "step": 2591 }, { "epoch": 0.56, "learning_rate": 0.0008635783765787019, "loss": 2.0303, "step": 2592 }, { "epoch": 0.56, "learning_rate": 0.000862888491409822, "loss": 2.0596, "step": 2593 }, { "epoch": 0.56, "learning_rate": 0.0008621986727419126, "loss": 2.1104, "step": 2594 }, { "epoch": 0.56, "learning_rate": 0.0008615089209095456, "loss": 2.1533, "step": 2595 }, { "epoch": 0.56, "learning_rate": 0.0008608192362472597, "loss": 2.1211, "step": 2596 }, { "epoch": 0.56, "learning_rate": 0.000860129619089561, "loss": 2.1201, "step": 2597 }, { "epoch": 0.56, "learning_rate": 0.0008594400697709241, "loss": 2.248, "step": 2598 }, { "epoch": 0.56, "learning_rate": 0.0008587505886257895, "loss": 2.2256, "step": 2599 }, { "epoch": 0.56, "learning_rate": 0.0008580611759885655, "loss": 2.2266, "step": 2600 }, { "epoch": 0.56, "learning_rate": 0.0008573718321936262, "loss": 2.1729, "step": 2601 }, { "epoch": 0.56, "learning_rate": 0.000856682557575313, "loss": 2.1816, "step": 2602 }, { "epoch": 0.56, "learning_rate": 0.0008559933524679335, "loss": 2.3438, "step": 2603 }, { "epoch": 0.56, "learning_rate": 0.0008553042172057619, "loss": 2.084, "step": 2604 }, { "epoch": 0.56, "learning_rate": 0.0008546151521230385, "loss": 2.209, "step": 2605 }, { "epoch": 0.56, "learning_rate": 0.000853926157553969, "loss": 2.2148, "step": 2606 }, { "epoch": 0.56, "learning_rate": 0.0008532372338327248, "loss": 2.2012, "step": 2607 }, { "epoch": 0.56, "learning_rate": 0.0008525483812934441, "loss": 2.1299, "step": 2608 }, { "epoch": 0.56, "learning_rate": 0.00085185960027023, "loss": 2.2002, "step": 2609 }, { "epoch": 0.56, "learning_rate": 0.0008511708910971505, "loss": 2.2686, "step": 2610 }, { "epoch": 0.56, "learning_rate": 0.0008504822541082386, "loss": 2.0137, "step": 2611 }, { "epoch": 0.56, "learning_rate": 0.0008497936896374931, "loss": 2.041, "step": 2612 }, { "epoch": 0.56, "learning_rate": 0.0008491051980188776, "loss": 2.2793, "step": 2613 }, { "epoch": 0.56, "learning_rate": 0.0008484167795863195, "loss": 2.125, "step": 2614 }, { "epoch": 0.56, "learning_rate": 0.0008477284346737115, "loss": 2.0576, "step": 2615 }, { "epoch": 0.56, "learning_rate": 0.0008470401636149105, "loss": 2.168, "step": 2616 }, { "epoch": 0.56, "learning_rate": 0.0008463519667437369, "loss": 2.1895, "step": 2617 }, { "epoch": 0.56, "learning_rate": 0.0008456638443939759, "loss": 2.1094, "step": 2618 }, { "epoch": 0.56, "learning_rate": 0.0008449757968993768, "loss": 2.1348, "step": 2619 }, { "epoch": 0.56, "learning_rate": 0.0008442878245936513, "loss": 2.1562, "step": 2620 }, { "epoch": 0.56, "learning_rate": 0.0008435999278104763, "loss": 2.2607, "step": 2621 }, { "epoch": 0.56, "learning_rate": 0.0008429121068834905, "loss": 2.1338, "step": 2622 }, { "epoch": 0.56, "learning_rate": 0.0008422243621462969, "loss": 2.0215, "step": 2623 }, { "epoch": 0.56, "learning_rate": 0.000841536693932461, "loss": 2.2305, "step": 2624 }, { "epoch": 0.56, "learning_rate": 0.0008408491025755114, "loss": 2.126, "step": 2625 }, { "epoch": 0.56, "learning_rate": 0.0008401615884089397, "loss": 2.2412, "step": 2626 }, { "epoch": 0.56, "learning_rate": 0.0008394741517661994, "loss": 2.2266, "step": 2627 }, { "epoch": 0.56, "learning_rate": 0.0008387867929807065, "loss": 2.2119, "step": 2628 }, { "epoch": 0.57, "learning_rate": 0.0008380995123858401, "loss": 2.0791, "step": 2629 }, { "epoch": 0.57, "learning_rate": 0.0008374123103149401, "loss": 2.334, "step": 2630 }, { "epoch": 0.57, "learning_rate": 0.0008367251871013095, "loss": 2.0693, "step": 2631 }, { "epoch": 0.57, "learning_rate": 0.0008360381430782125, "loss": 2.2617, "step": 2632 }, { "epoch": 0.57, "learning_rate": 0.0008353511785788747, "loss": 2.208, "step": 2633 }, { "epoch": 0.57, "learning_rate": 0.000834664293936483, "loss": 2.1904, "step": 2634 }, { "epoch": 0.57, "learning_rate": 0.0008339774894841865, "loss": 2.2383, "step": 2635 }, { "epoch": 0.57, "learning_rate": 0.0008332907655550948, "loss": 2.1348, "step": 2636 }, { "epoch": 0.57, "learning_rate": 0.0008326041224822785, "loss": 2.25, "step": 2637 }, { "epoch": 0.57, "learning_rate": 0.0008319175605987683, "loss": 2.3828, "step": 2638 }, { "epoch": 0.57, "learning_rate": 0.0008312310802375566, "loss": 2.0928, "step": 2639 }, { "epoch": 0.57, "learning_rate": 0.000830544681731596, "loss": 2.2734, "step": 2640 }, { "epoch": 0.57, "learning_rate": 0.000829858365413799, "loss": 2.1367, "step": 2641 }, { "epoch": 0.57, "learning_rate": 0.0008291721316170389, "loss": 2.1025, "step": 2642 }, { "epoch": 0.57, "learning_rate": 0.0008284859806741477, "loss": 2.2598, "step": 2643 }, { "epoch": 0.57, "learning_rate": 0.0008277999129179188, "loss": 2.3848, "step": 2644 }, { "epoch": 0.57, "learning_rate": 0.000827113928681104, "loss": 2.3984, "step": 2645 }, { "epoch": 0.57, "learning_rate": 0.0008264280282964157, "loss": 2.2803, "step": 2646 }, { "epoch": 0.57, "learning_rate": 0.0008257422120965247, "loss": 2.0361, "step": 2647 }, { "epoch": 0.57, "learning_rate": 0.0008250564804140612, "loss": 1.9707, "step": 2648 }, { "epoch": 0.57, "learning_rate": 0.0008243708335816145, "loss": 2.4473, "step": 2649 }, { "epoch": 0.57, "learning_rate": 0.0008236852719317329, "loss": 2.1494, "step": 2650 }, { "epoch": 0.57, "learning_rate": 0.0008229997957969232, "loss": 2.209, "step": 2651 }, { "epoch": 0.57, "learning_rate": 0.0008223144055096507, "loss": 2.0928, "step": 2652 }, { "epoch": 0.57, "learning_rate": 0.0008216291014023396, "loss": 2.2695, "step": 2653 }, { "epoch": 0.57, "learning_rate": 0.0008209438838073712, "loss": 2.1875, "step": 2654 }, { "epoch": 0.57, "learning_rate": 0.0008202587530570858, "loss": 2.1699, "step": 2655 }, { "epoch": 0.57, "learning_rate": 0.000819573709483781, "loss": 2.1406, "step": 2656 }, { "epoch": 0.57, "learning_rate": 0.0008188887534197127, "loss": 2.2334, "step": 2657 }, { "epoch": 0.57, "learning_rate": 0.0008182038851970941, "loss": 2.2568, "step": 2658 }, { "epoch": 0.57, "learning_rate": 0.000817519105148095, "loss": 2.1973, "step": 2659 }, { "epoch": 0.57, "learning_rate": 0.0008168344136048437, "loss": 2.1729, "step": 2660 }, { "epoch": 0.57, "learning_rate": 0.0008161498108994253, "loss": 2.1963, "step": 2661 }, { "epoch": 0.57, "learning_rate": 0.0008154652973638809, "loss": 2.2539, "step": 2662 }, { "epoch": 0.57, "learning_rate": 0.0008147808733302098, "loss": 2.1553, "step": 2663 }, { "epoch": 0.57, "learning_rate": 0.0008140965391303664, "loss": 2.2754, "step": 2664 }, { "epoch": 0.57, "learning_rate": 0.0008134122950962621, "loss": 2.1475, "step": 2665 }, { "epoch": 0.57, "learning_rate": 0.0008127281415597652, "loss": 2.2578, "step": 2666 }, { "epoch": 0.57, "learning_rate": 0.0008120440788526995, "loss": 2.1846, "step": 2667 }, { "epoch": 0.57, "learning_rate": 0.0008113601073068444, "loss": 2.0996, "step": 2668 }, { "epoch": 0.57, "learning_rate": 0.0008106762272539363, "loss": 2.1562, "step": 2669 }, { "epoch": 0.57, "learning_rate": 0.0008099924390256657, "loss": 2.3613, "step": 2670 }, { "epoch": 0.57, "learning_rate": 0.0008093087429536797, "loss": 2.1865, "step": 2671 }, { "epoch": 0.57, "learning_rate": 0.00080862513936958, "loss": 2.3584, "step": 2672 }, { "epoch": 0.57, "learning_rate": 0.000807941628604924, "loss": 2.2695, "step": 2673 }, { "epoch": 0.57, "learning_rate": 0.0008072582109912243, "loss": 2.1445, "step": 2674 }, { "epoch": 0.58, "learning_rate": 0.000806574886859947, "loss": 2.1367, "step": 2675 }, { "epoch": 0.58, "learning_rate": 0.000805891656542514, "loss": 2.1475, "step": 2676 }, { "epoch": 0.58, "learning_rate": 0.0008052085203703017, "loss": 2.1182, "step": 2677 }, { "epoch": 0.58, "learning_rate": 0.0008045254786746401, "loss": 2.252, "step": 2678 }, { "epoch": 0.58, "learning_rate": 0.0008038425317868146, "loss": 2.0254, "step": 2679 }, { "epoch": 0.58, "learning_rate": 0.0008031596800380628, "loss": 2.2119, "step": 2680 }, { "epoch": 0.58, "learning_rate": 0.0008024769237595779, "loss": 2.2646, "step": 2681 }, { "epoch": 0.58, "learning_rate": 0.0008017942632825057, "loss": 2.2412, "step": 2682 }, { "epoch": 0.58, "learning_rate": 0.0008011116989379463, "loss": 2.0859, "step": 2683 }, { "epoch": 0.58, "learning_rate": 0.0008004292310569528, "loss": 2.334, "step": 2684 }, { "epoch": 0.58, "learning_rate": 0.0007997468599705318, "loss": 2.2793, "step": 2685 }, { "epoch": 0.58, "learning_rate": 0.0007990645860096416, "loss": 2.208, "step": 2686 }, { "epoch": 0.58, "learning_rate": 0.0007983824095051952, "loss": 2.1406, "step": 2687 }, { "epoch": 0.58, "learning_rate": 0.0007977003307880579, "loss": 2.2393, "step": 2688 }, { "epoch": 0.58, "learning_rate": 0.0007970183501890466, "loss": 2.2002, "step": 2689 }, { "epoch": 0.58, "learning_rate": 0.0007963364680389322, "loss": 2.1836, "step": 2690 }, { "epoch": 0.58, "learning_rate": 0.0007956546846684358, "loss": 2.125, "step": 2691 }, { "epoch": 0.58, "learning_rate": 0.0007949730004082325, "loss": 2.2852, "step": 2692 }, { "epoch": 0.58, "learning_rate": 0.000794291415588948, "loss": 2.1943, "step": 2693 }, { "epoch": 0.58, "learning_rate": 0.0007936099305411611, "loss": 2.167, "step": 2694 }, { "epoch": 0.58, "learning_rate": 0.0007929285455954007, "loss": 2.1689, "step": 2695 }, { "epoch": 0.58, "learning_rate": 0.0007922472610821485, "loss": 2.2529, "step": 2696 }, { "epoch": 0.58, "learning_rate": 0.0007915660773318361, "loss": 2.1924, "step": 2697 }, { "epoch": 0.58, "learning_rate": 0.0007908849946748477, "loss": 2.1436, "step": 2698 }, { "epoch": 0.58, "learning_rate": 0.0007902040134415174, "loss": 2.2178, "step": 2699 }, { "epoch": 0.58, "learning_rate": 0.0007895231339621311, "loss": 2.248, "step": 2700 }, { "epoch": 0.58, "learning_rate": 0.0007888423565669236, "loss": 2.1221, "step": 2701 }, { "epoch": 0.58, "learning_rate": 0.0007881616815860823, "loss": 2.1914, "step": 2702 }, { "epoch": 0.58, "learning_rate": 0.0007874811093497433, "loss": 2.2715, "step": 2703 }, { "epoch": 0.58, "learning_rate": 0.0007868006401879938, "loss": 2.2344, "step": 2704 }, { "epoch": 0.58, "learning_rate": 0.000786120274430871, "loss": 2.1572, "step": 2705 }, { "epoch": 0.58, "learning_rate": 0.0007854400124083615, "loss": 2.0664, "step": 2706 }, { "epoch": 0.58, "learning_rate": 0.0007847598544504013, "loss": 2.0674, "step": 2707 }, { "epoch": 0.58, "learning_rate": 0.0007840798008868766, "loss": 1.9004, "step": 2708 }, { "epoch": 0.58, "learning_rate": 0.000783399852047623, "loss": 2.2012, "step": 2709 }, { "epoch": 0.58, "learning_rate": 0.0007827200082624249, "loss": 2.2266, "step": 2710 }, { "epoch": 0.58, "learning_rate": 0.0007820402698610163, "loss": 2.1729, "step": 2711 }, { "epoch": 0.58, "learning_rate": 0.000781360637173079, "loss": 2.1455, "step": 2712 }, { "epoch": 0.58, "learning_rate": 0.0007806811105282444, "loss": 2.2461, "step": 2713 }, { "epoch": 0.58, "learning_rate": 0.0007800016902560923, "loss": 2.3125, "step": 2714 }, { "epoch": 0.58, "learning_rate": 0.0007793223766861513, "loss": 2.1006, "step": 2715 }, { "epoch": 0.58, "learning_rate": 0.0007786431701478977, "loss": 2.2949, "step": 2716 }, { "epoch": 0.58, "learning_rate": 0.0007779640709707553, "loss": 2.3203, "step": 2717 }, { "epoch": 0.58, "learning_rate": 0.0007772850794840973, "loss": 2.207, "step": 2718 }, { "epoch": 0.58, "learning_rate": 0.0007766061960172439, "loss": 2.2207, "step": 2719 }, { "epoch": 0.58, "learning_rate": 0.0007759274208994626, "loss": 2.1406, "step": 2720 }, { "epoch": 0.58, "learning_rate": 0.0007752487544599686, "loss": 2.1855, "step": 2721 }, { "epoch": 0.59, "learning_rate": 0.0007745701970279255, "loss": 2.2266, "step": 2722 }, { "epoch": 0.59, "learning_rate": 0.0007738917489324419, "loss": 2.248, "step": 2723 }, { "epoch": 0.59, "learning_rate": 0.0007732134105025744, "loss": 2.0176, "step": 2724 }, { "epoch": 0.59, "learning_rate": 0.0007725351820673273, "loss": 2.2529, "step": 2725 }, { "epoch": 0.59, "learning_rate": 0.0007718570639556501, "loss": 2.2188, "step": 2726 }, { "epoch": 0.59, "learning_rate": 0.0007711790564964399, "loss": 2.3027, "step": 2727 }, { "epoch": 0.59, "learning_rate": 0.000770501160018539, "loss": 2.2148, "step": 2728 }, { "epoch": 0.59, "learning_rate": 0.0007698233748507372, "loss": 2.3311, "step": 2729 }, { "epoch": 0.59, "learning_rate": 0.0007691457013217693, "loss": 2.1211, "step": 2730 }, { "epoch": 0.59, "learning_rate": 0.0007684681397603162, "loss": 2.2637, "step": 2731 }, { "epoch": 0.59, "learning_rate": 0.0007677906904950053, "loss": 2.1973, "step": 2732 }, { "epoch": 0.59, "learning_rate": 0.000767113353854408, "loss": 2.1104, "step": 2733 }, { "epoch": 0.59, "learning_rate": 0.000766436130167042, "loss": 2.1943, "step": 2734 }, { "epoch": 0.59, "learning_rate": 0.0007657590197613701, "loss": 2.0645, "step": 2735 }, { "epoch": 0.59, "learning_rate": 0.0007650820229658009, "loss": 2.1797, "step": 2736 }, { "epoch": 0.59, "learning_rate": 0.0007644051401086863, "loss": 2.166, "step": 2737 }, { "epoch": 0.59, "learning_rate": 0.0007637283715183245, "loss": 2.1602, "step": 2738 }, { "epoch": 0.59, "learning_rate": 0.000763051717522957, "loss": 2.2012, "step": 2739 }, { "epoch": 0.59, "learning_rate": 0.0007623751784507707, "loss": 2.0264, "step": 2740 }, { "epoch": 0.59, "learning_rate": 0.0007616987546298959, "loss": 2.1924, "step": 2741 }, { "epoch": 0.59, "learning_rate": 0.0007610224463884081, "loss": 2.3682, "step": 2742 }, { "epoch": 0.59, "learning_rate": 0.0007603462540543258, "loss": 2.083, "step": 2743 }, { "epoch": 0.59, "learning_rate": 0.0007596701779556117, "loss": 2.1953, "step": 2744 }, { "epoch": 0.59, "learning_rate": 0.0007589942184201716, "loss": 2.1172, "step": 2745 }, { "epoch": 0.59, "learning_rate": 0.0007583183757758559, "loss": 2.1621, "step": 2746 }, { "epoch": 0.59, "learning_rate": 0.0007576426503504568, "loss": 2.1416, "step": 2747 }, { "epoch": 0.59, "learning_rate": 0.0007569670424717113, "loss": 2.1943, "step": 2748 }, { "epoch": 0.59, "learning_rate": 0.0007562915524672977, "loss": 2.0566, "step": 2749 }, { "epoch": 0.59, "learning_rate": 0.0007556161806648385, "loss": 2.1348, "step": 2750 }, { "epoch": 0.59, "learning_rate": 0.000754940927391898, "loss": 2.1914, "step": 2751 }, { "epoch": 0.59, "learning_rate": 0.0007542657929759835, "loss": 2.2139, "step": 2752 }, { "epoch": 0.59, "learning_rate": 0.0007535907777445449, "loss": 2.2021, "step": 2753 }, { "epoch": 0.59, "learning_rate": 0.0007529158820249733, "loss": 1.9805, "step": 2754 }, { "epoch": 0.59, "learning_rate": 0.0007522411061446023, "loss": 2.1865, "step": 2755 }, { "epoch": 0.59, "learning_rate": 0.0007515664504307078, "loss": 2.0938, "step": 2756 }, { "epoch": 0.59, "learning_rate": 0.0007508919152105074, "loss": 2.252, "step": 2757 }, { "epoch": 0.59, "learning_rate": 0.0007502175008111594, "loss": 2.3271, "step": 2758 }, { "epoch": 0.59, "learning_rate": 0.0007495432075597649, "loss": 2.0381, "step": 2759 }, { "epoch": 0.59, "learning_rate": 0.0007488690357833646, "loss": 2.1768, "step": 2760 }, { "epoch": 0.59, "learning_rate": 0.0007481949858089412, "loss": 2.2129, "step": 2761 }, { "epoch": 0.59, "learning_rate": 0.0007475210579634183, "loss": 2.0693, "step": 2762 }, { "epoch": 0.59, "learning_rate": 0.0007468472525736606, "loss": 2.126, "step": 2763 }, { "epoch": 0.59, "learning_rate": 0.0007461735699664729, "loss": 2.1445, "step": 2764 }, { "epoch": 0.59, "learning_rate": 0.0007455000104685998, "loss": 2.1846, "step": 2765 }, { "epoch": 0.59, "learning_rate": 0.0007448265744067276, "loss": 2.3008, "step": 2766 }, { "epoch": 0.59, "learning_rate": 0.0007441532621074819, "loss": 2.3105, "step": 2767 }, { "epoch": 0.6, "learning_rate": 0.0007434800738974284, "loss": 2.2305, "step": 2768 }, { "epoch": 0.6, "learning_rate": 0.0007428070101030732, "loss": 2.1758, "step": 2769 }, { "epoch": 0.6, "learning_rate": 0.0007421340710508605, "loss": 2.166, "step": 2770 }, { "epoch": 0.6, "learning_rate": 0.0007414612570671758, "loss": 2.2188, "step": 2771 }, { "epoch": 0.6, "learning_rate": 0.0007407885684783428, "loss": 2.2441, "step": 2772 }, { "epoch": 0.6, "learning_rate": 0.0007401160056106249, "loss": 2.1934, "step": 2773 }, { "epoch": 0.6, "learning_rate": 0.0007394435687902241, "loss": 2.1514, "step": 2774 }, { "epoch": 0.6, "learning_rate": 0.0007387712583432821, "loss": 2.3203, "step": 2775 }, { "epoch": 0.6, "learning_rate": 0.000738099074595878, "loss": 2.2676, "step": 2776 }, { "epoch": 0.6, "learning_rate": 0.0007374270178740306, "loss": 2.1143, "step": 2777 }, { "epoch": 0.6, "learning_rate": 0.0007367550885036964, "loss": 2.2334, "step": 2778 }, { "epoch": 0.6, "learning_rate": 0.0007360832868107708, "loss": 2.1445, "step": 2779 }, { "epoch": 0.6, "learning_rate": 0.0007354116131210868, "loss": 2.1777, "step": 2780 }, { "epoch": 0.6, "learning_rate": 0.0007347400677604151, "loss": 2.2051, "step": 2781 }, { "epoch": 0.6, "learning_rate": 0.0007340686510544644, "loss": 2.3838, "step": 2782 }, { "epoch": 0.6, "learning_rate": 0.0007333973633288813, "loss": 2.2539, "step": 2783 }, { "epoch": 0.6, "learning_rate": 0.0007327262049092497, "loss": 2.1865, "step": 2784 }, { "epoch": 0.6, "learning_rate": 0.0007320551761210907, "loss": 2.1885, "step": 2785 }, { "epoch": 0.6, "learning_rate": 0.000731384277289862, "loss": 2.1943, "step": 2786 }, { "epoch": 0.6, "learning_rate": 0.0007307135087409591, "loss": 2.2139, "step": 2787 }, { "epoch": 0.6, "learning_rate": 0.0007300428707997141, "loss": 2.1494, "step": 2788 }, { "epoch": 0.6, "learning_rate": 0.0007293723637913954, "loss": 2.2246, "step": 2789 }, { "epoch": 0.6, "learning_rate": 0.0007287019880412086, "loss": 2.1514, "step": 2790 }, { "epoch": 0.6, "learning_rate": 0.0007280317438742945, "loss": 2.2129, "step": 2791 }, { "epoch": 0.6, "learning_rate": 0.0007273616316157312, "loss": 2.1602, "step": 2792 }, { "epoch": 0.6, "learning_rate": 0.0007266916515905322, "loss": 2.3555, "step": 2793 }, { "epoch": 0.6, "learning_rate": 0.0007260218041236473, "loss": 2.1709, "step": 2794 }, { "epoch": 0.6, "learning_rate": 0.0007253520895399612, "loss": 2.0947, "step": 2795 }, { "epoch": 0.6, "learning_rate": 0.0007246825081642956, "loss": 2.2412, "step": 2796 }, { "epoch": 0.6, "learning_rate": 0.0007240130603214057, "loss": 2.208, "step": 2797 }, { "epoch": 0.6, "learning_rate": 0.0007233437463359836, "loss": 2.0371, "step": 2798 }, { "epoch": 0.6, "learning_rate": 0.0007226745665326553, "loss": 2.0811, "step": 2799 }, { "epoch": 0.6, "learning_rate": 0.0007220055212359826, "loss": 1.9941, "step": 2800 }, { "epoch": 0.6, "learning_rate": 0.0007213366107704619, "loss": 2.2451, "step": 2801 }, { "epoch": 0.6, "learning_rate": 0.0007206678354605236, "loss": 2.1221, "step": 2802 }, { "epoch": 0.6, "learning_rate": 0.0007199991956305327, "loss": 2.168, "step": 2803 }, { "epoch": 0.6, "learning_rate": 0.0007193306916047891, "loss": 2.3066, "step": 2804 }, { "epoch": 0.6, "learning_rate": 0.0007186623237075265, "loss": 2.2539, "step": 2805 }, { "epoch": 0.6, "learning_rate": 0.0007179940922629127, "loss": 2.2295, "step": 2806 }, { "epoch": 0.6, "learning_rate": 0.0007173259975950486, "loss": 2.1797, "step": 2807 }, { "epoch": 0.6, "learning_rate": 0.0007166580400279697, "loss": 2.1777, "step": 2808 }, { "epoch": 0.6, "learning_rate": 0.0007159902198856443, "loss": 2.2598, "step": 2809 }, { "epoch": 0.6, "learning_rate": 0.0007153225374919747, "loss": 2.2383, "step": 2810 }, { "epoch": 0.6, "learning_rate": 0.0007146549931707959, "loss": 2.0098, "step": 2811 }, { "epoch": 0.6, "learning_rate": 0.0007139875872458767, "loss": 2.1816, "step": 2812 }, { "epoch": 0.6, "learning_rate": 0.0007133203200409171, "loss": 2.1885, "step": 2813 }, { "epoch": 0.6, "learning_rate": 0.0007126531918795514, "loss": 2.082, "step": 2814 }, { "epoch": 0.61, "learning_rate": 0.0007119862030853461, "loss": 2.167, "step": 2815 }, { "epoch": 0.61, "learning_rate": 0.0007113193539817997, "loss": 2.2441, "step": 2816 }, { "epoch": 0.61, "learning_rate": 0.0007106526448923436, "loss": 2.0977, "step": 2817 }, { "epoch": 0.61, "learning_rate": 0.0007099860761403403, "loss": 2.207, "step": 2818 }, { "epoch": 0.61, "learning_rate": 0.0007093196480490853, "loss": 2.2578, "step": 2819 }, { "epoch": 0.61, "learning_rate": 0.0007086533609418052, "loss": 2.2422, "step": 2820 }, { "epoch": 0.61, "learning_rate": 0.0007079872151416583, "loss": 2.2656, "step": 2821 }, { "epoch": 0.61, "learning_rate": 0.0007073212109717349, "loss": 2.0752, "step": 2822 }, { "epoch": 0.61, "learning_rate": 0.0007066553487550558, "loss": 2.1709, "step": 2823 }, { "epoch": 0.61, "learning_rate": 0.0007059896288145734, "loss": 2.2207, "step": 2824 }, { "epoch": 0.61, "learning_rate": 0.0007053240514731713, "loss": 2.1416, "step": 2825 }, { "epoch": 0.61, "learning_rate": 0.0007046586170536633, "loss": 2.0566, "step": 2826 }, { "epoch": 0.61, "learning_rate": 0.0007039933258787945, "loss": 2.2383, "step": 2827 }, { "epoch": 0.61, "learning_rate": 0.0007033281782712407, "loss": 2.1553, "step": 2828 }, { "epoch": 0.61, "learning_rate": 0.000702663174553607, "loss": 2.0977, "step": 2829 }, { "epoch": 0.61, "learning_rate": 0.0007019983150484293, "loss": 2.1201, "step": 2830 }, { "epoch": 0.61, "learning_rate": 0.0007013336000781738, "loss": 2.1436, "step": 2831 }, { "epoch": 0.61, "learning_rate": 0.000700669029965237, "loss": 2.1562, "step": 2832 }, { "epoch": 0.61, "learning_rate": 0.000700004605031944, "loss": 2.1494, "step": 2833 }, { "epoch": 0.61, "learning_rate": 0.0006993403256005498, "loss": 2.1709, "step": 2834 }, { "epoch": 0.61, "learning_rate": 0.0006986761919932394, "loss": 2.2275, "step": 2835 }, { "epoch": 0.61, "learning_rate": 0.0006980122045321268, "loss": 2.0605, "step": 2836 }, { "epoch": 0.61, "learning_rate": 0.000697348363539255, "loss": 2.0859, "step": 2837 }, { "epoch": 0.61, "learning_rate": 0.0006966846693365965, "loss": 2.2051, "step": 2838 }, { "epoch": 0.61, "learning_rate": 0.0006960211222460511, "loss": 2.1143, "step": 2839 }, { "epoch": 0.61, "learning_rate": 0.0006953577225894491, "loss": 2.1494, "step": 2840 }, { "epoch": 0.61, "learning_rate": 0.000694694470688548, "loss": 2.2324, "step": 2841 }, { "epoch": 0.61, "learning_rate": 0.0006940313668650345, "loss": 2.1406, "step": 2842 }, { "epoch": 0.61, "learning_rate": 0.0006933684114405231, "loss": 2.2441, "step": 2843 }, { "epoch": 0.61, "learning_rate": 0.0006927056047365557, "loss": 2.1123, "step": 2844 }, { "epoch": 0.61, "learning_rate": 0.0006920429470746032, "loss": 2.2285, "step": 2845 }, { "epoch": 0.61, "learning_rate": 0.0006913804387760637, "loss": 2.2285, "step": 2846 }, { "epoch": 0.61, "learning_rate": 0.0006907180801622626, "loss": 2.209, "step": 2847 }, { "epoch": 0.61, "learning_rate": 0.0006900558715544531, "loss": 2.248, "step": 2848 }, { "epoch": 0.61, "learning_rate": 0.0006893938132738159, "loss": 2.2959, "step": 2849 }, { "epoch": 0.61, "learning_rate": 0.0006887319056414578, "loss": 2.0996, "step": 2850 }, { "epoch": 0.61, "learning_rate": 0.0006880701489784131, "loss": 2.165, "step": 2851 }, { "epoch": 0.61, "learning_rate": 0.000687408543605643, "loss": 2.2305, "step": 2852 }, { "epoch": 0.61, "learning_rate": 0.0006867470898440357, "loss": 2.1885, "step": 2853 }, { "epoch": 0.61, "learning_rate": 0.0006860857880144052, "loss": 1.9727, "step": 2854 }, { "epoch": 0.61, "learning_rate": 0.0006854246384374911, "loss": 2.165, "step": 2855 }, { "epoch": 0.61, "learning_rate": 0.0006847636414339613, "loss": 2.0176, "step": 2856 }, { "epoch": 0.61, "learning_rate": 0.0006841027973244076, "loss": 2.2412, "step": 2857 }, { "epoch": 0.61, "learning_rate": 0.0006834421064293488, "loss": 2.2012, "step": 2858 }, { "epoch": 0.61, "learning_rate": 0.0006827815690692294, "loss": 2.1611, "step": 2859 }, { "epoch": 0.61, "learning_rate": 0.0006821211855644187, "loss": 2.1924, "step": 2860 }, { "epoch": 0.62, "learning_rate": 0.0006814609562352117, "loss": 2.1631, "step": 2861 }, { "epoch": 0.62, "learning_rate": 0.000680800881401829, "loss": 2.125, "step": 2862 }, { "epoch": 0.62, "learning_rate": 0.0006801409613844161, "loss": 2.2129, "step": 2863 }, { "epoch": 0.62, "learning_rate": 0.0006794811965030428, "loss": 2.1738, "step": 2864 }, { "epoch": 0.62, "learning_rate": 0.000678821587077705, "loss": 2.1221, "step": 2865 }, { "epoch": 0.62, "learning_rate": 0.0006781621334283212, "loss": 2.0898, "step": 2866 }, { "epoch": 0.62, "learning_rate": 0.0006775028358747366, "loss": 2.3184, "step": 2867 }, { "epoch": 0.62, "learning_rate": 0.0006768436947367188, "loss": 2.2109, "step": 2868 }, { "epoch": 0.62, "learning_rate": 0.0006761847103339608, "loss": 2.1934, "step": 2869 }, { "epoch": 0.62, "learning_rate": 0.0006755258829860791, "loss": 2.0957, "step": 2870 }, { "epoch": 0.62, "learning_rate": 0.0006748672130126138, "loss": 2.1875, "step": 2871 }, { "epoch": 0.62, "learning_rate": 0.0006742087007330288, "loss": 2.1592, "step": 2872 }, { "epoch": 0.62, "learning_rate": 0.0006735503464667123, "loss": 2.2559, "step": 2873 }, { "epoch": 0.62, "learning_rate": 0.0006728921505329743, "loss": 2.2393, "step": 2874 }, { "epoch": 0.62, "learning_rate": 0.0006722341132510499, "loss": 2.3076, "step": 2875 }, { "epoch": 0.62, "learning_rate": 0.0006715762349400952, "loss": 2.082, "step": 2876 }, { "epoch": 0.62, "learning_rate": 0.0006709185159191909, "loss": 2.2422, "step": 2877 }, { "epoch": 0.62, "learning_rate": 0.0006702609565073394, "loss": 2.1504, "step": 2878 }, { "epoch": 0.62, "learning_rate": 0.0006696035570234661, "loss": 2.1758, "step": 2879 }, { "epoch": 0.62, "learning_rate": 0.0006689463177864197, "loss": 2.1758, "step": 2880 }, { "epoch": 0.62, "learning_rate": 0.0006682892391149692, "loss": 2.1934, "step": 2881 }, { "epoch": 0.62, "learning_rate": 0.0006676323213278071, "loss": 2.1875, "step": 2882 }, { "epoch": 0.62, "learning_rate": 0.0006669755647435474, "loss": 2.0273, "step": 2883 }, { "epoch": 0.62, "learning_rate": 0.0006663189696807268, "loss": 2.0723, "step": 2884 }, { "epoch": 0.62, "learning_rate": 0.0006656625364578021, "loss": 2.209, "step": 2885 }, { "epoch": 0.62, "learning_rate": 0.0006650062653931534, "loss": 2.1182, "step": 2886 }, { "epoch": 0.62, "learning_rate": 0.0006643501568050802, "loss": 2.1367, "step": 2887 }, { "epoch": 0.62, "learning_rate": 0.0006636942110118049, "loss": 2.1104, "step": 2888 }, { "epoch": 0.62, "learning_rate": 0.0006630384283314697, "loss": 2.126, "step": 2889 }, { "epoch": 0.62, "learning_rate": 0.0006623828090821388, "loss": 2.2197, "step": 2890 }, { "epoch": 0.62, "learning_rate": 0.0006617273535817965, "loss": 2.0674, "step": 2891 }, { "epoch": 0.62, "learning_rate": 0.0006610720621483473, "loss": 2.1436, "step": 2892 }, { "epoch": 0.62, "learning_rate": 0.0006604169350996165, "loss": 2.252, "step": 2893 }, { "epoch": 0.62, "learning_rate": 0.0006597619727533504, "loss": 2.2148, "step": 2894 }, { "epoch": 0.62, "learning_rate": 0.0006591071754272145, "loss": 2.0625, "step": 2895 }, { "epoch": 0.62, "learning_rate": 0.0006584525434387944, "loss": 2.2871, "step": 2896 }, { "epoch": 0.62, "learning_rate": 0.0006577980771055956, "loss": 2.2402, "step": 2897 }, { "epoch": 0.62, "learning_rate": 0.0006571437767450434, "loss": 2.1826, "step": 2898 }, { "epoch": 0.62, "learning_rate": 0.0006564896426744824, "loss": 2.2588, "step": 2899 }, { "epoch": 0.62, "learning_rate": 0.0006558356752111767, "loss": 2.3145, "step": 2900 }, { "epoch": 0.62, "learning_rate": 0.0006551818746723096, "loss": 2.0371, "step": 2901 }, { "epoch": 0.62, "learning_rate": 0.0006545282413749836, "loss": 2.1357, "step": 2902 }, { "epoch": 0.62, "learning_rate": 0.0006538747756362191, "loss": 2.084, "step": 2903 }, { "epoch": 0.62, "learning_rate": 0.0006532214777729566, "loss": 2.1504, "step": 2904 }, { "epoch": 0.62, "learning_rate": 0.0006525683481020541, "loss": 1.9795, "step": 2905 }, { "epoch": 0.62, "learning_rate": 0.0006519153869402889, "loss": 2.2832, "step": 2906 }, { "epoch": 0.62, "learning_rate": 0.0006512625946043565, "loss": 2.3154, "step": 2907 }, { "epoch": 0.63, "learning_rate": 0.0006506099714108695, "loss": 2.0752, "step": 2908 }, { "epoch": 0.63, "learning_rate": 0.000649957517676359, "loss": 2.2861, "step": 2909 }, { "epoch": 0.63, "learning_rate": 0.0006493052337172745, "loss": 2.2373, "step": 2910 }, { "epoch": 0.63, "learning_rate": 0.0006486531198499828, "loss": 2.0752, "step": 2911 }, { "epoch": 0.63, "learning_rate": 0.0006480011763907681, "loss": 2.2207, "step": 2912 }, { "epoch": 0.63, "learning_rate": 0.0006473494036558314, "loss": 2.3936, "step": 2913 }, { "epoch": 0.63, "learning_rate": 0.0006466978019612919, "loss": 2.1494, "step": 2914 }, { "epoch": 0.63, "learning_rate": 0.0006460463716231854, "loss": 2.1553, "step": 2915 }, { "epoch": 0.63, "learning_rate": 0.0006453951129574643, "loss": 2.3555, "step": 2916 }, { "epoch": 0.63, "learning_rate": 0.0006447440262799985, "loss": 2.2109, "step": 2917 }, { "epoch": 0.63, "learning_rate": 0.0006440931119065738, "loss": 2.2041, "step": 2918 }, { "epoch": 0.63, "learning_rate": 0.0006434423701528924, "loss": 1.9502, "step": 2919 }, { "epoch": 0.63, "learning_rate": 0.0006427918013345732, "loss": 2.126, "step": 2920 }, { "epoch": 0.63, "learning_rate": 0.000642141405767151, "loss": 2.251, "step": 2921 }, { "epoch": 0.63, "learning_rate": 0.0006414911837660767, "loss": 2.1992, "step": 2922 }, { "epoch": 0.63, "learning_rate": 0.0006408411356467172, "loss": 2.1318, "step": 2923 }, { "epoch": 0.63, "learning_rate": 0.000640191261724354, "loss": 2.207, "step": 2924 }, { "epoch": 0.63, "learning_rate": 0.0006395415623141856, "loss": 2.0762, "step": 2925 }, { "epoch": 0.63, "learning_rate": 0.0006388920377313247, "loss": 2.1445, "step": 2926 }, { "epoch": 0.63, "learning_rate": 0.0006382426882908001, "loss": 2.0957, "step": 2927 }, { "epoch": 0.63, "learning_rate": 0.0006375935143075553, "loss": 2.375, "step": 2928 }, { "epoch": 0.63, "learning_rate": 0.0006369445160964481, "loss": 2.1846, "step": 2929 }, { "epoch": 0.63, "learning_rate": 0.0006362956939722518, "loss": 2.1895, "step": 2930 }, { "epoch": 0.63, "learning_rate": 0.0006356470482496538, "loss": 2.165, "step": 2931 }, { "epoch": 0.63, "learning_rate": 0.0006349985792432569, "loss": 2.0957, "step": 2932 }, { "epoch": 0.63, "learning_rate": 0.0006343502872675772, "loss": 2.2266, "step": 2933 }, { "epoch": 0.63, "learning_rate": 0.0006337021726370448, "loss": 2.0947, "step": 2934 }, { "epoch": 0.63, "learning_rate": 0.0006330542356660046, "loss": 2.2168, "step": 2935 }, { "epoch": 0.63, "learning_rate": 0.0006324064766687152, "loss": 2.2148, "step": 2936 }, { "epoch": 0.63, "learning_rate": 0.000631758895959348, "loss": 1.9297, "step": 2937 }, { "epoch": 0.63, "learning_rate": 0.0006311114938519892, "loss": 2.1855, "step": 2938 }, { "epoch": 0.63, "learning_rate": 0.0006304642706606376, "loss": 2.0996, "step": 2939 }, { "epoch": 0.63, "learning_rate": 0.0006298172266992049, "loss": 2.0527, "step": 2940 }, { "epoch": 0.63, "learning_rate": 0.0006291703622815167, "loss": 2.2275, "step": 2941 }, { "epoch": 0.63, "learning_rate": 0.0006285236777213112, "loss": 2.21, "step": 2942 }, { "epoch": 0.63, "learning_rate": 0.0006278771733322392, "loss": 2.1367, "step": 2943 }, { "epoch": 0.63, "learning_rate": 0.0006272308494278645, "loss": 2.1514, "step": 2944 }, { "epoch": 0.63, "learning_rate": 0.0006265847063216625, "loss": 2.2021, "step": 2945 }, { "epoch": 0.63, "learning_rate": 0.0006259387443270223, "loss": 1.998, "step": 2946 }, { "epoch": 0.63, "learning_rate": 0.0006252929637572436, "loss": 2.167, "step": 2947 }, { "epoch": 0.63, "learning_rate": 0.0006246473649255391, "loss": 2.1064, "step": 2948 }, { "epoch": 0.63, "learning_rate": 0.0006240019481450337, "loss": 2.2236, "step": 2949 }, { "epoch": 0.63, "learning_rate": 0.0006233567137287628, "loss": 2.0479, "step": 2950 }, { "epoch": 0.63, "learning_rate": 0.000622711661989674, "loss": 2.208, "step": 2951 }, { "epoch": 0.63, "learning_rate": 0.0006220667932406265, "loss": 2.2422, "step": 2952 }, { "epoch": 0.63, "learning_rate": 0.0006214221077943904, "loss": 2.1689, "step": 2953 }, { "epoch": 0.64, "learning_rate": 0.000620777605963647, "loss": 2.1465, "step": 2954 }, { "epoch": 0.64, "learning_rate": 0.0006201332880609892, "loss": 2.208, "step": 2955 }, { "epoch": 0.64, "learning_rate": 0.0006194891543989191, "loss": 2.1611, "step": 2956 }, { "epoch": 0.64, "learning_rate": 0.0006188452052898507, "loss": 2.2021, "step": 2957 }, { "epoch": 0.64, "learning_rate": 0.0006182014410461082, "loss": 2.3262, "step": 2958 }, { "epoch": 0.64, "learning_rate": 0.0006175578619799265, "loss": 2.2383, "step": 2959 }, { "epoch": 0.64, "learning_rate": 0.0006169144684034502, "loss": 2.0908, "step": 2960 }, { "epoch": 0.64, "learning_rate": 0.0006162712606287335, "loss": 2.3242, "step": 2961 }, { "epoch": 0.64, "learning_rate": 0.0006156282389677414, "loss": 2.1611, "step": 2962 }, { "epoch": 0.64, "learning_rate": 0.0006149854037323485, "loss": 2.1133, "step": 2963 }, { "epoch": 0.64, "learning_rate": 0.0006143427552343383, "loss": 2.0322, "step": 2964 }, { "epoch": 0.64, "learning_rate": 0.0006137002937854049, "loss": 2.1973, "step": 2965 }, { "epoch": 0.64, "learning_rate": 0.0006130580196971498, "loss": 2.165, "step": 2966 }, { "epoch": 0.64, "learning_rate": 0.0006124159332810858, "loss": 2.1572, "step": 2967 }, { "epoch": 0.64, "learning_rate": 0.000611774034848633, "loss": 2.1025, "step": 2968 }, { "epoch": 0.64, "learning_rate": 0.0006111323247111216, "loss": 2.0957, "step": 2969 }, { "epoch": 0.64, "learning_rate": 0.0006104908031797896, "loss": 2.4004, "step": 2970 }, { "epoch": 0.64, "learning_rate": 0.0006098494705657834, "loss": 2.1104, "step": 2971 }, { "epoch": 0.64, "learning_rate": 0.0006092083271801583, "loss": 2.1689, "step": 2972 }, { "epoch": 0.64, "learning_rate": 0.0006085673733338781, "loss": 2.127, "step": 2973 }, { "epoch": 0.64, "learning_rate": 0.0006079266093378138, "loss": 2.0684, "step": 2974 }, { "epoch": 0.64, "learning_rate": 0.0006072860355027449, "loss": 2.1797, "step": 2975 }, { "epoch": 0.64, "learning_rate": 0.000606645652139359, "loss": 2.1025, "step": 2976 }, { "epoch": 0.64, "learning_rate": 0.0006060054595582502, "loss": 2.0234, "step": 2977 }, { "epoch": 0.64, "learning_rate": 0.0006053654580699207, "loss": 2.0996, "step": 2978 }, { "epoch": 0.64, "learning_rate": 0.0006047256479847804, "loss": 2.2695, "step": 2979 }, { "epoch": 0.64, "learning_rate": 0.000604086029613146, "loss": 2.2041, "step": 2980 }, { "epoch": 0.64, "learning_rate": 0.0006034466032652414, "loss": 2.127, "step": 2981 }, { "epoch": 0.64, "learning_rate": 0.0006028073692511964, "loss": 2.2441, "step": 2982 }, { "epoch": 0.64, "learning_rate": 0.0006021683278810488, "loss": 2.2207, "step": 2983 }, { "epoch": 0.64, "learning_rate": 0.0006015294794647428, "loss": 2.166, "step": 2984 }, { "epoch": 0.64, "learning_rate": 0.000600890824312128, "loss": 2.2451, "step": 2985 }, { "epoch": 0.64, "learning_rate": 0.0006002523627329617, "loss": 2.1904, "step": 2986 }, { "epoch": 0.64, "learning_rate": 0.000599614095036906, "loss": 2.0684, "step": 2987 }, { "epoch": 0.64, "learning_rate": 0.0005989760215335295, "loss": 2.2051, "step": 2988 }, { "epoch": 0.64, "learning_rate": 0.0005983381425323068, "loss": 2.0029, "step": 2989 }, { "epoch": 0.64, "learning_rate": 0.0005977004583426183, "loss": 2.2451, "step": 2990 }, { "epoch": 0.64, "learning_rate": 0.000597062969273749, "loss": 2.0332, "step": 2991 }, { "epoch": 0.64, "learning_rate": 0.0005964256756348907, "loss": 2.1357, "step": 2992 }, { "epoch": 0.64, "learning_rate": 0.0005957885777351386, "loss": 2.1465, "step": 2993 }, { "epoch": 0.64, "learning_rate": 0.0005951516758834948, "loss": 1.9863, "step": 2994 }, { "epoch": 0.64, "learning_rate": 0.0005945149703888649, "loss": 1.9854, "step": 2995 }, { "epoch": 0.64, "learning_rate": 0.0005938784615600601, "loss": 1.9688, "step": 2996 }, { "epoch": 0.64, "learning_rate": 0.0005932421497057963, "loss": 2.2246, "step": 2997 }, { "epoch": 0.64, "learning_rate": 0.0005926060351346929, "loss": 1.918, "step": 2998 }, { "epoch": 0.64, "learning_rate": 0.0005919701181552743, "loss": 2.3027, "step": 2999 }, { "epoch": 0.64, "learning_rate": 0.0005913343990759694, "loss": 2.2412, "step": 3000 }, { "epoch": 0.65, "learning_rate": 0.0005906988782051104, "loss": 2.1455, "step": 3001 }, { "epoch": 0.65, "learning_rate": 0.000590063555850934, "loss": 2.1895, "step": 3002 }, { "epoch": 0.65, "learning_rate": 0.0005894284323215797, "loss": 2.1943, "step": 3003 }, { "epoch": 0.65, "learning_rate": 0.0005887935079250916, "loss": 2.1641, "step": 3004 }, { "epoch": 0.65, "learning_rate": 0.0005881587829694166, "loss": 2.1514, "step": 3005 }, { "epoch": 0.65, "learning_rate": 0.0005875242577624051, "loss": 2.0293, "step": 3006 }, { "epoch": 0.65, "learning_rate": 0.0005868899326118109, "loss": 2.2822, "step": 3007 }, { "epoch": 0.65, "learning_rate": 0.0005862558078252902, "loss": 2.2158, "step": 3008 }, { "epoch": 0.65, "learning_rate": 0.0005856218837104019, "loss": 2.3076, "step": 3009 }, { "epoch": 0.65, "learning_rate": 0.0005849881605746081, "loss": 2.1475, "step": 3010 }, { "epoch": 0.65, "learning_rate": 0.0005843546387252737, "loss": 2.2002, "step": 3011 }, { "epoch": 0.65, "learning_rate": 0.0005837213184696649, "loss": 2.1191, "step": 3012 }, { "epoch": 0.65, "learning_rate": 0.0005830882001149517, "loss": 2.2412, "step": 3013 }, { "epoch": 0.65, "learning_rate": 0.0005824552839682038, "loss": 2.127, "step": 3014 }, { "epoch": 0.65, "learning_rate": 0.0005818225703363953, "loss": 2.1123, "step": 3015 }, { "epoch": 0.65, "learning_rate": 0.0005811900595264008, "loss": 2.0957, "step": 3016 }, { "epoch": 0.65, "learning_rate": 0.0005805577518449963, "loss": 2.1914, "step": 3017 }, { "epoch": 0.65, "learning_rate": 0.000579925647598861, "loss": 2.1846, "step": 3018 }, { "epoch": 0.65, "learning_rate": 0.0005792937470945726, "loss": 2.0732, "step": 3019 }, { "epoch": 0.65, "learning_rate": 0.0005786620506386124, "loss": 2.1494, "step": 3020 }, { "epoch": 0.65, "learning_rate": 0.0005780305585373618, "loss": 2.2334, "step": 3021 }, { "epoch": 0.65, "learning_rate": 0.0005773992710971028, "loss": 2.1494, "step": 3022 }, { "epoch": 0.65, "learning_rate": 0.0005767681886240195, "loss": 2.1602, "step": 3023 }, { "epoch": 0.65, "learning_rate": 0.0005761373114241946, "loss": 2.1006, "step": 3024 }, { "epoch": 0.65, "learning_rate": 0.0005755066398036122, "loss": 2.2178, "step": 3025 }, { "epoch": 0.65, "learning_rate": 0.0005748761740681573, "loss": 2.2354, "step": 3026 }, { "epoch": 0.65, "learning_rate": 0.0005742459145236143, "loss": 2.2109, "step": 3027 }, { "epoch": 0.65, "learning_rate": 0.0005736158614756673, "loss": 2.1211, "step": 3028 }, { "epoch": 0.65, "learning_rate": 0.0005729860152299018, "loss": 2.0469, "step": 3029 }, { "epoch": 0.65, "learning_rate": 0.0005723563760918009, "loss": 2.0859, "step": 3030 }, { "epoch": 0.65, "learning_rate": 0.0005717269443667482, "loss": 2.2627, "step": 3031 }, { "epoch": 0.65, "learning_rate": 0.0005710977203600274, "loss": 2.2441, "step": 3032 }, { "epoch": 0.65, "learning_rate": 0.0005704687043768205, "loss": 2.3301, "step": 3033 }, { "epoch": 0.65, "learning_rate": 0.000569839896722209, "loss": 2.1162, "step": 3034 }, { "epoch": 0.65, "learning_rate": 0.0005692112977011732, "loss": 2.0557, "step": 3035 }, { "epoch": 0.65, "learning_rate": 0.000568582907618592, "loss": 2.0947, "step": 3036 }, { "epoch": 0.65, "learning_rate": 0.0005679547267792437, "loss": 2.2227, "step": 3037 }, { "epoch": 0.65, "learning_rate": 0.0005673267554878047, "loss": 2.1699, "step": 3038 }, { "epoch": 0.65, "learning_rate": 0.0005666989940488497, "loss": 2.1582, "step": 3039 }, { "epoch": 0.65, "learning_rate": 0.0005660714427668512, "loss": 2.2178, "step": 3040 }, { "epoch": 0.65, "learning_rate": 0.0005654441019461806, "loss": 2.1367, "step": 3041 }, { "epoch": 0.65, "learning_rate": 0.0005648169718911062, "loss": 2.2451, "step": 3042 }, { "epoch": 0.65, "learning_rate": 0.0005641900529057959, "loss": 2.1055, "step": 3043 }, { "epoch": 0.65, "learning_rate": 0.0005635633452943132, "loss": 2.2217, "step": 3044 }, { "epoch": 0.65, "learning_rate": 0.0005629368493606201, "loss": 2.2178, "step": 3045 }, { "epoch": 0.65, "learning_rate": 0.000562310565408576, "loss": 2.3096, "step": 3046 }, { "epoch": 0.66, "learning_rate": 0.0005616844937419368, "loss": 2.2129, "step": 3047 }, { "epoch": 0.66, "learning_rate": 0.0005610586346643558, "loss": 2.0225, "step": 3048 }, { "epoch": 0.66, "learning_rate": 0.000560432988479384, "loss": 2.1768, "step": 3049 }, { "epoch": 0.66, "learning_rate": 0.0005598075554904679, "loss": 2.0713, "step": 3050 }, { "epoch": 0.66, "learning_rate": 0.0005591823360009513, "loss": 2.0869, "step": 3051 }, { "epoch": 0.66, "learning_rate": 0.000558557330314074, "loss": 2.1602, "step": 3052 }, { "epoch": 0.66, "learning_rate": 0.0005579325387329724, "loss": 2.2773, "step": 3053 }, { "epoch": 0.66, "learning_rate": 0.0005573079615606795, "loss": 2.1855, "step": 3054 }, { "epoch": 0.66, "learning_rate": 0.0005566835991001238, "loss": 2.0742, "step": 3055 }, { "epoch": 0.66, "learning_rate": 0.0005560594516541286, "loss": 2.1113, "step": 3056 }, { "epoch": 0.66, "learning_rate": 0.0005554355195254154, "loss": 2.2412, "step": 3057 }, { "epoch": 0.66, "learning_rate": 0.0005548118030165992, "loss": 2.1318, "step": 3058 }, { "epoch": 0.66, "learning_rate": 0.0005541883024301905, "loss": 2.209, "step": 3059 }, { "epoch": 0.66, "learning_rate": 0.0005535650180685975, "loss": 2.1475, "step": 3060 }, { "epoch": 0.66, "learning_rate": 0.00055294195023412, "loss": 2.2441, "step": 3061 }, { "epoch": 0.66, "learning_rate": 0.0005523190992289546, "loss": 2.1787, "step": 3062 }, { "epoch": 0.66, "learning_rate": 0.0005516964653551937, "loss": 2.1025, "step": 3063 }, { "epoch": 0.66, "learning_rate": 0.0005510740489148225, "loss": 2.1328, "step": 3064 }, { "epoch": 0.66, "learning_rate": 0.0005504518502097212, "loss": 2.2881, "step": 3065 }, { "epoch": 0.66, "learning_rate": 0.0005498298695416662, "loss": 2.1592, "step": 3066 }, { "epoch": 0.66, "learning_rate": 0.0005492081072123249, "loss": 2.249, "step": 3067 }, { "epoch": 0.66, "learning_rate": 0.0005485865635232618, "loss": 2.1982, "step": 3068 }, { "epoch": 0.66, "learning_rate": 0.0005479652387759338, "loss": 2.207, "step": 3069 }, { "epoch": 0.66, "learning_rate": 0.0005473441332716915, "loss": 2.1631, "step": 3070 }, { "epoch": 0.66, "learning_rate": 0.0005467232473117809, "loss": 2.2256, "step": 3071 }, { "epoch": 0.66, "learning_rate": 0.0005461025811973391, "loss": 2.0488, "step": 3072 }, { "epoch": 0.66, "learning_rate": 0.0005454821352293977, "loss": 2.3809, "step": 3073 }, { "epoch": 0.66, "learning_rate": 0.000544861909708882, "loss": 2.2969, "step": 3074 }, { "epoch": 0.66, "learning_rate": 0.0005442419049366101, "loss": 2.4414, "step": 3075 }, { "epoch": 0.66, "learning_rate": 0.0005436221212132923, "loss": 2.2832, "step": 3076 }, { "epoch": 0.66, "learning_rate": 0.0005430025588395327, "loss": 2.1484, "step": 3077 }, { "epoch": 0.66, "learning_rate": 0.0005423832181158274, "loss": 2.1543, "step": 3078 }, { "epoch": 0.66, "learning_rate": 0.0005417640993425647, "loss": 2.1943, "step": 3079 }, { "epoch": 0.66, "learning_rate": 0.0005411452028200265, "loss": 2.1367, "step": 3080 }, { "epoch": 0.66, "learning_rate": 0.0005405265288483859, "loss": 2.1494, "step": 3081 }, { "epoch": 0.66, "learning_rate": 0.0005399080777277084, "loss": 2.0928, "step": 3082 }, { "epoch": 0.66, "learning_rate": 0.0005392898497579508, "loss": 2.2539, "step": 3083 }, { "epoch": 0.66, "learning_rate": 0.0005386718452389624, "loss": 2.1543, "step": 3084 }, { "epoch": 0.66, "learning_rate": 0.0005380540644704843, "loss": 2.1709, "step": 3085 }, { "epoch": 0.66, "learning_rate": 0.0005374365077521486, "loss": 2.0986, "step": 3086 }, { "epoch": 0.66, "learning_rate": 0.0005368191753834786, "loss": 2.1865, "step": 3087 }, { "epoch": 0.66, "learning_rate": 0.0005362020676638888, "loss": 2.1484, "step": 3088 }, { "epoch": 0.66, "learning_rate": 0.0005355851848926854, "loss": 2.1084, "step": 3089 }, { "epoch": 0.66, "learning_rate": 0.0005349685273690644, "loss": 2.1738, "step": 3090 }, { "epoch": 0.66, "learning_rate": 0.0005343520953921139, "loss": 2.042, "step": 3091 }, { "epoch": 0.66, "learning_rate": 0.0005337358892608121, "loss": 2.2578, "step": 3092 }, { "epoch": 0.66, "learning_rate": 0.0005331199092740262, "loss": 2.1738, "step": 3093 }, { "epoch": 0.67, "learning_rate": 0.0005325041557305161, "loss": 2.0391, "step": 3094 }, { "epoch": 0.67, "learning_rate": 0.0005318886289289305, "loss": 2.0566, "step": 3095 }, { "epoch": 0.67, "learning_rate": 0.0005312733291678079, "loss": 2.0859, "step": 3096 }, { "epoch": 0.67, "learning_rate": 0.0005306582567455779, "loss": 2.1436, "step": 3097 }, { "epoch": 0.67, "learning_rate": 0.0005300434119605588, "loss": 2.2832, "step": 3098 }, { "epoch": 0.67, "learning_rate": 0.0005294287951109586, "loss": 2.1016, "step": 3099 }, { "epoch": 0.67, "learning_rate": 0.0005288144064948753, "loss": 2.1406, "step": 3100 }, { "epoch": 0.67, "learning_rate": 0.0005282002464102949, "loss": 2.2188, "step": 3101 }, { "epoch": 0.67, "learning_rate": 0.0005275863151550948, "loss": 2.0244, "step": 3102 }, { "epoch": 0.67, "learning_rate": 0.00052697261302704, "loss": 2.0967, "step": 3103 }, { "epoch": 0.67, "learning_rate": 0.0005263591403237831, "loss": 2.2695, "step": 3104 }, { "epoch": 0.67, "learning_rate": 0.0005257458973428682, "loss": 2.085, "step": 3105 }, { "epoch": 0.67, "learning_rate": 0.000525132884381726, "loss": 2.1514, "step": 3106 }, { "epoch": 0.67, "learning_rate": 0.0005245201017376761, "loss": 2.1514, "step": 3107 }, { "epoch": 0.67, "learning_rate": 0.0005239075497079275, "loss": 2.0029, "step": 3108 }, { "epoch": 0.67, "learning_rate": 0.0005232952285895753, "loss": 2.1777, "step": 3109 }, { "epoch": 0.67, "learning_rate": 0.0005226831386796037, "loss": 2.1816, "step": 3110 }, { "epoch": 0.67, "learning_rate": 0.0005220712802748854, "loss": 2.0986, "step": 3111 }, { "epoch": 0.67, "learning_rate": 0.0005214596536721798, "loss": 2.2188, "step": 3112 }, { "epoch": 0.67, "learning_rate": 0.0005208482591681343, "loss": 2.0234, "step": 3113 }, { "epoch": 0.67, "learning_rate": 0.0005202370970592836, "loss": 2.1855, "step": 3114 }, { "epoch": 0.67, "learning_rate": 0.0005196261676420493, "loss": 2.1299, "step": 3115 }, { "epoch": 0.67, "learning_rate": 0.0005190154712127415, "loss": 2.083, "step": 3116 }, { "epoch": 0.67, "learning_rate": 0.0005184050080675557, "loss": 2.0518, "step": 3117 }, { "epoch": 0.67, "learning_rate": 0.000517794778502575, "loss": 2.1143, "step": 3118 }, { "epoch": 0.67, "learning_rate": 0.0005171847828137701, "loss": 2.1533, "step": 3119 }, { "epoch": 0.67, "learning_rate": 0.000516575021296996, "loss": 2.1543, "step": 3120 }, { "epoch": 0.67, "learning_rate": 0.0005159654942479956, "loss": 2.126, "step": 3121 }, { "epoch": 0.67, "learning_rate": 0.0005153562019623985, "loss": 2.2041, "step": 3122 }, { "epoch": 0.67, "learning_rate": 0.0005147471447357197, "loss": 2.1816, "step": 3123 }, { "epoch": 0.67, "learning_rate": 0.0005141383228633604, "loss": 2.2129, "step": 3124 }, { "epoch": 0.67, "learning_rate": 0.0005135297366406073, "loss": 2.2725, "step": 3125 }, { "epoch": 0.67, "learning_rate": 0.0005129213863626332, "loss": 2.1836, "step": 3126 }, { "epoch": 0.67, "learning_rate": 0.0005123132723244961, "loss": 2.1582, "step": 3127 }, { "epoch": 0.67, "learning_rate": 0.0005117053948211405, "loss": 2.2324, "step": 3128 }, { "epoch": 0.67, "learning_rate": 0.0005110977541473947, "loss": 2.2441, "step": 3129 }, { "epoch": 0.67, "learning_rate": 0.000510490350597973, "loss": 2.2275, "step": 3130 }, { "epoch": 0.67, "learning_rate": 0.0005098831844674745, "loss": 2.2285, "step": 3131 }, { "epoch": 0.67, "learning_rate": 0.0005092762560503826, "loss": 2.1719, "step": 3132 }, { "epoch": 0.67, "learning_rate": 0.000508669565641067, "loss": 2.2061, "step": 3133 }, { "epoch": 0.67, "learning_rate": 0.0005080631135337801, "loss": 2.1377, "step": 3134 }, { "epoch": 0.67, "learning_rate": 0.0005074569000226599, "loss": 2.1963, "step": 3135 }, { "epoch": 0.67, "learning_rate": 0.0005068509254017282, "loss": 2.2393, "step": 3136 }, { "epoch": 0.67, "learning_rate": 0.0005062451899648909, "loss": 2.0469, "step": 3137 }, { "epoch": 0.67, "learning_rate": 0.0005056396940059378, "loss": 2.1816, "step": 3138 }, { "epoch": 0.67, "learning_rate": 0.0005050344378185434, "loss": 1.9502, "step": 3139 }, { "epoch": 0.68, "learning_rate": 0.0005044294216962655, "loss": 2.0596, "step": 3140 }, { "epoch": 0.68, "learning_rate": 0.0005038246459325439, "loss": 2.1943, "step": 3141 }, { "epoch": 0.68, "learning_rate": 0.0005032201108207046, "loss": 2.291, "step": 3142 }, { "epoch": 0.68, "learning_rate": 0.0005026158166539547, "loss": 2.167, "step": 3143 }, { "epoch": 0.68, "learning_rate": 0.0005020117637253851, "loss": 2.1377, "step": 3144 }, { "epoch": 0.68, "learning_rate": 0.0005014079523279709, "loss": 2.1631, "step": 3145 }, { "epoch": 0.68, "learning_rate": 0.0005008043827545672, "loss": 2.3145, "step": 3146 }, { "epoch": 0.68, "learning_rate": 0.000500201055297915, "loss": 2.168, "step": 3147 }, { "epoch": 0.68, "learning_rate": 0.0004995979702506359, "loss": 2.1338, "step": 3148 }, { "epoch": 0.68, "learning_rate": 0.0004989951279052341, "loss": 2.1875, "step": 3149 }, { "epoch": 0.68, "learning_rate": 0.0004983925285540975, "loss": 1.96, "step": 3150 }, { "epoch": 0.68, "learning_rate": 0.0004977901724894938, "loss": 2.25, "step": 3151 }, { "epoch": 0.68, "learning_rate": 0.0004971880600035746, "loss": 2.1855, "step": 3152 }, { "epoch": 0.68, "learning_rate": 0.000496586191388373, "loss": 2.2207, "step": 3153 }, { "epoch": 0.68, "learning_rate": 0.0004959845669358031, "loss": 2.1572, "step": 3154 }, { "epoch": 0.68, "learning_rate": 0.000495383186937661, "loss": 2.0801, "step": 3155 }, { "epoch": 0.68, "learning_rate": 0.0004947820516856253, "loss": 2.2139, "step": 3156 }, { "epoch": 0.68, "learning_rate": 0.0004941811614712537, "loss": 2.1904, "step": 3157 }, { "epoch": 0.68, "learning_rate": 0.0004935805165859862, "loss": 2.1475, "step": 3158 }, { "epoch": 0.68, "learning_rate": 0.0004929801173211448, "loss": 2.1475, "step": 3159 }, { "epoch": 0.68, "learning_rate": 0.0004923799639679308, "loss": 2.1436, "step": 3160 }, { "epoch": 0.68, "learning_rate": 0.0004917800568174271, "loss": 2.0801, "step": 3161 }, { "epoch": 0.68, "learning_rate": 0.0004911803961605966, "loss": 2.0684, "step": 3162 }, { "epoch": 0.68, "learning_rate": 0.0004905809822882828, "loss": 2.1211, "step": 3163 }, { "epoch": 0.68, "learning_rate": 0.0004899818154912105, "loss": 2.1094, "step": 3164 }, { "epoch": 0.68, "learning_rate": 0.0004893828960599833, "loss": 2.127, "step": 3165 }, { "epoch": 0.68, "learning_rate": 0.0004887842242850855, "loss": 2.0938, "step": 3166 }, { "epoch": 0.68, "learning_rate": 0.0004881858004568812, "loss": 2.1846, "step": 3167 }, { "epoch": 0.68, "learning_rate": 0.00048758762486561405, "loss": 2.0293, "step": 3168 }, { "epoch": 0.68, "learning_rate": 0.0004869896978014071, "loss": 2.1729, "step": 3169 }, { "epoch": 0.68, "learning_rate": 0.00048639201955426407, "loss": 2.2217, "step": 3170 }, { "epoch": 0.68, "learning_rate": 0.0004857945904140667, "loss": 2.2314, "step": 3171 }, { "epoch": 0.68, "learning_rate": 0.0004851974106705763, "loss": 2.1924, "step": 3172 }, { "epoch": 0.68, "learning_rate": 0.0004846004806134334, "loss": 2.082, "step": 3173 }, { "epoch": 0.68, "learning_rate": 0.0004840038005321572, "loss": 2.2129, "step": 3174 }, { "epoch": 0.68, "learning_rate": 0.0004834073707161455, "loss": 2.1416, "step": 3175 }, { "epoch": 0.68, "learning_rate": 0.00048281119145467555, "loss": 2.209, "step": 3176 }, { "epoch": 0.68, "learning_rate": 0.00048221526303690224, "loss": 2.1631, "step": 3177 }, { "epoch": 0.68, "learning_rate": 0.00048161958575185904, "loss": 2.1963, "step": 3178 }, { "epoch": 0.68, "learning_rate": 0.0004810241598884577, "loss": 1.9805, "step": 3179 }, { "epoch": 0.68, "learning_rate": 0.00048042898573548757, "loss": 2.1807, "step": 3180 }, { "epoch": 0.68, "learning_rate": 0.00047983406358161697, "loss": 2.1416, "step": 3181 }, { "epoch": 0.68, "learning_rate": 0.00047923939371539135, "loss": 2.0186, "step": 3182 }, { "epoch": 0.68, "learning_rate": 0.00047864497642523274, "loss": 2.0615, "step": 3183 }, { "epoch": 0.68, "learning_rate": 0.0004780508119994429, "loss": 2.0967, "step": 3184 }, { "epoch": 0.68, "learning_rate": 0.0004774569007261992, "loss": 2.207, "step": 3185 }, { "epoch": 0.68, "learning_rate": 0.00047686324289355656, "loss": 2.0117, "step": 3186 }, { "epoch": 0.69, "learning_rate": 0.00047626983878944787, "loss": 2.1377, "step": 3187 }, { "epoch": 0.69, "learning_rate": 0.00047567668870168234, "loss": 2.127, "step": 3188 }, { "epoch": 0.69, "learning_rate": 0.0004750837929179448, "loss": 2.2676, "step": 3189 }, { "epoch": 0.69, "learning_rate": 0.0004744911517257989, "loss": 2.0586, "step": 3190 }, { "epoch": 0.69, "learning_rate": 0.00047389876541268375, "loss": 2.2422, "step": 3191 }, { "epoch": 0.69, "learning_rate": 0.00047330663426591404, "loss": 2.3184, "step": 3192 }, { "epoch": 0.69, "learning_rate": 0.00047271475857268297, "loss": 2.2139, "step": 3193 }, { "epoch": 0.69, "learning_rate": 0.00047212313862005675, "loss": 2.0811, "step": 3194 }, { "epoch": 0.69, "learning_rate": 0.00047153177469498044, "loss": 2.1543, "step": 3195 }, { "epoch": 0.69, "learning_rate": 0.00047094066708427317, "loss": 2.0596, "step": 3196 }, { "epoch": 0.69, "learning_rate": 0.00047034981607463013, "loss": 2.1641, "step": 3197 }, { "epoch": 0.69, "learning_rate": 0.0004697592219526231, "loss": 2.0684, "step": 3198 }, { "epoch": 0.69, "learning_rate": 0.00046916888500469735, "loss": 2.2607, "step": 3199 }, { "epoch": 0.69, "learning_rate": 0.0004685788055171744, "loss": 2.1533, "step": 3200 }, { "epoch": 0.69, "learning_rate": 0.00046798898377625167, "loss": 2.2773, "step": 3201 }, { "epoch": 0.69, "learning_rate": 0.0004673994200680004, "loss": 2.2236, "step": 3202 }, { "epoch": 0.69, "learning_rate": 0.0004668101146783673, "loss": 2.2207, "step": 3203 }, { "epoch": 0.69, "learning_rate": 0.0004662210678931734, "loss": 2.0312, "step": 3204 }, { "epoch": 0.69, "learning_rate": 0.00046563227999811465, "loss": 2.1377, "step": 3205 }, { "epoch": 0.69, "learning_rate": 0.0004650437512787609, "loss": 2.292, "step": 3206 }, { "epoch": 0.69, "learning_rate": 0.0004644554820205572, "loss": 2.1289, "step": 3207 }, { "epoch": 0.69, "learning_rate": 0.00046386747250882224, "loss": 2.1172, "step": 3208 }, { "epoch": 0.69, "learning_rate": 0.00046327972302874845, "loss": 2.0635, "step": 3209 }, { "epoch": 0.69, "learning_rate": 0.0004626922338654026, "loss": 2.1182, "step": 3210 }, { "epoch": 0.69, "learning_rate": 0.00046210500530372445, "loss": 2.2168, "step": 3211 }, { "epoch": 0.69, "learning_rate": 0.00046151803762852894, "loss": 2.1318, "step": 3212 }, { "epoch": 0.69, "learning_rate": 0.0004609313311245029, "loss": 2.1455, "step": 3213 }, { "epoch": 0.69, "learning_rate": 0.00046034488607620726, "loss": 2.0146, "step": 3214 }, { "epoch": 0.69, "learning_rate": 0.0004597587027680757, "loss": 2.0527, "step": 3215 }, { "epoch": 0.69, "learning_rate": 0.0004591727814844154, "loss": 2.2012, "step": 3216 }, { "epoch": 0.69, "learning_rate": 0.00045858712250940593, "loss": 2.1484, "step": 3217 }, { "epoch": 0.69, "learning_rate": 0.00045800172612710043, "loss": 2.2188, "step": 3218 }, { "epoch": 0.69, "learning_rate": 0.0004574165926214241, "loss": 2.167, "step": 3219 }, { "epoch": 0.69, "learning_rate": 0.0004568317222761745, "loss": 2.1406, "step": 3220 }, { "epoch": 0.69, "learning_rate": 0.00045624711537502205, "loss": 2.124, "step": 3221 }, { "epoch": 0.69, "learning_rate": 0.0004556627722015091, "loss": 2.1465, "step": 3222 }, { "epoch": 0.69, "learning_rate": 0.0004550786930390497, "loss": 2.3193, "step": 3223 }, { "epoch": 0.69, "learning_rate": 0.00045449487817093105, "loss": 2.207, "step": 3224 }, { "epoch": 0.69, "learning_rate": 0.000453911327880311, "loss": 2.1514, "step": 3225 }, { "epoch": 0.69, "learning_rate": 0.0004533280424502195, "loss": 2.124, "step": 3226 }, { "epoch": 0.69, "learning_rate": 0.00045274502216355805, "loss": 2.3594, "step": 3227 }, { "epoch": 0.69, "learning_rate": 0.0004521622673030992, "loss": 2.3027, "step": 3228 }, { "epoch": 0.69, "learning_rate": 0.00045157977815148777, "loss": 2.1357, "step": 3229 }, { "epoch": 0.69, "learning_rate": 0.00045099755499123886, "loss": 2.0781, "step": 3230 }, { "epoch": 0.69, "learning_rate": 0.00045041559810473785, "loss": 2.2373, "step": 3231 }, { "epoch": 0.69, "learning_rate": 0.00044983390777424294, "loss": 2.0986, "step": 3232 }, { "epoch": 0.7, "learning_rate": 0.0004492524842818815, "loss": 2.1953, "step": 3233 }, { "epoch": 0.7, "learning_rate": 0.00044867132790965146, "loss": 2.1416, "step": 3234 }, { "epoch": 0.7, "learning_rate": 0.000448090438939423, "loss": 2.1934, "step": 3235 }, { "epoch": 0.7, "learning_rate": 0.000447509817652934, "loss": 2.0586, "step": 3236 }, { "epoch": 0.7, "learning_rate": 0.0004469294643317938, "loss": 2.0908, "step": 3237 }, { "epoch": 0.7, "learning_rate": 0.00044634937925748264, "loss": 2.1543, "step": 3238 }, { "epoch": 0.7, "learning_rate": 0.0004457695627113493, "loss": 2.1201, "step": 3239 }, { "epoch": 0.7, "learning_rate": 0.00044519001497461273, "loss": 2.1309, "step": 3240 }, { "epoch": 0.7, "learning_rate": 0.0004446107363283616, "loss": 2.2285, "step": 3241 }, { "epoch": 0.7, "learning_rate": 0.00044403172705355376, "loss": 2.291, "step": 3242 }, { "epoch": 0.7, "learning_rate": 0.00044345298743101726, "loss": 2.166, "step": 3243 }, { "epoch": 0.7, "learning_rate": 0.0004428745177414486, "loss": 2.1895, "step": 3244 }, { "epoch": 0.7, "learning_rate": 0.00044229631826541306, "loss": 2.0674, "step": 3245 }, { "epoch": 0.7, "learning_rate": 0.0004417183892833465, "loss": 2.2324, "step": 3246 }, { "epoch": 0.7, "learning_rate": 0.0004411407310755513, "loss": 2.1807, "step": 3247 }, { "epoch": 0.7, "learning_rate": 0.00044056334392219963, "loss": 2.124, "step": 3248 }, { "epoch": 0.7, "learning_rate": 0.000439986228103333, "loss": 2.0898, "step": 3249 }, { "epoch": 0.7, "learning_rate": 0.0004394093838988602, "loss": 2.1973, "step": 3250 }, { "epoch": 0.7, "learning_rate": 0.00043883281158855857, "loss": 2.1777, "step": 3251 }, { "epoch": 0.7, "learning_rate": 0.00043825651145207355, "loss": 2.0605, "step": 3252 }, { "epoch": 0.7, "learning_rate": 0.0004376804837689188, "loss": 2.0811, "step": 3253 }, { "epoch": 0.7, "learning_rate": 0.0004371047288184752, "loss": 2.1953, "step": 3254 }, { "epoch": 0.7, "learning_rate": 0.00043652924687999253, "loss": 2.1787, "step": 3255 }, { "epoch": 0.7, "learning_rate": 0.0004359540382325872, "loss": 2.168, "step": 3256 }, { "epoch": 0.7, "learning_rate": 0.0004353791031552433, "loss": 2.3574, "step": 3257 }, { "epoch": 0.7, "learning_rate": 0.0004348044419268122, "loss": 2.1533, "step": 3258 }, { "epoch": 0.7, "learning_rate": 0.0004342300548260122, "loss": 2.0576, "step": 3259 }, { "epoch": 0.7, "learning_rate": 0.0004336559421314298, "loss": 2.1875, "step": 3260 }, { "epoch": 0.7, "learning_rate": 0.00043308210412151696, "loss": 2.2168, "step": 3261 }, { "epoch": 0.7, "learning_rate": 0.00043250854107459313, "loss": 2.2158, "step": 3262 }, { "epoch": 0.7, "learning_rate": 0.0004319352532688443, "loss": 2.042, "step": 3263 }, { "epoch": 0.7, "learning_rate": 0.00043136224098232304, "loss": 2.21, "step": 3264 }, { "epoch": 0.7, "learning_rate": 0.00043078950449294763, "loss": 2.1016, "step": 3265 }, { "epoch": 0.7, "learning_rate": 0.00043021704407850413, "loss": 2.1611, "step": 3266 }, { "epoch": 0.7, "learning_rate": 0.00042964486001664315, "loss": 2.1006, "step": 3267 }, { "epoch": 0.7, "learning_rate": 0.000429072952584882, "loss": 2.125, "step": 3268 }, { "epoch": 0.7, "learning_rate": 0.0004285013220606034, "loss": 2.1865, "step": 3269 }, { "epoch": 0.7, "learning_rate": 0.0004279299687210565, "loss": 2.0986, "step": 3270 }, { "epoch": 0.7, "learning_rate": 0.0004273588928433547, "loss": 2.0312, "step": 3271 }, { "epoch": 0.7, "learning_rate": 0.000426788094704479, "loss": 2.0732, "step": 3272 }, { "epoch": 0.7, "learning_rate": 0.00042621757458127286, "loss": 2.1768, "step": 3273 }, { "epoch": 0.7, "learning_rate": 0.00042564733275044756, "loss": 2.2314, "step": 3274 }, { "epoch": 0.7, "learning_rate": 0.0004250773694885778, "loss": 2.1914, "step": 3275 }, { "epoch": 0.7, "learning_rate": 0.0004245076850721031, "loss": 2.2012, "step": 3276 }, { "epoch": 0.7, "learning_rate": 0.00042393827977732935, "loss": 2.1699, "step": 3277 }, { "epoch": 0.7, "learning_rate": 0.00042336915388042565, "loss": 2.2188, "step": 3278 }, { "epoch": 0.7, "learning_rate": 0.00042280030765742496, "loss": 2.3262, "step": 3279 }, { "epoch": 0.71, "learning_rate": 0.0004222317413842265, "loss": 2.2334, "step": 3280 }, { "epoch": 0.71, "learning_rate": 0.00042166345533659245, "loss": 2.0811, "step": 3281 }, { "epoch": 0.71, "learning_rate": 0.00042109544979014905, "loss": 2.1445, "step": 3282 }, { "epoch": 0.71, "learning_rate": 0.0004205277250203878, "loss": 2.1016, "step": 3283 }, { "epoch": 0.71, "learning_rate": 0.0004199602813026622, "loss": 2.2207, "step": 3284 }, { "epoch": 0.71, "learning_rate": 0.0004193931189121899, "loss": 2.252, "step": 3285 }, { "epoch": 0.71, "learning_rate": 0.0004188262381240534, "loss": 2.2002, "step": 3286 }, { "epoch": 0.71, "learning_rate": 0.00041825963921319744, "loss": 2.1738, "step": 3287 }, { "epoch": 0.71, "learning_rate": 0.00041769332245443003, "loss": 2.1738, "step": 3288 }, { "epoch": 0.71, "learning_rate": 0.0004171272881224227, "loss": 2.1855, "step": 3289 }, { "epoch": 0.71, "learning_rate": 0.0004165615364917097, "loss": 2.1602, "step": 3290 }, { "epoch": 0.71, "learning_rate": 0.00041599606783668887, "loss": 2.252, "step": 3291 }, { "epoch": 0.71, "learning_rate": 0.00041543088243162007, "loss": 2.1631, "step": 3292 }, { "epoch": 0.71, "learning_rate": 0.000414865980550626, "loss": 2.1543, "step": 3293 }, { "epoch": 0.71, "learning_rate": 0.00041430136246769177, "loss": 2.2217, "step": 3294 }, { "epoch": 0.71, "learning_rate": 0.00041373702845666493, "loss": 2.1465, "step": 3295 }, { "epoch": 0.71, "learning_rate": 0.00041317297879125503, "loss": 2.1113, "step": 3296 }, { "epoch": 0.71, "learning_rate": 0.00041260921374503445, "loss": 2.1357, "step": 3297 }, { "epoch": 0.71, "learning_rate": 0.0004120457335914368, "loss": 2.2979, "step": 3298 }, { "epoch": 0.71, "learning_rate": 0.00041148253860375764, "loss": 2.0811, "step": 3299 }, { "epoch": 0.71, "learning_rate": 0.00041091962905515436, "loss": 2.0518, "step": 3300 }, { "epoch": 0.71, "learning_rate": 0.0004103570052186457, "loss": 2.1084, "step": 3301 }, { "epoch": 0.71, "learning_rate": 0.00040979466736711177, "loss": 2.1416, "step": 3302 }, { "epoch": 0.71, "learning_rate": 0.00040923261577329486, "loss": 2.082, "step": 3303 }, { "epoch": 0.71, "learning_rate": 0.0004086708507097974, "loss": 2.1104, "step": 3304 }, { "epoch": 0.71, "learning_rate": 0.0004081093724490831, "loss": 2.1475, "step": 3305 }, { "epoch": 0.71, "learning_rate": 0.0004075481812634766, "loss": 2.2598, "step": 3306 }, { "epoch": 0.71, "learning_rate": 0.0004069872774251632, "loss": 2.0918, "step": 3307 }, { "epoch": 0.71, "learning_rate": 0.00040642666120618956, "loss": 1.9844, "step": 3308 }, { "epoch": 0.71, "learning_rate": 0.0004058663328784622, "loss": 2.167, "step": 3309 }, { "epoch": 0.71, "learning_rate": 0.0004053062927137472, "loss": 2.1914, "step": 3310 }, { "epoch": 0.71, "learning_rate": 0.0004047465409836728, "loss": 2.1406, "step": 3311 }, { "epoch": 0.71, "learning_rate": 0.0004041870779597258, "loss": 2.1074, "step": 3312 }, { "epoch": 0.71, "learning_rate": 0.0004036279039132531, "loss": 2.2383, "step": 3313 }, { "epoch": 0.71, "learning_rate": 0.0004030690191154628, "loss": 2.2529, "step": 3314 }, { "epoch": 0.71, "learning_rate": 0.00040251042383742107, "loss": 2.1465, "step": 3315 }, { "epoch": 0.71, "learning_rate": 0.0004019521183500544, "loss": 2.0977, "step": 3316 }, { "epoch": 0.71, "learning_rate": 0.0004013941029241488, "loss": 2.043, "step": 3317 }, { "epoch": 0.71, "learning_rate": 0.0004008363778303492, "loss": 2.1582, "step": 3318 }, { "epoch": 0.71, "learning_rate": 0.00040027894333915983, "loss": 2.0215, "step": 3319 }, { "epoch": 0.71, "learning_rate": 0.0003997217997209449, "loss": 2.1748, "step": 3320 }, { "epoch": 0.71, "learning_rate": 0.00039916494724592555, "loss": 2.1445, "step": 3321 }, { "epoch": 0.71, "learning_rate": 0.0003986083861841839, "loss": 2.0479, "step": 3322 }, { "epoch": 0.71, "learning_rate": 0.00039805211680565936, "loss": 2.1289, "step": 3323 }, { "epoch": 0.71, "learning_rate": 0.00039749613938014984, "loss": 2.2656, "step": 3324 }, { "epoch": 0.71, "learning_rate": 0.0003969404541773132, "loss": 1.9541, "step": 3325 }, { "epoch": 0.72, "learning_rate": 0.0003963850614666633, "loss": 2.1436, "step": 3326 }, { "epoch": 0.72, "learning_rate": 0.00039582996151757324, "loss": 2.0254, "step": 3327 }, { "epoch": 0.72, "learning_rate": 0.0003952751545992749, "loss": 2.125, "step": 3328 }, { "epoch": 0.72, "learning_rate": 0.0003947206409808569, "loss": 2.0488, "step": 3329 }, { "epoch": 0.72, "learning_rate": 0.00039416642093126596, "loss": 2.2354, "step": 3330 }, { "epoch": 0.72, "learning_rate": 0.0003936124947193065, "loss": 2.1475, "step": 3331 }, { "epoch": 0.72, "learning_rate": 0.0003930588626136403, "loss": 2.0459, "step": 3332 }, { "epoch": 0.72, "learning_rate": 0.00039250552488278635, "loss": 2.3086, "step": 3333 }, { "epoch": 0.72, "learning_rate": 0.00039195248179512153, "loss": 2.0596, "step": 3334 }, { "epoch": 0.72, "learning_rate": 0.00039139973361887914, "loss": 2.0146, "step": 3335 }, { "epoch": 0.72, "learning_rate": 0.0003908472806221497, "loss": 2.292, "step": 3336 }, { "epoch": 0.72, "learning_rate": 0.00039029512307288053, "loss": 2.2754, "step": 3337 }, { "epoch": 0.72, "learning_rate": 0.00038974326123887516, "loss": 2.1211, "step": 3338 }, { "epoch": 0.72, "learning_rate": 0.000389191695387795, "loss": 2.0947, "step": 3339 }, { "epoch": 0.72, "learning_rate": 0.00038864042578715666, "loss": 2.043, "step": 3340 }, { "epoch": 0.72, "learning_rate": 0.0003880894527043335, "loss": 2.1152, "step": 3341 }, { "epoch": 0.72, "learning_rate": 0.0003875387764065552, "loss": 2.1641, "step": 3342 }, { "epoch": 0.72, "learning_rate": 0.0003869883971609072, "loss": 2.2578, "step": 3343 }, { "epoch": 0.72, "learning_rate": 0.0003864383152343307, "loss": 2.2539, "step": 3344 }, { "epoch": 0.72, "learning_rate": 0.00038588853089362363, "loss": 2.1709, "step": 3345 }, { "epoch": 0.72, "learning_rate": 0.00038533904440543875, "loss": 2.2881, "step": 3346 }, { "epoch": 0.72, "learning_rate": 0.0003847898560362846, "loss": 2.1279, "step": 3347 }, { "epoch": 0.72, "learning_rate": 0.000384240966052525, "loss": 2.1641, "step": 3348 }, { "epoch": 0.72, "learning_rate": 0.0003836923747203792, "loss": 2.2041, "step": 3349 }, { "epoch": 0.72, "learning_rate": 0.0003831440823059212, "loss": 2.4551, "step": 3350 }, { "epoch": 0.72, "learning_rate": 0.00038259608907508105, "loss": 2.166, "step": 3351 }, { "epoch": 0.72, "learning_rate": 0.00038204839529364267, "loss": 2.2988, "step": 3352 }, { "epoch": 0.72, "learning_rate": 0.00038150100122724505, "loss": 2.0195, "step": 3353 }, { "epoch": 0.72, "learning_rate": 0.000380953907141382, "loss": 2.1006, "step": 3354 }, { "epoch": 0.72, "learning_rate": 0.0003804071133014012, "loss": 2.0352, "step": 3355 }, { "epoch": 0.72, "learning_rate": 0.0003798606199725059, "loss": 2.0059, "step": 3356 }, { "epoch": 0.72, "learning_rate": 0.00037931442741975297, "loss": 2.0225, "step": 3357 }, { "epoch": 0.72, "learning_rate": 0.0003787685359080523, "loss": 2.0918, "step": 3358 }, { "epoch": 0.72, "learning_rate": 0.0003782229457021696, "loss": 2.249, "step": 3359 }, { "epoch": 0.72, "learning_rate": 0.00037767765706672366, "loss": 2.085, "step": 3360 }, { "epoch": 0.72, "learning_rate": 0.00037713267026618627, "loss": 2.1895, "step": 3361 }, { "epoch": 0.72, "learning_rate": 0.0003765879855648848, "loss": 2.2256, "step": 3362 }, { "epoch": 0.72, "learning_rate": 0.0003760436032269974, "loss": 2.3545, "step": 3363 }, { "epoch": 0.72, "learning_rate": 0.0003754995235165579, "loss": 2.1738, "step": 3364 }, { "epoch": 0.72, "learning_rate": 0.00037495574669745235, "loss": 2.2812, "step": 3365 }, { "epoch": 0.72, "learning_rate": 0.00037441227303341995, "loss": 2.1357, "step": 3366 }, { "epoch": 0.72, "learning_rate": 0.0003738691027880524, "loss": 2.125, "step": 3367 }, { "epoch": 0.72, "learning_rate": 0.0003733262362247959, "loss": 2.1191, "step": 3368 }, { "epoch": 0.72, "learning_rate": 0.00037278367360694687, "loss": 2.1191, "step": 3369 }, { "epoch": 0.72, "learning_rate": 0.0003722414151976565, "loss": 2.1211, "step": 3370 }, { "epoch": 0.72, "learning_rate": 0.00037169946125992757, "loss": 2.166, "step": 3371 }, { "epoch": 0.72, "learning_rate": 0.0003711578120566146, "loss": 2.1816, "step": 3372 }, { "epoch": 0.73, "learning_rate": 0.00037061646785042603, "loss": 2.0469, "step": 3373 }, { "epoch": 0.73, "learning_rate": 0.0003700754289039201, "loss": 2.1855, "step": 3374 }, { "epoch": 0.73, "learning_rate": 0.0003695346954795084, "loss": 2.165, "step": 3375 }, { "epoch": 0.73, "learning_rate": 0.00036899426783945465, "loss": 2.2656, "step": 3376 }, { "epoch": 0.73, "learning_rate": 0.00036845414624587326, "loss": 2.1719, "step": 3377 }, { "epoch": 0.73, "learning_rate": 0.0003679143309607308, "loss": 2.2227, "step": 3378 }, { "epoch": 0.73, "learning_rate": 0.00036737482224584504, "loss": 2.2158, "step": 3379 }, { "epoch": 0.73, "learning_rate": 0.00036683562036288507, "loss": 2.1904, "step": 3380 }, { "epoch": 0.73, "learning_rate": 0.00036629672557337114, "loss": 2.2314, "step": 3381 }, { "epoch": 0.73, "learning_rate": 0.0003657581381386751, "loss": 2.0869, "step": 3382 }, { "epoch": 0.73, "learning_rate": 0.0003652198583200189, "loss": 2.1709, "step": 3383 }, { "epoch": 0.73, "learning_rate": 0.0003646818863784759, "loss": 2.1953, "step": 3384 }, { "epoch": 0.73, "learning_rate": 0.0003641442225749696, "loss": 2.0059, "step": 3385 }, { "epoch": 0.73, "learning_rate": 0.00036360686717027427, "loss": 2.1709, "step": 3386 }, { "epoch": 0.73, "learning_rate": 0.00036306982042501524, "loss": 2.0312, "step": 3387 }, { "epoch": 0.73, "learning_rate": 0.00036253308259966735, "loss": 2.2393, "step": 3388 }, { "epoch": 0.73, "learning_rate": 0.0003619966539545557, "loss": 2.1035, "step": 3389 }, { "epoch": 0.73, "learning_rate": 0.00036146053474985564, "loss": 2.1934, "step": 3390 }, { "epoch": 0.73, "learning_rate": 0.0003609247252455923, "loss": 2.2129, "step": 3391 }, { "epoch": 0.73, "learning_rate": 0.00036038922570164045, "loss": 2.2051, "step": 3392 }, { "epoch": 0.73, "learning_rate": 0.0003598540363777254, "loss": 2.2012, "step": 3393 }, { "epoch": 0.73, "learning_rate": 0.0003593191575334209, "loss": 2.0918, "step": 3394 }, { "epoch": 0.73, "learning_rate": 0.0003587845894281506, "loss": 2.1426, "step": 3395 }, { "epoch": 0.73, "learning_rate": 0.00035825033232118756, "loss": 2.1689, "step": 3396 }, { "epoch": 0.73, "learning_rate": 0.0003577163864716536, "loss": 2.1758, "step": 3397 }, { "epoch": 0.73, "learning_rate": 0.0003571827521385196, "loss": 2.1396, "step": 3398 }, { "epoch": 0.73, "learning_rate": 0.00035664942958060655, "loss": 2.1064, "step": 3399 }, { "epoch": 0.73, "learning_rate": 0.0003561164190565819, "loss": 2.2295, "step": 3400 }, { "epoch": 0.73, "learning_rate": 0.00035558372082496404, "loss": 2.2324, "step": 3401 }, { "epoch": 0.73, "learning_rate": 0.0003550513351441187, "loss": 2.0527, "step": 3402 }, { "epoch": 0.73, "learning_rate": 0.0003545192622722599, "loss": 2.1758, "step": 3403 }, { "epoch": 0.73, "learning_rate": 0.00035398750246745105, "loss": 2.2305, "step": 3404 }, { "epoch": 0.73, "learning_rate": 0.0003534560559876028, "loss": 2.1514, "step": 3405 }, { "epoch": 0.73, "learning_rate": 0.0003529249230904732, "loss": 2.4102, "step": 3406 }, { "epoch": 0.73, "learning_rate": 0.0003523941040336699, "loss": 2.0879, "step": 3407 }, { "epoch": 0.73, "learning_rate": 0.00035186359907464726, "loss": 2.0332, "step": 3408 }, { "epoch": 0.73, "learning_rate": 0.0003513334084707069, "loss": 2.2549, "step": 3409 }, { "epoch": 0.73, "learning_rate": 0.000350803532479, "loss": 2.1719, "step": 3410 }, { "epoch": 0.73, "learning_rate": 0.0003502739713565219, "loss": 2.1494, "step": 3411 }, { "epoch": 0.73, "learning_rate": 0.00034974472536011836, "loss": 2.0215, "step": 3412 }, { "epoch": 0.73, "learning_rate": 0.0003492157947464805, "loss": 2.085, "step": 3413 }, { "epoch": 0.73, "learning_rate": 0.000348687179772147, "loss": 2.0615, "step": 3414 }, { "epoch": 0.73, "learning_rate": 0.0003481588806935034, "loss": 2.1123, "step": 3415 }, { "epoch": 0.73, "learning_rate": 0.000347630897766782, "loss": 2.1084, "step": 3416 }, { "epoch": 0.73, "learning_rate": 0.0003471032312480615, "loss": 2.0947, "step": 3417 }, { "epoch": 0.73, "learning_rate": 0.0003465758813932682, "loss": 2.2285, "step": 3418 }, { "epoch": 0.74, "learning_rate": 0.0003460488484581735, "loss": 2.0869, "step": 3419 }, { "epoch": 0.74, "learning_rate": 0.0003455221326983955, "loss": 2.0146, "step": 3420 }, { "epoch": 0.74, "learning_rate": 0.00034499573436939934, "loss": 2.0986, "step": 3421 }, { "epoch": 0.74, "learning_rate": 0.00034446965372649473, "loss": 2.2559, "step": 3422 }, { "epoch": 0.74, "learning_rate": 0.00034394389102483793, "loss": 2.0449, "step": 3423 }, { "epoch": 0.74, "learning_rate": 0.0003434184465194317, "loss": 2.2041, "step": 3424 }, { "epoch": 0.74, "learning_rate": 0.00034289332046512355, "loss": 2.2617, "step": 3425 }, { "epoch": 0.74, "learning_rate": 0.0003423685131166069, "loss": 2.1475, "step": 3426 }, { "epoch": 0.74, "learning_rate": 0.0003418440247284206, "loss": 2.1406, "step": 3427 }, { "epoch": 0.74, "learning_rate": 0.0003413198555549486, "loss": 2.3066, "step": 3428 }, { "epoch": 0.74, "learning_rate": 0.0003407960058504199, "loss": 2.2129, "step": 3429 }, { "epoch": 0.74, "learning_rate": 0.0003402724758689094, "loss": 2.166, "step": 3430 }, { "epoch": 0.74, "learning_rate": 0.00033974926586433627, "loss": 2.0527, "step": 3431 }, { "epoch": 0.74, "learning_rate": 0.00033922637609046436, "loss": 2.0029, "step": 3432 }, { "epoch": 0.74, "learning_rate": 0.0003387038068009024, "loss": 2.1504, "step": 3433 }, { "epoch": 0.74, "learning_rate": 0.0003381815582491036, "loss": 2.1387, "step": 3434 }, { "epoch": 0.74, "learning_rate": 0.00033765963068836603, "loss": 2.1084, "step": 3435 }, { "epoch": 0.74, "learning_rate": 0.00033713802437183193, "loss": 2.1064, "step": 3436 }, { "epoch": 0.74, "learning_rate": 0.0003366167395524865, "loss": 2.2207, "step": 3437 }, { "epoch": 0.74, "learning_rate": 0.00033609577648316115, "loss": 2.1377, "step": 3438 }, { "epoch": 0.74, "learning_rate": 0.0003355751354165297, "loss": 2.0322, "step": 3439 }, { "epoch": 0.74, "learning_rate": 0.0003350548166051101, "loss": 2.1982, "step": 3440 }, { "epoch": 0.74, "learning_rate": 0.00033453482030126446, "loss": 2.1504, "step": 3441 }, { "epoch": 0.74, "learning_rate": 0.00033401514675719814, "loss": 2.2676, "step": 3442 }, { "epoch": 0.74, "learning_rate": 0.00033349579622495983, "loss": 2.1465, "step": 3443 }, { "epoch": 0.74, "learning_rate": 0.00033297676895644167, "loss": 2.3213, "step": 3444 }, { "epoch": 0.74, "learning_rate": 0.0003324580652033791, "loss": 2.1387, "step": 3445 }, { "epoch": 0.74, "learning_rate": 0.0003319396852173503, "loss": 2.2363, "step": 3446 }, { "epoch": 0.74, "learning_rate": 0.0003314216292497775, "loss": 2.1475, "step": 3447 }, { "epoch": 0.74, "learning_rate": 0.00033090389755192386, "loss": 2.2344, "step": 3448 }, { "epoch": 0.74, "learning_rate": 0.0003303864903748973, "loss": 2.2812, "step": 3449 }, { "epoch": 0.74, "learning_rate": 0.00032986940796964705, "loss": 2.1504, "step": 3450 }, { "epoch": 0.74, "learning_rate": 0.00032935265058696493, "loss": 2.3252, "step": 3451 }, { "epoch": 0.74, "learning_rate": 0.00032883621847748637, "loss": 2.0537, "step": 3452 }, { "epoch": 0.74, "learning_rate": 0.0003283201118916871, "loss": 2.1309, "step": 3453 }, { "epoch": 0.74, "learning_rate": 0.00032780433107988585, "loss": 2.2314, "step": 3454 }, { "epoch": 0.74, "learning_rate": 0.00032728887629224413, "loss": 2.0908, "step": 3455 }, { "epoch": 0.74, "learning_rate": 0.00032677374777876435, "loss": 2.2168, "step": 3456 }, { "epoch": 0.74, "learning_rate": 0.00032625894578929037, "loss": 2.1631, "step": 3457 }, { "epoch": 0.74, "learning_rate": 0.0003257444705735095, "loss": 2.1016, "step": 3458 }, { "epoch": 0.74, "learning_rate": 0.00032523032238094787, "loss": 2.3467, "step": 3459 }, { "epoch": 0.74, "learning_rate": 0.0003247165014609754, "loss": 2.3242, "step": 3460 }, { "epoch": 0.74, "learning_rate": 0.000324203008062802, "loss": 2.2939, "step": 3461 }, { "epoch": 0.74, "learning_rate": 0.0003236898424354792, "loss": 2.1055, "step": 3462 }, { "epoch": 0.74, "learning_rate": 0.00032317700482789916, "loss": 2.1035, "step": 3463 }, { "epoch": 0.74, "learning_rate": 0.0003226644954887952, "loss": 1.9746, "step": 3464 }, { "epoch": 0.74, "learning_rate": 0.0003221523146667412, "loss": 2.3193, "step": 3465 }, { "epoch": 0.75, "learning_rate": 0.00032164046261015254, "loss": 2.2021, "step": 3466 }, { "epoch": 0.75, "learning_rate": 0.0003211289395672843, "loss": 2.1006, "step": 3467 }, { "epoch": 0.75, "learning_rate": 0.000320617745786232, "loss": 2.3301, "step": 3468 }, { "epoch": 0.75, "learning_rate": 0.0003201068815149317, "loss": 2.0225, "step": 3469 }, { "epoch": 0.75, "learning_rate": 0.0003195963470011598, "loss": 2.1074, "step": 3470 }, { "epoch": 0.75, "learning_rate": 0.000319086142492532, "loss": 1.9834, "step": 3471 }, { "epoch": 0.75, "learning_rate": 0.0003185762682365053, "loss": 2.1172, "step": 3472 }, { "epoch": 0.75, "learning_rate": 0.0003180667244803754, "loss": 2.1367, "step": 3473 }, { "epoch": 0.75, "learning_rate": 0.000317557511471278, "loss": 2.0625, "step": 3474 }, { "epoch": 0.75, "learning_rate": 0.0003170486294561885, "loss": 2.0693, "step": 3475 }, { "epoch": 0.75, "learning_rate": 0.0003165400786819216, "loss": 2.0928, "step": 3476 }, { "epoch": 0.75, "learning_rate": 0.0003160318593951311, "loss": 2.0996, "step": 3477 }, { "epoch": 0.75, "learning_rate": 0.00031552397184231106, "loss": 2.1172, "step": 3478 }, { "epoch": 0.75, "learning_rate": 0.0003150164162697937, "loss": 2.0967, "step": 3479 }, { "epoch": 0.75, "learning_rate": 0.00031450919292375036, "loss": 2.167, "step": 3480 }, { "epoch": 0.75, "learning_rate": 0.0003140023020501912, "loss": 2.3271, "step": 3481 }, { "epoch": 0.75, "learning_rate": 0.0003134957438949653, "loss": 1.9746, "step": 3482 }, { "epoch": 0.75, "learning_rate": 0.00031298951870376067, "loss": 2.2148, "step": 3483 }, { "epoch": 0.75, "learning_rate": 0.00031248362672210375, "loss": 2.0762, "step": 3484 }, { "epoch": 0.75, "learning_rate": 0.0003119780681953579, "loss": 2.1104, "step": 3485 }, { "epoch": 0.75, "learning_rate": 0.00031147284336872715, "loss": 2.0977, "step": 3486 }, { "epoch": 0.75, "learning_rate": 0.0003109679524872521, "loss": 2.1543, "step": 3487 }, { "epoch": 0.75, "learning_rate": 0.0003104633957958115, "loss": 2.1299, "step": 3488 }, { "epoch": 0.75, "learning_rate": 0.0003099591735391232, "loss": 2.1182, "step": 3489 }, { "epoch": 0.75, "learning_rate": 0.00030945528596174054, "loss": 1.998, "step": 3490 }, { "epoch": 0.75, "learning_rate": 0.00030895173330805714, "loss": 2.0693, "step": 3491 }, { "epoch": 0.75, "learning_rate": 0.0003084485158223026, "loss": 2.1953, "step": 3492 }, { "epoch": 0.75, "learning_rate": 0.0003079456337485442, "loss": 2.0703, "step": 3493 }, { "epoch": 0.75, "learning_rate": 0.0003074430873306865, "loss": 2.0439, "step": 3494 }, { "epoch": 0.75, "learning_rate": 0.0003069408768124723, "loss": 2.168, "step": 3495 }, { "epoch": 0.75, "learning_rate": 0.00030643900243747935, "loss": 2.0146, "step": 3496 }, { "epoch": 0.75, "learning_rate": 0.0003059374644491246, "loss": 2.043, "step": 3497 }, { "epoch": 0.75, "learning_rate": 0.00030543626309066066, "loss": 2.2617, "step": 3498 }, { "epoch": 0.75, "learning_rate": 0.0003049353986051766, "loss": 2.123, "step": 3499 }, { "epoch": 0.75, "learning_rate": 0.00030443487123559974, "loss": 2.1133, "step": 3500 }, { "epoch": 0.75, "learning_rate": 0.0003039346812246916, "loss": 2.1904, "step": 3501 }, { "epoch": 0.75, "learning_rate": 0.00030343482881505136, "loss": 2.1289, "step": 3502 }, { "epoch": 0.75, "learning_rate": 0.0003029353142491149, "loss": 2.127, "step": 3503 }, { "epoch": 0.75, "learning_rate": 0.00030243613776915346, "loss": 1.998, "step": 3504 }, { "epoch": 0.75, "learning_rate": 0.00030193729961727436, "loss": 2.2031, "step": 3505 }, { "epoch": 0.75, "learning_rate": 0.000301438800035421, "loss": 2.3281, "step": 3506 }, { "epoch": 0.75, "learning_rate": 0.0003009406392653723, "loss": 2.2695, "step": 3507 }, { "epoch": 0.75, "learning_rate": 0.0003004428175487437, "loss": 2.1816, "step": 3508 }, { "epoch": 0.75, "learning_rate": 0.0002999453351269853, "loss": 2.0391, "step": 3509 }, { "epoch": 0.75, "learning_rate": 0.00029944819224138276, "loss": 2.1709, "step": 3510 }, { "epoch": 0.75, "learning_rate": 0.00029895138913305753, "loss": 2.085, "step": 3511 }, { "epoch": 0.76, "learning_rate": 0.00029845492604296564, "loss": 2.0762, "step": 3512 }, { "epoch": 0.76, "learning_rate": 0.0002979588032118985, "loss": 2.2217, "step": 3513 }, { "epoch": 0.76, "learning_rate": 0.0002974630208804829, "loss": 2.1045, "step": 3514 }, { "epoch": 0.76, "learning_rate": 0.00029696757928917997, "loss": 2.1104, "step": 3515 }, { "epoch": 0.76, "learning_rate": 0.00029647247867828553, "loss": 2.1543, "step": 3516 }, { "epoch": 0.76, "learning_rate": 0.0002959777192879304, "loss": 2.2002, "step": 3517 }, { "epoch": 0.76, "learning_rate": 0.00029548330135807963, "loss": 2.1406, "step": 3518 }, { "epoch": 0.76, "learning_rate": 0.0002949892251285323, "loss": 2.1455, "step": 3519 }, { "epoch": 0.76, "learning_rate": 0.0002944954908389229, "loss": 2.1318, "step": 3520 }, { "epoch": 0.76, "learning_rate": 0.00029400209872871907, "loss": 2.0, "step": 3521 }, { "epoch": 0.76, "learning_rate": 0.00029350904903722275, "loss": 2.2812, "step": 3522 }, { "epoch": 0.76, "learning_rate": 0.00029301634200356983, "loss": 2.1924, "step": 3523 }, { "epoch": 0.76, "learning_rate": 0.0002925239778667301, "loss": 2.0557, "step": 3524 }, { "epoch": 0.76, "learning_rate": 0.00029203195686550653, "loss": 2.1299, "step": 3525 }, { "epoch": 0.76, "learning_rate": 0.000291540279238537, "loss": 2.21, "step": 3526 }, { "epoch": 0.76, "learning_rate": 0.0002910489452242907, "loss": 2.168, "step": 3527 }, { "epoch": 0.76, "learning_rate": 0.0002905579550610724, "loss": 2.2051, "step": 3528 }, { "epoch": 0.76, "learning_rate": 0.0002900673089870187, "loss": 2.2412, "step": 3529 }, { "epoch": 0.76, "learning_rate": 0.0002895770072400995, "loss": 2.1562, "step": 3530 }, { "epoch": 0.76, "learning_rate": 0.0002890870500581184, "loss": 2.2139, "step": 3531 }, { "epoch": 0.76, "learning_rate": 0.0002885974376787115, "loss": 2.251, "step": 3532 }, { "epoch": 0.76, "learning_rate": 0.00028810817033934655, "loss": 2.2061, "step": 3533 }, { "epoch": 0.76, "learning_rate": 0.00028761924827732575, "loss": 2.1406, "step": 3534 }, { "epoch": 0.76, "learning_rate": 0.00028713067172978304, "loss": 2.1143, "step": 3535 }, { "epoch": 0.76, "learning_rate": 0.0002866424409336842, "loss": 2.085, "step": 3536 }, { "epoch": 0.76, "learning_rate": 0.000286154556125829, "loss": 2.1396, "step": 3537 }, { "epoch": 0.76, "learning_rate": 0.00028566701754284687, "loss": 2.1602, "step": 3538 }, { "epoch": 0.76, "learning_rate": 0.0002851798254212017, "loss": 2.1494, "step": 3539 }, { "epoch": 0.76, "learning_rate": 0.00028469297999718824, "loss": 2.165, "step": 3540 }, { "epoch": 0.76, "learning_rate": 0.0002842064815069331, "loss": 2.1152, "step": 3541 }, { "epoch": 0.76, "learning_rate": 0.0002837203301863948, "loss": 2.0801, "step": 3542 }, { "epoch": 0.76, "learning_rate": 0.00028323452627136323, "loss": 2.1943, "step": 3543 }, { "epoch": 0.76, "learning_rate": 0.00028274906999746, "loss": 2.1533, "step": 3544 }, { "epoch": 0.76, "learning_rate": 0.00028226396160013854, "loss": 2.2188, "step": 3545 }, { "epoch": 0.76, "learning_rate": 0.0002817792013146827, "loss": 2.0947, "step": 3546 }, { "epoch": 0.76, "learning_rate": 0.00028129478937620776, "loss": 2.1201, "step": 3547 }, { "epoch": 0.76, "learning_rate": 0.00028081072601966105, "loss": 2.0996, "step": 3548 }, { "epoch": 0.76, "learning_rate": 0.00028032701147981897, "loss": 2.2275, "step": 3549 }, { "epoch": 0.76, "learning_rate": 0.00027984364599128986, "loss": 2.1406, "step": 3550 }, { "epoch": 0.76, "learning_rate": 0.00027936062978851315, "loss": 2.2607, "step": 3551 }, { "epoch": 0.76, "learning_rate": 0.0002788779631057581, "loss": 2.042, "step": 3552 }, { "epoch": 0.76, "learning_rate": 0.00027839564617712463, "loss": 2.1416, "step": 3553 }, { "epoch": 0.76, "learning_rate": 0.00027791367923654333, "loss": 2.1289, "step": 3554 }, { "epoch": 0.76, "learning_rate": 0.0002774320625177741, "loss": 2.1045, "step": 3555 }, { "epoch": 0.76, "learning_rate": 0.0002769507962544087, "loss": 2.1973, "step": 3556 }, { "epoch": 0.76, "learning_rate": 0.00027646988067986725, "loss": 2.123, "step": 3557 }, { "epoch": 0.76, "learning_rate": 0.0002759893160274006, "loss": 2.2529, "step": 3558 }, { "epoch": 0.77, "learning_rate": 0.0002755091025300893, "loss": 2.1611, "step": 3559 }, { "epoch": 0.77, "learning_rate": 0.0002750292404208433, "loss": 2.3574, "step": 3560 }, { "epoch": 0.77, "learning_rate": 0.00027454972993240224, "loss": 2.2275, "step": 3561 }, { "epoch": 0.77, "learning_rate": 0.00027407057129733584, "loss": 2.1709, "step": 3562 }, { "epoch": 0.77, "learning_rate": 0.0002735917647480426, "loss": 2.2441, "step": 3563 }, { "epoch": 0.77, "learning_rate": 0.0002731133105167495, "loss": 2.2715, "step": 3564 }, { "epoch": 0.77, "learning_rate": 0.00027263520883551406, "loss": 2.0811, "step": 3565 }, { "epoch": 0.77, "learning_rate": 0.0002721574599362222, "loss": 2.1885, "step": 3566 }, { "epoch": 0.77, "learning_rate": 0.0002716800640505883, "loss": 2.1133, "step": 3567 }, { "epoch": 0.77, "learning_rate": 0.0002712030214101565, "loss": 2.1816, "step": 3568 }, { "epoch": 0.77, "learning_rate": 0.0002707263322462987, "loss": 2.1611, "step": 3569 }, { "epoch": 0.77, "learning_rate": 0.0002702499967902159, "loss": 2.002, "step": 3570 }, { "epoch": 0.77, "learning_rate": 0.00026977401527293723, "loss": 2.1465, "step": 3571 }, { "epoch": 0.77, "learning_rate": 0.00026929838792532035, "loss": 2.1211, "step": 3572 }, { "epoch": 0.77, "learning_rate": 0.0002688231149780507, "loss": 2.0098, "step": 3573 }, { "epoch": 0.77, "learning_rate": 0.0002683481966616431, "loss": 2.1797, "step": 3574 }, { "epoch": 0.77, "learning_rate": 0.0002678736332064383, "loss": 2.083, "step": 3575 }, { "epoch": 0.77, "learning_rate": 0.00026739942484260704, "loss": 2.2227, "step": 3576 }, { "epoch": 0.77, "learning_rate": 0.00026692557180014667, "loss": 2.0664, "step": 3577 }, { "epoch": 0.77, "learning_rate": 0.00026645207430888194, "loss": 2.1094, "step": 3578 }, { "epoch": 0.77, "learning_rate": 0.0002659789325984667, "loss": 2.1533, "step": 3579 }, { "epoch": 0.77, "learning_rate": 0.0002655061468983804, "loss": 2.1875, "step": 3580 }, { "epoch": 0.77, "learning_rate": 0.0002650337174379304, "loss": 2.0293, "step": 3581 }, { "epoch": 0.77, "learning_rate": 0.0002645616444462522, "loss": 1.9512, "step": 3582 }, { "epoch": 0.77, "learning_rate": 0.00026408992815230746, "loss": 2.2529, "step": 3583 }, { "epoch": 0.77, "learning_rate": 0.00026361856878488467, "loss": 2.2529, "step": 3584 }, { "epoch": 0.77, "learning_rate": 0.0002631475665726005, "loss": 2.1621, "step": 3585 }, { "epoch": 0.77, "learning_rate": 0.0002626769217438965, "loss": 2.1758, "step": 3586 }, { "epoch": 0.77, "learning_rate": 0.00026220663452704263, "loss": 2.2471, "step": 3587 }, { "epoch": 0.77, "learning_rate": 0.00026173670515013434, "loss": 1.9492, "step": 3588 }, { "epoch": 0.77, "learning_rate": 0.000261267133841094, "loss": 2.0947, "step": 3589 }, { "epoch": 0.77, "learning_rate": 0.0002607979208276701, "loss": 2.083, "step": 3590 }, { "epoch": 0.77, "learning_rate": 0.00026032906633743734, "loss": 2.124, "step": 3591 }, { "epoch": 0.77, "learning_rate": 0.00025986057059779633, "loss": 2.1377, "step": 3592 }, { "epoch": 0.77, "learning_rate": 0.0002593924338359747, "loss": 2.1484, "step": 3593 }, { "epoch": 0.77, "learning_rate": 0.0002589246562790247, "loss": 2.1396, "step": 3594 }, { "epoch": 0.77, "learning_rate": 0.00025845723815382513, "loss": 2.1133, "step": 3595 }, { "epoch": 0.77, "learning_rate": 0.00025799017968708004, "loss": 2.1572, "step": 3596 }, { "epoch": 0.77, "learning_rate": 0.00025752348110531933, "loss": 1.958, "step": 3597 }, { "epoch": 0.77, "learning_rate": 0.00025705714263489775, "loss": 2.1836, "step": 3598 }, { "epoch": 0.77, "learning_rate": 0.0002565911645019967, "loss": 2.1055, "step": 3599 }, { "epoch": 0.77, "learning_rate": 0.0002561255469326217, "loss": 2.1143, "step": 3600 }, { "epoch": 0.77, "learning_rate": 0.0002556602901526035, "loss": 2.251, "step": 3601 }, { "epoch": 0.77, "learning_rate": 0.0002551953943875982, "loss": 2.1836, "step": 3602 }, { "epoch": 0.77, "learning_rate": 0.00025473085986308633, "loss": 2.1113, "step": 3603 }, { "epoch": 0.77, "learning_rate": 0.000254266686804374, "loss": 2.1602, "step": 3604 }, { "epoch": 0.78, "learning_rate": 0.0002538028754365914, "loss": 2.1475, "step": 3605 }, { "epoch": 0.78, "learning_rate": 0.00025333942598469337, "loss": 2.252, "step": 3606 }, { "epoch": 0.78, "learning_rate": 0.00025287633867345936, "loss": 2.1289, "step": 3607 }, { "epoch": 0.78, "learning_rate": 0.0002524136137274929, "loss": 2.0527, "step": 3608 }, { "epoch": 0.78, "learning_rate": 0.0002519512513712219, "loss": 2.1035, "step": 3609 }, { "epoch": 0.78, "learning_rate": 0.0002514892518288988, "loss": 2.1318, "step": 3610 }, { "epoch": 0.78, "learning_rate": 0.00025102761532460005, "loss": 2.0332, "step": 3611 }, { "epoch": 0.78, "learning_rate": 0.0002505663420822245, "loss": 2.2588, "step": 3612 }, { "epoch": 0.78, "learning_rate": 0.0002501054323254972, "loss": 1.959, "step": 3613 }, { "epoch": 0.78, "learning_rate": 0.00024964488627796533, "loss": 2.3613, "step": 3614 }, { "epoch": 0.78, "learning_rate": 0.00024918470416299967, "loss": 2.001, "step": 3615 }, { "epoch": 0.78, "learning_rate": 0.000248724886203796, "loss": 2.1221, "step": 3616 }, { "epoch": 0.78, "learning_rate": 0.0002482654326233709, "loss": 2.1758, "step": 3617 }, { "epoch": 0.78, "learning_rate": 0.0002478063436445667, "loss": 2.0762, "step": 3618 }, { "epoch": 0.78, "learning_rate": 0.00024734761949004767, "loss": 2.0957, "step": 3619 }, { "epoch": 0.78, "learning_rate": 0.0002468892603823012, "loss": 2.2334, "step": 3620 }, { "epoch": 0.78, "learning_rate": 0.00024643126654363767, "loss": 2.1572, "step": 3621 }, { "epoch": 0.78, "learning_rate": 0.000245973638196191, "loss": 2.1406, "step": 3622 }, { "epoch": 0.78, "learning_rate": 0.0002455163755619164, "loss": 2.2061, "step": 3623 }, { "epoch": 0.78, "learning_rate": 0.0002450594788625932, "loss": 2.2168, "step": 3624 }, { "epoch": 0.78, "learning_rate": 0.0002446029483198223, "loss": 2.0654, "step": 3625 }, { "epoch": 0.78, "learning_rate": 0.0002441467841550271, "loss": 2.1582, "step": 3626 }, { "epoch": 0.78, "learning_rate": 0.00024369098658945433, "loss": 2.1104, "step": 3627 }, { "epoch": 0.78, "learning_rate": 0.00024323555584417124, "loss": 2.1152, "step": 3628 }, { "epoch": 0.78, "learning_rate": 0.00024278049214006804, "loss": 2.2168, "step": 3629 }, { "epoch": 0.78, "learning_rate": 0.0002423257956978574, "loss": 2.334, "step": 3630 }, { "epoch": 0.78, "learning_rate": 0.00024187146673807302, "loss": 2.0137, "step": 3631 }, { "epoch": 0.78, "learning_rate": 0.000241417505481071, "loss": 2.084, "step": 3632 }, { "epoch": 0.78, "learning_rate": 0.0002409639121470284, "loss": 2.0254, "step": 3633 }, { "epoch": 0.78, "learning_rate": 0.00024051068695594424, "loss": 2.0449, "step": 3634 }, { "epoch": 0.78, "learning_rate": 0.0002400578301276395, "loss": 2.1055, "step": 3635 }, { "epoch": 0.78, "learning_rate": 0.00023960534188175554, "loss": 1.9268, "step": 3636 }, { "epoch": 0.78, "learning_rate": 0.0002391532224377556, "loss": 2.248, "step": 3637 }, { "epoch": 0.78, "learning_rate": 0.00023870147201492364, "loss": 2.083, "step": 3638 }, { "epoch": 0.78, "learning_rate": 0.00023825009083236503, "loss": 2.1514, "step": 3639 }, { "epoch": 0.78, "learning_rate": 0.00023779907910900544, "loss": 2.1846, "step": 3640 }, { "epoch": 0.78, "learning_rate": 0.00023734843706359245, "loss": 2.0469, "step": 3641 }, { "epoch": 0.78, "learning_rate": 0.00023689816491469318, "loss": 2.126, "step": 3642 }, { "epoch": 0.78, "learning_rate": 0.0002364482628806961, "loss": 2.207, "step": 3643 }, { "epoch": 0.78, "learning_rate": 0.00023599873117980964, "loss": 2.0059, "step": 3644 }, { "epoch": 0.78, "learning_rate": 0.00023554957003006305, "loss": 2.1035, "step": 3645 }, { "epoch": 0.78, "learning_rate": 0.00023510077964930532, "loss": 2.2051, "step": 3646 }, { "epoch": 0.78, "learning_rate": 0.00023465236025520654, "loss": 2.0811, "step": 3647 }, { "epoch": 0.78, "learning_rate": 0.00023420431206525594, "loss": 2.1504, "step": 3648 }, { "epoch": 0.78, "learning_rate": 0.00023375663529676317, "loss": 2.1533, "step": 3649 }, { "epoch": 0.78, "learning_rate": 0.00023330933016685752, "loss": 2.1553, "step": 3650 }, { "epoch": 0.78, "learning_rate": 0.000232862396892488, "loss": 2.2314, "step": 3651 }, { "epoch": 0.79, "learning_rate": 0.0002324158356904239, "loss": 2.1387, "step": 3652 }, { "epoch": 0.79, "learning_rate": 0.00023196964677725353, "loss": 2.1533, "step": 3653 }, { "epoch": 0.79, "learning_rate": 0.00023152383036938385, "loss": 2.2236, "step": 3654 }, { "epoch": 0.79, "learning_rate": 0.0002310783866830427, "loss": 2.0664, "step": 3655 }, { "epoch": 0.79, "learning_rate": 0.00023063331593427627, "loss": 2.0195, "step": 3656 }, { "epoch": 0.79, "learning_rate": 0.00023018861833894966, "loss": 2.0293, "step": 3657 }, { "epoch": 0.79, "learning_rate": 0.0002297442941127479, "loss": 2.1182, "step": 3658 }, { "epoch": 0.79, "learning_rate": 0.00022930034347117434, "loss": 2.1719, "step": 3659 }, { "epoch": 0.79, "learning_rate": 0.00022885676662955025, "loss": 2.2412, "step": 3660 }, { "epoch": 0.79, "learning_rate": 0.00022841356380301757, "loss": 2.0176, "step": 3661 }, { "epoch": 0.79, "learning_rate": 0.00022797073520653522, "loss": 2.0898, "step": 3662 }, { "epoch": 0.79, "learning_rate": 0.0002275282810548811, "loss": 2.2412, "step": 3663 }, { "epoch": 0.79, "learning_rate": 0.00022708620156265235, "loss": 2.1182, "step": 3664 }, { "epoch": 0.79, "learning_rate": 0.00022664449694426248, "loss": 2.1367, "step": 3665 }, { "epoch": 0.79, "learning_rate": 0.00022620316741394508, "loss": 1.9453, "step": 3666 }, { "epoch": 0.79, "learning_rate": 0.00022576221318575086, "loss": 2.1143, "step": 3667 }, { "epoch": 0.79, "learning_rate": 0.00022532163447354868, "loss": 2.1143, "step": 3668 }, { "epoch": 0.79, "learning_rate": 0.00022488143149102535, "loss": 2.1943, "step": 3669 }, { "epoch": 0.79, "learning_rate": 0.00022444160445168527, "loss": 2.0537, "step": 3670 }, { "epoch": 0.79, "learning_rate": 0.00022400215356885035, "loss": 2.1348, "step": 3671 }, { "epoch": 0.79, "learning_rate": 0.00022356307905566086, "loss": 2.166, "step": 3672 }, { "epoch": 0.79, "learning_rate": 0.00022312438112507383, "loss": 2.1035, "step": 3673 }, { "epoch": 0.79, "learning_rate": 0.00022268605998986346, "loss": 2.1484, "step": 3674 }, { "epoch": 0.79, "learning_rate": 0.0002222481158626223, "loss": 2.1426, "step": 3675 }, { "epoch": 0.79, "learning_rate": 0.00022181054895575847, "loss": 2.1611, "step": 3676 }, { "epoch": 0.79, "learning_rate": 0.00022137335948149806, "loss": 2.1084, "step": 3677 }, { "epoch": 0.79, "learning_rate": 0.00022093654765188454, "loss": 2.0332, "step": 3678 }, { "epoch": 0.79, "learning_rate": 0.0002205001136787772, "loss": 2.1719, "step": 3679 }, { "epoch": 0.79, "learning_rate": 0.00022006405777385264, "loss": 2.0635, "step": 3680 }, { "epoch": 0.79, "learning_rate": 0.00021962838014860398, "loss": 2.1182, "step": 3681 }, { "epoch": 0.79, "learning_rate": 0.00021919308101434065, "loss": 2.1904, "step": 3682 }, { "epoch": 0.79, "learning_rate": 0.00021875816058218922, "loss": 2.1152, "step": 3683 }, { "epoch": 0.79, "learning_rate": 0.00021832361906309184, "loss": 2.2686, "step": 3684 }, { "epoch": 0.79, "learning_rate": 0.00021788945666780714, "loss": 2.1768, "step": 3685 }, { "epoch": 0.79, "learning_rate": 0.00021745567360690978, "loss": 2.123, "step": 3686 }, { "epoch": 0.79, "learning_rate": 0.00021702227009079056, "loss": 2.1367, "step": 3687 }, { "epoch": 0.79, "learning_rate": 0.00021658924632965594, "loss": 2.0605, "step": 3688 }, { "epoch": 0.79, "learning_rate": 0.00021615660253352886, "loss": 1.9941, "step": 3689 }, { "epoch": 0.79, "learning_rate": 0.0002157243389122474, "loss": 2.2188, "step": 3690 }, { "epoch": 0.79, "learning_rate": 0.00021529245567546517, "loss": 2.1514, "step": 3691 }, { "epoch": 0.79, "learning_rate": 0.00021486095303265175, "loss": 2.2695, "step": 3692 }, { "epoch": 0.79, "learning_rate": 0.00021442983119309157, "loss": 2.2266, "step": 3693 }, { "epoch": 0.79, "learning_rate": 0.0002139990903658845, "loss": 2.0664, "step": 3694 }, { "epoch": 0.79, "learning_rate": 0.00021356873075994642, "loss": 2.1748, "step": 3695 }, { "epoch": 0.79, "learning_rate": 0.00021313875258400738, "loss": 2.083, "step": 3696 }, { "epoch": 0.79, "learning_rate": 0.00021270915604661256, "loss": 2.1816, "step": 3697 }, { "epoch": 0.8, "learning_rate": 0.0002122799413561224, "loss": 1.9805, "step": 3698 }, { "epoch": 0.8, "learning_rate": 0.00021185110872071146, "loss": 2.1953, "step": 3699 }, { "epoch": 0.8, "learning_rate": 0.00021142265834837016, "loss": 1.9941, "step": 3700 }, { "epoch": 0.8, "learning_rate": 0.00021099459044690284, "loss": 2.0352, "step": 3701 }, { "epoch": 0.8, "learning_rate": 0.0002105669052239274, "loss": 2.1396, "step": 3702 }, { "epoch": 0.8, "learning_rate": 0.00021013960288687795, "loss": 2.1133, "step": 3703 }, { "epoch": 0.8, "learning_rate": 0.00020971268364300177, "loss": 2.1973, "step": 3704 }, { "epoch": 0.8, "learning_rate": 0.00020928614769936038, "loss": 2.1035, "step": 3705 }, { "epoch": 0.8, "learning_rate": 0.00020885999526283017, "loss": 2.124, "step": 3706 }, { "epoch": 0.8, "learning_rate": 0.00020843422654010025, "loss": 2.0479, "step": 3707 }, { "epoch": 0.8, "learning_rate": 0.0002080088417376743, "loss": 2.2168, "step": 3708 }, { "epoch": 0.8, "learning_rate": 0.00020758384106187044, "loss": 2.1152, "step": 3709 }, { "epoch": 0.8, "learning_rate": 0.00020715922471881953, "loss": 2.1582, "step": 3710 }, { "epoch": 0.8, "learning_rate": 0.00020673499291446596, "loss": 2.25, "step": 3711 }, { "epoch": 0.8, "learning_rate": 0.000206311145854569, "loss": 2.0527, "step": 3712 }, { "epoch": 0.8, "learning_rate": 0.00020588768374469902, "loss": 2.0039, "step": 3713 }, { "epoch": 0.8, "learning_rate": 0.00020546460679024203, "loss": 2.1709, "step": 3714 }, { "epoch": 0.8, "learning_rate": 0.0002050419151963957, "loss": 2.2021, "step": 3715 }, { "epoch": 0.8, "learning_rate": 0.00020461960916817146, "loss": 2.2822, "step": 3716 }, { "epoch": 0.8, "learning_rate": 0.00020419768891039337, "loss": 2.085, "step": 3717 }, { "epoch": 0.8, "learning_rate": 0.0002037761546276986, "loss": 2.1143, "step": 3718 }, { "epoch": 0.8, "learning_rate": 0.00020335500652453697, "loss": 2.0654, "step": 3719 }, { "epoch": 0.8, "learning_rate": 0.00020293424480517154, "loss": 2.1152, "step": 3720 }, { "epoch": 0.8, "learning_rate": 0.00020251386967367723, "loss": 2.1064, "step": 3721 }, { "epoch": 0.8, "learning_rate": 0.00020209388133394179, "loss": 2.2734, "step": 3722 }, { "epoch": 0.8, "learning_rate": 0.00020167427998966537, "loss": 2.0801, "step": 3723 }, { "epoch": 0.8, "learning_rate": 0.0002012550658443605, "loss": 2.2422, "step": 3724 }, { "epoch": 0.8, "learning_rate": 0.0002008362391013514, "loss": 2.0537, "step": 3725 }, { "epoch": 0.8, "learning_rate": 0.00020041779996377541, "loss": 1.959, "step": 3726 }, { "epoch": 0.8, "learning_rate": 0.00019999974863458102, "loss": 2.2051, "step": 3727 }, { "epoch": 0.8, "learning_rate": 0.00019958208531652876, "loss": 2.1309, "step": 3728 }, { "epoch": 0.8, "learning_rate": 0.00019916481021219136, "loss": 2.1084, "step": 3729 }, { "epoch": 0.8, "learning_rate": 0.00019874792352395242, "loss": 2.2832, "step": 3730 }, { "epoch": 0.8, "learning_rate": 0.0001983314254540084, "loss": 2.3115, "step": 3731 }, { "epoch": 0.8, "learning_rate": 0.00019791531620436643, "loss": 2.1201, "step": 3732 }, { "epoch": 0.8, "learning_rate": 0.0001974995959768451, "loss": 2.2158, "step": 3733 }, { "epoch": 0.8, "learning_rate": 0.00019708426497307442, "loss": 2.1367, "step": 3734 }, { "epoch": 0.8, "learning_rate": 0.00019666932339449584, "loss": 2.1309, "step": 3735 }, { "epoch": 0.8, "learning_rate": 0.00019625477144236125, "loss": 2.2119, "step": 3736 }, { "epoch": 0.8, "learning_rate": 0.0001958406093177346, "loss": 2.0967, "step": 3737 }, { "epoch": 0.8, "learning_rate": 0.00019542683722149047, "loss": 2.1826, "step": 3738 }, { "epoch": 0.8, "learning_rate": 0.00019501345535431302, "loss": 1.998, "step": 3739 }, { "epoch": 0.8, "learning_rate": 0.00019460046391669893, "loss": 2.1699, "step": 3740 }, { "epoch": 0.8, "learning_rate": 0.00019418786310895463, "loss": 2.0996, "step": 3741 }, { "epoch": 0.8, "learning_rate": 0.00019377565313119684, "loss": 2.2266, "step": 3742 }, { "epoch": 0.8, "learning_rate": 0.0001933638341833538, "loss": 2.0361, "step": 3743 }, { "epoch": 0.8, "learning_rate": 0.00019295240646516242, "loss": 2.2256, "step": 3744 }, { "epoch": 0.81, "learning_rate": 0.00019254137017617157, "loss": 1.9248, "step": 3745 }, { "epoch": 0.81, "learning_rate": 0.0001921307255157393, "loss": 1.9844, "step": 3746 }, { "epoch": 0.81, "learning_rate": 0.0001917204726830335, "loss": 2.0352, "step": 3747 }, { "epoch": 0.81, "learning_rate": 0.00019131061187703313, "loss": 2.0547, "step": 3748 }, { "epoch": 0.81, "learning_rate": 0.0001909011432965263, "loss": 2.0977, "step": 3749 }, { "epoch": 0.81, "learning_rate": 0.00019049206714011002, "loss": 1.9824, "step": 3750 }, { "epoch": 0.81, "learning_rate": 0.0001900833836061928, "loss": 2.2773, "step": 3751 }, { "epoch": 0.81, "learning_rate": 0.00018967509289299147, "loss": 2.0908, "step": 3752 }, { "epoch": 0.81, "learning_rate": 0.0001892671951985323, "loss": 2.1094, "step": 3753 }, { "epoch": 0.81, "learning_rate": 0.00018885969072065224, "loss": 2.1777, "step": 3754 }, { "epoch": 0.81, "learning_rate": 0.0001884525796569957, "loss": 2.0225, "step": 3755 }, { "epoch": 0.81, "learning_rate": 0.00018804586220501706, "loss": 2.1211, "step": 3756 }, { "epoch": 0.81, "learning_rate": 0.00018763953856198057, "loss": 2.167, "step": 3757 }, { "epoch": 0.81, "learning_rate": 0.00018723360892495844, "loss": 2.0195, "step": 3758 }, { "epoch": 0.81, "learning_rate": 0.0001868280734908321, "loss": 2.1104, "step": 3759 }, { "epoch": 0.81, "learning_rate": 0.00018642293245629195, "loss": 2.1641, "step": 3760 }, { "epoch": 0.81, "learning_rate": 0.0001860181860178366, "loss": 2.0898, "step": 3761 }, { "epoch": 0.81, "learning_rate": 0.00018561383437177425, "loss": 2.334, "step": 3762 }, { "epoch": 0.81, "learning_rate": 0.0001852098777142207, "loss": 2.0645, "step": 3763 }, { "epoch": 0.81, "learning_rate": 0.00018480631624110056, "loss": 2.1348, "step": 3764 }, { "epoch": 0.81, "learning_rate": 0.00018440315014814669, "loss": 2.252, "step": 3765 }, { "epoch": 0.81, "learning_rate": 0.00018400037963089999, "loss": 2.1416, "step": 3766 }, { "epoch": 0.81, "learning_rate": 0.00018359800488470978, "loss": 2.1162, "step": 3767 }, { "epoch": 0.81, "learning_rate": 0.00018319602610473374, "loss": 2.1348, "step": 3768 }, { "epoch": 0.81, "learning_rate": 0.00018279444348593676, "loss": 2.1895, "step": 3769 }, { "epoch": 0.81, "learning_rate": 0.00018239325722309196, "loss": 2.1982, "step": 3770 }, { "epoch": 0.81, "learning_rate": 0.00018199246751078035, "loss": 1.959, "step": 3771 }, { "epoch": 0.81, "learning_rate": 0.00018159207454339043, "loss": 2.1016, "step": 3772 }, { "epoch": 0.81, "learning_rate": 0.00018119207851511788, "loss": 1.9873, "step": 3773 }, { "epoch": 0.81, "learning_rate": 0.0001807924796199669, "loss": 2.1621, "step": 3774 }, { "epoch": 0.81, "learning_rate": 0.0001803932780517483, "loss": 2.1094, "step": 3775 }, { "epoch": 0.81, "learning_rate": 0.0001799944740040802, "loss": 2.0596, "step": 3776 }, { "epoch": 0.81, "learning_rate": 0.00017959606767038817, "loss": 2.124, "step": 3777 }, { "epoch": 0.81, "learning_rate": 0.00017919805924390442, "loss": 2.1895, "step": 3778 }, { "epoch": 0.81, "learning_rate": 0.000178800448917669, "loss": 2.1133, "step": 3779 }, { "epoch": 0.81, "learning_rate": 0.00017840323688452832, "loss": 2.1914, "step": 3780 }, { "epoch": 0.81, "learning_rate": 0.00017800642333713545, "loss": 2.1436, "step": 3781 }, { "epoch": 0.81, "learning_rate": 0.0001776100084679506, "loss": 2.1934, "step": 3782 }, { "epoch": 0.81, "learning_rate": 0.0001772139924692404, "loss": 2.1807, "step": 3783 }, { "epoch": 0.81, "learning_rate": 0.00017681837553307778, "loss": 2.2178, "step": 3784 }, { "epoch": 0.81, "learning_rate": 0.000176423157851343, "loss": 2.1211, "step": 3785 }, { "epoch": 0.81, "learning_rate": 0.00017602833961572197, "loss": 2.2529, "step": 3786 }, { "epoch": 0.81, "learning_rate": 0.00017563392101770626, "loss": 2.2578, "step": 3787 }, { "epoch": 0.81, "learning_rate": 0.00017523990224859498, "loss": 2.0156, "step": 3788 }, { "epoch": 0.81, "learning_rate": 0.0001748462834994926, "loss": 2.1758, "step": 3789 }, { "epoch": 0.81, "learning_rate": 0.0001744530649613093, "loss": 2.1025, "step": 3790 }, { "epoch": 0.82, "learning_rate": 0.00017406024682476218, "loss": 2.1699, "step": 3791 }, { "epoch": 0.82, "learning_rate": 0.0001736678292803725, "loss": 2.0986, "step": 3792 }, { "epoch": 0.82, "learning_rate": 0.00017327581251846903, "loss": 2.0264, "step": 3793 }, { "epoch": 0.82, "learning_rate": 0.00017288419672918488, "loss": 2.2539, "step": 3794 }, { "epoch": 0.82, "learning_rate": 0.0001724929821024588, "loss": 2.1797, "step": 3795 }, { "epoch": 0.82, "learning_rate": 0.0001721021688280362, "loss": 2.0938, "step": 3796 }, { "epoch": 0.82, "learning_rate": 0.00017171175709546616, "loss": 2.0801, "step": 3797 }, { "epoch": 0.82, "learning_rate": 0.00017132174709410364, "loss": 2.0439, "step": 3798 }, { "epoch": 0.82, "learning_rate": 0.00017093213901310934, "loss": 1.999, "step": 3799 }, { "epoch": 0.82, "learning_rate": 0.00017054293304144842, "loss": 2.2793, "step": 3800 }, { "epoch": 0.82, "learning_rate": 0.00017015412936789088, "loss": 2.1748, "step": 3801 }, { "epoch": 0.82, "learning_rate": 0.00016976572818101245, "loss": 2.127, "step": 3802 }, { "epoch": 0.82, "learning_rate": 0.0001693777296691924, "loss": 2.1074, "step": 3803 }, { "epoch": 0.82, "learning_rate": 0.00016899013402061548, "loss": 2.208, "step": 3804 }, { "epoch": 0.82, "learning_rate": 0.00016860294142327116, "loss": 2.1777, "step": 3805 }, { "epoch": 0.82, "learning_rate": 0.0001682161520649531, "loss": 2.1191, "step": 3806 }, { "epoch": 0.82, "learning_rate": 0.00016782976613325952, "loss": 2.0146, "step": 3807 }, { "epoch": 0.82, "learning_rate": 0.00016744378381559278, "loss": 2.1406, "step": 3808 }, { "epoch": 0.82, "learning_rate": 0.00016705820529915939, "loss": 2.0557, "step": 3809 }, { "epoch": 0.82, "learning_rate": 0.00016667303077097084, "loss": 2.1934, "step": 3810 }, { "epoch": 0.82, "learning_rate": 0.00016628826041784173, "loss": 2.1709, "step": 3811 }, { "epoch": 0.82, "learning_rate": 0.0001659038944263911, "loss": 2.0771, "step": 3812 }, { "epoch": 0.82, "learning_rate": 0.0001655199329830417, "loss": 2.1006, "step": 3813 }, { "epoch": 0.82, "learning_rate": 0.00016513637627402, "loss": 2.123, "step": 3814 }, { "epoch": 0.82, "learning_rate": 0.00016475322448535613, "loss": 2.2246, "step": 3815 }, { "epoch": 0.82, "learning_rate": 0.0001643704778028845, "loss": 2.1318, "step": 3816 }, { "epoch": 0.82, "learning_rate": 0.00016398813641224232, "loss": 2.2158, "step": 3817 }, { "epoch": 0.82, "learning_rate": 0.0001636062004988703, "loss": 2.127, "step": 3818 }, { "epoch": 0.82, "learning_rate": 0.0001632246702480128, "loss": 2.2314, "step": 3819 }, { "epoch": 0.82, "learning_rate": 0.00016284354584471705, "loss": 1.9561, "step": 3820 }, { "epoch": 0.82, "learning_rate": 0.00016246282747383356, "loss": 2.1084, "step": 3821 }, { "epoch": 0.82, "learning_rate": 0.00016208251532001628, "loss": 2.2363, "step": 3822 }, { "epoch": 0.82, "learning_rate": 0.00016170260956772177, "loss": 2.3164, "step": 3823 }, { "epoch": 0.82, "learning_rate": 0.0001613231104012095, "loss": 2.2012, "step": 3824 }, { "epoch": 0.82, "learning_rate": 0.00016094401800454184, "loss": 2.2109, "step": 3825 }, { "epoch": 0.82, "learning_rate": 0.00016056533256158346, "loss": 1.9971, "step": 3826 }, { "epoch": 0.82, "learning_rate": 0.00016018705425600255, "loss": 2.0371, "step": 3827 }, { "epoch": 0.82, "learning_rate": 0.00015980918327126949, "loss": 2.0176, "step": 3828 }, { "epoch": 0.82, "learning_rate": 0.00015943171979065584, "loss": 2.208, "step": 3829 }, { "epoch": 0.82, "learning_rate": 0.00015905466399723756, "loss": 2.0205, "step": 3830 }, { "epoch": 0.82, "learning_rate": 0.00015867801607389153, "loss": 2.0283, "step": 3831 }, { "epoch": 0.82, "learning_rate": 0.0001583017762032971, "loss": 2.0498, "step": 3832 }, { "epoch": 0.82, "learning_rate": 0.0001579259445679364, "loss": 2.0576, "step": 3833 }, { "epoch": 0.82, "learning_rate": 0.00015755052135009228, "loss": 1.9111, "step": 3834 }, { "epoch": 0.82, "learning_rate": 0.0001571755067318501, "loss": 2.1689, "step": 3835 }, { "epoch": 0.82, "learning_rate": 0.00015680090089509758, "loss": 2.1611, "step": 3836 }, { "epoch": 0.82, "learning_rate": 0.0001564267040215236, "loss": 2.0859, "step": 3837 }, { "epoch": 0.83, "learning_rate": 0.00015605291629261842, "loss": 2.2783, "step": 3838 }, { "epoch": 0.83, "learning_rate": 0.00015567953788967503, "loss": 2.1455, "step": 3839 }, { "epoch": 0.83, "learning_rate": 0.00015530656899378624, "loss": 2.0801, "step": 3840 }, { "epoch": 0.83, "learning_rate": 0.0001549340097858476, "loss": 2.1875, "step": 3841 }, { "epoch": 0.83, "learning_rate": 0.0001545618604465554, "loss": 2.1211, "step": 3842 }, { "epoch": 0.83, "learning_rate": 0.00015419012115640686, "loss": 2.1465, "step": 3843 }, { "epoch": 0.83, "learning_rate": 0.00015381879209570138, "loss": 2.0889, "step": 3844 }, { "epoch": 0.83, "learning_rate": 0.00015344787344453802, "loss": 2.0127, "step": 3845 }, { "epoch": 0.83, "learning_rate": 0.00015307736538281737, "loss": 2.2285, "step": 3846 }, { "epoch": 0.83, "learning_rate": 0.00015270726809024139, "loss": 1.9482, "step": 3847 }, { "epoch": 0.83, "learning_rate": 0.00015233758174631228, "loss": 2.0684, "step": 3848 }, { "epoch": 0.83, "learning_rate": 0.00015196830653033277, "loss": 2.3066, "step": 3849 }, { "epoch": 0.83, "learning_rate": 0.00015159944262140669, "loss": 2.1553, "step": 3850 }, { "epoch": 0.83, "learning_rate": 0.000151230990198438, "loss": 2.0547, "step": 3851 }, { "epoch": 0.83, "learning_rate": 0.0001508629494401309, "loss": 2.0947, "step": 3852 }, { "epoch": 0.83, "learning_rate": 0.00015049532052499072, "loss": 2.1094, "step": 3853 }, { "epoch": 0.83, "learning_rate": 0.00015012810363132246, "loss": 2.1924, "step": 3854 }, { "epoch": 0.83, "learning_rate": 0.0001497612989372311, "loss": 2.2012, "step": 3855 }, { "epoch": 0.83, "learning_rate": 0.00014939490662062229, "loss": 2.1436, "step": 3856 }, { "epoch": 0.83, "learning_rate": 0.00014902892685920088, "loss": 2.1699, "step": 3857 }, { "epoch": 0.83, "learning_rate": 0.00014866335983047262, "loss": 2.0879, "step": 3858 }, { "epoch": 0.83, "learning_rate": 0.00014829820571174234, "loss": 2.1973, "step": 3859 }, { "epoch": 0.83, "learning_rate": 0.0001479334646801148, "loss": 2.2148, "step": 3860 }, { "epoch": 0.83, "learning_rate": 0.0001475691369124945, "loss": 2.0898, "step": 3861 }, { "epoch": 0.83, "learning_rate": 0.00014720522258558543, "loss": 2.0146, "step": 3862 }, { "epoch": 0.83, "learning_rate": 0.00014684172187589062, "loss": 2.042, "step": 3863 }, { "epoch": 0.83, "learning_rate": 0.0001464786349597136, "loss": 2.0898, "step": 3864 }, { "epoch": 0.83, "learning_rate": 0.00014611596201315647, "loss": 2.1514, "step": 3865 }, { "epoch": 0.83, "learning_rate": 0.0001457537032121199, "loss": 2.0352, "step": 3866 }, { "epoch": 0.83, "learning_rate": 0.000145391858732305, "loss": 2.084, "step": 3867 }, { "epoch": 0.83, "learning_rate": 0.00014503042874921125, "loss": 2.1055, "step": 3868 }, { "epoch": 0.83, "learning_rate": 0.00014466941343813688, "loss": 2.0947, "step": 3869 }, { "epoch": 0.83, "learning_rate": 0.00014430881297417965, "loss": 2.0801, "step": 3870 }, { "epoch": 0.83, "learning_rate": 0.0001439486275322357, "loss": 2.0957, "step": 3871 }, { "epoch": 0.83, "learning_rate": 0.00014358885728699977, "loss": 2.0215, "step": 3872 }, { "epoch": 0.83, "learning_rate": 0.0001432295024129655, "loss": 2.2178, "step": 3873 }, { "epoch": 0.83, "learning_rate": 0.0001428705630844247, "loss": 2.1719, "step": 3874 }, { "epoch": 0.83, "learning_rate": 0.00014251203947546832, "loss": 2.2148, "step": 3875 }, { "epoch": 0.83, "learning_rate": 0.00014215393175998525, "loss": 2.1475, "step": 3876 }, { "epoch": 0.83, "learning_rate": 0.0001417962401116619, "loss": 2.0596, "step": 3877 }, { "epoch": 0.83, "learning_rate": 0.00014143896470398455, "loss": 2.208, "step": 3878 }, { "epoch": 0.83, "learning_rate": 0.00014108210571023628, "loss": 2.1143, "step": 3879 }, { "epoch": 0.83, "learning_rate": 0.00014072566330349846, "loss": 2.1543, "step": 3880 }, { "epoch": 0.83, "learning_rate": 0.00014036963765665122, "loss": 2.1807, "step": 3881 }, { "epoch": 0.83, "learning_rate": 0.00014001402894237125, "loss": 2.1797, "step": 3882 }, { "epoch": 0.83, "learning_rate": 0.00013965883733313368, "loss": 2.1699, "step": 3883 }, { "epoch": 0.83, "learning_rate": 0.00013930406300121178, "loss": 2.1357, "step": 3884 }, { "epoch": 0.84, "learning_rate": 0.00013894970611867574, "loss": 2.0986, "step": 3885 }, { "epoch": 0.84, "learning_rate": 0.00013859576685739351, "loss": 2.1416, "step": 3886 }, { "epoch": 0.84, "learning_rate": 0.00013824224538903053, "loss": 2.1357, "step": 3887 }, { "epoch": 0.84, "learning_rate": 0.00013788914188504943, "loss": 2.2393, "step": 3888 }, { "epoch": 0.84, "learning_rate": 0.00013753645651671053, "loss": 2.0625, "step": 3889 }, { "epoch": 0.84, "learning_rate": 0.00013718418945507117, "loss": 2.1543, "step": 3890 }, { "epoch": 0.84, "learning_rate": 0.00013683234087098517, "loss": 2.0176, "step": 3891 }, { "epoch": 0.84, "learning_rate": 0.00013648091093510485, "loss": 2.0371, "step": 3892 }, { "epoch": 0.84, "learning_rate": 0.0001361298998178778, "loss": 2.377, "step": 3893 }, { "epoch": 0.84, "learning_rate": 0.00013577930768954926, "loss": 2.1914, "step": 3894 }, { "epoch": 0.84, "learning_rate": 0.0001354291347201617, "loss": 2.1885, "step": 3895 }, { "epoch": 0.84, "learning_rate": 0.00013507938107955365, "loss": 2.002, "step": 3896 }, { "epoch": 0.84, "learning_rate": 0.00013473004693736036, "loss": 2.1621, "step": 3897 }, { "epoch": 0.84, "learning_rate": 0.00013438113246301374, "loss": 2.0781, "step": 3898 }, { "epoch": 0.84, "learning_rate": 0.00013403263782574217, "loss": 2.2373, "step": 3899 }, { "epoch": 0.84, "learning_rate": 0.00013368456319457002, "loss": 2.0713, "step": 3900 }, { "epoch": 0.84, "learning_rate": 0.00013333690873831882, "loss": 2.1582, "step": 3901 }, { "epoch": 0.84, "learning_rate": 0.0001329896746256054, "loss": 2.1387, "step": 3902 }, { "epoch": 0.84, "learning_rate": 0.00013264286102484336, "loss": 2.1885, "step": 3903 }, { "epoch": 0.84, "learning_rate": 0.0001322964681042418, "loss": 1.9805, "step": 3904 }, { "epoch": 0.84, "learning_rate": 0.00013195049603180597, "loss": 2.1201, "step": 3905 }, { "epoch": 0.84, "learning_rate": 0.0001316049449753375, "loss": 2.0137, "step": 3906 }, { "epoch": 0.84, "learning_rate": 0.00013125981510243322, "loss": 2.0693, "step": 3907 }, { "epoch": 0.84, "learning_rate": 0.0001309151065804859, "loss": 2.0059, "step": 3908 }, { "epoch": 0.84, "learning_rate": 0.00013057081957668383, "loss": 2.1768, "step": 3909 }, { "epoch": 0.84, "learning_rate": 0.0001302269542580109, "loss": 2.0957, "step": 3910 }, { "epoch": 0.84, "learning_rate": 0.0001298835107912465, "loss": 2.2227, "step": 3911 }, { "epoch": 0.84, "learning_rate": 0.00012954048934296582, "loss": 2.0791, "step": 3912 }, { "epoch": 0.84, "learning_rate": 0.00012919789007953886, "loss": 2.207, "step": 3913 }, { "epoch": 0.84, "learning_rate": 0.0001288557131671305, "loss": 2.2314, "step": 3914 }, { "epoch": 0.84, "learning_rate": 0.0001285139587717018, "loss": 2.1006, "step": 3915 }, { "epoch": 0.84, "learning_rate": 0.00012817262705900812, "loss": 2.3008, "step": 3916 }, { "epoch": 0.84, "learning_rate": 0.00012783171819459995, "loss": 2.1816, "step": 3917 }, { "epoch": 0.84, "learning_rate": 0.00012749123234382333, "loss": 2.1943, "step": 3918 }, { "epoch": 0.84, "learning_rate": 0.00012715116967181774, "loss": 2.1611, "step": 3919 }, { "epoch": 0.84, "learning_rate": 0.00012681153034351878, "loss": 2.0664, "step": 3920 }, { "epoch": 0.84, "learning_rate": 0.00012647231452365626, "loss": 2.1504, "step": 3921 }, { "epoch": 0.84, "learning_rate": 0.00012613352237675414, "loss": 2.0928, "step": 3922 }, { "epoch": 0.84, "learning_rate": 0.00012579515406713193, "loss": 2.0176, "step": 3923 }, { "epoch": 0.84, "learning_rate": 0.0001254572097589024, "loss": 2.168, "step": 3924 }, { "epoch": 0.84, "learning_rate": 0.00012511968961597297, "loss": 2.0215, "step": 3925 }, { "epoch": 0.84, "learning_rate": 0.00012478259380204615, "loss": 2.1064, "step": 3926 }, { "epoch": 0.84, "learning_rate": 0.00012444592248061782, "loss": 2.0898, "step": 3927 }, { "epoch": 0.84, "learning_rate": 0.000124109675814978, "loss": 2.208, "step": 3928 }, { "epoch": 0.84, "learning_rate": 0.0001237738539682115, "loss": 2.1953, "step": 3929 }, { "epoch": 0.84, "learning_rate": 0.000123438457103196, "loss": 2.1289, "step": 3930 }, { "epoch": 0.85, "learning_rate": 0.00012310348538260363, "loss": 2.0488, "step": 3931 }, { "epoch": 0.85, "learning_rate": 0.0001227689389689006, "loss": 2.1045, "step": 3932 }, { "epoch": 0.85, "learning_rate": 0.0001224348180243464, "loss": 2.0566, "step": 3933 }, { "epoch": 0.85, "learning_rate": 0.00012210112271099428, "loss": 2.1875, "step": 3934 }, { "epoch": 0.85, "learning_rate": 0.00012176785319069095, "loss": 2.1338, "step": 3935 }, { "epoch": 0.85, "learning_rate": 0.0001214350096250767, "loss": 2.2373, "step": 3936 }, { "epoch": 0.85, "learning_rate": 0.0001211025921755855, "loss": 2.1162, "step": 3937 }, { "epoch": 0.85, "learning_rate": 0.0001207706010034444, "loss": 2.2227, "step": 3938 }, { "epoch": 0.85, "learning_rate": 0.00012043903626967345, "loss": 2.2949, "step": 3939 }, { "epoch": 0.85, "learning_rate": 0.00012010789813508615, "loss": 2.2695, "step": 3940 }, { "epoch": 0.85, "learning_rate": 0.00011977718676028915, "loss": 2.1719, "step": 3941 }, { "epoch": 0.85, "learning_rate": 0.0001194469023056819, "loss": 2.0527, "step": 3942 }, { "epoch": 0.85, "learning_rate": 0.00011911704493145702, "loss": 2.1953, "step": 3943 }, { "epoch": 0.85, "learning_rate": 0.00011878761479760004, "loss": 2.1777, "step": 3944 }, { "epoch": 0.85, "learning_rate": 0.00011845861206388874, "loss": 2.1699, "step": 3945 }, { "epoch": 0.85, "learning_rate": 0.00011813003688989432, "loss": 2.1602, "step": 3946 }, { "epoch": 0.85, "learning_rate": 0.00011780188943498005, "loss": 2.0156, "step": 3947 }, { "epoch": 0.85, "learning_rate": 0.00011747416985830184, "loss": 2.1484, "step": 3948 }, { "epoch": 0.85, "learning_rate": 0.00011714687831880865, "loss": 2.0801, "step": 3949 }, { "epoch": 0.85, "learning_rate": 0.00011682001497524108, "loss": 2.0527, "step": 3950 }, { "epoch": 0.85, "learning_rate": 0.00011649357998613241, "loss": 2.1201, "step": 3951 }, { "epoch": 0.85, "learning_rate": 0.000116167573509808, "loss": 2.0811, "step": 3952 }, { "epoch": 0.85, "learning_rate": 0.00011584199570438547, "loss": 2.123, "step": 3953 }, { "epoch": 0.85, "learning_rate": 0.00011551684672777485, "loss": 2.1494, "step": 3954 }, { "epoch": 0.85, "learning_rate": 0.00011519212673767787, "loss": 2.167, "step": 3955 }, { "epoch": 0.85, "learning_rate": 0.00011486783589158755, "loss": 2.0596, "step": 3956 }, { "epoch": 0.85, "learning_rate": 0.0001145439743467902, "loss": 2.0283, "step": 3957 }, { "epoch": 0.85, "learning_rate": 0.00011422054226036271, "loss": 2.126, "step": 3958 }, { "epoch": 0.85, "learning_rate": 0.00011389753978917394, "loss": 2.0879, "step": 3959 }, { "epoch": 0.85, "learning_rate": 0.00011357496708988501, "loss": 2.1309, "step": 3960 }, { "epoch": 0.85, "learning_rate": 0.00011325282431894812, "loss": 2.1533, "step": 3961 }, { "epoch": 0.85, "learning_rate": 0.00011293111163260639, "loss": 2.1641, "step": 3962 }, { "epoch": 0.85, "learning_rate": 0.00011260982918689533, "loss": 1.9941, "step": 3963 }, { "epoch": 0.85, "learning_rate": 0.0001122889771376413, "loss": 2.1387, "step": 3964 }, { "epoch": 0.85, "learning_rate": 0.00011196855564046171, "loss": 2.0967, "step": 3965 }, { "epoch": 0.85, "learning_rate": 0.00011164856485076614, "loss": 2.2061, "step": 3966 }, { "epoch": 0.85, "learning_rate": 0.00011132900492375341, "loss": 2.1377, "step": 3967 }, { "epoch": 0.85, "learning_rate": 0.00011100987601441547, "loss": 2.2305, "step": 3968 }, { "epoch": 0.85, "learning_rate": 0.00011069117827753373, "loss": 2.2334, "step": 3969 }, { "epoch": 0.85, "learning_rate": 0.00011037291186768095, "loss": 2.1113, "step": 3970 }, { "epoch": 0.85, "learning_rate": 0.00011005507693922134, "loss": 2.0537, "step": 3971 }, { "epoch": 0.85, "learning_rate": 0.00010973767364630849, "loss": 2.0713, "step": 3972 }, { "epoch": 0.85, "learning_rate": 0.00010942070214288746, "loss": 2.0361, "step": 3973 }, { "epoch": 0.85, "learning_rate": 0.00010910416258269407, "loss": 2.1533, "step": 3974 }, { "epoch": 0.85, "learning_rate": 0.00010878805511925438, "loss": 2.1631, "step": 3975 }, { "epoch": 0.85, "learning_rate": 0.00010847237990588476, "loss": 1.9551, "step": 3976 }, { "epoch": 0.85, "learning_rate": 0.00010815713709569196, "loss": 2.0918, "step": 3977 }, { "epoch": 0.86, "learning_rate": 0.00010784232684157324, "loss": 1.9863, "step": 3978 }, { "epoch": 0.86, "learning_rate": 0.00010752794929621557, "loss": 2.2256, "step": 3979 }, { "epoch": 0.86, "learning_rate": 0.00010721400461209684, "loss": 2.1465, "step": 3980 }, { "epoch": 0.86, "learning_rate": 0.00010690049294148441, "loss": 2.0869, "step": 3981 }, { "epoch": 0.86, "learning_rate": 0.00010658741443643561, "loss": 1.8467, "step": 3982 }, { "epoch": 0.86, "learning_rate": 0.00010627476924879798, "loss": 2.0518, "step": 3983 }, { "epoch": 0.86, "learning_rate": 0.00010596255753020856, "loss": 2.0654, "step": 3984 }, { "epoch": 0.86, "learning_rate": 0.00010565077943209456, "loss": 2.0498, "step": 3985 }, { "epoch": 0.86, "learning_rate": 0.00010533943510567256, "loss": 2.0459, "step": 3986 }, { "epoch": 0.86, "learning_rate": 0.00010502852470194878, "loss": 1.8906, "step": 3987 }, { "epoch": 0.86, "learning_rate": 0.00010471804837171917, "loss": 1.9941, "step": 3988 }, { "epoch": 0.86, "learning_rate": 0.00010440800626556902, "loss": 2.1729, "step": 3989 }, { "epoch": 0.86, "learning_rate": 0.00010409839853387271, "loss": 2.1416, "step": 3990 }, { "epoch": 0.86, "learning_rate": 0.00010378922532679469, "loss": 2.1836, "step": 3991 }, { "epoch": 0.86, "learning_rate": 0.000103480486794288, "loss": 2.2324, "step": 3992 }, { "epoch": 0.86, "learning_rate": 0.00010317218308609522, "loss": 2.1377, "step": 3993 }, { "epoch": 0.86, "learning_rate": 0.0001028643143517477, "loss": 2.291, "step": 3994 }, { "epoch": 0.86, "learning_rate": 0.00010255688074056624, "loss": 2.167, "step": 3995 }, { "epoch": 0.86, "learning_rate": 0.00010224988240166, "loss": 1.9561, "step": 3996 }, { "epoch": 0.86, "learning_rate": 0.00010194331948392777, "loss": 2.1875, "step": 3997 }, { "epoch": 0.86, "learning_rate": 0.00010163719213605682, "loss": 2.0615, "step": 3998 }, { "epoch": 0.86, "learning_rate": 0.00010133150050652296, "loss": 2.3359, "step": 3999 }, { "epoch": 0.86, "learning_rate": 0.0001010262447435908, "loss": 2.0859, "step": 4000 }, { "epoch": 0.86, "learning_rate": 0.00010072142499531345, "loss": 2.0342, "step": 4001 }, { "epoch": 0.86, "learning_rate": 0.00010041704140953312, "loss": 2.2441, "step": 4002 }, { "epoch": 0.86, "learning_rate": 0.00010011309413388003, "loss": 1.9004, "step": 4003 }, { "epoch": 0.86, "learning_rate": 9.980958331577217e-05, "loss": 2.1006, "step": 4004 }, { "epoch": 0.86, "learning_rate": 9.95065091024171e-05, "loss": 2.082, "step": 4005 }, { "epoch": 0.86, "learning_rate": 9.92038716408098e-05, "loss": 2.1553, "step": 4006 }, { "epoch": 0.86, "learning_rate": 9.89016710777334e-05, "loss": 2.0771, "step": 4007 }, { "epoch": 0.86, "learning_rate": 9.859990755975979e-05, "loss": 1.9355, "step": 4008 }, { "epoch": 0.86, "learning_rate": 9.829858123324797e-05, "loss": 2.1016, "step": 4009 }, { "epoch": 0.86, "learning_rate": 9.79976922443454e-05, "loss": 2.4072, "step": 4010 }, { "epoch": 0.86, "learning_rate": 9.769724073898778e-05, "loss": 2.0977, "step": 4011 }, { "epoch": 0.86, "learning_rate": 9.739722686289787e-05, "loss": 2.0752, "step": 4012 }, { "epoch": 0.86, "learning_rate": 9.709765076158661e-05, "loss": 2.0762, "step": 4013 }, { "epoch": 0.86, "learning_rate": 9.679851258035277e-05, "loss": 2.1074, "step": 4014 }, { "epoch": 0.86, "learning_rate": 9.649981246428197e-05, "loss": 2.0879, "step": 4015 }, { "epoch": 0.86, "learning_rate": 9.620155055824841e-05, "loss": 2.0947, "step": 4016 }, { "epoch": 0.86, "learning_rate": 9.590372700691297e-05, "loss": 2.0381, "step": 4017 }, { "epoch": 0.86, "learning_rate": 9.560634195472406e-05, "loss": 2.2217, "step": 4018 }, { "epoch": 0.86, "learning_rate": 9.530939554591811e-05, "loss": 2.2617, "step": 4019 }, { "epoch": 0.86, "learning_rate": 9.501288792451746e-05, "loss": 2.1025, "step": 4020 }, { "epoch": 0.86, "learning_rate": 9.471681923433261e-05, "loss": 2.2021, "step": 4021 }, { "epoch": 0.86, "learning_rate": 9.442118961896129e-05, "loss": 2.1328, "step": 4022 }, { "epoch": 0.86, "learning_rate": 9.412599922178767e-05, "loss": 1.9834, "step": 4023 }, { "epoch": 0.87, "learning_rate": 9.383124818598322e-05, "loss": 1.915, "step": 4024 }, { "epoch": 0.87, "learning_rate": 9.353693665450624e-05, "loss": 2.0029, "step": 4025 }, { "epoch": 0.87, "learning_rate": 9.32430647701018e-05, "loss": 2.0488, "step": 4026 }, { "epoch": 0.87, "learning_rate": 9.294963267530176e-05, "loss": 1.9854, "step": 4027 }, { "epoch": 0.87, "learning_rate": 9.265664051242506e-05, "loss": 2.166, "step": 4028 }, { "epoch": 0.87, "learning_rate": 9.236408842357669e-05, "loss": 2.1299, "step": 4029 }, { "epoch": 0.87, "learning_rate": 9.207197655064847e-05, "loss": 2.084, "step": 4030 }, { "epoch": 0.87, "learning_rate": 9.178030503531865e-05, "loss": 2.2227, "step": 4031 }, { "epoch": 0.87, "learning_rate": 9.148907401905182e-05, "loss": 2.0898, "step": 4032 }, { "epoch": 0.87, "learning_rate": 9.119828364309934e-05, "loss": 2.2109, "step": 4033 }, { "epoch": 0.87, "learning_rate": 9.090793404849852e-05, "loss": 2.1982, "step": 4034 }, { "epoch": 0.87, "learning_rate": 9.061802537607267e-05, "loss": 2.1426, "step": 4035 }, { "epoch": 0.87, "learning_rate": 9.032855776643167e-05, "loss": 2.1895, "step": 4036 }, { "epoch": 0.87, "learning_rate": 9.003953135997122e-05, "loss": 2.1885, "step": 4037 }, { "epoch": 0.87, "learning_rate": 8.975094629687287e-05, "loss": 1.9912, "step": 4038 }, { "epoch": 0.87, "learning_rate": 8.946280271710494e-05, "loss": 2.041, "step": 4039 }, { "epoch": 0.87, "learning_rate": 8.917510076042057e-05, "loss": 2.2129, "step": 4040 }, { "epoch": 0.87, "learning_rate": 8.888784056635935e-05, "loss": 2.2949, "step": 4041 }, { "epoch": 0.87, "learning_rate": 8.860102227424637e-05, "loss": 2.1729, "step": 4042 }, { "epoch": 0.87, "learning_rate": 8.83146460231925e-05, "loss": 2.1211, "step": 4043 }, { "epoch": 0.87, "learning_rate": 8.802871195209394e-05, "loss": 2.0352, "step": 4044 }, { "epoch": 0.87, "learning_rate": 8.774322019963322e-05, "loss": 2.2266, "step": 4045 }, { "epoch": 0.87, "learning_rate": 8.745817090427699e-05, "loss": 2.1191, "step": 4046 }, { "epoch": 0.87, "learning_rate": 8.717356420427869e-05, "loss": 2.1963, "step": 4047 }, { "epoch": 0.87, "learning_rate": 8.688940023767634e-05, "loss": 2.1006, "step": 4048 }, { "epoch": 0.87, "learning_rate": 8.660567914229312e-05, "loss": 2.1484, "step": 4049 }, { "epoch": 0.87, "learning_rate": 8.632240105573808e-05, "loss": 2.0771, "step": 4050 }, { "epoch": 0.87, "learning_rate": 8.603956611540498e-05, "loss": 1.9883, "step": 4051 }, { "epoch": 0.87, "learning_rate": 8.575717445847208e-05, "loss": 2.1104, "step": 4052 }, { "epoch": 0.87, "learning_rate": 8.547522622190384e-05, "loss": 2.0205, "step": 4053 }, { "epoch": 0.87, "learning_rate": 8.519372154244886e-05, "loss": 2.1338, "step": 4054 }, { "epoch": 0.87, "learning_rate": 8.491266055664049e-05, "loss": 2.0264, "step": 4055 }, { "epoch": 0.87, "learning_rate": 8.463204340079789e-05, "loss": 1.998, "step": 4056 }, { "epoch": 0.87, "learning_rate": 8.435187021102353e-05, "loss": 2.1025, "step": 4057 }, { "epoch": 0.87, "learning_rate": 8.407214112320538e-05, "loss": 2.0684, "step": 4058 }, { "epoch": 0.87, "learning_rate": 8.379285627301625e-05, "loss": 2.0547, "step": 4059 }, { "epoch": 0.87, "learning_rate": 8.3514015795913e-05, "loss": 2.1436, "step": 4060 }, { "epoch": 0.87, "learning_rate": 8.32356198271369e-05, "loss": 2.1982, "step": 4061 }, { "epoch": 0.87, "learning_rate": 8.295766850171404e-05, "loss": 2.1953, "step": 4062 }, { "epoch": 0.87, "learning_rate": 8.268016195445449e-05, "loss": 2.043, "step": 4063 }, { "epoch": 0.87, "learning_rate": 8.240310031995291e-05, "loss": 2.1172, "step": 4064 }, { "epoch": 0.87, "learning_rate": 8.212648373258791e-05, "loss": 2.1484, "step": 4065 }, { "epoch": 0.87, "learning_rate": 8.185031232652251e-05, "loss": 2.1846, "step": 4066 }, { "epoch": 0.87, "learning_rate": 8.157458623570335e-05, "loss": 2.0254, "step": 4067 }, { "epoch": 0.87, "learning_rate": 8.129930559386167e-05, "loss": 2.1406, "step": 4068 }, { "epoch": 0.87, "learning_rate": 8.1024470534512e-05, "loss": 2.1562, "step": 4069 }, { "epoch": 0.87, "learning_rate": 8.075008119095351e-05, "loss": 2.0977, "step": 4070 }, { "epoch": 0.88, "learning_rate": 8.047613769626871e-05, "loss": 2.1328, "step": 4071 }, { "epoch": 0.88, "learning_rate": 8.02026401833239e-05, "loss": 2.04, "step": 4072 }, { "epoch": 0.88, "learning_rate": 7.99295887847693e-05, "loss": 2.1895, "step": 4073 }, { "epoch": 0.88, "learning_rate": 7.96569836330383e-05, "loss": 2.0977, "step": 4074 }, { "epoch": 0.88, "learning_rate": 7.938482486034826e-05, "loss": 2.2412, "step": 4075 }, { "epoch": 0.88, "learning_rate": 7.911311259870014e-05, "loss": 2.1143, "step": 4076 }, { "epoch": 0.88, "learning_rate": 7.884184697987806e-05, "loss": 2.1104, "step": 4077 }, { "epoch": 0.88, "learning_rate": 7.857102813544936e-05, "loss": 2.1865, "step": 4078 }, { "epoch": 0.88, "learning_rate": 7.830065619676518e-05, "loss": 2.1348, "step": 4079 }, { "epoch": 0.88, "learning_rate": 7.80307312949593e-05, "loss": 2.2148, "step": 4080 }, { "epoch": 0.88, "learning_rate": 7.776125356094943e-05, "loss": 2.3047, "step": 4081 }, { "epoch": 0.88, "learning_rate": 7.749222312543602e-05, "loss": 2.2422, "step": 4082 }, { "epoch": 0.88, "learning_rate": 7.722364011890182e-05, "loss": 2.1875, "step": 4083 }, { "epoch": 0.88, "learning_rate": 7.695550467161405e-05, "loss": 1.9355, "step": 4084 }, { "epoch": 0.88, "learning_rate": 7.668781691362181e-05, "loss": 2.165, "step": 4085 }, { "epoch": 0.88, "learning_rate": 7.642057697475713e-05, "loss": 2.2158, "step": 4086 }, { "epoch": 0.88, "learning_rate": 7.615378498463543e-05, "loss": 2.2188, "step": 4087 }, { "epoch": 0.88, "learning_rate": 7.588744107265443e-05, "loss": 2.1104, "step": 4088 }, { "epoch": 0.88, "learning_rate": 7.562154536799448e-05, "loss": 2.0146, "step": 4089 }, { "epoch": 0.88, "learning_rate": 7.535609799961873e-05, "loss": 2.1553, "step": 4090 }, { "epoch": 0.88, "learning_rate": 7.509109909627287e-05, "loss": 2.0176, "step": 4091 }, { "epoch": 0.88, "learning_rate": 7.482654878648464e-05, "loss": 2.0342, "step": 4092 }, { "epoch": 0.88, "learning_rate": 7.456244719856531e-05, "loss": 2.1738, "step": 4093 }, { "epoch": 0.88, "learning_rate": 7.429879446060695e-05, "loss": 2.125, "step": 4094 }, { "epoch": 0.88, "learning_rate": 7.403559070048538e-05, "loss": 2.1992, "step": 4095 }, { "epoch": 0.88, "learning_rate": 7.377283604585783e-05, "loss": 2.0684, "step": 4096 }, { "epoch": 0.88, "learning_rate": 7.35105306241638e-05, "loss": 2.1943, "step": 4097 }, { "epoch": 0.88, "learning_rate": 7.32486745626254e-05, "loss": 2.1562, "step": 4098 }, { "epoch": 0.88, "learning_rate": 7.298726798824618e-05, "loss": 2.1152, "step": 4099 }, { "epoch": 0.88, "learning_rate": 7.272631102781168e-05, "loss": 2.0791, "step": 4100 }, { "epoch": 0.88, "learning_rate": 7.246580380789014e-05, "loss": 2.0957, "step": 4101 }, { "epoch": 0.88, "learning_rate": 7.22057464548308e-05, "loss": 2.1514, "step": 4102 }, { "epoch": 0.88, "learning_rate": 7.19461390947651e-05, "loss": 2.0781, "step": 4103 }, { "epoch": 0.88, "learning_rate": 7.168698185360656e-05, "loss": 2.2197, "step": 4104 }, { "epoch": 0.88, "learning_rate": 7.142827485704951e-05, "loss": 2.083, "step": 4105 }, { "epoch": 0.88, "learning_rate": 7.117001823057045e-05, "loss": 2.1582, "step": 4106 }, { "epoch": 0.88, "learning_rate": 7.091221209942766e-05, "loss": 2.2002, "step": 4107 }, { "epoch": 0.88, "learning_rate": 7.065485658866067e-05, "loss": 2.0312, "step": 4108 }, { "epoch": 0.88, "learning_rate": 7.039795182309027e-05, "loss": 2.1768, "step": 4109 }, { "epoch": 0.88, "learning_rate": 7.014149792731883e-05, "loss": 2.1631, "step": 4110 }, { "epoch": 0.88, "learning_rate": 6.988549502572993e-05, "loss": 2.0742, "step": 4111 }, { "epoch": 0.88, "learning_rate": 6.962994324248883e-05, "loss": 2.2168, "step": 4112 }, { "epoch": 0.88, "learning_rate": 6.937484270154138e-05, "loss": 2.1895, "step": 4113 }, { "epoch": 0.88, "learning_rate": 6.912019352661502e-05, "loss": 2.0244, "step": 4114 }, { "epoch": 0.88, "learning_rate": 6.8865995841218e-05, "loss": 2.1445, "step": 4115 }, { "epoch": 0.88, "learning_rate": 6.861224976863978e-05, "loss": 2.0654, "step": 4116 }, { "epoch": 0.89, "learning_rate": 6.835895543195047e-05, "loss": 2.1436, "step": 4117 }, { "epoch": 0.89, "learning_rate": 6.81061129540017e-05, "loss": 2.1279, "step": 4118 }, { "epoch": 0.89, "learning_rate": 6.785372245742527e-05, "loss": 2.0225, "step": 4119 }, { "epoch": 0.89, "learning_rate": 6.760178406463424e-05, "loss": 2.208, "step": 4120 }, { "epoch": 0.89, "learning_rate": 6.7350297897822e-05, "loss": 2.2617, "step": 4121 }, { "epoch": 0.89, "learning_rate": 6.709926407896294e-05, "loss": 2.293, "step": 4122 }, { "epoch": 0.89, "learning_rate": 6.68486827298117e-05, "loss": 2.1172, "step": 4123 }, { "epoch": 0.89, "learning_rate": 6.659855397190406e-05, "loss": 2.2832, "step": 4124 }, { "epoch": 0.89, "learning_rate": 6.634887792655575e-05, "loss": 2.334, "step": 4125 }, { "epoch": 0.89, "learning_rate": 6.6099654714863e-05, "loss": 2.1289, "step": 4126 }, { "epoch": 0.89, "learning_rate": 6.585088445770249e-05, "loss": 2.04, "step": 4127 }, { "epoch": 0.89, "learning_rate": 6.560256727573122e-05, "loss": 2.1621, "step": 4128 }, { "epoch": 0.89, "learning_rate": 6.535470328938665e-05, "loss": 2.2676, "step": 4129 }, { "epoch": 0.89, "learning_rate": 6.510729261888626e-05, "loss": 2.0283, "step": 4130 }, { "epoch": 0.89, "learning_rate": 6.48603353842272e-05, "loss": 2.1084, "step": 4131 }, { "epoch": 0.89, "learning_rate": 6.461383170518743e-05, "loss": 2.1396, "step": 4132 }, { "epoch": 0.89, "learning_rate": 6.436778170132474e-05, "loss": 2.2168, "step": 4133 }, { "epoch": 0.89, "learning_rate": 6.412218549197635e-05, "loss": 2.1279, "step": 4134 }, { "epoch": 0.89, "learning_rate": 6.387704319626042e-05, "loss": 2.0781, "step": 4135 }, { "epoch": 0.89, "learning_rate": 6.363235493307374e-05, "loss": 2.1953, "step": 4136 }, { "epoch": 0.89, "learning_rate": 6.33881208210939e-05, "loss": 2.0273, "step": 4137 }, { "epoch": 0.89, "learning_rate": 6.314434097877763e-05, "loss": 2.0791, "step": 4138 }, { "epoch": 0.89, "learning_rate": 6.290101552436155e-05, "loss": 2.2607, "step": 4139 }, { "epoch": 0.89, "learning_rate": 6.265814457586172e-05, "loss": 2.1641, "step": 4140 }, { "epoch": 0.89, "learning_rate": 6.241572825107433e-05, "loss": 2.2158, "step": 4141 }, { "epoch": 0.89, "learning_rate": 6.217376666757402e-05, "loss": 2.168, "step": 4142 }, { "epoch": 0.89, "learning_rate": 6.193225994271601e-05, "loss": 2.1045, "step": 4143 }, { "epoch": 0.89, "learning_rate": 6.169120819363405e-05, "loss": 2.0771, "step": 4144 }, { "epoch": 0.89, "learning_rate": 6.145061153724163e-05, "loss": 1.9971, "step": 4145 }, { "epoch": 0.89, "learning_rate": 6.121047009023173e-05, "loss": 1.998, "step": 4146 }, { "epoch": 0.89, "learning_rate": 6.097078396907596e-05, "loss": 2.1963, "step": 4147 }, { "epoch": 0.89, "learning_rate": 6.073155329002533e-05, "loss": 2.167, "step": 4148 }, { "epoch": 0.89, "learning_rate": 6.0492778169110254e-05, "loss": 2.041, "step": 4149 }, { "epoch": 0.89, "learning_rate": 6.025445872213986e-05, "loss": 2.1953, "step": 4150 }, { "epoch": 0.89, "learning_rate": 6.0016595064702364e-05, "loss": 2.2275, "step": 4151 }, { "epoch": 0.89, "learning_rate": 5.977918731216481e-05, "loss": 2.248, "step": 4152 }, { "epoch": 0.89, "learning_rate": 5.954223557967342e-05, "loss": 2.126, "step": 4153 }, { "epoch": 0.89, "learning_rate": 5.9305739982152826e-05, "loss": 2.1914, "step": 4154 }, { "epoch": 0.89, "learning_rate": 5.906970063430683e-05, "loss": 1.9863, "step": 4155 }, { "epoch": 0.89, "learning_rate": 5.883411765061775e-05, "loss": 2.1104, "step": 4156 }, { "epoch": 0.89, "learning_rate": 5.859899114534661e-05, "loss": 2.0596, "step": 4157 }, { "epoch": 0.89, "learning_rate": 5.836432123253288e-05, "loss": 2.2158, "step": 4158 }, { "epoch": 0.89, "learning_rate": 5.813010802599461e-05, "loss": 2.002, "step": 4159 }, { "epoch": 0.89, "learning_rate": 5.7896351639328715e-05, "loss": 2.0791, "step": 4160 }, { "epoch": 0.89, "learning_rate": 5.766305218591006e-05, "loss": 2.1387, "step": 4161 }, { "epoch": 0.89, "learning_rate": 5.743020977889224e-05, "loss": 2.2383, "step": 4162 }, { "epoch": 0.89, "learning_rate": 5.719782453120692e-05, "loss": 2.0908, "step": 4163 }, { "epoch": 0.9, "learning_rate": 5.696589655556428e-05, "loss": 2.084, "step": 4164 }, { "epoch": 0.9, "learning_rate": 5.673442596445222e-05, "loss": 2.1172, "step": 4165 }, { "epoch": 0.9, "learning_rate": 5.6503412870137605e-05, "loss": 2.1074, "step": 4166 }, { "epoch": 0.9, "learning_rate": 5.62728573846647e-05, "loss": 2.1875, "step": 4167 }, { "epoch": 0.9, "learning_rate": 5.604275961985628e-05, "loss": 2.1895, "step": 4168 }, { "epoch": 0.9, "learning_rate": 5.581311968731284e-05, "loss": 2.1943, "step": 4169 }, { "epoch": 0.9, "learning_rate": 5.5583937698412854e-05, "loss": 2.1367, "step": 4170 }, { "epoch": 0.9, "learning_rate": 5.535521376431263e-05, "loss": 2.1924, "step": 4171 }, { "epoch": 0.9, "learning_rate": 5.5126947995946866e-05, "loss": 2.208, "step": 4172 }, { "epoch": 0.9, "learning_rate": 5.489914050402711e-05, "loss": 2.0264, "step": 4173 }, { "epoch": 0.9, "learning_rate": 5.467179139904344e-05, "loss": 2.0625, "step": 4174 }, { "epoch": 0.9, "learning_rate": 5.4444900791263184e-05, "loss": 2.0586, "step": 4175 }, { "epoch": 0.9, "learning_rate": 5.421846879073133e-05, "loss": 2.1338, "step": 4176 }, { "epoch": 0.9, "learning_rate": 5.399249550727081e-05, "loss": 2.2002, "step": 4177 }, { "epoch": 0.9, "learning_rate": 5.376698105048161e-05, "loss": 2.1748, "step": 4178 }, { "epoch": 0.9, "learning_rate": 5.354192552974124e-05, "loss": 2.0713, "step": 4179 }, { "epoch": 0.9, "learning_rate": 5.3317329054204835e-05, "loss": 1.9639, "step": 4180 }, { "epoch": 0.9, "learning_rate": 5.3093191732804805e-05, "loss": 2.1279, "step": 4181 }, { "epoch": 0.9, "learning_rate": 5.2869513674250635e-05, "loss": 2.1973, "step": 4182 }, { "epoch": 0.9, "learning_rate": 5.2646294987029666e-05, "loss": 2.1855, "step": 4183 }, { "epoch": 0.9, "learning_rate": 5.242353577940562e-05, "loss": 2.2031, "step": 4184 }, { "epoch": 0.9, "learning_rate": 5.2201236159420074e-05, "loss": 2.1592, "step": 4185 }, { "epoch": 0.9, "learning_rate": 5.197939623489123e-05, "loss": 2.0645, "step": 4186 }, { "epoch": 0.9, "learning_rate": 5.175801611341457e-05, "loss": 2.1475, "step": 4187 }, { "epoch": 0.9, "learning_rate": 5.1537095902362553e-05, "loss": 2.2754, "step": 4188 }, { "epoch": 0.9, "learning_rate": 5.131663570888456e-05, "loss": 2.0322, "step": 4189 }, { "epoch": 0.9, "learning_rate": 5.1096635639906406e-05, "loss": 2.1465, "step": 4190 }, { "epoch": 0.9, "learning_rate": 5.087709580213173e-05, "loss": 2.2031, "step": 4191 }, { "epoch": 0.9, "learning_rate": 5.0658016302040144e-05, "loss": 2.1816, "step": 4192 }, { "epoch": 0.9, "learning_rate": 5.043939724588808e-05, "loss": 2.0645, "step": 4193 }, { "epoch": 0.9, "learning_rate": 5.022123873970919e-05, "loss": 2.0605, "step": 4194 }, { "epoch": 0.9, "learning_rate": 5.0003540889312915e-05, "loss": 2.2529, "step": 4195 }, { "epoch": 0.9, "learning_rate": 4.978630380028581e-05, "loss": 2.3008, "step": 4196 }, { "epoch": 0.9, "learning_rate": 4.9569527577991045e-05, "loss": 1.9551, "step": 4197 }, { "epoch": 0.9, "learning_rate": 4.9353212327567956e-05, "loss": 2.1553, "step": 4198 }, { "epoch": 0.9, "learning_rate": 4.913735815393239e-05, "loss": 2.1387, "step": 4199 }, { "epoch": 0.9, "learning_rate": 4.8921965161776606e-05, "loss": 2.2354, "step": 4200 }, { "epoch": 0.9, "learning_rate": 4.870703345556926e-05, "loss": 2.0342, "step": 4201 }, { "epoch": 0.9, "learning_rate": 4.849256313955497e-05, "loss": 2.1348, "step": 4202 }, { "epoch": 0.9, "learning_rate": 4.8278554317755076e-05, "loss": 2.0859, "step": 4203 }, { "epoch": 0.9, "learning_rate": 4.8065007093966664e-05, "loss": 2.0098, "step": 4204 }, { "epoch": 0.9, "learning_rate": 4.78519215717631e-05, "loss": 2.0635, "step": 4205 }, { "epoch": 0.9, "learning_rate": 4.763929785449383e-05, "loss": 2.1172, "step": 4206 }, { "epoch": 0.9, "learning_rate": 4.742713604528404e-05, "loss": 2.0264, "step": 4207 }, { "epoch": 0.9, "learning_rate": 4.7215436247035506e-05, "loss": 2.1143, "step": 4208 }, { "epoch": 0.9, "learning_rate": 4.700419856242555e-05, "loss": 2.0225, "step": 4209 }, { "epoch": 0.91, "learning_rate": 4.6793423093906885e-05, "loss": 2.3135, "step": 4210 }, { "epoch": 0.91, "learning_rate": 4.6583109943708954e-05, "loss": 2.2646, "step": 4211 }, { "epoch": 0.91, "learning_rate": 4.6373259213836395e-05, "loss": 2.0889, "step": 4212 }, { "epoch": 0.91, "learning_rate": 4.6163871006069554e-05, "loss": 2.1631, "step": 4213 }, { "epoch": 0.91, "learning_rate": 4.595494542196488e-05, "loss": 2.0098, "step": 4214 }, { "epoch": 0.91, "learning_rate": 4.5746482562854075e-05, "loss": 2.1787, "step": 4215 }, { "epoch": 0.91, "learning_rate": 4.5538482529844386e-05, "loss": 2.1367, "step": 4216 }, { "epoch": 0.91, "learning_rate": 4.533094542381877e-05, "loss": 2.0918, "step": 4217 }, { "epoch": 0.91, "learning_rate": 4.51238713454355e-05, "loss": 2.0127, "step": 4218 }, { "epoch": 0.91, "learning_rate": 4.4917260395128444e-05, "loss": 2.2344, "step": 4219 }, { "epoch": 0.91, "learning_rate": 4.47111126731069e-05, "loss": 2.2734, "step": 4220 }, { "epoch": 0.91, "learning_rate": 4.450542827935489e-05, "loss": 2.0273, "step": 4221 }, { "epoch": 0.91, "learning_rate": 4.430020731363271e-05, "loss": 2.2715, "step": 4222 }, { "epoch": 0.91, "learning_rate": 4.409544987547509e-05, "loss": 2.0703, "step": 4223 }, { "epoch": 0.91, "learning_rate": 4.389115606419203e-05, "loss": 2.3086, "step": 4224 }, { "epoch": 0.91, "learning_rate": 4.3687325978869376e-05, "loss": 2.1396, "step": 4225 }, { "epoch": 0.91, "learning_rate": 4.348395971836716e-05, "loss": 2.2051, "step": 4226 }, { "epoch": 0.91, "learning_rate": 4.3281057381320596e-05, "loss": 2.0869, "step": 4227 }, { "epoch": 0.91, "learning_rate": 4.307861906614063e-05, "loss": 2.2246, "step": 4228 }, { "epoch": 0.91, "learning_rate": 4.287664487101239e-05, "loss": 2.0957, "step": 4229 }, { "epoch": 0.91, "learning_rate": 4.267513489389596e-05, "loss": 2.0791, "step": 4230 }, { "epoch": 0.91, "learning_rate": 4.2474089232526824e-05, "loss": 2.1699, "step": 4231 }, { "epoch": 0.91, "learning_rate": 4.2273507984414536e-05, "loss": 2.0811, "step": 4232 }, { "epoch": 0.91, "learning_rate": 4.207339124684406e-05, "loss": 2.1514, "step": 4233 }, { "epoch": 0.91, "learning_rate": 4.187373911687464e-05, "loss": 2.1572, "step": 4234 }, { "epoch": 0.91, "learning_rate": 4.167455169134027e-05, "loss": 2.0469, "step": 4235 }, { "epoch": 0.91, "learning_rate": 4.147582906684977e-05, "loss": 2.0752, "step": 4236 }, { "epoch": 0.91, "learning_rate": 4.127757133978605e-05, "loss": 2.1279, "step": 4237 }, { "epoch": 0.91, "learning_rate": 4.107977860630696e-05, "loss": 2.0342, "step": 4238 }, { "epoch": 0.91, "learning_rate": 4.0882450962344886e-05, "loss": 2.1533, "step": 4239 }, { "epoch": 0.91, "learning_rate": 4.0685588503606376e-05, "loss": 2.1475, "step": 4240 }, { "epoch": 0.91, "learning_rate": 4.048919132557227e-05, "loss": 2.1328, "step": 4241 }, { "epoch": 0.91, "learning_rate": 4.029325952349816e-05, "loss": 1.9814, "step": 4242 }, { "epoch": 0.91, "learning_rate": 4.0097793192413355e-05, "loss": 2.2236, "step": 4243 }, { "epoch": 0.91, "learning_rate": 3.99027924271218e-05, "loss": 2.0479, "step": 4244 }, { "epoch": 0.91, "learning_rate": 3.970825732220185e-05, "loss": 1.9717, "step": 4245 }, { "epoch": 0.91, "learning_rate": 3.9514187972005455e-05, "loss": 2.0342, "step": 4246 }, { "epoch": 0.91, "learning_rate": 3.932058447065889e-05, "loss": 2.0527, "step": 4247 }, { "epoch": 0.91, "learning_rate": 3.91274469120626e-05, "loss": 2.0811, "step": 4248 }, { "epoch": 0.91, "learning_rate": 3.89347753898911e-05, "loss": 2.1543, "step": 4249 }, { "epoch": 0.91, "learning_rate": 3.8742569997592294e-05, "loss": 2.1963, "step": 4250 }, { "epoch": 0.91, "learning_rate": 3.855083082838895e-05, "loss": 2.0361, "step": 4251 }, { "epoch": 0.91, "learning_rate": 3.8359557975277015e-05, "loss": 2.1465, "step": 4252 }, { "epoch": 0.91, "learning_rate": 3.8168751531026394e-05, "loss": 2.1172, "step": 4253 }, { "epoch": 0.91, "learning_rate": 3.797841158818094e-05, "loss": 2.2031, "step": 4254 }, { "epoch": 0.91, "learning_rate": 3.778853823905792e-05, "loss": 1.9199, "step": 4255 }, { "epoch": 0.91, "learning_rate": 3.759913157574879e-05, "loss": 2.0938, "step": 4256 }, { "epoch": 0.92, "learning_rate": 3.74101916901185e-05, "loss": 2.0654, "step": 4257 }, { "epoch": 0.92, "learning_rate": 3.722171867380497e-05, "loss": 2.1709, "step": 4258 }, { "epoch": 0.92, "learning_rate": 3.703371261822075e-05, "loss": 2.1162, "step": 4259 }, { "epoch": 0.92, "learning_rate": 3.6846173614551113e-05, "loss": 2.0928, "step": 4260 }, { "epoch": 0.92, "learning_rate": 3.6659101753754974e-05, "loss": 2.1582, "step": 4261 }, { "epoch": 0.92, "learning_rate": 3.6472497126565085e-05, "loss": 2.1729, "step": 4262 }, { "epoch": 0.92, "learning_rate": 3.628635982348683e-05, "loss": 2.0693, "step": 4263 }, { "epoch": 0.92, "learning_rate": 3.610068993479976e-05, "loss": 2.1641, "step": 4264 }, { "epoch": 0.92, "learning_rate": 3.591548755055618e-05, "loss": 1.9814, "step": 4265 }, { "epoch": 0.92, "learning_rate": 3.5730752760581684e-05, "loss": 2.1699, "step": 4266 }, { "epoch": 0.92, "learning_rate": 3.554648565447527e-05, "loss": 2.1104, "step": 4267 }, { "epoch": 0.92, "learning_rate": 3.5362686321609216e-05, "loss": 2.1162, "step": 4268 }, { "epoch": 0.92, "learning_rate": 3.5179354851128445e-05, "loss": 2.3027, "step": 4269 }, { "epoch": 0.92, "learning_rate": 3.499649133195138e-05, "loss": 2.1934, "step": 4270 }, { "epoch": 0.92, "learning_rate": 3.481409585276929e-05, "loss": 2.1221, "step": 4271 }, { "epoch": 0.92, "learning_rate": 3.4632168502046426e-05, "loss": 2.1348, "step": 4272 }, { "epoch": 0.92, "learning_rate": 3.4450709368020416e-05, "loss": 1.9092, "step": 4273 }, { "epoch": 0.92, "learning_rate": 3.426971853870109e-05, "loss": 2.3047, "step": 4274 }, { "epoch": 0.92, "learning_rate": 3.408919610187145e-05, "loss": 2.0254, "step": 4275 }, { "epoch": 0.92, "learning_rate": 3.3909142145087554e-05, "loss": 2.085, "step": 4276 }, { "epoch": 0.92, "learning_rate": 3.372955675567812e-05, "loss": 2.2598, "step": 4277 }, { "epoch": 0.92, "learning_rate": 3.3550440020744365e-05, "loss": 2.0459, "step": 4278 }, { "epoch": 0.92, "learning_rate": 3.337179202716045e-05, "loss": 2.1738, "step": 4279 }, { "epoch": 0.92, "learning_rate": 3.319361286157285e-05, "loss": 2.0918, "step": 4280 }, { "epoch": 0.92, "learning_rate": 3.301590261040144e-05, "loss": 2.2129, "step": 4281 }, { "epoch": 0.92, "learning_rate": 3.283866135983771e-05, "loss": 2.2051, "step": 4282 }, { "epoch": 0.92, "learning_rate": 3.266188919584634e-05, "loss": 2.0537, "step": 4283 }, { "epoch": 0.92, "learning_rate": 3.2485586204164086e-05, "loss": 2.2373, "step": 4284 }, { "epoch": 0.92, "learning_rate": 3.2309752470300545e-05, "loss": 2.165, "step": 4285 }, { "epoch": 0.92, "learning_rate": 3.213438807953717e-05, "loss": 2.2969, "step": 4286 }, { "epoch": 0.92, "learning_rate": 3.195949311692847e-05, "loss": 2.1221, "step": 4287 }, { "epoch": 0.92, "learning_rate": 3.178506766730071e-05, "loss": 2.125, "step": 4288 }, { "epoch": 0.92, "learning_rate": 3.161111181525267e-05, "loss": 2.1055, "step": 4289 }, { "epoch": 0.92, "learning_rate": 3.14376256451554e-05, "loss": 2.1494, "step": 4290 }, { "epoch": 0.92, "learning_rate": 3.126460924115193e-05, "loss": 2.2744, "step": 4291 }, { "epoch": 0.92, "learning_rate": 3.109206268715758e-05, "loss": 2.3203, "step": 4292 }, { "epoch": 0.92, "learning_rate": 3.091998606686008e-05, "loss": 2.0967, "step": 4293 }, { "epoch": 0.92, "learning_rate": 3.074837946371878e-05, "loss": 2.2354, "step": 4294 }, { "epoch": 0.92, "learning_rate": 3.0577242960965314e-05, "loss": 2.25, "step": 4295 }, { "epoch": 0.92, "learning_rate": 3.0406576641603177e-05, "loss": 2.207, "step": 4296 }, { "epoch": 0.92, "learning_rate": 3.023638058840783e-05, "loss": 2.1133, "step": 4297 }, { "epoch": 0.92, "learning_rate": 3.006665488392679e-05, "loss": 2.1074, "step": 4298 }, { "epoch": 0.92, "learning_rate": 2.9897399610479437e-05, "loss": 2.1162, "step": 4299 }, { "epoch": 0.92, "learning_rate": 2.9728614850156656e-05, "loss": 2.0732, "step": 4300 }, { "epoch": 0.92, "learning_rate": 2.9560300684821627e-05, "loss": 2.252, "step": 4301 }, { "epoch": 0.92, "learning_rate": 2.9392457196108947e-05, "loss": 2.2266, "step": 4302 }, { "epoch": 0.93, "learning_rate": 2.9225084465425043e-05, "loss": 2.1475, "step": 4303 }, { "epoch": 0.93, "learning_rate": 2.905818257394799e-05, "loss": 2.0674, "step": 4304 }, { "epoch": 0.93, "learning_rate": 2.889175160262758e-05, "loss": 2.0762, "step": 4305 }, { "epoch": 0.93, "learning_rate": 2.8725791632184806e-05, "loss": 2.2266, "step": 4306 }, { "epoch": 0.93, "learning_rate": 2.8560302743112942e-05, "loss": 2.1904, "step": 4307 }, { "epoch": 0.93, "learning_rate": 2.839528501567623e-05, "loss": 2.1611, "step": 4308 }, { "epoch": 0.93, "learning_rate": 2.8230738529910427e-05, "loss": 2.2061, "step": 4309 }, { "epoch": 0.93, "learning_rate": 2.8066663365623136e-05, "loss": 2.0547, "step": 4310 }, { "epoch": 0.93, "learning_rate": 2.7903059602392588e-05, "loss": 2.1797, "step": 4311 }, { "epoch": 0.93, "learning_rate": 2.7739927319569314e-05, "loss": 2.0518, "step": 4312 }, { "epoch": 0.93, "learning_rate": 2.7577266596274574e-05, "loss": 2.0176, "step": 4313 }, { "epoch": 0.93, "learning_rate": 2.7415077511401043e-05, "loss": 2.0273, "step": 4314 }, { "epoch": 0.93, "learning_rate": 2.725336014361268e-05, "loss": 2.1455, "step": 4315 }, { "epoch": 0.93, "learning_rate": 2.709211457134464e-05, "loss": 1.9629, "step": 4316 }, { "epoch": 0.93, "learning_rate": 2.6931340872803024e-05, "loss": 2.0068, "step": 4317 }, { "epoch": 0.93, "learning_rate": 2.677103912596568e-05, "loss": 2.25, "step": 4318 }, { "epoch": 0.93, "learning_rate": 2.6611209408580862e-05, "loss": 2.1416, "step": 4319 }, { "epoch": 0.93, "learning_rate": 2.6451851798168337e-05, "loss": 2.0879, "step": 4320 }, { "epoch": 0.93, "learning_rate": 2.6292966372018833e-05, "loss": 2.1846, "step": 4321 }, { "epoch": 0.93, "learning_rate": 2.6134553207193712e-05, "loss": 2.1123, "step": 4322 }, { "epoch": 0.93, "learning_rate": 2.597661238052551e-05, "loss": 2.0635, "step": 4323 }, { "epoch": 0.93, "learning_rate": 2.5819143968618065e-05, "loss": 2.0527, "step": 4324 }, { "epoch": 0.93, "learning_rate": 2.56621480478455e-05, "loss": 1.9307, "step": 4325 }, { "epoch": 0.93, "learning_rate": 2.5505624694353023e-05, "loss": 2.0635, "step": 4326 }, { "epoch": 0.93, "learning_rate": 2.5349573984056574e-05, "loss": 2.1035, "step": 4327 }, { "epoch": 0.93, "learning_rate": 2.5193995992642938e-05, "loss": 2.0107, "step": 4328 }, { "epoch": 0.93, "learning_rate": 2.5038890795569537e-05, "loss": 2.2109, "step": 4329 }, { "epoch": 0.93, "learning_rate": 2.4884258468064745e-05, "loss": 2.2461, "step": 4330 }, { "epoch": 0.93, "learning_rate": 2.473009908512702e-05, "loss": 2.3467, "step": 4331 }, { "epoch": 0.93, "learning_rate": 2.457641272152611e-05, "loss": 1.9746, "step": 4332 }, { "epoch": 0.93, "learning_rate": 2.4423199451801825e-05, "loss": 2.1641, "step": 4333 }, { "epoch": 0.93, "learning_rate": 2.427045935026462e-05, "loss": 2.1455, "step": 4334 }, { "epoch": 0.93, "learning_rate": 2.4118192490995892e-05, "loss": 2.2686, "step": 4335 }, { "epoch": 0.93, "learning_rate": 2.3966398947846913e-05, "loss": 2.1006, "step": 4336 }, { "epoch": 0.93, "learning_rate": 2.381507879443967e-05, "loss": 2.0742, "step": 4337 }, { "epoch": 0.93, "learning_rate": 2.3664232104166462e-05, "loss": 2.0635, "step": 4338 }, { "epoch": 0.93, "learning_rate": 2.3513858950190204e-05, "loss": 2.2031, "step": 4339 }, { "epoch": 0.93, "learning_rate": 2.336395940544378e-05, "loss": 2.0859, "step": 4340 }, { "epoch": 0.93, "learning_rate": 2.321453354263059e-05, "loss": 2.1738, "step": 4341 }, { "epoch": 0.93, "learning_rate": 2.3065581434224325e-05, "loss": 1.9902, "step": 4342 }, { "epoch": 0.93, "learning_rate": 2.2917103152468644e-05, "loss": 2.2568, "step": 4343 }, { "epoch": 0.93, "learning_rate": 2.2769098769377607e-05, "loss": 2.0381, "step": 4344 }, { "epoch": 0.93, "learning_rate": 2.2621568356735457e-05, "loss": 2.3291, "step": 4345 }, { "epoch": 0.93, "learning_rate": 2.24745119860964e-05, "loss": 2.1689, "step": 4346 }, { "epoch": 0.93, "learning_rate": 2.2327929728784945e-05, "loss": 2.1631, "step": 4347 }, { "epoch": 0.93, "learning_rate": 2.218182165589522e-05, "loss": 2.0146, "step": 4348 }, { "epoch": 0.93, "learning_rate": 2.2036187838291976e-05, "loss": 2.0342, "step": 4349 }, { "epoch": 0.94, "learning_rate": 2.1891028346609386e-05, "loss": 2.1035, "step": 4350 }, { "epoch": 0.94, "learning_rate": 2.1746343251251798e-05, "loss": 2.1719, "step": 4351 }, { "epoch": 0.94, "learning_rate": 2.1602132622393743e-05, "loss": 2.1895, "step": 4352 }, { "epoch": 0.94, "learning_rate": 2.1458396529979164e-05, "loss": 2.2852, "step": 4353 }, { "epoch": 0.94, "learning_rate": 2.1315135043721955e-05, "loss": 2.208, "step": 4354 }, { "epoch": 0.94, "learning_rate": 2.1172348233105986e-05, "loss": 2.0303, "step": 4355 }, { "epoch": 0.94, "learning_rate": 2.1030036167384968e-05, "loss": 2.0312, "step": 4356 }, { "epoch": 0.94, "learning_rate": 2.0888198915582025e-05, "loss": 2.3027, "step": 4357 }, { "epoch": 0.94, "learning_rate": 2.0746836546490456e-05, "loss": 2.2012, "step": 4358 }, { "epoch": 0.94, "learning_rate": 2.060594912867253e-05, "loss": 2.1719, "step": 4359 }, { "epoch": 0.94, "learning_rate": 2.046553673046092e-05, "loss": 2.2119, "step": 4360 }, { "epoch": 0.94, "learning_rate": 2.0325599419957486e-05, "loss": 2.0537, "step": 4361 }, { "epoch": 0.94, "learning_rate": 2.018613726503371e-05, "loss": 2.1289, "step": 4362 }, { "epoch": 0.94, "learning_rate": 2.004715033333071e-05, "loss": 2.1914, "step": 4363 }, { "epoch": 0.94, "learning_rate": 1.9908638692259006e-05, "loss": 2.1416, "step": 4364 }, { "epoch": 0.94, "learning_rate": 1.977060240899864e-05, "loss": 2.1445, "step": 4365 }, { "epoch": 0.94, "learning_rate": 1.9633041550499166e-05, "loss": 2.0967, "step": 4366 }, { "epoch": 0.94, "learning_rate": 1.9495956183479548e-05, "loss": 2.0332, "step": 4367 }, { "epoch": 0.94, "learning_rate": 1.9359346374428043e-05, "loss": 1.959, "step": 4368 }, { "epoch": 0.94, "learning_rate": 1.9223212189602103e-05, "loss": 2.1367, "step": 4369 }, { "epoch": 0.94, "learning_rate": 1.9087553695029014e-05, "loss": 2.2207, "step": 4370 }, { "epoch": 0.94, "learning_rate": 1.8952370956504705e-05, "loss": 2.0635, "step": 4371 }, { "epoch": 0.94, "learning_rate": 1.881766403959484e-05, "loss": 1.9912, "step": 4372 }, { "epoch": 0.94, "learning_rate": 1.8683433009634043e-05, "loss": 2.1133, "step": 4373 }, { "epoch": 0.94, "learning_rate": 1.854967793172635e-05, "loss": 2.2793, "step": 4374 }, { "epoch": 0.94, "learning_rate": 1.8416398870744754e-05, "loss": 2.2051, "step": 4375 }, { "epoch": 0.94, "learning_rate": 1.8283595891331217e-05, "loss": 2.1816, "step": 4376 }, { "epoch": 0.94, "learning_rate": 1.8151269057897435e-05, "loss": 2.0654, "step": 4377 }, { "epoch": 0.94, "learning_rate": 1.80194184346234e-05, "loss": 2.1641, "step": 4378 }, { "epoch": 0.94, "learning_rate": 1.7888044085458634e-05, "loss": 2.1074, "step": 4379 }, { "epoch": 0.94, "learning_rate": 1.775714607412138e-05, "loss": 2.1279, "step": 4380 }, { "epoch": 0.94, "learning_rate": 1.7626724464099185e-05, "loss": 1.9922, "step": 4381 }, { "epoch": 0.94, "learning_rate": 1.7496779318648015e-05, "loss": 2.0703, "step": 4382 }, { "epoch": 0.94, "learning_rate": 1.7367310700793447e-05, "loss": 2.0986, "step": 4383 }, { "epoch": 0.94, "learning_rate": 1.7238318673329366e-05, "loss": 2.1553, "step": 4384 }, { "epoch": 0.94, "learning_rate": 1.7109803298818504e-05, "loss": 2.2988, "step": 4385 }, { "epoch": 0.94, "learning_rate": 1.6981764639592778e-05, "loss": 2.292, "step": 4386 }, { "epoch": 0.94, "learning_rate": 1.6854202757752735e-05, "loss": 2.0811, "step": 4387 }, { "epoch": 0.94, "learning_rate": 1.672711771516744e-05, "loss": 2.0664, "step": 4388 }, { "epoch": 0.94, "learning_rate": 1.6600509573475254e-05, "loss": 2.0342, "step": 4389 }, { "epoch": 0.94, "learning_rate": 1.647437839408239e-05, "loss": 2.1201, "step": 4390 }, { "epoch": 0.94, "learning_rate": 1.634872423816458e-05, "loss": 2.0732, "step": 4391 }, { "epoch": 0.94, "learning_rate": 1.6223547166665743e-05, "loss": 1.9668, "step": 4392 }, { "epoch": 0.94, "learning_rate": 1.6098847240298308e-05, "loss": 2.1992, "step": 4393 }, { "epoch": 0.94, "learning_rate": 1.5974624519543677e-05, "loss": 1.999, "step": 4394 }, { "epoch": 0.94, "learning_rate": 1.5850879064651545e-05, "loss": 2.0977, "step": 4395 }, { "epoch": 0.95, "learning_rate": 1.5727610935640013e-05, "loss": 2.0137, "step": 4396 }, { "epoch": 0.95, "learning_rate": 1.560482019229592e-05, "loss": 1.96, "step": 4397 }, { "epoch": 0.95, "learning_rate": 1.548250689417452e-05, "loss": 2.2432, "step": 4398 }, { "epoch": 0.95, "learning_rate": 1.5360671100599356e-05, "loss": 2.1328, "step": 4399 }, { "epoch": 0.95, "learning_rate": 1.5239312870662603e-05, "loss": 2.2832, "step": 4400 }, { "epoch": 0.95, "learning_rate": 1.5118432263224513e-05, "loss": 2.1934, "step": 4401 }, { "epoch": 0.95, "learning_rate": 1.4998029336913743e-05, "loss": 2.084, "step": 4402 }, { "epoch": 0.95, "learning_rate": 1.4878104150127691e-05, "loss": 2.0791, "step": 4403 }, { "epoch": 0.95, "learning_rate": 1.4758656761031608e-05, "loss": 2.0859, "step": 4404 }, { "epoch": 0.95, "learning_rate": 1.463968722755904e-05, "loss": 2.1807, "step": 4405 }, { "epoch": 0.95, "learning_rate": 1.4521195607411942e-05, "loss": 2.1191, "step": 4406 }, { "epoch": 0.95, "learning_rate": 1.440318195806023e-05, "loss": 2.0361, "step": 4407 }, { "epoch": 0.95, "learning_rate": 1.4285646336742341e-05, "loss": 2.1133, "step": 4408 }, { "epoch": 0.95, "learning_rate": 1.4168588800464566e-05, "loss": 2.126, "step": 4409 }, { "epoch": 0.95, "learning_rate": 1.4052009406001375e-05, "loss": 2.2109, "step": 4410 }, { "epoch": 0.95, "learning_rate": 1.3935908209895542e-05, "loss": 2.1787, "step": 4411 }, { "epoch": 0.95, "learning_rate": 1.3820285268457578e-05, "loss": 2.0654, "step": 4412 }, { "epoch": 0.95, "learning_rate": 1.370514063776629e-05, "loss": 2.126, "step": 4413 }, { "epoch": 0.95, "learning_rate": 1.3590474373668338e-05, "loss": 1.9688, "step": 4414 }, { "epoch": 0.95, "learning_rate": 1.347628653177857e-05, "loss": 2.1777, "step": 4415 }, { "epoch": 0.95, "learning_rate": 1.3362577167479573e-05, "loss": 2.2266, "step": 4416 }, { "epoch": 0.95, "learning_rate": 1.3249346335922008e-05, "loss": 2.167, "step": 4417 }, { "epoch": 0.95, "learning_rate": 1.313659409202439e-05, "loss": 2.0518, "step": 4418 }, { "epoch": 0.95, "learning_rate": 1.3024320490472862e-05, "loss": 2.1572, "step": 4419 }, { "epoch": 0.95, "learning_rate": 1.2912525585722091e-05, "loss": 2.1113, "step": 4420 }, { "epoch": 0.95, "learning_rate": 1.280120943199381e-05, "loss": 2.0967, "step": 4421 }, { "epoch": 0.95, "learning_rate": 1.2690372083278056e-05, "loss": 2.0693, "step": 4422 }, { "epoch": 0.95, "learning_rate": 1.2580013593332495e-05, "loss": 2.3184, "step": 4423 }, { "epoch": 0.95, "learning_rate": 1.2470134015682311e-05, "loss": 2.0781, "step": 4424 }, { "epoch": 0.95, "learning_rate": 1.2360733403620761e-05, "loss": 2.2383, "step": 4425 }, { "epoch": 0.95, "learning_rate": 1.2251811810208736e-05, "loss": 1.9893, "step": 4426 }, { "epoch": 0.95, "learning_rate": 1.2143369288274531e-05, "loss": 2.2207, "step": 4427 }, { "epoch": 0.95, "learning_rate": 1.2035405890414297e-05, "loss": 2.1934, "step": 4428 }, { "epoch": 0.95, "learning_rate": 1.1927921668991815e-05, "loss": 2.0215, "step": 4429 }, { "epoch": 0.95, "learning_rate": 1.1820916676138383e-05, "loss": 2.0137, "step": 4430 }, { "epoch": 0.95, "learning_rate": 1.171439096375304e-05, "loss": 1.9199, "step": 4431 }, { "epoch": 0.95, "learning_rate": 1.1608344583502128e-05, "loss": 2.1309, "step": 4432 }, { "epoch": 0.95, "learning_rate": 1.1502777586819391e-05, "loss": 2.0439, "step": 4433 }, { "epoch": 0.95, "learning_rate": 1.1397690024906648e-05, "loss": 1.9355, "step": 4434 }, { "epoch": 0.95, "learning_rate": 1.1293081948732575e-05, "loss": 2.165, "step": 4435 }, { "epoch": 0.95, "learning_rate": 1.1188953409033476e-05, "loss": 2.0635, "step": 4436 }, { "epoch": 0.95, "learning_rate": 1.1085304456313394e-05, "loss": 2.1484, "step": 4437 }, { "epoch": 0.95, "learning_rate": 1.0982135140843231e-05, "loss": 2.0957, "step": 4438 }, { "epoch": 0.95, "learning_rate": 1.0879445512661624e-05, "loss": 2.0645, "step": 4439 }, { "epoch": 0.95, "learning_rate": 1.077723562157451e-05, "loss": 2.1426, "step": 4440 }, { "epoch": 0.95, "learning_rate": 1.0675505517155126e-05, "loss": 2.1104, "step": 4441 }, { "epoch": 0.95, "learning_rate": 1.0574255248743891e-05, "loss": 2.0654, "step": 4442 }, { "epoch": 0.96, "learning_rate": 1.0473484865448525e-05, "loss": 1.9648, "step": 4443 }, { "epoch": 0.96, "learning_rate": 1.0373194416144039e-05, "loss": 2.001, "step": 4444 }, { "epoch": 0.96, "learning_rate": 1.0273383949472859e-05, "loss": 2.1816, "step": 4445 }, { "epoch": 0.96, "learning_rate": 1.0174053513844373e-05, "loss": 2.2441, "step": 4446 }, { "epoch": 0.96, "learning_rate": 1.0075203157435042e-05, "loss": 2.1348, "step": 4447 }, { "epoch": 0.96, "learning_rate": 9.976832928188961e-06, "loss": 2.1465, "step": 4448 }, { "epoch": 0.96, "learning_rate": 9.878942873816854e-06, "loss": 2.0117, "step": 4449 }, { "epoch": 0.96, "learning_rate": 9.78153304179663e-06, "loss": 1.9766, "step": 4450 }, { "epoch": 0.96, "learning_rate": 9.684603479373611e-06, "loss": 2.0977, "step": 4451 }, { "epoch": 0.96, "learning_rate": 9.588154233559853e-06, "loss": 2.2109, "step": 4452 }, { "epoch": 0.96, "learning_rate": 9.49218535113472e-06, "loss": 2.2188, "step": 4453 }, { "epoch": 0.96, "learning_rate": 9.396696878644195e-06, "loss": 2.1553, "step": 4454 }, { "epoch": 0.96, "learning_rate": 9.30168886240168e-06, "loss": 2.1689, "step": 4455 }, { "epoch": 0.96, "learning_rate": 9.207161348487314e-06, "loss": 2.2188, "step": 4456 }, { "epoch": 0.96, "learning_rate": 9.1131143827482e-06, "loss": 2.3613, "step": 4457 }, { "epoch": 0.96, "learning_rate": 9.019548010798628e-06, "loss": 2.0938, "step": 4458 }, { "epoch": 0.96, "learning_rate": 8.926462278019298e-06, "loss": 2.0303, "step": 4459 }, { "epoch": 0.96, "learning_rate": 8.833857229558207e-06, "loss": 2.1729, "step": 4460 }, { "epoch": 0.96, "learning_rate": 8.741732910330092e-06, "loss": 2.2344, "step": 4461 }, { "epoch": 0.96, "learning_rate": 8.650089365016433e-06, "loss": 2.165, "step": 4462 }, { "epoch": 0.96, "learning_rate": 8.558926638065789e-06, "loss": 2.0771, "step": 4463 }, { "epoch": 0.96, "learning_rate": 8.468244773693123e-06, "loss": 2.0605, "step": 4464 }, { "epoch": 0.96, "learning_rate": 8.378043815880476e-06, "loss": 2.251, "step": 4465 }, { "epoch": 0.96, "learning_rate": 8.28832380837663e-06, "loss": 2.1758, "step": 4466 }, { "epoch": 0.96, "learning_rate": 8.199084794696888e-06, "loss": 2.0908, "step": 4467 }, { "epoch": 0.96, "learning_rate": 8.110326818123515e-06, "loss": 2.1533, "step": 4468 }, { "epoch": 0.96, "learning_rate": 8.0220499217053e-06, "loss": 2.2578, "step": 4469 }, { "epoch": 0.96, "learning_rate": 7.93425414825777e-06, "loss": 2.1074, "step": 4470 }, { "epoch": 0.96, "learning_rate": 7.846939540363086e-06, "loss": 2.0889, "step": 4471 }, { "epoch": 0.96, "learning_rate": 7.760106140369928e-06, "loss": 2.1758, "step": 4472 }, { "epoch": 0.96, "learning_rate": 7.673753990393828e-06, "loss": 2.1602, "step": 4473 }, { "epoch": 0.96, "learning_rate": 7.587883132316842e-06, "loss": 2.166, "step": 4474 }, { "epoch": 0.96, "learning_rate": 7.50249360778732e-06, "loss": 2.248, "step": 4475 }, { "epoch": 0.96, "learning_rate": 7.417585458220466e-06, "loss": 2.1172, "step": 4476 }, { "epoch": 0.96, "learning_rate": 7.333158724797784e-06, "loss": 2.1338, "step": 4477 }, { "epoch": 0.96, "learning_rate": 7.249213448467518e-06, "loss": 2.0986, "step": 4478 }, { "epoch": 0.96, "learning_rate": 7.165749669944433e-06, "loss": 2.2021, "step": 4479 }, { "epoch": 0.96, "learning_rate": 7.0827674297093694e-06, "loss": 2.1992, "step": 4480 }, { "epoch": 0.96, "learning_rate": 7.0002667680098e-06, "loss": 2.1914, "step": 4481 }, { "epoch": 0.96, "learning_rate": 6.918247724859938e-06, "loss": 2.2783, "step": 4482 }, { "epoch": 0.96, "learning_rate": 6.836710340039965e-06, "loss": 2.0273, "step": 4483 }, { "epoch": 0.96, "learning_rate": 6.755654653096688e-06, "loss": 2.1436, "step": 4484 }, { "epoch": 0.96, "learning_rate": 6.675080703343328e-06, "loss": 2.0762, "step": 4485 }, { "epoch": 0.96, "learning_rate": 6.594988529859181e-06, "loss": 2.1162, "step": 4486 }, { "epoch": 0.96, "learning_rate": 6.515378171490172e-06, "loss": 2.1729, "step": 4487 }, { "epoch": 0.96, "learning_rate": 6.436249666848415e-06, "loss": 2.2021, "step": 4488 }, { "epoch": 0.97, "learning_rate": 6.357603054312211e-06, "loss": 1.8857, "step": 4489 }, { "epoch": 0.97, "learning_rate": 6.279438372026491e-06, "loss": 2.209, "step": 4490 }, { "epoch": 0.97, "learning_rate": 6.201755657901931e-06, "loss": 2.1162, "step": 4491 }, { "epoch": 0.97, "learning_rate": 6.124554949615835e-06, "loss": 2.0371, "step": 4492 }, { "epoch": 0.97, "learning_rate": 6.0478362846116965e-06, "loss": 2.1172, "step": 4493 }, { "epoch": 0.97, "learning_rate": 5.971599700098973e-06, "loss": 2.3555, "step": 4494 }, { "epoch": 0.97, "learning_rate": 5.895845233053643e-06, "loss": 2.208, "step": 4495 }, { "epoch": 0.97, "learning_rate": 5.820572920217426e-06, "loss": 2.0029, "step": 4496 }, { "epoch": 0.97, "learning_rate": 5.745782798098676e-06, "loss": 2.1895, "step": 4497 }, { "epoch": 0.97, "learning_rate": 5.6714749029713745e-06, "loss": 2.083, "step": 4498 }, { "epoch": 0.97, "learning_rate": 5.597649270876138e-06, "loss": 2.2598, "step": 4499 }, { "epoch": 0.97, "learning_rate": 5.524305937619212e-06, "loss": 2.2422, "step": 4500 }, { "epoch": 0.97, "learning_rate": 5.451444938773253e-06, "loss": 2.1084, "step": 4501 }, { "epoch": 0.97, "learning_rate": 5.379066309676772e-06, "loss": 2.1318, "step": 4502 }, { "epoch": 0.97, "learning_rate": 5.307170085434354e-06, "loss": 2.2412, "step": 4503 }, { "epoch": 0.97, "learning_rate": 5.235756300916772e-06, "loss": 2.2119, "step": 4504 }, { "epoch": 0.97, "learning_rate": 5.164824990760764e-06, "loss": 2.2285, "step": 4505 }, { "epoch": 0.97, "learning_rate": 5.09437618936881e-06, "loss": 2.1201, "step": 4506 }, { "epoch": 0.97, "learning_rate": 5.0244099309095795e-06, "loss": 2.0801, "step": 4507 }, { "epoch": 0.97, "learning_rate": 4.954926249317815e-06, "loss": 2.0859, "step": 4508 }, { "epoch": 0.97, "learning_rate": 4.885925178293782e-06, "loss": 2.0566, "step": 4509 }, { "epoch": 0.97, "learning_rate": 4.8174067513042655e-06, "loss": 2.083, "step": 4510 }, { "epoch": 0.97, "learning_rate": 4.74937100158157e-06, "loss": 2.1182, "step": 4511 }, { "epoch": 0.97, "learning_rate": 4.681817962123747e-06, "loss": 2.1895, "step": 4512 }, { "epoch": 0.97, "learning_rate": 4.614747665695251e-06, "loss": 2.0488, "step": 4513 }, { "epoch": 0.97, "learning_rate": 4.548160144825841e-06, "loss": 2.1328, "step": 4514 }, { "epoch": 0.97, "learning_rate": 4.4820554318115715e-06, "loss": 2.0166, "step": 4515 }, { "epoch": 0.97, "learning_rate": 4.416433558714128e-06, "loss": 2.1377, "step": 4516 }, { "epoch": 0.97, "learning_rate": 4.35129455736083e-06, "loss": 2.1367, "step": 4517 }, { "epoch": 0.97, "learning_rate": 4.2866384593451825e-06, "loss": 2.084, "step": 4518 }, { "epoch": 0.97, "learning_rate": 4.222465296026212e-06, "loss": 2.2285, "step": 4519 }, { "epoch": 0.97, "learning_rate": 4.1587750985288e-06, "loss": 2.0771, "step": 4520 }, { "epoch": 0.97, "learning_rate": 4.09556789774368e-06, "loss": 1.9912, "step": 4521 }, { "epoch": 0.97, "learning_rate": 4.032843724326996e-06, "loss": 2.1289, "step": 4522 }, { "epoch": 0.97, "learning_rate": 3.970602608700969e-06, "loss": 2.0498, "step": 4523 }, { "epoch": 0.97, "learning_rate": 3.90884458105345e-06, "loss": 2.1211, "step": 4524 }, { "epoch": 0.97, "learning_rate": 3.847569671337703e-06, "loss": 2.1338, "step": 4525 }, { "epoch": 0.97, "learning_rate": 3.786777909273176e-06, "loss": 2.0869, "step": 4526 }, { "epoch": 0.97, "learning_rate": 3.726469324344617e-06, "loss": 2.1475, "step": 4527 }, { "epoch": 0.97, "learning_rate": 3.666643945802406e-06, "loss": 2.0957, "step": 4528 }, { "epoch": 0.97, "learning_rate": 3.6073018026627767e-06, "loss": 2.2148, "step": 4529 }, { "epoch": 0.97, "learning_rate": 3.5484429237075954e-06, "loss": 2.1895, "step": 4530 }, { "epoch": 0.97, "learning_rate": 3.4900673374840262e-06, "loss": 2.1025, "step": 4531 }, { "epoch": 0.97, "learning_rate": 3.4321750723050883e-06, "loss": 1.9863, "step": 4532 }, { "epoch": 0.97, "learning_rate": 3.3747661562493202e-06, "loss": 2.0674, "step": 4533 }, { "epoch": 0.97, "learning_rate": 3.3178406171608944e-06, "loss": 1.9746, "step": 4534 }, { "epoch": 0.97, "learning_rate": 3.2613984826495023e-06, "loss": 2.1006, "step": 4535 }, { "epoch": 0.98, "learning_rate": 3.205439780090358e-06, "loss": 2.1104, "step": 4536 }, { "epoch": 0.98, "learning_rate": 3.1499645366240837e-06, "loss": 2.1104, "step": 4537 }, { "epoch": 0.98, "learning_rate": 3.094972779157046e-06, "loss": 2.0215, "step": 4538 }, { "epoch": 0.98, "learning_rate": 3.0404645343610205e-06, "loss": 2.1318, "step": 4539 }, { "epoch": 0.98, "learning_rate": 2.986439828673082e-06, "loss": 2.2529, "step": 4540 }, { "epoch": 0.98, "learning_rate": 2.9328986882961594e-06, "loss": 2.2021, "step": 4541 }, { "epoch": 0.98, "learning_rate": 2.8798411391983694e-06, "loss": 2.082, "step": 4542 }, { "epoch": 0.98, "learning_rate": 2.827267207113349e-06, "loss": 2.2012, "step": 4543 }, { "epoch": 0.98, "learning_rate": 2.7751769175401454e-06, "loss": 2.1523, "step": 4544 }, { "epoch": 0.98, "learning_rate": 2.7235702957433273e-06, "loss": 2.1846, "step": 4545 }, { "epoch": 0.98, "learning_rate": 2.6724473667527617e-06, "loss": 2.2129, "step": 4546 }, { "epoch": 0.98, "learning_rate": 2.6218081553638363e-06, "loss": 2.1162, "step": 4547 }, { "epoch": 0.98, "learning_rate": 2.571652686137238e-06, "loss": 2.1729, "step": 4548 }, { "epoch": 0.98, "learning_rate": 2.521980983399064e-06, "loss": 2.123, "step": 4549 }, { "epoch": 0.98, "learning_rate": 2.4727930712408198e-06, "loss": 2.1377, "step": 4550 }, { "epoch": 0.98, "learning_rate": 2.4240889735192006e-06, "loss": 2.3311, "step": 4551 }, { "epoch": 0.98, "learning_rate": 2.3758687138564218e-06, "loss": 2.0498, "step": 4552 }, { "epoch": 0.98, "learning_rate": 2.328132315639997e-06, "loss": 2.2031, "step": 4553 }, { "epoch": 0.98, "learning_rate": 2.2808798020227393e-06, "loss": 2.0762, "step": 4554 }, { "epoch": 0.98, "learning_rate": 2.23411119592265e-06, "loss": 2.2148, "step": 4555 }, { "epoch": 0.98, "learning_rate": 2.1878265200233617e-06, "loss": 2.1406, "step": 4556 }, { "epoch": 0.98, "learning_rate": 2.1420257967734723e-06, "loss": 2.1973, "step": 4557 }, { "epoch": 0.98, "learning_rate": 2.09670904838688e-06, "loss": 2.1416, "step": 4558 }, { "epoch": 0.98, "learning_rate": 2.0518762968430025e-06, "loss": 2.1562, "step": 4559 }, { "epoch": 0.98, "learning_rate": 2.0075275638862243e-06, "loss": 2.1221, "step": 4560 }, { "epoch": 0.98, "learning_rate": 1.9636628710263394e-06, "loss": 2.1387, "step": 4561 }, { "epoch": 0.98, "learning_rate": 1.920282239538218e-06, "loss": 2.1445, "step": 4562 }, { "epoch": 0.98, "learning_rate": 1.8773856904621412e-06, "loss": 2.2812, "step": 4563 }, { "epoch": 0.98, "learning_rate": 1.8349732446036882e-06, "loss": 2.1846, "step": 4564 }, { "epoch": 0.98, "learning_rate": 1.7930449225331824e-06, "loss": 2.1484, "step": 4565 }, { "epoch": 0.98, "learning_rate": 1.75160074458669e-06, "loss": 1.9814, "step": 4566 }, { "epoch": 0.98, "learning_rate": 1.7106407308650207e-06, "loss": 2.0791, "step": 4567 }, { "epoch": 0.98, "learning_rate": 1.6701649012345055e-06, "loss": 2.1992, "step": 4568 }, { "epoch": 0.98, "learning_rate": 1.6301732753263298e-06, "loss": 2.2285, "step": 4569 }, { "epoch": 0.98, "learning_rate": 1.5906658725370893e-06, "loss": 2.1533, "step": 4570 }, { "epoch": 0.98, "learning_rate": 1.551642712028345e-06, "loss": 2.1514, "step": 4571 }, { "epoch": 0.98, "learning_rate": 1.513103812726957e-06, "loss": 2.1914, "step": 4572 }, { "epoch": 0.98, "learning_rate": 1.4750491933247513e-06, "loss": 1.9385, "step": 4573 }, { "epoch": 0.98, "learning_rate": 1.4374788722787414e-06, "loss": 2.2188, "step": 4574 }, { "epoch": 0.98, "learning_rate": 1.400392867811129e-06, "loss": 2.1865, "step": 4575 }, { "epoch": 0.98, "learning_rate": 1.3637911979090812e-06, "loss": 2.1377, "step": 4576 }, { "epoch": 0.98, "learning_rate": 1.3276738803248423e-06, "loss": 2.0254, "step": 4577 }, { "epoch": 0.98, "learning_rate": 1.2920409325759552e-06, "loss": 2.2061, "step": 4578 }, { "epoch": 0.98, "learning_rate": 1.2568923719447068e-06, "loss": 2.2256, "step": 4579 }, { "epoch": 0.98, "learning_rate": 1.2222282154787933e-06, "loss": 2.0332, "step": 4580 }, { "epoch": 0.98, "learning_rate": 1.1880484799907665e-06, "loss": 2.1914, "step": 4581 }, { "epoch": 0.99, "learning_rate": 1.1543531820582542e-06, "loss": 2.1396, "step": 4582 }, { "epoch": 0.99, "learning_rate": 1.1211423380239615e-06, "loss": 2.1387, "step": 4583 }, { "epoch": 0.99, "learning_rate": 1.0884159639955593e-06, "loss": 2.2324, "step": 4584 }, { "epoch": 0.99, "learning_rate": 1.056174075845795e-06, "loss": 2.1045, "step": 4585 }, { "epoch": 0.99, "learning_rate": 1.0244166892124928e-06, "loss": 1.9863, "step": 4586 }, { "epoch": 0.99, "learning_rate": 9.931438194983322e-07, "loss": 2.1221, "step": 4587 }, { "epoch": 0.99, "learning_rate": 9.6235548187118e-07, "loss": 2.166, "step": 4588 }, { "epoch": 0.99, "learning_rate": 9.320516912637577e-07, "loss": 2.2256, "step": 4589 }, { "epoch": 0.99, "learning_rate": 9.022324623737532e-07, "loss": 2.0586, "step": 4590 }, { "epoch": 0.99, "learning_rate": 8.728978096640417e-07, "loss": 2.1152, "step": 4591 }, { "epoch": 0.99, "learning_rate": 8.440477473622421e-07, "loss": 2.0625, "step": 4592 }, { "epoch": 0.99, "learning_rate": 8.156822894610505e-07, "loss": 2.043, "step": 4593 }, { "epoch": 0.99, "learning_rate": 7.878014497181285e-07, "loss": 2.1514, "step": 4594 }, { "epoch": 0.99, "learning_rate": 7.604052416559925e-07, "loss": 2.0273, "step": 4595 }, { "epoch": 0.99, "learning_rate": 7.334936785622359e-07, "loss": 2.0605, "step": 4596 }, { "epoch": 0.99, "learning_rate": 7.070667734894176e-07, "loss": 1.9609, "step": 4597 }, { "epoch": 0.99, "learning_rate": 6.811245392548405e-07, "loss": 2.1641, "step": 4598 }, { "epoch": 0.99, "learning_rate": 6.556669884408839e-07, "loss": 2.1582, "step": 4599 }, { "epoch": 0.99, "learning_rate": 6.306941333946715e-07, "loss": 2.2041, "step": 4600 }, { "epoch": 0.99, "learning_rate": 6.062059862286251e-07, "loss": 2.0596, "step": 4601 }, { "epoch": 0.99, "learning_rate": 5.822025588196889e-07, "loss": 2.0332, "step": 4602 }, { "epoch": 0.99, "learning_rate": 5.586838628099944e-07, "loss": 2.1494, "step": 4603 }, { "epoch": 0.99, "learning_rate": 5.356499096061951e-07, "loss": 1.9873, "step": 4604 }, { "epoch": 0.99, "learning_rate": 5.131007103802432e-07, "loss": 2.0752, "step": 4605 }, { "epoch": 0.99, "learning_rate": 4.910362760688347e-07, "loss": 2.1221, "step": 4606 }, { "epoch": 0.99, "learning_rate": 4.6945661737340937e-07, "loss": 2.0537, "step": 4607 }, { "epoch": 0.99, "learning_rate": 4.4836174476037274e-07, "loss": 2.2676, "step": 4608 }, { "epoch": 0.99, "learning_rate": 4.277516684612071e-07, "loss": 2.2852, "step": 4609 }, { "epoch": 0.99, "learning_rate": 4.0762639847191643e-07, "loss": 2.2119, "step": 4610 }, { "epoch": 0.99, "learning_rate": 3.8798594455369265e-07, "loss": 2.1221, "step": 4611 }, { "epoch": 0.99, "learning_rate": 3.6883031623224927e-07, "loss": 2.1025, "step": 4612 }, { "epoch": 0.99, "learning_rate": 3.501595227984877e-07, "loss": 2.0645, "step": 4613 }, { "epoch": 0.99, "learning_rate": 3.319735733079421e-07, "loss": 2.0889, "step": 4614 }, { "epoch": 0.99, "learning_rate": 3.1427247658100124e-07, "loss": 2.0186, "step": 4615 }, { "epoch": 0.99, "learning_rate": 2.9705624120290876e-07, "loss": 2.1221, "step": 4616 }, { "epoch": 0.99, "learning_rate": 2.803248755238741e-07, "loss": 1.9814, "step": 4617 }, { "epoch": 0.99, "learning_rate": 2.640783876588504e-07, "loss": 2.1689, "step": 4618 }, { "epoch": 0.99, "learning_rate": 2.4831678548753456e-07, "loss": 2.2119, "step": 4619 }, { "epoch": 0.99, "learning_rate": 2.3304007665458927e-07, "loss": 2.2041, "step": 4620 }, { "epoch": 0.99, "learning_rate": 2.1824826856942092e-07, "loss": 2.2539, "step": 4621 }, { "epoch": 0.99, "learning_rate": 2.0394136840617972e-07, "loss": 2.0986, "step": 4622 }, { "epoch": 0.99, "learning_rate": 1.9011938310387056e-07, "loss": 2.0967, "step": 4623 }, { "epoch": 0.99, "learning_rate": 1.7678231936657518e-07, "loss": 2.082, "step": 4624 }, { "epoch": 0.99, "learning_rate": 1.63930183662786e-07, "loss": 2.0322, "step": 4625 }, { "epoch": 0.99, "learning_rate": 1.515629822259612e-07, "loss": 1.9717, "step": 4626 }, { "epoch": 0.99, "learning_rate": 1.3968072105441375e-07, "loss": 2.1797, "step": 4627 }, { "epoch": 0.99, "learning_rate": 1.2828340591120035e-07, "loss": 2.1572, "step": 4628 }, { "epoch": 1.0, "learning_rate": 1.1737104232412144e-07, "loss": 2.1426, "step": 4629 }, { "epoch": 1.0, "learning_rate": 1.0694363558594323e-07, "loss": 2.0801, "step": 4630 }, { "epoch": 1.0, "learning_rate": 9.700119075395363e-08, "loss": 2.0889, "step": 4631 }, { "epoch": 1.0, "learning_rate": 8.754371265040639e-08, "loss": 2.3115, "step": 4632 }, { "epoch": 1.0, "learning_rate": 7.857120586240996e-08, "loss": 1.9727, "step": 4633 }, { "epoch": 1.0, "learning_rate": 7.008367474170551e-08, "loss": 2.2646, "step": 4634 }, { "epoch": 1.0, "learning_rate": 6.208112340488902e-08, "loss": 2.0127, "step": 4635 }, { "epoch": 1.0, "learning_rate": 5.4563555733189163e-08, "loss": 2.1094, "step": 4636 }, { "epoch": 1.0, "learning_rate": 4.7530975372800375e-08, "loss": 2.1504, "step": 4637 }, { "epoch": 1.0, "learning_rate": 4.098338573466087e-08, "loss": 2.0293, "step": 4638 }, { "epoch": 1.0, "learning_rate": 3.492078999434156e-08, "loss": 2.0732, "step": 4639 }, { "epoch": 1.0, "learning_rate": 2.9343191092490173e-08, "loss": 2.1631, "step": 4640 }, { "epoch": 1.0, "learning_rate": 2.4250591734054083e-08, "loss": 2.1201, "step": 4641 }, { "epoch": 1.0, "learning_rate": 1.96429943891685e-08, "loss": 2.2236, "step": 4642 }, { "epoch": 1.0, "learning_rate": 1.552040129260135e-08, "loss": 1.9844, "step": 4643 }, { "epoch": 1.0, "learning_rate": 1.1882814443864298e-08, "loss": 2.2998, "step": 4644 }, { "epoch": 1.0, "learning_rate": 8.730235607101733e-09, "loss": 2.1611, "step": 4645 }, { "epoch": 1.0, "learning_rate": 6.062666311534848e-09, "loss": 2.1484, "step": 4646 }, { "epoch": 1.0, "learning_rate": 3.8801078507955115e-09, "loss": 2.1387, "step": 4647 }, { "epoch": 1.0, "learning_rate": 2.182561283592399e-09, "loss": 2.0029, "step": 4648 }, { "epoch": 1.0, "learning_rate": 9.700274331558844e-10, "loss": 2.0723, "step": 4649 }, { "epoch": 1.0, "learning_rate": 2.425068876821257e-10, "loss": 2.0986, "step": 4650 }, { "epoch": 1.0, "learning_rate": 0.0, "loss": 2.0771, "step": 4651 }, { "epoch": 1.0, "step": 4651, "total_flos": 550753257259008.0, "train_loss": 2.269884416657708, "train_runtime": 29841.694, "train_samples_per_second": 19.951, "train_steps_per_second": 0.156 } ], "max_steps": 4651, "num_train_epochs": 1, "total_flos": 550753257259008.0, "trial_name": null, "trial_params": null }