{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "global_step": 9039, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 7.352941176470589e-08, "loss": 1.3555, "step": 1 }, { "epoch": 0.0, "learning_rate": 1.4705882352941178e-07, "loss": 1.3203, "step": 2 }, { "epoch": 0.0, "learning_rate": 2.2058823529411768e-07, "loss": 1.3379, "step": 3 }, { "epoch": 0.0, "learning_rate": 2.9411764705882356e-07, "loss": 1.3828, "step": 4 }, { "epoch": 0.0, "learning_rate": 3.6764705882352943e-07, "loss": 1.3193, "step": 5 }, { "epoch": 0.0, "learning_rate": 4.4117647058823536e-07, "loss": 1.2822, "step": 6 }, { "epoch": 0.0, "learning_rate": 5.147058823529412e-07, "loss": 1.3701, "step": 7 }, { "epoch": 0.0, "learning_rate": 5.882352941176471e-07, "loss": 1.3789, "step": 8 }, { "epoch": 0.0, "learning_rate": 6.61764705882353e-07, "loss": 1.3867, "step": 9 }, { "epoch": 0.0, "learning_rate": 7.352941176470589e-07, "loss": 1.2676, "step": 10 }, { "epoch": 0.0, "learning_rate": 8.088235294117648e-07, "loss": 1.25, "step": 11 }, { "epoch": 0.0, "learning_rate": 8.823529411764707e-07, "loss": 1.3047, "step": 12 }, { "epoch": 0.0, "learning_rate": 9.558823529411764e-07, "loss": 1.3145, "step": 13 }, { "epoch": 0.0, "learning_rate": 1.0294117647058825e-06, "loss": 1.291, "step": 14 }, { "epoch": 0.0, "learning_rate": 1.1029411764705884e-06, "loss": 1.292, "step": 15 }, { "epoch": 0.01, "learning_rate": 1.1764705882352942e-06, "loss": 1.2891, "step": 16 }, { "epoch": 0.01, "learning_rate": 1.25e-06, "loss": 1.2959, "step": 17 }, { "epoch": 0.01, "learning_rate": 1.323529411764706e-06, "loss": 1.2422, "step": 18 }, { "epoch": 0.01, "learning_rate": 1.3970588235294119e-06, "loss": 1.1416, "step": 19 }, { "epoch": 0.01, "learning_rate": 1.4705882352941177e-06, "loss": 1.1787, "step": 20 }, { "epoch": 0.01, "learning_rate": 1.5441176470588238e-06, "loss": 1.2334, "step": 21 }, { "epoch": 0.01, "learning_rate": 1.6176470588235297e-06, "loss": 1.1953, "step": 22 }, { "epoch": 0.01, "learning_rate": 1.6911764705882356e-06, "loss": 1.1333, "step": 23 }, { "epoch": 0.01, "learning_rate": 1.7647058823529414e-06, "loss": 1.0879, "step": 24 }, { "epoch": 0.01, "learning_rate": 1.8382352941176473e-06, "loss": 0.9995, "step": 25 }, { "epoch": 0.01, "learning_rate": 1.9117647058823528e-06, "loss": 1.0942, "step": 26 }, { "epoch": 0.01, "learning_rate": 1.985294117647059e-06, "loss": 0.9902, "step": 27 }, { "epoch": 0.01, "learning_rate": 2.058823529411765e-06, "loss": 1.02, "step": 28 }, { "epoch": 0.01, "learning_rate": 2.132352941176471e-06, "loss": 1.1006, "step": 29 }, { "epoch": 0.01, "learning_rate": 2.2058823529411767e-06, "loss": 1.0933, "step": 30 }, { "epoch": 0.01, "learning_rate": 2.2794117647058826e-06, "loss": 1.0161, "step": 31 }, { "epoch": 0.01, "learning_rate": 2.3529411764705885e-06, "loss": 1.0557, "step": 32 }, { "epoch": 0.01, "learning_rate": 2.4264705882352943e-06, "loss": 0.9512, "step": 33 }, { "epoch": 0.01, "learning_rate": 2.5e-06, "loss": 0.9692, "step": 34 }, { "epoch": 0.01, "learning_rate": 2.5735294117647057e-06, "loss": 0.9048, "step": 35 }, { "epoch": 0.01, "learning_rate": 2.647058823529412e-06, "loss": 0.9534, "step": 36 }, { "epoch": 0.01, "learning_rate": 2.720588235294118e-06, "loss": 0.9536, "step": 37 }, { "epoch": 0.01, "learning_rate": 2.7941176470588237e-06, "loss": 0.9434, "step": 38 }, { "epoch": 0.01, "learning_rate": 2.8676470588235296e-06, "loss": 0.9917, "step": 39 }, { "epoch": 0.01, "learning_rate": 2.9411764705882355e-06, "loss": 0.8555, "step": 40 }, { "epoch": 0.01, "learning_rate": 3.0147058823529413e-06, "loss": 1.0083, "step": 41 }, { "epoch": 0.01, "learning_rate": 3.0882352941176476e-06, "loss": 0.9946, "step": 42 }, { "epoch": 0.01, "learning_rate": 3.161764705882353e-06, "loss": 0.8613, "step": 43 }, { "epoch": 0.01, "learning_rate": 3.2352941176470594e-06, "loss": 0.895, "step": 44 }, { "epoch": 0.01, "learning_rate": 3.308823529411765e-06, "loss": 0.8682, "step": 45 }, { "epoch": 0.02, "learning_rate": 3.382352941176471e-06, "loss": 0.8232, "step": 46 }, { "epoch": 0.02, "learning_rate": 3.4558823529411766e-06, "loss": 0.9326, "step": 47 }, { "epoch": 0.02, "learning_rate": 3.529411764705883e-06, "loss": 0.9263, "step": 48 }, { "epoch": 0.02, "learning_rate": 3.6029411764705883e-06, "loss": 0.9482, "step": 49 }, { "epoch": 0.02, "learning_rate": 3.6764705882352946e-06, "loss": 0.9258, "step": 50 }, { "epoch": 0.02, "learning_rate": 3.7500000000000005e-06, "loss": 0.8931, "step": 51 }, { "epoch": 0.02, "learning_rate": 3.8235294117647055e-06, "loss": 0.8, "step": 52 }, { "epoch": 0.02, "learning_rate": 3.897058823529412e-06, "loss": 0.9512, "step": 53 }, { "epoch": 0.02, "learning_rate": 3.970588235294118e-06, "loss": 0.9004, "step": 54 }, { "epoch": 0.02, "learning_rate": 4.044117647058824e-06, "loss": 0.978, "step": 55 }, { "epoch": 0.02, "learning_rate": 4.11764705882353e-06, "loss": 0.9316, "step": 56 }, { "epoch": 0.02, "learning_rate": 4.191176470588236e-06, "loss": 0.9575, "step": 57 }, { "epoch": 0.02, "learning_rate": 4.264705882352942e-06, "loss": 0.8643, "step": 58 }, { "epoch": 0.02, "learning_rate": 4.3382352941176475e-06, "loss": 0.9277, "step": 59 }, { "epoch": 0.02, "learning_rate": 4.411764705882353e-06, "loss": 0.9331, "step": 60 }, { "epoch": 0.02, "learning_rate": 4.485294117647059e-06, "loss": 0.9102, "step": 61 }, { "epoch": 0.02, "learning_rate": 4.558823529411765e-06, "loss": 0.9741, "step": 62 }, { "epoch": 0.02, "learning_rate": 4.632352941176471e-06, "loss": 0.8672, "step": 63 }, { "epoch": 0.02, "learning_rate": 4.705882352941177e-06, "loss": 0.9077, "step": 64 }, { "epoch": 0.02, "learning_rate": 4.779411764705883e-06, "loss": 0.9185, "step": 65 }, { "epoch": 0.02, "learning_rate": 4.852941176470589e-06, "loss": 0.9365, "step": 66 }, { "epoch": 0.02, "learning_rate": 4.9264705882352945e-06, "loss": 0.9956, "step": 67 }, { "epoch": 0.02, "learning_rate": 5e-06, "loss": 0.855, "step": 68 }, { "epoch": 0.02, "learning_rate": 5.073529411764706e-06, "loss": 0.834, "step": 69 }, { "epoch": 0.02, "learning_rate": 5.147058823529411e-06, "loss": 0.9028, "step": 70 }, { "epoch": 0.02, "learning_rate": 5.220588235294118e-06, "loss": 0.9404, "step": 71 }, { "epoch": 0.02, "learning_rate": 5.294117647058824e-06, "loss": 0.8613, "step": 72 }, { "epoch": 0.02, "learning_rate": 5.36764705882353e-06, "loss": 0.9204, "step": 73 }, { "epoch": 0.02, "learning_rate": 5.441176470588236e-06, "loss": 0.8643, "step": 74 }, { "epoch": 0.02, "learning_rate": 5.514705882352942e-06, "loss": 0.9653, "step": 75 }, { "epoch": 0.03, "learning_rate": 5.588235294117647e-06, "loss": 0.8555, "step": 76 }, { "epoch": 0.03, "learning_rate": 5.661764705882353e-06, "loss": 0.875, "step": 77 }, { "epoch": 0.03, "learning_rate": 5.735294117647059e-06, "loss": 0.8501, "step": 78 }, { "epoch": 0.03, "learning_rate": 5.808823529411766e-06, "loss": 0.9238, "step": 79 }, { "epoch": 0.03, "learning_rate": 5.882352941176471e-06, "loss": 0.896, "step": 80 }, { "epoch": 0.03, "learning_rate": 5.955882352941177e-06, "loss": 0.8984, "step": 81 }, { "epoch": 0.03, "learning_rate": 6.029411764705883e-06, "loss": 0.9141, "step": 82 }, { "epoch": 0.03, "learning_rate": 6.102941176470589e-06, "loss": 0.832, "step": 83 }, { "epoch": 0.03, "learning_rate": 6.176470588235295e-06, "loss": 0.8818, "step": 84 }, { "epoch": 0.03, "learning_rate": 6.25e-06, "loss": 0.8784, "step": 85 }, { "epoch": 0.03, "learning_rate": 6.323529411764706e-06, "loss": 0.9097, "step": 86 }, { "epoch": 0.03, "learning_rate": 6.397058823529412e-06, "loss": 0.9102, "step": 87 }, { "epoch": 0.03, "learning_rate": 6.470588235294119e-06, "loss": 0.7819, "step": 88 }, { "epoch": 0.03, "learning_rate": 6.544117647058824e-06, "loss": 0.7759, "step": 89 }, { "epoch": 0.03, "learning_rate": 6.61764705882353e-06, "loss": 0.8931, "step": 90 }, { "epoch": 0.03, "learning_rate": 6.6911764705882356e-06, "loss": 0.8228, "step": 91 }, { "epoch": 0.03, "learning_rate": 6.764705882352942e-06, "loss": 0.8701, "step": 92 }, { "epoch": 0.03, "learning_rate": 6.838235294117648e-06, "loss": 0.9067, "step": 93 }, { "epoch": 0.03, "learning_rate": 6.911764705882353e-06, "loss": 0.8184, "step": 94 }, { "epoch": 0.03, "learning_rate": 6.985294117647059e-06, "loss": 0.8892, "step": 95 }, { "epoch": 0.03, "learning_rate": 7.058823529411766e-06, "loss": 0.877, "step": 96 }, { "epoch": 0.03, "learning_rate": 7.132352941176472e-06, "loss": 0.9224, "step": 97 }, { "epoch": 0.03, "learning_rate": 7.205882352941177e-06, "loss": 0.916, "step": 98 }, { "epoch": 0.03, "learning_rate": 7.2794117647058826e-06, "loss": 0.8696, "step": 99 }, { "epoch": 0.03, "learning_rate": 7.352941176470589e-06, "loss": 0.8843, "step": 100 }, { "epoch": 0.03, "learning_rate": 7.426470588235295e-06, "loss": 0.9185, "step": 101 }, { "epoch": 0.03, "learning_rate": 7.500000000000001e-06, "loss": 0.8613, "step": 102 }, { "epoch": 0.03, "learning_rate": 7.573529411764706e-06, "loss": 0.8423, "step": 103 }, { "epoch": 0.03, "learning_rate": 7.647058823529411e-06, "loss": 0.8442, "step": 104 }, { "epoch": 0.03, "learning_rate": 7.720588235294119e-06, "loss": 0.8818, "step": 105 }, { "epoch": 0.04, "learning_rate": 7.794117647058825e-06, "loss": 0.8584, "step": 106 }, { "epoch": 0.04, "learning_rate": 7.86764705882353e-06, "loss": 0.906, "step": 107 }, { "epoch": 0.04, "learning_rate": 7.941176470588236e-06, "loss": 0.9346, "step": 108 }, { "epoch": 0.04, "learning_rate": 8.014705882352942e-06, "loss": 0.9087, "step": 109 }, { "epoch": 0.04, "learning_rate": 8.088235294117648e-06, "loss": 0.8843, "step": 110 }, { "epoch": 0.04, "learning_rate": 8.161764705882354e-06, "loss": 0.9482, "step": 111 }, { "epoch": 0.04, "learning_rate": 8.23529411764706e-06, "loss": 0.9048, "step": 112 }, { "epoch": 0.04, "learning_rate": 8.308823529411766e-06, "loss": 0.9492, "step": 113 }, { "epoch": 0.04, "learning_rate": 8.382352941176472e-06, "loss": 0.8418, "step": 114 }, { "epoch": 0.04, "learning_rate": 8.455882352941177e-06, "loss": 0.9146, "step": 115 }, { "epoch": 0.04, "learning_rate": 8.529411764705883e-06, "loss": 0.6845, "step": 116 }, { "epoch": 0.04, "learning_rate": 8.60294117647059e-06, "loss": 0.854, "step": 117 }, { "epoch": 0.04, "learning_rate": 8.676470588235295e-06, "loss": 0.9663, "step": 118 }, { "epoch": 0.04, "learning_rate": 8.750000000000001e-06, "loss": 0.874, "step": 119 }, { "epoch": 0.04, "learning_rate": 8.823529411764707e-06, "loss": 0.9561, "step": 120 }, { "epoch": 0.04, "learning_rate": 8.897058823529413e-06, "loss": 0.8701, "step": 121 }, { "epoch": 0.04, "learning_rate": 8.970588235294119e-06, "loss": 0.9019, "step": 122 }, { "epoch": 0.04, "learning_rate": 9.044117647058824e-06, "loss": 0.7856, "step": 123 }, { "epoch": 0.04, "learning_rate": 9.11764705882353e-06, "loss": 0.9106, "step": 124 }, { "epoch": 0.04, "learning_rate": 9.191176470588236e-06, "loss": 0.8711, "step": 125 }, { "epoch": 0.04, "learning_rate": 9.264705882352942e-06, "loss": 0.877, "step": 126 }, { "epoch": 0.04, "learning_rate": 9.338235294117648e-06, "loss": 0.8389, "step": 127 }, { "epoch": 0.04, "learning_rate": 9.411764705882354e-06, "loss": 0.8452, "step": 128 }, { "epoch": 0.04, "learning_rate": 9.48529411764706e-06, "loss": 0.9058, "step": 129 }, { "epoch": 0.04, "learning_rate": 9.558823529411766e-06, "loss": 0.8413, "step": 130 }, { "epoch": 0.04, "learning_rate": 9.632352941176471e-06, "loss": 0.771, "step": 131 }, { "epoch": 0.04, "learning_rate": 9.705882352941177e-06, "loss": 0.8394, "step": 132 }, { "epoch": 0.04, "learning_rate": 9.779411764705883e-06, "loss": 0.8604, "step": 133 }, { "epoch": 0.04, "learning_rate": 9.852941176470589e-06, "loss": 0.8838, "step": 134 }, { "epoch": 0.04, "learning_rate": 9.926470588235295e-06, "loss": 0.8511, "step": 135 }, { "epoch": 0.05, "learning_rate": 1e-05, "loss": 0.8589, "step": 136 }, { "epoch": 0.05, "learning_rate": 1.0073529411764707e-05, "loss": 0.8506, "step": 137 }, { "epoch": 0.05, "learning_rate": 1.0147058823529413e-05, "loss": 0.8003, "step": 138 }, { "epoch": 0.05, "learning_rate": 1.0220588235294118e-05, "loss": 0.9028, "step": 139 }, { "epoch": 0.05, "learning_rate": 1.0294117647058823e-05, "loss": 0.8394, "step": 140 }, { "epoch": 0.05, "learning_rate": 1.0367647058823532e-05, "loss": 0.9243, "step": 141 }, { "epoch": 0.05, "learning_rate": 1.0441176470588236e-05, "loss": 0.8848, "step": 142 }, { "epoch": 0.05, "learning_rate": 1.0514705882352942e-05, "loss": 0.7837, "step": 143 }, { "epoch": 0.05, "learning_rate": 1.0588235294117648e-05, "loss": 0.917, "step": 144 }, { "epoch": 0.05, "learning_rate": 1.0661764705882354e-05, "loss": 0.8408, "step": 145 }, { "epoch": 0.05, "learning_rate": 1.073529411764706e-05, "loss": 0.8906, "step": 146 }, { "epoch": 0.05, "learning_rate": 1.0808823529411765e-05, "loss": 0.8916, "step": 147 }, { "epoch": 0.05, "learning_rate": 1.0882352941176471e-05, "loss": 0.8472, "step": 148 }, { "epoch": 0.05, "learning_rate": 1.0955882352941179e-05, "loss": 0.8423, "step": 149 }, { "epoch": 0.05, "learning_rate": 1.1029411764705885e-05, "loss": 0.8472, "step": 150 }, { "epoch": 0.05, "learning_rate": 1.1102941176470589e-05, "loss": 0.8311, "step": 151 }, { "epoch": 0.05, "learning_rate": 1.1176470588235295e-05, "loss": 0.7622, "step": 152 }, { "epoch": 0.05, "learning_rate": 1.125e-05, "loss": 0.853, "step": 153 }, { "epoch": 0.05, "learning_rate": 1.1323529411764707e-05, "loss": 0.8589, "step": 154 }, { "epoch": 0.05, "learning_rate": 1.1397058823529412e-05, "loss": 0.8979, "step": 155 }, { "epoch": 0.05, "learning_rate": 1.1470588235294118e-05, "loss": 0.8726, "step": 156 }, { "epoch": 0.05, "learning_rate": 1.1544117647058824e-05, "loss": 0.8418, "step": 157 }, { "epoch": 0.05, "learning_rate": 1.1617647058823532e-05, "loss": 0.8521, "step": 158 }, { "epoch": 0.05, "learning_rate": 1.1691176470588238e-05, "loss": 0.8989, "step": 159 }, { "epoch": 0.05, "learning_rate": 1.1764705882352942e-05, "loss": 0.8984, "step": 160 }, { "epoch": 0.05, "learning_rate": 1.1838235294117648e-05, "loss": 0.8545, "step": 161 }, { "epoch": 0.05, "learning_rate": 1.1911764705882354e-05, "loss": 0.8843, "step": 162 }, { "epoch": 0.05, "learning_rate": 1.198529411764706e-05, "loss": 0.8799, "step": 163 }, { "epoch": 0.05, "learning_rate": 1.2058823529411765e-05, "loss": 0.8745, "step": 164 }, { "epoch": 0.05, "learning_rate": 1.2132352941176471e-05, "loss": 0.8794, "step": 165 }, { "epoch": 0.06, "learning_rate": 1.2205882352941179e-05, "loss": 0.8608, "step": 166 }, { "epoch": 0.06, "learning_rate": 1.2279411764705885e-05, "loss": 0.8379, "step": 167 }, { "epoch": 0.06, "learning_rate": 1.235294117647059e-05, "loss": 0.8308, "step": 168 }, { "epoch": 0.06, "learning_rate": 1.2426470588235295e-05, "loss": 0.853, "step": 169 }, { "epoch": 0.06, "learning_rate": 1.25e-05, "loss": 0.876, "step": 170 }, { "epoch": 0.06, "learning_rate": 1.2573529411764706e-05, "loss": 0.8032, "step": 171 }, { "epoch": 0.06, "learning_rate": 1.2647058823529412e-05, "loss": 0.8862, "step": 172 }, { "epoch": 0.06, "learning_rate": 1.2720588235294118e-05, "loss": 0.8423, "step": 173 }, { "epoch": 0.06, "learning_rate": 1.2794117647058824e-05, "loss": 0.958, "step": 174 }, { "epoch": 0.06, "learning_rate": 1.2867647058823532e-05, "loss": 0.8691, "step": 175 }, { "epoch": 0.06, "learning_rate": 1.2941176470588238e-05, "loss": 0.8638, "step": 176 }, { "epoch": 0.06, "learning_rate": 1.3014705882352943e-05, "loss": 0.8215, "step": 177 }, { "epoch": 0.06, "learning_rate": 1.3088235294117648e-05, "loss": 0.8633, "step": 178 }, { "epoch": 0.06, "learning_rate": 1.3161764705882353e-05, "loss": 0.8643, "step": 179 }, { "epoch": 0.06, "learning_rate": 1.323529411764706e-05, "loss": 0.9399, "step": 180 }, { "epoch": 0.06, "learning_rate": 1.3308823529411765e-05, "loss": 0.8286, "step": 181 }, { "epoch": 0.06, "learning_rate": 1.3382352941176471e-05, "loss": 0.8818, "step": 182 }, { "epoch": 0.06, "learning_rate": 1.3455882352941179e-05, "loss": 0.8276, "step": 183 }, { "epoch": 0.06, "learning_rate": 1.3529411764705885e-05, "loss": 0.9263, "step": 184 }, { "epoch": 0.06, "learning_rate": 1.360294117647059e-05, "loss": 0.8599, "step": 185 }, { "epoch": 0.06, "learning_rate": 1.3676470588235296e-05, "loss": 0.9087, "step": 186 }, { "epoch": 0.06, "learning_rate": 1.375e-05, "loss": 0.8496, "step": 187 }, { "epoch": 0.06, "learning_rate": 1.3823529411764706e-05, "loss": 0.7974, "step": 188 }, { "epoch": 0.06, "learning_rate": 1.3897058823529412e-05, "loss": 0.8633, "step": 189 }, { "epoch": 0.06, "learning_rate": 1.3970588235294118e-05, "loss": 0.7773, "step": 190 }, { "epoch": 0.06, "learning_rate": 1.4044117647058824e-05, "loss": 0.8711, "step": 191 }, { "epoch": 0.06, "learning_rate": 1.4117647058823532e-05, "loss": 0.8813, "step": 192 }, { "epoch": 0.06, "learning_rate": 1.4191176470588237e-05, "loss": 0.832, "step": 193 }, { "epoch": 0.06, "learning_rate": 1.4264705882352943e-05, "loss": 0.9038, "step": 194 }, { "epoch": 0.06, "learning_rate": 1.433823529411765e-05, "loss": 0.8481, "step": 195 }, { "epoch": 0.07, "learning_rate": 1.4411764705882353e-05, "loss": 0.8647, "step": 196 }, { "epoch": 0.07, "learning_rate": 1.448529411764706e-05, "loss": 0.8057, "step": 197 }, { "epoch": 0.07, "learning_rate": 1.4558823529411765e-05, "loss": 0.7849, "step": 198 }, { "epoch": 0.07, "learning_rate": 1.4632352941176471e-05, "loss": 0.8442, "step": 199 }, { "epoch": 0.07, "learning_rate": 1.4705882352941179e-05, "loss": 0.8545, "step": 200 }, { "epoch": 0.07, "learning_rate": 1.4779411764705884e-05, "loss": 0.9453, "step": 201 }, { "epoch": 0.07, "learning_rate": 1.485294117647059e-05, "loss": 0.8813, "step": 202 }, { "epoch": 0.07, "learning_rate": 1.4926470588235296e-05, "loss": 0.875, "step": 203 }, { "epoch": 0.07, "learning_rate": 1.5000000000000002e-05, "loss": 0.8784, "step": 204 }, { "epoch": 0.07, "learning_rate": 1.5073529411764706e-05, "loss": 0.9292, "step": 205 }, { "epoch": 0.07, "learning_rate": 1.5147058823529412e-05, "loss": 0.9321, "step": 206 }, { "epoch": 0.07, "learning_rate": 1.5220588235294118e-05, "loss": 0.8335, "step": 207 }, { "epoch": 0.07, "learning_rate": 1.5294117647058822e-05, "loss": 0.8003, "step": 208 }, { "epoch": 0.07, "learning_rate": 1.536764705882353e-05, "loss": 0.7678, "step": 209 }, { "epoch": 0.07, "learning_rate": 1.5441176470588237e-05, "loss": 0.9473, "step": 210 }, { "epoch": 0.07, "learning_rate": 1.5514705882352943e-05, "loss": 0.895, "step": 211 }, { "epoch": 0.07, "learning_rate": 1.558823529411765e-05, "loss": 0.8535, "step": 212 }, { "epoch": 0.07, "learning_rate": 1.5661764705882355e-05, "loss": 0.9004, "step": 213 }, { "epoch": 0.07, "learning_rate": 1.573529411764706e-05, "loss": 0.7864, "step": 214 }, { "epoch": 0.07, "learning_rate": 1.5808823529411767e-05, "loss": 0.8281, "step": 215 }, { "epoch": 0.07, "learning_rate": 1.5882352941176473e-05, "loss": 0.8794, "step": 216 }, { "epoch": 0.07, "learning_rate": 1.595588235294118e-05, "loss": 0.8799, "step": 217 }, { "epoch": 0.07, "learning_rate": 1.6029411764705884e-05, "loss": 0.8594, "step": 218 }, { "epoch": 0.07, "learning_rate": 1.610294117647059e-05, "loss": 0.8931, "step": 219 }, { "epoch": 0.07, "learning_rate": 1.6176470588235296e-05, "loss": 0.8247, "step": 220 }, { "epoch": 0.07, "learning_rate": 1.6250000000000002e-05, "loss": 0.8936, "step": 221 }, { "epoch": 0.07, "learning_rate": 1.6323529411764708e-05, "loss": 0.8643, "step": 222 }, { "epoch": 0.07, "learning_rate": 1.6397058823529414e-05, "loss": 0.8772, "step": 223 }, { "epoch": 0.07, "learning_rate": 1.647058823529412e-05, "loss": 0.8252, "step": 224 }, { "epoch": 0.07, "learning_rate": 1.6544117647058825e-05, "loss": 0.8325, "step": 225 }, { "epoch": 0.08, "learning_rate": 1.661764705882353e-05, "loss": 0.9131, "step": 226 }, { "epoch": 0.08, "learning_rate": 1.6691176470588237e-05, "loss": 0.8545, "step": 227 }, { "epoch": 0.08, "learning_rate": 1.6764705882352943e-05, "loss": 0.9204, "step": 228 }, { "epoch": 0.08, "learning_rate": 1.683823529411765e-05, "loss": 0.853, "step": 229 }, { "epoch": 0.08, "learning_rate": 1.6911764705882355e-05, "loss": 0.8677, "step": 230 }, { "epoch": 0.08, "learning_rate": 1.698529411764706e-05, "loss": 0.8652, "step": 231 }, { "epoch": 0.08, "learning_rate": 1.7058823529411767e-05, "loss": 0.8486, "step": 232 }, { "epoch": 0.08, "learning_rate": 1.7132352941176472e-05, "loss": 0.9053, "step": 233 }, { "epoch": 0.08, "learning_rate": 1.720588235294118e-05, "loss": 0.8271, "step": 234 }, { "epoch": 0.08, "learning_rate": 1.7279411764705884e-05, "loss": 0.8647, "step": 235 }, { "epoch": 0.08, "learning_rate": 1.735294117647059e-05, "loss": 0.876, "step": 236 }, { "epoch": 0.08, "learning_rate": 1.7426470588235296e-05, "loss": 0.8484, "step": 237 }, { "epoch": 0.08, "learning_rate": 1.7500000000000002e-05, "loss": 0.915, "step": 238 }, { "epoch": 0.08, "learning_rate": 1.7573529411764708e-05, "loss": 0.7986, "step": 239 }, { "epoch": 0.08, "learning_rate": 1.7647058823529414e-05, "loss": 0.7778, "step": 240 }, { "epoch": 0.08, "learning_rate": 1.772058823529412e-05, "loss": 0.8403, "step": 241 }, { "epoch": 0.08, "learning_rate": 1.7794117647058825e-05, "loss": 0.8403, "step": 242 }, { "epoch": 0.08, "learning_rate": 1.786764705882353e-05, "loss": 0.8188, "step": 243 }, { "epoch": 0.08, "learning_rate": 1.7941176470588237e-05, "loss": 0.9009, "step": 244 }, { "epoch": 0.08, "learning_rate": 1.8014705882352943e-05, "loss": 0.9126, "step": 245 }, { "epoch": 0.08, "learning_rate": 1.808823529411765e-05, "loss": 0.8911, "step": 246 }, { "epoch": 0.08, "learning_rate": 1.8161764705882355e-05, "loss": 0.8799, "step": 247 }, { "epoch": 0.08, "learning_rate": 1.823529411764706e-05, "loss": 0.8452, "step": 248 }, { "epoch": 0.08, "learning_rate": 1.8308823529411766e-05, "loss": 0.8623, "step": 249 }, { "epoch": 0.08, "learning_rate": 1.8382352941176472e-05, "loss": 0.9185, "step": 250 }, { "epoch": 0.08, "learning_rate": 1.8455882352941178e-05, "loss": 0.814, "step": 251 }, { "epoch": 0.08, "learning_rate": 1.8529411764705884e-05, "loss": 0.9487, "step": 252 }, { "epoch": 0.08, "learning_rate": 1.860294117647059e-05, "loss": 0.8662, "step": 253 }, { "epoch": 0.08, "learning_rate": 1.8676470588235296e-05, "loss": 0.782, "step": 254 }, { "epoch": 0.08, "learning_rate": 1.8750000000000002e-05, "loss": 0.8467, "step": 255 }, { "epoch": 0.08, "learning_rate": 1.8823529411764708e-05, "loss": 0.8223, "step": 256 }, { "epoch": 0.09, "learning_rate": 1.8897058823529413e-05, "loss": 0.8599, "step": 257 }, { "epoch": 0.09, "learning_rate": 1.897058823529412e-05, "loss": 0.8809, "step": 258 }, { "epoch": 0.09, "learning_rate": 1.9044117647058825e-05, "loss": 0.825, "step": 259 }, { "epoch": 0.09, "learning_rate": 1.911764705882353e-05, "loss": 0.8945, "step": 260 }, { "epoch": 0.09, "learning_rate": 1.9191176470588237e-05, "loss": 0.8691, "step": 261 }, { "epoch": 0.09, "learning_rate": 1.9264705882352943e-05, "loss": 0.7937, "step": 262 }, { "epoch": 0.09, "learning_rate": 1.933823529411765e-05, "loss": 0.854, "step": 263 }, { "epoch": 0.09, "learning_rate": 1.9411764705882355e-05, "loss": 0.8682, "step": 264 }, { "epoch": 0.09, "learning_rate": 1.948529411764706e-05, "loss": 0.8398, "step": 265 }, { "epoch": 0.09, "learning_rate": 1.9558823529411766e-05, "loss": 0.8296, "step": 266 }, { "epoch": 0.09, "learning_rate": 1.9632352941176472e-05, "loss": 0.8545, "step": 267 }, { "epoch": 0.09, "learning_rate": 1.9705882352941178e-05, "loss": 0.8623, "step": 268 }, { "epoch": 0.09, "learning_rate": 1.9779411764705884e-05, "loss": 0.7583, "step": 269 }, { "epoch": 0.09, "learning_rate": 1.985294117647059e-05, "loss": 0.856, "step": 270 }, { "epoch": 0.09, "learning_rate": 1.9926470588235296e-05, "loss": 0.8647, "step": 271 }, { "epoch": 0.09, "learning_rate": 2e-05, "loss": 0.835, "step": 272 }, { "epoch": 0.09, "learning_rate": 1.9999999357951653e-05, "loss": 0.8384, "step": 273 }, { "epoch": 0.09, "learning_rate": 1.9999997431806697e-05, "loss": 0.8647, "step": 274 }, { "epoch": 0.09, "learning_rate": 1.9999994221565376e-05, "loss": 0.8931, "step": 275 }, { "epoch": 0.09, "learning_rate": 1.9999989727228104e-05, "loss": 0.8872, "step": 276 }, { "epoch": 0.09, "learning_rate": 1.9999983948795457e-05, "loss": 0.8818, "step": 277 }, { "epoch": 0.09, "learning_rate": 1.9999976886268177e-05, "loss": 0.8481, "step": 278 }, { "epoch": 0.09, "learning_rate": 1.9999968539647175e-05, "loss": 0.8159, "step": 279 }, { "epoch": 0.09, "learning_rate": 1.9999958908933515e-05, "loss": 0.8916, "step": 280 }, { "epoch": 0.09, "learning_rate": 1.999994799412844e-05, "loss": 0.8975, "step": 281 }, { "epoch": 0.09, "learning_rate": 1.9999935795233353e-05, "loss": 0.7998, "step": 282 }, { "epoch": 0.09, "learning_rate": 1.9999922312249814e-05, "loss": 0.8682, "step": 283 }, { "epoch": 0.09, "learning_rate": 1.9999907545179557e-05, "loss": 0.9102, "step": 284 }, { "epoch": 0.09, "learning_rate": 1.999989149402448e-05, "loss": 0.8604, "step": 285 }, { "epoch": 0.09, "learning_rate": 1.999987415878664e-05, "loss": 0.8145, "step": 286 }, { "epoch": 0.1, "learning_rate": 1.999985553946827e-05, "loss": 0.917, "step": 287 }, { "epoch": 0.1, "learning_rate": 1.9999835636071755e-05, "loss": 0.8672, "step": 288 }, { "epoch": 0.1, "learning_rate": 1.999981444859965e-05, "loss": 0.8677, "step": 289 }, { "epoch": 0.1, "learning_rate": 1.9999791977054683e-05, "loss": 0.8882, "step": 290 }, { "epoch": 0.1, "learning_rate": 1.9999768221439732e-05, "loss": 0.8604, "step": 291 }, { "epoch": 0.1, "learning_rate": 1.999974318175785e-05, "loss": 0.8403, "step": 292 }, { "epoch": 0.1, "learning_rate": 1.9999716858012254e-05, "loss": 0.8687, "step": 293 }, { "epoch": 0.1, "learning_rate": 1.9999689250206324e-05, "loss": 0.8481, "step": 294 }, { "epoch": 0.1, "learning_rate": 1.9999660358343602e-05, "loss": 0.896, "step": 295 }, { "epoch": 0.1, "learning_rate": 1.99996301824278e-05, "loss": 0.8623, "step": 296 }, { "epoch": 0.1, "learning_rate": 1.9999598722462792e-05, "loss": 0.8438, "step": 297 }, { "epoch": 0.1, "learning_rate": 1.999956597845262e-05, "loss": 0.8193, "step": 298 }, { "epoch": 0.1, "learning_rate": 1.9999531950401486e-05, "loss": 0.8647, "step": 299 }, { "epoch": 0.1, "learning_rate": 1.9999496638313763e-05, "loss": 0.8555, "step": 300 }, { "epoch": 0.1, "learning_rate": 1.9999460042193982e-05, "loss": 0.7656, "step": 301 }, { "epoch": 0.1, "learning_rate": 1.9999422162046843e-05, "loss": 0.854, "step": 302 }, { "epoch": 0.1, "learning_rate": 1.9999382997877207e-05, "loss": 0.7715, "step": 303 }, { "epoch": 0.1, "learning_rate": 1.9999342549690113e-05, "loss": 0.894, "step": 304 }, { "epoch": 0.1, "learning_rate": 1.9999300817490745e-05, "loss": 0.8447, "step": 305 }, { "epoch": 0.1, "learning_rate": 1.9999257801284467e-05, "loss": 0.8291, "step": 306 }, { "epoch": 0.1, "learning_rate": 1.99992135010768e-05, "loss": 0.7705, "step": 307 }, { "epoch": 0.1, "learning_rate": 1.9999167916873436e-05, "loss": 0.8472, "step": 308 }, { "epoch": 0.1, "learning_rate": 1.9999121048680224e-05, "loss": 0.8193, "step": 309 }, { "epoch": 0.1, "learning_rate": 1.9999072896503185e-05, "loss": 0.7937, "step": 310 }, { "epoch": 0.1, "learning_rate": 1.99990234603485e-05, "loss": 0.7953, "step": 311 }, { "epoch": 0.1, "learning_rate": 1.9998972740222522e-05, "loss": 0.8608, "step": 312 }, { "epoch": 0.1, "learning_rate": 1.999892073613176e-05, "loss": 0.8853, "step": 313 }, { "epoch": 0.1, "learning_rate": 1.999886744808289e-05, "loss": 0.8501, "step": 314 }, { "epoch": 0.1, "learning_rate": 1.999881287608276e-05, "loss": 0.8232, "step": 315 }, { "epoch": 0.1, "learning_rate": 1.9998757020138372e-05, "loss": 0.873, "step": 316 }, { "epoch": 0.11, "learning_rate": 1.9998699880256906e-05, "loss": 0.873, "step": 317 }, { "epoch": 0.11, "learning_rate": 1.999864145644569e-05, "loss": 0.9111, "step": 318 }, { "epoch": 0.11, "learning_rate": 1.9998581748712232e-05, "loss": 0.8467, "step": 319 }, { "epoch": 0.11, "learning_rate": 1.99985207570642e-05, "loss": 0.855, "step": 320 }, { "epoch": 0.11, "learning_rate": 1.999845848150942e-05, "loss": 0.9087, "step": 321 }, { "epoch": 0.11, "learning_rate": 1.9998394922055897e-05, "loss": 0.8794, "step": 322 }, { "epoch": 0.11, "learning_rate": 1.9998330078711787e-05, "loss": 0.8501, "step": 323 }, { "epoch": 0.11, "learning_rate": 1.9998263951485418e-05, "loss": 0.9004, "step": 324 }, { "epoch": 0.11, "learning_rate": 1.999819654038528e-05, "loss": 0.8813, "step": 325 }, { "epoch": 0.11, "learning_rate": 1.9998127845420025e-05, "loss": 0.8557, "step": 326 }, { "epoch": 0.11, "learning_rate": 1.9998057866598487e-05, "loss": 0.8672, "step": 327 }, { "epoch": 0.11, "learning_rate": 1.9997986603929642e-05, "loss": 0.894, "step": 328 }, { "epoch": 0.11, "learning_rate": 1.999791405742264e-05, "loss": 0.8169, "step": 329 }, { "epoch": 0.11, "learning_rate": 1.9997840227086805e-05, "loss": 0.9375, "step": 330 }, { "epoch": 0.11, "learning_rate": 1.999776511293161e-05, "loss": 0.8706, "step": 331 }, { "epoch": 0.11, "learning_rate": 1.99976887149667e-05, "loss": 0.8569, "step": 332 }, { "epoch": 0.11, "learning_rate": 1.9997611033201892e-05, "loss": 0.8115, "step": 333 }, { "epoch": 0.11, "learning_rate": 1.999753206764716e-05, "loss": 0.876, "step": 334 }, { "epoch": 0.11, "learning_rate": 1.9997451818312634e-05, "loss": 0.8228, "step": 335 }, { "epoch": 0.11, "learning_rate": 1.9997370285208628e-05, "loss": 0.8345, "step": 336 }, { "epoch": 0.11, "learning_rate": 1.999728746834561e-05, "loss": 0.9058, "step": 337 }, { "epoch": 0.11, "learning_rate": 1.9997203367734218e-05, "loss": 0.8086, "step": 338 }, { "epoch": 0.11, "learning_rate": 1.999711798338524e-05, "loss": 0.9106, "step": 339 }, { "epoch": 0.11, "learning_rate": 1.999703131530965e-05, "loss": 0.873, "step": 340 }, { "epoch": 0.11, "learning_rate": 1.9996943363518578e-05, "loss": 0.8276, "step": 341 }, { "epoch": 0.11, "learning_rate": 1.999685412802331e-05, "loss": 0.8442, "step": 342 }, { "epoch": 0.11, "learning_rate": 1.9996763608835312e-05, "loss": 0.8667, "step": 343 }, { "epoch": 0.11, "learning_rate": 1.99966718059662e-05, "loss": 0.9014, "step": 344 }, { "epoch": 0.11, "learning_rate": 1.9996578719427773e-05, "loss": 0.9067, "step": 345 }, { "epoch": 0.11, "learning_rate": 1.9996484349231976e-05, "loss": 0.9561, "step": 346 }, { "epoch": 0.12, "learning_rate": 1.999638869539093e-05, "loss": 0.8418, "step": 347 }, { "epoch": 0.12, "learning_rate": 1.999629175791691e-05, "loss": 0.8652, "step": 348 }, { "epoch": 0.12, "learning_rate": 1.9996193536822378e-05, "loss": 0.835, "step": 349 }, { "epoch": 0.12, "learning_rate": 1.999609403211994e-05, "loss": 0.9165, "step": 350 }, { "epoch": 0.12, "learning_rate": 1.999599324382237e-05, "loss": 0.8149, "step": 351 }, { "epoch": 0.12, "learning_rate": 1.9995891171942614e-05, "loss": 0.8359, "step": 352 }, { "epoch": 0.12, "learning_rate": 1.9995787816493778e-05, "loss": 0.8904, "step": 353 }, { "epoch": 0.12, "learning_rate": 1.9995683177489133e-05, "loss": 0.877, "step": 354 }, { "epoch": 0.12, "learning_rate": 1.9995577254942117e-05, "loss": 0.8335, "step": 355 }, { "epoch": 0.12, "learning_rate": 1.9995470048866327e-05, "loss": 0.8291, "step": 356 }, { "epoch": 0.12, "learning_rate": 1.9995361559275538e-05, "loss": 0.8125, "step": 357 }, { "epoch": 0.12, "learning_rate": 1.9995251786183677e-05, "loss": 0.8823, "step": 358 }, { "epoch": 0.12, "learning_rate": 1.9995140729604837e-05, "loss": 0.9009, "step": 359 }, { "epoch": 0.12, "learning_rate": 1.999502838955328e-05, "loss": 0.8047, "step": 360 }, { "epoch": 0.12, "learning_rate": 1.999491476604343e-05, "loss": 0.8779, "step": 361 }, { "epoch": 0.12, "learning_rate": 1.9994799859089886e-05, "loss": 0.8379, "step": 362 }, { "epoch": 0.12, "learning_rate": 1.9994683668707396e-05, "loss": 0.8306, "step": 363 }, { "epoch": 0.12, "learning_rate": 1.9994566194910877e-05, "loss": 0.8125, "step": 364 }, { "epoch": 0.12, "learning_rate": 1.999444743771542e-05, "loss": 0.9028, "step": 365 }, { "epoch": 0.12, "learning_rate": 1.9994327397136276e-05, "loss": 0.8691, "step": 366 }, { "epoch": 0.12, "learning_rate": 1.999420607318885e-05, "loss": 0.854, "step": 367 }, { "epoch": 0.12, "learning_rate": 1.9994083465888727e-05, "loss": 0.8643, "step": 368 }, { "epoch": 0.12, "learning_rate": 1.9993959575251655e-05, "loss": 0.769, "step": 369 }, { "epoch": 0.12, "learning_rate": 1.9993834401293536e-05, "loss": 0.8696, "step": 370 }, { "epoch": 0.12, "learning_rate": 1.9993707944030448e-05, "loss": 0.8662, "step": 371 }, { "epoch": 0.12, "learning_rate": 1.9993580203478625e-05, "loss": 0.8921, "step": 372 }, { "epoch": 0.12, "learning_rate": 1.9993451179654472e-05, "loss": 0.8965, "step": 373 }, { "epoch": 0.12, "learning_rate": 1.999332087257456e-05, "loss": 0.8774, "step": 374 }, { "epoch": 0.12, "learning_rate": 1.9993189282255617e-05, "loss": 0.8652, "step": 375 }, { "epoch": 0.12, "learning_rate": 1.9993056408714545e-05, "loss": 0.8872, "step": 376 }, { "epoch": 0.13, "learning_rate": 1.9992922251968405e-05, "loss": 0.7944, "step": 377 }, { "epoch": 0.13, "learning_rate": 1.9992786812034418e-05, "loss": 0.8374, "step": 378 }, { "epoch": 0.13, "learning_rate": 1.9992650088929985e-05, "loss": 0.8467, "step": 379 }, { "epoch": 0.13, "learning_rate": 1.9992512082672657e-05, "loss": 0.8892, "step": 380 }, { "epoch": 0.13, "learning_rate": 1.9992372793280157e-05, "loss": 0.7783, "step": 381 }, { "epoch": 0.13, "learning_rate": 1.999223222077037e-05, "loss": 0.8274, "step": 382 }, { "epoch": 0.13, "learning_rate": 1.999209036516135e-05, "loss": 0.8232, "step": 383 }, { "epoch": 0.13, "learning_rate": 1.9991947226471307e-05, "loss": 0.7866, "step": 384 }, { "epoch": 0.13, "learning_rate": 1.9991802804718626e-05, "loss": 0.8257, "step": 385 }, { "epoch": 0.13, "learning_rate": 1.9991657099921847e-05, "loss": 0.8433, "step": 386 }, { "epoch": 0.13, "learning_rate": 1.9991510112099692e-05, "loss": 0.8315, "step": 387 }, { "epoch": 0.13, "learning_rate": 1.9991361841271022e-05, "loss": 0.7847, "step": 388 }, { "epoch": 0.13, "learning_rate": 1.9991212287454883e-05, "loss": 0.8853, "step": 389 }, { "epoch": 0.13, "learning_rate": 1.9991061450670475e-05, "loss": 0.8511, "step": 390 }, { "epoch": 0.13, "learning_rate": 1.9990909330937175e-05, "loss": 0.9312, "step": 391 }, { "epoch": 0.13, "learning_rate": 1.9990755928274505e-05, "loss": 0.7983, "step": 392 }, { "epoch": 0.13, "learning_rate": 1.9990601242702174e-05, "loss": 0.8555, "step": 393 }, { "epoch": 0.13, "learning_rate": 1.9990445274240043e-05, "loss": 0.8867, "step": 394 }, { "epoch": 0.13, "learning_rate": 1.9990288022908133e-05, "loss": 0.9214, "step": 395 }, { "epoch": 0.13, "learning_rate": 1.9990129488726643e-05, "loss": 0.9409, "step": 396 }, { "epoch": 0.13, "learning_rate": 1.998996967171593e-05, "loss": 0.8589, "step": 397 }, { "epoch": 0.13, "learning_rate": 1.9989808571896515e-05, "loss": 0.8853, "step": 398 }, { "epoch": 0.13, "learning_rate": 1.9989646189289084e-05, "loss": 0.8701, "step": 399 }, { "epoch": 0.13, "learning_rate": 1.998948252391449e-05, "loss": 0.8848, "step": 400 }, { "epoch": 0.13, "learning_rate": 1.9989317575793745e-05, "loss": 0.834, "step": 401 }, { "epoch": 0.13, "learning_rate": 1.9989151344948036e-05, "loss": 0.8921, "step": 402 }, { "epoch": 0.13, "learning_rate": 1.9988983831398703e-05, "loss": 0.9146, "step": 403 }, { "epoch": 0.13, "learning_rate": 1.998881503516726e-05, "loss": 0.8813, "step": 404 }, { "epoch": 0.13, "learning_rate": 1.9988644956275383e-05, "loss": 0.8389, "step": 405 }, { "epoch": 0.13, "learning_rate": 1.9988473594744912e-05, "loss": 0.8896, "step": 406 }, { "epoch": 0.14, "learning_rate": 1.9988300950597846e-05, "loss": 0.7395, "step": 407 }, { "epoch": 0.14, "learning_rate": 1.998812702385636e-05, "loss": 0.8398, "step": 408 }, { "epoch": 0.14, "learning_rate": 1.9987951814542784e-05, "loss": 0.7244, "step": 409 }, { "epoch": 0.14, "learning_rate": 1.998777532267962e-05, "loss": 0.8457, "step": 410 }, { "epoch": 0.14, "learning_rate": 1.998759754828953e-05, "loss": 0.8198, "step": 411 }, { "epoch": 0.14, "learning_rate": 1.998741849139534e-05, "loss": 0.8408, "step": 412 }, { "epoch": 0.14, "learning_rate": 1.998723815202004e-05, "loss": 0.7866, "step": 413 }, { "epoch": 0.14, "learning_rate": 1.9987056530186795e-05, "loss": 0.8345, "step": 414 }, { "epoch": 0.14, "learning_rate": 1.9986873625918923e-05, "loss": 0.8164, "step": 415 }, { "epoch": 0.14, "learning_rate": 1.9986689439239912e-05, "loss": 0.8726, "step": 416 }, { "epoch": 0.14, "learning_rate": 1.998650397017341e-05, "loss": 0.9121, "step": 417 }, { "epoch": 0.14, "learning_rate": 1.998631721874324e-05, "loss": 0.8004, "step": 418 }, { "epoch": 0.14, "learning_rate": 1.9986129184973372e-05, "loss": 0.8516, "step": 419 }, { "epoch": 0.14, "learning_rate": 1.9985939868887964e-05, "loss": 0.8276, "step": 420 }, { "epoch": 0.14, "learning_rate": 1.9985749270511313e-05, "loss": 0.8784, "step": 421 }, { "epoch": 0.14, "learning_rate": 1.9985557389867903e-05, "loss": 0.896, "step": 422 }, { "epoch": 0.14, "learning_rate": 1.998536422698237e-05, "loss": 0.8198, "step": 423 }, { "epoch": 0.14, "learning_rate": 1.998516978187952e-05, "loss": 0.9224, "step": 424 }, { "epoch": 0.14, "learning_rate": 1.998497405458432e-05, "loss": 0.8955, "step": 425 }, { "epoch": 0.14, "learning_rate": 1.99847770451219e-05, "loss": 0.7341, "step": 426 }, { "epoch": 0.14, "learning_rate": 1.9984578753517564e-05, "loss": 0.8828, "step": 427 }, { "epoch": 0.14, "learning_rate": 1.998437917979677e-05, "loss": 0.8506, "step": 428 }, { "epoch": 0.14, "learning_rate": 1.998417832398515e-05, "loss": 0.8818, "step": 429 }, { "epoch": 0.14, "learning_rate": 1.998397618610849e-05, "loss": 0.874, "step": 430 }, { "epoch": 0.14, "learning_rate": 1.9983772766192753e-05, "loss": 0.7876, "step": 431 }, { "epoch": 0.14, "learning_rate": 1.9983568064264053e-05, "loss": 0.8574, "step": 432 }, { "epoch": 0.14, "learning_rate": 1.998336208034868e-05, "loss": 0.8042, "step": 433 }, { "epoch": 0.14, "learning_rate": 1.998315481447308e-05, "loss": 0.7922, "step": 434 }, { "epoch": 0.14, "learning_rate": 1.998294626666388e-05, "loss": 0.897, "step": 435 }, { "epoch": 0.14, "learning_rate": 1.9982736436947844e-05, "loss": 0.8074, "step": 436 }, { "epoch": 0.15, "learning_rate": 1.9982525325351924e-05, "loss": 0.8638, "step": 437 }, { "epoch": 0.15, "learning_rate": 1.9982312931903226e-05, "loss": 0.9019, "step": 438 }, { "epoch": 0.15, "learning_rate": 1.998209925662903e-05, "loss": 0.8184, "step": 439 }, { "epoch": 0.15, "learning_rate": 1.9981884299556765e-05, "loss": 0.7991, "step": 440 }, { "epoch": 0.15, "learning_rate": 1.9981668060714042e-05, "loss": 0.8525, "step": 441 }, { "epoch": 0.15, "learning_rate": 1.998145054012862e-05, "loss": 0.8696, "step": 442 }, { "epoch": 0.15, "learning_rate": 1.9981231737828434e-05, "loss": 0.8452, "step": 443 }, { "epoch": 0.15, "learning_rate": 1.9981011653841585e-05, "loss": 0.6923, "step": 444 }, { "epoch": 0.15, "learning_rate": 1.998079028819633e-05, "loss": 0.8354, "step": 445 }, { "epoch": 0.15, "learning_rate": 1.9980567640921093e-05, "loss": 0.7915, "step": 446 }, { "epoch": 0.15, "learning_rate": 1.9980343712044464e-05, "loss": 0.769, "step": 447 }, { "epoch": 0.15, "learning_rate": 1.99801185015952e-05, "loss": 0.8711, "step": 448 }, { "epoch": 0.15, "learning_rate": 1.997989200960222e-05, "loss": 0.9292, "step": 449 }, { "epoch": 0.15, "learning_rate": 1.9979664236094604e-05, "loss": 0.8237, "step": 450 }, { "epoch": 0.15, "learning_rate": 1.9979435181101607e-05, "loss": 0.854, "step": 451 }, { "epoch": 0.15, "learning_rate": 1.9979204844652637e-05, "loss": 0.8691, "step": 452 }, { "epoch": 0.15, "learning_rate": 1.9978973226777272e-05, "loss": 0.7183, "step": 453 }, { "epoch": 0.15, "learning_rate": 1.9978740327505254e-05, "loss": 0.8428, "step": 454 }, { "epoch": 0.15, "learning_rate": 1.9978506146866496e-05, "loss": 0.8533, "step": 455 }, { "epoch": 0.15, "learning_rate": 1.9978270684891057e-05, "loss": 0.8354, "step": 456 }, { "epoch": 0.15, "learning_rate": 1.997803394160918e-05, "loss": 0.7886, "step": 457 }, { "epoch": 0.15, "learning_rate": 1.9977795917051262e-05, "loss": 0.8667, "step": 458 }, { "epoch": 0.15, "learning_rate": 1.9977556611247872e-05, "loss": 0.8242, "step": 459 }, { "epoch": 0.15, "learning_rate": 1.997731602422974e-05, "loss": 0.8916, "step": 460 }, { "epoch": 0.15, "learning_rate": 1.9977074156027753e-05, "loss": 0.8149, "step": 461 }, { "epoch": 0.15, "learning_rate": 1.9976831006672973e-05, "loss": 0.8735, "step": 462 }, { "epoch": 0.15, "learning_rate": 1.997658657619662e-05, "loss": 0.8105, "step": 463 }, { "epoch": 0.15, "learning_rate": 1.9976340864630087e-05, "loss": 0.8115, "step": 464 }, { "epoch": 0.15, "learning_rate": 1.997609387200492e-05, "loss": 0.7737, "step": 465 }, { "epoch": 0.15, "learning_rate": 1.997584559835284e-05, "loss": 0.8989, "step": 466 }, { "epoch": 0.15, "learning_rate": 1.9975596043705726e-05, "loss": 0.8242, "step": 467 }, { "epoch": 0.16, "learning_rate": 1.997534520809562e-05, "loss": 0.8467, "step": 468 }, { "epoch": 0.16, "learning_rate": 1.9975093091554736e-05, "loss": 0.8237, "step": 469 }, { "epoch": 0.16, "learning_rate": 1.9974839694115444e-05, "loss": 0.8389, "step": 470 }, { "epoch": 0.16, "learning_rate": 1.997458501581029e-05, "loss": 0.7881, "step": 471 }, { "epoch": 0.16, "learning_rate": 1.9974329056671968e-05, "loss": 0.8367, "step": 472 }, { "epoch": 0.16, "learning_rate": 1.9974071816733352e-05, "loss": 0.8604, "step": 473 }, { "epoch": 0.16, "learning_rate": 1.9973813296027472e-05, "loss": 0.8667, "step": 474 }, { "epoch": 0.16, "learning_rate": 1.9973553494587525e-05, "loss": 0.8174, "step": 475 }, { "epoch": 0.16, "learning_rate": 1.997329241244687e-05, "loss": 0.8584, "step": 476 }, { "epoch": 0.16, "learning_rate": 1.9973030049639036e-05, "loss": 0.7847, "step": 477 }, { "epoch": 0.16, "learning_rate": 1.997276640619771e-05, "loss": 0.8257, "step": 478 }, { "epoch": 0.16, "learning_rate": 1.997250148215675e-05, "loss": 0.8867, "step": 479 }, { "epoch": 0.16, "learning_rate": 1.9972235277550166e-05, "loss": 0.8481, "step": 480 }, { "epoch": 0.16, "learning_rate": 1.9971967792412154e-05, "loss": 0.8223, "step": 481 }, { "epoch": 0.16, "learning_rate": 1.997169902677705e-05, "loss": 0.793, "step": 482 }, { "epoch": 0.16, "learning_rate": 1.9971428980679374e-05, "loss": 0.8643, "step": 483 }, { "epoch": 0.16, "learning_rate": 1.99711576541538e-05, "loss": 0.8257, "step": 484 }, { "epoch": 0.16, "learning_rate": 1.9970885047235166e-05, "loss": 0.7827, "step": 485 }, { "epoch": 0.16, "learning_rate": 1.9970611159958484e-05, "loss": 0.9067, "step": 486 }, { "epoch": 0.16, "learning_rate": 1.997033599235892e-05, "loss": 0.8789, "step": 487 }, { "epoch": 0.16, "learning_rate": 1.9970059544471804e-05, "loss": 0.7856, "step": 488 }, { "epoch": 0.16, "learning_rate": 1.996978181633264e-05, "loss": 0.8477, "step": 489 }, { "epoch": 0.16, "learning_rate": 1.9969502807977093e-05, "loss": 0.7827, "step": 490 }, { "epoch": 0.16, "learning_rate": 1.996922251944098e-05, "loss": 0.8296, "step": 491 }, { "epoch": 0.16, "learning_rate": 1.9968940950760308e-05, "loss": 0.9272, "step": 492 }, { "epoch": 0.16, "learning_rate": 1.9968658101971223e-05, "loss": 0.8013, "step": 493 }, { "epoch": 0.16, "learning_rate": 1.9968373973110044e-05, "loss": 0.9219, "step": 494 }, { "epoch": 0.16, "learning_rate": 1.996808856421326e-05, "loss": 0.877, "step": 495 }, { "epoch": 0.16, "learning_rate": 1.9967801875317524e-05, "loss": 0.8538, "step": 496 }, { "epoch": 0.16, "learning_rate": 1.9967513906459637e-05, "loss": 0.728, "step": 497 }, { "epoch": 0.17, "learning_rate": 1.996722465767659e-05, "loss": 0.8384, "step": 498 }, { "epoch": 0.17, "learning_rate": 1.9966934129005523e-05, "loss": 0.8813, "step": 499 }, { "epoch": 0.17, "learning_rate": 1.9966642320483736e-05, "loss": 0.855, "step": 500 }, { "epoch": 0.17, "learning_rate": 1.9966349232148708e-05, "loss": 0.8652, "step": 501 }, { "epoch": 0.17, "learning_rate": 1.9966054864038068e-05, "loss": 0.8906, "step": 502 }, { "epoch": 0.17, "learning_rate": 1.9965759216189617e-05, "loss": 0.897, "step": 503 }, { "epoch": 0.17, "learning_rate": 1.9965462288641323e-05, "loss": 0.8206, "step": 504 }, { "epoch": 0.17, "learning_rate": 1.9965164081431313e-05, "loss": 0.8491, "step": 505 }, { "epoch": 0.17, "learning_rate": 1.9964864594597875e-05, "loss": 0.8711, "step": 506 }, { "epoch": 0.17, "learning_rate": 1.9964563828179472e-05, "loss": 0.8467, "step": 507 }, { "epoch": 0.17, "learning_rate": 1.996426178221472e-05, "loss": 0.9106, "step": 508 }, { "epoch": 0.17, "learning_rate": 1.9963958456742407e-05, "loss": 0.8633, "step": 509 }, { "epoch": 0.17, "learning_rate": 1.9963653851801487e-05, "loss": 0.8218, "step": 510 }, { "epoch": 0.17, "learning_rate": 1.996334796743107e-05, "loss": 0.9077, "step": 511 }, { "epoch": 0.17, "learning_rate": 1.996304080367043e-05, "loss": 0.8008, "step": 512 }, { "epoch": 0.17, "learning_rate": 1.996273236055902e-05, "loss": 0.7012, "step": 513 }, { "epoch": 0.17, "learning_rate": 1.996242263813644e-05, "loss": 0.8228, "step": 514 }, { "epoch": 0.17, "learning_rate": 1.9962111636442464e-05, "loss": 0.8159, "step": 515 }, { "epoch": 0.17, "learning_rate": 1.9961799355517028e-05, "loss": 0.8784, "step": 516 }, { "epoch": 0.17, "learning_rate": 1.9961485795400232e-05, "loss": 0.8579, "step": 517 }, { "epoch": 0.17, "learning_rate": 1.9961170956132335e-05, "loss": 0.7856, "step": 518 }, { "epoch": 0.17, "learning_rate": 1.996085483775377e-05, "loss": 0.9155, "step": 519 }, { "epoch": 0.17, "learning_rate": 1.996053744030513e-05, "loss": 0.8608, "step": 520 }, { "epoch": 0.17, "learning_rate": 1.996021876382717e-05, "loss": 0.7969, "step": 521 }, { "epoch": 0.17, "learning_rate": 1.9959898808360814e-05, "loss": 0.7869, "step": 522 }, { "epoch": 0.17, "learning_rate": 1.9959577573947145e-05, "loss": 0.8428, "step": 523 }, { "epoch": 0.17, "learning_rate": 1.995925506062741e-05, "loss": 0.8311, "step": 524 }, { "epoch": 0.17, "learning_rate": 1.9958931268443028e-05, "loss": 0.77, "step": 525 }, { "epoch": 0.17, "learning_rate": 1.9958606197435576e-05, "loss": 0.7373, "step": 526 }, { "epoch": 0.17, "learning_rate": 1.9958279847646793e-05, "loss": 0.8569, "step": 527 }, { "epoch": 0.18, "learning_rate": 1.9957952219118587e-05, "loss": 0.7793, "step": 528 }, { "epoch": 0.18, "learning_rate": 1.995762331189303e-05, "loss": 0.856, "step": 529 }, { "epoch": 0.18, "learning_rate": 1.995729312601236e-05, "loss": 0.8301, "step": 530 }, { "epoch": 0.18, "learning_rate": 1.995696166151897e-05, "loss": 0.7781, "step": 531 }, { "epoch": 0.18, "learning_rate": 1.9956628918455424e-05, "loss": 0.9004, "step": 532 }, { "epoch": 0.18, "learning_rate": 1.995629489686445e-05, "loss": 0.8252, "step": 533 }, { "epoch": 0.18, "learning_rate": 1.9955959596788943e-05, "loss": 0.8735, "step": 534 }, { "epoch": 0.18, "learning_rate": 1.9955623018271953e-05, "loss": 0.8745, "step": 535 }, { "epoch": 0.18, "learning_rate": 1.9955285161356708e-05, "loss": 0.8833, "step": 536 }, { "epoch": 0.18, "learning_rate": 1.9954946026086585e-05, "loss": 0.8296, "step": 537 }, { "epoch": 0.18, "learning_rate": 1.9954605612505134e-05, "loss": 0.792, "step": 538 }, { "epoch": 0.18, "learning_rate": 1.995426392065607e-05, "loss": 0.8027, "step": 539 }, { "epoch": 0.18, "learning_rate": 1.9953920950583264e-05, "loss": 0.834, "step": 540 }, { "epoch": 0.18, "learning_rate": 1.995357670233076e-05, "loss": 0.7466, "step": 541 }, { "epoch": 0.18, "learning_rate": 1.9953231175942765e-05, "loss": 0.8501, "step": 542 }, { "epoch": 0.18, "learning_rate": 1.995288437146365e-05, "loss": 0.7656, "step": 543 }, { "epoch": 0.18, "learning_rate": 1.9952536288937935e-05, "loss": 0.8979, "step": 544 }, { "epoch": 0.18, "learning_rate": 1.9952186928410333e-05, "loss": 0.8853, "step": 545 }, { "epoch": 0.18, "learning_rate": 1.9951836289925696e-05, "loss": 0.8677, "step": 546 }, { "epoch": 0.18, "learning_rate": 1.995148437352905e-05, "loss": 0.8589, "step": 547 }, { "epoch": 0.18, "learning_rate": 1.995113117926559e-05, "loss": 0.7397, "step": 548 }, { "epoch": 0.18, "learning_rate": 1.9950776707180662e-05, "loss": 0.8501, "step": 549 }, { "epoch": 0.18, "learning_rate": 1.995042095731979e-05, "loss": 0.8667, "step": 550 }, { "epoch": 0.18, "learning_rate": 1.9950063929728655e-05, "loss": 0.8472, "step": 551 }, { "epoch": 0.18, "learning_rate": 1.99497056244531e-05, "loss": 0.7209, "step": 552 }, { "epoch": 0.18, "learning_rate": 1.994934604153913e-05, "loss": 0.8999, "step": 553 }, { "epoch": 0.18, "learning_rate": 1.994898518103293e-05, "loss": 0.8052, "step": 554 }, { "epoch": 0.18, "learning_rate": 1.9948623042980834e-05, "loss": 0.8867, "step": 555 }, { "epoch": 0.18, "learning_rate": 1.9948259627429337e-05, "loss": 0.8779, "step": 556 }, { "epoch": 0.18, "learning_rate": 1.9947894934425114e-05, "loss": 0.8398, "step": 557 }, { "epoch": 0.19, "learning_rate": 1.9947528964014992e-05, "loss": 0.8643, "step": 558 }, { "epoch": 0.19, "learning_rate": 1.9947161716245965e-05, "loss": 0.781, "step": 559 }, { "epoch": 0.19, "learning_rate": 1.9946793191165193e-05, "loss": 0.8447, "step": 560 }, { "epoch": 0.19, "learning_rate": 1.9946423388819994e-05, "loss": 0.8589, "step": 561 }, { "epoch": 0.19, "learning_rate": 1.994605230925786e-05, "loss": 0.8286, "step": 562 }, { "epoch": 0.19, "learning_rate": 1.9945679952526432e-05, "loss": 0.8203, "step": 563 }, { "epoch": 0.19, "learning_rate": 1.994530631867353e-05, "loss": 0.8584, "step": 564 }, { "epoch": 0.19, "learning_rate": 1.9944931407747137e-05, "loss": 0.8687, "step": 565 }, { "epoch": 0.19, "learning_rate": 1.9944555219795386e-05, "loss": 0.8525, "step": 566 }, { "epoch": 0.19, "learning_rate": 1.9944177754866593e-05, "loss": 0.7817, "step": 567 }, { "epoch": 0.19, "learning_rate": 1.994379901300922e-05, "loss": 0.8735, "step": 568 }, { "epoch": 0.19, "learning_rate": 1.99434189942719e-05, "loss": 0.825, "step": 569 }, { "epoch": 0.19, "learning_rate": 1.994303769870344e-05, "loss": 0.7556, "step": 570 }, { "epoch": 0.19, "learning_rate": 1.9942655126352797e-05, "loss": 0.8276, "step": 571 }, { "epoch": 0.19, "learning_rate": 1.9942271277269094e-05, "loss": 0.8208, "step": 572 }, { "epoch": 0.19, "learning_rate": 1.9941886151501622e-05, "loss": 0.8564, "step": 573 }, { "epoch": 0.19, "learning_rate": 1.994149974909984e-05, "loss": 0.8037, "step": 574 }, { "epoch": 0.19, "learning_rate": 1.9941112070113363e-05, "loss": 0.8745, "step": 575 }, { "epoch": 0.19, "learning_rate": 1.9940723114591974e-05, "loss": 0.918, "step": 576 }, { "epoch": 0.19, "learning_rate": 1.9940332882585617e-05, "loss": 0.7939, "step": 577 }, { "epoch": 0.19, "learning_rate": 1.9939941374144394e-05, "loss": 0.8584, "step": 578 }, { "epoch": 0.19, "learning_rate": 1.993954858931859e-05, "loss": 0.8579, "step": 579 }, { "epoch": 0.19, "learning_rate": 1.993915452815864e-05, "loss": 0.8945, "step": 580 }, { "epoch": 0.19, "learning_rate": 1.9938759190715144e-05, "loss": 0.8433, "step": 581 }, { "epoch": 0.19, "learning_rate": 1.993836257703887e-05, "loss": 0.8262, "step": 582 }, { "epoch": 0.19, "learning_rate": 1.9937964687180736e-05, "loss": 0.8467, "step": 583 }, { "epoch": 0.19, "learning_rate": 1.9937565521191845e-05, "loss": 0.8252, "step": 584 }, { "epoch": 0.19, "learning_rate": 1.9937165079123452e-05, "loss": 0.7715, "step": 585 }, { "epoch": 0.19, "learning_rate": 1.9936763361026978e-05, "loss": 0.8176, "step": 586 }, { "epoch": 0.19, "learning_rate": 1.9936360366954007e-05, "loss": 0.8799, "step": 587 }, { "epoch": 0.2, "learning_rate": 1.9935956096956283e-05, "loss": 0.8599, "step": 588 }, { "epoch": 0.2, "learning_rate": 1.9935550551085727e-05, "loss": 0.748, "step": 589 }, { "epoch": 0.2, "learning_rate": 1.9935143729394405e-05, "loss": 0.7759, "step": 590 }, { "epoch": 0.2, "learning_rate": 1.9934735631934568e-05, "loss": 0.8179, "step": 591 }, { "epoch": 0.2, "learning_rate": 1.993432625875861e-05, "loss": 0.8999, "step": 592 }, { "epoch": 0.2, "learning_rate": 1.9933915609919104e-05, "loss": 0.8999, "step": 593 }, { "epoch": 0.2, "learning_rate": 1.9933503685468775e-05, "loss": 0.8867, "step": 594 }, { "epoch": 0.2, "learning_rate": 1.993309048546053e-05, "loss": 0.8833, "step": 595 }, { "epoch": 0.2, "learning_rate": 1.993267600994741e-05, "loss": 0.8545, "step": 596 }, { "epoch": 0.2, "learning_rate": 1.9932260258982654e-05, "loss": 0.8535, "step": 597 }, { "epoch": 0.2, "learning_rate": 1.9931843232619644e-05, "loss": 0.8369, "step": 598 }, { "epoch": 0.2, "learning_rate": 1.9931424930911926e-05, "loss": 0.7687, "step": 599 }, { "epoch": 0.2, "learning_rate": 1.9931005353913218e-05, "loss": 0.7983, "step": 600 }, { "epoch": 0.2, "learning_rate": 1.9930584501677392e-05, "loss": 0.8359, "step": 601 }, { "epoch": 0.2, "learning_rate": 1.9930162374258496e-05, "loss": 0.8281, "step": 602 }, { "epoch": 0.2, "learning_rate": 1.9929738971710736e-05, "loss": 0.7939, "step": 603 }, { "epoch": 0.2, "learning_rate": 1.9929314294088478e-05, "loss": 0.8398, "step": 604 }, { "epoch": 0.2, "learning_rate": 1.9928888341446254e-05, "loss": 0.7876, "step": 605 }, { "epoch": 0.2, "learning_rate": 1.992846111383876e-05, "loss": 0.744, "step": 606 }, { "epoch": 0.2, "learning_rate": 1.9928032611320856e-05, "loss": 0.8594, "step": 607 }, { "epoch": 0.2, "learning_rate": 1.9927602833947566e-05, "loss": 0.8135, "step": 608 }, { "epoch": 0.2, "learning_rate": 1.9927171781774085e-05, "loss": 0.8267, "step": 609 }, { "epoch": 0.2, "learning_rate": 1.9926739454855754e-05, "loss": 0.8491, "step": 610 }, { "epoch": 0.2, "learning_rate": 1.9926305853248095e-05, "loss": 0.7993, "step": 611 }, { "epoch": 0.2, "learning_rate": 1.992587097700678e-05, "loss": 0.7944, "step": 612 }, { "epoch": 0.2, "learning_rate": 1.9925434826187657e-05, "loss": 0.8159, "step": 613 }, { "epoch": 0.2, "learning_rate": 1.992499740084673e-05, "loss": 0.8203, "step": 614 }, { "epoch": 0.2, "learning_rate": 1.9924558701040165e-05, "loss": 0.7744, "step": 615 }, { "epoch": 0.2, "learning_rate": 1.9924118726824305e-05, "loss": 0.8486, "step": 616 }, { "epoch": 0.2, "learning_rate": 1.9923677478255636e-05, "loss": 0.8127, "step": 617 }, { "epoch": 0.21, "learning_rate": 1.9923234955390825e-05, "loss": 0.8374, "step": 618 }, { "epoch": 0.21, "learning_rate": 1.9922791158286697e-05, "loss": 0.8672, "step": 619 }, { "epoch": 0.21, "learning_rate": 1.9922346087000234e-05, "loss": 0.8144, "step": 620 }, { "epoch": 0.21, "learning_rate": 1.9921899741588594e-05, "loss": 0.7876, "step": 621 }, { "epoch": 0.21, "learning_rate": 1.9921452122109087e-05, "loss": 0.8022, "step": 622 }, { "epoch": 0.21, "learning_rate": 1.9921003228619193e-05, "loss": 0.8584, "step": 623 }, { "epoch": 0.21, "learning_rate": 1.9920553061176557e-05, "loss": 0.8013, "step": 624 }, { "epoch": 0.21, "learning_rate": 1.992010161983898e-05, "loss": 0.8438, "step": 625 }, { "epoch": 0.21, "learning_rate": 1.9919648904664438e-05, "loss": 0.7456, "step": 626 }, { "epoch": 0.21, "learning_rate": 1.9919194915711054e-05, "loss": 0.8691, "step": 627 }, { "epoch": 0.21, "learning_rate": 1.9918739653037137e-05, "loss": 0.8027, "step": 628 }, { "epoch": 0.21, "learning_rate": 1.991828311670114e-05, "loss": 0.7798, "step": 629 }, { "epoch": 0.21, "learning_rate": 1.9917825306761685e-05, "loss": 0.7759, "step": 630 }, { "epoch": 0.21, "learning_rate": 1.9917366223277567e-05, "loss": 0.8042, "step": 631 }, { "epoch": 0.21, "learning_rate": 1.9916905866307727e-05, "loss": 0.9194, "step": 632 }, { "epoch": 0.21, "learning_rate": 1.9916444235911285e-05, "loss": 0.7751, "step": 633 }, { "epoch": 0.21, "learning_rate": 1.9915981332147516e-05, "loss": 0.8418, "step": 634 }, { "epoch": 0.21, "learning_rate": 1.9915517155075865e-05, "loss": 0.8706, "step": 635 }, { "epoch": 0.21, "learning_rate": 1.9915051704755933e-05, "loss": 0.8176, "step": 636 }, { "epoch": 0.21, "learning_rate": 1.9914584981247493e-05, "loss": 0.8882, "step": 637 }, { "epoch": 0.21, "learning_rate": 1.991411698461047e-05, "loss": 0.8169, "step": 638 }, { "epoch": 0.21, "learning_rate": 1.991364771490497e-05, "loss": 0.8682, "step": 639 }, { "epoch": 0.21, "learning_rate": 1.991317717219124e-05, "loss": 0.8535, "step": 640 }, { "epoch": 0.21, "learning_rate": 1.9912705356529707e-05, "loss": 0.835, "step": 641 }, { "epoch": 0.21, "learning_rate": 1.9912232267980956e-05, "loss": 0.7524, "step": 642 }, { "epoch": 0.21, "learning_rate": 1.991175790660574e-05, "loss": 0.7939, "step": 643 }, { "epoch": 0.21, "learning_rate": 1.9911282272464967e-05, "loss": 0.8047, "step": 644 }, { "epoch": 0.21, "learning_rate": 1.9910805365619718e-05, "loss": 0.8491, "step": 645 }, { "epoch": 0.21, "learning_rate": 1.9910327186131226e-05, "loss": 0.8584, "step": 646 }, { "epoch": 0.21, "learning_rate": 1.9909847734060896e-05, "loss": 0.8174, "step": 647 }, { "epoch": 0.22, "learning_rate": 1.99093670094703e-05, "loss": 0.7681, "step": 648 }, { "epoch": 0.22, "learning_rate": 1.990888501242116e-05, "loss": 0.8093, "step": 649 }, { "epoch": 0.22, "learning_rate": 1.990840174297537e-05, "loss": 0.7446, "step": 650 }, { "epoch": 0.22, "learning_rate": 1.990791720119499e-05, "loss": 0.8149, "step": 651 }, { "epoch": 0.22, "learning_rate": 1.990743138714224e-05, "loss": 0.8755, "step": 652 }, { "epoch": 0.22, "learning_rate": 1.99069443008795e-05, "loss": 0.8472, "step": 653 }, { "epoch": 0.22, "learning_rate": 1.9906455942469322e-05, "loss": 0.8657, "step": 654 }, { "epoch": 0.22, "learning_rate": 1.9905966311974407e-05, "loss": 0.7126, "step": 655 }, { "epoch": 0.22, "learning_rate": 1.9905475409457634e-05, "loss": 0.8433, "step": 656 }, { "epoch": 0.22, "learning_rate": 1.990498323498204e-05, "loss": 0.8599, "step": 657 }, { "epoch": 0.22, "learning_rate": 1.9904489788610827e-05, "loss": 0.751, "step": 658 }, { "epoch": 0.22, "learning_rate": 1.990399507040735e-05, "loss": 0.9102, "step": 659 }, { "epoch": 0.22, "learning_rate": 1.9903499080435144e-05, "loss": 0.8413, "step": 660 }, { "epoch": 0.22, "learning_rate": 1.9903001818757898e-05, "loss": 0.8735, "step": 661 }, { "epoch": 0.22, "learning_rate": 1.990250328543946e-05, "loss": 0.8159, "step": 662 }, { "epoch": 0.22, "learning_rate": 1.990200348054385e-05, "loss": 0.8267, "step": 663 }, { "epoch": 0.22, "learning_rate": 1.9901502404135246e-05, "loss": 0.8193, "step": 664 }, { "epoch": 0.22, "learning_rate": 1.9901000056277995e-05, "loss": 0.7925, "step": 665 }, { "epoch": 0.22, "learning_rate": 1.99004964370366e-05, "loss": 0.8203, "step": 666 }, { "epoch": 0.22, "learning_rate": 1.989999154647573e-05, "loss": 0.772, "step": 667 }, { "epoch": 0.22, "learning_rate": 1.989948538466022e-05, "loss": 0.7515, "step": 668 }, { "epoch": 0.22, "learning_rate": 1.9898977951655066e-05, "loss": 0.873, "step": 669 }, { "epoch": 0.22, "learning_rate": 1.9898469247525424e-05, "loss": 0.8242, "step": 670 }, { "epoch": 0.22, "learning_rate": 1.989795927233662e-05, "loss": 0.8687, "step": 671 }, { "epoch": 0.22, "learning_rate": 1.989744802615414e-05, "loss": 0.8359, "step": 672 }, { "epoch": 0.22, "learning_rate": 1.989693550904363e-05, "loss": 0.7998, "step": 673 }, { "epoch": 0.22, "learning_rate": 1.9896421721070904e-05, "loss": 0.833, "step": 674 }, { "epoch": 0.22, "learning_rate": 1.989590666230194e-05, "loss": 0.7993, "step": 675 }, { "epoch": 0.22, "learning_rate": 1.9895390332802872e-05, "loss": 0.8174, "step": 676 }, { "epoch": 0.22, "learning_rate": 1.9894872732640004e-05, "loss": 0.877, "step": 677 }, { "epoch": 0.23, "learning_rate": 1.9894353861879797e-05, "loss": 0.8345, "step": 678 }, { "epoch": 0.23, "learning_rate": 1.9893833720588885e-05, "loss": 0.8198, "step": 679 }, { "epoch": 0.23, "learning_rate": 1.9893312308834058e-05, "loss": 0.9331, "step": 680 }, { "epoch": 0.23, "learning_rate": 1.9892789626682266e-05, "loss": 0.825, "step": 681 }, { "epoch": 0.23, "learning_rate": 1.989226567420063e-05, "loss": 0.7905, "step": 682 }, { "epoch": 0.23, "learning_rate": 1.9891740451456432e-05, "loss": 0.8213, "step": 683 }, { "epoch": 0.23, "learning_rate": 1.989121395851711e-05, "loss": 0.8203, "step": 684 }, { "epoch": 0.23, "learning_rate": 1.9890686195450275e-05, "loss": 0.7739, "step": 685 }, { "epoch": 0.23, "learning_rate": 1.9890157162323697e-05, "loss": 0.8408, "step": 686 }, { "epoch": 0.23, "learning_rate": 1.988962685920531e-05, "loss": 0.8208, "step": 687 }, { "epoch": 0.23, "learning_rate": 1.9889095286163205e-05, "loss": 0.8433, "step": 688 }, { "epoch": 0.23, "learning_rate": 1.9888562443265647e-05, "loss": 0.8364, "step": 689 }, { "epoch": 0.23, "learning_rate": 1.9888028330581052e-05, "loss": 0.7798, "step": 690 }, { "epoch": 0.23, "learning_rate": 1.9887492948178012e-05, "loss": 0.8179, "step": 691 }, { "epoch": 0.23, "learning_rate": 1.988695629612527e-05, "loss": 0.873, "step": 692 }, { "epoch": 0.23, "learning_rate": 1.988641837449174e-05, "loss": 0.8354, "step": 693 }, { "epoch": 0.23, "learning_rate": 1.9885879183346497e-05, "loss": 0.855, "step": 694 }, { "epoch": 0.23, "learning_rate": 1.9885338722758775e-05, "loss": 0.8291, "step": 695 }, { "epoch": 0.23, "learning_rate": 1.9884796992797975e-05, "loss": 0.7871, "step": 696 }, { "epoch": 0.23, "learning_rate": 1.9884253993533665e-05, "loss": 0.8604, "step": 697 }, { "epoch": 0.23, "learning_rate": 1.9883709725035566e-05, "loss": 0.8027, "step": 698 }, { "epoch": 0.23, "learning_rate": 1.9883164187373572e-05, "loss": 0.8584, "step": 699 }, { "epoch": 0.23, "learning_rate": 1.988261738061773e-05, "loss": 0.855, "step": 700 }, { "epoch": 0.23, "learning_rate": 1.988206930483826e-05, "loss": 0.7549, "step": 701 }, { "epoch": 0.23, "learning_rate": 1.9881519960105535e-05, "loss": 0.8345, "step": 702 }, { "epoch": 0.23, "learning_rate": 1.98809693464901e-05, "loss": 0.7656, "step": 703 }, { "epoch": 0.23, "learning_rate": 1.9880417464062664e-05, "loss": 0.8618, "step": 704 }, { "epoch": 0.23, "learning_rate": 1.9879864312894083e-05, "loss": 0.8584, "step": 705 }, { "epoch": 0.23, "learning_rate": 1.9879309893055394e-05, "loss": 0.8564, "step": 706 }, { "epoch": 0.23, "learning_rate": 1.9878754204617787e-05, "loss": 0.8638, "step": 707 }, { "epoch": 0.23, "learning_rate": 1.987819724765262e-05, "loss": 0.8428, "step": 708 }, { "epoch": 0.24, "learning_rate": 1.987763902223141e-05, "loss": 0.7046, "step": 709 }, { "epoch": 0.24, "learning_rate": 1.9877079528425843e-05, "loss": 0.7839, "step": 710 }, { "epoch": 0.24, "learning_rate": 1.9876518766307757e-05, "loss": 0.8262, "step": 711 }, { "epoch": 0.24, "learning_rate": 1.987595673594916e-05, "loss": 0.7309, "step": 712 }, { "epoch": 0.24, "learning_rate": 1.9875393437422227e-05, "loss": 0.7935, "step": 713 }, { "epoch": 0.24, "learning_rate": 1.987482887079929e-05, "loss": 0.7896, "step": 714 }, { "epoch": 0.24, "learning_rate": 1.9874263036152843e-05, "loss": 0.8242, "step": 715 }, { "epoch": 0.24, "learning_rate": 1.987369593355554e-05, "loss": 0.8779, "step": 716 }, { "epoch": 0.24, "learning_rate": 1.987312756308021e-05, "loss": 0.7421, "step": 717 }, { "epoch": 0.24, "learning_rate": 1.9872557924799834e-05, "loss": 0.7671, "step": 718 }, { "epoch": 0.24, "learning_rate": 1.987198701878756e-05, "loss": 0.7959, "step": 719 }, { "epoch": 0.24, "learning_rate": 1.9871414845116694e-05, "loss": 0.877, "step": 720 }, { "epoch": 0.24, "learning_rate": 1.9870841403860713e-05, "loss": 0.8423, "step": 721 }, { "epoch": 0.24, "learning_rate": 1.9870266695093253e-05, "loss": 0.8809, "step": 722 }, { "epoch": 0.24, "learning_rate": 1.986969071888811e-05, "loss": 0.8696, "step": 723 }, { "epoch": 0.24, "learning_rate": 1.9869113475319245e-05, "loss": 0.74, "step": 724 }, { "epoch": 0.24, "learning_rate": 1.9868534964460785e-05, "loss": 0.8877, "step": 725 }, { "epoch": 0.24, "learning_rate": 1.986795518638701e-05, "loss": 0.832, "step": 726 }, { "epoch": 0.24, "learning_rate": 1.9867374141172374e-05, "loss": 0.793, "step": 727 }, { "epoch": 0.24, "learning_rate": 1.9866791828891487e-05, "loss": 0.8223, "step": 728 }, { "epoch": 0.24, "learning_rate": 1.986620824961912e-05, "loss": 0.8574, "step": 729 }, { "epoch": 0.24, "learning_rate": 1.986562340343022e-05, "loss": 0.8809, "step": 730 }, { "epoch": 0.24, "learning_rate": 1.9865037290399874e-05, "loss": 0.7803, "step": 731 }, { "epoch": 0.24, "learning_rate": 1.986444991060336e-05, "loss": 0.8418, "step": 732 }, { "epoch": 0.24, "learning_rate": 1.986386126411609e-05, "loss": 0.7725, "step": 733 }, { "epoch": 0.24, "learning_rate": 1.986327135101366e-05, "loss": 0.8286, "step": 734 }, { "epoch": 0.24, "learning_rate": 1.986268017137181e-05, "loss": 0.7859, "step": 735 }, { "epoch": 0.24, "learning_rate": 1.9862087725266467e-05, "loss": 0.8467, "step": 736 }, { "epoch": 0.24, "learning_rate": 1.98614940127737e-05, "loss": 0.8594, "step": 737 }, { "epoch": 0.24, "learning_rate": 1.9860899033969744e-05, "loss": 0.8281, "step": 738 }, { "epoch": 0.25, "learning_rate": 1.9860302788931006e-05, "loss": 0.8174, "step": 739 }, { "epoch": 0.25, "learning_rate": 1.9859705277734046e-05, "loss": 0.7881, "step": 740 }, { "epoch": 0.25, "learning_rate": 1.9859106500455594e-05, "loss": 0.8149, "step": 741 }, { "epoch": 0.25, "learning_rate": 1.9858506457172534e-05, "loss": 0.7935, "step": 742 }, { "epoch": 0.25, "learning_rate": 1.9857905147961923e-05, "loss": 0.7512, "step": 743 }, { "epoch": 0.25, "learning_rate": 1.985730257290097e-05, "loss": 0.8335, "step": 744 }, { "epoch": 0.25, "learning_rate": 1.985669873206705e-05, "loss": 0.7456, "step": 745 }, { "epoch": 0.25, "learning_rate": 1.985609362553771e-05, "loss": 0.8232, "step": 746 }, { "epoch": 0.25, "learning_rate": 1.9855487253390642e-05, "loss": 0.813, "step": 747 }, { "epoch": 0.25, "learning_rate": 1.9854879615703717e-05, "loss": 0.7451, "step": 748 }, { "epoch": 0.25, "learning_rate": 1.985427071255496e-05, "loss": 0.7493, "step": 749 }, { "epoch": 0.25, "learning_rate": 1.9853660544022557e-05, "loss": 0.7891, "step": 750 }, { "epoch": 0.25, "learning_rate": 1.9853049110184863e-05, "loss": 0.7544, "step": 751 }, { "epoch": 0.25, "learning_rate": 1.9852436411120392e-05, "loss": 0.833, "step": 752 }, { "epoch": 0.25, "learning_rate": 1.9851822446907816e-05, "loss": 0.7015, "step": 753 }, { "epoch": 0.25, "learning_rate": 1.985120721762598e-05, "loss": 0.7871, "step": 754 }, { "epoch": 0.25, "learning_rate": 1.985059072335388e-05, "loss": 0.8291, "step": 755 }, { "epoch": 0.25, "learning_rate": 1.9849972964170687e-05, "loss": 0.8237, "step": 756 }, { "epoch": 0.25, "learning_rate": 1.984935394015572e-05, "loss": 0.8691, "step": 757 }, { "epoch": 0.25, "learning_rate": 1.984873365138847e-05, "loss": 0.8579, "step": 758 }, { "epoch": 0.25, "learning_rate": 1.984811209794859e-05, "loss": 0.7983, "step": 759 }, { "epoch": 0.25, "learning_rate": 1.9847489279915894e-05, "loss": 0.7983, "step": 760 }, { "epoch": 0.25, "learning_rate": 1.9846865197370348e-05, "loss": 0.7876, "step": 761 }, { "epoch": 0.25, "learning_rate": 1.9846239850392104e-05, "loss": 0.8037, "step": 762 }, { "epoch": 0.25, "learning_rate": 1.9845613239061457e-05, "loss": 0.7837, "step": 763 }, { "epoch": 0.25, "learning_rate": 1.984498536345887e-05, "loss": 0.7915, "step": 764 }, { "epoch": 0.25, "learning_rate": 1.984435622366496e-05, "loss": 0.8071, "step": 765 }, { "epoch": 0.25, "learning_rate": 1.9843725819760532e-05, "loss": 0.8291, "step": 766 }, { "epoch": 0.25, "learning_rate": 1.9843094151826522e-05, "loss": 0.7214, "step": 767 }, { "epoch": 0.25, "learning_rate": 1.9842461219944046e-05, "loss": 0.7588, "step": 768 }, { "epoch": 0.26, "learning_rate": 1.9841827024194382e-05, "loss": 0.8052, "step": 769 }, { "epoch": 0.26, "learning_rate": 1.9841191564658964e-05, "loss": 0.855, "step": 770 }, { "epoch": 0.26, "learning_rate": 1.984055484141939e-05, "loss": 0.8154, "step": 771 }, { "epoch": 0.26, "learning_rate": 1.9839916854557422e-05, "loss": 0.8872, "step": 772 }, { "epoch": 0.26, "learning_rate": 1.9839277604154986e-05, "loss": 0.8032, "step": 773 }, { "epoch": 0.26, "learning_rate": 1.9838637090294168e-05, "loss": 0.8887, "step": 774 }, { "epoch": 0.26, "learning_rate": 1.9837995313057213e-05, "loss": 0.7949, "step": 775 }, { "epoch": 0.26, "learning_rate": 1.983735227252653e-05, "loss": 0.7476, "step": 776 }, { "epoch": 0.26, "learning_rate": 1.9836707968784696e-05, "loss": 0.7739, "step": 777 }, { "epoch": 0.26, "learning_rate": 1.9836062401914447e-05, "loss": 0.8228, "step": 778 }, { "epoch": 0.26, "learning_rate": 1.9835415571998674e-05, "loss": 0.7672, "step": 779 }, { "epoch": 0.26, "learning_rate": 1.983476747912044e-05, "loss": 0.6572, "step": 780 }, { "epoch": 0.26, "learning_rate": 1.983411812336297e-05, "loss": 0.8306, "step": 781 }, { "epoch": 0.26, "learning_rate": 1.983346750480964e-05, "loss": 0.8804, "step": 782 }, { "epoch": 0.26, "learning_rate": 1.9832815623544e-05, "loss": 0.8813, "step": 783 }, { "epoch": 0.26, "learning_rate": 1.983216247964976e-05, "loss": 0.8013, "step": 784 }, { "epoch": 0.26, "learning_rate": 1.9831508073210782e-05, "loss": 0.8467, "step": 785 }, { "epoch": 0.26, "learning_rate": 1.9830852404311108e-05, "loss": 0.8076, "step": 786 }, { "epoch": 0.26, "learning_rate": 1.9830195473034923e-05, "loss": 0.7715, "step": 787 }, { "epoch": 0.26, "learning_rate": 1.982953727946659e-05, "loss": 0.855, "step": 788 }, { "epoch": 0.26, "learning_rate": 1.9828877823690623e-05, "loss": 0.8975, "step": 789 }, { "epoch": 0.26, "learning_rate": 1.982821710579171e-05, "loss": 0.9028, "step": 790 }, { "epoch": 0.26, "learning_rate": 1.9827555125854686e-05, "loss": 0.853, "step": 791 }, { "epoch": 0.26, "learning_rate": 1.9826891883964556e-05, "loss": 0.8018, "step": 792 }, { "epoch": 0.26, "learning_rate": 1.9826227380206487e-05, "loss": 0.8174, "step": 793 }, { "epoch": 0.26, "learning_rate": 1.9825561614665815e-05, "loss": 0.8115, "step": 794 }, { "epoch": 0.26, "learning_rate": 1.9824894587428022e-05, "loss": 0.8076, "step": 795 }, { "epoch": 0.26, "learning_rate": 1.9824226298578765e-05, "loss": 0.8394, "step": 796 }, { "epoch": 0.26, "learning_rate": 1.9823556748203858e-05, "loss": 0.7761, "step": 797 }, { "epoch": 0.26, "learning_rate": 1.9822885936389274e-05, "loss": 0.793, "step": 798 }, { "epoch": 0.27, "learning_rate": 1.9822213863221157e-05, "loss": 0.7037, "step": 799 }, { "epoch": 0.27, "learning_rate": 1.9821540528785802e-05, "loss": 0.8369, "step": 800 }, { "epoch": 0.27, "learning_rate": 1.9820865933169682e-05, "loss": 0.7856, "step": 801 }, { "epoch": 0.27, "learning_rate": 1.982019007645941e-05, "loss": 0.79, "step": 802 }, { "epoch": 0.27, "learning_rate": 1.981951295874178e-05, "loss": 0.8379, "step": 803 }, { "epoch": 0.27, "learning_rate": 1.9818834580103735e-05, "loss": 0.8755, "step": 804 }, { "epoch": 0.27, "learning_rate": 1.9818154940632392e-05, "loss": 0.8306, "step": 805 }, { "epoch": 0.27, "learning_rate": 1.981747404041502e-05, "loss": 0.8379, "step": 806 }, { "epoch": 0.27, "learning_rate": 1.9816791879539048e-05, "loss": 0.8418, "step": 807 }, { "epoch": 0.27, "learning_rate": 1.981610845809208e-05, "loss": 0.8311, "step": 808 }, { "epoch": 0.27, "learning_rate": 1.981542377616187e-05, "loss": 0.9023, "step": 809 }, { "epoch": 0.27, "learning_rate": 1.981473783383634e-05, "loss": 0.8105, "step": 810 }, { "epoch": 0.27, "learning_rate": 1.981405063120357e-05, "loss": 0.748, "step": 811 }, { "epoch": 0.27, "learning_rate": 1.98133621683518e-05, "loss": 0.8359, "step": 812 }, { "epoch": 0.27, "learning_rate": 1.9812672445369444e-05, "loss": 0.7646, "step": 813 }, { "epoch": 0.27, "learning_rate": 1.981198146234506e-05, "loss": 0.7446, "step": 814 }, { "epoch": 0.27, "learning_rate": 1.9811289219367382e-05, "loss": 0.8857, "step": 815 }, { "epoch": 0.27, "learning_rate": 1.9810595716525302e-05, "loss": 0.7847, "step": 816 }, { "epoch": 0.27, "learning_rate": 1.980990095390787e-05, "loss": 0.7437, "step": 817 }, { "epoch": 0.27, "learning_rate": 1.9809204931604296e-05, "loss": 0.8882, "step": 818 }, { "epoch": 0.27, "learning_rate": 1.9808507649703962e-05, "loss": 0.8096, "step": 819 }, { "epoch": 0.27, "learning_rate": 1.9807809108296406e-05, "loss": 0.7175, "step": 820 }, { "epoch": 0.27, "learning_rate": 1.980710930747132e-05, "loss": 0.7358, "step": 821 }, { "epoch": 0.27, "learning_rate": 1.9806408247318576e-05, "loss": 0.8291, "step": 822 }, { "epoch": 0.27, "learning_rate": 1.980570592792819e-05, "loss": 0.752, "step": 823 }, { "epoch": 0.27, "learning_rate": 1.9805002349390353e-05, "loss": 0.7448, "step": 824 }, { "epoch": 0.27, "learning_rate": 1.98042975117954e-05, "loss": 0.8545, "step": 825 }, { "epoch": 0.27, "learning_rate": 1.9803591415233847e-05, "loss": 0.7095, "step": 826 }, { "epoch": 0.27, "learning_rate": 1.9802884059796365e-05, "loss": 0.8164, "step": 827 }, { "epoch": 0.27, "learning_rate": 1.9802175445573777e-05, "loss": 0.8564, "step": 828 }, { "epoch": 0.28, "learning_rate": 1.9801465572657083e-05, "loss": 0.7678, "step": 829 }, { "epoch": 0.28, "learning_rate": 1.980075444113744e-05, "loss": 0.7898, "step": 830 }, { "epoch": 0.28, "learning_rate": 1.9800042051106155e-05, "loss": 0.8306, "step": 831 }, { "epoch": 0.28, "learning_rate": 1.9799328402654715e-05, "loss": 0.8345, "step": 832 }, { "epoch": 0.28, "learning_rate": 1.9798613495874754e-05, "loss": 0.8818, "step": 833 }, { "epoch": 0.28, "learning_rate": 1.979789733085807e-05, "loss": 0.7534, "step": 834 }, { "epoch": 0.28, "learning_rate": 1.9797179907696634e-05, "loss": 0.791, "step": 835 }, { "epoch": 0.28, "learning_rate": 1.9796461226482566e-05, "loss": 0.8481, "step": 836 }, { "epoch": 0.28, "learning_rate": 1.9795741287308148e-05, "loss": 0.8091, "step": 837 }, { "epoch": 0.28, "learning_rate": 1.9795020090265832e-05, "loss": 0.7104, "step": 838 }, { "epoch": 0.28, "learning_rate": 1.9794297635448223e-05, "loss": 0.7798, "step": 839 }, { "epoch": 0.28, "learning_rate": 1.9793573922948095e-05, "loss": 0.833, "step": 840 }, { "epoch": 0.28, "learning_rate": 1.979284895285838e-05, "loss": 0.8049, "step": 841 }, { "epoch": 0.28, "learning_rate": 1.9792122725272168e-05, "loss": 0.7739, "step": 842 }, { "epoch": 0.28, "learning_rate": 1.9791395240282713e-05, "loss": 0.7949, "step": 843 }, { "epoch": 0.28, "learning_rate": 1.9790666497983433e-05, "loss": 0.811, "step": 844 }, { "epoch": 0.28, "learning_rate": 1.978993649846791e-05, "loss": 0.7915, "step": 845 }, { "epoch": 0.28, "learning_rate": 1.9789205241829872e-05, "loss": 0.8257, "step": 846 }, { "epoch": 0.28, "learning_rate": 1.978847272816323e-05, "loss": 0.7192, "step": 847 }, { "epoch": 0.28, "learning_rate": 1.978773895756204e-05, "loss": 0.7722, "step": 848 }, { "epoch": 0.28, "learning_rate": 1.9787003930120527e-05, "loss": 0.7717, "step": 849 }, { "epoch": 0.28, "learning_rate": 1.9786267645933075e-05, "loss": 0.7966, "step": 850 }, { "epoch": 0.28, "learning_rate": 1.978553010509423e-05, "loss": 0.7876, "step": 851 }, { "epoch": 0.28, "learning_rate": 1.9784791307698704e-05, "loss": 0.8467, "step": 852 }, { "epoch": 0.28, "learning_rate": 1.9784051253841356e-05, "loss": 0.8169, "step": 853 }, { "epoch": 0.28, "learning_rate": 1.9783309943617226e-05, "loss": 0.7415, "step": 854 }, { "epoch": 0.28, "learning_rate": 1.97825673771215e-05, "loss": 0.8274, "step": 855 }, { "epoch": 0.28, "learning_rate": 1.978182355444953e-05, "loss": 0.8223, "step": 856 }, { "epoch": 0.28, "learning_rate": 1.9781078475696836e-05, "loss": 0.8433, "step": 857 }, { "epoch": 0.28, "learning_rate": 1.9780332140959088e-05, "loss": 0.8086, "step": 858 }, { "epoch": 0.29, "learning_rate": 1.977958455033212e-05, "loss": 0.8486, "step": 859 }, { "epoch": 0.29, "learning_rate": 1.977883570391194e-05, "loss": 0.8071, "step": 860 }, { "epoch": 0.29, "learning_rate": 1.9778085601794695e-05, "loss": 0.7837, "step": 861 }, { "epoch": 0.29, "learning_rate": 1.9777334244076712e-05, "loss": 0.8672, "step": 862 }, { "epoch": 0.29, "learning_rate": 1.9776581630854473e-05, "loss": 0.7954, "step": 863 }, { "epoch": 0.29, "learning_rate": 1.977582776222462e-05, "loss": 0.7839, "step": 864 }, { "epoch": 0.29, "learning_rate": 1.9775072638283954e-05, "loss": 0.7354, "step": 865 }, { "epoch": 0.29, "learning_rate": 1.9774316259129444e-05, "loss": 0.8149, "step": 866 }, { "epoch": 0.29, "learning_rate": 1.977355862485822e-05, "loss": 0.7515, "step": 867 }, { "epoch": 0.29, "learning_rate": 1.9772799735567557e-05, "loss": 0.7231, "step": 868 }, { "epoch": 0.29, "learning_rate": 1.9772039591354917e-05, "loss": 0.8501, "step": 869 }, { "epoch": 0.29, "learning_rate": 1.97712781923179e-05, "loss": 0.7749, "step": 870 }, { "epoch": 0.29, "learning_rate": 1.9770515538554286e-05, "loss": 0.769, "step": 871 }, { "epoch": 0.29, "learning_rate": 1.9769751630162e-05, "loss": 0.8315, "step": 872 }, { "epoch": 0.29, "learning_rate": 1.976898646723914e-05, "loss": 0.8569, "step": 873 }, { "epoch": 0.29, "learning_rate": 1.9768220049883955e-05, "loss": 0.8105, "step": 874 }, { "epoch": 0.29, "learning_rate": 1.9767452378194863e-05, "loss": 0.895, "step": 875 }, { "epoch": 0.29, "learning_rate": 1.9766683452270442e-05, "loss": 0.7715, "step": 876 }, { "epoch": 0.29, "learning_rate": 1.976591327220943e-05, "loss": 0.8472, "step": 877 }, { "epoch": 0.29, "learning_rate": 1.9765141838110723e-05, "loss": 0.7998, "step": 878 }, { "epoch": 0.29, "learning_rate": 1.9764369150073383e-05, "loss": 0.8105, "step": 879 }, { "epoch": 0.29, "learning_rate": 1.9763595208196627e-05, "loss": 0.7974, "step": 880 }, { "epoch": 0.29, "learning_rate": 1.9762820012579836e-05, "loss": 0.8887, "step": 881 }, { "epoch": 0.29, "learning_rate": 1.976204356332256e-05, "loss": 0.7776, "step": 882 }, { "epoch": 0.29, "learning_rate": 1.9761265860524496e-05, "loss": 0.6831, "step": 883 }, { "epoch": 0.29, "learning_rate": 1.9760486904285514e-05, "loss": 0.7617, "step": 884 }, { "epoch": 0.29, "learning_rate": 1.975970669470563e-05, "loss": 0.8286, "step": 885 }, { "epoch": 0.29, "learning_rate": 1.975892523188504e-05, "loss": 0.7338, "step": 886 }, { "epoch": 0.29, "learning_rate": 1.975814251592409e-05, "loss": 0.8267, "step": 887 }, { "epoch": 0.29, "learning_rate": 1.9757358546923286e-05, "loss": 0.7915, "step": 888 }, { "epoch": 0.3, "learning_rate": 1.9756573324983292e-05, "loss": 0.8477, "step": 889 }, { "epoch": 0.3, "learning_rate": 1.9755786850204947e-05, "loss": 0.7627, "step": 890 }, { "epoch": 0.3, "learning_rate": 1.9754999122689236e-05, "loss": 0.8154, "step": 891 }, { "epoch": 0.3, "learning_rate": 1.9754210142537317e-05, "loss": 0.7686, "step": 892 }, { "epoch": 0.3, "learning_rate": 1.97534199098505e-05, "loss": 0.7229, "step": 893 }, { "epoch": 0.3, "learning_rate": 1.9752628424730253e-05, "loss": 0.8101, "step": 894 }, { "epoch": 0.3, "learning_rate": 1.975183568727822e-05, "loss": 0.7988, "step": 895 }, { "epoch": 0.3, "learning_rate": 1.9751041697596186e-05, "loss": 0.8608, "step": 896 }, { "epoch": 0.3, "learning_rate": 1.9750246455786113e-05, "loss": 0.8171, "step": 897 }, { "epoch": 0.3, "learning_rate": 1.974944996195012e-05, "loss": 0.8618, "step": 898 }, { "epoch": 0.3, "learning_rate": 1.9748652216190477e-05, "loss": 0.8032, "step": 899 }, { "epoch": 0.3, "learning_rate": 1.9747853218609628e-05, "loss": 0.8618, "step": 900 }, { "epoch": 0.3, "learning_rate": 1.974705296931017e-05, "loss": 0.7764, "step": 901 }, { "epoch": 0.3, "learning_rate": 1.9746251468394865e-05, "loss": 0.8696, "step": 902 }, { "epoch": 0.3, "learning_rate": 1.974544871596663e-05, "loss": 0.7588, "step": 903 }, { "epoch": 0.3, "learning_rate": 1.974464471212855e-05, "loss": 0.8311, "step": 904 }, { "epoch": 0.3, "learning_rate": 1.974383945698386e-05, "loss": 0.769, "step": 905 }, { "epoch": 0.3, "learning_rate": 1.974303295063597e-05, "loss": 0.8042, "step": 906 }, { "epoch": 0.3, "learning_rate": 1.9742225193188443e-05, "loss": 0.8027, "step": 907 }, { "epoch": 0.3, "learning_rate": 1.9741416184745e-05, "loss": 0.8325, "step": 908 }, { "epoch": 0.3, "learning_rate": 1.974060592540952e-05, "loss": 0.7976, "step": 909 }, { "epoch": 0.3, "learning_rate": 1.973979441528606e-05, "loss": 0.7566, "step": 910 }, { "epoch": 0.3, "learning_rate": 1.9738981654478818e-05, "loss": 0.8589, "step": 911 }, { "epoch": 0.3, "learning_rate": 1.973816764309216e-05, "loss": 0.8369, "step": 912 }, { "epoch": 0.3, "learning_rate": 1.9737352381230617e-05, "loss": 0.8081, "step": 913 }, { "epoch": 0.3, "learning_rate": 1.9736535868998875e-05, "loss": 0.7263, "step": 914 }, { "epoch": 0.3, "learning_rate": 1.973571810650178e-05, "loss": 0.8169, "step": 915 }, { "epoch": 0.3, "learning_rate": 1.9734899093844342e-05, "loss": 0.8096, "step": 916 }, { "epoch": 0.3, "learning_rate": 1.9734078831131734e-05, "loss": 0.7798, "step": 917 }, { "epoch": 0.3, "learning_rate": 1.9733257318469282e-05, "loss": 0.8335, "step": 918 }, { "epoch": 0.31, "learning_rate": 1.9732434555962473e-05, "loss": 0.7725, "step": 919 }, { "epoch": 0.31, "learning_rate": 1.973161054371696e-05, "loss": 0.8154, "step": 920 }, { "epoch": 0.31, "learning_rate": 1.9730785281838554e-05, "loss": 0.8613, "step": 921 }, { "epoch": 0.31, "learning_rate": 1.9729958770433235e-05, "loss": 0.8257, "step": 922 }, { "epoch": 0.31, "learning_rate": 1.972913100960712e-05, "loss": 0.8979, "step": 923 }, { "epoch": 0.31, "learning_rate": 1.9728301999466513e-05, "loss": 0.8035, "step": 924 }, { "epoch": 0.31, "learning_rate": 1.972747174011786e-05, "loss": 0.7559, "step": 925 }, { "epoch": 0.31, "learning_rate": 1.972664023166778e-05, "loss": 0.7676, "step": 926 }, { "epoch": 0.31, "learning_rate": 1.9725807474223043e-05, "loss": 0.6367, "step": 927 }, { "epoch": 0.31, "learning_rate": 1.9724973467890586e-05, "loss": 0.7915, "step": 928 }, { "epoch": 0.31, "learning_rate": 1.9724138212777502e-05, "loss": 0.7175, "step": 929 }, { "epoch": 0.31, "learning_rate": 1.9723301708991043e-05, "loss": 0.8472, "step": 930 }, { "epoch": 0.31, "learning_rate": 1.972246395663863e-05, "loss": 0.813, "step": 931 }, { "epoch": 0.31, "learning_rate": 1.972162495582783e-05, "loss": 0.8306, "step": 932 }, { "epoch": 0.31, "learning_rate": 1.972078470666639e-05, "loss": 0.8179, "step": 933 }, { "epoch": 0.31, "learning_rate": 1.9719943209262195e-05, "loss": 0.7886, "step": 934 }, { "epoch": 0.31, "learning_rate": 1.9719100463723307e-05, "loss": 0.874, "step": 935 }, { "epoch": 0.31, "learning_rate": 1.9718256470157947e-05, "loss": 0.8228, "step": 936 }, { "epoch": 0.31, "learning_rate": 1.971741122867448e-05, "loss": 0.8379, "step": 937 }, { "epoch": 0.31, "learning_rate": 1.9716564739381455e-05, "loss": 0.8267, "step": 938 }, { "epoch": 0.31, "learning_rate": 1.9715717002387567e-05, "loss": 0.8232, "step": 939 }, { "epoch": 0.31, "learning_rate": 1.9714868017801667e-05, "loss": 0.8184, "step": 940 }, { "epoch": 0.31, "learning_rate": 1.971401778573278e-05, "loss": 0.8091, "step": 941 }, { "epoch": 0.31, "learning_rate": 1.9713166306290078e-05, "loss": 0.7646, "step": 942 }, { "epoch": 0.31, "learning_rate": 1.9712313579582907e-05, "loss": 0.8062, "step": 943 }, { "epoch": 0.31, "learning_rate": 1.971145960572076e-05, "loss": 0.7489, "step": 944 }, { "epoch": 0.31, "learning_rate": 1.9710604384813294e-05, "loss": 0.8384, "step": 945 }, { "epoch": 0.31, "learning_rate": 1.9709747916970327e-05, "loss": 0.8252, "step": 946 }, { "epoch": 0.31, "learning_rate": 1.9708890202301845e-05, "loss": 0.8823, "step": 947 }, { "epoch": 0.31, "learning_rate": 1.970803124091798e-05, "loss": 0.8027, "step": 948 }, { "epoch": 0.31, "learning_rate": 1.9707171032929037e-05, "loss": 0.8594, "step": 949 }, { "epoch": 0.32, "learning_rate": 1.9706309578445467e-05, "loss": 0.7988, "step": 950 }, { "epoch": 0.32, "learning_rate": 1.9705446877577896e-05, "loss": 0.8398, "step": 951 }, { "epoch": 0.32, "learning_rate": 1.9704582930437102e-05, "loss": 0.7837, "step": 952 }, { "epoch": 0.32, "learning_rate": 1.970371773713402e-05, "loss": 0.8354, "step": 953 }, { "epoch": 0.32, "learning_rate": 1.9702851297779755e-05, "loss": 0.9058, "step": 954 }, { "epoch": 0.32, "learning_rate": 1.970198361248556e-05, "loss": 0.7224, "step": 955 }, { "epoch": 0.32, "learning_rate": 1.970111468136286e-05, "loss": 0.7761, "step": 956 }, { "epoch": 0.32, "learning_rate": 1.970024450452323e-05, "loss": 0.7456, "step": 957 }, { "epoch": 0.32, "learning_rate": 1.9699373082078413e-05, "loss": 0.7256, "step": 958 }, { "epoch": 0.32, "learning_rate": 1.96985004141403e-05, "loss": 0.7419, "step": 959 }, { "epoch": 0.32, "learning_rate": 1.9697626500820966e-05, "loss": 0.752, "step": 960 }, { "epoch": 0.32, "learning_rate": 1.969675134223261e-05, "loss": 0.8037, "step": 961 }, { "epoch": 0.32, "learning_rate": 1.9695874938487628e-05, "loss": 0.8833, "step": 962 }, { "epoch": 0.32, "learning_rate": 1.9694997289698545e-05, "loss": 0.8652, "step": 963 }, { "epoch": 0.32, "learning_rate": 1.969411839597807e-05, "loss": 0.8076, "step": 964 }, { "epoch": 0.32, "learning_rate": 1.9693238257439056e-05, "loss": 0.8315, "step": 965 }, { "epoch": 0.32, "learning_rate": 1.969235687419452e-05, "loss": 0.7202, "step": 966 }, { "epoch": 0.32, "learning_rate": 1.9691474246357644e-05, "loss": 0.8154, "step": 967 }, { "epoch": 0.32, "learning_rate": 1.9690590374041767e-05, "loss": 0.7332, "step": 968 }, { "epoch": 0.32, "learning_rate": 1.9689705257360382e-05, "loss": 0.7515, "step": 969 }, { "epoch": 0.32, "learning_rate": 1.968881889642715e-05, "loss": 0.7915, "step": 970 }, { "epoch": 0.32, "learning_rate": 1.9687931291355888e-05, "loss": 0.8237, "step": 971 }, { "epoch": 0.32, "learning_rate": 1.9687042442260572e-05, "loss": 0.8525, "step": 972 }, { "epoch": 0.32, "learning_rate": 1.968615234925534e-05, "loss": 0.8569, "step": 973 }, { "epoch": 0.32, "learning_rate": 1.9685261012454484e-05, "loss": 0.8608, "step": 974 }, { "epoch": 0.32, "learning_rate": 1.9684368431972468e-05, "loss": 0.8569, "step": 975 }, { "epoch": 0.32, "learning_rate": 1.9683474607923902e-05, "loss": 0.7754, "step": 976 }, { "epoch": 0.32, "learning_rate": 1.9682579540423564e-05, "loss": 0.8228, "step": 977 }, { "epoch": 0.32, "learning_rate": 1.9681683229586388e-05, "loss": 0.7603, "step": 978 }, { "epoch": 0.32, "learning_rate": 1.968078567552747e-05, "loss": 0.8152, "step": 979 }, { "epoch": 0.33, "learning_rate": 1.9679886878362066e-05, "loss": 0.7695, "step": 980 }, { "epoch": 0.33, "learning_rate": 1.967898683820559e-05, "loss": 0.8081, "step": 981 }, { "epoch": 0.33, "learning_rate": 1.967808555517361e-05, "loss": 0.7549, "step": 982 }, { "epoch": 0.33, "learning_rate": 1.9677183029381867e-05, "loss": 0.8293, "step": 983 }, { "epoch": 0.33, "learning_rate": 1.967627926094625e-05, "loss": 0.8594, "step": 984 }, { "epoch": 0.33, "learning_rate": 1.9675374249982815e-05, "loss": 0.7876, "step": 985 }, { "epoch": 0.33, "learning_rate": 1.967446799660777e-05, "loss": 0.7527, "step": 986 }, { "epoch": 0.33, "learning_rate": 1.967356050093749e-05, "loss": 0.8159, "step": 987 }, { "epoch": 0.33, "learning_rate": 1.96726517630885e-05, "loss": 0.7429, "step": 988 }, { "epoch": 0.33, "learning_rate": 1.96717417831775e-05, "loss": 0.8535, "step": 989 }, { "epoch": 0.33, "learning_rate": 1.9670830561321335e-05, "loss": 0.8599, "step": 990 }, { "epoch": 0.33, "learning_rate": 1.966991809763702e-05, "loss": 0.8101, "step": 991 }, { "epoch": 0.33, "learning_rate": 1.9669004392241712e-05, "loss": 0.8403, "step": 992 }, { "epoch": 0.33, "learning_rate": 1.966808944525275e-05, "loss": 0.7593, "step": 993 }, { "epoch": 0.33, "learning_rate": 1.966717325678762e-05, "loss": 0.8633, "step": 994 }, { "epoch": 0.33, "learning_rate": 1.9666255826963967e-05, "loss": 0.8174, "step": 995 }, { "epoch": 0.33, "learning_rate": 1.96653371558996e-05, "loss": 0.772, "step": 996 }, { "epoch": 0.33, "learning_rate": 1.9664417243712484e-05, "loss": 0.8003, "step": 997 }, { "epoch": 0.33, "learning_rate": 1.966349609052075e-05, "loss": 0.8311, "step": 998 }, { "epoch": 0.33, "learning_rate": 1.9662573696442676e-05, "loss": 0.7036, "step": 999 }, { "epoch": 0.33, "learning_rate": 1.9661650061596705e-05, "loss": 0.8516, "step": 1000 }, { "epoch": 0.33, "learning_rate": 1.9660725186101448e-05, "loss": 0.8125, "step": 1001 }, { "epoch": 0.33, "learning_rate": 1.9659799070075664e-05, "loss": 0.8784, "step": 1002 }, { "epoch": 0.33, "learning_rate": 1.9658871713638275e-05, "loss": 0.8203, "step": 1003 }, { "epoch": 0.33, "learning_rate": 1.9657943116908363e-05, "loss": 0.7939, "step": 1004 }, { "epoch": 0.33, "learning_rate": 1.965701328000517e-05, "loss": 0.9009, "step": 1005 }, { "epoch": 0.33, "learning_rate": 1.9656082203048092e-05, "loss": 0.7791, "step": 1006 }, { "epoch": 0.33, "learning_rate": 1.9655149886156693e-05, "loss": 0.7856, "step": 1007 }, { "epoch": 0.33, "learning_rate": 1.965421632945069e-05, "loss": 0.8584, "step": 1008 }, { "epoch": 0.33, "learning_rate": 1.965328153304996e-05, "loss": 0.7837, "step": 1009 }, { "epoch": 0.34, "learning_rate": 1.965234549707454e-05, "loss": 0.7898, "step": 1010 }, { "epoch": 0.34, "learning_rate": 1.965140822164463e-05, "loss": 0.8252, "step": 1011 }, { "epoch": 0.34, "learning_rate": 1.9650469706880578e-05, "loss": 0.7974, "step": 1012 }, { "epoch": 0.34, "learning_rate": 1.9649529952902902e-05, "loss": 0.8252, "step": 1013 }, { "epoch": 0.34, "learning_rate": 1.9648588959832278e-05, "loss": 0.8359, "step": 1014 }, { "epoch": 0.34, "learning_rate": 1.9647646727789533e-05, "loss": 0.8267, "step": 1015 }, { "epoch": 0.34, "learning_rate": 1.9646703256895665e-05, "loss": 0.8311, "step": 1016 }, { "epoch": 0.34, "learning_rate": 1.9645758547271822e-05, "loss": 0.8472, "step": 1017 }, { "epoch": 0.34, "learning_rate": 1.964481259903931e-05, "loss": 0.711, "step": 1018 }, { "epoch": 0.34, "learning_rate": 1.9643865412319604e-05, "loss": 0.7422, "step": 1019 }, { "epoch": 0.34, "learning_rate": 1.9642916987234327e-05, "loss": 0.7847, "step": 1020 }, { "epoch": 0.34, "learning_rate": 1.964196732390527e-05, "loss": 0.8203, "step": 1021 }, { "epoch": 0.34, "learning_rate": 1.9641016422454377e-05, "loss": 0.8281, "step": 1022 }, { "epoch": 0.34, "learning_rate": 1.9640064283003754e-05, "loss": 0.8364, "step": 1023 }, { "epoch": 0.34, "learning_rate": 1.963911090567566e-05, "loss": 0.8232, "step": 1024 }, { "epoch": 0.34, "learning_rate": 1.9638156290592522e-05, "loss": 0.7559, "step": 1025 }, { "epoch": 0.34, "learning_rate": 1.9637200437876927e-05, "loss": 0.7637, "step": 1026 }, { "epoch": 0.34, "learning_rate": 1.963624334765161e-05, "loss": 0.8301, "step": 1027 }, { "epoch": 0.34, "learning_rate": 1.963528502003947e-05, "loss": 0.8105, "step": 1028 }, { "epoch": 0.34, "learning_rate": 1.9634325455163564e-05, "loss": 0.8047, "step": 1029 }, { "epoch": 0.34, "learning_rate": 1.963336465314711e-05, "loss": 0.7964, "step": 1030 }, { "epoch": 0.34, "learning_rate": 1.963240261411349e-05, "loss": 0.8438, "step": 1031 }, { "epoch": 0.34, "learning_rate": 1.9631439338186235e-05, "loss": 0.8599, "step": 1032 }, { "epoch": 0.34, "learning_rate": 1.963047482548904e-05, "loss": 0.8267, "step": 1033 }, { "epoch": 0.34, "learning_rate": 1.962950907614576e-05, "loss": 0.7739, "step": 1034 }, { "epoch": 0.34, "learning_rate": 1.96285420902804e-05, "loss": 0.8159, "step": 1035 }, { "epoch": 0.34, "learning_rate": 1.9627573868017133e-05, "loss": 0.8579, "step": 1036 }, { "epoch": 0.34, "learning_rate": 1.962660440948029e-05, "loss": 0.751, "step": 1037 }, { "epoch": 0.34, "learning_rate": 1.962563371479436e-05, "loss": 0.7974, "step": 1038 }, { "epoch": 0.34, "learning_rate": 1.9624661784083985e-05, "loss": 0.7368, "step": 1039 }, { "epoch": 0.35, "learning_rate": 1.9623688617473975e-05, "loss": 0.8325, "step": 1040 }, { "epoch": 0.35, "learning_rate": 1.962271421508929e-05, "loss": 0.832, "step": 1041 }, { "epoch": 0.35, "learning_rate": 1.9621738577055056e-05, "loss": 0.7983, "step": 1042 }, { "epoch": 0.35, "learning_rate": 1.962076170349655e-05, "loss": 0.7051, "step": 1043 }, { "epoch": 0.35, "learning_rate": 1.961978359453922e-05, "loss": 0.7798, "step": 1044 }, { "epoch": 0.35, "learning_rate": 1.9618804250308655e-05, "loss": 0.8403, "step": 1045 }, { "epoch": 0.35, "learning_rate": 1.9617823670930617e-05, "loss": 0.7822, "step": 1046 }, { "epoch": 0.35, "learning_rate": 1.9616841856531026e-05, "loss": 0.76, "step": 1047 }, { "epoch": 0.35, "learning_rate": 1.961585880723595e-05, "loss": 0.7812, "step": 1048 }, { "epoch": 0.35, "learning_rate": 1.961487452317162e-05, "loss": 0.7905, "step": 1049 }, { "epoch": 0.35, "learning_rate": 1.9613889004464438e-05, "loss": 0.7983, "step": 1050 }, { "epoch": 0.35, "learning_rate": 1.9612902251240945e-05, "loss": 0.7712, "step": 1051 }, { "epoch": 0.35, "learning_rate": 1.961191426362785e-05, "loss": 0.7852, "step": 1052 }, { "epoch": 0.35, "learning_rate": 1.961092504175203e-05, "loss": 0.7397, "step": 1053 }, { "epoch": 0.35, "learning_rate": 1.9609934585740498e-05, "loss": 0.752, "step": 1054 }, { "epoch": 0.35, "learning_rate": 1.9608942895720445e-05, "loss": 0.8296, "step": 1055 }, { "epoch": 0.35, "learning_rate": 1.960794997181921e-05, "loss": 0.7019, "step": 1056 }, { "epoch": 0.35, "learning_rate": 1.9606955814164295e-05, "loss": 0.7656, "step": 1057 }, { "epoch": 0.35, "learning_rate": 1.9605960422883366e-05, "loss": 0.7549, "step": 1058 }, { "epoch": 0.35, "learning_rate": 1.9604963798104233e-05, "loss": 0.835, "step": 1059 }, { "epoch": 0.35, "learning_rate": 1.9603965939954876e-05, "loss": 0.8442, "step": 1060 }, { "epoch": 0.35, "learning_rate": 1.9602966848563426e-05, "loss": 0.7529, "step": 1061 }, { "epoch": 0.35, "learning_rate": 1.9601966524058177e-05, "loss": 0.8452, "step": 1062 }, { "epoch": 0.35, "learning_rate": 1.9600964966567588e-05, "loss": 0.7964, "step": 1063 }, { "epoch": 0.35, "learning_rate": 1.9599962176220256e-05, "loss": 0.6797, "step": 1064 }, { "epoch": 0.35, "learning_rate": 1.9598958153144954e-05, "loss": 0.7295, "step": 1065 }, { "epoch": 0.35, "learning_rate": 1.9597952897470615e-05, "loss": 0.8301, "step": 1066 }, { "epoch": 0.35, "learning_rate": 1.9596946409326318e-05, "loss": 0.8052, "step": 1067 }, { "epoch": 0.35, "learning_rate": 1.95959386888413e-05, "loss": 0.7397, "step": 1068 }, { "epoch": 0.35, "learning_rate": 1.9594929736144978e-05, "loss": 0.6943, "step": 1069 }, { "epoch": 0.36, "learning_rate": 1.9593919551366892e-05, "loss": 0.6978, "step": 1070 }, { "epoch": 0.36, "learning_rate": 1.9592908134636773e-05, "loss": 0.8042, "step": 1071 }, { "epoch": 0.36, "learning_rate": 1.9591895486084494e-05, "loss": 0.7212, "step": 1072 }, { "epoch": 0.36, "learning_rate": 1.9590881605840086e-05, "loss": 0.8105, "step": 1073 }, { "epoch": 0.36, "learning_rate": 1.958986649403374e-05, "loss": 0.8354, "step": 1074 }, { "epoch": 0.36, "learning_rate": 1.958885015079581e-05, "loss": 0.8389, "step": 1075 }, { "epoch": 0.36, "learning_rate": 1.95878325762568e-05, "loss": 0.7842, "step": 1076 }, { "epoch": 0.36, "learning_rate": 1.9586813770547382e-05, "loss": 0.7529, "step": 1077 }, { "epoch": 0.36, "learning_rate": 1.958579373379838e-05, "loss": 0.8296, "step": 1078 }, { "epoch": 0.36, "learning_rate": 1.958477246614077e-05, "loss": 0.7739, "step": 1079 }, { "epoch": 0.36, "learning_rate": 1.95837499677057e-05, "loss": 0.8286, "step": 1080 }, { "epoch": 0.36, "learning_rate": 1.9582726238624464e-05, "loss": 0.7205, "step": 1081 }, { "epoch": 0.36, "learning_rate": 1.958170127902852e-05, "loss": 0.8311, "step": 1082 }, { "epoch": 0.36, "learning_rate": 1.9580675089049487e-05, "loss": 0.6711, "step": 1083 }, { "epoch": 0.36, "learning_rate": 1.957964766881913e-05, "loss": 0.8379, "step": 1084 }, { "epoch": 0.36, "learning_rate": 1.9578619018469385e-05, "loss": 0.7886, "step": 1085 }, { "epoch": 0.36, "learning_rate": 1.9577589138132338e-05, "loss": 0.8682, "step": 1086 }, { "epoch": 0.36, "learning_rate": 1.957655802794024e-05, "loss": 0.7964, "step": 1087 }, { "epoch": 0.36, "learning_rate": 1.9575525688025487e-05, "loss": 0.7959, "step": 1088 }, { "epoch": 0.36, "learning_rate": 1.957449211852065e-05, "loss": 0.6912, "step": 1089 }, { "epoch": 0.36, "learning_rate": 1.9573457319558447e-05, "loss": 0.7383, "step": 1090 }, { "epoch": 0.36, "learning_rate": 1.9572421291271754e-05, "loss": 0.8208, "step": 1091 }, { "epoch": 0.36, "learning_rate": 1.9571384033793606e-05, "loss": 0.8164, "step": 1092 }, { "epoch": 0.36, "learning_rate": 1.95703455472572e-05, "loss": 0.7712, "step": 1093 }, { "epoch": 0.36, "learning_rate": 1.9569305831795888e-05, "loss": 0.7256, "step": 1094 }, { "epoch": 0.36, "learning_rate": 1.956826488754318e-05, "loss": 0.7085, "step": 1095 }, { "epoch": 0.36, "learning_rate": 1.9567222714632737e-05, "loss": 0.835, "step": 1096 }, { "epoch": 0.36, "learning_rate": 1.9566179313198393e-05, "loss": 0.7891, "step": 1097 }, { "epoch": 0.36, "learning_rate": 1.9565134683374127e-05, "loss": 0.7954, "step": 1098 }, { "epoch": 0.36, "learning_rate": 1.9564088825294075e-05, "loss": 0.855, "step": 1099 }, { "epoch": 0.37, "learning_rate": 1.9563041739092545e-05, "loss": 0.7695, "step": 1100 }, { "epoch": 0.37, "learning_rate": 1.9561993424903982e-05, "loss": 0.7686, "step": 1101 }, { "epoch": 0.37, "learning_rate": 1.956094388286301e-05, "loss": 0.7642, "step": 1102 }, { "epoch": 0.37, "learning_rate": 1.955989311310439e-05, "loss": 0.7439, "step": 1103 }, { "epoch": 0.37, "learning_rate": 1.955884111576306e-05, "loss": 0.8496, "step": 1104 }, { "epoch": 0.37, "learning_rate": 1.95577878909741e-05, "loss": 0.709, "step": 1105 }, { "epoch": 0.37, "learning_rate": 1.9556733438872763e-05, "loss": 0.896, "step": 1106 }, { "epoch": 0.37, "learning_rate": 1.955567775959444e-05, "loss": 0.8247, "step": 1107 }, { "epoch": 0.37, "learning_rate": 1.9554620853274698e-05, "loss": 0.8164, "step": 1108 }, { "epoch": 0.37, "learning_rate": 1.9553562720049252e-05, "loss": 0.7388, "step": 1109 }, { "epoch": 0.37, "learning_rate": 1.9552503360053975e-05, "loss": 0.7864, "step": 1110 }, { "epoch": 0.37, "learning_rate": 1.9551442773424902e-05, "loss": 0.6926, "step": 1111 }, { "epoch": 0.37, "learning_rate": 1.9550380960298217e-05, "loss": 0.7783, "step": 1112 }, { "epoch": 0.37, "learning_rate": 1.9549317920810275e-05, "loss": 0.8667, "step": 1113 }, { "epoch": 0.37, "learning_rate": 1.9548253655097574e-05, "loss": 0.8647, "step": 1114 }, { "epoch": 0.37, "learning_rate": 1.954718816329678e-05, "loss": 0.6943, "step": 1115 }, { "epoch": 0.37, "learning_rate": 1.954612144554471e-05, "loss": 0.813, "step": 1116 }, { "epoch": 0.37, "learning_rate": 1.954505350197834e-05, "loss": 0.7908, "step": 1117 }, { "epoch": 0.37, "learning_rate": 1.954398433273481e-05, "loss": 0.8135, "step": 1118 }, { "epoch": 0.37, "learning_rate": 1.9542913937951405e-05, "loss": 0.8438, "step": 1119 }, { "epoch": 0.37, "learning_rate": 1.9541842317765577e-05, "loss": 0.8594, "step": 1120 }, { "epoch": 0.37, "learning_rate": 1.9540769472314932e-05, "loss": 0.8191, "step": 1121 }, { "epoch": 0.37, "learning_rate": 1.9539695401737235e-05, "loss": 0.8364, "step": 1122 }, { "epoch": 0.37, "learning_rate": 1.9538620106170404e-05, "loss": 0.8018, "step": 1123 }, { "epoch": 0.37, "learning_rate": 1.953754358575252e-05, "loss": 0.8672, "step": 1124 }, { "epoch": 0.37, "learning_rate": 1.953646584062182e-05, "loss": 0.8218, "step": 1125 }, { "epoch": 0.37, "learning_rate": 1.9535386870916692e-05, "loss": 0.8237, "step": 1126 }, { "epoch": 0.37, "learning_rate": 1.953430667677569e-05, "loss": 0.8384, "step": 1127 }, { "epoch": 0.37, "learning_rate": 1.953322525833752e-05, "loss": 0.6882, "step": 1128 }, { "epoch": 0.37, "learning_rate": 1.9532142615741047e-05, "loss": 0.8267, "step": 1129 }, { "epoch": 0.38, "learning_rate": 1.9531058749125295e-05, "loss": 0.7654, "step": 1130 }, { "epoch": 0.38, "learning_rate": 1.9529973658629435e-05, "loss": 0.813, "step": 1131 }, { "epoch": 0.38, "learning_rate": 1.9528887344392812e-05, "loss": 0.874, "step": 1132 }, { "epoch": 0.38, "learning_rate": 1.9527799806554917e-05, "loss": 0.8149, "step": 1133 }, { "epoch": 0.38, "learning_rate": 1.95267110452554e-05, "loss": 0.7881, "step": 1134 }, { "epoch": 0.38, "learning_rate": 1.9525621060634066e-05, "loss": 0.7822, "step": 1135 }, { "epoch": 0.38, "learning_rate": 1.9524529852830883e-05, "loss": 0.7739, "step": 1136 }, { "epoch": 0.38, "learning_rate": 1.9523437421985968e-05, "loss": 0.6904, "step": 1137 }, { "epoch": 0.38, "learning_rate": 1.9522343768239603e-05, "loss": 0.6565, "step": 1138 }, { "epoch": 0.38, "learning_rate": 1.9521248891732228e-05, "loss": 0.8564, "step": 1139 }, { "epoch": 0.38, "learning_rate": 1.9520152792604428e-05, "loss": 0.811, "step": 1140 }, { "epoch": 0.38, "learning_rate": 1.9519055470996954e-05, "loss": 0.7998, "step": 1141 }, { "epoch": 0.38, "learning_rate": 1.9517956927050718e-05, "loss": 0.8, "step": 1142 }, { "epoch": 0.38, "learning_rate": 1.9516857160906778e-05, "loss": 0.7441, "step": 1143 }, { "epoch": 0.38, "learning_rate": 1.951575617270636e-05, "loss": 0.8345, "step": 1144 }, { "epoch": 0.38, "learning_rate": 1.9514653962590833e-05, "loss": 0.79, "step": 1145 }, { "epoch": 0.38, "learning_rate": 1.951355053070174e-05, "loss": 0.753, "step": 1146 }, { "epoch": 0.38, "learning_rate": 1.951244587718077e-05, "loss": 0.6106, "step": 1147 }, { "epoch": 0.38, "learning_rate": 1.951134000216977e-05, "loss": 0.7573, "step": 1148 }, { "epoch": 0.38, "learning_rate": 1.951023290581074e-05, "loss": 0.8818, "step": 1149 }, { "epoch": 0.38, "learning_rate": 1.9509124588245854e-05, "loss": 0.696, "step": 1150 }, { "epoch": 0.38, "learning_rate": 1.9508015049617423e-05, "loss": 0.7998, "step": 1151 }, { "epoch": 0.38, "learning_rate": 1.9506904290067922e-05, "loss": 0.8145, "step": 1152 }, { "epoch": 0.38, "learning_rate": 1.9505792309739986e-05, "loss": 0.8003, "step": 1153 }, { "epoch": 0.38, "learning_rate": 1.95046791087764e-05, "loss": 0.7734, "step": 1154 }, { "epoch": 0.38, "learning_rate": 1.9503564687320117e-05, "loss": 0.811, "step": 1155 }, { "epoch": 0.38, "learning_rate": 1.950244904551423e-05, "loss": 0.8516, "step": 1156 }, { "epoch": 0.38, "learning_rate": 1.9501332183502007e-05, "loss": 0.6422, "step": 1157 }, { "epoch": 0.38, "learning_rate": 1.9500214101426857e-05, "loss": 0.7578, "step": 1158 }, { "epoch": 0.38, "learning_rate": 1.949909479943236e-05, "loss": 0.7383, "step": 1159 }, { "epoch": 0.38, "learning_rate": 1.9497974277662236e-05, "loss": 0.8467, "step": 1160 }, { "epoch": 0.39, "learning_rate": 1.949685253626038e-05, "loss": 0.769, "step": 1161 }, { "epoch": 0.39, "learning_rate": 1.9495729575370826e-05, "loss": 0.8066, "step": 1162 }, { "epoch": 0.39, "learning_rate": 1.9494605395137778e-05, "loss": 0.8184, "step": 1163 }, { "epoch": 0.39, "learning_rate": 1.9493479995705595e-05, "loss": 0.7715, "step": 1164 }, { "epoch": 0.39, "learning_rate": 1.949235337721878e-05, "loss": 0.7783, "step": 1165 }, { "epoch": 0.39, "learning_rate": 1.9491225539822008e-05, "loss": 0.7988, "step": 1166 }, { "epoch": 0.39, "learning_rate": 1.9490096483660103e-05, "loss": 0.8281, "step": 1167 }, { "epoch": 0.39, "learning_rate": 1.9488966208878043e-05, "loss": 0.856, "step": 1168 }, { "epoch": 0.39, "learning_rate": 1.9487834715620976e-05, "loss": 0.7688, "step": 1169 }, { "epoch": 0.39, "learning_rate": 1.9486702004034187e-05, "loss": 0.7866, "step": 1170 }, { "epoch": 0.39, "learning_rate": 1.9485568074263134e-05, "loss": 0.7944, "step": 1171 }, { "epoch": 0.39, "learning_rate": 1.9484432926453416e-05, "loss": 0.8569, "step": 1172 }, { "epoch": 0.39, "learning_rate": 1.9483296560750803e-05, "loss": 0.7544, "step": 1173 }, { "epoch": 0.39, "learning_rate": 1.9482158977301217e-05, "loss": 0.7793, "step": 1174 }, { "epoch": 0.39, "learning_rate": 1.948102017625073e-05, "loss": 0.9102, "step": 1175 }, { "epoch": 0.39, "learning_rate": 1.947988015774558e-05, "loss": 0.6687, "step": 1176 }, { "epoch": 0.39, "learning_rate": 1.9478738921932155e-05, "loss": 0.7529, "step": 1177 }, { "epoch": 0.39, "learning_rate": 1.9477596468956993e-05, "loss": 0.7729, "step": 1178 }, { "epoch": 0.39, "learning_rate": 1.9476452798966807e-05, "loss": 0.8271, "step": 1179 }, { "epoch": 0.39, "learning_rate": 1.947530791210845e-05, "loss": 0.7656, "step": 1180 }, { "epoch": 0.39, "learning_rate": 1.9474161808528938e-05, "loss": 0.7571, "step": 1181 }, { "epoch": 0.39, "learning_rate": 1.9473014488375436e-05, "loss": 0.729, "step": 1182 }, { "epoch": 0.39, "learning_rate": 1.947186595179528e-05, "loss": 0.8726, "step": 1183 }, { "epoch": 0.39, "learning_rate": 1.947071619893595e-05, "loss": 0.8096, "step": 1184 }, { "epoch": 0.39, "learning_rate": 1.9469565229945083e-05, "loss": 0.7993, "step": 1185 }, { "epoch": 0.39, "learning_rate": 1.9468413044970477e-05, "loss": 0.7861, "step": 1186 }, { "epoch": 0.39, "learning_rate": 1.946725964416008e-05, "loss": 0.7842, "step": 1187 }, { "epoch": 0.39, "learning_rate": 1.9466105027662006e-05, "loss": 0.7539, "step": 1188 }, { "epoch": 0.39, "learning_rate": 1.946494919562451e-05, "loss": 0.7969, "step": 1189 }, { "epoch": 0.39, "learning_rate": 1.9463792148196027e-05, "loss": 0.7192, "step": 1190 }, { "epoch": 0.4, "learning_rate": 1.9462633885525115e-05, "loss": 0.8086, "step": 1191 }, { "epoch": 0.4, "learning_rate": 1.9461474407760518e-05, "loss": 0.8511, "step": 1192 }, { "epoch": 0.4, "learning_rate": 1.9460313715051122e-05, "loss": 0.8223, "step": 1193 }, { "epoch": 0.4, "learning_rate": 1.9459151807545966e-05, "loss": 0.7949, "step": 1194 }, { "epoch": 0.4, "learning_rate": 1.945798868539426e-05, "loss": 0.8325, "step": 1195 }, { "epoch": 0.4, "learning_rate": 1.945682434874535e-05, "loss": 0.8403, "step": 1196 }, { "epoch": 0.4, "learning_rate": 1.9455658797748755e-05, "loss": 0.8213, "step": 1197 }, { "epoch": 0.4, "learning_rate": 1.945449203255414e-05, "loss": 0.8057, "step": 1198 }, { "epoch": 0.4, "learning_rate": 1.9453324053311327e-05, "loss": 0.7783, "step": 1199 }, { "epoch": 0.4, "learning_rate": 1.94521548601703e-05, "loss": 0.7808, "step": 1200 }, { "epoch": 0.4, "learning_rate": 1.945098445328119e-05, "loss": 0.7305, "step": 1201 }, { "epoch": 0.4, "learning_rate": 1.9449812832794297e-05, "loss": 0.7248, "step": 1202 }, { "epoch": 0.4, "learning_rate": 1.9448639998860058e-05, "loss": 0.7686, "step": 1203 }, { "epoch": 0.4, "learning_rate": 1.9447465951629082e-05, "loss": 0.7896, "step": 1204 }, { "epoch": 0.4, "learning_rate": 1.944629069125213e-05, "loss": 0.8213, "step": 1205 }, { "epoch": 0.4, "learning_rate": 1.9445114217880113e-05, "loss": 0.8022, "step": 1206 }, { "epoch": 0.4, "learning_rate": 1.94439365316641e-05, "loss": 0.7803, "step": 1207 }, { "epoch": 0.4, "learning_rate": 1.944275763275532e-05, "loss": 0.7407, "step": 1208 }, { "epoch": 0.4, "learning_rate": 1.9441577521305158e-05, "loss": 0.7725, "step": 1209 }, { "epoch": 0.4, "learning_rate": 1.9440396197465146e-05, "loss": 0.8555, "step": 1210 }, { "epoch": 0.4, "learning_rate": 1.9439213661386982e-05, "loss": 0.7676, "step": 1211 }, { "epoch": 0.4, "learning_rate": 1.943802991322251e-05, "loss": 0.8062, "step": 1212 }, { "epoch": 0.4, "learning_rate": 1.943684495312374e-05, "loss": 0.7549, "step": 1213 }, { "epoch": 0.4, "learning_rate": 1.9435658781242827e-05, "loss": 0.7705, "step": 1214 }, { "epoch": 0.4, "learning_rate": 1.9434471397732092e-05, "loss": 0.7905, "step": 1215 }, { "epoch": 0.4, "learning_rate": 1.9433282802744003e-05, "loss": 0.8696, "step": 1216 }, { "epoch": 0.4, "learning_rate": 1.9432092996431193e-05, "loss": 0.752, "step": 1217 }, { "epoch": 0.4, "learning_rate": 1.9430901978946436e-05, "loss": 0.7617, "step": 1218 }, { "epoch": 0.4, "learning_rate": 1.9429709750442676e-05, "loss": 0.7632, "step": 1219 }, { "epoch": 0.4, "learning_rate": 1.9428516311073005e-05, "loss": 0.7739, "step": 1220 }, { "epoch": 0.41, "learning_rate": 1.942732166099067e-05, "loss": 0.833, "step": 1221 }, { "epoch": 0.41, "learning_rate": 1.942612580034908e-05, "loss": 0.8022, "step": 1222 }, { "epoch": 0.41, "learning_rate": 1.9424928729301792e-05, "loss": 0.8354, "step": 1223 }, { "epoch": 0.41, "learning_rate": 1.9423730448002526e-05, "loss": 0.8203, "step": 1224 }, { "epoch": 0.41, "learning_rate": 1.9422530956605144e-05, "loss": 0.8242, "step": 1225 }, { "epoch": 0.41, "learning_rate": 1.942133025526368e-05, "loss": 0.7922, "step": 1226 }, { "epoch": 0.41, "learning_rate": 1.9420128344132313e-05, "loss": 0.749, "step": 1227 }, { "epoch": 0.41, "learning_rate": 1.941892522336538e-05, "loss": 0.7944, "step": 1228 }, { "epoch": 0.41, "learning_rate": 1.941772089311737e-05, "loss": 0.8374, "step": 1229 }, { "epoch": 0.41, "learning_rate": 1.9416515353542944e-05, "loss": 0.7671, "step": 1230 }, { "epoch": 0.41, "learning_rate": 1.9415308604796885e-05, "loss": 0.7451, "step": 1231 }, { "epoch": 0.41, "learning_rate": 1.9414100647034165e-05, "loss": 0.7769, "step": 1232 }, { "epoch": 0.41, "learning_rate": 1.9412891480409895e-05, "loss": 0.8457, "step": 1233 }, { "epoch": 0.41, "learning_rate": 1.9411681105079344e-05, "loss": 0.8057, "step": 1234 }, { "epoch": 0.41, "learning_rate": 1.941046952119793e-05, "loss": 0.8677, "step": 1235 }, { "epoch": 0.41, "learning_rate": 1.940925672892124e-05, "loss": 0.811, "step": 1236 }, { "epoch": 0.41, "learning_rate": 1.9408042728405e-05, "loss": 0.8374, "step": 1237 }, { "epoch": 0.41, "learning_rate": 1.9406827519805108e-05, "loss": 0.8638, "step": 1238 }, { "epoch": 0.41, "learning_rate": 1.9405611103277605e-05, "loss": 0.8149, "step": 1239 }, { "epoch": 0.41, "learning_rate": 1.9404393478978688e-05, "loss": 0.8755, "step": 1240 }, { "epoch": 0.41, "learning_rate": 1.9403174647064718e-05, "loss": 0.7334, "step": 1241 }, { "epoch": 0.41, "learning_rate": 1.94019546076922e-05, "loss": 0.7637, "step": 1242 }, { "epoch": 0.41, "learning_rate": 1.9400733361017798e-05, "loss": 0.7437, "step": 1243 }, { "epoch": 0.41, "learning_rate": 1.9399510907198335e-05, "loss": 0.7808, "step": 1244 }, { "epoch": 0.41, "learning_rate": 1.9398287246390783e-05, "loss": 0.812, "step": 1245 }, { "epoch": 0.41, "learning_rate": 1.939706237875227e-05, "loss": 0.8188, "step": 1246 }, { "epoch": 0.41, "learning_rate": 1.939583630444009e-05, "loss": 0.8647, "step": 1247 }, { "epoch": 0.41, "learning_rate": 1.9394609023611674e-05, "loss": 0.7256, "step": 1248 }, { "epoch": 0.41, "learning_rate": 1.939338053642462e-05, "loss": 0.7305, "step": 1249 }, { "epoch": 0.41, "learning_rate": 1.939215084303668e-05, "loss": 0.707, "step": 1250 }, { "epoch": 0.42, "learning_rate": 1.939091994360575e-05, "loss": 0.6714, "step": 1251 }, { "epoch": 0.42, "learning_rate": 1.9389687838289903e-05, "loss": 0.7778, "step": 1252 }, { "epoch": 0.42, "learning_rate": 1.938845452724734e-05, "loss": 0.8081, "step": 1253 }, { "epoch": 0.42, "learning_rate": 1.9387220010636438e-05, "loss": 0.8438, "step": 1254 }, { "epoch": 0.42, "learning_rate": 1.938598428861572e-05, "loss": 0.7905, "step": 1255 }, { "epoch": 0.42, "learning_rate": 1.938474736134386e-05, "loss": 0.7961, "step": 1256 }, { "epoch": 0.42, "learning_rate": 1.9383509228979697e-05, "loss": 0.729, "step": 1257 }, { "epoch": 0.42, "learning_rate": 1.9382269891682216e-05, "loss": 0.8491, "step": 1258 }, { "epoch": 0.42, "learning_rate": 1.9381029349610558e-05, "loss": 0.7432, "step": 1259 }, { "epoch": 0.42, "learning_rate": 1.9379787602924023e-05, "loss": 0.8032, "step": 1260 }, { "epoch": 0.42, "learning_rate": 1.9378544651782066e-05, "loss": 0.7012, "step": 1261 }, { "epoch": 0.42, "learning_rate": 1.937730049634429e-05, "loss": 0.8252, "step": 1262 }, { "epoch": 0.42, "learning_rate": 1.937605513677046e-05, "loss": 0.8291, "step": 1263 }, { "epoch": 0.42, "learning_rate": 1.937480857322049e-05, "loss": 0.7493, "step": 1264 }, { "epoch": 0.42, "learning_rate": 1.937356080585445e-05, "loss": 0.7646, "step": 1265 }, { "epoch": 0.42, "learning_rate": 1.9372311834832567e-05, "loss": 0.8037, "step": 1266 }, { "epoch": 0.42, "learning_rate": 1.9371061660315222e-05, "loss": 0.8418, "step": 1267 }, { "epoch": 0.42, "learning_rate": 1.9369810282462943e-05, "loss": 0.7744, "step": 1268 }, { "epoch": 0.42, "learning_rate": 1.936855770143643e-05, "loss": 0.7668, "step": 1269 }, { "epoch": 0.42, "learning_rate": 1.9367303917396515e-05, "loss": 0.6978, "step": 1270 }, { "epoch": 0.42, "learning_rate": 1.9366048930504204e-05, "loss": 0.7451, "step": 1271 }, { "epoch": 0.42, "learning_rate": 1.9364792740920647e-05, "loss": 0.7529, "step": 1272 }, { "epoch": 0.42, "learning_rate": 1.936353534880715e-05, "loss": 0.7419, "step": 1273 }, { "epoch": 0.42, "learning_rate": 1.9362276754325175e-05, "loss": 0.855, "step": 1274 }, { "epoch": 0.42, "learning_rate": 1.9361016957636338e-05, "loss": 0.8252, "step": 1275 }, { "epoch": 0.42, "learning_rate": 1.935975595890241e-05, "loss": 0.7759, "step": 1276 }, { "epoch": 0.42, "learning_rate": 1.935849375828531e-05, "loss": 0.7734, "step": 1277 }, { "epoch": 0.42, "learning_rate": 1.9357230355947124e-05, "loss": 0.7231, "step": 1278 }, { "epoch": 0.42, "learning_rate": 1.9355965752050084e-05, "loss": 0.7263, "step": 1279 }, { "epoch": 0.42, "learning_rate": 1.9354699946756576e-05, "loss": 0.8218, "step": 1280 }, { "epoch": 0.43, "learning_rate": 1.9353432940229134e-05, "loss": 0.8311, "step": 1281 }, { "epoch": 0.43, "learning_rate": 1.9352164732630467e-05, "loss": 0.7515, "step": 1282 }, { "epoch": 0.43, "learning_rate": 1.935089532412342e-05, "loss": 0.7974, "step": 1283 }, { "epoch": 0.43, "learning_rate": 1.9349624714870994e-05, "loss": 0.7671, "step": 1284 }, { "epoch": 0.43, "learning_rate": 1.934835290503635e-05, "loss": 0.8535, "step": 1285 }, { "epoch": 0.43, "learning_rate": 1.93470798947828e-05, "loss": 0.7451, "step": 1286 }, { "epoch": 0.43, "learning_rate": 1.9345805684273812e-05, "loss": 0.7974, "step": 1287 }, { "epoch": 0.43, "learning_rate": 1.9344530273673008e-05, "loss": 0.7202, "step": 1288 }, { "epoch": 0.43, "learning_rate": 1.934325366314416e-05, "loss": 0.7441, "step": 1289 }, { "epoch": 0.43, "learning_rate": 1.9341975852851197e-05, "loss": 0.771, "step": 1290 }, { "epoch": 0.43, "learning_rate": 1.9340696842958206e-05, "loss": 0.7754, "step": 1291 }, { "epoch": 0.43, "learning_rate": 1.9339416633629424e-05, "loss": 0.7334, "step": 1292 }, { "epoch": 0.43, "learning_rate": 1.9338135225029237e-05, "loss": 0.729, "step": 1293 }, { "epoch": 0.43, "learning_rate": 1.9336852617322196e-05, "loss": 0.8027, "step": 1294 }, { "epoch": 0.43, "learning_rate": 1.9335568810672995e-05, "loss": 0.7988, "step": 1295 }, { "epoch": 0.43, "learning_rate": 1.933428380524649e-05, "loss": 0.7397, "step": 1296 }, { "epoch": 0.43, "learning_rate": 1.933299760120769e-05, "loss": 0.6548, "step": 1297 }, { "epoch": 0.43, "learning_rate": 1.9331710198721753e-05, "loss": 0.7915, "step": 1298 }, { "epoch": 0.43, "learning_rate": 1.9330421597953996e-05, "loss": 0.772, "step": 1299 }, { "epoch": 0.43, "learning_rate": 1.9329131799069885e-05, "loss": 0.7593, "step": 1300 }, { "epoch": 0.43, "learning_rate": 1.9327840802235046e-05, "loss": 0.7837, "step": 1301 }, { "epoch": 0.43, "learning_rate": 1.9326548607615256e-05, "loss": 0.8159, "step": 1302 }, { "epoch": 0.43, "learning_rate": 1.9325255215376443e-05, "loss": 0.8491, "step": 1303 }, { "epoch": 0.43, "learning_rate": 1.932396062568469e-05, "loss": 0.7915, "step": 1304 }, { "epoch": 0.43, "learning_rate": 1.9322664838706233e-05, "loss": 0.7256, "step": 1305 }, { "epoch": 0.43, "learning_rate": 1.9321367854607472e-05, "loss": 0.6592, "step": 1306 }, { "epoch": 0.43, "learning_rate": 1.9320069673554945e-05, "loss": 0.8188, "step": 1307 }, { "epoch": 0.43, "learning_rate": 1.931877029571535e-05, "loss": 0.8115, "step": 1308 }, { "epoch": 0.43, "learning_rate": 1.9317469721255545e-05, "loss": 0.7734, "step": 1309 }, { "epoch": 0.43, "learning_rate": 1.9316167950342533e-05, "loss": 0.7783, "step": 1310 }, { "epoch": 0.44, "learning_rate": 1.9314864983143476e-05, "loss": 0.781, "step": 1311 }, { "epoch": 0.44, "learning_rate": 1.9313560819825688e-05, "loss": 0.738, "step": 1312 }, { "epoch": 0.44, "learning_rate": 1.9312255460556633e-05, "loss": 0.7988, "step": 1313 }, { "epoch": 0.44, "learning_rate": 1.9310948905503936e-05, "loss": 0.7441, "step": 1314 }, { "epoch": 0.44, "learning_rate": 1.9309641154835367e-05, "loss": 0.7124, "step": 1315 }, { "epoch": 0.44, "learning_rate": 1.9308332208718856e-05, "loss": 0.7861, "step": 1316 }, { "epoch": 0.44, "learning_rate": 1.9307022067322484e-05, "loss": 0.8037, "step": 1317 }, { "epoch": 0.44, "learning_rate": 1.9305710730814485e-05, "loss": 0.7886, "step": 1318 }, { "epoch": 0.44, "learning_rate": 1.930439819936325e-05, "loss": 0.7783, "step": 1319 }, { "epoch": 0.44, "learning_rate": 1.9303084473137318e-05, "loss": 0.8081, "step": 1320 }, { "epoch": 0.44, "learning_rate": 1.9301769552305384e-05, "loss": 0.8413, "step": 1321 }, { "epoch": 0.44, "learning_rate": 1.9300453437036296e-05, "loss": 0.7983, "step": 1322 }, { "epoch": 0.44, "learning_rate": 1.929913612749906e-05, "loss": 0.7905, "step": 1323 }, { "epoch": 0.44, "learning_rate": 1.929781762386283e-05, "loss": 0.8452, "step": 1324 }, { "epoch": 0.44, "learning_rate": 1.9296497926296914e-05, "loss": 0.7588, "step": 1325 }, { "epoch": 0.44, "learning_rate": 1.929517703497077e-05, "loss": 0.6628, "step": 1326 }, { "epoch": 0.44, "learning_rate": 1.9293854950054016e-05, "loss": 0.8047, "step": 1327 }, { "epoch": 0.44, "learning_rate": 1.9292531671716423e-05, "loss": 0.8325, "step": 1328 }, { "epoch": 0.44, "learning_rate": 1.929120720012791e-05, "loss": 0.8145, "step": 1329 }, { "epoch": 0.44, "learning_rate": 1.9289881535458553e-05, "loss": 0.895, "step": 1330 }, { "epoch": 0.44, "learning_rate": 1.9288554677878578e-05, "loss": 0.7979, "step": 1331 }, { "epoch": 0.44, "learning_rate": 1.9287226627558374e-05, "loss": 0.7651, "step": 1332 }, { "epoch": 0.44, "learning_rate": 1.9285897384668466e-05, "loss": 0.7419, "step": 1333 }, { "epoch": 0.44, "learning_rate": 1.9284566949379547e-05, "loss": 0.8022, "step": 1334 }, { "epoch": 0.44, "learning_rate": 1.928323532186245e-05, "loss": 0.8125, "step": 1335 }, { "epoch": 0.44, "learning_rate": 1.928190250228818e-05, "loss": 0.7881, "step": 1336 }, { "epoch": 0.44, "learning_rate": 1.928056849082788e-05, "loss": 0.7905, "step": 1337 }, { "epoch": 0.44, "learning_rate": 1.9279233287652847e-05, "loss": 0.8452, "step": 1338 }, { "epoch": 0.44, "learning_rate": 1.9277896892934536e-05, "loss": 0.7979, "step": 1339 }, { "epoch": 0.44, "learning_rate": 1.9276559306844552e-05, "loss": 0.741, "step": 1340 }, { "epoch": 0.45, "learning_rate": 1.9275220529554657e-05, "loss": 0.7886, "step": 1341 }, { "epoch": 0.45, "learning_rate": 1.9273880561236757e-05, "loss": 0.8315, "step": 1342 }, { "epoch": 0.45, "learning_rate": 1.9272539402062924e-05, "loss": 0.792, "step": 1343 }, { "epoch": 0.45, "learning_rate": 1.9271197052205373e-05, "loss": 0.8613, "step": 1344 }, { "epoch": 0.45, "learning_rate": 1.9269853511836475e-05, "loss": 0.8066, "step": 1345 }, { "epoch": 0.45, "learning_rate": 1.926850878112875e-05, "loss": 0.8188, "step": 1346 }, { "epoch": 0.45, "learning_rate": 1.9267162860254878e-05, "loss": 0.7354, "step": 1347 }, { "epoch": 0.45, "learning_rate": 1.9265815749387682e-05, "loss": 0.8096, "step": 1348 }, { "epoch": 0.45, "learning_rate": 1.926446744870016e-05, "loss": 0.7393, "step": 1349 }, { "epoch": 0.45, "learning_rate": 1.9263117958365428e-05, "loss": 0.7651, "step": 1350 }, { "epoch": 0.45, "learning_rate": 1.9261767278556785e-05, "loss": 0.7449, "step": 1351 }, { "epoch": 0.45, "learning_rate": 1.926041540944767e-05, "loss": 0.7461, "step": 1352 }, { "epoch": 0.45, "learning_rate": 1.9259062351211674e-05, "loss": 0.8335, "step": 1353 }, { "epoch": 0.45, "learning_rate": 1.925770810402254e-05, "loss": 0.7593, "step": 1354 }, { "epoch": 0.45, "learning_rate": 1.9256352668054173e-05, "loss": 0.7808, "step": 1355 }, { "epoch": 0.45, "learning_rate": 1.9254996043480617e-05, "loss": 0.7793, "step": 1356 }, { "epoch": 0.45, "learning_rate": 1.925363823047608e-05, "loss": 0.7769, "step": 1357 }, { "epoch": 0.45, "learning_rate": 1.925227922921492e-05, "loss": 0.8569, "step": 1358 }, { "epoch": 0.45, "learning_rate": 1.9250919039871644e-05, "loss": 0.6157, "step": 1359 }, { "epoch": 0.45, "learning_rate": 1.924955766262091e-05, "loss": 0.7612, "step": 1360 }, { "epoch": 0.45, "learning_rate": 1.9248195097637538e-05, "loss": 0.8799, "step": 1361 }, { "epoch": 0.45, "learning_rate": 1.924683134509649e-05, "loss": 0.8232, "step": 1362 }, { "epoch": 0.45, "learning_rate": 1.9245466405172885e-05, "loss": 0.7583, "step": 1363 }, { "epoch": 0.45, "learning_rate": 1.9244100278041998e-05, "loss": 0.8091, "step": 1364 }, { "epoch": 0.45, "learning_rate": 1.9242732963879254e-05, "loss": 0.739, "step": 1365 }, { "epoch": 0.45, "learning_rate": 1.9241364462860222e-05, "loss": 0.8228, "step": 1366 }, { "epoch": 0.45, "learning_rate": 1.923999477516064e-05, "loss": 0.7695, "step": 1367 }, { "epoch": 0.45, "learning_rate": 1.9238623900956378e-05, "loss": 0.769, "step": 1368 }, { "epoch": 0.45, "learning_rate": 1.923725184042348e-05, "loss": 0.77, "step": 1369 }, { "epoch": 0.45, "learning_rate": 1.9235878593738125e-05, "loss": 0.7341, "step": 1370 }, { "epoch": 0.46, "learning_rate": 1.9234504161076655e-05, "loss": 0.7202, "step": 1371 }, { "epoch": 0.46, "learning_rate": 1.9233128542615558e-05, "loss": 0.78, "step": 1372 }, { "epoch": 0.46, "learning_rate": 1.923175173853148e-05, "loss": 0.8076, "step": 1373 }, { "epoch": 0.46, "learning_rate": 1.923037374900121e-05, "loss": 0.7, "step": 1374 }, { "epoch": 0.46, "learning_rate": 1.9228994574201706e-05, "loss": 0.8174, "step": 1375 }, { "epoch": 0.46, "learning_rate": 1.9227614214310053e-05, "loss": 0.7378, "step": 1376 }, { "epoch": 0.46, "learning_rate": 1.9226232669503515e-05, "loss": 0.7595, "step": 1377 }, { "epoch": 0.46, "learning_rate": 1.922484993995949e-05, "loss": 0.7661, "step": 1378 }, { "epoch": 0.46, "learning_rate": 1.9223466025855532e-05, "loss": 0.7622, "step": 1379 }, { "epoch": 0.46, "learning_rate": 1.9222080927369352e-05, "loss": 0.7729, "step": 1380 }, { "epoch": 0.46, "learning_rate": 1.9220694644678813e-05, "loss": 0.811, "step": 1381 }, { "epoch": 0.46, "learning_rate": 1.921930717796192e-05, "loss": 0.7354, "step": 1382 }, { "epoch": 0.46, "learning_rate": 1.9217918527396842e-05, "loss": 0.7415, "step": 1383 }, { "epoch": 0.46, "learning_rate": 1.9216528693161892e-05, "loss": 0.7759, "step": 1384 }, { "epoch": 0.46, "learning_rate": 1.9215137675435544e-05, "loss": 0.8369, "step": 1385 }, { "epoch": 0.46, "learning_rate": 1.9213745474396413e-05, "loss": 0.7847, "step": 1386 }, { "epoch": 0.46, "learning_rate": 1.921235209022327e-05, "loss": 0.7954, "step": 1387 }, { "epoch": 0.46, "learning_rate": 1.9210957523095044e-05, "loss": 0.7649, "step": 1388 }, { "epoch": 0.46, "learning_rate": 1.9209561773190807e-05, "loss": 0.7358, "step": 1389 }, { "epoch": 0.46, "learning_rate": 1.9208164840689787e-05, "loss": 0.8164, "step": 1390 }, { "epoch": 0.46, "learning_rate": 1.9206766725771366e-05, "loss": 0.6758, "step": 1391 }, { "epoch": 0.46, "learning_rate": 1.9205367428615074e-05, "loss": 0.7563, "step": 1392 }, { "epoch": 0.46, "learning_rate": 1.9203966949400597e-05, "loss": 0.769, "step": 1393 }, { "epoch": 0.46, "learning_rate": 1.9202565288307764e-05, "loss": 0.7808, "step": 1394 }, { "epoch": 0.46, "learning_rate": 1.9201162445516567e-05, "loss": 0.856, "step": 1395 }, { "epoch": 0.46, "learning_rate": 1.9199758421207137e-05, "loss": 0.7471, "step": 1396 }, { "epoch": 0.46, "learning_rate": 1.919835321555978e-05, "loss": 0.7205, "step": 1397 }, { "epoch": 0.46, "learning_rate": 1.919694682875492e-05, "loss": 0.7832, "step": 1398 }, { "epoch": 0.46, "learning_rate": 1.9195539260973165e-05, "loss": 0.7988, "step": 1399 }, { "epoch": 0.46, "learning_rate": 1.919413051239525e-05, "loss": 0.6826, "step": 1400 }, { "epoch": 0.46, "learning_rate": 1.9192720583202077e-05, "loss": 0.8022, "step": 1401 }, { "epoch": 0.47, "learning_rate": 1.9191309473574693e-05, "loss": 0.8579, "step": 1402 }, { "epoch": 0.47, "learning_rate": 1.9189897183694296e-05, "loss": 0.7139, "step": 1403 }, { "epoch": 0.47, "learning_rate": 1.9188483713742248e-05, "loss": 0.7886, "step": 1404 }, { "epoch": 0.47, "learning_rate": 1.918706906390004e-05, "loss": 0.7314, "step": 1405 }, { "epoch": 0.47, "learning_rate": 1.9185653234349334e-05, "loss": 0.7734, "step": 1406 }, { "epoch": 0.47, "learning_rate": 1.918423622527193e-05, "loss": 0.7905, "step": 1407 }, { "epoch": 0.47, "learning_rate": 1.918281803684979e-05, "loss": 0.7578, "step": 1408 }, { "epoch": 0.47, "learning_rate": 1.9181398669265025e-05, "loss": 0.8076, "step": 1409 }, { "epoch": 0.47, "learning_rate": 1.917997812269989e-05, "loss": 0.8359, "step": 1410 }, { "epoch": 0.47, "learning_rate": 1.91785563973368e-05, "loss": 0.8662, "step": 1411 }, { "epoch": 0.47, "learning_rate": 1.917713349335832e-05, "loss": 0.8242, "step": 1412 }, { "epoch": 0.47, "learning_rate": 1.917570941094716e-05, "loss": 0.7988, "step": 1413 }, { "epoch": 0.47, "learning_rate": 1.917428415028619e-05, "loss": 0.8164, "step": 1414 }, { "epoch": 0.47, "learning_rate": 1.9172857711558426e-05, "loss": 0.7207, "step": 1415 }, { "epoch": 0.47, "learning_rate": 1.9171430094947036e-05, "loss": 0.6982, "step": 1416 }, { "epoch": 0.47, "learning_rate": 1.917000130063534e-05, "loss": 0.7998, "step": 1417 }, { "epoch": 0.47, "learning_rate": 1.916857132880681e-05, "loss": 0.7886, "step": 1418 }, { "epoch": 0.47, "learning_rate": 1.9167140179645067e-05, "loss": 0.7612, "step": 1419 }, { "epoch": 0.47, "learning_rate": 1.9165707853333884e-05, "loss": 0.7576, "step": 1420 }, { "epoch": 0.47, "learning_rate": 1.9164274350057187e-05, "loss": 0.8066, "step": 1421 }, { "epoch": 0.47, "learning_rate": 1.9162839669999052e-05, "loss": 0.7983, "step": 1422 }, { "epoch": 0.47, "learning_rate": 1.9161403813343703e-05, "loss": 0.731, "step": 1423 }, { "epoch": 0.47, "learning_rate": 1.9159966780275523e-05, "loss": 0.6511, "step": 1424 }, { "epoch": 0.47, "learning_rate": 1.9158528570979034e-05, "loss": 0.8804, "step": 1425 }, { "epoch": 0.47, "learning_rate": 1.9157089185638922e-05, "loss": 0.7246, "step": 1426 }, { "epoch": 0.47, "learning_rate": 1.9155648624440012e-05, "loss": 0.7598, "step": 1427 }, { "epoch": 0.47, "learning_rate": 1.9154206887567292e-05, "loss": 0.731, "step": 1428 }, { "epoch": 0.47, "learning_rate": 1.9152763975205894e-05, "loss": 0.7729, "step": 1429 }, { "epoch": 0.47, "learning_rate": 1.91513198875411e-05, "loss": 0.707, "step": 1430 }, { "epoch": 0.47, "learning_rate": 1.9149874624758342e-05, "loss": 0.7197, "step": 1431 }, { "epoch": 0.48, "learning_rate": 1.9148428187043212e-05, "loss": 0.8398, "step": 1432 }, { "epoch": 0.48, "learning_rate": 1.9146980574581446e-05, "loss": 0.7341, "step": 1433 }, { "epoch": 0.48, "learning_rate": 1.914553178755892e-05, "loss": 0.801, "step": 1434 }, { "epoch": 0.48, "learning_rate": 1.9144081826161693e-05, "loss": 0.8477, "step": 1435 }, { "epoch": 0.48, "learning_rate": 1.9142630690575934e-05, "loss": 0.7974, "step": 1436 }, { "epoch": 0.48, "learning_rate": 1.9141178380987995e-05, "loss": 0.79, "step": 1437 }, { "epoch": 0.48, "learning_rate": 1.913972489758436e-05, "loss": 0.8037, "step": 1438 }, { "epoch": 0.48, "learning_rate": 1.9138270240551676e-05, "loss": 0.8252, "step": 1439 }, { "epoch": 0.48, "learning_rate": 1.913681441007673e-05, "loss": 0.7014, "step": 1440 }, { "epoch": 0.48, "learning_rate": 1.9135357406346467e-05, "loss": 0.8311, "step": 1441 }, { "epoch": 0.48, "learning_rate": 1.9133899229547982e-05, "loss": 0.6879, "step": 1442 }, { "epoch": 0.48, "learning_rate": 1.9132439879868515e-05, "loss": 0.7148, "step": 1443 }, { "epoch": 0.48, "learning_rate": 1.9130979357495462e-05, "loss": 0.7466, "step": 1444 }, { "epoch": 0.48, "learning_rate": 1.9129517662616372e-05, "loss": 0.7271, "step": 1445 }, { "epoch": 0.48, "learning_rate": 1.9128054795418935e-05, "loss": 0.79, "step": 1446 }, { "epoch": 0.48, "learning_rate": 1.9126590756091e-05, "loss": 0.6938, "step": 1447 }, { "epoch": 0.48, "learning_rate": 1.912512554482057e-05, "loss": 0.8076, "step": 1448 }, { "epoch": 0.48, "learning_rate": 1.912365916179578e-05, "loss": 0.6565, "step": 1449 }, { "epoch": 0.48, "learning_rate": 1.9122191607204933e-05, "loss": 0.7544, "step": 1450 }, { "epoch": 0.48, "learning_rate": 1.912072288123648e-05, "loss": 0.7463, "step": 1451 }, { "epoch": 0.48, "learning_rate": 1.9119252984079023e-05, "loss": 0.7507, "step": 1452 }, { "epoch": 0.48, "learning_rate": 1.91177819159213e-05, "loss": 0.7959, "step": 1453 }, { "epoch": 0.48, "learning_rate": 1.9116309676952218e-05, "loss": 0.729, "step": 1454 }, { "epoch": 0.48, "learning_rate": 1.911483626736082e-05, "loss": 0.7979, "step": 1455 }, { "epoch": 0.48, "learning_rate": 1.9113361687336316e-05, "loss": 0.7896, "step": 1456 }, { "epoch": 0.48, "learning_rate": 1.9111885937068048e-05, "loss": 0.7871, "step": 1457 }, { "epoch": 0.48, "learning_rate": 1.9110409016745523e-05, "loss": 0.792, "step": 1458 }, { "epoch": 0.48, "learning_rate": 1.910893092655839e-05, "loss": 0.7334, "step": 1459 }, { "epoch": 0.48, "learning_rate": 1.9107451666696443e-05, "loss": 0.7307, "step": 1460 }, { "epoch": 0.48, "learning_rate": 1.9105971237349644e-05, "loss": 0.7834, "step": 1461 }, { "epoch": 0.49, "learning_rate": 1.9104489638708087e-05, "loss": 0.7798, "step": 1462 }, { "epoch": 0.49, "learning_rate": 1.9103006870962027e-05, "loss": 0.7769, "step": 1463 }, { "epoch": 0.49, "learning_rate": 1.9101522934301864e-05, "loss": 0.792, "step": 1464 }, { "epoch": 0.49, "learning_rate": 1.9100037828918153e-05, "loss": 0.7671, "step": 1465 }, { "epoch": 0.49, "learning_rate": 1.909855155500159e-05, "loss": 0.7822, "step": 1466 }, { "epoch": 0.49, "learning_rate": 1.909706411274303e-05, "loss": 0.7534, "step": 1467 }, { "epoch": 0.49, "learning_rate": 1.9095575502333478e-05, "loss": 0.7476, "step": 1468 }, { "epoch": 0.49, "learning_rate": 1.909408572396408e-05, "loss": 0.8218, "step": 1469 }, { "epoch": 0.49, "learning_rate": 1.9092594777826145e-05, "loss": 0.7505, "step": 1470 }, { "epoch": 0.49, "learning_rate": 1.909110266411112e-05, "loss": 0.7205, "step": 1471 }, { "epoch": 0.49, "learning_rate": 1.908960938301061e-05, "loss": 0.6777, "step": 1472 }, { "epoch": 0.49, "learning_rate": 1.908811493471636e-05, "loss": 0.7793, "step": 1473 }, { "epoch": 0.49, "learning_rate": 1.908661931942028e-05, "loss": 0.7021, "step": 1474 }, { "epoch": 0.49, "learning_rate": 1.9085122537314416e-05, "loss": 0.7239, "step": 1475 }, { "epoch": 0.49, "learning_rate": 1.9083624588590974e-05, "loss": 0.7158, "step": 1476 }, { "epoch": 0.49, "learning_rate": 1.90821254734423e-05, "loss": 0.7964, "step": 1477 }, { "epoch": 0.49, "learning_rate": 1.90806251920609e-05, "loss": 0.6753, "step": 1478 }, { "epoch": 0.49, "learning_rate": 1.9079123744639415e-05, "loss": 0.7666, "step": 1479 }, { "epoch": 0.49, "learning_rate": 1.9077621131370655e-05, "loss": 0.7979, "step": 1480 }, { "epoch": 0.49, "learning_rate": 1.907611735244757e-05, "loss": 0.8179, "step": 1481 }, { "epoch": 0.49, "learning_rate": 1.9074612408063257e-05, "loss": 0.7563, "step": 1482 }, { "epoch": 0.49, "learning_rate": 1.9073106298410963e-05, "loss": 0.6692, "step": 1483 }, { "epoch": 0.49, "learning_rate": 1.9071599023684088e-05, "loss": 0.7632, "step": 1484 }, { "epoch": 0.49, "learning_rate": 1.9070090584076185e-05, "loss": 0.7144, "step": 1485 }, { "epoch": 0.49, "learning_rate": 1.9068580979780948e-05, "loss": 0.7637, "step": 1486 }, { "epoch": 0.49, "learning_rate": 1.9067070210992228e-05, "loss": 0.8198, "step": 1487 }, { "epoch": 0.49, "learning_rate": 1.9065558277904016e-05, "loss": 0.7283, "step": 1488 }, { "epoch": 0.49, "learning_rate": 1.9064045180710467e-05, "loss": 0.7847, "step": 1489 }, { "epoch": 0.49, "learning_rate": 1.9062530919605874e-05, "loss": 0.7759, "step": 1490 }, { "epoch": 0.49, "learning_rate": 1.9061015494784678e-05, "loss": 0.8267, "step": 1491 }, { "epoch": 0.5, "learning_rate": 1.9059498906441484e-05, "loss": 0.7451, "step": 1492 }, { "epoch": 0.5, "learning_rate": 1.9057981154771026e-05, "loss": 0.8291, "step": 1493 }, { "epoch": 0.5, "learning_rate": 1.9056462239968204e-05, "loss": 0.7681, "step": 1494 }, { "epoch": 0.5, "learning_rate": 1.9054942162228064e-05, "loss": 0.7776, "step": 1495 }, { "epoch": 0.5, "learning_rate": 1.9053420921745794e-05, "loss": 0.7617, "step": 1496 }, { "epoch": 0.5, "learning_rate": 1.9051898518716733e-05, "loss": 0.7134, "step": 1497 }, { "epoch": 0.5, "learning_rate": 1.905037495333638e-05, "loss": 0.8013, "step": 1498 }, { "epoch": 0.5, "learning_rate": 1.9048850225800372e-05, "loss": 0.793, "step": 1499 }, { "epoch": 0.5, "learning_rate": 1.9047324336304496e-05, "loss": 0.8098, "step": 1500 }, { "epoch": 0.5, "learning_rate": 1.9045797285044695e-05, "loss": 0.8296, "step": 1501 }, { "epoch": 0.5, "learning_rate": 1.904426907221706e-05, "loss": 0.7979, "step": 1502 }, { "epoch": 0.5, "learning_rate": 1.9042739698017815e-05, "loss": 0.7661, "step": 1503 }, { "epoch": 0.5, "learning_rate": 1.9041209162643365e-05, "loss": 0.7451, "step": 1504 }, { "epoch": 0.5, "learning_rate": 1.903967746629023e-05, "loss": 0.7349, "step": 1505 }, { "epoch": 0.5, "learning_rate": 1.90381446091551e-05, "loss": 0.7876, "step": 1506 }, { "epoch": 0.5, "learning_rate": 1.9036610591434813e-05, "loss": 0.6821, "step": 1507 }, { "epoch": 0.5, "learning_rate": 1.9035075413326347e-05, "loss": 0.7173, "step": 1508 }, { "epoch": 0.5, "learning_rate": 1.9033539075026834e-05, "loss": 0.7532, "step": 1509 }, { "epoch": 0.5, "learning_rate": 1.903200157673356e-05, "loss": 0.7725, "step": 1510 }, { "epoch": 0.5, "learning_rate": 1.9030462918643945e-05, "loss": 0.7942, "step": 1511 }, { "epoch": 0.5, "learning_rate": 1.9028923100955573e-05, "loss": 0.7734, "step": 1512 }, { "epoch": 0.5, "learning_rate": 1.9027382123866172e-05, "loss": 0.7102, "step": 1513 }, { "epoch": 0.5, "learning_rate": 1.9025839987573616e-05, "loss": 0.7666, "step": 1514 }, { "epoch": 0.5, "learning_rate": 1.902429669227593e-05, "loss": 0.5975, "step": 1515 }, { "epoch": 0.5, "learning_rate": 1.9022752238171298e-05, "loss": 0.7842, "step": 1516 }, { "epoch": 0.5, "learning_rate": 1.902120662545803e-05, "loss": 0.8198, "step": 1517 }, { "epoch": 0.5, "learning_rate": 1.90196598543346e-05, "loss": 0.7852, "step": 1518 }, { "epoch": 0.5, "learning_rate": 1.901811192499963e-05, "loss": 0.7158, "step": 1519 }, { "epoch": 0.5, "learning_rate": 1.9016562837651894e-05, "loss": 0.7258, "step": 1520 }, { "epoch": 0.5, "learning_rate": 1.9015012592490307e-05, "loss": 0.8232, "step": 1521 }, { "epoch": 0.51, "learning_rate": 1.9013461189713927e-05, "loss": 0.7681, "step": 1522 }, { "epoch": 0.51, "learning_rate": 1.901190862952198e-05, "loss": 0.7373, "step": 1523 }, { "epoch": 0.51, "learning_rate": 1.9010354912113823e-05, "loss": 0.8105, "step": 1524 }, { "epoch": 0.51, "learning_rate": 1.9008800037688975e-05, "loss": 0.8047, "step": 1525 }, { "epoch": 0.51, "learning_rate": 1.9007244006447093e-05, "loss": 0.793, "step": 1526 }, { "epoch": 0.51, "learning_rate": 1.9005686818587983e-05, "loss": 0.8047, "step": 1527 }, { "epoch": 0.51, "learning_rate": 1.900412847431161e-05, "loss": 0.7461, "step": 1528 }, { "epoch": 0.51, "learning_rate": 1.9002568973818075e-05, "loss": 0.7476, "step": 1529 }, { "epoch": 0.51, "learning_rate": 1.9001008317307638e-05, "loss": 0.707, "step": 1530 }, { "epoch": 0.51, "learning_rate": 1.8999446504980693e-05, "loss": 0.7783, "step": 1531 }, { "epoch": 0.51, "learning_rate": 1.89978835370378e-05, "loss": 0.6731, "step": 1532 }, { "epoch": 0.51, "learning_rate": 1.899631941367966e-05, "loss": 0.811, "step": 1533 }, { "epoch": 0.51, "learning_rate": 1.899475413510712e-05, "loss": 0.8154, "step": 1534 }, { "epoch": 0.51, "learning_rate": 1.8993187701521175e-05, "loss": 0.7959, "step": 1535 }, { "epoch": 0.51, "learning_rate": 1.899162011312297e-05, "loss": 0.8228, "step": 1536 }, { "epoch": 0.51, "learning_rate": 1.8990051370113796e-05, "loss": 0.8364, "step": 1537 }, { "epoch": 0.51, "learning_rate": 1.8988481472695102e-05, "loss": 0.7788, "step": 1538 }, { "epoch": 0.51, "learning_rate": 1.8986910421068474e-05, "loss": 0.7476, "step": 1539 }, { "epoch": 0.51, "learning_rate": 1.898533821543565e-05, "loss": 0.8101, "step": 1540 }, { "epoch": 0.51, "learning_rate": 1.8983764855998518e-05, "loss": 0.7937, "step": 1541 }, { "epoch": 0.51, "learning_rate": 1.898219034295911e-05, "loss": 0.8022, "step": 1542 }, { "epoch": 0.51, "learning_rate": 1.8980614676519612e-05, "loss": 0.6451, "step": 1543 }, { "epoch": 0.51, "learning_rate": 1.8979037856882348e-05, "loss": 0.8091, "step": 1544 }, { "epoch": 0.51, "learning_rate": 1.8977459884249807e-05, "loss": 0.7847, "step": 1545 }, { "epoch": 0.51, "learning_rate": 1.897588075882461e-05, "loss": 0.8577, "step": 1546 }, { "epoch": 0.51, "learning_rate": 1.897430048080953e-05, "loss": 0.8149, "step": 1547 }, { "epoch": 0.51, "learning_rate": 1.8972719050407494e-05, "loss": 0.7695, "step": 1548 }, { "epoch": 0.51, "learning_rate": 1.8971136467821576e-05, "loss": 0.7749, "step": 1549 }, { "epoch": 0.51, "learning_rate": 1.8969552733254987e-05, "loss": 0.7681, "step": 1550 }, { "epoch": 0.51, "learning_rate": 1.8967967846911098e-05, "loss": 0.6619, "step": 1551 }, { "epoch": 0.52, "learning_rate": 1.8966381808993423e-05, "loss": 0.7944, "step": 1552 }, { "epoch": 0.52, "learning_rate": 1.8964794619705624e-05, "loss": 0.7681, "step": 1553 }, { "epoch": 0.52, "learning_rate": 1.8963206279251514e-05, "loss": 0.7896, "step": 1554 }, { "epoch": 0.52, "learning_rate": 1.8961616787835046e-05, "loss": 0.6992, "step": 1555 }, { "epoch": 0.52, "learning_rate": 1.896002614566033e-05, "loss": 0.814, "step": 1556 }, { "epoch": 0.52, "learning_rate": 1.8958434352931623e-05, "loss": 0.8418, "step": 1557 }, { "epoch": 0.52, "learning_rate": 1.895684140985332e-05, "loss": 0.708, "step": 1558 }, { "epoch": 0.52, "learning_rate": 1.8955247316629975e-05, "loss": 0.668, "step": 1559 }, { "epoch": 0.52, "learning_rate": 1.8953652073466284e-05, "loss": 0.6711, "step": 1560 }, { "epoch": 0.52, "learning_rate": 1.8952055680567088e-05, "loss": 0.7224, "step": 1561 }, { "epoch": 0.52, "learning_rate": 1.8950458138137385e-05, "loss": 0.7461, "step": 1562 }, { "epoch": 0.52, "learning_rate": 1.8948859446382313e-05, "loss": 0.772, "step": 1563 }, { "epoch": 0.52, "learning_rate": 1.8947259605507152e-05, "loss": 0.8154, "step": 1564 }, { "epoch": 0.52, "learning_rate": 1.894565861571735e-05, "loss": 0.6753, "step": 1565 }, { "epoch": 0.52, "learning_rate": 1.894405647721848e-05, "loss": 0.7832, "step": 1566 }, { "epoch": 0.52, "learning_rate": 1.894245319021628e-05, "loss": 0.8232, "step": 1567 }, { "epoch": 0.52, "learning_rate": 1.8940848754916618e-05, "loss": 0.6477, "step": 1568 }, { "epoch": 0.52, "learning_rate": 1.8939243171525525e-05, "loss": 0.7812, "step": 1569 }, { "epoch": 0.52, "learning_rate": 1.893763644024917e-05, "loss": 0.7039, "step": 1570 }, { "epoch": 0.52, "learning_rate": 1.893602856129388e-05, "loss": 0.812, "step": 1571 }, { "epoch": 0.52, "learning_rate": 1.8934419534866112e-05, "loss": 0.7222, "step": 1572 }, { "epoch": 0.52, "learning_rate": 1.893280936117249e-05, "loss": 0.8257, "step": 1573 }, { "epoch": 0.52, "learning_rate": 1.8931198040419772e-05, "loss": 0.7524, "step": 1574 }, { "epoch": 0.52, "learning_rate": 1.8929585572814865e-05, "loss": 0.6741, "step": 1575 }, { "epoch": 0.52, "learning_rate": 1.8927971958564824e-05, "loss": 0.8076, "step": 1576 }, { "epoch": 0.52, "learning_rate": 1.8926357197876862e-05, "loss": 0.728, "step": 1577 }, { "epoch": 0.52, "learning_rate": 1.8924741290958322e-05, "loss": 0.8027, "step": 1578 }, { "epoch": 0.52, "learning_rate": 1.8923124238016703e-05, "loss": 0.7134, "step": 1579 }, { "epoch": 0.52, "learning_rate": 1.892150603925965e-05, "loss": 0.7817, "step": 1580 }, { "epoch": 0.52, "learning_rate": 1.891988669489496e-05, "loss": 0.7603, "step": 1581 }, { "epoch": 0.53, "learning_rate": 1.8918266205130565e-05, "loss": 0.77, "step": 1582 }, { "epoch": 0.53, "learning_rate": 1.8916644570174555e-05, "loss": 0.8408, "step": 1583 }, { "epoch": 0.53, "learning_rate": 1.8915021790235172e-05, "loss": 0.7383, "step": 1584 }, { "epoch": 0.53, "learning_rate": 1.8913397865520782e-05, "loss": 0.7866, "step": 1585 }, { "epoch": 0.53, "learning_rate": 1.891177279623992e-05, "loss": 0.7847, "step": 1586 }, { "epoch": 0.53, "learning_rate": 1.8910146582601258e-05, "loss": 0.7153, "step": 1587 }, { "epoch": 0.53, "learning_rate": 1.8908519224813626e-05, "loss": 0.7222, "step": 1588 }, { "epoch": 0.53, "learning_rate": 1.890689072308598e-05, "loss": 0.6501, "step": 1589 }, { "epoch": 0.53, "learning_rate": 1.8905261077627446e-05, "loss": 0.7197, "step": 1590 }, { "epoch": 0.53, "learning_rate": 1.890363028864728e-05, "loss": 0.7439, "step": 1591 }, { "epoch": 0.53, "learning_rate": 1.890199835635489e-05, "loss": 0.8296, "step": 1592 }, { "epoch": 0.53, "learning_rate": 1.890036528095984e-05, "loss": 0.73, "step": 1593 }, { "epoch": 0.53, "learning_rate": 1.8898731062671823e-05, "loss": 0.7759, "step": 1594 }, { "epoch": 0.53, "learning_rate": 1.8897095701700693e-05, "loss": 0.7842, "step": 1595 }, { "epoch": 0.53, "learning_rate": 1.8895459198256447e-05, "loss": 0.7458, "step": 1596 }, { "epoch": 0.53, "learning_rate": 1.8893821552549227e-05, "loss": 0.7793, "step": 1597 }, { "epoch": 0.53, "learning_rate": 1.8892182764789324e-05, "loss": 0.8276, "step": 1598 }, { "epoch": 0.53, "learning_rate": 1.8890542835187167e-05, "loss": 0.7705, "step": 1599 }, { "epoch": 0.53, "learning_rate": 1.8888901763953348e-05, "loss": 0.7661, "step": 1600 }, { "epoch": 0.53, "learning_rate": 1.8887259551298593e-05, "loss": 0.8242, "step": 1601 }, { "epoch": 0.53, "learning_rate": 1.888561619743378e-05, "loss": 0.7446, "step": 1602 }, { "epoch": 0.53, "learning_rate": 1.8883971702569924e-05, "loss": 0.8076, "step": 1603 }, { "epoch": 0.53, "learning_rate": 1.8882326066918203e-05, "loss": 0.7598, "step": 1604 }, { "epoch": 0.53, "learning_rate": 1.888067929068993e-05, "loss": 0.8066, "step": 1605 }, { "epoch": 0.53, "learning_rate": 1.8879031374096565e-05, "loss": 0.7402, "step": 1606 }, { "epoch": 0.53, "learning_rate": 1.8877382317349714e-05, "loss": 0.6875, "step": 1607 }, { "epoch": 0.53, "learning_rate": 1.8875732120661135e-05, "loss": 0.7905, "step": 1608 }, { "epoch": 0.53, "learning_rate": 1.8874080784242732e-05, "loss": 0.8174, "step": 1609 }, { "epoch": 0.53, "learning_rate": 1.8872428308306548e-05, "loss": 0.7856, "step": 1610 }, { "epoch": 0.53, "learning_rate": 1.8870774693064778e-05, "loss": 0.8311, "step": 1611 }, { "epoch": 0.54, "learning_rate": 1.8869119938729767e-05, "loss": 0.812, "step": 1612 }, { "epoch": 0.54, "learning_rate": 1.8867464045513997e-05, "loss": 0.7842, "step": 1613 }, { "epoch": 0.54, "learning_rate": 1.88658070136301e-05, "loss": 0.7915, "step": 1614 }, { "epoch": 0.54, "learning_rate": 1.8864148843290853e-05, "loss": 0.731, "step": 1615 }, { "epoch": 0.54, "learning_rate": 1.8862489534709187e-05, "loss": 0.7058, "step": 1616 }, { "epoch": 0.54, "learning_rate": 1.886082908809817e-05, "loss": 0.8071, "step": 1617 }, { "epoch": 0.54, "learning_rate": 1.885916750367102e-05, "loss": 0.8096, "step": 1618 }, { "epoch": 0.54, "learning_rate": 1.8857504781641097e-05, "loss": 0.7256, "step": 1619 }, { "epoch": 0.54, "learning_rate": 1.8855840922221917e-05, "loss": 0.7739, "step": 1620 }, { "epoch": 0.54, "learning_rate": 1.885417592562713e-05, "loss": 0.7769, "step": 1621 }, { "epoch": 0.54, "learning_rate": 1.8852509792070542e-05, "loss": 0.7896, "step": 1622 }, { "epoch": 0.54, "learning_rate": 1.88508425217661e-05, "loss": 0.7583, "step": 1623 }, { "epoch": 0.54, "learning_rate": 1.8849174114927895e-05, "loss": 0.8413, "step": 1624 }, { "epoch": 0.54, "learning_rate": 1.8847504571770167e-05, "loss": 0.7241, "step": 1625 }, { "epoch": 0.54, "learning_rate": 1.8845833892507306e-05, "loss": 0.7483, "step": 1626 }, { "epoch": 0.54, "learning_rate": 1.8844162077353838e-05, "loss": 0.7583, "step": 1627 }, { "epoch": 0.54, "learning_rate": 1.884248912652444e-05, "loss": 0.8516, "step": 1628 }, { "epoch": 0.54, "learning_rate": 1.8840815040233938e-05, "loss": 0.7148, "step": 1629 }, { "epoch": 0.54, "learning_rate": 1.8839139818697304e-05, "loss": 0.8008, "step": 1630 }, { "epoch": 0.54, "learning_rate": 1.8837463462129645e-05, "loss": 0.8428, "step": 1631 }, { "epoch": 0.54, "learning_rate": 1.8835785970746228e-05, "loss": 0.856, "step": 1632 }, { "epoch": 0.54, "learning_rate": 1.8834107344762453e-05, "loss": 0.7949, "step": 1633 }, { "epoch": 0.54, "learning_rate": 1.8832427584393875e-05, "loss": 0.7231, "step": 1634 }, { "epoch": 0.54, "learning_rate": 1.883074668985619e-05, "loss": 0.7412, "step": 1635 }, { "epoch": 0.54, "learning_rate": 1.8829064661365247e-05, "loss": 0.7139, "step": 1636 }, { "epoch": 0.54, "learning_rate": 1.8827381499137027e-05, "loss": 0.7754, "step": 1637 }, { "epoch": 0.54, "learning_rate": 1.882569720338767e-05, "loss": 0.7319, "step": 1638 }, { "epoch": 0.54, "learning_rate": 1.8824011774333453e-05, "loss": 0.7749, "step": 1639 }, { "epoch": 0.54, "learning_rate": 1.8822325212190802e-05, "loss": 0.7632, "step": 1640 }, { "epoch": 0.54, "learning_rate": 1.8820637517176287e-05, "loss": 0.7568, "step": 1641 }, { "epoch": 0.54, "learning_rate": 1.8818948689506623e-05, "loss": 0.7207, "step": 1642 }, { "epoch": 0.55, "learning_rate": 1.8817258729398677e-05, "loss": 0.7146, "step": 1643 }, { "epoch": 0.55, "learning_rate": 1.881556763706945e-05, "loss": 0.7256, "step": 1644 }, { "epoch": 0.55, "learning_rate": 1.8813875412736104e-05, "loss": 0.7871, "step": 1645 }, { "epoch": 0.55, "learning_rate": 1.8812182056615923e-05, "loss": 0.6992, "step": 1646 }, { "epoch": 0.55, "learning_rate": 1.8810487568926362e-05, "loss": 0.6709, "step": 1647 }, { "epoch": 0.55, "learning_rate": 1.8808791949885004e-05, "loss": 0.7759, "step": 1648 }, { "epoch": 0.55, "learning_rate": 1.8807095199709587e-05, "loss": 0.7422, "step": 1649 }, { "epoch": 0.55, "learning_rate": 1.8805397318617985e-05, "loss": 0.7925, "step": 1650 }, { "epoch": 0.55, "learning_rate": 1.8803698306828228e-05, "loss": 0.7593, "step": 1651 }, { "epoch": 0.55, "learning_rate": 1.8801998164558483e-05, "loss": 0.7451, "step": 1652 }, { "epoch": 0.55, "learning_rate": 1.880029689202706e-05, "loss": 0.7637, "step": 1653 }, { "epoch": 0.55, "learning_rate": 1.8798594489452425e-05, "loss": 0.6992, "step": 1654 }, { "epoch": 0.55, "learning_rate": 1.879689095705318e-05, "loss": 0.8208, "step": 1655 }, { "epoch": 0.55, "learning_rate": 1.8795186295048075e-05, "loss": 0.7415, "step": 1656 }, { "epoch": 0.55, "learning_rate": 1.879348050365601e-05, "loss": 0.7075, "step": 1657 }, { "epoch": 0.55, "learning_rate": 1.879177358309602e-05, "loss": 0.7019, "step": 1658 }, { "epoch": 0.55, "learning_rate": 1.879006553358729e-05, "loss": 0.8257, "step": 1659 }, { "epoch": 0.55, "learning_rate": 1.878835635534915e-05, "loss": 0.7847, "step": 1660 }, { "epoch": 0.55, "learning_rate": 1.878664604860108e-05, "loss": 0.7749, "step": 1661 }, { "epoch": 0.55, "learning_rate": 1.8784934613562695e-05, "loss": 0.7334, "step": 1662 }, { "epoch": 0.55, "learning_rate": 1.878322205045376e-05, "loss": 0.7983, "step": 1663 }, { "epoch": 0.55, "learning_rate": 1.8781508359494188e-05, "loss": 0.7466, "step": 1664 }, { "epoch": 0.55, "learning_rate": 1.877979354090403e-05, "loss": 0.8003, "step": 1665 }, { "epoch": 0.55, "learning_rate": 1.8778077594903487e-05, "loss": 0.7412, "step": 1666 }, { "epoch": 0.55, "learning_rate": 1.87763605217129e-05, "loss": 0.7983, "step": 1667 }, { "epoch": 0.55, "learning_rate": 1.877464232155276e-05, "loss": 0.7954, "step": 1668 }, { "epoch": 0.55, "learning_rate": 1.8772922994643707e-05, "loss": 0.7661, "step": 1669 }, { "epoch": 0.55, "learning_rate": 1.877120254120651e-05, "loss": 0.7119, "step": 1670 }, { "epoch": 0.55, "learning_rate": 1.8769480961462093e-05, "loss": 0.7231, "step": 1671 }, { "epoch": 0.55, "learning_rate": 1.876775825563153e-05, "loss": 0.7812, "step": 1672 }, { "epoch": 0.56, "learning_rate": 1.8766034423936025e-05, "loss": 0.6851, "step": 1673 }, { "epoch": 0.56, "learning_rate": 1.876430946659694e-05, "loss": 0.7734, "step": 1674 }, { "epoch": 0.56, "learning_rate": 1.8762583383835774e-05, "loss": 0.7405, "step": 1675 }, { "epoch": 0.56, "learning_rate": 1.8760856175874172e-05, "loss": 0.769, "step": 1676 }, { "epoch": 0.56, "learning_rate": 1.8759127842933923e-05, "loss": 0.8066, "step": 1677 }, { "epoch": 0.56, "learning_rate": 1.8757398385236967e-05, "loss": 0.8257, "step": 1678 }, { "epoch": 0.56, "learning_rate": 1.875566780300538e-05, "loss": 0.7998, "step": 1679 }, { "epoch": 0.56, "learning_rate": 1.8753936096461388e-05, "loss": 0.7725, "step": 1680 }, { "epoch": 0.56, "learning_rate": 1.8752203265827353e-05, "loss": 0.8032, "step": 1681 }, { "epoch": 0.56, "learning_rate": 1.8750469311325796e-05, "loss": 0.7529, "step": 1682 }, { "epoch": 0.56, "learning_rate": 1.874873423317936e-05, "loss": 0.7173, "step": 1683 }, { "epoch": 0.56, "learning_rate": 1.874699803161086e-05, "loss": 0.7617, "step": 1684 }, { "epoch": 0.56, "learning_rate": 1.8745260706843233e-05, "loss": 0.7256, "step": 1685 }, { "epoch": 0.56, "learning_rate": 1.874352225909957e-05, "loss": 0.7856, "step": 1686 }, { "epoch": 0.56, "learning_rate": 1.8741782688603107e-05, "loss": 0.7188, "step": 1687 }, { "epoch": 0.56, "learning_rate": 1.8740041995577216e-05, "loss": 0.7151, "step": 1688 }, { "epoch": 0.56, "learning_rate": 1.8738300180245426e-05, "loss": 0.7441, "step": 1689 }, { "epoch": 0.56, "learning_rate": 1.87365572428314e-05, "loss": 0.75, "step": 1690 }, { "epoch": 0.56, "learning_rate": 1.8734813183558942e-05, "loss": 0.7017, "step": 1691 }, { "epoch": 0.56, "learning_rate": 1.8733068002652014e-05, "loss": 0.7871, "step": 1692 }, { "epoch": 0.56, "learning_rate": 1.8731321700334714e-05, "loss": 0.8047, "step": 1693 }, { "epoch": 0.56, "learning_rate": 1.872957427683128e-05, "loss": 0.8052, "step": 1694 }, { "epoch": 0.56, "learning_rate": 1.8727825732366098e-05, "loss": 0.7969, "step": 1695 }, { "epoch": 0.56, "learning_rate": 1.87260760671637e-05, "loss": 0.6479, "step": 1696 }, { "epoch": 0.56, "learning_rate": 1.8724325281448758e-05, "loss": 0.7109, "step": 1697 }, { "epoch": 0.56, "learning_rate": 1.8722573375446096e-05, "loss": 0.7974, "step": 1698 }, { "epoch": 0.56, "learning_rate": 1.872082034938067e-05, "loss": 0.6733, "step": 1699 }, { "epoch": 0.56, "learning_rate": 1.8719066203477584e-05, "loss": 0.7266, "step": 1700 }, { "epoch": 0.56, "learning_rate": 1.8717310937962092e-05, "loss": 0.6973, "step": 1701 }, { "epoch": 0.56, "learning_rate": 1.8715554553059587e-05, "loss": 0.7319, "step": 1702 }, { "epoch": 0.57, "learning_rate": 1.87137970489956e-05, "loss": 0.7935, "step": 1703 }, { "epoch": 0.57, "learning_rate": 1.8712038425995817e-05, "loss": 0.8286, "step": 1704 }, { "epoch": 0.57, "learning_rate": 1.871027868428606e-05, "loss": 0.8042, "step": 1705 }, { "epoch": 0.57, "learning_rate": 1.8708517824092295e-05, "loss": 0.8184, "step": 1706 }, { "epoch": 0.57, "learning_rate": 1.870675584564064e-05, "loss": 0.6804, "step": 1707 }, { "epoch": 0.57, "learning_rate": 1.8704992749157344e-05, "loss": 0.855, "step": 1708 }, { "epoch": 0.57, "learning_rate": 1.8703228534868808e-05, "loss": 0.7195, "step": 1709 }, { "epoch": 0.57, "learning_rate": 1.8701463203001572e-05, "loss": 0.7861, "step": 1710 }, { "epoch": 0.57, "learning_rate": 1.8699696753782327e-05, "loss": 0.7573, "step": 1711 }, { "epoch": 0.57, "learning_rate": 1.8697929187437897e-05, "loss": 0.7754, "step": 1712 }, { "epoch": 0.57, "learning_rate": 1.8696160504195253e-05, "loss": 0.7856, "step": 1713 }, { "epoch": 0.57, "learning_rate": 1.8694390704281517e-05, "loss": 0.7876, "step": 1714 }, { "epoch": 0.57, "learning_rate": 1.8692619787923946e-05, "loss": 0.8032, "step": 1715 }, { "epoch": 0.57, "learning_rate": 1.869084775534994e-05, "loss": 0.7271, "step": 1716 }, { "epoch": 0.57, "learning_rate": 1.8689074606787054e-05, "loss": 0.7754, "step": 1717 }, { "epoch": 0.57, "learning_rate": 1.8687300342462962e-05, "loss": 0.7012, "step": 1718 }, { "epoch": 0.57, "learning_rate": 1.868552496260551e-05, "loss": 0.7544, "step": 1719 }, { "epoch": 0.57, "learning_rate": 1.868374846744267e-05, "loss": 0.7112, "step": 1720 }, { "epoch": 0.57, "learning_rate": 1.8681970857202557e-05, "loss": 0.7344, "step": 1721 }, { "epoch": 0.57, "learning_rate": 1.8680192132113442e-05, "loss": 0.7622, "step": 1722 }, { "epoch": 0.57, "learning_rate": 1.8678412292403725e-05, "loss": 0.7292, "step": 1723 }, { "epoch": 0.57, "learning_rate": 1.8676631338301955e-05, "loss": 0.7622, "step": 1724 }, { "epoch": 0.57, "learning_rate": 1.867484927003682e-05, "loss": 0.8076, "step": 1725 }, { "epoch": 0.57, "learning_rate": 1.867306608783716e-05, "loss": 0.7837, "step": 1726 }, { "epoch": 0.57, "learning_rate": 1.8671281791931953e-05, "loss": 0.7871, "step": 1727 }, { "epoch": 0.57, "learning_rate": 1.8669496382550322e-05, "loss": 0.7671, "step": 1728 }, { "epoch": 0.57, "learning_rate": 1.8667709859921518e-05, "loss": 0.6573, "step": 1729 }, { "epoch": 0.57, "learning_rate": 1.8665922224274965e-05, "loss": 0.71, "step": 1730 }, { "epoch": 0.57, "learning_rate": 1.8664133475840203e-05, "loss": 0.7598, "step": 1731 }, { "epoch": 0.57, "learning_rate": 1.8662343614846925e-05, "loss": 0.7495, "step": 1732 }, { "epoch": 0.58, "learning_rate": 1.8660552641524967e-05, "loss": 0.6777, "step": 1733 }, { "epoch": 0.58, "learning_rate": 1.865876055610431e-05, "loss": 0.7241, "step": 1734 }, { "epoch": 0.58, "learning_rate": 1.8656967358815073e-05, "loss": 0.7617, "step": 1735 }, { "epoch": 0.58, "learning_rate": 1.865517304988752e-05, "loss": 0.7817, "step": 1736 }, { "epoch": 0.58, "learning_rate": 1.8653377629552058e-05, "loss": 0.7097, "step": 1737 }, { "epoch": 0.58, "learning_rate": 1.865158109803923e-05, "loss": 0.6938, "step": 1738 }, { "epoch": 0.58, "learning_rate": 1.8649783455579743e-05, "loss": 0.7686, "step": 1739 }, { "epoch": 0.58, "learning_rate": 1.8647984702404417e-05, "loss": 0.7007, "step": 1740 }, { "epoch": 0.58, "learning_rate": 1.8646184838744236e-05, "loss": 0.7451, "step": 1741 }, { "epoch": 0.58, "learning_rate": 1.8644383864830316e-05, "loss": 0.6819, "step": 1742 }, { "epoch": 0.58, "learning_rate": 1.864258178089393e-05, "loss": 0.7671, "step": 1743 }, { "epoch": 0.58, "learning_rate": 1.864077858716647e-05, "loss": 0.7871, "step": 1744 }, { "epoch": 0.58, "learning_rate": 1.863897428387949e-05, "loss": 0.7764, "step": 1745 }, { "epoch": 0.58, "learning_rate": 1.863716887126468e-05, "loss": 0.7739, "step": 1746 }, { "epoch": 0.58, "learning_rate": 1.8635362349553866e-05, "loss": 0.8018, "step": 1747 }, { "epoch": 0.58, "learning_rate": 1.8633554718979033e-05, "loss": 0.8032, "step": 1748 }, { "epoch": 0.58, "learning_rate": 1.8631745979772293e-05, "loss": 0.7705, "step": 1749 }, { "epoch": 0.58, "learning_rate": 1.8629936132165906e-05, "loss": 0.7407, "step": 1750 }, { "epoch": 0.58, "learning_rate": 1.8628125176392274e-05, "loss": 0.7139, "step": 1751 }, { "epoch": 0.58, "learning_rate": 1.8626313112683938e-05, "loss": 0.7109, "step": 1752 }, { "epoch": 0.58, "learning_rate": 1.862449994127359e-05, "loss": 0.7671, "step": 1753 }, { "epoch": 0.58, "learning_rate": 1.862268566239406e-05, "loss": 0.7935, "step": 1754 }, { "epoch": 0.58, "learning_rate": 1.862087027627831e-05, "loss": 0.7939, "step": 1755 }, { "epoch": 0.58, "learning_rate": 1.861905378315946e-05, "loss": 0.7744, "step": 1756 }, { "epoch": 0.58, "learning_rate": 1.8617236183270764e-05, "loss": 0.6934, "step": 1757 }, { "epoch": 0.58, "learning_rate": 1.861541747684562e-05, "loss": 0.772, "step": 1758 }, { "epoch": 0.58, "learning_rate": 1.861359766411756e-05, "loss": 0.7542, "step": 1759 }, { "epoch": 0.58, "learning_rate": 1.8611776745320284e-05, "loss": 0.7119, "step": 1760 }, { "epoch": 0.58, "learning_rate": 1.8609954720687592e-05, "loss": 0.7612, "step": 1761 }, { "epoch": 0.58, "learning_rate": 1.8608131590453468e-05, "loss": 0.7866, "step": 1762 }, { "epoch": 0.59, "learning_rate": 1.8606307354852013e-05, "loss": 0.874, "step": 1763 }, { "epoch": 0.59, "learning_rate": 1.8604482014117476e-05, "loss": 0.772, "step": 1764 }, { "epoch": 0.59, "learning_rate": 1.8602655568484246e-05, "loss": 0.7798, "step": 1765 }, { "epoch": 0.59, "learning_rate": 1.860082801818686e-05, "loss": 0.7095, "step": 1766 }, { "epoch": 0.59, "learning_rate": 1.8598999363459994e-05, "loss": 0.7781, "step": 1767 }, { "epoch": 0.59, "learning_rate": 1.859716960453846e-05, "loss": 0.7583, "step": 1768 }, { "epoch": 0.59, "learning_rate": 1.8595338741657226e-05, "loss": 0.7661, "step": 1769 }, { "epoch": 0.59, "learning_rate": 1.859350677505138e-05, "loss": 0.7642, "step": 1770 }, { "epoch": 0.59, "learning_rate": 1.8591673704956174e-05, "loss": 0.7432, "step": 1771 }, { "epoch": 0.59, "learning_rate": 1.8589839531606988e-05, "loss": 0.77, "step": 1772 }, { "epoch": 0.59, "learning_rate": 1.8588004255239345e-05, "loss": 0.7515, "step": 1773 }, { "epoch": 0.59, "learning_rate": 1.858616787608892e-05, "loss": 0.7124, "step": 1774 }, { "epoch": 0.59, "learning_rate": 1.8584330394391513e-05, "loss": 0.7959, "step": 1775 }, { "epoch": 0.59, "learning_rate": 1.8582491810383084e-05, "loss": 0.7156, "step": 1776 }, { "epoch": 0.59, "learning_rate": 1.8580652124299715e-05, "loss": 0.791, "step": 1777 }, { "epoch": 0.59, "learning_rate": 1.8578811336377643e-05, "loss": 0.7666, "step": 1778 }, { "epoch": 0.59, "learning_rate": 1.8576969446853248e-05, "loss": 0.7002, "step": 1779 }, { "epoch": 0.59, "learning_rate": 1.857512645596304e-05, "loss": 0.7524, "step": 1780 }, { "epoch": 0.59, "learning_rate": 1.8573282363943682e-05, "loss": 0.7866, "step": 1781 }, { "epoch": 0.59, "learning_rate": 1.857143717103197e-05, "loss": 0.7471, "step": 1782 }, { "epoch": 0.59, "learning_rate": 1.856959087746484e-05, "loss": 0.79, "step": 1783 }, { "epoch": 0.59, "learning_rate": 1.8567743483479382e-05, "loss": 0.7117, "step": 1784 }, { "epoch": 0.59, "learning_rate": 1.8565894989312817e-05, "loss": 0.7163, "step": 1785 }, { "epoch": 0.59, "learning_rate": 1.856404539520251e-05, "loss": 0.8115, "step": 1786 }, { "epoch": 0.59, "learning_rate": 1.8562194701385964e-05, "loss": 0.7397, "step": 1787 }, { "epoch": 0.59, "learning_rate": 1.8560342908100832e-05, "loss": 0.7402, "step": 1788 }, { "epoch": 0.59, "learning_rate": 1.8558490015584893e-05, "loss": 0.751, "step": 1789 }, { "epoch": 0.59, "learning_rate": 1.855663602407608e-05, "loss": 0.7883, "step": 1790 }, { "epoch": 0.59, "learning_rate": 1.8554780933812465e-05, "loss": 0.7812, "step": 1791 }, { "epoch": 0.59, "learning_rate": 1.8552924745032263e-05, "loss": 0.7842, "step": 1792 }, { "epoch": 0.6, "learning_rate": 1.855106745797382e-05, "loss": 0.6897, "step": 1793 }, { "epoch": 0.6, "learning_rate": 1.8549209072875635e-05, "loss": 0.6685, "step": 1794 }, { "epoch": 0.6, "learning_rate": 1.8547349589976336e-05, "loss": 0.8491, "step": 1795 }, { "epoch": 0.6, "learning_rate": 1.8545489009514705e-05, "loss": 0.7998, "step": 1796 }, { "epoch": 0.6, "learning_rate": 1.8543627331729653e-05, "loss": 0.7368, "step": 1797 }, { "epoch": 0.6, "learning_rate": 1.8541764556860243e-05, "loss": 0.7729, "step": 1798 }, { "epoch": 0.6, "learning_rate": 1.8539900685145668e-05, "loss": 0.7266, "step": 1799 }, { "epoch": 0.6, "learning_rate": 1.8538035716825274e-05, "loss": 0.7964, "step": 1800 }, { "epoch": 0.6, "learning_rate": 1.8536169652138532e-05, "loss": 0.6958, "step": 1801 }, { "epoch": 0.6, "learning_rate": 1.853430249132507e-05, "loss": 0.6687, "step": 1802 }, { "epoch": 0.6, "learning_rate": 1.853243423462465e-05, "loss": 0.7417, "step": 1803 }, { "epoch": 0.6, "learning_rate": 1.8530564882277168e-05, "loss": 0.7759, "step": 1804 }, { "epoch": 0.6, "learning_rate": 1.8528694434522672e-05, "loss": 0.7847, "step": 1805 }, { "epoch": 0.6, "learning_rate": 1.8526822891601338e-05, "loss": 0.7124, "step": 1806 }, { "epoch": 0.6, "learning_rate": 1.8524950253753503e-05, "loss": 0.7866, "step": 1807 }, { "epoch": 0.6, "learning_rate": 1.8523076521219627e-05, "loss": 0.8247, "step": 1808 }, { "epoch": 0.6, "learning_rate": 1.8521201694240303e-05, "loss": 0.8208, "step": 1809 }, { "epoch": 0.6, "learning_rate": 1.8519325773056294e-05, "loss": 0.7632, "step": 1810 }, { "epoch": 0.6, "learning_rate": 1.8517448757908477e-05, "loss": 0.717, "step": 1811 }, { "epoch": 0.6, "learning_rate": 1.8515570649037885e-05, "loss": 0.6904, "step": 1812 }, { "epoch": 0.6, "learning_rate": 1.8513691446685678e-05, "loss": 0.7803, "step": 1813 }, { "epoch": 0.6, "learning_rate": 1.851181115109317e-05, "loss": 0.833, "step": 1814 }, { "epoch": 0.6, "learning_rate": 1.8509929762501807e-05, "loss": 0.7617, "step": 1815 }, { "epoch": 0.6, "learning_rate": 1.8508047281153174e-05, "loss": 0.79, "step": 1816 }, { "epoch": 0.6, "learning_rate": 1.8506163707289006e-05, "loss": 0.7637, "step": 1817 }, { "epoch": 0.6, "learning_rate": 1.850427904115117e-05, "loss": 0.7761, "step": 1818 }, { "epoch": 0.6, "learning_rate": 1.850239328298167e-05, "loss": 0.6899, "step": 1819 }, { "epoch": 0.6, "learning_rate": 1.8500506433022663e-05, "loss": 0.7847, "step": 1820 }, { "epoch": 0.6, "learning_rate": 1.8498618491516433e-05, "loss": 0.7007, "step": 1821 }, { "epoch": 0.6, "learning_rate": 1.8496729458705415e-05, "loss": 0.7458, "step": 1822 }, { "epoch": 0.61, "learning_rate": 1.8494839334832176e-05, "loss": 0.7363, "step": 1823 }, { "epoch": 0.61, "learning_rate": 1.8492948120139428e-05, "loss": 0.7798, "step": 1824 }, { "epoch": 0.61, "learning_rate": 1.849105581487002e-05, "loss": 0.74, "step": 1825 }, { "epoch": 0.61, "learning_rate": 1.8489162419266945e-05, "loss": 0.7202, "step": 1826 }, { "epoch": 0.61, "learning_rate": 1.8487267933573326e-05, "loss": 0.7266, "step": 1827 }, { "epoch": 0.61, "learning_rate": 1.848537235803244e-05, "loss": 0.7349, "step": 1828 }, { "epoch": 0.61, "learning_rate": 1.8483475692887694e-05, "loss": 0.8052, "step": 1829 }, { "epoch": 0.61, "learning_rate": 1.848157793838264e-05, "loss": 0.7275, "step": 1830 }, { "epoch": 0.61, "learning_rate": 1.847967909476097e-05, "loss": 0.7622, "step": 1831 }, { "epoch": 0.61, "learning_rate": 1.847777916226651e-05, "loss": 0.8081, "step": 1832 }, { "epoch": 0.61, "learning_rate": 1.847587814114323e-05, "loss": 0.666, "step": 1833 }, { "epoch": 0.61, "learning_rate": 1.847397603163524e-05, "loss": 0.6636, "step": 1834 }, { "epoch": 0.61, "learning_rate": 1.8472072833986792e-05, "loss": 0.7573, "step": 1835 }, { "epoch": 0.61, "learning_rate": 1.847016854844227e-05, "loss": 0.7417, "step": 1836 }, { "epoch": 0.61, "learning_rate": 1.846826317524621e-05, "loss": 0.7332, "step": 1837 }, { "epoch": 0.61, "learning_rate": 1.8466356714643273e-05, "loss": 0.8193, "step": 1838 }, { "epoch": 0.61, "learning_rate": 1.846444916687827e-05, "loss": 0.7961, "step": 1839 }, { "epoch": 0.61, "learning_rate": 1.8462540532196148e-05, "loss": 0.8062, "step": 1840 }, { "epoch": 0.61, "learning_rate": 1.8460630810841994e-05, "loss": 0.729, "step": 1841 }, { "epoch": 0.61, "learning_rate": 1.8458720003061036e-05, "loss": 0.7041, "step": 1842 }, { "epoch": 0.61, "learning_rate": 1.845680810909864e-05, "loss": 0.7236, "step": 1843 }, { "epoch": 0.61, "learning_rate": 1.8454895129200312e-05, "loss": 0.856, "step": 1844 }, { "epoch": 0.61, "learning_rate": 1.8452981063611696e-05, "loss": 0.6824, "step": 1845 }, { "epoch": 0.61, "learning_rate": 1.8451065912578574e-05, "loss": 0.7764, "step": 1846 }, { "epoch": 0.61, "learning_rate": 1.8449149676346874e-05, "loss": 0.7856, "step": 1847 }, { "epoch": 0.61, "learning_rate": 1.844723235516266e-05, "loss": 0.7651, "step": 1848 }, { "epoch": 0.61, "learning_rate": 1.8445313949272128e-05, "loss": 0.7183, "step": 1849 }, { "epoch": 0.61, "learning_rate": 1.8443394458921628e-05, "loss": 0.7378, "step": 1850 }, { "epoch": 0.61, "learning_rate": 1.8441473884357636e-05, "loss": 0.751, "step": 1851 }, { "epoch": 0.61, "learning_rate": 1.8439552225826777e-05, "loss": 0.7344, "step": 1852 }, { "epoch": 0.62, "learning_rate": 1.8437629483575803e-05, "loss": 0.8018, "step": 1853 }, { "epoch": 0.62, "learning_rate": 1.8435705657851615e-05, "loss": 0.6802, "step": 1854 }, { "epoch": 0.62, "learning_rate": 1.843378074890126e-05, "loss": 0.7549, "step": 1855 }, { "epoch": 0.62, "learning_rate": 1.8431854756971905e-05, "loss": 0.7881, "step": 1856 }, { "epoch": 0.62, "learning_rate": 1.8429927682310865e-05, "loss": 0.8037, "step": 1857 }, { "epoch": 0.62, "learning_rate": 1.8427999525165602e-05, "loss": 0.7705, "step": 1858 }, { "epoch": 0.62, "learning_rate": 1.842607028578371e-05, "loss": 0.7554, "step": 1859 }, { "epoch": 0.62, "learning_rate": 1.8424139964412915e-05, "loss": 0.7661, "step": 1860 }, { "epoch": 0.62, "learning_rate": 1.842220856130109e-05, "loss": 0.7871, "step": 1861 }, { "epoch": 0.62, "learning_rate": 1.8420276076696253e-05, "loss": 0.7319, "step": 1862 }, { "epoch": 0.62, "learning_rate": 1.841834251084655e-05, "loss": 0.6875, "step": 1863 }, { "epoch": 0.62, "learning_rate": 1.8416407864000265e-05, "loss": 0.7378, "step": 1864 }, { "epoch": 0.62, "learning_rate": 1.841447213640583e-05, "loss": 0.7388, "step": 1865 }, { "epoch": 0.62, "learning_rate": 1.8412535328311813e-05, "loss": 0.7686, "step": 1866 }, { "epoch": 0.62, "learning_rate": 1.8410597439966915e-05, "loss": 0.7312, "step": 1867 }, { "epoch": 0.62, "learning_rate": 1.840865847161998e-05, "loss": 0.7474, "step": 1868 }, { "epoch": 0.62, "learning_rate": 1.8406718423519994e-05, "loss": 0.7427, "step": 1869 }, { "epoch": 0.62, "learning_rate": 1.8404777295916074e-05, "loss": 0.7441, "step": 1870 }, { "epoch": 0.62, "learning_rate": 1.8402835089057476e-05, "loss": 0.6953, "step": 1871 }, { "epoch": 0.62, "learning_rate": 1.8400891803193606e-05, "loss": 0.7114, "step": 1872 }, { "epoch": 0.62, "learning_rate": 1.8398947438573998e-05, "loss": 0.7632, "step": 1873 }, { "epoch": 0.62, "learning_rate": 1.8397001995448325e-05, "loss": 0.7136, "step": 1874 }, { "epoch": 0.62, "learning_rate": 1.8395055474066403e-05, "loss": 0.7651, "step": 1875 }, { "epoch": 0.62, "learning_rate": 1.8393107874678186e-05, "loss": 0.7612, "step": 1876 }, { "epoch": 0.62, "learning_rate": 1.8391159197533757e-05, "loss": 0.7417, "step": 1877 }, { "epoch": 0.62, "learning_rate": 1.8389209442883355e-05, "loss": 0.6645, "step": 1878 }, { "epoch": 0.62, "learning_rate": 1.838725861097734e-05, "loss": 0.7124, "step": 1879 }, { "epoch": 0.62, "learning_rate": 1.838530670206622e-05, "loss": 0.7891, "step": 1880 }, { "epoch": 0.62, "learning_rate": 1.8383353716400642e-05, "loss": 0.7466, "step": 1881 }, { "epoch": 0.62, "learning_rate": 1.8381399654231383e-05, "loss": 0.7305, "step": 1882 }, { "epoch": 0.62, "learning_rate": 1.8379444515809366e-05, "loss": 0.8027, "step": 1883 }, { "epoch": 0.63, "learning_rate": 1.837748830138565e-05, "loss": 0.7444, "step": 1884 }, { "epoch": 0.63, "learning_rate": 1.837553101121143e-05, "loss": 0.7372, "step": 1885 }, { "epoch": 0.63, "learning_rate": 1.8373572645538042e-05, "loss": 0.8452, "step": 1886 }, { "epoch": 0.63, "learning_rate": 1.837161320461696e-05, "loss": 0.8477, "step": 1887 }, { "epoch": 0.63, "learning_rate": 1.83696526886998e-05, "loss": 0.7427, "step": 1888 }, { "epoch": 0.63, "learning_rate": 1.83676910980383e-05, "loss": 0.7507, "step": 1889 }, { "epoch": 0.63, "learning_rate": 1.8365728432884352e-05, "loss": 0.7437, "step": 1890 }, { "epoch": 0.63, "learning_rate": 1.8363764693489984e-05, "loss": 0.7749, "step": 1891 }, { "epoch": 0.63, "learning_rate": 1.836179988010736e-05, "loss": 0.7244, "step": 1892 }, { "epoch": 0.63, "learning_rate": 1.8359833992988772e-05, "loss": 0.7231, "step": 1893 }, { "epoch": 0.63, "learning_rate": 1.8357867032386673e-05, "loss": 0.7188, "step": 1894 }, { "epoch": 0.63, "learning_rate": 1.8355898998553626e-05, "loss": 0.7603, "step": 1895 }, { "epoch": 0.63, "learning_rate": 1.8353929891742358e-05, "loss": 0.8198, "step": 1896 }, { "epoch": 0.63, "learning_rate": 1.835195971220571e-05, "loss": 0.7983, "step": 1897 }, { "epoch": 0.63, "learning_rate": 1.8349988460196676e-05, "loss": 0.8052, "step": 1898 }, { "epoch": 0.63, "learning_rate": 1.8348016135968387e-05, "loss": 0.7754, "step": 1899 }, { "epoch": 0.63, "learning_rate": 1.8346042739774105e-05, "loss": 0.814, "step": 1900 }, { "epoch": 0.63, "learning_rate": 1.8344068271867237e-05, "loss": 0.7773, "step": 1901 }, { "epoch": 0.63, "learning_rate": 1.834209273250132e-05, "loss": 0.7217, "step": 1902 }, { "epoch": 0.63, "learning_rate": 1.8340116121930035e-05, "loss": 0.717, "step": 1903 }, { "epoch": 0.63, "learning_rate": 1.8338138440407195e-05, "loss": 0.7256, "step": 1904 }, { "epoch": 0.63, "learning_rate": 1.833615968818675e-05, "loss": 0.7402, "step": 1905 }, { "epoch": 0.63, "learning_rate": 1.8334179865522803e-05, "loss": 0.6638, "step": 1906 }, { "epoch": 0.63, "learning_rate": 1.8332198972669575e-05, "loss": 0.7495, "step": 1907 }, { "epoch": 0.63, "learning_rate": 1.833021700988143e-05, "loss": 0.7478, "step": 1908 }, { "epoch": 0.63, "learning_rate": 1.8328233977412875e-05, "loss": 0.8027, "step": 1909 }, { "epoch": 0.63, "learning_rate": 1.8326249875518548e-05, "loss": 0.7632, "step": 1910 }, { "epoch": 0.63, "learning_rate": 1.8324264704453228e-05, "loss": 0.792, "step": 1911 }, { "epoch": 0.63, "learning_rate": 1.8322278464471827e-05, "loss": 0.772, "step": 1912 }, { "epoch": 0.63, "learning_rate": 1.83202911558294e-05, "loss": 0.8306, "step": 1913 }, { "epoch": 0.64, "learning_rate": 1.8318302778781142e-05, "loss": 0.7192, "step": 1914 }, { "epoch": 0.64, "learning_rate": 1.8316313333582373e-05, "loss": 0.7886, "step": 1915 }, { "epoch": 0.64, "learning_rate": 1.8314322820488556e-05, "loss": 0.6902, "step": 1916 }, { "epoch": 0.64, "learning_rate": 1.8312331239755297e-05, "loss": 0.7705, "step": 1917 }, { "epoch": 0.64, "learning_rate": 1.831033859163833e-05, "loss": 0.8101, "step": 1918 }, { "epoch": 0.64, "learning_rate": 1.8308344876393538e-05, "loss": 0.7559, "step": 1919 }, { "epoch": 0.64, "learning_rate": 1.830635009427692e-05, "loss": 0.7817, "step": 1920 }, { "epoch": 0.64, "learning_rate": 1.8304354245544637e-05, "loss": 0.7378, "step": 1921 }, { "epoch": 0.64, "learning_rate": 1.8302357330452972e-05, "loss": 0.6448, "step": 1922 }, { "epoch": 0.64, "learning_rate": 1.830035934925835e-05, "loss": 0.8013, "step": 1923 }, { "epoch": 0.64, "learning_rate": 1.8298360302217322e-05, "loss": 0.7451, "step": 1924 }, { "epoch": 0.64, "learning_rate": 1.8296360189586594e-05, "loss": 0.792, "step": 1925 }, { "epoch": 0.64, "learning_rate": 1.8294359011623e-05, "loss": 0.7944, "step": 1926 }, { "epoch": 0.64, "learning_rate": 1.8292356768583504e-05, "loss": 0.8042, "step": 1927 }, { "epoch": 0.64, "learning_rate": 1.829035346072522e-05, "loss": 0.7568, "step": 1928 }, { "epoch": 0.64, "learning_rate": 1.828834908830539e-05, "loss": 0.751, "step": 1929 }, { "epoch": 0.64, "learning_rate": 1.8286343651581395e-05, "loss": 0.6675, "step": 1930 }, { "epoch": 0.64, "learning_rate": 1.828433715081075e-05, "loss": 0.7476, "step": 1931 }, { "epoch": 0.64, "learning_rate": 1.828232958625111e-05, "loss": 0.7024, "step": 1932 }, { "epoch": 0.64, "learning_rate": 1.8280320958160268e-05, "loss": 0.7397, "step": 1933 }, { "epoch": 0.64, "learning_rate": 1.8278311266796148e-05, "loss": 0.7729, "step": 1934 }, { "epoch": 0.64, "learning_rate": 1.8276300512416816e-05, "loss": 0.73, "step": 1935 }, { "epoch": 0.64, "learning_rate": 1.8274288695280472e-05, "loss": 0.72, "step": 1936 }, { "epoch": 0.64, "learning_rate": 1.8272275815645453e-05, "loss": 0.769, "step": 1937 }, { "epoch": 0.64, "learning_rate": 1.8270261873770234e-05, "loss": 0.6371, "step": 1938 }, { "epoch": 0.64, "learning_rate": 1.826824686991342e-05, "loss": 0.7129, "step": 1939 }, { "epoch": 0.64, "learning_rate": 1.826623080433376e-05, "loss": 0.6838, "step": 1940 }, { "epoch": 0.64, "learning_rate": 1.8264213677290136e-05, "loss": 0.7251, "step": 1941 }, { "epoch": 0.64, "learning_rate": 1.8262195489041566e-05, "loss": 0.6802, "step": 1942 }, { "epoch": 0.64, "learning_rate": 1.8260176239847204e-05, "loss": 0.7021, "step": 1943 }, { "epoch": 0.65, "learning_rate": 1.8258155929966344e-05, "loss": 0.6377, "step": 1944 }, { "epoch": 0.65, "learning_rate": 1.8256134559658408e-05, "loss": 0.6553, "step": 1945 }, { "epoch": 0.65, "learning_rate": 1.8254112129182968e-05, "loss": 0.7588, "step": 1946 }, { "epoch": 0.65, "learning_rate": 1.825208863879972e-05, "loss": 0.7402, "step": 1947 }, { "epoch": 0.65, "learning_rate": 1.8250064088768495e-05, "loss": 0.6913, "step": 1948 }, { "epoch": 0.65, "learning_rate": 1.824803847934927e-05, "loss": 0.7329, "step": 1949 }, { "epoch": 0.65, "learning_rate": 1.824601181080215e-05, "loss": 0.7197, "step": 1950 }, { "epoch": 0.65, "learning_rate": 1.824398408338738e-05, "loss": 0.7368, "step": 1951 }, { "epoch": 0.65, "learning_rate": 1.8241955297365342e-05, "loss": 0.748, "step": 1952 }, { "epoch": 0.65, "learning_rate": 1.8239925452996547e-05, "loss": 0.7209, "step": 1953 }, { "epoch": 0.65, "learning_rate": 1.823789455054165e-05, "loss": 0.7769, "step": 1954 }, { "epoch": 0.65, "learning_rate": 1.823586259026144e-05, "loss": 0.7708, "step": 1955 }, { "epoch": 0.65, "learning_rate": 1.8233829572416833e-05, "loss": 0.7368, "step": 1956 }, { "epoch": 0.65, "learning_rate": 1.82317954972689e-05, "loss": 0.7397, "step": 1957 }, { "epoch": 0.65, "learning_rate": 1.8229760365078824e-05, "loss": 0.7256, "step": 1958 }, { "epoch": 0.65, "learning_rate": 1.8227724176107942e-05, "loss": 0.7681, "step": 1959 }, { "epoch": 0.65, "learning_rate": 1.822568693061772e-05, "loss": 0.6711, "step": 1960 }, { "epoch": 0.65, "learning_rate": 1.822364862886976e-05, "loss": 0.7666, "step": 1961 }, { "epoch": 0.65, "learning_rate": 1.8221609271125797e-05, "loss": 0.7051, "step": 1962 }, { "epoch": 0.65, "learning_rate": 1.8219568857647707e-05, "loss": 0.7144, "step": 1963 }, { "epoch": 0.65, "learning_rate": 1.8217527388697497e-05, "loss": 0.7188, "step": 1964 }, { "epoch": 0.65, "learning_rate": 1.821548486453731e-05, "loss": 0.749, "step": 1965 }, { "epoch": 0.65, "learning_rate": 1.8213441285429435e-05, "loss": 0.73, "step": 1966 }, { "epoch": 0.65, "learning_rate": 1.8211396651636276e-05, "loss": 0.7046, "step": 1967 }, { "epoch": 0.65, "learning_rate": 1.820935096342039e-05, "loss": 0.7666, "step": 1968 }, { "epoch": 0.65, "learning_rate": 1.820730422104446e-05, "loss": 0.7146, "step": 1969 }, { "epoch": 0.65, "learning_rate": 1.820525642477131e-05, "loss": 0.6948, "step": 1970 }, { "epoch": 0.65, "learning_rate": 1.820320757486389e-05, "loss": 0.7285, "step": 1971 }, { "epoch": 0.65, "learning_rate": 1.8201157671585302e-05, "loss": 0.7852, "step": 1972 }, { "epoch": 0.65, "learning_rate": 1.819910671519877e-05, "loss": 0.7031, "step": 1973 }, { "epoch": 0.66, "learning_rate": 1.8197054705967657e-05, "loss": 0.71, "step": 1974 }, { "epoch": 0.66, "learning_rate": 1.819500164415546e-05, "loss": 0.7593, "step": 1975 }, { "epoch": 0.66, "learning_rate": 1.8192947530025807e-05, "loss": 0.833, "step": 1976 }, { "epoch": 0.66, "learning_rate": 1.8190892363842474e-05, "loss": 0.6855, "step": 1977 }, { "epoch": 0.66, "learning_rate": 1.818883614586936e-05, "loss": 0.8047, "step": 1978 }, { "epoch": 0.66, "learning_rate": 1.8186778876370506e-05, "loss": 0.7891, "step": 1979 }, { "epoch": 0.66, "learning_rate": 1.8184720555610083e-05, "loss": 0.7783, "step": 1980 }, { "epoch": 0.66, "learning_rate": 1.81826611838524e-05, "loss": 0.7681, "step": 1981 }, { "epoch": 0.66, "learning_rate": 1.8180600761361897e-05, "loss": 0.6711, "step": 1982 }, { "epoch": 0.66, "learning_rate": 1.8178539288403157e-05, "loss": 0.8105, "step": 1983 }, { "epoch": 0.66, "learning_rate": 1.8176476765240893e-05, "loss": 0.6758, "step": 1984 }, { "epoch": 0.66, "learning_rate": 1.817441319213995e-05, "loss": 0.717, "step": 1985 }, { "epoch": 0.66, "learning_rate": 1.8172348569365312e-05, "loss": 0.7422, "step": 1986 }, { "epoch": 0.66, "learning_rate": 1.8170282897182097e-05, "loss": 0.7368, "step": 1987 }, { "epoch": 0.66, "learning_rate": 1.8168216175855557e-05, "loss": 0.7095, "step": 1988 }, { "epoch": 0.66, "learning_rate": 1.816614840565108e-05, "loss": 0.7625, "step": 1989 }, { "epoch": 0.66, "learning_rate": 1.8164079586834184e-05, "loss": 0.8052, "step": 1990 }, { "epoch": 0.66, "learning_rate": 1.8162009719670527e-05, "loss": 0.7769, "step": 1991 }, { "epoch": 0.66, "learning_rate": 1.8159938804425902e-05, "loss": 0.7705, "step": 1992 }, { "epoch": 0.66, "learning_rate": 1.8157866841366237e-05, "loss": 0.6838, "step": 1993 }, { "epoch": 0.66, "learning_rate": 1.8155793830757583e-05, "loss": 0.6365, "step": 1994 }, { "epoch": 0.66, "learning_rate": 1.8153719772866143e-05, "loss": 0.6982, "step": 1995 }, { "epoch": 0.66, "learning_rate": 1.815164466795824e-05, "loss": 0.7349, "step": 1996 }, { "epoch": 0.66, "learning_rate": 1.8149568516300343e-05, "loss": 0.792, "step": 1997 }, { "epoch": 0.66, "learning_rate": 1.814749131815905e-05, "loss": 0.7246, "step": 1998 }, { "epoch": 0.66, "learning_rate": 1.814541307380109e-05, "loss": 0.657, "step": 1999 }, { "epoch": 0.66, "learning_rate": 1.8143333783493326e-05, "loss": 0.8066, "step": 2000 }, { "epoch": 0.66, "learning_rate": 1.8141253447502766e-05, "loss": 0.7275, "step": 2001 }, { "epoch": 0.66, "learning_rate": 1.8139172066096543e-05, "loss": 0.7612, "step": 2002 }, { "epoch": 0.66, "learning_rate": 1.8137089639541926e-05, "loss": 0.7295, "step": 2003 }, { "epoch": 0.67, "learning_rate": 1.813500616810632e-05, "loss": 0.707, "step": 2004 }, { "epoch": 0.67, "learning_rate": 1.8132921652057263e-05, "loss": 0.751, "step": 2005 }, { "epoch": 0.67, "learning_rate": 1.8130836091662424e-05, "loss": 0.6982, "step": 2006 }, { "epoch": 0.67, "learning_rate": 1.8128749487189613e-05, "loss": 0.7275, "step": 2007 }, { "epoch": 0.67, "learning_rate": 1.8126661838906768e-05, "loss": 0.7383, "step": 2008 }, { "epoch": 0.67, "learning_rate": 1.8124573147081964e-05, "loss": 0.6934, "step": 2009 }, { "epoch": 0.67, "learning_rate": 1.8122483411983403e-05, "loss": 0.7466, "step": 2010 }, { "epoch": 0.67, "learning_rate": 1.812039263387944e-05, "loss": 0.7206, "step": 2011 }, { "epoch": 0.67, "learning_rate": 1.811830081303854e-05, "loss": 0.7749, "step": 2012 }, { "epoch": 0.67, "learning_rate": 1.811620794972932e-05, "loss": 0.7871, "step": 2013 }, { "epoch": 0.67, "learning_rate": 1.8114114044220515e-05, "loss": 0.7456, "step": 2014 }, { "epoch": 0.67, "learning_rate": 1.8112019096781016e-05, "loss": 0.7559, "step": 2015 }, { "epoch": 0.67, "learning_rate": 1.8109923107679823e-05, "loss": 0.7107, "step": 2016 }, { "epoch": 0.67, "learning_rate": 1.8107826077186085e-05, "loss": 0.751, "step": 2017 }, { "epoch": 0.67, "learning_rate": 1.8105728005569084e-05, "loss": 0.6938, "step": 2018 }, { "epoch": 0.67, "learning_rate": 1.8103628893098228e-05, "loss": 0.7212, "step": 2019 }, { "epoch": 0.67, "learning_rate": 1.8101528740043066e-05, "loss": 0.77, "step": 2020 }, { "epoch": 0.67, "learning_rate": 1.8099427546673277e-05, "loss": 0.729, "step": 2021 }, { "epoch": 0.67, "learning_rate": 1.8097325313258678e-05, "loss": 0.7275, "step": 2022 }, { "epoch": 0.67, "learning_rate": 1.8095222040069208e-05, "loss": 0.8188, "step": 2023 }, { "epoch": 0.67, "learning_rate": 1.8093117727374955e-05, "loss": 0.6787, "step": 2024 }, { "epoch": 0.67, "learning_rate": 1.809101237544613e-05, "loss": 0.7214, "step": 2025 }, { "epoch": 0.67, "learning_rate": 1.8088905984553086e-05, "loss": 0.7271, "step": 2026 }, { "epoch": 0.67, "learning_rate": 1.8086798554966292e-05, "loss": 0.7524, "step": 2027 }, { "epoch": 0.67, "learning_rate": 1.8084690086956374e-05, "loss": 0.7695, "step": 2028 }, { "epoch": 0.67, "learning_rate": 1.8082580580794077e-05, "loss": 0.6029, "step": 2029 }, { "epoch": 0.67, "learning_rate": 1.8080470036750275e-05, "loss": 0.7317, "step": 2030 }, { "epoch": 0.67, "learning_rate": 1.807835845509599e-05, "loss": 0.6948, "step": 2031 }, { "epoch": 0.67, "learning_rate": 1.8076245836102368e-05, "loss": 0.7476, "step": 2032 }, { "epoch": 0.67, "learning_rate": 1.8074132180040686e-05, "loss": 0.7422, "step": 2033 }, { "epoch": 0.68, "learning_rate": 1.8072017487182362e-05, "loss": 0.7412, "step": 2034 }, { "epoch": 0.68, "learning_rate": 1.8069901757798942e-05, "loss": 0.7231, "step": 2035 }, { "epoch": 0.68, "learning_rate": 1.806778499216211e-05, "loss": 0.7184, "step": 2036 }, { "epoch": 0.68, "learning_rate": 1.806566719054367e-05, "loss": 0.77, "step": 2037 }, { "epoch": 0.68, "learning_rate": 1.8063548353215574e-05, "loss": 0.7583, "step": 2038 }, { "epoch": 0.68, "learning_rate": 1.8061428480449898e-05, "loss": 0.7705, "step": 2039 }, { "epoch": 0.68, "learning_rate": 1.8059307572518857e-05, "loss": 0.645, "step": 2040 }, { "epoch": 0.68, "learning_rate": 1.80571856296948e-05, "loss": 0.7434, "step": 2041 }, { "epoch": 0.68, "learning_rate": 1.8055062652250195e-05, "loss": 0.7451, "step": 2042 }, { "epoch": 0.68, "learning_rate": 1.805293864045766e-05, "loss": 0.7147, "step": 2043 }, { "epoch": 0.68, "learning_rate": 1.8050813594589938e-05, "loss": 0.7305, "step": 2044 }, { "epoch": 0.68, "learning_rate": 1.8048687514919904e-05, "loss": 0.8101, "step": 2045 }, { "epoch": 0.68, "learning_rate": 1.804656040172057e-05, "loss": 0.6709, "step": 2046 }, { "epoch": 0.68, "learning_rate": 1.804443225526507e-05, "loss": 0.6907, "step": 2047 }, { "epoch": 0.68, "learning_rate": 1.8042303075826687e-05, "loss": 0.7495, "step": 2048 }, { "epoch": 0.68, "learning_rate": 1.804017286367883e-05, "loss": 0.6577, "step": 2049 }, { "epoch": 0.68, "learning_rate": 1.8038041619095027e-05, "loss": 0.7485, "step": 2050 }, { "epoch": 0.68, "learning_rate": 1.803590934234896e-05, "loss": 0.7681, "step": 2051 }, { "epoch": 0.68, "learning_rate": 1.8033776033714433e-05, "loss": 0.6963, "step": 2052 }, { "epoch": 0.68, "learning_rate": 1.803164169346538e-05, "loss": 0.7593, "step": 2053 }, { "epoch": 0.68, "learning_rate": 1.8029506321875875e-05, "loss": 0.7102, "step": 2054 }, { "epoch": 0.68, "learning_rate": 1.802736991922012e-05, "loss": 0.7671, "step": 2055 }, { "epoch": 0.68, "learning_rate": 1.8025232485772446e-05, "loss": 0.6606, "step": 2056 }, { "epoch": 0.68, "learning_rate": 1.802309402180732e-05, "loss": 0.7183, "step": 2057 }, { "epoch": 0.68, "learning_rate": 1.8020954527599347e-05, "loss": 0.718, "step": 2058 }, { "epoch": 0.68, "learning_rate": 1.801881400342325e-05, "loss": 0.7471, "step": 2059 }, { "epoch": 0.68, "learning_rate": 1.8016672449553904e-05, "loss": 0.772, "step": 2060 }, { "epoch": 0.68, "learning_rate": 1.80145298662663e-05, "loss": 0.7671, "step": 2061 }, { "epoch": 0.68, "learning_rate": 1.801238625383556e-05, "loss": 0.7153, "step": 2062 }, { "epoch": 0.68, "learning_rate": 1.8010241612536953e-05, "loss": 0.7253, "step": 2063 }, { "epoch": 0.69, "learning_rate": 1.800809594264587e-05, "loss": 0.7573, "step": 2064 }, { "epoch": 0.69, "learning_rate": 1.8005949244437836e-05, "loss": 0.7573, "step": 2065 }, { "epoch": 0.69, "learning_rate": 1.8003801518188507e-05, "loss": 0.7412, "step": 2066 }, { "epoch": 0.69, "learning_rate": 1.8001652764173673e-05, "loss": 0.7593, "step": 2067 }, { "epoch": 0.69, "learning_rate": 1.799950298266925e-05, "loss": 0.7163, "step": 2068 }, { "epoch": 0.69, "learning_rate": 1.7997352173951294e-05, "loss": 0.7417, "step": 2069 }, { "epoch": 0.69, "learning_rate": 1.799520033829599e-05, "loss": 0.6943, "step": 2070 }, { "epoch": 0.69, "learning_rate": 1.7993047475979655e-05, "loss": 0.7271, "step": 2071 }, { "epoch": 0.69, "learning_rate": 1.799089358727874e-05, "loss": 0.7256, "step": 2072 }, { "epoch": 0.69, "learning_rate": 1.7988738672469818e-05, "loss": 0.7842, "step": 2073 }, { "epoch": 0.69, "learning_rate": 1.7986582731829604e-05, "loss": 0.688, "step": 2074 }, { "epoch": 0.69, "learning_rate": 1.7984425765634946e-05, "loss": 0.7607, "step": 2075 }, { "epoch": 0.69, "learning_rate": 1.7982267774162813e-05, "loss": 0.7358, "step": 2076 }, { "epoch": 0.69, "learning_rate": 1.7980108757690314e-05, "loss": 0.6404, "step": 2077 }, { "epoch": 0.69, "learning_rate": 1.7977948716494688e-05, "loss": 0.7593, "step": 2078 }, { "epoch": 0.69, "learning_rate": 1.797578765085331e-05, "loss": 0.7783, "step": 2079 }, { "epoch": 0.69, "learning_rate": 1.7973625561043672e-05, "loss": 0.616, "step": 2080 }, { "epoch": 0.69, "learning_rate": 1.7971462447343414e-05, "loss": 0.751, "step": 2081 }, { "epoch": 0.69, "learning_rate": 1.79692983100303e-05, "loss": 0.7339, "step": 2082 }, { "epoch": 0.69, "learning_rate": 1.7967133149382222e-05, "loss": 0.7847, "step": 2083 }, { "epoch": 0.69, "learning_rate": 1.7964966965677213e-05, "loss": 0.6934, "step": 2084 }, { "epoch": 0.69, "learning_rate": 1.796279975919343e-05, "loss": 0.7036, "step": 2085 }, { "epoch": 0.69, "learning_rate": 1.7960631530209164e-05, "loss": 0.7476, "step": 2086 }, { "epoch": 0.69, "learning_rate": 1.7958462279002834e-05, "loss": 0.7646, "step": 2087 }, { "epoch": 0.69, "learning_rate": 1.7956292005852992e-05, "loss": 0.7124, "step": 2088 }, { "epoch": 0.69, "learning_rate": 1.7954120711038327e-05, "loss": 0.7305, "step": 2089 }, { "epoch": 0.69, "learning_rate": 1.7951948394837654e-05, "loss": 0.709, "step": 2090 }, { "epoch": 0.69, "learning_rate": 1.794977505752991e-05, "loss": 0.6863, "step": 2091 }, { "epoch": 0.69, "learning_rate": 1.7947600699394184e-05, "loss": 0.7351, "step": 2092 }, { "epoch": 0.69, "learning_rate": 1.794542532070968e-05, "loss": 0.7769, "step": 2093 }, { "epoch": 0.69, "learning_rate": 1.794324892175574e-05, "loss": 0.7297, "step": 2094 }, { "epoch": 0.7, "learning_rate": 1.794107150281183e-05, "loss": 0.6982, "step": 2095 }, { "epoch": 0.7, "learning_rate": 1.7938893064157555e-05, "loss": 0.6831, "step": 2096 }, { "epoch": 0.7, "learning_rate": 1.7936713606072646e-05, "loss": 0.6699, "step": 2097 }, { "epoch": 0.7, "learning_rate": 1.793453312883697e-05, "loss": 0.7622, "step": 2098 }, { "epoch": 0.7, "learning_rate": 1.7932351632730512e-05, "loss": 0.7324, "step": 2099 }, { "epoch": 0.7, "learning_rate": 1.7930169118033413e-05, "loss": 0.7446, "step": 2100 }, { "epoch": 0.7, "learning_rate": 1.792798558502591e-05, "loss": 0.6643, "step": 2101 }, { "epoch": 0.7, "learning_rate": 1.7925801033988406e-05, "loss": 0.7642, "step": 2102 }, { "epoch": 0.7, "learning_rate": 1.7923615465201408e-05, "loss": 0.7046, "step": 2103 }, { "epoch": 0.7, "learning_rate": 1.792142887894557e-05, "loss": 0.7852, "step": 2104 }, { "epoch": 0.7, "learning_rate": 1.7919241275501672e-05, "loss": 0.689, "step": 2105 }, { "epoch": 0.7, "learning_rate": 1.7917052655150617e-05, "loss": 0.7832, "step": 2106 }, { "epoch": 0.7, "learning_rate": 1.791486301817345e-05, "loss": 0.7012, "step": 2107 }, { "epoch": 0.7, "learning_rate": 1.791267236485134e-05, "loss": 0.7588, "step": 2108 }, { "epoch": 0.7, "learning_rate": 1.791048069546559e-05, "loss": 0.8516, "step": 2109 }, { "epoch": 0.7, "learning_rate": 1.7908288010297627e-05, "loss": 0.7402, "step": 2110 }, { "epoch": 0.7, "learning_rate": 1.7906094309629013e-05, "loss": 0.7251, "step": 2111 }, { "epoch": 0.7, "learning_rate": 1.7903899593741446e-05, "loss": 0.7173, "step": 2112 }, { "epoch": 0.7, "learning_rate": 1.7901703862916746e-05, "loss": 0.6615, "step": 2113 }, { "epoch": 0.7, "learning_rate": 1.7899507117436865e-05, "loss": 0.7417, "step": 2114 }, { "epoch": 0.7, "learning_rate": 1.7897309357583887e-05, "loss": 0.7168, "step": 2115 }, { "epoch": 0.7, "learning_rate": 1.7895110583640024e-05, "loss": 0.7095, "step": 2116 }, { "epoch": 0.7, "learning_rate": 1.7892910795887624e-05, "loss": 0.7305, "step": 2117 }, { "epoch": 0.7, "learning_rate": 1.789070999460916e-05, "loss": 0.7178, "step": 2118 }, { "epoch": 0.7, "learning_rate": 1.7888508180087233e-05, "loss": 0.7197, "step": 2119 }, { "epoch": 0.7, "learning_rate": 1.788630535260458e-05, "loss": 0.6843, "step": 2120 }, { "epoch": 0.7, "learning_rate": 1.788410151244406e-05, "loss": 0.6816, "step": 2121 }, { "epoch": 0.7, "learning_rate": 1.7881896659888676e-05, "loss": 0.7012, "step": 2122 }, { "epoch": 0.7, "learning_rate": 1.7879690795221548e-05, "loss": 0.7363, "step": 2123 }, { "epoch": 0.7, "learning_rate": 1.7877483918725927e-05, "loss": 0.7158, "step": 2124 }, { "epoch": 0.71, "learning_rate": 1.7875276030685208e-05, "loss": 0.7007, "step": 2125 }, { "epoch": 0.71, "learning_rate": 1.787306713138289e-05, "loss": 0.6826, "step": 2126 }, { "epoch": 0.71, "learning_rate": 1.787085722110263e-05, "loss": 0.7466, "step": 2127 }, { "epoch": 0.71, "learning_rate": 1.7868646300128198e-05, "loss": 0.7261, "step": 2128 }, { "epoch": 0.71, "learning_rate": 1.7866434368743493e-05, "loss": 0.6843, "step": 2129 }, { "epoch": 0.71, "learning_rate": 1.786422142723255e-05, "loss": 0.7275, "step": 2130 }, { "epoch": 0.71, "learning_rate": 1.786200747587954e-05, "loss": 0.7705, "step": 2131 }, { "epoch": 0.71, "learning_rate": 1.7859792514968747e-05, "loss": 0.7378, "step": 2132 }, { "epoch": 0.71, "learning_rate": 1.7857576544784593e-05, "loss": 0.6985, "step": 2133 }, { "epoch": 0.71, "learning_rate": 1.7855359565611637e-05, "loss": 0.7065, "step": 2134 }, { "epoch": 0.71, "learning_rate": 1.7853141577734555e-05, "loss": 0.7778, "step": 2135 }, { "epoch": 0.71, "learning_rate": 1.7850922581438163e-05, "loss": 0.7188, "step": 2136 }, { "epoch": 0.71, "learning_rate": 1.784870257700739e-05, "loss": 0.665, "step": 2137 }, { "epoch": 0.71, "learning_rate": 1.7846481564727326e-05, "loss": 0.7144, "step": 2138 }, { "epoch": 0.71, "learning_rate": 1.7844259544883148e-05, "loss": 0.7256, "step": 2139 }, { "epoch": 0.71, "learning_rate": 1.7842036517760203e-05, "loss": 0.6895, "step": 2140 }, { "epoch": 0.71, "learning_rate": 1.783981248364394e-05, "loss": 0.7026, "step": 2141 }, { "epoch": 0.71, "learning_rate": 1.7837587442819946e-05, "loss": 0.7095, "step": 2142 }, { "epoch": 0.71, "learning_rate": 1.7835361395573943e-05, "loss": 0.6914, "step": 2143 }, { "epoch": 0.71, "learning_rate": 1.7833134342191773e-05, "loss": 0.7156, "step": 2144 }, { "epoch": 0.71, "learning_rate": 1.7830906282959412e-05, "loss": 0.7432, "step": 2145 }, { "epoch": 0.71, "learning_rate": 1.7828677218162963e-05, "loss": 0.7239, "step": 2146 }, { "epoch": 0.71, "learning_rate": 1.7826447148088662e-05, "loss": 0.7666, "step": 2147 }, { "epoch": 0.71, "learning_rate": 1.7824216073022875e-05, "loss": 0.771, "step": 2148 }, { "epoch": 0.71, "learning_rate": 1.7821983993252084e-05, "loss": 0.6724, "step": 2149 }, { "epoch": 0.71, "learning_rate": 1.7819750909062917e-05, "loss": 0.7246, "step": 2150 }, { "epoch": 0.71, "learning_rate": 1.781751682074212e-05, "loss": 0.6287, "step": 2151 }, { "epoch": 0.71, "learning_rate": 1.7815281728576575e-05, "loss": 0.6782, "step": 2152 }, { "epoch": 0.71, "learning_rate": 1.7813045632853284e-05, "loss": 0.7705, "step": 2153 }, { "epoch": 0.71, "learning_rate": 1.7810808533859392e-05, "loss": 0.7358, "step": 2154 }, { "epoch": 0.72, "learning_rate": 1.7808570431882156e-05, "loss": 0.7393, "step": 2155 }, { "epoch": 0.72, "learning_rate": 1.780633132720897e-05, "loss": 0.7031, "step": 2156 }, { "epoch": 0.72, "learning_rate": 1.7804091220127366e-05, "loss": 0.7261, "step": 2157 }, { "epoch": 0.72, "learning_rate": 1.7801850110924985e-05, "loss": 0.6802, "step": 2158 }, { "epoch": 0.72, "learning_rate": 1.779960799988961e-05, "loss": 0.7329, "step": 2159 }, { "epoch": 0.72, "learning_rate": 1.7797364887309155e-05, "loss": 0.7646, "step": 2160 }, { "epoch": 0.72, "learning_rate": 1.779512077347165e-05, "loss": 0.7358, "step": 2161 }, { "epoch": 0.72, "learning_rate": 1.7792875658665263e-05, "loss": 0.6912, "step": 2162 }, { "epoch": 0.72, "learning_rate": 1.779062954317829e-05, "loss": 0.7549, "step": 2163 }, { "epoch": 0.72, "learning_rate": 1.7788382427299158e-05, "loss": 0.6582, "step": 2164 }, { "epoch": 0.72, "learning_rate": 1.778613431131641e-05, "loss": 0.7666, "step": 2165 }, { "epoch": 0.72, "learning_rate": 1.7783885195518728e-05, "loss": 0.6743, "step": 2166 }, { "epoch": 0.72, "learning_rate": 1.7781635080194926e-05, "loss": 0.6743, "step": 2167 }, { "epoch": 0.72, "learning_rate": 1.7779383965633933e-05, "loss": 0.7283, "step": 2168 }, { "epoch": 0.72, "learning_rate": 1.777713185212482e-05, "loss": 0.7725, "step": 2169 }, { "epoch": 0.72, "learning_rate": 1.7774878739956776e-05, "loss": 0.6921, "step": 2170 }, { "epoch": 0.72, "learning_rate": 1.7772624629419123e-05, "loss": 0.7617, "step": 2171 }, { "epoch": 0.72, "learning_rate": 1.7770369520801315e-05, "loss": 0.7178, "step": 2172 }, { "epoch": 0.72, "learning_rate": 1.7768113414392925e-05, "loss": 0.8008, "step": 2173 }, { "epoch": 0.72, "learning_rate": 1.7765856310483656e-05, "loss": 0.7856, "step": 2174 }, { "epoch": 0.72, "learning_rate": 1.776359820936335e-05, "loss": 0.688, "step": 2175 }, { "epoch": 0.72, "learning_rate": 1.7761339111321965e-05, "loss": 0.8003, "step": 2176 }, { "epoch": 0.72, "learning_rate": 1.775907901664959e-05, "loss": 0.729, "step": 2177 }, { "epoch": 0.72, "learning_rate": 1.775681792563644e-05, "loss": 0.7544, "step": 2178 }, { "epoch": 0.72, "learning_rate": 1.7754555838572875e-05, "loss": 0.678, "step": 2179 }, { "epoch": 0.72, "learning_rate": 1.7752292755749353e-05, "loss": 0.749, "step": 2180 }, { "epoch": 0.72, "learning_rate": 1.775002867745648e-05, "loss": 0.7705, "step": 2181 }, { "epoch": 0.72, "learning_rate": 1.774776360398499e-05, "loss": 0.7344, "step": 2182 }, { "epoch": 0.72, "learning_rate": 1.7745497535625736e-05, "loss": 0.7212, "step": 2183 }, { "epoch": 0.72, "learning_rate": 1.774323047266971e-05, "loss": 0.6968, "step": 2184 }, { "epoch": 0.73, "learning_rate": 1.774096241540801e-05, "loss": 0.772, "step": 2185 }, { "epoch": 0.73, "learning_rate": 1.7738693364131893e-05, "loss": 0.7036, "step": 2186 }, { "epoch": 0.73, "learning_rate": 1.773642331913272e-05, "loss": 0.7217, "step": 2187 }, { "epoch": 0.73, "learning_rate": 1.773415228070198e-05, "loss": 0.7031, "step": 2188 }, { "epoch": 0.73, "learning_rate": 1.773188024913131e-05, "loss": 0.7876, "step": 2189 }, { "epoch": 0.73, "learning_rate": 1.772960722471245e-05, "loss": 0.7964, "step": 2190 }, { "epoch": 0.73, "learning_rate": 1.7727333207737282e-05, "loss": 0.7427, "step": 2191 }, { "epoch": 0.73, "learning_rate": 1.7725058198497817e-05, "loss": 0.6985, "step": 2192 }, { "epoch": 0.73, "learning_rate": 1.772278219728618e-05, "loss": 0.6267, "step": 2193 }, { "epoch": 0.73, "learning_rate": 1.7720505204394635e-05, "loss": 0.7642, "step": 2194 }, { "epoch": 0.73, "learning_rate": 1.771822722011557e-05, "loss": 0.7075, "step": 2195 }, { "epoch": 0.73, "learning_rate": 1.7715948244741495e-05, "loss": 0.7217, "step": 2196 }, { "epoch": 0.73, "learning_rate": 1.7713668278565062e-05, "loss": 0.687, "step": 2197 }, { "epoch": 0.73, "learning_rate": 1.7711387321879034e-05, "loss": 0.7061, "step": 2198 }, { "epoch": 0.73, "learning_rate": 1.7709105374976312e-05, "loss": 0.6453, "step": 2199 }, { "epoch": 0.73, "learning_rate": 1.7706822438149918e-05, "loss": 0.6428, "step": 2200 }, { "epoch": 0.73, "learning_rate": 1.7704538511693e-05, "loss": 0.7581, "step": 2201 }, { "epoch": 0.73, "learning_rate": 1.7702253595898842e-05, "loss": 0.6768, "step": 2202 }, { "epoch": 0.73, "learning_rate": 1.7699967691060846e-05, "loss": 0.7212, "step": 2203 }, { "epoch": 0.73, "learning_rate": 1.769768079747255e-05, "loss": 0.7573, "step": 2204 }, { "epoch": 0.73, "learning_rate": 1.76953929154276e-05, "loss": 0.6909, "step": 2205 }, { "epoch": 0.73, "learning_rate": 1.7693104045219792e-05, "loss": 0.6865, "step": 2206 }, { "epoch": 0.73, "learning_rate": 1.7690814187143038e-05, "loss": 0.7512, "step": 2207 }, { "epoch": 0.73, "learning_rate": 1.768852334149138e-05, "loss": 0.6719, "step": 2208 }, { "epoch": 0.73, "learning_rate": 1.768623150855898e-05, "loss": 0.8062, "step": 2209 }, { "epoch": 0.73, "learning_rate": 1.7683938688640133e-05, "loss": 0.7329, "step": 2210 }, { "epoch": 0.73, "learning_rate": 1.768164488202926e-05, "loss": 0.8403, "step": 2211 }, { "epoch": 0.73, "learning_rate": 1.767935008902091e-05, "loss": 0.7603, "step": 2212 }, { "epoch": 0.73, "learning_rate": 1.767705430990975e-05, "loss": 0.7271, "step": 2213 }, { "epoch": 0.73, "learning_rate": 1.7674757544990588e-05, "loss": 0.7083, "step": 2214 }, { "epoch": 0.74, "learning_rate": 1.7672459794558347e-05, "loss": 0.7134, "step": 2215 }, { "epoch": 0.74, "learning_rate": 1.767016105890808e-05, "loss": 0.7207, "step": 2216 }, { "epoch": 0.74, "learning_rate": 1.766786133833497e-05, "loss": 0.6479, "step": 2217 }, { "epoch": 0.74, "learning_rate": 1.766556063313432e-05, "loss": 0.677, "step": 2218 }, { "epoch": 0.74, "learning_rate": 1.766325894360156e-05, "loss": 0.7656, "step": 2219 }, { "epoch": 0.74, "learning_rate": 1.7660956270032256e-05, "loss": 0.7285, "step": 2220 }, { "epoch": 0.74, "learning_rate": 1.7658652612722092e-05, "loss": 0.7446, "step": 2221 }, { "epoch": 0.74, "learning_rate": 1.7656347971966876e-05, "loss": 0.7114, "step": 2222 }, { "epoch": 0.74, "learning_rate": 1.765404234806255e-05, "loss": 0.6364, "step": 2223 }, { "epoch": 0.74, "learning_rate": 1.7651735741305175e-05, "loss": 0.75, "step": 2224 }, { "epoch": 0.74, "learning_rate": 1.7649428151990946e-05, "loss": 0.7129, "step": 2225 }, { "epoch": 0.74, "learning_rate": 1.7647119580416176e-05, "loss": 0.6411, "step": 2226 }, { "epoch": 0.74, "learning_rate": 1.764481002687731e-05, "loss": 0.7568, "step": 2227 }, { "epoch": 0.74, "learning_rate": 1.7642499491670916e-05, "loss": 0.6892, "step": 2228 }, { "epoch": 0.74, "learning_rate": 1.764018797509369e-05, "loss": 0.6929, "step": 2229 }, { "epoch": 0.74, "learning_rate": 1.763787547744245e-05, "loss": 0.7112, "step": 2230 }, { "epoch": 0.74, "learning_rate": 1.763556199901415e-05, "loss": 0.7529, "step": 2231 }, { "epoch": 0.74, "learning_rate": 1.7633247540105857e-05, "loss": 0.7007, "step": 2232 }, { "epoch": 0.74, "learning_rate": 1.763093210101477e-05, "loss": 0.7173, "step": 2233 }, { "epoch": 0.74, "learning_rate": 1.7628615682038214e-05, "loss": 0.6914, "step": 2234 }, { "epoch": 0.74, "learning_rate": 1.7626298283473646e-05, "loss": 0.7764, "step": 2235 }, { "epoch": 0.74, "learning_rate": 1.7623979905618633e-05, "loss": 0.7163, "step": 2236 }, { "epoch": 0.74, "learning_rate": 1.762166054877088e-05, "loss": 0.7344, "step": 2237 }, { "epoch": 0.74, "learning_rate": 1.761934021322822e-05, "loss": 0.5466, "step": 2238 }, { "epoch": 0.74, "learning_rate": 1.7617018899288602e-05, "loss": 0.7666, "step": 2239 }, { "epoch": 0.74, "learning_rate": 1.7614696607250102e-05, "loss": 0.6721, "step": 2240 }, { "epoch": 0.74, "learning_rate": 1.761237333741093e-05, "loss": 0.6643, "step": 2241 }, { "epoch": 0.74, "learning_rate": 1.7610049090069418e-05, "loss": 0.7524, "step": 2242 }, { "epoch": 0.74, "learning_rate": 1.7607723865524016e-05, "loss": 0.6816, "step": 2243 }, { "epoch": 0.74, "learning_rate": 1.760539766407331e-05, "loss": 0.7231, "step": 2244 }, { "epoch": 0.75, "learning_rate": 1.7603070486016e-05, "loss": 0.7048, "step": 2245 }, { "epoch": 0.75, "learning_rate": 1.760074233165093e-05, "loss": 0.6655, "step": 2246 }, { "epoch": 0.75, "learning_rate": 1.7598413201277047e-05, "loss": 0.7131, "step": 2247 }, { "epoch": 0.75, "learning_rate": 1.7596083095193438e-05, "loss": 0.7773, "step": 2248 }, { "epoch": 0.75, "learning_rate": 1.759375201369931e-05, "loss": 0.6816, "step": 2249 }, { "epoch": 0.75, "learning_rate": 1.7591419957093997e-05, "loss": 0.708, "step": 2250 }, { "epoch": 0.75, "learning_rate": 1.7589086925676957e-05, "loss": 0.7588, "step": 2251 }, { "epoch": 0.75, "learning_rate": 1.7586752919747776e-05, "loss": 0.7451, "step": 2252 }, { "epoch": 0.75, "learning_rate": 1.758441793960616e-05, "loss": 0.7378, "step": 2253 }, { "epoch": 0.75, "learning_rate": 1.7582081985551945e-05, "loss": 0.7671, "step": 2254 }, { "epoch": 0.75, "learning_rate": 1.7579745057885087e-05, "loss": 0.7109, "step": 2255 }, { "epoch": 0.75, "learning_rate": 1.7577407156905675e-05, "loss": 0.655, "step": 2256 }, { "epoch": 0.75, "learning_rate": 1.7575068282913913e-05, "loss": 0.7415, "step": 2257 }, { "epoch": 0.75, "learning_rate": 1.7572728436210142e-05, "loss": 0.7437, "step": 2258 }, { "epoch": 0.75, "learning_rate": 1.757038761709481e-05, "loss": 0.6184, "step": 2259 }, { "epoch": 0.75, "learning_rate": 1.7568045825868513e-05, "loss": 0.7827, "step": 2260 }, { "epoch": 0.75, "learning_rate": 1.7565703062831954e-05, "loss": 0.7417, "step": 2261 }, { "epoch": 0.75, "learning_rate": 1.7563359328285962e-05, "loss": 0.7637, "step": 2262 }, { "epoch": 0.75, "learning_rate": 1.75610146225315e-05, "loss": 0.7295, "step": 2263 }, { "epoch": 0.75, "learning_rate": 1.755866894586966e-05, "loss": 0.7974, "step": 2264 }, { "epoch": 0.75, "learning_rate": 1.7556322298601628e-05, "loss": 0.7017, "step": 2265 }, { "epoch": 0.75, "learning_rate": 1.7553974681028754e-05, "loss": 0.7471, "step": 2266 }, { "epoch": 0.75, "learning_rate": 1.755162609345249e-05, "loss": 0.7559, "step": 2267 }, { "epoch": 0.75, "learning_rate": 1.754927653617441e-05, "loss": 0.7559, "step": 2268 }, { "epoch": 0.75, "learning_rate": 1.7546926009496233e-05, "loss": 0.707, "step": 2269 }, { "epoch": 0.75, "learning_rate": 1.754457451371978e-05, "loss": 0.6985, "step": 2270 }, { "epoch": 0.75, "learning_rate": 1.754222204914701e-05, "loss": 0.6934, "step": 2271 }, { "epoch": 0.75, "learning_rate": 1.753986861608e-05, "loss": 0.6213, "step": 2272 }, { "epoch": 0.75, "learning_rate": 1.7537514214820953e-05, "loss": 0.7847, "step": 2273 }, { "epoch": 0.75, "learning_rate": 1.75351588456722e-05, "loss": 0.7041, "step": 2274 }, { "epoch": 0.76, "learning_rate": 1.7532802508936187e-05, "loss": 0.6755, "step": 2275 }, { "epoch": 0.76, "learning_rate": 1.7530445204915495e-05, "loss": 0.7446, "step": 2276 }, { "epoch": 0.76, "learning_rate": 1.752808693391283e-05, "loss": 0.7397, "step": 2277 }, { "epoch": 0.76, "learning_rate": 1.7525727696231007e-05, "loss": 0.7744, "step": 2278 }, { "epoch": 0.76, "learning_rate": 1.752336749217298e-05, "loss": 0.6185, "step": 2279 }, { "epoch": 0.76, "learning_rate": 1.7521006322041822e-05, "loss": 0.6851, "step": 2280 }, { "epoch": 0.76, "learning_rate": 1.7518644186140727e-05, "loss": 0.7886, "step": 2281 }, { "epoch": 0.76, "learning_rate": 1.751628108477302e-05, "loss": 0.6381, "step": 2282 }, { "epoch": 0.76, "learning_rate": 1.7513917018242144e-05, "loss": 0.6941, "step": 2283 }, { "epoch": 0.76, "learning_rate": 1.7511551986851666e-05, "loss": 0.6672, "step": 2284 }, { "epoch": 0.76, "learning_rate": 1.7509185990905285e-05, "loss": 0.6729, "step": 2285 }, { "epoch": 0.76, "learning_rate": 1.7506819030706813e-05, "loss": 0.7065, "step": 2286 }, { "epoch": 0.76, "learning_rate": 1.7504451106560188e-05, "loss": 0.7446, "step": 2287 }, { "epoch": 0.76, "learning_rate": 1.750208221876948e-05, "loss": 0.6519, "step": 2288 }, { "epoch": 0.76, "learning_rate": 1.7499712367638876e-05, "loss": 0.7798, "step": 2289 }, { "epoch": 0.76, "learning_rate": 1.7497341553472688e-05, "loss": 0.6865, "step": 2290 }, { "epoch": 0.76, "learning_rate": 1.7494969776575347e-05, "loss": 0.7451, "step": 2291 }, { "epoch": 0.76, "learning_rate": 1.7492597037251416e-05, "loss": 0.5564, "step": 2292 }, { "epoch": 0.76, "learning_rate": 1.7490223335805575e-05, "loss": 0.7422, "step": 2293 }, { "epoch": 0.76, "learning_rate": 1.748784867254264e-05, "loss": 0.7324, "step": 2294 }, { "epoch": 0.76, "learning_rate": 1.7485473047767525e-05, "loss": 0.7837, "step": 2295 }, { "epoch": 0.76, "learning_rate": 1.7483096461785295e-05, "loss": 0.6851, "step": 2296 }, { "epoch": 0.76, "learning_rate": 1.748071891490112e-05, "loss": 0.7734, "step": 2297 }, { "epoch": 0.76, "learning_rate": 1.7478340407420304e-05, "loss": 0.7466, "step": 2298 }, { "epoch": 0.76, "learning_rate": 1.7475960939648272e-05, "loss": 0.7661, "step": 2299 }, { "epoch": 0.76, "learning_rate": 1.7473580511890565e-05, "loss": 0.7847, "step": 2300 }, { "epoch": 0.76, "learning_rate": 1.7471199124452854e-05, "loss": 0.7788, "step": 2301 }, { "epoch": 0.76, "learning_rate": 1.7468816777640937e-05, "loss": 0.7192, "step": 2302 }, { "epoch": 0.76, "learning_rate": 1.7466433471760726e-05, "loss": 0.7144, "step": 2303 }, { "epoch": 0.76, "learning_rate": 1.746404920711826e-05, "loss": 0.6169, "step": 2304 }, { "epoch": 0.77, "learning_rate": 1.7461663984019708e-05, "loss": 0.7166, "step": 2305 }, { "epoch": 0.77, "learning_rate": 1.7459277802771347e-05, "loss": 0.6982, "step": 2306 }, { "epoch": 0.77, "learning_rate": 1.7456890663679593e-05, "loss": 0.73, "step": 2307 }, { "epoch": 0.77, "learning_rate": 1.7454502567050973e-05, "loss": 0.769, "step": 2308 }, { "epoch": 0.77, "learning_rate": 1.745211351319214e-05, "loss": 0.6758, "step": 2309 }, { "epoch": 0.77, "learning_rate": 1.7449723502409877e-05, "loss": 0.7007, "step": 2310 }, { "epoch": 0.77, "learning_rate": 1.7447332535011084e-05, "loss": 0.7671, "step": 2311 }, { "epoch": 0.77, "learning_rate": 1.7444940611302783e-05, "loss": 0.7603, "step": 2312 }, { "epoch": 0.77, "learning_rate": 1.744254773159212e-05, "loss": 0.6719, "step": 2313 }, { "epoch": 0.77, "learning_rate": 1.744015389618636e-05, "loss": 0.6968, "step": 2314 }, { "epoch": 0.77, "learning_rate": 1.7437759105392903e-05, "loss": 0.6772, "step": 2315 }, { "epoch": 0.77, "learning_rate": 1.7435363359519256e-05, "loss": 0.7314, "step": 2316 }, { "epoch": 0.77, "learning_rate": 1.7432966658873063e-05, "loss": 0.6938, "step": 2317 }, { "epoch": 0.77, "learning_rate": 1.7430569003762077e-05, "loss": 0.6772, "step": 2318 }, { "epoch": 0.77, "learning_rate": 1.742817039449418e-05, "loss": 0.6279, "step": 2319 }, { "epoch": 0.77, "learning_rate": 1.742577083137738e-05, "loss": 0.7246, "step": 2320 }, { "epoch": 0.77, "learning_rate": 1.7423370314719802e-05, "loss": 0.8262, "step": 2321 }, { "epoch": 0.77, "learning_rate": 1.7420968844829697e-05, "loss": 0.7144, "step": 2322 }, { "epoch": 0.77, "learning_rate": 1.741856642201544e-05, "loss": 0.635, "step": 2323 }, { "epoch": 0.77, "learning_rate": 1.7416163046585514e-05, "loss": 0.645, "step": 2324 }, { "epoch": 0.77, "learning_rate": 1.741375871884855e-05, "loss": 0.7351, "step": 2325 }, { "epoch": 0.77, "learning_rate": 1.741135343911328e-05, "loss": 0.7788, "step": 2326 }, { "epoch": 0.77, "learning_rate": 1.7408947207688563e-05, "loss": 0.7397, "step": 2327 }, { "epoch": 0.77, "learning_rate": 1.7406540024883388e-05, "loss": 0.6396, "step": 2328 }, { "epoch": 0.77, "learning_rate": 1.7404131891006852e-05, "loss": 0.7334, "step": 2329 }, { "epoch": 0.77, "learning_rate": 1.7401722806368193e-05, "loss": 0.7144, "step": 2330 }, { "epoch": 0.77, "learning_rate": 1.7399312771276754e-05, "loss": 0.7312, "step": 2331 }, { "epoch": 0.77, "learning_rate": 1.739690178604201e-05, "loss": 0.7056, "step": 2332 }, { "epoch": 0.77, "learning_rate": 1.7394489850973552e-05, "loss": 0.6334, "step": 2333 }, { "epoch": 0.77, "learning_rate": 1.73920769663811e-05, "loss": 0.7542, "step": 2334 }, { "epoch": 0.77, "learning_rate": 1.7389663132574486e-05, "loss": 0.7327, "step": 2335 }, { "epoch": 0.78, "learning_rate": 1.7387248349863676e-05, "loss": 0.6938, "step": 2336 }, { "epoch": 0.78, "learning_rate": 1.7384832618558743e-05, "loss": 0.6982, "step": 2337 }, { "epoch": 0.78, "learning_rate": 1.7382415938969903e-05, "loss": 0.6688, "step": 2338 }, { "epoch": 0.78, "learning_rate": 1.7379998311407466e-05, "loss": 0.7183, "step": 2339 }, { "epoch": 0.78, "learning_rate": 1.737757973618189e-05, "loss": 0.729, "step": 2340 }, { "epoch": 0.78, "learning_rate": 1.737516021360374e-05, "loss": 0.7661, "step": 2341 }, { "epoch": 0.78, "learning_rate": 1.7372739743983702e-05, "loss": 0.7432, "step": 2342 }, { "epoch": 0.78, "learning_rate": 1.7370318327632593e-05, "loss": 0.7302, "step": 2343 }, { "epoch": 0.78, "learning_rate": 1.736789596486135e-05, "loss": 0.7661, "step": 2344 }, { "epoch": 0.78, "learning_rate": 1.7365472655981012e-05, "loss": 0.6919, "step": 2345 }, { "epoch": 0.78, "learning_rate": 1.7363048401302775e-05, "loss": 0.7202, "step": 2346 }, { "epoch": 0.78, "learning_rate": 1.736062320113792e-05, "loss": 0.7373, "step": 2347 }, { "epoch": 0.78, "learning_rate": 1.735819705579788e-05, "loss": 0.7334, "step": 2348 }, { "epoch": 0.78, "learning_rate": 1.7355769965594188e-05, "loss": 0.7231, "step": 2349 }, { "epoch": 0.78, "learning_rate": 1.7353341930838506e-05, "loss": 0.7588, "step": 2350 }, { "epoch": 0.78, "learning_rate": 1.735091295184262e-05, "loss": 0.7168, "step": 2351 }, { "epoch": 0.78, "learning_rate": 1.734848302891843e-05, "loss": 0.6829, "step": 2352 }, { "epoch": 0.78, "learning_rate": 1.7346052162377963e-05, "loss": 0.8242, "step": 2353 }, { "epoch": 0.78, "learning_rate": 1.7343620352533367e-05, "loss": 0.7695, "step": 2354 }, { "epoch": 0.78, "learning_rate": 1.734118759969691e-05, "loss": 0.7739, "step": 2355 }, { "epoch": 0.78, "learning_rate": 1.7338753904180982e-05, "loss": 0.6721, "step": 2356 }, { "epoch": 0.78, "learning_rate": 1.7336319266298093e-05, "loss": 0.6863, "step": 2357 }, { "epoch": 0.78, "learning_rate": 1.7333883686360874e-05, "loss": 0.6982, "step": 2358 }, { "epoch": 0.78, "learning_rate": 1.7331447164682073e-05, "loss": 0.6709, "step": 2359 }, { "epoch": 0.78, "learning_rate": 1.7329009701574565e-05, "loss": 0.6841, "step": 2360 }, { "epoch": 0.78, "learning_rate": 1.7326571297351348e-05, "loss": 0.6843, "step": 2361 }, { "epoch": 0.78, "learning_rate": 1.732413195232553e-05, "loss": 0.7026, "step": 2362 }, { "epoch": 0.78, "learning_rate": 1.7321691666810353e-05, "loss": 0.7705, "step": 2363 }, { "epoch": 0.78, "learning_rate": 1.731925044111917e-05, "loss": 0.6821, "step": 2364 }, { "epoch": 0.78, "learning_rate": 1.7316808275565454e-05, "loss": 0.6792, "step": 2365 }, { "epoch": 0.79, "learning_rate": 1.731436517046281e-05, "loss": 0.6362, "step": 2366 }, { "epoch": 0.79, "learning_rate": 1.731192112612495e-05, "loss": 0.6926, "step": 2367 }, { "epoch": 0.79, "learning_rate": 1.730947614286572e-05, "loss": 0.6848, "step": 2368 }, { "epoch": 0.79, "learning_rate": 1.7307030220999076e-05, "loss": 0.6406, "step": 2369 }, { "epoch": 0.79, "learning_rate": 1.7304583360839092e-05, "loss": 0.7456, "step": 2370 }, { "epoch": 0.79, "learning_rate": 1.730213556269998e-05, "loss": 0.7578, "step": 2371 }, { "epoch": 0.79, "learning_rate": 1.729968682689605e-05, "loss": 0.7144, "step": 2372 }, { "epoch": 0.79, "learning_rate": 1.7297237153741754e-05, "loss": 0.7129, "step": 2373 }, { "epoch": 0.79, "learning_rate": 1.729478654355164e-05, "loss": 0.7095, "step": 2374 }, { "epoch": 0.79, "learning_rate": 1.7292334996640406e-05, "loss": 0.7407, "step": 2375 }, { "epoch": 0.79, "learning_rate": 1.7289882513322843e-05, "loss": 0.7549, "step": 2376 }, { "epoch": 0.79, "learning_rate": 1.7287429093913876e-05, "loss": 0.7612, "step": 2377 }, { "epoch": 0.79, "learning_rate": 1.7284974738728554e-05, "loss": 0.696, "step": 2378 }, { "epoch": 0.79, "learning_rate": 1.7282519448082026e-05, "loss": 0.728, "step": 2379 }, { "epoch": 0.79, "learning_rate": 1.728006322228959e-05, "loss": 0.6714, "step": 2380 }, { "epoch": 0.79, "learning_rate": 1.727760606166664e-05, "loss": 0.6702, "step": 2381 }, { "epoch": 0.79, "learning_rate": 1.7275147966528705e-05, "loss": 0.813, "step": 2382 }, { "epoch": 0.79, "learning_rate": 1.7272688937191424e-05, "loss": 0.708, "step": 2383 }, { "epoch": 0.79, "learning_rate": 1.727022897397056e-05, "loss": 0.7559, "step": 2384 }, { "epoch": 0.79, "learning_rate": 1.7267768077181998e-05, "loss": 0.6296, "step": 2385 }, { "epoch": 0.79, "learning_rate": 1.7265306247141742e-05, "loss": 0.7212, "step": 2386 }, { "epoch": 0.79, "learning_rate": 1.726284348416591e-05, "loss": 0.7319, "step": 2387 }, { "epoch": 0.79, "learning_rate": 1.726037978857075e-05, "loss": 0.8062, "step": 2388 }, { "epoch": 0.79, "learning_rate": 1.7257915160672623e-05, "loss": 0.6943, "step": 2389 }, { "epoch": 0.79, "learning_rate": 1.725544960078801e-05, "loss": 0.6865, "step": 2390 }, { "epoch": 0.79, "learning_rate": 1.7252983109233514e-05, "loss": 0.6093, "step": 2391 }, { "epoch": 0.79, "learning_rate": 1.725051568632585e-05, "loss": 0.6838, "step": 2392 }, { "epoch": 0.79, "learning_rate": 1.724804733238187e-05, "loss": 0.7212, "step": 2393 }, { "epoch": 0.79, "learning_rate": 1.7245578047718522e-05, "loss": 0.6823, "step": 2394 }, { "epoch": 0.79, "learning_rate": 1.72431078326529e-05, "loss": 0.6489, "step": 2395 }, { "epoch": 0.8, "learning_rate": 1.7240636687502193e-05, "loss": 0.665, "step": 2396 }, { "epoch": 0.8, "learning_rate": 1.7238164612583723e-05, "loss": 0.6362, "step": 2397 }, { "epoch": 0.8, "learning_rate": 1.723569160821493e-05, "loss": 0.7227, "step": 2398 }, { "epoch": 0.8, "learning_rate": 1.7233217674713367e-05, "loss": 0.7158, "step": 2399 }, { "epoch": 0.8, "learning_rate": 1.7230742812396715e-05, "loss": 0.7515, "step": 2400 }, { "epoch": 0.8, "learning_rate": 1.722826702158277e-05, "loss": 0.7539, "step": 2401 }, { "epoch": 0.8, "learning_rate": 1.722579030258945e-05, "loss": 0.7339, "step": 2402 }, { "epoch": 0.8, "learning_rate": 1.7223312655734786e-05, "loss": 0.61, "step": 2403 }, { "epoch": 0.8, "learning_rate": 1.7220834081336927e-05, "loss": 0.7505, "step": 2404 }, { "epoch": 0.8, "learning_rate": 1.7218354579714155e-05, "loss": 0.7649, "step": 2405 }, { "epoch": 0.8, "learning_rate": 1.721587415118486e-05, "loss": 0.6541, "step": 2406 }, { "epoch": 0.8, "learning_rate": 1.7213392796067546e-05, "loss": 0.6974, "step": 2407 }, { "epoch": 0.8, "learning_rate": 1.7210910514680853e-05, "loss": 0.6697, "step": 2408 }, { "epoch": 0.8, "learning_rate": 1.7208427307343526e-05, "loss": 0.6997, "step": 2409 }, { "epoch": 0.8, "learning_rate": 1.720594317437443e-05, "loss": 0.7227, "step": 2410 }, { "epoch": 0.8, "learning_rate": 1.720345811609255e-05, "loss": 0.698, "step": 2411 }, { "epoch": 0.8, "learning_rate": 1.7200972132817e-05, "loss": 0.7671, "step": 2412 }, { "epoch": 0.8, "learning_rate": 1.7198485224866997e-05, "loss": 0.7632, "step": 2413 }, { "epoch": 0.8, "learning_rate": 1.719599739256189e-05, "loss": 0.668, "step": 2414 }, { "epoch": 0.8, "learning_rate": 1.7193508636221133e-05, "loss": 0.6306, "step": 2415 }, { "epoch": 0.8, "learning_rate": 1.7191018956164314e-05, "loss": 0.7104, "step": 2416 }, { "epoch": 0.8, "learning_rate": 1.718852835271113e-05, "loss": 0.7388, "step": 2417 }, { "epoch": 0.8, "learning_rate": 1.7186036826181394e-05, "loss": 0.7129, "step": 2418 }, { "epoch": 0.8, "learning_rate": 1.7183544376895044e-05, "loss": 0.7212, "step": 2419 }, { "epoch": 0.8, "learning_rate": 1.718105100517214e-05, "loss": 0.7007, "step": 2420 }, { "epoch": 0.8, "learning_rate": 1.7178556711332846e-05, "loss": 0.6592, "step": 2421 }, { "epoch": 0.8, "learning_rate": 1.7176061495697463e-05, "loss": 0.6609, "step": 2422 }, { "epoch": 0.8, "learning_rate": 1.7173565358586396e-05, "loss": 0.6968, "step": 2423 }, { "epoch": 0.8, "learning_rate": 1.7171068300320168e-05, "loss": 0.6216, "step": 2424 }, { "epoch": 0.8, "learning_rate": 1.7168570321219435e-05, "loss": 0.6917, "step": 2425 }, { "epoch": 0.81, "learning_rate": 1.7166071421604956e-05, "loss": 0.731, "step": 2426 }, { "epoch": 0.81, "learning_rate": 1.716357160179762e-05, "loss": 0.7334, "step": 2427 }, { "epoch": 0.81, "learning_rate": 1.716107086211842e-05, "loss": 0.7336, "step": 2428 }, { "epoch": 0.81, "learning_rate": 1.7158569202888474e-05, "loss": 0.709, "step": 2429 }, { "epoch": 0.81, "learning_rate": 1.715606662442903e-05, "loss": 0.6399, "step": 2430 }, { "epoch": 0.81, "learning_rate": 1.7153563127061437e-05, "loss": 0.7368, "step": 2431 }, { "epoch": 0.81, "learning_rate": 1.7151058711107166e-05, "loss": 0.7041, "step": 2432 }, { "epoch": 0.81, "learning_rate": 1.714855337688781e-05, "loss": 0.7373, "step": 2433 }, { "epoch": 0.81, "learning_rate": 1.714604712472508e-05, "loss": 0.6865, "step": 2434 }, { "epoch": 0.81, "learning_rate": 1.71435399549408e-05, "loss": 0.729, "step": 2435 }, { "epoch": 0.81, "learning_rate": 1.7141031867856916e-05, "loss": 0.7549, "step": 2436 }, { "epoch": 0.81, "learning_rate": 1.7138522863795494e-05, "loss": 0.6614, "step": 2437 }, { "epoch": 0.81, "learning_rate": 1.713601294307871e-05, "loss": 0.6865, "step": 2438 }, { "epoch": 0.81, "learning_rate": 1.7133502106028864e-05, "loss": 0.7388, "step": 2439 }, { "epoch": 0.81, "learning_rate": 1.7130990352968372e-05, "loss": 0.6907, "step": 2440 }, { "epoch": 0.81, "learning_rate": 1.7128477684219765e-05, "loss": 0.7319, "step": 2441 }, { "epoch": 0.81, "learning_rate": 1.7125964100105698e-05, "loss": 0.6934, "step": 2442 }, { "epoch": 0.81, "learning_rate": 1.7123449600948938e-05, "loss": 0.7021, "step": 2443 }, { "epoch": 0.81, "learning_rate": 1.7120934187072363e-05, "loss": 0.7363, "step": 2444 }, { "epoch": 0.81, "learning_rate": 1.7118417858798992e-05, "loss": 0.6885, "step": 2445 }, { "epoch": 0.81, "learning_rate": 1.7115900616451934e-05, "loss": 0.7095, "step": 2446 }, { "epoch": 0.81, "learning_rate": 1.711338246035443e-05, "loss": 0.71, "step": 2447 }, { "epoch": 0.81, "learning_rate": 1.711086339082984e-05, "loss": 0.7275, "step": 2448 }, { "epoch": 0.81, "learning_rate": 1.710834340820163e-05, "loss": 0.7139, "step": 2449 }, { "epoch": 0.81, "learning_rate": 1.7105822512793392e-05, "loss": 0.5996, "step": 2450 }, { "epoch": 0.81, "learning_rate": 1.710330070492884e-05, "loss": 0.7104, "step": 2451 }, { "epoch": 0.81, "learning_rate": 1.710077798493179e-05, "loss": 0.7175, "step": 2452 }, { "epoch": 0.81, "learning_rate": 1.7098254353126185e-05, "loss": 0.7031, "step": 2453 }, { "epoch": 0.81, "learning_rate": 1.709572980983609e-05, "loss": 0.7009, "step": 2454 }, { "epoch": 0.81, "learning_rate": 1.7093204355385675e-05, "loss": 0.6914, "step": 2455 }, { "epoch": 0.82, "learning_rate": 1.7090677990099234e-05, "loss": 0.6582, "step": 2456 }, { "epoch": 0.82, "learning_rate": 1.7088150714301173e-05, "loss": 0.6978, "step": 2457 }, { "epoch": 0.82, "learning_rate": 1.708562252831603e-05, "loss": 0.7129, "step": 2458 }, { "epoch": 0.82, "learning_rate": 1.7083093432468436e-05, "loss": 0.686, "step": 2459 }, { "epoch": 0.82, "learning_rate": 1.7080563427083156e-05, "loss": 0.6406, "step": 2460 }, { "epoch": 0.82, "learning_rate": 1.7078032512485073e-05, "loss": 0.595, "step": 2461 }, { "epoch": 0.82, "learning_rate": 1.707550068899917e-05, "loss": 0.7024, "step": 2462 }, { "epoch": 0.82, "learning_rate": 1.7072967956950567e-05, "loss": 0.7371, "step": 2463 }, { "epoch": 0.82, "learning_rate": 1.7070434316664485e-05, "loss": 0.7793, "step": 2464 }, { "epoch": 0.82, "learning_rate": 1.706789976846627e-05, "loss": 0.7207, "step": 2465 }, { "epoch": 0.82, "learning_rate": 1.706536431268138e-05, "loss": 0.7202, "step": 2466 }, { "epoch": 0.82, "learning_rate": 1.7062827949635403e-05, "loss": 0.6665, "step": 2467 }, { "epoch": 0.82, "learning_rate": 1.706029067965402e-05, "loss": 0.7119, "step": 2468 }, { "epoch": 0.82, "learning_rate": 1.7057752503063042e-05, "loss": 0.6558, "step": 2469 }, { "epoch": 0.82, "learning_rate": 1.7055213420188404e-05, "loss": 0.6992, "step": 2470 }, { "epoch": 0.82, "learning_rate": 1.7052673431356143e-05, "loss": 0.7163, "step": 2471 }, { "epoch": 0.82, "learning_rate": 1.7050132536892415e-05, "loss": 0.6812, "step": 2472 }, { "epoch": 0.82, "learning_rate": 1.7047590737123502e-05, "loss": 0.6467, "step": 2473 }, { "epoch": 0.82, "learning_rate": 1.7045048032375793e-05, "loss": 0.7012, "step": 2474 }, { "epoch": 0.82, "learning_rate": 1.7042504422975795e-05, "loss": 0.6765, "step": 2475 }, { "epoch": 0.82, "learning_rate": 1.7039959909250134e-05, "loss": 0.7036, "step": 2476 }, { "epoch": 0.82, "learning_rate": 1.7037414491525547e-05, "loss": 0.7134, "step": 2477 }, { "epoch": 0.82, "learning_rate": 1.7034868170128893e-05, "loss": 0.6479, "step": 2478 }, { "epoch": 0.82, "learning_rate": 1.7032320945387144e-05, "loss": 0.6948, "step": 2479 }, { "epoch": 0.82, "learning_rate": 1.702977281762739e-05, "loss": 0.6924, "step": 2480 }, { "epoch": 0.82, "learning_rate": 1.7027223787176827e-05, "loss": 0.637, "step": 2481 }, { "epoch": 0.82, "learning_rate": 1.7024673854362787e-05, "loss": 0.7261, "step": 2482 }, { "epoch": 0.82, "learning_rate": 1.70221230195127e-05, "loss": 0.6958, "step": 2483 }, { "epoch": 0.82, "learning_rate": 1.7019571282954113e-05, "loss": 0.7629, "step": 2484 }, { "epoch": 0.82, "learning_rate": 1.70170186450147e-05, "loss": 0.7432, "step": 2485 }, { "epoch": 0.83, "learning_rate": 1.7014465106022245e-05, "loss": 0.6797, "step": 2486 }, { "epoch": 0.83, "learning_rate": 1.7011910666304644e-05, "loss": 0.728, "step": 2487 }, { "epoch": 0.83, "learning_rate": 1.7009355326189915e-05, "loss": 0.6868, "step": 2488 }, { "epoch": 0.83, "learning_rate": 1.7006799086006186e-05, "loss": 0.7217, "step": 2489 }, { "epoch": 0.83, "learning_rate": 1.70042419460817e-05, "loss": 0.6846, "step": 2490 }, { "epoch": 0.83, "learning_rate": 1.7001683906744823e-05, "loss": 0.5717, "step": 2491 }, { "epoch": 0.83, "learning_rate": 1.6999124968324034e-05, "loss": 0.7173, "step": 2492 }, { "epoch": 0.83, "learning_rate": 1.6996565131147917e-05, "loss": 0.6943, "step": 2493 }, { "epoch": 0.83, "learning_rate": 1.699400439554519e-05, "loss": 0.7075, "step": 2494 }, { "epoch": 0.83, "learning_rate": 1.6991442761844668e-05, "loss": 0.6621, "step": 2495 }, { "epoch": 0.83, "learning_rate": 1.6988880230375294e-05, "loss": 0.7822, "step": 2496 }, { "epoch": 0.83, "learning_rate": 1.698631680146612e-05, "loss": 0.6973, "step": 2497 }, { "epoch": 0.83, "learning_rate": 1.6983752475446316e-05, "loss": 0.731, "step": 2498 }, { "epoch": 0.83, "learning_rate": 1.6981187252645165e-05, "loss": 0.7583, "step": 2499 }, { "epoch": 0.83, "learning_rate": 1.6978621133392068e-05, "loss": 0.7566, "step": 2500 }, { "epoch": 0.83, "learning_rate": 1.697605411801654e-05, "loss": 0.6311, "step": 2501 }, { "epoch": 0.83, "learning_rate": 1.6973486206848208e-05, "loss": 0.6985, "step": 2502 }, { "epoch": 0.83, "learning_rate": 1.697091740021682e-05, "loss": 0.7098, "step": 2503 }, { "epoch": 0.83, "learning_rate": 1.696834769845223e-05, "loss": 0.7588, "step": 2504 }, { "epoch": 0.83, "learning_rate": 1.6965777101884423e-05, "loss": 0.6934, "step": 2505 }, { "epoch": 0.83, "learning_rate": 1.6963205610843476e-05, "loss": 0.6685, "step": 2506 }, { "epoch": 0.83, "learning_rate": 1.69606332256596e-05, "loss": 0.6626, "step": 2507 }, { "epoch": 0.83, "learning_rate": 1.6958059946663115e-05, "loss": 0.7603, "step": 2508 }, { "epoch": 0.83, "learning_rate": 1.695548577418445e-05, "loss": 0.6497, "step": 2509 }, { "epoch": 0.83, "learning_rate": 1.695291070855416e-05, "loss": 0.75, "step": 2510 }, { "epoch": 0.83, "learning_rate": 1.69503347501029e-05, "loss": 0.6307, "step": 2511 }, { "epoch": 0.83, "learning_rate": 1.694775789916146e-05, "loss": 0.7207, "step": 2512 }, { "epoch": 0.83, "learning_rate": 1.6945180156060718e-05, "loss": 0.6855, "step": 2513 }, { "epoch": 0.83, "learning_rate": 1.6942601521131693e-05, "loss": 0.6428, "step": 2514 }, { "epoch": 0.83, "learning_rate": 1.6940021994705503e-05, "loss": 0.6826, "step": 2515 }, { "epoch": 0.84, "learning_rate": 1.693744157711338e-05, "loss": 0.7651, "step": 2516 }, { "epoch": 0.84, "learning_rate": 1.693486026868668e-05, "loss": 0.6997, "step": 2517 }, { "epoch": 0.84, "learning_rate": 1.693227806975687e-05, "loss": 0.6465, "step": 2518 }, { "epoch": 0.84, "learning_rate": 1.692969498065552e-05, "loss": 0.6323, "step": 2519 }, { "epoch": 0.84, "learning_rate": 1.692711100171433e-05, "loss": 0.645, "step": 2520 }, { "epoch": 0.84, "learning_rate": 1.6924526133265108e-05, "loss": 0.7749, "step": 2521 }, { "epoch": 0.84, "learning_rate": 1.6921940375639777e-05, "loss": 0.6655, "step": 2522 }, { "epoch": 0.84, "learning_rate": 1.6919353729170363e-05, "loss": 0.6885, "step": 2523 }, { "epoch": 0.84, "learning_rate": 1.6916766194189033e-05, "loss": 0.6348, "step": 2524 }, { "epoch": 0.84, "learning_rate": 1.691417777102804e-05, "loss": 0.6653, "step": 2525 }, { "epoch": 0.84, "learning_rate": 1.6911588460019765e-05, "loss": 0.6777, "step": 2526 }, { "epoch": 0.84, "learning_rate": 1.6908998261496704e-05, "loss": 0.7505, "step": 2527 }, { "epoch": 0.84, "learning_rate": 1.6906407175791456e-05, "loss": 0.7236, "step": 2528 }, { "epoch": 0.84, "learning_rate": 1.690381520323675e-05, "loss": 0.751, "step": 2529 }, { "epoch": 0.84, "learning_rate": 1.6901222344165414e-05, "loss": 0.6885, "step": 2530 }, { "epoch": 0.84, "learning_rate": 1.6898628598910405e-05, "loss": 0.7358, "step": 2531 }, { "epoch": 0.84, "learning_rate": 1.6896033967804774e-05, "loss": 0.6948, "step": 2532 }, { "epoch": 0.84, "learning_rate": 1.6893438451181702e-05, "loss": 0.6729, "step": 2533 }, { "epoch": 0.84, "learning_rate": 1.6890842049374475e-05, "loss": 0.7441, "step": 2534 }, { "epoch": 0.84, "learning_rate": 1.6888244762716505e-05, "loss": 0.6689, "step": 2535 }, { "epoch": 0.84, "learning_rate": 1.6885646591541297e-05, "loss": 0.6704, "step": 2536 }, { "epoch": 0.84, "learning_rate": 1.688304753618249e-05, "loss": 0.6846, "step": 2537 }, { "epoch": 0.84, "learning_rate": 1.6880447596973826e-05, "loss": 0.6445, "step": 2538 }, { "epoch": 0.84, "learning_rate": 1.687784677424916e-05, "loss": 0.7168, "step": 2539 }, { "epoch": 0.84, "learning_rate": 1.6875245068342464e-05, "loss": 0.7407, "step": 2540 }, { "epoch": 0.84, "learning_rate": 1.687264247958782e-05, "loss": 0.7197, "step": 2541 }, { "epoch": 0.84, "learning_rate": 1.6870039008319428e-05, "loss": 0.6433, "step": 2542 }, { "epoch": 0.84, "learning_rate": 1.68674346548716e-05, "loss": 0.7539, "step": 2543 }, { "epoch": 0.84, "learning_rate": 1.686482941957876e-05, "loss": 0.7236, "step": 2544 }, { "epoch": 0.84, "learning_rate": 1.6862223302775446e-05, "loss": 0.717, "step": 2545 }, { "epoch": 0.85, "learning_rate": 1.6859616304796303e-05, "loss": 0.677, "step": 2546 }, { "epoch": 0.85, "learning_rate": 1.6857008425976098e-05, "loss": 0.7483, "step": 2547 }, { "epoch": 0.85, "learning_rate": 1.6854399666649712e-05, "loss": 0.6914, "step": 2548 }, { "epoch": 0.85, "learning_rate": 1.685179002715213e-05, "loss": 0.729, "step": 2549 }, { "epoch": 0.85, "learning_rate": 1.6849179507818455e-05, "loss": 0.7383, "step": 2550 }, { "epoch": 0.85, "learning_rate": 1.6846568108983902e-05, "loss": 0.75, "step": 2551 }, { "epoch": 0.85, "learning_rate": 1.6843955830983808e-05, "loss": 0.7363, "step": 2552 }, { "epoch": 0.85, "learning_rate": 1.6841342674153603e-05, "loss": 0.6904, "step": 2553 }, { "epoch": 0.85, "learning_rate": 1.683872863882885e-05, "loss": 0.6531, "step": 2554 }, { "epoch": 0.85, "learning_rate": 1.683611372534521e-05, "loss": 0.6519, "step": 2555 }, { "epoch": 0.85, "learning_rate": 1.6833497934038473e-05, "loss": 0.6655, "step": 2556 }, { "epoch": 0.85, "learning_rate": 1.6830881265244525e-05, "loss": 0.6812, "step": 2557 }, { "epoch": 0.85, "learning_rate": 1.682826371929937e-05, "loss": 0.6309, "step": 2558 }, { "epoch": 0.85, "learning_rate": 1.682564529653913e-05, "loss": 0.7725, "step": 2559 }, { "epoch": 0.85, "learning_rate": 1.6823025997300034e-05, "loss": 0.7324, "step": 2560 }, { "epoch": 0.85, "learning_rate": 1.6820405821918427e-05, "loss": 0.645, "step": 2561 }, { "epoch": 0.85, "learning_rate": 1.6817784770730762e-05, "loss": 0.6143, "step": 2562 }, { "epoch": 0.85, "learning_rate": 1.6815162844073608e-05, "loss": 0.7393, "step": 2563 }, { "epoch": 0.85, "learning_rate": 1.681254004228365e-05, "loss": 0.6176, "step": 2564 }, { "epoch": 0.85, "learning_rate": 1.6809916365697673e-05, "loss": 0.7231, "step": 2565 }, { "epoch": 0.85, "learning_rate": 1.680729181465259e-05, "loss": 0.7734, "step": 2566 }, { "epoch": 0.85, "learning_rate": 1.680466638948542e-05, "loss": 0.7021, "step": 2567 }, { "epoch": 0.85, "learning_rate": 1.6802040090533282e-05, "loss": 0.6758, "step": 2568 }, { "epoch": 0.85, "learning_rate": 1.679941291813343e-05, "loss": 0.6992, "step": 2569 }, { "epoch": 0.85, "learning_rate": 1.6796784872623213e-05, "loss": 0.6821, "step": 2570 }, { "epoch": 0.85, "learning_rate": 1.6794155954340098e-05, "loss": 0.6157, "step": 2571 }, { "epoch": 0.85, "learning_rate": 1.679152616362166e-05, "loss": 0.7153, "step": 2572 }, { "epoch": 0.85, "learning_rate": 1.6788895500805598e-05, "loss": 0.7568, "step": 2573 }, { "epoch": 0.85, "learning_rate": 1.6786263966229708e-05, "loss": 0.7373, "step": 2574 }, { "epoch": 0.85, "learning_rate": 1.6783631560231906e-05, "loss": 0.7358, "step": 2575 }, { "epoch": 0.85, "learning_rate": 1.6780998283150218e-05, "loss": 0.7803, "step": 2576 }, { "epoch": 0.86, "learning_rate": 1.677836413532278e-05, "loss": 0.6846, "step": 2577 }, { "epoch": 0.86, "learning_rate": 1.677572911708785e-05, "loss": 0.6875, "step": 2578 }, { "epoch": 0.86, "learning_rate": 1.677309322878378e-05, "loss": 0.6646, "step": 2579 }, { "epoch": 0.86, "learning_rate": 1.677045647074905e-05, "loss": 0.7769, "step": 2580 }, { "epoch": 0.86, "learning_rate": 1.6767818843322243e-05, "loss": 0.7109, "step": 2581 }, { "epoch": 0.86, "learning_rate": 1.6765180346842057e-05, "loss": 0.7046, "step": 2582 }, { "epoch": 0.86, "learning_rate": 1.67625409816473e-05, "loss": 0.7371, "step": 2583 }, { "epoch": 0.86, "learning_rate": 1.6759900748076892e-05, "loss": 0.6797, "step": 2584 }, { "epoch": 0.86, "learning_rate": 1.675725964646986e-05, "loss": 0.7173, "step": 2585 }, { "epoch": 0.86, "learning_rate": 1.6754617677165355e-05, "loss": 0.7549, "step": 2586 }, { "epoch": 0.86, "learning_rate": 1.675197484050263e-05, "loss": 0.7598, "step": 2587 }, { "epoch": 0.86, "learning_rate": 1.6749331136821043e-05, "loss": 0.6848, "step": 2588 }, { "epoch": 0.86, "learning_rate": 1.674668656646008e-05, "loss": 0.5839, "step": 2589 }, { "epoch": 0.86, "learning_rate": 1.674404112975932e-05, "loss": 0.6719, "step": 2590 }, { "epoch": 0.86, "learning_rate": 1.6741394827058476e-05, "loss": 0.6826, "step": 2591 }, { "epoch": 0.86, "learning_rate": 1.6738747658697348e-05, "loss": 0.6992, "step": 2592 }, { "epoch": 0.86, "learning_rate": 1.6736099625015862e-05, "loss": 0.6863, "step": 2593 }, { "epoch": 0.86, "learning_rate": 1.6733450726354054e-05, "loss": 0.7012, "step": 2594 }, { "epoch": 0.86, "learning_rate": 1.673080096305206e-05, "loss": 0.5808, "step": 2595 }, { "epoch": 0.86, "learning_rate": 1.672815033545014e-05, "loss": 0.7681, "step": 2596 }, { "epoch": 0.86, "learning_rate": 1.672549884388866e-05, "loss": 0.6641, "step": 2597 }, { "epoch": 0.86, "learning_rate": 1.67228464887081e-05, "loss": 0.7046, "step": 2598 }, { "epoch": 0.86, "learning_rate": 1.6720193270249044e-05, "loss": 0.5789, "step": 2599 }, { "epoch": 0.86, "learning_rate": 1.671753918885219e-05, "loss": 0.6919, "step": 2600 }, { "epoch": 0.86, "learning_rate": 1.6714884244858354e-05, "loss": 0.6826, "step": 2601 }, { "epoch": 0.86, "learning_rate": 1.671222843860845e-05, "loss": 0.6831, "step": 2602 }, { "epoch": 0.86, "learning_rate": 1.6709571770443514e-05, "loss": 0.6885, "step": 2603 }, { "epoch": 0.86, "learning_rate": 1.6706914240704682e-05, "loss": 0.7158, "step": 2604 }, { "epoch": 0.86, "learning_rate": 1.6704255849733216e-05, "loss": 0.7881, "step": 2605 }, { "epoch": 0.86, "learning_rate": 1.670159659787047e-05, "loss": 0.6929, "step": 2606 }, { "epoch": 0.87, "learning_rate": 1.6698936485457918e-05, "loss": 0.7847, "step": 2607 }, { "epoch": 0.87, "learning_rate": 1.669627551283715e-05, "loss": 0.6694, "step": 2608 }, { "epoch": 0.87, "learning_rate": 1.6693613680349854e-05, "loss": 0.6394, "step": 2609 }, { "epoch": 0.87, "learning_rate": 1.6690950988337843e-05, "loss": 0.6404, "step": 2610 }, { "epoch": 0.87, "learning_rate": 1.668828743714303e-05, "loss": 0.687, "step": 2611 }, { "epoch": 0.87, "learning_rate": 1.6685623027107434e-05, "loss": 0.7285, "step": 2612 }, { "epoch": 0.87, "learning_rate": 1.66829577585732e-05, "loss": 0.7231, "step": 2613 }, { "epoch": 0.87, "learning_rate": 1.6680291631882563e-05, "loss": 0.7319, "step": 2614 }, { "epoch": 0.87, "learning_rate": 1.6677624647377893e-05, "loss": 0.6788, "step": 2615 }, { "epoch": 0.87, "learning_rate": 1.667495680540165e-05, "loss": 0.8057, "step": 2616 }, { "epoch": 0.87, "learning_rate": 1.6672288106296408e-05, "loss": 0.6807, "step": 2617 }, { "epoch": 0.87, "learning_rate": 1.6669618550404855e-05, "loss": 0.6201, "step": 2618 }, { "epoch": 0.87, "learning_rate": 1.666694813806979e-05, "loss": 0.7505, "step": 2619 }, { "epoch": 0.87, "learning_rate": 1.666427686963412e-05, "loss": 0.6809, "step": 2620 }, { "epoch": 0.87, "learning_rate": 1.6661604745440863e-05, "loss": 0.7285, "step": 2621 }, { "epoch": 0.87, "learning_rate": 1.6658931765833146e-05, "loss": 0.7515, "step": 2622 }, { "epoch": 0.87, "learning_rate": 1.6656257931154196e-05, "loss": 0.7075, "step": 2623 }, { "epoch": 0.87, "learning_rate": 1.665358324174737e-05, "loss": 0.7173, "step": 2624 }, { "epoch": 0.87, "learning_rate": 1.6650907697956117e-05, "loss": 0.6873, "step": 2625 }, { "epoch": 0.87, "learning_rate": 1.664823130012401e-05, "loss": 0.7266, "step": 2626 }, { "epoch": 0.87, "learning_rate": 1.6645554048594713e-05, "loss": 0.6899, "step": 2627 }, { "epoch": 0.87, "learning_rate": 1.6642875943712025e-05, "loss": 0.7271, "step": 2628 }, { "epoch": 0.87, "learning_rate": 1.664019698581983e-05, "loss": 0.6572, "step": 2629 }, { "epoch": 0.87, "learning_rate": 1.663751717526214e-05, "loss": 0.7866, "step": 2630 }, { "epoch": 0.87, "learning_rate": 1.6634836512383058e-05, "loss": 0.7026, "step": 2631 }, { "epoch": 0.87, "learning_rate": 1.6632154997526823e-05, "loss": 0.6826, "step": 2632 }, { "epoch": 0.87, "learning_rate": 1.6629472631037747e-05, "loss": 0.6802, "step": 2633 }, { "epoch": 0.87, "learning_rate": 1.662678941326029e-05, "loss": 0.6107, "step": 2634 }, { "epoch": 0.87, "learning_rate": 1.662410534453899e-05, "loss": 0.6919, "step": 2635 }, { "epoch": 0.87, "learning_rate": 1.6621420425218516e-05, "loss": 0.7073, "step": 2636 }, { "epoch": 0.88, "learning_rate": 1.6618734655643637e-05, "loss": 0.6792, "step": 2637 }, { "epoch": 0.88, "learning_rate": 1.6616048036159226e-05, "loss": 0.7178, "step": 2638 }, { "epoch": 0.88, "learning_rate": 1.6613360567110276e-05, "loss": 0.7632, "step": 2639 }, { "epoch": 0.88, "learning_rate": 1.661067224884188e-05, "loss": 0.6997, "step": 2640 }, { "epoch": 0.88, "learning_rate": 1.660798308169925e-05, "loss": 0.7388, "step": 2641 }, { "epoch": 0.88, "learning_rate": 1.6605293066027695e-05, "loss": 0.6543, "step": 2642 }, { "epoch": 0.88, "learning_rate": 1.660260220217264e-05, "loss": 0.5925, "step": 2643 }, { "epoch": 0.88, "learning_rate": 1.6599910490479626e-05, "loss": 0.7808, "step": 2644 }, { "epoch": 0.88, "learning_rate": 1.659721793129428e-05, "loss": 0.7275, "step": 2645 }, { "epoch": 0.88, "learning_rate": 1.6594524524962368e-05, "loss": 0.7153, "step": 2646 }, { "epoch": 0.88, "learning_rate": 1.659183027182974e-05, "loss": 0.6931, "step": 2647 }, { "epoch": 0.88, "learning_rate": 1.6589135172242363e-05, "loss": 0.6902, "step": 2648 }, { "epoch": 0.88, "learning_rate": 1.6586439226546322e-05, "loss": 0.6982, "step": 2649 }, { "epoch": 0.88, "learning_rate": 1.6583742435087794e-05, "loss": 0.6299, "step": 2650 }, { "epoch": 0.88, "learning_rate": 1.6581044798213078e-05, "loss": 0.6782, "step": 2651 }, { "epoch": 0.88, "learning_rate": 1.657834631626858e-05, "loss": 0.7153, "step": 2652 }, { "epoch": 0.88, "learning_rate": 1.65756469896008e-05, "loss": 0.6299, "step": 2653 }, { "epoch": 0.88, "learning_rate": 1.6572946818556368e-05, "loss": 0.7212, "step": 2654 }, { "epoch": 0.88, "learning_rate": 1.657024580348201e-05, "loss": 0.6584, "step": 2655 }, { "epoch": 0.88, "learning_rate": 1.656754394472456e-05, "loss": 0.613, "step": 2656 }, { "epoch": 0.88, "learning_rate": 1.656484124263096e-05, "loss": 0.644, "step": 2657 }, { "epoch": 0.88, "learning_rate": 1.6562137697548268e-05, "loss": 0.6909, "step": 2658 }, { "epoch": 0.88, "learning_rate": 1.655943330982365e-05, "loss": 0.6934, "step": 2659 }, { "epoch": 0.88, "learning_rate": 1.6556728079804365e-05, "loss": 0.687, "step": 2660 }, { "epoch": 0.88, "learning_rate": 1.6554022007837794e-05, "loss": 0.6953, "step": 2661 }, { "epoch": 0.88, "learning_rate": 1.6551315094271426e-05, "loss": 0.6782, "step": 2662 }, { "epoch": 0.88, "learning_rate": 1.6548607339452853e-05, "loss": 0.6519, "step": 2663 }, { "epoch": 0.88, "learning_rate": 1.654589874372978e-05, "loss": 0.6479, "step": 2664 }, { "epoch": 0.88, "learning_rate": 1.6543189307450006e-05, "loss": 0.6279, "step": 2665 }, { "epoch": 0.88, "learning_rate": 1.6540479030961464e-05, "loss": 0.6484, "step": 2666 }, { "epoch": 0.89, "learning_rate": 1.653776791461217e-05, "loss": 0.6816, "step": 2667 }, { "epoch": 0.89, "learning_rate": 1.6535055958750258e-05, "loss": 0.6689, "step": 2668 }, { "epoch": 0.89, "learning_rate": 1.6532343163723973e-05, "loss": 0.6895, "step": 2669 }, { "epoch": 0.89, "learning_rate": 1.652962952988166e-05, "loss": 0.7256, "step": 2670 }, { "epoch": 0.89, "learning_rate": 1.652691505757178e-05, "loss": 0.6392, "step": 2671 }, { "epoch": 0.89, "learning_rate": 1.6524199747142895e-05, "loss": 0.7568, "step": 2672 }, { "epoch": 0.89, "learning_rate": 1.6521483598943683e-05, "loss": 0.7, "step": 2673 }, { "epoch": 0.89, "learning_rate": 1.6518766613322912e-05, "loss": 0.6523, "step": 2674 }, { "epoch": 0.89, "learning_rate": 1.6516048790629478e-05, "loss": 0.5901, "step": 2675 }, { "epoch": 0.89, "learning_rate": 1.651333013121237e-05, "loss": 0.7466, "step": 2676 }, { "epoch": 0.89, "learning_rate": 1.65106106354207e-05, "loss": 0.6396, "step": 2677 }, { "epoch": 0.89, "learning_rate": 1.650789030360367e-05, "loss": 0.6458, "step": 2678 }, { "epoch": 0.89, "learning_rate": 1.6505169136110596e-05, "loss": 0.6919, "step": 2679 }, { "epoch": 0.89, "learning_rate": 1.6502447133290905e-05, "loss": 0.6567, "step": 2680 }, { "epoch": 0.89, "learning_rate": 1.6499724295494125e-05, "loss": 0.7935, "step": 2681 }, { "epoch": 0.89, "learning_rate": 1.64970006230699e-05, "loss": 0.7358, "step": 2682 }, { "epoch": 0.89, "learning_rate": 1.649427611636797e-05, "loss": 0.5432, "step": 2683 }, { "epoch": 0.89, "learning_rate": 1.6491550775738197e-05, "loss": 0.688, "step": 2684 }, { "epoch": 0.89, "learning_rate": 1.648882460153053e-05, "loss": 0.6992, "step": 2685 }, { "epoch": 0.89, "learning_rate": 1.6486097594095045e-05, "loss": 0.6807, "step": 2686 }, { "epoch": 0.89, "learning_rate": 1.648336975378191e-05, "loss": 0.7275, "step": 2687 }, { "epoch": 0.89, "learning_rate": 1.6480641080941416e-05, "loss": 0.71, "step": 2688 }, { "epoch": 0.89, "learning_rate": 1.647791157592394e-05, "loss": 0.709, "step": 2689 }, { "epoch": 0.89, "learning_rate": 1.6475181239079975e-05, "loss": 0.7273, "step": 2690 }, { "epoch": 0.89, "learning_rate": 1.647245007076013e-05, "loss": 0.6631, "step": 2691 }, { "epoch": 0.89, "learning_rate": 1.6469718071315114e-05, "loss": 0.6511, "step": 2692 }, { "epoch": 0.89, "learning_rate": 1.646698524109574e-05, "loss": 0.7163, "step": 2693 }, { "epoch": 0.89, "learning_rate": 1.6464251580452932e-05, "loss": 0.6914, "step": 2694 }, { "epoch": 0.89, "learning_rate": 1.646151708973771e-05, "loss": 0.6863, "step": 2695 }, { "epoch": 0.89, "learning_rate": 1.645878176930122e-05, "loss": 0.6667, "step": 2696 }, { "epoch": 0.9, "learning_rate": 1.6456045619494696e-05, "loss": 0.6931, "step": 2697 }, { "epoch": 0.9, "learning_rate": 1.6453308640669493e-05, "loss": 0.687, "step": 2698 }, { "epoch": 0.9, "learning_rate": 1.6450570833177056e-05, "loss": 0.7119, "step": 2699 }, { "epoch": 0.9, "learning_rate": 1.6447832197368952e-05, "loss": 0.7278, "step": 2700 }, { "epoch": 0.9, "learning_rate": 1.644509273359685e-05, "loss": 0.6707, "step": 2701 }, { "epoch": 0.9, "learning_rate": 1.644235244221252e-05, "loss": 0.6992, "step": 2702 }, { "epoch": 0.9, "learning_rate": 1.6439611323567844e-05, "loss": 0.6223, "step": 2703 }, { "epoch": 0.9, "learning_rate": 1.6436869378014803e-05, "loss": 0.7373, "step": 2704 }, { "epoch": 0.9, "learning_rate": 1.64341266059055e-05, "loss": 0.73, "step": 2705 }, { "epoch": 0.9, "learning_rate": 1.6431383007592122e-05, "loss": 0.6777, "step": 2706 }, { "epoch": 0.9, "learning_rate": 1.6428638583426985e-05, "loss": 0.7871, "step": 2707 }, { "epoch": 0.9, "learning_rate": 1.642589333376249e-05, "loss": 0.665, "step": 2708 }, { "epoch": 0.9, "learning_rate": 1.6423147258951156e-05, "loss": 0.7397, "step": 2709 }, { "epoch": 0.9, "learning_rate": 1.6420400359345606e-05, "loss": 0.7212, "step": 2710 }, { "epoch": 0.9, "learning_rate": 1.6417652635298565e-05, "loss": 0.6658, "step": 2711 }, { "epoch": 0.9, "learning_rate": 1.6414904087162877e-05, "loss": 0.6675, "step": 2712 }, { "epoch": 0.9, "learning_rate": 1.6412154715291476e-05, "loss": 0.6602, "step": 2713 }, { "epoch": 0.9, "learning_rate": 1.6409404520037406e-05, "loss": 0.7534, "step": 2714 }, { "epoch": 0.9, "learning_rate": 1.6406653501753825e-05, "loss": 0.6655, "step": 2715 }, { "epoch": 0.9, "learning_rate": 1.640390166079398e-05, "loss": 0.678, "step": 2716 }, { "epoch": 0.9, "learning_rate": 1.6401148997511246e-05, "loss": 0.7102, "step": 2717 }, { "epoch": 0.9, "learning_rate": 1.6398395512259084e-05, "loss": 0.6392, "step": 2718 }, { "epoch": 0.9, "learning_rate": 1.6395641205391068e-05, "loss": 0.7227, "step": 2719 }, { "epoch": 0.9, "learning_rate": 1.6392886077260882e-05, "loss": 0.7432, "step": 2720 }, { "epoch": 0.9, "learning_rate": 1.639013012822231e-05, "loss": 0.5887, "step": 2721 }, { "epoch": 0.9, "learning_rate": 1.6387373358629238e-05, "loss": 0.6953, "step": 2722 }, { "epoch": 0.9, "learning_rate": 1.6384615768835662e-05, "loss": 0.7349, "step": 2723 }, { "epoch": 0.9, "learning_rate": 1.638185735919569e-05, "loss": 0.6921, "step": 2724 }, { "epoch": 0.9, "learning_rate": 1.637909813006353e-05, "loss": 0.7214, "step": 2725 }, { "epoch": 0.9, "learning_rate": 1.637633808179348e-05, "loss": 0.6338, "step": 2726 }, { "epoch": 0.91, "learning_rate": 1.637357721473997e-05, "loss": 0.6078, "step": 2727 }, { "epoch": 0.91, "learning_rate": 1.637081552925752e-05, "loss": 0.7222, "step": 2728 }, { "epoch": 0.91, "learning_rate": 1.6368053025700746e-05, "loss": 0.6711, "step": 2729 }, { "epoch": 0.91, "learning_rate": 1.6365289704424397e-05, "loss": 0.6689, "step": 2730 }, { "epoch": 0.91, "learning_rate": 1.6362525565783297e-05, "loss": 0.7021, "step": 2731 }, { "epoch": 0.91, "learning_rate": 1.6359760610132397e-05, "loss": 0.5923, "step": 2732 }, { "epoch": 0.91, "learning_rate": 1.6356994837826736e-05, "loss": 0.7749, "step": 2733 }, { "epoch": 0.91, "learning_rate": 1.6354228249221467e-05, "loss": 0.6155, "step": 2734 }, { "epoch": 0.91, "learning_rate": 1.6351460844671855e-05, "loss": 0.6509, "step": 2735 }, { "epoch": 0.91, "learning_rate": 1.6348692624533258e-05, "loss": 0.6953, "step": 2736 }, { "epoch": 0.91, "learning_rate": 1.6345923589161137e-05, "loss": 0.6924, "step": 2737 }, { "epoch": 0.91, "learning_rate": 1.6343153738911066e-05, "loss": 0.688, "step": 2738 }, { "epoch": 0.91, "learning_rate": 1.634038307413872e-05, "loss": 0.6455, "step": 2739 }, { "epoch": 0.91, "learning_rate": 1.6337611595199882e-05, "loss": 0.6516, "step": 2740 }, { "epoch": 0.91, "learning_rate": 1.6334839302450436e-05, "loss": 0.7417, "step": 2741 }, { "epoch": 0.91, "learning_rate": 1.6332066196246366e-05, "loss": 0.7188, "step": 2742 }, { "epoch": 0.91, "learning_rate": 1.6329292276943773e-05, "loss": 0.731, "step": 2743 }, { "epoch": 0.91, "learning_rate": 1.632651754489885e-05, "loss": 0.6777, "step": 2744 }, { "epoch": 0.91, "learning_rate": 1.63237420004679e-05, "loss": 0.6066, "step": 2745 }, { "epoch": 0.91, "learning_rate": 1.6320965644007335e-05, "loss": 0.6792, "step": 2746 }, { "epoch": 0.91, "learning_rate": 1.6318188475873657e-05, "loss": 0.668, "step": 2747 }, { "epoch": 0.91, "learning_rate": 1.631541049642349e-05, "loss": 0.6406, "step": 2748 }, { "epoch": 0.91, "learning_rate": 1.6312631706013547e-05, "loss": 0.6743, "step": 2749 }, { "epoch": 0.91, "learning_rate": 1.6309852105000654e-05, "loss": 0.6396, "step": 2750 }, { "epoch": 0.91, "learning_rate": 1.6307071693741738e-05, "loss": 0.7744, "step": 2751 }, { "epoch": 0.91, "learning_rate": 1.6304290472593832e-05, "loss": 0.6797, "step": 2752 }, { "epoch": 0.91, "learning_rate": 1.630150844191407e-05, "loss": 0.7241, "step": 2753 }, { "epoch": 0.91, "learning_rate": 1.6298725602059694e-05, "loss": 0.6626, "step": 2754 }, { "epoch": 0.91, "learning_rate": 1.6295941953388047e-05, "loss": 0.7017, "step": 2755 }, { "epoch": 0.91, "learning_rate": 1.6293157496256577e-05, "loss": 0.7031, "step": 2756 }, { "epoch": 0.92, "learning_rate": 1.629037223102283e-05, "loss": 0.6943, "step": 2757 }, { "epoch": 0.92, "learning_rate": 1.6287586158044467e-05, "loss": 0.6675, "step": 2758 }, { "epoch": 0.92, "learning_rate": 1.6284799277679244e-05, "loss": 0.7246, "step": 2759 }, { "epoch": 0.92, "learning_rate": 1.6282011590285024e-05, "loss": 0.7295, "step": 2760 }, { "epoch": 0.92, "learning_rate": 1.6279223096219773e-05, "loss": 0.6499, "step": 2761 }, { "epoch": 0.92, "learning_rate": 1.6276433795841558e-05, "loss": 0.7026, "step": 2762 }, { "epoch": 0.92, "learning_rate": 1.6273643689508557e-05, "loss": 0.6692, "step": 2763 }, { "epoch": 0.92, "learning_rate": 1.6270852777579047e-05, "loss": 0.645, "step": 2764 }, { "epoch": 0.92, "learning_rate": 1.62680610604114e-05, "loss": 0.7148, "step": 2765 }, { "epoch": 0.92, "learning_rate": 1.6265268538364106e-05, "loss": 0.6416, "step": 2766 }, { "epoch": 0.92, "learning_rate": 1.6262475211795758e-05, "loss": 0.644, "step": 2767 }, { "epoch": 0.92, "learning_rate": 1.625968108106503e-05, "loss": 0.729, "step": 2768 }, { "epoch": 0.92, "learning_rate": 1.625688614653073e-05, "loss": 0.6851, "step": 2769 }, { "epoch": 0.92, "learning_rate": 1.6254090408551746e-05, "loss": 0.6785, "step": 2770 }, { "epoch": 0.92, "learning_rate": 1.6251293867487077e-05, "loss": 0.6656, "step": 2771 }, { "epoch": 0.92, "learning_rate": 1.6248496523695837e-05, "loss": 0.6667, "step": 2772 }, { "epoch": 0.92, "learning_rate": 1.624569837753722e-05, "loss": 0.7476, "step": 2773 }, { "epoch": 0.92, "learning_rate": 1.624289942937054e-05, "loss": 0.6421, "step": 2774 }, { "epoch": 0.92, "learning_rate": 1.624009967955521e-05, "loss": 0.6465, "step": 2775 }, { "epoch": 0.92, "learning_rate": 1.6237299128450742e-05, "loss": 0.6492, "step": 2776 }, { "epoch": 0.92, "learning_rate": 1.6234497776416756e-05, "loss": 0.6592, "step": 2777 }, { "epoch": 0.92, "learning_rate": 1.623169562381297e-05, "loss": 0.644, "step": 2778 }, { "epoch": 0.92, "learning_rate": 1.6228892670999214e-05, "loss": 0.6812, "step": 2779 }, { "epoch": 0.92, "learning_rate": 1.6226088918335406e-05, "loss": 0.7124, "step": 2780 }, { "epoch": 0.92, "learning_rate": 1.6223284366181583e-05, "loss": 0.6541, "step": 2781 }, { "epoch": 0.92, "learning_rate": 1.6220479014897867e-05, "loss": 0.6748, "step": 2782 }, { "epoch": 0.92, "learning_rate": 1.62176728648445e-05, "loss": 0.6526, "step": 2783 }, { "epoch": 0.92, "learning_rate": 1.6214865916381815e-05, "loss": 0.7075, "step": 2784 }, { "epoch": 0.92, "learning_rate": 1.6212058169870257e-05, "loss": 0.6538, "step": 2785 }, { "epoch": 0.92, "learning_rate": 1.620924962567036e-05, "loss": 0.6086, "step": 2786 }, { "epoch": 0.92, "learning_rate": 1.6206440284142773e-05, "loss": 0.6677, "step": 2787 }, { "epoch": 0.93, "learning_rate": 1.620363014564824e-05, "loss": 0.7139, "step": 2788 }, { "epoch": 0.93, "learning_rate": 1.6200819210547614e-05, "loss": 0.6987, "step": 2789 }, { "epoch": 0.93, "learning_rate": 1.619800747920184e-05, "loss": 0.6206, "step": 2790 }, { "epoch": 0.93, "learning_rate": 1.619519495197198e-05, "loss": 0.6367, "step": 2791 }, { "epoch": 0.93, "learning_rate": 1.619238162921918e-05, "loss": 0.6068, "step": 2792 }, { "epoch": 0.93, "learning_rate": 1.61895675113047e-05, "loss": 0.7202, "step": 2793 }, { "epoch": 0.93, "learning_rate": 1.618675259858991e-05, "loss": 0.584, "step": 2794 }, { "epoch": 0.93, "learning_rate": 1.6183936891436262e-05, "loss": 0.6982, "step": 2795 }, { "epoch": 0.93, "learning_rate": 1.6181120390205322e-05, "loss": 0.6838, "step": 2796 }, { "epoch": 0.93, "learning_rate": 1.6178303095258754e-05, "loss": 0.7407, "step": 2797 }, { "epoch": 0.93, "learning_rate": 1.6175485006958335e-05, "loss": 0.6838, "step": 2798 }, { "epoch": 0.93, "learning_rate": 1.6172666125665922e-05, "loss": 0.7251, "step": 2799 }, { "epoch": 0.93, "learning_rate": 1.6169846451743492e-05, "loss": 0.6313, "step": 2800 }, { "epoch": 0.93, "learning_rate": 1.6167025985553125e-05, "loss": 0.7749, "step": 2801 }, { "epoch": 0.93, "learning_rate": 1.616420472745699e-05, "loss": 0.6687, "step": 2802 }, { "epoch": 0.93, "learning_rate": 1.616138267781736e-05, "loss": 0.6526, "step": 2803 }, { "epoch": 0.93, "learning_rate": 1.615855983699662e-05, "loss": 0.6953, "step": 2804 }, { "epoch": 0.93, "learning_rate": 1.6155736205357246e-05, "loss": 0.6833, "step": 2805 }, { "epoch": 0.93, "learning_rate": 1.6152911783261825e-05, "loss": 0.6851, "step": 2806 }, { "epoch": 0.93, "learning_rate": 1.6150086571073033e-05, "loss": 0.6636, "step": 2807 }, { "epoch": 0.93, "learning_rate": 1.614726056915366e-05, "loss": 0.6865, "step": 2808 }, { "epoch": 0.93, "learning_rate": 1.614443377786659e-05, "loss": 0.7046, "step": 2809 }, { "epoch": 0.93, "learning_rate": 1.6141606197574807e-05, "loss": 0.6245, "step": 2810 }, { "epoch": 0.93, "learning_rate": 1.6138777828641408e-05, "loss": 0.6223, "step": 2811 }, { "epoch": 0.93, "learning_rate": 1.6135948671429577e-05, "loss": 0.7158, "step": 2812 }, { "epoch": 0.93, "learning_rate": 1.6133118726302603e-05, "loss": 0.687, "step": 2813 }, { "epoch": 0.93, "learning_rate": 1.6130287993623882e-05, "loss": 0.6931, "step": 2814 }, { "epoch": 0.93, "learning_rate": 1.612745647375691e-05, "loss": 0.7124, "step": 2815 }, { "epoch": 0.93, "learning_rate": 1.6124624167065276e-05, "loss": 0.7124, "step": 2816 }, { "epoch": 0.93, "learning_rate": 1.6121791073912675e-05, "loss": 0.5524, "step": 2817 }, { "epoch": 0.94, "learning_rate": 1.611895719466291e-05, "loss": 0.6738, "step": 2818 }, { "epoch": 0.94, "learning_rate": 1.6116122529679877e-05, "loss": 0.8008, "step": 2819 }, { "epoch": 0.94, "learning_rate": 1.6113287079327568e-05, "loss": 0.686, "step": 2820 }, { "epoch": 0.94, "learning_rate": 1.611045084397009e-05, "loss": 0.6753, "step": 2821 }, { "epoch": 0.94, "learning_rate": 1.610761382397164e-05, "loss": 0.6826, "step": 2822 }, { "epoch": 0.94, "learning_rate": 1.6104776019696514e-05, "loss": 0.689, "step": 2823 }, { "epoch": 0.94, "learning_rate": 1.6101937431509118e-05, "loss": 0.5708, "step": 2824 }, { "epoch": 0.94, "learning_rate": 1.6099098059773957e-05, "loss": 0.6536, "step": 2825 }, { "epoch": 0.94, "learning_rate": 1.6096257904855634e-05, "loss": 0.6899, "step": 2826 }, { "epoch": 0.94, "learning_rate": 1.6093416967118842e-05, "loss": 0.6506, "step": 2827 }, { "epoch": 0.94, "learning_rate": 1.6090575246928396e-05, "loss": 0.7295, "step": 2828 }, { "epoch": 0.94, "learning_rate": 1.6087732744649194e-05, "loss": 0.6406, "step": 2829 }, { "epoch": 0.94, "learning_rate": 1.608488946064625e-05, "loss": 0.6594, "step": 2830 }, { "epoch": 0.94, "learning_rate": 1.6082045395284655e-05, "loss": 0.6943, "step": 2831 }, { "epoch": 0.94, "learning_rate": 1.607920054892963e-05, "loss": 0.6577, "step": 2832 }, { "epoch": 0.94, "learning_rate": 1.607635492194646e-05, "loss": 0.6753, "step": 2833 }, { "epoch": 0.94, "learning_rate": 1.6073508514700576e-05, "loss": 0.5955, "step": 2834 }, { "epoch": 0.94, "learning_rate": 1.6070661327557465e-05, "loss": 0.6157, "step": 2835 }, { "epoch": 0.94, "learning_rate": 1.6067813360882745e-05, "loss": 0.6162, "step": 2836 }, { "epoch": 0.94, "learning_rate": 1.6064964615042113e-05, "loss": 0.7734, "step": 2837 }, { "epoch": 0.94, "learning_rate": 1.606211509040138e-05, "loss": 0.6843, "step": 2838 }, { "epoch": 0.94, "learning_rate": 1.6059264787326457e-05, "loss": 0.6633, "step": 2839 }, { "epoch": 0.94, "learning_rate": 1.605641370618335e-05, "loss": 0.6621, "step": 2840 }, { "epoch": 0.94, "learning_rate": 1.6053561847338152e-05, "loss": 0.7148, "step": 2841 }, { "epoch": 0.94, "learning_rate": 1.6050709211157084e-05, "loss": 0.5237, "step": 2842 }, { "epoch": 0.94, "learning_rate": 1.6047855798006446e-05, "loss": 0.7031, "step": 2843 }, { "epoch": 0.94, "learning_rate": 1.604500160825264e-05, "loss": 0.446, "step": 2844 }, { "epoch": 0.94, "learning_rate": 1.6042146642262183e-05, "loss": 0.6208, "step": 2845 }, { "epoch": 0.94, "learning_rate": 1.6039290900401674e-05, "loss": 0.7251, "step": 2846 }, { "epoch": 0.94, "learning_rate": 1.6036434383037813e-05, "loss": 0.6479, "step": 2847 }, { "epoch": 0.95, "learning_rate": 1.6033577090537413e-05, "loss": 0.7188, "step": 2848 }, { "epoch": 0.95, "learning_rate": 1.6030719023267372e-05, "loss": 0.6907, "step": 2849 }, { "epoch": 0.95, "learning_rate": 1.6027860181594696e-05, "loss": 0.6978, "step": 2850 }, { "epoch": 0.95, "learning_rate": 1.6025000565886486e-05, "loss": 0.7114, "step": 2851 }, { "epoch": 0.95, "learning_rate": 1.6022140176509946e-05, "loss": 0.6411, "step": 2852 }, { "epoch": 0.95, "learning_rate": 1.6019279013832377e-05, "loss": 0.6497, "step": 2853 }, { "epoch": 0.95, "learning_rate": 1.601641707822118e-05, "loss": 0.6252, "step": 2854 }, { "epoch": 0.95, "learning_rate": 1.6013554370043856e-05, "loss": 0.7192, "step": 2855 }, { "epoch": 0.95, "learning_rate": 1.6010690889668007e-05, "loss": 0.7505, "step": 2856 }, { "epoch": 0.95, "learning_rate": 1.6007826637461323e-05, "loss": 0.6846, "step": 2857 }, { "epoch": 0.95, "learning_rate": 1.600496161379161e-05, "loss": 0.6768, "step": 2858 }, { "epoch": 0.95, "learning_rate": 1.600209581902676e-05, "loss": 0.7417, "step": 2859 }, { "epoch": 0.95, "learning_rate": 1.5999229253534773e-05, "loss": 0.7095, "step": 2860 }, { "epoch": 0.95, "learning_rate": 1.599636191768374e-05, "loss": 0.6482, "step": 2861 }, { "epoch": 0.95, "learning_rate": 1.5993493811841855e-05, "loss": 0.6184, "step": 2862 }, { "epoch": 0.95, "learning_rate": 1.599062493637741e-05, "loss": 0.6802, "step": 2863 }, { "epoch": 0.95, "learning_rate": 1.5987755291658804e-05, "loss": 0.6055, "step": 2864 }, { "epoch": 0.95, "learning_rate": 1.5984884878054516e-05, "loss": 0.6289, "step": 2865 }, { "epoch": 0.95, "learning_rate": 1.598201369593314e-05, "loss": 0.6636, "step": 2866 }, { "epoch": 0.95, "learning_rate": 1.5979141745663364e-05, "loss": 0.6885, "step": 2867 }, { "epoch": 0.95, "learning_rate": 1.5976269027613975e-05, "loss": 0.6851, "step": 2868 }, { "epoch": 0.95, "learning_rate": 1.5973395542153855e-05, "loss": 0.6353, "step": 2869 }, { "epoch": 0.95, "learning_rate": 1.597052128965199e-05, "loss": 0.7007, "step": 2870 }, { "epoch": 0.95, "learning_rate": 1.5967646270477456e-05, "loss": 0.6807, "step": 2871 }, { "epoch": 0.95, "learning_rate": 1.5964770484999442e-05, "loss": 0.6396, "step": 2872 }, { "epoch": 0.95, "learning_rate": 1.5961893933587224e-05, "loss": 0.6968, "step": 2873 }, { "epoch": 0.95, "learning_rate": 1.595901661661017e-05, "loss": 0.7422, "step": 2874 }, { "epoch": 0.95, "learning_rate": 1.595613853443777e-05, "loss": 0.6084, "step": 2875 }, { "epoch": 0.95, "learning_rate": 1.595325968743959e-05, "loss": 0.6772, "step": 2876 }, { "epoch": 0.95, "learning_rate": 1.5950380075985297e-05, "loss": 0.6841, "step": 2877 }, { "epoch": 0.96, "learning_rate": 1.594749970044467e-05, "loss": 0.7866, "step": 2878 }, { "epoch": 0.96, "learning_rate": 1.5944618561187573e-05, "loss": 0.5991, "step": 2879 }, { "epoch": 0.96, "learning_rate": 1.5941736658583968e-05, "loss": 0.6235, "step": 2880 }, { "epoch": 0.96, "learning_rate": 1.5938853993003926e-05, "loss": 0.7471, "step": 2881 }, { "epoch": 0.96, "learning_rate": 1.5935970564817608e-05, "loss": 0.5967, "step": 2882 }, { "epoch": 0.96, "learning_rate": 1.593308637439527e-05, "loss": 0.6382, "step": 2883 }, { "epoch": 0.96, "learning_rate": 1.593020142210728e-05, "loss": 0.6467, "step": 2884 }, { "epoch": 0.96, "learning_rate": 1.592731570832408e-05, "loss": 0.6641, "step": 2885 }, { "epoch": 0.96, "learning_rate": 1.5924429233416224e-05, "loss": 0.71, "step": 2886 }, { "epoch": 0.96, "learning_rate": 1.5921541997754378e-05, "loss": 0.6836, "step": 2887 }, { "epoch": 0.96, "learning_rate": 1.591865400170928e-05, "loss": 0.7832, "step": 2888 }, { "epoch": 0.96, "learning_rate": 1.5915765245651778e-05, "loss": 0.7471, "step": 2889 }, { "epoch": 0.96, "learning_rate": 1.5912875729952817e-05, "loss": 0.7173, "step": 2890 }, { "epoch": 0.96, "learning_rate": 1.590998545498344e-05, "loss": 0.677, "step": 2891 }, { "epoch": 0.96, "learning_rate": 1.5907094421114782e-05, "loss": 0.6846, "step": 2892 }, { "epoch": 0.96, "learning_rate": 1.5904202628718084e-05, "loss": 0.688, "step": 2893 }, { "epoch": 0.96, "learning_rate": 1.590131007816468e-05, "loss": 0.7109, "step": 2894 }, { "epoch": 0.96, "learning_rate": 1.5898416769826003e-05, "loss": 0.6855, "step": 2895 }, { "epoch": 0.96, "learning_rate": 1.5895522704073573e-05, "loss": 0.7197, "step": 2896 }, { "epoch": 0.96, "learning_rate": 1.5892627881279023e-05, "loss": 0.7217, "step": 2897 }, { "epoch": 0.96, "learning_rate": 1.5889732301814077e-05, "loss": 0.6333, "step": 2898 }, { "epoch": 0.96, "learning_rate": 1.5886835966050554e-05, "loss": 0.653, "step": 2899 }, { "epoch": 0.96, "learning_rate": 1.588393887436037e-05, "loss": 0.6636, "step": 2900 }, { "epoch": 0.96, "learning_rate": 1.588104102711554e-05, "loss": 0.6924, "step": 2901 }, { "epoch": 0.96, "learning_rate": 1.587814242468818e-05, "loss": 0.6768, "step": 2902 }, { "epoch": 0.96, "learning_rate": 1.587524306745049e-05, "loss": 0.677, "step": 2903 }, { "epoch": 0.96, "learning_rate": 1.5872342955774785e-05, "loss": 0.6387, "step": 2904 }, { "epoch": 0.96, "learning_rate": 1.5869442090033462e-05, "loss": 0.6785, "step": 2905 }, { "epoch": 0.96, "learning_rate": 1.5866540470599018e-05, "loss": 0.7085, "step": 2906 }, { "epoch": 0.96, "learning_rate": 1.5863638097844054e-05, "loss": 0.5989, "step": 2907 }, { "epoch": 0.97, "learning_rate": 1.5860734972141264e-05, "loss": 0.6992, "step": 2908 }, { "epoch": 0.97, "learning_rate": 1.5857831093863428e-05, "loss": 0.6274, "step": 2909 }, { "epoch": 0.97, "learning_rate": 1.5854926463383442e-05, "loss": 0.6699, "step": 2910 }, { "epoch": 0.97, "learning_rate": 1.585202108107428e-05, "loss": 0.7329, "step": 2911 }, { "epoch": 0.97, "learning_rate": 1.5849114947309033e-05, "loss": 0.6924, "step": 2912 }, { "epoch": 0.97, "learning_rate": 1.5846208062460866e-05, "loss": 0.7107, "step": 2913 }, { "epoch": 0.97, "learning_rate": 1.5843300426903055e-05, "loss": 0.6707, "step": 2914 }, { "epoch": 0.97, "learning_rate": 1.584039204100897e-05, "loss": 0.6951, "step": 2915 }, { "epoch": 0.97, "learning_rate": 1.5837482905152072e-05, "loss": 0.6494, "step": 2916 }, { "epoch": 0.97, "learning_rate": 1.583457301970593e-05, "loss": 0.7812, "step": 2917 }, { "epoch": 0.97, "learning_rate": 1.583166238504419e-05, "loss": 0.6375, "step": 2918 }, { "epoch": 0.97, "learning_rate": 1.5828751001540613e-05, "loss": 0.6575, "step": 2919 }, { "epoch": 0.97, "learning_rate": 1.5825838869569046e-05, "loss": 0.6089, "step": 2920 }, { "epoch": 0.97, "learning_rate": 1.5822925989503434e-05, "loss": 0.6982, "step": 2921 }, { "epoch": 0.97, "learning_rate": 1.5820012361717828e-05, "loss": 0.6445, "step": 2922 }, { "epoch": 0.97, "learning_rate": 1.5817097986586355e-05, "loss": 0.6018, "step": 2923 }, { "epoch": 0.97, "learning_rate": 1.581418286448325e-05, "loss": 0.5432, "step": 2924 }, { "epoch": 0.97, "learning_rate": 1.581126699578285e-05, "loss": 0.6133, "step": 2925 }, { "epoch": 0.97, "learning_rate": 1.5808350380859575e-05, "loss": 0.7212, "step": 2926 }, { "epoch": 0.97, "learning_rate": 1.5805433020087952e-05, "loss": 0.6477, "step": 2927 }, { "epoch": 0.97, "learning_rate": 1.580251491384259e-05, "loss": 0.7183, "step": 2928 }, { "epoch": 0.97, "learning_rate": 1.5799596062498214e-05, "loss": 0.6321, "step": 2929 }, { "epoch": 0.97, "learning_rate": 1.5796676466429618e-05, "loss": 0.6655, "step": 2930 }, { "epoch": 0.97, "learning_rate": 1.5793756126011713e-05, "loss": 0.5964, "step": 2931 }, { "epoch": 0.97, "learning_rate": 1.5790835041619503e-05, "loss": 0.7114, "step": 2932 }, { "epoch": 0.97, "learning_rate": 1.578791321362808e-05, "loss": 0.6792, "step": 2933 }, { "epoch": 0.97, "learning_rate": 1.5784990642412635e-05, "loss": 0.6929, "step": 2934 }, { "epoch": 0.97, "learning_rate": 1.578206732834845e-05, "loss": 0.6831, "step": 2935 }, { "epoch": 0.97, "learning_rate": 1.5779143271810918e-05, "loss": 0.6497, "step": 2936 }, { "epoch": 0.97, "learning_rate": 1.5776218473175507e-05, "loss": 0.6638, "step": 2937 }, { "epoch": 0.98, "learning_rate": 1.577329293281779e-05, "loss": 0.6553, "step": 2938 }, { "epoch": 0.98, "learning_rate": 1.5770366651113438e-05, "loss": 0.6968, "step": 2939 }, { "epoch": 0.98, "learning_rate": 1.5767439628438214e-05, "loss": 0.623, "step": 2940 }, { "epoch": 0.98, "learning_rate": 1.576451186516797e-05, "loss": 0.6145, "step": 2941 }, { "epoch": 0.98, "learning_rate": 1.5761583361678666e-05, "loss": 0.6777, "step": 2942 }, { "epoch": 0.98, "learning_rate": 1.5758654118346352e-05, "loss": 0.6328, "step": 2943 }, { "epoch": 0.98, "learning_rate": 1.575572413554716e-05, "loss": 0.6206, "step": 2944 }, { "epoch": 0.98, "learning_rate": 1.5752793413657336e-05, "loss": 0.658, "step": 2945 }, { "epoch": 0.98, "learning_rate": 1.5749861953053215e-05, "loss": 0.5718, "step": 2946 }, { "epoch": 0.98, "learning_rate": 1.5746929754111223e-05, "loss": 0.7373, "step": 2947 }, { "epoch": 0.98, "learning_rate": 1.5743996817207876e-05, "loss": 0.7051, "step": 2948 }, { "epoch": 0.98, "learning_rate": 1.5741063142719803e-05, "loss": 0.6624, "step": 2949 }, { "epoch": 0.98, "learning_rate": 1.573812873102371e-05, "loss": 0.687, "step": 2950 }, { "epoch": 0.98, "learning_rate": 1.5735193582496398e-05, "loss": 0.7388, "step": 2951 }, { "epoch": 0.98, "learning_rate": 1.5732257697514777e-05, "loss": 0.6416, "step": 2952 }, { "epoch": 0.98, "learning_rate": 1.5729321076455843e-05, "loss": 0.6987, "step": 2953 }, { "epoch": 0.98, "learning_rate": 1.572638371969668e-05, "loss": 0.7212, "step": 2954 }, { "epoch": 0.98, "learning_rate": 1.5723445627614482e-05, "loss": 0.6794, "step": 2955 }, { "epoch": 0.98, "learning_rate": 1.5720506800586522e-05, "loss": 0.6443, "step": 2956 }, { "epoch": 0.98, "learning_rate": 1.5717567238990173e-05, "loss": 0.7036, "step": 2957 }, { "epoch": 0.98, "learning_rate": 1.5714626943202908e-05, "loss": 0.6548, "step": 2958 }, { "epoch": 0.98, "learning_rate": 1.5711685913602286e-05, "loss": 0.7212, "step": 2959 }, { "epoch": 0.98, "learning_rate": 1.5708744150565966e-05, "loss": 0.6433, "step": 2960 }, { "epoch": 0.98, "learning_rate": 1.5705801654471696e-05, "loss": 0.585, "step": 2961 }, { "epoch": 0.98, "learning_rate": 1.570285842569732e-05, "loss": 0.6426, "step": 2962 }, { "epoch": 0.98, "learning_rate": 1.569991446462078e-05, "loss": 0.5938, "step": 2963 }, { "epoch": 0.98, "learning_rate": 1.569696977162011e-05, "loss": 0.7334, "step": 2964 }, { "epoch": 0.98, "learning_rate": 1.5694024347073432e-05, "loss": 0.6777, "step": 2965 }, { "epoch": 0.98, "learning_rate": 1.5691078191358976e-05, "loss": 0.656, "step": 2966 }, { "epoch": 0.98, "learning_rate": 1.5688131304855047e-05, "loss": 0.7505, "step": 2967 }, { "epoch": 0.99, "learning_rate": 1.5685183687940062e-05, "loss": 0.6042, "step": 2968 }, { "epoch": 0.99, "learning_rate": 1.5682235340992516e-05, "loss": 0.7051, "step": 2969 }, { "epoch": 0.99, "learning_rate": 1.567928626439101e-05, "loss": 0.5991, "step": 2970 }, { "epoch": 0.99, "learning_rate": 1.5676336458514235e-05, "loss": 0.6519, "step": 2971 }, { "epoch": 0.99, "learning_rate": 1.567338592374097e-05, "loss": 0.6177, "step": 2972 }, { "epoch": 0.99, "learning_rate": 1.5670434660450093e-05, "loss": 0.7402, "step": 2973 }, { "epoch": 0.99, "learning_rate": 1.566748266902058e-05, "loss": 0.7188, "step": 2974 }, { "epoch": 0.99, "learning_rate": 1.566452994983149e-05, "loss": 0.6748, "step": 2975 }, { "epoch": 0.99, "learning_rate": 1.5661576503261976e-05, "loss": 0.7178, "step": 2976 }, { "epoch": 0.99, "learning_rate": 1.56586223296913e-05, "loss": 0.6079, "step": 2977 }, { "epoch": 0.99, "learning_rate": 1.5655667429498803e-05, "loss": 0.623, "step": 2978 }, { "epoch": 0.99, "learning_rate": 1.565271180306392e-05, "loss": 0.6582, "step": 2979 }, { "epoch": 0.99, "learning_rate": 1.5649755450766185e-05, "loss": 0.7109, "step": 2980 }, { "epoch": 0.99, "learning_rate": 1.5646798372985224e-05, "loss": 0.5823, "step": 2981 }, { "epoch": 0.99, "learning_rate": 1.5643840570100747e-05, "loss": 0.644, "step": 2982 }, { "epoch": 0.99, "learning_rate": 1.564088204249257e-05, "loss": 0.6257, "step": 2983 }, { "epoch": 0.99, "learning_rate": 1.563792279054059e-05, "loss": 0.7202, "step": 2984 }, { "epoch": 0.99, "learning_rate": 1.5634962814624817e-05, "loss": 0.668, "step": 2985 }, { "epoch": 0.99, "learning_rate": 1.5632002115125328e-05, "loss": 0.718, "step": 2986 }, { "epoch": 0.99, "learning_rate": 1.5629040692422307e-05, "loss": 0.5828, "step": 2987 }, { "epoch": 0.99, "learning_rate": 1.5626078546896037e-05, "loss": 0.5572, "step": 2988 }, { "epoch": 0.99, "learning_rate": 1.562311567892688e-05, "loss": 0.6748, "step": 2989 }, { "epoch": 0.99, "learning_rate": 1.5620152088895297e-05, "loss": 0.6338, "step": 2990 }, { "epoch": 0.99, "learning_rate": 1.561718777718184e-05, "loss": 0.6658, "step": 2991 }, { "epoch": 0.99, "learning_rate": 1.5614222744167163e-05, "loss": 0.7134, "step": 2992 }, { "epoch": 0.99, "learning_rate": 1.5611256990232e-05, "loss": 0.4883, "step": 2993 }, { "epoch": 0.99, "learning_rate": 1.5608290515757176e-05, "loss": 0.6738, "step": 2994 }, { "epoch": 0.99, "learning_rate": 1.5605323321123625e-05, "loss": 0.5825, "step": 2995 }, { "epoch": 0.99, "learning_rate": 1.5602355406712358e-05, "loss": 0.6562, "step": 2996 }, { "epoch": 0.99, "learning_rate": 1.559938677290449e-05, "loss": 0.7314, "step": 2997 }, { "epoch": 1.0, "learning_rate": 1.5596417420081218e-05, "loss": 0.6204, "step": 2998 }, { "epoch": 1.0, "learning_rate": 1.5593447348623834e-05, "loss": 0.623, "step": 2999 }, { "epoch": 1.0, "learning_rate": 1.559047655891372e-05, "loss": 0.6309, "step": 3000 }, { "epoch": 1.0, "learning_rate": 1.5587505051332368e-05, "loss": 0.6602, "step": 3001 }, { "epoch": 1.0, "learning_rate": 1.5584532826261335e-05, "loss": 0.6982, "step": 3002 }, { "epoch": 1.0, "learning_rate": 1.558155988408229e-05, "loss": 0.678, "step": 3003 }, { "epoch": 1.0, "learning_rate": 1.5578586225176986e-05, "loss": 0.6953, "step": 3004 }, { "epoch": 1.0, "learning_rate": 1.5575611849927273e-05, "loss": 0.6321, "step": 3005 }, { "epoch": 1.0, "learning_rate": 1.557263675871508e-05, "loss": 0.6055, "step": 3006 }, { "epoch": 1.0, "learning_rate": 1.5569660951922445e-05, "loss": 0.6938, "step": 3007 }, { "epoch": 1.0, "learning_rate": 1.556668442993149e-05, "loss": 0.6194, "step": 3008 }, { "epoch": 1.0, "learning_rate": 1.5563707193124427e-05, "loss": 0.6582, "step": 3009 }, { "epoch": 1.0, "learning_rate": 1.5560729241883564e-05, "loss": 0.6694, "step": 3010 }, { "epoch": 1.0, "learning_rate": 1.5557750576591297e-05, "loss": 0.6697, "step": 3011 }, { "epoch": 1.0, "learning_rate": 1.555477119763012e-05, "loss": 0.7666, "step": 3012 }, { "epoch": 1.0, "learning_rate": 1.5551791105382606e-05, "loss": 0.6567, "step": 3013 }, { "epoch": 1.0, "learning_rate": 1.554881030023143e-05, "loss": 0.4897, "step": 3014 }, { "epoch": 1.0, "learning_rate": 1.5545828782559364e-05, "loss": 0.5391, "step": 3015 }, { "epoch": 1.0, "learning_rate": 1.5542846552749255e-05, "loss": 0.5286, "step": 3016 }, { "epoch": 1.0, "learning_rate": 1.5539863611184052e-05, "loss": 0.4607, "step": 3017 }, { "epoch": 1.0, "learning_rate": 1.5536879958246794e-05, "loss": 0.509, "step": 3018 }, { "epoch": 1.0, "learning_rate": 1.5533895594320613e-05, "loss": 0.4246, "step": 3019 }, { "epoch": 1.0, "learning_rate": 1.5530910519788732e-05, "loss": 0.4393, "step": 3020 }, { "epoch": 1.0, "learning_rate": 1.5527924735034453e-05, "loss": 0.4412, "step": 3021 }, { "epoch": 1.0, "learning_rate": 1.552493824044119e-05, "loss": 0.4397, "step": 3022 }, { "epoch": 1.0, "learning_rate": 1.5521951036392436e-05, "loss": 0.4961, "step": 3023 }, { "epoch": 1.0, "learning_rate": 1.551896312327177e-05, "loss": 0.4968, "step": 3024 }, { "epoch": 1.0, "learning_rate": 1.5515974501462882e-05, "loss": 0.4854, "step": 3025 }, { "epoch": 1.0, "learning_rate": 1.5512985171349528e-05, "loss": 0.4546, "step": 3026 }, { "epoch": 1.0, "learning_rate": 1.5509995133315575e-05, "loss": 0.4844, "step": 3027 }, { "epoch": 1.0, "learning_rate": 1.5507004387744964e-05, "loss": 0.4246, "step": 3028 }, { "epoch": 1.01, "learning_rate": 1.5504012935021743e-05, "loss": 0.437, "step": 3029 }, { "epoch": 1.01, "learning_rate": 1.550102077553004e-05, "loss": 0.4016, "step": 3030 }, { "epoch": 1.01, "learning_rate": 1.5498027909654076e-05, "loss": 0.4773, "step": 3031 }, { "epoch": 1.01, "learning_rate": 1.549503433777817e-05, "loss": 0.4885, "step": 3032 }, { "epoch": 1.01, "learning_rate": 1.549204006028672e-05, "loss": 0.3882, "step": 3033 }, { "epoch": 1.01, "learning_rate": 1.548904507756422e-05, "loss": 0.4329, "step": 3034 }, { "epoch": 1.01, "learning_rate": 1.5486049389995258e-05, "loss": 0.4675, "step": 3035 }, { "epoch": 1.01, "learning_rate": 1.548305299796451e-05, "loss": 0.4507, "step": 3036 }, { "epoch": 1.01, "learning_rate": 1.5480055901856736e-05, "loss": 0.4849, "step": 3037 }, { "epoch": 1.01, "learning_rate": 1.54770581020568e-05, "loss": 0.5063, "step": 3038 }, { "epoch": 1.01, "learning_rate": 1.547405959894964e-05, "loss": 0.4795, "step": 3039 }, { "epoch": 1.01, "learning_rate": 1.54710603929203e-05, "loss": 0.4141, "step": 3040 }, { "epoch": 1.01, "learning_rate": 1.5468060484353903e-05, "loss": 0.4309, "step": 3041 }, { "epoch": 1.01, "learning_rate": 1.5465059873635667e-05, "loss": 0.3997, "step": 3042 }, { "epoch": 1.01, "learning_rate": 1.5462058561150902e-05, "loss": 0.4066, "step": 3043 }, { "epoch": 1.01, "learning_rate": 1.5459056547284998e-05, "loss": 0.4441, "step": 3044 }, { "epoch": 1.01, "learning_rate": 1.5456053832423452e-05, "loss": 0.5371, "step": 3045 }, { "epoch": 1.01, "learning_rate": 1.5453050416951836e-05, "loss": 0.4768, "step": 3046 }, { "epoch": 1.01, "learning_rate": 1.545004630125582e-05, "loss": 0.4731, "step": 3047 }, { "epoch": 1.01, "learning_rate": 1.544704148572116e-05, "loss": 0.4607, "step": 3048 }, { "epoch": 1.01, "learning_rate": 1.5444035970733706e-05, "loss": 0.5325, "step": 3049 }, { "epoch": 1.01, "learning_rate": 1.544102975667939e-05, "loss": 0.468, "step": 3050 }, { "epoch": 1.01, "learning_rate": 1.5438022843944245e-05, "loss": 0.4875, "step": 3051 }, { "epoch": 1.01, "learning_rate": 1.5435015232914384e-05, "loss": 0.4087, "step": 3052 }, { "epoch": 1.01, "learning_rate": 1.543200692397601e-05, "loss": 0.498, "step": 3053 }, { "epoch": 1.01, "learning_rate": 1.542899791751543e-05, "loss": 0.3679, "step": 3054 }, { "epoch": 1.01, "learning_rate": 1.5425988213919016e-05, "loss": 0.4075, "step": 3055 }, { "epoch": 1.01, "learning_rate": 1.5422977813573254e-05, "loss": 0.4578, "step": 3056 }, { "epoch": 1.01, "learning_rate": 1.5419966716864703e-05, "loss": 0.4834, "step": 3057 }, { "epoch": 1.01, "learning_rate": 1.541695492418002e-05, "loss": 0.4215, "step": 3058 }, { "epoch": 1.02, "learning_rate": 1.5413942435905943e-05, "loss": 0.4451, "step": 3059 }, { "epoch": 1.02, "learning_rate": 1.5410929252429308e-05, "loss": 0.4956, "step": 3060 }, { "epoch": 1.02, "learning_rate": 1.5407915374137038e-05, "loss": 0.4167, "step": 3061 }, { "epoch": 1.02, "learning_rate": 1.5404900801416143e-05, "loss": 0.4895, "step": 3062 }, { "epoch": 1.02, "learning_rate": 1.5401885534653723e-05, "loss": 0.5393, "step": 3063 }, { "epoch": 1.02, "learning_rate": 1.539886957423697e-05, "loss": 0.4094, "step": 3064 }, { "epoch": 1.02, "learning_rate": 1.539585292055316e-05, "loss": 0.5471, "step": 3065 }, { "epoch": 1.02, "learning_rate": 1.5392835573989656e-05, "loss": 0.4189, "step": 3066 }, { "epoch": 1.02, "learning_rate": 1.5389817534933923e-05, "loss": 0.4844, "step": 3067 }, { "epoch": 1.02, "learning_rate": 1.5386798803773505e-05, "loss": 0.4325, "step": 3068 }, { "epoch": 1.02, "learning_rate": 1.5383779380896028e-05, "loss": 0.4033, "step": 3069 }, { "epoch": 1.02, "learning_rate": 1.5380759266689223e-05, "loss": 0.3837, "step": 3070 }, { "epoch": 1.02, "learning_rate": 1.53777384615409e-05, "loss": 0.4839, "step": 3071 }, { "epoch": 1.02, "learning_rate": 1.537471696583896e-05, "loss": 0.4402, "step": 3072 }, { "epoch": 1.02, "learning_rate": 1.537169477997139e-05, "loss": 0.4563, "step": 3073 }, { "epoch": 1.02, "learning_rate": 1.5368671904326274e-05, "loss": 0.5371, "step": 3074 }, { "epoch": 1.02, "learning_rate": 1.536564833929177e-05, "loss": 0.4465, "step": 3075 }, { "epoch": 1.02, "learning_rate": 1.5362624085256136e-05, "loss": 0.458, "step": 3076 }, { "epoch": 1.02, "learning_rate": 1.5359599142607722e-05, "loss": 0.4822, "step": 3077 }, { "epoch": 1.02, "learning_rate": 1.535657351173495e-05, "loss": 0.4724, "step": 3078 }, { "epoch": 1.02, "learning_rate": 1.5353547193026344e-05, "loss": 0.4225, "step": 3079 }, { "epoch": 1.02, "learning_rate": 1.5350520186870514e-05, "loss": 0.5005, "step": 3080 }, { "epoch": 1.02, "learning_rate": 1.5347492493656158e-05, "loss": 0.4987, "step": 3081 }, { "epoch": 1.02, "learning_rate": 1.5344464113772056e-05, "loss": 0.5176, "step": 3082 }, { "epoch": 1.02, "learning_rate": 1.5341435047607088e-05, "loss": 0.4194, "step": 3083 }, { "epoch": 1.02, "learning_rate": 1.5338405295550208e-05, "loss": 0.4229, "step": 3084 }, { "epoch": 1.02, "learning_rate": 1.533537485799047e-05, "loss": 0.4829, "step": 3085 }, { "epoch": 1.02, "learning_rate": 1.5332343735317016e-05, "loss": 0.5474, "step": 3086 }, { "epoch": 1.02, "learning_rate": 1.532931192791906e-05, "loss": 0.5168, "step": 3087 }, { "epoch": 1.02, "learning_rate": 1.5326279436185923e-05, "loss": 0.3803, "step": 3088 }, { "epoch": 1.03, "learning_rate": 1.5323246260507004e-05, "loss": 0.4287, "step": 3089 }, { "epoch": 1.03, "learning_rate": 1.5320212401271795e-05, "loss": 0.4873, "step": 3090 }, { "epoch": 1.03, "learning_rate": 1.531717785886987e-05, "loss": 0.4485, "step": 3091 }, { "epoch": 1.03, "learning_rate": 1.5314142633690896e-05, "loss": 0.4849, "step": 3092 }, { "epoch": 1.03, "learning_rate": 1.5311106726124618e-05, "loss": 0.5427, "step": 3093 }, { "epoch": 1.03, "learning_rate": 1.530807013656089e-05, "loss": 0.541, "step": 3094 }, { "epoch": 1.03, "learning_rate": 1.5305032865389623e-05, "loss": 0.4221, "step": 3095 }, { "epoch": 1.03, "learning_rate": 1.5301994913000843e-05, "loss": 0.4651, "step": 3096 }, { "epoch": 1.03, "learning_rate": 1.5298956279784652e-05, "loss": 0.4105, "step": 3097 }, { "epoch": 1.03, "learning_rate": 1.5295916966131232e-05, "loss": 0.399, "step": 3098 }, { "epoch": 1.03, "learning_rate": 1.529287697243087e-05, "loss": 0.5046, "step": 3099 }, { "epoch": 1.03, "learning_rate": 1.5289836299073922e-05, "loss": 0.4645, "step": 3100 }, { "epoch": 1.03, "learning_rate": 1.528679494645085e-05, "loss": 0.37, "step": 3101 }, { "epoch": 1.03, "learning_rate": 1.5283752914952182e-05, "loss": 0.4387, "step": 3102 }, { "epoch": 1.03, "learning_rate": 1.528071020496855e-05, "loss": 0.4954, "step": 3103 }, { "epoch": 1.03, "learning_rate": 1.527766681689067e-05, "loss": 0.4062, "step": 3104 }, { "epoch": 1.03, "learning_rate": 1.5274622751109337e-05, "loss": 0.5166, "step": 3105 }, { "epoch": 1.03, "learning_rate": 1.5271578008015443e-05, "loss": 0.4795, "step": 3106 }, { "epoch": 1.03, "learning_rate": 1.5268532587999957e-05, "loss": 0.4302, "step": 3107 }, { "epoch": 1.03, "learning_rate": 1.5265486491453944e-05, "loss": 0.4333, "step": 3108 }, { "epoch": 1.03, "learning_rate": 1.5262439718768552e-05, "loss": 0.4609, "step": 3109 }, { "epoch": 1.03, "learning_rate": 1.5259392270335017e-05, "loss": 0.3975, "step": 3110 }, { "epoch": 1.03, "learning_rate": 1.5256344146544657e-05, "loss": 0.5259, "step": 3111 }, { "epoch": 1.03, "learning_rate": 1.5253295347788885e-05, "loss": 0.5261, "step": 3112 }, { "epoch": 1.03, "learning_rate": 1.5250245874459193e-05, "loss": 0.5059, "step": 3113 }, { "epoch": 1.03, "learning_rate": 1.5247195726947169e-05, "loss": 0.4697, "step": 3114 }, { "epoch": 1.03, "learning_rate": 1.5244144905644472e-05, "loss": 0.4954, "step": 3115 }, { "epoch": 1.03, "learning_rate": 1.5241093410942862e-05, "loss": 0.4601, "step": 3116 }, { "epoch": 1.03, "learning_rate": 1.523804124323418e-05, "loss": 0.3915, "step": 3117 }, { "epoch": 1.03, "learning_rate": 1.5234988402910355e-05, "loss": 0.4361, "step": 3118 }, { "epoch": 1.04, "learning_rate": 1.52319348903634e-05, "loss": 0.4509, "step": 3119 }, { "epoch": 1.04, "learning_rate": 1.5228880705985418e-05, "loss": 0.5237, "step": 3120 }, { "epoch": 1.04, "learning_rate": 1.522582585016859e-05, "loss": 0.4503, "step": 3121 }, { "epoch": 1.04, "learning_rate": 1.5222770323305194e-05, "loss": 0.5237, "step": 3122 }, { "epoch": 1.04, "learning_rate": 1.521971412578759e-05, "loss": 0.4434, "step": 3123 }, { "epoch": 1.04, "learning_rate": 1.5216657258008218e-05, "loss": 0.4049, "step": 3124 }, { "epoch": 1.04, "learning_rate": 1.5213599720359616e-05, "loss": 0.4325, "step": 3125 }, { "epoch": 1.04, "learning_rate": 1.5210541513234396e-05, "loss": 0.4183, "step": 3126 }, { "epoch": 1.04, "learning_rate": 1.5207482637025259e-05, "loss": 0.4556, "step": 3127 }, { "epoch": 1.04, "learning_rate": 1.5204423092125002e-05, "loss": 0.4546, "step": 3128 }, { "epoch": 1.04, "learning_rate": 1.5201362878926498e-05, "loss": 0.4773, "step": 3129 }, { "epoch": 1.04, "learning_rate": 1.5198301997822707e-05, "loss": 0.4324, "step": 3130 }, { "epoch": 1.04, "learning_rate": 1.5195240449206672e-05, "loss": 0.4197, "step": 3131 }, { "epoch": 1.04, "learning_rate": 1.519217823347153e-05, "loss": 0.4758, "step": 3132 }, { "epoch": 1.04, "learning_rate": 1.51891153510105e-05, "loss": 0.4026, "step": 3133 }, { "epoch": 1.04, "learning_rate": 1.5186051802216884e-05, "loss": 0.3745, "step": 3134 }, { "epoch": 1.04, "learning_rate": 1.5182987587484066e-05, "loss": 0.444, "step": 3135 }, { "epoch": 1.04, "learning_rate": 1.5179922707205528e-05, "loss": 0.4875, "step": 3136 }, { "epoch": 1.04, "learning_rate": 1.5176857161774829e-05, "loss": 0.4592, "step": 3137 }, { "epoch": 1.04, "learning_rate": 1.5173790951585613e-05, "loss": 0.4739, "step": 3138 }, { "epoch": 1.04, "learning_rate": 1.5170724077031613e-05, "loss": 0.4573, "step": 3139 }, { "epoch": 1.04, "learning_rate": 1.5167656538506643e-05, "loss": 0.5076, "step": 3140 }, { "epoch": 1.04, "learning_rate": 1.5164588336404606e-05, "loss": 0.4205, "step": 3141 }, { "epoch": 1.04, "learning_rate": 1.516151947111949e-05, "loss": 0.5266, "step": 3142 }, { "epoch": 1.04, "learning_rate": 1.5158449943045361e-05, "loss": 0.4631, "step": 3143 }, { "epoch": 1.04, "learning_rate": 1.5155379752576386e-05, "loss": 0.4536, "step": 3144 }, { "epoch": 1.04, "learning_rate": 1.5152308900106798e-05, "loss": 0.4382, "step": 3145 }, { "epoch": 1.04, "learning_rate": 1.5149237386030929e-05, "loss": 0.5117, "step": 3146 }, { "epoch": 1.04, "learning_rate": 1.5146165210743192e-05, "loss": 0.4951, "step": 3147 }, { "epoch": 1.04, "learning_rate": 1.5143092374638077e-05, "loss": 0.4587, "step": 3148 }, { "epoch": 1.05, "learning_rate": 1.5140018878110173e-05, "loss": 0.3909, "step": 3149 }, { "epoch": 1.05, "learning_rate": 1.5136944721554145e-05, "loss": 0.4885, "step": 3150 }, { "epoch": 1.05, "learning_rate": 1.5133869905364745e-05, "loss": 0.4553, "step": 3151 }, { "epoch": 1.05, "learning_rate": 1.5130794429936808e-05, "loss": 0.4446, "step": 3152 }, { "epoch": 1.05, "learning_rate": 1.5127718295665253e-05, "loss": 0.439, "step": 3153 }, { "epoch": 1.05, "learning_rate": 1.512464150294509e-05, "loss": 0.4224, "step": 3154 }, { "epoch": 1.05, "learning_rate": 1.5121564052171403e-05, "loss": 0.4907, "step": 3155 }, { "epoch": 1.05, "learning_rate": 1.511848594373937e-05, "loss": 0.406, "step": 3156 }, { "epoch": 1.05, "learning_rate": 1.511540717804425e-05, "loss": 0.4196, "step": 3157 }, { "epoch": 1.05, "learning_rate": 1.5112327755481386e-05, "loss": 0.4149, "step": 3158 }, { "epoch": 1.05, "learning_rate": 1.5109247676446207e-05, "loss": 0.4658, "step": 3159 }, { "epoch": 1.05, "learning_rate": 1.5106166941334218e-05, "loss": 0.4736, "step": 3160 }, { "epoch": 1.05, "learning_rate": 1.5103085550541026e-05, "loss": 0.4832, "step": 3161 }, { "epoch": 1.05, "learning_rate": 1.5100003504462306e-05, "loss": 0.439, "step": 3162 }, { "epoch": 1.05, "learning_rate": 1.5096920803493818e-05, "loss": 0.4626, "step": 3163 }, { "epoch": 1.05, "learning_rate": 1.5093837448031419e-05, "loss": 0.4951, "step": 3164 }, { "epoch": 1.05, "learning_rate": 1.5090753438471034e-05, "loss": 0.4534, "step": 3165 }, { "epoch": 1.05, "learning_rate": 1.5087668775208686e-05, "loss": 0.5037, "step": 3166 }, { "epoch": 1.05, "learning_rate": 1.5084583458640472e-05, "loss": 0.4512, "step": 3167 }, { "epoch": 1.05, "learning_rate": 1.508149748916258e-05, "loss": 0.511, "step": 3168 }, { "epoch": 1.05, "learning_rate": 1.507841086717127e-05, "loss": 0.4132, "step": 3169 }, { "epoch": 1.05, "learning_rate": 1.5075323593062906e-05, "loss": 0.4365, "step": 3170 }, { "epoch": 1.05, "learning_rate": 1.5072235667233913e-05, "loss": 0.4453, "step": 3171 }, { "epoch": 1.05, "learning_rate": 1.5069147090080819e-05, "loss": 0.4771, "step": 3172 }, { "epoch": 1.05, "learning_rate": 1.5066057862000221e-05, "loss": 0.4741, "step": 3173 }, { "epoch": 1.05, "learning_rate": 1.5062967983388807e-05, "loss": 0.4344, "step": 3174 }, { "epoch": 1.05, "learning_rate": 1.505987745464335e-05, "loss": 0.4519, "step": 3175 }, { "epoch": 1.05, "learning_rate": 1.5056786276160703e-05, "loss": 0.4673, "step": 3176 }, { "epoch": 1.05, "learning_rate": 1.5053694448337798e-05, "loss": 0.4161, "step": 3177 }, { "epoch": 1.05, "learning_rate": 1.5050601971571665e-05, "loss": 0.5039, "step": 3178 }, { "epoch": 1.06, "learning_rate": 1.5047508846259403e-05, "loss": 0.4924, "step": 3179 }, { "epoch": 1.06, "learning_rate": 1.5044415072798195e-05, "loss": 0.4265, "step": 3180 }, { "epoch": 1.06, "learning_rate": 1.5041320651585318e-05, "loss": 0.4868, "step": 3181 }, { "epoch": 1.06, "learning_rate": 1.5038225583018125e-05, "loss": 0.4944, "step": 3182 }, { "epoch": 1.06, "learning_rate": 1.5035129867494047e-05, "loss": 0.4839, "step": 3183 }, { "epoch": 1.06, "learning_rate": 1.5032033505410609e-05, "loss": 0.4491, "step": 3184 }, { "epoch": 1.06, "learning_rate": 1.5028936497165414e-05, "loss": 0.4602, "step": 3185 }, { "epoch": 1.06, "learning_rate": 1.5025838843156147e-05, "loss": 0.457, "step": 3186 }, { "epoch": 1.06, "learning_rate": 1.5022740543780572e-05, "loss": 0.4229, "step": 3187 }, { "epoch": 1.06, "learning_rate": 1.5019641599436548e-05, "loss": 0.4922, "step": 3188 }, { "epoch": 1.06, "learning_rate": 1.5016542010522003e-05, "loss": 0.4651, "step": 3189 }, { "epoch": 1.06, "learning_rate": 1.501344177743496e-05, "loss": 0.3944, "step": 3190 }, { "epoch": 1.06, "learning_rate": 1.5010340900573517e-05, "loss": 0.3938, "step": 3191 }, { "epoch": 1.06, "learning_rate": 1.5007239380335855e-05, "loss": 0.4419, "step": 3192 }, { "epoch": 1.06, "learning_rate": 1.5004137217120235e-05, "loss": 0.5083, "step": 3193 }, { "epoch": 1.06, "learning_rate": 1.5001034411325015e-05, "loss": 0.3926, "step": 3194 }, { "epoch": 1.06, "learning_rate": 1.4997930963348619e-05, "loss": 0.4839, "step": 3195 }, { "epoch": 1.06, "learning_rate": 1.4994826873589558e-05, "loss": 0.4666, "step": 3196 }, { "epoch": 1.06, "learning_rate": 1.4991722142446431e-05, "loss": 0.4968, "step": 3197 }, { "epoch": 1.06, "learning_rate": 1.4988616770317915e-05, "loss": 0.4653, "step": 3198 }, { "epoch": 1.06, "learning_rate": 1.498551075760277e-05, "loss": 0.4561, "step": 3199 }, { "epoch": 1.06, "learning_rate": 1.4982404104699838e-05, "loss": 0.4645, "step": 3200 }, { "epoch": 1.06, "learning_rate": 1.497929681200804e-05, "loss": 0.3997, "step": 3201 }, { "epoch": 1.06, "learning_rate": 1.4976188879926387e-05, "loss": 0.4242, "step": 3202 }, { "epoch": 1.06, "learning_rate": 1.4973080308853962e-05, "loss": 0.4805, "step": 3203 }, { "epoch": 1.06, "learning_rate": 1.4969971099189941e-05, "loss": 0.4734, "step": 3204 }, { "epoch": 1.06, "learning_rate": 1.4966861251333573e-05, "loss": 0.4661, "step": 3205 }, { "epoch": 1.06, "learning_rate": 1.4963750765684197e-05, "loss": 0.446, "step": 3206 }, { "epoch": 1.06, "learning_rate": 1.4960639642641224e-05, "loss": 0.4004, "step": 3207 }, { "epoch": 1.06, "learning_rate": 1.4957527882604158e-05, "loss": 0.4829, "step": 3208 }, { "epoch": 1.07, "learning_rate": 1.4954415485972573e-05, "loss": 0.4441, "step": 3209 }, { "epoch": 1.07, "learning_rate": 1.4951302453146134e-05, "loss": 0.4839, "step": 3210 }, { "epoch": 1.07, "learning_rate": 1.4948188784524585e-05, "loss": 0.4493, "step": 3211 }, { "epoch": 1.07, "learning_rate": 1.4945074480507748e-05, "loss": 0.4592, "step": 3212 }, { "epoch": 1.07, "learning_rate": 1.4941959541495537e-05, "loss": 0.4321, "step": 3213 }, { "epoch": 1.07, "learning_rate": 1.4938843967887933e-05, "loss": 0.4526, "step": 3214 }, { "epoch": 1.07, "learning_rate": 1.4935727760085006e-05, "loss": 0.4541, "step": 3215 }, { "epoch": 1.07, "learning_rate": 1.4932610918486911e-05, "loss": 0.4751, "step": 3216 }, { "epoch": 1.07, "learning_rate": 1.492949344349388e-05, "loss": 0.5195, "step": 3217 }, { "epoch": 1.07, "learning_rate": 1.4926375335506227e-05, "loss": 0.3982, "step": 3218 }, { "epoch": 1.07, "learning_rate": 1.4923256594924342e-05, "loss": 0.465, "step": 3219 }, { "epoch": 1.07, "learning_rate": 1.492013722214871e-05, "loss": 0.4966, "step": 3220 }, { "epoch": 1.07, "learning_rate": 1.4917017217579882e-05, "loss": 0.5049, "step": 3221 }, { "epoch": 1.07, "learning_rate": 1.4913896581618497e-05, "loss": 0.4197, "step": 3222 }, { "epoch": 1.07, "learning_rate": 1.491077531466528e-05, "loss": 0.4181, "step": 3223 }, { "epoch": 1.07, "learning_rate": 1.4907653417121027e-05, "loss": 0.3962, "step": 3224 }, { "epoch": 1.07, "learning_rate": 1.4904530889386622e-05, "loss": 0.4563, "step": 3225 }, { "epoch": 1.07, "learning_rate": 1.4901407731863024e-05, "loss": 0.4329, "step": 3226 }, { "epoch": 1.07, "learning_rate": 1.4898283944951287e-05, "loss": 0.421, "step": 3227 }, { "epoch": 1.07, "learning_rate": 1.4895159529052526e-05, "loss": 0.4343, "step": 3228 }, { "epoch": 1.07, "learning_rate": 1.4892034484567942e-05, "loss": 0.4446, "step": 3229 }, { "epoch": 1.07, "learning_rate": 1.4888908811898836e-05, "loss": 0.4702, "step": 3230 }, { "epoch": 1.07, "learning_rate": 1.4885782511446562e-05, "loss": 0.4919, "step": 3231 }, { "epoch": 1.07, "learning_rate": 1.4882655583612571e-05, "loss": 0.4938, "step": 3232 }, { "epoch": 1.07, "learning_rate": 1.4879528028798394e-05, "loss": 0.45, "step": 3233 }, { "epoch": 1.07, "learning_rate": 1.4876399847405631e-05, "loss": 0.4558, "step": 3234 }, { "epoch": 1.07, "learning_rate": 1.487327103983598e-05, "loss": 0.4614, "step": 3235 }, { "epoch": 1.07, "learning_rate": 1.4870141606491203e-05, "loss": 0.4761, "step": 3236 }, { "epoch": 1.07, "learning_rate": 1.4867011547773158e-05, "loss": 0.5007, "step": 3237 }, { "epoch": 1.07, "learning_rate": 1.4863880864083768e-05, "loss": 0.4683, "step": 3238 }, { "epoch": 1.08, "learning_rate": 1.4860749555825043e-05, "loss": 0.4189, "step": 3239 }, { "epoch": 1.08, "learning_rate": 1.4857617623399074e-05, "loss": 0.4314, "step": 3240 }, { "epoch": 1.08, "learning_rate": 1.4854485067208033e-05, "loss": 0.5005, "step": 3241 }, { "epoch": 1.08, "learning_rate": 1.4851351887654168e-05, "loss": 0.436, "step": 3242 }, { "epoch": 1.08, "learning_rate": 1.4848218085139814e-05, "loss": 0.4631, "step": 3243 }, { "epoch": 1.08, "learning_rate": 1.4845083660067378e-05, "loss": 0.4482, "step": 3244 }, { "epoch": 1.08, "learning_rate": 1.484194861283935e-05, "loss": 0.4458, "step": 3245 }, { "epoch": 1.08, "learning_rate": 1.4838812943858302e-05, "loss": 0.4863, "step": 3246 }, { "epoch": 1.08, "learning_rate": 1.4835676653526887e-05, "loss": 0.4197, "step": 3247 }, { "epoch": 1.08, "learning_rate": 1.4832539742247832e-05, "loss": 0.4683, "step": 3248 }, { "epoch": 1.08, "learning_rate": 1.4829402210423944e-05, "loss": 0.4812, "step": 3249 }, { "epoch": 1.08, "learning_rate": 1.4826264058458113e-05, "loss": 0.3967, "step": 3250 }, { "epoch": 1.08, "learning_rate": 1.4823125286753315e-05, "loss": 0.4033, "step": 3251 }, { "epoch": 1.08, "learning_rate": 1.4819985895712589e-05, "loss": 0.4573, "step": 3252 }, { "epoch": 1.08, "learning_rate": 1.4816845885739071e-05, "loss": 0.4343, "step": 3253 }, { "epoch": 1.08, "learning_rate": 1.4813705257235966e-05, "loss": 0.4741, "step": 3254 }, { "epoch": 1.08, "learning_rate": 1.4810564010606558e-05, "loss": 0.4541, "step": 3255 }, { "epoch": 1.08, "learning_rate": 1.480742214625422e-05, "loss": 0.4673, "step": 3256 }, { "epoch": 1.08, "learning_rate": 1.480427966458239e-05, "loss": 0.5161, "step": 3257 }, { "epoch": 1.08, "learning_rate": 1.48011365659946e-05, "loss": 0.4882, "step": 3258 }, { "epoch": 1.08, "learning_rate": 1.479799285089445e-05, "loss": 0.3876, "step": 3259 }, { "epoch": 1.08, "learning_rate": 1.4794848519685622e-05, "loss": 0.425, "step": 3260 }, { "epoch": 1.08, "learning_rate": 1.4791703572771883e-05, "loss": 0.5198, "step": 3261 }, { "epoch": 1.08, "learning_rate": 1.4788558010557072e-05, "loss": 0.4465, "step": 3262 }, { "epoch": 1.08, "learning_rate": 1.4785411833445112e-05, "loss": 0.5017, "step": 3263 }, { "epoch": 1.08, "learning_rate": 1.4782265041839999e-05, "loss": 0.5012, "step": 3264 }, { "epoch": 1.08, "learning_rate": 1.4779117636145815e-05, "loss": 0.4567, "step": 3265 }, { "epoch": 1.08, "learning_rate": 1.4775969616766716e-05, "loss": 0.4277, "step": 3266 }, { "epoch": 1.08, "learning_rate": 1.4772820984106937e-05, "loss": 0.4595, "step": 3267 }, { "epoch": 1.08, "learning_rate": 1.4769671738570793e-05, "loss": 0.4851, "step": 3268 }, { "epoch": 1.08, "learning_rate": 1.4766521880562676e-05, "loss": 0.467, "step": 3269 }, { "epoch": 1.09, "learning_rate": 1.4763371410487065e-05, "loss": 0.4463, "step": 3270 }, { "epoch": 1.09, "learning_rate": 1.4760220328748504e-05, "loss": 0.448, "step": 3271 }, { "epoch": 1.09, "learning_rate": 1.4757068635751623e-05, "loss": 0.4541, "step": 3272 }, { "epoch": 1.09, "learning_rate": 1.4753916331901134e-05, "loss": 0.3816, "step": 3273 }, { "epoch": 1.09, "learning_rate": 1.4750763417601819e-05, "loss": 0.3759, "step": 3274 }, { "epoch": 1.09, "learning_rate": 1.4747609893258546e-05, "loss": 0.439, "step": 3275 }, { "epoch": 1.09, "learning_rate": 1.4744455759276257e-05, "loss": 0.4041, "step": 3276 }, { "epoch": 1.09, "learning_rate": 1.4741301016059972e-05, "loss": 0.476, "step": 3277 }, { "epoch": 1.09, "learning_rate": 1.4738145664014791e-05, "loss": 0.4641, "step": 3278 }, { "epoch": 1.09, "learning_rate": 1.4734989703545892e-05, "loss": 0.4104, "step": 3279 }, { "epoch": 1.09, "learning_rate": 1.4731833135058531e-05, "loss": 0.5142, "step": 3280 }, { "epoch": 1.09, "learning_rate": 1.4728675958958041e-05, "loss": 0.4456, "step": 3281 }, { "epoch": 1.09, "learning_rate": 1.4725518175649836e-05, "loss": 0.4539, "step": 3282 }, { "epoch": 1.09, "learning_rate": 1.4722359785539402e-05, "loss": 0.4421, "step": 3283 }, { "epoch": 1.09, "learning_rate": 1.4719200789032312e-05, "loss": 0.427, "step": 3284 }, { "epoch": 1.09, "learning_rate": 1.4716041186534209e-05, "loss": 0.4854, "step": 3285 }, { "epoch": 1.09, "learning_rate": 1.4712880978450818e-05, "loss": 0.4194, "step": 3286 }, { "epoch": 1.09, "learning_rate": 1.4709720165187937e-05, "loss": 0.4573, "step": 3287 }, { "epoch": 1.09, "learning_rate": 1.4706558747151446e-05, "loss": 0.3621, "step": 3288 }, { "epoch": 1.09, "learning_rate": 1.4703396724747305e-05, "loss": 0.3911, "step": 3289 }, { "epoch": 1.09, "learning_rate": 1.4700234098381545e-05, "loss": 0.4479, "step": 3290 }, { "epoch": 1.09, "learning_rate": 1.4697070868460277e-05, "loss": 0.4087, "step": 3291 }, { "epoch": 1.09, "learning_rate": 1.4693907035389695e-05, "loss": 0.4478, "step": 3292 }, { "epoch": 1.09, "learning_rate": 1.4690742599576058e-05, "loss": 0.4103, "step": 3293 }, { "epoch": 1.09, "learning_rate": 1.468757756142572e-05, "loss": 0.4545, "step": 3294 }, { "epoch": 1.09, "learning_rate": 1.4684411921345094e-05, "loss": 0.4215, "step": 3295 }, { "epoch": 1.09, "learning_rate": 1.4681245679740683e-05, "loss": 0.4666, "step": 3296 }, { "epoch": 1.09, "learning_rate": 1.4678078837019064e-05, "loss": 0.4053, "step": 3297 }, { "epoch": 1.09, "learning_rate": 1.4674911393586885e-05, "loss": 0.5161, "step": 3298 }, { "epoch": 1.09, "learning_rate": 1.4671743349850882e-05, "loss": 0.426, "step": 3299 }, { "epoch": 1.1, "learning_rate": 1.466857470621786e-05, "loss": 0.5266, "step": 3300 }, { "epoch": 1.1, "learning_rate": 1.4665405463094704e-05, "loss": 0.4055, "step": 3301 }, { "epoch": 1.1, "learning_rate": 1.4662235620888372e-05, "loss": 0.4127, "step": 3302 }, { "epoch": 1.1, "learning_rate": 1.4659065180005908e-05, "loss": 0.4661, "step": 3303 }, { "epoch": 1.1, "learning_rate": 1.465589414085443e-05, "loss": 0.4573, "step": 3304 }, { "epoch": 1.1, "learning_rate": 1.465272250384112e-05, "loss": 0.479, "step": 3305 }, { "epoch": 1.1, "learning_rate": 1.4649550269373251e-05, "loss": 0.4395, "step": 3306 }, { "epoch": 1.1, "learning_rate": 1.4646377437858167e-05, "loss": 0.4622, "step": 3307 }, { "epoch": 1.1, "learning_rate": 1.4643204009703298e-05, "loss": 0.4592, "step": 3308 }, { "epoch": 1.1, "learning_rate": 1.4640029985316136e-05, "loss": 0.4907, "step": 3309 }, { "epoch": 1.1, "learning_rate": 1.4636855365104257e-05, "loss": 0.403, "step": 3310 }, { "epoch": 1.1, "learning_rate": 1.4633680149475313e-05, "loss": 0.4507, "step": 3311 }, { "epoch": 1.1, "learning_rate": 1.4630504338837032e-05, "loss": 0.4504, "step": 3312 }, { "epoch": 1.1, "learning_rate": 1.4627327933597224e-05, "loss": 0.4301, "step": 3313 }, { "epoch": 1.1, "learning_rate": 1.4624150934163765e-05, "loss": 0.47, "step": 3314 }, { "epoch": 1.1, "learning_rate": 1.4620973340944611e-05, "loss": 0.3953, "step": 3315 }, { "epoch": 1.1, "learning_rate": 1.4617795154347803e-05, "loss": 0.5117, "step": 3316 }, { "epoch": 1.1, "learning_rate": 1.4614616374781441e-05, "loss": 0.479, "step": 3317 }, { "epoch": 1.1, "learning_rate": 1.4611437002653719e-05, "loss": 0.4729, "step": 3318 }, { "epoch": 1.1, "learning_rate": 1.4608257038372894e-05, "loss": 0.4283, "step": 3319 }, { "epoch": 1.1, "learning_rate": 1.460507648234731e-05, "loss": 0.4692, "step": 3320 }, { "epoch": 1.1, "learning_rate": 1.4601895334985377e-05, "loss": 0.4849, "step": 3321 }, { "epoch": 1.1, "learning_rate": 1.4598713596695582e-05, "loss": 0.4741, "step": 3322 }, { "epoch": 1.1, "learning_rate": 1.4595531267886498e-05, "loss": 0.405, "step": 3323 }, { "epoch": 1.1, "learning_rate": 1.4592348348966765e-05, "loss": 0.4608, "step": 3324 }, { "epoch": 1.1, "learning_rate": 1.4589164840345096e-05, "loss": 0.3464, "step": 3325 }, { "epoch": 1.1, "learning_rate": 1.4585980742430288e-05, "loss": 0.3657, "step": 3326 }, { "epoch": 1.1, "learning_rate": 1.4582796055631209e-05, "loss": 0.4574, "step": 3327 }, { "epoch": 1.1, "learning_rate": 1.4579610780356803e-05, "loss": 0.4402, "step": 3328 }, { "epoch": 1.1, "learning_rate": 1.4576424917016092e-05, "loss": 0.3854, "step": 3329 }, { "epoch": 1.11, "learning_rate": 1.4573238466018173e-05, "loss": 0.4695, "step": 3330 }, { "epoch": 1.11, "learning_rate": 1.4570051427772213e-05, "loss": 0.4775, "step": 3331 }, { "epoch": 1.11, "learning_rate": 1.4566863802687459e-05, "loss": 0.4641, "step": 3332 }, { "epoch": 1.11, "learning_rate": 1.4563675591173236e-05, "loss": 0.4939, "step": 3333 }, { "epoch": 1.11, "learning_rate": 1.4560486793638942e-05, "loss": 0.4607, "step": 3334 }, { "epoch": 1.11, "learning_rate": 1.455729741049404e-05, "loss": 0.4573, "step": 3335 }, { "epoch": 1.11, "learning_rate": 1.4554107442148089e-05, "loss": 0.5149, "step": 3336 }, { "epoch": 1.11, "learning_rate": 1.4550916889010706e-05, "loss": 0.3324, "step": 3337 }, { "epoch": 1.11, "learning_rate": 1.4547725751491591e-05, "loss": 0.5178, "step": 3338 }, { "epoch": 1.11, "learning_rate": 1.4544534030000515e-05, "loss": 0.4578, "step": 3339 }, { "epoch": 1.11, "learning_rate": 1.4541341724947327e-05, "loss": 0.4204, "step": 3340 }, { "epoch": 1.11, "learning_rate": 1.453814883674195e-05, "loss": 0.4526, "step": 3341 }, { "epoch": 1.11, "learning_rate": 1.4534955365794383e-05, "loss": 0.4702, "step": 3342 }, { "epoch": 1.11, "learning_rate": 1.4531761312514693e-05, "loss": 0.4673, "step": 3343 }, { "epoch": 1.11, "learning_rate": 1.4528566677313036e-05, "loss": 0.4556, "step": 3344 }, { "epoch": 1.11, "learning_rate": 1.4525371460599626e-05, "loss": 0.4497, "step": 3345 }, { "epoch": 1.11, "learning_rate": 1.4522175662784765e-05, "loss": 0.4629, "step": 3346 }, { "epoch": 1.11, "learning_rate": 1.451897928427882e-05, "loss": 0.4492, "step": 3347 }, { "epoch": 1.11, "learning_rate": 1.4515782325492242e-05, "loss": 0.4655, "step": 3348 }, { "epoch": 1.11, "learning_rate": 1.4512584786835549e-05, "loss": 0.5044, "step": 3349 }, { "epoch": 1.11, "learning_rate": 1.4509386668719334e-05, "loss": 0.4382, "step": 3350 }, { "epoch": 1.11, "learning_rate": 1.4506187971554271e-05, "loss": 0.4568, "step": 3351 }, { "epoch": 1.11, "learning_rate": 1.45029886957511e-05, "loss": 0.4757, "step": 3352 }, { "epoch": 1.11, "learning_rate": 1.4499788841720635e-05, "loss": 0.407, "step": 3353 }, { "epoch": 1.11, "learning_rate": 1.4496588409873778e-05, "loss": 0.5217, "step": 3354 }, { "epoch": 1.11, "learning_rate": 1.4493387400621485e-05, "loss": 0.4404, "step": 3355 }, { "epoch": 1.11, "learning_rate": 1.4490185814374804e-05, "loss": 0.4949, "step": 3356 }, { "epoch": 1.11, "learning_rate": 1.4486983651544848e-05, "loss": 0.4672, "step": 3357 }, { "epoch": 1.11, "learning_rate": 1.4483780912542806e-05, "loss": 0.4059, "step": 3358 }, { "epoch": 1.11, "learning_rate": 1.4480577597779938e-05, "loss": 0.4944, "step": 3359 }, { "epoch": 1.12, "learning_rate": 1.4477373707667581e-05, "loss": 0.4678, "step": 3360 }, { "epoch": 1.12, "learning_rate": 1.4474169242617151e-05, "loss": 0.4454, "step": 3361 }, { "epoch": 1.12, "learning_rate": 1.4470964203040126e-05, "loss": 0.3821, "step": 3362 }, { "epoch": 1.12, "learning_rate": 1.4467758589348065e-05, "loss": 0.4551, "step": 3363 }, { "epoch": 1.12, "learning_rate": 1.4464552401952601e-05, "loss": 0.4613, "step": 3364 }, { "epoch": 1.12, "learning_rate": 1.446134564126544e-05, "loss": 0.4658, "step": 3365 }, { "epoch": 1.12, "learning_rate": 1.4458138307698359e-05, "loss": 0.4275, "step": 3366 }, { "epoch": 1.12, "learning_rate": 1.4454930401663215e-05, "loss": 0.4043, "step": 3367 }, { "epoch": 1.12, "learning_rate": 1.4451721923571929e-05, "loss": 0.51, "step": 3368 }, { "epoch": 1.12, "learning_rate": 1.4448512873836502e-05, "loss": 0.4268, "step": 3369 }, { "epoch": 1.12, "learning_rate": 1.444530325286901e-05, "loss": 0.4517, "step": 3370 }, { "epoch": 1.12, "learning_rate": 1.4442093061081597e-05, "loss": 0.318, "step": 3371 }, { "epoch": 1.12, "learning_rate": 1.4438882298886483e-05, "loss": 0.4863, "step": 3372 }, { "epoch": 1.12, "learning_rate": 1.4435670966695957e-05, "loss": 0.4543, "step": 3373 }, { "epoch": 1.12, "learning_rate": 1.4432459064922393e-05, "loss": 0.3733, "step": 3374 }, { "epoch": 1.12, "learning_rate": 1.4429246593978225e-05, "loss": 0.3947, "step": 3375 }, { "epoch": 1.12, "learning_rate": 1.4426033554275966e-05, "loss": 0.4238, "step": 3376 }, { "epoch": 1.12, "learning_rate": 1.4422819946228202e-05, "loss": 0.4836, "step": 3377 }, { "epoch": 1.12, "learning_rate": 1.4419605770247591e-05, "loss": 0.4353, "step": 3378 }, { "epoch": 1.12, "learning_rate": 1.4416391026746866e-05, "loss": 0.4233, "step": 3379 }, { "epoch": 1.12, "learning_rate": 1.441317571613883e-05, "loss": 0.4487, "step": 3380 }, { "epoch": 1.12, "learning_rate": 1.4409959838836356e-05, "loss": 0.4182, "step": 3381 }, { "epoch": 1.12, "learning_rate": 1.44067433952524e-05, "loss": 0.4369, "step": 3382 }, { "epoch": 1.12, "learning_rate": 1.4403526385799982e-05, "loss": 0.4099, "step": 3383 }, { "epoch": 1.12, "learning_rate": 1.4400308810892195e-05, "loss": 0.4778, "step": 3384 }, { "epoch": 1.12, "learning_rate": 1.4397090670942208e-05, "loss": 0.4253, "step": 3385 }, { "epoch": 1.12, "learning_rate": 1.4393871966363264e-05, "loss": 0.4834, "step": 3386 }, { "epoch": 1.12, "learning_rate": 1.4390652697568674e-05, "loss": 0.4949, "step": 3387 }, { "epoch": 1.12, "learning_rate": 1.4387432864971822e-05, "loss": 0.4458, "step": 3388 }, { "epoch": 1.12, "learning_rate": 1.4384212468986168e-05, "loss": 0.4622, "step": 3389 }, { "epoch": 1.13, "learning_rate": 1.438099151002524e-05, "loss": 0.4819, "step": 3390 }, { "epoch": 1.13, "learning_rate": 1.4377769988502638e-05, "loss": 0.4714, "step": 3391 }, { "epoch": 1.13, "learning_rate": 1.4374547904832044e-05, "loss": 0.4482, "step": 3392 }, { "epoch": 1.13, "learning_rate": 1.4371325259427199e-05, "loss": 0.3691, "step": 3393 }, { "epoch": 1.13, "learning_rate": 1.4368102052701918e-05, "loss": 0.5032, "step": 3394 }, { "epoch": 1.13, "learning_rate": 1.43648782850701e-05, "loss": 0.4187, "step": 3395 }, { "epoch": 1.13, "learning_rate": 1.4361653956945708e-05, "loss": 0.5552, "step": 3396 }, { "epoch": 1.13, "learning_rate": 1.4358429068742767e-05, "loss": 0.4224, "step": 3397 }, { "epoch": 1.13, "learning_rate": 1.4355203620875395e-05, "loss": 0.5183, "step": 3398 }, { "epoch": 1.13, "learning_rate": 1.4351977613757766e-05, "loss": 0.337, "step": 3399 }, { "epoch": 1.13, "learning_rate": 1.434875104780413e-05, "loss": 0.4221, "step": 3400 }, { "epoch": 1.13, "learning_rate": 1.434552392342881e-05, "loss": 0.4731, "step": 3401 }, { "epoch": 1.13, "learning_rate": 1.4342296241046199e-05, "loss": 0.4717, "step": 3402 }, { "epoch": 1.13, "learning_rate": 1.4339068001070765e-05, "loss": 0.4309, "step": 3403 }, { "epoch": 1.13, "learning_rate": 1.4335839203917044e-05, "loss": 0.5364, "step": 3404 }, { "epoch": 1.13, "learning_rate": 1.4332609849999641e-05, "loss": 0.4255, "step": 3405 }, { "epoch": 1.13, "learning_rate": 1.4329379939733244e-05, "loss": 0.5134, "step": 3406 }, { "epoch": 1.13, "learning_rate": 1.4326149473532598e-05, "loss": 0.4851, "step": 3407 }, { "epoch": 1.13, "learning_rate": 1.4322918451812528e-05, "loss": 0.4434, "step": 3408 }, { "epoch": 1.13, "learning_rate": 1.4319686874987933e-05, "loss": 0.4324, "step": 3409 }, { "epoch": 1.13, "learning_rate": 1.4316454743473773e-05, "loss": 0.373, "step": 3410 }, { "epoch": 1.13, "learning_rate": 1.4313222057685086e-05, "loss": 0.4587, "step": 3411 }, { "epoch": 1.13, "learning_rate": 1.4309988818036979e-05, "loss": 0.4133, "step": 3412 }, { "epoch": 1.13, "learning_rate": 1.4306755024944636e-05, "loss": 0.4517, "step": 3413 }, { "epoch": 1.13, "learning_rate": 1.4303520678823301e-05, "loss": 0.4368, "step": 3414 }, { "epoch": 1.13, "learning_rate": 1.4300285780088301e-05, "loss": 0.5518, "step": 3415 }, { "epoch": 1.13, "learning_rate": 1.4297050329155027e-05, "loss": 0.4829, "step": 3416 }, { "epoch": 1.13, "learning_rate": 1.429381432643894e-05, "loss": 0.4266, "step": 3417 }, { "epoch": 1.13, "learning_rate": 1.4290577772355575e-05, "loss": 0.4351, "step": 3418 }, { "epoch": 1.13, "learning_rate": 1.4287340667320537e-05, "loss": 0.4712, "step": 3419 }, { "epoch": 1.14, "learning_rate": 1.4284103011749503e-05, "loss": 0.4263, "step": 3420 }, { "epoch": 1.14, "learning_rate": 1.4280864806058217e-05, "loss": 0.5454, "step": 3421 }, { "epoch": 1.14, "learning_rate": 1.4277626050662494e-05, "loss": 0.4351, "step": 3422 }, { "epoch": 1.14, "learning_rate": 1.4274386745978225e-05, "loss": 0.4104, "step": 3423 }, { "epoch": 1.14, "learning_rate": 1.4271146892421373e-05, "loss": 0.4258, "step": 3424 }, { "epoch": 1.14, "learning_rate": 1.4267906490407954e-05, "loss": 0.5271, "step": 3425 }, { "epoch": 1.14, "learning_rate": 1.4264665540354077e-05, "loss": 0.4988, "step": 3426 }, { "epoch": 1.14, "learning_rate": 1.426142404267591e-05, "loss": 0.4551, "step": 3427 }, { "epoch": 1.14, "learning_rate": 1.4258181997789689e-05, "loss": 0.4724, "step": 3428 }, { "epoch": 1.14, "learning_rate": 1.4254939406111723e-05, "loss": 0.4366, "step": 3429 }, { "epoch": 1.14, "learning_rate": 1.42516962680584e-05, "loss": 0.4812, "step": 3430 }, { "epoch": 1.14, "learning_rate": 1.4248452584046159e-05, "loss": 0.4958, "step": 3431 }, { "epoch": 1.14, "learning_rate": 1.4245208354491529e-05, "loss": 0.448, "step": 3432 }, { "epoch": 1.14, "learning_rate": 1.4241963579811097e-05, "loss": 0.4221, "step": 3433 }, { "epoch": 1.14, "learning_rate": 1.4238718260421529e-05, "loss": 0.3525, "step": 3434 }, { "epoch": 1.14, "learning_rate": 1.4235472396739548e-05, "loss": 0.4424, "step": 3435 }, { "epoch": 1.14, "learning_rate": 1.4232225989181956e-05, "loss": 0.4492, "step": 3436 }, { "epoch": 1.14, "learning_rate": 1.4228979038165624e-05, "loss": 0.4412, "step": 3437 }, { "epoch": 1.14, "learning_rate": 1.4225731544107495e-05, "loss": 0.4395, "step": 3438 }, { "epoch": 1.14, "learning_rate": 1.4222483507424574e-05, "loss": 0.3635, "step": 3439 }, { "epoch": 1.14, "learning_rate": 1.421923492853394e-05, "loss": 0.4873, "step": 3440 }, { "epoch": 1.14, "learning_rate": 1.4215985807852745e-05, "loss": 0.4812, "step": 3441 }, { "epoch": 1.14, "learning_rate": 1.4212736145798208e-05, "loss": 0.4648, "step": 3442 }, { "epoch": 1.14, "learning_rate": 1.4209485942787615e-05, "loss": 0.447, "step": 3443 }, { "epoch": 1.14, "learning_rate": 1.4206235199238325e-05, "loss": 0.5015, "step": 3444 }, { "epoch": 1.14, "learning_rate": 1.4202983915567763e-05, "loss": 0.478, "step": 3445 }, { "epoch": 1.14, "learning_rate": 1.4199732092193424e-05, "loss": 0.4625, "step": 3446 }, { "epoch": 1.14, "learning_rate": 1.4196479729532879e-05, "loss": 0.4155, "step": 3447 }, { "epoch": 1.14, "learning_rate": 1.4193226828003759e-05, "loss": 0.4683, "step": 3448 }, { "epoch": 1.14, "learning_rate": 1.4189973388023768e-05, "loss": 0.436, "step": 3449 }, { "epoch": 1.15, "learning_rate": 1.4186719410010677e-05, "loss": 0.4507, "step": 3450 }, { "epoch": 1.15, "learning_rate": 1.4183464894382335e-05, "loss": 0.4006, "step": 3451 }, { "epoch": 1.15, "learning_rate": 1.4180209841556647e-05, "loss": 0.4214, "step": 3452 }, { "epoch": 1.15, "learning_rate": 1.4176954251951596e-05, "loss": 0.4675, "step": 3453 }, { "epoch": 1.15, "learning_rate": 1.4173698125985232e-05, "loss": 0.4236, "step": 3454 }, { "epoch": 1.15, "learning_rate": 1.417044146407567e-05, "loss": 0.3959, "step": 3455 }, { "epoch": 1.15, "learning_rate": 1.4167184266641098e-05, "loss": 0.4178, "step": 3456 }, { "epoch": 1.15, "learning_rate": 1.4163926534099774e-05, "loss": 0.4214, "step": 3457 }, { "epoch": 1.15, "learning_rate": 1.4160668266870023e-05, "loss": 0.4009, "step": 3458 }, { "epoch": 1.15, "learning_rate": 1.415740946537023e-05, "loss": 0.4197, "step": 3459 }, { "epoch": 1.15, "learning_rate": 1.4154150130018867e-05, "loss": 0.4163, "step": 3460 }, { "epoch": 1.15, "learning_rate": 1.4150890261234456e-05, "loss": 0.4286, "step": 3461 }, { "epoch": 1.15, "learning_rate": 1.4147629859435601e-05, "loss": 0.4309, "step": 3462 }, { "epoch": 1.15, "learning_rate": 1.4144368925040965e-05, "loss": 0.4081, "step": 3463 }, { "epoch": 1.15, "learning_rate": 1.4141107458469288e-05, "loss": 0.4386, "step": 3464 }, { "epoch": 1.15, "learning_rate": 1.4137845460139371e-05, "loss": 0.5227, "step": 3465 }, { "epoch": 1.15, "learning_rate": 1.413458293047009e-05, "loss": 0.4634, "step": 3466 }, { "epoch": 1.15, "learning_rate": 1.4131319869880377e-05, "loss": 0.5256, "step": 3467 }, { "epoch": 1.15, "learning_rate": 1.4128056278789245e-05, "loss": 0.441, "step": 3468 }, { "epoch": 1.15, "learning_rate": 1.4124792157615776e-05, "loss": 0.4224, "step": 3469 }, { "epoch": 1.15, "learning_rate": 1.4121527506779108e-05, "loss": 0.5139, "step": 3470 }, { "epoch": 1.15, "learning_rate": 1.4118262326698456e-05, "loss": 0.5293, "step": 3471 }, { "epoch": 1.15, "learning_rate": 1.41149966177931e-05, "loss": 0.4144, "step": 3472 }, { "epoch": 1.15, "learning_rate": 1.4111730380482387e-05, "loss": 0.416, "step": 3473 }, { "epoch": 1.15, "learning_rate": 1.4108463615185736e-05, "loss": 0.4509, "step": 3474 }, { "epoch": 1.15, "learning_rate": 1.4105196322322634e-05, "loss": 0.457, "step": 3475 }, { "epoch": 1.15, "learning_rate": 1.410192850231263e-05, "loss": 0.4302, "step": 3476 }, { "epoch": 1.15, "learning_rate": 1.409866015557534e-05, "loss": 0.369, "step": 3477 }, { "epoch": 1.15, "learning_rate": 1.4095391282530453e-05, "loss": 0.429, "step": 3478 }, { "epoch": 1.15, "learning_rate": 1.409212188359773e-05, "loss": 0.4128, "step": 3479 }, { "epoch": 1.15, "learning_rate": 1.4088851959196987e-05, "loss": 0.4868, "step": 3480 }, { "epoch": 1.16, "learning_rate": 1.4085581509748116e-05, "loss": 0.4233, "step": 3481 }, { "epoch": 1.16, "learning_rate": 1.4082310535671076e-05, "loss": 0.4846, "step": 3482 }, { "epoch": 1.16, "learning_rate": 1.4079039037385886e-05, "loss": 0.4407, "step": 3483 }, { "epoch": 1.16, "learning_rate": 1.4075767015312644e-05, "loss": 0.4648, "step": 3484 }, { "epoch": 1.16, "learning_rate": 1.4072494469871512e-05, "loss": 0.491, "step": 3485 }, { "epoch": 1.16, "learning_rate": 1.4069221401482707e-05, "loss": 0.3967, "step": 3486 }, { "epoch": 1.16, "learning_rate": 1.4065947810566528e-05, "loss": 0.4387, "step": 3487 }, { "epoch": 1.16, "learning_rate": 1.4062673697543334e-05, "loss": 0.3574, "step": 3488 }, { "epoch": 1.16, "learning_rate": 1.4059399062833556e-05, "loss": 0.4187, "step": 3489 }, { "epoch": 1.16, "learning_rate": 1.4056123906857684e-05, "loss": 0.4327, "step": 3490 }, { "epoch": 1.16, "learning_rate": 1.4052848230036285e-05, "loss": 0.479, "step": 3491 }, { "epoch": 1.16, "learning_rate": 1.4049572032789982e-05, "loss": 0.4773, "step": 3492 }, { "epoch": 1.16, "learning_rate": 1.4046295315539478e-05, "loss": 0.4308, "step": 3493 }, { "epoch": 1.16, "learning_rate": 1.4043018078705526e-05, "loss": 0.417, "step": 3494 }, { "epoch": 1.16, "learning_rate": 1.4039740322708961e-05, "loss": 0.4446, "step": 3495 }, { "epoch": 1.16, "learning_rate": 1.403646204797068e-05, "loss": 0.489, "step": 3496 }, { "epoch": 1.16, "learning_rate": 1.4033183254911638e-05, "loss": 0.4143, "step": 3497 }, { "epoch": 1.16, "learning_rate": 1.4029903943952867e-05, "loss": 0.3906, "step": 3498 }, { "epoch": 1.16, "learning_rate": 1.4026624115515466e-05, "loss": 0.4714, "step": 3499 }, { "epoch": 1.16, "learning_rate": 1.4023343770020594e-05, "loss": 0.4282, "step": 3500 }, { "epoch": 1.16, "learning_rate": 1.4020062907889479e-05, "loss": 0.4612, "step": 3501 }, { "epoch": 1.16, "learning_rate": 1.4016781529543414e-05, "loss": 0.4189, "step": 3502 }, { "epoch": 1.16, "learning_rate": 1.401349963540376e-05, "loss": 0.4551, "step": 3503 }, { "epoch": 1.16, "learning_rate": 1.4010217225891947e-05, "loss": 0.5361, "step": 3504 }, { "epoch": 1.16, "learning_rate": 1.4006934301429464e-05, "loss": 0.4634, "step": 3505 }, { "epoch": 1.16, "learning_rate": 1.4003650862437874e-05, "loss": 0.4182, "step": 3506 }, { "epoch": 1.16, "learning_rate": 1.4000366909338801e-05, "loss": 0.438, "step": 3507 }, { "epoch": 1.16, "learning_rate": 1.3997082442553933e-05, "loss": 0.3804, "step": 3508 }, { "epoch": 1.16, "learning_rate": 1.3993797462505032e-05, "loss": 0.4088, "step": 3509 }, { "epoch": 1.16, "learning_rate": 1.3990511969613922e-05, "loss": 0.3148, "step": 3510 }, { "epoch": 1.17, "learning_rate": 1.3987225964302486e-05, "loss": 0.4448, "step": 3511 }, { "epoch": 1.17, "learning_rate": 1.3983939446992683e-05, "loss": 0.5054, "step": 3512 }, { "epoch": 1.17, "learning_rate": 1.3980652418106535e-05, "loss": 0.4392, "step": 3513 }, { "epoch": 1.17, "learning_rate": 1.3977364878066126e-05, "loss": 0.4519, "step": 3514 }, { "epoch": 1.17, "learning_rate": 1.3974076827293605e-05, "loss": 0.4457, "step": 3515 }, { "epoch": 1.17, "learning_rate": 1.3970788266211196e-05, "loss": 0.4597, "step": 3516 }, { "epoch": 1.17, "learning_rate": 1.3967499195241176e-05, "loss": 0.531, "step": 3517 }, { "epoch": 1.17, "learning_rate": 1.3964209614805899e-05, "loss": 0.4305, "step": 3518 }, { "epoch": 1.17, "learning_rate": 1.3960919525327775e-05, "loss": 0.4915, "step": 3519 }, { "epoch": 1.17, "learning_rate": 1.3957628927229285e-05, "loss": 0.4608, "step": 3520 }, { "epoch": 1.17, "learning_rate": 1.3954337820932971e-05, "loss": 0.3608, "step": 3521 }, { "epoch": 1.17, "learning_rate": 1.3951046206861444e-05, "loss": 0.4529, "step": 3522 }, { "epoch": 1.17, "learning_rate": 1.3947754085437386e-05, "loss": 0.4785, "step": 3523 }, { "epoch": 1.17, "learning_rate": 1.3944461457083529e-05, "loss": 0.4017, "step": 3524 }, { "epoch": 1.17, "learning_rate": 1.3941168322222679e-05, "loss": 0.4958, "step": 3525 }, { "epoch": 1.17, "learning_rate": 1.3937874681277705e-05, "loss": 0.48, "step": 3526 }, { "epoch": 1.17, "learning_rate": 1.3934580534671551e-05, "loss": 0.4834, "step": 3527 }, { "epoch": 1.17, "learning_rate": 1.3931285882827208e-05, "loss": 0.4194, "step": 3528 }, { "epoch": 1.17, "learning_rate": 1.3927990726167746e-05, "loss": 0.468, "step": 3529 }, { "epoch": 1.17, "learning_rate": 1.3924695065116293e-05, "loss": 0.5054, "step": 3530 }, { "epoch": 1.17, "learning_rate": 1.3921398900096046e-05, "loss": 0.4976, "step": 3531 }, { "epoch": 1.17, "learning_rate": 1.3918102231530261e-05, "loss": 0.3925, "step": 3532 }, { "epoch": 1.17, "learning_rate": 1.3914805059842268e-05, "loss": 0.3524, "step": 3533 }, { "epoch": 1.17, "learning_rate": 1.391150738545545e-05, "loss": 0.4099, "step": 3534 }, { "epoch": 1.17, "learning_rate": 1.390820920879326e-05, "loss": 0.4697, "step": 3535 }, { "epoch": 1.17, "learning_rate": 1.3904910530279217e-05, "loss": 0.4414, "step": 3536 }, { "epoch": 1.17, "learning_rate": 1.3901611350336906e-05, "loss": 0.4403, "step": 3537 }, { "epoch": 1.17, "learning_rate": 1.389831166938997e-05, "loss": 0.4834, "step": 3538 }, { "epoch": 1.17, "learning_rate": 1.3895011487862123e-05, "loss": 0.4802, "step": 3539 }, { "epoch": 1.17, "learning_rate": 1.3891710806177137e-05, "loss": 0.4358, "step": 3540 }, { "epoch": 1.18, "learning_rate": 1.3888409624758856e-05, "loss": 0.3833, "step": 3541 }, { "epoch": 1.18, "learning_rate": 1.3885107944031182e-05, "loss": 0.4575, "step": 3542 }, { "epoch": 1.18, "learning_rate": 1.3881805764418078e-05, "loss": 0.4568, "step": 3543 }, { "epoch": 1.18, "learning_rate": 1.387850308634358e-05, "loss": 0.4893, "step": 3544 }, { "epoch": 1.18, "learning_rate": 1.3875199910231784e-05, "loss": 0.3625, "step": 3545 }, { "epoch": 1.18, "learning_rate": 1.3871896236506848e-05, "loss": 0.3907, "step": 3546 }, { "epoch": 1.18, "learning_rate": 1.3868592065592998e-05, "loss": 0.4758, "step": 3547 }, { "epoch": 1.18, "learning_rate": 1.386528739791452e-05, "loss": 0.3967, "step": 3548 }, { "epoch": 1.18, "learning_rate": 1.3861982233895763e-05, "loss": 0.5012, "step": 3549 }, { "epoch": 1.18, "learning_rate": 1.3858676573961147e-05, "loss": 0.4043, "step": 3550 }, { "epoch": 1.18, "learning_rate": 1.3855370418535148e-05, "loss": 0.4316, "step": 3551 }, { "epoch": 1.18, "learning_rate": 1.3852063768042308e-05, "loss": 0.4585, "step": 3552 }, { "epoch": 1.18, "learning_rate": 1.3848756622907233e-05, "loss": 0.4172, "step": 3553 }, { "epoch": 1.18, "learning_rate": 1.384544898355459e-05, "loss": 0.3977, "step": 3554 }, { "epoch": 1.18, "learning_rate": 1.3842140850409121e-05, "loss": 0.4514, "step": 3555 }, { "epoch": 1.18, "learning_rate": 1.383883222389561e-05, "loss": 0.4827, "step": 3556 }, { "epoch": 1.18, "learning_rate": 1.3835523104438923e-05, "loss": 0.4956, "step": 3557 }, { "epoch": 1.18, "learning_rate": 1.3832213492463987e-05, "loss": 0.3552, "step": 3558 }, { "epoch": 1.18, "learning_rate": 1.3828903388395778e-05, "loss": 0.4558, "step": 3559 }, { "epoch": 1.18, "learning_rate": 1.3825592792659353e-05, "loss": 0.4556, "step": 3560 }, { "epoch": 1.18, "learning_rate": 1.3822281705679826e-05, "loss": 0.4636, "step": 3561 }, { "epoch": 1.18, "learning_rate": 1.3818970127882365e-05, "loss": 0.439, "step": 3562 }, { "epoch": 1.18, "learning_rate": 1.3815658059692213e-05, "loss": 0.4441, "step": 3563 }, { "epoch": 1.18, "learning_rate": 1.3812345501534672e-05, "loss": 0.39, "step": 3564 }, { "epoch": 1.18, "learning_rate": 1.3809032453835107e-05, "loss": 0.4333, "step": 3565 }, { "epoch": 1.18, "learning_rate": 1.3805718917018943e-05, "loss": 0.4272, "step": 3566 }, { "epoch": 1.18, "learning_rate": 1.380240489151167e-05, "loss": 0.4207, "step": 3567 }, { "epoch": 1.18, "learning_rate": 1.3799090377738841e-05, "loss": 0.4001, "step": 3568 }, { "epoch": 1.18, "learning_rate": 1.3795775376126077e-05, "loss": 0.4685, "step": 3569 }, { "epoch": 1.18, "learning_rate": 1.379245988709905e-05, "loss": 0.4746, "step": 3570 }, { "epoch": 1.19, "learning_rate": 1.3789143911083504e-05, "loss": 0.4524, "step": 3571 }, { "epoch": 1.19, "learning_rate": 1.3785827448505243e-05, "loss": 0.4385, "step": 3572 }, { "epoch": 1.19, "learning_rate": 1.3782510499790127e-05, "loss": 0.4336, "step": 3573 }, { "epoch": 1.19, "learning_rate": 1.3779193065364089e-05, "loss": 0.4758, "step": 3574 }, { "epoch": 1.19, "learning_rate": 1.3775875145653119e-05, "loss": 0.3862, "step": 3575 }, { "epoch": 1.19, "learning_rate": 1.3772556741083273e-05, "loss": 0.4888, "step": 3576 }, { "epoch": 1.19, "learning_rate": 1.3769237852080662e-05, "loss": 0.3981, "step": 3577 }, { "epoch": 1.19, "learning_rate": 1.3765918479071463e-05, "loss": 0.3799, "step": 3578 }, { "epoch": 1.19, "learning_rate": 1.376259862248192e-05, "loss": 0.4132, "step": 3579 }, { "epoch": 1.19, "learning_rate": 1.3759278282738331e-05, "loss": 0.4551, "step": 3580 }, { "epoch": 1.19, "learning_rate": 1.375595746026706e-05, "loss": 0.47, "step": 3581 }, { "epoch": 1.19, "learning_rate": 1.3752636155494534e-05, "loss": 0.4365, "step": 3582 }, { "epoch": 1.19, "learning_rate": 1.3749314368847242e-05, "loss": 0.4832, "step": 3583 }, { "epoch": 1.19, "learning_rate": 1.3745992100751728e-05, "loss": 0.3873, "step": 3584 }, { "epoch": 1.19, "learning_rate": 1.374266935163461e-05, "loss": 0.4583, "step": 3585 }, { "epoch": 1.19, "learning_rate": 1.3739346121922556e-05, "loss": 0.4584, "step": 3586 }, { "epoch": 1.19, "learning_rate": 1.3736022412042305e-05, "loss": 0.5239, "step": 3587 }, { "epoch": 1.19, "learning_rate": 1.3732698222420648e-05, "loss": 0.4402, "step": 3588 }, { "epoch": 1.19, "learning_rate": 1.372937355348445e-05, "loss": 0.434, "step": 3589 }, { "epoch": 1.19, "learning_rate": 1.3726048405660628e-05, "loss": 0.3835, "step": 3590 }, { "epoch": 1.19, "learning_rate": 1.372272277937616e-05, "loss": 0.3655, "step": 3591 }, { "epoch": 1.19, "learning_rate": 1.3719396675058091e-05, "loss": 0.4468, "step": 3592 }, { "epoch": 1.19, "learning_rate": 1.3716070093133526e-05, "loss": 0.4744, "step": 3593 }, { "epoch": 1.19, "learning_rate": 1.3712743034029628e-05, "loss": 0.459, "step": 3594 }, { "epoch": 1.19, "learning_rate": 1.3709415498173624e-05, "loss": 0.5012, "step": 3595 }, { "epoch": 1.19, "learning_rate": 1.3706087485992806e-05, "loss": 0.3718, "step": 3596 }, { "epoch": 1.19, "learning_rate": 1.3702758997914517e-05, "loss": 0.4692, "step": 3597 }, { "epoch": 1.19, "learning_rate": 1.3699430034366166e-05, "loss": 0.4597, "step": 3598 }, { "epoch": 1.19, "learning_rate": 1.3696100595775232e-05, "loss": 0.4817, "step": 3599 }, { "epoch": 1.19, "learning_rate": 1.3692770682569242e-05, "loss": 0.4451, "step": 3600 }, { "epoch": 1.2, "learning_rate": 1.3689440295175789e-05, "loss": 0.4907, "step": 3601 }, { "epoch": 1.2, "learning_rate": 1.3686109434022527e-05, "loss": 0.4587, "step": 3602 }, { "epoch": 1.2, "learning_rate": 1.3682778099537175e-05, "loss": 0.4407, "step": 3603 }, { "epoch": 1.2, "learning_rate": 1.36794462921475e-05, "loss": 0.4688, "step": 3604 }, { "epoch": 1.2, "learning_rate": 1.3676114012281345e-05, "loss": 0.4565, "step": 3605 }, { "epoch": 1.2, "learning_rate": 1.3672781260366605e-05, "loss": 0.4434, "step": 3606 }, { "epoch": 1.2, "learning_rate": 1.3669448036831242e-05, "loss": 0.4412, "step": 3607 }, { "epoch": 1.2, "learning_rate": 1.3666114342103264e-05, "loss": 0.3691, "step": 3608 }, { "epoch": 1.2, "learning_rate": 1.366278017661076e-05, "loss": 0.4365, "step": 3609 }, { "epoch": 1.2, "learning_rate": 1.3659445540781864e-05, "loss": 0.446, "step": 3610 }, { "epoch": 1.2, "learning_rate": 1.3656110435044774e-05, "loss": 0.4333, "step": 3611 }, { "epoch": 1.2, "learning_rate": 1.3652774859827754e-05, "loss": 0.3779, "step": 3612 }, { "epoch": 1.2, "learning_rate": 1.364943881555912e-05, "loss": 0.4282, "step": 3613 }, { "epoch": 1.2, "learning_rate": 1.3646102302667258e-05, "loss": 0.3989, "step": 3614 }, { "epoch": 1.2, "learning_rate": 1.36427653215806e-05, "loss": 0.4536, "step": 3615 }, { "epoch": 1.2, "learning_rate": 1.3639427872727654e-05, "loss": 0.4915, "step": 3616 }, { "epoch": 1.2, "learning_rate": 1.3636089956536977e-05, "loss": 0.4727, "step": 3617 }, { "epoch": 1.2, "learning_rate": 1.3632751573437192e-05, "loss": 0.4817, "step": 3618 }, { "epoch": 1.2, "learning_rate": 1.362941272385698e-05, "loss": 0.4639, "step": 3619 }, { "epoch": 1.2, "learning_rate": 1.362607340822508e-05, "loss": 0.4583, "step": 3620 }, { "epoch": 1.2, "learning_rate": 1.3622733626970292e-05, "loss": 0.3918, "step": 3621 }, { "epoch": 1.2, "learning_rate": 1.3619393380521477e-05, "loss": 0.4634, "step": 3622 }, { "epoch": 1.2, "learning_rate": 1.3616052669307553e-05, "loss": 0.439, "step": 3623 }, { "epoch": 1.2, "learning_rate": 1.3612711493757505e-05, "loss": 0.3877, "step": 3624 }, { "epoch": 1.2, "learning_rate": 1.3609369854300367e-05, "loss": 0.429, "step": 3625 }, { "epoch": 1.2, "learning_rate": 1.3606027751365238e-05, "loss": 0.4534, "step": 3626 }, { "epoch": 1.2, "learning_rate": 1.3602685185381282e-05, "loss": 0.4731, "step": 3627 }, { "epoch": 1.2, "learning_rate": 1.3599342156777708e-05, "loss": 0.4351, "step": 3628 }, { "epoch": 1.2, "learning_rate": 1.3595998665983801e-05, "loss": 0.3514, "step": 3629 }, { "epoch": 1.2, "learning_rate": 1.3592654713428892e-05, "loss": 0.3872, "step": 3630 }, { "epoch": 1.21, "learning_rate": 1.358931029954238e-05, "loss": 0.4529, "step": 3631 }, { "epoch": 1.21, "learning_rate": 1.358596542475372e-05, "loss": 0.3961, "step": 3632 }, { "epoch": 1.21, "learning_rate": 1.3582620089492425e-05, "loss": 0.4543, "step": 3633 }, { "epoch": 1.21, "learning_rate": 1.3579274294188069e-05, "loss": 0.4055, "step": 3634 }, { "epoch": 1.21, "learning_rate": 1.3575928039270284e-05, "loss": 0.4697, "step": 3635 }, { "epoch": 1.21, "learning_rate": 1.3572581325168761e-05, "loss": 0.4604, "step": 3636 }, { "epoch": 1.21, "learning_rate": 1.3569234152313255e-05, "loss": 0.4553, "step": 3637 }, { "epoch": 1.21, "learning_rate": 1.3565886521133572e-05, "loss": 0.4014, "step": 3638 }, { "epoch": 1.21, "learning_rate": 1.3562538432059573e-05, "loss": 0.4302, "step": 3639 }, { "epoch": 1.21, "learning_rate": 1.3559189885521198e-05, "loss": 0.4695, "step": 3640 }, { "epoch": 1.21, "learning_rate": 1.3555840881948426e-05, "loss": 0.3447, "step": 3641 }, { "epoch": 1.21, "learning_rate": 1.35524914217713e-05, "loss": 0.3674, "step": 3642 }, { "epoch": 1.21, "learning_rate": 1.3549141505419926e-05, "loss": 0.4166, "step": 3643 }, { "epoch": 1.21, "learning_rate": 1.3545791133324463e-05, "loss": 0.4421, "step": 3644 }, { "epoch": 1.21, "learning_rate": 1.3542440305915136e-05, "loss": 0.3601, "step": 3645 }, { "epoch": 1.21, "learning_rate": 1.3539089023622221e-05, "loss": 0.4564, "step": 3646 }, { "epoch": 1.21, "learning_rate": 1.3535737286876054e-05, "loss": 0.4592, "step": 3647 }, { "epoch": 1.21, "learning_rate": 1.3532385096107033e-05, "loss": 0.4197, "step": 3648 }, { "epoch": 1.21, "learning_rate": 1.3529032451745608e-05, "loss": 0.4485, "step": 3649 }, { "epoch": 1.21, "learning_rate": 1.352567935422229e-05, "loss": 0.4171, "step": 3650 }, { "epoch": 1.21, "learning_rate": 1.3522325803967654e-05, "loss": 0.5051, "step": 3651 }, { "epoch": 1.21, "learning_rate": 1.3518971801412328e-05, "loss": 0.4106, "step": 3652 }, { "epoch": 1.21, "learning_rate": 1.3515617346986994e-05, "loss": 0.3737, "step": 3653 }, { "epoch": 1.21, "learning_rate": 1.35122624411224e-05, "loss": 0.3707, "step": 3654 }, { "epoch": 1.21, "learning_rate": 1.3508907084249347e-05, "loss": 0.4141, "step": 3655 }, { "epoch": 1.21, "learning_rate": 1.3505551276798696e-05, "loss": 0.447, "step": 3656 }, { "epoch": 1.21, "learning_rate": 1.3502195019201367e-05, "loss": 0.3436, "step": 3657 }, { "epoch": 1.21, "learning_rate": 1.349883831188833e-05, "loss": 0.3834, "step": 3658 }, { "epoch": 1.21, "learning_rate": 1.3495481155290624e-05, "loss": 0.4976, "step": 3659 }, { "epoch": 1.21, "learning_rate": 1.3492123549839339e-05, "loss": 0.4546, "step": 3660 }, { "epoch": 1.22, "learning_rate": 1.348876549596562e-05, "loss": 0.4236, "step": 3661 }, { "epoch": 1.22, "learning_rate": 1.3485406994100685e-05, "loss": 0.4128, "step": 3662 }, { "epoch": 1.22, "learning_rate": 1.3482048044675784e-05, "loss": 0.4556, "step": 3663 }, { "epoch": 1.22, "learning_rate": 1.3478688648122246e-05, "loss": 0.4707, "step": 3664 }, { "epoch": 1.22, "learning_rate": 1.347532880487145e-05, "loss": 0.3958, "step": 3665 }, { "epoch": 1.22, "learning_rate": 1.3471968515354833e-05, "loss": 0.4807, "step": 3666 }, { "epoch": 1.22, "learning_rate": 1.3468607780003884e-05, "loss": 0.3855, "step": 3667 }, { "epoch": 1.22, "learning_rate": 1.3465246599250157e-05, "loss": 0.4377, "step": 3668 }, { "epoch": 1.22, "learning_rate": 1.3461884973525263e-05, "loss": 0.3567, "step": 3669 }, { "epoch": 1.22, "learning_rate": 1.3458522903260863e-05, "loss": 0.3958, "step": 3670 }, { "epoch": 1.22, "learning_rate": 1.3455160388888678e-05, "loss": 0.4712, "step": 3671 }, { "epoch": 1.22, "learning_rate": 1.3451797430840492e-05, "loss": 0.4307, "step": 3672 }, { "epoch": 1.22, "learning_rate": 1.3448434029548139e-05, "loss": 0.4153, "step": 3673 }, { "epoch": 1.22, "learning_rate": 1.3445070185443512e-05, "loss": 0.4208, "step": 3674 }, { "epoch": 1.22, "learning_rate": 1.3441705898958563e-05, "loss": 0.3842, "step": 3675 }, { "epoch": 1.22, "learning_rate": 1.3438341170525298e-05, "loss": 0.4441, "step": 3676 }, { "epoch": 1.22, "learning_rate": 1.343497600057578e-05, "loss": 0.3994, "step": 3677 }, { "epoch": 1.22, "learning_rate": 1.3431610389542127e-05, "loss": 0.4268, "step": 3678 }, { "epoch": 1.22, "learning_rate": 1.3428244337856521e-05, "loss": 0.4575, "step": 3679 }, { "epoch": 1.22, "learning_rate": 1.3424877845951193e-05, "loss": 0.4517, "step": 3680 }, { "epoch": 1.22, "learning_rate": 1.3421510914258431e-05, "loss": 0.4259, "step": 3681 }, { "epoch": 1.22, "learning_rate": 1.3418143543210585e-05, "loss": 0.3927, "step": 3682 }, { "epoch": 1.22, "learning_rate": 1.341477573324006e-05, "loss": 0.4139, "step": 3683 }, { "epoch": 1.22, "learning_rate": 1.3411407484779306e-05, "loss": 0.3759, "step": 3684 }, { "epoch": 1.22, "learning_rate": 1.3408038798260851e-05, "loss": 0.415, "step": 3685 }, { "epoch": 1.22, "learning_rate": 1.340466967411726e-05, "loss": 0.4639, "step": 3686 }, { "epoch": 1.22, "learning_rate": 1.340130011278116e-05, "loss": 0.478, "step": 3687 }, { "epoch": 1.22, "learning_rate": 1.3397930114685235e-05, "loss": 0.4109, "step": 3688 }, { "epoch": 1.22, "learning_rate": 1.3394559680262231e-05, "loss": 0.4188, "step": 3689 }, { "epoch": 1.22, "learning_rate": 1.339118880994494e-05, "loss": 0.4316, "step": 3690 }, { "epoch": 1.23, "learning_rate": 1.3387817504166217e-05, "loss": 0.4314, "step": 3691 }, { "epoch": 1.23, "learning_rate": 1.3384445763358963e-05, "loss": 0.4414, "step": 3692 }, { "epoch": 1.23, "learning_rate": 1.3381073587956154e-05, "loss": 0.4526, "step": 3693 }, { "epoch": 1.23, "learning_rate": 1.3377700978390802e-05, "loss": 0.3938, "step": 3694 }, { "epoch": 1.23, "learning_rate": 1.3374327935095983e-05, "loss": 0.3402, "step": 3695 }, { "epoch": 1.23, "learning_rate": 1.337095445850483e-05, "loss": 0.4897, "step": 3696 }, { "epoch": 1.23, "learning_rate": 1.3367580549050532e-05, "loss": 0.4181, "step": 3697 }, { "epoch": 1.23, "learning_rate": 1.3364206207166325e-05, "loss": 0.4575, "step": 3698 }, { "epoch": 1.23, "learning_rate": 1.3360831433285512e-05, "loss": 0.4709, "step": 3699 }, { "epoch": 1.23, "learning_rate": 1.335745622784145e-05, "loss": 0.422, "step": 3700 }, { "epoch": 1.23, "learning_rate": 1.3354080591267539e-05, "loss": 0.4851, "step": 3701 }, { "epoch": 1.23, "learning_rate": 1.3350704523997251e-05, "loss": 0.4978, "step": 3702 }, { "epoch": 1.23, "learning_rate": 1.3347328026464103e-05, "loss": 0.4917, "step": 3703 }, { "epoch": 1.23, "learning_rate": 1.3343951099101671e-05, "loss": 0.4014, "step": 3704 }, { "epoch": 1.23, "learning_rate": 1.3340573742343583e-05, "loss": 0.3867, "step": 3705 }, { "epoch": 1.23, "learning_rate": 1.3337195956623523e-05, "loss": 0.3757, "step": 3706 }, { "epoch": 1.23, "learning_rate": 1.3333817742375238e-05, "loss": 0.4648, "step": 3707 }, { "epoch": 1.23, "learning_rate": 1.3330439100032514e-05, "loss": 0.4458, "step": 3708 }, { "epoch": 1.23, "learning_rate": 1.332706003002921e-05, "loss": 0.4558, "step": 3709 }, { "epoch": 1.23, "learning_rate": 1.3323680532799226e-05, "loss": 0.3997, "step": 3710 }, { "epoch": 1.23, "learning_rate": 1.3320300608776527e-05, "loss": 0.4885, "step": 3711 }, { "epoch": 1.23, "learning_rate": 1.3316920258395121e-05, "loss": 0.4392, "step": 3712 }, { "epoch": 1.23, "learning_rate": 1.3313539482089084e-05, "loss": 0.375, "step": 3713 }, { "epoch": 1.23, "learning_rate": 1.3310158280292538e-05, "loss": 0.4713, "step": 3714 }, { "epoch": 1.23, "learning_rate": 1.330677665343966e-05, "loss": 0.4172, "step": 3715 }, { "epoch": 1.23, "learning_rate": 1.3303394601964685e-05, "loss": 0.4036, "step": 3716 }, { "epoch": 1.23, "learning_rate": 1.3300012126301902e-05, "loss": 0.425, "step": 3717 }, { "epoch": 1.23, "learning_rate": 1.3296629226885654e-05, "loss": 0.3879, "step": 3718 }, { "epoch": 1.23, "learning_rate": 1.3293245904150335e-05, "loss": 0.4407, "step": 3719 }, { "epoch": 1.23, "learning_rate": 1.32898621585304e-05, "loss": 0.4132, "step": 3720 }, { "epoch": 1.23, "learning_rate": 1.3286477990460353e-05, "loss": 0.4436, "step": 3721 }, { "epoch": 1.24, "learning_rate": 1.3283093400374753e-05, "loss": 0.4543, "step": 3722 }, { "epoch": 1.24, "learning_rate": 1.3279708388708217e-05, "loss": 0.4375, "step": 3723 }, { "epoch": 1.24, "learning_rate": 1.3276322955895413e-05, "loss": 0.4502, "step": 3724 }, { "epoch": 1.24, "learning_rate": 1.3272937102371058e-05, "loss": 0.5266, "step": 3725 }, { "epoch": 1.24, "learning_rate": 1.326955082856993e-05, "loss": 0.4108, "step": 3726 }, { "epoch": 1.24, "learning_rate": 1.3266164134926865e-05, "loss": 0.4553, "step": 3727 }, { "epoch": 1.24, "learning_rate": 1.3262777021876743e-05, "loss": 0.4336, "step": 3728 }, { "epoch": 1.24, "learning_rate": 1.3259389489854502e-05, "loss": 0.39, "step": 3729 }, { "epoch": 1.24, "learning_rate": 1.3256001539295131e-05, "loss": 0.4668, "step": 3730 }, { "epoch": 1.24, "learning_rate": 1.3252613170633684e-05, "loss": 0.3918, "step": 3731 }, { "epoch": 1.24, "learning_rate": 1.3249224384305253e-05, "loss": 0.4045, "step": 3732 }, { "epoch": 1.24, "learning_rate": 1.3245835180744995e-05, "loss": 0.4222, "step": 3733 }, { "epoch": 1.24, "learning_rate": 1.3242445560388113e-05, "loss": 0.4447, "step": 3734 }, { "epoch": 1.24, "learning_rate": 1.323905552366987e-05, "loss": 0.4783, "step": 3735 }, { "epoch": 1.24, "learning_rate": 1.3235665071025578e-05, "loss": 0.3992, "step": 3736 }, { "epoch": 1.24, "learning_rate": 1.3232274202890602e-05, "loss": 0.4244, "step": 3737 }, { "epoch": 1.24, "learning_rate": 1.3228882919700368e-05, "loss": 0.3862, "step": 3738 }, { "epoch": 1.24, "learning_rate": 1.3225491221890343e-05, "loss": 0.448, "step": 3739 }, { "epoch": 1.24, "learning_rate": 1.3222099109896056e-05, "loss": 0.3507, "step": 3740 }, { "epoch": 1.24, "learning_rate": 1.3218706584153091e-05, "loss": 0.4485, "step": 3741 }, { "epoch": 1.24, "learning_rate": 1.3215313645097077e-05, "loss": 0.4176, "step": 3742 }, { "epoch": 1.24, "learning_rate": 1.32119202931637e-05, "loss": 0.4281, "step": 3743 }, { "epoch": 1.24, "learning_rate": 1.3208526528788702e-05, "loss": 0.4083, "step": 3744 }, { "epoch": 1.24, "learning_rate": 1.3205132352407872e-05, "loss": 0.4802, "step": 3745 }, { "epoch": 1.24, "learning_rate": 1.3201737764457054e-05, "loss": 0.4202, "step": 3746 }, { "epoch": 1.24, "learning_rate": 1.3198342765372151e-05, "loss": 0.4534, "step": 3747 }, { "epoch": 1.24, "learning_rate": 1.319494735558911e-05, "loss": 0.4844, "step": 3748 }, { "epoch": 1.24, "learning_rate": 1.3191551535543938e-05, "loss": 0.4795, "step": 3749 }, { "epoch": 1.24, "learning_rate": 1.3188155305672687e-05, "loss": 0.4736, "step": 3750 }, { "epoch": 1.24, "learning_rate": 1.3184758666411468e-05, "loss": 0.374, "step": 3751 }, { "epoch": 1.25, "learning_rate": 1.3181361618196441e-05, "loss": 0.4792, "step": 3752 }, { "epoch": 1.25, "learning_rate": 1.317796416146382e-05, "loss": 0.3542, "step": 3753 }, { "epoch": 1.25, "learning_rate": 1.3174566296649874e-05, "loss": 0.4709, "step": 3754 }, { "epoch": 1.25, "learning_rate": 1.317116802419092e-05, "loss": 0.3958, "step": 3755 }, { "epoch": 1.25, "learning_rate": 1.3167769344523324e-05, "loss": 0.4399, "step": 3756 }, { "epoch": 1.25, "learning_rate": 1.3164370258083516e-05, "loss": 0.426, "step": 3757 }, { "epoch": 1.25, "learning_rate": 1.316097076530797e-05, "loss": 0.4163, "step": 3758 }, { "epoch": 1.25, "learning_rate": 1.3157570866633216e-05, "loss": 0.429, "step": 3759 }, { "epoch": 1.25, "learning_rate": 1.3154170562495827e-05, "loss": 0.4724, "step": 3760 }, { "epoch": 1.25, "learning_rate": 1.3150769853332441e-05, "loss": 0.4543, "step": 3761 }, { "epoch": 1.25, "learning_rate": 1.3147368739579741e-05, "loss": 0.4381, "step": 3762 }, { "epoch": 1.25, "learning_rate": 1.3143967221674461e-05, "loss": 0.4353, "step": 3763 }, { "epoch": 1.25, "learning_rate": 1.3140565300053387e-05, "loss": 0.4619, "step": 3764 }, { "epoch": 1.25, "learning_rate": 1.3137162975153363e-05, "loss": 0.5127, "step": 3765 }, { "epoch": 1.25, "learning_rate": 1.313376024741128e-05, "loss": 0.4486, "step": 3766 }, { "epoch": 1.25, "learning_rate": 1.313035711726408e-05, "loss": 0.3945, "step": 3767 }, { "epoch": 1.25, "learning_rate": 1.3126953585148754e-05, "loss": 0.3999, "step": 3768 }, { "epoch": 1.25, "learning_rate": 1.3123549651502357e-05, "loss": 0.3759, "step": 3769 }, { "epoch": 1.25, "learning_rate": 1.3120145316761979e-05, "loss": 0.4468, "step": 3770 }, { "epoch": 1.25, "learning_rate": 1.3116740581364775e-05, "loss": 0.4287, "step": 3771 }, { "epoch": 1.25, "learning_rate": 1.3113335445747942e-05, "loss": 0.41, "step": 3772 }, { "epoch": 1.25, "learning_rate": 1.3109929910348735e-05, "loss": 0.3857, "step": 3773 }, { "epoch": 1.25, "learning_rate": 1.3106523975604453e-05, "loss": 0.4319, "step": 3774 }, { "epoch": 1.25, "learning_rate": 1.3103117641952457e-05, "loss": 0.4109, "step": 3775 }, { "epoch": 1.25, "learning_rate": 1.3099710909830152e-05, "loss": 0.467, "step": 3776 }, { "epoch": 1.25, "learning_rate": 1.3096303779674992e-05, "loss": 0.4172, "step": 3777 }, { "epoch": 1.25, "learning_rate": 1.3092896251924487e-05, "loss": 0.4058, "step": 3778 }, { "epoch": 1.25, "learning_rate": 1.30894883270162e-05, "loss": 0.4319, "step": 3779 }, { "epoch": 1.25, "learning_rate": 1.3086080005387736e-05, "loss": 0.4473, "step": 3780 }, { "epoch": 1.25, "learning_rate": 1.308267128747676e-05, "loss": 0.4368, "step": 3781 }, { "epoch": 1.26, "learning_rate": 1.3079262173720984e-05, "loss": 0.4775, "step": 3782 }, { "epoch": 1.26, "learning_rate": 1.307585266455817e-05, "loss": 0.48, "step": 3783 }, { "epoch": 1.26, "learning_rate": 1.3072442760426131e-05, "loss": 0.4011, "step": 3784 }, { "epoch": 1.26, "learning_rate": 1.3069032461762734e-05, "loss": 0.4724, "step": 3785 }, { "epoch": 1.26, "learning_rate": 1.3065621769005892e-05, "loss": 0.468, "step": 3786 }, { "epoch": 1.26, "learning_rate": 1.3062210682593575e-05, "loss": 0.4253, "step": 3787 }, { "epoch": 1.26, "learning_rate": 1.3058799202963795e-05, "loss": 0.4158, "step": 3788 }, { "epoch": 1.26, "learning_rate": 1.3055387330554623e-05, "loss": 0.428, "step": 3789 }, { "epoch": 1.26, "learning_rate": 1.305197506580417e-05, "loss": 0.3767, "step": 3790 }, { "epoch": 1.26, "learning_rate": 1.3048562409150614e-05, "loss": 0.4495, "step": 3791 }, { "epoch": 1.26, "learning_rate": 1.3045149361032161e-05, "loss": 0.3433, "step": 3792 }, { "epoch": 1.26, "learning_rate": 1.3041735921887088e-05, "loss": 0.3799, "step": 3793 }, { "epoch": 1.26, "learning_rate": 1.3038322092153712e-05, "loss": 0.4175, "step": 3794 }, { "epoch": 1.26, "learning_rate": 1.30349078722704e-05, "loss": 0.3685, "step": 3795 }, { "epoch": 1.26, "learning_rate": 1.3031493262675569e-05, "loss": 0.4658, "step": 3796 }, { "epoch": 1.26, "learning_rate": 1.3028078263807694e-05, "loss": 0.4978, "step": 3797 }, { "epoch": 1.26, "learning_rate": 1.3024662876105292e-05, "loss": 0.4196, "step": 3798 }, { "epoch": 1.26, "learning_rate": 1.3021247100006929e-05, "loss": 0.33, "step": 3799 }, { "epoch": 1.26, "learning_rate": 1.3017830935951223e-05, "loss": 0.3346, "step": 3800 }, { "epoch": 1.26, "learning_rate": 1.3014414384376845e-05, "loss": 0.4449, "step": 3801 }, { "epoch": 1.26, "learning_rate": 1.3010997445722513e-05, "loss": 0.4314, "step": 3802 }, { "epoch": 1.26, "learning_rate": 1.3007580120426993e-05, "loss": 0.4575, "step": 3803 }, { "epoch": 1.26, "learning_rate": 1.3004162408929107e-05, "loss": 0.4021, "step": 3804 }, { "epoch": 1.26, "learning_rate": 1.3000744311667717e-05, "loss": 0.4714, "step": 3805 }, { "epoch": 1.26, "learning_rate": 1.299732582908174e-05, "loss": 0.4321, "step": 3806 }, { "epoch": 1.26, "learning_rate": 1.2993906961610151e-05, "loss": 0.4553, "step": 3807 }, { "epoch": 1.26, "learning_rate": 1.2990487709691955e-05, "loss": 0.4915, "step": 3808 }, { "epoch": 1.26, "learning_rate": 1.2987068073766223e-05, "loss": 0.3578, "step": 3809 }, { "epoch": 1.26, "learning_rate": 1.2983648054272066e-05, "loss": 0.4016, "step": 3810 }, { "epoch": 1.26, "learning_rate": 1.2980227651648649e-05, "loss": 0.3377, "step": 3811 }, { "epoch": 1.27, "learning_rate": 1.2976806866335185e-05, "loss": 0.4332, "step": 3812 }, { "epoch": 1.27, "learning_rate": 1.2973385698770935e-05, "loss": 0.3888, "step": 3813 }, { "epoch": 1.27, "learning_rate": 1.2969964149395212e-05, "loss": 0.3616, "step": 3814 }, { "epoch": 1.27, "learning_rate": 1.2966542218647373e-05, "loss": 0.4766, "step": 3815 }, { "epoch": 1.27, "learning_rate": 1.2963119906966828e-05, "loss": 0.365, "step": 3816 }, { "epoch": 1.27, "learning_rate": 1.295969721479304e-05, "loss": 0.4412, "step": 3817 }, { "epoch": 1.27, "learning_rate": 1.2956274142565507e-05, "loss": 0.3552, "step": 3818 }, { "epoch": 1.27, "learning_rate": 1.295285069072379e-05, "loss": 0.3784, "step": 3819 }, { "epoch": 1.27, "learning_rate": 1.2949426859707492e-05, "loss": 0.4202, "step": 3820 }, { "epoch": 1.27, "learning_rate": 1.2946002649956268e-05, "loss": 0.4109, "step": 3821 }, { "epoch": 1.27, "learning_rate": 1.2942578061909816e-05, "loss": 0.5137, "step": 3822 }, { "epoch": 1.27, "learning_rate": 1.2939153096007886e-05, "loss": 0.3707, "step": 3823 }, { "epoch": 1.27, "learning_rate": 1.2935727752690281e-05, "loss": 0.4183, "step": 3824 }, { "epoch": 1.27, "learning_rate": 1.2932302032396849e-05, "loss": 0.3452, "step": 3825 }, { "epoch": 1.27, "learning_rate": 1.2928875935567478e-05, "loss": 0.3992, "step": 3826 }, { "epoch": 1.27, "learning_rate": 1.2925449462642119e-05, "loss": 0.4192, "step": 3827 }, { "epoch": 1.27, "learning_rate": 1.2922022614060761e-05, "loss": 0.4216, "step": 3828 }, { "epoch": 1.27, "learning_rate": 1.2918595390263446e-05, "loss": 0.4243, "step": 3829 }, { "epoch": 1.27, "learning_rate": 1.2915167791690262e-05, "loss": 0.4058, "step": 3830 }, { "epoch": 1.27, "learning_rate": 1.2911739818781346e-05, "loss": 0.3933, "step": 3831 }, { "epoch": 1.27, "learning_rate": 1.2908311471976882e-05, "loss": 0.439, "step": 3832 }, { "epoch": 1.27, "learning_rate": 1.2904882751717102e-05, "loss": 0.498, "step": 3833 }, { "epoch": 1.27, "learning_rate": 1.2901453658442291e-05, "loss": 0.4473, "step": 3834 }, { "epoch": 1.27, "learning_rate": 1.2898024192592775e-05, "loss": 0.4503, "step": 3835 }, { "epoch": 1.27, "learning_rate": 1.289459435460893e-05, "loss": 0.4683, "step": 3836 }, { "epoch": 1.27, "learning_rate": 1.289116414493118e-05, "loss": 0.396, "step": 3837 }, { "epoch": 1.27, "learning_rate": 1.2887733564000002e-05, "loss": 0.4656, "step": 3838 }, { "epoch": 1.27, "learning_rate": 1.2884302612255907e-05, "loss": 0.4327, "step": 3839 }, { "epoch": 1.27, "learning_rate": 1.2880871290139469e-05, "loss": 0.4409, "step": 3840 }, { "epoch": 1.27, "learning_rate": 1.2877439598091302e-05, "loss": 0.4803, "step": 3841 }, { "epoch": 1.28, "learning_rate": 1.2874007536552067e-05, "loss": 0.4351, "step": 3842 }, { "epoch": 1.28, "learning_rate": 1.2870575105962475e-05, "loss": 0.4644, "step": 3843 }, { "epoch": 1.28, "learning_rate": 1.2867142306763283e-05, "loss": 0.4126, "step": 3844 }, { "epoch": 1.28, "learning_rate": 1.2863709139395295e-05, "loss": 0.391, "step": 3845 }, { "epoch": 1.28, "learning_rate": 1.2860275604299364e-05, "loss": 0.4399, "step": 3846 }, { "epoch": 1.28, "learning_rate": 1.2856841701916389e-05, "loss": 0.3994, "step": 3847 }, { "epoch": 1.28, "learning_rate": 1.2853407432687313e-05, "loss": 0.4012, "step": 3848 }, { "epoch": 1.28, "learning_rate": 1.2849972797053134e-05, "loss": 0.3677, "step": 3849 }, { "epoch": 1.28, "learning_rate": 1.2846537795454891e-05, "loss": 0.4009, "step": 3850 }, { "epoch": 1.28, "learning_rate": 1.284310242833367e-05, "loss": 0.4319, "step": 3851 }, { "epoch": 1.28, "learning_rate": 1.2839666696130606e-05, "loss": 0.4109, "step": 3852 }, { "epoch": 1.28, "learning_rate": 1.283623059928688e-05, "loss": 0.403, "step": 3853 }, { "epoch": 1.28, "learning_rate": 1.2832794138243723e-05, "loss": 0.4658, "step": 3854 }, { "epoch": 1.28, "learning_rate": 1.2829357313442406e-05, "loss": 0.4541, "step": 3855 }, { "epoch": 1.28, "learning_rate": 1.2825920125324252e-05, "loss": 0.4543, "step": 3856 }, { "epoch": 1.28, "learning_rate": 1.2822482574330631e-05, "loss": 0.3236, "step": 3857 }, { "epoch": 1.28, "learning_rate": 1.2819044660902953e-05, "loss": 0.4165, "step": 3858 }, { "epoch": 1.28, "learning_rate": 1.2815606385482685e-05, "loss": 0.448, "step": 3859 }, { "epoch": 1.28, "learning_rate": 1.2812167748511329e-05, "loss": 0.3816, "step": 3860 }, { "epoch": 1.28, "learning_rate": 1.2808728750430444e-05, "loss": 0.4536, "step": 3861 }, { "epoch": 1.28, "learning_rate": 1.2805289391681625e-05, "loss": 0.4099, "step": 3862 }, { "epoch": 1.28, "learning_rate": 1.2801849672706529e-05, "loss": 0.4548, "step": 3863 }, { "epoch": 1.28, "learning_rate": 1.2798409593946839e-05, "loss": 0.3687, "step": 3864 }, { "epoch": 1.28, "learning_rate": 1.2794969155844302e-05, "loss": 0.4185, "step": 3865 }, { "epoch": 1.28, "learning_rate": 1.2791528358840696e-05, "loss": 0.4485, "step": 3866 }, { "epoch": 1.28, "learning_rate": 1.2788087203377859e-05, "loss": 0.4055, "step": 3867 }, { "epoch": 1.28, "learning_rate": 1.2784645689897666e-05, "loss": 0.3953, "step": 3868 }, { "epoch": 1.28, "learning_rate": 1.2781203818842038e-05, "loss": 0.52, "step": 3869 }, { "epoch": 1.28, "learning_rate": 1.277776159065295e-05, "loss": 0.4768, "step": 3870 }, { "epoch": 1.28, "learning_rate": 1.2774319005772414e-05, "loss": 0.3857, "step": 3871 }, { "epoch": 1.29, "learning_rate": 1.277087606464249e-05, "loss": 0.4688, "step": 3872 }, { "epoch": 1.29, "learning_rate": 1.276743276770529e-05, "loss": 0.4091, "step": 3873 }, { "epoch": 1.29, "learning_rate": 1.2763989115402962e-05, "loss": 0.46, "step": 3874 }, { "epoch": 1.29, "learning_rate": 1.2760545108177707e-05, "loss": 0.3678, "step": 3875 }, { "epoch": 1.29, "learning_rate": 1.2757100746471766e-05, "loss": 0.4832, "step": 3876 }, { "epoch": 1.29, "learning_rate": 1.275365603072743e-05, "loss": 0.37, "step": 3877 }, { "epoch": 1.29, "learning_rate": 1.2750210961387035e-05, "loss": 0.4595, "step": 3878 }, { "epoch": 1.29, "learning_rate": 1.2746765538892956e-05, "loss": 0.4568, "step": 3879 }, { "epoch": 1.29, "learning_rate": 1.2743319763687627e-05, "loss": 0.4358, "step": 3880 }, { "epoch": 1.29, "learning_rate": 1.2739873636213512e-05, "loss": 0.4539, "step": 3881 }, { "epoch": 1.29, "learning_rate": 1.2736427156913127e-05, "loss": 0.396, "step": 3882 }, { "epoch": 1.29, "learning_rate": 1.2732980326229041e-05, "loss": 0.3695, "step": 3883 }, { "epoch": 1.29, "learning_rate": 1.272953314460385e-05, "loss": 0.4194, "step": 3884 }, { "epoch": 1.29, "learning_rate": 1.2726085612480214e-05, "loss": 0.3928, "step": 3885 }, { "epoch": 1.29, "learning_rate": 1.2722637730300825e-05, "loss": 0.3525, "step": 3886 }, { "epoch": 1.29, "learning_rate": 1.2719189498508424e-05, "loss": 0.4055, "step": 3887 }, { "epoch": 1.29, "learning_rate": 1.2715740917545798e-05, "loss": 0.3892, "step": 3888 }, { "epoch": 1.29, "learning_rate": 1.271229198785578e-05, "loss": 0.4468, "step": 3889 }, { "epoch": 1.29, "learning_rate": 1.2708842709881246e-05, "loss": 0.4166, "step": 3890 }, { "epoch": 1.29, "learning_rate": 1.2705393084065114e-05, "loss": 0.4861, "step": 3891 }, { "epoch": 1.29, "learning_rate": 1.270194311085035e-05, "loss": 0.4377, "step": 3892 }, { "epoch": 1.29, "learning_rate": 1.2698492790679966e-05, "loss": 0.4927, "step": 3893 }, { "epoch": 1.29, "learning_rate": 1.2695042123997014e-05, "loss": 0.4274, "step": 3894 }, { "epoch": 1.29, "learning_rate": 1.2691591111244597e-05, "loss": 0.4565, "step": 3895 }, { "epoch": 1.29, "learning_rate": 1.2688139752865851e-05, "loss": 0.4075, "step": 3896 }, { "epoch": 1.29, "learning_rate": 1.2684688049303973e-05, "loss": 0.4048, "step": 3897 }, { "epoch": 1.29, "learning_rate": 1.2681236001002186e-05, "loss": 0.316, "step": 3898 }, { "epoch": 1.29, "learning_rate": 1.267778360840377e-05, "loss": 0.3909, "step": 3899 }, { "epoch": 1.29, "learning_rate": 1.267433087195205e-05, "loss": 0.4717, "step": 3900 }, { "epoch": 1.29, "learning_rate": 1.2670877792090385e-05, "loss": 0.467, "step": 3901 }, { "epoch": 1.3, "learning_rate": 1.2667424369262186e-05, "loss": 0.4172, "step": 3902 }, { "epoch": 1.3, "learning_rate": 1.2663970603910909e-05, "loss": 0.3977, "step": 3903 }, { "epoch": 1.3, "learning_rate": 1.2660516496480045e-05, "loss": 0.5056, "step": 3904 }, { "epoch": 1.3, "learning_rate": 1.2657062047413136e-05, "loss": 0.4966, "step": 3905 }, { "epoch": 1.3, "learning_rate": 1.2653607257153768e-05, "loss": 0.4272, "step": 3906 }, { "epoch": 1.3, "learning_rate": 1.2650152126145573e-05, "loss": 0.3936, "step": 3907 }, { "epoch": 1.3, "learning_rate": 1.2646696654832218e-05, "loss": 0.4963, "step": 3908 }, { "epoch": 1.3, "learning_rate": 1.2643240843657418e-05, "loss": 0.4475, "step": 3909 }, { "epoch": 1.3, "learning_rate": 1.2639784693064938e-05, "loss": 0.363, "step": 3910 }, { "epoch": 1.3, "learning_rate": 1.2636328203498578e-05, "loss": 0.4417, "step": 3911 }, { "epoch": 1.3, "learning_rate": 1.2632871375402185e-05, "loss": 0.3271, "step": 3912 }, { "epoch": 1.3, "learning_rate": 1.262941420921965e-05, "loss": 0.452, "step": 3913 }, { "epoch": 1.3, "learning_rate": 1.2625956705394907e-05, "loss": 0.421, "step": 3914 }, { "epoch": 1.3, "learning_rate": 1.262249886437193e-05, "loss": 0.3958, "step": 3915 }, { "epoch": 1.3, "learning_rate": 1.2619040686594741e-05, "loss": 0.4644, "step": 3916 }, { "epoch": 1.3, "learning_rate": 1.2615582172507402e-05, "loss": 0.4285, "step": 3917 }, { "epoch": 1.3, "learning_rate": 1.2612123322554024e-05, "loss": 0.4509, "step": 3918 }, { "epoch": 1.3, "learning_rate": 1.2608664137178753e-05, "loss": 0.3723, "step": 3919 }, { "epoch": 1.3, "learning_rate": 1.260520461682578e-05, "loss": 0.3828, "step": 3920 }, { "epoch": 1.3, "learning_rate": 1.2601744761939347e-05, "loss": 0.4111, "step": 3921 }, { "epoch": 1.3, "learning_rate": 1.2598284572963728e-05, "loss": 0.4834, "step": 3922 }, { "epoch": 1.3, "learning_rate": 1.2594824050343248e-05, "loss": 0.3967, "step": 3923 }, { "epoch": 1.3, "learning_rate": 1.2591363194522268e-05, "loss": 0.3393, "step": 3924 }, { "epoch": 1.3, "learning_rate": 1.2587902005945198e-05, "loss": 0.4888, "step": 3925 }, { "epoch": 1.3, "learning_rate": 1.2584440485056486e-05, "loss": 0.4731, "step": 3926 }, { "epoch": 1.3, "learning_rate": 1.2580978632300627e-05, "loss": 0.4318, "step": 3927 }, { "epoch": 1.3, "learning_rate": 1.2577516448122156e-05, "loss": 0.4044, "step": 3928 }, { "epoch": 1.3, "learning_rate": 1.2574053932965646e-05, "loss": 0.3975, "step": 3929 }, { "epoch": 1.3, "learning_rate": 1.2570591087275725e-05, "loss": 0.4607, "step": 3930 }, { "epoch": 1.3, "learning_rate": 1.256712791149705e-05, "loss": 0.4553, "step": 3931 }, { "epoch": 1.31, "learning_rate": 1.2563664406074332e-05, "loss": 0.3601, "step": 3932 }, { "epoch": 1.31, "learning_rate": 1.2560200571452317e-05, "loss": 0.3958, "step": 3933 }, { "epoch": 1.31, "learning_rate": 1.255673640807579e-05, "loss": 0.4182, "step": 3934 }, { "epoch": 1.31, "learning_rate": 1.2553271916389587e-05, "loss": 0.4709, "step": 3935 }, { "epoch": 1.31, "learning_rate": 1.254980709683858e-05, "loss": 0.3923, "step": 3936 }, { "epoch": 1.31, "learning_rate": 1.2546341949867686e-05, "loss": 0.4102, "step": 3937 }, { "epoch": 1.31, "learning_rate": 1.2542876475921866e-05, "loss": 0.4407, "step": 3938 }, { "epoch": 1.31, "learning_rate": 1.253941067544612e-05, "loss": 0.4153, "step": 3939 }, { "epoch": 1.31, "learning_rate": 1.2535944548885486e-05, "loss": 0.4254, "step": 3940 }, { "epoch": 1.31, "learning_rate": 1.2532478096685054e-05, "loss": 0.4614, "step": 3941 }, { "epoch": 1.31, "learning_rate": 1.2529011319289945e-05, "loss": 0.4115, "step": 3942 }, { "epoch": 1.31, "learning_rate": 1.2525544217145332e-05, "loss": 0.491, "step": 3943 }, { "epoch": 1.31, "learning_rate": 1.2522076790696416e-05, "loss": 0.4944, "step": 3944 }, { "epoch": 1.31, "learning_rate": 1.2518609040388454e-05, "loss": 0.4829, "step": 3945 }, { "epoch": 1.31, "learning_rate": 1.2515140966666742e-05, "loss": 0.3816, "step": 3946 }, { "epoch": 1.31, "learning_rate": 1.2511672569976608e-05, "loss": 0.4807, "step": 3947 }, { "epoch": 1.31, "learning_rate": 1.2508203850763428e-05, "loss": 0.5232, "step": 3948 }, { "epoch": 1.31, "learning_rate": 1.2504734809472623e-05, "loss": 0.4133, "step": 3949 }, { "epoch": 1.31, "learning_rate": 1.2501265446549647e-05, "loss": 0.4691, "step": 3950 }, { "epoch": 1.31, "learning_rate": 1.2497795762440006e-05, "loss": 0.4563, "step": 3951 }, { "epoch": 1.31, "learning_rate": 1.2494325757589234e-05, "loss": 0.434, "step": 3952 }, { "epoch": 1.31, "learning_rate": 1.2490855432442918e-05, "loss": 0.3298, "step": 3953 }, { "epoch": 1.31, "learning_rate": 1.248738478744668e-05, "loss": 0.4354, "step": 3954 }, { "epoch": 1.31, "learning_rate": 1.248391382304618e-05, "loss": 0.3748, "step": 3955 }, { "epoch": 1.31, "learning_rate": 1.2480442539687132e-05, "loss": 0.374, "step": 3956 }, { "epoch": 1.31, "learning_rate": 1.2476970937815274e-05, "loss": 0.4309, "step": 3957 }, { "epoch": 1.31, "learning_rate": 1.2473499017876399e-05, "loss": 0.4075, "step": 3958 }, { "epoch": 1.31, "learning_rate": 1.2470026780316333e-05, "loss": 0.4429, "step": 3959 }, { "epoch": 1.31, "learning_rate": 1.2466554225580945e-05, "loss": 0.4563, "step": 3960 }, { "epoch": 1.31, "learning_rate": 1.2463081354116144e-05, "loss": 0.3987, "step": 3961 }, { "epoch": 1.31, "learning_rate": 1.2459608166367881e-05, "loss": 0.4382, "step": 3962 }, { "epoch": 1.32, "learning_rate": 1.2456134662782147e-05, "loss": 0.4041, "step": 3963 }, { "epoch": 1.32, "learning_rate": 1.2452660843804972e-05, "loss": 0.4232, "step": 3964 }, { "epoch": 1.32, "learning_rate": 1.2449186709882429e-05, "loss": 0.4341, "step": 3965 }, { "epoch": 1.32, "learning_rate": 1.2445712261460632e-05, "loss": 0.4028, "step": 3966 }, { "epoch": 1.32, "learning_rate": 1.2442237498985729e-05, "loss": 0.4124, "step": 3967 }, { "epoch": 1.32, "learning_rate": 1.2438762422903921e-05, "loss": 0.5005, "step": 3968 }, { "epoch": 1.32, "learning_rate": 1.2435287033661432e-05, "loss": 0.439, "step": 3969 }, { "epoch": 1.32, "learning_rate": 1.2431811331704545e-05, "loss": 0.3838, "step": 3970 }, { "epoch": 1.32, "learning_rate": 1.2428335317479567e-05, "loss": 0.4141, "step": 3971 }, { "epoch": 1.32, "learning_rate": 1.2424858991432851e-05, "loss": 0.4407, "step": 3972 }, { "epoch": 1.32, "learning_rate": 1.2421382354010798e-05, "loss": 0.3517, "step": 3973 }, { "epoch": 1.32, "learning_rate": 1.2417905405659835e-05, "loss": 0.3516, "step": 3974 }, { "epoch": 1.32, "learning_rate": 1.2414428146826439e-05, "loss": 0.4595, "step": 3975 }, { "epoch": 1.32, "learning_rate": 1.2410950577957121e-05, "loss": 0.4553, "step": 3976 }, { "epoch": 1.32, "learning_rate": 1.2407472699498442e-05, "loss": 0.4534, "step": 3977 }, { "epoch": 1.32, "learning_rate": 1.2403994511896984e-05, "loss": 0.4724, "step": 3978 }, { "epoch": 1.32, "learning_rate": 1.2400516015599389e-05, "loss": 0.3875, "step": 3979 }, { "epoch": 1.32, "learning_rate": 1.2397037211052326e-05, "loss": 0.4575, "step": 3980 }, { "epoch": 1.32, "learning_rate": 1.2393558098702508e-05, "loss": 0.4343, "step": 3981 }, { "epoch": 1.32, "learning_rate": 1.2390078678996683e-05, "loss": 0.432, "step": 3982 }, { "epoch": 1.32, "learning_rate": 1.2386598952381646e-05, "loss": 0.4341, "step": 3983 }, { "epoch": 1.32, "learning_rate": 1.2383118919304229e-05, "loss": 0.4167, "step": 3984 }, { "epoch": 1.32, "learning_rate": 1.2379638580211297e-05, "loss": 0.3988, "step": 3985 }, { "epoch": 1.32, "learning_rate": 1.2376157935549761e-05, "loss": 0.3646, "step": 3986 }, { "epoch": 1.32, "learning_rate": 1.2372676985766573e-05, "loss": 0.4043, "step": 3987 }, { "epoch": 1.32, "learning_rate": 1.2369195731308716e-05, "loss": 0.3092, "step": 3988 }, { "epoch": 1.32, "learning_rate": 1.2365714172623222e-05, "loss": 0.413, "step": 3989 }, { "epoch": 1.32, "learning_rate": 1.2362232310157149e-05, "loss": 0.3823, "step": 3990 }, { "epoch": 1.32, "learning_rate": 1.235875014435761e-05, "loss": 0.4258, "step": 3991 }, { "epoch": 1.32, "learning_rate": 1.2355267675671743e-05, "loss": 0.4119, "step": 3992 }, { "epoch": 1.33, "learning_rate": 1.235178490454673e-05, "loss": 0.4829, "step": 3993 }, { "epoch": 1.33, "learning_rate": 1.2348301831429798e-05, "loss": 0.4546, "step": 3994 }, { "epoch": 1.33, "learning_rate": 1.2344818456768204e-05, "loss": 0.4108, "step": 3995 }, { "epoch": 1.33, "learning_rate": 1.2341334781009246e-05, "loss": 0.3887, "step": 3996 }, { "epoch": 1.33, "learning_rate": 1.2337850804600267e-05, "loss": 0.3003, "step": 3997 }, { "epoch": 1.33, "learning_rate": 1.2334366527988635e-05, "loss": 0.453, "step": 3998 }, { "epoch": 1.33, "learning_rate": 1.233088195162177e-05, "loss": 0.368, "step": 3999 }, { "epoch": 1.33, "learning_rate": 1.2327397075947126e-05, "loss": 0.4412, "step": 4000 }, { "epoch": 1.33, "learning_rate": 1.232391190141219e-05, "loss": 0.4192, "step": 4001 }, { "epoch": 1.33, "learning_rate": 1.2320426428464499e-05, "loss": 0.428, "step": 4002 }, { "epoch": 1.33, "learning_rate": 1.2316940657551613e-05, "loss": 0.4268, "step": 4003 }, { "epoch": 1.33, "learning_rate": 1.2313454589121148e-05, "loss": 0.4088, "step": 4004 }, { "epoch": 1.33, "learning_rate": 1.230996822362074e-05, "loss": 0.488, "step": 4005 }, { "epoch": 1.33, "learning_rate": 1.2306481561498078e-05, "loss": 0.3861, "step": 4006 }, { "epoch": 1.33, "learning_rate": 1.2302994603200879e-05, "loss": 0.4497, "step": 4007 }, { "epoch": 1.33, "learning_rate": 1.2299507349176909e-05, "loss": 0.4443, "step": 4008 }, { "epoch": 1.33, "learning_rate": 1.2296019799873962e-05, "loss": 0.403, "step": 4009 }, { "epoch": 1.33, "learning_rate": 1.2292531955739868e-05, "loss": 0.3846, "step": 4010 }, { "epoch": 1.33, "learning_rate": 1.2289043817222503e-05, "loss": 0.4739, "step": 4011 }, { "epoch": 1.33, "learning_rate": 1.228555538476978e-05, "loss": 0.4255, "step": 4012 }, { "epoch": 1.33, "learning_rate": 1.2282066658829646e-05, "loss": 0.4636, "step": 4013 }, { "epoch": 1.33, "learning_rate": 1.2278577639850084e-05, "loss": 0.4445, "step": 4014 }, { "epoch": 1.33, "learning_rate": 1.2275088328279124e-05, "loss": 0.4512, "step": 4015 }, { "epoch": 1.33, "learning_rate": 1.2271598724564821e-05, "loss": 0.4451, "step": 4016 }, { "epoch": 1.33, "learning_rate": 1.2268108829155279e-05, "loss": 0.4265, "step": 4017 }, { "epoch": 1.33, "learning_rate": 1.2264618642498633e-05, "loss": 0.4363, "step": 4018 }, { "epoch": 1.33, "learning_rate": 1.2261128165043057e-05, "loss": 0.4155, "step": 4019 }, { "epoch": 1.33, "learning_rate": 1.2257637397236759e-05, "loss": 0.4403, "step": 4020 }, { "epoch": 1.33, "learning_rate": 1.2254146339527988e-05, "loss": 0.3911, "step": 4021 }, { "epoch": 1.33, "learning_rate": 1.2250654992365033e-05, "loss": 0.4209, "step": 4022 }, { "epoch": 1.34, "learning_rate": 1.2247163356196212e-05, "loss": 0.4624, "step": 4023 }, { "epoch": 1.34, "learning_rate": 1.224367143146989e-05, "loss": 0.3977, "step": 4024 }, { "epoch": 1.34, "learning_rate": 1.2240179218634463e-05, "loss": 0.4489, "step": 4025 }, { "epoch": 1.34, "learning_rate": 1.2236686718138359e-05, "loss": 0.4076, "step": 4026 }, { "epoch": 1.34, "learning_rate": 1.2233193930430055e-05, "loss": 0.3934, "step": 4027 }, { "epoch": 1.34, "learning_rate": 1.2229700855958058e-05, "loss": 0.4106, "step": 4028 }, { "epoch": 1.34, "learning_rate": 1.2226207495170911e-05, "loss": 0.4192, "step": 4029 }, { "epoch": 1.34, "learning_rate": 1.2222713848517196e-05, "loss": 0.48, "step": 4030 }, { "epoch": 1.34, "learning_rate": 1.2219219916445529e-05, "loss": 0.3521, "step": 4031 }, { "epoch": 1.34, "learning_rate": 1.2215725699404568e-05, "loss": 0.3802, "step": 4032 }, { "epoch": 1.34, "learning_rate": 1.2212231197843001e-05, "loss": 0.4072, "step": 4033 }, { "epoch": 1.34, "learning_rate": 1.2208736412209557e-05, "loss": 0.4663, "step": 4034 }, { "epoch": 1.34, "learning_rate": 1.2205241342953002e-05, "loss": 0.4412, "step": 4035 }, { "epoch": 1.34, "learning_rate": 1.2201745990522133e-05, "loss": 0.3917, "step": 4036 }, { "epoch": 1.34, "learning_rate": 1.2198250355365795e-05, "loss": 0.382, "step": 4037 }, { "epoch": 1.34, "learning_rate": 1.219475443793285e-05, "loss": 0.4429, "step": 4038 }, { "epoch": 1.34, "learning_rate": 1.2191258238672217e-05, "loss": 0.4314, "step": 4039 }, { "epoch": 1.34, "learning_rate": 1.2187761758032836e-05, "loss": 0.358, "step": 4040 }, { "epoch": 1.34, "learning_rate": 1.2184264996463692e-05, "loss": 0.4578, "step": 4041 }, { "epoch": 1.34, "learning_rate": 1.2180767954413804e-05, "loss": 0.3516, "step": 4042 }, { "epoch": 1.34, "learning_rate": 1.217727063233222e-05, "loss": 0.4331, "step": 4043 }, { "epoch": 1.34, "learning_rate": 1.2173773030668035e-05, "loss": 0.3613, "step": 4044 }, { "epoch": 1.34, "learning_rate": 1.2170275149870374e-05, "loss": 0.4169, "step": 4045 }, { "epoch": 1.34, "learning_rate": 1.2166776990388401e-05, "loss": 0.3119, "step": 4046 }, { "epoch": 1.34, "learning_rate": 1.2163278552671311e-05, "loss": 0.3749, "step": 4047 }, { "epoch": 1.34, "learning_rate": 1.2159779837168335e-05, "loss": 0.3997, "step": 4048 }, { "epoch": 1.34, "learning_rate": 1.2156280844328747e-05, "loss": 0.457, "step": 4049 }, { "epoch": 1.34, "learning_rate": 1.2152781574601847e-05, "loss": 0.465, "step": 4050 }, { "epoch": 1.34, "learning_rate": 1.2149282028436974e-05, "loss": 0.4929, "step": 4051 }, { "epoch": 1.34, "learning_rate": 1.214578220628351e-05, "loss": 0.4355, "step": 4052 }, { "epoch": 1.35, "learning_rate": 1.2142282108590864e-05, "loss": 0.4753, "step": 4053 }, { "epoch": 1.35, "learning_rate": 1.2138781735808478e-05, "loss": 0.3748, "step": 4054 }, { "epoch": 1.35, "learning_rate": 1.2135281088385835e-05, "loss": 0.3993, "step": 4055 }, { "epoch": 1.35, "learning_rate": 1.2131780166772458e-05, "loss": 0.4419, "step": 4056 }, { "epoch": 1.35, "learning_rate": 1.2128278971417893e-05, "loss": 0.4094, "step": 4057 }, { "epoch": 1.35, "learning_rate": 1.2124777502771727e-05, "loss": 0.4128, "step": 4058 }, { "epoch": 1.35, "learning_rate": 1.2121275761283586e-05, "loss": 0.4303, "step": 4059 }, { "epoch": 1.35, "learning_rate": 1.2117773747403128e-05, "loss": 0.4851, "step": 4060 }, { "epoch": 1.35, "learning_rate": 1.2114271461580041e-05, "loss": 0.4226, "step": 4061 }, { "epoch": 1.35, "learning_rate": 1.2110768904264052e-05, "loss": 0.4309, "step": 4062 }, { "epoch": 1.35, "learning_rate": 1.210726607590493e-05, "loss": 0.427, "step": 4063 }, { "epoch": 1.35, "learning_rate": 1.2103762976952467e-05, "loss": 0.4539, "step": 4064 }, { "epoch": 1.35, "learning_rate": 1.2100259607856497e-05, "loss": 0.4016, "step": 4065 }, { "epoch": 1.35, "learning_rate": 1.2096755969066882e-05, "loss": 0.4084, "step": 4066 }, { "epoch": 1.35, "learning_rate": 1.2093252061033532e-05, "loss": 0.3864, "step": 4067 }, { "epoch": 1.35, "learning_rate": 1.2089747884206371e-05, "loss": 0.4423, "step": 4068 }, { "epoch": 1.35, "learning_rate": 1.2086243439035376e-05, "loss": 0.4814, "step": 4069 }, { "epoch": 1.35, "learning_rate": 1.2082738725970553e-05, "loss": 0.437, "step": 4070 }, { "epoch": 1.35, "learning_rate": 1.2079233745461938e-05, "loss": 0.4122, "step": 4071 }, { "epoch": 1.35, "learning_rate": 1.2075728497959604e-05, "loss": 0.37, "step": 4072 }, { "epoch": 1.35, "learning_rate": 1.2072222983913664e-05, "loss": 0.3796, "step": 4073 }, { "epoch": 1.35, "learning_rate": 1.2068717203774252e-05, "loss": 0.4211, "step": 4074 }, { "epoch": 1.35, "learning_rate": 1.206521115799155e-05, "loss": 0.416, "step": 4075 }, { "epoch": 1.35, "learning_rate": 1.2061704847015765e-05, "loss": 0.4778, "step": 4076 }, { "epoch": 1.35, "learning_rate": 1.2058198271297144e-05, "loss": 0.5161, "step": 4077 }, { "epoch": 1.35, "learning_rate": 1.2054691431285962e-05, "loss": 0.4031, "step": 4078 }, { "epoch": 1.35, "learning_rate": 1.2051184327432531e-05, "loss": 0.4456, "step": 4079 }, { "epoch": 1.35, "learning_rate": 1.2047676960187202e-05, "loss": 0.4236, "step": 4080 }, { "epoch": 1.35, "learning_rate": 1.2044169330000347e-05, "loss": 0.4021, "step": 4081 }, { "epoch": 1.35, "learning_rate": 1.2040661437322389e-05, "loss": 0.3632, "step": 4082 }, { "epoch": 1.36, "learning_rate": 1.2037153282603768e-05, "loss": 0.4272, "step": 4083 }, { "epoch": 1.36, "learning_rate": 1.203364486629497e-05, "loss": 0.4121, "step": 4084 }, { "epoch": 1.36, "learning_rate": 1.2030136188846506e-05, "loss": 0.4224, "step": 4085 }, { "epoch": 1.36, "learning_rate": 1.2026627250708926e-05, "loss": 0.3351, "step": 4086 }, { "epoch": 1.36, "learning_rate": 1.2023118052332812e-05, "loss": 0.341, "step": 4087 }, { "epoch": 1.36, "learning_rate": 1.2019608594168775e-05, "loss": 0.4778, "step": 4088 }, { "epoch": 1.36, "learning_rate": 1.2016098876667465e-05, "loss": 0.3933, "step": 4089 }, { "epoch": 1.36, "learning_rate": 1.2012588900279567e-05, "loss": 0.364, "step": 4090 }, { "epoch": 1.36, "learning_rate": 1.2009078665455795e-05, "loss": 0.3389, "step": 4091 }, { "epoch": 1.36, "learning_rate": 1.2005568172646894e-05, "loss": 0.4321, "step": 4092 }, { "epoch": 1.36, "learning_rate": 1.2002057422303648e-05, "loss": 0.3273, "step": 4093 }, { "epoch": 1.36, "learning_rate": 1.1998546414876871e-05, "loss": 0.3361, "step": 4094 }, { "epoch": 1.36, "learning_rate": 1.199503515081741e-05, "loss": 0.4705, "step": 4095 }, { "epoch": 1.36, "learning_rate": 1.1991523630576142e-05, "loss": 0.3135, "step": 4096 }, { "epoch": 1.36, "learning_rate": 1.1988011854603985e-05, "loss": 0.4546, "step": 4097 }, { "epoch": 1.36, "learning_rate": 1.1984499823351886e-05, "loss": 0.4277, "step": 4098 }, { "epoch": 1.36, "learning_rate": 1.1980987537270817e-05, "loss": 0.3787, "step": 4099 }, { "epoch": 1.36, "learning_rate": 1.1977474996811793e-05, "loss": 0.4271, "step": 4100 }, { "epoch": 1.36, "learning_rate": 1.1973962202425862e-05, "loss": 0.4172, "step": 4101 }, { "epoch": 1.36, "learning_rate": 1.1970449154564094e-05, "loss": 0.3778, "step": 4102 }, { "epoch": 1.36, "learning_rate": 1.1966935853677602e-05, "loss": 0.3459, "step": 4103 }, { "epoch": 1.36, "learning_rate": 1.1963422300217528e-05, "loss": 0.355, "step": 4104 }, { "epoch": 1.36, "learning_rate": 1.1959908494635048e-05, "loss": 0.4001, "step": 4105 }, { "epoch": 1.36, "learning_rate": 1.1956394437381364e-05, "loss": 0.4382, "step": 4106 }, { "epoch": 1.36, "learning_rate": 1.1952880128907717e-05, "loss": 0.4277, "step": 4107 }, { "epoch": 1.36, "learning_rate": 1.194936556966538e-05, "loss": 0.4294, "step": 4108 }, { "epoch": 1.36, "learning_rate": 1.1945850760105652e-05, "loss": 0.3803, "step": 4109 }, { "epoch": 1.36, "learning_rate": 1.1942335700679872e-05, "loss": 0.3942, "step": 4110 }, { "epoch": 1.36, "learning_rate": 1.1938820391839408e-05, "loss": 0.4585, "step": 4111 }, { "epoch": 1.36, "learning_rate": 1.1935304834035662e-05, "loss": 0.4692, "step": 4112 }, { "epoch": 1.37, "learning_rate": 1.1931789027720058e-05, "loss": 0.3777, "step": 4113 }, { "epoch": 1.37, "learning_rate": 1.1928272973344065e-05, "loss": 0.4376, "step": 4114 }, { "epoch": 1.37, "learning_rate": 1.1924756671359177e-05, "loss": 0.3774, "step": 4115 }, { "epoch": 1.37, "learning_rate": 1.192124012221692e-05, "loss": 0.4485, "step": 4116 }, { "epoch": 1.37, "learning_rate": 1.1917723326368854e-05, "loss": 0.3839, "step": 4117 }, { "epoch": 1.37, "learning_rate": 1.1914206284266571e-05, "loss": 0.4727, "step": 4118 }, { "epoch": 1.37, "learning_rate": 1.1910688996361693e-05, "loss": 0.3694, "step": 4119 }, { "epoch": 1.37, "learning_rate": 1.1907171463105871e-05, "loss": 0.5015, "step": 4120 }, { "epoch": 1.37, "learning_rate": 1.1903653684950791e-05, "loss": 0.3936, "step": 4121 }, { "epoch": 1.37, "learning_rate": 1.1900135662348173e-05, "loss": 0.4419, "step": 4122 }, { "epoch": 1.37, "learning_rate": 1.1896617395749765e-05, "loss": 0.3809, "step": 4123 }, { "epoch": 1.37, "learning_rate": 1.1893098885607342e-05, "loss": 0.4167, "step": 4124 }, { "epoch": 1.37, "learning_rate": 1.1889580132372717e-05, "loss": 0.4297, "step": 4125 }, { "epoch": 1.37, "learning_rate": 1.1886061136497731e-05, "loss": 0.4041, "step": 4126 }, { "epoch": 1.37, "learning_rate": 1.1882541898434256e-05, "loss": 0.4346, "step": 4127 }, { "epoch": 1.37, "learning_rate": 1.1879022418634202e-05, "loss": 0.45, "step": 4128 }, { "epoch": 1.37, "learning_rate": 1.1875502697549502e-05, "loss": 0.4399, "step": 4129 }, { "epoch": 1.37, "learning_rate": 1.1871982735632118e-05, "loss": 0.4678, "step": 4130 }, { "epoch": 1.37, "learning_rate": 1.1868462533334049e-05, "loss": 0.4351, "step": 4131 }, { "epoch": 1.37, "learning_rate": 1.1864942091107325e-05, "loss": 0.4043, "step": 4132 }, { "epoch": 1.37, "learning_rate": 1.1861421409404006e-05, "loss": 0.4309, "step": 4133 }, { "epoch": 1.37, "learning_rate": 1.1857900488676178e-05, "loss": 0.4047, "step": 4134 }, { "epoch": 1.37, "learning_rate": 1.185437932937596e-05, "loss": 0.4087, "step": 4135 }, { "epoch": 1.37, "learning_rate": 1.1850857931955511e-05, "loss": 0.3877, "step": 4136 }, { "epoch": 1.37, "learning_rate": 1.1847336296867002e-05, "loss": 0.4381, "step": 4137 }, { "epoch": 1.37, "learning_rate": 1.1843814424562649e-05, "loss": 0.4277, "step": 4138 }, { "epoch": 1.37, "learning_rate": 1.1840292315494701e-05, "loss": 0.4294, "step": 4139 }, { "epoch": 1.37, "learning_rate": 1.183676997011542e-05, "loss": 0.3676, "step": 4140 }, { "epoch": 1.37, "learning_rate": 1.1833247388877116e-05, "loss": 0.4108, "step": 4141 }, { "epoch": 1.37, "learning_rate": 1.1829724572232124e-05, "loss": 0.421, "step": 4142 }, { "epoch": 1.38, "learning_rate": 1.1826201520632802e-05, "loss": 0.3887, "step": 4143 }, { "epoch": 1.38, "learning_rate": 1.1822678234531549e-05, "loss": 0.4099, "step": 4144 }, { "epoch": 1.38, "learning_rate": 1.1819154714380782e-05, "loss": 0.3997, "step": 4145 }, { "epoch": 1.38, "learning_rate": 1.1815630960632964e-05, "loss": 0.4148, "step": 4146 }, { "epoch": 1.38, "learning_rate": 1.1812106973740573e-05, "loss": 0.3864, "step": 4147 }, { "epoch": 1.38, "learning_rate": 1.1808582754156123e-05, "loss": 0.4105, "step": 4148 }, { "epoch": 1.38, "learning_rate": 1.1805058302332163e-05, "loss": 0.3704, "step": 4149 }, { "epoch": 1.38, "learning_rate": 1.1801533618721261e-05, "loss": 0.4119, "step": 4150 }, { "epoch": 1.38, "learning_rate": 1.1798008703776025e-05, "loss": 0.4392, "step": 4151 }, { "epoch": 1.38, "learning_rate": 1.1794483557949083e-05, "loss": 0.4558, "step": 4152 }, { "epoch": 1.38, "learning_rate": 1.1790958181693102e-05, "loss": 0.4651, "step": 4153 }, { "epoch": 1.38, "learning_rate": 1.1787432575460773e-05, "loss": 0.3408, "step": 4154 }, { "epoch": 1.38, "learning_rate": 1.1783906739704819e-05, "loss": 0.3669, "step": 4155 }, { "epoch": 1.38, "learning_rate": 1.1780380674877988e-05, "loss": 0.4365, "step": 4156 }, { "epoch": 1.38, "learning_rate": 1.1776854381433065e-05, "loss": 0.5114, "step": 4157 }, { "epoch": 1.38, "learning_rate": 1.1773327859822857e-05, "loss": 0.4248, "step": 4158 }, { "epoch": 1.38, "learning_rate": 1.1769801110500205e-05, "loss": 0.4231, "step": 4159 }, { "epoch": 1.38, "learning_rate": 1.176627413391798e-05, "loss": 0.4148, "step": 4160 }, { "epoch": 1.38, "learning_rate": 1.1762746930529077e-05, "loss": 0.4327, "step": 4161 }, { "epoch": 1.38, "learning_rate": 1.1759219500786424e-05, "loss": 0.3468, "step": 4162 }, { "epoch": 1.38, "learning_rate": 1.1755691845142977e-05, "loss": 0.4557, "step": 4163 }, { "epoch": 1.38, "learning_rate": 1.1752163964051716e-05, "loss": 0.4524, "step": 4164 }, { "epoch": 1.38, "learning_rate": 1.1748635857965663e-05, "loss": 0.4073, "step": 4165 }, { "epoch": 1.38, "learning_rate": 1.1745107527337855e-05, "loss": 0.3578, "step": 4166 }, { "epoch": 1.38, "learning_rate": 1.174157897262137e-05, "loss": 0.3599, "step": 4167 }, { "epoch": 1.38, "learning_rate": 1.1738050194269304e-05, "loss": 0.417, "step": 4168 }, { "epoch": 1.38, "learning_rate": 1.1734521192734785e-05, "loss": 0.4109, "step": 4169 }, { "epoch": 1.38, "learning_rate": 1.1730991968470977e-05, "loss": 0.4265, "step": 4170 }, { "epoch": 1.38, "learning_rate": 1.1727462521931064e-05, "loss": 0.3661, "step": 4171 }, { "epoch": 1.38, "learning_rate": 1.1723932853568254e-05, "loss": 0.4541, "step": 4172 }, { "epoch": 1.38, "learning_rate": 1.17204029638358e-05, "loss": 0.4639, "step": 4173 }, { "epoch": 1.39, "learning_rate": 1.171687285318697e-05, "loss": 0.3013, "step": 4174 }, { "epoch": 1.39, "learning_rate": 1.1713342522075064e-05, "loss": 0.4204, "step": 4175 }, { "epoch": 1.39, "learning_rate": 1.170981197095341e-05, "loss": 0.3915, "step": 4176 }, { "epoch": 1.39, "learning_rate": 1.1706281200275368e-05, "loss": 0.4536, "step": 4177 }, { "epoch": 1.39, "learning_rate": 1.170275021049432e-05, "loss": 0.4443, "step": 4178 }, { "epoch": 1.39, "learning_rate": 1.169921900206368e-05, "loss": 0.4045, "step": 4179 }, { "epoch": 1.39, "learning_rate": 1.1695687575436895e-05, "loss": 0.3884, "step": 4180 }, { "epoch": 1.39, "learning_rate": 1.1692155931067423e-05, "loss": 0.3574, "step": 4181 }, { "epoch": 1.39, "learning_rate": 1.1688624069408769e-05, "loss": 0.3356, "step": 4182 }, { "epoch": 1.39, "learning_rate": 1.168509199091446e-05, "loss": 0.4934, "step": 4183 }, { "epoch": 1.39, "learning_rate": 1.1681559696038043e-05, "loss": 0.3945, "step": 4184 }, { "epoch": 1.39, "learning_rate": 1.1678027185233101e-05, "loss": 0.4272, "step": 4185 }, { "epoch": 1.39, "learning_rate": 1.167449445895324e-05, "loss": 0.4028, "step": 4186 }, { "epoch": 1.39, "learning_rate": 1.1670961517652104e-05, "loss": 0.4419, "step": 4187 }, { "epoch": 1.39, "learning_rate": 1.166742836178335e-05, "loss": 0.3875, "step": 4188 }, { "epoch": 1.39, "learning_rate": 1.1663894991800675e-05, "loss": 0.3648, "step": 4189 }, { "epoch": 1.39, "learning_rate": 1.166036140815779e-05, "loss": 0.444, "step": 4190 }, { "epoch": 1.39, "learning_rate": 1.1656827611308447e-05, "loss": 0.442, "step": 4191 }, { "epoch": 1.39, "learning_rate": 1.1653293601706419e-05, "loss": 0.4437, "step": 4192 }, { "epoch": 1.39, "learning_rate": 1.1649759379805505e-05, "loss": 0.4341, "step": 4193 }, { "epoch": 1.39, "learning_rate": 1.1646224946059536e-05, "loss": 0.4091, "step": 4194 }, { "epoch": 1.39, "learning_rate": 1.1642690300922363e-05, "loss": 0.3547, "step": 4195 }, { "epoch": 1.39, "learning_rate": 1.1639155444847874e-05, "loss": 0.4387, "step": 4196 }, { "epoch": 1.39, "learning_rate": 1.1635620378289975e-05, "loss": 0.3488, "step": 4197 }, { "epoch": 1.39, "learning_rate": 1.1632085101702608e-05, "loss": 0.3875, "step": 4198 }, { "epoch": 1.39, "learning_rate": 1.162854961553973e-05, "loss": 0.3202, "step": 4199 }, { "epoch": 1.39, "learning_rate": 1.1625013920255334e-05, "loss": 0.3419, "step": 4200 }, { "epoch": 1.39, "learning_rate": 1.162147801630344e-05, "loss": 0.4656, "step": 4201 }, { "epoch": 1.39, "learning_rate": 1.1617941904138087e-05, "loss": 0.4373, "step": 4202 }, { "epoch": 1.39, "learning_rate": 1.1614405584213349e-05, "loss": 0.4034, "step": 4203 }, { "epoch": 1.4, "learning_rate": 1.1610869056983324e-05, "loss": 0.4058, "step": 4204 }, { "epoch": 1.4, "learning_rate": 1.1607332322902138e-05, "loss": 0.4329, "step": 4205 }, { "epoch": 1.4, "learning_rate": 1.160379538242394e-05, "loss": 0.4039, "step": 4206 }, { "epoch": 1.4, "learning_rate": 1.1600258236002906e-05, "loss": 0.3073, "step": 4207 }, { "epoch": 1.4, "learning_rate": 1.1596720884093242e-05, "loss": 0.4387, "step": 4208 }, { "epoch": 1.4, "learning_rate": 1.1593183327149177e-05, "loss": 0.3483, "step": 4209 }, { "epoch": 1.4, "learning_rate": 1.1589645565624968e-05, "loss": 0.4005, "step": 4210 }, { "epoch": 1.4, "learning_rate": 1.1586107599974897e-05, "loss": 0.4229, "step": 4211 }, { "epoch": 1.4, "learning_rate": 1.1582569430653275e-05, "loss": 0.4269, "step": 4212 }, { "epoch": 1.4, "learning_rate": 1.1579031058114431e-05, "loss": 0.4016, "step": 4213 }, { "epoch": 1.4, "learning_rate": 1.1575492482812733e-05, "loss": 0.3616, "step": 4214 }, { "epoch": 1.4, "learning_rate": 1.157195370520257e-05, "loss": 0.4514, "step": 4215 }, { "epoch": 1.4, "learning_rate": 1.1568414725738348e-05, "loss": 0.3398, "step": 4216 }, { "epoch": 1.4, "learning_rate": 1.156487554487451e-05, "loss": 0.4014, "step": 4217 }, { "epoch": 1.4, "learning_rate": 1.1561336163065522e-05, "loss": 0.4878, "step": 4218 }, { "epoch": 1.4, "learning_rate": 1.1557796580765874e-05, "loss": 0.4614, "step": 4219 }, { "epoch": 1.4, "learning_rate": 1.1554256798430082e-05, "loss": 0.3546, "step": 4220 }, { "epoch": 1.4, "learning_rate": 1.1550716816512686e-05, "loss": 0.4052, "step": 4221 }, { "epoch": 1.4, "learning_rate": 1.154717663546826e-05, "loss": 0.3372, "step": 4222 }, { "epoch": 1.4, "learning_rate": 1.154363625575139e-05, "loss": 0.467, "step": 4223 }, { "epoch": 1.4, "learning_rate": 1.1540095677816701e-05, "loss": 0.4227, "step": 4224 }, { "epoch": 1.4, "learning_rate": 1.1536554902118834e-05, "loss": 0.4041, "step": 4225 }, { "epoch": 1.4, "learning_rate": 1.1533013929112463e-05, "loss": 0.465, "step": 4226 }, { "epoch": 1.4, "learning_rate": 1.1529472759252275e-05, "loss": 0.4011, "step": 4227 }, { "epoch": 1.4, "learning_rate": 1.1525931392993002e-05, "loss": 0.4246, "step": 4228 }, { "epoch": 1.4, "learning_rate": 1.152238983078938e-05, "loss": 0.4216, "step": 4229 }, { "epoch": 1.4, "learning_rate": 1.1518848073096182e-05, "loss": 0.4504, "step": 4230 }, { "epoch": 1.4, "learning_rate": 1.1515306120368208e-05, "loss": 0.3222, "step": 4231 }, { "epoch": 1.4, "learning_rate": 1.1511763973060273e-05, "loss": 0.3994, "step": 4232 }, { "epoch": 1.4, "learning_rate": 1.150822163162723e-05, "loss": 0.4052, "step": 4233 }, { "epoch": 1.41, "learning_rate": 1.1504679096523944e-05, "loss": 0.3947, "step": 4234 }, { "epoch": 1.41, "learning_rate": 1.1501136368205313e-05, "loss": 0.458, "step": 4235 }, { "epoch": 1.41, "learning_rate": 1.1497593447126256e-05, "loss": 0.4114, "step": 4236 }, { "epoch": 1.41, "learning_rate": 1.1494050333741722e-05, "loss": 0.3385, "step": 4237 }, { "epoch": 1.41, "learning_rate": 1.1490507028506677e-05, "loss": 0.4198, "step": 4238 }, { "epoch": 1.41, "learning_rate": 1.1486963531876119e-05, "loss": 0.4053, "step": 4239 }, { "epoch": 1.41, "learning_rate": 1.1483419844305064e-05, "loss": 0.386, "step": 4240 }, { "epoch": 1.41, "learning_rate": 1.1479875966248557e-05, "loss": 0.4797, "step": 4241 }, { "epoch": 1.41, "learning_rate": 1.1476331898161666e-05, "loss": 0.4114, "step": 4242 }, { "epoch": 1.41, "learning_rate": 1.1472787640499488e-05, "loss": 0.3508, "step": 4243 }, { "epoch": 1.41, "learning_rate": 1.1469243193717133e-05, "loss": 0.3695, "step": 4244 }, { "epoch": 1.41, "learning_rate": 1.1465698558269743e-05, "loss": 0.3212, "step": 4245 }, { "epoch": 1.41, "learning_rate": 1.146215373461249e-05, "loss": 0.3854, "step": 4246 }, { "epoch": 1.41, "learning_rate": 1.145860872320056e-05, "loss": 0.3606, "step": 4247 }, { "epoch": 1.41, "learning_rate": 1.1455063524489162e-05, "loss": 0.4041, "step": 4248 }, { "epoch": 1.41, "learning_rate": 1.1451518138933539e-05, "loss": 0.3779, "step": 4249 }, { "epoch": 1.41, "learning_rate": 1.1447972566988953e-05, "loss": 0.4519, "step": 4250 }, { "epoch": 1.41, "learning_rate": 1.1444426809110689e-05, "loss": 0.4249, "step": 4251 }, { "epoch": 1.41, "learning_rate": 1.1440880865754054e-05, "loss": 0.4097, "step": 4252 }, { "epoch": 1.41, "learning_rate": 1.1437334737374386e-05, "loss": 0.4363, "step": 4253 }, { "epoch": 1.41, "learning_rate": 1.1433788424427038e-05, "loss": 0.4463, "step": 4254 }, { "epoch": 1.41, "learning_rate": 1.1430241927367392e-05, "loss": 0.4622, "step": 4255 }, { "epoch": 1.41, "learning_rate": 1.1426695246650856e-05, "loss": 0.3702, "step": 4256 }, { "epoch": 1.41, "learning_rate": 1.1423148382732854e-05, "loss": 0.3833, "step": 4257 }, { "epoch": 1.41, "learning_rate": 1.1419601336068837e-05, "loss": 0.3506, "step": 4258 }, { "epoch": 1.41, "learning_rate": 1.1416054107114282e-05, "loss": 0.3884, "step": 4259 }, { "epoch": 1.41, "learning_rate": 1.141250669632469e-05, "loss": 0.4683, "step": 4260 }, { "epoch": 1.41, "learning_rate": 1.1408959104155577e-05, "loss": 0.4573, "step": 4261 }, { "epoch": 1.41, "learning_rate": 1.1405411331062493e-05, "loss": 0.4104, "step": 4262 }, { "epoch": 1.41, "learning_rate": 1.1401863377501003e-05, "loss": 0.3816, "step": 4263 }, { "epoch": 1.42, "learning_rate": 1.1398315243926703e-05, "loss": 0.3975, "step": 4264 }, { "epoch": 1.42, "learning_rate": 1.1394766930795202e-05, "loss": 0.4363, "step": 4265 }, { "epoch": 1.42, "learning_rate": 1.1391218438562143e-05, "loss": 0.3776, "step": 4266 }, { "epoch": 1.42, "learning_rate": 1.1387669767683185e-05, "loss": 0.5273, "step": 4267 }, { "epoch": 1.42, "learning_rate": 1.1384120918614007e-05, "loss": 0.4019, "step": 4268 }, { "epoch": 1.42, "learning_rate": 1.138057189181032e-05, "loss": 0.3632, "step": 4269 }, { "epoch": 1.42, "learning_rate": 1.1377022687727857e-05, "loss": 0.384, "step": 4270 }, { "epoch": 1.42, "learning_rate": 1.1373473306822364e-05, "loss": 0.4393, "step": 4271 }, { "epoch": 1.42, "learning_rate": 1.1369923749549617e-05, "loss": 0.46, "step": 4272 }, { "epoch": 1.42, "learning_rate": 1.1366374016365414e-05, "loss": 0.3568, "step": 4273 }, { "epoch": 1.42, "learning_rate": 1.1362824107725579e-05, "loss": 0.364, "step": 4274 }, { "epoch": 1.42, "learning_rate": 1.135927402408595e-05, "loss": 0.4363, "step": 4275 }, { "epoch": 1.42, "learning_rate": 1.1355723765902388e-05, "loss": 0.3962, "step": 4276 }, { "epoch": 1.42, "learning_rate": 1.1352173333630791e-05, "loss": 0.415, "step": 4277 }, { "epoch": 1.42, "learning_rate": 1.1348622727727061e-05, "loss": 0.3761, "step": 4278 }, { "epoch": 1.42, "learning_rate": 1.1345071948647131e-05, "loss": 0.3843, "step": 4279 }, { "epoch": 1.42, "learning_rate": 1.1341520996846958e-05, "loss": 0.3461, "step": 4280 }, { "epoch": 1.42, "learning_rate": 1.133796987278252e-05, "loss": 0.4436, "step": 4281 }, { "epoch": 1.42, "learning_rate": 1.1334418576909809e-05, "loss": 0.4143, "step": 4282 }, { "epoch": 1.42, "learning_rate": 1.1330867109684851e-05, "loss": 0.3577, "step": 4283 }, { "epoch": 1.42, "learning_rate": 1.132731547156369e-05, "loss": 0.3765, "step": 4284 }, { "epoch": 1.42, "learning_rate": 1.1323763663002387e-05, "loss": 0.3862, "step": 4285 }, { "epoch": 1.42, "learning_rate": 1.1320211684457027e-05, "loss": 0.3649, "step": 4286 }, { "epoch": 1.42, "learning_rate": 1.1316659536383723e-05, "loss": 0.3887, "step": 4287 }, { "epoch": 1.42, "learning_rate": 1.1313107219238606e-05, "loss": 0.4141, "step": 4288 }, { "epoch": 1.42, "learning_rate": 1.1309554733477822e-05, "loss": 0.4241, "step": 4289 }, { "epoch": 1.42, "learning_rate": 1.1306002079557548e-05, "loss": 0.4001, "step": 4290 }, { "epoch": 1.42, "learning_rate": 1.130244925793398e-05, "loss": 0.391, "step": 4291 }, { "epoch": 1.42, "learning_rate": 1.1298896269063333e-05, "loss": 0.4453, "step": 4292 }, { "epoch": 1.42, "learning_rate": 1.1295343113401844e-05, "loss": 0.3451, "step": 4293 }, { "epoch": 1.43, "learning_rate": 1.1291789791405775e-05, "loss": 0.3489, "step": 4294 }, { "epoch": 1.43, "learning_rate": 1.128823630353141e-05, "loss": 0.4041, "step": 4295 }, { "epoch": 1.43, "learning_rate": 1.1284682650235042e-05, "loss": 0.3992, "step": 4296 }, { "epoch": 1.43, "learning_rate": 1.1281128831973e-05, "loss": 0.3199, "step": 4297 }, { "epoch": 1.43, "learning_rate": 1.1277574849201632e-05, "loss": 0.3444, "step": 4298 }, { "epoch": 1.43, "learning_rate": 1.1274020702377298e-05, "loss": 0.3943, "step": 4299 }, { "epoch": 1.43, "learning_rate": 1.1270466391956385e-05, "loss": 0.4058, "step": 4300 }, { "epoch": 1.43, "learning_rate": 1.1266911918395303e-05, "loss": 0.3384, "step": 4301 }, { "epoch": 1.43, "learning_rate": 1.1263357282150485e-05, "loss": 0.4258, "step": 4302 }, { "epoch": 1.43, "learning_rate": 1.125980248367837e-05, "loss": 0.3921, "step": 4303 }, { "epoch": 1.43, "learning_rate": 1.1256247523435439e-05, "loss": 0.4019, "step": 4304 }, { "epoch": 1.43, "learning_rate": 1.125269240187818e-05, "loss": 0.4358, "step": 4305 }, { "epoch": 1.43, "learning_rate": 1.1249137119463101e-05, "loss": 0.3853, "step": 4306 }, { "epoch": 1.43, "learning_rate": 1.1245581676646738e-05, "loss": 0.3992, "step": 4307 }, { "epoch": 1.43, "learning_rate": 1.1242026073885642e-05, "loss": 0.436, "step": 4308 }, { "epoch": 1.43, "learning_rate": 1.1238470311636392e-05, "loss": 0.427, "step": 4309 }, { "epoch": 1.43, "learning_rate": 1.1234914390355576e-05, "loss": 0.3358, "step": 4310 }, { "epoch": 1.43, "learning_rate": 1.123135831049981e-05, "loss": 0.3552, "step": 4311 }, { "epoch": 1.43, "learning_rate": 1.1227802072525736e-05, "loss": 0.342, "step": 4312 }, { "epoch": 1.43, "learning_rate": 1.1224245676890001e-05, "loss": 0.4236, "step": 4313 }, { "epoch": 1.43, "learning_rate": 1.1220689124049284e-05, "loss": 0.3391, "step": 4314 }, { "epoch": 1.43, "learning_rate": 1.1217132414460281e-05, "loss": 0.4084, "step": 4315 }, { "epoch": 1.43, "learning_rate": 1.1213575548579705e-05, "loss": 0.4067, "step": 4316 }, { "epoch": 1.43, "learning_rate": 1.1210018526864292e-05, "loss": 0.3641, "step": 4317 }, { "epoch": 1.43, "learning_rate": 1.1206461349770804e-05, "loss": 0.3465, "step": 4318 }, { "epoch": 1.43, "learning_rate": 1.1202904017756013e-05, "loss": 0.3553, "step": 4319 }, { "epoch": 1.43, "learning_rate": 1.1199346531276714e-05, "loss": 0.4331, "step": 4320 }, { "epoch": 1.43, "learning_rate": 1.119578889078972e-05, "loss": 0.4424, "step": 4321 }, { "epoch": 1.43, "learning_rate": 1.1192231096751875e-05, "loss": 0.373, "step": 4322 }, { "epoch": 1.43, "learning_rate": 1.1188673149620032e-05, "loss": 0.3682, "step": 4323 }, { "epoch": 1.44, "learning_rate": 1.1185115049851054e-05, "loss": 0.4536, "step": 4324 }, { "epoch": 1.44, "learning_rate": 1.118155679790185e-05, "loss": 0.4741, "step": 4325 }, { "epoch": 1.44, "learning_rate": 1.1177998394229329e-05, "loss": 0.4141, "step": 4326 }, { "epoch": 1.44, "learning_rate": 1.1174439839290418e-05, "loss": 0.3958, "step": 4327 }, { "epoch": 1.44, "learning_rate": 1.117088113354208e-05, "loss": 0.4076, "step": 4328 }, { "epoch": 1.44, "learning_rate": 1.1167322277441282e-05, "loss": 0.3915, "step": 4329 }, { "epoch": 1.44, "learning_rate": 1.1163763271445017e-05, "loss": 0.3955, "step": 4330 }, { "epoch": 1.44, "learning_rate": 1.1160204116010291e-05, "loss": 0.4751, "step": 4331 }, { "epoch": 1.44, "learning_rate": 1.1156644811594144e-05, "loss": 0.413, "step": 4332 }, { "epoch": 1.44, "learning_rate": 1.1153085358653619e-05, "loss": 0.3818, "step": 4333 }, { "epoch": 1.44, "learning_rate": 1.1149525757645781e-05, "loss": 0.4138, "step": 4334 }, { "epoch": 1.44, "learning_rate": 1.114596600902772e-05, "loss": 0.4261, "step": 4335 }, { "epoch": 1.44, "learning_rate": 1.1142406113256545e-05, "loss": 0.4724, "step": 4336 }, { "epoch": 1.44, "learning_rate": 1.1138846070789378e-05, "loss": 0.459, "step": 4337 }, { "epoch": 1.44, "learning_rate": 1.1135285882083364e-05, "loss": 0.4098, "step": 4338 }, { "epoch": 1.44, "learning_rate": 1.1131725547595664e-05, "loss": 0.3187, "step": 4339 }, { "epoch": 1.44, "learning_rate": 1.1128165067783463e-05, "loss": 0.3945, "step": 4340 }, { "epoch": 1.44, "learning_rate": 1.1124604443103958e-05, "loss": 0.3809, "step": 4341 }, { "epoch": 1.44, "learning_rate": 1.1121043674014367e-05, "loss": 0.4209, "step": 4342 }, { "epoch": 1.44, "learning_rate": 1.1117482760971932e-05, "loss": 0.3801, "step": 4343 }, { "epoch": 1.44, "learning_rate": 1.1113921704433901e-05, "loss": 0.3916, "step": 4344 }, { "epoch": 1.44, "learning_rate": 1.111036050485755e-05, "loss": 0.3719, "step": 4345 }, { "epoch": 1.44, "learning_rate": 1.110679916270018e-05, "loss": 0.4219, "step": 4346 }, { "epoch": 1.44, "learning_rate": 1.1103237678419094e-05, "loss": 0.4279, "step": 4347 }, { "epoch": 1.44, "learning_rate": 1.109967605247162e-05, "loss": 0.3822, "step": 4348 }, { "epoch": 1.44, "learning_rate": 1.1096114285315107e-05, "loss": 0.4417, "step": 4349 }, { "epoch": 1.44, "learning_rate": 1.1092552377406927e-05, "loss": 0.446, "step": 4350 }, { "epoch": 1.44, "learning_rate": 1.1088990329204452e-05, "loss": 0.3618, "step": 4351 }, { "epoch": 1.44, "learning_rate": 1.108542814116509e-05, "loss": 0.3452, "step": 4352 }, { "epoch": 1.44, "learning_rate": 1.1081865813746259e-05, "loss": 0.3921, "step": 4353 }, { "epoch": 1.45, "learning_rate": 1.1078303347405402e-05, "loss": 0.3457, "step": 4354 }, { "epoch": 1.45, "learning_rate": 1.1074740742599963e-05, "loss": 0.425, "step": 4355 }, { "epoch": 1.45, "learning_rate": 1.107117799978742e-05, "loss": 0.4436, "step": 4356 }, { "epoch": 1.45, "learning_rate": 1.1067615119425267e-05, "loss": 0.4146, "step": 4357 }, { "epoch": 1.45, "learning_rate": 1.1064052101971008e-05, "loss": 0.3934, "step": 4358 }, { "epoch": 1.45, "learning_rate": 1.1060488947882171e-05, "loss": 0.3538, "step": 4359 }, { "epoch": 1.45, "learning_rate": 1.10569256576163e-05, "loss": 0.2985, "step": 4360 }, { "epoch": 1.45, "learning_rate": 1.1053362231630955e-05, "loss": 0.479, "step": 4361 }, { "epoch": 1.45, "learning_rate": 1.1049798670383711e-05, "loss": 0.3367, "step": 4362 }, { "epoch": 1.45, "learning_rate": 1.1046234974332168e-05, "loss": 0.4197, "step": 4363 }, { "epoch": 1.45, "learning_rate": 1.1042671143933941e-05, "loss": 0.3962, "step": 4364 }, { "epoch": 1.45, "learning_rate": 1.1039107179646654e-05, "loss": 0.413, "step": 4365 }, { "epoch": 1.45, "learning_rate": 1.1035543081927958e-05, "loss": 0.3813, "step": 4366 }, { "epoch": 1.45, "learning_rate": 1.1031978851235518e-05, "loss": 0.3771, "step": 4367 }, { "epoch": 1.45, "learning_rate": 1.1028414488027015e-05, "loss": 0.4604, "step": 4368 }, { "epoch": 1.45, "learning_rate": 1.1024849992760147e-05, "loss": 0.4119, "step": 4369 }, { "epoch": 1.45, "learning_rate": 1.102128536589263e-05, "loss": 0.4485, "step": 4370 }, { "epoch": 1.45, "learning_rate": 1.1017720607882201e-05, "loss": 0.3761, "step": 4371 }, { "epoch": 1.45, "learning_rate": 1.1014155719186601e-05, "loss": 0.4639, "step": 4372 }, { "epoch": 1.45, "learning_rate": 1.10105907002636e-05, "loss": 0.326, "step": 4373 }, { "epoch": 1.45, "learning_rate": 1.1007025551570984e-05, "loss": 0.384, "step": 4374 }, { "epoch": 1.45, "learning_rate": 1.1003460273566546e-05, "loss": 0.3772, "step": 4375 }, { "epoch": 1.45, "learning_rate": 1.099989486670811e-05, "loss": 0.4912, "step": 4376 }, { "epoch": 1.45, "learning_rate": 1.0996329331453502e-05, "loss": 0.3684, "step": 4377 }, { "epoch": 1.45, "learning_rate": 1.0992763668260575e-05, "loss": 0.4218, "step": 4378 }, { "epoch": 1.45, "learning_rate": 1.0989197877587194e-05, "loss": 0.4082, "step": 4379 }, { "epoch": 1.45, "learning_rate": 1.0985631959891243e-05, "loss": 0.343, "step": 4380 }, { "epoch": 1.45, "learning_rate": 1.0982065915630616e-05, "loss": 0.3356, "step": 4381 }, { "epoch": 1.45, "learning_rate": 1.0978499745263228e-05, "loss": 0.4218, "step": 4382 }, { "epoch": 1.45, "learning_rate": 1.0974933449247011e-05, "loss": 0.499, "step": 4383 }, { "epoch": 1.46, "learning_rate": 1.0971367028039914e-05, "loss": 0.3238, "step": 4384 }, { "epoch": 1.46, "learning_rate": 1.09678004820999e-05, "loss": 0.4116, "step": 4385 }, { "epoch": 1.46, "learning_rate": 1.0964233811884942e-05, "loss": 0.2971, "step": 4386 }, { "epoch": 1.46, "learning_rate": 1.096066701785304e-05, "loss": 0.3739, "step": 4387 }, { "epoch": 1.46, "learning_rate": 1.0957100100462206e-05, "loss": 0.3816, "step": 4388 }, { "epoch": 1.46, "learning_rate": 1.0953533060170462e-05, "loss": 0.3727, "step": 4389 }, { "epoch": 1.46, "learning_rate": 1.0949965897435857e-05, "loss": 0.3701, "step": 4390 }, { "epoch": 1.46, "learning_rate": 1.094639861271644e-05, "loss": 0.4482, "step": 4391 }, { "epoch": 1.46, "learning_rate": 1.0942831206470294e-05, "loss": 0.36, "step": 4392 }, { "epoch": 1.46, "learning_rate": 1.0939263679155502e-05, "loss": 0.3654, "step": 4393 }, { "epoch": 1.46, "learning_rate": 1.0935696031230174e-05, "loss": 0.4351, "step": 4394 }, { "epoch": 1.46, "learning_rate": 1.0932128263152428e-05, "loss": 0.3812, "step": 4395 }, { "epoch": 1.46, "learning_rate": 1.0928560375380397e-05, "loss": 0.4087, "step": 4396 }, { "epoch": 1.46, "learning_rate": 1.0924992368372236e-05, "loss": 0.4115, "step": 4397 }, { "epoch": 1.46, "learning_rate": 1.0921424242586114e-05, "loss": 0.4333, "step": 4398 }, { "epoch": 1.46, "learning_rate": 1.0917855998480208e-05, "loss": 0.3749, "step": 4399 }, { "epoch": 1.46, "learning_rate": 1.0914287636512714e-05, "loss": 0.3873, "step": 4400 }, { "epoch": 1.46, "learning_rate": 1.091071915714185e-05, "loss": 0.3763, "step": 4401 }, { "epoch": 1.46, "learning_rate": 1.0907150560825842e-05, "loss": 0.3516, "step": 4402 }, { "epoch": 1.46, "learning_rate": 1.0903581848022925e-05, "loss": 0.4954, "step": 4403 }, { "epoch": 1.46, "learning_rate": 1.0900013019191364e-05, "loss": 0.4492, "step": 4404 }, { "epoch": 1.46, "learning_rate": 1.0896444074789431e-05, "loss": 0.4104, "step": 4405 }, { "epoch": 1.46, "learning_rate": 1.0892875015275408e-05, "loss": 0.4558, "step": 4406 }, { "epoch": 1.46, "learning_rate": 1.08893058411076e-05, "loss": 0.4443, "step": 4407 }, { "epoch": 1.46, "learning_rate": 1.0885736552744325e-05, "loss": 0.3998, "step": 4408 }, { "epoch": 1.46, "learning_rate": 1.0882167150643912e-05, "loss": 0.3698, "step": 4409 }, { "epoch": 1.46, "learning_rate": 1.0878597635264705e-05, "loss": 0.4172, "step": 4410 }, { "epoch": 1.46, "learning_rate": 1.0875028007065065e-05, "loss": 0.3764, "step": 4411 }, { "epoch": 1.46, "learning_rate": 1.0871458266503371e-05, "loss": 0.4451, "step": 4412 }, { "epoch": 1.46, "learning_rate": 1.0867888414038007e-05, "loss": 0.3306, "step": 4413 }, { "epoch": 1.46, "learning_rate": 1.086431845012738e-05, "loss": 0.389, "step": 4414 }, { "epoch": 1.47, "learning_rate": 1.0860748375229906e-05, "loss": 0.3633, "step": 4415 }, { "epoch": 1.47, "learning_rate": 1.0857178189804018e-05, "loss": 0.3745, "step": 4416 }, { "epoch": 1.47, "learning_rate": 1.0853607894308163e-05, "loss": 0.4299, "step": 4417 }, { "epoch": 1.47, "learning_rate": 1.08500374892008e-05, "loss": 0.4415, "step": 4418 }, { "epoch": 1.47, "learning_rate": 1.0846466974940403e-05, "loss": 0.4253, "step": 4419 }, { "epoch": 1.47, "learning_rate": 1.0842896351985464e-05, "loss": 0.4036, "step": 4420 }, { "epoch": 1.47, "learning_rate": 1.083932562079448e-05, "loss": 0.405, "step": 4421 }, { "epoch": 1.47, "learning_rate": 1.083575478182597e-05, "loss": 0.3806, "step": 4422 }, { "epoch": 1.47, "learning_rate": 1.0832183835538471e-05, "loss": 0.4136, "step": 4423 }, { "epoch": 1.47, "learning_rate": 1.0828612782390514e-05, "loss": 0.3436, "step": 4424 }, { "epoch": 1.47, "learning_rate": 1.0825041622840666e-05, "loss": 0.4768, "step": 4425 }, { "epoch": 1.47, "learning_rate": 1.0821470357347499e-05, "loss": 0.4297, "step": 4426 }, { "epoch": 1.47, "learning_rate": 1.081789898636959e-05, "loss": 0.3824, "step": 4427 }, { "epoch": 1.47, "learning_rate": 1.0814327510365548e-05, "loss": 0.436, "step": 4428 }, { "epoch": 1.47, "learning_rate": 1.0810755929793975e-05, "loss": 0.3872, "step": 4429 }, { "epoch": 1.47, "learning_rate": 1.0807184245113505e-05, "loss": 0.4218, "step": 4430 }, { "epoch": 1.47, "learning_rate": 1.0803612456782768e-05, "loss": 0.4636, "step": 4431 }, { "epoch": 1.47, "learning_rate": 1.0800040565260423e-05, "loss": 0.4431, "step": 4432 }, { "epoch": 1.47, "learning_rate": 1.0796468571005137e-05, "loss": 0.3003, "step": 4433 }, { "epoch": 1.47, "learning_rate": 1.0792896474475582e-05, "loss": 0.3839, "step": 4434 }, { "epoch": 1.47, "learning_rate": 1.0789324276130453e-05, "loss": 0.3431, "step": 4435 }, { "epoch": 1.47, "learning_rate": 1.0785751976428455e-05, "loss": 0.3873, "step": 4436 }, { "epoch": 1.47, "learning_rate": 1.078217957582831e-05, "loss": 0.4816, "step": 4437 }, { "epoch": 1.47, "learning_rate": 1.0778607074788738e-05, "loss": 0.41, "step": 4438 }, { "epoch": 1.47, "learning_rate": 1.0775034473768491e-05, "loss": 0.3298, "step": 4439 }, { "epoch": 1.47, "learning_rate": 1.0771461773226323e-05, "loss": 0.3767, "step": 4440 }, { "epoch": 1.47, "learning_rate": 1.0767888973621006e-05, "loss": 0.3921, "step": 4441 }, { "epoch": 1.47, "learning_rate": 1.0764316075411316e-05, "loss": 0.4282, "step": 4442 }, { "epoch": 1.47, "learning_rate": 1.0760743079056054e-05, "loss": 0.3894, "step": 4443 }, { "epoch": 1.47, "learning_rate": 1.0757169985014024e-05, "loss": 0.3872, "step": 4444 }, { "epoch": 1.48, "learning_rate": 1.0753596793744049e-05, "loss": 0.3896, "step": 4445 }, { "epoch": 1.48, "learning_rate": 1.0750023505704956e-05, "loss": 0.3896, "step": 4446 }, { "epoch": 1.48, "learning_rate": 1.0746450121355596e-05, "loss": 0.3789, "step": 4447 }, { "epoch": 1.48, "learning_rate": 1.074287664115482e-05, "loss": 0.4026, "step": 4448 }, { "epoch": 1.48, "learning_rate": 1.07393030655615e-05, "loss": 0.4333, "step": 4449 }, { "epoch": 1.48, "learning_rate": 1.073572939503452e-05, "loss": 0.3773, "step": 4450 }, { "epoch": 1.48, "learning_rate": 1.0732155630032768e-05, "loss": 0.3304, "step": 4451 }, { "epoch": 1.48, "learning_rate": 1.0728581771015154e-05, "loss": 0.302, "step": 4452 }, { "epoch": 1.48, "learning_rate": 1.0725007818440599e-05, "loss": 0.4446, "step": 4453 }, { "epoch": 1.48, "learning_rate": 1.0721433772768028e-05, "loss": 0.3674, "step": 4454 }, { "epoch": 1.48, "learning_rate": 1.0717859634456384e-05, "loss": 0.3916, "step": 4455 }, { "epoch": 1.48, "learning_rate": 1.0714285403964625e-05, "loss": 0.3908, "step": 4456 }, { "epoch": 1.48, "learning_rate": 1.0710711081751713e-05, "loss": 0.3965, "step": 4457 }, { "epoch": 1.48, "learning_rate": 1.0707136668276621e-05, "loss": 0.4331, "step": 4458 }, { "epoch": 1.48, "learning_rate": 1.0703562163998346e-05, "loss": 0.3933, "step": 4459 }, { "epoch": 1.48, "learning_rate": 1.0699987569375886e-05, "loss": 0.3693, "step": 4460 }, { "epoch": 1.48, "learning_rate": 1.0696412884868255e-05, "loss": 0.457, "step": 4461 }, { "epoch": 1.48, "learning_rate": 1.0692838110934476e-05, "loss": 0.4109, "step": 4462 }, { "epoch": 1.48, "learning_rate": 1.0689263248033581e-05, "loss": 0.4211, "step": 4463 }, { "epoch": 1.48, "learning_rate": 1.0685688296624623e-05, "loss": 0.3712, "step": 4464 }, { "epoch": 1.48, "learning_rate": 1.0682113257166658e-05, "loss": 0.4053, "step": 4465 }, { "epoch": 1.48, "learning_rate": 1.0678538130118754e-05, "loss": 0.4069, "step": 4466 }, { "epoch": 1.48, "learning_rate": 1.0674962915939992e-05, "loss": 0.3752, "step": 4467 }, { "epoch": 1.48, "learning_rate": 1.067138761508947e-05, "loss": 0.4156, "step": 4468 }, { "epoch": 1.48, "learning_rate": 1.0667812228026281e-05, "loss": 0.3778, "step": 4469 }, { "epoch": 1.48, "learning_rate": 1.0664236755209545e-05, "loss": 0.3652, "step": 4470 }, { "epoch": 1.48, "learning_rate": 1.066066119709839e-05, "loss": 0.3563, "step": 4471 }, { "epoch": 1.48, "learning_rate": 1.0657085554151947e-05, "loss": 0.5115, "step": 4472 }, { "epoch": 1.48, "learning_rate": 1.0653509826829365e-05, "loss": 0.3629, "step": 4473 }, { "epoch": 1.48, "learning_rate": 1.0649934015589805e-05, "loss": 0.4126, "step": 4474 }, { "epoch": 1.49, "learning_rate": 1.0646358120892429e-05, "loss": 0.4023, "step": 4475 }, { "epoch": 1.49, "learning_rate": 1.0642782143196424e-05, "loss": 0.4198, "step": 4476 }, { "epoch": 1.49, "learning_rate": 1.0639206082960973e-05, "loss": 0.402, "step": 4477 }, { "epoch": 1.49, "learning_rate": 1.0635629940645285e-05, "loss": 0.368, "step": 4478 }, { "epoch": 1.49, "learning_rate": 1.0632053716708562e-05, "loss": 0.3076, "step": 4479 }, { "epoch": 1.49, "learning_rate": 1.0628477411610032e-05, "loss": 0.3597, "step": 4480 }, { "epoch": 1.49, "learning_rate": 1.0624901025808926e-05, "loss": 0.4746, "step": 4481 }, { "epoch": 1.49, "learning_rate": 1.0621324559764483e-05, "loss": 0.3522, "step": 4482 }, { "epoch": 1.49, "learning_rate": 1.0617748013935962e-05, "loss": 0.3889, "step": 4483 }, { "epoch": 1.49, "learning_rate": 1.0614171388782622e-05, "loss": 0.3783, "step": 4484 }, { "epoch": 1.49, "learning_rate": 1.0610594684763737e-05, "loss": 0.397, "step": 4485 }, { "epoch": 1.49, "learning_rate": 1.060701790233859e-05, "loss": 0.3188, "step": 4486 }, { "epoch": 1.49, "learning_rate": 1.0603441041966476e-05, "loss": 0.425, "step": 4487 }, { "epoch": 1.49, "learning_rate": 1.0599864104106697e-05, "loss": 0.4328, "step": 4488 }, { "epoch": 1.49, "learning_rate": 1.0596287089218565e-05, "loss": 0.2928, "step": 4489 }, { "epoch": 1.49, "learning_rate": 1.0592709997761406e-05, "loss": 0.3242, "step": 4490 }, { "epoch": 1.49, "learning_rate": 1.0589132830194551e-05, "loss": 0.3274, "step": 4491 }, { "epoch": 1.49, "learning_rate": 1.0585555586977347e-05, "loss": 0.436, "step": 4492 }, { "epoch": 1.49, "learning_rate": 1.0581978268569143e-05, "loss": 0.427, "step": 4493 }, { "epoch": 1.49, "learning_rate": 1.05784008754293e-05, "loss": 0.3842, "step": 4494 }, { "epoch": 1.49, "learning_rate": 1.0574823408017194e-05, "loss": 0.3375, "step": 4495 }, { "epoch": 1.49, "learning_rate": 1.0571245866792204e-05, "loss": 0.3945, "step": 4496 }, { "epoch": 1.49, "learning_rate": 1.056766825221372e-05, "loss": 0.3176, "step": 4497 }, { "epoch": 1.49, "learning_rate": 1.0564090564741142e-05, "loss": 0.3906, "step": 4498 }, { "epoch": 1.49, "learning_rate": 1.0560512804833884e-05, "loss": 0.3865, "step": 4499 }, { "epoch": 1.49, "learning_rate": 1.0556934972951361e-05, "loss": 0.3921, "step": 4500 }, { "epoch": 1.49, "learning_rate": 1.0553357069553001e-05, "loss": 0.4031, "step": 4501 }, { "epoch": 1.49, "learning_rate": 1.0549779095098245e-05, "loss": 0.4039, "step": 4502 }, { "epoch": 1.49, "learning_rate": 1.0546201050046537e-05, "loss": 0.2685, "step": 4503 }, { "epoch": 1.49, "learning_rate": 1.0542622934857331e-05, "loss": 0.4132, "step": 4504 }, { "epoch": 1.5, "learning_rate": 1.0539044749990096e-05, "loss": 0.4089, "step": 4505 }, { "epoch": 1.5, "learning_rate": 1.05354664959043e-05, "loss": 0.4033, "step": 4506 }, { "epoch": 1.5, "learning_rate": 1.0531888173059428e-05, "loss": 0.4415, "step": 4507 }, { "epoch": 1.5, "learning_rate": 1.0528309781914974e-05, "loss": 0.3384, "step": 4508 }, { "epoch": 1.5, "learning_rate": 1.0524731322930434e-05, "loss": 0.4243, "step": 4509 }, { "epoch": 1.5, "learning_rate": 1.0521152796565319e-05, "loss": 0.3904, "step": 4510 }, { "epoch": 1.5, "learning_rate": 1.0517574203279147e-05, "loss": 0.3771, "step": 4511 }, { "epoch": 1.5, "learning_rate": 1.0513995543531441e-05, "loss": 0.3466, "step": 4512 }, { "epoch": 1.5, "learning_rate": 1.0510416817781737e-05, "loss": 0.3724, "step": 4513 }, { "epoch": 1.5, "learning_rate": 1.0506838026489581e-05, "loss": 0.4268, "step": 4514 }, { "epoch": 1.5, "learning_rate": 1.0503259170114517e-05, "loss": 0.3517, "step": 4515 }, { "epoch": 1.5, "learning_rate": 1.0499680249116112e-05, "loss": 0.3105, "step": 4516 }, { "epoch": 1.5, "learning_rate": 1.0496101263953932e-05, "loss": 0.4657, "step": 4517 }, { "epoch": 1.5, "learning_rate": 1.049252221508755e-05, "loss": 0.3699, "step": 4518 }, { "epoch": 1.5, "learning_rate": 1.0488943102976558e-05, "loss": 0.4497, "step": 4519 }, { "epoch": 1.5, "learning_rate": 1.0485363928080541e-05, "loss": 0.4102, "step": 4520 }, { "epoch": 1.5, "learning_rate": 1.0481784690859101e-05, "loss": 0.3632, "step": 4521 }, { "epoch": 1.5, "learning_rate": 1.0478205391771852e-05, "loss": 0.3716, "step": 4522 }, { "epoch": 1.5, "learning_rate": 1.0474626031278405e-05, "loss": 0.4188, "step": 4523 }, { "epoch": 1.5, "learning_rate": 1.0471046609838385e-05, "loss": 0.4775, "step": 4524 }, { "epoch": 1.5, "learning_rate": 1.0467467127911425e-05, "loss": 0.4072, "step": 4525 }, { "epoch": 1.5, "learning_rate": 1.0463887585957168e-05, "loss": 0.3038, "step": 4526 }, { "epoch": 1.5, "learning_rate": 1.0460307984435259e-05, "loss": 0.3617, "step": 4527 }, { "epoch": 1.5, "learning_rate": 1.0456728323805352e-05, "loss": 0.3972, "step": 4528 }, { "epoch": 1.5, "learning_rate": 1.0453148604527112e-05, "loss": 0.3777, "step": 4529 }, { "epoch": 1.5, "learning_rate": 1.0449568827060212e-05, "loss": 0.4283, "step": 4530 }, { "epoch": 1.5, "learning_rate": 1.0445988991864326e-05, "loss": 0.2989, "step": 4531 }, { "epoch": 1.5, "learning_rate": 1.0442409099399142e-05, "loss": 0.2933, "step": 4532 }, { "epoch": 1.5, "learning_rate": 1.0438829150124354e-05, "loss": 0.3752, "step": 4533 }, { "epoch": 1.5, "learning_rate": 1.0435249144499655e-05, "loss": 0.417, "step": 4534 }, { "epoch": 1.51, "learning_rate": 1.0431669082984759e-05, "loss": 0.3867, "step": 4535 }, { "epoch": 1.51, "learning_rate": 1.0428088966039379e-05, "loss": 0.3719, "step": 4536 }, { "epoch": 1.51, "learning_rate": 1.0424508794123238e-05, "loss": 0.3102, "step": 4537 }, { "epoch": 1.51, "learning_rate": 1.042092856769606e-05, "loss": 0.3804, "step": 4538 }, { "epoch": 1.51, "learning_rate": 1.0417348287217586e-05, "loss": 0.385, "step": 4539 }, { "epoch": 1.51, "learning_rate": 1.0413767953147558e-05, "loss": 0.3837, "step": 4540 }, { "epoch": 1.51, "learning_rate": 1.041018756594572e-05, "loss": 0.3458, "step": 4541 }, { "epoch": 1.51, "learning_rate": 1.0406607126071837e-05, "loss": 0.3906, "step": 4542 }, { "epoch": 1.51, "learning_rate": 1.0403026633985661e-05, "loss": 0.3305, "step": 4543 }, { "epoch": 1.51, "learning_rate": 1.0399446090146975e-05, "loss": 0.3375, "step": 4544 }, { "epoch": 1.51, "learning_rate": 1.0395865495015545e-05, "loss": 0.3779, "step": 4545 }, { "epoch": 1.51, "learning_rate": 1.0392284849051157e-05, "loss": 0.3767, "step": 4546 }, { "epoch": 1.51, "learning_rate": 1.0388704152713605e-05, "loss": 0.2619, "step": 4547 }, { "epoch": 1.51, "learning_rate": 1.0385123406462678e-05, "loss": 0.366, "step": 4548 }, { "epoch": 1.51, "learning_rate": 1.0381542610758182e-05, "loss": 0.3763, "step": 4549 }, { "epoch": 1.51, "learning_rate": 1.0377961766059928e-05, "loss": 0.3582, "step": 4550 }, { "epoch": 1.51, "learning_rate": 1.0374380872827725e-05, "loss": 0.3978, "step": 4551 }, { "epoch": 1.51, "learning_rate": 1.03707999315214e-05, "loss": 0.3848, "step": 4552 }, { "epoch": 1.51, "learning_rate": 1.0367218942600778e-05, "loss": 0.3036, "step": 4553 }, { "epoch": 1.51, "learning_rate": 1.0363637906525693e-05, "loss": 0.3813, "step": 4554 }, { "epoch": 1.51, "learning_rate": 1.0360056823755984e-05, "loss": 0.3987, "step": 4555 }, { "epoch": 1.51, "learning_rate": 1.0356475694751497e-05, "loss": 0.3403, "step": 4556 }, { "epoch": 1.51, "learning_rate": 1.0352894519972087e-05, "loss": 0.3621, "step": 4557 }, { "epoch": 1.51, "learning_rate": 1.0349313299877605e-05, "loss": 0.3217, "step": 4558 }, { "epoch": 1.51, "learning_rate": 1.0345732034927918e-05, "loss": 0.3317, "step": 4559 }, { "epoch": 1.51, "learning_rate": 1.0342150725582897e-05, "loss": 0.3937, "step": 4560 }, { "epoch": 1.51, "learning_rate": 1.0338569372302417e-05, "loss": 0.3572, "step": 4561 }, { "epoch": 1.51, "learning_rate": 1.033498797554635e-05, "loss": 0.3524, "step": 4562 }, { "epoch": 1.51, "learning_rate": 1.033140653577459e-05, "loss": 0.4165, "step": 4563 }, { "epoch": 1.51, "learning_rate": 1.0327825053447025e-05, "loss": 0.3317, "step": 4564 }, { "epoch": 1.52, "learning_rate": 1.0324243529023554e-05, "loss": 0.3098, "step": 4565 }, { "epoch": 1.52, "learning_rate": 1.0320661962964078e-05, "loss": 0.3915, "step": 4566 }, { "epoch": 1.52, "learning_rate": 1.0317080355728507e-05, "loss": 0.4275, "step": 4567 }, { "epoch": 1.52, "learning_rate": 1.0313498707776753e-05, "loss": 0.4094, "step": 4568 }, { "epoch": 1.52, "learning_rate": 1.0309917019568732e-05, "loss": 0.3422, "step": 4569 }, { "epoch": 1.52, "learning_rate": 1.0306335291564369e-05, "loss": 0.4373, "step": 4570 }, { "epoch": 1.52, "learning_rate": 1.0302753524223594e-05, "loss": 0.4209, "step": 4571 }, { "epoch": 1.52, "learning_rate": 1.0299171718006338e-05, "loss": 0.3618, "step": 4572 }, { "epoch": 1.52, "learning_rate": 1.029558987337254e-05, "loss": 0.3395, "step": 4573 }, { "epoch": 1.52, "learning_rate": 1.0292007990782143e-05, "loss": 0.3607, "step": 4574 }, { "epoch": 1.52, "learning_rate": 1.0288426070695098e-05, "loss": 0.2781, "step": 4575 }, { "epoch": 1.52, "learning_rate": 1.0284844113571357e-05, "loss": 0.4006, "step": 4576 }, { "epoch": 1.52, "learning_rate": 1.0281262119870877e-05, "loss": 0.3257, "step": 4577 }, { "epoch": 1.52, "learning_rate": 1.0277680090053623e-05, "loss": 0.4675, "step": 4578 }, { "epoch": 1.52, "learning_rate": 1.0274098024579558e-05, "loss": 0.3781, "step": 4579 }, { "epoch": 1.52, "learning_rate": 1.027051592390866e-05, "loss": 0.3292, "step": 4580 }, { "epoch": 1.52, "learning_rate": 1.0266933788500897e-05, "loss": 0.3855, "step": 4581 }, { "epoch": 1.52, "learning_rate": 1.0263351618816257e-05, "loss": 0.3783, "step": 4582 }, { "epoch": 1.52, "learning_rate": 1.0259769415314723e-05, "loss": 0.4022, "step": 4583 }, { "epoch": 1.52, "learning_rate": 1.025618717845628e-05, "loss": 0.3387, "step": 4584 }, { "epoch": 1.52, "learning_rate": 1.0252604908700932e-05, "loss": 0.4241, "step": 4585 }, { "epoch": 1.52, "learning_rate": 1.0249022606508667e-05, "loss": 0.4629, "step": 4586 }, { "epoch": 1.52, "learning_rate": 1.0245440272339493e-05, "loss": 0.3474, "step": 4587 }, { "epoch": 1.52, "learning_rate": 1.0241857906653416e-05, "loss": 0.4573, "step": 4588 }, { "epoch": 1.52, "learning_rate": 1.0238275509910444e-05, "loss": 0.3845, "step": 4589 }, { "epoch": 1.52, "learning_rate": 1.0234693082570594e-05, "loss": 0.377, "step": 4590 }, { "epoch": 1.52, "learning_rate": 1.0231110625093881e-05, "loss": 0.3796, "step": 4591 }, { "epoch": 1.52, "learning_rate": 1.022752813794033e-05, "loss": 0.4158, "step": 4592 }, { "epoch": 1.52, "learning_rate": 1.0223945621569967e-05, "loss": 0.3962, "step": 4593 }, { "epoch": 1.52, "learning_rate": 1.0220363076442816e-05, "loss": 0.4025, "step": 4594 }, { "epoch": 1.53, "learning_rate": 1.0216780503018919e-05, "loss": 0.3665, "step": 4595 }, { "epoch": 1.53, "learning_rate": 1.0213197901758312e-05, "loss": 0.4626, "step": 4596 }, { "epoch": 1.53, "learning_rate": 1.0209615273121028e-05, "loss": 0.3497, "step": 4597 }, { "epoch": 1.53, "learning_rate": 1.0206032617567118e-05, "loss": 0.3665, "step": 4598 }, { "epoch": 1.53, "learning_rate": 1.0202449935556628e-05, "loss": 0.3619, "step": 4599 }, { "epoch": 1.53, "learning_rate": 1.0198867227549609e-05, "loss": 0.3787, "step": 4600 }, { "epoch": 1.53, "learning_rate": 1.0195284494006115e-05, "loss": 0.3195, "step": 4601 }, { "epoch": 1.53, "learning_rate": 1.0191701735386204e-05, "loss": 0.3363, "step": 4602 }, { "epoch": 1.53, "learning_rate": 1.0188118952149935e-05, "loss": 0.4165, "step": 4603 }, { "epoch": 1.53, "learning_rate": 1.0184536144757374e-05, "loss": 0.3806, "step": 4604 }, { "epoch": 1.53, "learning_rate": 1.0180953313668587e-05, "loss": 0.3568, "step": 4605 }, { "epoch": 1.53, "learning_rate": 1.0177370459343648e-05, "loss": 0.4198, "step": 4606 }, { "epoch": 1.53, "learning_rate": 1.0173787582242623e-05, "loss": 0.4207, "step": 4607 }, { "epoch": 1.53, "learning_rate": 1.0170204682825596e-05, "loss": 0.4198, "step": 4608 }, { "epoch": 1.53, "learning_rate": 1.016662176155264e-05, "loss": 0.3578, "step": 4609 }, { "epoch": 1.53, "learning_rate": 1.0163038818883836e-05, "loss": 0.3872, "step": 4610 }, { "epoch": 1.53, "learning_rate": 1.0159455855279274e-05, "loss": 0.3496, "step": 4611 }, { "epoch": 1.53, "learning_rate": 1.0155872871199037e-05, "loss": 0.3066, "step": 4612 }, { "epoch": 1.53, "learning_rate": 1.0152289867103218e-05, "loss": 0.3835, "step": 4613 }, { "epoch": 1.53, "learning_rate": 1.0148706843451905e-05, "loss": 0.4658, "step": 4614 }, { "epoch": 1.53, "learning_rate": 1.0145123800705197e-05, "loss": 0.4093, "step": 4615 }, { "epoch": 1.53, "learning_rate": 1.0141540739323193e-05, "loss": 0.319, "step": 4616 }, { "epoch": 1.53, "learning_rate": 1.0137957659765985e-05, "loss": 0.4088, "step": 4617 }, { "epoch": 1.53, "learning_rate": 1.013437456249368e-05, "loss": 0.4128, "step": 4618 }, { "epoch": 1.53, "learning_rate": 1.0130791447966385e-05, "loss": 0.347, "step": 4619 }, { "epoch": 1.53, "learning_rate": 1.01272083166442e-05, "loss": 0.3474, "step": 4620 }, { "epoch": 1.53, "learning_rate": 1.0123625168987236e-05, "loss": 0.3381, "step": 4621 }, { "epoch": 1.53, "learning_rate": 1.0120042005455608e-05, "loss": 0.347, "step": 4622 }, { "epoch": 1.53, "learning_rate": 1.0116458826509426e-05, "loss": 0.2529, "step": 4623 }, { "epoch": 1.53, "learning_rate": 1.0112875632608802e-05, "loss": 0.3741, "step": 4624 }, { "epoch": 1.54, "learning_rate": 1.0109292424213858e-05, "loss": 0.3284, "step": 4625 }, { "epoch": 1.54, "learning_rate": 1.0105709201784709e-05, "loss": 0.4457, "step": 4626 }, { "epoch": 1.54, "learning_rate": 1.0102125965781475e-05, "loss": 0.4044, "step": 4627 }, { "epoch": 1.54, "learning_rate": 1.0098542716664282e-05, "loss": 0.3979, "step": 4628 }, { "epoch": 1.54, "learning_rate": 1.009495945489325e-05, "loss": 0.4297, "step": 4629 }, { "epoch": 1.54, "learning_rate": 1.0091376180928507e-05, "loss": 0.3546, "step": 4630 }, { "epoch": 1.54, "learning_rate": 1.0087792895230176e-05, "loss": 0.3881, "step": 4631 }, { "epoch": 1.54, "learning_rate": 1.008420959825839e-05, "loss": 0.4287, "step": 4632 }, { "epoch": 1.54, "learning_rate": 1.0080626290473277e-05, "loss": 0.3792, "step": 4633 }, { "epoch": 1.54, "learning_rate": 1.007704297233497e-05, "loss": 0.405, "step": 4634 }, { "epoch": 1.54, "learning_rate": 1.0073459644303598e-05, "loss": 0.375, "step": 4635 }, { "epoch": 1.54, "learning_rate": 1.0069876306839299e-05, "loss": 0.3457, "step": 4636 }, { "epoch": 1.54, "learning_rate": 1.0066292960402207e-05, "loss": 0.4802, "step": 4637 }, { "epoch": 1.54, "learning_rate": 1.0062709605452456e-05, "loss": 0.361, "step": 4638 }, { "epoch": 1.54, "learning_rate": 1.0059126242450187e-05, "loss": 0.4185, "step": 4639 }, { "epoch": 1.54, "learning_rate": 1.0055542871855536e-05, "loss": 0.4185, "step": 4640 }, { "epoch": 1.54, "learning_rate": 1.0051959494128644e-05, "loss": 0.3489, "step": 4641 }, { "epoch": 1.54, "learning_rate": 1.004837610972965e-05, "loss": 0.3876, "step": 4642 }, { "epoch": 1.54, "learning_rate": 1.0044792719118694e-05, "loss": 0.4058, "step": 4643 }, { "epoch": 1.54, "learning_rate": 1.0041209322755922e-05, "loss": 0.3884, "step": 4644 }, { "epoch": 1.54, "learning_rate": 1.0037625921101474e-05, "loss": 0.3531, "step": 4645 }, { "epoch": 1.54, "learning_rate": 1.0034042514615495e-05, "loss": 0.3942, "step": 4646 }, { "epoch": 1.54, "learning_rate": 1.0030459103758127e-05, "loss": 0.3473, "step": 4647 }, { "epoch": 1.54, "learning_rate": 1.0026875688989515e-05, "loss": 0.365, "step": 4648 }, { "epoch": 1.54, "learning_rate": 1.0023292270769805e-05, "loss": 0.3428, "step": 4649 }, { "epoch": 1.54, "learning_rate": 1.0019708849559142e-05, "loss": 0.374, "step": 4650 }, { "epoch": 1.54, "learning_rate": 1.0016125425817673e-05, "loss": 0.2873, "step": 4651 }, { "epoch": 1.54, "learning_rate": 1.0012542000005542e-05, "loss": 0.2854, "step": 4652 }, { "epoch": 1.54, "learning_rate": 1.0008958572582897e-05, "loss": 0.3171, "step": 4653 }, { "epoch": 1.54, "learning_rate": 1.0005375144009886e-05, "loss": 0.4086, "step": 4654 }, { "epoch": 1.54, "learning_rate": 1.0001791714746654e-05, "loss": 0.3497, "step": 4655 }, { "epoch": 1.55, "learning_rate": 9.99820828525335e-06, "loss": 0.3857, "step": 4656 }, { "epoch": 1.55, "learning_rate": 9.994624855990115e-06, "loss": 0.4119, "step": 4657 }, { "epoch": 1.55, "learning_rate": 9.991041427417105e-06, "loss": 0.4231, "step": 4658 }, { "epoch": 1.55, "learning_rate": 9.987457999994463e-06, "loss": 0.3763, "step": 4659 }, { "epoch": 1.55, "learning_rate": 9.983874574182332e-06, "loss": 0.3732, "step": 4660 }, { "epoch": 1.55, "learning_rate": 9.980291150440861e-06, "loss": 0.3043, "step": 4661 }, { "epoch": 1.55, "learning_rate": 9.976707729230198e-06, "loss": 0.3352, "step": 4662 }, { "epoch": 1.55, "learning_rate": 9.973124311010486e-06, "loss": 0.3284, "step": 4663 }, { "epoch": 1.55, "learning_rate": 9.969540896241875e-06, "loss": 0.4194, "step": 4664 }, { "epoch": 1.55, "learning_rate": 9.965957485384507e-06, "loss": 0.3927, "step": 4665 }, { "epoch": 1.55, "learning_rate": 9.962374078898529e-06, "loss": 0.3323, "step": 4666 }, { "epoch": 1.55, "learning_rate": 9.958790677244083e-06, "loss": 0.4258, "step": 4667 }, { "epoch": 1.55, "learning_rate": 9.955207280881307e-06, "loss": 0.3617, "step": 4668 }, { "epoch": 1.55, "learning_rate": 9.951623890270353e-06, "loss": 0.4213, "step": 4669 }, { "epoch": 1.55, "learning_rate": 9.948040505871359e-06, "loss": 0.3689, "step": 4670 }, { "epoch": 1.55, "learning_rate": 9.944457128144465e-06, "loss": 0.4454, "step": 4671 }, { "epoch": 1.55, "learning_rate": 9.940873757549815e-06, "loss": 0.3839, "step": 4672 }, { "epoch": 1.55, "learning_rate": 9.937290394547547e-06, "loss": 0.4214, "step": 4673 }, { "epoch": 1.55, "learning_rate": 9.933707039597798e-06, "loss": 0.3734, "step": 4674 }, { "epoch": 1.55, "learning_rate": 9.930123693160704e-06, "loss": 0.38, "step": 4675 }, { "epoch": 1.55, "learning_rate": 9.926540355696406e-06, "loss": 0.4175, "step": 4676 }, { "epoch": 1.55, "learning_rate": 9.922957027665035e-06, "loss": 0.326, "step": 4677 }, { "epoch": 1.55, "learning_rate": 9.919373709526725e-06, "loss": 0.449, "step": 4678 }, { "epoch": 1.55, "learning_rate": 9.915790401741611e-06, "loss": 0.339, "step": 4679 }, { "epoch": 1.55, "learning_rate": 9.91220710476983e-06, "loss": 0.3495, "step": 4680 }, { "epoch": 1.55, "learning_rate": 9.908623819071498e-06, "loss": 0.4419, "step": 4681 }, { "epoch": 1.55, "learning_rate": 9.905040545106752e-06, "loss": 0.3365, "step": 4682 }, { "epoch": 1.55, "learning_rate": 9.90145728333572e-06, "loss": 0.2947, "step": 4683 }, { "epoch": 1.55, "learning_rate": 9.897874034218528e-06, "loss": 0.3713, "step": 4684 }, { "epoch": 1.55, "learning_rate": 9.894290798215293e-06, "loss": 0.3652, "step": 4685 }, { "epoch": 1.56, "learning_rate": 9.890707575786144e-06, "loss": 0.3344, "step": 4686 }, { "epoch": 1.56, "learning_rate": 9.887124367391203e-06, "loss": 0.4407, "step": 4687 }, { "epoch": 1.56, "learning_rate": 9.883541173490579e-06, "loss": 0.4407, "step": 4688 }, { "epoch": 1.56, "learning_rate": 9.879957994544395e-06, "loss": 0.4414, "step": 4689 }, { "epoch": 1.56, "learning_rate": 9.876374831012765e-06, "loss": 0.3477, "step": 4690 }, { "epoch": 1.56, "learning_rate": 9.872791683355802e-06, "loss": 0.3466, "step": 4691 }, { "epoch": 1.56, "learning_rate": 9.869208552033617e-06, "loss": 0.3794, "step": 4692 }, { "epoch": 1.56, "learning_rate": 9.865625437506321e-06, "loss": 0.314, "step": 4693 }, { "epoch": 1.56, "learning_rate": 9.862042340234019e-06, "loss": 0.366, "step": 4694 }, { "epoch": 1.56, "learning_rate": 9.858459260676814e-06, "loss": 0.4646, "step": 4695 }, { "epoch": 1.56, "learning_rate": 9.854876199294805e-06, "loss": 0.3335, "step": 4696 }, { "epoch": 1.56, "learning_rate": 9.851293156548097e-06, "loss": 0.3757, "step": 4697 }, { "epoch": 1.56, "learning_rate": 9.847710132896784e-06, "loss": 0.4039, "step": 4698 }, { "epoch": 1.56, "learning_rate": 9.844127128800963e-06, "loss": 0.3395, "step": 4699 }, { "epoch": 1.56, "learning_rate": 9.840544144720728e-06, "loss": 0.3945, "step": 4700 }, { "epoch": 1.56, "learning_rate": 9.836961181116169e-06, "loss": 0.4045, "step": 4701 }, { "epoch": 1.56, "learning_rate": 9.833378238447365e-06, "loss": 0.4041, "step": 4702 }, { "epoch": 1.56, "learning_rate": 9.82979531717441e-06, "loss": 0.3893, "step": 4703 }, { "epoch": 1.56, "learning_rate": 9.82621241775738e-06, "loss": 0.3735, "step": 4704 }, { "epoch": 1.56, "learning_rate": 9.822629540656357e-06, "loss": 0.3899, "step": 4705 }, { "epoch": 1.56, "learning_rate": 9.819046686331415e-06, "loss": 0.4379, "step": 4706 }, { "epoch": 1.56, "learning_rate": 9.815463855242628e-06, "loss": 0.3452, "step": 4707 }, { "epoch": 1.56, "learning_rate": 9.81188104785007e-06, "loss": 0.3104, "step": 4708 }, { "epoch": 1.56, "learning_rate": 9.808298264613801e-06, "loss": 0.4441, "step": 4709 }, { "epoch": 1.56, "learning_rate": 9.804715505993888e-06, "loss": 0.3755, "step": 4710 }, { "epoch": 1.56, "learning_rate": 9.801132772450393e-06, "loss": 0.3604, "step": 4711 }, { "epoch": 1.56, "learning_rate": 9.797550064443375e-06, "loss": 0.3495, "step": 4712 }, { "epoch": 1.56, "learning_rate": 9.793967382432885e-06, "loss": 0.354, "step": 4713 }, { "epoch": 1.56, "learning_rate": 9.790384726878974e-06, "loss": 0.4202, "step": 4714 }, { "epoch": 1.56, "learning_rate": 9.786802098241695e-06, "loss": 0.3558, "step": 4715 }, { "epoch": 1.57, "learning_rate": 9.783219496981083e-06, "loss": 0.3235, "step": 4716 }, { "epoch": 1.57, "learning_rate": 9.779636923557186e-06, "loss": 0.386, "step": 4717 }, { "epoch": 1.57, "learning_rate": 9.776054378430037e-06, "loss": 0.3472, "step": 4718 }, { "epoch": 1.57, "learning_rate": 9.772471862059672e-06, "loss": 0.3951, "step": 4719 }, { "epoch": 1.57, "learning_rate": 9.768889374906119e-06, "loss": 0.3967, "step": 4720 }, { "epoch": 1.57, "learning_rate": 9.76530691742941e-06, "loss": 0.4534, "step": 4721 }, { "epoch": 1.57, "learning_rate": 9.761724490089559e-06, "loss": 0.3595, "step": 4722 }, { "epoch": 1.57, "learning_rate": 9.758142093346587e-06, "loss": 0.3634, "step": 4723 }, { "epoch": 1.57, "learning_rate": 9.75455972766051e-06, "loss": 0.3801, "step": 4724 }, { "epoch": 1.57, "learning_rate": 9.750977393491335e-06, "loss": 0.4189, "step": 4725 }, { "epoch": 1.57, "learning_rate": 9.74739509129907e-06, "loss": 0.2897, "step": 4726 }, { "epoch": 1.57, "learning_rate": 9.74381282154372e-06, "loss": 0.3982, "step": 4727 }, { "epoch": 1.57, "learning_rate": 9.740230584685279e-06, "loss": 0.355, "step": 4728 }, { "epoch": 1.57, "learning_rate": 9.736648381183748e-06, "loss": 0.4438, "step": 4729 }, { "epoch": 1.57, "learning_rate": 9.733066211499106e-06, "loss": 0.4156, "step": 4730 }, { "epoch": 1.57, "learning_rate": 9.729484076091345e-06, "loss": 0.3691, "step": 4731 }, { "epoch": 1.57, "learning_rate": 9.725901975420444e-06, "loss": 0.4158, "step": 4732 }, { "epoch": 1.57, "learning_rate": 9.722319909946379e-06, "loss": 0.4065, "step": 4733 }, { "epoch": 1.57, "learning_rate": 9.718737880129123e-06, "loss": 0.2805, "step": 4734 }, { "epoch": 1.57, "learning_rate": 9.715155886428643e-06, "loss": 0.3435, "step": 4735 }, { "epoch": 1.57, "learning_rate": 9.711573929304905e-06, "loss": 0.3409, "step": 4736 }, { "epoch": 1.57, "learning_rate": 9.707992009217859e-06, "loss": 0.308, "step": 4737 }, { "epoch": 1.57, "learning_rate": 9.704410126627464e-06, "loss": 0.3512, "step": 4738 }, { "epoch": 1.57, "learning_rate": 9.700828281993665e-06, "loss": 0.3583, "step": 4739 }, { "epoch": 1.57, "learning_rate": 9.69724647577641e-06, "loss": 0.3434, "step": 4740 }, { "epoch": 1.57, "learning_rate": 9.693664708435634e-06, "loss": 0.3645, "step": 4741 }, { "epoch": 1.57, "learning_rate": 9.690082980431273e-06, "loss": 0.3024, "step": 4742 }, { "epoch": 1.57, "learning_rate": 9.686501292223252e-06, "loss": 0.3595, "step": 4743 }, { "epoch": 1.57, "learning_rate": 9.682919644271497e-06, "loss": 0.3363, "step": 4744 }, { "epoch": 1.57, "learning_rate": 9.679338037035924e-06, "loss": 0.4011, "step": 4745 }, { "epoch": 1.58, "learning_rate": 9.675756470976449e-06, "loss": 0.2913, "step": 4746 }, { "epoch": 1.58, "learning_rate": 9.672174946552977e-06, "loss": 0.3605, "step": 4747 }, { "epoch": 1.58, "learning_rate": 9.668593464225412e-06, "loss": 0.3838, "step": 4748 }, { "epoch": 1.58, "learning_rate": 9.665012024453656e-06, "loss": 0.3527, "step": 4749 }, { "epoch": 1.58, "learning_rate": 9.66143062769759e-06, "loss": 0.3718, "step": 4750 }, { "epoch": 1.58, "learning_rate": 9.657849274417104e-06, "loss": 0.3442, "step": 4751 }, { "epoch": 1.58, "learning_rate": 9.654267965072084e-06, "loss": 0.2988, "step": 4752 }, { "epoch": 1.58, "learning_rate": 9.650686700122396e-06, "loss": 0.3319, "step": 4753 }, { "epoch": 1.58, "learning_rate": 9.647105480027916e-06, "loss": 0.4407, "step": 4754 }, { "epoch": 1.58, "learning_rate": 9.643524305248503e-06, "loss": 0.3213, "step": 4755 }, { "epoch": 1.58, "learning_rate": 9.63994317624402e-06, "loss": 0.335, "step": 4756 }, { "epoch": 1.58, "learning_rate": 9.636362093474312e-06, "loss": 0.3835, "step": 4757 }, { "epoch": 1.58, "learning_rate": 9.632781057399225e-06, "loss": 0.3938, "step": 4758 }, { "epoch": 1.58, "learning_rate": 9.629200068478603e-06, "loss": 0.3884, "step": 4759 }, { "epoch": 1.58, "learning_rate": 9.625619127172278e-06, "loss": 0.3787, "step": 4760 }, { "epoch": 1.58, "learning_rate": 9.622038233940076e-06, "loss": 0.3807, "step": 4761 }, { "epoch": 1.58, "learning_rate": 9.61845738924182e-06, "loss": 0.3644, "step": 4762 }, { "epoch": 1.58, "learning_rate": 9.614876593537327e-06, "loss": 0.3342, "step": 4763 }, { "epoch": 1.58, "learning_rate": 9.6112958472864e-06, "loss": 0.324, "step": 4764 }, { "epoch": 1.58, "learning_rate": 9.607715150948846e-06, "loss": 0.4321, "step": 4765 }, { "epoch": 1.58, "learning_rate": 9.604134504984458e-06, "loss": 0.4675, "step": 4766 }, { "epoch": 1.58, "learning_rate": 9.600553909853028e-06, "loss": 0.3738, "step": 4767 }, { "epoch": 1.58, "learning_rate": 9.596973366014337e-06, "loss": 0.3285, "step": 4768 }, { "epoch": 1.58, "learning_rate": 9.593392873928166e-06, "loss": 0.3829, "step": 4769 }, { "epoch": 1.58, "learning_rate": 9.589812434054282e-06, "loss": 0.3909, "step": 4770 }, { "epoch": 1.58, "learning_rate": 9.586232046852447e-06, "loss": 0.4873, "step": 4771 }, { "epoch": 1.58, "learning_rate": 9.582651712782418e-06, "loss": 0.396, "step": 4772 }, { "epoch": 1.58, "learning_rate": 9.579071432303941e-06, "loss": 0.3743, "step": 4773 }, { "epoch": 1.58, "learning_rate": 9.575491205876764e-06, "loss": 0.3809, "step": 4774 }, { "epoch": 1.58, "learning_rate": 9.571911033960621e-06, "loss": 0.3784, "step": 4775 }, { "epoch": 1.59, "learning_rate": 9.568330917015241e-06, "loss": 0.3926, "step": 4776 }, { "epoch": 1.59, "learning_rate": 9.56475085550035e-06, "loss": 0.3833, "step": 4777 }, { "epoch": 1.59, "learning_rate": 9.561170849875651e-06, "loss": 0.3872, "step": 4778 }, { "epoch": 1.59, "learning_rate": 9.55759090060086e-06, "loss": 0.3762, "step": 4779 }, { "epoch": 1.59, "learning_rate": 9.554011008135677e-06, "loss": 0.3557, "step": 4780 }, { "epoch": 1.59, "learning_rate": 9.55043117293979e-06, "loss": 0.3796, "step": 4781 }, { "epoch": 1.59, "learning_rate": 9.54685139547289e-06, "loss": 0.3177, "step": 4782 }, { "epoch": 1.59, "learning_rate": 9.54327167619465e-06, "loss": 0.3779, "step": 4783 }, { "epoch": 1.59, "learning_rate": 9.539692015564748e-06, "loss": 0.3057, "step": 4784 }, { "epoch": 1.59, "learning_rate": 9.536112414042837e-06, "loss": 0.3987, "step": 4785 }, { "epoch": 1.59, "learning_rate": 9.532532872088578e-06, "loss": 0.4868, "step": 4786 }, { "epoch": 1.59, "learning_rate": 9.528953390161618e-06, "loss": 0.3717, "step": 4787 }, { "epoch": 1.59, "learning_rate": 9.525373968721598e-06, "loss": 0.4327, "step": 4788 }, { "epoch": 1.59, "learning_rate": 9.521794608228152e-06, "loss": 0.3599, "step": 4789 }, { "epoch": 1.59, "learning_rate": 9.5182153091409e-06, "loss": 0.4048, "step": 4790 }, { "epoch": 1.59, "learning_rate": 9.514636071919466e-06, "loss": 0.3164, "step": 4791 }, { "epoch": 1.59, "learning_rate": 9.511056897023447e-06, "loss": 0.3289, "step": 4792 }, { "epoch": 1.59, "learning_rate": 9.507477784912451e-06, "loss": 0.3604, "step": 4793 }, { "epoch": 1.59, "learning_rate": 9.503898736046071e-06, "loss": 0.3903, "step": 4794 }, { "epoch": 1.59, "learning_rate": 9.50031975088389e-06, "loss": 0.3083, "step": 4795 }, { "epoch": 1.59, "learning_rate": 9.496740829885483e-06, "loss": 0.3221, "step": 4796 }, { "epoch": 1.59, "learning_rate": 9.493161973510424e-06, "loss": 0.3591, "step": 4797 }, { "epoch": 1.59, "learning_rate": 9.489583182218266e-06, "loss": 0.3673, "step": 4798 }, { "epoch": 1.59, "learning_rate": 9.486004456468562e-06, "loss": 0.3704, "step": 4799 }, { "epoch": 1.59, "learning_rate": 9.482425796720858e-06, "loss": 0.4209, "step": 4800 }, { "epoch": 1.59, "learning_rate": 9.478847203434683e-06, "loss": 0.3677, "step": 4801 }, { "epoch": 1.59, "learning_rate": 9.475268677069567e-06, "loss": 0.356, "step": 4802 }, { "epoch": 1.59, "learning_rate": 9.471690218085028e-06, "loss": 0.4199, "step": 4803 }, { "epoch": 1.59, "learning_rate": 9.468111826940572e-06, "loss": 0.3082, "step": 4804 }, { "epoch": 1.59, "learning_rate": 9.464533504095705e-06, "loss": 0.3323, "step": 4805 }, { "epoch": 1.6, "learning_rate": 9.46095525000991e-06, "loss": 0.447, "step": 4806 }, { "epoch": 1.6, "learning_rate": 9.45737706514267e-06, "loss": 0.3927, "step": 4807 }, { "epoch": 1.6, "learning_rate": 9.453798949953468e-06, "loss": 0.3502, "step": 4808 }, { "epoch": 1.6, "learning_rate": 9.450220904901758e-06, "loss": 0.4277, "step": 4809 }, { "epoch": 1.6, "learning_rate": 9.446642930447e-06, "loss": 0.4797, "step": 4810 }, { "epoch": 1.6, "learning_rate": 9.44306502704864e-06, "loss": 0.3767, "step": 4811 }, { "epoch": 1.6, "learning_rate": 9.439487195166121e-06, "loss": 0.3729, "step": 4812 }, { "epoch": 1.6, "learning_rate": 9.435909435258861e-06, "loss": 0.3264, "step": 4813 }, { "epoch": 1.6, "learning_rate": 9.432331747786284e-06, "loss": 0.3451, "step": 4814 }, { "epoch": 1.6, "learning_rate": 9.428754133207799e-06, "loss": 0.324, "step": 4815 }, { "epoch": 1.6, "learning_rate": 9.425176591982808e-06, "loss": 0.3344, "step": 4816 }, { "epoch": 1.6, "learning_rate": 9.421599124570702e-06, "loss": 0.3573, "step": 4817 }, { "epoch": 1.6, "learning_rate": 9.418021731430862e-06, "loss": 0.3901, "step": 4818 }, { "epoch": 1.6, "learning_rate": 9.414444413022658e-06, "loss": 0.2999, "step": 4819 }, { "epoch": 1.6, "learning_rate": 9.410867169805452e-06, "loss": 0.4204, "step": 4820 }, { "epoch": 1.6, "learning_rate": 9.407290002238598e-06, "loss": 0.3835, "step": 4821 }, { "epoch": 1.6, "learning_rate": 9.403712910781437e-06, "loss": 0.4182, "step": 4822 }, { "epoch": 1.6, "learning_rate": 9.400135895893307e-06, "loss": 0.3422, "step": 4823 }, { "epoch": 1.6, "learning_rate": 9.396558958033526e-06, "loss": 0.3762, "step": 4824 }, { "epoch": 1.6, "learning_rate": 9.392982097661414e-06, "loss": 0.323, "step": 4825 }, { "epoch": 1.6, "learning_rate": 9.389405315236266e-06, "loss": 0.4249, "step": 4826 }, { "epoch": 1.6, "learning_rate": 9.38582861121738e-06, "loss": 0.3616, "step": 4827 }, { "epoch": 1.6, "learning_rate": 9.382251986064042e-06, "loss": 0.3265, "step": 4828 }, { "epoch": 1.6, "learning_rate": 9.378675440235519e-06, "loss": 0.3456, "step": 4829 }, { "epoch": 1.6, "learning_rate": 9.375098974191077e-06, "loss": 0.3962, "step": 4830 }, { "epoch": 1.6, "learning_rate": 9.37152258838997e-06, "loss": 0.348, "step": 4831 }, { "epoch": 1.6, "learning_rate": 9.367946283291443e-06, "loss": 0.3412, "step": 4832 }, { "epoch": 1.6, "learning_rate": 9.36437005935472e-06, "loss": 0.3605, "step": 4833 }, { "epoch": 1.6, "learning_rate": 9.360793917039029e-06, "loss": 0.3468, "step": 4834 }, { "epoch": 1.6, "learning_rate": 9.357217856803578e-06, "loss": 0.3723, "step": 4835 }, { "epoch": 1.61, "learning_rate": 9.353641879107573e-06, "loss": 0.3965, "step": 4836 }, { "epoch": 1.61, "learning_rate": 9.350065984410198e-06, "loss": 0.4343, "step": 4837 }, { "epoch": 1.61, "learning_rate": 9.346490173170637e-06, "loss": 0.3572, "step": 4838 }, { "epoch": 1.61, "learning_rate": 9.342914445848058e-06, "loss": 0.4136, "step": 4839 }, { "epoch": 1.61, "learning_rate": 9.339338802901615e-06, "loss": 0.3185, "step": 4840 }, { "epoch": 1.61, "learning_rate": 9.335763244790457e-06, "loss": 0.3649, "step": 4841 }, { "epoch": 1.61, "learning_rate": 9.332187771973722e-06, "loss": 0.3456, "step": 4842 }, { "epoch": 1.61, "learning_rate": 9.328612384910533e-06, "loss": 0.3696, "step": 4843 }, { "epoch": 1.61, "learning_rate": 9.325037084060006e-06, "loss": 0.2914, "step": 4844 }, { "epoch": 1.61, "learning_rate": 9.321461869881248e-06, "loss": 0.3236, "step": 4845 }, { "epoch": 1.61, "learning_rate": 9.317886742833345e-06, "loss": 0.3937, "step": 4846 }, { "epoch": 1.61, "learning_rate": 9.314311703375378e-06, "loss": 0.3396, "step": 4847 }, { "epoch": 1.61, "learning_rate": 9.310736751966422e-06, "loss": 0.3615, "step": 4848 }, { "epoch": 1.61, "learning_rate": 9.307161889065529e-06, "loss": 0.4065, "step": 4849 }, { "epoch": 1.61, "learning_rate": 9.303587115131747e-06, "loss": 0.2742, "step": 4850 }, { "epoch": 1.61, "learning_rate": 9.300012430624115e-06, "loss": 0.3625, "step": 4851 }, { "epoch": 1.61, "learning_rate": 9.296437836001653e-06, "loss": 0.3808, "step": 4852 }, { "epoch": 1.61, "learning_rate": 9.292863331723384e-06, "loss": 0.4348, "step": 4853 }, { "epoch": 1.61, "learning_rate": 9.289288918248294e-06, "loss": 0.3541, "step": 4854 }, { "epoch": 1.61, "learning_rate": 9.285714596035379e-06, "loss": 0.3176, "step": 4855 }, { "epoch": 1.61, "learning_rate": 9.282140365543618e-06, "loss": 0.4869, "step": 4856 }, { "epoch": 1.61, "learning_rate": 9.278566227231973e-06, "loss": 0.3292, "step": 4857 }, { "epoch": 1.61, "learning_rate": 9.274992181559403e-06, "loss": 0.4103, "step": 4858 }, { "epoch": 1.61, "learning_rate": 9.271418228984846e-06, "loss": 0.3848, "step": 4859 }, { "epoch": 1.61, "learning_rate": 9.267844369967237e-06, "loss": 0.3431, "step": 4860 }, { "epoch": 1.61, "learning_rate": 9.264270604965486e-06, "loss": 0.287, "step": 4861 }, { "epoch": 1.61, "learning_rate": 9.260696934438504e-06, "loss": 0.3885, "step": 4862 }, { "epoch": 1.61, "learning_rate": 9.257123358845184e-06, "loss": 0.2932, "step": 4863 }, { "epoch": 1.61, "learning_rate": 9.253549878644406e-06, "loss": 0.3625, "step": 4864 }, { "epoch": 1.61, "learning_rate": 9.249976494295045e-06, "loss": 0.3434, "step": 4865 }, { "epoch": 1.62, "learning_rate": 9.246403206255953e-06, "loss": 0.3297, "step": 4866 }, { "epoch": 1.62, "learning_rate": 9.242830014985979e-06, "loss": 0.3176, "step": 4867 }, { "epoch": 1.62, "learning_rate": 9.239256920943949e-06, "loss": 0.3749, "step": 4868 }, { "epoch": 1.62, "learning_rate": 9.235683924588687e-06, "loss": 0.3809, "step": 4869 }, { "epoch": 1.62, "learning_rate": 9.232111026378997e-06, "loss": 0.37, "step": 4870 }, { "epoch": 1.62, "learning_rate": 9.228538226773678e-06, "loss": 0.3801, "step": 4871 }, { "epoch": 1.62, "learning_rate": 9.22496552623151e-06, "loss": 0.3865, "step": 4872 }, { "epoch": 1.62, "learning_rate": 9.221392925211264e-06, "loss": 0.3751, "step": 4873 }, { "epoch": 1.62, "learning_rate": 9.217820424171696e-06, "loss": 0.2837, "step": 4874 }, { "epoch": 1.62, "learning_rate": 9.214248023571547e-06, "loss": 0.3867, "step": 4875 }, { "epoch": 1.62, "learning_rate": 9.21067572386955e-06, "loss": 0.332, "step": 4876 }, { "epoch": 1.62, "learning_rate": 9.207103525524421e-06, "loss": 0.4025, "step": 4877 }, { "epoch": 1.62, "learning_rate": 9.203531428994865e-06, "loss": 0.3928, "step": 4878 }, { "epoch": 1.62, "learning_rate": 9.199959434739577e-06, "loss": 0.4292, "step": 4879 }, { "epoch": 1.62, "learning_rate": 9.196387543217232e-06, "loss": 0.3988, "step": 4880 }, { "epoch": 1.62, "learning_rate": 9.192815754886502e-06, "loss": 0.3585, "step": 4881 }, { "epoch": 1.62, "learning_rate": 9.189244070206028e-06, "loss": 0.3492, "step": 4882 }, { "epoch": 1.62, "learning_rate": 9.185672489634457e-06, "loss": 0.3256, "step": 4883 }, { "epoch": 1.62, "learning_rate": 9.182101013630413e-06, "loss": 0.4136, "step": 4884 }, { "epoch": 1.62, "learning_rate": 9.178529642652504e-06, "loss": 0.3949, "step": 4885 }, { "epoch": 1.62, "learning_rate": 9.174958377159334e-06, "loss": 0.4116, "step": 4886 }, { "epoch": 1.62, "learning_rate": 9.171387217609487e-06, "loss": 0.3594, "step": 4887 }, { "epoch": 1.62, "learning_rate": 9.167816164461536e-06, "loss": 0.3634, "step": 4888 }, { "epoch": 1.62, "learning_rate": 9.164245218174031e-06, "loss": 0.3733, "step": 4889 }, { "epoch": 1.62, "learning_rate": 9.160674379205523e-06, "loss": 0.3689, "step": 4890 }, { "epoch": 1.62, "learning_rate": 9.157103648014539e-06, "loss": 0.4307, "step": 4891 }, { "epoch": 1.62, "learning_rate": 9.153533025059598e-06, "loss": 0.3884, "step": 4892 }, { "epoch": 1.62, "learning_rate": 9.149962510799204e-06, "loss": 0.3174, "step": 4893 }, { "epoch": 1.62, "learning_rate": 9.146392105691839e-06, "loss": 0.4458, "step": 4894 }, { "epoch": 1.62, "learning_rate": 9.142821810195987e-06, "loss": 0.299, "step": 4895 }, { "epoch": 1.62, "learning_rate": 9.139251624770098e-06, "loss": 0.4042, "step": 4896 }, { "epoch": 1.63, "learning_rate": 9.135681549872623e-06, "loss": 0.4146, "step": 4897 }, { "epoch": 1.63, "learning_rate": 9.132111585961994e-06, "loss": 0.4099, "step": 4898 }, { "epoch": 1.63, "learning_rate": 9.12854173349663e-06, "loss": 0.4517, "step": 4899 }, { "epoch": 1.63, "learning_rate": 9.124971992934934e-06, "loss": 0.3218, "step": 4900 }, { "epoch": 1.63, "learning_rate": 9.1214023647353e-06, "loss": 0.4733, "step": 4901 }, { "epoch": 1.63, "learning_rate": 9.117832849356093e-06, "loss": 0.3407, "step": 4902 }, { "epoch": 1.63, "learning_rate": 9.114263447255677e-06, "loss": 0.3383, "step": 4903 }, { "epoch": 1.63, "learning_rate": 9.110694158892403e-06, "loss": 0.3958, "step": 4904 }, { "epoch": 1.63, "learning_rate": 9.107124984724594e-06, "loss": 0.3828, "step": 4905 }, { "epoch": 1.63, "learning_rate": 9.103555925210572e-06, "loss": 0.3076, "step": 4906 }, { "epoch": 1.63, "learning_rate": 9.099986980808636e-06, "loss": 0.3647, "step": 4907 }, { "epoch": 1.63, "learning_rate": 9.096418151977078e-06, "loss": 0.3744, "step": 4908 }, { "epoch": 1.63, "learning_rate": 9.092849439174163e-06, "loss": 0.376, "step": 4909 }, { "epoch": 1.63, "learning_rate": 9.089280842858152e-06, "loss": 0.3247, "step": 4910 }, { "epoch": 1.63, "learning_rate": 9.085712363487287e-06, "loss": 0.392, "step": 4911 }, { "epoch": 1.63, "learning_rate": 9.082144001519795e-06, "loss": 0.3506, "step": 4912 }, { "epoch": 1.63, "learning_rate": 9.078575757413888e-06, "loss": 0.3818, "step": 4913 }, { "epoch": 1.63, "learning_rate": 9.075007631627764e-06, "loss": 0.297, "step": 4914 }, { "epoch": 1.63, "learning_rate": 9.071439624619608e-06, "loss": 0.3689, "step": 4915 }, { "epoch": 1.63, "learning_rate": 9.067871736847578e-06, "loss": 0.3733, "step": 4916 }, { "epoch": 1.63, "learning_rate": 9.064303968769829e-06, "loss": 0.3573, "step": 4917 }, { "epoch": 1.63, "learning_rate": 9.0607363208445e-06, "loss": 0.4154, "step": 4918 }, { "epoch": 1.63, "learning_rate": 9.057168793529707e-06, "loss": 0.4117, "step": 4919 }, { "epoch": 1.63, "learning_rate": 9.053601387283559e-06, "loss": 0.3392, "step": 4920 }, { "epoch": 1.63, "learning_rate": 9.050034102564148e-06, "loss": 0.3383, "step": 4921 }, { "epoch": 1.63, "learning_rate": 9.04646693982954e-06, "loss": 0.3918, "step": 4922 }, { "epoch": 1.63, "learning_rate": 9.042899899537797e-06, "loss": 0.3939, "step": 4923 }, { "epoch": 1.63, "learning_rate": 9.039332982146963e-06, "loss": 0.3561, "step": 4924 }, { "epoch": 1.63, "learning_rate": 9.03576618811506e-06, "loss": 0.3687, "step": 4925 }, { "epoch": 1.63, "learning_rate": 9.032199517900105e-06, "loss": 0.3212, "step": 4926 }, { "epoch": 1.64, "learning_rate": 9.028632971960086e-06, "loss": 0.3813, "step": 4927 }, { "epoch": 1.64, "learning_rate": 9.025066550752987e-06, "loss": 0.3337, "step": 4928 }, { "epoch": 1.64, "learning_rate": 9.021500254736777e-06, "loss": 0.3727, "step": 4929 }, { "epoch": 1.64, "learning_rate": 9.017934084369388e-06, "loss": 0.3501, "step": 4930 }, { "epoch": 1.64, "learning_rate": 9.01436804010876e-06, "loss": 0.3923, "step": 4931 }, { "epoch": 1.64, "learning_rate": 9.01080212241281e-06, "loss": 0.4263, "step": 4932 }, { "epoch": 1.64, "learning_rate": 9.007236331739426e-06, "loss": 0.3632, "step": 4933 }, { "epoch": 1.64, "learning_rate": 9.0036706685465e-06, "loss": 0.287, "step": 4934 }, { "epoch": 1.64, "learning_rate": 9.000105133291892e-06, "loss": 0.3854, "step": 4935 }, { "epoch": 1.64, "learning_rate": 8.996539726433458e-06, "loss": 0.2667, "step": 4936 }, { "epoch": 1.64, "learning_rate": 8.992974448429021e-06, "loss": 0.361, "step": 4937 }, { "epoch": 1.64, "learning_rate": 8.989409299736403e-06, "loss": 0.3601, "step": 4938 }, { "epoch": 1.64, "learning_rate": 8.985844280813402e-06, "loss": 0.3574, "step": 4939 }, { "epoch": 1.64, "learning_rate": 8.982279392117804e-06, "loss": 0.3667, "step": 4940 }, { "epoch": 1.64, "learning_rate": 8.978714634107371e-06, "loss": 0.3995, "step": 4941 }, { "epoch": 1.64, "learning_rate": 8.975150007239855e-06, "loss": 0.3936, "step": 4942 }, { "epoch": 1.64, "learning_rate": 8.97158551197299e-06, "loss": 0.3749, "step": 4943 }, { "epoch": 1.64, "learning_rate": 8.968021148764487e-06, "loss": 0.3757, "step": 4944 }, { "epoch": 1.64, "learning_rate": 8.964456918072045e-06, "loss": 0.3391, "step": 4945 }, { "epoch": 1.64, "learning_rate": 8.960892820353348e-06, "loss": 0.3322, "step": 4946 }, { "epoch": 1.64, "learning_rate": 8.957328856066062e-06, "loss": 0.3716, "step": 4947 }, { "epoch": 1.64, "learning_rate": 8.953765025667832e-06, "loss": 0.4442, "step": 4948 }, { "epoch": 1.64, "learning_rate": 8.95020132961629e-06, "loss": 0.3363, "step": 4949 }, { "epoch": 1.64, "learning_rate": 8.94663776836905e-06, "loss": 0.4222, "step": 4950 }, { "epoch": 1.64, "learning_rate": 8.943074342383704e-06, "loss": 0.3604, "step": 4951 }, { "epoch": 1.64, "learning_rate": 8.939511052117832e-06, "loss": 0.362, "step": 4952 }, { "epoch": 1.64, "learning_rate": 8.935947898028995e-06, "loss": 0.3789, "step": 4953 }, { "epoch": 1.64, "learning_rate": 8.932384880574736e-06, "loss": 0.4286, "step": 4954 }, { "epoch": 1.64, "learning_rate": 8.92882200021258e-06, "loss": 0.3752, "step": 4955 }, { "epoch": 1.64, "learning_rate": 8.925259257400038e-06, "loss": 0.3298, "step": 4956 }, { "epoch": 1.65, "learning_rate": 8.921696652594605e-06, "loss": 0.3774, "step": 4957 }, { "epoch": 1.65, "learning_rate": 8.918134186253743e-06, "loss": 0.3062, "step": 4958 }, { "epoch": 1.65, "learning_rate": 8.914571858834911e-06, "loss": 0.326, "step": 4959 }, { "epoch": 1.65, "learning_rate": 8.911009670795552e-06, "loss": 0.351, "step": 4960 }, { "epoch": 1.65, "learning_rate": 8.907447622593078e-06, "loss": 0.3686, "step": 4961 }, { "epoch": 1.65, "learning_rate": 8.903885714684893e-06, "loss": 0.4128, "step": 4962 }, { "epoch": 1.65, "learning_rate": 8.900323947528382e-06, "loss": 0.3599, "step": 4963 }, { "epoch": 1.65, "learning_rate": 8.896762321580912e-06, "loss": 0.4521, "step": 4964 }, { "epoch": 1.65, "learning_rate": 8.893200837299824e-06, "loss": 0.4088, "step": 4965 }, { "epoch": 1.65, "learning_rate": 8.88963949514245e-06, "loss": 0.3345, "step": 4966 }, { "epoch": 1.65, "learning_rate": 8.886078295566104e-06, "loss": 0.359, "step": 4967 }, { "epoch": 1.65, "learning_rate": 8.882517239028073e-06, "loss": 0.4128, "step": 4968 }, { "epoch": 1.65, "learning_rate": 8.878956325985636e-06, "loss": 0.3495, "step": 4969 }, { "epoch": 1.65, "learning_rate": 8.875395556896044e-06, "loss": 0.3378, "step": 4970 }, { "epoch": 1.65, "learning_rate": 8.87183493221654e-06, "loss": 0.4126, "step": 4971 }, { "epoch": 1.65, "learning_rate": 8.86827445240434e-06, "loss": 0.4329, "step": 4972 }, { "epoch": 1.65, "learning_rate": 8.864714117916639e-06, "loss": 0.3695, "step": 4973 }, { "epoch": 1.65, "learning_rate": 8.861153929210624e-06, "loss": 0.3257, "step": 4974 }, { "epoch": 1.65, "learning_rate": 8.857593886743457e-06, "loss": 0.3862, "step": 4975 }, { "epoch": 1.65, "learning_rate": 8.85403399097228e-06, "loss": 0.3689, "step": 4976 }, { "epoch": 1.65, "learning_rate": 8.85047424235422e-06, "loss": 0.3097, "step": 4977 }, { "epoch": 1.65, "learning_rate": 8.846914641346387e-06, "loss": 0.3297, "step": 4978 }, { "epoch": 1.65, "learning_rate": 8.84335518840586e-06, "loss": 0.3416, "step": 4979 }, { "epoch": 1.65, "learning_rate": 8.83979588398971e-06, "loss": 0.3562, "step": 4980 }, { "epoch": 1.65, "learning_rate": 8.836236728554988e-06, "loss": 0.2836, "step": 4981 }, { "epoch": 1.65, "learning_rate": 8.83267772255872e-06, "loss": 0.296, "step": 4982 }, { "epoch": 1.65, "learning_rate": 8.829118866457922e-06, "loss": 0.4011, "step": 4983 }, { "epoch": 1.65, "learning_rate": 8.825560160709587e-06, "loss": 0.3538, "step": 4984 }, { "epoch": 1.65, "learning_rate": 8.822001605770678e-06, "loss": 0.3583, "step": 4985 }, { "epoch": 1.65, "learning_rate": 8.818443202098154e-06, "loss": 0.3052, "step": 4986 }, { "epoch": 1.66, "learning_rate": 8.814884950148948e-06, "loss": 0.3809, "step": 4987 }, { "epoch": 1.66, "learning_rate": 8.811326850379973e-06, "loss": 0.3859, "step": 4988 }, { "epoch": 1.66, "learning_rate": 8.807768903248127e-06, "loss": 0.3364, "step": 4989 }, { "epoch": 1.66, "learning_rate": 8.804211109210278e-06, "loss": 0.3904, "step": 4990 }, { "epoch": 1.66, "learning_rate": 8.800653468723293e-06, "loss": 0.3485, "step": 4991 }, { "epoch": 1.66, "learning_rate": 8.797095982243992e-06, "loss": 0.4136, "step": 4992 }, { "epoch": 1.66, "learning_rate": 8.7935386502292e-06, "loss": 0.3743, "step": 4993 }, { "epoch": 1.66, "learning_rate": 8.78998147313571e-06, "loss": 0.3247, "step": 4994 }, { "epoch": 1.66, "learning_rate": 8.786424451420299e-06, "loss": 0.3957, "step": 4995 }, { "epoch": 1.66, "learning_rate": 8.782867585539722e-06, "loss": 0.3416, "step": 4996 }, { "epoch": 1.66, "learning_rate": 8.77931087595072e-06, "loss": 0.3718, "step": 4997 }, { "epoch": 1.66, "learning_rate": 8.775754323110004e-06, "loss": 0.3977, "step": 4998 }, { "epoch": 1.66, "learning_rate": 8.772197927474268e-06, "loss": 0.3381, "step": 4999 }, { "epoch": 1.66, "learning_rate": 8.768641689500191e-06, "loss": 0.3804, "step": 5000 }, { "epoch": 1.66, "learning_rate": 8.765085609644428e-06, "loss": 0.3319, "step": 5001 }, { "epoch": 1.66, "learning_rate": 8.761529688363612e-06, "loss": 0.3075, "step": 5002 }, { "epoch": 1.66, "learning_rate": 8.75797392611436e-06, "loss": 0.3572, "step": 5003 }, { "epoch": 1.66, "learning_rate": 8.754418323353264e-06, "loss": 0.3056, "step": 5004 }, { "epoch": 1.66, "learning_rate": 8.750862880536904e-06, "loss": 0.3706, "step": 5005 }, { "epoch": 1.66, "learning_rate": 8.747307598121825e-06, "loss": 0.4272, "step": 5006 }, { "epoch": 1.66, "learning_rate": 8.743752476564563e-06, "loss": 0.3927, "step": 5007 }, { "epoch": 1.66, "learning_rate": 8.740197516321632e-06, "loss": 0.3848, "step": 5008 }, { "epoch": 1.66, "learning_rate": 8.73664271784952e-06, "loss": 0.4178, "step": 5009 }, { "epoch": 1.66, "learning_rate": 8.733088081604697e-06, "loss": 0.3148, "step": 5010 }, { "epoch": 1.66, "learning_rate": 8.729533608043616e-06, "loss": 0.3845, "step": 5011 }, { "epoch": 1.66, "learning_rate": 8.725979297622709e-06, "loss": 0.3826, "step": 5012 }, { "epoch": 1.66, "learning_rate": 8.722425150798373e-06, "loss": 0.4426, "step": 5013 }, { "epoch": 1.66, "learning_rate": 8.718871168027003e-06, "loss": 0.3475, "step": 5014 }, { "epoch": 1.66, "learning_rate": 8.71531734976496e-06, "loss": 0.3203, "step": 5015 }, { "epoch": 1.66, "learning_rate": 8.711763696468594e-06, "loss": 0.4368, "step": 5016 }, { "epoch": 1.67, "learning_rate": 8.708210208594226e-06, "loss": 0.4868, "step": 5017 }, { "epoch": 1.67, "learning_rate": 8.704656886598157e-06, "loss": 0.285, "step": 5018 }, { "epoch": 1.67, "learning_rate": 8.701103730936674e-06, "loss": 0.3929, "step": 5019 }, { "epoch": 1.67, "learning_rate": 8.697550742066025e-06, "loss": 0.3872, "step": 5020 }, { "epoch": 1.67, "learning_rate": 8.693997920442455e-06, "loss": 0.3855, "step": 5021 }, { "epoch": 1.67, "learning_rate": 8.69044526652218e-06, "loss": 0.2839, "step": 5022 }, { "epoch": 1.67, "learning_rate": 8.686892780761397e-06, "loss": 0.2993, "step": 5023 }, { "epoch": 1.67, "learning_rate": 8.683340463616277e-06, "loss": 0.3286, "step": 5024 }, { "epoch": 1.67, "learning_rate": 8.679788315542975e-06, "loss": 0.3391, "step": 5025 }, { "epoch": 1.67, "learning_rate": 8.676236336997618e-06, "loss": 0.3096, "step": 5026 }, { "epoch": 1.67, "learning_rate": 8.672684528436314e-06, "loss": 0.3669, "step": 5027 }, { "epoch": 1.67, "learning_rate": 8.669132890315152e-06, "loss": 0.4648, "step": 5028 }, { "epoch": 1.67, "learning_rate": 8.665581423090194e-06, "loss": 0.4075, "step": 5029 }, { "epoch": 1.67, "learning_rate": 8.662030127217484e-06, "loss": 0.2832, "step": 5030 }, { "epoch": 1.67, "learning_rate": 8.658479003153042e-06, "loss": 0.394, "step": 5031 }, { "epoch": 1.67, "learning_rate": 8.654928051352869e-06, "loss": 0.3523, "step": 5032 }, { "epoch": 1.67, "learning_rate": 8.651377272272946e-06, "loss": 0.3823, "step": 5033 }, { "epoch": 1.67, "learning_rate": 8.647826666369215e-06, "loss": 0.2319, "step": 5034 }, { "epoch": 1.67, "learning_rate": 8.644276234097613e-06, "loss": 0.2896, "step": 5035 }, { "epoch": 1.67, "learning_rate": 8.640725975914054e-06, "loss": 0.3292, "step": 5036 }, { "epoch": 1.67, "learning_rate": 8.637175892274425e-06, "loss": 0.3447, "step": 5037 }, { "epoch": 1.67, "learning_rate": 8.633625983634586e-06, "loss": 0.3802, "step": 5038 }, { "epoch": 1.67, "learning_rate": 8.630076250450383e-06, "loss": 0.2565, "step": 5039 }, { "epoch": 1.67, "learning_rate": 8.626526693177641e-06, "loss": 0.4001, "step": 5040 }, { "epoch": 1.67, "learning_rate": 8.622977312272146e-06, "loss": 0.3577, "step": 5041 }, { "epoch": 1.67, "learning_rate": 8.619428108189681e-06, "loss": 0.3155, "step": 5042 }, { "epoch": 1.67, "learning_rate": 8.615879081385994e-06, "loss": 0.3777, "step": 5043 }, { "epoch": 1.67, "learning_rate": 8.612330232316818e-06, "loss": 0.3215, "step": 5044 }, { "epoch": 1.67, "learning_rate": 8.60878156143786e-06, "loss": 0.3933, "step": 5045 }, { "epoch": 1.67, "learning_rate": 8.6052330692048e-06, "loss": 0.3333, "step": 5046 }, { "epoch": 1.68, "learning_rate": 8.6016847560733e-06, "loss": 0.3895, "step": 5047 }, { "epoch": 1.68, "learning_rate": 8.598136622499e-06, "loss": 0.3549, "step": 5048 }, { "epoch": 1.68, "learning_rate": 8.594588668937512e-06, "loss": 0.4019, "step": 5049 }, { "epoch": 1.68, "learning_rate": 8.591040895844426e-06, "loss": 0.3713, "step": 5050 }, { "epoch": 1.68, "learning_rate": 8.587493303675314e-06, "loss": 0.4324, "step": 5051 }, { "epoch": 1.68, "learning_rate": 8.58394589288572e-06, "loss": 0.3428, "step": 5052 }, { "epoch": 1.68, "learning_rate": 8.580398663931163e-06, "loss": 0.2955, "step": 5053 }, { "epoch": 1.68, "learning_rate": 8.576851617267151e-06, "loss": 0.3156, "step": 5054 }, { "epoch": 1.68, "learning_rate": 8.573304753349147e-06, "loss": 0.3475, "step": 5055 }, { "epoch": 1.68, "learning_rate": 8.56975807263261e-06, "loss": 0.4275, "step": 5056 }, { "epoch": 1.68, "learning_rate": 8.566211575572965e-06, "loss": 0.359, "step": 5057 }, { "epoch": 1.68, "learning_rate": 8.562665262625616e-06, "loss": 0.3879, "step": 5058 }, { "epoch": 1.68, "learning_rate": 8.559119134245946e-06, "loss": 0.3622, "step": 5059 }, { "epoch": 1.68, "learning_rate": 8.555573190889316e-06, "loss": 0.3441, "step": 5060 }, { "epoch": 1.68, "learning_rate": 8.55202743301105e-06, "loss": 0.2655, "step": 5061 }, { "epoch": 1.68, "learning_rate": 8.548481861066463e-06, "loss": 0.261, "step": 5062 }, { "epoch": 1.68, "learning_rate": 8.544936475510841e-06, "loss": 0.3344, "step": 5063 }, { "epoch": 1.68, "learning_rate": 8.541391276799443e-06, "loss": 0.4111, "step": 5064 }, { "epoch": 1.68, "learning_rate": 8.537846265387512e-06, "loss": 0.3169, "step": 5065 }, { "epoch": 1.68, "learning_rate": 8.534301441730257e-06, "loss": 0.3403, "step": 5066 }, { "epoch": 1.68, "learning_rate": 8.530756806282874e-06, "loss": 0.3591, "step": 5067 }, { "epoch": 1.68, "learning_rate": 8.527212359500518e-06, "loss": 0.3092, "step": 5068 }, { "epoch": 1.68, "learning_rate": 8.523668101838336e-06, "loss": 0.3457, "step": 5069 }, { "epoch": 1.68, "learning_rate": 8.520124033751446e-06, "loss": 0.3913, "step": 5070 }, { "epoch": 1.68, "learning_rate": 8.51658015569494e-06, "loss": 0.2635, "step": 5071 }, { "epoch": 1.68, "learning_rate": 8.513036468123883e-06, "loss": 0.3367, "step": 5072 }, { "epoch": 1.68, "learning_rate": 8.509492971493324e-06, "loss": 0.3555, "step": 5073 }, { "epoch": 1.68, "learning_rate": 8.505949666258281e-06, "loss": 0.3817, "step": 5074 }, { "epoch": 1.68, "learning_rate": 8.502406552873746e-06, "loss": 0.402, "step": 5075 }, { "epoch": 1.68, "learning_rate": 8.498863631794692e-06, "loss": 0.3696, "step": 5076 }, { "epoch": 1.69, "learning_rate": 8.49532090347606e-06, "loss": 0.46, "step": 5077 }, { "epoch": 1.69, "learning_rate": 8.491778368372772e-06, "loss": 0.4221, "step": 5078 }, { "epoch": 1.69, "learning_rate": 8.488236026939727e-06, "loss": 0.4072, "step": 5079 }, { "epoch": 1.69, "learning_rate": 8.484693879631794e-06, "loss": 0.3977, "step": 5080 }, { "epoch": 1.69, "learning_rate": 8.481151926903821e-06, "loss": 0.3743, "step": 5081 }, { "epoch": 1.69, "learning_rate": 8.477610169210624e-06, "loss": 0.3999, "step": 5082 }, { "epoch": 1.69, "learning_rate": 8.474068607007002e-06, "loss": 0.3075, "step": 5083 }, { "epoch": 1.69, "learning_rate": 8.470527240747726e-06, "loss": 0.3536, "step": 5084 }, { "epoch": 1.69, "learning_rate": 8.466986070887542e-06, "loss": 0.3361, "step": 5085 }, { "epoch": 1.69, "learning_rate": 8.463445097881168e-06, "loss": 0.3468, "step": 5086 }, { "epoch": 1.69, "learning_rate": 8.4599043221833e-06, "loss": 0.3405, "step": 5087 }, { "epoch": 1.69, "learning_rate": 8.456363744248614e-06, "loss": 0.3574, "step": 5088 }, { "epoch": 1.69, "learning_rate": 8.452823364531746e-06, "loss": 0.3004, "step": 5089 }, { "epoch": 1.69, "learning_rate": 8.449283183487317e-06, "loss": 0.4456, "step": 5090 }, { "epoch": 1.69, "learning_rate": 8.445743201569922e-06, "loss": 0.3108, "step": 5091 }, { "epoch": 1.69, "learning_rate": 8.442203419234128e-06, "loss": 0.3409, "step": 5092 }, { "epoch": 1.69, "learning_rate": 8.43866383693448e-06, "loss": 0.3475, "step": 5093 }, { "epoch": 1.69, "learning_rate": 8.43512445512549e-06, "loss": 0.3295, "step": 5094 }, { "epoch": 1.69, "learning_rate": 8.431585274261655e-06, "loss": 0.4658, "step": 5095 }, { "epoch": 1.69, "learning_rate": 8.428046294797436e-06, "loss": 0.4346, "step": 5096 }, { "epoch": 1.69, "learning_rate": 8.424507517187268e-06, "loss": 0.3785, "step": 5097 }, { "epoch": 1.69, "learning_rate": 8.42096894188557e-06, "loss": 0.4216, "step": 5098 }, { "epoch": 1.69, "learning_rate": 8.41743056934673e-06, "loss": 0.3571, "step": 5099 }, { "epoch": 1.69, "learning_rate": 8.413892400025105e-06, "loss": 0.3735, "step": 5100 }, { "epoch": 1.69, "learning_rate": 8.410354434375034e-06, "loss": 0.381, "step": 5101 }, { "epoch": 1.69, "learning_rate": 8.406816672850828e-06, "loss": 0.3668, "step": 5102 }, { "epoch": 1.69, "learning_rate": 8.403279115906761e-06, "loss": 0.3494, "step": 5103 }, { "epoch": 1.69, "learning_rate": 8.399741763997099e-06, "loss": 0.3992, "step": 5104 }, { "epoch": 1.69, "learning_rate": 8.396204617576062e-06, "loss": 0.3762, "step": 5105 }, { "epoch": 1.69, "learning_rate": 8.392667677097863e-06, "loss": 0.3503, "step": 5106 }, { "epoch": 1.69, "learning_rate": 8.389130943016676e-06, "loss": 0.3584, "step": 5107 }, { "epoch": 1.7, "learning_rate": 8.385594415786651e-06, "loss": 0.2667, "step": 5108 }, { "epoch": 1.7, "learning_rate": 8.382058095861918e-06, "loss": 0.373, "step": 5109 }, { "epoch": 1.7, "learning_rate": 8.378521983696565e-06, "loss": 0.3051, "step": 5110 }, { "epoch": 1.7, "learning_rate": 8.37498607974467e-06, "loss": 0.3536, "step": 5111 }, { "epoch": 1.7, "learning_rate": 8.371450384460272e-06, "loss": 0.3684, "step": 5112 }, { "epoch": 1.7, "learning_rate": 8.367914898297395e-06, "loss": 0.3469, "step": 5113 }, { "epoch": 1.7, "learning_rate": 8.364379621710025e-06, "loss": 0.3634, "step": 5114 }, { "epoch": 1.7, "learning_rate": 8.360844555152126e-06, "loss": 0.4088, "step": 5115 }, { "epoch": 1.7, "learning_rate": 8.35730969907764e-06, "loss": 0.2485, "step": 5116 }, { "epoch": 1.7, "learning_rate": 8.353775053940469e-06, "loss": 0.2922, "step": 5117 }, { "epoch": 1.7, "learning_rate": 8.350240620194498e-06, "loss": 0.408, "step": 5118 }, { "epoch": 1.7, "learning_rate": 8.346706398293583e-06, "loss": 0.3534, "step": 5119 }, { "epoch": 1.7, "learning_rate": 8.343172388691554e-06, "loss": 0.4011, "step": 5120 }, { "epoch": 1.7, "learning_rate": 8.339638591842213e-06, "loss": 0.3344, "step": 5121 }, { "epoch": 1.7, "learning_rate": 8.336105008199328e-06, "loss": 0.3925, "step": 5122 }, { "epoch": 1.7, "learning_rate": 8.332571638216652e-06, "loss": 0.4219, "step": 5123 }, { "epoch": 1.7, "learning_rate": 8.329038482347899e-06, "loss": 0.2933, "step": 5124 }, { "epoch": 1.7, "learning_rate": 8.325505541046761e-06, "loss": 0.3424, "step": 5125 }, { "epoch": 1.7, "learning_rate": 8.321972814766904e-06, "loss": 0.4125, "step": 5126 }, { "epoch": 1.7, "learning_rate": 8.318440303961962e-06, "loss": 0.3914, "step": 5127 }, { "epoch": 1.7, "learning_rate": 8.314908009085542e-06, "loss": 0.3157, "step": 5128 }, { "epoch": 1.7, "learning_rate": 8.31137593059123e-06, "loss": 0.3354, "step": 5129 }, { "epoch": 1.7, "learning_rate": 8.30784406893258e-06, "loss": 0.3484, "step": 5130 }, { "epoch": 1.7, "learning_rate": 8.30431242456311e-06, "loss": 0.3888, "step": 5131 }, { "epoch": 1.7, "learning_rate": 8.300780997936321e-06, "loss": 0.3732, "step": 5132 }, { "epoch": 1.7, "learning_rate": 8.297249789505683e-06, "loss": 0.3794, "step": 5133 }, { "epoch": 1.7, "learning_rate": 8.293718799724635e-06, "loss": 0.3707, "step": 5134 }, { "epoch": 1.7, "learning_rate": 8.29018802904659e-06, "loss": 0.3396, "step": 5135 }, { "epoch": 1.7, "learning_rate": 8.286657477924938e-06, "loss": 0.2661, "step": 5136 }, { "epoch": 1.7, "learning_rate": 8.283127146813035e-06, "loss": 0.3812, "step": 5137 }, { "epoch": 1.71, "learning_rate": 8.279597036164203e-06, "loss": 0.3168, "step": 5138 }, { "epoch": 1.71, "learning_rate": 8.276067146431747e-06, "loss": 0.3383, "step": 5139 }, { "epoch": 1.71, "learning_rate": 8.27253747806894e-06, "loss": 0.3914, "step": 5140 }, { "epoch": 1.71, "learning_rate": 8.269008031529026e-06, "loss": 0.3608, "step": 5141 }, { "epoch": 1.71, "learning_rate": 8.265478807265215e-06, "loss": 0.2775, "step": 5142 }, { "epoch": 1.71, "learning_rate": 8.261949805730701e-06, "loss": 0.3218, "step": 5143 }, { "epoch": 1.71, "learning_rate": 8.258421027378635e-06, "loss": 0.3623, "step": 5144 }, { "epoch": 1.71, "learning_rate": 8.254892472662148e-06, "loss": 0.3647, "step": 5145 }, { "epoch": 1.71, "learning_rate": 8.251364142034342e-06, "loss": 0.3159, "step": 5146 }, { "epoch": 1.71, "learning_rate": 8.247836035948286e-06, "loss": 0.3124, "step": 5147 }, { "epoch": 1.71, "learning_rate": 8.244308154857028e-06, "loss": 0.3309, "step": 5148 }, { "epoch": 1.71, "learning_rate": 8.240780499213582e-06, "loss": 0.3427, "step": 5149 }, { "epoch": 1.71, "learning_rate": 8.237253069470928e-06, "loss": 0.3118, "step": 5150 }, { "epoch": 1.71, "learning_rate": 8.233725866082022e-06, "loss": 0.3258, "step": 5151 }, { "epoch": 1.71, "learning_rate": 8.230198889499799e-06, "loss": 0.2814, "step": 5152 }, { "epoch": 1.71, "learning_rate": 8.226672140177145e-06, "loss": 0.2522, "step": 5153 }, { "epoch": 1.71, "learning_rate": 8.223145618566939e-06, "loss": 0.3169, "step": 5154 }, { "epoch": 1.71, "learning_rate": 8.219619325122015e-06, "loss": 0.3916, "step": 5155 }, { "epoch": 1.71, "learning_rate": 8.216093260295183e-06, "loss": 0.3495, "step": 5156 }, { "epoch": 1.71, "learning_rate": 8.212567424539233e-06, "loss": 0.3055, "step": 5157 }, { "epoch": 1.71, "learning_rate": 8.209041818306902e-06, "loss": 0.3273, "step": 5158 }, { "epoch": 1.71, "learning_rate": 8.20551644205092e-06, "loss": 0.3346, "step": 5159 }, { "epoch": 1.71, "learning_rate": 8.201991296223979e-06, "loss": 0.3995, "step": 5160 }, { "epoch": 1.71, "learning_rate": 8.198466381278742e-06, "loss": 0.3962, "step": 5161 }, { "epoch": 1.71, "learning_rate": 8.194941697667839e-06, "loss": 0.3613, "step": 5162 }, { "epoch": 1.71, "learning_rate": 8.191417245843878e-06, "loss": 0.3492, "step": 5163 }, { "epoch": 1.71, "learning_rate": 8.187893026259432e-06, "loss": 0.2906, "step": 5164 }, { "epoch": 1.71, "learning_rate": 8.184369039367041e-06, "loss": 0.319, "step": 5165 }, { "epoch": 1.71, "learning_rate": 8.18084528561922e-06, "loss": 0.2953, "step": 5166 }, { "epoch": 1.71, "learning_rate": 8.177321765468456e-06, "loss": 0.301, "step": 5167 }, { "epoch": 1.72, "learning_rate": 8.1737984793672e-06, "loss": 0.4254, "step": 5168 }, { "epoch": 1.72, "learning_rate": 8.170275427767878e-06, "loss": 0.3394, "step": 5169 }, { "epoch": 1.72, "learning_rate": 8.166752611122884e-06, "loss": 0.3192, "step": 5170 }, { "epoch": 1.72, "learning_rate": 8.163230029884582e-06, "loss": 0.3184, "step": 5171 }, { "epoch": 1.72, "learning_rate": 8.159707684505305e-06, "loss": 0.3984, "step": 5172 }, { "epoch": 1.72, "learning_rate": 8.156185575437353e-06, "loss": 0.3254, "step": 5173 }, { "epoch": 1.72, "learning_rate": 8.152663703133001e-06, "loss": 0.3143, "step": 5174 }, { "epoch": 1.72, "learning_rate": 8.149142068044494e-06, "loss": 0.306, "step": 5175 }, { "epoch": 1.72, "learning_rate": 8.14562067062404e-06, "loss": 0.3395, "step": 5176 }, { "epoch": 1.72, "learning_rate": 8.142099511323824e-06, "loss": 0.3167, "step": 5177 }, { "epoch": 1.72, "learning_rate": 8.138578590595998e-06, "loss": 0.3773, "step": 5178 }, { "epoch": 1.72, "learning_rate": 8.135057908892677e-06, "loss": 0.3773, "step": 5179 }, { "epoch": 1.72, "learning_rate": 8.131537466665954e-06, "loss": 0.3459, "step": 5180 }, { "epoch": 1.72, "learning_rate": 8.128017264367886e-06, "loss": 0.3435, "step": 5181 }, { "epoch": 1.72, "learning_rate": 8.124497302450502e-06, "loss": 0.3523, "step": 5182 }, { "epoch": 1.72, "learning_rate": 8.1209775813658e-06, "loss": 0.3041, "step": 5183 }, { "epoch": 1.72, "learning_rate": 8.117458101565742e-06, "loss": 0.3063, "step": 5184 }, { "epoch": 1.72, "learning_rate": 8.113938863502274e-06, "loss": 0.355, "step": 5185 }, { "epoch": 1.72, "learning_rate": 8.110419867627288e-06, "loss": 0.2998, "step": 5186 }, { "epoch": 1.72, "learning_rate": 8.106901114392663e-06, "loss": 0.3466, "step": 5187 }, { "epoch": 1.72, "learning_rate": 8.103382604250238e-06, "loss": 0.271, "step": 5188 }, { "epoch": 1.72, "learning_rate": 8.099864337651829e-06, "loss": 0.3526, "step": 5189 }, { "epoch": 1.72, "learning_rate": 8.096346315049209e-06, "loss": 0.4397, "step": 5190 }, { "epoch": 1.72, "learning_rate": 8.09282853689413e-06, "loss": 0.3011, "step": 5191 }, { "epoch": 1.72, "learning_rate": 8.089311003638312e-06, "loss": 0.3882, "step": 5192 }, { "epoch": 1.72, "learning_rate": 8.085793715733432e-06, "loss": 0.3907, "step": 5193 }, { "epoch": 1.72, "learning_rate": 8.082276673631149e-06, "loss": 0.3745, "step": 5194 }, { "epoch": 1.72, "learning_rate": 8.078759877783082e-06, "loss": 0.4578, "step": 5195 }, { "epoch": 1.72, "learning_rate": 8.075243328640826e-06, "loss": 0.3665, "step": 5196 }, { "epoch": 1.72, "learning_rate": 8.071727026655939e-06, "loss": 0.4033, "step": 5197 }, { "epoch": 1.73, "learning_rate": 8.068210972279944e-06, "loss": 0.307, "step": 5198 }, { "epoch": 1.73, "learning_rate": 8.064695165964343e-06, "loss": 0.3663, "step": 5199 }, { "epoch": 1.73, "learning_rate": 8.061179608160594e-06, "loss": 0.3221, "step": 5200 }, { "epoch": 1.73, "learning_rate": 8.05766429932013e-06, "loss": 0.3534, "step": 5201 }, { "epoch": 1.73, "learning_rate": 8.05414923989435e-06, "loss": 0.3564, "step": 5202 }, { "epoch": 1.73, "learning_rate": 8.050634430334623e-06, "loss": 0.3739, "step": 5203 }, { "epoch": 1.73, "learning_rate": 8.047119871092284e-06, "loss": 0.2874, "step": 5204 }, { "epoch": 1.73, "learning_rate": 8.043605562618637e-06, "loss": 0.2697, "step": 5205 }, { "epoch": 1.73, "learning_rate": 8.040091505364957e-06, "loss": 0.3899, "step": 5206 }, { "epoch": 1.73, "learning_rate": 8.036577699782474e-06, "loss": 0.3311, "step": 5207 }, { "epoch": 1.73, "learning_rate": 8.033064146322401e-06, "loss": 0.3622, "step": 5208 }, { "epoch": 1.73, "learning_rate": 8.029550845435909e-06, "loss": 0.2917, "step": 5209 }, { "epoch": 1.73, "learning_rate": 8.026037797574141e-06, "loss": 0.3795, "step": 5210 }, { "epoch": 1.73, "learning_rate": 8.022525003188208e-06, "loss": 0.2913, "step": 5211 }, { "epoch": 1.73, "learning_rate": 8.019012462729185e-06, "loss": 0.3352, "step": 5212 }, { "epoch": 1.73, "learning_rate": 8.01550017664812e-06, "loss": 0.288, "step": 5213 }, { "epoch": 1.73, "learning_rate": 8.011988145396019e-06, "loss": 0.3528, "step": 5214 }, { "epoch": 1.73, "learning_rate": 8.00847636942386e-06, "loss": 0.3193, "step": 5215 }, { "epoch": 1.73, "learning_rate": 8.004964849182592e-06, "loss": 0.3534, "step": 5216 }, { "epoch": 1.73, "learning_rate": 8.001453585123132e-06, "loss": 0.3918, "step": 5217 }, { "epoch": 1.73, "learning_rate": 7.997942577696353e-06, "loss": 0.3246, "step": 5218 }, { "epoch": 1.73, "learning_rate": 7.994431827353108e-06, "loss": 0.3402, "step": 5219 }, { "epoch": 1.73, "learning_rate": 7.99092133454421e-06, "loss": 0.3093, "step": 5220 }, { "epoch": 1.73, "learning_rate": 7.987411099720438e-06, "loss": 0.3889, "step": 5221 }, { "epoch": 1.73, "learning_rate": 7.983901123332538e-06, "loss": 0.2532, "step": 5222 }, { "epoch": 1.73, "learning_rate": 7.98039140583123e-06, "loss": 0.408, "step": 5223 }, { "epoch": 1.73, "learning_rate": 7.976881947667191e-06, "loss": 0.3984, "step": 5224 }, { "epoch": 1.73, "learning_rate": 7.973372749291075e-06, "loss": 0.3319, "step": 5225 }, { "epoch": 1.73, "learning_rate": 7.969863811153497e-06, "loss": 0.3451, "step": 5226 }, { "epoch": 1.73, "learning_rate": 7.966355133705032e-06, "loss": 0.3018, "step": 5227 }, { "epoch": 1.74, "learning_rate": 7.962846717396235e-06, "loss": 0.361, "step": 5228 }, { "epoch": 1.74, "learning_rate": 7.959338562677615e-06, "loss": 0.3381, "step": 5229 }, { "epoch": 1.74, "learning_rate": 7.955830669999654e-06, "loss": 0.3943, "step": 5230 }, { "epoch": 1.74, "learning_rate": 7.952323039812801e-06, "loss": 0.3296, "step": 5231 }, { "epoch": 1.74, "learning_rate": 7.948815672567469e-06, "loss": 0.28, "step": 5232 }, { "epoch": 1.74, "learning_rate": 7.945308568714043e-06, "loss": 0.287, "step": 5233 }, { "epoch": 1.74, "learning_rate": 7.941801728702861e-06, "loss": 0.4172, "step": 5234 }, { "epoch": 1.74, "learning_rate": 7.938295152984238e-06, "loss": 0.3667, "step": 5235 }, { "epoch": 1.74, "learning_rate": 7.934788842008452e-06, "loss": 0.3546, "step": 5236 }, { "epoch": 1.74, "learning_rate": 7.931282796225752e-06, "loss": 0.3235, "step": 5237 }, { "epoch": 1.74, "learning_rate": 7.927777016086339e-06, "loss": 0.3495, "step": 5238 }, { "epoch": 1.74, "learning_rate": 7.924271502040396e-06, "loss": 0.4344, "step": 5239 }, { "epoch": 1.74, "learning_rate": 7.920766254538067e-06, "loss": 0.3126, "step": 5240 }, { "epoch": 1.74, "learning_rate": 7.91726127402945e-06, "loss": 0.2951, "step": 5241 }, { "epoch": 1.74, "learning_rate": 7.913756560964627e-06, "loss": 0.3571, "step": 5242 }, { "epoch": 1.74, "learning_rate": 7.910252115793632e-06, "loss": 0.3104, "step": 5243 }, { "epoch": 1.74, "learning_rate": 7.906747938966473e-06, "loss": 0.3499, "step": 5244 }, { "epoch": 1.74, "learning_rate": 7.90324403093312e-06, "loss": 0.4033, "step": 5245 }, { "epoch": 1.74, "learning_rate": 7.899740392143506e-06, "loss": 0.3137, "step": 5246 }, { "epoch": 1.74, "learning_rate": 7.896237023047536e-06, "loss": 0.3314, "step": 5247 }, { "epoch": 1.74, "learning_rate": 7.892733924095073e-06, "loss": 0.3999, "step": 5248 }, { "epoch": 1.74, "learning_rate": 7.88923109573595e-06, "loss": 0.3319, "step": 5249 }, { "epoch": 1.74, "learning_rate": 7.885728538419964e-06, "loss": 0.3524, "step": 5250 }, { "epoch": 1.74, "learning_rate": 7.882226252596877e-06, "loss": 0.376, "step": 5251 }, { "epoch": 1.74, "learning_rate": 7.878724238716414e-06, "loss": 0.364, "step": 5252 }, { "epoch": 1.74, "learning_rate": 7.875222497228271e-06, "loss": 0.3059, "step": 5253 }, { "epoch": 1.74, "learning_rate": 7.87172102858211e-06, "loss": 0.3491, "step": 5254 }, { "epoch": 1.74, "learning_rate": 7.868219833227546e-06, "loss": 0.3501, "step": 5255 }, { "epoch": 1.74, "learning_rate": 7.864718911614167e-06, "loss": 0.2987, "step": 5256 }, { "epoch": 1.74, "learning_rate": 7.861218264191526e-06, "loss": 0.2925, "step": 5257 }, { "epoch": 1.75, "learning_rate": 7.85771789140914e-06, "loss": 0.2943, "step": 5258 }, { "epoch": 1.75, "learning_rate": 7.85421779371649e-06, "loss": 0.308, "step": 5259 }, { "epoch": 1.75, "learning_rate": 7.850717971563024e-06, "loss": 0.3573, "step": 5260 }, { "epoch": 1.75, "learning_rate": 7.847218425398158e-06, "loss": 0.3527, "step": 5261 }, { "epoch": 1.75, "learning_rate": 7.843719155671258e-06, "loss": 0.36, "step": 5262 }, { "epoch": 1.75, "learning_rate": 7.840220162831667e-06, "loss": 0.377, "step": 5263 }, { "epoch": 1.75, "learning_rate": 7.83672144732869e-06, "loss": 0.3425, "step": 5264 }, { "epoch": 1.75, "learning_rate": 7.8332230096116e-06, "loss": 0.3185, "step": 5265 }, { "epoch": 1.75, "learning_rate": 7.829724850129626e-06, "loss": 0.3823, "step": 5266 }, { "epoch": 1.75, "learning_rate": 7.826226969331965e-06, "loss": 0.3524, "step": 5267 }, { "epoch": 1.75, "learning_rate": 7.822729367667785e-06, "loss": 0.3008, "step": 5268 }, { "epoch": 1.75, "learning_rate": 7.819232045586202e-06, "loss": 0.3691, "step": 5269 }, { "epoch": 1.75, "learning_rate": 7.815735003536311e-06, "loss": 0.2655, "step": 5270 }, { "epoch": 1.75, "learning_rate": 7.812238241967166e-06, "loss": 0.4309, "step": 5271 }, { "epoch": 1.75, "learning_rate": 7.808741761327784e-06, "loss": 0.3707, "step": 5272 }, { "epoch": 1.75, "learning_rate": 7.805245562067151e-06, "loss": 0.45, "step": 5273 }, { "epoch": 1.75, "learning_rate": 7.801749644634209e-06, "loss": 0.2762, "step": 5274 }, { "epoch": 1.75, "learning_rate": 7.798254009477869e-06, "loss": 0.3652, "step": 5275 }, { "epoch": 1.75, "learning_rate": 7.794758657047002e-06, "loss": 0.3522, "step": 5276 }, { "epoch": 1.75, "learning_rate": 7.791263587790446e-06, "loss": 0.3033, "step": 5277 }, { "epoch": 1.75, "learning_rate": 7.787768802157004e-06, "loss": 0.3132, "step": 5278 }, { "epoch": 1.75, "learning_rate": 7.784274300595435e-06, "loss": 0.3162, "step": 5279 }, { "epoch": 1.75, "learning_rate": 7.780780083554473e-06, "loss": 0.3455, "step": 5280 }, { "epoch": 1.75, "learning_rate": 7.777286151482806e-06, "loss": 0.2288, "step": 5281 }, { "epoch": 1.75, "learning_rate": 7.773792504829094e-06, "loss": 0.301, "step": 5282 }, { "epoch": 1.75, "learning_rate": 7.770299144041945e-06, "loss": 0.3746, "step": 5283 }, { "epoch": 1.75, "learning_rate": 7.766806069569946e-06, "loss": 0.3851, "step": 5284 }, { "epoch": 1.75, "learning_rate": 7.763313281861644e-06, "loss": 0.3416, "step": 5285 }, { "epoch": 1.75, "learning_rate": 7.759820781365542e-06, "loss": 0.4039, "step": 5286 }, { "epoch": 1.75, "learning_rate": 7.756328568530112e-06, "loss": 0.3596, "step": 5287 }, { "epoch": 1.76, "learning_rate": 7.752836643803787e-06, "loss": 0.4363, "step": 5288 }, { "epoch": 1.76, "learning_rate": 7.749345007634972e-06, "loss": 0.3929, "step": 5289 }, { "epoch": 1.76, "learning_rate": 7.745853660472016e-06, "loss": 0.3042, "step": 5290 }, { "epoch": 1.76, "learning_rate": 7.742362602763245e-06, "loss": 0.3073, "step": 5291 }, { "epoch": 1.76, "learning_rate": 7.738871834956946e-06, "loss": 0.3125, "step": 5292 }, { "epoch": 1.76, "learning_rate": 7.735381357501369e-06, "loss": 0.4014, "step": 5293 }, { "epoch": 1.76, "learning_rate": 7.731891170844723e-06, "loss": 0.4335, "step": 5294 }, { "epoch": 1.76, "learning_rate": 7.728401275435178e-06, "loss": 0.3702, "step": 5295 }, { "epoch": 1.76, "learning_rate": 7.724911671720881e-06, "loss": 0.2895, "step": 5296 }, { "epoch": 1.76, "learning_rate": 7.721422360149919e-06, "loss": 0.3416, "step": 5297 }, { "epoch": 1.76, "learning_rate": 7.717933341170358e-06, "loss": 0.3823, "step": 5298 }, { "epoch": 1.76, "learning_rate": 7.714444615230223e-06, "loss": 0.3562, "step": 5299 }, { "epoch": 1.76, "learning_rate": 7.710956182777498e-06, "loss": 0.3683, "step": 5300 }, { "epoch": 1.76, "learning_rate": 7.707468044260134e-06, "loss": 0.3654, "step": 5301 }, { "epoch": 1.76, "learning_rate": 7.703980200126043e-06, "loss": 0.3535, "step": 5302 }, { "epoch": 1.76, "learning_rate": 7.700492650823093e-06, "loss": 0.3323, "step": 5303 }, { "epoch": 1.76, "learning_rate": 7.697005396799123e-06, "loss": 0.2904, "step": 5304 }, { "epoch": 1.76, "learning_rate": 7.693518438501924e-06, "loss": 0.3528, "step": 5305 }, { "epoch": 1.76, "learning_rate": 7.690031776379262e-06, "loss": 0.2624, "step": 5306 }, { "epoch": 1.76, "learning_rate": 7.686545410878855e-06, "loss": 0.3488, "step": 5307 }, { "epoch": 1.76, "learning_rate": 7.683059342448387e-06, "loss": 0.2992, "step": 5308 }, { "epoch": 1.76, "learning_rate": 7.679573571535506e-06, "loss": 0.3467, "step": 5309 }, { "epoch": 1.76, "learning_rate": 7.676088098587813e-06, "loss": 0.2773, "step": 5310 }, { "epoch": 1.76, "learning_rate": 7.67260292405288e-06, "loss": 0.348, "step": 5311 }, { "epoch": 1.76, "learning_rate": 7.669118048378233e-06, "loss": 0.3783, "step": 5312 }, { "epoch": 1.76, "learning_rate": 7.665633472011368e-06, "loss": 0.374, "step": 5313 }, { "epoch": 1.76, "learning_rate": 7.662149195399737e-06, "loss": 0.3571, "step": 5314 }, { "epoch": 1.76, "learning_rate": 7.658665218990754e-06, "loss": 0.3171, "step": 5315 }, { "epoch": 1.76, "learning_rate": 7.655181543231801e-06, "loss": 0.3091, "step": 5316 }, { "epoch": 1.76, "learning_rate": 7.651698168570205e-06, "loss": 0.3629, "step": 5317 }, { "epoch": 1.77, "learning_rate": 7.648215095453271e-06, "loss": 0.3224, "step": 5318 }, { "epoch": 1.77, "learning_rate": 7.64473232432826e-06, "loss": 0.3668, "step": 5319 }, { "epoch": 1.77, "learning_rate": 7.641249855642393e-06, "loss": 0.4429, "step": 5320 }, { "epoch": 1.77, "learning_rate": 7.637767689842853e-06, "loss": 0.3406, "step": 5321 }, { "epoch": 1.77, "learning_rate": 7.63428582737678e-06, "loss": 0.2716, "step": 5322 }, { "epoch": 1.77, "learning_rate": 7.630804268691286e-06, "loss": 0.3408, "step": 5323 }, { "epoch": 1.77, "learning_rate": 7.627323014233431e-06, "loss": 0.2567, "step": 5324 }, { "epoch": 1.77, "learning_rate": 7.623842064450241e-06, "loss": 0.4121, "step": 5325 }, { "epoch": 1.77, "learning_rate": 7.620361419788705e-06, "loss": 0.3268, "step": 5326 }, { "epoch": 1.77, "learning_rate": 7.616881080695774e-06, "loss": 0.3654, "step": 5327 }, { "epoch": 1.77, "learning_rate": 7.613401047618354e-06, "loss": 0.3328, "step": 5328 }, { "epoch": 1.77, "learning_rate": 7.609921321003317e-06, "loss": 0.299, "step": 5329 }, { "epoch": 1.77, "learning_rate": 7.6064419012974974e-06, "loss": 0.3214, "step": 5330 }, { "epoch": 1.77, "learning_rate": 7.602962788947677e-06, "loss": 0.3351, "step": 5331 }, { "epoch": 1.77, "learning_rate": 7.599483984400615e-06, "loss": 0.3422, "step": 5332 }, { "epoch": 1.77, "learning_rate": 7.596005488103018e-06, "loss": 0.4431, "step": 5333 }, { "epoch": 1.77, "learning_rate": 7.592527300501562e-06, "loss": 0.3317, "step": 5334 }, { "epoch": 1.77, "learning_rate": 7.589049422042878e-06, "loss": 0.2772, "step": 5335 }, { "epoch": 1.77, "learning_rate": 7.585571853173561e-06, "loss": 0.3409, "step": 5336 }, { "epoch": 1.77, "learning_rate": 7.582094594340169e-06, "loss": 0.3228, "step": 5337 }, { "epoch": 1.77, "learning_rate": 7.578617645989206e-06, "loss": 0.3291, "step": 5338 }, { "epoch": 1.77, "learning_rate": 7.575141008567151e-06, "loss": 0.293, "step": 5339 }, { "epoch": 1.77, "learning_rate": 7.571664682520437e-06, "loss": 0.3177, "step": 5340 }, { "epoch": 1.77, "learning_rate": 7.56818866829546e-06, "loss": 0.3495, "step": 5341 }, { "epoch": 1.77, "learning_rate": 7.564712966338569e-06, "loss": 0.4006, "step": 5342 }, { "epoch": 1.77, "learning_rate": 7.5612375770960814e-06, "loss": 0.3527, "step": 5343 }, { "epoch": 1.77, "learning_rate": 7.557762501014274e-06, "loss": 0.2968, "step": 5344 }, { "epoch": 1.77, "learning_rate": 7.554287738539373e-06, "loss": 0.3241, "step": 5345 }, { "epoch": 1.77, "learning_rate": 7.550813290117574e-06, "loss": 0.3527, "step": 5346 }, { "epoch": 1.77, "learning_rate": 7.54733915619503e-06, "loss": 0.2477, "step": 5347 }, { "epoch": 1.77, "learning_rate": 7.543865337217856e-06, "loss": 0.4071, "step": 5348 }, { "epoch": 1.78, "learning_rate": 7.5403918336321205e-06, "loss": 0.4112, "step": 5349 }, { "epoch": 1.78, "learning_rate": 7.536918645883858e-06, "loss": 0.3367, "step": 5350 }, { "epoch": 1.78, "learning_rate": 7.533445774419059e-06, "loss": 0.3573, "step": 5351 }, { "epoch": 1.78, "learning_rate": 7.529973219683672e-06, "loss": 0.2628, "step": 5352 }, { "epoch": 1.78, "learning_rate": 7.526500982123604e-06, "loss": 0.2968, "step": 5353 }, { "epoch": 1.78, "learning_rate": 7.523029062184728e-06, "loss": 0.3766, "step": 5354 }, { "epoch": 1.78, "learning_rate": 7.519557460312872e-06, "loss": 0.3311, "step": 5355 }, { "epoch": 1.78, "learning_rate": 7.516086176953821e-06, "loss": 0.3466, "step": 5356 }, { "epoch": 1.78, "learning_rate": 7.512615212553322e-06, "loss": 0.3292, "step": 5357 }, { "epoch": 1.78, "learning_rate": 7.509144567557086e-06, "loss": 0.3192, "step": 5358 }, { "epoch": 1.78, "learning_rate": 7.505674242410769e-06, "loss": 0.3301, "step": 5359 }, { "epoch": 1.78, "learning_rate": 7.502204237559997e-06, "loss": 0.198, "step": 5360 }, { "epoch": 1.78, "learning_rate": 7.498734553450355e-06, "loss": 0.2797, "step": 5361 }, { "epoch": 1.78, "learning_rate": 7.49526519052738e-06, "loss": 0.3228, "step": 5362 }, { "epoch": 1.78, "learning_rate": 7.491796149236574e-06, "loss": 0.299, "step": 5363 }, { "epoch": 1.78, "learning_rate": 7.488327430023395e-06, "loss": 0.361, "step": 5364 }, { "epoch": 1.78, "learning_rate": 7.4848590333332635e-06, "loss": 0.2667, "step": 5365 }, { "epoch": 1.78, "learning_rate": 7.4813909596115485e-06, "loss": 0.3269, "step": 5366 }, { "epoch": 1.78, "learning_rate": 7.477923209303588e-06, "loss": 0.356, "step": 5367 }, { "epoch": 1.78, "learning_rate": 7.474455782854672e-06, "loss": 0.3129, "step": 5368 }, { "epoch": 1.78, "learning_rate": 7.470988680710058e-06, "loss": 0.2815, "step": 5369 }, { "epoch": 1.78, "learning_rate": 7.467521903314949e-06, "loss": 0.2906, "step": 5370 }, { "epoch": 1.78, "learning_rate": 7.464055451114514e-06, "loss": 0.3408, "step": 5371 }, { "epoch": 1.78, "learning_rate": 7.460589324553887e-06, "loss": 0.3087, "step": 5372 }, { "epoch": 1.78, "learning_rate": 7.457123524078137e-06, "loss": 0.2539, "step": 5373 }, { "epoch": 1.78, "learning_rate": 7.453658050132316e-06, "loss": 0.3827, "step": 5374 }, { "epoch": 1.78, "learning_rate": 7.450192903161424e-06, "loss": 0.3789, "step": 5375 }, { "epoch": 1.78, "learning_rate": 7.446728083610417e-06, "loss": 0.323, "step": 5376 }, { "epoch": 1.78, "learning_rate": 7.443263591924212e-06, "loss": 0.3558, "step": 5377 }, { "epoch": 1.78, "learning_rate": 7.439799428547687e-06, "loss": 0.376, "step": 5378 }, { "epoch": 1.79, "learning_rate": 7.43633559392567e-06, "loss": 0.3518, "step": 5379 }, { "epoch": 1.79, "learning_rate": 7.432872088502951e-06, "loss": 0.4194, "step": 5380 }, { "epoch": 1.79, "learning_rate": 7.429408912724279e-06, "loss": 0.3237, "step": 5381 }, { "epoch": 1.79, "learning_rate": 7.4259460670343555e-06, "loss": 0.2549, "step": 5382 }, { "epoch": 1.79, "learning_rate": 7.422483551877848e-06, "loss": 0.4307, "step": 5383 }, { "epoch": 1.79, "learning_rate": 7.419021367699375e-06, "loss": 0.3285, "step": 5384 }, { "epoch": 1.79, "learning_rate": 7.415559514943518e-06, "loss": 0.3816, "step": 5385 }, { "epoch": 1.79, "learning_rate": 7.4120979940548054e-06, "loss": 0.2934, "step": 5386 }, { "epoch": 1.79, "learning_rate": 7.408636805477734e-06, "loss": 0.3148, "step": 5387 }, { "epoch": 1.79, "learning_rate": 7.4051759496567535e-06, "loss": 0.2224, "step": 5388 }, { "epoch": 1.79, "learning_rate": 7.401715427036273e-06, "loss": 0.3206, "step": 5389 }, { "epoch": 1.79, "learning_rate": 7.398255238060654e-06, "loss": 0.3512, "step": 5390 }, { "epoch": 1.79, "learning_rate": 7.39479538317422e-06, "loss": 0.3271, "step": 5391 }, { "epoch": 1.79, "learning_rate": 7.3913358628212515e-06, "loss": 0.3315, "step": 5392 }, { "epoch": 1.79, "learning_rate": 7.38787667744598e-06, "loss": 0.2655, "step": 5393 }, { "epoch": 1.79, "learning_rate": 7.3844178274926e-06, "loss": 0.3645, "step": 5394 }, { "epoch": 1.79, "learning_rate": 7.380959313405262e-06, "loss": 0.3428, "step": 5395 }, { "epoch": 1.79, "learning_rate": 7.377501135628072e-06, "loss": 0.2894, "step": 5396 }, { "epoch": 1.79, "learning_rate": 7.374043294605096e-06, "loss": 0.3107, "step": 5397 }, { "epoch": 1.79, "learning_rate": 7.370585790780352e-06, "loss": 0.2616, "step": 5398 }, { "epoch": 1.79, "learning_rate": 7.367128624597817e-06, "loss": 0.3274, "step": 5399 }, { "epoch": 1.79, "learning_rate": 7.363671796501426e-06, "loss": 0.3306, "step": 5400 }, { "epoch": 1.79, "learning_rate": 7.360215306935066e-06, "loss": 0.3524, "step": 5401 }, { "epoch": 1.79, "learning_rate": 7.356759156342584e-06, "loss": 0.3793, "step": 5402 }, { "epoch": 1.79, "learning_rate": 7.353303345167786e-06, "loss": 0.3921, "step": 5403 }, { "epoch": 1.79, "learning_rate": 7.34984787385443e-06, "loss": 0.3093, "step": 5404 }, { "epoch": 1.79, "learning_rate": 7.3463927428462315e-06, "loss": 0.2607, "step": 5405 }, { "epoch": 1.79, "learning_rate": 7.3429379525868685e-06, "loss": 0.3242, "step": 5406 }, { "epoch": 1.79, "learning_rate": 7.339483503519959e-06, "loss": 0.3571, "step": 5407 }, { "epoch": 1.79, "learning_rate": 7.336029396089095e-06, "loss": 0.3634, "step": 5408 }, { "epoch": 1.8, "learning_rate": 7.332575630737817e-06, "loss": 0.3551, "step": 5409 }, { "epoch": 1.8, "learning_rate": 7.329122207909617e-06, "loss": 0.2513, "step": 5410 }, { "epoch": 1.8, "learning_rate": 7.325669128047952e-06, "loss": 0.3365, "step": 5411 }, { "epoch": 1.8, "learning_rate": 7.322216391596229e-06, "loss": 0.3562, "step": 5412 }, { "epoch": 1.8, "learning_rate": 7.3187639989978186e-06, "loss": 0.3237, "step": 5413 }, { "epoch": 1.8, "learning_rate": 7.315311950696032e-06, "loss": 0.305, "step": 5414 }, { "epoch": 1.8, "learning_rate": 7.31186024713415e-06, "loss": 0.2714, "step": 5415 }, { "epoch": 1.8, "learning_rate": 7.308408888755406e-06, "loss": 0.3759, "step": 5416 }, { "epoch": 1.8, "learning_rate": 7.304957876002988e-06, "loss": 0.3298, "step": 5417 }, { "epoch": 1.8, "learning_rate": 7.301507209320036e-06, "loss": 0.3223, "step": 5418 }, { "epoch": 1.8, "learning_rate": 7.298056889149651e-06, "loss": 0.303, "step": 5419 }, { "epoch": 1.8, "learning_rate": 7.294606915934891e-06, "loss": 0.2981, "step": 5420 }, { "epoch": 1.8, "learning_rate": 7.291157290118758e-06, "loss": 0.3185, "step": 5421 }, { "epoch": 1.8, "learning_rate": 7.287708012144222e-06, "loss": 0.3882, "step": 5422 }, { "epoch": 1.8, "learning_rate": 7.284259082454204e-06, "loss": 0.3123, "step": 5423 }, { "epoch": 1.8, "learning_rate": 7.280810501491578e-06, "loss": 0.3297, "step": 5424 }, { "epoch": 1.8, "learning_rate": 7.277362269699178e-06, "loss": 0.3278, "step": 5425 }, { "epoch": 1.8, "learning_rate": 7.273914387519789e-06, "loss": 0.3132, "step": 5426 }, { "epoch": 1.8, "learning_rate": 7.270466855396152e-06, "loss": 0.368, "step": 5427 }, { "epoch": 1.8, "learning_rate": 7.267019673770965e-06, "loss": 0.4122, "step": 5428 }, { "epoch": 1.8, "learning_rate": 7.263572843086876e-06, "loss": 0.3513, "step": 5429 }, { "epoch": 1.8, "learning_rate": 7.260126363786492e-06, "loss": 0.2738, "step": 5430 }, { "epoch": 1.8, "learning_rate": 7.256680236312377e-06, "loss": 0.3658, "step": 5431 }, { "epoch": 1.8, "learning_rate": 7.253234461107044e-06, "loss": 0.3372, "step": 5432 }, { "epoch": 1.8, "learning_rate": 7.249789038612966e-06, "loss": 0.303, "step": 5433 }, { "epoch": 1.8, "learning_rate": 7.246343969272574e-06, "loss": 0.2742, "step": 5434 }, { "epoch": 1.8, "learning_rate": 7.242899253528237e-06, "loss": 0.2701, "step": 5435 }, { "epoch": 1.8, "learning_rate": 7.239454891822295e-06, "loss": 0.3368, "step": 5436 }, { "epoch": 1.8, "learning_rate": 7.23601088459704e-06, "loss": 0.3429, "step": 5437 }, { "epoch": 1.8, "learning_rate": 7.232567232294711e-06, "loss": 0.2966, "step": 5438 }, { "epoch": 1.81, "learning_rate": 7.229123935357509e-06, "loss": 0.3584, "step": 5439 }, { "epoch": 1.81, "learning_rate": 7.225680994227586e-06, "loss": 0.3982, "step": 5440 }, { "epoch": 1.81, "learning_rate": 7.222238409347053e-06, "loss": 0.3251, "step": 5441 }, { "epoch": 1.81, "learning_rate": 7.218796181157965e-06, "loss": 0.379, "step": 5442 }, { "epoch": 1.81, "learning_rate": 7.2153543101023386e-06, "loss": 0.3372, "step": 5443 }, { "epoch": 1.81, "learning_rate": 7.211912796622142e-06, "loss": 0.3056, "step": 5444 }, { "epoch": 1.81, "learning_rate": 7.208471641159307e-06, "loss": 0.402, "step": 5445 }, { "epoch": 1.81, "learning_rate": 7.205030844155702e-06, "loss": 0.3762, "step": 5446 }, { "epoch": 1.81, "learning_rate": 7.2015904060531606e-06, "loss": 0.3315, "step": 5447 }, { "epoch": 1.81, "learning_rate": 7.1981503272934765e-06, "loss": 0.3906, "step": 5448 }, { "epoch": 1.81, "learning_rate": 7.194710608318376e-06, "loss": 0.2997, "step": 5449 }, { "epoch": 1.81, "learning_rate": 7.1912712495695605e-06, "loss": 0.2657, "step": 5450 }, { "epoch": 1.81, "learning_rate": 7.187832251488675e-06, "loss": 0.3781, "step": 5451 }, { "epoch": 1.81, "learning_rate": 7.184393614517319e-06, "loss": 0.3496, "step": 5452 }, { "epoch": 1.81, "learning_rate": 7.180955339097049e-06, "loss": 0.2825, "step": 5453 }, { "epoch": 1.81, "learning_rate": 7.177517425669372e-06, "loss": 0.4019, "step": 5454 }, { "epoch": 1.81, "learning_rate": 7.174079874675751e-06, "loss": 0.2866, "step": 5455 }, { "epoch": 1.81, "learning_rate": 7.170642686557598e-06, "loss": 0.3234, "step": 5456 }, { "epoch": 1.81, "learning_rate": 7.1672058617562816e-06, "loss": 0.3661, "step": 5457 }, { "epoch": 1.81, "learning_rate": 7.163769400713121e-06, "loss": 0.3915, "step": 5458 }, { "epoch": 1.81, "learning_rate": 7.1603333038693955e-06, "loss": 0.2982, "step": 5459 }, { "epoch": 1.81, "learning_rate": 7.156897571666331e-06, "loss": 0.2728, "step": 5460 }, { "epoch": 1.81, "learning_rate": 7.153462204545114e-06, "loss": 0.3718, "step": 5461 }, { "epoch": 1.81, "learning_rate": 7.15002720294687e-06, "loss": 0.3904, "step": 5462 }, { "epoch": 1.81, "learning_rate": 7.146592567312689e-06, "loss": 0.3706, "step": 5463 }, { "epoch": 1.81, "learning_rate": 7.143158298083615e-06, "loss": 0.251, "step": 5464 }, { "epoch": 1.81, "learning_rate": 7.13972439570064e-06, "loss": 0.2982, "step": 5465 }, { "epoch": 1.81, "learning_rate": 7.136290860604706e-06, "loss": 0.3263, "step": 5466 }, { "epoch": 1.81, "learning_rate": 7.132857693236719e-06, "loss": 0.3201, "step": 5467 }, { "epoch": 1.81, "learning_rate": 7.12942489403753e-06, "loss": 0.3793, "step": 5468 }, { "epoch": 1.82, "learning_rate": 7.125992463447937e-06, "loss": 0.3036, "step": 5469 }, { "epoch": 1.82, "learning_rate": 7.122560401908701e-06, "loss": 0.2863, "step": 5470 }, { "epoch": 1.82, "learning_rate": 7.119128709860533e-06, "loss": 0.3661, "step": 5471 }, { "epoch": 1.82, "learning_rate": 7.115697387744095e-06, "loss": 0.2597, "step": 5472 }, { "epoch": 1.82, "learning_rate": 7.112266436000001e-06, "loss": 0.3846, "step": 5473 }, { "epoch": 1.82, "learning_rate": 7.108835855068821e-06, "loss": 0.3001, "step": 5474 }, { "epoch": 1.82, "learning_rate": 7.105405645391075e-06, "loss": 0.4539, "step": 5475 }, { "epoch": 1.82, "learning_rate": 7.1019758074072305e-06, "loss": 0.2601, "step": 5476 }, { "epoch": 1.82, "learning_rate": 7.098546341557713e-06, "loss": 0.2897, "step": 5477 }, { "epoch": 1.82, "learning_rate": 7.0951172482829005e-06, "loss": 0.3722, "step": 5478 }, { "epoch": 1.82, "learning_rate": 7.091688528023121e-06, "loss": 0.4644, "step": 5479 }, { "epoch": 1.82, "learning_rate": 7.0882601812186555e-06, "loss": 0.3278, "step": 5480 }, { "epoch": 1.82, "learning_rate": 7.084832208309739e-06, "loss": 0.3511, "step": 5481 }, { "epoch": 1.82, "learning_rate": 7.081404609736558e-06, "loss": 0.3042, "step": 5482 }, { "epoch": 1.82, "learning_rate": 7.077977385939242e-06, "loss": 0.3548, "step": 5483 }, { "epoch": 1.82, "learning_rate": 7.074550537357883e-06, "loss": 0.2883, "step": 5484 }, { "epoch": 1.82, "learning_rate": 7.0711240644325255e-06, "loss": 0.2626, "step": 5485 }, { "epoch": 1.82, "learning_rate": 7.067697967603155e-06, "loss": 0.3071, "step": 5486 }, { "epoch": 1.82, "learning_rate": 7.06427224730972e-06, "loss": 0.3197, "step": 5487 }, { "epoch": 1.82, "learning_rate": 7.060846903992113e-06, "loss": 0.3915, "step": 5488 }, { "epoch": 1.82, "learning_rate": 7.05742193809019e-06, "loss": 0.2897, "step": 5489 }, { "epoch": 1.82, "learning_rate": 7.053997350043738e-06, "loss": 0.2526, "step": 5490 }, { "epoch": 1.82, "learning_rate": 7.050573140292511e-06, "loss": 0.3015, "step": 5491 }, { "epoch": 1.82, "learning_rate": 7.047149309276213e-06, "loss": 0.333, "step": 5492 }, { "epoch": 1.82, "learning_rate": 7.043725857434497e-06, "loss": 0.3116, "step": 5493 }, { "epoch": 1.82, "learning_rate": 7.040302785206965e-06, "loss": 0.2651, "step": 5494 }, { "epoch": 1.82, "learning_rate": 7.0368800930331715e-06, "loss": 0.2903, "step": 5495 }, { "epoch": 1.82, "learning_rate": 7.033457781352633e-06, "loss": 0.3375, "step": 5496 }, { "epoch": 1.82, "learning_rate": 7.030035850604792e-06, "loss": 0.2812, "step": 5497 }, { "epoch": 1.82, "learning_rate": 7.026614301229069e-06, "loss": 0.2797, "step": 5498 }, { "epoch": 1.83, "learning_rate": 7.023193133664819e-06, "loss": 0.3455, "step": 5499 }, { "epoch": 1.83, "learning_rate": 7.019772348351353e-06, "loss": 0.3949, "step": 5500 }, { "epoch": 1.83, "learning_rate": 7.016351945727935e-06, "loss": 0.3807, "step": 5501 }, { "epoch": 1.83, "learning_rate": 7.012931926233779e-06, "loss": 0.3738, "step": 5502 }, { "epoch": 1.83, "learning_rate": 7.009512290308047e-06, "loss": 0.2612, "step": 5503 }, { "epoch": 1.83, "learning_rate": 7.006093038389854e-06, "loss": 0.4934, "step": 5504 }, { "epoch": 1.83, "learning_rate": 7.00267417091826e-06, "loss": 0.2926, "step": 5505 }, { "epoch": 1.83, "learning_rate": 6.999255688332286e-06, "loss": 0.363, "step": 5506 }, { "epoch": 1.83, "learning_rate": 6.9958375910708955e-06, "loss": 0.3387, "step": 5507 }, { "epoch": 1.83, "learning_rate": 6.9924198795730084e-06, "loss": 0.4009, "step": 5508 }, { "epoch": 1.83, "learning_rate": 6.989002554277489e-06, "loss": 0.3579, "step": 5509 }, { "epoch": 1.83, "learning_rate": 6.98558561562316e-06, "loss": 0.3311, "step": 5510 }, { "epoch": 1.83, "learning_rate": 6.982169064048781e-06, "loss": 0.3132, "step": 5511 }, { "epoch": 1.83, "learning_rate": 6.978752899993075e-06, "loss": 0.3527, "step": 5512 }, { "epoch": 1.83, "learning_rate": 6.9753371238947125e-06, "loss": 0.3023, "step": 5513 }, { "epoch": 1.83, "learning_rate": 6.971921736192306e-06, "loss": 0.3822, "step": 5514 }, { "epoch": 1.83, "learning_rate": 6.968506737324429e-06, "loss": 0.2676, "step": 5515 }, { "epoch": 1.83, "learning_rate": 6.9650921277296015e-06, "loss": 0.387, "step": 5516 }, { "epoch": 1.83, "learning_rate": 6.961677907846293e-06, "loss": 0.2838, "step": 5517 }, { "epoch": 1.83, "learning_rate": 6.958264078112915e-06, "loss": 0.2716, "step": 5518 }, { "epoch": 1.83, "learning_rate": 6.95485063896784e-06, "loss": 0.3425, "step": 5519 }, { "epoch": 1.83, "learning_rate": 6.9514375908493905e-06, "loss": 0.349, "step": 5520 }, { "epoch": 1.83, "learning_rate": 6.948024934195832e-06, "loss": 0.3477, "step": 5521 }, { "epoch": 1.83, "learning_rate": 6.944612669445381e-06, "loss": 0.3766, "step": 5522 }, { "epoch": 1.83, "learning_rate": 6.941200797036206e-06, "loss": 0.3108, "step": 5523 }, { "epoch": 1.83, "learning_rate": 6.937789317406429e-06, "loss": 0.3619, "step": 5524 }, { "epoch": 1.83, "learning_rate": 6.934378230994111e-06, "loss": 0.2697, "step": 5525 }, { "epoch": 1.83, "learning_rate": 6.93096753823727e-06, "loss": 0.3394, "step": 5526 }, { "epoch": 1.83, "learning_rate": 6.927557239573872e-06, "loss": 0.3174, "step": 5527 }, { "epoch": 1.83, "learning_rate": 6.924147335441833e-06, "loss": 0.3597, "step": 5528 }, { "epoch": 1.84, "learning_rate": 6.920737826279018e-06, "loss": 0.3564, "step": 5529 }, { "epoch": 1.84, "learning_rate": 6.917328712523242e-06, "loss": 0.3287, "step": 5530 }, { "epoch": 1.84, "learning_rate": 6.913919994612267e-06, "loss": 0.3148, "step": 5531 }, { "epoch": 1.84, "learning_rate": 6.910511672983803e-06, "loss": 0.2347, "step": 5532 }, { "epoch": 1.84, "learning_rate": 6.9071037480755155e-06, "loss": 0.3051, "step": 5533 }, { "epoch": 1.84, "learning_rate": 6.9036962203250115e-06, "loss": 0.3673, "step": 5534 }, { "epoch": 1.84, "learning_rate": 6.90028909016985e-06, "loss": 0.3832, "step": 5535 }, { "epoch": 1.84, "learning_rate": 6.896882358047543e-06, "loss": 0.2195, "step": 5536 }, { "epoch": 1.84, "learning_rate": 6.893476024395547e-06, "loss": 0.2339, "step": 5537 }, { "epoch": 1.84, "learning_rate": 6.890070089651271e-06, "loss": 0.3773, "step": 5538 }, { "epoch": 1.84, "learning_rate": 6.886664554252061e-06, "loss": 0.2761, "step": 5539 }, { "epoch": 1.84, "learning_rate": 6.883259418635229e-06, "loss": 0.3711, "step": 5540 }, { "epoch": 1.84, "learning_rate": 6.879854683238023e-06, "loss": 0.4177, "step": 5541 }, { "epoch": 1.84, "learning_rate": 6.8764503484976454e-06, "loss": 0.2723, "step": 5542 }, { "epoch": 1.84, "learning_rate": 6.873046414851245e-06, "loss": 0.3329, "step": 5543 }, { "epoch": 1.84, "learning_rate": 6.8696428827359265e-06, "loss": 0.3319, "step": 5544 }, { "epoch": 1.84, "learning_rate": 6.866239752588723e-06, "loss": 0.3328, "step": 5545 }, { "epoch": 1.84, "learning_rate": 6.86283702484664e-06, "loss": 0.3525, "step": 5546 }, { "epoch": 1.84, "learning_rate": 6.859434699946616e-06, "loss": 0.3115, "step": 5547 }, { "epoch": 1.84, "learning_rate": 6.856032778325543e-06, "loss": 0.3093, "step": 5548 }, { "epoch": 1.84, "learning_rate": 6.852631260420262e-06, "loss": 0.35, "step": 5549 }, { "epoch": 1.84, "learning_rate": 6.849230146667561e-06, "loss": 0.3549, "step": 5550 }, { "epoch": 1.84, "learning_rate": 6.8458294375041766e-06, "loss": 0.2766, "step": 5551 }, { "epoch": 1.84, "learning_rate": 6.84242913336679e-06, "loss": 0.3547, "step": 5552 }, { "epoch": 1.84, "learning_rate": 6.839029234692033e-06, "loss": 0.3485, "step": 5553 }, { "epoch": 1.84, "learning_rate": 6.835629741916487e-06, "loss": 0.28, "step": 5554 }, { "epoch": 1.84, "learning_rate": 6.832230655476678e-06, "loss": 0.2852, "step": 5555 }, { "epoch": 1.84, "learning_rate": 6.828831975809085e-06, "loss": 0.3062, "step": 5556 }, { "epoch": 1.84, "learning_rate": 6.8254337033501286e-06, "loss": 0.3774, "step": 5557 }, { "epoch": 1.84, "learning_rate": 6.822035838536183e-06, "loss": 0.3806, "step": 5558 }, { "epoch": 1.85, "learning_rate": 6.818638381803563e-06, "loss": 0.3531, "step": 5559 }, { "epoch": 1.85, "learning_rate": 6.815241333588535e-06, "loss": 0.2813, "step": 5560 }, { "epoch": 1.85, "learning_rate": 6.811844694327317e-06, "loss": 0.3408, "step": 5561 }, { "epoch": 1.85, "learning_rate": 6.808448464456066e-06, "loss": 0.2758, "step": 5562 }, { "epoch": 1.85, "learning_rate": 6.80505264441089e-06, "loss": 0.3676, "step": 5563 }, { "epoch": 1.85, "learning_rate": 6.80165723462785e-06, "loss": 0.3033, "step": 5564 }, { "epoch": 1.85, "learning_rate": 6.79826223554295e-06, "loss": 0.3162, "step": 5565 }, { "epoch": 1.85, "learning_rate": 6.794867647592134e-06, "loss": 0.3479, "step": 5566 }, { "epoch": 1.85, "learning_rate": 6.791473471211303e-06, "loss": 0.3799, "step": 5567 }, { "epoch": 1.85, "learning_rate": 6.788079706836303e-06, "loss": 0.3034, "step": 5568 }, { "epoch": 1.85, "learning_rate": 6.784686354902928e-06, "loss": 0.2427, "step": 5569 }, { "epoch": 1.85, "learning_rate": 6.781293415846911e-06, "loss": 0.3354, "step": 5570 }, { "epoch": 1.85, "learning_rate": 6.777900890103943e-06, "loss": 0.2696, "step": 5571 }, { "epoch": 1.85, "learning_rate": 6.774508778109663e-06, "loss": 0.3154, "step": 5572 }, { "epoch": 1.85, "learning_rate": 6.7711170802996375e-06, "loss": 0.2937, "step": 5573 }, { "epoch": 1.85, "learning_rate": 6.767725797109401e-06, "loss": 0.3431, "step": 5574 }, { "epoch": 1.85, "learning_rate": 6.764334928974425e-06, "loss": 0.2858, "step": 5575 }, { "epoch": 1.85, "learning_rate": 6.760944476330133e-06, "loss": 0.3248, "step": 5576 }, { "epoch": 1.85, "learning_rate": 6.757554439611887e-06, "loss": 0.3989, "step": 5577 }, { "epoch": 1.85, "learning_rate": 6.754164819255007e-06, "loss": 0.2621, "step": 5578 }, { "epoch": 1.85, "learning_rate": 6.750775615694749e-06, "loss": 0.3843, "step": 5579 }, { "epoch": 1.85, "learning_rate": 6.747386829366318e-06, "loss": 0.3411, "step": 5580 }, { "epoch": 1.85, "learning_rate": 6.74399846070487e-06, "loss": 0.2862, "step": 5581 }, { "epoch": 1.85, "learning_rate": 6.740610510145502e-06, "loss": 0.3732, "step": 5582 }, { "epoch": 1.85, "learning_rate": 6.73722297812326e-06, "loss": 0.3052, "step": 5583 }, { "epoch": 1.85, "learning_rate": 6.733835865073137e-06, "loss": 0.3403, "step": 5584 }, { "epoch": 1.85, "learning_rate": 6.7304491714300705e-06, "loss": 0.286, "step": 5585 }, { "epoch": 1.85, "learning_rate": 6.727062897628949e-06, "loss": 0.4324, "step": 5586 }, { "epoch": 1.85, "learning_rate": 6.723677044104593e-06, "loss": 0.3978, "step": 5587 }, { "epoch": 1.85, "learning_rate": 6.720291611291785e-06, "loss": 0.3364, "step": 5588 }, { "epoch": 1.85, "learning_rate": 6.71690659962525e-06, "loss": 0.3167, "step": 5589 }, { "epoch": 1.86, "learning_rate": 6.7135220095396505e-06, "loss": 0.3213, "step": 5590 }, { "epoch": 1.86, "learning_rate": 6.710137841469602e-06, "loss": 0.3099, "step": 5591 }, { "epoch": 1.86, "learning_rate": 6.7067540958496654e-06, "loss": 0.302, "step": 5592 }, { "epoch": 1.86, "learning_rate": 6.703370773114352e-06, "loss": 0.3118, "step": 5593 }, { "epoch": 1.86, "learning_rate": 6.6999878736981014e-06, "loss": 0.3752, "step": 5594 }, { "epoch": 1.86, "learning_rate": 6.696605398035319e-06, "loss": 0.3442, "step": 5595 }, { "epoch": 1.86, "learning_rate": 6.693223346560344e-06, "loss": 0.3206, "step": 5596 }, { "epoch": 1.86, "learning_rate": 6.689841719707465e-06, "loss": 0.3282, "step": 5597 }, { "epoch": 1.86, "learning_rate": 6.686460517910919e-06, "loss": 0.3259, "step": 5598 }, { "epoch": 1.86, "learning_rate": 6.6830797416048795e-06, "loss": 0.3212, "step": 5599 }, { "epoch": 1.86, "learning_rate": 6.679699391223478e-06, "loss": 0.2951, "step": 5600 }, { "epoch": 1.86, "learning_rate": 6.676319467200777e-06, "loss": 0.2592, "step": 5601 }, { "epoch": 1.86, "learning_rate": 6.6729399699707935e-06, "loss": 0.4392, "step": 5602 }, { "epoch": 1.86, "learning_rate": 6.669560899967486e-06, "loss": 0.2653, "step": 5603 }, { "epoch": 1.86, "learning_rate": 6.666182257624765e-06, "loss": 0.2814, "step": 5604 }, { "epoch": 1.86, "learning_rate": 6.662804043376478e-06, "loss": 0.3529, "step": 5605 }, { "epoch": 1.86, "learning_rate": 6.659426257656421e-06, "loss": 0.2719, "step": 5606 }, { "epoch": 1.86, "learning_rate": 6.656048900898333e-06, "loss": 0.3191, "step": 5607 }, { "epoch": 1.86, "learning_rate": 6.652671973535899e-06, "loss": 0.3691, "step": 5608 }, { "epoch": 1.86, "learning_rate": 6.649295476002753e-06, "loss": 0.3414, "step": 5609 }, { "epoch": 1.86, "learning_rate": 6.645919408732463e-06, "loss": 0.321, "step": 5610 }, { "epoch": 1.86, "learning_rate": 6.642543772158554e-06, "loss": 0.3304, "step": 5611 }, { "epoch": 1.86, "learning_rate": 6.639168566714488e-06, "loss": 0.3799, "step": 5612 }, { "epoch": 1.86, "learning_rate": 6.635793792833676e-06, "loss": 0.3721, "step": 5613 }, { "epoch": 1.86, "learning_rate": 6.632419450949473e-06, "loss": 0.3204, "step": 5614 }, { "epoch": 1.86, "learning_rate": 6.6290455414951715e-06, "loss": 0.2645, "step": 5615 }, { "epoch": 1.86, "learning_rate": 6.625672064904018e-06, "loss": 0.3317, "step": 5616 }, { "epoch": 1.86, "learning_rate": 6.622299021609202e-06, "loss": 0.3314, "step": 5617 }, { "epoch": 1.86, "learning_rate": 6.618926412043847e-06, "loss": 0.3336, "step": 5618 }, { "epoch": 1.86, "learning_rate": 6.615554236641036e-06, "loss": 0.3109, "step": 5619 }, { "epoch": 1.87, "learning_rate": 6.612182495833784e-06, "loss": 0.2789, "step": 5620 }, { "epoch": 1.87, "learning_rate": 6.608811190055063e-06, "loss": 0.4037, "step": 5621 }, { "epoch": 1.87, "learning_rate": 6.605440319737771e-06, "loss": 0.2715, "step": 5622 }, { "epoch": 1.87, "learning_rate": 6.602069885314766e-06, "loss": 0.2561, "step": 5623 }, { "epoch": 1.87, "learning_rate": 6.598699887218842e-06, "loss": 0.2749, "step": 5624 }, { "epoch": 1.87, "learning_rate": 6.595330325882743e-06, "loss": 0.3853, "step": 5625 }, { "epoch": 1.87, "learning_rate": 6.591961201739152e-06, "loss": 0.3156, "step": 5626 }, { "epoch": 1.87, "learning_rate": 6.588592515220695e-06, "loss": 0.3231, "step": 5627 }, { "epoch": 1.87, "learning_rate": 6.585224266759947e-06, "loss": 0.3246, "step": 5628 }, { "epoch": 1.87, "learning_rate": 6.581856456789419e-06, "loss": 0.2269, "step": 5629 }, { "epoch": 1.87, "learning_rate": 6.578489085741573e-06, "loss": 0.2297, "step": 5630 }, { "epoch": 1.87, "learning_rate": 6.575122154048811e-06, "loss": 0.3652, "step": 5631 }, { "epoch": 1.87, "learning_rate": 6.571755662143482e-06, "loss": 0.3209, "step": 5632 }, { "epoch": 1.87, "learning_rate": 6.5683896104578745e-06, "loss": 0.2611, "step": 5633 }, { "epoch": 1.87, "learning_rate": 6.565023999424227e-06, "loss": 0.3351, "step": 5634 }, { "epoch": 1.87, "learning_rate": 6.561658829474707e-06, "loss": 0.2787, "step": 5635 }, { "epoch": 1.87, "learning_rate": 6.55829410104144e-06, "loss": 0.2679, "step": 5636 }, { "epoch": 1.87, "learning_rate": 6.554929814556492e-06, "loss": 0.3181, "step": 5637 }, { "epoch": 1.87, "learning_rate": 6.551565970451864e-06, "loss": 0.3126, "step": 5638 }, { "epoch": 1.87, "learning_rate": 6.5482025691595104e-06, "loss": 0.3431, "step": 5639 }, { "epoch": 1.87, "learning_rate": 6.544839611111324e-06, "loss": 0.2771, "step": 5640 }, { "epoch": 1.87, "learning_rate": 6.541477096739144e-06, "loss": 0.3461, "step": 5641 }, { "epoch": 1.87, "learning_rate": 6.538115026474743e-06, "loss": 0.3018, "step": 5642 }, { "epoch": 1.87, "learning_rate": 6.5347534007498444e-06, "loss": 0.3419, "step": 5643 }, { "epoch": 1.87, "learning_rate": 6.531392219996119e-06, "loss": 0.2827, "step": 5644 }, { "epoch": 1.87, "learning_rate": 6.52803148464517e-06, "loss": 0.3063, "step": 5645 }, { "epoch": 1.87, "learning_rate": 6.524671195128553e-06, "loss": 0.2922, "step": 5646 }, { "epoch": 1.87, "learning_rate": 6.521311351877755e-06, "loss": 0.3562, "step": 5647 }, { "epoch": 1.87, "learning_rate": 6.517951955324222e-06, "loss": 0.3574, "step": 5648 }, { "epoch": 1.87, "learning_rate": 6.514593005899321e-06, "loss": 0.2437, "step": 5649 }, { "epoch": 1.88, "learning_rate": 6.5112345040343805e-06, "loss": 0.2632, "step": 5650 }, { "epoch": 1.88, "learning_rate": 6.507876450160664e-06, "loss": 0.298, "step": 5651 }, { "epoch": 1.88, "learning_rate": 6.504518844709378e-06, "loss": 0.2333, "step": 5652 }, { "epoch": 1.88, "learning_rate": 6.50116168811167e-06, "loss": 0.3875, "step": 5653 }, { "epoch": 1.88, "learning_rate": 6.497804980798637e-06, "loss": 0.4114, "step": 5654 }, { "epoch": 1.88, "learning_rate": 6.4944487232013065e-06, "loss": 0.2993, "step": 5655 }, { "epoch": 1.88, "learning_rate": 6.491092915750654e-06, "loss": 0.3368, "step": 5656 }, { "epoch": 1.88, "learning_rate": 6.487737558877604e-06, "loss": 0.3262, "step": 5657 }, { "epoch": 1.88, "learning_rate": 6.4843826530130085e-06, "loss": 0.3083, "step": 5658 }, { "epoch": 1.88, "learning_rate": 6.481028198587675e-06, "loss": 0.3661, "step": 5659 }, { "epoch": 1.88, "learning_rate": 6.477674196032346e-06, "loss": 0.2837, "step": 5660 }, { "epoch": 1.88, "learning_rate": 6.474320645777711e-06, "loss": 0.269, "step": 5661 }, { "epoch": 1.88, "learning_rate": 6.470967548254399e-06, "loss": 0.2659, "step": 5662 }, { "epoch": 1.88, "learning_rate": 6.467614903892973e-06, "loss": 0.3134, "step": 5663 }, { "epoch": 1.88, "learning_rate": 6.464262713123948e-06, "loss": 0.3049, "step": 5664 }, { "epoch": 1.88, "learning_rate": 6.460910976377782e-06, "loss": 0.2542, "step": 5665 }, { "epoch": 1.88, "learning_rate": 6.457559694084865e-06, "loss": 0.3881, "step": 5666 }, { "epoch": 1.88, "learning_rate": 6.454208866675537e-06, "loss": 0.2789, "step": 5667 }, { "epoch": 1.88, "learning_rate": 6.450858494580076e-06, "loss": 0.3982, "step": 5668 }, { "epoch": 1.88, "learning_rate": 6.447508578228705e-06, "loss": 0.278, "step": 5669 }, { "epoch": 1.88, "learning_rate": 6.4441591180515794e-06, "loss": 0.2024, "step": 5670 }, { "epoch": 1.88, "learning_rate": 6.440810114478806e-06, "loss": 0.304, "step": 5671 }, { "epoch": 1.88, "learning_rate": 6.437461567940428e-06, "loss": 0.2839, "step": 5672 }, { "epoch": 1.88, "learning_rate": 6.4341134788664325e-06, "loss": 0.3353, "step": 5673 }, { "epoch": 1.88, "learning_rate": 6.430765847686749e-06, "loss": 0.2883, "step": 5674 }, { "epoch": 1.88, "learning_rate": 6.4274186748312394e-06, "loss": 0.3774, "step": 5675 }, { "epoch": 1.88, "learning_rate": 6.4240719607297205e-06, "loss": 0.2979, "step": 5676 }, { "epoch": 1.88, "learning_rate": 6.420725705811935e-06, "loss": 0.3208, "step": 5677 }, { "epoch": 1.88, "learning_rate": 6.417379910507579e-06, "loss": 0.3467, "step": 5678 }, { "epoch": 1.88, "learning_rate": 6.414034575246283e-06, "loss": 0.268, "step": 5679 }, { "epoch": 1.89, "learning_rate": 6.410689700457622e-06, "loss": 0.3643, "step": 5680 }, { "epoch": 1.89, "learning_rate": 6.407345286571109e-06, "loss": 0.2833, "step": 5681 }, { "epoch": 1.89, "learning_rate": 6.404001334016203e-06, "loss": 0.2306, "step": 5682 }, { "epoch": 1.89, "learning_rate": 6.400657843222295e-06, "loss": 0.3325, "step": 5683 }, { "epoch": 1.89, "learning_rate": 6.397314814618722e-06, "loss": 0.2549, "step": 5684 }, { "epoch": 1.89, "learning_rate": 6.393972248634766e-06, "loss": 0.3435, "step": 5685 }, { "epoch": 1.89, "learning_rate": 6.390630145699636e-06, "loss": 0.2729, "step": 5686 }, { "epoch": 1.89, "learning_rate": 6.387288506242497e-06, "loss": 0.2598, "step": 5687 }, { "epoch": 1.89, "learning_rate": 6.383947330692446e-06, "loss": 0.3735, "step": 5688 }, { "epoch": 1.89, "learning_rate": 6.380606619478524e-06, "loss": 0.2399, "step": 5689 }, { "epoch": 1.89, "learning_rate": 6.377266373029711e-06, "loss": 0.3108, "step": 5690 }, { "epoch": 1.89, "learning_rate": 6.373926591774923e-06, "loss": 0.3881, "step": 5691 }, { "epoch": 1.89, "learning_rate": 6.370587276143021e-06, "loss": 0.2693, "step": 5692 }, { "epoch": 1.89, "learning_rate": 6.36724842656281e-06, "loss": 0.2696, "step": 5693 }, { "epoch": 1.89, "learning_rate": 6.363910043463024e-06, "loss": 0.2703, "step": 5694 }, { "epoch": 1.89, "learning_rate": 6.360572127272349e-06, "loss": 0.2755, "step": 5695 }, { "epoch": 1.89, "learning_rate": 6.357234678419401e-06, "loss": 0.3214, "step": 5696 }, { "epoch": 1.89, "learning_rate": 6.353897697332749e-06, "loss": 0.2423, "step": 5697 }, { "epoch": 1.89, "learning_rate": 6.3505611844408845e-06, "loss": 0.2969, "step": 5698 }, { "epoch": 1.89, "learning_rate": 6.347225140172251e-06, "loss": 0.3286, "step": 5699 }, { "epoch": 1.89, "learning_rate": 6.343889564955228e-06, "loss": 0.3478, "step": 5700 }, { "epoch": 1.89, "learning_rate": 6.340554459218139e-06, "loss": 0.2999, "step": 5701 }, { "epoch": 1.89, "learning_rate": 6.337219823389243e-06, "loss": 0.3688, "step": 5702 }, { "epoch": 1.89, "learning_rate": 6.333885657896739e-06, "loss": 0.304, "step": 5703 }, { "epoch": 1.89, "learning_rate": 6.330551963168763e-06, "loss": 0.2968, "step": 5704 }, { "epoch": 1.89, "learning_rate": 6.327218739633397e-06, "loss": 0.3026, "step": 5705 }, { "epoch": 1.89, "learning_rate": 6.323885987718657e-06, "loss": 0.3134, "step": 5706 }, { "epoch": 1.89, "learning_rate": 6.320553707852501e-06, "loss": 0.2775, "step": 5707 }, { "epoch": 1.89, "learning_rate": 6.317221900462829e-06, "loss": 0.3532, "step": 5708 }, { "epoch": 1.89, "learning_rate": 6.313890565977473e-06, "loss": 0.2926, "step": 5709 }, { "epoch": 1.9, "learning_rate": 6.310559704824215e-06, "loss": 0.3138, "step": 5710 }, { "epoch": 1.9, "learning_rate": 6.307229317430762e-06, "loss": 0.36, "step": 5711 }, { "epoch": 1.9, "learning_rate": 6.30389940422477e-06, "loss": 0.3766, "step": 5712 }, { "epoch": 1.9, "learning_rate": 6.300569965633836e-06, "loss": 0.3199, "step": 5713 }, { "epoch": 1.9, "learning_rate": 6.2972410020854875e-06, "loss": 0.2633, "step": 5714 }, { "epoch": 1.9, "learning_rate": 6.293912514007199e-06, "loss": 0.2345, "step": 5715 }, { "epoch": 1.9, "learning_rate": 6.290584501826376e-06, "loss": 0.3281, "step": 5716 }, { "epoch": 1.9, "learning_rate": 6.287256965970377e-06, "loss": 0.3333, "step": 5717 }, { "epoch": 1.9, "learning_rate": 6.283929906866478e-06, "loss": 0.282, "step": 5718 }, { "epoch": 1.9, "learning_rate": 6.280603324941912e-06, "loss": 0.2904, "step": 5719 }, { "epoch": 1.9, "learning_rate": 6.277277220623843e-06, "loss": 0.2253, "step": 5720 }, { "epoch": 1.9, "learning_rate": 6.273951594339374e-06, "loss": 0.3466, "step": 5721 }, { "epoch": 1.9, "learning_rate": 6.270626446515551e-06, "loss": 0.2844, "step": 5722 }, { "epoch": 1.9, "learning_rate": 6.267301777579351e-06, "loss": 0.262, "step": 5723 }, { "epoch": 1.9, "learning_rate": 6.263977587957702e-06, "loss": 0.3494, "step": 5724 }, { "epoch": 1.9, "learning_rate": 6.2606538780774474e-06, "loss": 0.1749, "step": 5725 }, { "epoch": 1.9, "learning_rate": 6.257330648365394e-06, "loss": 0.2787, "step": 5726 }, { "epoch": 1.9, "learning_rate": 6.254007899248274e-06, "loss": 0.2716, "step": 5727 }, { "epoch": 1.9, "learning_rate": 6.250685631152762e-06, "loss": 0.3264, "step": 5728 }, { "epoch": 1.9, "learning_rate": 6.247363844505467e-06, "loss": 0.3266, "step": 5729 }, { "epoch": 1.9, "learning_rate": 6.244042539732942e-06, "loss": 0.3588, "step": 5730 }, { "epoch": 1.9, "learning_rate": 6.240721717261673e-06, "loss": 0.2927, "step": 5731 }, { "epoch": 1.9, "learning_rate": 6.2374013775180835e-06, "loss": 0.3437, "step": 5732 }, { "epoch": 1.9, "learning_rate": 6.234081520928541e-06, "loss": 0.302, "step": 5733 }, { "epoch": 1.9, "learning_rate": 6.230762147919342e-06, "loss": 0.2708, "step": 5734 }, { "epoch": 1.9, "learning_rate": 6.22744325891673e-06, "loss": 0.2499, "step": 5735 }, { "epoch": 1.9, "learning_rate": 6.224124854346881e-06, "loss": 0.3744, "step": 5736 }, { "epoch": 1.9, "learning_rate": 6.220806934635912e-06, "loss": 0.3896, "step": 5737 }, { "epoch": 1.9, "learning_rate": 6.217489500209879e-06, "loss": 0.2816, "step": 5738 }, { "epoch": 1.9, "learning_rate": 6.214172551494764e-06, "loss": 0.3607, "step": 5739 }, { "epoch": 1.91, "learning_rate": 6.210856088916499e-06, "loss": 0.3951, "step": 5740 }, { "epoch": 1.91, "learning_rate": 6.2075401129009535e-06, "loss": 0.3389, "step": 5741 }, { "epoch": 1.91, "learning_rate": 6.204224623873925e-06, "loss": 0.3126, "step": 5742 }, { "epoch": 1.91, "learning_rate": 6.2009096222611595e-06, "loss": 0.2588, "step": 5743 }, { "epoch": 1.91, "learning_rate": 6.1975951084883325e-06, "loss": 0.3389, "step": 5744 }, { "epoch": 1.91, "learning_rate": 6.194281082981063e-06, "loss": 0.3706, "step": 5745 }, { "epoch": 1.91, "learning_rate": 6.190967546164899e-06, "loss": 0.2848, "step": 5746 }, { "epoch": 1.91, "learning_rate": 6.187654498465331e-06, "loss": 0.3687, "step": 5747 }, { "epoch": 1.91, "learning_rate": 6.18434194030779e-06, "loss": 0.3638, "step": 5748 }, { "epoch": 1.91, "learning_rate": 6.1810298721176365e-06, "loss": 0.4244, "step": 5749 }, { "epoch": 1.91, "learning_rate": 6.177718294320178e-06, "loss": 0.3081, "step": 5750 }, { "epoch": 1.91, "learning_rate": 6.174407207340647e-06, "loss": 0.3492, "step": 5751 }, { "epoch": 1.91, "learning_rate": 6.171096611604226e-06, "loss": 0.2505, "step": 5752 }, { "epoch": 1.91, "learning_rate": 6.16778650753602e-06, "loss": 0.2907, "step": 5753 }, { "epoch": 1.91, "learning_rate": 6.16447689556108e-06, "loss": 0.2651, "step": 5754 }, { "epoch": 1.91, "learning_rate": 6.161167776104393e-06, "loss": 0.3065, "step": 5755 }, { "epoch": 1.91, "learning_rate": 6.157859149590883e-06, "loss": 0.3424, "step": 5756 }, { "epoch": 1.91, "learning_rate": 6.154551016445409e-06, "loss": 0.2705, "step": 5757 }, { "epoch": 1.91, "learning_rate": 6.15124337709277e-06, "loss": 0.3079, "step": 5758 }, { "epoch": 1.91, "learning_rate": 6.147936231957696e-06, "loss": 0.2507, "step": 5759 }, { "epoch": 1.91, "learning_rate": 6.144629581464855e-06, "loss": 0.277, "step": 5760 }, { "epoch": 1.91, "learning_rate": 6.141323426038857e-06, "loss": 0.2653, "step": 5761 }, { "epoch": 1.91, "learning_rate": 6.138017766104238e-06, "loss": 0.287, "step": 5762 }, { "epoch": 1.91, "learning_rate": 6.134712602085482e-06, "loss": 0.2304, "step": 5763 }, { "epoch": 1.91, "learning_rate": 6.131407934407003e-06, "loss": 0.3578, "step": 5764 }, { "epoch": 1.91, "learning_rate": 6.128103763493152e-06, "loss": 0.2797, "step": 5765 }, { "epoch": 1.91, "learning_rate": 6.124800089768219e-06, "loss": 0.2781, "step": 5766 }, { "epoch": 1.91, "learning_rate": 6.1214969136564225e-06, "loss": 0.303, "step": 5767 }, { "epoch": 1.91, "learning_rate": 6.118194235581925e-06, "loss": 0.2892, "step": 5768 }, { "epoch": 1.91, "learning_rate": 6.114892055968822e-06, "loss": 0.3312, "step": 5769 }, { "epoch": 1.92, "learning_rate": 6.111590375241146e-06, "loss": 0.3141, "step": 5770 }, { "epoch": 1.92, "learning_rate": 6.1082891938228625e-06, "loss": 0.3109, "step": 5771 }, { "epoch": 1.92, "learning_rate": 6.104988512137877e-06, "loss": 0.3577, "step": 5772 }, { "epoch": 1.92, "learning_rate": 6.101688330610034e-06, "loss": 0.2524, "step": 5773 }, { "epoch": 1.92, "learning_rate": 6.098388649663099e-06, "loss": 0.2543, "step": 5774 }, { "epoch": 1.92, "learning_rate": 6.095089469720786e-06, "loss": 0.2446, "step": 5775 }, { "epoch": 1.92, "learning_rate": 6.091790791206744e-06, "loss": 0.2601, "step": 5776 }, { "epoch": 1.92, "learning_rate": 6.088492614544554e-06, "loss": 0.3634, "step": 5777 }, { "epoch": 1.92, "learning_rate": 6.085194940157736e-06, "loss": 0.332, "step": 5778 }, { "epoch": 1.92, "learning_rate": 6.08189776846974e-06, "loss": 0.3806, "step": 5779 }, { "epoch": 1.92, "learning_rate": 6.078601099903956e-06, "loss": 0.3307, "step": 5780 }, { "epoch": 1.92, "learning_rate": 6.07530493488371e-06, "loss": 0.3831, "step": 5781 }, { "epoch": 1.92, "learning_rate": 6.072009273832257e-06, "loss": 0.3131, "step": 5782 }, { "epoch": 1.92, "learning_rate": 6.068714117172793e-06, "loss": 0.2809, "step": 5783 }, { "epoch": 1.92, "learning_rate": 6.065419465328452e-06, "loss": 0.2527, "step": 5784 }, { "epoch": 1.92, "learning_rate": 6.062125318722294e-06, "loss": 0.4037, "step": 5785 }, { "epoch": 1.92, "learning_rate": 6.058831677777326e-06, "loss": 0.3601, "step": 5786 }, { "epoch": 1.92, "learning_rate": 6.055538542916477e-06, "loss": 0.2334, "step": 5787 }, { "epoch": 1.92, "learning_rate": 6.052245914562618e-06, "loss": 0.3208, "step": 5788 }, { "epoch": 1.92, "learning_rate": 6.048953793138556e-06, "loss": 0.3344, "step": 5789 }, { "epoch": 1.92, "learning_rate": 6.045662179067031e-06, "loss": 0.3096, "step": 5790 }, { "epoch": 1.92, "learning_rate": 6.042371072770719e-06, "loss": 0.3649, "step": 5791 }, { "epoch": 1.92, "learning_rate": 6.039080474672228e-06, "loss": 0.3629, "step": 5792 }, { "epoch": 1.92, "learning_rate": 6.035790385194107e-06, "loss": 0.3773, "step": 5793 }, { "epoch": 1.92, "learning_rate": 6.032500804758827e-06, "loss": 0.339, "step": 5794 }, { "epoch": 1.92, "learning_rate": 6.029211733788808e-06, "loss": 0.2368, "step": 5795 }, { "epoch": 1.92, "learning_rate": 6.025923172706396e-06, "loss": 0.392, "step": 5796 }, { "epoch": 1.92, "learning_rate": 6.022635121933877e-06, "loss": 0.3137, "step": 5797 }, { "epoch": 1.92, "learning_rate": 6.019347581893469e-06, "loss": 0.3728, "step": 5798 }, { "epoch": 1.92, "learning_rate": 6.016060553007318e-06, "loss": 0.3384, "step": 5799 }, { "epoch": 1.92, "learning_rate": 6.012774035697519e-06, "loss": 0.2552, "step": 5800 }, { "epoch": 1.93, "learning_rate": 6.009488030386083e-06, "loss": 0.3486, "step": 5801 }, { "epoch": 1.93, "learning_rate": 6.006202537494969e-06, "loss": 0.2997, "step": 5802 }, { "epoch": 1.93, "learning_rate": 6.0029175574460685e-06, "loss": 0.3348, "step": 5803 }, { "epoch": 1.93, "learning_rate": 5.999633090661202e-06, "loss": 0.2467, "step": 5804 }, { "epoch": 1.93, "learning_rate": 5.996349137562128e-06, "loss": 0.2789, "step": 5805 }, { "epoch": 1.93, "learning_rate": 5.993065698570538e-06, "loss": 0.2571, "step": 5806 }, { "epoch": 1.93, "learning_rate": 5.989782774108057e-06, "loss": 0.3691, "step": 5807 }, { "epoch": 1.93, "learning_rate": 5.986500364596243e-06, "loss": 0.292, "step": 5808 }, { "epoch": 1.93, "learning_rate": 5.983218470456591e-06, "loss": 0.256, "step": 5809 }, { "epoch": 1.93, "learning_rate": 5.979937092110526e-06, "loss": 0.2254, "step": 5810 }, { "epoch": 1.93, "learning_rate": 5.976656229979408e-06, "loss": 0.2112, "step": 5811 }, { "epoch": 1.93, "learning_rate": 5.973375884484535e-06, "loss": 0.2834, "step": 5812 }, { "epoch": 1.93, "learning_rate": 5.970096056047132e-06, "loss": 0.3232, "step": 5813 }, { "epoch": 1.93, "learning_rate": 5.966816745088366e-06, "loss": 0.2869, "step": 5814 }, { "epoch": 1.93, "learning_rate": 5.963537952029325e-06, "loss": 0.2961, "step": 5815 }, { "epoch": 1.93, "learning_rate": 5.96025967729104e-06, "loss": 0.3464, "step": 5816 }, { "epoch": 1.93, "learning_rate": 5.956981921294477e-06, "loss": 0.3182, "step": 5817 }, { "epoch": 1.93, "learning_rate": 5.953704684460526e-06, "loss": 0.3616, "step": 5818 }, { "epoch": 1.93, "learning_rate": 5.950427967210019e-06, "loss": 0.2338, "step": 5819 }, { "epoch": 1.93, "learning_rate": 5.947151769963717e-06, "loss": 0.3519, "step": 5820 }, { "epoch": 1.93, "learning_rate": 5.94387609314232e-06, "loss": 0.2457, "step": 5821 }, { "epoch": 1.93, "learning_rate": 5.940600937166449e-06, "loss": 0.3276, "step": 5822 }, { "epoch": 1.93, "learning_rate": 5.93732630245667e-06, "loss": 0.2507, "step": 5823 }, { "epoch": 1.93, "learning_rate": 5.934052189433475e-06, "loss": 0.3159, "step": 5824 }, { "epoch": 1.93, "learning_rate": 5.9307785985172946e-06, "loss": 0.2609, "step": 5825 }, { "epoch": 1.93, "learning_rate": 5.927505530128492e-06, "loss": 0.2268, "step": 5826 }, { "epoch": 1.93, "learning_rate": 5.924232984687355e-06, "loss": 0.2255, "step": 5827 }, { "epoch": 1.93, "learning_rate": 5.920960962614115e-06, "loss": 0.2706, "step": 5828 }, { "epoch": 1.93, "learning_rate": 5.917689464328928e-06, "loss": 0.271, "step": 5829 }, { "epoch": 1.93, "learning_rate": 5.914418490251888e-06, "loss": 0.3958, "step": 5830 }, { "epoch": 1.94, "learning_rate": 5.911148040803015e-06, "loss": 0.2815, "step": 5831 }, { "epoch": 1.94, "learning_rate": 5.9078781164022725e-06, "loss": 0.2843, "step": 5832 }, { "epoch": 1.94, "learning_rate": 5.904608717469546e-06, "loss": 0.2744, "step": 5833 }, { "epoch": 1.94, "learning_rate": 5.9013398444246615e-06, "loss": 0.3181, "step": 5834 }, { "epoch": 1.94, "learning_rate": 5.898071497687376e-06, "loss": 0.4065, "step": 5835 }, { "epoch": 1.94, "learning_rate": 5.894803677677368e-06, "loss": 0.3434, "step": 5836 }, { "epoch": 1.94, "learning_rate": 5.891536384814266e-06, "loss": 0.3257, "step": 5837 }, { "epoch": 1.94, "learning_rate": 5.888269619517615e-06, "loss": 0.3046, "step": 5838 }, { "epoch": 1.94, "learning_rate": 5.885003382206903e-06, "loss": 0.2935, "step": 5839 }, { "epoch": 1.94, "learning_rate": 5.881737673301546e-06, "loss": 0.2811, "step": 5840 }, { "epoch": 1.94, "learning_rate": 5.878472493220892e-06, "loss": 0.2361, "step": 5841 }, { "epoch": 1.94, "learning_rate": 5.875207842384227e-06, "loss": 0.3116, "step": 5842 }, { "epoch": 1.94, "learning_rate": 5.8719437212107555e-06, "loss": 0.3037, "step": 5843 }, { "epoch": 1.94, "learning_rate": 5.868680130119626e-06, "loss": 0.3259, "step": 5844 }, { "epoch": 1.94, "learning_rate": 5.865417069529913e-06, "loss": 0.2637, "step": 5845 }, { "epoch": 1.94, "learning_rate": 5.86215453986063e-06, "loss": 0.3765, "step": 5846 }, { "epoch": 1.94, "learning_rate": 5.858892541530713e-06, "loss": 0.2994, "step": 5847 }, { "epoch": 1.94, "learning_rate": 5.855631074959034e-06, "loss": 0.3069, "step": 5848 }, { "epoch": 1.94, "learning_rate": 5.852370140564403e-06, "loss": 0.3051, "step": 5849 }, { "epoch": 1.94, "learning_rate": 5.849109738765548e-06, "loss": 0.307, "step": 5850 }, { "epoch": 1.94, "learning_rate": 5.845849869981137e-06, "loss": 0.2803, "step": 5851 }, { "epoch": 1.94, "learning_rate": 5.842590534629771e-06, "loss": 0.2803, "step": 5852 }, { "epoch": 1.94, "learning_rate": 5.8393317331299806e-06, "loss": 0.2715, "step": 5853 }, { "epoch": 1.94, "learning_rate": 5.836073465900225e-06, "loss": 0.2856, "step": 5854 }, { "epoch": 1.94, "learning_rate": 5.832815733358903e-06, "loss": 0.3453, "step": 5855 }, { "epoch": 1.94, "learning_rate": 5.829558535924333e-06, "loss": 0.2645, "step": 5856 }, { "epoch": 1.94, "learning_rate": 5.8263018740147725e-06, "loss": 0.3134, "step": 5857 }, { "epoch": 1.94, "learning_rate": 5.823045748048405e-06, "loss": 0.3005, "step": 5858 }, { "epoch": 1.94, "learning_rate": 5.819790158443355e-06, "loss": 0.2578, "step": 5859 }, { "epoch": 1.94, "learning_rate": 5.816535105617669e-06, "loss": 0.3466, "step": 5860 }, { "epoch": 1.95, "learning_rate": 5.813280589989323e-06, "loss": 0.3594, "step": 5861 }, { "epoch": 1.95, "learning_rate": 5.8100266119762406e-06, "loss": 0.2717, "step": 5862 }, { "epoch": 1.95, "learning_rate": 5.806773171996245e-06, "loss": 0.2799, "step": 5863 }, { "epoch": 1.95, "learning_rate": 5.8035202704671264e-06, "loss": 0.2966, "step": 5864 }, { "epoch": 1.95, "learning_rate": 5.800267907806577e-06, "loss": 0.2679, "step": 5865 }, { "epoch": 1.95, "learning_rate": 5.797016084432241e-06, "loss": 0.3654, "step": 5866 }, { "epoch": 1.95, "learning_rate": 5.793764800761676e-06, "loss": 0.3213, "step": 5867 }, { "epoch": 1.95, "learning_rate": 5.790514057212386e-06, "loss": 0.202, "step": 5868 }, { "epoch": 1.95, "learning_rate": 5.787263854201795e-06, "loss": 0.3721, "step": 5869 }, { "epoch": 1.95, "learning_rate": 5.784014192147258e-06, "loss": 0.3274, "step": 5870 }, { "epoch": 1.95, "learning_rate": 5.7807650714660644e-06, "loss": 0.3092, "step": 5871 }, { "epoch": 1.95, "learning_rate": 5.77751649257543e-06, "loss": 0.3245, "step": 5872 }, { "epoch": 1.95, "learning_rate": 5.77426845589251e-06, "loss": 0.3611, "step": 5873 }, { "epoch": 1.95, "learning_rate": 5.771020961834376e-06, "loss": 0.3318, "step": 5874 }, { "epoch": 1.95, "learning_rate": 5.767774010818045e-06, "loss": 0.2839, "step": 5875 }, { "epoch": 1.95, "learning_rate": 5.764527603260457e-06, "loss": 0.3149, "step": 5876 }, { "epoch": 1.95, "learning_rate": 5.761281739578476e-06, "loss": 0.2549, "step": 5877 }, { "epoch": 1.95, "learning_rate": 5.758036420188901e-06, "loss": 0.3199, "step": 5878 }, { "epoch": 1.95, "learning_rate": 5.754791645508473e-06, "loss": 0.3834, "step": 5879 }, { "epoch": 1.95, "learning_rate": 5.751547415953844e-06, "loss": 0.266, "step": 5880 }, { "epoch": 1.95, "learning_rate": 5.748303731941605e-06, "loss": 0.3329, "step": 5881 }, { "epoch": 1.95, "learning_rate": 5.74506059388828e-06, "loss": 0.3519, "step": 5882 }, { "epoch": 1.95, "learning_rate": 5.741818002210318e-06, "loss": 0.2808, "step": 5883 }, { "epoch": 1.95, "learning_rate": 5.738575957324097e-06, "loss": 0.2562, "step": 5884 }, { "epoch": 1.95, "learning_rate": 5.735334459645925e-06, "loss": 0.2633, "step": 5885 }, { "epoch": 1.95, "learning_rate": 5.732093509592048e-06, "loss": 0.2678, "step": 5886 }, { "epoch": 1.95, "learning_rate": 5.728853107578629e-06, "loss": 0.2903, "step": 5887 }, { "epoch": 1.95, "learning_rate": 5.7256132540217746e-06, "loss": 0.2567, "step": 5888 }, { "epoch": 1.95, "learning_rate": 5.722373949337508e-06, "loss": 0.3516, "step": 5889 }, { "epoch": 1.95, "learning_rate": 5.71913519394179e-06, "loss": 0.2659, "step": 5890 }, { "epoch": 1.96, "learning_rate": 5.7158969882505035e-06, "loss": 0.3167, "step": 5891 }, { "epoch": 1.96, "learning_rate": 5.712659332679465e-06, "loss": 0.3389, "step": 5892 }, { "epoch": 1.96, "learning_rate": 5.709422227644428e-06, "loss": 0.3241, "step": 5893 }, { "epoch": 1.96, "learning_rate": 5.706185673561061e-06, "loss": 0.2826, "step": 5894 }, { "epoch": 1.96, "learning_rate": 5.702949670844975e-06, "loss": 0.2305, "step": 5895 }, { "epoch": 1.96, "learning_rate": 5.699714219911696e-06, "loss": 0.3101, "step": 5896 }, { "epoch": 1.96, "learning_rate": 5.696479321176702e-06, "loss": 0.3506, "step": 5897 }, { "epoch": 1.96, "learning_rate": 5.6932449750553665e-06, "loss": 0.3374, "step": 5898 }, { "epoch": 1.96, "learning_rate": 5.6900111819630235e-06, "loss": 0.3024, "step": 5899 }, { "epoch": 1.96, "learning_rate": 5.686777942314919e-06, "loss": 0.311, "step": 5900 }, { "epoch": 1.96, "learning_rate": 5.683545256526229e-06, "loss": 0.2891, "step": 5901 }, { "epoch": 1.96, "learning_rate": 5.68031312501207e-06, "loss": 0.2668, "step": 5902 }, { "epoch": 1.96, "learning_rate": 5.677081548187469e-06, "loss": 0.2866, "step": 5903 }, { "epoch": 1.96, "learning_rate": 5.673850526467406e-06, "loss": 0.3149, "step": 5904 }, { "epoch": 1.96, "learning_rate": 5.6706200602667585e-06, "loss": 0.2344, "step": 5905 }, { "epoch": 1.96, "learning_rate": 5.667390150000361e-06, "loss": 0.2954, "step": 5906 }, { "epoch": 1.96, "learning_rate": 5.664160796082958e-06, "loss": 0.2447, "step": 5907 }, { "epoch": 1.96, "learning_rate": 5.660931998929238e-06, "loss": 0.3993, "step": 5908 }, { "epoch": 1.96, "learning_rate": 5.6577037589538035e-06, "loss": 0.3093, "step": 5909 }, { "epoch": 1.96, "learning_rate": 5.6544760765711914e-06, "loss": 0.2611, "step": 5910 }, { "epoch": 1.96, "learning_rate": 5.651248952195875e-06, "loss": 0.2547, "step": 5911 }, { "epoch": 1.96, "learning_rate": 5.648022386242237e-06, "loss": 0.2513, "step": 5912 }, { "epoch": 1.96, "learning_rate": 5.644796379124609e-06, "loss": 0.3457, "step": 5913 }, { "epoch": 1.96, "learning_rate": 5.641570931257232e-06, "loss": 0.2979, "step": 5914 }, { "epoch": 1.96, "learning_rate": 5.638346043054297e-06, "loss": 0.3025, "step": 5915 }, { "epoch": 1.96, "learning_rate": 5.6351217149299035e-06, "loss": 0.3575, "step": 5916 }, { "epoch": 1.96, "learning_rate": 5.631897947298082e-06, "loss": 0.3303, "step": 5917 }, { "epoch": 1.96, "learning_rate": 5.628674740572806e-06, "loss": 0.2578, "step": 5918 }, { "epoch": 1.96, "learning_rate": 5.62545209516796e-06, "loss": 0.3282, "step": 5919 }, { "epoch": 1.96, "learning_rate": 5.622230011497365e-06, "loss": 0.2997, "step": 5920 }, { "epoch": 1.97, "learning_rate": 5.619008489974762e-06, "loss": 0.3829, "step": 5921 }, { "epoch": 1.97, "learning_rate": 5.615787531013836e-06, "loss": 0.259, "step": 5922 }, { "epoch": 1.97, "learning_rate": 5.6125671350281776e-06, "loss": 0.3529, "step": 5923 }, { "epoch": 1.97, "learning_rate": 5.6093473024313275e-06, "loss": 0.2483, "step": 5924 }, { "epoch": 1.97, "learning_rate": 5.6061280336367375e-06, "loss": 0.3256, "step": 5925 }, { "epoch": 1.97, "learning_rate": 5.602909329057794e-06, "loss": 0.2997, "step": 5926 }, { "epoch": 1.97, "learning_rate": 5.5996911891078096e-06, "loss": 0.3267, "step": 5927 }, { "epoch": 1.97, "learning_rate": 5.596473614200022e-06, "loss": 0.2499, "step": 5928 }, { "epoch": 1.97, "learning_rate": 5.593256604747603e-06, "loss": 0.2724, "step": 5929 }, { "epoch": 1.97, "learning_rate": 5.590040161163644e-06, "loss": 0.3362, "step": 5930 }, { "epoch": 1.97, "learning_rate": 5.586824283861174e-06, "loss": 0.2729, "step": 5931 }, { "epoch": 1.97, "learning_rate": 5.583608973253137e-06, "loss": 0.3774, "step": 5932 }, { "epoch": 1.97, "learning_rate": 5.580394229752413e-06, "loss": 0.2366, "step": 5933 }, { "epoch": 1.97, "learning_rate": 5.577180053771799e-06, "loss": 0.2667, "step": 5934 }, { "epoch": 1.97, "learning_rate": 5.5739664457240375e-06, "loss": 0.3982, "step": 5935 }, { "epoch": 1.97, "learning_rate": 5.570753406021778e-06, "loss": 0.2124, "step": 5936 }, { "epoch": 1.97, "learning_rate": 5.567540935077609e-06, "loss": 0.2747, "step": 5937 }, { "epoch": 1.97, "learning_rate": 5.564329033304044e-06, "loss": 0.301, "step": 5938 }, { "epoch": 1.97, "learning_rate": 5.561117701113523e-06, "loss": 0.3921, "step": 5939 }, { "epoch": 1.97, "learning_rate": 5.557906938918409e-06, "loss": 0.2549, "step": 5940 }, { "epoch": 1.97, "learning_rate": 5.554696747130992e-06, "loss": 0.2441, "step": 5941 }, { "epoch": 1.97, "learning_rate": 5.5514871261635005e-06, "loss": 0.295, "step": 5942 }, { "epoch": 1.97, "learning_rate": 5.548278076428072e-06, "loss": 0.263, "step": 5943 }, { "epoch": 1.97, "learning_rate": 5.545069598336788e-06, "loss": 0.3834, "step": 5944 }, { "epoch": 1.97, "learning_rate": 5.5418616923016435e-06, "loss": 0.2293, "step": 5945 }, { "epoch": 1.97, "learning_rate": 5.538654358734565e-06, "loss": 0.2654, "step": 5946 }, { "epoch": 1.97, "learning_rate": 5.535447598047405e-06, "loss": 0.2607, "step": 5947 }, { "epoch": 1.97, "learning_rate": 5.532241410651937e-06, "loss": 0.3708, "step": 5948 }, { "epoch": 1.97, "learning_rate": 5.529035796959879e-06, "loss": 0.2236, "step": 5949 }, { "epoch": 1.97, "learning_rate": 5.525830757382851e-06, "loss": 0.2235, "step": 5950 }, { "epoch": 1.98, "learning_rate": 5.522626292332419e-06, "loss": 0.3342, "step": 5951 }, { "epoch": 1.98, "learning_rate": 5.519422402220065e-06, "loss": 0.2924, "step": 5952 }, { "epoch": 1.98, "learning_rate": 5.516219087457199e-06, "loss": 0.3759, "step": 5953 }, { "epoch": 1.98, "learning_rate": 5.513016348455153e-06, "loss": 0.2668, "step": 5954 }, { "epoch": 1.98, "learning_rate": 5.509814185625198e-06, "loss": 0.2983, "step": 5955 }, { "epoch": 1.98, "learning_rate": 5.506612599378518e-06, "loss": 0.25, "step": 5956 }, { "epoch": 1.98, "learning_rate": 5.503411590126226e-06, "loss": 0.2124, "step": 5957 }, { "epoch": 1.98, "learning_rate": 5.500211158279367e-06, "loss": 0.3475, "step": 5958 }, { "epoch": 1.98, "learning_rate": 5.497011304248907e-06, "loss": 0.316, "step": 5959 }, { "epoch": 1.98, "learning_rate": 5.493812028445736e-06, "loss": 0.2493, "step": 5960 }, { "epoch": 1.98, "learning_rate": 5.4906133312806675e-06, "loss": 0.2179, "step": 5961 }, { "epoch": 1.98, "learning_rate": 5.487415213164454e-06, "loss": 0.2942, "step": 5962 }, { "epoch": 1.98, "learning_rate": 5.484217674507757e-06, "loss": 0.3632, "step": 5963 }, { "epoch": 1.98, "learning_rate": 5.4810207157211795e-06, "loss": 0.3032, "step": 5964 }, { "epoch": 1.98, "learning_rate": 5.477824337215239e-06, "loss": 0.2776, "step": 5965 }, { "epoch": 1.98, "learning_rate": 5.474628539400378e-06, "loss": 0.2865, "step": 5966 }, { "epoch": 1.98, "learning_rate": 5.47143332268697e-06, "loss": 0.318, "step": 5967 }, { "epoch": 1.98, "learning_rate": 5.468238687485307e-06, "loss": 0.255, "step": 5968 }, { "epoch": 1.98, "learning_rate": 5.465044634205623e-06, "loss": 0.2903, "step": 5969 }, { "epoch": 1.98, "learning_rate": 5.461851163258052e-06, "loss": 0.2244, "step": 5970 }, { "epoch": 1.98, "learning_rate": 5.458658275052675e-06, "loss": 0.2571, "step": 5971 }, { "epoch": 1.98, "learning_rate": 5.455465969999485e-06, "loss": 0.3179, "step": 5972 }, { "epoch": 1.98, "learning_rate": 5.452274248508415e-06, "loss": 0.2352, "step": 5973 }, { "epoch": 1.98, "learning_rate": 5.449083110989296e-06, "loss": 0.2673, "step": 5974 }, { "epoch": 1.98, "learning_rate": 5.445892557851915e-06, "loss": 0.2756, "step": 5975 }, { "epoch": 1.98, "learning_rate": 5.442702589505964e-06, "loss": 0.3124, "step": 5976 }, { "epoch": 1.98, "learning_rate": 5.439513206361062e-06, "loss": 0.2896, "step": 5977 }, { "epoch": 1.98, "learning_rate": 5.436324408826766e-06, "loss": 0.2644, "step": 5978 }, { "epoch": 1.98, "learning_rate": 5.43313619731254e-06, "loss": 0.2221, "step": 5979 }, { "epoch": 1.98, "learning_rate": 5.4299485722277925e-06, "loss": 0.2472, "step": 5980 }, { "epoch": 1.99, "learning_rate": 5.426761533981829e-06, "loss": 0.2491, "step": 5981 }, { "epoch": 1.99, "learning_rate": 5.423575082983909e-06, "loss": 0.2755, "step": 5982 }, { "epoch": 1.99, "learning_rate": 5.420389219643196e-06, "loss": 0.2314, "step": 5983 }, { "epoch": 1.99, "learning_rate": 5.4172039443687925e-06, "loss": 0.2432, "step": 5984 }, { "epoch": 1.99, "learning_rate": 5.414019257569716e-06, "loss": 0.2176, "step": 5985 }, { "epoch": 1.99, "learning_rate": 5.410835159654905e-06, "loss": 0.3417, "step": 5986 }, { "epoch": 1.99, "learning_rate": 5.407651651033241e-06, "loss": 0.2931, "step": 5987 }, { "epoch": 1.99, "learning_rate": 5.4044687321135034e-06, "loss": 0.2236, "step": 5988 }, { "epoch": 1.99, "learning_rate": 5.40128640330442e-06, "loss": 0.263, "step": 5989 }, { "epoch": 1.99, "learning_rate": 5.398104665014625e-06, "loss": 0.2833, "step": 5990 }, { "epoch": 1.99, "learning_rate": 5.3949235176526925e-06, "loss": 0.3093, "step": 5991 }, { "epoch": 1.99, "learning_rate": 5.391742961627109e-06, "loss": 0.2484, "step": 5992 }, { "epoch": 1.99, "learning_rate": 5.388562997346283e-06, "loss": 0.25, "step": 5993 }, { "epoch": 1.99, "learning_rate": 5.385383625218561e-06, "loss": 0.2483, "step": 5994 }, { "epoch": 1.99, "learning_rate": 5.382204845652202e-06, "loss": 0.3293, "step": 5995 }, { "epoch": 1.99, "learning_rate": 5.379026659055393e-06, "loss": 0.1899, "step": 5996 }, { "epoch": 1.99, "learning_rate": 5.375849065836238e-06, "loss": 0.3885, "step": 5997 }, { "epoch": 1.99, "learning_rate": 5.37267206640278e-06, "loss": 0.3268, "step": 5998 }, { "epoch": 1.99, "learning_rate": 5.369495661162967e-06, "loss": 0.2979, "step": 5999 }, { "epoch": 1.99, "learning_rate": 5.366319850524688e-06, "loss": 0.2393, "step": 6000 }, { "epoch": 1.99, "learning_rate": 5.363144634895747e-06, "loss": 0.2633, "step": 6001 }, { "epoch": 1.99, "learning_rate": 5.359970014683868e-06, "loss": 0.3123, "step": 6002 }, { "epoch": 1.99, "learning_rate": 5.356795990296708e-06, "loss": 0.3163, "step": 6003 }, { "epoch": 1.99, "learning_rate": 5.353622562141833e-06, "loss": 0.2859, "step": 6004 }, { "epoch": 1.99, "learning_rate": 5.350449730626755e-06, "loss": 0.2639, "step": 6005 }, { "epoch": 1.99, "learning_rate": 5.347277496158882e-06, "loss": 0.2903, "step": 6006 }, { "epoch": 1.99, "learning_rate": 5.344105859145574e-06, "loss": 0.3062, "step": 6007 }, { "epoch": 1.99, "learning_rate": 5.3409348199940924e-06, "loss": 0.3337, "step": 6008 }, { "epoch": 1.99, "learning_rate": 5.337764379111629e-06, "loss": 0.2633, "step": 6009 }, { "epoch": 1.99, "learning_rate": 5.334594536905298e-06, "loss": 0.3386, "step": 6010 }, { "epoch": 2.0, "learning_rate": 5.331425293782142e-06, "loss": 0.3026, "step": 6011 }, { "epoch": 2.0, "learning_rate": 5.328256650149121e-06, "loss": 0.2628, "step": 6012 }, { "epoch": 2.0, "learning_rate": 5.325088606413115e-06, "loss": 0.302, "step": 6013 }, { "epoch": 2.0, "learning_rate": 5.32192116298094e-06, "loss": 0.2397, "step": 6014 }, { "epoch": 2.0, "learning_rate": 5.318754320259321e-06, "loss": 0.2535, "step": 6015 }, { "epoch": 2.0, "learning_rate": 5.315588078654911e-06, "loss": 0.2437, "step": 6016 }, { "epoch": 2.0, "learning_rate": 5.312422438574283e-06, "loss": 0.3495, "step": 6017 }, { "epoch": 2.0, "learning_rate": 5.309257400423944e-06, "loss": 0.3388, "step": 6018 }, { "epoch": 2.0, "learning_rate": 5.306092964610308e-06, "loss": 0.3117, "step": 6019 }, { "epoch": 2.0, "learning_rate": 5.302929131539725e-06, "loss": 0.2411, "step": 6020 }, { "epoch": 2.0, "learning_rate": 5.299765901618459e-06, "loss": 0.3142, "step": 6021 }, { "epoch": 2.0, "learning_rate": 5.2966032752527e-06, "loss": 0.292, "step": 6022 }, { "epoch": 2.0, "learning_rate": 5.2934412528485594e-06, "loss": 0.3359, "step": 6023 }, { "epoch": 2.0, "learning_rate": 5.290279834812067e-06, "loss": 0.3077, "step": 6024 }, { "epoch": 2.0, "learning_rate": 5.2871190215491865e-06, "loss": 0.2606, "step": 6025 }, { "epoch": 2.0, "learning_rate": 5.28395881346579e-06, "loss": 0.2518, "step": 6026 }, { "epoch": 2.0, "learning_rate": 5.280799210967688e-06, "loss": 0.198, "step": 6027 }, { "epoch": 2.0, "learning_rate": 5.2776402144606e-06, "loss": 0.1418, "step": 6028 }, { "epoch": 2.0, "learning_rate": 5.274481824350169e-06, "loss": 0.1568, "step": 6029 }, { "epoch": 2.0, "learning_rate": 5.2713240410419604e-06, "loss": 0.133, "step": 6030 }, { "epoch": 2.0, "learning_rate": 5.268166864941473e-06, "loss": 0.1461, "step": 6031 }, { "epoch": 2.0, "learning_rate": 5.265010296454112e-06, "loss": 0.1381, "step": 6032 }, { "epoch": 2.0, "learning_rate": 5.261854335985211e-06, "loss": 0.2068, "step": 6033 }, { "epoch": 2.0, "learning_rate": 5.258698983940032e-06, "loss": 0.2247, "step": 6034 }, { "epoch": 2.0, "learning_rate": 5.255544240723747e-06, "loss": 0.2238, "step": 6035 }, { "epoch": 2.0, "learning_rate": 5.252390106741458e-06, "loss": 0.1442, "step": 6036 }, { "epoch": 2.0, "learning_rate": 5.249236582398181e-06, "loss": 0.1673, "step": 6037 }, { "epoch": 2.0, "learning_rate": 5.246083668098869e-06, "loss": 0.1722, "step": 6038 }, { "epoch": 2.0, "learning_rate": 5.2429313642483756e-06, "loss": 0.1692, "step": 6039 }, { "epoch": 2.0, "learning_rate": 5.239779671251499e-06, "loss": 0.1695, "step": 6040 }, { "epoch": 2.0, "learning_rate": 5.236628589512939e-06, "loss": 0.2126, "step": 6041 }, { "epoch": 2.01, "learning_rate": 5.233478119437327e-06, "loss": 0.1768, "step": 6042 }, { "epoch": 2.01, "learning_rate": 5.230328261429214e-06, "loss": 0.1091, "step": 6043 }, { "epoch": 2.01, "learning_rate": 5.227179015893067e-06, "loss": 0.1001, "step": 6044 }, { "epoch": 2.01, "learning_rate": 5.22403038323329e-06, "loss": 0.1682, "step": 6045 }, { "epoch": 2.01, "learning_rate": 5.220882363854186e-06, "loss": 0.1923, "step": 6046 }, { "epoch": 2.01, "learning_rate": 5.217734958160002e-06, "loss": 0.1193, "step": 6047 }, { "epoch": 2.01, "learning_rate": 5.214588166554888e-06, "loss": 0.1207, "step": 6048 }, { "epoch": 2.01, "learning_rate": 5.211441989442932e-06, "loss": 0.1408, "step": 6049 }, { "epoch": 2.01, "learning_rate": 5.208296427228119e-06, "loss": 0.2042, "step": 6050 }, { "epoch": 2.01, "learning_rate": 5.205151480314381e-06, "loss": 0.1087, "step": 6051 }, { "epoch": 2.01, "learning_rate": 5.202007149105557e-06, "loss": 0.1671, "step": 6052 }, { "epoch": 2.01, "learning_rate": 5.198863434005404e-06, "loss": 0.1149, "step": 6053 }, { "epoch": 2.01, "learning_rate": 5.195720335417613e-06, "loss": 0.1549, "step": 6054 }, { "epoch": 2.01, "learning_rate": 5.192577853745781e-06, "loss": 0.11, "step": 6055 }, { "epoch": 2.01, "learning_rate": 5.189435989393446e-06, "loss": 0.1481, "step": 6056 }, { "epoch": 2.01, "learning_rate": 5.186294742764038e-06, "loss": 0.1923, "step": 6057 }, { "epoch": 2.01, "learning_rate": 5.183154114260932e-06, "loss": 0.1671, "step": 6058 }, { "epoch": 2.01, "learning_rate": 5.180014104287411e-06, "loss": 0.1379, "step": 6059 }, { "epoch": 2.01, "learning_rate": 5.176874713246689e-06, "loss": 0.1421, "step": 6060 }, { "epoch": 2.01, "learning_rate": 5.1737359415418884e-06, "loss": 0.1787, "step": 6061 }, { "epoch": 2.01, "learning_rate": 5.170597789576057e-06, "loss": 0.116, "step": 6062 }, { "epoch": 2.01, "learning_rate": 5.167460257752175e-06, "loss": 0.1407, "step": 6063 }, { "epoch": 2.01, "learning_rate": 5.164323346473115e-06, "loss": 0.1373, "step": 6064 }, { "epoch": 2.01, "learning_rate": 5.161187056141699e-06, "loss": 0.1596, "step": 6065 }, { "epoch": 2.01, "learning_rate": 5.15805138716065e-06, "loss": 0.1652, "step": 6066 }, { "epoch": 2.01, "learning_rate": 5.154916339932624e-06, "loss": 0.1371, "step": 6067 }, { "epoch": 2.01, "learning_rate": 5.15178191486019e-06, "loss": 0.1799, "step": 6068 }, { "epoch": 2.01, "learning_rate": 5.148648112345831e-06, "loss": 0.146, "step": 6069 }, { "epoch": 2.01, "learning_rate": 5.1455149327919695e-06, "loss": 0.1746, "step": 6070 }, { "epoch": 2.01, "learning_rate": 5.14238237660093e-06, "loss": 0.144, "step": 6071 }, { "epoch": 2.02, "learning_rate": 5.1392504441749634e-06, "loss": 0.1691, "step": 6072 }, { "epoch": 2.02, "learning_rate": 5.136119135916236e-06, "loss": 0.0878, "step": 6073 }, { "epoch": 2.02, "learning_rate": 5.1329884522268454e-06, "loss": 0.1615, "step": 6074 }, { "epoch": 2.02, "learning_rate": 5.129858393508796e-06, "loss": 0.1316, "step": 6075 }, { "epoch": 2.02, "learning_rate": 5.1267289601640216e-06, "loss": 0.1511, "step": 6076 }, { "epoch": 2.02, "learning_rate": 5.12360015259437e-06, "loss": 0.1266, "step": 6077 }, { "epoch": 2.02, "learning_rate": 5.120471971201611e-06, "loss": 0.1898, "step": 6078 }, { "epoch": 2.02, "learning_rate": 5.11734441638743e-06, "loss": 0.1163, "step": 6079 }, { "epoch": 2.02, "learning_rate": 5.114217488553441e-06, "loss": 0.1828, "step": 6080 }, { "epoch": 2.02, "learning_rate": 5.111091188101169e-06, "loss": 0.1905, "step": 6081 }, { "epoch": 2.02, "learning_rate": 5.107965515432056e-06, "loss": 0.1054, "step": 6082 }, { "epoch": 2.02, "learning_rate": 5.104840470947479e-06, "loss": 0.1605, "step": 6083 }, { "epoch": 2.02, "learning_rate": 5.101716055048718e-06, "loss": 0.1448, "step": 6084 }, { "epoch": 2.02, "learning_rate": 5.098592268136977e-06, "loss": 0.2524, "step": 6085 }, { "epoch": 2.02, "learning_rate": 5.09546911061338e-06, "loss": 0.1311, "step": 6086 }, { "epoch": 2.02, "learning_rate": 5.0923465828789755e-06, "loss": 0.108, "step": 6087 }, { "epoch": 2.02, "learning_rate": 5.0892246853347245e-06, "loss": 0.1624, "step": 6088 }, { "epoch": 2.02, "learning_rate": 5.086103418381504e-06, "loss": 0.148, "step": 6089 }, { "epoch": 2.02, "learning_rate": 5.082982782420123e-06, "loss": 0.1533, "step": 6090 }, { "epoch": 2.02, "learning_rate": 5.0798627778512955e-06, "loss": 0.126, "step": 6091 }, { "epoch": 2.02, "learning_rate": 5.076743405075663e-06, "loss": 0.1307, "step": 6092 }, { "epoch": 2.02, "learning_rate": 5.073624664493777e-06, "loss": 0.1519, "step": 6093 }, { "epoch": 2.02, "learning_rate": 5.070506556506123e-06, "loss": 0.101, "step": 6094 }, { "epoch": 2.02, "learning_rate": 5.067389081513087e-06, "loss": 0.1444, "step": 6095 }, { "epoch": 2.02, "learning_rate": 5.064272239914994e-06, "loss": 0.1791, "step": 6096 }, { "epoch": 2.02, "learning_rate": 5.0611560321120714e-06, "loss": 0.2155, "step": 6097 }, { "epoch": 2.02, "learning_rate": 5.058040458504467e-06, "loss": 0.1205, "step": 6098 }, { "epoch": 2.02, "learning_rate": 5.054925519492255e-06, "loss": 0.1372, "step": 6099 }, { "epoch": 2.02, "learning_rate": 5.051811215475417e-06, "loss": 0.1178, "step": 6100 }, { "epoch": 2.02, "learning_rate": 5.048697546853869e-06, "loss": 0.1685, "step": 6101 }, { "epoch": 2.03, "learning_rate": 5.045584514027429e-06, "loss": 0.1624, "step": 6102 }, { "epoch": 2.03, "learning_rate": 5.042472117395845e-06, "loss": 0.1297, "step": 6103 }, { "epoch": 2.03, "learning_rate": 5.039360357358778e-06, "loss": 0.1711, "step": 6104 }, { "epoch": 2.03, "learning_rate": 5.036249234315807e-06, "loss": 0.0997, "step": 6105 }, { "epoch": 2.03, "learning_rate": 5.0331387486664266e-06, "loss": 0.1501, "step": 6106 }, { "epoch": 2.03, "learning_rate": 5.030028900810062e-06, "loss": 0.1609, "step": 6107 }, { "epoch": 2.03, "learning_rate": 5.026919691146042e-06, "loss": 0.1202, "step": 6108 }, { "epoch": 2.03, "learning_rate": 5.023811120073616e-06, "loss": 0.15, "step": 6109 }, { "epoch": 2.03, "learning_rate": 5.020703187991962e-06, "loss": 0.1572, "step": 6110 }, { "epoch": 2.03, "learning_rate": 5.0175958953001665e-06, "loss": 0.1613, "step": 6111 }, { "epoch": 2.03, "learning_rate": 5.014489242397234e-06, "loss": 0.0998, "step": 6112 }, { "epoch": 2.03, "learning_rate": 5.011383229682085e-06, "loss": 0.1637, "step": 6113 }, { "epoch": 2.03, "learning_rate": 5.00827785755357e-06, "loss": 0.1554, "step": 6114 }, { "epoch": 2.03, "learning_rate": 5.005173126410442e-06, "loss": 0.1077, "step": 6115 }, { "epoch": 2.03, "learning_rate": 5.002069036651384e-06, "loss": 0.1454, "step": 6116 }, { "epoch": 2.03, "learning_rate": 4.998965588674989e-06, "loss": 0.1384, "step": 6117 }, { "epoch": 2.03, "learning_rate": 4.995862782879768e-06, "loss": 0.1636, "step": 6118 }, { "epoch": 2.03, "learning_rate": 4.992760619664153e-06, "loss": 0.1494, "step": 6119 }, { "epoch": 2.03, "learning_rate": 4.989659099426487e-06, "loss": 0.1411, "step": 6120 }, { "epoch": 2.03, "learning_rate": 4.986558222565045e-06, "loss": 0.1302, "step": 6121 }, { "epoch": 2.03, "learning_rate": 4.983457989477997e-06, "loss": 0.1652, "step": 6122 }, { "epoch": 2.03, "learning_rate": 4.980358400563455e-06, "loss": 0.1904, "step": 6123 }, { "epoch": 2.03, "learning_rate": 4.977259456219427e-06, "loss": 0.1428, "step": 6124 }, { "epoch": 2.03, "learning_rate": 4.97416115684386e-06, "loss": 0.1539, "step": 6125 }, { "epoch": 2.03, "learning_rate": 4.971063502834589e-06, "loss": 0.1438, "step": 6126 }, { "epoch": 2.03, "learning_rate": 4.967966494589394e-06, "loss": 0.1406, "step": 6127 }, { "epoch": 2.03, "learning_rate": 4.964870132505957e-06, "loss": 0.1802, "step": 6128 }, { "epoch": 2.03, "learning_rate": 4.961774416981878e-06, "loss": 0.1505, "step": 6129 }, { "epoch": 2.03, "learning_rate": 4.958679348414684e-06, "loss": 0.1485, "step": 6130 }, { "epoch": 2.03, "learning_rate": 4.9555849272018044e-06, "loss": 0.1222, "step": 6131 }, { "epoch": 2.04, "learning_rate": 4.952491153740604e-06, "loss": 0.1622, "step": 6132 }, { "epoch": 2.04, "learning_rate": 4.949398028428337e-06, "loss": 0.1318, "step": 6133 }, { "epoch": 2.04, "learning_rate": 4.9463055516622035e-06, "loss": 0.1259, "step": 6134 }, { "epoch": 2.04, "learning_rate": 4.9432137238393e-06, "loss": 0.1588, "step": 6135 }, { "epoch": 2.04, "learning_rate": 4.940122545356653e-06, "loss": 0.1629, "step": 6136 }, { "epoch": 2.04, "learning_rate": 4.9370320166111965e-06, "loss": 0.1734, "step": 6137 }, { "epoch": 2.04, "learning_rate": 4.933942137999781e-06, "loss": 0.1801, "step": 6138 }, { "epoch": 2.04, "learning_rate": 4.930852909919188e-06, "loss": 0.2141, "step": 6139 }, { "epoch": 2.04, "learning_rate": 4.92776433276609e-06, "loss": 0.1454, "step": 6140 }, { "epoch": 2.04, "learning_rate": 4.9246764069371e-06, "loss": 0.1647, "step": 6141 }, { "epoch": 2.04, "learning_rate": 4.92158913282873e-06, "loss": 0.1463, "step": 6142 }, { "epoch": 2.04, "learning_rate": 4.918502510837424e-06, "loss": 0.1386, "step": 6143 }, { "epoch": 2.04, "learning_rate": 4.9154165413595275e-06, "loss": 0.1476, "step": 6144 }, { "epoch": 2.04, "learning_rate": 4.912331224791314e-06, "loss": 0.1561, "step": 6145 }, { "epoch": 2.04, "learning_rate": 4.909246561528967e-06, "loss": 0.15, "step": 6146 }, { "epoch": 2.04, "learning_rate": 4.9061625519685855e-06, "loss": 0.1525, "step": 6147 }, { "epoch": 2.04, "learning_rate": 4.903079196506186e-06, "loss": 0.1745, "step": 6148 }, { "epoch": 2.04, "learning_rate": 4.8999964955376976e-06, "loss": 0.1259, "step": 6149 }, { "epoch": 2.04, "learning_rate": 4.896914449458977e-06, "loss": 0.1253, "step": 6150 }, { "epoch": 2.04, "learning_rate": 4.89383305866578e-06, "loss": 0.1403, "step": 6151 }, { "epoch": 2.04, "learning_rate": 4.890752323553796e-06, "loss": 0.1978, "step": 6152 }, { "epoch": 2.04, "learning_rate": 4.887672244518617e-06, "loss": 0.1241, "step": 6153 }, { "epoch": 2.04, "learning_rate": 4.884592821955754e-06, "loss": 0.1425, "step": 6154 }, { "epoch": 2.04, "learning_rate": 4.881514056260631e-06, "loss": 0.1576, "step": 6155 }, { "epoch": 2.04, "learning_rate": 4.8784359478286005e-06, "loss": 0.1089, "step": 6156 }, { "epoch": 2.04, "learning_rate": 4.875358497054915e-06, "loss": 0.1435, "step": 6157 }, { "epoch": 2.04, "learning_rate": 4.872281704334748e-06, "loss": 0.1724, "step": 6158 }, { "epoch": 2.04, "learning_rate": 4.869205570063195e-06, "loss": 0.1024, "step": 6159 }, { "epoch": 2.04, "learning_rate": 4.866130094635258e-06, "loss": 0.1197, "step": 6160 }, { "epoch": 2.04, "learning_rate": 4.8630552784458585e-06, "loss": 0.117, "step": 6161 }, { "epoch": 2.05, "learning_rate": 4.859981121889827e-06, "loss": 0.1423, "step": 6162 }, { "epoch": 2.05, "learning_rate": 4.856907625361925e-06, "loss": 0.1517, "step": 6163 }, { "epoch": 2.05, "learning_rate": 4.8538347892568135e-06, "loss": 0.1154, "step": 6164 }, { "epoch": 2.05, "learning_rate": 4.850762613969073e-06, "loss": 0.123, "step": 6165 }, { "epoch": 2.05, "learning_rate": 4.847691099893203e-06, "loss": 0.1845, "step": 6166 }, { "epoch": 2.05, "learning_rate": 4.844620247423618e-06, "loss": 0.1162, "step": 6167 }, { "epoch": 2.05, "learning_rate": 4.841550056954641e-06, "loss": 0.1562, "step": 6168 }, { "epoch": 2.05, "learning_rate": 4.838480528880513e-06, "loss": 0.1882, "step": 6169 }, { "epoch": 2.05, "learning_rate": 4.835411663595396e-06, "loss": 0.1436, "step": 6170 }, { "epoch": 2.05, "learning_rate": 4.832343461493356e-06, "loss": 0.1661, "step": 6171 }, { "epoch": 2.05, "learning_rate": 4.829275922968388e-06, "loss": 0.1529, "step": 6172 }, { "epoch": 2.05, "learning_rate": 4.826209048414388e-06, "loss": 0.1319, "step": 6173 }, { "epoch": 2.05, "learning_rate": 4.823142838225175e-06, "loss": 0.1126, "step": 6174 }, { "epoch": 2.05, "learning_rate": 4.820077292794476e-06, "loss": 0.1214, "step": 6175 }, { "epoch": 2.05, "learning_rate": 4.817012412515936e-06, "loss": 0.1629, "step": 6176 }, { "epoch": 2.05, "learning_rate": 4.813948197783123e-06, "loss": 0.1698, "step": 6177 }, { "epoch": 2.05, "learning_rate": 4.810884648989502e-06, "loss": 0.1483, "step": 6178 }, { "epoch": 2.05, "learning_rate": 4.807821766528471e-06, "loss": 0.1819, "step": 6179 }, { "epoch": 2.05, "learning_rate": 4.804759550793328e-06, "loss": 0.1893, "step": 6180 }, { "epoch": 2.05, "learning_rate": 4.8016980021772995e-06, "loss": 0.1364, "step": 6181 }, { "epoch": 2.05, "learning_rate": 4.798637121073504e-06, "loss": 0.1264, "step": 6182 }, { "epoch": 2.05, "learning_rate": 4.795576907875e-06, "loss": 0.1278, "step": 6183 }, { "epoch": 2.05, "learning_rate": 4.792517362974745e-06, "loss": 0.1296, "step": 6184 }, { "epoch": 2.05, "learning_rate": 4.789458486765608e-06, "loss": 0.0895, "step": 6185 }, { "epoch": 2.05, "learning_rate": 4.7864002796403886e-06, "loss": 0.1487, "step": 6186 }, { "epoch": 2.05, "learning_rate": 4.783342741991785e-06, "loss": 0.1677, "step": 6187 }, { "epoch": 2.05, "learning_rate": 4.7802858742124155e-06, "loss": 0.1258, "step": 6188 }, { "epoch": 2.05, "learning_rate": 4.777229676694808e-06, "loss": 0.1504, "step": 6189 }, { "epoch": 2.05, "learning_rate": 4.774174149831413e-06, "loss": 0.1454, "step": 6190 }, { "epoch": 2.05, "learning_rate": 4.771119294014583e-06, "loss": 0.1447, "step": 6191 }, { "epoch": 2.06, "learning_rate": 4.768065109636602e-06, "loss": 0.1718, "step": 6192 }, { "epoch": 2.06, "learning_rate": 4.765011597089647e-06, "loss": 0.1805, "step": 6193 }, { "epoch": 2.06, "learning_rate": 4.761958756765825e-06, "loss": 0.1251, "step": 6194 }, { "epoch": 2.06, "learning_rate": 4.758906589057144e-06, "loss": 0.1714, "step": 6195 }, { "epoch": 2.06, "learning_rate": 4.755855094355532e-06, "loss": 0.1526, "step": 6196 }, { "epoch": 2.06, "learning_rate": 4.752804273052838e-06, "loss": 0.11, "step": 6197 }, { "epoch": 2.06, "learning_rate": 4.749754125540808e-06, "loss": 0.183, "step": 6198 }, { "epoch": 2.06, "learning_rate": 4.746704652211118e-06, "loss": 0.1396, "step": 6199 }, { "epoch": 2.06, "learning_rate": 4.743655853455344e-06, "loss": 0.1774, "step": 6200 }, { "epoch": 2.06, "learning_rate": 4.74060772966499e-06, "loss": 0.1779, "step": 6201 }, { "epoch": 2.06, "learning_rate": 4.737560281231451e-06, "loss": 0.2186, "step": 6202 }, { "epoch": 2.06, "learning_rate": 4.7345135085460606e-06, "loss": 0.1401, "step": 6203 }, { "epoch": 2.06, "learning_rate": 4.731467412000048e-06, "loss": 0.1288, "step": 6204 }, { "epoch": 2.06, "learning_rate": 4.728421991984561e-06, "loss": 0.134, "step": 6205 }, { "epoch": 2.06, "learning_rate": 4.7253772488906655e-06, "loss": 0.1478, "step": 6206 }, { "epoch": 2.06, "learning_rate": 4.72233318310933e-06, "loss": 0.1508, "step": 6207 }, { "epoch": 2.06, "learning_rate": 4.719289795031453e-06, "loss": 0.151, "step": 6208 }, { "epoch": 2.06, "learning_rate": 4.7162470850478195e-06, "loss": 0.1608, "step": 6209 }, { "epoch": 2.06, "learning_rate": 4.713205053549154e-06, "loss": 0.1671, "step": 6210 }, { "epoch": 2.06, "learning_rate": 4.710163700926076e-06, "loss": 0.1501, "step": 6211 }, { "epoch": 2.06, "learning_rate": 4.707123027569132e-06, "loss": 0.1313, "step": 6212 }, { "epoch": 2.06, "learning_rate": 4.70408303386877e-06, "loss": 0.1406, "step": 6213 }, { "epoch": 2.06, "learning_rate": 4.70104372021535e-06, "loss": 0.1562, "step": 6214 }, { "epoch": 2.06, "learning_rate": 4.6980050869991615e-06, "loss": 0.1425, "step": 6215 }, { "epoch": 2.06, "learning_rate": 4.694967134610379e-06, "loss": 0.1663, "step": 6216 }, { "epoch": 2.06, "learning_rate": 4.691929863439117e-06, "loss": 0.1673, "step": 6217 }, { "epoch": 2.06, "learning_rate": 4.688893273875381e-06, "loss": 0.1269, "step": 6218 }, { "epoch": 2.06, "learning_rate": 4.685857366309108e-06, "loss": 0.1523, "step": 6219 }, { "epoch": 2.06, "learning_rate": 4.68282214113013e-06, "loss": 0.1309, "step": 6220 }, { "epoch": 2.06, "learning_rate": 4.679787598728206e-06, "loss": 0.1934, "step": 6221 }, { "epoch": 2.07, "learning_rate": 4.676753739492998e-06, "loss": 0.1545, "step": 6222 }, { "epoch": 2.07, "learning_rate": 4.67372056381408e-06, "loss": 0.165, "step": 6223 }, { "epoch": 2.07, "learning_rate": 4.670688072080946e-06, "loss": 0.197, "step": 6224 }, { "epoch": 2.07, "learning_rate": 4.667656264682988e-06, "loss": 0.1655, "step": 6225 }, { "epoch": 2.07, "learning_rate": 4.664625142009531e-06, "loss": 0.1464, "step": 6226 }, { "epoch": 2.07, "learning_rate": 4.661594704449792e-06, "loss": 0.1319, "step": 6227 }, { "epoch": 2.07, "learning_rate": 4.6585649523929145e-06, "loss": 0.1164, "step": 6228 }, { "epoch": 2.07, "learning_rate": 4.6555358862279456e-06, "loss": 0.1298, "step": 6229 }, { "epoch": 2.07, "learning_rate": 4.652507506343846e-06, "loss": 0.1638, "step": 6230 }, { "epoch": 2.07, "learning_rate": 4.649479813129486e-06, "loss": 0.1379, "step": 6231 }, { "epoch": 2.07, "learning_rate": 4.646452806973658e-06, "loss": 0.1769, "step": 6232 }, { "epoch": 2.07, "learning_rate": 4.643426488265055e-06, "loss": 0.1397, "step": 6233 }, { "epoch": 2.07, "learning_rate": 4.640400857392281e-06, "loss": 0.1423, "step": 6234 }, { "epoch": 2.07, "learning_rate": 4.637375914743865e-06, "loss": 0.1243, "step": 6235 }, { "epoch": 2.07, "learning_rate": 4.634351660708233e-06, "loss": 0.1793, "step": 6236 }, { "epoch": 2.07, "learning_rate": 4.631328095673732e-06, "loss": 0.1633, "step": 6237 }, { "epoch": 2.07, "learning_rate": 4.6283052200286095e-06, "loss": 0.1373, "step": 6238 }, { "epoch": 2.07, "learning_rate": 4.625283034161042e-06, "loss": 0.1816, "step": 6239 }, { "epoch": 2.07, "learning_rate": 4.622261538459103e-06, "loss": 0.1334, "step": 6240 }, { "epoch": 2.07, "learning_rate": 4.619240733310778e-06, "loss": 0.1363, "step": 6241 }, { "epoch": 2.07, "learning_rate": 4.616220619103975e-06, "loss": 0.1007, "step": 6242 }, { "epoch": 2.07, "learning_rate": 4.613201196226501e-06, "loss": 0.111, "step": 6243 }, { "epoch": 2.07, "learning_rate": 4.610182465066082e-06, "loss": 0.1324, "step": 6244 }, { "epoch": 2.07, "learning_rate": 4.607164426010346e-06, "loss": 0.1351, "step": 6245 }, { "epoch": 2.07, "learning_rate": 4.604147079446845e-06, "loss": 0.1056, "step": 6246 }, { "epoch": 2.07, "learning_rate": 4.6011304257630314e-06, "loss": 0.1249, "step": 6247 }, { "epoch": 2.07, "learning_rate": 4.598114465346278e-06, "loss": 0.1549, "step": 6248 }, { "epoch": 2.07, "learning_rate": 4.595099198583859e-06, "loss": 0.1864, "step": 6249 }, { "epoch": 2.07, "learning_rate": 4.592084625862966e-06, "loss": 0.1086, "step": 6250 }, { "epoch": 2.07, "learning_rate": 4.5890707475706975e-06, "loss": 0.1613, "step": 6251 }, { "epoch": 2.08, "learning_rate": 4.586057564094061e-06, "loss": 0.1343, "step": 6252 }, { "epoch": 2.08, "learning_rate": 4.5830450758199865e-06, "loss": 0.1179, "step": 6253 }, { "epoch": 2.08, "learning_rate": 4.580033283135299e-06, "loss": 0.1628, "step": 6254 }, { "epoch": 2.08, "learning_rate": 4.577022186426748e-06, "loss": 0.0883, "step": 6255 }, { "epoch": 2.08, "learning_rate": 4.574011786080983e-06, "loss": 0.1275, "step": 6256 }, { "epoch": 2.08, "learning_rate": 4.571002082484577e-06, "loss": 0.1447, "step": 6257 }, { "epoch": 2.08, "learning_rate": 4.56799307602399e-06, "loss": 0.1496, "step": 6258 }, { "epoch": 2.08, "learning_rate": 4.5649847670856205e-06, "loss": 0.1102, "step": 6259 }, { "epoch": 2.08, "learning_rate": 4.5619771560557604e-06, "loss": 0.1484, "step": 6260 }, { "epoch": 2.08, "learning_rate": 4.558970243320611e-06, "loss": 0.1228, "step": 6261 }, { "epoch": 2.08, "learning_rate": 4.555964029266298e-06, "loss": 0.1735, "step": 6262 }, { "epoch": 2.08, "learning_rate": 4.552958514278839e-06, "loss": 0.1432, "step": 6263 }, { "epoch": 2.08, "learning_rate": 4.549953698744185e-06, "loss": 0.1254, "step": 6264 }, { "epoch": 2.08, "learning_rate": 4.546949583048166e-06, "loss": 0.1537, "step": 6265 }, { "epoch": 2.08, "learning_rate": 4.543946167576552e-06, "loss": 0.1144, "step": 6266 }, { "epoch": 2.08, "learning_rate": 4.540943452715002e-06, "loss": 0.1569, "step": 6267 }, { "epoch": 2.08, "learning_rate": 4.537941438849103e-06, "loss": 0.1844, "step": 6268 }, { "epoch": 2.08, "learning_rate": 4.5349401263643365e-06, "loss": 0.1711, "step": 6269 }, { "epoch": 2.08, "learning_rate": 4.531939515646102e-06, "loss": 0.1385, "step": 6270 }, { "epoch": 2.08, "learning_rate": 4.528939607079705e-06, "loss": 0.104, "step": 6271 }, { "epoch": 2.08, "learning_rate": 4.525940401050362e-06, "loss": 0.1286, "step": 6272 }, { "epoch": 2.08, "learning_rate": 4.522941897943205e-06, "loss": 0.1694, "step": 6273 }, { "epoch": 2.08, "learning_rate": 4.519944098143264e-06, "loss": 0.1554, "step": 6274 }, { "epoch": 2.08, "learning_rate": 4.516947002035493e-06, "loss": 0.1346, "step": 6275 }, { "epoch": 2.08, "learning_rate": 4.513950610004741e-06, "loss": 0.1508, "step": 6276 }, { "epoch": 2.08, "learning_rate": 4.510954922435784e-06, "loss": 0.1397, "step": 6277 }, { "epoch": 2.08, "learning_rate": 4.507959939713283e-06, "loss": 0.1168, "step": 6278 }, { "epoch": 2.08, "learning_rate": 4.504965662221835e-06, "loss": 0.1239, "step": 6279 }, { "epoch": 2.08, "learning_rate": 4.501972090345929e-06, "loss": 0.1852, "step": 6280 }, { "epoch": 2.08, "learning_rate": 4.498979224469964e-06, "loss": 0.1523, "step": 6281 }, { "epoch": 2.08, "learning_rate": 4.495987064978262e-06, "loss": 0.1029, "step": 6282 }, { "epoch": 2.09, "learning_rate": 4.492995612255038e-06, "loss": 0.1951, "step": 6283 }, { "epoch": 2.09, "learning_rate": 4.490004866684433e-06, "loss": 0.1055, "step": 6284 }, { "epoch": 2.09, "learning_rate": 4.4870148286504734e-06, "loss": 0.1438, "step": 6285 }, { "epoch": 2.09, "learning_rate": 4.484025498537121e-06, "loss": 0.1121, "step": 6286 }, { "epoch": 2.09, "learning_rate": 4.4810368767282276e-06, "loss": 0.1382, "step": 6287 }, { "epoch": 2.09, "learning_rate": 4.4780489636075666e-06, "loss": 0.1393, "step": 6288 }, { "epoch": 2.09, "learning_rate": 4.475061759558812e-06, "loss": 0.1126, "step": 6289 }, { "epoch": 2.09, "learning_rate": 4.472075264965546e-06, "loss": 0.1143, "step": 6290 }, { "epoch": 2.09, "learning_rate": 4.469089480211276e-06, "loss": 0.2153, "step": 6291 }, { "epoch": 2.09, "learning_rate": 4.466104405679389e-06, "loss": 0.1472, "step": 6292 }, { "epoch": 2.09, "learning_rate": 4.463120041753209e-06, "loss": 0.2135, "step": 6293 }, { "epoch": 2.09, "learning_rate": 4.4601363888159495e-06, "loss": 0.1397, "step": 6294 }, { "epoch": 2.09, "learning_rate": 4.4571534472507485e-06, "loss": 0.1261, "step": 6295 }, { "epoch": 2.09, "learning_rate": 4.454171217440637e-06, "loss": 0.168, "step": 6296 }, { "epoch": 2.09, "learning_rate": 4.45118969976857e-06, "loss": 0.1602, "step": 6297 }, { "epoch": 2.09, "learning_rate": 4.4482088946173976e-06, "loss": 0.1406, "step": 6298 }, { "epoch": 2.09, "learning_rate": 4.445228802369885e-06, "loss": 0.1361, "step": 6299 }, { "epoch": 2.09, "learning_rate": 4.442249423408707e-06, "loss": 0.1991, "step": 6300 }, { "epoch": 2.09, "learning_rate": 4.439270758116437e-06, "loss": 0.1481, "step": 6301 }, { "epoch": 2.09, "learning_rate": 4.436292806875575e-06, "loss": 0.1155, "step": 6302 }, { "epoch": 2.09, "learning_rate": 4.4333155700685105e-06, "loss": 0.1298, "step": 6303 }, { "epoch": 2.09, "learning_rate": 4.430339048077557e-06, "loss": 0.165, "step": 6304 }, { "epoch": 2.09, "learning_rate": 4.427363241284923e-06, "loss": 0.1153, "step": 6305 }, { "epoch": 2.09, "learning_rate": 4.424388150072734e-06, "loss": 0.1576, "step": 6306 }, { "epoch": 2.09, "learning_rate": 4.421413774823014e-06, "loss": 0.1334, "step": 6307 }, { "epoch": 2.09, "learning_rate": 4.418440115917712e-06, "loss": 0.1537, "step": 6308 }, { "epoch": 2.09, "learning_rate": 4.4154671737386675e-06, "loss": 0.1506, "step": 6309 }, { "epoch": 2.09, "learning_rate": 4.4124949486676335e-06, "loss": 0.119, "step": 6310 }, { "epoch": 2.09, "learning_rate": 4.4095234410862785e-06, "loss": 0.1649, "step": 6311 }, { "epoch": 2.09, "learning_rate": 4.406552651376171e-06, "loss": 0.1786, "step": 6312 }, { "epoch": 2.1, "learning_rate": 4.403582579918787e-06, "loss": 0.1675, "step": 6313 }, { "epoch": 2.1, "learning_rate": 4.40061322709551e-06, "loss": 0.1243, "step": 6314 }, { "epoch": 2.1, "learning_rate": 4.3976445932876426e-06, "loss": 0.0992, "step": 6315 }, { "epoch": 2.1, "learning_rate": 4.3946766788763775e-06, "loss": 0.1465, "step": 6316 }, { "epoch": 2.1, "learning_rate": 4.391709484242824e-06, "loss": 0.1403, "step": 6317 }, { "epoch": 2.1, "learning_rate": 4.388743009768005e-06, "loss": 0.1368, "step": 6318 }, { "epoch": 2.1, "learning_rate": 4.3857772558328415e-06, "loss": 0.1552, "step": 6319 }, { "epoch": 2.1, "learning_rate": 4.382812222818162e-06, "loss": 0.1329, "step": 6320 }, { "epoch": 2.1, "learning_rate": 4.379847911104707e-06, "loss": 0.1116, "step": 6321 }, { "epoch": 2.1, "learning_rate": 4.376884321073124e-06, "loss": 0.1468, "step": 6322 }, { "epoch": 2.1, "learning_rate": 4.373921453103964e-06, "loss": 0.13, "step": 6323 }, { "epoch": 2.1, "learning_rate": 4.370959307577693e-06, "loss": 0.1827, "step": 6324 }, { "epoch": 2.1, "learning_rate": 4.367997884874677e-06, "loss": 0.1382, "step": 6325 }, { "epoch": 2.1, "learning_rate": 4.365037185375188e-06, "loss": 0.1945, "step": 6326 }, { "epoch": 2.1, "learning_rate": 4.362077209459409e-06, "loss": 0.1725, "step": 6327 }, { "epoch": 2.1, "learning_rate": 4.359117957507434e-06, "loss": 0.1067, "step": 6328 }, { "epoch": 2.1, "learning_rate": 4.356159429899257e-06, "loss": 0.1818, "step": 6329 }, { "epoch": 2.1, "learning_rate": 4.353201627014778e-06, "loss": 0.1316, "step": 6330 }, { "epoch": 2.1, "learning_rate": 4.350244549233814e-06, "loss": 0.1478, "step": 6331 }, { "epoch": 2.1, "learning_rate": 4.347288196936076e-06, "loss": 0.1487, "step": 6332 }, { "epoch": 2.1, "learning_rate": 4.3443325705012e-06, "loss": 0.1004, "step": 6333 }, { "epoch": 2.1, "learning_rate": 4.3413776703087e-06, "loss": 0.104, "step": 6334 }, { "epoch": 2.1, "learning_rate": 4.338423496738024e-06, "loss": 0.1948, "step": 6335 }, { "epoch": 2.1, "learning_rate": 4.335470050168518e-06, "loss": 0.132, "step": 6336 }, { "epoch": 2.1, "learning_rate": 4.332517330979425e-06, "loss": 0.1528, "step": 6337 }, { "epoch": 2.1, "learning_rate": 4.3295653395499105e-06, "loss": 0.1385, "step": 6338 }, { "epoch": 2.1, "learning_rate": 4.326614076259033e-06, "loss": 0.1493, "step": 6339 }, { "epoch": 2.1, "learning_rate": 4.323663541485772e-06, "loss": 0.1751, "step": 6340 }, { "epoch": 2.1, "learning_rate": 4.3207137356089925e-06, "loss": 0.1643, "step": 6341 }, { "epoch": 2.1, "learning_rate": 4.317764659007487e-06, "loss": 0.1268, "step": 6342 }, { "epoch": 2.11, "learning_rate": 4.31481631205994e-06, "loss": 0.14, "step": 6343 }, { "epoch": 2.11, "learning_rate": 4.3118686951449525e-06, "loss": 0.0977, "step": 6344 }, { "epoch": 2.11, "learning_rate": 4.308921808641027e-06, "loss": 0.1139, "step": 6345 }, { "epoch": 2.11, "learning_rate": 4.3059756529265695e-06, "loss": 0.1627, "step": 6346 }, { "epoch": 2.11, "learning_rate": 4.303030228379895e-06, "loss": 0.1964, "step": 6347 }, { "epoch": 2.11, "learning_rate": 4.300085535379223e-06, "loss": 0.1558, "step": 6348 }, { "epoch": 2.11, "learning_rate": 4.297141574302685e-06, "loss": 0.1747, "step": 6349 }, { "epoch": 2.11, "learning_rate": 4.294198345528308e-06, "loss": 0.1466, "step": 6350 }, { "epoch": 2.11, "learning_rate": 4.2912558494340385e-06, "loss": 0.1774, "step": 6351 }, { "epoch": 2.11, "learning_rate": 4.288314086397715e-06, "loss": 0.1422, "step": 6352 }, { "epoch": 2.11, "learning_rate": 4.285373056797097e-06, "loss": 0.1623, "step": 6353 }, { "epoch": 2.11, "learning_rate": 4.2824327610098286e-06, "loss": 0.1465, "step": 6354 }, { "epoch": 2.11, "learning_rate": 4.279493199413482e-06, "loss": 0.1182, "step": 6355 }, { "epoch": 2.11, "learning_rate": 4.276554372385522e-06, "loss": 0.1641, "step": 6356 }, { "epoch": 2.11, "learning_rate": 4.27361628030332e-06, "loss": 0.1239, "step": 6357 }, { "epoch": 2.11, "learning_rate": 4.2706789235441606e-06, "loss": 0.1584, "step": 6358 }, { "epoch": 2.11, "learning_rate": 4.267742302485223e-06, "loss": 0.0944, "step": 6359 }, { "epoch": 2.11, "learning_rate": 4.264806417503608e-06, "loss": 0.1425, "step": 6360 }, { "epoch": 2.11, "learning_rate": 4.261871268976296e-06, "loss": 0.1655, "step": 6361 }, { "epoch": 2.11, "learning_rate": 4.258936857280201e-06, "loss": 0.1584, "step": 6362 }, { "epoch": 2.11, "learning_rate": 4.2560031827921235e-06, "loss": 0.1275, "step": 6363 }, { "epoch": 2.11, "learning_rate": 4.253070245888781e-06, "loss": 0.1546, "step": 6364 }, { "epoch": 2.11, "learning_rate": 4.2501380469467875e-06, "loss": 0.1918, "step": 6365 }, { "epoch": 2.11, "learning_rate": 4.247206586342663e-06, "loss": 0.1741, "step": 6366 }, { "epoch": 2.11, "learning_rate": 4.244275864452845e-06, "loss": 0.1327, "step": 6367 }, { "epoch": 2.11, "learning_rate": 4.241345881653653e-06, "loss": 0.1151, "step": 6368 }, { "epoch": 2.11, "learning_rate": 4.238416638321337e-06, "loss": 0.1616, "step": 6369 }, { "epoch": 2.11, "learning_rate": 4.23548813483203e-06, "loss": 0.134, "step": 6370 }, { "epoch": 2.11, "learning_rate": 4.232560371561791e-06, "loss": 0.1732, "step": 6371 }, { "epoch": 2.11, "learning_rate": 4.229633348886562e-06, "loss": 0.1301, "step": 6372 }, { "epoch": 2.12, "learning_rate": 4.226707067182211e-06, "loss": 0.2125, "step": 6373 }, { "epoch": 2.12, "learning_rate": 4.223781526824496e-06, "loss": 0.1834, "step": 6374 }, { "epoch": 2.12, "learning_rate": 4.220856728189085e-06, "loss": 0.1051, "step": 6375 }, { "epoch": 2.12, "learning_rate": 4.2179326716515525e-06, "loss": 0.1478, "step": 6376 }, { "epoch": 2.12, "learning_rate": 4.2150093575873685e-06, "loss": 0.1352, "step": 6377 }, { "epoch": 2.12, "learning_rate": 4.212086786371924e-06, "loss": 0.14, "step": 6378 }, { "epoch": 2.12, "learning_rate": 4.209164958380499e-06, "loss": 0.1503, "step": 6379 }, { "epoch": 2.12, "learning_rate": 4.206243873988288e-06, "loss": 0.0903, "step": 6380 }, { "epoch": 2.12, "learning_rate": 4.203323533570387e-06, "loss": 0.1429, "step": 6381 }, { "epoch": 2.12, "learning_rate": 4.200403937501793e-06, "loss": 0.0889, "step": 6382 }, { "epoch": 2.12, "learning_rate": 4.197485086157409e-06, "loss": 0.1624, "step": 6383 }, { "epoch": 2.12, "learning_rate": 4.1945669799120506e-06, "loss": 0.1219, "step": 6384 }, { "epoch": 2.12, "learning_rate": 4.191649619140426e-06, "loss": 0.0951, "step": 6385 }, { "epoch": 2.12, "learning_rate": 4.188733004217149e-06, "loss": 0.1159, "step": 6386 }, { "epoch": 2.12, "learning_rate": 4.18581713551675e-06, "loss": 0.1447, "step": 6387 }, { "epoch": 2.12, "learning_rate": 4.182902013413649e-06, "loss": 0.1118, "step": 6388 }, { "epoch": 2.12, "learning_rate": 4.179987638282178e-06, "loss": 0.1034, "step": 6389 }, { "epoch": 2.12, "learning_rate": 4.177074010496566e-06, "loss": 0.1463, "step": 6390 }, { "epoch": 2.12, "learning_rate": 4.1741611304309585e-06, "loss": 0.1637, "step": 6391 }, { "epoch": 2.12, "learning_rate": 4.171248998459389e-06, "loss": 0.1321, "step": 6392 }, { "epoch": 2.12, "learning_rate": 4.168337614955813e-06, "loss": 0.0941, "step": 6393 }, { "epoch": 2.12, "learning_rate": 4.165426980294076e-06, "loss": 0.1429, "step": 6394 }, { "epoch": 2.12, "learning_rate": 4.162517094847931e-06, "loss": 0.1616, "step": 6395 }, { "epoch": 2.12, "learning_rate": 4.159607958991035e-06, "loss": 0.1295, "step": 6396 }, { "epoch": 2.12, "learning_rate": 4.156699573096946e-06, "loss": 0.1004, "step": 6397 }, { "epoch": 2.12, "learning_rate": 4.153791937539136e-06, "loss": 0.1594, "step": 6398 }, { "epoch": 2.12, "learning_rate": 4.150885052690968e-06, "loss": 0.1751, "step": 6399 }, { "epoch": 2.12, "learning_rate": 4.147978918925719e-06, "loss": 0.1271, "step": 6400 }, { "epoch": 2.12, "learning_rate": 4.145073536616562e-06, "loss": 0.1856, "step": 6401 }, { "epoch": 2.12, "learning_rate": 4.142168906136575e-06, "loss": 0.1142, "step": 6402 }, { "epoch": 2.13, "learning_rate": 4.1392650278587396e-06, "loss": 0.1323, "step": 6403 }, { "epoch": 2.13, "learning_rate": 4.136361902155947e-06, "loss": 0.1708, "step": 6404 }, { "epoch": 2.13, "learning_rate": 4.133459529400985e-06, "loss": 0.1925, "step": 6405 }, { "epoch": 2.13, "learning_rate": 4.13055790996654e-06, "loss": 0.1412, "step": 6406 }, { "epoch": 2.13, "learning_rate": 4.1276570442252175e-06, "loss": 0.0745, "step": 6407 }, { "epoch": 2.13, "learning_rate": 4.124756932549508e-06, "loss": 0.0884, "step": 6408 }, { "epoch": 2.13, "learning_rate": 4.121857575311825e-06, "loss": 0.1625, "step": 6409 }, { "epoch": 2.13, "learning_rate": 4.1189589728844605e-06, "loss": 0.1812, "step": 6410 }, { "epoch": 2.13, "learning_rate": 4.116061125639633e-06, "loss": 0.0917, "step": 6411 }, { "epoch": 2.13, "learning_rate": 4.11316403394945e-06, "loss": 0.1143, "step": 6412 }, { "epoch": 2.13, "learning_rate": 4.110267698185924e-06, "loss": 0.1069, "step": 6413 }, { "epoch": 2.13, "learning_rate": 4.107372118720979e-06, "loss": 0.1524, "step": 6414 }, { "epoch": 2.13, "learning_rate": 4.104477295926428e-06, "loss": 0.1838, "step": 6415 }, { "epoch": 2.13, "learning_rate": 4.101583230174005e-06, "loss": 0.2007, "step": 6416 }, { "epoch": 2.13, "learning_rate": 4.0986899218353214e-06, "loss": 0.144, "step": 6417 }, { "epoch": 2.13, "learning_rate": 4.0957973712819185e-06, "loss": 0.1342, "step": 6418 }, { "epoch": 2.13, "learning_rate": 4.092905578885218e-06, "loss": 0.1658, "step": 6419 }, { "epoch": 2.13, "learning_rate": 4.090014545016563e-06, "loss": 0.1717, "step": 6420 }, { "epoch": 2.13, "learning_rate": 4.087124270047186e-06, "loss": 0.1076, "step": 6421 }, { "epoch": 2.13, "learning_rate": 4.084234754348223e-06, "loss": 0.1401, "step": 6422 }, { "epoch": 2.13, "learning_rate": 4.081345998290726e-06, "loss": 0.1406, "step": 6423 }, { "epoch": 2.13, "learning_rate": 4.078458002245624e-06, "loss": 0.2033, "step": 6424 }, { "epoch": 2.13, "learning_rate": 4.075570766583777e-06, "loss": 0.1121, "step": 6425 }, { "epoch": 2.13, "learning_rate": 4.0726842916759245e-06, "loss": 0.1836, "step": 6426 }, { "epoch": 2.13, "learning_rate": 4.069798577892726e-06, "loss": 0.1639, "step": 6427 }, { "epoch": 2.13, "learning_rate": 4.066913625604728e-06, "loss": 0.1249, "step": 6428 }, { "epoch": 2.13, "learning_rate": 4.064029435182396e-06, "loss": 0.1864, "step": 6429 }, { "epoch": 2.13, "learning_rate": 4.061146006996075e-06, "loss": 0.1387, "step": 6430 }, { "epoch": 2.13, "learning_rate": 4.058263341416034e-06, "loss": 0.1627, "step": 6431 }, { "epoch": 2.13, "learning_rate": 4.055381438812433e-06, "loss": 0.1352, "step": 6432 }, { "epoch": 2.14, "learning_rate": 4.052500299555333e-06, "loss": 0.1122, "step": 6433 }, { "epoch": 2.14, "learning_rate": 4.049619924014705e-06, "loss": 0.202, "step": 6434 }, { "epoch": 2.14, "learning_rate": 4.0467403125604125e-06, "loss": 0.1343, "step": 6435 }, { "epoch": 2.14, "learning_rate": 4.043861465562234e-06, "loss": 0.1663, "step": 6436 }, { "epoch": 2.14, "learning_rate": 4.0409833833898295e-06, "loss": 0.1271, "step": 6437 }, { "epoch": 2.14, "learning_rate": 4.038106066412781e-06, "loss": 0.1024, "step": 6438 }, { "epoch": 2.14, "learning_rate": 4.0352295150005586e-06, "loss": 0.1633, "step": 6439 }, { "epoch": 2.14, "learning_rate": 4.032353729522545e-06, "loss": 0.1687, "step": 6440 }, { "epoch": 2.14, "learning_rate": 4.029478710348015e-06, "loss": 0.1255, "step": 6441 }, { "epoch": 2.14, "learning_rate": 4.026604457846146e-06, "loss": 0.1035, "step": 6442 }, { "epoch": 2.14, "learning_rate": 4.02373097238603e-06, "loss": 0.1548, "step": 6443 }, { "epoch": 2.14, "learning_rate": 4.020858254336638e-06, "loss": 0.1332, "step": 6444 }, { "epoch": 2.14, "learning_rate": 4.017986304066863e-06, "loss": 0.1743, "step": 6445 }, { "epoch": 2.14, "learning_rate": 4.015115121945485e-06, "loss": 0.1176, "step": 6446 }, { "epoch": 2.14, "learning_rate": 4.0122447083411996e-06, "loss": 0.1452, "step": 6447 }, { "epoch": 2.14, "learning_rate": 4.009375063622588e-06, "loss": 0.138, "step": 6448 }, { "epoch": 2.14, "learning_rate": 4.006506188158146e-06, "loss": 0.1405, "step": 6449 }, { "epoch": 2.14, "learning_rate": 4.003638082316263e-06, "loss": 0.1277, "step": 6450 }, { "epoch": 2.14, "learning_rate": 4.0007707464652314e-06, "loss": 0.1711, "step": 6451 }, { "epoch": 2.14, "learning_rate": 3.997904180973244e-06, "loss": 0.1089, "step": 6452 }, { "epoch": 2.14, "learning_rate": 3.995038386208393e-06, "loss": 0.1258, "step": 6453 }, { "epoch": 2.14, "learning_rate": 3.99217336253868e-06, "loss": 0.1328, "step": 6454 }, { "epoch": 2.14, "learning_rate": 3.989309110331996e-06, "loss": 0.1365, "step": 6455 }, { "epoch": 2.14, "learning_rate": 3.986445629956145e-06, "loss": 0.1229, "step": 6456 }, { "epoch": 2.14, "learning_rate": 3.983582921778821e-06, "loss": 0.12, "step": 6457 }, { "epoch": 2.14, "learning_rate": 3.9807209861676265e-06, "loss": 0.1923, "step": 6458 }, { "epoch": 2.14, "learning_rate": 3.977859823490055e-06, "loss": 0.1275, "step": 6459 }, { "epoch": 2.14, "learning_rate": 3.974999434113517e-06, "loss": 0.1404, "step": 6460 }, { "epoch": 2.14, "learning_rate": 3.9721398184053085e-06, "loss": 0.1274, "step": 6461 }, { "epoch": 2.14, "learning_rate": 3.969280976732629e-06, "loss": 0.1276, "step": 6462 }, { "epoch": 2.15, "learning_rate": 3.966422909462589e-06, "loss": 0.1292, "step": 6463 }, { "epoch": 2.15, "learning_rate": 3.963565616962188e-06, "loss": 0.1429, "step": 6464 }, { "epoch": 2.15, "learning_rate": 3.960709099598331e-06, "loss": 0.1461, "step": 6465 }, { "epoch": 2.15, "learning_rate": 3.957853357737817e-06, "loss": 0.1467, "step": 6466 }, { "epoch": 2.15, "learning_rate": 3.954998391747359e-06, "loss": 0.2008, "step": 6467 }, { "epoch": 2.15, "learning_rate": 3.952144201993555e-06, "loss": 0.1338, "step": 6468 }, { "epoch": 2.15, "learning_rate": 3.9492907888429186e-06, "loss": 0.1474, "step": 6469 }, { "epoch": 2.15, "learning_rate": 3.9464381526618516e-06, "loss": 0.1188, "step": 6470 }, { "epoch": 2.15, "learning_rate": 3.943586293816658e-06, "loss": 0.1623, "step": 6471 }, { "epoch": 2.15, "learning_rate": 3.940735212673547e-06, "loss": 0.1556, "step": 6472 }, { "epoch": 2.15, "learning_rate": 3.93788490959862e-06, "loss": 0.1855, "step": 6473 }, { "epoch": 2.15, "learning_rate": 3.93503538495789e-06, "loss": 0.1484, "step": 6474 }, { "epoch": 2.15, "learning_rate": 3.932186639117258e-06, "loss": 0.1783, "step": 6475 }, { "epoch": 2.15, "learning_rate": 3.929338672442536e-06, "loss": 0.1253, "step": 6476 }, { "epoch": 2.15, "learning_rate": 3.926491485299429e-06, "loss": 0.1195, "step": 6477 }, { "epoch": 2.15, "learning_rate": 3.92364507805354e-06, "loss": 0.1201, "step": 6478 }, { "epoch": 2.15, "learning_rate": 3.920799451070376e-06, "loss": 0.1688, "step": 6479 }, { "epoch": 2.15, "learning_rate": 3.917954604715347e-06, "loss": 0.1482, "step": 6480 }, { "epoch": 2.15, "learning_rate": 3.915110539353756e-06, "loss": 0.1022, "step": 6481 }, { "epoch": 2.15, "learning_rate": 3.912267255350805e-06, "loss": 0.1016, "step": 6482 }, { "epoch": 2.15, "learning_rate": 3.909424753071607e-06, "loss": 0.183, "step": 6483 }, { "epoch": 2.15, "learning_rate": 3.906583032881157e-06, "loss": 0.1372, "step": 6484 }, { "epoch": 2.15, "learning_rate": 3.903742095144374e-06, "loss": 0.1553, "step": 6485 }, { "epoch": 2.15, "learning_rate": 3.900901940226045e-06, "loss": 0.1572, "step": 6486 }, { "epoch": 2.15, "learning_rate": 3.898062568490885e-06, "loss": 0.1584, "step": 6487 }, { "epoch": 2.15, "learning_rate": 3.895223980303492e-06, "loss": 0.164, "step": 6488 }, { "epoch": 2.15, "learning_rate": 3.8923861760283655e-06, "loss": 0.1697, "step": 6489 }, { "epoch": 2.15, "learning_rate": 3.8895491560299145e-06, "loss": 0.2128, "step": 6490 }, { "epoch": 2.15, "learning_rate": 3.886712920672433e-06, "loss": 0.1251, "step": 6491 }, { "epoch": 2.15, "learning_rate": 3.88387747032013e-06, "loss": 0.1921, "step": 6492 }, { "epoch": 2.15, "learning_rate": 3.881042805337092e-06, "loss": 0.1341, "step": 6493 }, { "epoch": 2.16, "learning_rate": 3.878208926087328e-06, "loss": 0.1207, "step": 6494 }, { "epoch": 2.16, "learning_rate": 3.875375832934727e-06, "loss": 0.1653, "step": 6495 }, { "epoch": 2.16, "learning_rate": 3.8725435262430945e-06, "loss": 0.1934, "step": 6496 }, { "epoch": 2.16, "learning_rate": 3.869712006376121e-06, "loss": 0.1386, "step": 6497 }, { "epoch": 2.16, "learning_rate": 3.866881273697398e-06, "loss": 0.1639, "step": 6498 }, { "epoch": 2.16, "learning_rate": 3.8640513285704306e-06, "loss": 0.1789, "step": 6499 }, { "epoch": 2.16, "learning_rate": 3.861222171358596e-06, "loss": 0.131, "step": 6500 }, { "epoch": 2.16, "learning_rate": 3.858393802425196e-06, "loss": 0.135, "step": 6501 }, { "epoch": 2.16, "learning_rate": 3.855566222133412e-06, "loss": 0.1652, "step": 6502 }, { "epoch": 2.16, "learning_rate": 3.852739430846343e-06, "loss": 0.1257, "step": 6503 }, { "epoch": 2.16, "learning_rate": 3.849913428926967e-06, "loss": 0.1466, "step": 6504 }, { "epoch": 2.16, "learning_rate": 3.847088216738177e-06, "loss": 0.1796, "step": 6505 }, { "epoch": 2.16, "learning_rate": 3.844263794642755e-06, "loss": 0.1468, "step": 6506 }, { "epoch": 2.16, "learning_rate": 3.841440163003383e-06, "loss": 0.1539, "step": 6507 }, { "epoch": 2.16, "learning_rate": 3.838617322182644e-06, "loss": 0.0979, "step": 6508 }, { "epoch": 2.16, "learning_rate": 3.835795272543014e-06, "loss": 0.1464, "step": 6509 }, { "epoch": 2.16, "learning_rate": 3.832974014446877e-06, "loss": 0.1884, "step": 6510 }, { "epoch": 2.16, "learning_rate": 3.830153548256505e-06, "loss": 0.1848, "step": 6511 }, { "epoch": 2.16, "learning_rate": 3.827333874334084e-06, "loss": 0.1123, "step": 6512 }, { "epoch": 2.16, "learning_rate": 3.824514993041671e-06, "loss": 0.1855, "step": 6513 }, { "epoch": 2.16, "learning_rate": 3.821696904741248e-06, "loss": 0.1362, "step": 6514 }, { "epoch": 2.16, "learning_rate": 3.81887960979468e-06, "loss": 0.1382, "step": 6515 }, { "epoch": 2.16, "learning_rate": 3.816063108563741e-06, "loss": 0.1036, "step": 6516 }, { "epoch": 2.16, "learning_rate": 3.8132474014100928e-06, "loss": 0.1147, "step": 6517 }, { "epoch": 2.16, "learning_rate": 3.810432488695297e-06, "loss": 0.112, "step": 6518 }, { "epoch": 2.16, "learning_rate": 3.807618370780827e-06, "loss": 0.1028, "step": 6519 }, { "epoch": 2.16, "learning_rate": 3.804805048028025e-06, "loss": 0.147, "step": 6520 }, { "epoch": 2.16, "learning_rate": 3.8019925207981635e-06, "loss": 0.1483, "step": 6521 }, { "epoch": 2.16, "learning_rate": 3.799180789452389e-06, "loss": 0.162, "step": 6522 }, { "epoch": 2.16, "learning_rate": 3.7963698543517625e-06, "loss": 0.1541, "step": 6523 }, { "epoch": 2.17, "learning_rate": 3.7935597158572266e-06, "loss": 0.1408, "step": 6524 }, { "epoch": 2.17, "learning_rate": 3.79075037432964e-06, "loss": 0.099, "step": 6525 }, { "epoch": 2.17, "learning_rate": 3.7879418301297456e-06, "loss": 0.1377, "step": 6526 }, { "epoch": 2.17, "learning_rate": 3.785134083618186e-06, "loss": 0.1459, "step": 6527 }, { "epoch": 2.17, "learning_rate": 3.7823271351555034e-06, "loss": 0.1935, "step": 6528 }, { "epoch": 2.17, "learning_rate": 3.779520985102134e-06, "loss": 0.1841, "step": 6529 }, { "epoch": 2.17, "learning_rate": 3.776715633818422e-06, "loss": 0.157, "step": 6530 }, { "epoch": 2.17, "learning_rate": 3.773911081664594e-06, "loss": 0.1537, "step": 6531 }, { "epoch": 2.17, "learning_rate": 3.771107329000788e-06, "loss": 0.134, "step": 6532 }, { "epoch": 2.17, "learning_rate": 3.768304376187031e-06, "loss": 0.1194, "step": 6533 }, { "epoch": 2.17, "learning_rate": 3.765502223583248e-06, "loss": 0.1044, "step": 6534 }, { "epoch": 2.17, "learning_rate": 3.7627008715492597e-06, "loss": 0.1437, "step": 6535 }, { "epoch": 2.17, "learning_rate": 3.7599003204447928e-06, "loss": 0.1271, "step": 6536 }, { "epoch": 2.17, "learning_rate": 3.757100570629463e-06, "loss": 0.189, "step": 6537 }, { "epoch": 2.17, "learning_rate": 3.7543016224627815e-06, "loss": 0.1435, "step": 6538 }, { "epoch": 2.17, "learning_rate": 3.751503476304167e-06, "loss": 0.1443, "step": 6539 }, { "epoch": 2.17, "learning_rate": 3.7487061325129238e-06, "loss": 0.1405, "step": 6540 }, { "epoch": 2.17, "learning_rate": 3.74590959144826e-06, "loss": 0.1215, "step": 6541 }, { "epoch": 2.17, "learning_rate": 3.7431138534692736e-06, "loss": 0.1738, "step": 6542 }, { "epoch": 2.17, "learning_rate": 3.740318918934972e-06, "loss": 0.0888, "step": 6543 }, { "epoch": 2.17, "learning_rate": 3.737524788204244e-06, "loss": 0.0993, "step": 6544 }, { "epoch": 2.17, "learning_rate": 3.734731461635892e-06, "loss": 0.0928, "step": 6545 }, { "epoch": 2.17, "learning_rate": 3.7319389395886008e-06, "loss": 0.1078, "step": 6546 }, { "epoch": 2.17, "learning_rate": 3.7291472224209567e-06, "loss": 0.1295, "step": 6547 }, { "epoch": 2.17, "learning_rate": 3.7263563104914457e-06, "loss": 0.1653, "step": 6548 }, { "epoch": 2.17, "learning_rate": 3.7235662041584418e-06, "loss": 0.1131, "step": 6549 }, { "epoch": 2.17, "learning_rate": 3.720776903780231e-06, "loss": 0.101, "step": 6550 }, { "epoch": 2.17, "learning_rate": 3.7179884097149767e-06, "loss": 0.1199, "step": 6551 }, { "epoch": 2.17, "learning_rate": 3.7152007223207577e-06, "loss": 0.1357, "step": 6552 }, { "epoch": 2.17, "learning_rate": 3.7124138419555357e-06, "loss": 0.167, "step": 6553 }, { "epoch": 2.18, "learning_rate": 3.709627768977173e-06, "loss": 0.1426, "step": 6554 }, { "epoch": 2.18, "learning_rate": 3.706842503743425e-06, "loss": 0.1178, "step": 6555 }, { "epoch": 2.18, "learning_rate": 3.7040580466119545e-06, "loss": 0.1065, "step": 6556 }, { "epoch": 2.18, "learning_rate": 3.7012743979403076e-06, "loss": 0.1492, "step": 6557 }, { "epoch": 2.18, "learning_rate": 3.6984915580859293e-06, "loss": 0.1624, "step": 6558 }, { "epoch": 2.18, "learning_rate": 3.6957095274061692e-06, "loss": 0.1425, "step": 6559 }, { "epoch": 2.18, "learning_rate": 3.692928306258261e-06, "loss": 0.1119, "step": 6560 }, { "epoch": 2.18, "learning_rate": 3.6901478949993506e-06, "loss": 0.1522, "step": 6561 }, { "epoch": 2.18, "learning_rate": 3.6873682939864564e-06, "loss": 0.1376, "step": 6562 }, { "epoch": 2.18, "learning_rate": 3.6845895035765143e-06, "loss": 0.1269, "step": 6563 }, { "epoch": 2.18, "learning_rate": 3.6818115241263463e-06, "loss": 0.1846, "step": 6564 }, { "epoch": 2.18, "learning_rate": 3.6790343559926676e-06, "loss": 0.1867, "step": 6565 }, { "epoch": 2.18, "learning_rate": 3.6762579995321002e-06, "loss": 0.1058, "step": 6566 }, { "epoch": 2.18, "learning_rate": 3.6734824551011493e-06, "loss": 0.0962, "step": 6567 }, { "epoch": 2.18, "learning_rate": 3.6707077230562316e-06, "loss": 0.1454, "step": 6568 }, { "epoch": 2.18, "learning_rate": 3.667933803753635e-06, "loss": 0.139, "step": 6569 }, { "epoch": 2.18, "learning_rate": 3.665160697549568e-06, "loss": 0.1348, "step": 6570 }, { "epoch": 2.18, "learning_rate": 3.662388404800118e-06, "loss": 0.1209, "step": 6571 }, { "epoch": 2.18, "learning_rate": 3.65961692586128e-06, "loss": 0.1265, "step": 6572 }, { "epoch": 2.18, "learning_rate": 3.6568462610889366e-06, "loss": 0.1378, "step": 6573 }, { "epoch": 2.18, "learning_rate": 3.654076410838864e-06, "loss": 0.1799, "step": 6574 }, { "epoch": 2.18, "learning_rate": 3.651307375466745e-06, "loss": 0.1294, "step": 6575 }, { "epoch": 2.18, "learning_rate": 3.648539155328146e-06, "loss": 0.1325, "step": 6576 }, { "epoch": 2.18, "learning_rate": 3.645771750778534e-06, "loss": 0.1015, "step": 6577 }, { "epoch": 2.18, "learning_rate": 3.6430051621732665e-06, "loss": 0.1405, "step": 6578 }, { "epoch": 2.18, "learning_rate": 3.640239389867608e-06, "loss": 0.1302, "step": 6579 }, { "epoch": 2.18, "learning_rate": 3.6374744342167033e-06, "loss": 0.1217, "step": 6580 }, { "epoch": 2.18, "learning_rate": 3.6347102955756053e-06, "loss": 0.1503, "step": 6581 }, { "epoch": 2.18, "learning_rate": 3.631946974299254e-06, "loss": 0.1202, "step": 6582 }, { "epoch": 2.18, "learning_rate": 3.6291844707424863e-06, "loss": 0.0803, "step": 6583 }, { "epoch": 2.19, "learning_rate": 3.626422785260034e-06, "loss": 0.1316, "step": 6584 }, { "epoch": 2.19, "learning_rate": 3.6236619182065205e-06, "loss": 0.134, "step": 6585 }, { "epoch": 2.19, "learning_rate": 3.6209018699364752e-06, "loss": 0.1583, "step": 6586 }, { "epoch": 2.19, "learning_rate": 3.6181426408043076e-06, "loss": 0.1632, "step": 6587 }, { "epoch": 2.19, "learning_rate": 3.6153842311643407e-06, "loss": 0.1096, "step": 6588 }, { "epoch": 2.19, "learning_rate": 3.6126266413707666e-06, "loss": 0.1578, "step": 6589 }, { "epoch": 2.19, "learning_rate": 3.609869871777697e-06, "loss": 0.1752, "step": 6590 }, { "epoch": 2.19, "learning_rate": 3.6071139227391206e-06, "loss": 0.1404, "step": 6591 }, { "epoch": 2.19, "learning_rate": 3.604358794608934e-06, "loss": 0.1523, "step": 6592 }, { "epoch": 2.19, "learning_rate": 3.6016044877409207e-06, "loss": 0.1072, "step": 6593 }, { "epoch": 2.19, "learning_rate": 3.5988510024887557e-06, "loss": 0.139, "step": 6594 }, { "epoch": 2.19, "learning_rate": 3.5960983392060244e-06, "loss": 0.1293, "step": 6595 }, { "epoch": 2.19, "learning_rate": 3.5933464982461807e-06, "loss": 0.1639, "step": 6596 }, { "epoch": 2.19, "learning_rate": 3.5905954799625977e-06, "loss": 0.1492, "step": 6597 }, { "epoch": 2.19, "learning_rate": 3.5878452847085267e-06, "loss": 0.1818, "step": 6598 }, { "epoch": 2.19, "learning_rate": 3.5850959128371254e-06, "loss": 0.0652, "step": 6599 }, { "epoch": 2.19, "learning_rate": 3.582347364701433e-06, "loss": 0.1285, "step": 6600 }, { "epoch": 2.19, "learning_rate": 3.5795996406543977e-06, "loss": 0.1355, "step": 6601 }, { "epoch": 2.19, "learning_rate": 3.5768527410488484e-06, "loss": 0.1715, "step": 6602 }, { "epoch": 2.19, "learning_rate": 3.5741066662375157e-06, "loss": 0.154, "step": 6603 }, { "epoch": 2.19, "learning_rate": 3.5713614165730216e-06, "loss": 0.1286, "step": 6604 }, { "epoch": 2.19, "learning_rate": 3.568616992407877e-06, "loss": 0.1313, "step": 6605 }, { "epoch": 2.19, "learning_rate": 3.5658733940945033e-06, "loss": 0.0933, "step": 6606 }, { "epoch": 2.19, "learning_rate": 3.563130621985195e-06, "loss": 0.1804, "step": 6607 }, { "epoch": 2.19, "learning_rate": 3.560388676432158e-06, "loss": 0.1582, "step": 6608 }, { "epoch": 2.19, "learning_rate": 3.5576475577874836e-06, "loss": 0.1219, "step": 6609 }, { "epoch": 2.19, "learning_rate": 3.5549072664031546e-06, "loss": 0.1284, "step": 6610 }, { "epoch": 2.19, "learning_rate": 3.5521678026310492e-06, "loss": 0.135, "step": 6611 }, { "epoch": 2.19, "learning_rate": 3.5494291668229475e-06, "loss": 0.1328, "step": 6612 }, { "epoch": 2.19, "learning_rate": 3.546691359330513e-06, "loss": 0.1362, "step": 6613 }, { "epoch": 2.2, "learning_rate": 3.5439543805053047e-06, "loss": 0.1208, "step": 6614 }, { "epoch": 2.2, "learning_rate": 3.541218230698783e-06, "loss": 0.1611, "step": 6615 }, { "epoch": 2.2, "learning_rate": 3.538482910262292e-06, "loss": 0.1579, "step": 6616 }, { "epoch": 2.2, "learning_rate": 3.535748419547074e-06, "loss": 0.1478, "step": 6617 }, { "epoch": 2.2, "learning_rate": 3.533014758904261e-06, "loss": 0.1017, "step": 6618 }, { "epoch": 2.2, "learning_rate": 3.5302819286848875e-06, "loss": 0.1785, "step": 6619 }, { "epoch": 2.2, "learning_rate": 3.5275499292398686e-06, "loss": 0.1957, "step": 6620 }, { "epoch": 2.2, "learning_rate": 3.524818760920027e-06, "loss": 0.1777, "step": 6621 }, { "epoch": 2.2, "learning_rate": 3.5220884240760666e-06, "loss": 0.0956, "step": 6622 }, { "epoch": 2.2, "learning_rate": 3.5193589190585896e-06, "loss": 0.1519, "step": 6623 }, { "epoch": 2.2, "learning_rate": 3.5166302462180912e-06, "loss": 0.1369, "step": 6624 }, { "epoch": 2.2, "learning_rate": 3.5139024059049565e-06, "loss": 0.1335, "step": 6625 }, { "epoch": 2.2, "learning_rate": 3.5111753984694717e-06, "loss": 0.1168, "step": 6626 }, { "epoch": 2.2, "learning_rate": 3.508449224261805e-06, "loss": 0.1199, "step": 6627 }, { "epoch": 2.2, "learning_rate": 3.5057238836320294e-06, "loss": 0.1947, "step": 6628 }, { "epoch": 2.2, "learning_rate": 3.502999376930104e-06, "loss": 0.1333, "step": 6629 }, { "epoch": 2.2, "learning_rate": 3.50027570450588e-06, "loss": 0.0882, "step": 6630 }, { "epoch": 2.2, "learning_rate": 3.4975528667090997e-06, "loss": 0.1076, "step": 6631 }, { "epoch": 2.2, "learning_rate": 3.494830863889409e-06, "loss": 0.1254, "step": 6632 }, { "epoch": 2.2, "learning_rate": 3.492109696396335e-06, "loss": 0.1032, "step": 6633 }, { "epoch": 2.2, "learning_rate": 3.489389364579302e-06, "loss": 0.1436, "step": 6634 }, { "epoch": 2.2, "learning_rate": 3.48666986878763e-06, "loss": 0.1058, "step": 6635 }, { "epoch": 2.2, "learning_rate": 3.483951209370522e-06, "loss": 0.1171, "step": 6636 }, { "epoch": 2.2, "learning_rate": 3.481233386677093e-06, "loss": 0.1612, "step": 6637 }, { "epoch": 2.2, "learning_rate": 3.478516401056322e-06, "loss": 0.1391, "step": 6638 }, { "epoch": 2.2, "learning_rate": 3.4758002528571066e-06, "loss": 0.0632, "step": 6639 }, { "epoch": 2.2, "learning_rate": 3.4730849424282197e-06, "loss": 0.1082, "step": 6640 }, { "epoch": 2.2, "learning_rate": 3.4703704701183416e-06, "loss": 0.1155, "step": 6641 }, { "epoch": 2.2, "learning_rate": 3.467656836276031e-06, "loss": 0.1659, "step": 6642 }, { "epoch": 2.2, "learning_rate": 3.464944041249744e-06, "loss": 0.1412, "step": 6643 }, { "epoch": 2.21, "learning_rate": 3.4622320853878376e-06, "loss": 0.1201, "step": 6644 }, { "epoch": 2.21, "learning_rate": 3.459520969038541e-06, "loss": 0.097, "step": 6645 }, { "epoch": 2.21, "learning_rate": 3.4568106925499966e-06, "loss": 0.1617, "step": 6646 }, { "epoch": 2.21, "learning_rate": 3.4541012562702247e-06, "loss": 0.1691, "step": 6647 }, { "epoch": 2.21, "learning_rate": 3.4513926605471504e-06, "loss": 0.147, "step": 6648 }, { "epoch": 2.21, "learning_rate": 3.448684905728578e-06, "loss": 0.1266, "step": 6649 }, { "epoch": 2.21, "learning_rate": 3.445977992162207e-06, "loss": 0.1471, "step": 6650 }, { "epoch": 2.21, "learning_rate": 3.4432719201956387e-06, "loss": 0.1852, "step": 6651 }, { "epoch": 2.21, "learning_rate": 3.440566690176356e-06, "loss": 0.1179, "step": 6652 }, { "epoch": 2.21, "learning_rate": 3.4378623024517345e-06, "loss": 0.1523, "step": 6653 }, { "epoch": 2.21, "learning_rate": 3.4351587573690416e-06, "loss": 0.1456, "step": 6654 }, { "epoch": 2.21, "learning_rate": 3.432456055275445e-06, "loss": 0.1304, "step": 6655 }, { "epoch": 2.21, "learning_rate": 3.4297541965179916e-06, "loss": 0.1807, "step": 6656 }, { "epoch": 2.21, "learning_rate": 3.4270531814436327e-06, "loss": 0.1647, "step": 6657 }, { "epoch": 2.21, "learning_rate": 3.4243530103992005e-06, "loss": 0.0937, "step": 6658 }, { "epoch": 2.21, "learning_rate": 3.4216536837314252e-06, "loss": 0.1636, "step": 6659 }, { "epoch": 2.21, "learning_rate": 3.4189552017869243e-06, "loss": 0.1586, "step": 6660 }, { "epoch": 2.21, "learning_rate": 3.4162575649122075e-06, "loss": 0.1264, "step": 6661 }, { "epoch": 2.21, "learning_rate": 3.4135607734536824e-06, "loss": 0.1104, "step": 6662 }, { "epoch": 2.21, "learning_rate": 3.4108648277576373e-06, "loss": 0.101, "step": 6663 }, { "epoch": 2.21, "learning_rate": 3.4081697281702643e-06, "loss": 0.1752, "step": 6664 }, { "epoch": 2.21, "learning_rate": 3.405475475037636e-06, "loss": 0.1136, "step": 6665 }, { "epoch": 2.21, "learning_rate": 3.4027820687057225e-06, "loss": 0.1512, "step": 6666 }, { "epoch": 2.21, "learning_rate": 3.400089509520378e-06, "loss": 0.1273, "step": 6667 }, { "epoch": 2.21, "learning_rate": 3.39739779782736e-06, "loss": 0.1938, "step": 6668 }, { "epoch": 2.21, "learning_rate": 3.394706933972308e-06, "loss": 0.1762, "step": 6669 }, { "epoch": 2.21, "learning_rate": 3.3920169183007522e-06, "loss": 0.1474, "step": 6670 }, { "epoch": 2.21, "learning_rate": 3.389327751158125e-06, "loss": 0.1581, "step": 6671 }, { "epoch": 2.21, "learning_rate": 3.386639432889728e-06, "loss": 0.1475, "step": 6672 }, { "epoch": 2.21, "learning_rate": 3.383951963840778e-06, "loss": 0.1162, "step": 6673 }, { "epoch": 2.22, "learning_rate": 3.381265344356366e-06, "loss": 0.1348, "step": 6674 }, { "epoch": 2.22, "learning_rate": 3.3785795747814853e-06, "loss": 0.1277, "step": 6675 }, { "epoch": 2.22, "learning_rate": 3.3758946554610095e-06, "loss": 0.168, "step": 6676 }, { "epoch": 2.22, "learning_rate": 3.3732105867397137e-06, "loss": 0.0878, "step": 6677 }, { "epoch": 2.22, "learning_rate": 3.3705273689622543e-06, "loss": 0.1237, "step": 6678 }, { "epoch": 2.22, "learning_rate": 3.3678450024731845e-06, "loss": 0.1081, "step": 6679 }, { "epoch": 2.22, "learning_rate": 3.365163487616944e-06, "loss": 0.1202, "step": 6680 }, { "epoch": 2.22, "learning_rate": 3.3624828247378626e-06, "loss": 0.1895, "step": 6681 }, { "epoch": 2.22, "learning_rate": 3.359803014180171e-06, "loss": 0.1884, "step": 6682 }, { "epoch": 2.22, "learning_rate": 3.3571240562879747e-06, "loss": 0.1412, "step": 6683 }, { "epoch": 2.22, "learning_rate": 3.354445951405286e-06, "loss": 0.1113, "step": 6684 }, { "epoch": 2.22, "learning_rate": 3.3517686998759946e-06, "loss": 0.171, "step": 6685 }, { "epoch": 2.22, "learning_rate": 3.349092302043886e-06, "loss": 0.1371, "step": 6686 }, { "epoch": 2.22, "learning_rate": 3.346416758252633e-06, "loss": 0.1733, "step": 6687 }, { "epoch": 2.22, "learning_rate": 3.343742068845807e-06, "loss": 0.1252, "step": 6688 }, { "epoch": 2.22, "learning_rate": 3.34106823416686e-06, "loss": 0.1615, "step": 6689 }, { "epoch": 2.22, "learning_rate": 3.338395254559137e-06, "loss": 0.1595, "step": 6690 }, { "epoch": 2.22, "learning_rate": 3.33572313036588e-06, "loss": 0.1142, "step": 6691 }, { "epoch": 2.22, "learning_rate": 3.333051861930211e-06, "loss": 0.167, "step": 6692 }, { "epoch": 2.22, "learning_rate": 3.3303814495951493e-06, "loss": 0.1429, "step": 6693 }, { "epoch": 2.22, "learning_rate": 3.3277118937035957e-06, "loss": 0.1656, "step": 6694 }, { "epoch": 2.22, "learning_rate": 3.325043194598355e-06, "loss": 0.1453, "step": 6695 }, { "epoch": 2.22, "learning_rate": 3.322375352622108e-06, "loss": 0.1411, "step": 6696 }, { "epoch": 2.22, "learning_rate": 3.319708368117437e-06, "loss": 0.1377, "step": 6697 }, { "epoch": 2.22, "learning_rate": 3.3170422414268055e-06, "loss": 0.132, "step": 6698 }, { "epoch": 2.22, "learning_rate": 3.31437697289257e-06, "loss": 0.1472, "step": 6699 }, { "epoch": 2.22, "learning_rate": 3.311712562856976e-06, "loss": 0.1259, "step": 6700 }, { "epoch": 2.22, "learning_rate": 3.3090490116621574e-06, "loss": 0.1813, "step": 6701 }, { "epoch": 2.22, "learning_rate": 3.3063863196501467e-06, "loss": 0.1834, "step": 6702 }, { "epoch": 2.22, "learning_rate": 3.3037244871628504e-06, "loss": 0.1149, "step": 6703 }, { "epoch": 2.23, "learning_rate": 3.3010635145420833e-06, "loss": 0.192, "step": 6704 }, { "epoch": 2.23, "learning_rate": 3.298403402129532e-06, "loss": 0.1349, "step": 6705 }, { "epoch": 2.23, "learning_rate": 3.29574415026679e-06, "loss": 0.1296, "step": 6706 }, { "epoch": 2.23, "learning_rate": 3.2930857592953167e-06, "loss": 0.1424, "step": 6707 }, { "epoch": 2.23, "learning_rate": 3.290428229556488e-06, "loss": 0.1201, "step": 6708 }, { "epoch": 2.23, "learning_rate": 3.2877715613915517e-06, "loss": 0.2085, "step": 6709 }, { "epoch": 2.23, "learning_rate": 3.285115755141647e-06, "loss": 0.1224, "step": 6710 }, { "epoch": 2.23, "learning_rate": 3.282460811147811e-06, "loss": 0.1457, "step": 6711 }, { "epoch": 2.23, "learning_rate": 3.279806729750956e-06, "loss": 0.1366, "step": 6712 }, { "epoch": 2.23, "learning_rate": 3.277153511291905e-06, "loss": 0.1288, "step": 6713 }, { "epoch": 2.23, "learning_rate": 3.2745011561113418e-06, "loss": 0.1933, "step": 6714 }, { "epoch": 2.23, "learning_rate": 3.2718496645498645e-06, "loss": 0.1339, "step": 6715 }, { "epoch": 2.23, "learning_rate": 3.2691990369479432e-06, "loss": 0.1861, "step": 6716 }, { "epoch": 2.23, "learning_rate": 3.2665492736459514e-06, "loss": 0.1421, "step": 6717 }, { "epoch": 2.23, "learning_rate": 3.26390037498414e-06, "loss": 0.1378, "step": 6718 }, { "epoch": 2.23, "learning_rate": 3.261252341302652e-06, "loss": 0.1652, "step": 6719 }, { "epoch": 2.23, "learning_rate": 3.258605172941529e-06, "loss": 0.0788, "step": 6720 }, { "epoch": 2.23, "learning_rate": 3.2559588702406797e-06, "loss": 0.1316, "step": 6721 }, { "epoch": 2.23, "learning_rate": 3.2533134335399252e-06, "loss": 0.0948, "step": 6722 }, { "epoch": 2.23, "learning_rate": 3.2506688631789585e-06, "loss": 0.1347, "step": 6723 }, { "epoch": 2.23, "learning_rate": 3.2480251594973756e-06, "loss": 0.0865, "step": 6724 }, { "epoch": 2.23, "learning_rate": 3.245382322834648e-06, "loss": 0.1677, "step": 6725 }, { "epoch": 2.23, "learning_rate": 3.2427403535301406e-06, "loss": 0.1161, "step": 6726 }, { "epoch": 2.23, "learning_rate": 3.2400992519231122e-06, "loss": 0.1367, "step": 6727 }, { "epoch": 2.23, "learning_rate": 3.2374590183527042e-06, "loss": 0.1969, "step": 6728 }, { "epoch": 2.23, "learning_rate": 3.2348196531579466e-06, "loss": 0.1415, "step": 6729 }, { "epoch": 2.23, "learning_rate": 3.2321811566777585e-06, "loss": 0.1418, "step": 6730 }, { "epoch": 2.23, "learning_rate": 3.2295435292509526e-06, "loss": 0.0931, "step": 6731 }, { "epoch": 2.23, "learning_rate": 3.2269067712162205e-06, "loss": 0.1125, "step": 6732 }, { "epoch": 2.23, "learning_rate": 3.2242708829121527e-06, "loss": 0.1143, "step": 6733 }, { "epoch": 2.23, "learning_rate": 3.2216358646772218e-06, "loss": 0.0869, "step": 6734 }, { "epoch": 2.24, "learning_rate": 3.2190017168497865e-06, "loss": 0.1345, "step": 6735 }, { "epoch": 2.24, "learning_rate": 3.2163684397680994e-06, "loss": 0.1177, "step": 6736 }, { "epoch": 2.24, "learning_rate": 3.213736033770295e-06, "loss": 0.1125, "step": 6737 }, { "epoch": 2.24, "learning_rate": 3.211104499194405e-06, "loss": 0.098, "step": 6738 }, { "epoch": 2.24, "learning_rate": 3.2084738363783385e-06, "loss": 0.1219, "step": 6739 }, { "epoch": 2.24, "learning_rate": 3.2058440456599038e-06, "loss": 0.1614, "step": 6740 }, { "epoch": 2.24, "learning_rate": 3.2032151273767886e-06, "loss": 0.1429, "step": 6741 }, { "epoch": 2.24, "learning_rate": 3.200587081866572e-06, "loss": 0.1571, "step": 6742 }, { "epoch": 2.24, "learning_rate": 3.1979599094667167e-06, "loss": 0.1416, "step": 6743 }, { "epoch": 2.24, "learning_rate": 3.1953336105145826e-06, "loss": 0.1267, "step": 6744 }, { "epoch": 2.24, "learning_rate": 3.192708185347411e-06, "loss": 0.2266, "step": 6745 }, { "epoch": 2.24, "learning_rate": 3.1900836343023254e-06, "loss": 0.1482, "step": 6746 }, { "epoch": 2.24, "learning_rate": 3.187459957716357e-06, "loss": 0.131, "step": 6747 }, { "epoch": 2.24, "learning_rate": 3.1848371559263946e-06, "loss": 0.1285, "step": 6748 }, { "epoch": 2.24, "learning_rate": 3.1822152292692433e-06, "loss": 0.0833, "step": 6749 }, { "epoch": 2.24, "learning_rate": 3.1795941780815763e-06, "loss": 0.1458, "step": 6750 }, { "epoch": 2.24, "learning_rate": 3.1769740026999696e-06, "loss": 0.1036, "step": 6751 }, { "epoch": 2.24, "learning_rate": 3.1743547034608712e-06, "loss": 0.0932, "step": 6752 }, { "epoch": 2.24, "learning_rate": 3.1717362807006326e-06, "loss": 0.1055, "step": 6753 }, { "epoch": 2.24, "learning_rate": 3.1691187347554787e-06, "loss": 0.2097, "step": 6754 }, { "epoch": 2.24, "learning_rate": 3.166502065961531e-06, "loss": 0.1469, "step": 6755 }, { "epoch": 2.24, "learning_rate": 3.1638862746547915e-06, "loss": 0.1427, "step": 6756 }, { "epoch": 2.24, "learning_rate": 3.161271361171153e-06, "loss": 0.182, "step": 6757 }, { "epoch": 2.24, "learning_rate": 3.1586573258464015e-06, "loss": 0.0978, "step": 6758 }, { "epoch": 2.24, "learning_rate": 3.156044169016197e-06, "loss": 0.1279, "step": 6759 }, { "epoch": 2.24, "learning_rate": 3.1534318910161e-06, "loss": 0.1528, "step": 6760 }, { "epoch": 2.24, "learning_rate": 3.150820492181551e-06, "loss": 0.11, "step": 6761 }, { "epoch": 2.24, "learning_rate": 3.1482099728478765e-06, "loss": 0.1407, "step": 6762 }, { "epoch": 2.24, "learning_rate": 3.1456003333502916e-06, "loss": 0.1068, "step": 6763 }, { "epoch": 2.24, "learning_rate": 3.142991574023905e-06, "loss": 0.1371, "step": 6764 }, { "epoch": 2.25, "learning_rate": 3.1403836952037024e-06, "loss": 0.1225, "step": 6765 }, { "epoch": 2.25, "learning_rate": 3.1377766972245572e-06, "loss": 0.1102, "step": 6766 }, { "epoch": 2.25, "learning_rate": 3.135170580421242e-06, "loss": 0.153, "step": 6767 }, { "epoch": 2.25, "learning_rate": 3.132565345128402e-06, "loss": 0.1353, "step": 6768 }, { "epoch": 2.25, "learning_rate": 3.129960991680575e-06, "loss": 0.1152, "step": 6769 }, { "epoch": 2.25, "learning_rate": 3.127357520412182e-06, "loss": 0.1457, "step": 6770 }, { "epoch": 2.25, "learning_rate": 3.124754931657541e-06, "loss": 0.0974, "step": 6771 }, { "epoch": 2.25, "learning_rate": 3.1221532257508426e-06, "loss": 0.0777, "step": 6772 }, { "epoch": 2.25, "learning_rate": 3.119552403026177e-06, "loss": 0.1523, "step": 6773 }, { "epoch": 2.25, "learning_rate": 3.1169524638175117e-06, "loss": 0.1593, "step": 6774 }, { "epoch": 2.25, "learning_rate": 3.1143534084587058e-06, "loss": 0.1492, "step": 6775 }, { "epoch": 2.25, "learning_rate": 3.1117552372835012e-06, "loss": 0.1541, "step": 6776 }, { "epoch": 2.25, "learning_rate": 3.1091579506255253e-06, "loss": 0.1323, "step": 6777 }, { "epoch": 2.25, "learning_rate": 3.106561548818302e-06, "loss": 0.0962, "step": 6778 }, { "epoch": 2.25, "learning_rate": 3.1039660321952283e-06, "loss": 0.1641, "step": 6779 }, { "epoch": 2.25, "learning_rate": 3.101371401089598e-06, "loss": 0.1332, "step": 6780 }, { "epoch": 2.25, "learning_rate": 3.0987776558345827e-06, "loss": 0.1124, "step": 6781 }, { "epoch": 2.25, "learning_rate": 3.096184796763253e-06, "loss": 0.1453, "step": 6782 }, { "epoch": 2.25, "learning_rate": 3.0935928242085443e-06, "loss": 0.1808, "step": 6783 }, { "epoch": 2.25, "learning_rate": 3.0910017385032997e-06, "loss": 0.1082, "step": 6784 }, { "epoch": 2.25, "learning_rate": 3.088411539980238e-06, "loss": 0.2003, "step": 6785 }, { "epoch": 2.25, "learning_rate": 3.0858222289719617e-06, "loss": 0.1148, "step": 6786 }, { "epoch": 2.25, "learning_rate": 3.08323380581097e-06, "loss": 0.1351, "step": 6787 }, { "epoch": 2.25, "learning_rate": 3.0806462708296346e-06, "loss": 0.1348, "step": 6788 }, { "epoch": 2.25, "learning_rate": 3.0780596243602303e-06, "loss": 0.1447, "step": 6789 }, { "epoch": 2.25, "learning_rate": 3.075473866734895e-06, "loss": 0.116, "step": 6790 }, { "epoch": 2.25, "learning_rate": 3.0728889982856724e-06, "loss": 0.1398, "step": 6791 }, { "epoch": 2.25, "learning_rate": 3.0703050193444817e-06, "loss": 0.1274, "step": 6792 }, { "epoch": 2.25, "learning_rate": 3.0677219302431336e-06, "loss": 0.161, "step": 6793 }, { "epoch": 2.25, "learning_rate": 3.065139731313321e-06, "loss": 0.1996, "step": 6794 }, { "epoch": 2.26, "learning_rate": 3.062558422886619e-06, "loss": 0.1168, "step": 6795 }, { "epoch": 2.26, "learning_rate": 3.059978005294503e-06, "loss": 0.1513, "step": 6796 }, { "epoch": 2.26, "learning_rate": 3.0573984788683086e-06, "loss": 0.1414, "step": 6797 }, { "epoch": 2.26, "learning_rate": 3.054819843939284e-06, "loss": 0.1402, "step": 6798 }, { "epoch": 2.26, "learning_rate": 3.052242100838544e-06, "loss": 0.1074, "step": 6799 }, { "epoch": 2.26, "learning_rate": 3.0496652498971e-06, "loss": 0.152, "step": 6800 }, { "epoch": 2.26, "learning_rate": 3.047089291445845e-06, "loss": 0.1593, "step": 6801 }, { "epoch": 2.26, "learning_rate": 3.0445142258155513e-06, "loss": 0.1728, "step": 6802 }, { "epoch": 2.26, "learning_rate": 3.0419400533368883e-06, "loss": 0.1129, "step": 6803 }, { "epoch": 2.26, "learning_rate": 3.039366774340403e-06, "loss": 0.1602, "step": 6804 }, { "epoch": 2.26, "learning_rate": 3.0367943891565276e-06, "loss": 0.1412, "step": 6805 }, { "epoch": 2.26, "learning_rate": 3.0342228981155797e-06, "loss": 0.1178, "step": 6806 }, { "epoch": 2.26, "learning_rate": 3.0316523015477696e-06, "loss": 0.1911, "step": 6807 }, { "epoch": 2.26, "learning_rate": 3.02908259978318e-06, "loss": 0.211, "step": 6808 }, { "epoch": 2.26, "learning_rate": 3.026513793151792e-06, "loss": 0.1194, "step": 6809 }, { "epoch": 2.26, "learning_rate": 3.023945881983461e-06, "loss": 0.1382, "step": 6810 }, { "epoch": 2.26, "learning_rate": 3.0213788666079335e-06, "loss": 0.0823, "step": 6811 }, { "epoch": 2.26, "learning_rate": 3.0188127473548377e-06, "loss": 0.1322, "step": 6812 }, { "epoch": 2.26, "learning_rate": 3.016247524553686e-06, "loss": 0.1987, "step": 6813 }, { "epoch": 2.26, "learning_rate": 3.013683198533882e-06, "loss": 0.1938, "step": 6814 }, { "epoch": 2.26, "learning_rate": 3.011119769624706e-06, "loss": 0.1135, "step": 6815 }, { "epoch": 2.26, "learning_rate": 3.008557238155334e-06, "loss": 0.1286, "step": 6816 }, { "epoch": 2.26, "learning_rate": 3.0059956044548133e-06, "loss": 0.1492, "step": 6817 }, { "epoch": 2.26, "learning_rate": 3.0034348688520853e-06, "loss": 0.1346, "step": 6818 }, { "epoch": 2.26, "learning_rate": 3.0008750316759692e-06, "loss": 0.1223, "step": 6819 }, { "epoch": 2.26, "learning_rate": 2.9983160932551792e-06, "loss": 0.108, "step": 6820 }, { "epoch": 2.26, "learning_rate": 2.995758053918304e-06, "loss": 0.1273, "step": 6821 }, { "epoch": 2.26, "learning_rate": 2.993200913993818e-06, "loss": 0.1472, "step": 6822 }, { "epoch": 2.26, "learning_rate": 2.990644673810088e-06, "loss": 0.0775, "step": 6823 }, { "epoch": 2.26, "learning_rate": 2.9880893336953585e-06, "loss": 0.1165, "step": 6824 }, { "epoch": 2.27, "learning_rate": 2.9855348939777597e-06, "loss": 0.175, "step": 6825 }, { "epoch": 2.27, "learning_rate": 2.9829813549853013e-06, "loss": 0.1299, "step": 6826 }, { "epoch": 2.27, "learning_rate": 2.980428717045891e-06, "loss": 0.1554, "step": 6827 }, { "epoch": 2.27, "learning_rate": 2.9778769804873042e-06, "loss": 0.1777, "step": 6828 }, { "epoch": 2.27, "learning_rate": 2.975326145637216e-06, "loss": 0.1282, "step": 6829 }, { "epoch": 2.27, "learning_rate": 2.972776212823174e-06, "loss": 0.1815, "step": 6830 }, { "epoch": 2.27, "learning_rate": 2.970227182372616e-06, "loss": 0.1917, "step": 6831 }, { "epoch": 2.27, "learning_rate": 2.9676790546128597e-06, "loss": 0.1456, "step": 6832 }, { "epoch": 2.27, "learning_rate": 2.9651318298711085e-06, "loss": 0.1463, "step": 6833 }, { "epoch": 2.27, "learning_rate": 2.962585508474456e-06, "loss": 0.1428, "step": 6834 }, { "epoch": 2.27, "learning_rate": 2.9600400907498672e-06, "loss": 0.1333, "step": 6835 }, { "epoch": 2.27, "learning_rate": 2.9574955770242065e-06, "loss": 0.1467, "step": 6836 }, { "epoch": 2.27, "learning_rate": 2.95495196762421e-06, "loss": 0.126, "step": 6837 }, { "epoch": 2.27, "learning_rate": 2.952409262876501e-06, "loss": 0.181, "step": 6838 }, { "epoch": 2.27, "learning_rate": 2.949867463107585e-06, "loss": 0.1297, "step": 6839 }, { "epoch": 2.27, "learning_rate": 2.947326568643861e-06, "loss": 0.1241, "step": 6840 }, { "epoch": 2.27, "learning_rate": 2.9447865798116006e-06, "loss": 0.1067, "step": 6841 }, { "epoch": 2.27, "learning_rate": 2.9422474969369572e-06, "loss": 0.136, "step": 6842 }, { "epoch": 2.27, "learning_rate": 2.9397093203459837e-06, "loss": 0.1194, "step": 6843 }, { "epoch": 2.27, "learning_rate": 2.9371720503646016e-06, "loss": 0.1555, "step": 6844 }, { "epoch": 2.27, "learning_rate": 2.934635687318621e-06, "loss": 0.1553, "step": 6845 }, { "epoch": 2.27, "learning_rate": 2.932100231533733e-06, "loss": 0.1547, "step": 6846 }, { "epoch": 2.27, "learning_rate": 2.9295656833355203e-06, "loss": 0.1152, "step": 6847 }, { "epoch": 2.27, "learning_rate": 2.9270320430494358e-06, "loss": 0.1102, "step": 6848 }, { "epoch": 2.27, "learning_rate": 2.9244993110008325e-06, "loss": 0.1901, "step": 6849 }, { "epoch": 2.27, "learning_rate": 2.9219674875149317e-06, "loss": 0.153, "step": 6850 }, { "epoch": 2.27, "learning_rate": 2.9194365729168463e-06, "loss": 0.0775, "step": 6851 }, { "epoch": 2.27, "learning_rate": 2.916906567531569e-06, "loss": 0.1549, "step": 6852 }, { "epoch": 2.27, "learning_rate": 2.9143774716839733e-06, "loss": 0.1721, "step": 6853 }, { "epoch": 2.27, "learning_rate": 2.9118492856988278e-06, "loss": 0.1299, "step": 6854 }, { "epoch": 2.28, "learning_rate": 2.909322009900768e-06, "loss": 0.1236, "step": 6855 }, { "epoch": 2.28, "learning_rate": 2.9067956446143273e-06, "loss": 0.1614, "step": 6856 }, { "epoch": 2.28, "learning_rate": 2.90427019016391e-06, "loss": 0.1226, "step": 6857 }, { "epoch": 2.28, "learning_rate": 2.901745646873817e-06, "loss": 0.1217, "step": 6858 }, { "epoch": 2.28, "learning_rate": 2.8992220150682125e-06, "loss": 0.148, "step": 6859 }, { "epoch": 2.28, "learning_rate": 2.8966992950711637e-06, "loss": 0.0845, "step": 6860 }, { "epoch": 2.28, "learning_rate": 2.89417748720661e-06, "loss": 0.1333, "step": 6861 }, { "epoch": 2.28, "learning_rate": 2.8916565917983717e-06, "loss": 0.1532, "step": 6862 }, { "epoch": 2.28, "learning_rate": 2.8891366091701635e-06, "loss": 0.1461, "step": 6863 }, { "epoch": 2.28, "learning_rate": 2.8866175396455698e-06, "loss": 0.1487, "step": 6864 }, { "epoch": 2.28, "learning_rate": 2.88409938354807e-06, "loss": 0.1821, "step": 6865 }, { "epoch": 2.28, "learning_rate": 2.8815821412010102e-06, "loss": 0.1393, "step": 6866 }, { "epoch": 2.28, "learning_rate": 2.879065812927637e-06, "loss": 0.1123, "step": 6867 }, { "epoch": 2.28, "learning_rate": 2.876550399051066e-06, "loss": 0.1331, "step": 6868 }, { "epoch": 2.28, "learning_rate": 2.874035899894304e-06, "loss": 0.1594, "step": 6869 }, { "epoch": 2.28, "learning_rate": 2.8715223157802373e-06, "loss": 0.1488, "step": 6870 }, { "epoch": 2.28, "learning_rate": 2.8690096470316286e-06, "loss": 0.1393, "step": 6871 }, { "epoch": 2.28, "learning_rate": 2.8664978939711397e-06, "loss": 0.1419, "step": 6872 }, { "epoch": 2.28, "learning_rate": 2.8639870569212924e-06, "loss": 0.1653, "step": 6873 }, { "epoch": 2.28, "learning_rate": 2.861477136204509e-06, "loss": 0.1298, "step": 6874 }, { "epoch": 2.28, "learning_rate": 2.858968132143084e-06, "loss": 0.1553, "step": 6875 }, { "epoch": 2.28, "learning_rate": 2.8564600450592016e-06, "loss": 0.1258, "step": 6876 }, { "epoch": 2.28, "learning_rate": 2.8539528752749244e-06, "loss": 0.1618, "step": 6877 }, { "epoch": 2.28, "learning_rate": 2.851446623112192e-06, "loss": 0.0978, "step": 6878 }, { "epoch": 2.28, "learning_rate": 2.848941288892837e-06, "loss": 0.1446, "step": 6879 }, { "epoch": 2.28, "learning_rate": 2.846436872938567e-06, "loss": 0.1521, "step": 6880 }, { "epoch": 2.28, "learning_rate": 2.843933375570973e-06, "loss": 0.1334, "step": 6881 }, { "epoch": 2.28, "learning_rate": 2.8414307971115253e-06, "loss": 0.1148, "step": 6882 }, { "epoch": 2.28, "learning_rate": 2.838929137881584e-06, "loss": 0.1296, "step": 6883 }, { "epoch": 2.28, "learning_rate": 2.836428398202382e-06, "loss": 0.1699, "step": 6884 }, { "epoch": 2.29, "learning_rate": 2.8339285783950432e-06, "loss": 0.1563, "step": 6885 }, { "epoch": 2.29, "learning_rate": 2.8314296787805663e-06, "loss": 0.127, "step": 6886 }, { "epoch": 2.29, "learning_rate": 2.8289316996798333e-06, "loss": 0.1284, "step": 6887 }, { "epoch": 2.29, "learning_rate": 2.826434641413608e-06, "loss": 0.1469, "step": 6888 }, { "epoch": 2.29, "learning_rate": 2.8239385043025404e-06, "loss": 0.1271, "step": 6889 }, { "epoch": 2.29, "learning_rate": 2.8214432886671563e-06, "loss": 0.1551, "step": 6890 }, { "epoch": 2.29, "learning_rate": 2.8189489948278623e-06, "loss": 0.0906, "step": 6891 }, { "epoch": 2.29, "learning_rate": 2.8164556231049577e-06, "loss": 0.0848, "step": 6892 }, { "epoch": 2.29, "learning_rate": 2.81396317381861e-06, "loss": 0.1489, "step": 6893 }, { "epoch": 2.29, "learning_rate": 2.8114716472888758e-06, "loss": 0.1458, "step": 6894 }, { "epoch": 2.29, "learning_rate": 2.808981043835688e-06, "loss": 0.1483, "step": 6895 }, { "epoch": 2.29, "learning_rate": 2.8064913637788682e-06, "loss": 0.1349, "step": 6896 }, { "epoch": 2.29, "learning_rate": 2.8040026074381142e-06, "loss": 0.1581, "step": 6897 }, { "epoch": 2.29, "learning_rate": 2.801514775133003e-06, "loss": 0.1748, "step": 6898 }, { "epoch": 2.29, "learning_rate": 2.7990278671830017e-06, "loss": 0.1206, "step": 6899 }, { "epoch": 2.29, "learning_rate": 2.796541883907452e-06, "loss": 0.1342, "step": 6900 }, { "epoch": 2.29, "learning_rate": 2.7940568256255762e-06, "loss": 0.1636, "step": 6901 }, { "epoch": 2.29, "learning_rate": 2.7915726926564778e-06, "loss": 0.1242, "step": 6902 }, { "epoch": 2.29, "learning_rate": 2.7890894853191497e-06, "loss": 0.1279, "step": 6903 }, { "epoch": 2.29, "learning_rate": 2.7866072039324534e-06, "loss": 0.1531, "step": 6904 }, { "epoch": 2.29, "learning_rate": 2.7841258488151433e-06, "loss": 0.1369, "step": 6905 }, { "epoch": 2.29, "learning_rate": 2.7816454202858467e-06, "loss": 0.1694, "step": 6906 }, { "epoch": 2.29, "learning_rate": 2.779165918663076e-06, "loss": 0.1621, "step": 6907 }, { "epoch": 2.29, "learning_rate": 2.7766873442652207e-06, "loss": 0.2036, "step": 6908 }, { "epoch": 2.29, "learning_rate": 2.7742096974105526e-06, "loss": 0.1101, "step": 6909 }, { "epoch": 2.29, "learning_rate": 2.7717329784172307e-06, "loss": 0.1808, "step": 6910 }, { "epoch": 2.29, "learning_rate": 2.769257187603285e-06, "loss": 0.0856, "step": 6911 }, { "epoch": 2.29, "learning_rate": 2.7667823252866343e-06, "loss": 0.1514, "step": 6912 }, { "epoch": 2.29, "learning_rate": 2.764308391785074e-06, "loss": 0.1173, "step": 6913 }, { "epoch": 2.29, "learning_rate": 2.7618353874162817e-06, "loss": 0.1315, "step": 6914 }, { "epoch": 2.3, "learning_rate": 2.759363312497809e-06, "loss": 0.1459, "step": 6915 }, { "epoch": 2.3, "learning_rate": 2.756892167347104e-06, "loss": 0.132, "step": 6916 }, { "epoch": 2.3, "learning_rate": 2.7544219522814795e-06, "loss": 0.0995, "step": 6917 }, { "epoch": 2.3, "learning_rate": 2.751952667618133e-06, "loss": 0.1588, "step": 6918 }, { "epoch": 2.3, "learning_rate": 2.749484313674152e-06, "loss": 0.0991, "step": 6919 }, { "epoch": 2.3, "learning_rate": 2.7470168907664917e-06, "loss": 0.1032, "step": 6920 }, { "epoch": 2.3, "learning_rate": 2.7445503992119947e-06, "loss": 0.1405, "step": 6921 }, { "epoch": 2.3, "learning_rate": 2.7420848393273792e-06, "loss": 0.0997, "step": 6922 }, { "epoch": 2.3, "learning_rate": 2.7396202114292515e-06, "loss": 0.1548, "step": 6923 }, { "epoch": 2.3, "learning_rate": 2.7371565158340896e-06, "loss": 0.1332, "step": 6924 }, { "epoch": 2.3, "learning_rate": 2.7346937528582596e-06, "loss": 0.1148, "step": 6925 }, { "epoch": 2.3, "learning_rate": 2.7322319228180037e-06, "loss": 0.134, "step": 6926 }, { "epoch": 2.3, "learning_rate": 2.729771026029443e-06, "loss": 0.1528, "step": 6927 }, { "epoch": 2.3, "learning_rate": 2.727311062808581e-06, "loss": 0.126, "step": 6928 }, { "epoch": 2.3, "learning_rate": 2.724852033471298e-06, "loss": 0.1725, "step": 6929 }, { "epoch": 2.3, "learning_rate": 2.722393938333362e-06, "loss": 0.1059, "step": 6930 }, { "epoch": 2.3, "learning_rate": 2.7199367777104115e-06, "loss": 0.068, "step": 6931 }, { "epoch": 2.3, "learning_rate": 2.7174805519179737e-06, "loss": 0.1475, "step": 6932 }, { "epoch": 2.3, "learning_rate": 2.715025261271449e-06, "loss": 0.1207, "step": 6933 }, { "epoch": 2.3, "learning_rate": 2.712570906086126e-06, "loss": 0.1104, "step": 6934 }, { "epoch": 2.3, "learning_rate": 2.7101174866771586e-06, "loss": 0.1774, "step": 6935 }, { "epoch": 2.3, "learning_rate": 2.7076650033595965e-06, "loss": 0.1826, "step": 6936 }, { "epoch": 2.3, "learning_rate": 2.70521345644836e-06, "loss": 0.1195, "step": 6937 }, { "epoch": 2.3, "learning_rate": 2.702762846258249e-06, "loss": 0.1774, "step": 6938 }, { "epoch": 2.3, "learning_rate": 2.70031317310395e-06, "loss": 0.1311, "step": 6939 }, { "epoch": 2.3, "learning_rate": 2.697864437300021e-06, "loss": 0.1158, "step": 6940 }, { "epoch": 2.3, "learning_rate": 2.6954166391609104e-06, "loss": 0.1461, "step": 6941 }, { "epoch": 2.3, "learning_rate": 2.692969779000928e-06, "loss": 0.109, "step": 6942 }, { "epoch": 2.3, "learning_rate": 2.6905238571342818e-06, "loss": 0.1497, "step": 6943 }, { "epoch": 2.3, "learning_rate": 2.6880788738750486e-06, "loss": 0.1235, "step": 6944 }, { "epoch": 2.31, "learning_rate": 2.6856348295371915e-06, "loss": 0.1368, "step": 6945 }, { "epoch": 2.31, "learning_rate": 2.6831917244345483e-06, "loss": 0.165, "step": 6946 }, { "epoch": 2.31, "learning_rate": 2.680749558880833e-06, "loss": 0.1247, "step": 6947 }, { "epoch": 2.31, "learning_rate": 2.678308333189652e-06, "loss": 0.1334, "step": 6948 }, { "epoch": 2.31, "learning_rate": 2.6758680476744715e-06, "loss": 0.1557, "step": 6949 }, { "epoch": 2.31, "learning_rate": 2.673428702648656e-06, "loss": 0.1281, "step": 6950 }, { "epoch": 2.31, "learning_rate": 2.6709902984254355e-06, "loss": 0.1518, "step": 6951 }, { "epoch": 2.31, "learning_rate": 2.6685528353179303e-06, "loss": 0.14, "step": 6952 }, { "epoch": 2.31, "learning_rate": 2.666116313639128e-06, "loss": 0.1592, "step": 6953 }, { "epoch": 2.31, "learning_rate": 2.6636807337019077e-06, "loss": 0.1129, "step": 6954 }, { "epoch": 2.31, "learning_rate": 2.661246095819018e-06, "loss": 0.1281, "step": 6955 }, { "epoch": 2.31, "learning_rate": 2.658812400303091e-06, "loss": 0.1497, "step": 6956 }, { "epoch": 2.31, "learning_rate": 2.656379647466637e-06, "loss": 0.0754, "step": 6957 }, { "epoch": 2.31, "learning_rate": 2.6539478376220397e-06, "loss": 0.1471, "step": 6958 }, { "epoch": 2.31, "learning_rate": 2.651516971081576e-06, "loss": 0.103, "step": 6959 }, { "epoch": 2.31, "learning_rate": 2.649087048157384e-06, "loss": 0.162, "step": 6960 }, { "epoch": 2.31, "learning_rate": 2.6466580691614973e-06, "loss": 0.1576, "step": 6961 }, { "epoch": 2.31, "learning_rate": 2.644230034405816e-06, "loss": 0.1892, "step": 6962 }, { "epoch": 2.31, "learning_rate": 2.641802944202123e-06, "loss": 0.1618, "step": 6963 }, { "epoch": 2.31, "learning_rate": 2.6393767988620788e-06, "loss": 0.1238, "step": 6964 }, { "epoch": 2.31, "learning_rate": 2.6369515986972283e-06, "loss": 0.1117, "step": 6965 }, { "epoch": 2.31, "learning_rate": 2.634527344018989e-06, "loss": 0.133, "step": 6966 }, { "epoch": 2.31, "learning_rate": 2.632104035138655e-06, "loss": 0.1675, "step": 6967 }, { "epoch": 2.31, "learning_rate": 2.6296816723674077e-06, "loss": 0.1184, "step": 6968 }, { "epoch": 2.31, "learning_rate": 2.627260256016301e-06, "loss": 0.1208, "step": 6969 }, { "epoch": 2.31, "learning_rate": 2.624839786396266e-06, "loss": 0.1675, "step": 6970 }, { "epoch": 2.31, "learning_rate": 2.622420263818112e-06, "loss": 0.1596, "step": 6971 }, { "epoch": 2.31, "learning_rate": 2.620001688592536e-06, "loss": 0.1419, "step": 6972 }, { "epoch": 2.31, "learning_rate": 2.617584061030103e-06, "loss": 0.118, "step": 6973 }, { "epoch": 2.31, "learning_rate": 2.615167381441256e-06, "loss": 0.162, "step": 6974 }, { "epoch": 2.31, "learning_rate": 2.612751650136327e-06, "loss": 0.057, "step": 6975 }, { "epoch": 2.32, "learning_rate": 2.6103368674255157e-06, "loss": 0.1517, "step": 6976 }, { "epoch": 2.32, "learning_rate": 2.607923033618904e-06, "loss": 0.1053, "step": 6977 }, { "epoch": 2.32, "learning_rate": 2.6055101490264488e-06, "loss": 0.1152, "step": 6978 }, { "epoch": 2.32, "learning_rate": 2.603098213957992e-06, "loss": 0.1617, "step": 6979 }, { "epoch": 2.32, "learning_rate": 2.6006872287232456e-06, "loss": 0.175, "step": 6980 }, { "epoch": 2.32, "learning_rate": 2.5982771936318086e-06, "loss": 0.1226, "step": 6981 }, { "epoch": 2.32, "learning_rate": 2.595868108993149e-06, "loss": 0.1546, "step": 6982 }, { "epoch": 2.32, "learning_rate": 2.593459975116618e-06, "loss": 0.1285, "step": 6983 }, { "epoch": 2.32, "learning_rate": 2.5910527923114416e-06, "loss": 0.0971, "step": 6984 }, { "epoch": 2.32, "learning_rate": 2.588646560886724e-06, "loss": 0.1497, "step": 6985 }, { "epoch": 2.32, "learning_rate": 2.5862412811514525e-06, "loss": 0.1407, "step": 6986 }, { "epoch": 2.32, "learning_rate": 2.583836953414485e-06, "loss": 0.1261, "step": 6987 }, { "epoch": 2.32, "learning_rate": 2.5814335779845645e-06, "loss": 0.1961, "step": 6988 }, { "epoch": 2.32, "learning_rate": 2.579031155170305e-06, "loss": 0.118, "step": 6989 }, { "epoch": 2.32, "learning_rate": 2.576629685280202e-06, "loss": 0.1409, "step": 6990 }, { "epoch": 2.32, "learning_rate": 2.5742291686226228e-06, "loss": 0.1464, "step": 6991 }, { "epoch": 2.32, "learning_rate": 2.5718296055058233e-06, "loss": 0.1093, "step": 6992 }, { "epoch": 2.32, "learning_rate": 2.5694309962379283e-06, "loss": 0.1159, "step": 6993 }, { "epoch": 2.32, "learning_rate": 2.5670333411269388e-06, "loss": 0.1205, "step": 6994 }, { "epoch": 2.32, "learning_rate": 2.5646366404807445e-06, "loss": 0.1136, "step": 6995 }, { "epoch": 2.32, "learning_rate": 2.5622408946070987e-06, "loss": 0.132, "step": 6996 }, { "epoch": 2.32, "learning_rate": 2.5598461038136413e-06, "loss": 0.1491, "step": 6997 }, { "epoch": 2.32, "learning_rate": 2.5574522684078817e-06, "loss": 0.1666, "step": 6998 }, { "epoch": 2.32, "learning_rate": 2.5550593886972186e-06, "loss": 0.1233, "step": 6999 }, { "epoch": 2.32, "learning_rate": 2.552667464988915e-06, "loss": 0.1173, "step": 7000 }, { "epoch": 2.32, "learning_rate": 2.550276497590123e-06, "loss": 0.1306, "step": 7001 }, { "epoch": 2.32, "learning_rate": 2.547886486807861e-06, "loss": 0.1625, "step": 7002 }, { "epoch": 2.32, "learning_rate": 2.5454974329490323e-06, "loss": 0.0796, "step": 7003 }, { "epoch": 2.32, "learning_rate": 2.543109336320414e-06, "loss": 0.1276, "step": 7004 }, { "epoch": 2.32, "learning_rate": 2.5407221972286557e-06, "loss": 0.0998, "step": 7005 }, { "epoch": 2.33, "learning_rate": 2.5383360159802973e-06, "loss": 0.1309, "step": 7006 }, { "epoch": 2.33, "learning_rate": 2.5359507928817407e-06, "loss": 0.1824, "step": 7007 }, { "epoch": 2.33, "learning_rate": 2.5335665282392774e-06, "loss": 0.0923, "step": 7008 }, { "epoch": 2.33, "learning_rate": 2.531183222359064e-06, "loss": 0.2017, "step": 7009 }, { "epoch": 2.33, "learning_rate": 2.5288008755471495e-06, "loss": 0.1843, "step": 7010 }, { "epoch": 2.33, "learning_rate": 2.5264194881094384e-06, "loss": 0.1342, "step": 7011 }, { "epoch": 2.33, "learning_rate": 2.5240390603517327e-06, "loss": 0.1575, "step": 7012 }, { "epoch": 2.33, "learning_rate": 2.5216595925796995e-06, "loss": 0.0986, "step": 7013 }, { "epoch": 2.33, "learning_rate": 2.5192810850988814e-06, "loss": 0.1605, "step": 7014 }, { "epoch": 2.33, "learning_rate": 2.5169035382147088e-06, "loss": 0.1039, "step": 7015 }, { "epoch": 2.33, "learning_rate": 2.5145269522324756e-06, "loss": 0.1446, "step": 7016 }, { "epoch": 2.33, "learning_rate": 2.5121513274573673e-06, "loss": 0.1241, "step": 7017 }, { "epoch": 2.33, "learning_rate": 2.5097766641944243e-06, "loss": 0.1021, "step": 7018 }, { "epoch": 2.33, "learning_rate": 2.5074029627485872e-06, "loss": 0.1234, "step": 7019 }, { "epoch": 2.33, "learning_rate": 2.5050302234246547e-06, "loss": 0.1134, "step": 7020 }, { "epoch": 2.33, "learning_rate": 2.502658446527316e-06, "loss": 0.1416, "step": 7021 }, { "epoch": 2.33, "learning_rate": 2.500287632361127e-06, "loss": 0.1656, "step": 7022 }, { "epoch": 2.33, "learning_rate": 2.49791778123052e-06, "loss": 0.1411, "step": 7023 }, { "epoch": 2.33, "learning_rate": 2.4955488934398162e-06, "loss": 0.1621, "step": 7024 }, { "epoch": 2.33, "learning_rate": 2.493180969293192e-06, "loss": 0.1704, "step": 7025 }, { "epoch": 2.33, "learning_rate": 2.4908140090947197e-06, "loss": 0.1024, "step": 7026 }, { "epoch": 2.33, "learning_rate": 2.4884480131483347e-06, "loss": 0.1907, "step": 7027 }, { "epoch": 2.33, "learning_rate": 2.4860829817578592e-06, "loss": 0.1309, "step": 7028 }, { "epoch": 2.33, "learning_rate": 2.4837189152269813e-06, "loss": 0.1209, "step": 7029 }, { "epoch": 2.33, "learning_rate": 2.481355813859274e-06, "loss": 0.203, "step": 7030 }, { "epoch": 2.33, "learning_rate": 2.478993677958181e-06, "loss": 0.1243, "step": 7031 }, { "epoch": 2.33, "learning_rate": 2.4766325078270226e-06, "loss": 0.1211, "step": 7032 }, { "epoch": 2.33, "learning_rate": 2.4742723037689964e-06, "loss": 0.1483, "step": 7033 }, { "epoch": 2.33, "learning_rate": 2.4719130660871726e-06, "loss": 0.1755, "step": 7034 }, { "epoch": 2.33, "learning_rate": 2.4695547950845056e-06, "loss": 0.158, "step": 7035 }, { "epoch": 2.34, "learning_rate": 2.467197491063814e-06, "loss": 0.1433, "step": 7036 }, { "epoch": 2.34, "learning_rate": 2.464841154327804e-06, "loss": 0.0781, "step": 7037 }, { "epoch": 2.34, "learning_rate": 2.4624857851790506e-06, "loss": 0.114, "step": 7038 }, { "epoch": 2.34, "learning_rate": 2.460131383920005e-06, "loss": 0.1431, "step": 7039 }, { "epoch": 2.34, "learning_rate": 2.457777950852993e-06, "loss": 0.1332, "step": 7040 }, { "epoch": 2.34, "learning_rate": 2.455425486280222e-06, "loss": 0.1758, "step": 7041 }, { "epoch": 2.34, "learning_rate": 2.4530739905037705e-06, "loss": 0.1279, "step": 7042 }, { "epoch": 2.34, "learning_rate": 2.4507234638255897e-06, "loss": 0.2191, "step": 7043 }, { "epoch": 2.34, "learning_rate": 2.448373906547514e-06, "loss": 0.1322, "step": 7044 }, { "epoch": 2.34, "learning_rate": 2.446025318971249e-06, "loss": 0.1151, "step": 7045 }, { "epoch": 2.34, "learning_rate": 2.4436777013983747e-06, "loss": 0.1578, "step": 7046 }, { "epoch": 2.34, "learning_rate": 2.4413310541303457e-06, "loss": 0.1443, "step": 7047 }, { "epoch": 2.34, "learning_rate": 2.438985377468499e-06, "loss": 0.1491, "step": 7048 }, { "epoch": 2.34, "learning_rate": 2.4366406717140402e-06, "loss": 0.1328, "step": 7049 }, { "epoch": 2.34, "learning_rate": 2.434296937168049e-06, "loss": 0.1083, "step": 7050 }, { "epoch": 2.34, "learning_rate": 2.431954174131489e-06, "loss": 0.1269, "step": 7051 }, { "epoch": 2.34, "learning_rate": 2.429612382905191e-06, "loss": 0.1524, "step": 7052 }, { "epoch": 2.34, "learning_rate": 2.4272715637898636e-06, "loss": 0.1827, "step": 7053 }, { "epoch": 2.34, "learning_rate": 2.4249317170860874e-06, "loss": 0.145, "step": 7054 }, { "epoch": 2.34, "learning_rate": 2.4225928430943293e-06, "loss": 0.1971, "step": 7055 }, { "epoch": 2.34, "learning_rate": 2.4202549421149133e-06, "loss": 0.116, "step": 7056 }, { "epoch": 2.34, "learning_rate": 2.4179180144480585e-06, "loss": 0.1293, "step": 7057 }, { "epoch": 2.34, "learning_rate": 2.4155820603938427e-06, "loss": 0.1335, "step": 7058 }, { "epoch": 2.34, "learning_rate": 2.413247080252228e-06, "loss": 0.1126, "step": 7059 }, { "epoch": 2.34, "learning_rate": 2.4109130743230468e-06, "loss": 0.1503, "step": 7060 }, { "epoch": 2.34, "learning_rate": 2.408580042906006e-06, "loss": 0.119, "step": 7061 }, { "epoch": 2.34, "learning_rate": 2.4062479863006926e-06, "loss": 0.1382, "step": 7062 }, { "epoch": 2.34, "learning_rate": 2.4039169048065637e-06, "loss": 0.1124, "step": 7063 }, { "epoch": 2.34, "learning_rate": 2.4015867987229547e-06, "loss": 0.1183, "step": 7064 }, { "epoch": 2.34, "learning_rate": 2.3992576683490697e-06, "loss": 0.1263, "step": 7065 }, { "epoch": 2.35, "learning_rate": 2.396929513984e-06, "loss": 0.1523, "step": 7066 }, { "epoch": 2.35, "learning_rate": 2.394602335926692e-06, "loss": 0.1271, "step": 7067 }, { "epoch": 2.35, "learning_rate": 2.392276134475986e-06, "loss": 0.1322, "step": 7068 }, { "epoch": 2.35, "learning_rate": 2.3899509099305852e-06, "loss": 0.1234, "step": 7069 }, { "epoch": 2.35, "learning_rate": 2.3876266625890697e-06, "loss": 0.1129, "step": 7070 }, { "epoch": 2.35, "learning_rate": 2.3853033927498993e-06, "loss": 0.1083, "step": 7071 }, { "epoch": 2.35, "learning_rate": 2.3829811007114034e-06, "loss": 0.0956, "step": 7072 }, { "epoch": 2.35, "learning_rate": 2.380659786771785e-06, "loss": 0.1487, "step": 7073 }, { "epoch": 2.35, "learning_rate": 2.3783394512291214e-06, "loss": 0.1373, "step": 7074 }, { "epoch": 2.35, "learning_rate": 2.3760200943813707e-06, "loss": 0.1757, "step": 7075 }, { "epoch": 2.35, "learning_rate": 2.3737017165263566e-06, "loss": 0.1767, "step": 7076 }, { "epoch": 2.35, "learning_rate": 2.3713843179617856e-06, "loss": 0.1385, "step": 7077 }, { "epoch": 2.35, "learning_rate": 2.3690678989852324e-06, "loss": 0.1546, "step": 7078 }, { "epoch": 2.35, "learning_rate": 2.366752459894147e-06, "loss": 0.1261, "step": 7079 }, { "epoch": 2.35, "learning_rate": 2.364438000985855e-06, "loss": 0.2117, "step": 7080 }, { "epoch": 2.35, "learning_rate": 2.3621245225575497e-06, "loss": 0.1727, "step": 7081 }, { "epoch": 2.35, "learning_rate": 2.359812024906313e-06, "loss": 0.1224, "step": 7082 }, { "epoch": 2.35, "learning_rate": 2.3575005083290846e-06, "loss": 0.1144, "step": 7083 }, { "epoch": 2.35, "learning_rate": 2.3551899731226913e-06, "loss": 0.1549, "step": 7084 }, { "epoch": 2.35, "learning_rate": 2.352880419583824e-06, "loss": 0.1437, "step": 7085 }, { "epoch": 2.35, "learning_rate": 2.3505718480090577e-06, "loss": 0.1451, "step": 7086 }, { "epoch": 2.35, "learning_rate": 2.3482642586948255e-06, "loss": 0.1582, "step": 7087 }, { "epoch": 2.35, "learning_rate": 2.345957651937454e-06, "loss": 0.1038, "step": 7088 }, { "epoch": 2.35, "learning_rate": 2.3436520280331276e-06, "loss": 0.1318, "step": 7089 }, { "epoch": 2.35, "learning_rate": 2.3413473872779115e-06, "loss": 0.1361, "step": 7090 }, { "epoch": 2.35, "learning_rate": 2.339043729967747e-06, "loss": 0.1463, "step": 7091 }, { "epoch": 2.35, "learning_rate": 2.3367410563984403e-06, "loss": 0.124, "step": 7092 }, { "epoch": 2.35, "learning_rate": 2.3344393668656875e-06, "loss": 0.1274, "step": 7093 }, { "epoch": 2.35, "learning_rate": 2.3321386616650344e-06, "loss": 0.1418, "step": 7094 }, { "epoch": 2.35, "learning_rate": 2.3298389410919232e-06, "loss": 0.1624, "step": 7095 }, { "epoch": 2.36, "learning_rate": 2.327540205441655e-06, "loss": 0.1491, "step": 7096 }, { "epoch": 2.36, "learning_rate": 2.3252424550094145e-06, "loss": 0.1092, "step": 7097 }, { "epoch": 2.36, "learning_rate": 2.3229456900902524e-06, "loss": 0.1326, "step": 7098 }, { "epoch": 2.36, "learning_rate": 2.3206499109790924e-06, "loss": 0.1201, "step": 7099 }, { "epoch": 2.36, "learning_rate": 2.3183551179707453e-06, "loss": 0.1014, "step": 7100 }, { "epoch": 2.36, "learning_rate": 2.3160613113598705e-06, "loss": 0.168, "step": 7101 }, { "epoch": 2.36, "learning_rate": 2.3137684914410254e-06, "loss": 0.1012, "step": 7102 }, { "epoch": 2.36, "learning_rate": 2.3114766585086234e-06, "loss": 0.1092, "step": 7103 }, { "epoch": 2.36, "learning_rate": 2.309185812856963e-06, "loss": 0.0798, "step": 7104 }, { "epoch": 2.36, "learning_rate": 2.3068959547802082e-06, "loss": 0.158, "step": 7105 }, { "epoch": 2.36, "learning_rate": 2.3046070845724035e-06, "loss": 0.1385, "step": 7106 }, { "epoch": 2.36, "learning_rate": 2.3023192025274567e-06, "loss": 0.1555, "step": 7107 }, { "epoch": 2.36, "learning_rate": 2.300032308939156e-06, "loss": 0.1248, "step": 7108 }, { "epoch": 2.36, "learning_rate": 2.297746404101161e-06, "loss": 0.1224, "step": 7109 }, { "epoch": 2.36, "learning_rate": 2.2954614883070002e-06, "loss": 0.1766, "step": 7110 }, { "epoch": 2.36, "learning_rate": 2.293177561850085e-06, "loss": 0.1556, "step": 7111 }, { "epoch": 2.36, "learning_rate": 2.2908946250236884e-06, "loss": 0.1992, "step": 7112 }, { "epoch": 2.36, "learning_rate": 2.288612678120966e-06, "loss": 0.1232, "step": 7113 }, { "epoch": 2.36, "learning_rate": 2.28633172143494e-06, "loss": 0.145, "step": 7114 }, { "epoch": 2.36, "learning_rate": 2.284051755258507e-06, "loss": 0.137, "step": 7115 }, { "epoch": 2.36, "learning_rate": 2.2817727798844345e-06, "loss": 0.1516, "step": 7116 }, { "epoch": 2.36, "learning_rate": 2.279494795605369e-06, "loss": 0.1226, "step": 7117 }, { "epoch": 2.36, "learning_rate": 2.2772178027138247e-06, "loss": 0.1429, "step": 7118 }, { "epoch": 2.36, "learning_rate": 2.274941801502185e-06, "loss": 0.1204, "step": 7119 }, { "epoch": 2.36, "learning_rate": 2.2726667922627175e-06, "loss": 0.2114, "step": 7120 }, { "epoch": 2.36, "learning_rate": 2.270392775287552e-06, "loss": 0.1104, "step": 7121 }, { "epoch": 2.36, "learning_rate": 2.2681197508686948e-06, "loss": 0.1279, "step": 7122 }, { "epoch": 2.36, "learning_rate": 2.26584771929802e-06, "loss": 0.136, "step": 7123 }, { "epoch": 2.36, "learning_rate": 2.2635766808672854e-06, "loss": 0.1552, "step": 7124 }, { "epoch": 2.36, "learning_rate": 2.2613066358681112e-06, "loss": 0.0906, "step": 7125 }, { "epoch": 2.37, "learning_rate": 2.25903758459199e-06, "loss": 0.1242, "step": 7126 }, { "epoch": 2.37, "learning_rate": 2.2567695273302957e-06, "loss": 0.1268, "step": 7127 }, { "epoch": 2.37, "learning_rate": 2.2545024643742664e-06, "loss": 0.1605, "step": 7128 }, { "epoch": 2.37, "learning_rate": 2.2522363960150138e-06, "loss": 0.122, "step": 7129 }, { "epoch": 2.37, "learning_rate": 2.2499713225435203e-06, "loss": 0.141, "step": 7130 }, { "epoch": 2.37, "learning_rate": 2.2477072442506507e-06, "loss": 0.1275, "step": 7131 }, { "epoch": 2.37, "learning_rate": 2.245444161427127e-06, "loss": 0.2053, "step": 7132 }, { "epoch": 2.37, "learning_rate": 2.243182074363558e-06, "loss": 0.1356, "step": 7133 }, { "epoch": 2.37, "learning_rate": 2.2409209833504133e-06, "loss": 0.1918, "step": 7134 }, { "epoch": 2.37, "learning_rate": 2.238660888678039e-06, "loss": 0.1139, "step": 7135 }, { "epoch": 2.37, "learning_rate": 2.236401790636651e-06, "loss": 0.1065, "step": 7136 }, { "epoch": 2.37, "learning_rate": 2.234143689516345e-06, "loss": 0.1315, "step": 7137 }, { "epoch": 2.37, "learning_rate": 2.2318865856070805e-06, "loss": 0.1484, "step": 7138 }, { "epoch": 2.37, "learning_rate": 2.2296304791986867e-06, "loss": 0.134, "step": 7139 }, { "epoch": 2.37, "learning_rate": 2.2273753705808777e-06, "loss": 0.1224, "step": 7140 }, { "epoch": 2.37, "learning_rate": 2.225121260043224e-06, "loss": 0.1722, "step": 7141 }, { "epoch": 2.37, "learning_rate": 2.2228681478751833e-06, "loss": 0.1518, "step": 7142 }, { "epoch": 2.37, "learning_rate": 2.2206160343660676e-06, "loss": 0.1337, "step": 7143 }, { "epoch": 2.37, "learning_rate": 2.218364919805077e-06, "loss": 0.1242, "step": 7144 }, { "epoch": 2.37, "learning_rate": 2.216114804481274e-06, "loss": 0.1245, "step": 7145 }, { "epoch": 2.37, "learning_rate": 2.2138656886835917e-06, "loss": 0.1272, "step": 7146 }, { "epoch": 2.37, "learning_rate": 2.211617572700846e-06, "loss": 0.1464, "step": 7147 }, { "epoch": 2.37, "learning_rate": 2.209370456821708e-06, "loss": 0.142, "step": 7148 }, { "epoch": 2.37, "learning_rate": 2.20712434133474e-06, "loss": 0.1243, "step": 7149 }, { "epoch": 2.37, "learning_rate": 2.2048792265283526e-06, "loss": 0.1124, "step": 7150 }, { "epoch": 2.37, "learning_rate": 2.2026351126908496e-06, "loss": 0.14, "step": 7151 }, { "epoch": 2.37, "learning_rate": 2.2003920001103895e-06, "loss": 0.1197, "step": 7152 }, { "epoch": 2.37, "learning_rate": 2.1981498890750177e-06, "loss": 0.1962, "step": 7153 }, { "epoch": 2.37, "learning_rate": 2.1959087798726377e-06, "loss": 0.1431, "step": 7154 }, { "epoch": 2.37, "learning_rate": 2.1936686727910307e-06, "loss": 0.1476, "step": 7155 }, { "epoch": 2.38, "learning_rate": 2.191429568117849e-06, "loss": 0.1556, "step": 7156 }, { "epoch": 2.38, "learning_rate": 2.189191466140611e-06, "loss": 0.1243, "step": 7157 }, { "epoch": 2.38, "learning_rate": 2.1869543671467177e-06, "loss": 0.1232, "step": 7158 }, { "epoch": 2.38, "learning_rate": 2.184718271423427e-06, "loss": 0.1425, "step": 7159 }, { "epoch": 2.38, "learning_rate": 2.1824831792578817e-06, "loss": 0.1329, "step": 7160 }, { "epoch": 2.38, "learning_rate": 2.1802490909370843e-06, "loss": 0.1122, "step": 7161 }, { "epoch": 2.38, "learning_rate": 2.1780160067479208e-06, "loss": 0.1122, "step": 7162 }, { "epoch": 2.38, "learning_rate": 2.1757839269771287e-06, "loss": 0.0995, "step": 7163 }, { "epoch": 2.38, "learning_rate": 2.1735528519113393e-06, "loss": 0.1351, "step": 7164 }, { "epoch": 2.38, "learning_rate": 2.17132278183704e-06, "loss": 0.1066, "step": 7165 }, { "epoch": 2.38, "learning_rate": 2.1690937170405903e-06, "loss": 0.1395, "step": 7166 }, { "epoch": 2.38, "learning_rate": 2.16686565780823e-06, "loss": 0.1073, "step": 7167 }, { "epoch": 2.38, "learning_rate": 2.1646386044260583e-06, "loss": 0.1495, "step": 7168 }, { "epoch": 2.38, "learning_rate": 2.162412557180057e-06, "loss": 0.127, "step": 7169 }, { "epoch": 2.38, "learning_rate": 2.1601875163560627e-06, "loss": 0.1327, "step": 7170 }, { "epoch": 2.38, "learning_rate": 2.1579634822398e-06, "loss": 0.1472, "step": 7171 }, { "epoch": 2.38, "learning_rate": 2.15574045511685e-06, "loss": 0.1354, "step": 7172 }, { "epoch": 2.38, "learning_rate": 2.153518435272678e-06, "loss": 0.1561, "step": 7173 }, { "epoch": 2.38, "learning_rate": 2.1512974229926085e-06, "loss": 0.2277, "step": 7174 }, { "epoch": 2.38, "learning_rate": 2.1490774185618403e-06, "loss": 0.1328, "step": 7175 }, { "epoch": 2.38, "learning_rate": 2.1468584222654486e-06, "loss": 0.1235, "step": 7176 }, { "epoch": 2.38, "learning_rate": 2.144640434388365e-06, "loss": 0.1075, "step": 7177 }, { "epoch": 2.38, "learning_rate": 2.142423455215409e-06, "loss": 0.1074, "step": 7178 }, { "epoch": 2.38, "learning_rate": 2.140207485031256e-06, "loss": 0.1116, "step": 7179 }, { "epoch": 2.38, "learning_rate": 2.1379925241204626e-06, "loss": 0.1569, "step": 7180 }, { "epoch": 2.38, "learning_rate": 2.1357785727674476e-06, "loss": 0.1668, "step": 7181 }, { "epoch": 2.38, "learning_rate": 2.133565631256509e-06, "loss": 0.0976, "step": 7182 }, { "epoch": 2.38, "learning_rate": 2.1313536998718056e-06, "loss": 0.1086, "step": 7183 }, { "epoch": 2.38, "learning_rate": 2.1291427788973717e-06, "loss": 0.1343, "step": 7184 }, { "epoch": 2.38, "learning_rate": 2.126932868617111e-06, "loss": 0.1084, "step": 7185 }, { "epoch": 2.38, "learning_rate": 2.124723969314795e-06, "loss": 0.0792, "step": 7186 }, { "epoch": 2.39, "learning_rate": 2.122516081274073e-06, "loss": 0.1325, "step": 7187 }, { "epoch": 2.39, "learning_rate": 2.1203092047784523e-06, "loss": 0.1649, "step": 7188 }, { "epoch": 2.39, "learning_rate": 2.118103340111325e-06, "loss": 0.1371, "step": 7189 }, { "epoch": 2.39, "learning_rate": 2.115898487555941e-06, "loss": 0.1195, "step": 7190 }, { "epoch": 2.39, "learning_rate": 2.113694647395426e-06, "loss": 0.1537, "step": 7191 }, { "epoch": 2.39, "learning_rate": 2.11149181991277e-06, "loss": 0.1285, "step": 7192 }, { "epoch": 2.39, "learning_rate": 2.1092900053908425e-06, "loss": 0.1043, "step": 7193 }, { "epoch": 2.39, "learning_rate": 2.107089204112378e-06, "loss": 0.1307, "step": 7194 }, { "epoch": 2.39, "learning_rate": 2.1048894163599764e-06, "loss": 0.1422, "step": 7195 }, { "epoch": 2.39, "learning_rate": 2.1026906424161155e-06, "loss": 0.1224, "step": 7196 }, { "epoch": 2.39, "learning_rate": 2.1004928825631387e-06, "loss": 0.1257, "step": 7197 }, { "epoch": 2.39, "learning_rate": 2.098296137083259e-06, "loss": 0.1693, "step": 7198 }, { "epoch": 2.39, "learning_rate": 2.0961004062585565e-06, "loss": 0.1592, "step": 7199 }, { "epoch": 2.39, "learning_rate": 2.09390569037099e-06, "loss": 0.1282, "step": 7200 }, { "epoch": 2.39, "learning_rate": 2.0917119897023762e-06, "loss": 0.1234, "step": 7201 }, { "epoch": 2.39, "learning_rate": 2.089519304534414e-06, "loss": 0.1513, "step": 7202 }, { "epoch": 2.39, "learning_rate": 2.0873276351486626e-06, "loss": 0.1481, "step": 7203 }, { "epoch": 2.39, "learning_rate": 2.0851369818265533e-06, "loss": 0.1169, "step": 7204 }, { "epoch": 2.39, "learning_rate": 2.0829473448493854e-06, "loss": 0.1143, "step": 7205 }, { "epoch": 2.39, "learning_rate": 2.0807587244983296e-06, "loss": 0.0842, "step": 7206 }, { "epoch": 2.39, "learning_rate": 2.07857112105443e-06, "loss": 0.1476, "step": 7207 }, { "epoch": 2.39, "learning_rate": 2.0763845347985913e-06, "loss": 0.1588, "step": 7208 }, { "epoch": 2.39, "learning_rate": 2.0741989660115957e-06, "loss": 0.0991, "step": 7209 }, { "epoch": 2.39, "learning_rate": 2.0720144149740906e-06, "loss": 0.156, "step": 7210 }, { "epoch": 2.39, "learning_rate": 2.0698308819665935e-06, "loss": 0.1481, "step": 7211 }, { "epoch": 2.39, "learning_rate": 2.067648367269488e-06, "loss": 0.1661, "step": 7212 }, { "epoch": 2.39, "learning_rate": 2.0654668711630345e-06, "loss": 0.1222, "step": 7213 }, { "epoch": 2.39, "learning_rate": 2.0632863939273574e-06, "loss": 0.1165, "step": 7214 }, { "epoch": 2.39, "learning_rate": 2.061106935842446e-06, "loss": 0.1311, "step": 7215 }, { "epoch": 2.39, "learning_rate": 2.0589284971881716e-06, "loss": 0.1063, "step": 7216 }, { "epoch": 2.4, "learning_rate": 2.0567510782442603e-06, "loss": 0.2145, "step": 7217 }, { "epoch": 2.4, "learning_rate": 2.054574679290322e-06, "loss": 0.1563, "step": 7218 }, { "epoch": 2.4, "learning_rate": 2.052399300605816e-06, "loss": 0.1247, "step": 7219 }, { "epoch": 2.4, "learning_rate": 2.0502249424700914e-06, "loss": 0.1313, "step": 7220 }, { "epoch": 2.4, "learning_rate": 2.048051605162352e-06, "loss": 0.0811, "step": 7221 }, { "epoch": 2.4, "learning_rate": 2.045879288961674e-06, "loss": 0.1523, "step": 7222 }, { "epoch": 2.4, "learning_rate": 2.0437079941470093e-06, "loss": 0.1766, "step": 7223 }, { "epoch": 2.4, "learning_rate": 2.0415377209971686e-06, "loss": 0.1108, "step": 7224 }, { "epoch": 2.4, "learning_rate": 2.0393684697908422e-06, "loss": 0.0954, "step": 7225 }, { "epoch": 2.4, "learning_rate": 2.037200240806573e-06, "loss": 0.1653, "step": 7226 }, { "epoch": 2.4, "learning_rate": 2.0350330343227896e-06, "loss": 0.1309, "step": 7227 }, { "epoch": 2.4, "learning_rate": 2.0328668506177783e-06, "loss": 0.1333, "step": 7228 }, { "epoch": 2.4, "learning_rate": 2.0307016899697028e-06, "loss": 0.1282, "step": 7229 }, { "epoch": 2.4, "learning_rate": 2.0285375526565886e-06, "loss": 0.0947, "step": 7230 }, { "epoch": 2.4, "learning_rate": 2.026374438956331e-06, "loss": 0.1194, "step": 7231 }, { "epoch": 2.4, "learning_rate": 2.0242123491466958e-06, "loss": 0.1364, "step": 7232 }, { "epoch": 2.4, "learning_rate": 2.022051283505313e-06, "loss": 0.0594, "step": 7233 }, { "epoch": 2.4, "learning_rate": 2.0198912423096885e-06, "loss": 0.1433, "step": 7234 }, { "epoch": 2.4, "learning_rate": 2.017732225837189e-06, "loss": 0.1189, "step": 7235 }, { "epoch": 2.4, "learning_rate": 2.0155742343650565e-06, "loss": 0.1043, "step": 7236 }, { "epoch": 2.4, "learning_rate": 2.0134172681703947e-06, "loss": 0.1656, "step": 7237 }, { "epoch": 2.4, "learning_rate": 2.011261327530185e-06, "loss": 0.1952, "step": 7238 }, { "epoch": 2.4, "learning_rate": 2.0091064127212625e-06, "loss": 0.117, "step": 7239 }, { "epoch": 2.4, "learning_rate": 2.006952524020346e-06, "loss": 0.0997, "step": 7240 }, { "epoch": 2.4, "learning_rate": 2.004799661704012e-06, "loss": 0.1556, "step": 7241 }, { "epoch": 2.4, "learning_rate": 2.002647826048707e-06, "loss": 0.0939, "step": 7242 }, { "epoch": 2.4, "learning_rate": 2.0004970173307535e-06, "loss": 0.1485, "step": 7243 }, { "epoch": 2.4, "learning_rate": 1.998347235826329e-06, "loss": 0.1182, "step": 7244 }, { "epoch": 2.4, "learning_rate": 1.9961984818114965e-06, "loss": 0.1507, "step": 7245 }, { "epoch": 2.4, "learning_rate": 1.9940507555621646e-06, "loss": 0.1373, "step": 7246 }, { "epoch": 2.41, "learning_rate": 1.9919040573541315e-06, "loss": 0.0981, "step": 7247 }, { "epoch": 2.41, "learning_rate": 1.9897583874630466e-06, "loss": 0.1652, "step": 7248 }, { "epoch": 2.41, "learning_rate": 1.9876137461644416e-06, "loss": 0.1047, "step": 7249 }, { "epoch": 2.41, "learning_rate": 1.9854701337337057e-06, "loss": 0.138, "step": 7250 }, { "epoch": 2.41, "learning_rate": 1.983327550446097e-06, "loss": 0.1846, "step": 7251 }, { "epoch": 2.41, "learning_rate": 1.9811859965767534e-06, "loss": 0.1075, "step": 7252 }, { "epoch": 2.41, "learning_rate": 1.979045472400658e-06, "loss": 0.1481, "step": 7253 }, { "epoch": 2.41, "learning_rate": 1.9769059781926834e-06, "loss": 0.1401, "step": 7254 }, { "epoch": 2.41, "learning_rate": 1.974767514227557e-06, "loss": 0.1885, "step": 7255 }, { "epoch": 2.41, "learning_rate": 1.972630080779884e-06, "loss": 0.1127, "step": 7256 }, { "epoch": 2.41, "learning_rate": 1.970493678124125e-06, "loss": 0.1114, "step": 7257 }, { "epoch": 2.41, "learning_rate": 1.968358306534619e-06, "loss": 0.1017, "step": 7258 }, { "epoch": 2.41, "learning_rate": 1.9662239662855677e-06, "loss": 0.1036, "step": 7259 }, { "epoch": 2.41, "learning_rate": 1.964090657651041e-06, "loss": 0.1282, "step": 7260 }, { "epoch": 2.41, "learning_rate": 1.9619583809049757e-06, "loss": 0.0885, "step": 7261 }, { "epoch": 2.41, "learning_rate": 1.959827136321174e-06, "loss": 0.1005, "step": 7262 }, { "epoch": 2.41, "learning_rate": 1.9576969241733133e-06, "loss": 0.147, "step": 7263 }, { "epoch": 2.41, "learning_rate": 1.9555677447349284e-06, "loss": 0.1579, "step": 7264 }, { "epoch": 2.41, "learning_rate": 1.953439598279433e-06, "loss": 0.1243, "step": 7265 }, { "epoch": 2.41, "learning_rate": 1.951312485080098e-06, "loss": 0.1316, "step": 7266 }, { "epoch": 2.41, "learning_rate": 1.949186405410065e-06, "loss": 0.1153, "step": 7267 }, { "epoch": 2.41, "learning_rate": 1.9470613595423405e-06, "loss": 0.1369, "step": 7268 }, { "epoch": 2.41, "learning_rate": 1.944937347749808e-06, "loss": 0.1831, "step": 7269 }, { "epoch": 2.41, "learning_rate": 1.942814370305206e-06, "loss": 0.1671, "step": 7270 }, { "epoch": 2.41, "learning_rate": 1.9406924274811436e-06, "loss": 0.1225, "step": 7271 }, { "epoch": 2.41, "learning_rate": 1.938571519550105e-06, "loss": 0.1434, "step": 7272 }, { "epoch": 2.41, "learning_rate": 1.936451646784432e-06, "loss": 0.148, "step": 7273 }, { "epoch": 2.41, "learning_rate": 1.934332809456336e-06, "loss": 0.1494, "step": 7274 }, { "epoch": 2.41, "learning_rate": 1.932215007837894e-06, "loss": 0.1332, "step": 7275 }, { "epoch": 2.41, "learning_rate": 1.9300982422010595e-06, "loss": 0.1084, "step": 7276 }, { "epoch": 2.42, "learning_rate": 1.9279825128176376e-06, "loss": 0.1431, "step": 7277 }, { "epoch": 2.42, "learning_rate": 1.925867819959314e-06, "loss": 0.0891, "step": 7278 }, { "epoch": 2.42, "learning_rate": 1.9237541638976355e-06, "loss": 0.2056, "step": 7279 }, { "epoch": 2.42, "learning_rate": 1.9216415449040127e-06, "loss": 0.1357, "step": 7280 }, { "epoch": 2.42, "learning_rate": 1.919529963249729e-06, "loss": 0.1328, "step": 7281 }, { "epoch": 2.42, "learning_rate": 1.917419419205927e-06, "loss": 0.1185, "step": 7282 }, { "epoch": 2.42, "learning_rate": 1.915309913043628e-06, "loss": 0.1103, "step": 7283 }, { "epoch": 2.42, "learning_rate": 1.913201445033708e-06, "loss": 0.1097, "step": 7284 }, { "epoch": 2.42, "learning_rate": 1.9110940154469172e-06, "loss": 0.1706, "step": 7285 }, { "epoch": 2.42, "learning_rate": 1.908987624553871e-06, "loss": 0.1631, "step": 7286 }, { "epoch": 2.42, "learning_rate": 1.906882272625047e-06, "loss": 0.1384, "step": 7287 }, { "epoch": 2.42, "learning_rate": 1.9047779599307926e-06, "loss": 0.0982, "step": 7288 }, { "epoch": 2.42, "learning_rate": 1.9026746867413259e-06, "loss": 0.1261, "step": 7289 }, { "epoch": 2.42, "learning_rate": 1.9005724533267256e-06, "loss": 0.1618, "step": 7290 }, { "epoch": 2.42, "learning_rate": 1.8984712599569344e-06, "loss": 0.1712, "step": 7291 }, { "epoch": 2.42, "learning_rate": 1.8963711069017743e-06, "loss": 0.1674, "step": 7292 }, { "epoch": 2.42, "learning_rate": 1.8942719944309162e-06, "loss": 0.1093, "step": 7293 }, { "epoch": 2.42, "learning_rate": 1.892173922813918e-06, "loss": 0.1359, "step": 7294 }, { "epoch": 2.42, "learning_rate": 1.8900768923201795e-06, "loss": 0.1284, "step": 7295 }, { "epoch": 2.42, "learning_rate": 1.8879809032189877e-06, "loss": 0.1293, "step": 7296 }, { "epoch": 2.42, "learning_rate": 1.885885955779486e-06, "loss": 0.1451, "step": 7297 }, { "epoch": 2.42, "learning_rate": 1.883792050270683e-06, "loss": 0.1494, "step": 7298 }, { "epoch": 2.42, "learning_rate": 1.881699186961462e-06, "loss": 0.1254, "step": 7299 }, { "epoch": 2.42, "learning_rate": 1.8796073661205616e-06, "loss": 0.1805, "step": 7300 }, { "epoch": 2.42, "learning_rate": 1.8775165880165992e-06, "loss": 0.2012, "step": 7301 }, { "epoch": 2.42, "learning_rate": 1.8754268529180409e-06, "loss": 0.1489, "step": 7302 }, { "epoch": 2.42, "learning_rate": 1.8733381610932354e-06, "loss": 0.157, "step": 7303 }, { "epoch": 2.42, "learning_rate": 1.8712505128103876e-06, "loss": 0.1252, "step": 7304 }, { "epoch": 2.42, "learning_rate": 1.8691639083375767e-06, "loss": 0.1561, "step": 7305 }, { "epoch": 2.42, "learning_rate": 1.867078347942739e-06, "loss": 0.1555, "step": 7306 }, { "epoch": 2.43, "learning_rate": 1.864993831893679e-06, "loss": 0.164, "step": 7307 }, { "epoch": 2.43, "learning_rate": 1.8629103604580734e-06, "loss": 0.1091, "step": 7308 }, { "epoch": 2.43, "learning_rate": 1.8608279339034585e-06, "loss": 0.1789, "step": 7309 }, { "epoch": 2.43, "learning_rate": 1.8587465524972371e-06, "loss": 0.1428, "step": 7310 }, { "epoch": 2.43, "learning_rate": 1.8566662165066752e-06, "loss": 0.1519, "step": 7311 }, { "epoch": 2.43, "learning_rate": 1.8545869261989157e-06, "loss": 0.1331, "step": 7312 }, { "epoch": 2.43, "learning_rate": 1.8525086818409522e-06, "loss": 0.1326, "step": 7313 }, { "epoch": 2.43, "learning_rate": 1.8504314836996594e-06, "loss": 0.1832, "step": 7314 }, { "epoch": 2.43, "learning_rate": 1.8483553320417614e-06, "loss": 0.1368, "step": 7315 }, { "epoch": 2.43, "learning_rate": 1.8462802271338608e-06, "loss": 0.1475, "step": 7316 }, { "epoch": 2.43, "learning_rate": 1.8442061692424207e-06, "loss": 0.1553, "step": 7317 }, { "epoch": 2.43, "learning_rate": 1.8421331586337675e-06, "loss": 0.1252, "step": 7318 }, { "epoch": 2.43, "learning_rate": 1.8400611955741e-06, "loss": 0.1476, "step": 7319 }, { "epoch": 2.43, "learning_rate": 1.8379902803294735e-06, "loss": 0.1349, "step": 7320 }, { "epoch": 2.43, "learning_rate": 1.8359204131658214e-06, "loss": 0.1471, "step": 7321 }, { "epoch": 2.43, "learning_rate": 1.8338515943489243e-06, "loss": 0.0914, "step": 7322 }, { "epoch": 2.43, "learning_rate": 1.8317838241444464e-06, "loss": 0.0879, "step": 7323 }, { "epoch": 2.43, "learning_rate": 1.829717102817905e-06, "loss": 0.125, "step": 7324 }, { "epoch": 2.43, "learning_rate": 1.8276514306346903e-06, "loss": 0.1285, "step": 7325 }, { "epoch": 2.43, "learning_rate": 1.8255868078600525e-06, "loss": 0.1109, "step": 7326 }, { "epoch": 2.43, "learning_rate": 1.8235232347591082e-06, "loss": 0.1605, "step": 7327 }, { "epoch": 2.43, "learning_rate": 1.8214607115968462e-06, "loss": 0.123, "step": 7328 }, { "epoch": 2.43, "learning_rate": 1.8193992386381054e-06, "loss": 0.1026, "step": 7329 }, { "epoch": 2.43, "learning_rate": 1.8173388161476058e-06, "loss": 0.0958, "step": 7330 }, { "epoch": 2.43, "learning_rate": 1.8152794443899202e-06, "loss": 0.1296, "step": 7331 }, { "epoch": 2.43, "learning_rate": 1.8132211236294972e-06, "loss": 0.1242, "step": 7332 }, { "epoch": 2.43, "learning_rate": 1.811163854130641e-06, "loss": 0.162, "step": 7333 }, { "epoch": 2.43, "learning_rate": 1.8091076361575277e-06, "loss": 0.0945, "step": 7334 }, { "epoch": 2.43, "learning_rate": 1.8070524699741953e-06, "loss": 0.1326, "step": 7335 }, { "epoch": 2.43, "learning_rate": 1.804998355844545e-06, "loss": 0.1024, "step": 7336 }, { "epoch": 2.44, "learning_rate": 1.8029452940323477e-06, "loss": 0.0749, "step": 7337 }, { "epoch": 2.44, "learning_rate": 1.8008932848012307e-06, "loss": 0.1077, "step": 7338 }, { "epoch": 2.44, "learning_rate": 1.7988423284146993e-06, "loss": 0.1667, "step": 7339 }, { "epoch": 2.44, "learning_rate": 1.7967924251361102e-06, "loss": 0.1102, "step": 7340 }, { "epoch": 2.44, "learning_rate": 1.7947435752286946e-06, "loss": 0.1686, "step": 7341 }, { "epoch": 2.44, "learning_rate": 1.7926957789555444e-06, "loss": 0.1379, "step": 7342 }, { "epoch": 2.44, "learning_rate": 1.7906490365796158e-06, "loss": 0.1016, "step": 7343 }, { "epoch": 2.44, "learning_rate": 1.7886033483637265e-06, "loss": 0.1423, "step": 7344 }, { "epoch": 2.44, "learning_rate": 1.7865587145705687e-06, "loss": 0.1214, "step": 7345 }, { "epoch": 2.44, "learning_rate": 1.78451513546269e-06, "loss": 0.1631, "step": 7346 }, { "epoch": 2.44, "learning_rate": 1.7824726113025048e-06, "loss": 0.1442, "step": 7347 }, { "epoch": 2.44, "learning_rate": 1.7804311423522959e-06, "loss": 0.1692, "step": 7348 }, { "epoch": 2.44, "learning_rate": 1.7783907288742064e-06, "loss": 0.1344, "step": 7349 }, { "epoch": 2.44, "learning_rate": 1.776351371130245e-06, "loss": 0.2016, "step": 7350 }, { "epoch": 2.44, "learning_rate": 1.7743130693822818e-06, "loss": 0.121, "step": 7351 }, { "epoch": 2.44, "learning_rate": 1.7722758238920602e-06, "loss": 0.1515, "step": 7352 }, { "epoch": 2.44, "learning_rate": 1.770239634921177e-06, "loss": 0.1381, "step": 7353 }, { "epoch": 2.44, "learning_rate": 1.7682045027311024e-06, "loss": 0.1586, "step": 7354 }, { "epoch": 2.44, "learning_rate": 1.766170427583167e-06, "loss": 0.1028, "step": 7355 }, { "epoch": 2.44, "learning_rate": 1.7641374097385645e-06, "loss": 0.1572, "step": 7356 }, { "epoch": 2.44, "learning_rate": 1.7621054494583535e-06, "loss": 0.1127, "step": 7357 }, { "epoch": 2.44, "learning_rate": 1.7600745470034542e-06, "loss": 0.1364, "step": 7358 }, { "epoch": 2.44, "learning_rate": 1.7580447026346613e-06, "loss": 0.1357, "step": 7359 }, { "epoch": 2.44, "learning_rate": 1.75601591661262e-06, "loss": 0.1505, "step": 7360 }, { "epoch": 2.44, "learning_rate": 1.7539881891978515e-06, "loss": 0.1482, "step": 7361 }, { "epoch": 2.44, "learning_rate": 1.7519615206507323e-06, "loss": 0.1596, "step": 7362 }, { "epoch": 2.44, "learning_rate": 1.7499359112315084e-06, "loss": 0.1758, "step": 7363 }, { "epoch": 2.44, "learning_rate": 1.7479113612002818e-06, "loss": 0.0933, "step": 7364 }, { "epoch": 2.44, "learning_rate": 1.7458878708170324e-06, "loss": 0.1563, "step": 7365 }, { "epoch": 2.44, "learning_rate": 1.7438654403415922e-06, "loss": 0.1305, "step": 7366 }, { "epoch": 2.45, "learning_rate": 1.741844070033658e-06, "loss": 0.136, "step": 7367 }, { "epoch": 2.45, "learning_rate": 1.7398237601527978e-06, "loss": 0.1468, "step": 7368 }, { "epoch": 2.45, "learning_rate": 1.737804510958435e-06, "loss": 0.1514, "step": 7369 }, { "epoch": 2.45, "learning_rate": 1.7357863227098692e-06, "loss": 0.1104, "step": 7370 }, { "epoch": 2.45, "learning_rate": 1.7337691956662429e-06, "loss": 0.1629, "step": 7371 }, { "epoch": 2.45, "learning_rate": 1.7317531300865832e-06, "loss": 0.1111, "step": 7372 }, { "epoch": 2.45, "learning_rate": 1.7297381262297687e-06, "loss": 0.112, "step": 7373 }, { "epoch": 2.45, "learning_rate": 1.7277241843545477e-06, "loss": 0.112, "step": 7374 }, { "epoch": 2.45, "learning_rate": 1.7257113047195295e-06, "loss": 0.125, "step": 7375 }, { "epoch": 2.45, "learning_rate": 1.7236994875831848e-06, "loss": 0.1915, "step": 7376 }, { "epoch": 2.45, "learning_rate": 1.7216887332038568e-06, "loss": 0.1356, "step": 7377 }, { "epoch": 2.45, "learning_rate": 1.7196790418397357e-06, "loss": 0.1534, "step": 7378 }, { "epoch": 2.45, "learning_rate": 1.7176704137488932e-06, "loss": 0.1591, "step": 7379 }, { "epoch": 2.45, "learning_rate": 1.7156628491892523e-06, "loss": 0.1463, "step": 7380 }, { "epoch": 2.45, "learning_rate": 1.7136563484186086e-06, "loss": 0.1104, "step": 7381 }, { "epoch": 2.45, "learning_rate": 1.7116509116946123e-06, "loss": 0.1385, "step": 7382 }, { "epoch": 2.45, "learning_rate": 1.7096465392747797e-06, "loss": 0.1057, "step": 7383 }, { "epoch": 2.45, "learning_rate": 1.7076432314164958e-06, "loss": 0.073, "step": 7384 }, { "epoch": 2.45, "learning_rate": 1.705640988377002e-06, "loss": 0.1176, "step": 7385 }, { "epoch": 2.45, "learning_rate": 1.7036398104134078e-06, "loss": 0.1553, "step": 7386 }, { "epoch": 2.45, "learning_rate": 1.7016396977826788e-06, "loss": 0.1161, "step": 7387 }, { "epoch": 2.45, "learning_rate": 1.699640650741654e-06, "loss": 0.11, "step": 7388 }, { "epoch": 2.45, "learning_rate": 1.6976426695470272e-06, "loss": 0.1152, "step": 7389 }, { "epoch": 2.45, "learning_rate": 1.6956457544553629e-06, "loss": 0.1014, "step": 7390 }, { "epoch": 2.45, "learning_rate": 1.6936499057230803e-06, "loss": 0.1701, "step": 7391 }, { "epoch": 2.45, "learning_rate": 1.691655123606466e-06, "loss": 0.1672, "step": 7392 }, { "epoch": 2.45, "learning_rate": 1.6896614083616713e-06, "loss": 0.1557, "step": 7393 }, { "epoch": 2.45, "learning_rate": 1.687668760244704e-06, "loss": 0.1313, "step": 7394 }, { "epoch": 2.45, "learning_rate": 1.6856771795114456e-06, "loss": 0.1528, "step": 7395 }, { "epoch": 2.45, "learning_rate": 1.6836866664176287e-06, "loss": 0.1105, "step": 7396 }, { "epoch": 2.46, "learning_rate": 1.681697221218862e-06, "loss": 0.1253, "step": 7397 }, { "epoch": 2.46, "learning_rate": 1.6797088441706e-06, "loss": 0.194, "step": 7398 }, { "epoch": 2.46, "learning_rate": 1.6777215355281762e-06, "loss": 0.1668, "step": 7399 }, { "epoch": 2.46, "learning_rate": 1.6757352955467754e-06, "loss": 0.0801, "step": 7400 }, { "epoch": 2.46, "learning_rate": 1.6737501244814557e-06, "loss": 0.1043, "step": 7401 }, { "epoch": 2.46, "learning_rate": 1.671766022587129e-06, "loss": 0.073, "step": 7402 }, { "epoch": 2.46, "learning_rate": 1.6697829901185702e-06, "loss": 0.1324, "step": 7403 }, { "epoch": 2.46, "learning_rate": 1.667801027330429e-06, "loss": 0.1462, "step": 7404 }, { "epoch": 2.46, "learning_rate": 1.6658201344771984e-06, "loss": 0.181, "step": 7405 }, { "epoch": 2.46, "learning_rate": 1.6638403118132495e-06, "loss": 0.1078, "step": 7406 }, { "epoch": 2.46, "learning_rate": 1.6618615595928088e-06, "loss": 0.1237, "step": 7407 }, { "epoch": 2.46, "learning_rate": 1.6598838780699689e-06, "loss": 0.1511, "step": 7408 }, { "epoch": 2.46, "learning_rate": 1.6579072674986818e-06, "loss": 0.1206, "step": 7409 }, { "epoch": 2.46, "learning_rate": 1.6559317281327648e-06, "loss": 0.1407, "step": 7410 }, { "epoch": 2.46, "learning_rate": 1.6539572602258968e-06, "loss": 0.1231, "step": 7411 }, { "epoch": 2.46, "learning_rate": 1.6519838640316165e-06, "loss": 0.0934, "step": 7412 }, { "epoch": 2.46, "learning_rate": 1.6500115398033279e-06, "loss": 0.1374, "step": 7413 }, { "epoch": 2.46, "learning_rate": 1.6480402877942936e-06, "loss": 0.1399, "step": 7414 }, { "epoch": 2.46, "learning_rate": 1.646070108257648e-06, "loss": 0.1826, "step": 7415 }, { "epoch": 2.46, "learning_rate": 1.6441010014463732e-06, "loss": 0.1372, "step": 7416 }, { "epoch": 2.46, "learning_rate": 1.6421329676133302e-06, "loss": 0.1597, "step": 7417 }, { "epoch": 2.46, "learning_rate": 1.6401660070112279e-06, "loss": 0.1465, "step": 7418 }, { "epoch": 2.46, "learning_rate": 1.6382001198926446e-06, "loss": 0.1338, "step": 7419 }, { "epoch": 2.46, "learning_rate": 1.6362353065100168e-06, "loss": 0.1644, "step": 7420 }, { "epoch": 2.46, "learning_rate": 1.6342715671156495e-06, "loss": 0.1569, "step": 7421 }, { "epoch": 2.46, "learning_rate": 1.6323089019617045e-06, "loss": 0.1435, "step": 7422 }, { "epoch": 2.46, "learning_rate": 1.6303473113002034e-06, "loss": 0.1194, "step": 7423 }, { "epoch": 2.46, "learning_rate": 1.6283867953830402e-06, "loss": 0.1162, "step": 7424 }, { "epoch": 2.46, "learning_rate": 1.6264273544619592e-06, "loss": 0.1812, "step": 7425 }, { "epoch": 2.46, "learning_rate": 1.6244689887885733e-06, "loss": 0.1399, "step": 7426 }, { "epoch": 2.46, "learning_rate": 1.622511698614353e-06, "loss": 0.1806, "step": 7427 }, { "epoch": 2.47, "learning_rate": 1.6205554841906367e-06, "loss": 0.1336, "step": 7428 }, { "epoch": 2.47, "learning_rate": 1.6186003457686184e-06, "loss": 0.1485, "step": 7429 }, { "epoch": 2.47, "learning_rate": 1.61664628359936e-06, "loss": 0.1392, "step": 7430 }, { "epoch": 2.47, "learning_rate": 1.6146932979337815e-06, "loss": 0.1438, "step": 7431 }, { "epoch": 2.47, "learning_rate": 1.612741389022663e-06, "loss": 0.1282, "step": 7432 }, { "epoch": 2.47, "learning_rate": 1.6107905571166494e-06, "loss": 0.1705, "step": 7433 }, { "epoch": 2.47, "learning_rate": 1.608840802466244e-06, "loss": 0.1424, "step": 7434 }, { "epoch": 2.47, "learning_rate": 1.6068921253218185e-06, "loss": 0.1452, "step": 7435 }, { "epoch": 2.47, "learning_rate": 1.604944525933597e-06, "loss": 0.1516, "step": 7436 }, { "epoch": 2.47, "learning_rate": 1.602998004551677e-06, "loss": 0.1284, "step": 7437 }, { "epoch": 2.47, "learning_rate": 1.6010525614260031e-06, "loss": 0.1326, "step": 7438 }, { "epoch": 2.47, "learning_rate": 1.5991081968063971e-06, "loss": 0.1478, "step": 7439 }, { "epoch": 2.47, "learning_rate": 1.5971649109425259e-06, "loss": 0.1627, "step": 7440 }, { "epoch": 2.47, "learning_rate": 1.5952227040839318e-06, "loss": 0.138, "step": 7441 }, { "epoch": 2.47, "learning_rate": 1.5932815764800114e-06, "loss": 0.1213, "step": 7442 }, { "epoch": 2.47, "learning_rate": 1.5913415283800215e-06, "loss": 0.131, "step": 7443 }, { "epoch": 2.47, "learning_rate": 1.5894025600330876e-06, "loss": 0.1632, "step": 7444 }, { "epoch": 2.47, "learning_rate": 1.587464671688187e-06, "loss": 0.1528, "step": 7445 }, { "epoch": 2.47, "learning_rate": 1.5855278635941719e-06, "loss": 0.197, "step": 7446 }, { "epoch": 2.47, "learning_rate": 1.5835921359997364e-06, "loss": 0.1115, "step": 7447 }, { "epoch": 2.47, "learning_rate": 1.581657489153454e-06, "loss": 0.1427, "step": 7448 }, { "epoch": 2.47, "learning_rate": 1.5797239233037477e-06, "loss": 0.1442, "step": 7449 }, { "epoch": 2.47, "learning_rate": 1.5777914386989102e-06, "loss": 0.0679, "step": 7450 }, { "epoch": 2.47, "learning_rate": 1.575860035587089e-06, "loss": 0.1168, "step": 7451 }, { "epoch": 2.47, "learning_rate": 1.5739297142162923e-06, "loss": 0.0799, "step": 7452 }, { "epoch": 2.47, "learning_rate": 1.572000474834401e-06, "loss": 0.1258, "step": 7453 }, { "epoch": 2.47, "learning_rate": 1.5700723176891353e-06, "loss": 0.1532, "step": 7454 }, { "epoch": 2.47, "learning_rate": 1.5681452430280997e-06, "loss": 0.1876, "step": 7455 }, { "epoch": 2.47, "learning_rate": 1.5662192510987418e-06, "loss": 0.1177, "step": 7456 }, { "epoch": 2.47, "learning_rate": 1.5642943421483837e-06, "loss": 0.1071, "step": 7457 }, { "epoch": 2.48, "learning_rate": 1.562370516424201e-06, "loss": 0.1232, "step": 7458 }, { "epoch": 2.48, "learning_rate": 1.560447774173226e-06, "loss": 0.0923, "step": 7459 }, { "epoch": 2.48, "learning_rate": 1.5585261156423648e-06, "loss": 0.1147, "step": 7460 }, { "epoch": 2.48, "learning_rate": 1.5566055410783742e-06, "loss": 0.1348, "step": 7461 }, { "epoch": 2.48, "learning_rate": 1.5546860507278738e-06, "loss": 0.1666, "step": 7462 }, { "epoch": 2.48, "learning_rate": 1.552767644837343e-06, "loss": 0.1424, "step": 7463 }, { "epoch": 2.48, "learning_rate": 1.550850323653128e-06, "loss": 0.1468, "step": 7464 }, { "epoch": 2.48, "learning_rate": 1.5489340874214264e-06, "loss": 0.0859, "step": 7465 }, { "epoch": 2.48, "learning_rate": 1.547018936388307e-06, "loss": 0.0923, "step": 7466 }, { "epoch": 2.48, "learning_rate": 1.5451048707996907e-06, "loss": 0.1387, "step": 7467 }, { "epoch": 2.48, "learning_rate": 1.5431918909013622e-06, "loss": 0.1528, "step": 7468 }, { "epoch": 2.48, "learning_rate": 1.5412799969389657e-06, "loss": 0.1918, "step": 7469 }, { "epoch": 2.48, "learning_rate": 1.5393691891580066e-06, "loss": 0.115, "step": 7470 }, { "epoch": 2.48, "learning_rate": 1.5374594678038545e-06, "loss": 0.1308, "step": 7471 }, { "epoch": 2.48, "learning_rate": 1.5355508331217305e-06, "loss": 0.1553, "step": 7472 }, { "epoch": 2.48, "learning_rate": 1.5336432853567307e-06, "loss": 0.1566, "step": 7473 }, { "epoch": 2.48, "learning_rate": 1.5317368247537922e-06, "loss": 0.1351, "step": 7474 }, { "epoch": 2.48, "learning_rate": 1.5298314515577305e-06, "loss": 0.1485, "step": 7475 }, { "epoch": 2.48, "learning_rate": 1.527927166013209e-06, "loss": 0.1656, "step": 7476 }, { "epoch": 2.48, "learning_rate": 1.5260239683647616e-06, "loss": 0.176, "step": 7477 }, { "epoch": 2.48, "learning_rate": 1.524121858856773e-06, "loss": 0.1362, "step": 7478 }, { "epoch": 2.48, "learning_rate": 1.5222208377334923e-06, "loss": 0.1865, "step": 7479 }, { "epoch": 2.48, "learning_rate": 1.5203209052390344e-06, "loss": 0.1484, "step": 7480 }, { "epoch": 2.48, "learning_rate": 1.5184220616173617e-06, "loss": 0.1376, "step": 7481 }, { "epoch": 2.48, "learning_rate": 1.5165243071123091e-06, "loss": 0.0929, "step": 7482 }, { "epoch": 2.48, "learning_rate": 1.5146276419675633e-06, "loss": 0.1286, "step": 7483 }, { "epoch": 2.48, "learning_rate": 1.5127320664266776e-06, "loss": 0.1469, "step": 7484 }, { "epoch": 2.48, "learning_rate": 1.510837580733059e-06, "loss": 0.1649, "step": 7485 }, { "epoch": 2.48, "learning_rate": 1.5089441851299814e-06, "loss": 0.0886, "step": 7486 }, { "epoch": 2.48, "learning_rate": 1.5070518798605738e-06, "loss": 0.1417, "step": 7487 }, { "epoch": 2.49, "learning_rate": 1.5051606651678264e-06, "loss": 0.1375, "step": 7488 }, { "epoch": 2.49, "learning_rate": 1.5032705412945882e-06, "loss": 0.0853, "step": 7489 }, { "epoch": 2.49, "learning_rate": 1.5013815084835681e-06, "loss": 0.15, "step": 7490 }, { "epoch": 2.49, "learning_rate": 1.4994935669773402e-06, "loss": 0.1473, "step": 7491 }, { "epoch": 2.49, "learning_rate": 1.4976067170183317e-06, "loss": 0.1209, "step": 7492 }, { "epoch": 2.49, "learning_rate": 1.495720958848834e-06, "loss": 0.1407, "step": 7493 }, { "epoch": 2.49, "learning_rate": 1.4938362927109962e-06, "loss": 0.1141, "step": 7494 }, { "epoch": 2.49, "learning_rate": 1.4919527188468285e-06, "loss": 0.1023, "step": 7495 }, { "epoch": 2.49, "learning_rate": 1.4900702374981956e-06, "loss": 0.1319, "step": 7496 }, { "epoch": 2.49, "learning_rate": 1.488188848906832e-06, "loss": 0.149, "step": 7497 }, { "epoch": 2.49, "learning_rate": 1.486308553314324e-06, "loss": 0.1516, "step": 7498 }, { "epoch": 2.49, "learning_rate": 1.4844293509621167e-06, "loss": 0.1454, "step": 7499 }, { "epoch": 2.49, "learning_rate": 1.4825512420915233e-06, "loss": 0.1028, "step": 7500 }, { "epoch": 2.49, "learning_rate": 1.480674226943708e-06, "loss": 0.1532, "step": 7501 }, { "epoch": 2.49, "learning_rate": 1.4787983057596988e-06, "loss": 0.0895, "step": 7502 }, { "epoch": 2.49, "learning_rate": 1.4769234787803787e-06, "loss": 0.1133, "step": 7503 }, { "epoch": 2.49, "learning_rate": 1.4750497462464985e-06, "loss": 0.1657, "step": 7504 }, { "epoch": 2.49, "learning_rate": 1.4731771083986602e-06, "loss": 0.0998, "step": 7505 }, { "epoch": 2.49, "learning_rate": 1.4713055654773312e-06, "loss": 0.1398, "step": 7506 }, { "epoch": 2.49, "learning_rate": 1.469435117722835e-06, "loss": 0.0919, "step": 7507 }, { "epoch": 2.49, "learning_rate": 1.467565765375354e-06, "loss": 0.1211, "step": 7508 }, { "epoch": 2.49, "learning_rate": 1.4656975086749326e-06, "loss": 0.0638, "step": 7509 }, { "epoch": 2.49, "learning_rate": 1.463830347861469e-06, "loss": 0.1218, "step": 7510 }, { "epoch": 2.49, "learning_rate": 1.4619642831747305e-06, "loss": 0.1748, "step": 7511 }, { "epoch": 2.49, "learning_rate": 1.460099314854333e-06, "loss": 0.1445, "step": 7512 }, { "epoch": 2.49, "learning_rate": 1.4582354431397604e-06, "loss": 0.0886, "step": 7513 }, { "epoch": 2.49, "learning_rate": 1.4563726682703482e-06, "loss": 0.1707, "step": 7514 }, { "epoch": 2.49, "learning_rate": 1.4545109904853006e-06, "loss": 0.1263, "step": 7515 }, { "epoch": 2.49, "learning_rate": 1.4526504100236671e-06, "loss": 0.1096, "step": 7516 }, { "epoch": 2.49, "learning_rate": 1.4507909271243702e-06, "loss": 0.1645, "step": 7517 }, { "epoch": 2.5, "learning_rate": 1.4489325420261836e-06, "loss": 0.1255, "step": 7518 }, { "epoch": 2.5, "learning_rate": 1.447075254967739e-06, "loss": 0.1388, "step": 7519 }, { "epoch": 2.5, "learning_rate": 1.4452190661875353e-06, "loss": 0.1465, "step": 7520 }, { "epoch": 2.5, "learning_rate": 1.4433639759239203e-06, "loss": 0.1575, "step": 7521 }, { "epoch": 2.5, "learning_rate": 1.4415099844151125e-06, "loss": 0.0608, "step": 7522 }, { "epoch": 2.5, "learning_rate": 1.4396570918991725e-06, "loss": 0.0907, "step": 7523 }, { "epoch": 2.5, "learning_rate": 1.4378052986140379e-06, "loss": 0.1226, "step": 7524 }, { "epoch": 2.5, "learning_rate": 1.4359546047974904e-06, "loss": 0.101, "step": 7525 }, { "epoch": 2.5, "learning_rate": 1.4341050106871834e-06, "loss": 0.109, "step": 7526 }, { "epoch": 2.5, "learning_rate": 1.4322565165206193e-06, "loss": 0.1486, "step": 7527 }, { "epoch": 2.5, "learning_rate": 1.4304091225351602e-06, "loss": 0.1941, "step": 7528 }, { "epoch": 2.5, "learning_rate": 1.4285628289680365e-06, "loss": 0.1752, "step": 7529 }, { "epoch": 2.5, "learning_rate": 1.4267176360563218e-06, "loss": 0.1373, "step": 7530 }, { "epoch": 2.5, "learning_rate": 1.424873544036962e-06, "loss": 0.1409, "step": 7531 }, { "epoch": 2.5, "learning_rate": 1.4230305531467537e-06, "loss": 0.184, "step": 7532 }, { "epoch": 2.5, "learning_rate": 1.4211886636223571e-06, "loss": 0.2165, "step": 7533 }, { "epoch": 2.5, "learning_rate": 1.4193478757002888e-06, "loss": 0.1694, "step": 7534 }, { "epoch": 2.5, "learning_rate": 1.4175081896169197e-06, "loss": 0.0718, "step": 7535 }, { "epoch": 2.5, "learning_rate": 1.4156696056084873e-06, "loss": 0.1443, "step": 7536 }, { "epoch": 2.5, "learning_rate": 1.4138321239110831e-06, "loss": 0.1181, "step": 7537 }, { "epoch": 2.5, "learning_rate": 1.411995744760657e-06, "loss": 0.0933, "step": 7538 }, { "epoch": 2.5, "learning_rate": 1.4101604683930148e-06, "loss": 0.1183, "step": 7539 }, { "epoch": 2.5, "learning_rate": 1.4083262950438292e-06, "loss": 0.1549, "step": 7540 }, { "epoch": 2.5, "learning_rate": 1.4064932249486197e-06, "loss": 0.1366, "step": 7541 }, { "epoch": 2.5, "learning_rate": 1.4046612583427777e-06, "loss": 0.1391, "step": 7542 }, { "epoch": 2.5, "learning_rate": 1.4028303954615398e-06, "loss": 0.1022, "step": 7543 }, { "epoch": 2.5, "learning_rate": 1.4010006365400087e-06, "loss": 0.1892, "step": 7544 }, { "epoch": 2.5, "learning_rate": 1.399171981813142e-06, "loss": 0.1007, "step": 7545 }, { "epoch": 2.5, "learning_rate": 1.397344431515756e-06, "loss": 0.1598, "step": 7546 }, { "epoch": 2.5, "learning_rate": 1.3955179858825274e-06, "loss": 0.1243, "step": 7547 }, { "epoch": 2.51, "learning_rate": 1.393692645147988e-06, "loss": 0.1481, "step": 7548 }, { "epoch": 2.51, "learning_rate": 1.391868409546533e-06, "loss": 0.1729, "step": 7549 }, { "epoch": 2.51, "learning_rate": 1.3900452793124074e-06, "loss": 0.1086, "step": 7550 }, { "epoch": 2.51, "learning_rate": 1.3882232546797225e-06, "loss": 0.1403, "step": 7551 }, { "epoch": 2.51, "learning_rate": 1.3864023358824386e-06, "loss": 0.0816, "step": 7552 }, { "epoch": 2.51, "learning_rate": 1.3845825231543841e-06, "loss": 0.1633, "step": 7553 }, { "epoch": 2.51, "learning_rate": 1.38276381672924e-06, "loss": 0.1572, "step": 7554 }, { "epoch": 2.51, "learning_rate": 1.3809462168405418e-06, "loss": 0.13, "step": 7555 }, { "epoch": 2.51, "learning_rate": 1.3791297237216928e-06, "loss": 0.1849, "step": 7556 }, { "epoch": 2.51, "learning_rate": 1.377314337605945e-06, "loss": 0.105, "step": 7557 }, { "epoch": 2.51, "learning_rate": 1.3755000587264123e-06, "loss": 0.1276, "step": 7558 }, { "epoch": 2.51, "learning_rate": 1.3736868873160626e-06, "loss": 0.1094, "step": 7559 }, { "epoch": 2.51, "learning_rate": 1.3718748236077294e-06, "loss": 0.0932, "step": 7560 }, { "epoch": 2.51, "learning_rate": 1.3700638678340949e-06, "loss": 0.1367, "step": 7561 }, { "epoch": 2.51, "learning_rate": 1.3682540202277085e-06, "loss": 0.1323, "step": 7562 }, { "epoch": 2.51, "learning_rate": 1.3664452810209683e-06, "loss": 0.1792, "step": 7563 }, { "epoch": 2.51, "learning_rate": 1.3646376504461346e-06, "loss": 0.1479, "step": 7564 }, { "epoch": 2.51, "learning_rate": 1.3628311287353247e-06, "loss": 0.1046, "step": 7565 }, { "epoch": 2.51, "learning_rate": 1.3610257161205121e-06, "loss": 0.1342, "step": 7566 }, { "epoch": 2.51, "learning_rate": 1.3592214128335323e-06, "loss": 0.1242, "step": 7567 }, { "epoch": 2.51, "learning_rate": 1.3574182191060725e-06, "loss": 0.1215, "step": 7568 }, { "epoch": 2.51, "learning_rate": 1.355616135169683e-06, "loss": 0.1131, "step": 7569 }, { "epoch": 2.51, "learning_rate": 1.353815161255766e-06, "loss": 0.1834, "step": 7570 }, { "epoch": 2.51, "learning_rate": 1.3520152975955868e-06, "loss": 0.1096, "step": 7571 }, { "epoch": 2.51, "learning_rate": 1.3502165444202608e-06, "loss": 0.1606, "step": 7572 }, { "epoch": 2.51, "learning_rate": 1.3484189019607697e-06, "loss": 0.0977, "step": 7573 }, { "epoch": 2.51, "learning_rate": 1.3466223704479474e-06, "loss": 0.1257, "step": 7574 }, { "epoch": 2.51, "learning_rate": 1.3448269501124834e-06, "loss": 0.1586, "step": 7575 }, { "epoch": 2.51, "learning_rate": 1.3430326411849304e-06, "loss": 0.1431, "step": 7576 }, { "epoch": 2.51, "learning_rate": 1.3412394438956933e-06, "loss": 0.0942, "step": 7577 }, { "epoch": 2.52, "learning_rate": 1.3394473584750357e-06, "loss": 0.1081, "step": 7578 }, { "epoch": 2.52, "learning_rate": 1.3376563851530777e-06, "loss": 0.1291, "step": 7579 }, { "epoch": 2.52, "learning_rate": 1.3358665241598e-06, "loss": 0.1158, "step": 7580 }, { "epoch": 2.52, "learning_rate": 1.3340777757250357e-06, "loss": 0.1007, "step": 7581 }, { "epoch": 2.52, "learning_rate": 1.3322901400784816e-06, "loss": 0.1292, "step": 7582 }, { "epoch": 2.52, "learning_rate": 1.3305036174496833e-06, "loss": 0.1134, "step": 7583 }, { "epoch": 2.52, "learning_rate": 1.3287182080680494e-06, "loss": 0.1158, "step": 7584 }, { "epoch": 2.52, "learning_rate": 1.3269339121628422e-06, "loss": 0.2047, "step": 7585 }, { "epoch": 2.52, "learning_rate": 1.3251507299631827e-06, "loss": 0.115, "step": 7586 }, { "epoch": 2.52, "learning_rate": 1.32336866169805e-06, "loss": 0.1502, "step": 7587 }, { "epoch": 2.52, "learning_rate": 1.321587707596278e-06, "loss": 0.1425, "step": 7588 }, { "epoch": 2.52, "learning_rate": 1.31980786788656e-06, "loss": 0.1152, "step": 7589 }, { "epoch": 2.52, "learning_rate": 1.3180291427974412e-06, "loss": 0.1307, "step": 7590 }, { "epoch": 2.52, "learning_rate": 1.3162515325573332e-06, "loss": 0.1264, "step": 7591 }, { "epoch": 2.52, "learning_rate": 1.3144750373944915e-06, "loss": 0.1345, "step": 7592 }, { "epoch": 2.52, "learning_rate": 1.31269965753704e-06, "loss": 0.1105, "step": 7593 }, { "epoch": 2.52, "learning_rate": 1.3109253932129529e-06, "loss": 0.1234, "step": 7594 }, { "epoch": 2.52, "learning_rate": 1.3091522446500604e-06, "loss": 0.1301, "step": 7595 }, { "epoch": 2.52, "learning_rate": 1.307380212076056e-06, "loss": 0.1152, "step": 7596 }, { "epoch": 2.52, "learning_rate": 1.305609295718483e-06, "loss": 0.1377, "step": 7597 }, { "epoch": 2.52, "learning_rate": 1.3038394958047496e-06, "loss": 0.1379, "step": 7598 }, { "epoch": 2.52, "learning_rate": 1.3020708125621063e-06, "loss": 0.1755, "step": 7599 }, { "epoch": 2.52, "learning_rate": 1.3003032462176756e-06, "loss": 0.1227, "step": 7600 }, { "epoch": 2.52, "learning_rate": 1.2985367969984276e-06, "loss": 0.1987, "step": 7601 }, { "epoch": 2.52, "learning_rate": 1.2967714651311935e-06, "loss": 0.1199, "step": 7602 }, { "epoch": 2.52, "learning_rate": 1.2950072508426582e-06, "loss": 0.1646, "step": 7603 }, { "epoch": 2.52, "learning_rate": 1.29324415435936e-06, "loss": 0.1481, "step": 7604 }, { "epoch": 2.52, "learning_rate": 1.2914821759077068e-06, "loss": 0.2095, "step": 7605 }, { "epoch": 2.52, "learning_rate": 1.289721315713942e-06, "loss": 0.1314, "step": 7606 }, { "epoch": 2.52, "learning_rate": 1.2879615740041862e-06, "loss": 0.1499, "step": 7607 }, { "epoch": 2.53, "learning_rate": 1.2862029510044017e-06, "loss": 0.1275, "step": 7608 }, { "epoch": 2.53, "learning_rate": 1.2844454469404167e-06, "loss": 0.1318, "step": 7609 }, { "epoch": 2.53, "learning_rate": 1.2826890620379095e-06, "loss": 0.1477, "step": 7610 }, { "epoch": 2.53, "learning_rate": 1.2809337965224155e-06, "loss": 0.1362, "step": 7611 }, { "epoch": 2.53, "learning_rate": 1.2791796506193321e-06, "loss": 0.0889, "step": 7612 }, { "epoch": 2.53, "learning_rate": 1.2774266245539058e-06, "loss": 0.1341, "step": 7613 }, { "epoch": 2.53, "learning_rate": 1.275674718551243e-06, "loss": 0.1093, "step": 7614 }, { "epoch": 2.53, "learning_rate": 1.2739239328363019e-06, "loss": 0.1382, "step": 7615 }, { "epoch": 2.53, "learning_rate": 1.2721742676339055e-06, "loss": 0.1893, "step": 7616 }, { "epoch": 2.53, "learning_rate": 1.2704257231687234e-06, "loss": 0.1548, "step": 7617 }, { "epoch": 2.53, "learning_rate": 1.268678299665289e-06, "loss": 0.1541, "step": 7618 }, { "epoch": 2.53, "learning_rate": 1.2669319973479876e-06, "loss": 0.094, "step": 7619 }, { "epoch": 2.53, "learning_rate": 1.2651868164410609e-06, "loss": 0.0918, "step": 7620 }, { "epoch": 2.53, "learning_rate": 1.2634427571686047e-06, "loss": 0.0761, "step": 7621 }, { "epoch": 2.53, "learning_rate": 1.2616998197545772e-06, "loss": 0.1057, "step": 7622 }, { "epoch": 2.53, "learning_rate": 1.2599580044227867e-06, "loss": 0.1807, "step": 7623 }, { "epoch": 2.53, "learning_rate": 1.2582173113968954e-06, "loss": 0.09, "step": 7624 }, { "epoch": 2.53, "learning_rate": 1.2564777409004315e-06, "loss": 0.1444, "step": 7625 }, { "epoch": 2.53, "learning_rate": 1.2547392931567703e-06, "loss": 0.1408, "step": 7626 }, { "epoch": 2.53, "learning_rate": 1.2530019683891437e-06, "loss": 0.0823, "step": 7627 }, { "epoch": 2.53, "learning_rate": 1.2512657668206408e-06, "loss": 0.1754, "step": 7628 }, { "epoch": 2.53, "learning_rate": 1.2495306886742099e-06, "loss": 0.0943, "step": 7629 }, { "epoch": 2.53, "learning_rate": 1.2477967341726494e-06, "loss": 0.1765, "step": 7630 }, { "epoch": 2.53, "learning_rate": 1.2460639035386135e-06, "loss": 0.1259, "step": 7631 }, { "epoch": 2.53, "learning_rate": 1.2443321969946198e-06, "loss": 0.188, "step": 7632 }, { "epoch": 2.53, "learning_rate": 1.2426016147630339e-06, "loss": 0.1461, "step": 7633 }, { "epoch": 2.53, "learning_rate": 1.2408721570660787e-06, "loss": 0.1672, "step": 7634 }, { "epoch": 2.53, "learning_rate": 1.2391438241258313e-06, "loss": 0.0763, "step": 7635 }, { "epoch": 2.53, "learning_rate": 1.237416616164231e-06, "loss": 0.1546, "step": 7636 }, { "epoch": 2.53, "learning_rate": 1.2356905334030623e-06, "loss": 0.1156, "step": 7637 }, { "epoch": 2.54, "learning_rate": 1.2339655760639768e-06, "loss": 0.1624, "step": 7638 }, { "epoch": 2.54, "learning_rate": 1.2322417443684742e-06, "loss": 0.1881, "step": 7639 }, { "epoch": 2.54, "learning_rate": 1.2305190385379084e-06, "loss": 0.1384, "step": 7640 }, { "epoch": 2.54, "learning_rate": 1.2287974587934936e-06, "loss": 0.1562, "step": 7641 }, { "epoch": 2.54, "learning_rate": 1.2270770053562953e-06, "loss": 0.1752, "step": 7642 }, { "epoch": 2.54, "learning_rate": 1.22535767844724e-06, "loss": 0.1269, "step": 7643 }, { "epoch": 2.54, "learning_rate": 1.2236394782871008e-06, "loss": 0.1171, "step": 7644 }, { "epoch": 2.54, "learning_rate": 1.2219224050965162e-06, "loss": 0.1708, "step": 7645 }, { "epoch": 2.54, "learning_rate": 1.220206459095974e-06, "loss": 0.1458, "step": 7646 }, { "epoch": 2.54, "learning_rate": 1.2184916405058155e-06, "loss": 0.168, "step": 7647 }, { "epoch": 2.54, "learning_rate": 1.2167779495462417e-06, "loss": 0.1378, "step": 7648 }, { "epoch": 2.54, "learning_rate": 1.2150653864373075e-06, "loss": 0.088, "step": 7649 }, { "epoch": 2.54, "learning_rate": 1.2133539513989233e-06, "loss": 0.1135, "step": 7650 }, { "epoch": 2.54, "learning_rate": 1.2116436446508494e-06, "loss": 0.09, "step": 7651 }, { "epoch": 2.54, "learning_rate": 1.209934466412712e-06, "loss": 0.1722, "step": 7652 }, { "epoch": 2.54, "learning_rate": 1.2082264169039837e-06, "loss": 0.1382, "step": 7653 }, { "epoch": 2.54, "learning_rate": 1.206519496343993e-06, "loss": 0.0629, "step": 7654 }, { "epoch": 2.54, "learning_rate": 1.2048137049519249e-06, "loss": 0.1583, "step": 7655 }, { "epoch": 2.54, "learning_rate": 1.203109042946823e-06, "loss": 0.1366, "step": 7656 }, { "epoch": 2.54, "learning_rate": 1.2014055105475764e-06, "loss": 0.1419, "step": 7657 }, { "epoch": 2.54, "learning_rate": 1.1997031079729426e-06, "loss": 0.1818, "step": 7658 }, { "epoch": 2.54, "learning_rate": 1.1980018354415212e-06, "loss": 0.1553, "step": 7659 }, { "epoch": 2.54, "learning_rate": 1.1963016931717753e-06, "loss": 0.1605, "step": 7660 }, { "epoch": 2.54, "learning_rate": 1.1946026813820166e-06, "loss": 0.1482, "step": 7661 }, { "epoch": 2.54, "learning_rate": 1.1929048002904142e-06, "loss": 0.1524, "step": 7662 }, { "epoch": 2.54, "learning_rate": 1.191208050114997e-06, "loss": 0.1309, "step": 7663 }, { "epoch": 2.54, "learning_rate": 1.1895124310736384e-06, "loss": 0.1388, "step": 7664 }, { "epoch": 2.54, "learning_rate": 1.1878179433840776e-06, "loss": 0.1076, "step": 7665 }, { "epoch": 2.54, "learning_rate": 1.1861245872638983e-06, "loss": 0.1229, "step": 7666 }, { "epoch": 2.54, "learning_rate": 1.184432362930551e-06, "loss": 0.1086, "step": 7667 }, { "epoch": 2.54, "learning_rate": 1.182741270601324e-06, "loss": 0.1258, "step": 7668 }, { "epoch": 2.55, "learning_rate": 1.1810513104933774e-06, "loss": 0.0886, "step": 7669 }, { "epoch": 2.55, "learning_rate": 1.179362482823717e-06, "loss": 0.0947, "step": 7670 }, { "epoch": 2.55, "learning_rate": 1.1776747878091999e-06, "loss": 0.1166, "step": 7671 }, { "epoch": 2.55, "learning_rate": 1.1759882256665489e-06, "loss": 0.1412, "step": 7672 }, { "epoch": 2.55, "learning_rate": 1.17430279661233e-06, "loss": 0.1342, "step": 7673 }, { "epoch": 2.55, "learning_rate": 1.1726185008629743e-06, "loss": 0.1687, "step": 7674 }, { "epoch": 2.55, "learning_rate": 1.1709353386347544e-06, "loss": 0.1217, "step": 7675 }, { "epoch": 2.55, "learning_rate": 1.1692533101438097e-06, "loss": 0.0782, "step": 7676 }, { "epoch": 2.55, "learning_rate": 1.1675724156061262e-06, "loss": 0.1643, "step": 7677 }, { "epoch": 2.55, "learning_rate": 1.1658926552375494e-06, "loss": 0.1125, "step": 7678 }, { "epoch": 2.55, "learning_rate": 1.1642140292537763e-06, "loss": 0.1916, "step": 7679 }, { "epoch": 2.55, "learning_rate": 1.1625365378703558e-06, "loss": 0.1488, "step": 7680 }, { "epoch": 2.55, "learning_rate": 1.1608601813026998e-06, "loss": 0.2092, "step": 7681 }, { "epoch": 2.55, "learning_rate": 1.1591849597660621e-06, "loss": 0.1454, "step": 7682 }, { "epoch": 2.55, "learning_rate": 1.1575108734755614e-06, "loss": 0.1383, "step": 7683 }, { "epoch": 2.55, "learning_rate": 1.1558379226461635e-06, "loss": 0.1667, "step": 7684 }, { "epoch": 2.55, "learning_rate": 1.1541661074926959e-06, "loss": 0.1099, "step": 7685 }, { "epoch": 2.55, "learning_rate": 1.1524954282298317e-06, "loss": 0.1002, "step": 7686 }, { "epoch": 2.55, "learning_rate": 1.1508258850721054e-06, "loss": 0.1432, "step": 7687 }, { "epoch": 2.55, "learning_rate": 1.149157478233901e-06, "loss": 0.14, "step": 7688 }, { "epoch": 2.55, "learning_rate": 1.1474902079294592e-06, "loss": 0.0969, "step": 7689 }, { "epoch": 2.55, "learning_rate": 1.1458240743728721e-06, "loss": 0.1953, "step": 7690 }, { "epoch": 2.55, "learning_rate": 1.1441590777780854e-06, "loss": 0.1522, "step": 7691 }, { "epoch": 2.55, "learning_rate": 1.1424952183589055e-06, "loss": 0.0975, "step": 7692 }, { "epoch": 2.55, "learning_rate": 1.140832496328983e-06, "loss": 0.1441, "step": 7693 }, { "epoch": 2.55, "learning_rate": 1.1391709119018335e-06, "loss": 0.1521, "step": 7694 }, { "epoch": 2.55, "learning_rate": 1.1375104652908163e-06, "loss": 0.1326, "step": 7695 }, { "epoch": 2.55, "learning_rate": 1.1358511567091502e-06, "loss": 0.0837, "step": 7696 }, { "epoch": 2.55, "learning_rate": 1.1341929863699042e-06, "loss": 0.1382, "step": 7697 }, { "epoch": 2.55, "learning_rate": 1.1325359544860071e-06, "loss": 0.1282, "step": 7698 }, { "epoch": 2.56, "learning_rate": 1.1308800612702353e-06, "loss": 0.0901, "step": 7699 }, { "epoch": 2.56, "learning_rate": 1.12922530693522e-06, "loss": 0.0808, "step": 7700 }, { "epoch": 2.56, "learning_rate": 1.127571691693453e-06, "loss": 0.119, "step": 7701 }, { "epoch": 2.56, "learning_rate": 1.1259192157572708e-06, "loss": 0.1682, "step": 7702 }, { "epoch": 2.56, "learning_rate": 1.1242678793388672e-06, "loss": 0.1481, "step": 7703 }, { "epoch": 2.56, "learning_rate": 1.1226176826502888e-06, "loss": 0.1448, "step": 7704 }, { "epoch": 2.56, "learning_rate": 1.1209686259034402e-06, "loss": 0.1382, "step": 7705 }, { "epoch": 2.56, "learning_rate": 1.1193207093100745e-06, "loss": 0.1912, "step": 7706 }, { "epoch": 2.56, "learning_rate": 1.1176739330817977e-06, "loss": 0.1434, "step": 7707 }, { "epoch": 2.56, "learning_rate": 1.1160282974300761e-06, "loss": 0.118, "step": 7708 }, { "epoch": 2.56, "learning_rate": 1.1143838025662235e-06, "loss": 0.1674, "step": 7709 }, { "epoch": 2.56, "learning_rate": 1.1127404487014082e-06, "loss": 0.177, "step": 7710 }, { "epoch": 2.56, "learning_rate": 1.1110982360466516e-06, "loss": 0.1035, "step": 7711 }, { "epoch": 2.56, "learning_rate": 1.1094571648128327e-06, "loss": 0.1547, "step": 7712 }, { "epoch": 2.56, "learning_rate": 1.1078172352106786e-06, "loss": 0.1407, "step": 7713 }, { "epoch": 2.56, "learning_rate": 1.1061784474507742e-06, "loss": 0.1282, "step": 7714 }, { "epoch": 2.56, "learning_rate": 1.1045408017435544e-06, "loss": 0.0999, "step": 7715 }, { "epoch": 2.56, "learning_rate": 1.1029042982993088e-06, "loss": 0.1468, "step": 7716 }, { "epoch": 2.56, "learning_rate": 1.1012689373281804e-06, "loss": 0.0775, "step": 7717 }, { "epoch": 2.56, "learning_rate": 1.0996347190401634e-06, "loss": 0.1241, "step": 7718 }, { "epoch": 2.56, "learning_rate": 1.0980016436451113e-06, "loss": 0.1274, "step": 7719 }, { "epoch": 2.56, "learning_rate": 1.0963697113527227e-06, "loss": 0.1306, "step": 7720 }, { "epoch": 2.56, "learning_rate": 1.0947389223725569e-06, "loss": 0.1572, "step": 7721 }, { "epoch": 2.56, "learning_rate": 1.0931092769140218e-06, "loss": 0.1103, "step": 7722 }, { "epoch": 2.56, "learning_rate": 1.0914807751863787e-06, "loss": 0.1313, "step": 7723 }, { "epoch": 2.56, "learning_rate": 1.0898534173987419e-06, "loss": 0.2556, "step": 7724 }, { "epoch": 2.56, "learning_rate": 1.0882272037600828e-06, "loss": 0.1396, "step": 7725 }, { "epoch": 2.56, "learning_rate": 1.086602134479222e-06, "loss": 0.1404, "step": 7726 }, { "epoch": 2.56, "learning_rate": 1.084978209764832e-06, "loss": 0.1525, "step": 7727 }, { "epoch": 2.56, "learning_rate": 1.0833554298254445e-06, "loss": 0.1543, "step": 7728 }, { "epoch": 2.57, "learning_rate": 1.0817337948694373e-06, "loss": 0.1171, "step": 7729 }, { "epoch": 2.57, "learning_rate": 1.080113305105045e-06, "loss": 0.1446, "step": 7730 }, { "epoch": 2.57, "learning_rate": 1.078493960740351e-06, "loss": 0.1062, "step": 7731 }, { "epoch": 2.57, "learning_rate": 1.0768757619833003e-06, "loss": 0.1018, "step": 7732 }, { "epoch": 2.57, "learning_rate": 1.0752587090416799e-06, "loss": 0.1306, "step": 7733 }, { "epoch": 2.57, "learning_rate": 1.0736428021231393e-06, "loss": 0.0851, "step": 7734 }, { "epoch": 2.57, "learning_rate": 1.0720280414351757e-06, "loss": 0.1271, "step": 7735 }, { "epoch": 2.57, "learning_rate": 1.070414427185139e-06, "loss": 0.0948, "step": 7736 }, { "epoch": 2.57, "learning_rate": 1.0688019595802324e-06, "loss": 0.1902, "step": 7737 }, { "epoch": 2.57, "learning_rate": 1.067190638827511e-06, "loss": 0.1149, "step": 7738 }, { "epoch": 2.57, "learning_rate": 1.0655804651338886e-06, "loss": 0.146, "step": 7739 }, { "epoch": 2.57, "learning_rate": 1.0639714387061217e-06, "loss": 0.1285, "step": 7740 }, { "epoch": 2.57, "learning_rate": 1.0623635597508297e-06, "loss": 0.1232, "step": 7741 }, { "epoch": 2.57, "learning_rate": 1.0607568284744752e-06, "loss": 0.1543, "step": 7742 }, { "epoch": 2.57, "learning_rate": 1.0591512450833851e-06, "loss": 0.1759, "step": 7743 }, { "epoch": 2.57, "learning_rate": 1.057546809783724e-06, "loss": 0.1167, "step": 7744 }, { "epoch": 2.57, "learning_rate": 1.0559435227815207e-06, "loss": 0.1021, "step": 7745 }, { "epoch": 2.57, "learning_rate": 1.0543413842826521e-06, "loss": 0.1562, "step": 7746 }, { "epoch": 2.57, "learning_rate": 1.052740394492847e-06, "loss": 0.1532, "step": 7747 }, { "epoch": 2.57, "learning_rate": 1.0511405536176912e-06, "loss": 0.1544, "step": 7748 }, { "epoch": 2.57, "learning_rate": 1.0495418618626152e-06, "loss": 0.1076, "step": 7749 }, { "epoch": 2.57, "learning_rate": 1.0479443194329142e-06, "loss": 0.0973, "step": 7750 }, { "epoch": 2.57, "learning_rate": 1.0463479265337185e-06, "loss": 0.1058, "step": 7751 }, { "epoch": 2.57, "learning_rate": 1.044752683370026e-06, "loss": 0.1143, "step": 7752 }, { "epoch": 2.57, "learning_rate": 1.04315859014668e-06, "loss": 0.1241, "step": 7753 }, { "epoch": 2.57, "learning_rate": 1.0415656470683788e-06, "loss": 0.0652, "step": 7754 }, { "epoch": 2.57, "learning_rate": 1.0399738543396698e-06, "loss": 0.0938, "step": 7755 }, { "epoch": 2.57, "learning_rate": 1.0383832121649552e-06, "loss": 0.1359, "step": 7756 }, { "epoch": 2.57, "learning_rate": 1.0367937207484913e-06, "loss": 0.1685, "step": 7757 }, { "epoch": 2.57, "learning_rate": 1.0352053802943783e-06, "loss": 0.133, "step": 7758 }, { "epoch": 2.58, "learning_rate": 1.0336181910065812e-06, "loss": 0.1666, "step": 7759 }, { "epoch": 2.58, "learning_rate": 1.0320321530889045e-06, "loss": 0.1238, "step": 7760 }, { "epoch": 2.58, "learning_rate": 1.0304472667450149e-06, "loss": 0.1413, "step": 7761 }, { "epoch": 2.58, "learning_rate": 1.0288635321784257e-06, "loss": 0.1354, "step": 7762 }, { "epoch": 2.58, "learning_rate": 1.027280949592504e-06, "loss": 0.1374, "step": 7763 }, { "epoch": 2.58, "learning_rate": 1.0256995191904696e-06, "loss": 0.0838, "step": 7764 }, { "epoch": 2.58, "learning_rate": 1.024119241175392e-06, "loss": 0.0894, "step": 7765 }, { "epoch": 2.58, "learning_rate": 1.0225401157501958e-06, "loss": 0.0764, "step": 7766 }, { "epoch": 2.58, "learning_rate": 1.0209621431176508e-06, "loss": 0.1627, "step": 7767 }, { "epoch": 2.58, "learning_rate": 1.019385323480392e-06, "loss": 0.1279, "step": 7768 }, { "epoch": 2.58, "learning_rate": 1.0178096570408914e-06, "loss": 0.1051, "step": 7769 }, { "epoch": 2.58, "learning_rate": 1.016235144001484e-06, "loss": 0.1604, "step": 7770 }, { "epoch": 2.58, "learning_rate": 1.0146617845643514e-06, "loss": 0.1505, "step": 7771 }, { "epoch": 2.58, "learning_rate": 1.0130895789315287e-06, "loss": 0.1004, "step": 7772 }, { "epoch": 2.58, "learning_rate": 1.0115185273048988e-06, "loss": 0.0929, "step": 7773 }, { "epoch": 2.58, "learning_rate": 1.0099486298862048e-06, "loss": 0.1276, "step": 7774 }, { "epoch": 2.58, "learning_rate": 1.0083798868770345e-06, "loss": 0.152, "step": 7775 }, { "epoch": 2.58, "learning_rate": 1.0068122984788275e-06, "loss": 0.1142, "step": 7776 }, { "epoch": 2.58, "learning_rate": 1.005245864892882e-06, "loss": 0.1528, "step": 7777 }, { "epoch": 2.58, "learning_rate": 1.0036805863203403e-06, "loss": 0.132, "step": 7778 }, { "epoch": 2.58, "learning_rate": 1.0021164629622005e-06, "loss": 0.1467, "step": 7779 }, { "epoch": 2.58, "learning_rate": 1.0005534950193074e-06, "loss": 0.1818, "step": 7780 }, { "epoch": 2.58, "learning_rate": 9.989916826923673e-07, "loss": 0.1391, "step": 7781 }, { "epoch": 2.58, "learning_rate": 9.974310261819275e-07, "loss": 0.1739, "step": 7782 }, { "epoch": 2.58, "learning_rate": 9.958715256883922e-07, "loss": 0.1451, "step": 7783 }, { "epoch": 2.58, "learning_rate": 9.943131814120178e-07, "loss": 0.1342, "step": 7784 }, { "epoch": 2.58, "learning_rate": 9.927559935529107e-07, "loss": 0.1456, "step": 7785 }, { "epoch": 2.58, "learning_rate": 9.91199962311028e-07, "loss": 0.1377, "step": 7786 }, { "epoch": 2.58, "learning_rate": 9.896450878861775e-07, "loss": 0.1158, "step": 7787 }, { "epoch": 2.58, "learning_rate": 9.880913704780226e-07, "loss": 0.1637, "step": 7788 }, { "epoch": 2.59, "learning_rate": 9.86538810286074e-07, "loss": 0.145, "step": 7789 }, { "epoch": 2.59, "learning_rate": 9.849874075096977e-07, "loss": 0.1812, "step": 7790 }, { "epoch": 2.59, "learning_rate": 9.834371623481086e-07, "loss": 0.0915, "step": 7791 }, { "epoch": 2.59, "learning_rate": 9.818880750003711e-07, "loss": 0.1686, "step": 7792 }, { "epoch": 2.59, "learning_rate": 9.803401456654039e-07, "loss": 0.0818, "step": 7793 }, { "epoch": 2.59, "learning_rate": 9.787933745419742e-07, "loss": 0.0579, "step": 7794 }, { "epoch": 2.59, "learning_rate": 9.772477618287058e-07, "loss": 0.1325, "step": 7795 }, { "epoch": 2.59, "learning_rate": 9.757033077240673e-07, "loss": 0.1752, "step": 7796 }, { "epoch": 2.59, "learning_rate": 9.74160012426385e-07, "loss": 0.1184, "step": 7797 }, { "epoch": 2.59, "learning_rate": 9.726178761338313e-07, "loss": 0.1051, "step": 7798 }, { "epoch": 2.59, "learning_rate": 9.710768990444308e-07, "loss": 0.1443, "step": 7799 }, { "epoch": 2.59, "learning_rate": 9.695370813560579e-07, "loss": 0.1472, "step": 7800 }, { "epoch": 2.59, "learning_rate": 9.679984232664452e-07, "loss": 0.1532, "step": 7801 }, { "epoch": 2.59, "learning_rate": 9.66460924973167e-07, "loss": 0.1314, "step": 7802 }, { "epoch": 2.59, "learning_rate": 9.649245866736535e-07, "loss": 0.1748, "step": 7803 }, { "epoch": 2.59, "learning_rate": 9.633894085651885e-07, "loss": 0.1636, "step": 7804 }, { "epoch": 2.59, "learning_rate": 9.618553908449001e-07, "loss": 0.1438, "step": 7805 }, { "epoch": 2.59, "learning_rate": 9.603225337097732e-07, "loss": 0.1193, "step": 7806 }, { "epoch": 2.59, "learning_rate": 9.58790837356639e-07, "loss": 0.1562, "step": 7807 }, { "epoch": 2.59, "learning_rate": 9.572603019821847e-07, "loss": 0.0868, "step": 7808 }, { "epoch": 2.59, "learning_rate": 9.55730927782944e-07, "loss": 0.1393, "step": 7809 }, { "epoch": 2.59, "learning_rate": 9.542027149553058e-07, "loss": 0.1112, "step": 7810 }, { "epoch": 2.59, "learning_rate": 9.526756636955059e-07, "loss": 0.1165, "step": 7811 }, { "epoch": 2.59, "learning_rate": 9.511497741996312e-07, "loss": 0.1581, "step": 7812 }, { "epoch": 2.59, "learning_rate": 9.496250466636236e-07, "loss": 0.1815, "step": 7813 }, { "epoch": 2.59, "learning_rate": 9.481014812832679e-07, "loss": 0.1765, "step": 7814 }, { "epoch": 2.59, "learning_rate": 9.465790782542106e-07, "loss": 0.1449, "step": 7815 }, { "epoch": 2.59, "learning_rate": 9.450578377719377e-07, "loss": 0.1379, "step": 7816 }, { "epoch": 2.59, "learning_rate": 9.435377600317964e-07, "loss": 0.1396, "step": 7817 }, { "epoch": 2.59, "learning_rate": 9.420188452289747e-07, "loss": 0.0944, "step": 7818 }, { "epoch": 2.6, "learning_rate": 9.405010935585212e-07, "loss": 0.1197, "step": 7819 }, { "epoch": 2.6, "learning_rate": 9.389845052153224e-07, "loss": 0.1029, "step": 7820 }, { "epoch": 2.6, "learning_rate": 9.3746908039413e-07, "loss": 0.0716, "step": 7821 }, { "epoch": 2.6, "learning_rate": 9.359548192895362e-07, "loss": 0.1294, "step": 7822 }, { "epoch": 2.6, "learning_rate": 9.344417220959844e-07, "loss": 0.1717, "step": 7823 }, { "epoch": 2.6, "learning_rate": 9.329297890077759e-07, "loss": 0.137, "step": 7824 }, { "epoch": 2.6, "learning_rate": 9.314190202190521e-07, "loss": 0.1442, "step": 7825 }, { "epoch": 2.6, "learning_rate": 9.299094159238186e-07, "loss": 0.0992, "step": 7826 }, { "epoch": 2.6, "learning_rate": 9.284009763159129e-07, "loss": 0.1258, "step": 7827 }, { "epoch": 2.6, "learning_rate": 9.2689370158904e-07, "loss": 0.1807, "step": 7828 }, { "epoch": 2.6, "learning_rate": 9.253875919367461e-07, "loss": 0.0673, "step": 7829 }, { "epoch": 2.6, "learning_rate": 9.23882647552431e-07, "loss": 0.1576, "step": 7830 }, { "epoch": 2.6, "learning_rate": 9.223788686293455e-07, "loss": 0.1027, "step": 7831 }, { "epoch": 2.6, "learning_rate": 9.208762553605854e-07, "loss": 0.1404, "step": 7832 }, { "epoch": 2.6, "learning_rate": 9.193748079391063e-07, "loss": 0.1624, "step": 7833 }, { "epoch": 2.6, "learning_rate": 9.178745265577028e-07, "loss": 0.1245, "step": 7834 }, { "epoch": 2.6, "learning_rate": 9.163754114090295e-07, "loss": 0.1078, "step": 7835 }, { "epoch": 2.6, "learning_rate": 9.148774626855838e-07, "loss": 0.1405, "step": 7836 }, { "epoch": 2.6, "learning_rate": 9.133806805797219e-07, "loss": 0.1397, "step": 7837 }, { "epoch": 2.6, "learning_rate": 9.118850652836387e-07, "loss": 0.1375, "step": 7838 }, { "epoch": 2.6, "learning_rate": 9.103906169893928e-07, "loss": 0.1476, "step": 7839 }, { "epoch": 2.6, "learning_rate": 9.088973358888809e-07, "loss": 0.1579, "step": 7840 }, { "epoch": 2.6, "learning_rate": 9.07405222173856e-07, "loss": 0.104, "step": 7841 }, { "epoch": 2.6, "learning_rate": 9.059142760359208e-07, "loss": 0.1062, "step": 7842 }, { "epoch": 2.6, "learning_rate": 9.04424497666524e-07, "loss": 0.1629, "step": 7843 }, { "epoch": 2.6, "learning_rate": 9.029358872569716e-07, "loss": 0.0798, "step": 7844 }, { "epoch": 2.6, "learning_rate": 9.014484449984117e-07, "loss": 0.0852, "step": 7845 }, { "epoch": 2.6, "learning_rate": 8.999621710818507e-07, "loss": 0.1509, "step": 7846 }, { "epoch": 2.6, "learning_rate": 8.984770656981379e-07, "loss": 0.1019, "step": 7847 }, { "epoch": 2.6, "learning_rate": 8.969931290379763e-07, "loss": 0.2066, "step": 7848 }, { "epoch": 2.61, "learning_rate": 8.955103612919147e-07, "loss": 0.1468, "step": 7849 }, { "epoch": 2.61, "learning_rate": 8.940287626503585e-07, "loss": 0.1465, "step": 7850 }, { "epoch": 2.61, "learning_rate": 8.925483333035578e-07, "loss": 0.1281, "step": 7851 }, { "epoch": 2.61, "learning_rate": 8.910690734416128e-07, "loss": 0.1473, "step": 7852 }, { "epoch": 2.61, "learning_rate": 8.895909832544781e-07, "loss": 0.132, "step": 7853 }, { "epoch": 2.61, "learning_rate": 8.881140629319529e-07, "loss": 0.1294, "step": 7854 }, { "epoch": 2.61, "learning_rate": 8.866383126636868e-07, "loss": 0.1221, "step": 7855 }, { "epoch": 2.61, "learning_rate": 8.851637326391804e-07, "loss": 0.1555, "step": 7856 }, { "epoch": 2.61, "learning_rate": 8.836903230477867e-07, "loss": 0.1238, "step": 7857 }, { "epoch": 2.61, "learning_rate": 8.822180840787042e-07, "loss": 0.1825, "step": 7858 }, { "epoch": 2.61, "learning_rate": 8.807470159209808e-07, "loss": 0.124, "step": 7859 }, { "epoch": 2.61, "learning_rate": 8.792771187635196e-07, "loss": 0.1414, "step": 7860 }, { "epoch": 2.61, "learning_rate": 8.778083927950676e-07, "loss": 0.1296, "step": 7861 }, { "epoch": 2.61, "learning_rate": 8.763408382042238e-07, "loss": 0.1309, "step": 7862 }, { "epoch": 2.61, "learning_rate": 8.748744551794341e-07, "loss": 0.1358, "step": 7863 }, { "epoch": 2.61, "learning_rate": 8.734092439090003e-07, "loss": 0.1227, "step": 7864 }, { "epoch": 2.61, "learning_rate": 8.719452045810651e-07, "loss": 0.1479, "step": 7865 }, { "epoch": 2.61, "learning_rate": 8.704823373836302e-07, "loss": 0.1104, "step": 7866 }, { "epoch": 2.61, "learning_rate": 8.690206425045389e-07, "loss": 0.1017, "step": 7867 }, { "epoch": 2.61, "learning_rate": 8.675601201314887e-07, "loss": 0.1231, "step": 7868 }, { "epoch": 2.61, "learning_rate": 8.661007704520208e-07, "loss": 0.1179, "step": 7869 }, { "epoch": 2.61, "learning_rate": 8.646425936535352e-07, "loss": 0.1452, "step": 7870 }, { "epoch": 2.61, "learning_rate": 8.631855899232733e-07, "loss": 0.1443, "step": 7871 }, { "epoch": 2.61, "learning_rate": 8.617297594483265e-07, "loss": 0.151, "step": 7872 }, { "epoch": 2.61, "learning_rate": 8.60275102415642e-07, "loss": 0.1307, "step": 7873 }, { "epoch": 2.61, "learning_rate": 8.588216190120091e-07, "loss": 0.1111, "step": 7874 }, { "epoch": 2.61, "learning_rate": 8.573693094240687e-07, "loss": 0.1596, "step": 7875 }, { "epoch": 2.61, "learning_rate": 8.559181738383116e-07, "loss": 0.1509, "step": 7876 }, { "epoch": 2.61, "learning_rate": 8.544682124410786e-07, "loss": 0.146, "step": 7877 }, { "epoch": 2.61, "learning_rate": 8.530194254185597e-07, "loss": 0.144, "step": 7878 }, { "epoch": 2.62, "learning_rate": 8.515718129567885e-07, "loss": 0.1612, "step": 7879 }, { "epoch": 2.62, "learning_rate": 8.501253752416582e-07, "loss": 0.1377, "step": 7880 }, { "epoch": 2.62, "learning_rate": 8.486801124589039e-07, "loss": 0.0816, "step": 7881 }, { "epoch": 2.62, "learning_rate": 8.472360247941092e-07, "loss": 0.0696, "step": 7882 }, { "epoch": 2.62, "learning_rate": 8.45793112432709e-07, "loss": 0.0949, "step": 7883 }, { "epoch": 2.62, "learning_rate": 8.443513755599897e-07, "loss": 0.1319, "step": 7884 }, { "epoch": 2.62, "learning_rate": 8.429108143610809e-07, "loss": 0.134, "step": 7885 }, { "epoch": 2.62, "learning_rate": 8.41471429020968e-07, "loss": 0.1538, "step": 7886 }, { "epoch": 2.62, "learning_rate": 8.40033219724481e-07, "loss": 0.1071, "step": 7887 }, { "epoch": 2.62, "learning_rate": 8.385961866562986e-07, "loss": 0.1011, "step": 7888 }, { "epoch": 2.62, "learning_rate": 8.371603300009512e-07, "loss": 0.0991, "step": 7889 }, { "epoch": 2.62, "learning_rate": 8.357256499428146e-07, "loss": 0.1457, "step": 7890 }, { "epoch": 2.62, "learning_rate": 8.34292146666118e-07, "loss": 0.1339, "step": 7891 }, { "epoch": 2.62, "learning_rate": 8.328598203549343e-07, "loss": 0.1104, "step": 7892 }, { "epoch": 2.62, "learning_rate": 8.314286711931929e-07, "loss": 0.1207, "step": 7893 }, { "epoch": 2.62, "learning_rate": 8.299986993646602e-07, "loss": 0.1451, "step": 7894 }, { "epoch": 2.62, "learning_rate": 8.285699050529672e-07, "loss": 0.1584, "step": 7895 }, { "epoch": 2.62, "learning_rate": 8.27142288441577e-07, "loss": 0.0938, "step": 7896 }, { "epoch": 2.62, "learning_rate": 8.257158497138129e-07, "loss": 0.1777, "step": 7897 }, { "epoch": 2.62, "learning_rate": 8.242905890528441e-07, "loss": 0.0953, "step": 7898 }, { "epoch": 2.62, "learning_rate": 8.228665066416841e-07, "loss": 0.1329, "step": 7899 }, { "epoch": 2.62, "learning_rate": 8.214436026632034e-07, "loss": 0.0926, "step": 7900 }, { "epoch": 2.62, "learning_rate": 8.200218773001123e-07, "loss": 0.0948, "step": 7901 }, { "epoch": 2.62, "learning_rate": 8.186013307349805e-07, "loss": 0.1245, "step": 7902 }, { "epoch": 2.62, "learning_rate": 8.171819631502121e-07, "loss": 0.1205, "step": 7903 }, { "epoch": 2.62, "learning_rate": 8.157637747280733e-07, "loss": 0.1581, "step": 7904 }, { "epoch": 2.62, "learning_rate": 8.143467656506699e-07, "loss": 0.0704, "step": 7905 }, { "epoch": 2.62, "learning_rate": 8.129309360999627e-07, "loss": 0.1061, "step": 7906 }, { "epoch": 2.62, "learning_rate": 8.115162862577553e-07, "loss": 0.1646, "step": 7907 }, { "epoch": 2.62, "learning_rate": 8.101028163057023e-07, "loss": 0.1349, "step": 7908 }, { "epoch": 2.62, "learning_rate": 8.086905264253108e-07, "loss": 0.1207, "step": 7909 }, { "epoch": 2.63, "learning_rate": 8.07279416797927e-07, "loss": 0.133, "step": 7910 }, { "epoch": 2.63, "learning_rate": 8.058694876047546e-07, "loss": 0.1494, "step": 7911 }, { "epoch": 2.63, "learning_rate": 8.044607390268388e-07, "loss": 0.1752, "step": 7912 }, { "epoch": 2.63, "learning_rate": 8.030531712450817e-07, "loss": 0.145, "step": 7913 }, { "epoch": 2.63, "learning_rate": 8.016467844402243e-07, "loss": 0.1353, "step": 7914 }, { "epoch": 2.63, "learning_rate": 8.002415787928619e-07, "loss": 0.1075, "step": 7915 }, { "epoch": 2.63, "learning_rate": 7.988375544834382e-07, "loss": 0.1181, "step": 7916 }, { "epoch": 2.63, "learning_rate": 7.974347116922398e-07, "loss": 0.1248, "step": 7917 }, { "epoch": 2.63, "learning_rate": 7.960330505994085e-07, "loss": 0.1602, "step": 7918 }, { "epoch": 2.63, "learning_rate": 7.946325713849268e-07, "loss": 0.1192, "step": 7919 }, { "epoch": 2.63, "learning_rate": 7.932332742286352e-07, "loss": 0.1317, "step": 7920 }, { "epoch": 2.63, "learning_rate": 7.918351593102125e-07, "loss": 0.1092, "step": 7921 }, { "epoch": 2.63, "learning_rate": 7.904382268091948e-07, "loss": 0.1198, "step": 7922 }, { "epoch": 2.63, "learning_rate": 7.890424769049587e-07, "loss": 0.1057, "step": 7923 }, { "epoch": 2.63, "learning_rate": 7.87647909776732e-07, "loss": 0.1717, "step": 7924 }, { "epoch": 2.63, "learning_rate": 7.862545256035892e-07, "loss": 0.1765, "step": 7925 }, { "epoch": 2.63, "learning_rate": 7.848623245644582e-07, "loss": 0.1081, "step": 7926 }, { "epoch": 2.63, "learning_rate": 7.834713068381084e-07, "loss": 0.1827, "step": 7927 }, { "epoch": 2.63, "learning_rate": 7.820814726031589e-07, "loss": 0.1736, "step": 7928 }, { "epoch": 2.63, "learning_rate": 7.806928220380817e-07, "loss": 0.137, "step": 7929 }, { "epoch": 2.63, "learning_rate": 7.793053553211905e-07, "loss": 0.1048, "step": 7930 }, { "epoch": 2.63, "learning_rate": 7.779190726306496e-07, "loss": 0.1799, "step": 7931 }, { "epoch": 2.63, "learning_rate": 7.765339741444699e-07, "loss": 0.1076, "step": 7932 }, { "epoch": 2.63, "learning_rate": 7.751500600405137e-07, "loss": 0.1237, "step": 7933 }, { "epoch": 2.63, "learning_rate": 7.737673304964865e-07, "loss": 0.1566, "step": 7934 }, { "epoch": 2.63, "learning_rate": 7.723857856899475e-07, "loss": 0.0876, "step": 7935 }, { "epoch": 2.63, "learning_rate": 7.71005425798298e-07, "loss": 0.1282, "step": 7936 }, { "epoch": 2.63, "learning_rate": 7.696262509987906e-07, "loss": 0.0972, "step": 7937 }, { "epoch": 2.63, "learning_rate": 7.682482614685238e-07, "loss": 0.1563, "step": 7938 }, { "epoch": 2.63, "learning_rate": 7.668714573844438e-07, "loss": 0.128, "step": 7939 }, { "epoch": 2.64, "learning_rate": 7.65495838923348e-07, "loss": 0.1544, "step": 7940 }, { "epoch": 2.64, "learning_rate": 7.641214062618773e-07, "loss": 0.1514, "step": 7941 }, { "epoch": 2.64, "learning_rate": 7.62748159576524e-07, "loss": 0.1522, "step": 7942 }, { "epoch": 2.64, "learning_rate": 7.613760990436248e-07, "loss": 0.0955, "step": 7943 }, { "epoch": 2.64, "learning_rate": 7.600052248393652e-07, "loss": 0.1298, "step": 7944 }, { "epoch": 2.64, "learning_rate": 7.586355371397792e-07, "loss": 0.1435, "step": 7945 }, { "epoch": 2.64, "learning_rate": 7.57267036120749e-07, "loss": 0.0956, "step": 7946 }, { "epoch": 2.64, "learning_rate": 7.558997219580022e-07, "loss": 0.1538, "step": 7947 }, { "epoch": 2.64, "learning_rate": 7.545335948271137e-07, "loss": 0.0893, "step": 7948 }, { "epoch": 2.64, "learning_rate": 7.531686549035111e-07, "loss": 0.1512, "step": 7949 }, { "epoch": 2.64, "learning_rate": 7.518049023624629e-07, "loss": 0.2069, "step": 7950 }, { "epoch": 2.64, "learning_rate": 7.504423373790915e-07, "loss": 0.0714, "step": 7951 }, { "epoch": 2.64, "learning_rate": 7.490809601283577e-07, "loss": 0.1726, "step": 7952 }, { "epoch": 2.64, "learning_rate": 7.477207707850809e-07, "loss": 0.1207, "step": 7953 }, { "epoch": 2.64, "learning_rate": 7.463617695239211e-07, "loss": 0.1607, "step": 7954 }, { "epoch": 2.64, "learning_rate": 7.450039565193845e-07, "loss": 0.1333, "step": 7955 }, { "epoch": 2.64, "learning_rate": 7.436473319458304e-07, "loss": 0.0811, "step": 7956 }, { "epoch": 2.64, "learning_rate": 7.422918959774628e-07, "loss": 0.1555, "step": 7957 }, { "epoch": 2.64, "learning_rate": 7.409376487883313e-07, "loss": 0.1288, "step": 7958 }, { "epoch": 2.64, "learning_rate": 7.395845905523325e-07, "loss": 0.1127, "step": 7959 }, { "epoch": 2.64, "learning_rate": 7.382327214432161e-07, "loss": 0.1157, "step": 7960 }, { "epoch": 2.64, "learning_rate": 7.368820416345724e-07, "loss": 0.1018, "step": 7961 }, { "epoch": 2.64, "learning_rate": 7.355325512998434e-07, "loss": 0.1104, "step": 7962 }, { "epoch": 2.64, "learning_rate": 7.341842506123176e-07, "loss": 0.169, "step": 7963 }, { "epoch": 2.64, "learning_rate": 7.328371397451273e-07, "loss": 0.1113, "step": 7964 }, { "epoch": 2.64, "learning_rate": 7.314912188712552e-07, "loss": 0.0835, "step": 7965 }, { "epoch": 2.64, "learning_rate": 7.301464881635301e-07, "loss": 0.0995, "step": 7966 }, { "epoch": 2.64, "learning_rate": 7.288029477946301e-07, "loss": 0.1468, "step": 7967 }, { "epoch": 2.64, "learning_rate": 7.274605979370764e-07, "loss": 0.1336, "step": 7968 }, { "epoch": 2.64, "learning_rate": 7.261194387632431e-07, "loss": 0.1027, "step": 7969 }, { "epoch": 2.65, "learning_rate": 7.247794704453448e-07, "loss": 0.1776, "step": 7970 }, { "epoch": 2.65, "learning_rate": 7.234406931554505e-07, "loss": 0.1203, "step": 7971 }, { "epoch": 2.65, "learning_rate": 7.221031070654661e-07, "loss": 0.0891, "step": 7972 }, { "epoch": 2.65, "learning_rate": 7.207667123471551e-07, "loss": 0.1277, "step": 7973 }, { "epoch": 2.65, "learning_rate": 7.194315091721227e-07, "loss": 0.1424, "step": 7974 }, { "epoch": 2.65, "learning_rate": 7.180974977118204e-07, "loss": 0.1261, "step": 7975 }, { "epoch": 2.65, "learning_rate": 7.167646781375504e-07, "loss": 0.1537, "step": 7976 }, { "epoch": 2.65, "learning_rate": 7.154330506204565e-07, "loss": 0.1051, "step": 7977 }, { "epoch": 2.65, "learning_rate": 7.141026153315378e-07, "loss": 0.1319, "step": 7978 }, { "epoch": 2.65, "learning_rate": 7.127733724416286e-07, "loss": 0.1009, "step": 7979 }, { "epoch": 2.65, "learning_rate": 7.114453221214213e-07, "loss": 0.1377, "step": 7980 }, { "epoch": 2.65, "learning_rate": 7.101184645414471e-07, "loss": 0.1014, "step": 7981 }, { "epoch": 2.65, "learning_rate": 7.087927998720912e-07, "loss": 0.1056, "step": 7982 }, { "epoch": 2.65, "learning_rate": 7.074683282835792e-07, "loss": 0.1402, "step": 7983 }, { "epoch": 2.65, "learning_rate": 7.061450499459843e-07, "loss": 0.1519, "step": 7984 }, { "epoch": 2.65, "learning_rate": 7.048229650292349e-07, "loss": 0.1317, "step": 7985 }, { "epoch": 2.65, "learning_rate": 7.035020737030906e-07, "loss": 0.0814, "step": 7986 }, { "epoch": 2.65, "learning_rate": 7.021823761371727e-07, "loss": 0.1453, "step": 7987 }, { "epoch": 2.65, "learning_rate": 7.008638725009398e-07, "loss": 0.1402, "step": 7988 }, { "epoch": 2.65, "learning_rate": 6.995465629637044e-07, "loss": 0.1203, "step": 7989 }, { "epoch": 2.65, "learning_rate": 6.982304476946178e-07, "loss": 0.2206, "step": 7990 }, { "epoch": 2.65, "learning_rate": 6.969155268626848e-07, "loss": 0.0952, "step": 7991 }, { "epoch": 2.65, "learning_rate": 6.956018006367526e-07, "loss": 0.2187, "step": 7992 }, { "epoch": 2.65, "learning_rate": 6.942892691855175e-07, "loss": 0.1173, "step": 7993 }, { "epoch": 2.65, "learning_rate": 6.929779326775187e-07, "loss": 0.1425, "step": 7994 }, { "epoch": 2.65, "learning_rate": 6.916677912811465e-07, "loss": 0.1035, "step": 7995 }, { "epoch": 2.65, "learning_rate": 6.903588451646348e-07, "loss": 0.1028, "step": 7996 }, { "epoch": 2.65, "learning_rate": 6.890510944960649e-07, "loss": 0.1119, "step": 7997 }, { "epoch": 2.65, "learning_rate": 6.877445394433668e-07, "loss": 0.1474, "step": 7998 }, { "epoch": 2.65, "learning_rate": 6.864391801743131e-07, "loss": 0.1303, "step": 7999 }, { "epoch": 2.66, "learning_rate": 6.851350168565252e-07, "loss": 0.0845, "step": 8000 }, { "epoch": 2.66, "learning_rate": 6.838320496574669e-07, "loss": 0.1166, "step": 8001 }, { "epoch": 2.66, "learning_rate": 6.825302787444566e-07, "loss": 0.148, "step": 8002 }, { "epoch": 2.66, "learning_rate": 6.812297042846527e-07, "loss": 0.1172, "step": 8003 }, { "epoch": 2.66, "learning_rate": 6.799303264450585e-07, "loss": 0.1487, "step": 8004 }, { "epoch": 2.66, "learning_rate": 6.786321453925316e-07, "loss": 0.1421, "step": 8005 }, { "epoch": 2.66, "learning_rate": 6.773351612937684e-07, "loss": 0.0935, "step": 8006 }, { "epoch": 2.66, "learning_rate": 6.760393743153149e-07, "loss": 0.1434, "step": 8007 }, { "epoch": 2.66, "learning_rate": 6.747447846235599e-07, "loss": 0.0868, "step": 8008 }, { "epoch": 2.66, "learning_rate": 6.734513923847464e-07, "loss": 0.1447, "step": 8009 }, { "epoch": 2.66, "learning_rate": 6.721591977649522e-07, "loss": 0.1227, "step": 8010 }, { "epoch": 2.66, "learning_rate": 6.708682009301137e-07, "loss": 0.1078, "step": 8011 }, { "epoch": 2.66, "learning_rate": 6.69578402046005e-07, "loss": 0.165, "step": 8012 }, { "epoch": 2.66, "learning_rate": 6.68289801278248e-07, "loss": 0.0849, "step": 8013 }, { "epoch": 2.66, "learning_rate": 6.670023987923124e-07, "loss": 0.1687, "step": 8014 }, { "epoch": 2.66, "learning_rate": 6.657161947535107e-07, "loss": 0.0821, "step": 8015 }, { "epoch": 2.66, "learning_rate": 6.644311893270072e-07, "loss": 0.1746, "step": 8016 }, { "epoch": 2.66, "learning_rate": 6.631473826778068e-07, "loss": 0.1591, "step": 8017 }, { "epoch": 2.66, "learning_rate": 6.618647749707652e-07, "loss": 0.1555, "step": 8018 }, { "epoch": 2.66, "learning_rate": 6.605833663705797e-07, "loss": 0.1127, "step": 8019 }, { "epoch": 2.66, "learning_rate": 6.593031570417951e-07, "loss": 0.1829, "step": 8020 }, { "epoch": 2.66, "learning_rate": 6.580241471488036e-07, "loss": 0.1253, "step": 8021 }, { "epoch": 2.66, "learning_rate": 6.567463368558425e-07, "loss": 0.1404, "step": 8022 }, { "epoch": 2.66, "learning_rate": 6.554697263269949e-07, "loss": 0.1175, "step": 8023 }, { "epoch": 2.66, "learning_rate": 6.541943157261887e-07, "loss": 0.0851, "step": 8024 }, { "epoch": 2.66, "learning_rate": 6.529201052172018e-07, "loss": 0.0943, "step": 8025 }, { "epoch": 2.66, "learning_rate": 6.516470949636522e-07, "loss": 0.1304, "step": 8026 }, { "epoch": 2.66, "learning_rate": 6.503752851290102e-07, "loss": 0.0894, "step": 8027 }, { "epoch": 2.66, "learning_rate": 6.49104675876584e-07, "loss": 0.1219, "step": 8028 }, { "epoch": 2.66, "learning_rate": 6.478352673695354e-07, "loss": 0.1754, "step": 8029 }, { "epoch": 2.67, "learning_rate": 6.465670597708673e-07, "loss": 0.162, "step": 8030 }, { "epoch": 2.67, "learning_rate": 6.453000532434283e-07, "loss": 0.1069, "step": 8031 }, { "epoch": 2.67, "learning_rate": 6.440342479499195e-07, "loss": 0.0914, "step": 8032 }, { "epoch": 2.67, "learning_rate": 6.427696440528764e-07, "loss": 0.1086, "step": 8033 }, { "epoch": 2.67, "learning_rate": 6.415062417146922e-07, "loss": 0.0922, "step": 8034 }, { "epoch": 2.67, "learning_rate": 6.402440410975941e-07, "loss": 0.0958, "step": 8035 }, { "epoch": 2.67, "learning_rate": 6.389830423636656e-07, "loss": 0.1083, "step": 8036 }, { "epoch": 2.67, "learning_rate": 6.377232456748272e-07, "loss": 0.1993, "step": 8037 }, { "epoch": 2.67, "learning_rate": 6.364646511928529e-07, "loss": 0.1035, "step": 8038 }, { "epoch": 2.67, "learning_rate": 6.352072590793568e-07, "loss": 0.1725, "step": 8039 }, { "epoch": 2.67, "learning_rate": 6.339510694957996e-07, "loss": 0.128, "step": 8040 }, { "epoch": 2.67, "learning_rate": 6.326960826034889e-07, "loss": 0.1192, "step": 8041 }, { "epoch": 2.67, "learning_rate": 6.314422985635749e-07, "loss": 0.1315, "step": 8042 }, { "epoch": 2.67, "learning_rate": 6.301897175370597e-07, "loss": 0.143, "step": 8043 }, { "epoch": 2.67, "learning_rate": 6.289383396847826e-07, "loss": 0.0938, "step": 8044 }, { "epoch": 2.67, "learning_rate": 6.276881651674361e-07, "loss": 0.1331, "step": 8045 }, { "epoch": 2.67, "learning_rate": 6.264391941455506e-07, "loss": 0.1135, "step": 8046 }, { "epoch": 2.67, "learning_rate": 6.251914267795134e-07, "loss": 0.1076, "step": 8047 }, { "epoch": 2.67, "learning_rate": 6.239448632295408e-07, "loss": 0.1529, "step": 8048 }, { "epoch": 2.67, "learning_rate": 6.226995036557104e-07, "loss": 0.1544, "step": 8049 }, { "epoch": 2.67, "learning_rate": 6.214553482179364e-07, "loss": 0.0915, "step": 8050 }, { "epoch": 2.67, "learning_rate": 6.202123970759777e-07, "loss": 0.1944, "step": 8051 }, { "epoch": 2.67, "learning_rate": 6.189706503894455e-07, "loss": 0.133, "step": 8052 }, { "epoch": 2.67, "learning_rate": 6.177301083177878e-07, "loss": 0.1362, "step": 8053 }, { "epoch": 2.67, "learning_rate": 6.164907710203083e-07, "loss": 0.1337, "step": 8054 }, { "epoch": 2.67, "learning_rate": 6.152526386561419e-07, "loss": 0.0862, "step": 8055 }, { "epoch": 2.67, "learning_rate": 6.140157113842837e-07, "loss": 0.1689, "step": 8056 }, { "epoch": 2.67, "learning_rate": 6.127799893635622e-07, "loss": 0.1199, "step": 8057 }, { "epoch": 2.67, "learning_rate": 6.115454727526604e-07, "loss": 0.1337, "step": 8058 }, { "epoch": 2.67, "learning_rate": 6.103121617100993e-07, "loss": 0.1238, "step": 8059 }, { "epoch": 2.68, "learning_rate": 6.090800563942479e-07, "loss": 0.134, "step": 8060 }, { "epoch": 2.68, "learning_rate": 6.078491569633238e-07, "loss": 0.1325, "step": 8061 }, { "epoch": 2.68, "learning_rate": 6.066194635753808e-07, "loss": 0.1689, "step": 8062 }, { "epoch": 2.68, "learning_rate": 6.053909763883281e-07, "loss": 0.1655, "step": 8063 }, { "epoch": 2.68, "learning_rate": 6.041636955599117e-07, "loss": 0.132, "step": 8064 }, { "epoch": 2.68, "learning_rate": 6.0293762124773e-07, "loss": 0.1485, "step": 8065 }, { "epoch": 2.68, "learning_rate": 6.017127536092204e-07, "loss": 0.1576, "step": 8066 }, { "epoch": 2.68, "learning_rate": 6.004890928016693e-07, "loss": 0.1247, "step": 8067 }, { "epoch": 2.68, "learning_rate": 5.992666389822055e-07, "loss": 0.1199, "step": 8068 }, { "epoch": 2.68, "learning_rate": 5.980453923078034e-07, "loss": 0.1165, "step": 8069 }, { "epoch": 2.68, "learning_rate": 5.968253529352852e-07, "loss": 0.1303, "step": 8070 }, { "epoch": 2.68, "learning_rate": 5.956065210213114e-07, "loss": 0.098, "step": 8071 }, { "epoch": 2.68, "learning_rate": 5.943888967223965e-07, "loss": 0.1037, "step": 8072 }, { "epoch": 2.68, "learning_rate": 5.931724801948913e-07, "loss": 0.1048, "step": 8073 }, { "epoch": 2.68, "learning_rate": 5.919572715949995e-07, "loss": 0.1251, "step": 8074 }, { "epoch": 2.68, "learning_rate": 5.90743271078763e-07, "loss": 0.1017, "step": 8075 }, { "epoch": 2.68, "learning_rate": 5.895304788020728e-07, "loss": 0.1688, "step": 8076 }, { "epoch": 2.68, "learning_rate": 5.883188949206598e-07, "loss": 0.1564, "step": 8077 }, { "epoch": 2.68, "learning_rate": 5.871085195901072e-07, "loss": 0.0885, "step": 8078 }, { "epoch": 2.68, "learning_rate": 5.858993529658364e-07, "loss": 0.0973, "step": 8079 }, { "epoch": 2.68, "learning_rate": 5.846913952031164e-07, "loss": 0.1304, "step": 8080 }, { "epoch": 2.68, "learning_rate": 5.834846464570609e-07, "loss": 0.0808, "step": 8081 }, { "epoch": 2.68, "learning_rate": 5.822791068826294e-07, "loss": 0.1268, "step": 8082 }, { "epoch": 2.68, "learning_rate": 5.810747766346237e-07, "loss": 0.0957, "step": 8083 }, { "epoch": 2.68, "learning_rate": 5.798716558676898e-07, "loss": 0.1479, "step": 8084 }, { "epoch": 2.68, "learning_rate": 5.786697447363232e-07, "loss": 0.1066, "step": 8085 }, { "epoch": 2.68, "learning_rate": 5.774690433948571e-07, "loss": 0.1047, "step": 8086 }, { "epoch": 2.68, "learning_rate": 5.762695519974781e-07, "loss": 0.1164, "step": 8087 }, { "epoch": 2.68, "learning_rate": 5.750712706982087e-07, "loss": 0.1492, "step": 8088 }, { "epoch": 2.68, "learning_rate": 5.738741996509223e-07, "loss": 0.1186, "step": 8089 }, { "epoch": 2.69, "learning_rate": 5.726783390093316e-07, "loss": 0.1387, "step": 8090 }, { "epoch": 2.69, "learning_rate": 5.714836889269981e-07, "loss": 0.1164, "step": 8091 }, { "epoch": 2.69, "learning_rate": 5.70290249557327e-07, "loss": 0.1537, "step": 8092 }, { "epoch": 2.69, "learning_rate": 5.690980210535646e-07, "loss": 0.1366, "step": 8093 }, { "epoch": 2.69, "learning_rate": 5.679070035688095e-07, "loss": 0.1126, "step": 8094 }, { "epoch": 2.69, "learning_rate": 5.667171972559971e-07, "loss": 0.1228, "step": 8095 }, { "epoch": 2.69, "learning_rate": 5.655286022679097e-07, "loss": 0.1534, "step": 8096 }, { "epoch": 2.69, "learning_rate": 5.643412187571739e-07, "loss": 0.1057, "step": 8097 }, { "epoch": 2.69, "learning_rate": 5.631550468762637e-07, "loss": 0.1971, "step": 8098 }, { "epoch": 2.69, "learning_rate": 5.619700867774924e-07, "loss": 0.0822, "step": 8099 }, { "epoch": 2.69, "learning_rate": 5.607863386130208e-07, "loss": 0.136, "step": 8100 }, { "epoch": 2.69, "learning_rate": 5.59603802534856e-07, "loss": 0.1055, "step": 8101 }, { "epoch": 2.69, "learning_rate": 5.584224786948433e-07, "loss": 0.1261, "step": 8102 }, { "epoch": 2.69, "learning_rate": 5.572423672446814e-07, "loss": 0.1173, "step": 8103 }, { "epoch": 2.69, "learning_rate": 5.560634683359012e-07, "loss": 0.1171, "step": 8104 }, { "epoch": 2.69, "learning_rate": 5.548857821198905e-07, "loss": 0.1428, "step": 8105 }, { "epoch": 2.69, "learning_rate": 5.537093087478729e-07, "loss": 0.0581, "step": 8106 }, { "epoch": 2.69, "learning_rate": 5.525340483709174e-07, "loss": 0.0957, "step": 8107 }, { "epoch": 2.69, "learning_rate": 5.513600011399434e-07, "loss": 0.1253, "step": 8108 }, { "epoch": 2.69, "learning_rate": 5.501871672057057e-07, "loss": 0.1445, "step": 8109 }, { "epoch": 2.69, "learning_rate": 5.490155467188107e-07, "loss": 0.1358, "step": 8110 }, { "epoch": 2.69, "learning_rate": 5.478451398297025e-07, "loss": 0.1074, "step": 8111 }, { "epoch": 2.69, "learning_rate": 5.466759466886762e-07, "loss": 0.1483, "step": 8112 }, { "epoch": 2.69, "learning_rate": 5.455079674458641e-07, "loss": 0.0881, "step": 8113 }, { "epoch": 2.69, "learning_rate": 5.443412022512484e-07, "loss": 0.1296, "step": 8114 }, { "epoch": 2.69, "learning_rate": 5.431756512546526e-07, "loss": 0.1227, "step": 8115 }, { "epoch": 2.69, "learning_rate": 5.420113146057438e-07, "loss": 0.1335, "step": 8116 }, { "epoch": 2.69, "learning_rate": 5.408481924540343e-07, "loss": 0.0872, "step": 8117 }, { "epoch": 2.69, "learning_rate": 5.396862849488816e-07, "loss": 0.1281, "step": 8118 }, { "epoch": 2.69, "learning_rate": 5.385255922394838e-07, "loss": 0.1797, "step": 8119 }, { "epoch": 2.69, "learning_rate": 5.373661144748865e-07, "loss": 0.1177, "step": 8120 }, { "epoch": 2.7, "learning_rate": 5.362078518039782e-07, "loss": 0.1475, "step": 8121 }, { "epoch": 2.7, "learning_rate": 5.350508043754876e-07, "loss": 0.1508, "step": 8122 }, { "epoch": 2.7, "learning_rate": 5.338949723379983e-07, "loss": 0.1275, "step": 8123 }, { "epoch": 2.7, "learning_rate": 5.327403558399213e-07, "loss": 0.1418, "step": 8124 }, { "epoch": 2.7, "learning_rate": 5.315869550295272e-07, "loss": 0.098, "step": 8125 }, { "epoch": 2.7, "learning_rate": 5.304347700549206e-07, "loss": 0.1137, "step": 8126 }, { "epoch": 2.7, "learning_rate": 5.292838010640522e-07, "loss": 0.1113, "step": 8127 }, { "epoch": 2.7, "learning_rate": 5.281340482047214e-07, "loss": 0.1248, "step": 8128 }, { "epoch": 2.7, "learning_rate": 5.269855116245637e-07, "loss": 0.1381, "step": 8129 }, { "epoch": 2.7, "learning_rate": 5.258381914710675e-07, "loss": 0.1513, "step": 8130 }, { "epoch": 2.7, "learning_rate": 5.246920878915529e-07, "loss": 0.1173, "step": 8131 }, { "epoch": 2.7, "learning_rate": 5.235472010331955e-07, "loss": 0.0978, "step": 8132 }, { "epoch": 2.7, "learning_rate": 5.224035310430065e-07, "loss": 0.1263, "step": 8133 }, { "epoch": 2.7, "learning_rate": 5.212610780678484e-07, "loss": 0.0651, "step": 8134 }, { "epoch": 2.7, "learning_rate": 5.201198422544218e-07, "loss": 0.1824, "step": 8135 }, { "epoch": 2.7, "learning_rate": 5.189798237492683e-07, "loss": 0.157, "step": 8136 }, { "epoch": 2.7, "learning_rate": 5.178410226987851e-07, "loss": 0.1701, "step": 8137 }, { "epoch": 2.7, "learning_rate": 5.167034392491965e-07, "loss": 0.1102, "step": 8138 }, { "epoch": 2.7, "learning_rate": 5.155670735465857e-07, "loss": 0.1835, "step": 8139 }, { "epoch": 2.7, "learning_rate": 5.144319257368701e-07, "loss": 0.1029, "step": 8140 }, { "epoch": 2.7, "learning_rate": 5.132979959658147e-07, "loss": 0.107, "step": 8141 }, { "epoch": 2.7, "learning_rate": 5.121652843790259e-07, "loss": 0.1525, "step": 8142 }, { "epoch": 2.7, "learning_rate": 5.110337911219565e-07, "loss": 0.1478, "step": 8143 }, { "epoch": 2.7, "learning_rate": 5.099035163399002e-07, "loss": 0.1723, "step": 8144 }, { "epoch": 2.7, "learning_rate": 5.087744601779954e-07, "loss": 0.1516, "step": 8145 }, { "epoch": 2.7, "learning_rate": 5.076466227812249e-07, "loss": 0.1556, "step": 8146 }, { "epoch": 2.7, "learning_rate": 5.065200042944096e-07, "loss": 0.1426, "step": 8147 }, { "epoch": 2.7, "learning_rate": 5.053946048622238e-07, "loss": 0.1385, "step": 8148 }, { "epoch": 2.7, "learning_rate": 5.042704246291763e-07, "loss": 0.1524, "step": 8149 }, { "epoch": 2.7, "learning_rate": 5.031474637396239e-07, "loss": 0.1149, "step": 8150 }, { "epoch": 2.71, "learning_rate": 5.020257223377667e-07, "loss": 0.1009, "step": 8151 }, { "epoch": 2.71, "learning_rate": 5.009052005676451e-07, "loss": 0.1837, "step": 8152 }, { "epoch": 2.71, "learning_rate": 4.99785898573144e-07, "loss": 0.1459, "step": 8153 }, { "epoch": 2.71, "learning_rate": 4.986678164979963e-07, "loss": 0.1288, "step": 8154 }, { "epoch": 2.71, "learning_rate": 4.975509544857715e-07, "loss": 0.1197, "step": 8155 }, { "epoch": 2.71, "learning_rate": 4.96435312679886e-07, "loss": 0.1041, "step": 8156 }, { "epoch": 2.71, "learning_rate": 4.953208912236007e-07, "loss": 0.1237, "step": 8157 }, { "epoch": 2.71, "learning_rate": 4.942076902600168e-07, "loss": 0.1129, "step": 8158 }, { "epoch": 2.71, "learning_rate": 4.93095709932081e-07, "loss": 0.1443, "step": 8159 }, { "epoch": 2.71, "learning_rate": 4.919849503825791e-07, "loss": 0.1045, "step": 8160 }, { "epoch": 2.71, "learning_rate": 4.90875411754147e-07, "loss": 0.1838, "step": 8161 }, { "epoch": 2.71, "learning_rate": 4.897670941892585e-07, "loss": 0.1721, "step": 8162 }, { "epoch": 2.71, "learning_rate": 4.886599978302331e-07, "loss": 0.0846, "step": 8163 }, { "epoch": 2.71, "learning_rate": 4.875541228192338e-07, "loss": 0.1051, "step": 8164 }, { "epoch": 2.71, "learning_rate": 4.864494692982624e-07, "loss": 0.2305, "step": 8165 }, { "epoch": 2.71, "learning_rate": 4.853460374091701e-07, "loss": 0.1322, "step": 8166 }, { "epoch": 2.71, "learning_rate": 4.842438272936445e-07, "loss": 0.1269, "step": 8167 }, { "epoch": 2.71, "learning_rate": 4.831428390932247e-07, "loss": 0.1651, "step": 8168 }, { "epoch": 2.71, "learning_rate": 4.820430729492842e-07, "loss": 0.1456, "step": 8169 }, { "epoch": 2.71, "learning_rate": 4.809445290030479e-07, "loss": 0.1363, "step": 8170 }, { "epoch": 2.71, "learning_rate": 4.798472073955762e-07, "loss": 0.1353, "step": 8171 }, { "epoch": 2.71, "learning_rate": 4.787511082677765e-07, "loss": 0.1168, "step": 8172 }, { "epoch": 2.71, "learning_rate": 4.776562317603972e-07, "loss": 0.0852, "step": 8173 }, { "epoch": 2.71, "learning_rate": 4.765625780140337e-07, "loss": 0.1263, "step": 8174 }, { "epoch": 2.71, "learning_rate": 4.754701471691214e-07, "loss": 0.1269, "step": 8175 }, { "epoch": 2.71, "learning_rate": 4.743789393659359e-07, "loss": 0.1239, "step": 8176 }, { "epoch": 2.71, "learning_rate": 4.732889547446029e-07, "loss": 0.1556, "step": 8177 }, { "epoch": 2.71, "learning_rate": 4.722001934450826e-07, "loss": 0.0845, "step": 8178 }, { "epoch": 2.71, "learning_rate": 4.711126556071888e-07, "loss": 0.1531, "step": 8179 }, { "epoch": 2.71, "learning_rate": 4.7002634137056526e-07, "loss": 0.1275, "step": 8180 }, { "epoch": 2.72, "learning_rate": 4.689412508747082e-07, "loss": 0.1198, "step": 8181 }, { "epoch": 2.72, "learning_rate": 4.67857384258954e-07, "loss": 0.1228, "step": 8182 }, { "epoch": 2.72, "learning_rate": 4.667747416624813e-07, "loss": 0.1311, "step": 8183 }, { "epoch": 2.72, "learning_rate": 4.6569332322431213e-07, "loss": 0.182, "step": 8184 }, { "epoch": 2.72, "learning_rate": 4.646131290833089e-07, "loss": 0.1166, "step": 8185 }, { "epoch": 2.72, "learning_rate": 4.6353415937818393e-07, "loss": 0.1484, "step": 8186 }, { "epoch": 2.72, "learning_rate": 4.624564142474808e-07, "loss": 0.1009, "step": 8187 }, { "epoch": 2.72, "learning_rate": 4.613798938295977e-07, "loss": 0.1887, "step": 8188 }, { "epoch": 2.72, "learning_rate": 4.603045982627663e-07, "loss": 0.1075, "step": 8189 }, { "epoch": 2.72, "learning_rate": 4.5923052768506947e-07, "loss": 0.1594, "step": 8190 }, { "epoch": 2.72, "learning_rate": 4.581576822344247e-07, "loss": 0.1415, "step": 8191 }, { "epoch": 2.72, "learning_rate": 4.5708606204859617e-07, "loss": 0.1525, "step": 8192 }, { "epoch": 2.72, "learning_rate": 4.5601566726519165e-07, "loss": 0.1106, "step": 8193 }, { "epoch": 2.72, "learning_rate": 4.5494649802166004e-07, "loss": 0.1443, "step": 8194 }, { "epoch": 2.72, "learning_rate": 4.5387855445529263e-07, "loss": 0.1196, "step": 8195 }, { "epoch": 2.72, "learning_rate": 4.528118367032219e-07, "loss": 0.1326, "step": 8196 }, { "epoch": 2.72, "learning_rate": 4.517463449024273e-07, "loss": 0.1054, "step": 8197 }, { "epoch": 2.72, "learning_rate": 4.5068207918972594e-07, "loss": 0.1167, "step": 8198 }, { "epoch": 2.72, "learning_rate": 4.496190397017841e-07, "loss": 0.1658, "step": 8199 }, { "epoch": 2.72, "learning_rate": 4.4855722657510036e-07, "loss": 0.1288, "step": 8200 }, { "epoch": 2.72, "learning_rate": 4.474966399460268e-07, "loss": 0.0877, "step": 8201 }, { "epoch": 2.72, "learning_rate": 4.464372799507499e-07, "loss": 0.1229, "step": 8202 }, { "epoch": 2.72, "learning_rate": 4.4537914672530214e-07, "loss": 0.1098, "step": 8203 }, { "epoch": 2.72, "learning_rate": 4.4432224040556025e-07, "loss": 0.1216, "step": 8204 }, { "epoch": 2.72, "learning_rate": 4.432665611272391e-07, "loss": 0.1417, "step": 8205 }, { "epoch": 2.72, "learning_rate": 4.4221210902590016e-07, "loss": 0.1347, "step": 8206 }, { "epoch": 2.72, "learning_rate": 4.4115888423694184e-07, "loss": 0.0891, "step": 8207 }, { "epoch": 2.72, "learning_rate": 4.401068868956115e-07, "loss": 0.1094, "step": 8208 }, { "epoch": 2.72, "learning_rate": 4.3905611713699335e-07, "loss": 0.1471, "step": 8209 }, { "epoch": 2.72, "learning_rate": 4.380065750960194e-07, "loss": 0.0997, "step": 8210 }, { "epoch": 2.73, "learning_rate": 4.3695826090745965e-07, "loss": 0.1457, "step": 8211 }, { "epoch": 2.73, "learning_rate": 4.3591117470592527e-07, "loss": 0.1392, "step": 8212 }, { "epoch": 2.73, "learning_rate": 4.348653166258776e-07, "loss": 0.1073, "step": 8213 }, { "epoch": 2.73, "learning_rate": 4.3382068680160816e-07, "loss": 0.1408, "step": 8214 }, { "epoch": 2.73, "learning_rate": 4.3277728536726405e-07, "loss": 0.1182, "step": 8215 }, { "epoch": 2.73, "learning_rate": 4.3173511245682253e-07, "loss": 0.1431, "step": 8216 }, { "epoch": 2.73, "learning_rate": 4.3069416820411327e-07, "loss": 0.1304, "step": 8217 }, { "epoch": 2.73, "learning_rate": 4.296544527428004e-07, "loss": 0.1208, "step": 8218 }, { "epoch": 2.73, "learning_rate": 4.28615966206396e-07, "loss": 0.052, "step": 8219 }, { "epoch": 2.73, "learning_rate": 4.275787087282501e-07, "loss": 0.0915, "step": 8220 }, { "epoch": 2.73, "learning_rate": 4.265426804415573e-07, "loss": 0.1424, "step": 8221 }, { "epoch": 2.73, "learning_rate": 4.255078814793523e-07, "loss": 0.1476, "step": 8222 }, { "epoch": 2.73, "learning_rate": 4.244743119745143e-07, "loss": 0.0835, "step": 8223 }, { "epoch": 2.73, "learning_rate": 4.234419720597638e-07, "loss": 0.1142, "step": 8224 }, { "epoch": 2.73, "learning_rate": 4.224108618676626e-07, "loss": 0.1568, "step": 8225 }, { "epoch": 2.73, "learning_rate": 4.213809815306169e-07, "loss": 0.1887, "step": 8226 }, { "epoch": 2.73, "learning_rate": 4.2035233118087215e-07, "loss": 0.1478, "step": 8227 }, { "epoch": 2.73, "learning_rate": 4.1932491095051596e-07, "loss": 0.0972, "step": 8228 }, { "epoch": 2.73, "learning_rate": 4.182987209714795e-07, "loss": 0.1165, "step": 8229 }, { "epoch": 2.73, "learning_rate": 4.172737613755373e-07, "loss": 0.1273, "step": 8230 }, { "epoch": 2.73, "learning_rate": 4.1625003229430196e-07, "loss": 0.1585, "step": 8231 }, { "epoch": 2.73, "learning_rate": 4.152275338592304e-07, "loss": 0.1277, "step": 8232 }, { "epoch": 2.73, "learning_rate": 4.142062662016233e-07, "loss": 0.0709, "step": 8233 }, { "epoch": 2.73, "learning_rate": 4.13186229452619e-07, "loss": 0.0944, "step": 8234 }, { "epoch": 2.73, "learning_rate": 4.121674237432016e-07, "loss": 0.085, "step": 8235 }, { "epoch": 2.73, "learning_rate": 4.1114984920419323e-07, "loss": 0.1727, "step": 8236 }, { "epoch": 2.73, "learning_rate": 4.101335059662637e-07, "loss": 0.1226, "step": 8237 }, { "epoch": 2.73, "learning_rate": 4.091183941599186e-07, "loss": 0.1091, "step": 8238 }, { "epoch": 2.73, "learning_rate": 4.0810451391550933e-07, "loss": 0.1089, "step": 8239 }, { "epoch": 2.73, "learning_rate": 4.070918653632283e-07, "loss": 0.1432, "step": 8240 }, { "epoch": 2.74, "learning_rate": 4.0608044863310935e-07, "loss": 0.1317, "step": 8241 }, { "epoch": 2.74, "learning_rate": 4.0507026385502747e-07, "loss": 0.1162, "step": 8242 }, { "epoch": 2.74, "learning_rate": 4.0406131115869885e-07, "loss": 0.0945, "step": 8243 }, { "epoch": 2.74, "learning_rate": 4.0305359067368543e-07, "loss": 0.1581, "step": 8244 }, { "epoch": 2.74, "learning_rate": 4.0204710252938596e-07, "loss": 0.1335, "step": 8245 }, { "epoch": 2.74, "learning_rate": 4.010418468550448e-07, "loss": 0.1502, "step": 8246 }, { "epoch": 2.74, "learning_rate": 4.000378237797453e-07, "loss": 0.0892, "step": 8247 }, { "epoch": 2.74, "learning_rate": 3.9903503343241777e-07, "loss": 0.1173, "step": 8248 }, { "epoch": 2.74, "learning_rate": 3.9803347594182364e-07, "loss": 0.1375, "step": 8249 }, { "epoch": 2.74, "learning_rate": 3.970331514365766e-07, "loss": 0.1177, "step": 8250 }, { "epoch": 2.74, "learning_rate": 3.960340600451285e-07, "loss": 0.141, "step": 8251 }, { "epoch": 2.74, "learning_rate": 3.9503620189576874e-07, "loss": 0.1357, "step": 8252 }, { "epoch": 2.74, "learning_rate": 3.940395771166361e-07, "loss": 0.1782, "step": 8253 }, { "epoch": 2.74, "learning_rate": 3.930441858357026e-07, "loss": 0.1356, "step": 8254 }, { "epoch": 2.74, "learning_rate": 3.9205002818079263e-07, "loss": 0.136, "step": 8255 }, { "epoch": 2.74, "learning_rate": 3.9105710427955855e-07, "loss": 0.1393, "step": 8256 }, { "epoch": 2.74, "learning_rate": 3.9006541425950505e-07, "loss": 0.1586, "step": 8257 }, { "epoch": 2.74, "learning_rate": 3.8907495824797246e-07, "loss": 0.1278, "step": 8258 }, { "epoch": 2.74, "learning_rate": 3.880857363721491e-07, "loss": 0.1597, "step": 8259 }, { "epoch": 2.74, "learning_rate": 3.870977487590566e-07, "loss": 0.1128, "step": 8260 }, { "epoch": 2.74, "learning_rate": 3.8611099553556354e-07, "loss": 0.176, "step": 8261 }, { "epoch": 2.74, "learning_rate": 3.8512547682838076e-07, "loss": 0.1336, "step": 8262 }, { "epoch": 2.74, "learning_rate": 3.841411927640526e-07, "loss": 0.0854, "step": 8263 }, { "epoch": 2.74, "learning_rate": 3.831581434689768e-07, "loss": 0.0987, "step": 8264 }, { "epoch": 2.74, "learning_rate": 3.821763290693825e-07, "loss": 0.1432, "step": 8265 }, { "epoch": 2.74, "learning_rate": 3.8119574969134653e-07, "loss": 0.1259, "step": 8266 }, { "epoch": 2.74, "learning_rate": 3.802164054607826e-07, "loss": 0.1096, "step": 8267 }, { "epoch": 2.74, "learning_rate": 3.7923829650344913e-07, "loss": 0.1494, "step": 8268 }, { "epoch": 2.74, "learning_rate": 3.782614229449455e-07, "loss": 0.1309, "step": 8269 }, { "epoch": 2.74, "learning_rate": 3.772857849107114e-07, "loss": 0.1419, "step": 8270 }, { "epoch": 2.75, "learning_rate": 3.7631138252602784e-07, "loss": 0.1406, "step": 8271 }, { "epoch": 2.75, "learning_rate": 3.753382159160157e-07, "loss": 0.133, "step": 8272 }, { "epoch": 2.75, "learning_rate": 3.743662852056429e-07, "loss": 0.1659, "step": 8273 }, { "epoch": 2.75, "learning_rate": 3.7339559051971066e-07, "loss": 0.1221, "step": 8274 }, { "epoch": 2.75, "learning_rate": 3.7242613198286924e-07, "loss": 0.1101, "step": 8275 }, { "epoch": 2.75, "learning_rate": 3.714579097196036e-07, "loss": 0.1131, "step": 8276 }, { "epoch": 2.75, "learning_rate": 3.7049092385424426e-07, "loss": 0.0752, "step": 8277 }, { "epoch": 2.75, "learning_rate": 3.695251745109607e-07, "loss": 0.1559, "step": 8278 }, { "epoch": 2.75, "learning_rate": 3.685606618137649e-07, "loss": 0.1613, "step": 8279 }, { "epoch": 2.75, "learning_rate": 3.6759738588651117e-07, "loss": 0.1251, "step": 8280 }, { "epoch": 2.75, "learning_rate": 3.666353468528894e-07, "loss": 0.1535, "step": 8281 }, { "epoch": 2.75, "learning_rate": 3.6567454483644073e-07, "loss": 0.1138, "step": 8282 }, { "epoch": 2.75, "learning_rate": 3.647149799605354e-07, "loss": 0.0991, "step": 8283 }, { "epoch": 2.75, "learning_rate": 3.6375665234839487e-07, "loss": 0.1024, "step": 8284 }, { "epoch": 2.75, "learning_rate": 3.627995621230751e-07, "loss": 0.1068, "step": 8285 }, { "epoch": 2.75, "learning_rate": 3.618437094074778e-07, "loss": 0.0928, "step": 8286 }, { "epoch": 2.75, "learning_rate": 3.608890943243426e-07, "loss": 0.1246, "step": 8287 }, { "epoch": 2.75, "learning_rate": 3.599357169962503e-07, "loss": 0.1127, "step": 8288 }, { "epoch": 2.75, "learning_rate": 3.589835775456274e-07, "loss": 0.139, "step": 8289 }, { "epoch": 2.75, "learning_rate": 3.580326760947339e-07, "loss": 0.0957, "step": 8290 }, { "epoch": 2.75, "learning_rate": 3.570830127656766e-07, "loss": 0.1045, "step": 8291 }, { "epoch": 2.75, "learning_rate": 3.561345876804001e-07, "loss": 0.0702, "step": 8292 }, { "epoch": 2.75, "learning_rate": 3.551874009606926e-07, "loss": 0.1522, "step": 8293 }, { "epoch": 2.75, "learning_rate": 3.542414527281812e-07, "loss": 0.1951, "step": 8294 }, { "epoch": 2.75, "learning_rate": 3.532967431043366e-07, "loss": 0.1618, "step": 8295 }, { "epoch": 2.75, "learning_rate": 3.523532722104672e-07, "loss": 0.0982, "step": 8296 }, { "epoch": 2.75, "learning_rate": 3.5141104016772397e-07, "loss": 0.0682, "step": 8297 }, { "epoch": 2.75, "learning_rate": 3.50470047097099e-07, "loss": 0.1208, "step": 8298 }, { "epoch": 2.75, "learning_rate": 3.495302931194233e-07, "loss": 0.1168, "step": 8299 }, { "epoch": 2.75, "learning_rate": 3.4859177835537274e-07, "loss": 0.1617, "step": 8300 }, { "epoch": 2.76, "learning_rate": 3.4765450292545856e-07, "loss": 0.0974, "step": 8301 }, { "epoch": 2.76, "learning_rate": 3.4671846695004006e-07, "loss": 0.1448, "step": 8302 }, { "epoch": 2.76, "learning_rate": 3.457836705493112e-07, "loss": 0.1435, "step": 8303 }, { "epoch": 2.76, "learning_rate": 3.44850113843308e-07, "loss": 0.1057, "step": 8304 }, { "epoch": 2.76, "learning_rate": 3.439177969519092e-07, "loss": 0.1353, "step": 8305 }, { "epoch": 2.76, "learning_rate": 3.4298671999483335e-07, "loss": 0.0898, "step": 8306 }, { "epoch": 2.76, "learning_rate": 3.4205688309163933e-07, "loss": 0.1256, "step": 8307 }, { "epoch": 2.76, "learning_rate": 3.4112828636172713e-07, "loss": 0.1221, "step": 8308 }, { "epoch": 2.76, "learning_rate": 3.402009299243392e-07, "loss": 0.1604, "step": 8309 }, { "epoch": 2.76, "learning_rate": 3.3927481389855466e-07, "loss": 0.1206, "step": 8310 }, { "epoch": 2.76, "learning_rate": 3.383499384032973e-07, "loss": 0.1817, "step": 8311 }, { "epoch": 2.76, "learning_rate": 3.3742630355732884e-07, "loss": 0.1593, "step": 8312 }, { "epoch": 2.76, "learning_rate": 3.365039094792544e-07, "loss": 0.1346, "step": 8313 }, { "epoch": 2.76, "learning_rate": 3.355827562875158e-07, "loss": 0.1141, "step": 8314 }, { "epoch": 2.76, "learning_rate": 3.3466284410040183e-07, "loss": 0.0947, "step": 8315 }, { "epoch": 2.76, "learning_rate": 3.3374417303603577e-07, "loss": 0.1357, "step": 8316 }, { "epoch": 2.76, "learning_rate": 3.328267432123833e-07, "loss": 0.0959, "step": 8317 }, { "epoch": 2.76, "learning_rate": 3.3191055474725343e-07, "loss": 0.1146, "step": 8318 }, { "epoch": 2.76, "learning_rate": 3.30995607758291e-07, "loss": 0.1255, "step": 8319 }, { "epoch": 2.76, "learning_rate": 3.300819023629864e-07, "loss": 0.1491, "step": 8320 }, { "epoch": 2.76, "learning_rate": 3.2916943867866593e-07, "loss": 0.1782, "step": 8321 }, { "epoch": 2.76, "learning_rate": 3.2825821682250016e-07, "loss": 0.1176, "step": 8322 }, { "epoch": 2.76, "learning_rate": 3.2734823691149886e-07, "loss": 0.1585, "step": 8323 }, { "epoch": 2.76, "learning_rate": 3.2643949906251414e-07, "loss": 0.1405, "step": 8324 }, { "epoch": 2.76, "learning_rate": 3.2553200339223157e-07, "loss": 0.1111, "step": 8325 }, { "epoch": 2.76, "learning_rate": 3.246257500171879e-07, "loss": 0.1626, "step": 8326 }, { "epoch": 2.76, "learning_rate": 3.2372073905375115e-07, "loss": 0.1461, "step": 8327 }, { "epoch": 2.76, "learning_rate": 3.228169706181339e-07, "loss": 0.1134, "step": 8328 }, { "epoch": 2.76, "learning_rate": 3.219144448263911e-07, "loss": 0.14, "step": 8329 }, { "epoch": 2.76, "learning_rate": 3.2101316179441343e-07, "loss": 0.0708, "step": 8330 }, { "epoch": 2.77, "learning_rate": 3.2011312163793605e-07, "loss": 0.1294, "step": 8331 }, { "epoch": 2.77, "learning_rate": 3.1921432447253096e-07, "loss": 0.1111, "step": 8332 }, { "epoch": 2.77, "learning_rate": 3.1831677041361364e-07, "loss": 0.1479, "step": 8333 }, { "epoch": 2.77, "learning_rate": 3.1742045957643744e-07, "loss": 0.1227, "step": 8334 }, { "epoch": 2.77, "learning_rate": 3.1652539207610025e-07, "loss": 0.0937, "step": 8335 }, { "epoch": 2.77, "learning_rate": 3.156315680275346e-07, "loss": 0.1372, "step": 8336 }, { "epoch": 2.77, "learning_rate": 3.147389875455165e-07, "loss": 0.1744, "step": 8337 }, { "epoch": 2.77, "learning_rate": 3.1384765074466416e-07, "loss": 0.1088, "step": 8338 }, { "epoch": 2.77, "learning_rate": 3.129575577394295e-07, "loss": 0.1132, "step": 8339 }, { "epoch": 2.77, "learning_rate": 3.1206870864411433e-07, "loss": 0.0909, "step": 8340 }, { "epoch": 2.77, "learning_rate": 3.111811035728507e-07, "loss": 0.128, "step": 8341 }, { "epoch": 2.77, "learning_rate": 3.1029474263961856e-07, "loss": 0.0995, "step": 8342 }, { "epoch": 2.77, "learning_rate": 3.0940962595823464e-07, "loss": 0.1366, "step": 8343 }, { "epoch": 2.77, "learning_rate": 3.085257536423558e-07, "loss": 0.0694, "step": 8344 }, { "epoch": 2.77, "learning_rate": 3.076431258054813e-07, "loss": 0.1234, "step": 8345 }, { "epoch": 2.77, "learning_rate": 3.0676174256094703e-07, "loss": 0.1127, "step": 8346 }, { "epoch": 2.77, "learning_rate": 3.058816040219337e-07, "loss": 0.1535, "step": 8347 }, { "epoch": 2.77, "learning_rate": 3.0500271030145635e-07, "loss": 0.1413, "step": 8348 }, { "epoch": 2.77, "learning_rate": 3.041250615123759e-07, "loss": 0.1189, "step": 8349 }, { "epoch": 2.77, "learning_rate": 3.032486577673899e-07, "loss": 0.1872, "step": 8350 }, { "epoch": 2.77, "learning_rate": 3.0237349917903726e-07, "loss": 0.1505, "step": 8351 }, { "epoch": 2.77, "learning_rate": 3.0149958585969807e-07, "loss": 0.1394, "step": 8352 }, { "epoch": 2.77, "learning_rate": 3.0062691792158926e-07, "loss": 0.1507, "step": 8353 }, { "epoch": 2.77, "learning_rate": 2.997554954767712e-07, "loss": 0.0986, "step": 8354 }, { "epoch": 2.77, "learning_rate": 2.98885318637141e-07, "loss": 0.1259, "step": 8355 }, { "epoch": 2.77, "learning_rate": 2.9801638751444153e-07, "loss": 0.1309, "step": 8356 }, { "epoch": 2.77, "learning_rate": 2.971487022202468e-07, "loss": 0.1126, "step": 8357 }, { "epoch": 2.77, "learning_rate": 2.962822628659823e-07, "loss": 0.11, "step": 8358 }, { "epoch": 2.77, "learning_rate": 2.954170695629011e-07, "loss": 0.117, "step": 8359 }, { "epoch": 2.77, "learning_rate": 2.945531224221054e-07, "loss": 0.1383, "step": 8360 }, { "epoch": 2.77, "learning_rate": 2.9369042155453444e-07, "loss": 0.1279, "step": 8361 }, { "epoch": 2.78, "learning_rate": 2.928289670709661e-07, "loss": 0.242, "step": 8362 }, { "epoch": 2.78, "learning_rate": 2.9196875908202194e-07, "loss": 0.1335, "step": 8363 }, { "epoch": 2.78, "learning_rate": 2.911097976981569e-07, "loss": 0.1501, "step": 8364 }, { "epoch": 2.78, "learning_rate": 2.902520830296751e-07, "loss": 0.1357, "step": 8365 }, { "epoch": 2.78, "learning_rate": 2.8939561518671164e-07, "loss": 0.1367, "step": 8366 }, { "epoch": 2.78, "learning_rate": 2.885403942792464e-07, "loss": 0.1689, "step": 8367 }, { "epoch": 2.78, "learning_rate": 2.8768642041709595e-07, "loss": 0.0788, "step": 8368 }, { "epoch": 2.78, "learning_rate": 2.8683369370992254e-07, "loss": 0.1061, "step": 8369 }, { "epoch": 2.78, "learning_rate": 2.8598221426722196e-07, "loss": 0.1096, "step": 8370 }, { "epoch": 2.78, "learning_rate": 2.851319821983345e-07, "loss": 0.1276, "step": 8371 }, { "epoch": 2.78, "learning_rate": 2.842829976124362e-07, "loss": 0.1231, "step": 8372 }, { "epoch": 2.78, "learning_rate": 2.834352606185453e-07, "loss": 0.1148, "step": 8373 }, { "epoch": 2.78, "learning_rate": 2.825887713255193e-07, "loss": 0.0977, "step": 8374 }, { "epoch": 2.78, "learning_rate": 2.8174352984205565e-07, "loss": 0.1248, "step": 8375 }, { "epoch": 2.78, "learning_rate": 2.808995362766931e-07, "loss": 0.1069, "step": 8376 }, { "epoch": 2.78, "learning_rate": 2.80056790737806e-07, "loss": 0.1713, "step": 8377 }, { "epoch": 2.78, "learning_rate": 2.792152933336134e-07, "loss": 0.1456, "step": 8378 }, { "epoch": 2.78, "learning_rate": 2.783750441721711e-07, "loss": 0.14, "step": 8379 }, { "epoch": 2.78, "learning_rate": 2.775360433613739e-07, "loss": 0.1382, "step": 8380 }, { "epoch": 2.78, "learning_rate": 2.766982910089577e-07, "loss": 0.1116, "step": 8381 }, { "epoch": 2.78, "learning_rate": 2.758617872225e-07, "loss": 0.1267, "step": 8382 }, { "epoch": 2.78, "learning_rate": 2.7502653210941475e-07, "loss": 0.1302, "step": 8383 }, { "epoch": 2.78, "learning_rate": 2.741925257769562e-07, "loss": 0.1435, "step": 8384 }, { "epoch": 2.78, "learning_rate": 2.733597683322198e-07, "loss": 0.1305, "step": 8385 }, { "epoch": 2.78, "learning_rate": 2.725282598821399e-07, "loss": 0.1258, "step": 8386 }, { "epoch": 2.78, "learning_rate": 2.7169800053348907e-07, "loss": 0.1255, "step": 8387 }, { "epoch": 2.78, "learning_rate": 2.708689903928807e-07, "loss": 0.0996, "step": 8388 }, { "epoch": 2.78, "learning_rate": 2.700412295667687e-07, "loss": 0.0912, "step": 8389 }, { "epoch": 2.78, "learning_rate": 2.692147181614446e-07, "loss": 0.1001, "step": 8390 }, { "epoch": 2.78, "learning_rate": 2.683894562830414e-07, "loss": 0.1214, "step": 8391 }, { "epoch": 2.79, "learning_rate": 2.675654440375308e-07, "loss": 0.1173, "step": 8392 }, { "epoch": 2.79, "learning_rate": 2.667426815307228e-07, "loss": 0.1001, "step": 8393 }, { "epoch": 2.79, "learning_rate": 2.6592116886826837e-07, "loss": 0.139, "step": 8394 }, { "epoch": 2.79, "learning_rate": 2.651009061556575e-07, "loss": 0.1404, "step": 8395 }, { "epoch": 2.79, "learning_rate": 2.642818934982205e-07, "loss": 0.1461, "step": 8396 }, { "epoch": 2.79, "learning_rate": 2.6346413100112656e-07, "loss": 0.12, "step": 8397 }, { "epoch": 2.79, "learning_rate": 2.626476187693838e-07, "loss": 0.1469, "step": 8398 }, { "epoch": 2.79, "learning_rate": 2.6183235690783957e-07, "loss": 0.1361, "step": 8399 }, { "epoch": 2.79, "learning_rate": 2.6101834552118564e-07, "loss": 0.0976, "step": 8400 }, { "epoch": 2.79, "learning_rate": 2.6020558471394284e-07, "loss": 0.1476, "step": 8401 }, { "epoch": 2.79, "learning_rate": 2.5939407459048107e-07, "loss": 0.1777, "step": 8402 }, { "epoch": 2.79, "learning_rate": 2.585838152550058e-07, "loss": 0.1591, "step": 8403 }, { "epoch": 2.79, "learning_rate": 2.577748068115593e-07, "loss": 0.1187, "step": 8404 }, { "epoch": 2.79, "learning_rate": 2.569670493640308e-07, "loss": 0.0958, "step": 8405 }, { "epoch": 2.79, "learning_rate": 2.561605430161407e-07, "loss": 0.0927, "step": 8406 }, { "epoch": 2.79, "learning_rate": 2.553552878714549e-07, "loss": 0.1844, "step": 8407 }, { "epoch": 2.79, "learning_rate": 2.5455128403337306e-07, "loss": 0.1334, "step": 8408 }, { "epoch": 2.79, "learning_rate": 2.53748531605138e-07, "loss": 0.1331, "step": 8409 }, { "epoch": 2.79, "learning_rate": 2.5294703068983186e-07, "loss": 0.1491, "step": 8410 }, { "epoch": 2.79, "learning_rate": 2.521467813903744e-07, "loss": 0.1529, "step": 8411 }, { "epoch": 2.79, "learning_rate": 2.5134778380952573e-07, "loss": 0.1111, "step": 8412 }, { "epoch": 2.79, "learning_rate": 2.5055003804988374e-07, "loss": 0.1093, "step": 8413 }, { "epoch": 2.79, "learning_rate": 2.4975354421388986e-07, "loss": 0.1565, "step": 8414 }, { "epoch": 2.79, "learning_rate": 2.4895830240381667e-07, "loss": 0.1329, "step": 8415 }, { "epoch": 2.79, "learning_rate": 2.4816431272178477e-07, "loss": 0.1337, "step": 8416 }, { "epoch": 2.79, "learning_rate": 2.473715752697481e-07, "loss": 0.1568, "step": 8417 }, { "epoch": 2.79, "learning_rate": 2.46580090149503e-07, "loss": 0.1625, "step": 8418 }, { "epoch": 2.79, "learning_rate": 2.457898574626838e-07, "loss": 0.146, "step": 8419 }, { "epoch": 2.79, "learning_rate": 2.450008773107626e-07, "loss": 0.082, "step": 8420 }, { "epoch": 2.79, "learning_rate": 2.4421314979505506e-07, "loss": 0.1217, "step": 8421 }, { "epoch": 2.8, "learning_rate": 2.434266750167102e-07, "loss": 0.1682, "step": 8422 }, { "epoch": 2.8, "learning_rate": 2.426414530767196e-07, "loss": 0.1297, "step": 8423 }, { "epoch": 2.8, "learning_rate": 2.4185748407591247e-07, "loss": 0.0794, "step": 8424 }, { "epoch": 2.8, "learning_rate": 2.4107476811496054e-07, "loss": 0.1613, "step": 8425 }, { "epoch": 2.8, "learning_rate": 2.402933052943701e-07, "loss": 0.1045, "step": 8426 }, { "epoch": 2.8, "learning_rate": 2.3951309571448977e-07, "loss": 0.0845, "step": 8427 }, { "epoch": 2.8, "learning_rate": 2.387341394755049e-07, "loss": 0.1631, "step": 8428 }, { "epoch": 2.8, "learning_rate": 2.3795643667744116e-07, "loss": 0.0933, "step": 8429 }, { "epoch": 2.8, "learning_rate": 2.3717998742016412e-07, "loss": 0.1785, "step": 8430 }, { "epoch": 2.8, "learning_rate": 2.3640479180337627e-07, "loss": 0.0657, "step": 8431 }, { "epoch": 2.8, "learning_rate": 2.3563084992662134e-07, "loss": 0.0969, "step": 8432 }, { "epoch": 2.8, "learning_rate": 2.348581618892787e-07, "loss": 0.1175, "step": 8433 }, { "epoch": 2.8, "learning_rate": 2.3408672779057117e-07, "loss": 0.1344, "step": 8434 }, { "epoch": 2.8, "learning_rate": 2.3331654772955847e-07, "loss": 0.0942, "step": 8435 }, { "epoch": 2.8, "learning_rate": 2.3254762180513812e-07, "loss": 0.1339, "step": 8436 }, { "epoch": 2.8, "learning_rate": 2.317799501160467e-07, "loss": 0.1704, "step": 8437 }, { "epoch": 2.8, "learning_rate": 2.3101353276086312e-07, "loss": 0.1224, "step": 8438 }, { "epoch": 2.8, "learning_rate": 2.3024836983800204e-07, "loss": 0.0863, "step": 8439 }, { "epoch": 2.8, "learning_rate": 2.294844614457148e-07, "loss": 0.1599, "step": 8440 }, { "epoch": 2.8, "learning_rate": 2.287218076820996e-07, "loss": 0.1396, "step": 8441 }, { "epoch": 2.8, "learning_rate": 2.2796040864508484e-07, "loss": 0.1313, "step": 8442 }, { "epoch": 2.8, "learning_rate": 2.2720026443244335e-07, "loss": 0.1191, "step": 8443 }, { "epoch": 2.8, "learning_rate": 2.2644137514178378e-07, "loss": 0.1296, "step": 8444 }, { "epoch": 2.8, "learning_rate": 2.2568374087055588e-07, "loss": 0.1261, "step": 8445 }, { "epoch": 2.8, "learning_rate": 2.2492736171604634e-07, "loss": 0.1188, "step": 8446 }, { "epoch": 2.8, "learning_rate": 2.24172237775383e-07, "loss": 0.1148, "step": 8447 }, { "epoch": 2.8, "learning_rate": 2.234183691455294e-07, "loss": 0.134, "step": 8448 }, { "epoch": 2.8, "learning_rate": 2.2266575592329032e-07, "loss": 0.1229, "step": 8449 }, { "epoch": 2.8, "learning_rate": 2.2191439820530848e-07, "loss": 0.1136, "step": 8450 }, { "epoch": 2.8, "learning_rate": 2.2116429608806556e-07, "loss": 0.0944, "step": 8451 }, { "epoch": 2.81, "learning_rate": 2.204154496678812e-07, "loss": 0.0601, "step": 8452 }, { "epoch": 2.81, "learning_rate": 2.1966785904091516e-07, "loss": 0.0942, "step": 8453 }, { "epoch": 2.81, "learning_rate": 2.1892152430316617e-07, "loss": 0.1456, "step": 8454 }, { "epoch": 2.81, "learning_rate": 2.181764455504698e-07, "loss": 0.1191, "step": 8455 }, { "epoch": 2.81, "learning_rate": 2.1743262287850175e-07, "loss": 0.1326, "step": 8456 }, { "epoch": 2.81, "learning_rate": 2.1669005638277563e-07, "loss": 0.1439, "step": 8457 }, { "epoch": 2.81, "learning_rate": 2.1594874615864403e-07, "loss": 0.1772, "step": 8458 }, { "epoch": 2.81, "learning_rate": 2.152086923012997e-07, "loss": 0.1697, "step": 8459 }, { "epoch": 2.81, "learning_rate": 2.1446989490576997e-07, "loss": 0.1334, "step": 8460 }, { "epoch": 2.81, "learning_rate": 2.137323540669267e-07, "loss": 0.1263, "step": 8461 }, { "epoch": 2.81, "learning_rate": 2.129960698794753e-07, "loss": 0.1039, "step": 8462 }, { "epoch": 2.81, "learning_rate": 2.122610424379634e-07, "loss": 0.1428, "step": 8463 }, { "epoch": 2.81, "learning_rate": 2.1152727183677336e-07, "loss": 0.1464, "step": 8464 }, { "epoch": 2.81, "learning_rate": 2.1079475817012974e-07, "loss": 0.1496, "step": 8465 }, { "epoch": 2.81, "learning_rate": 2.1006350153209398e-07, "loss": 0.1096, "step": 8466 }, { "epoch": 2.81, "learning_rate": 2.0933350201656765e-07, "loss": 0.1751, "step": 8467 }, { "epoch": 2.81, "learning_rate": 2.086047597172891e-07, "loss": 0.1173, "step": 8468 }, { "epoch": 2.81, "learning_rate": 2.078772747278346e-07, "loss": 0.1148, "step": 8469 }, { "epoch": 2.81, "learning_rate": 2.071510471416227e-07, "loss": 0.123, "step": 8470 }, { "epoch": 2.81, "learning_rate": 2.0642607705190553e-07, "loss": 0.1264, "step": 8471 }, { "epoch": 2.81, "learning_rate": 2.057023645517775e-07, "loss": 0.1387, "step": 8472 }, { "epoch": 2.81, "learning_rate": 2.0497990973416982e-07, "loss": 0.1041, "step": 8473 }, { "epoch": 2.81, "learning_rate": 2.0425871269185383e-07, "loss": 0.0755, "step": 8474 }, { "epoch": 2.81, "learning_rate": 2.0353877351743655e-07, "loss": 0.1357, "step": 8475 }, { "epoch": 2.81, "learning_rate": 2.0282009230336853e-07, "loss": 0.1169, "step": 8476 }, { "epoch": 2.81, "learning_rate": 2.0210266914193033e-07, "loss": 0.1284, "step": 8477 }, { "epoch": 2.81, "learning_rate": 2.0138650412524941e-07, "loss": 0.1084, "step": 8478 }, { "epoch": 2.81, "learning_rate": 2.0067159734528775e-07, "loss": 0.1058, "step": 8479 }, { "epoch": 2.81, "learning_rate": 1.9995794889384522e-07, "loss": 0.14, "step": 8480 }, { "epoch": 2.81, "learning_rate": 1.9924555886256191e-07, "loss": 0.1849, "step": 8481 }, { "epoch": 2.82, "learning_rate": 1.9853442734291571e-07, "loss": 0.1345, "step": 8482 }, { "epoch": 2.82, "learning_rate": 1.978245544262247e-07, "loss": 0.1001, "step": 8483 }, { "epoch": 2.82, "learning_rate": 1.9711594020363823e-07, "loss": 0.1491, "step": 8484 }, { "epoch": 2.82, "learning_rate": 1.9640858476615453e-07, "loss": 0.1225, "step": 8485 }, { "epoch": 2.82, "learning_rate": 1.9570248820460214e-07, "loss": 0.1174, "step": 8486 }, { "epoch": 2.82, "learning_rate": 1.949976506096507e-07, "loss": 0.1052, "step": 8487 }, { "epoch": 2.82, "learning_rate": 1.9429407207181005e-07, "loss": 0.1365, "step": 8488 }, { "epoch": 2.82, "learning_rate": 1.935917526814235e-07, "loss": 0.0675, "step": 8489 }, { "epoch": 2.82, "learning_rate": 1.9289069252867998e-07, "loss": 0.1175, "step": 8490 }, { "epoch": 2.82, "learning_rate": 1.921908917035975e-07, "loss": 0.1396, "step": 8491 }, { "epoch": 2.82, "learning_rate": 1.9149235029604085e-07, "loss": 0.1469, "step": 8492 }, { "epoch": 2.82, "learning_rate": 1.9079506839570604e-07, "loss": 0.0799, "step": 8493 }, { "epoch": 2.82, "learning_rate": 1.900990460921348e-07, "loss": 0.1029, "step": 8494 }, { "epoch": 2.82, "learning_rate": 1.8940428347470007e-07, "loss": 0.0955, "step": 8495 }, { "epoch": 2.82, "learning_rate": 1.8871078063261828e-07, "loss": 0.1043, "step": 8496 }, { "epoch": 2.82, "learning_rate": 1.880185376549415e-07, "loss": 0.1559, "step": 8497 }, { "epoch": 2.82, "learning_rate": 1.8732755463055863e-07, "loss": 0.1416, "step": 8498 }, { "epoch": 2.82, "learning_rate": 1.8663783164820094e-07, "loss": 0.132, "step": 8499 }, { "epoch": 2.82, "learning_rate": 1.859493687964331e-07, "loss": 0.1539, "step": 8500 }, { "epoch": 2.82, "learning_rate": 1.8526216616366223e-07, "loss": 0.1722, "step": 8501 }, { "epoch": 2.82, "learning_rate": 1.845762238381299e-07, "loss": 0.1398, "step": 8502 }, { "epoch": 2.82, "learning_rate": 1.8389154190792124e-07, "loss": 0.0658, "step": 8503 }, { "epoch": 2.82, "learning_rate": 1.832081204609526e-07, "loss": 0.1123, "step": 8504 }, { "epoch": 2.82, "learning_rate": 1.8252595958498376e-07, "loss": 0.1582, "step": 8505 }, { "epoch": 2.82, "learning_rate": 1.818450593676091e-07, "loss": 0.1051, "step": 8506 }, { "epoch": 2.82, "learning_rate": 1.8116541989626423e-07, "loss": 0.1549, "step": 8507 }, { "epoch": 2.82, "learning_rate": 1.8048704125822158e-07, "loss": 0.1106, "step": 8508 }, { "epoch": 2.82, "learning_rate": 1.7980992354059036e-07, "loss": 0.1023, "step": 8509 }, { "epoch": 2.82, "learning_rate": 1.7913406683032098e-07, "loss": 0.1112, "step": 8510 }, { "epoch": 2.82, "learning_rate": 1.7845947121419739e-07, "loss": 0.1459, "step": 8511 }, { "epoch": 2.83, "learning_rate": 1.777861367788458e-07, "loss": 0.1354, "step": 8512 }, { "epoch": 2.83, "learning_rate": 1.7711406361072825e-07, "loss": 0.1399, "step": 8513 }, { "epoch": 2.83, "learning_rate": 1.7644325179614564e-07, "loss": 0.0722, "step": 8514 }, { "epoch": 2.83, "learning_rate": 1.7577370142123795e-07, "loss": 0.1643, "step": 8515 }, { "epoch": 2.83, "learning_rate": 1.7510541257197866e-07, "loss": 0.1169, "step": 8516 }, { "epoch": 2.83, "learning_rate": 1.7443838533418688e-07, "loss": 0.1069, "step": 8517 }, { "epoch": 2.83, "learning_rate": 1.737726197935119e-07, "loss": 0.1266, "step": 8518 }, { "epoch": 2.83, "learning_rate": 1.731081160354464e-07, "loss": 0.1141, "step": 8519 }, { "epoch": 2.83, "learning_rate": 1.7244487414531663e-07, "loss": 0.1249, "step": 8520 }, { "epoch": 2.83, "learning_rate": 1.717828942082922e-07, "loss": 0.074, "step": 8521 }, { "epoch": 2.83, "learning_rate": 1.7112217630937512e-07, "loss": 0.1117, "step": 8522 }, { "epoch": 2.83, "learning_rate": 1.7046272053341085e-07, "loss": 0.0972, "step": 8523 }, { "epoch": 2.83, "learning_rate": 1.6980452696507832e-07, "loss": 0.1091, "step": 8524 }, { "epoch": 2.83, "learning_rate": 1.6914759568889548e-07, "loss": 0.1591, "step": 8525 }, { "epoch": 2.83, "learning_rate": 1.6849192678921932e-07, "loss": 0.1232, "step": 8526 }, { "epoch": 2.83, "learning_rate": 1.678375203502436e-07, "loss": 0.0672, "step": 8527 }, { "epoch": 2.83, "learning_rate": 1.6718437645600104e-07, "loss": 0.1627, "step": 8528 }, { "epoch": 2.83, "learning_rate": 1.665324951903613e-07, "loss": 0.0999, "step": 8529 }, { "epoch": 2.83, "learning_rate": 1.658818766370318e-07, "loss": 0.1148, "step": 8530 }, { "epoch": 2.83, "learning_rate": 1.652325208795591e-07, "loss": 0.1567, "step": 8531 }, { "epoch": 2.83, "learning_rate": 1.6458442800132757e-07, "loss": 0.127, "step": 8532 }, { "epoch": 2.83, "learning_rate": 1.6393759808555509e-07, "loss": 0.0731, "step": 8533 }, { "epoch": 2.83, "learning_rate": 1.6329203121530522e-07, "loss": 0.1215, "step": 8534 }, { "epoch": 2.83, "learning_rate": 1.6264772747347168e-07, "loss": 0.1537, "step": 8535 }, { "epoch": 2.83, "learning_rate": 1.6200468694279047e-07, "loss": 0.1467, "step": 8536 }, { "epoch": 2.83, "learning_rate": 1.6136290970583557e-07, "loss": 0.1011, "step": 8537 }, { "epoch": 2.83, "learning_rate": 1.6072239584501548e-07, "loss": 0.1568, "step": 8538 }, { "epoch": 2.83, "learning_rate": 1.6008314544257886e-07, "loss": 0.1032, "step": 8539 }, { "epoch": 2.83, "learning_rate": 1.5944515858061226e-07, "loss": 0.1069, "step": 8540 }, { "epoch": 2.83, "learning_rate": 1.5880843534103796e-07, "loss": 0.1074, "step": 8541 }, { "epoch": 2.84, "learning_rate": 1.581729758056183e-07, "loss": 0.1871, "step": 8542 }, { "epoch": 2.84, "learning_rate": 1.5753878005595358e-07, "loss": 0.1147, "step": 8543 }, { "epoch": 2.84, "learning_rate": 1.5690584817347975e-07, "loss": 0.1202, "step": 8544 }, { "epoch": 2.84, "learning_rate": 1.5627418023947072e-07, "loss": 0.1096, "step": 8545 }, { "epoch": 2.84, "learning_rate": 1.5564377633503935e-07, "loss": 0.1234, "step": 8546 }, { "epoch": 2.84, "learning_rate": 1.5501463654113424e-07, "loss": 0.1076, "step": 8547 }, { "epoch": 2.84, "learning_rate": 1.5438676093854633e-07, "loss": 0.1214, "step": 8548 }, { "epoch": 2.84, "learning_rate": 1.5376014960789665e-07, "loss": 0.142, "step": 8549 }, { "epoch": 2.84, "learning_rate": 1.5313480262965197e-07, "loss": 0.1275, "step": 8550 }, { "epoch": 2.84, "learning_rate": 1.5251072008411026e-07, "loss": 0.0877, "step": 8551 }, { "epoch": 2.84, "learning_rate": 1.5188790205141191e-07, "loss": 0.1478, "step": 8552 }, { "epoch": 2.84, "learning_rate": 1.5126634861153067e-07, "loss": 0.0988, "step": 8553 }, { "epoch": 2.84, "learning_rate": 1.506460598442816e-07, "loss": 0.1567, "step": 8554 }, { "epoch": 2.84, "learning_rate": 1.5002703582931433e-07, "loss": 0.1259, "step": 8555 }, { "epoch": 2.84, "learning_rate": 1.494092766461186e-07, "loss": 0.0798, "step": 8556 }, { "epoch": 2.84, "learning_rate": 1.4879278237402095e-07, "loss": 0.1307, "step": 8557 }, { "epoch": 2.84, "learning_rate": 1.481775530921836e-07, "loss": 0.163, "step": 8558 }, { "epoch": 2.84, "learning_rate": 1.475635888796112e-07, "loss": 0.1495, "step": 8559 }, { "epoch": 2.84, "learning_rate": 1.469508898151384e-07, "loss": 0.1383, "step": 8560 }, { "epoch": 2.84, "learning_rate": 1.4633945597744448e-07, "loss": 0.1298, "step": 8561 }, { "epoch": 2.84, "learning_rate": 1.4572928744504223e-07, "loss": 0.0893, "step": 8562 }, { "epoch": 2.84, "learning_rate": 1.451203842962845e-07, "loss": 0.1038, "step": 8563 }, { "epoch": 2.84, "learning_rate": 1.4451274660935987e-07, "loss": 0.1403, "step": 8564 }, { "epoch": 2.84, "learning_rate": 1.4390637446229372e-07, "loss": 0.0935, "step": 8565 }, { "epoch": 2.84, "learning_rate": 1.433012679329515e-07, "loss": 0.1257, "step": 8566 }, { "epoch": 2.84, "learning_rate": 1.4269742709903333e-07, "loss": 0.0969, "step": 8567 }, { "epoch": 2.84, "learning_rate": 1.4209485203808048e-07, "loss": 0.1119, "step": 8568 }, { "epoch": 2.84, "learning_rate": 1.4149354282746663e-07, "loss": 0.13, "step": 8569 }, { "epoch": 2.84, "learning_rate": 1.408934995444078e-07, "loss": 0.1052, "step": 8570 }, { "epoch": 2.84, "learning_rate": 1.4029472226595341e-07, "loss": 0.103, "step": 8571 }, { "epoch": 2.85, "learning_rate": 1.3969721106899537e-07, "loss": 0.1387, "step": 8572 }, { "epoch": 2.85, "learning_rate": 1.3910096603025668e-07, "loss": 0.1395, "step": 8573 }, { "epoch": 2.85, "learning_rate": 1.3850598722630393e-07, "loss": 0.1479, "step": 8574 }, { "epoch": 2.85, "learning_rate": 1.3791227473353486e-07, "loss": 0.1428, "step": 8575 }, { "epoch": 2.85, "learning_rate": 1.373198286281896e-07, "loss": 0.1325, "step": 8576 }, { "epoch": 2.85, "learning_rate": 1.367286489863451e-07, "loss": 0.0854, "step": 8577 }, { "epoch": 2.85, "learning_rate": 1.361387358839117e-07, "loss": 0.1144, "step": 8578 }, { "epoch": 2.85, "learning_rate": 1.3555008939664215e-07, "loss": 0.1228, "step": 8579 }, { "epoch": 2.85, "learning_rate": 1.3496270960012493e-07, "loss": 0.1213, "step": 8580 }, { "epoch": 2.85, "learning_rate": 1.3437659656978297e-07, "loss": 0.1328, "step": 8581 }, { "epoch": 2.85, "learning_rate": 1.3379175038088055e-07, "loss": 0.0822, "step": 8582 }, { "epoch": 2.85, "learning_rate": 1.3320817110851648e-07, "loss": 0.1192, "step": 8583 }, { "epoch": 2.85, "learning_rate": 1.3262585882762967e-07, "loss": 0.1579, "step": 8584 }, { "epoch": 2.85, "learning_rate": 1.320448136129926e-07, "loss": 0.1571, "step": 8585 }, { "epoch": 2.85, "learning_rate": 1.3146503553921775e-07, "loss": 0.0966, "step": 8586 }, { "epoch": 2.85, "learning_rate": 1.3088652468075558e-07, "loss": 0.1387, "step": 8587 }, { "epoch": 2.85, "learning_rate": 1.303092811118911e-07, "loss": 0.1498, "step": 8588 }, { "epoch": 2.85, "learning_rate": 1.297333049067473e-07, "loss": 0.1144, "step": 8589 }, { "epoch": 2.85, "learning_rate": 1.2915859613928717e-07, "loss": 0.1067, "step": 8590 }, { "epoch": 2.85, "learning_rate": 1.2858515488330725e-07, "loss": 0.1188, "step": 8591 }, { "epoch": 2.85, "learning_rate": 1.280129812124431e-07, "loss": 0.1183, "step": 8592 }, { "epoch": 2.85, "learning_rate": 1.2744207520016926e-07, "loss": 0.126, "step": 8593 }, { "epoch": 2.85, "learning_rate": 1.2687243691979267e-07, "loss": 0.1766, "step": 8594 }, { "epoch": 2.85, "learning_rate": 1.2630406644446258e-07, "loss": 0.1706, "step": 8595 }, { "epoch": 2.85, "learning_rate": 1.2573696384716173e-07, "loss": 0.1436, "step": 8596 }, { "epoch": 2.85, "learning_rate": 1.2517112920071184e-07, "loss": 0.1575, "step": 8597 }, { "epoch": 2.85, "learning_rate": 1.246065625777726e-07, "loss": 0.1573, "step": 8598 }, { "epoch": 2.85, "learning_rate": 1.2404326405083934e-07, "loss": 0.1004, "step": 8599 }, { "epoch": 2.85, "learning_rate": 1.234812336922453e-07, "loss": 0.0916, "step": 8600 }, { "epoch": 2.85, "learning_rate": 1.2292047157415944e-07, "loss": 0.1794, "step": 8601 }, { "epoch": 2.85, "learning_rate": 1.223609777685908e-07, "loss": 0.1359, "step": 8602 }, { "epoch": 2.86, "learning_rate": 1.218027523473808e-07, "loss": 0.1518, "step": 8603 }, { "epoch": 2.86, "learning_rate": 1.2124579538221438e-07, "loss": 0.1173, "step": 8604 }, { "epoch": 2.86, "learning_rate": 1.206901069446087e-07, "loss": 0.1836, "step": 8605 }, { "epoch": 2.86, "learning_rate": 1.2013568710592005e-07, "loss": 0.1324, "step": 8606 }, { "epoch": 2.86, "learning_rate": 1.1958253593734037e-07, "loss": 0.2047, "step": 8607 }, { "epoch": 2.86, "learning_rate": 1.1903065350990061e-07, "loss": 0.1631, "step": 8608 }, { "epoch": 2.86, "learning_rate": 1.1848003989446632e-07, "loss": 0.1216, "step": 8609 }, { "epoch": 2.86, "learning_rate": 1.1793069516174427e-07, "loss": 0.1186, "step": 8610 }, { "epoch": 2.86, "learning_rate": 1.1738261938227246e-07, "loss": 0.1069, "step": 8611 }, { "epoch": 2.86, "learning_rate": 1.1683581262643128e-07, "loss": 0.1222, "step": 8612 }, { "epoch": 2.86, "learning_rate": 1.1629027496443568e-07, "loss": 0.1737, "step": 8613 }, { "epoch": 2.86, "learning_rate": 1.1574600646633738e-07, "loss": 0.1425, "step": 8614 }, { "epoch": 2.86, "learning_rate": 1.1520300720202604e-07, "loss": 0.1541, "step": 8615 }, { "epoch": 2.86, "learning_rate": 1.1466127724122811e-07, "loss": 0.1194, "step": 8616 }, { "epoch": 2.86, "learning_rate": 1.1412081665350683e-07, "loss": 0.1218, "step": 8617 }, { "epoch": 2.86, "learning_rate": 1.1358162550826113e-07, "loss": 0.1399, "step": 8618 }, { "epoch": 2.86, "learning_rate": 1.1304370387473119e-07, "loss": 0.0696, "step": 8619 }, { "epoch": 2.86, "learning_rate": 1.1250705182199062e-07, "loss": 0.1522, "step": 8620 }, { "epoch": 2.86, "learning_rate": 1.1197166941894988e-07, "loss": 0.1171, "step": 8621 }, { "epoch": 2.86, "learning_rate": 1.1143755673435619e-07, "loss": 0.1096, "step": 8622 }, { "epoch": 2.86, "learning_rate": 1.109047138367969e-07, "loss": 0.1412, "step": 8623 }, { "epoch": 2.86, "learning_rate": 1.1037314079469286e-07, "loss": 0.1691, "step": 8624 }, { "epoch": 2.86, "learning_rate": 1.0984283767630388e-07, "loss": 0.1253, "step": 8625 }, { "epoch": 2.86, "learning_rate": 1.0931380454972551e-07, "loss": 0.0826, "step": 8626 }, { "epoch": 2.86, "learning_rate": 1.0878604148289118e-07, "loss": 0.1279, "step": 8627 }, { "epoch": 2.86, "learning_rate": 1.0825954854357113e-07, "loss": 0.1031, "step": 8628 }, { "epoch": 2.86, "learning_rate": 1.0773432579937127e-07, "loss": 0.1319, "step": 8629 }, { "epoch": 2.86, "learning_rate": 1.0721037331773543e-07, "loss": 0.142, "step": 8630 }, { "epoch": 2.86, "learning_rate": 1.0668769116594535e-07, "loss": 0.1361, "step": 8631 }, { "epoch": 2.86, "learning_rate": 1.0616627941111623e-07, "loss": 0.1125, "step": 8632 }, { "epoch": 2.87, "learning_rate": 1.0564613812020341e-07, "loss": 0.1249, "step": 8633 }, { "epoch": 2.87, "learning_rate": 1.0512726735999901e-07, "loss": 0.1076, "step": 8634 }, { "epoch": 2.87, "learning_rate": 1.0460966719713084e-07, "loss": 0.1035, "step": 8635 }, { "epoch": 2.87, "learning_rate": 1.040933376980624e-07, "loss": 0.1248, "step": 8636 }, { "epoch": 2.87, "learning_rate": 1.0357827892909622e-07, "loss": 0.1037, "step": 8637 }, { "epoch": 2.87, "learning_rate": 1.030644909563705e-07, "loss": 0.1415, "step": 8638 }, { "epoch": 2.87, "learning_rate": 1.0255197384586135e-07, "loss": 0.1367, "step": 8639 }, { "epoch": 2.87, "learning_rate": 1.0204072766338058e-07, "loss": 0.1626, "step": 8640 }, { "epoch": 2.87, "learning_rate": 1.0153075247457678e-07, "loss": 0.1865, "step": 8641 }, { "epoch": 2.87, "learning_rate": 1.0102204834493756e-07, "loss": 0.1244, "step": 8642 }, { "epoch": 2.87, "learning_rate": 1.0051461533978179e-07, "loss": 0.0783, "step": 8643 }, { "epoch": 2.87, "learning_rate": 1.000084535242718e-07, "loss": 0.129, "step": 8644 }, { "epoch": 2.87, "learning_rate": 9.950356296340224e-08, "loss": 0.1331, "step": 8645 }, { "epoch": 2.87, "learning_rate": 9.899994372200683e-08, "loss": 0.1711, "step": 8646 }, { "epoch": 2.87, "learning_rate": 9.849759586475383e-08, "loss": 0.1415, "step": 8647 }, { "epoch": 2.87, "learning_rate": 9.799651945615162e-08, "loss": 0.1801, "step": 8648 }, { "epoch": 2.87, "learning_rate": 9.74967145605421e-08, "loss": 0.1398, "step": 8649 }, { "epoch": 2.87, "learning_rate": 9.699818124210503e-08, "loss": 0.14, "step": 8650 }, { "epoch": 2.87, "learning_rate": 9.650091956485697e-08, "loss": 0.0867, "step": 8651 }, { "epoch": 2.87, "learning_rate": 9.600492959265017e-08, "loss": 0.1447, "step": 8652 }, { "epoch": 2.87, "learning_rate": 9.551021138917593e-08, "loss": 0.1224, "step": 8653 }, { "epoch": 2.87, "learning_rate": 9.50167650179601e-08, "loss": 0.1278, "step": 8654 }, { "epoch": 2.87, "learning_rate": 9.45245905423664e-08, "loss": 0.1228, "step": 8655 }, { "epoch": 2.87, "learning_rate": 9.403368802559543e-08, "loss": 0.1298, "step": 8656 }, { "epoch": 2.87, "learning_rate": 9.35440575306823e-08, "loss": 0.1079, "step": 8657 }, { "epoch": 2.87, "learning_rate": 9.305569912050116e-08, "loss": 0.1039, "step": 8658 }, { "epoch": 2.87, "learning_rate": 9.256861285776186e-08, "loss": 0.108, "step": 8659 }, { "epoch": 2.87, "learning_rate": 9.208279880501103e-08, "loss": 0.1039, "step": 8660 }, { "epoch": 2.87, "learning_rate": 9.1598257024631e-08, "loss": 0.0882, "step": 8661 }, { "epoch": 2.87, "learning_rate": 9.111498757884307e-08, "loss": 0.1738, "step": 8662 }, { "epoch": 2.88, "learning_rate": 9.06329905297032e-08, "loss": 0.1103, "step": 8663 }, { "epoch": 2.88, "learning_rate": 9.01522659391052e-08, "loss": 0.1066, "step": 8664 }, { "epoch": 2.88, "learning_rate": 8.967281386877636e-08, "loss": 0.1372, "step": 8665 }, { "epoch": 2.88, "learning_rate": 8.919463438028519e-08, "loss": 0.125, "step": 8666 }, { "epoch": 2.88, "learning_rate": 8.871772753503371e-08, "loss": 0.1601, "step": 8667 }, { "epoch": 2.88, "learning_rate": 8.824209339426071e-08, "loss": 0.1436, "step": 8668 }, { "epoch": 2.88, "learning_rate": 8.776773201904398e-08, "loss": 0.0889, "step": 8669 }, { "epoch": 2.88, "learning_rate": 8.729464347029592e-08, "loss": 0.1145, "step": 8670 }, { "epoch": 2.88, "learning_rate": 8.68228278087635e-08, "loss": 0.1663, "step": 8671 }, { "epoch": 2.88, "learning_rate": 8.63522850950338e-08, "loss": 0.0952, "step": 8672 }, { "epoch": 2.88, "learning_rate": 8.588301538952958e-08, "loss": 0.1195, "step": 8673 }, { "epoch": 2.88, "learning_rate": 8.541501875250935e-08, "loss": 0.0995, "step": 8674 }, { "epoch": 2.88, "learning_rate": 8.494829524406722e-08, "loss": 0.1008, "step": 8675 }, { "epoch": 2.88, "learning_rate": 8.44828449241375e-08, "loss": 0.1472, "step": 8676 }, { "epoch": 2.88, "learning_rate": 8.40186678524857e-08, "loss": 0.1395, "step": 8677 }, { "epoch": 2.88, "learning_rate": 8.355576408871858e-08, "loss": 0.148, "step": 8678 }, { "epoch": 2.88, "learning_rate": 8.309413369227637e-08, "loss": 0.1392, "step": 8679 }, { "epoch": 2.88, "learning_rate": 8.263377672243723e-08, "loss": 0.1147, "step": 8680 }, { "epoch": 2.88, "learning_rate": 8.217469323831495e-08, "loss": 0.1302, "step": 8681 }, { "epoch": 2.88, "learning_rate": 8.171688329886241e-08, "loss": 0.1288, "step": 8682 }, { "epoch": 2.88, "learning_rate": 8.126034696286367e-08, "loss": 0.092, "step": 8683 }, { "epoch": 2.88, "learning_rate": 8.080508428894517e-08, "loss": 0.1277, "step": 8684 }, { "epoch": 2.88, "learning_rate": 8.035109533556463e-08, "loss": 0.1523, "step": 8685 }, { "epoch": 2.88, "learning_rate": 7.989838016102092e-08, "loss": 0.1806, "step": 8686 }, { "epoch": 2.88, "learning_rate": 7.944693882344645e-08, "loss": 0.1514, "step": 8687 }, { "epoch": 2.88, "learning_rate": 7.899677138080819e-08, "loss": 0.1056, "step": 8688 }, { "epoch": 2.88, "learning_rate": 7.854787789091545e-08, "loss": 0.1289, "step": 8689 }, { "epoch": 2.88, "learning_rate": 7.810025841140989e-08, "loss": 0.1036, "step": 8690 }, { "epoch": 2.88, "learning_rate": 7.765391299976777e-08, "loss": 0.1267, "step": 8691 }, { "epoch": 2.88, "learning_rate": 7.720884171330544e-08, "loss": 0.1165, "step": 8692 }, { "epoch": 2.89, "learning_rate": 7.67650446091761e-08, "loss": 0.0928, "step": 8693 }, { "epoch": 2.89, "learning_rate": 7.632252174436527e-08, "loss": 0.1453, "step": 8694 }, { "epoch": 2.89, "learning_rate": 7.588127317569749e-08, "loss": 0.1514, "step": 8695 }, { "epoch": 2.89, "learning_rate": 7.544129895983521e-08, "loss": 0.1761, "step": 8696 }, { "epoch": 2.89, "learning_rate": 7.500259915327323e-08, "loss": 0.1175, "step": 8697 }, { "epoch": 2.89, "learning_rate": 7.45651738123454e-08, "loss": 0.1459, "step": 8698 }, { "epoch": 2.89, "learning_rate": 7.412902299322123e-08, "loss": 0.1509, "step": 8699 }, { "epoch": 2.89, "learning_rate": 7.369414675190811e-08, "loss": 0.0838, "step": 8700 }, { "epoch": 2.89, "learning_rate": 7.326054514424696e-08, "loss": 0.1044, "step": 8701 }, { "epoch": 2.89, "learning_rate": 7.282821822591657e-08, "loss": 0.0954, "step": 8702 }, { "epoch": 2.89, "learning_rate": 7.239716605243252e-08, "loss": 0.1137, "step": 8703 }, { "epoch": 2.89, "learning_rate": 7.19673886791461e-08, "loss": 0.1859, "step": 8704 }, { "epoch": 2.89, "learning_rate": 7.153888616124316e-08, "loss": 0.137, "step": 8705 }, { "epoch": 2.89, "learning_rate": 7.111165855374968e-08, "loss": 0.1108, "step": 8706 }, { "epoch": 2.89, "learning_rate": 7.068570591152513e-08, "loss": 0.1685, "step": 8707 }, { "epoch": 2.89, "learning_rate": 7.026102828926462e-08, "loss": 0.1086, "step": 8708 }, { "epoch": 2.89, "learning_rate": 6.983762574150344e-08, "loss": 0.16, "step": 8709 }, { "epoch": 2.89, "learning_rate": 6.941549832260807e-08, "loss": 0.1883, "step": 8710 }, { "epoch": 2.89, "learning_rate": 6.89946460867863e-08, "loss": 0.123, "step": 8711 }, { "epoch": 2.89, "learning_rate": 6.85750690880771e-08, "loss": 0.1341, "step": 8712 }, { "epoch": 2.89, "learning_rate": 6.815676738035958e-08, "loss": 0.1049, "step": 8713 }, { "epoch": 2.89, "learning_rate": 6.773974101734637e-08, "loss": 0.1053, "step": 8714 }, { "epoch": 2.89, "learning_rate": 6.732399005259016e-08, "loss": 0.1335, "step": 8715 }, { "epoch": 2.89, "learning_rate": 6.690951453947603e-08, "loss": 0.1346, "step": 8716 }, { "epoch": 2.89, "learning_rate": 6.649631453122585e-08, "loss": 0.1186, "step": 8717 }, { "epoch": 2.89, "learning_rate": 6.608439008090051e-08, "loss": 0.1545, "step": 8718 }, { "epoch": 2.89, "learning_rate": 6.567374124139214e-08, "loss": 0.1075, "step": 8719 }, { "epoch": 2.89, "learning_rate": 6.526436806543523e-08, "loss": 0.1408, "step": 8720 }, { "epoch": 2.89, "learning_rate": 6.48562706055944e-08, "loss": 0.1736, "step": 8721 }, { "epoch": 2.89, "learning_rate": 6.444944891427551e-08, "loss": 0.14, "step": 8722 }, { "epoch": 2.9, "learning_rate": 6.404390304371677e-08, "loss": 0.0726, "step": 8723 }, { "epoch": 2.9, "learning_rate": 6.363963304599541e-08, "loss": 0.0857, "step": 8724 }, { "epoch": 2.9, "learning_rate": 6.323663897302434e-08, "loss": 0.0962, "step": 8725 }, { "epoch": 2.9, "learning_rate": 6.283492087654885e-08, "loss": 0.1835, "step": 8726 }, { "epoch": 2.9, "learning_rate": 6.243447880815656e-08, "loss": 0.1153, "step": 8727 }, { "epoch": 2.9, "learning_rate": 6.203531281926634e-08, "loss": 0.1428, "step": 8728 }, { "epoch": 2.9, "learning_rate": 6.163742296113606e-08, "loss": 0.1117, "step": 8729 }, { "epoch": 2.9, "learning_rate": 6.12408092848571e-08, "loss": 0.1113, "step": 8730 }, { "epoch": 2.9, "learning_rate": 6.08454718413598e-08, "loss": 0.0906, "step": 8731 }, { "epoch": 2.9, "learning_rate": 6.045141068140914e-08, "loss": 0.0816, "step": 8732 }, { "epoch": 2.9, "learning_rate": 6.005862585560574e-08, "loss": 0.0777, "step": 8733 }, { "epoch": 2.9, "learning_rate": 5.966711741438702e-08, "loss": 0.1169, "step": 8734 }, { "epoch": 2.9, "learning_rate": 5.927688540802834e-08, "loss": 0.1131, "step": 8735 }, { "epoch": 2.9, "learning_rate": 5.8887929886636275e-08, "loss": 0.0813, "step": 8736 }, { "epoch": 2.9, "learning_rate": 5.850025090015865e-08, "loss": 0.0988, "step": 8737 }, { "epoch": 2.9, "learning_rate": 5.8113848498376754e-08, "loss": 0.0847, "step": 8738 }, { "epoch": 2.9, "learning_rate": 5.7728722730907573e-08, "loss": 0.1595, "step": 8739 }, { "epoch": 2.9, "learning_rate": 5.734487364720598e-08, "loss": 0.0909, "step": 8740 }, { "epoch": 2.9, "learning_rate": 5.696230129656033e-08, "loss": 0.2003, "step": 8741 }, { "epoch": 2.9, "learning_rate": 5.6581005728099106e-08, "loss": 0.108, "step": 8742 }, { "epoch": 2.9, "learning_rate": 5.6200986990782026e-08, "loss": 0.1549, "step": 8743 }, { "epoch": 2.9, "learning_rate": 5.582224513340895e-08, "loss": 0.1063, "step": 8744 }, { "epoch": 2.9, "learning_rate": 5.544478020461319e-08, "loss": 0.1604, "step": 8745 }, { "epoch": 2.9, "learning_rate": 5.506859225286487e-08, "loss": 0.1416, "step": 8746 }, { "epoch": 2.9, "learning_rate": 5.469368132646979e-08, "loss": 0.0933, "step": 8747 }, { "epoch": 2.9, "learning_rate": 5.432004747357056e-08, "loss": 0.1267, "step": 8748 }, { "epoch": 2.9, "learning_rate": 5.3947690742145454e-08, "loss": 0.1667, "step": 8749 }, { "epoch": 2.9, "learning_rate": 5.357661118000845e-08, "loss": 0.1109, "step": 8750 }, { "epoch": 2.9, "learning_rate": 5.320680883481033e-08, "loss": 0.1508, "step": 8751 }, { "epoch": 2.9, "learning_rate": 5.283828375403643e-08, "loss": 0.124, "step": 8752 }, { "epoch": 2.91, "learning_rate": 5.247103598500891e-08, "loss": 0.1333, "step": 8753 }, { "epoch": 2.91, "learning_rate": 5.21050655748867e-08, "loss": 0.1103, "step": 8754 }, { "epoch": 2.91, "learning_rate": 5.1740372570664445e-08, "loss": 0.1324, "step": 8755 }, { "epoch": 2.91, "learning_rate": 5.137695701917023e-08, "loss": 0.1273, "step": 8756 }, { "epoch": 2.91, "learning_rate": 5.1014818967071166e-08, "loss": 0.1284, "step": 8757 }, { "epoch": 2.91, "learning_rate": 5.065395846087007e-08, "loss": 0.1675, "step": 8758 }, { "epoch": 2.91, "learning_rate": 5.02943755469043e-08, "loss": 0.0981, "step": 8759 }, { "epoch": 2.91, "learning_rate": 4.993607027134806e-08, "loss": 0.1432, "step": 8760 }, { "epoch": 2.91, "learning_rate": 4.9579042680210077e-08, "loss": 0.1639, "step": 8761 }, { "epoch": 2.91, "learning_rate": 4.922329281933813e-08, "loss": 0.1284, "step": 8762 }, { "epoch": 2.91, "learning_rate": 4.8868820734411236e-08, "loss": 0.169, "step": 8763 }, { "epoch": 2.91, "learning_rate": 4.851562647094965e-08, "loss": 0.1601, "step": 8764 }, { "epoch": 2.91, "learning_rate": 4.816371007430598e-08, "loss": 0.0863, "step": 8765 }, { "epoch": 2.91, "learning_rate": 4.781307158966852e-08, "loss": 0.1319, "step": 8766 }, { "epoch": 2.91, "learning_rate": 4.74637110620646e-08, "loss": 0.0874, "step": 8767 }, { "epoch": 2.91, "learning_rate": 4.711562853635387e-08, "loss": 0.1265, "step": 8768 }, { "epoch": 2.91, "learning_rate": 4.676882405723504e-08, "loss": 0.137, "step": 8769 }, { "epoch": 2.91, "learning_rate": 4.642329766923914e-08, "loss": 0.162, "step": 8770 }, { "epoch": 2.91, "learning_rate": 4.6079049416737356e-08, "loss": 0.1118, "step": 8771 }, { "epoch": 2.91, "learning_rate": 4.5736079343933204e-08, "loss": 0.1097, "step": 8772 }, { "epoch": 2.91, "learning_rate": 4.539438749486813e-08, "loss": 0.1326, "step": 8773 }, { "epoch": 2.91, "learning_rate": 4.5053973913418144e-08, "loss": 0.1214, "step": 8774 }, { "epoch": 2.91, "learning_rate": 4.4714838643293844e-08, "loss": 0.1315, "step": 8775 }, { "epoch": 2.91, "learning_rate": 4.437698172804705e-08, "loss": 0.2035, "step": 8776 }, { "epoch": 2.91, "learning_rate": 4.404040321105862e-08, "loss": 0.1343, "step": 8777 }, { "epoch": 2.91, "learning_rate": 4.370510313555065e-08, "loss": 0.1037, "step": 8778 }, { "epoch": 2.91, "learning_rate": 4.33710815445787e-08, "loss": 0.0991, "step": 8779 }, { "epoch": 2.91, "learning_rate": 4.3038338481034e-08, "loss": 0.1103, "step": 8780 }, { "epoch": 2.91, "learning_rate": 4.27068739876424e-08, "loss": 0.1484, "step": 8781 }, { "epoch": 2.91, "learning_rate": 4.237668810696982e-08, "loss": 0.1196, "step": 8782 }, { "epoch": 2.92, "learning_rate": 4.204778088141348e-08, "loss": 0.1222, "step": 8783 }, { "epoch": 2.92, "learning_rate": 4.1720152353209584e-08, "loss": 0.1429, "step": 8784 }, { "epoch": 2.92, "learning_rate": 4.139380256442671e-08, "loss": 0.1496, "step": 8785 }, { "epoch": 2.92, "learning_rate": 4.1068731556973554e-08, "loss": 0.1535, "step": 8786 }, { "epoch": 2.92, "learning_rate": 4.074493937259227e-08, "loss": 0.1031, "step": 8787 }, { "epoch": 2.92, "learning_rate": 4.042242605285851e-08, "loss": 0.1475, "step": 8788 }, { "epoch": 2.92, "learning_rate": 4.010119163918913e-08, "loss": 0.1242, "step": 8789 }, { "epoch": 2.92, "learning_rate": 3.978123617283114e-08, "loss": 0.098, "step": 8790 }, { "epoch": 2.92, "learning_rate": 3.946255969487167e-08, "loss": 0.1117, "step": 8791 }, { "epoch": 2.92, "learning_rate": 3.914516224623244e-08, "loss": 0.1265, "step": 8792 }, { "epoch": 2.92, "learning_rate": 3.8829043867667504e-08, "loss": 0.1565, "step": 8793 }, { "epoch": 2.92, "learning_rate": 3.8514204599772174e-08, "loss": 0.1515, "step": 8794 }, { "epoch": 2.92, "learning_rate": 3.8200644482973e-08, "loss": 0.159, "step": 8795 }, { "epoch": 2.92, "learning_rate": 3.788836355753667e-08, "loss": 0.1464, "step": 8796 }, { "epoch": 2.92, "learning_rate": 3.757736186355998e-08, "loss": 0.119, "step": 8797 }, { "epoch": 2.92, "learning_rate": 3.7267639440980997e-08, "loss": 0.1577, "step": 8798 }, { "epoch": 2.92, "learning_rate": 3.6959196329569015e-08, "loss": 0.1182, "step": 8799 }, { "epoch": 2.92, "learning_rate": 3.665203256893235e-08, "loss": 0.0637, "step": 8800 }, { "epoch": 2.92, "learning_rate": 3.634614819851501e-08, "loss": 0.1516, "step": 8801 }, { "epoch": 2.92, "learning_rate": 3.604154325759335e-08, "loss": 0.0794, "step": 8802 }, { "epoch": 2.92, "learning_rate": 3.5738217785281637e-08, "loss": 0.1177, "step": 8803 }, { "epoch": 2.92, "learning_rate": 3.543617182053094e-08, "loss": 0.0926, "step": 8804 }, { "epoch": 2.92, "learning_rate": 3.5135405402126897e-08, "loss": 0.187, "step": 8805 }, { "epoch": 2.92, "learning_rate": 3.483591856869084e-08, "loss": 0.1501, "step": 8806 }, { "epoch": 2.92, "learning_rate": 3.4537711358678674e-08, "loss": 0.0907, "step": 8807 }, { "epoch": 2.92, "learning_rate": 3.42407838103842e-08, "loss": 0.1212, "step": 8808 }, { "epoch": 2.92, "learning_rate": 3.394513596193472e-08, "loss": 0.1957, "step": 8809 }, { "epoch": 2.92, "learning_rate": 3.365076785129429e-08, "loss": 0.1274, "step": 8810 }, { "epoch": 2.92, "learning_rate": 3.3357679516264894e-08, "loss": 0.2046, "step": 8811 }, { "epoch": 2.92, "learning_rate": 3.3065870994479775e-08, "loss": 0.1967, "step": 8812 }, { "epoch": 2.92, "learning_rate": 3.277534232341007e-08, "loss": 0.1048, "step": 8813 }, { "epoch": 2.93, "learning_rate": 3.248609354036258e-08, "loss": 0.1189, "step": 8814 }, { "epoch": 2.93, "learning_rate": 3.2198124682480956e-08, "loss": 0.0958, "step": 8815 }, { "epoch": 2.93, "learning_rate": 3.1911435786741165e-08, "loss": 0.1148, "step": 8816 }, { "epoch": 2.93, "learning_rate": 3.16260268899582e-08, "loss": 0.1491, "step": 8817 }, { "epoch": 2.93, "learning_rate": 3.1341898028780516e-08, "loss": 0.1118, "step": 8818 }, { "epoch": 2.93, "learning_rate": 3.105904923969338e-08, "loss": 0.1486, "step": 8819 }, { "epoch": 2.93, "learning_rate": 3.077748055901775e-08, "loss": 0.1274, "step": 8820 }, { "epoch": 2.93, "learning_rate": 3.049719202290913e-08, "loss": 0.1206, "step": 8821 }, { "epoch": 2.93, "learning_rate": 3.0218183667359844e-08, "loss": 0.1259, "step": 8822 }, { "epoch": 2.93, "learning_rate": 2.99404555281968e-08, "loss": 0.1503, "step": 8823 }, { "epoch": 2.93, "learning_rate": 2.966400764108368e-08, "loss": 0.1352, "step": 8824 }, { "epoch": 2.93, "learning_rate": 2.9388840041517652e-08, "loss": 0.0835, "step": 8825 }, { "epoch": 2.93, "learning_rate": 2.911495276483378e-08, "loss": 0.0574, "step": 8826 }, { "epoch": 2.93, "learning_rate": 2.8842345846201712e-08, "loss": 0.1116, "step": 8827 }, { "epoch": 2.93, "learning_rate": 2.8571019320627892e-08, "loss": 0.1298, "step": 8828 }, { "epoch": 2.93, "learning_rate": 2.830097322295111e-08, "loss": 0.0953, "step": 8829 }, { "epoch": 2.93, "learning_rate": 2.8032207587849192e-08, "loss": 0.1148, "step": 8830 }, { "epoch": 2.93, "learning_rate": 2.7764722449834523e-08, "loss": 0.1109, "step": 8831 }, { "epoch": 2.93, "learning_rate": 2.749851784325519e-08, "loss": 0.1235, "step": 8832 }, { "epoch": 2.93, "learning_rate": 2.7233593802292733e-08, "loss": 0.1356, "step": 8833 }, { "epoch": 2.93, "learning_rate": 2.696995036096661e-08, "loss": 0.178, "step": 8834 }, { "epoch": 2.93, "learning_rate": 2.6707587553130852e-08, "loss": 0.1172, "step": 8835 }, { "epoch": 2.93, "learning_rate": 2.6446505412477397e-08, "loss": 0.0986, "step": 8836 }, { "epoch": 2.93, "learning_rate": 2.6186703972529424e-08, "loss": 0.1547, "step": 8837 }, { "epoch": 2.93, "learning_rate": 2.5928183266649143e-08, "loss": 0.1402, "step": 8838 }, { "epoch": 2.93, "learning_rate": 2.5670943328033325e-08, "loss": 0.1571, "step": 8839 }, { "epoch": 2.93, "learning_rate": 2.541498418971222e-08, "loss": 0.1362, "step": 8840 }, { "epoch": 2.93, "learning_rate": 2.5160305884556202e-08, "loss": 0.1462, "step": 8841 }, { "epoch": 2.93, "learning_rate": 2.4906908445265775e-08, "loss": 0.0959, "step": 8842 }, { "epoch": 2.93, "learning_rate": 2.4654791904381582e-08, "loss": 0.1787, "step": 8843 }, { "epoch": 2.94, "learning_rate": 2.440395629427661e-08, "loss": 0.1019, "step": 8844 }, { "epoch": 2.94, "learning_rate": 2.4154401647160652e-08, "loss": 0.1218, "step": 8845 }, { "epoch": 2.94, "learning_rate": 2.3906127995080297e-08, "loss": 0.1349, "step": 8846 }, { "epoch": 2.94, "learning_rate": 2.365913536991449e-08, "loss": 0.1367, "step": 8847 }, { "epoch": 2.94, "learning_rate": 2.3413423803380074e-08, "loss": 0.0813, "step": 8848 }, { "epoch": 2.94, "learning_rate": 2.31689933270296e-08, "loss": 0.1346, "step": 8849 }, { "epoch": 2.94, "learning_rate": 2.292584397225017e-08, "loss": 0.1481, "step": 8850 }, { "epoch": 2.94, "learning_rate": 2.2683975770263488e-08, "loss": 0.1506, "step": 8851 }, { "epoch": 2.94, "learning_rate": 2.2443388752128036e-08, "loss": 0.159, "step": 8852 }, { "epoch": 2.94, "learning_rate": 2.2204082948737994e-08, "loss": 0.1415, "step": 8853 }, { "epoch": 2.94, "learning_rate": 2.196605839082211e-08, "loss": 0.1214, "step": 8854 }, { "epoch": 2.94, "learning_rate": 2.1729315108945937e-08, "loss": 0.1873, "step": 8855 }, { "epoch": 2.94, "learning_rate": 2.1493853133508493e-08, "loss": 0.1685, "step": 8856 }, { "epoch": 2.94, "learning_rate": 2.1259672494746698e-08, "loss": 0.1494, "step": 8857 }, { "epoch": 2.94, "learning_rate": 2.1026773222729835e-08, "loss": 0.0966, "step": 8858 }, { "epoch": 2.94, "learning_rate": 2.0795155347365093e-08, "loss": 0.1175, "step": 8859 }, { "epoch": 2.94, "learning_rate": 2.0564818898395345e-08, "loss": 0.0942, "step": 8860 }, { "epoch": 2.94, "learning_rate": 2.0335763905396933e-08, "loss": 0.1392, "step": 8861 }, { "epoch": 2.94, "learning_rate": 2.0107990397782995e-08, "loss": 0.1493, "step": 8862 }, { "epoch": 2.94, "learning_rate": 1.9881498404802357e-08, "loss": 0.1308, "step": 8863 }, { "epoch": 2.94, "learning_rate": 1.9656287955538423e-08, "loss": 0.1609, "step": 8864 }, { "epoch": 2.94, "learning_rate": 1.9432359078910278e-08, "loss": 0.2119, "step": 8865 }, { "epoch": 2.94, "learning_rate": 1.9209711803672704e-08, "loss": 0.1279, "step": 8866 }, { "epoch": 2.94, "learning_rate": 1.898834615841616e-08, "loss": 0.158, "step": 8867 }, { "epoch": 2.94, "learning_rate": 1.8768262171565687e-08, "loss": 0.106, "step": 8868 }, { "epoch": 2.94, "learning_rate": 1.8549459871380903e-08, "loss": 0.156, "step": 8869 }, { "epoch": 2.94, "learning_rate": 1.8331939285961554e-08, "loss": 0.1485, "step": 8870 }, { "epoch": 2.94, "learning_rate": 1.8115700443235297e-08, "loss": 0.1701, "step": 8871 }, { "epoch": 2.94, "learning_rate": 1.790074337097214e-08, "loss": 0.1688, "step": 8872 }, { "epoch": 2.94, "learning_rate": 1.768706809677445e-08, "loss": 0.0858, "step": 8873 }, { "epoch": 2.95, "learning_rate": 1.747467464807917e-08, "loss": 0.1871, "step": 8874 }, { "epoch": 2.95, "learning_rate": 1.7263563052158927e-08, "loss": 0.1092, "step": 8875 }, { "epoch": 2.95, "learning_rate": 1.7053733336124256e-08, "loss": 0.1236, "step": 8876 }, { "epoch": 2.95, "learning_rate": 1.684518552691916e-08, "loss": 0.1534, "step": 8877 }, { "epoch": 2.95, "learning_rate": 1.6637919651323332e-08, "loss": 0.1169, "step": 8878 }, { "epoch": 2.95, "learning_rate": 1.6431935735949923e-08, "loss": 0.112, "step": 8879 }, { "epoch": 2.95, "learning_rate": 1.622723380725e-08, "loss": 0.1173, "step": 8880 }, { "epoch": 2.95, "learning_rate": 1.602381389151142e-08, "loss": 0.1661, "step": 8881 }, { "epoch": 2.95, "learning_rate": 1.5821676014852184e-08, "loss": 0.1456, "step": 8882 }, { "epoch": 2.95, "learning_rate": 1.5620820203230412e-08, "loss": 0.1609, "step": 8883 }, { "epoch": 2.95, "learning_rate": 1.5421246482437702e-08, "loss": 0.1351, "step": 8884 }, { "epoch": 2.95, "learning_rate": 1.5222954878101327e-08, "loss": 0.1516, "step": 8885 }, { "epoch": 2.95, "learning_rate": 1.502594541568314e-08, "loss": 0.1572, "step": 8886 }, { "epoch": 2.95, "learning_rate": 1.4830218120481799e-08, "loss": 0.1312, "step": 8887 }, { "epoch": 2.95, "learning_rate": 1.4635773017630528e-08, "loss": 0.1269, "step": 8888 }, { "epoch": 2.95, "learning_rate": 1.4442610132098245e-08, "loss": 0.1163, "step": 8889 }, { "epoch": 2.95, "learning_rate": 1.4250729488688442e-08, "loss": 0.1182, "step": 8890 }, { "epoch": 2.95, "learning_rate": 1.4060131112040299e-08, "loss": 0.1261, "step": 8891 }, { "epoch": 2.95, "learning_rate": 1.387081502662868e-08, "loss": 0.1075, "step": 8892 }, { "epoch": 2.95, "learning_rate": 1.3682781256763033e-08, "loss": 0.1262, "step": 8893 }, { "epoch": 2.95, "learning_rate": 1.3496029826590707e-08, "loss": 0.1207, "step": 8894 }, { "epoch": 2.95, "learning_rate": 1.3310560760090297e-08, "loss": 0.1202, "step": 8895 }, { "epoch": 2.95, "learning_rate": 1.3126374081078308e-08, "loss": 0.1327, "step": 8896 }, { "epoch": 2.95, "learning_rate": 1.2943469813205822e-08, "loss": 0.1002, "step": 8897 }, { "epoch": 2.95, "learning_rate": 1.2761847979960717e-08, "loss": 0.1623, "step": 8898 }, { "epoch": 2.95, "learning_rate": 1.258150860466434e-08, "loss": 0.15, "step": 8899 }, { "epoch": 2.95, "learning_rate": 1.2402451710473717e-08, "loss": 0.1246, "step": 8900 }, { "epoch": 2.95, "learning_rate": 1.2224677320382683e-08, "loss": 0.1025, "step": 8901 }, { "epoch": 2.95, "learning_rate": 1.204818545721742e-08, "loss": 0.1489, "step": 8902 }, { "epoch": 2.95, "learning_rate": 1.1872976143642023e-08, "loss": 0.1072, "step": 8903 }, { "epoch": 2.96, "learning_rate": 1.169904940215405e-08, "loss": 0.1253, "step": 8904 }, { "epoch": 2.96, "learning_rate": 1.1526405255090078e-08, "loss": 0.1211, "step": 8905 }, { "epoch": 2.96, "learning_rate": 1.1355043724616821e-08, "loss": 0.088, "step": 8906 }, { "epoch": 2.96, "learning_rate": 1.1184964832738898e-08, "loss": 0.1104, "step": 8907 }, { "epoch": 2.96, "learning_rate": 1.1016168601297727e-08, "loss": 0.1776, "step": 8908 }, { "epoch": 2.96, "learning_rate": 1.0848655051965974e-08, "loss": 0.1485, "step": 8909 }, { "epoch": 2.96, "learning_rate": 1.0682424206256426e-08, "loss": 0.1367, "step": 8910 }, { "epoch": 2.96, "learning_rate": 1.0517476085514233e-08, "loss": 0.1099, "step": 8911 }, { "epoch": 2.96, "learning_rate": 1.035381071091912e-08, "loss": 0.1192, "step": 8912 }, { "epoch": 2.96, "learning_rate": 1.0191428103487611e-08, "loss": 0.1457, "step": 8913 }, { "epoch": 2.96, "learning_rate": 1.0030328284071911e-08, "loss": 0.1103, "step": 8914 }, { "epoch": 2.96, "learning_rate": 9.870511273357696e-09, "loss": 0.1077, "step": 8915 }, { "epoch": 2.96, "learning_rate": 9.711977091868552e-09, "loss": 0.113, "step": 8916 }, { "epoch": 2.96, "learning_rate": 9.554725759960416e-09, "loss": 0.1262, "step": 8917 }, { "epoch": 2.96, "learning_rate": 9.398757297827133e-09, "loss": 0.113, "step": 8918 }, { "epoch": 2.96, "learning_rate": 9.244071725496017e-09, "loss": 0.0867, "step": 8919 }, { "epoch": 2.96, "learning_rate": 9.09066906282896e-09, "loss": 0.1261, "step": 8920 }, { "epoch": 2.96, "learning_rate": 8.938549329526869e-09, "loss": 0.1053, "step": 8921 }, { "epoch": 2.96, "learning_rate": 8.787712545120785e-09, "loss": 0.1319, "step": 8922 }, { "epoch": 2.96, "learning_rate": 8.638158728980773e-09, "loss": 0.159, "step": 8923 }, { "epoch": 2.96, "learning_rate": 8.489887900311466e-09, "loss": 0.1361, "step": 8924 }, { "epoch": 2.96, "learning_rate": 8.342900078152082e-09, "loss": 0.0773, "step": 8925 }, { "epoch": 2.96, "learning_rate": 8.197195281376414e-09, "loss": 0.119, "step": 8926 }, { "epoch": 2.96, "learning_rate": 8.052773528695046e-09, "loss": 0.0861, "step": 8927 }, { "epoch": 2.96, "learning_rate": 7.909634838653146e-09, "loss": 0.1298, "step": 8928 }, { "epoch": 2.96, "learning_rate": 7.767779229631566e-09, "loss": 0.1265, "step": 8929 }, { "epoch": 2.96, "learning_rate": 7.627206719844626e-09, "loss": 0.1094, "step": 8930 }, { "epoch": 2.96, "learning_rate": 7.487917327344552e-09, "loss": 0.1385, "step": 8931 }, { "epoch": 2.96, "learning_rate": 7.349911070015925e-09, "loss": 0.1219, "step": 8932 }, { "epoch": 2.96, "learning_rate": 7.213187965582347e-09, "loss": 0.153, "step": 8933 }, { "epoch": 2.97, "learning_rate": 7.077748031597553e-09, "loss": 0.1089, "step": 8934 }, { "epoch": 2.97, "learning_rate": 6.943591285455409e-09, "loss": 0.157, "step": 8935 }, { "epoch": 2.97, "learning_rate": 6.810717744383244e-09, "loss": 0.0837, "step": 8936 }, { "epoch": 2.97, "learning_rate": 6.679127425440745e-09, "loss": 0.1053, "step": 8937 }, { "epoch": 2.97, "learning_rate": 6.548820345528839e-09, "loss": 0.1103, "step": 8938 }, { "epoch": 2.97, "learning_rate": 6.419796521377475e-09, "loss": 0.0809, "step": 8939 }, { "epoch": 2.97, "learning_rate": 6.2920559695556215e-09, "loss": 0.1577, "step": 8940 }, { "epoch": 2.97, "learning_rate": 6.165598706465714e-09, "loss": 0.11, "step": 8941 }, { "epoch": 2.97, "learning_rate": 6.040424748346985e-09, "loss": 0.0933, "step": 8942 }, { "epoch": 2.97, "learning_rate": 5.916534111273242e-09, "loss": 0.1357, "step": 8943 }, { "epoch": 2.97, "learning_rate": 5.793926811151762e-09, "loss": 0.0995, "step": 8944 }, { "epoch": 2.97, "learning_rate": 5.672602863727728e-09, "loss": 0.1273, "step": 8945 }, { "epoch": 2.97, "learning_rate": 5.552562284580898e-09, "loss": 0.1021, "step": 8946 }, { "epoch": 2.97, "learning_rate": 5.43380508912339e-09, "loss": 0.141, "step": 8947 }, { "epoch": 2.97, "learning_rate": 5.316331292607446e-09, "loss": 0.1945, "step": 8948 }, { "epoch": 2.97, "learning_rate": 5.2001409101154474e-09, "loss": 0.0986, "step": 8949 }, { "epoch": 2.97, "learning_rate": 5.085233956568792e-09, "loss": 0.0935, "step": 8950 }, { "epoch": 2.97, "learning_rate": 4.971610446722341e-09, "loss": 0.1181, "step": 8951 }, { "epoch": 2.97, "learning_rate": 4.859270395166649e-09, "loss": 0.1141, "step": 8952 }, { "epoch": 2.97, "learning_rate": 4.748213816326841e-09, "loss": 0.1191, "step": 8953 }, { "epoch": 2.97, "learning_rate": 4.638440724462623e-09, "loss": 0.0906, "step": 8954 }, { "epoch": 2.97, "learning_rate": 4.529951133672716e-09, "loss": 0.1113, "step": 8955 }, { "epoch": 2.97, "learning_rate": 4.42274505788598e-09, "loss": 0.1271, "step": 8956 }, { "epoch": 2.97, "learning_rate": 4.3168225108691784e-09, "loss": 0.1219, "step": 8957 }, { "epoch": 2.97, "learning_rate": 4.2121835062247654e-09, "loss": 0.1204, "step": 8958 }, { "epoch": 2.97, "learning_rate": 4.108828057387548e-09, "loss": 0.0792, "step": 8959 }, { "epoch": 2.97, "learning_rate": 4.0067561776313545e-09, "loss": 0.1072, "step": 8960 }, { "epoch": 2.97, "learning_rate": 3.9059678800612564e-09, "loss": 0.1316, "step": 8961 }, { "epoch": 2.97, "learning_rate": 3.806463177621345e-09, "loss": 0.134, "step": 8962 }, { "epoch": 2.97, "learning_rate": 3.708242083088065e-09, "loss": 0.143, "step": 8963 }, { "epoch": 2.98, "learning_rate": 3.6113046090735517e-09, "loss": 0.1101, "step": 8964 }, { "epoch": 2.98, "learning_rate": 3.515650768025625e-09, "loss": 0.1675, "step": 8965 }, { "epoch": 2.98, "learning_rate": 3.421280572228902e-09, "loss": 0.1763, "step": 8966 }, { "epoch": 2.98, "learning_rate": 3.328194033799248e-09, "loss": 0.106, "step": 8967 }, { "epoch": 2.98, "learning_rate": 3.2363911646904334e-09, "loss": 0.0959, "step": 8968 }, { "epoch": 2.98, "learning_rate": 3.145871976690806e-09, "loss": 0.1026, "step": 8969 }, { "epoch": 2.98, "learning_rate": 3.056636481424402e-09, "loss": 0.1549, "step": 8970 }, { "epoch": 2.98, "learning_rate": 2.968684690349832e-09, "loss": 0.1299, "step": 8971 }, { "epoch": 2.98, "learning_rate": 2.882016614760286e-09, "loss": 0.1582, "step": 8972 }, { "epoch": 2.98, "learning_rate": 2.796632265785748e-09, "loss": 0.1344, "step": 8973 }, { "epoch": 2.98, "learning_rate": 2.712531654390782e-09, "loss": 0.1318, "step": 8974 }, { "epoch": 2.98, "learning_rate": 2.6297147913723063e-09, "loss": 0.0955, "step": 8975 }, { "epoch": 2.98, "learning_rate": 2.5481816873673683e-09, "loss": 0.1097, "step": 8976 }, { "epoch": 2.98, "learning_rate": 2.4679323528442602e-09, "loss": 0.0976, "step": 8977 }, { "epoch": 2.98, "learning_rate": 2.388966798108072e-09, "loss": 0.153, "step": 8978 }, { "epoch": 2.98, "learning_rate": 2.311285033299582e-09, "loss": 0.066, "step": 8979 }, { "epoch": 2.98, "learning_rate": 2.2348870683919223e-09, "loss": 0.0732, "step": 8980 }, { "epoch": 2.98, "learning_rate": 2.1597729131972444e-09, "loss": 0.106, "step": 8981 }, { "epoch": 2.98, "learning_rate": 2.0859425773600562e-09, "loss": 0.1169, "step": 8982 }, { "epoch": 2.98, "learning_rate": 2.0133960703605517e-09, "loss": 0.1082, "step": 8983 }, { "epoch": 2.98, "learning_rate": 1.942133401515722e-09, "loss": 0.1411, "step": 8984 }, { "epoch": 2.98, "learning_rate": 1.8721545799749163e-09, "loss": 0.1376, "step": 8985 }, { "epoch": 2.98, "learning_rate": 1.803459614724279e-09, "loss": 0.1446, "step": 8986 }, { "epoch": 2.98, "learning_rate": 1.736048514585642e-09, "loss": 0.1076, "step": 8987 }, { "epoch": 2.98, "learning_rate": 1.6699212882154148e-09, "loss": 0.1281, "step": 8988 }, { "epoch": 2.98, "learning_rate": 1.6050779441045828e-09, "loss": 0.1284, "step": 8989 }, { "epoch": 2.98, "learning_rate": 1.5415184905798186e-09, "loss": 0.1219, "step": 8990 }, { "epoch": 2.98, "learning_rate": 1.4792429358012616e-09, "loss": 0.1372, "step": 8991 }, { "epoch": 2.98, "learning_rate": 1.4182512877680687e-09, "loss": 0.0815, "step": 8992 }, { "epoch": 2.98, "learning_rate": 1.358543554310643e-09, "loss": 0.1299, "step": 8993 }, { "epoch": 2.99, "learning_rate": 1.3001197430961843e-09, "loss": 0.1021, "step": 8994 }, { "epoch": 2.99, "learning_rate": 1.2429798616275802e-09, "loss": 0.1159, "step": 8995 }, { "epoch": 2.99, "learning_rate": 1.1871239172411841e-09, "loss": 0.1102, "step": 8996 }, { "epoch": 2.99, "learning_rate": 1.1325519171101474e-09, "loss": 0.1627, "step": 8997 }, { "epoch": 2.99, "learning_rate": 1.0792638682421975e-09, "loss": 0.145, "step": 8998 }, { "epoch": 2.99, "learning_rate": 1.0272597774796389e-09, "loss": 0.1105, "step": 8999 }, { "epoch": 2.99, "learning_rate": 9.765396515004634e-10, "loss": 0.1133, "step": 9000 }, { "epoch": 2.99, "learning_rate": 9.271034968172388e-10, "loss": 0.1376, "step": 9001 }, { "epoch": 2.99, "learning_rate": 8.789513197782207e-10, "loss": 0.1315, "step": 9002 }, { "epoch": 2.99, "learning_rate": 8.32083126566241e-10, "loss": 0.1051, "step": 9003 }, { "epoch": 2.99, "learning_rate": 7.86498923200929e-10, "loss": 0.1326, "step": 9004 }, { "epoch": 2.99, "learning_rate": 7.421987155342702e-10, "loss": 0.144, "step": 9005 }, { "epoch": 2.99, "learning_rate": 6.991825092561577e-10, "loss": 0.218, "step": 9006 }, { "epoch": 2.99, "learning_rate": 6.574503098888408e-10, "loss": 0.166, "step": 9007 }, { "epoch": 2.99, "learning_rate": 6.170021227924761e-10, "loss": 0.1575, "step": 9008 }, { "epoch": 2.99, "learning_rate": 5.778379531595768e-10, "loss": 0.0884, "step": 9009 }, { "epoch": 2.99, "learning_rate": 5.399578060205635e-10, "loss": 0.1206, "step": 9010 }, { "epoch": 2.99, "learning_rate": 5.033616862393231e-10, "loss": 0.0805, "step": 9011 }, { "epoch": 2.99, "learning_rate": 4.680495985154298e-10, "loss": 0.1339, "step": 9012 }, { "epoch": 2.99, "learning_rate": 4.3402154738192424e-10, "loss": 0.1171, "step": 9013 }, { "epoch": 2.99, "learning_rate": 4.012775372097544e-10, "loss": 0.0974, "step": 9014 }, { "epoch": 2.99, "learning_rate": 3.698175722022246e-10, "loss": 0.1261, "step": 9015 }, { "epoch": 2.99, "learning_rate": 3.396416564005467e-10, "loss": 0.1085, "step": 9016 }, { "epoch": 2.99, "learning_rate": 3.1074979367828884e-10, "loss": 0.1673, "step": 9017 }, { "epoch": 2.99, "learning_rate": 2.831419877469266e-10, "loss": 0.1099, "step": 9018 }, { "epoch": 2.99, "learning_rate": 2.5681824215029185e-10, "loss": 0.0976, "step": 9019 }, { "epoch": 2.99, "learning_rate": 2.3177856026901368e-10, "loss": 0.1336, "step": 9020 }, { "epoch": 2.99, "learning_rate": 2.0802294531829802e-10, "loss": 0.0822, "step": 9021 }, { "epoch": 2.99, "learning_rate": 1.8555140035014796e-10, "loss": 0.1175, "step": 9022 }, { "epoch": 2.99, "learning_rate": 1.6436392824670244e-10, "loss": 0.1699, "step": 9023 }, { "epoch": 3.0, "learning_rate": 1.4446053173244877e-10, "loss": 0.1575, "step": 9024 }, { "epoch": 3.0, "learning_rate": 1.2584121336089993e-10, "loss": 0.1288, "step": 9025 }, { "epoch": 3.0, "learning_rate": 1.0850597552236609e-10, "loss": 0.1524, "step": 9026 }, { "epoch": 3.0, "learning_rate": 9.245482044506482e-11, "loss": 0.1866, "step": 9027 }, { "epoch": 3.0, "learning_rate": 7.768775018845987e-11, "loss": 0.0887, "step": 9028 }, { "epoch": 3.0, "learning_rate": 6.420476664992237e-11, "loss": 0.1125, "step": 9029 }, { "epoch": 3.0, "learning_rate": 5.200587156029002e-11, "loss": 0.1153, "step": 9030 }, { "epoch": 3.0, "learning_rate": 4.1091066486087516e-11, "loss": 0.1092, "step": 9031 }, { "epoch": 3.0, "learning_rate": 3.146035282730608e-11, "loss": 0.0948, "step": 9032 }, { "epoch": 3.0, "learning_rate": 2.3113731824064843e-11, "loss": 0.0922, "step": 9033 }, { "epoch": 3.0, "learning_rate": 1.6051204544398347e-11, "loss": 0.1668, "step": 9034 }, { "epoch": 3.0, "learning_rate": 1.0272771897579247e-11, "loss": 0.1543, "step": 9035 }, { "epoch": 3.0, "learning_rate": 5.778434625236529e-12, "loss": 0.1335, "step": 9036 }, { "epoch": 3.0, "learning_rate": 2.5681933046861616e-12, "loss": 0.0735, "step": 9037 }, { "epoch": 3.0, "learning_rate": 6.420483467106664e-13, "loss": 0.1477, "step": 9038 }, { "epoch": 3.0, "learning_rate": 0.0, "loss": 0.1, "step": 9039 }, { "epoch": 3.0, "step": 9039, "total_flos": 1.9490160091070464e+17, "train_loss": 0.43012489277067034, "train_runtime": 99301.107, "train_samples_per_second": 11.65, "train_steps_per_second": 0.091 } ], "max_steps": 9039, "num_train_epochs": 3, "total_flos": 1.9490160091070464e+17, "trial_name": null, "trial_params": null }