{ "best_metric": 1.471304930449711e-13, "best_model_checkpoint": "./checkpoint-1000", "epoch": 2.999922845459455, "global_step": 4860, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 0.0, "loss": 9.5135, "step": 1 }, { "epoch": 0.0, "learning_rate": 0.0, "loss": 9.8103, "step": 2 }, { "epoch": 0.0, "learning_rate": 0.0, "loss": 9.8101, "step": 3 }, { "epoch": 0.0, "learning_rate": 6e-07, "loss": 10.1898, "step": 4 }, { "epoch": 0.0, "learning_rate": 1.2e-06, "loss": 9.8821, "step": 5 }, { "epoch": 0.0, "learning_rate": 1.2e-06, "loss": 9.6124, "step": 6 }, { "epoch": 0.0, "learning_rate": 1.8e-06, "loss": 9.9878, "step": 7 }, { "epoch": 0.0, "learning_rate": 1.8e-06, "loss": 9.1242, "step": 8 }, { "epoch": 0.01, "learning_rate": 1.8e-06, "loss": 9.1298, "step": 9 }, { "epoch": 0.01, "learning_rate": 1.8e-06, "loss": 9.2409, "step": 10 }, { "epoch": 0.01, "learning_rate": 2.4e-06, "loss": 9.132, "step": 11 }, { "epoch": 0.01, "learning_rate": 2.9999999999999997e-06, "loss": 9.4073, "step": 12 }, { "epoch": 0.01, "learning_rate": 3.6e-06, "loss": 8.771, "step": 13 }, { "epoch": 0.01, "learning_rate": 4.2e-06, "loss": 8.6399, "step": 14 }, { "epoch": 0.01, "learning_rate": 4.8e-06, "loss": 7.7917, "step": 15 }, { "epoch": 0.01, "learning_rate": 5.399999999999999e-06, "loss": 7.6343, "step": 16 }, { "epoch": 0.01, "learning_rate": 5.999999999999999e-06, "loss": 7.1902, "step": 17 }, { "epoch": 0.01, "learning_rate": 6.599999999999999e-06, "loss": 6.8994, "step": 18 }, { "epoch": 0.01, "learning_rate": 7.2e-06, "loss": 6.5587, "step": 19 }, { "epoch": 0.01, "learning_rate": 7.799999999999998e-06, "loss": 6.6495, "step": 20 }, { "epoch": 0.01, "learning_rate": 8.4e-06, "loss": 6.097, "step": 21 }, { "epoch": 0.01, "learning_rate": 8.999999999999999e-06, "loss": 5.8177, "step": 22 }, { "epoch": 0.01, "learning_rate": 9.6e-06, "loss": 5.8191, "step": 23 }, { "epoch": 0.01, "learning_rate": 1.02e-05, "loss": 5.7257, "step": 24 }, { "epoch": 0.02, "learning_rate": 1.0799999999999998e-05, "loss": 5.6024, "step": 25 }, { "epoch": 0.02, "learning_rate": 1.14e-05, "loss": 5.3748, "step": 26 }, { "epoch": 0.02, "learning_rate": 1.1999999999999999e-05, "loss": 5.379, "step": 27 }, { "epoch": 0.02, "learning_rate": 1.26e-05, "loss": 5.3335, "step": 28 }, { "epoch": 0.02, "learning_rate": 1.3199999999999997e-05, "loss": 5.0844, "step": 29 }, { "epoch": 0.02, "learning_rate": 1.3799999999999998e-05, "loss": 5.1396, "step": 30 }, { "epoch": 0.02, "learning_rate": 1.44e-05, "loss": 5.1483, "step": 31 }, { "epoch": 0.02, "learning_rate": 1.4999999999999999e-05, "loss": 4.7176, "step": 32 }, { "epoch": 0.02, "learning_rate": 1.5599999999999996e-05, "loss": 4.8332, "step": 33 }, { "epoch": 0.02, "learning_rate": 1.6199999999999997e-05, "loss": 4.8118, "step": 34 }, { "epoch": 0.02, "learning_rate": 1.68e-05, "loss": 4.6633, "step": 35 }, { "epoch": 0.02, "learning_rate": 1.74e-05, "loss": 4.6537, "step": 36 }, { "epoch": 0.02, "learning_rate": 1.7999999999999997e-05, "loss": 4.5941, "step": 37 }, { "epoch": 0.02, "learning_rate": 1.8599999999999998e-05, "loss": 4.6503, "step": 38 }, { "epoch": 0.02, "learning_rate": 1.92e-05, "loss": 4.5184, "step": 39 }, { "epoch": 0.02, "learning_rate": 1.98e-05, "loss": 4.4501, "step": 40 }, { "epoch": 0.03, "learning_rate": 2.04e-05, "loss": 4.3468, "step": 41 }, { "epoch": 0.03, "learning_rate": 2.1e-05, "loss": 4.4475, "step": 42 }, { "epoch": 0.03, "learning_rate": 2.1599999999999996e-05, "loss": 4.3442, "step": 43 }, { "epoch": 0.03, "learning_rate": 2.2199999999999998e-05, "loss": 4.0437, "step": 44 }, { "epoch": 0.03, "learning_rate": 2.28e-05, "loss": 4.0322, "step": 45 }, { "epoch": 0.03, "learning_rate": 2.34e-05, "loss": 3.8393, "step": 46 }, { "epoch": 0.03, "learning_rate": 2.3999999999999997e-05, "loss": 3.7762, "step": 47 }, { "epoch": 0.03, "learning_rate": 2.4599999999999998e-05, "loss": 3.514, "step": 48 }, { "epoch": 0.03, "learning_rate": 2.52e-05, "loss": 3.4973, "step": 49 }, { "epoch": 0.03, "learning_rate": 2.5799999999999997e-05, "loss": 3.3104, "step": 50 }, { "epoch": 0.03, "learning_rate": 2.6399999999999995e-05, "loss": 6.5173, "step": 51 }, { "epoch": 0.03, "learning_rate": 2.6999999999999996e-05, "loss": 6.3584, "step": 52 }, { "epoch": 0.03, "learning_rate": 2.7599999999999997e-05, "loss": 5.862, "step": 53 }, { "epoch": 0.03, "learning_rate": 2.8199999999999998e-05, "loss": 5.544, "step": 54 }, { "epoch": 0.03, "learning_rate": 2.88e-05, "loss": 5.3065, "step": 55 }, { "epoch": 0.03, "learning_rate": 2.94e-05, "loss": 5.209, "step": 56 }, { "epoch": 0.04, "learning_rate": 2.9999999999999997e-05, "loss": 5.3224, "step": 57 }, { "epoch": 0.04, "learning_rate": 3.06e-05, "loss": 5.1511, "step": 58 }, { "epoch": 0.04, "learning_rate": 3.119999999999999e-05, "loss": 5.1337, "step": 59 }, { "epoch": 0.04, "learning_rate": 3.1799999999999994e-05, "loss": 4.9414, "step": 60 }, { "epoch": 0.04, "learning_rate": 3.2399999999999995e-05, "loss": 4.8724, "step": 61 }, { "epoch": 0.04, "learning_rate": 3.2999999999999996e-05, "loss": 4.7764, "step": 62 }, { "epoch": 0.04, "learning_rate": 3.36e-05, "loss": 4.6878, "step": 63 }, { "epoch": 0.04, "learning_rate": 3.42e-05, "loss": 4.7272, "step": 64 }, { "epoch": 0.04, "learning_rate": 3.48e-05, "loss": 4.7984, "step": 65 }, { "epoch": 0.04, "learning_rate": 3.539999999999999e-05, "loss": 4.5159, "step": 66 }, { "epoch": 0.04, "learning_rate": 3.5999999999999994e-05, "loss": 4.8491, "step": 67 }, { "epoch": 0.04, "learning_rate": 3.6599999999999995e-05, "loss": 4.6092, "step": 68 }, { "epoch": 0.04, "learning_rate": 3.7199999999999996e-05, "loss": 4.5272, "step": 69 }, { "epoch": 0.04, "learning_rate": 3.78e-05, "loss": 4.5579, "step": 70 }, { "epoch": 0.04, "learning_rate": 3.84e-05, "loss": 4.4117, "step": 71 }, { "epoch": 0.04, "learning_rate": 3.9e-05, "loss": 4.5083, "step": 72 }, { "epoch": 0.05, "learning_rate": 3.96e-05, "loss": 4.555, "step": 73 }, { "epoch": 0.05, "learning_rate": 4.02e-05, "loss": 4.2987, "step": 74 }, { "epoch": 0.05, "learning_rate": 4.08e-05, "loss": 4.3312, "step": 75 }, { "epoch": 0.05, "learning_rate": 4.14e-05, "loss": 4.3045, "step": 76 }, { "epoch": 0.05, "learning_rate": 4.2e-05, "loss": 4.379, "step": 77 }, { "epoch": 0.05, "learning_rate": 4.259999999999999e-05, "loss": 4.3704, "step": 78 }, { "epoch": 0.05, "learning_rate": 4.319999999999999e-05, "loss": 4.3909, "step": 79 }, { "epoch": 0.05, "learning_rate": 4.3799999999999994e-05, "loss": 4.2669, "step": 80 }, { "epoch": 0.05, "learning_rate": 4.4399999999999995e-05, "loss": 4.0042, "step": 81 }, { "epoch": 0.05, "learning_rate": 4.4999999999999996e-05, "loss": 4.2425, "step": 82 }, { "epoch": 0.05, "learning_rate": 4.56e-05, "loss": 4.0899, "step": 83 }, { "epoch": 0.05, "learning_rate": 4.62e-05, "loss": 4.1337, "step": 84 }, { "epoch": 0.05, "learning_rate": 4.68e-05, "loss": 4.0863, "step": 85 }, { "epoch": 0.05, "learning_rate": 4.7399999999999993e-05, "loss": 4.0835, "step": 86 }, { "epoch": 0.05, "learning_rate": 4.7999999999999994e-05, "loss": 3.9743, "step": 87 }, { "epoch": 0.05, "learning_rate": 4.8599999999999995e-05, "loss": 4.0583, "step": 88 }, { "epoch": 0.05, "learning_rate": 4.9199999999999997e-05, "loss": 3.9071, "step": 89 }, { "epoch": 0.06, "learning_rate": 4.98e-05, "loss": 3.9489, "step": 90 }, { "epoch": 0.06, "learning_rate": 5.04e-05, "loss": 3.97, "step": 91 }, { "epoch": 0.06, "learning_rate": 5.1e-05, "loss": 3.7487, "step": 92 }, { "epoch": 0.06, "learning_rate": 5.1599999999999994e-05, "loss": 3.7244, "step": 93 }, { "epoch": 0.06, "learning_rate": 5.2199999999999995e-05, "loss": 3.6222, "step": 94 }, { "epoch": 0.06, "learning_rate": 5.279999999999999e-05, "loss": 3.6013, "step": 95 }, { "epoch": 0.06, "learning_rate": 5.339999999999999e-05, "loss": 3.4787, "step": 96 }, { "epoch": 0.06, "learning_rate": 5.399999999999999e-05, "loss": 3.3783, "step": 97 }, { "epoch": 0.06, "learning_rate": 5.459999999999999e-05, "loss": 3.392, "step": 98 }, { "epoch": 0.06, "learning_rate": 5.519999999999999e-05, "loss": 3.0215, "step": 99 }, { "epoch": 0.06, "learning_rate": 5.5799999999999994e-05, "loss": 2.9419, "step": 100 }, { "epoch": 0.06, "learning_rate": 5.6399999999999995e-05, "loss": 6.114, "step": 101 }, { "epoch": 0.06, "learning_rate": 5.6999999999999996e-05, "loss": 5.5252, "step": 102 }, { "epoch": 0.06, "learning_rate": 5.76e-05, "loss": 5.0695, "step": 103 }, { "epoch": 0.06, "learning_rate": 5.82e-05, "loss": 4.9737, "step": 104 }, { "epoch": 0.06, "learning_rate": 5.88e-05, "loss": 4.8192, "step": 105 }, { "epoch": 0.07, "learning_rate": 5.94e-05, "loss": 4.6852, "step": 106 }, { "epoch": 0.07, "learning_rate": 5.9999999999999995e-05, "loss": 4.8017, "step": 107 }, { "epoch": 0.07, "learning_rate": 6.0599999999999996e-05, "loss": 4.6657, "step": 108 }, { "epoch": 0.07, "learning_rate": 6.12e-05, "loss": 4.6415, "step": 109 }, { "epoch": 0.07, "learning_rate": 6.18e-05, "loss": 4.4974, "step": 110 }, { "epoch": 0.07, "learning_rate": 6.239999999999999e-05, "loss": 4.4745, "step": 111 }, { "epoch": 0.07, "learning_rate": 6.299999999999999e-05, "loss": 4.4467, "step": 112 }, { "epoch": 0.07, "learning_rate": 6.359999999999999e-05, "loss": 4.4226, "step": 113 }, { "epoch": 0.07, "learning_rate": 6.419999999999999e-05, "loss": 4.4979, "step": 114 }, { "epoch": 0.07, "learning_rate": 6.479999999999999e-05, "loss": 4.2777, "step": 115 }, { "epoch": 0.07, "learning_rate": 6.539999999999999e-05, "loss": 4.4617, "step": 116 }, { "epoch": 0.07, "learning_rate": 6.599999999999999e-05, "loss": 4.3205, "step": 117 }, { "epoch": 0.07, "learning_rate": 6.659999999999999e-05, "loss": 4.2775, "step": 118 }, { "epoch": 0.07, "learning_rate": 6.72e-05, "loss": 4.3194, "step": 119 }, { "epoch": 0.07, "learning_rate": 6.78e-05, "loss": 4.319, "step": 120 }, { "epoch": 0.07, "learning_rate": 6.84e-05, "loss": 4.38, "step": 121 }, { "epoch": 0.08, "learning_rate": 6.9e-05, "loss": 4.3715, "step": 122 }, { "epoch": 0.08, "learning_rate": 6.96e-05, "loss": 4.5273, "step": 123 }, { "epoch": 0.08, "learning_rate": 7.02e-05, "loss": 4.3016, "step": 124 }, { "epoch": 0.08, "learning_rate": 7.079999999999999e-05, "loss": 4.2581, "step": 125 }, { "epoch": 0.08, "learning_rate": 7.139999999999999e-05, "loss": 4.2407, "step": 126 }, { "epoch": 0.08, "learning_rate": 7.199999999999999e-05, "loss": 4.2266, "step": 127 }, { "epoch": 0.08, "learning_rate": 7.259999999999999e-05, "loss": 4.1345, "step": 128 }, { "epoch": 0.08, "learning_rate": 7.319999999999999e-05, "loss": 4.255, "step": 129 }, { "epoch": 0.08, "learning_rate": 7.379999999999999e-05, "loss": 4.1264, "step": 130 }, { "epoch": 0.08, "learning_rate": 7.439999999999999e-05, "loss": 4.1036, "step": 131 }, { "epoch": 0.08, "learning_rate": 7.5e-05, "loss": 4.0236, "step": 132 }, { "epoch": 0.08, "learning_rate": 7.56e-05, "loss": 4.1043, "step": 133 }, { "epoch": 0.08, "learning_rate": 7.62e-05, "loss": 4.0537, "step": 134 }, { "epoch": 0.08, "learning_rate": 7.68e-05, "loss": 4.0848, "step": 135 }, { "epoch": 0.08, "learning_rate": 7.74e-05, "loss": 4.0079, "step": 136 }, { "epoch": 0.08, "learning_rate": 7.8e-05, "loss": 3.9205, "step": 137 }, { "epoch": 0.09, "learning_rate": 7.86e-05, "loss": 3.9029, "step": 138 }, { "epoch": 0.09, "learning_rate": 7.92e-05, "loss": 3.918, "step": 139 }, { "epoch": 0.09, "learning_rate": 7.98e-05, "loss": 3.9781, "step": 140 }, { "epoch": 0.09, "learning_rate": 8.04e-05, "loss": 3.7566, "step": 141 }, { "epoch": 0.09, "learning_rate": 8.1e-05, "loss": 3.7063, "step": 142 }, { "epoch": 0.09, "learning_rate": 8.16e-05, "loss": 3.5859, "step": 143 }, { "epoch": 0.09, "learning_rate": 8.22e-05, "loss": 3.3238, "step": 144 }, { "epoch": 0.09, "learning_rate": 8.28e-05, "loss": 3.3694, "step": 145 }, { "epoch": 0.09, "learning_rate": 8.34e-05, "loss": 3.3059, "step": 146 }, { "epoch": 0.09, "learning_rate": 8.4e-05, "loss": 3.2696, "step": 147 }, { "epoch": 0.09, "learning_rate": 8.459999999999998e-05, "loss": 3.1954, "step": 148 }, { "epoch": 0.09, "learning_rate": 8.519999999999998e-05, "loss": 2.932, "step": 149 }, { "epoch": 0.09, "learning_rate": 8.579999999999998e-05, "loss": 2.9027, "step": 150 }, { "epoch": 0.09, "learning_rate": 8.639999999999999e-05, "loss": 5.9382, "step": 151 }, { "epoch": 0.09, "learning_rate": 8.699999999999999e-05, "loss": 5.2861, "step": 152 }, { "epoch": 0.09, "learning_rate": 8.759999999999999e-05, "loss": 4.9462, "step": 153 }, { "epoch": 0.1, "learning_rate": 8.819999999999999e-05, "loss": 4.7416, "step": 154 }, { "epoch": 0.1, "learning_rate": 8.879999999999999e-05, "loss": 4.6738, "step": 155 }, { "epoch": 0.1, "learning_rate": 8.939999999999999e-05, "loss": 4.6159, "step": 156 }, { "epoch": 0.1, "learning_rate": 8.999999999999999e-05, "loss": 4.661, "step": 157 }, { "epoch": 0.1, "learning_rate": 9.059999999999999e-05, "loss": 4.5277, "step": 158 }, { "epoch": 0.1, "learning_rate": 9.12e-05, "loss": 4.3633, "step": 159 }, { "epoch": 0.1, "learning_rate": 9.18e-05, "loss": 4.3865, "step": 160 }, { "epoch": 0.1, "learning_rate": 9.24e-05, "loss": 4.397, "step": 161 }, { "epoch": 0.1, "learning_rate": 9.3e-05, "loss": 4.2932, "step": 162 }, { "epoch": 0.1, "learning_rate": 9.36e-05, "loss": 4.5437, "step": 163 }, { "epoch": 0.1, "learning_rate": 9.419999999999999e-05, "loss": 4.51, "step": 164 }, { "epoch": 0.1, "learning_rate": 9.479999999999999e-05, "loss": 4.3269, "step": 165 }, { "epoch": 0.1, "learning_rate": 9.539999999999999e-05, "loss": 4.3626, "step": 166 }, { "epoch": 0.1, "learning_rate": 9.599999999999999e-05, "loss": 4.3732, "step": 167 }, { "epoch": 0.1, "learning_rate": 9.659999999999999e-05, "loss": 4.3292, "step": 168 }, { "epoch": 0.1, "learning_rate": 9.719999999999999e-05, "loss": 4.2986, "step": 169 }, { "epoch": 0.1, "learning_rate": 9.779999999999999e-05, "loss": 4.2012, "step": 170 }, { "epoch": 0.11, "learning_rate": 9.839999999999999e-05, "loss": 4.1703, "step": 171 }, { "epoch": 0.11, "learning_rate": 9.9e-05, "loss": 4.2443, "step": 172 }, { "epoch": 0.11, "learning_rate": 9.96e-05, "loss": 4.2526, "step": 173 }, { "epoch": 0.11, "learning_rate": 0.0001002, "loss": 4.2244, "step": 174 }, { "epoch": 0.11, "learning_rate": 0.0001008, "loss": 4.1979, "step": 175 }, { "epoch": 0.11, "learning_rate": 0.0001014, "loss": 4.1536, "step": 176 }, { "epoch": 0.11, "learning_rate": 0.000102, "loss": 4.2401, "step": 177 }, { "epoch": 0.11, "learning_rate": 0.0001026, "loss": 4.0987, "step": 178 }, { "epoch": 0.11, "learning_rate": 0.00010319999999999999, "loss": 4.0979, "step": 179 }, { "epoch": 0.11, "learning_rate": 0.00010379999999999999, "loss": 4.0801, "step": 180 }, { "epoch": 0.11, "learning_rate": 0.00010439999999999999, "loss": 4.1392, "step": 181 }, { "epoch": 0.11, "learning_rate": 0.00010499999999999999, "loss": 4.0059, "step": 182 }, { "epoch": 0.11, "learning_rate": 0.00010559999999999998, "loss": 4.051, "step": 183 }, { "epoch": 0.11, "learning_rate": 0.00010619999999999998, "loss": 3.9285, "step": 184 }, { "epoch": 0.11, "learning_rate": 0.00010679999999999998, "loss": 3.945, "step": 185 }, { "epoch": 0.11, "learning_rate": 0.00010739999999999998, "loss": 3.8085, "step": 186 }, { "epoch": 0.12, "learning_rate": 0.00010799999999999998, "loss": 3.9593, "step": 187 }, { "epoch": 0.12, "learning_rate": 0.00010859999999999998, "loss": 3.8777, "step": 188 }, { "epoch": 0.12, "learning_rate": 0.00010919999999999998, "loss": 3.815, "step": 189 }, { "epoch": 0.12, "learning_rate": 0.00010979999999999999, "loss": 3.7849, "step": 190 }, { "epoch": 0.12, "learning_rate": 0.00011039999999999999, "loss": 3.6886, "step": 191 }, { "epoch": 0.12, "learning_rate": 0.00011099999999999999, "loss": 3.5941, "step": 192 }, { "epoch": 0.12, "learning_rate": 0.00011159999999999999, "loss": 3.4718, "step": 193 }, { "epoch": 0.12, "learning_rate": 0.00011219999999999999, "loss": 3.355, "step": 194 }, { "epoch": 0.12, "learning_rate": 0.00011279999999999999, "loss": 3.2773, "step": 195 }, { "epoch": 0.12, "learning_rate": 0.00011339999999999999, "loss": 3.3025, "step": 196 }, { "epoch": 0.12, "learning_rate": 0.00011399999999999999, "loss": 3.2324, "step": 197 }, { "epoch": 0.12, "learning_rate": 0.0001146, "loss": 3.2263, "step": 198 }, { "epoch": 0.12, "learning_rate": 0.0001152, "loss": 2.8137, "step": 199 }, { "epoch": 0.12, "learning_rate": 0.0001158, "loss": 2.9228, "step": 200 }, { "epoch": 0.12, "learning_rate": 0.0001164, "loss": 5.8291, "step": 201 }, { "epoch": 0.12, "learning_rate": 0.000117, "loss": 4.9896, "step": 202 }, { "epoch": 0.13, "learning_rate": 0.0001176, "loss": 4.513, "step": 203 }, { "epoch": 0.13, "learning_rate": 0.0001182, "loss": 4.6057, "step": 204 }, { "epoch": 0.13, "learning_rate": 0.0001188, "loss": 4.7012, "step": 205 }, { "epoch": 0.13, "learning_rate": 0.0001194, "loss": 4.4101, "step": 206 }, { "epoch": 0.13, "learning_rate": 0.00011999999999999999, "loss": 4.4692, "step": 207 }, { "epoch": 0.13, "learning_rate": 0.00012059999999999999, "loss": 4.3521, "step": 208 }, { "epoch": 0.13, "learning_rate": 0.00012119999999999999, "loss": 4.299, "step": 209 }, { "epoch": 0.13, "learning_rate": 0.00012179999999999999, "loss": 4.3289, "step": 210 }, { "epoch": 0.13, "learning_rate": 0.0001224, "loss": 4.3255, "step": 211 }, { "epoch": 0.13, "learning_rate": 0.00012299999999999998, "loss": 4.2024, "step": 212 }, { "epoch": 0.13, "learning_rate": 0.0001236, "loss": 4.356, "step": 213 }, { "epoch": 0.13, "learning_rate": 0.00012419999999999998, "loss": 4.3464, "step": 214 }, { "epoch": 0.13, "learning_rate": 0.00012479999999999997, "loss": 4.2892, "step": 215 }, { "epoch": 0.13, "learning_rate": 0.00012539999999999999, "loss": 4.2194, "step": 216 }, { "epoch": 0.13, "learning_rate": 0.00012599999999999997, "loss": 4.3643, "step": 217 }, { "epoch": 0.13, "learning_rate": 0.0001266, "loss": 4.0856, "step": 218 }, { "epoch": 0.14, "learning_rate": 0.00012719999999999997, "loss": 4.2346, "step": 219 }, { "epoch": 0.14, "learning_rate": 0.0001278, "loss": 4.2318, "step": 220 }, { "epoch": 0.14, "learning_rate": 0.00012839999999999998, "loss": 4.1432, "step": 221 }, { "epoch": 0.14, "learning_rate": 0.000129, "loss": 4.2051, "step": 222 }, { "epoch": 0.14, "learning_rate": 0.00012959999999999998, "loss": 4.1461, "step": 223 }, { "epoch": 0.14, "learning_rate": 0.0001302, "loss": 4.1237, "step": 224 }, { "epoch": 0.14, "learning_rate": 0.00013079999999999998, "loss": 4.1349, "step": 225 }, { "epoch": 0.14, "learning_rate": 0.0001314, "loss": 4.0425, "step": 226 }, { "epoch": 0.14, "learning_rate": 0.00013199999999999998, "loss": 4.1628, "step": 227 }, { "epoch": 0.14, "learning_rate": 0.0001326, "loss": 4.0593, "step": 228 }, { "epoch": 0.14, "learning_rate": 0.00013319999999999999, "loss": 4.0131, "step": 229 }, { "epoch": 0.14, "learning_rate": 0.0001338, "loss": 4.0298, "step": 230 }, { "epoch": 0.14, "learning_rate": 0.0001344, "loss": 3.9874, "step": 231 }, { "epoch": 0.14, "learning_rate": 0.000135, "loss": 3.9562, "step": 232 }, { "epoch": 0.14, "learning_rate": 0.0001356, "loss": 3.9514, "step": 233 }, { "epoch": 0.14, "learning_rate": 0.0001362, "loss": 3.9638, "step": 234 }, { "epoch": 0.15, "learning_rate": 0.0001368, "loss": 4.055, "step": 235 }, { "epoch": 0.15, "learning_rate": 0.0001374, "loss": 3.9593, "step": 236 }, { "epoch": 0.15, "learning_rate": 0.000138, "loss": 3.7526, "step": 237 }, { "epoch": 0.15, "learning_rate": 0.0001386, "loss": 3.8853, "step": 238 }, { "epoch": 0.15, "learning_rate": 0.0001392, "loss": 3.6557, "step": 239 }, { "epoch": 0.15, "learning_rate": 0.00013979999999999998, "loss": 3.8447, "step": 240 }, { "epoch": 0.15, "learning_rate": 0.0001404, "loss": 3.6613, "step": 241 }, { "epoch": 0.15, "learning_rate": 0.00014099999999999998, "loss": 3.6865, "step": 242 }, { "epoch": 0.15, "learning_rate": 0.00014159999999999997, "loss": 3.5905, "step": 243 }, { "epoch": 0.15, "learning_rate": 0.0001422, "loss": 3.5122, "step": 244 }, { "epoch": 0.15, "learning_rate": 0.00014279999999999997, "loss": 3.5215, "step": 245 }, { "epoch": 0.15, "learning_rate": 0.0001434, "loss": 3.2726, "step": 246 }, { "epoch": 0.15, "learning_rate": 0.00014399999999999998, "loss": 3.4131, "step": 247 }, { "epoch": 0.15, "learning_rate": 0.0001446, "loss": 3.0505, "step": 248 }, { "epoch": 0.15, "learning_rate": 0.00014519999999999998, "loss": 2.9078, "step": 249 }, { "epoch": 0.15, "learning_rate": 0.0001458, "loss": 2.7024, "step": 250 }, { "epoch": 0.15, "learning_rate": 0.00014639999999999998, "loss": 5.8578, "step": 251 }, { "epoch": 0.16, "learning_rate": 0.000147, "loss": 5.3405, "step": 252 }, { "epoch": 0.16, "learning_rate": 0.00014759999999999998, "loss": 4.6349, "step": 253 }, { "epoch": 0.16, "learning_rate": 0.0001482, "loss": 4.7555, "step": 254 }, { "epoch": 0.16, "learning_rate": 0.00014879999999999998, "loss": 4.4432, "step": 255 }, { "epoch": 0.16, "learning_rate": 0.0001494, "loss": 4.4003, "step": 256 }, { "epoch": 0.16, "learning_rate": 0.00015, "loss": 4.5716, "step": 257 }, { "epoch": 0.16, "learning_rate": 0.00015059999999999997, "loss": 4.4934, "step": 258 }, { "epoch": 0.16, "learning_rate": 0.0001512, "loss": 4.3529, "step": 259 }, { "epoch": 0.16, "learning_rate": 0.00015179999999999998, "loss": 4.3214, "step": 260 }, { "epoch": 0.16, "learning_rate": 0.0001524, "loss": 4.3262, "step": 261 }, { "epoch": 0.16, "learning_rate": 0.00015299999999999998, "loss": 4.313, "step": 262 }, { "epoch": 0.16, "learning_rate": 0.0001536, "loss": 4.3297, "step": 263 }, { "epoch": 0.16, "learning_rate": 0.00015419999999999998, "loss": 4.2544, "step": 264 }, { "epoch": 0.16, "learning_rate": 0.0001548, "loss": 4.1435, "step": 265 }, { "epoch": 0.16, "learning_rate": 0.00015539999999999998, "loss": 4.1702, "step": 266 }, { "epoch": 0.16, "learning_rate": 0.000156, "loss": 4.2813, "step": 267 }, { "epoch": 0.17, "learning_rate": 0.00015659999999999998, "loss": 4.0714, "step": 268 }, { "epoch": 0.17, "learning_rate": 0.0001572, "loss": 4.3009, "step": 269 }, { "epoch": 0.17, "learning_rate": 0.0001578, "loss": 4.2906, "step": 270 }, { "epoch": 0.17, "learning_rate": 0.0001584, "loss": 4.1794, "step": 271 }, { "epoch": 0.17, "learning_rate": 0.000159, "loss": 4.0437, "step": 272 }, { "epoch": 0.17, "learning_rate": 0.0001596, "loss": 4.2904, "step": 273 }, { "epoch": 0.17, "learning_rate": 0.0001602, "loss": 4.4145, "step": 274 }, { "epoch": 0.17, "learning_rate": 0.0001608, "loss": 4.19, "step": 275 }, { "epoch": 0.17, "learning_rate": 0.0001614, "loss": 4.0709, "step": 276 }, { "epoch": 0.17, "learning_rate": 0.000162, "loss": 3.9692, "step": 277 }, { "epoch": 0.17, "learning_rate": 0.0001626, "loss": 4.0098, "step": 278 }, { "epoch": 0.17, "learning_rate": 0.0001632, "loss": 4.0831, "step": 279 }, { "epoch": 0.17, "learning_rate": 0.0001638, "loss": 4.2022, "step": 280 }, { "epoch": 0.17, "learning_rate": 0.0001644, "loss": 4.0885, "step": 281 }, { "epoch": 0.17, "learning_rate": 0.000165, "loss": 3.9334, "step": 282 }, { "epoch": 0.17, "learning_rate": 0.0001656, "loss": 3.838, "step": 283 }, { "epoch": 0.18, "learning_rate": 0.0001662, "loss": 4.0429, "step": 284 }, { "epoch": 0.18, "learning_rate": 0.0001668, "loss": 3.9703, "step": 285 }, { "epoch": 0.18, "learning_rate": 0.0001674, "loss": 4.0123, "step": 286 }, { "epoch": 0.18, "learning_rate": 0.000168, "loss": 4.0291, "step": 287 }, { "epoch": 0.18, "learning_rate": 0.0001686, "loss": 3.9261, "step": 288 }, { "epoch": 0.18, "learning_rate": 0.00016919999999999997, "loss": 3.8273, "step": 289 }, { "epoch": 0.18, "learning_rate": 0.00016979999999999998, "loss": 3.7701, "step": 290 }, { "epoch": 0.18, "learning_rate": 0.00017039999999999997, "loss": 3.833, "step": 291 }, { "epoch": 0.18, "learning_rate": 0.00017099999999999998, "loss": 3.5294, "step": 292 }, { "epoch": 0.18, "learning_rate": 0.00017159999999999997, "loss": 3.5094, "step": 293 }, { "epoch": 0.18, "learning_rate": 0.00017219999999999998, "loss": 3.4339, "step": 294 }, { "epoch": 0.18, "learning_rate": 0.00017279999999999997, "loss": 3.5833, "step": 295 }, { "epoch": 0.18, "learning_rate": 0.00017339999999999996, "loss": 3.2647, "step": 296 }, { "epoch": 0.18, "learning_rate": 0.00017399999999999997, "loss": 3.2495, "step": 297 }, { "epoch": 0.18, "learning_rate": 0.00017459999999999996, "loss": 3.2158, "step": 298 }, { "epoch": 0.18, "learning_rate": 0.00017519999999999998, "loss": 3.2869, "step": 299 }, { "epoch": 0.19, "learning_rate": 0.00017579999999999996, "loss": 3.2838, "step": 300 }, { "epoch": 0.19, "learning_rate": 0.00017639999999999998, "loss": 6.0054, "step": 301 }, { "epoch": 0.19, "learning_rate": 0.00017699999999999997, "loss": 5.665, "step": 302 }, { "epoch": 0.19, "learning_rate": 0.00017759999999999998, "loss": 4.9385, "step": 303 }, { "epoch": 0.19, "learning_rate": 0.00017819999999999997, "loss": 4.7416, "step": 304 }, { "epoch": 0.19, "learning_rate": 0.00017879999999999998, "loss": 4.6585, "step": 305 }, { "epoch": 0.19, "learning_rate": 0.00017939999999999997, "loss": 4.783, "step": 306 }, { "epoch": 0.19, "learning_rate": 0.00017999999999999998, "loss": 4.4418, "step": 307 }, { "epoch": 0.19, "learning_rate": 0.00018059999999999997, "loss": 4.5525, "step": 308 }, { "epoch": 0.19, "learning_rate": 0.00018119999999999999, "loss": 4.3392, "step": 309 }, { "epoch": 0.19, "learning_rate": 0.00018179999999999997, "loss": 4.3896, "step": 310 }, { "epoch": 0.19, "learning_rate": 0.0001824, "loss": 4.3516, "step": 311 }, { "epoch": 0.19, "learning_rate": 0.00018299999999999998, "loss": 4.4005, "step": 312 }, { "epoch": 0.19, "learning_rate": 0.0001836, "loss": 4.3409, "step": 313 }, { "epoch": 0.19, "learning_rate": 0.00018419999999999998, "loss": 4.3136, "step": 314 }, { "epoch": 0.19, "learning_rate": 0.0001848, "loss": 4.3484, "step": 315 }, { "epoch": 0.2, "learning_rate": 0.00018539999999999998, "loss": 4.214, "step": 316 }, { "epoch": 0.2, "learning_rate": 0.000186, "loss": 4.2314, "step": 317 }, { "epoch": 0.2, "learning_rate": 0.00018659999999999998, "loss": 4.1683, "step": 318 }, { "epoch": 0.2, "learning_rate": 0.0001872, "loss": 4.1335, "step": 319 }, { "epoch": 0.2, "learning_rate": 0.00018779999999999998, "loss": 4.157, "step": 320 }, { "epoch": 0.2, "learning_rate": 0.00018839999999999997, "loss": 4.3069, "step": 321 }, { "epoch": 0.2, "learning_rate": 0.00018899999999999999, "loss": 4.1282, "step": 322 }, { "epoch": 0.2, "learning_rate": 0.00018959999999999997, "loss": 4.0788, "step": 323 }, { "epoch": 0.2, "learning_rate": 0.0001902, "loss": 4.2503, "step": 324 }, { "epoch": 0.2, "learning_rate": 0.00019079999999999998, "loss": 4.1146, "step": 325 }, { "epoch": 0.2, "learning_rate": 0.0001914, "loss": 4.0915, "step": 326 }, { "epoch": 0.2, "learning_rate": 0.00019199999999999998, "loss": 4.1609, "step": 327 }, { "epoch": 0.2, "learning_rate": 0.0001926, "loss": 3.9915, "step": 328 }, { "epoch": 0.2, "learning_rate": 0.00019319999999999998, "loss": 4.0215, "step": 329 }, { "epoch": 0.2, "learning_rate": 0.0001938, "loss": 4.1747, "step": 330 }, { "epoch": 0.2, "learning_rate": 0.00019439999999999998, "loss": 3.9796, "step": 331 }, { "epoch": 0.2, "learning_rate": 0.000195, "loss": 4.0624, "step": 332 }, { "epoch": 0.21, "learning_rate": 0.00019559999999999998, "loss": 3.9148, "step": 333 }, { "epoch": 0.21, "learning_rate": 0.0001962, "loss": 3.773, "step": 334 }, { "epoch": 0.21, "learning_rate": 0.00019679999999999999, "loss": 3.6838, "step": 335 }, { "epoch": 0.21, "learning_rate": 0.0001974, "loss": 3.9912, "step": 336 }, { "epoch": 0.21, "learning_rate": 0.000198, "loss": 3.7385, "step": 337 }, { "epoch": 0.21, "learning_rate": 0.0001986, "loss": 3.7082, "step": 338 }, { "epoch": 0.21, "learning_rate": 0.0001992, "loss": 3.6715, "step": 339 }, { "epoch": 0.21, "learning_rate": 0.0001998, "loss": 3.6575, "step": 340 }, { "epoch": 0.21, "learning_rate": 0.0002004, "loss": 3.6502, "step": 341 }, { "epoch": 0.21, "learning_rate": 0.000201, "loss": 3.6207, "step": 342 }, { "epoch": 0.21, "learning_rate": 0.0002016, "loss": 3.52, "step": 343 }, { "epoch": 0.21, "learning_rate": 0.0002022, "loss": 3.2921, "step": 344 }, { "epoch": 0.21, "learning_rate": 0.0002028, "loss": 3.2503, "step": 345 }, { "epoch": 0.21, "learning_rate": 0.00020339999999999998, "loss": 3.1721, "step": 346 }, { "epoch": 0.21, "learning_rate": 0.000204, "loss": 3.1538, "step": 347 }, { "epoch": 0.21, "learning_rate": 0.00020459999999999999, "loss": 3.0343, "step": 348 }, { "epoch": 0.22, "learning_rate": 0.0002052, "loss": 2.7939, "step": 349 }, { "epoch": 0.22, "learning_rate": 0.0002058, "loss": 2.6998, "step": 350 }, { "epoch": 0.22, "learning_rate": 0.00020639999999999998, "loss": 5.7549, "step": 351 }, { "epoch": 0.22, "learning_rate": 0.00020699999999999996, "loss": 5.1232, "step": 352 }, { "epoch": 0.22, "learning_rate": 0.00020759999999999998, "loss": 4.7916, "step": 353 }, { "epoch": 0.22, "learning_rate": 0.00020819999999999996, "loss": 4.7006, "step": 354 }, { "epoch": 0.22, "learning_rate": 0.00020879999999999998, "loss": 4.5348, "step": 355 }, { "epoch": 0.22, "learning_rate": 0.00020939999999999997, "loss": 4.5586, "step": 356 }, { "epoch": 0.22, "learning_rate": 0.00020999999999999998, "loss": 4.4419, "step": 357 }, { "epoch": 0.22, "learning_rate": 0.00021059999999999997, "loss": 4.1398, "step": 358 }, { "epoch": 0.22, "learning_rate": 0.00021119999999999996, "loss": 4.3476, "step": 359 }, { "epoch": 0.22, "learning_rate": 0.00021179999999999997, "loss": 4.2306, "step": 360 }, { "epoch": 0.22, "learning_rate": 0.00021239999999999996, "loss": 4.2166, "step": 361 }, { "epoch": 0.22, "learning_rate": 0.00021299999999999997, "loss": 4.2093, "step": 362 }, { "epoch": 0.22, "learning_rate": 0.00021359999999999996, "loss": 4.2283, "step": 363 }, { "epoch": 0.22, "learning_rate": 0.00021419999999999998, "loss": 4.3396, "step": 364 }, { "epoch": 0.23, "learning_rate": 0.00021479999999999996, "loss": 4.2494, "step": 365 }, { "epoch": 0.23, "learning_rate": 0.00021539999999999998, "loss": 4.1817, "step": 366 }, { "epoch": 0.23, "learning_rate": 0.00021599999999999996, "loss": 4.2588, "step": 367 }, { "epoch": 0.23, "learning_rate": 0.00021659999999999998, "loss": 4.2413, "step": 368 }, { "epoch": 0.23, "learning_rate": 0.00021719999999999997, "loss": 4.1388, "step": 369 }, { "epoch": 0.23, "learning_rate": 0.00021779999999999998, "loss": 4.2769, "step": 370 }, { "epoch": 0.23, "learning_rate": 0.00021839999999999997, "loss": 4.1678, "step": 371 }, { "epoch": 0.23, "learning_rate": 0.00021899999999999998, "loss": 4.0985, "step": 372 }, { "epoch": 0.23, "learning_rate": 0.00021959999999999997, "loss": 4.0278, "step": 373 }, { "epoch": 0.23, "learning_rate": 0.00022019999999999999, "loss": 4.0552, "step": 374 }, { "epoch": 0.23, "learning_rate": 0.00022079999999999997, "loss": 4.0627, "step": 375 }, { "epoch": 0.23, "learning_rate": 0.0002214, "loss": 4.1588, "step": 376 }, { "epoch": 0.23, "learning_rate": 0.00022199999999999998, "loss": 4.0489, "step": 377 }, { "epoch": 0.23, "learning_rate": 0.0002226, "loss": 3.8041, "step": 378 }, { "epoch": 0.23, "learning_rate": 0.00022319999999999998, "loss": 3.8734, "step": 379 }, { "epoch": 0.23, "learning_rate": 0.0002238, "loss": 3.9844, "step": 380 }, { "epoch": 0.24, "learning_rate": 0.00022439999999999998, "loss": 4.0289, "step": 381 }, { "epoch": 0.24, "learning_rate": 0.000225, "loss": 3.8937, "step": 382 }, { "epoch": 0.24, "learning_rate": 0.00022559999999999998, "loss": 3.7529, "step": 383 }, { "epoch": 0.24, "learning_rate": 0.00022619999999999997, "loss": 3.9653, "step": 384 }, { "epoch": 0.24, "learning_rate": 0.00022679999999999998, "loss": 3.8649, "step": 385 }, { "epoch": 0.24, "learning_rate": 0.00022739999999999997, "loss": 3.8224, "step": 386 }, { "epoch": 0.24, "learning_rate": 0.00022799999999999999, "loss": 3.749, "step": 387 }, { "epoch": 0.24, "learning_rate": 0.00022859999999999997, "loss": 3.9977, "step": 388 }, { "epoch": 0.24, "learning_rate": 0.0002292, "loss": 3.7363, "step": 389 }, { "epoch": 0.24, "learning_rate": 0.00022979999999999997, "loss": 3.775, "step": 390 }, { "epoch": 0.24, "learning_rate": 0.0002304, "loss": 3.7372, "step": 391 }, { "epoch": 0.24, "learning_rate": 0.00023099999999999998, "loss": 3.5344, "step": 392 }, { "epoch": 0.24, "learning_rate": 0.0002316, "loss": 3.5338, "step": 393 }, { "epoch": 0.24, "learning_rate": 0.00023219999999999998, "loss": 3.3479, "step": 394 }, { "epoch": 0.24, "learning_rate": 0.0002328, "loss": 3.3349, "step": 395 }, { "epoch": 0.24, "learning_rate": 0.00023339999999999998, "loss": 3.278, "step": 396 }, { "epoch": 0.25, "learning_rate": 0.000234, "loss": 3.1448, "step": 397 }, { "epoch": 0.25, "learning_rate": 0.00023459999999999998, "loss": 3.1193, "step": 398 }, { "epoch": 0.25, "learning_rate": 0.0002352, "loss": 2.7586, "step": 399 }, { "epoch": 0.25, "learning_rate": 0.00023579999999999999, "loss": 2.8001, "step": 400 }, { "epoch": 0.25, "learning_rate": 0.0002364, "loss": 5.8658, "step": 401 }, { "epoch": 0.25, "learning_rate": 0.000237, "loss": 4.9548, "step": 402 }, { "epoch": 0.25, "learning_rate": 0.0002376, "loss": 4.9834, "step": 403 }, { "epoch": 0.25, "learning_rate": 0.0002382, "loss": 4.5121, "step": 404 }, { "epoch": 0.25, "learning_rate": 0.0002388, "loss": 4.5158, "step": 405 }, { "epoch": 0.25, "learning_rate": 0.0002394, "loss": 4.389, "step": 406 }, { "epoch": 0.25, "learning_rate": 0.00023999999999999998, "loss": 4.4211, "step": 407 }, { "epoch": 0.25, "learning_rate": 0.0002406, "loss": 4.3436, "step": 408 }, { "epoch": 0.25, "learning_rate": 0.00024119999999999998, "loss": 4.3404, "step": 409 }, { "epoch": 0.25, "learning_rate": 0.0002418, "loss": 4.4149, "step": 410 }, { "epoch": 0.25, "learning_rate": 0.00024239999999999998, "loss": 4.1582, "step": 411 }, { "epoch": 0.25, "learning_rate": 0.000243, "loss": 4.2055, "step": 412 }, { "epoch": 0.25, "learning_rate": 0.00024359999999999999, "loss": 4.3762, "step": 413 }, { "epoch": 0.26, "learning_rate": 0.00024419999999999997, "loss": 4.3286, "step": 414 }, { "epoch": 0.26, "learning_rate": 0.0002448, "loss": 4.3115, "step": 415 }, { "epoch": 0.26, "learning_rate": 0.00024539999999999995, "loss": 4.2365, "step": 416 }, { "epoch": 0.26, "learning_rate": 0.00024599999999999996, "loss": 4.2317, "step": 417 }, { "epoch": 0.26, "learning_rate": 0.0002466, "loss": 4.225, "step": 418 }, { "epoch": 0.26, "learning_rate": 0.0002472, "loss": 4.2532, "step": 419 }, { "epoch": 0.26, "learning_rate": 0.00024779999999999995, "loss": 4.1849, "step": 420 }, { "epoch": 0.26, "learning_rate": 0.00024839999999999997, "loss": 4.1829, "step": 421 }, { "epoch": 0.26, "learning_rate": 0.000249, "loss": 4.1261, "step": 422 }, { "epoch": 0.26, "learning_rate": 0.00024959999999999994, "loss": 4.2554, "step": 423 }, { "epoch": 0.26, "learning_rate": 0.00025019999999999996, "loss": 4.2029, "step": 424 }, { "epoch": 0.26, "learning_rate": 0.00025079999999999997, "loss": 4.0218, "step": 425 }, { "epoch": 0.26, "learning_rate": 0.0002514, "loss": 3.9889, "step": 426 }, { "epoch": 0.26, "learning_rate": 0.00025199999999999995, "loss": 4.0182, "step": 427 }, { "epoch": 0.26, "learning_rate": 0.00025259999999999996, "loss": 3.9755, "step": 428 }, { "epoch": 0.26, "learning_rate": 0.0002532, "loss": 4.0512, "step": 429 }, { "epoch": 0.27, "learning_rate": 0.0002538, "loss": 3.8566, "step": 430 }, { "epoch": 0.27, "learning_rate": 0.00025439999999999995, "loss": 3.9493, "step": 431 }, { "epoch": 0.27, "learning_rate": 0.00025499999999999996, "loss": 3.8412, "step": 432 }, { "epoch": 0.27, "learning_rate": 0.0002556, "loss": 3.8395, "step": 433 }, { "epoch": 0.27, "learning_rate": 0.0002562, "loss": 3.8858, "step": 434 }, { "epoch": 0.27, "learning_rate": 0.00025679999999999995, "loss": 3.7504, "step": 435 }, { "epoch": 0.27, "learning_rate": 0.00025739999999999997, "loss": 3.7391, "step": 436 }, { "epoch": 0.27, "learning_rate": 0.000258, "loss": 3.6711, "step": 437 }, { "epoch": 0.27, "learning_rate": 0.0002586, "loss": 3.742, "step": 438 }, { "epoch": 0.27, "learning_rate": 0.00025919999999999996, "loss": 3.6552, "step": 439 }, { "epoch": 0.27, "learning_rate": 0.00025979999999999997, "loss": 3.4729, "step": 440 }, { "epoch": 0.27, "learning_rate": 0.0002604, "loss": 3.5555, "step": 441 }, { "epoch": 0.27, "learning_rate": 0.000261, "loss": 3.4044, "step": 442 }, { "epoch": 0.27, "learning_rate": 0.00026159999999999996, "loss": 3.3579, "step": 443 }, { "epoch": 0.27, "learning_rate": 0.0002622, "loss": 3.2758, "step": 444 }, { "epoch": 0.27, "learning_rate": 0.0002628, "loss": 3.3752, "step": 445 }, { "epoch": 0.28, "learning_rate": 0.00026339999999999995, "loss": 3.0825, "step": 446 }, { "epoch": 0.28, "learning_rate": 0.00026399999999999997, "loss": 2.949, "step": 447 }, { "epoch": 0.28, "learning_rate": 0.0002646, "loss": 2.8313, "step": 448 }, { "epoch": 0.28, "learning_rate": 0.0002652, "loss": 2.8842, "step": 449 }, { "epoch": 0.28, "learning_rate": 0.00026579999999999996, "loss": 2.6834, "step": 450 }, { "epoch": 0.28, "learning_rate": 0.00026639999999999997, "loss": 5.9447, "step": 451 }, { "epoch": 0.28, "learning_rate": 0.000267, "loss": 5.0768, "step": 452 }, { "epoch": 0.28, "learning_rate": 0.0002676, "loss": 4.7908, "step": 453 }, { "epoch": 0.28, "learning_rate": 0.00026819999999999996, "loss": 4.722, "step": 454 }, { "epoch": 0.28, "learning_rate": 0.0002688, "loss": 4.4559, "step": 455 }, { "epoch": 0.28, "learning_rate": 0.0002694, "loss": 4.4045, "step": 456 }, { "epoch": 0.28, "learning_rate": 0.00027, "loss": 4.4917, "step": 457 }, { "epoch": 0.28, "learning_rate": 0.00027059999999999996, "loss": 4.3895, "step": 458 }, { "epoch": 0.28, "learning_rate": 0.0002712, "loss": 4.4436, "step": 459 }, { "epoch": 0.28, "learning_rate": 0.0002718, "loss": 4.0629, "step": 460 }, { "epoch": 0.28, "learning_rate": 0.0002724, "loss": 4.2174, "step": 461 }, { "epoch": 0.29, "learning_rate": 0.00027299999999999997, "loss": 4.2901, "step": 462 }, { "epoch": 0.29, "learning_rate": 0.0002736, "loss": 4.0733, "step": 463 }, { "epoch": 0.29, "learning_rate": 0.0002742, "loss": 4.2551, "step": 464 }, { "epoch": 0.29, "learning_rate": 0.0002748, "loss": 4.2565, "step": 465 }, { "epoch": 0.29, "learning_rate": 0.00027539999999999997, "loss": 4.4094, "step": 466 }, { "epoch": 0.29, "learning_rate": 0.000276, "loss": 4.1442, "step": 467 }, { "epoch": 0.29, "learning_rate": 0.0002766, "loss": 4.2225, "step": 468 }, { "epoch": 0.29, "learning_rate": 0.0002772, "loss": 4.1311, "step": 469 }, { "epoch": 0.29, "learning_rate": 0.0002778, "loss": 4.2722, "step": 470 }, { "epoch": 0.29, "learning_rate": 0.0002784, "loss": 4.0853, "step": 471 }, { "epoch": 0.29, "learning_rate": 0.000279, "loss": 4.275, "step": 472 }, { "epoch": 0.29, "learning_rate": 0.00027959999999999997, "loss": 4.1574, "step": 473 }, { "epoch": 0.29, "learning_rate": 0.0002802, "loss": 4.1913, "step": 474 }, { "epoch": 0.29, "learning_rate": 0.0002808, "loss": 4.0092, "step": 475 }, { "epoch": 0.29, "learning_rate": 0.00028139999999999996, "loss": 3.9728, "step": 476 }, { "epoch": 0.29, "learning_rate": 0.00028199999999999997, "loss": 4.0581, "step": 477 }, { "epoch": 0.3, "learning_rate": 0.0002826, "loss": 4.1989, "step": 478 }, { "epoch": 0.3, "learning_rate": 0.00028319999999999994, "loss": 4.0023, "step": 479 }, { "epoch": 0.3, "learning_rate": 0.00028379999999999996, "loss": 3.9102, "step": 480 }, { "epoch": 0.3, "learning_rate": 0.0002844, "loss": 3.9074, "step": 481 }, { "epoch": 0.3, "learning_rate": 0.000285, "loss": 4.0167, "step": 482 }, { "epoch": 0.3, "learning_rate": 0.00028559999999999995, "loss": 3.8945, "step": 483 }, { "epoch": 0.3, "learning_rate": 0.00028619999999999996, "loss": 3.8795, "step": 484 }, { "epoch": 0.3, "learning_rate": 0.0002868, "loss": 3.8178, "step": 485 }, { "epoch": 0.3, "learning_rate": 0.00028739999999999994, "loss": 3.6555, "step": 486 }, { "epoch": 0.3, "learning_rate": 0.00028799999999999995, "loss": 3.763, "step": 487 }, { "epoch": 0.3, "learning_rate": 0.00028859999999999997, "loss": 3.5611, "step": 488 }, { "epoch": 0.3, "learning_rate": 0.0002892, "loss": 3.7752, "step": 489 }, { "epoch": 0.3, "learning_rate": 0.00028979999999999994, "loss": 3.5473, "step": 490 }, { "epoch": 0.3, "learning_rate": 0.00029039999999999996, "loss": 3.4998, "step": 491 }, { "epoch": 0.3, "learning_rate": 0.00029099999999999997, "loss": 3.5321, "step": 492 }, { "epoch": 0.3, "learning_rate": 0.0002916, "loss": 3.3559, "step": 493 }, { "epoch": 0.3, "learning_rate": 0.00029219999999999995, "loss": 3.289, "step": 494 }, { "epoch": 0.31, "learning_rate": 0.00029279999999999996, "loss": 3.1925, "step": 495 }, { "epoch": 0.31, "learning_rate": 0.0002934, "loss": 3.0487, "step": 496 }, { "epoch": 0.31, "learning_rate": 0.000294, "loss": 2.9119, "step": 497 }, { "epoch": 0.31, "learning_rate": 0.00029459999999999995, "loss": 3.026, "step": 498 }, { "epoch": 0.31, "learning_rate": 0.00029519999999999997, "loss": 2.753, "step": 499 }, { "epoch": 0.31, "learning_rate": 0.0002958, "loss": 2.5511, "step": 500 }, { "epoch": 0.31, "eval_bleu": 0.0, "eval_loss": 5.10392951965332, "eval_runtime": 2560.6032, "eval_samples_per_second": 5.764, "eval_steps_per_second": 0.721, "step": 500 }, { "epoch": 0.31, "learning_rate": 0.0002964, "loss": 6.4484, "step": 501 }, { "epoch": 0.31, "learning_rate": 0.00029699999999999996, "loss": 5.5086, "step": 502 }, { "epoch": 0.31, "learning_rate": 0.00029759999999999997, "loss": 4.8983, "step": 503 }, { "epoch": 0.31, "learning_rate": 0.0002982, "loss": 4.9282, "step": 504 }, { "epoch": 0.31, "learning_rate": 0.0002988, "loss": 4.6444, "step": 505 }, { "epoch": 0.31, "learning_rate": 0.00029939999999999996, "loss": 4.4085, "step": 506 }, { "epoch": 0.31, "learning_rate": 0.0003, "loss": 4.4217, "step": 507 }, { "epoch": 0.31, "learning_rate": 0.0002999311926605504, "loss": 4.2745, "step": 508 }, { "epoch": 0.31, "learning_rate": 0.0002998623853211009, "loss": 4.4082, "step": 509 }, { "epoch": 0.31, "learning_rate": 0.00029979357798165133, "loss": 4.2605, "step": 510 }, { "epoch": 0.32, "learning_rate": 0.0002997247706422018, "loss": 4.4833, "step": 511 }, { "epoch": 0.32, "learning_rate": 0.00029965596330275225, "loss": 4.0582, "step": 512 }, { "epoch": 0.32, "learning_rate": 0.00029958715596330274, "loss": 4.0002, "step": 513 }, { "epoch": 0.32, "learning_rate": 0.0002995183486238532, "loss": 4.2744, "step": 514 }, { "epoch": 0.32, "learning_rate": 0.00029944954128440366, "loss": 4.1895, "step": 515 }, { "epoch": 0.32, "learning_rate": 0.0002993807339449541, "loss": 4.2262, "step": 516 }, { "epoch": 0.32, "learning_rate": 0.00029931192660550453, "loss": 4.2125, "step": 517 }, { "epoch": 0.32, "learning_rate": 0.000299243119266055, "loss": 4.1036, "step": 518 }, { "epoch": 0.32, "learning_rate": 0.00029917431192660545, "loss": 4.202, "step": 519 }, { "epoch": 0.32, "learning_rate": 0.00029910550458715594, "loss": 4.1888, "step": 520 }, { "epoch": 0.32, "learning_rate": 0.0002990366972477064, "loss": 4.1997, "step": 521 }, { "epoch": 0.32, "learning_rate": 0.00029896788990825687, "loss": 4.1112, "step": 522 }, { "epoch": 0.32, "learning_rate": 0.0002988990825688073, "loss": 4.1197, "step": 523 }, { "epoch": 0.32, "learning_rate": 0.0002988302752293578, "loss": 4.061, "step": 524 }, { "epoch": 0.32, "learning_rate": 0.0002987614678899082, "loss": 3.8829, "step": 525 }, { "epoch": 0.32, "learning_rate": 0.0002986926605504587, "loss": 3.8193, "step": 526 }, { "epoch": 0.33, "learning_rate": 0.00029862385321100914, "loss": 3.9218, "step": 527 }, { "epoch": 0.33, "learning_rate": 0.00029855504587155963, "loss": 4.0122, "step": 528 }, { "epoch": 0.33, "learning_rate": 0.00029848623853211007, "loss": 3.9433, "step": 529 }, { "epoch": 0.33, "learning_rate": 0.00029841743119266056, "loss": 4.0117, "step": 530 }, { "epoch": 0.33, "learning_rate": 0.000298348623853211, "loss": 3.8542, "step": 531 }, { "epoch": 0.33, "learning_rate": 0.0002982798165137614, "loss": 4.008, "step": 532 }, { "epoch": 0.33, "learning_rate": 0.0002982110091743119, "loss": 3.7908, "step": 533 }, { "epoch": 0.33, "learning_rate": 0.00029814220183486235, "loss": 3.7293, "step": 534 }, { "epoch": 0.33, "learning_rate": 0.00029807339449541284, "loss": 3.6423, "step": 535 }, { "epoch": 0.33, "learning_rate": 0.00029800458715596327, "loss": 3.8278, "step": 536 }, { "epoch": 0.33, "learning_rate": 0.00029793577981651376, "loss": 3.634, "step": 537 }, { "epoch": 0.33, "learning_rate": 0.0002978669724770642, "loss": 3.6174, "step": 538 }, { "epoch": 0.33, "learning_rate": 0.0002977981651376146, "loss": 3.5131, "step": 539 }, { "epoch": 0.33, "learning_rate": 0.0002977293577981651, "loss": 3.6799, "step": 540 }, { "epoch": 0.33, "learning_rate": 0.00029766055045871555, "loss": 3.5297, "step": 541 }, { "epoch": 0.33, "learning_rate": 0.00029759174311926604, "loss": 3.3935, "step": 542 }, { "epoch": 0.34, "learning_rate": 0.00029752293577981647, "loss": 3.3705, "step": 543 }, { "epoch": 0.34, "learning_rate": 0.00029745412844036696, "loss": 3.1697, "step": 544 }, { "epoch": 0.34, "learning_rate": 0.0002973853211009174, "loss": 3.2117, "step": 545 }, { "epoch": 0.34, "learning_rate": 0.0002973165137614679, "loss": 3.0101, "step": 546 }, { "epoch": 0.34, "learning_rate": 0.0002972477064220183, "loss": 3.0621, "step": 547 }, { "epoch": 0.34, "learning_rate": 0.00029717889908256875, "loss": 2.76, "step": 548 }, { "epoch": 0.34, "learning_rate": 0.00029711009174311924, "loss": 2.775, "step": 549 }, { "epoch": 0.34, "learning_rate": 0.00029704128440366967, "loss": 2.6342, "step": 550 }, { "epoch": 0.34, "learning_rate": 0.00029697247706422016, "loss": 5.6387, "step": 551 }, { "epoch": 0.34, "learning_rate": 0.0002969036697247706, "loss": 5.1001, "step": 552 }, { "epoch": 0.34, "learning_rate": 0.0002968348623853211, "loss": 4.7732, "step": 553 }, { "epoch": 0.34, "learning_rate": 0.0002967660550458715, "loss": 4.6534, "step": 554 }, { "epoch": 0.34, "learning_rate": 0.000296697247706422, "loss": 4.3346, "step": 555 }, { "epoch": 0.34, "learning_rate": 0.00029662844036697244, "loss": 4.3084, "step": 556 }, { "epoch": 0.34, "learning_rate": 0.00029655963302752293, "loss": 4.3598, "step": 557 }, { "epoch": 0.34, "learning_rate": 0.00029649082568807336, "loss": 4.4294, "step": 558 }, { "epoch": 0.35, "learning_rate": 0.00029642201834862385, "loss": 4.2965, "step": 559 }, { "epoch": 0.35, "learning_rate": 0.0002963532110091743, "loss": 4.2361, "step": 560 }, { "epoch": 0.35, "learning_rate": 0.0002962844036697248, "loss": 4.1292, "step": 561 }, { "epoch": 0.35, "learning_rate": 0.0002962155963302752, "loss": 4.1621, "step": 562 }, { "epoch": 0.35, "learning_rate": 0.0002961467889908257, "loss": 4.1383, "step": 563 }, { "epoch": 0.35, "learning_rate": 0.00029607798165137613, "loss": 4.2404, "step": 564 }, { "epoch": 0.35, "learning_rate": 0.00029600917431192656, "loss": 4.1117, "step": 565 }, { "epoch": 0.35, "learning_rate": 0.00029594036697247705, "loss": 4.0631, "step": 566 }, { "epoch": 0.35, "learning_rate": 0.0002958715596330275, "loss": 4.1108, "step": 567 }, { "epoch": 0.35, "learning_rate": 0.000295802752293578, "loss": 3.9427, "step": 568 }, { "epoch": 0.35, "learning_rate": 0.0002957339449541284, "loss": 4.0572, "step": 569 }, { "epoch": 0.35, "learning_rate": 0.00029566513761467884, "loss": 3.9819, "step": 570 }, { "epoch": 0.35, "learning_rate": 0.00029559633027522933, "loss": 3.9515, "step": 571 }, { "epoch": 0.35, "learning_rate": 0.00029552752293577977, "loss": 3.9857, "step": 572 }, { "epoch": 0.35, "learning_rate": 0.00029545871559633025, "loss": 4.061, "step": 573 }, { "epoch": 0.35, "learning_rate": 0.0002953899082568807, "loss": 4.0238, "step": 574 }, { "epoch": 0.35, "learning_rate": 0.0002953211009174312, "loss": 3.9119, "step": 575 }, { "epoch": 0.36, "learning_rate": 0.0002952522935779816, "loss": 3.743, "step": 576 }, { "epoch": 0.36, "learning_rate": 0.0002951834862385321, "loss": 3.9652, "step": 577 }, { "epoch": 0.36, "learning_rate": 0.00029511467889908253, "loss": 3.8645, "step": 578 }, { "epoch": 0.36, "learning_rate": 0.000295045871559633, "loss": 3.8511, "step": 579 }, { "epoch": 0.36, "learning_rate": 0.00029497706422018346, "loss": 3.8194, "step": 580 }, { "epoch": 0.36, "learning_rate": 0.0002949082568807339, "loss": 3.9449, "step": 581 }, { "epoch": 0.36, "learning_rate": 0.0002948394495412844, "loss": 3.8325, "step": 582 }, { "epoch": 0.36, "learning_rate": 0.0002947706422018348, "loss": 3.6292, "step": 583 }, { "epoch": 0.36, "learning_rate": 0.0002947018348623853, "loss": 3.6024, "step": 584 }, { "epoch": 0.36, "learning_rate": 0.00029463302752293574, "loss": 3.7113, "step": 585 }, { "epoch": 0.36, "learning_rate": 0.0002945642201834862, "loss": 3.6525, "step": 586 }, { "epoch": 0.36, "learning_rate": 0.00029449541284403666, "loss": 3.4445, "step": 587 }, { "epoch": 0.36, "learning_rate": 0.00029442660550458715, "loss": 3.432, "step": 588 }, { "epoch": 0.36, "learning_rate": 0.0002943577981651376, "loss": 3.5035, "step": 589 }, { "epoch": 0.36, "learning_rate": 0.00029428899082568807, "loss": 3.4721, "step": 590 }, { "epoch": 0.36, "learning_rate": 0.0002942201834862385, "loss": 3.4675, "step": 591 }, { "epoch": 0.37, "learning_rate": 0.000294151376146789, "loss": 3.2225, "step": 592 }, { "epoch": 0.37, "learning_rate": 0.0002940825688073394, "loss": 3.1528, "step": 593 }, { "epoch": 0.37, "learning_rate": 0.0002940137614678899, "loss": 3.307, "step": 594 }, { "epoch": 0.37, "learning_rate": 0.00029394495412844035, "loss": 3.2358, "step": 595 }, { "epoch": 0.37, "learning_rate": 0.0002938761467889908, "loss": 3.0446, "step": 596 }, { "epoch": 0.37, "learning_rate": 0.00029380733944954127, "loss": 2.8091, "step": 597 }, { "epoch": 0.37, "learning_rate": 0.0002937385321100917, "loss": 2.8645, "step": 598 }, { "epoch": 0.37, "learning_rate": 0.0002936697247706422, "loss": 2.6645, "step": 599 }, { "epoch": 0.37, "learning_rate": 0.00029360091743119263, "loss": 2.5688, "step": 600 }, { "epoch": 0.37, "learning_rate": 0.00029353211009174306, "loss": 5.9051, "step": 601 }, { "epoch": 0.37, "learning_rate": 0.00029346330275229355, "loss": 5.1959, "step": 602 }, { "epoch": 0.37, "learning_rate": 0.000293394495412844, "loss": 4.8426, "step": 603 }, { "epoch": 0.37, "learning_rate": 0.0002933256880733945, "loss": 4.7532, "step": 604 }, { "epoch": 0.37, "learning_rate": 0.0002932568807339449, "loss": 4.6745, "step": 605 }, { "epoch": 0.37, "learning_rate": 0.0002931880733944954, "loss": 4.4316, "step": 606 }, { "epoch": 0.37, "learning_rate": 0.00029311926605504583, "loss": 4.3115, "step": 607 }, { "epoch": 0.38, "learning_rate": 0.0002930504587155963, "loss": 4.3938, "step": 608 }, { "epoch": 0.38, "learning_rate": 0.00029298165137614675, "loss": 4.3118, "step": 609 }, { "epoch": 0.38, "learning_rate": 0.00029291284403669724, "loss": 4.2531, "step": 610 }, { "epoch": 0.38, "learning_rate": 0.0002928440366972477, "loss": 4.2168, "step": 611 }, { "epoch": 0.38, "learning_rate": 0.0002927752293577981, "loss": 4.1789, "step": 612 }, { "epoch": 0.38, "learning_rate": 0.0002927064220183486, "loss": 4.2307, "step": 613 }, { "epoch": 0.38, "learning_rate": 0.00029263761467889903, "loss": 3.9621, "step": 614 }, { "epoch": 0.38, "learning_rate": 0.0002925688073394495, "loss": 4.07, "step": 615 }, { "epoch": 0.38, "learning_rate": 0.00029249999999999995, "loss": 4.1799, "step": 616 }, { "epoch": 0.38, "learning_rate": 0.00029243119266055044, "loss": 4.1643, "step": 617 }, { "epoch": 0.38, "learning_rate": 0.0002923623853211009, "loss": 4.1368, "step": 618 }, { "epoch": 0.38, "learning_rate": 0.00029229357798165136, "loss": 4.0929, "step": 619 }, { "epoch": 0.38, "learning_rate": 0.0002922247706422018, "loss": 4.104, "step": 620 }, { "epoch": 0.38, "learning_rate": 0.0002921559633027523, "loss": 4.3348, "step": 621 }, { "epoch": 0.38, "learning_rate": 0.0002920871559633027, "loss": 4.1143, "step": 622 }, { "epoch": 0.38, "learning_rate": 0.0002920183486238532, "loss": 4.1904, "step": 623 }, { "epoch": 0.39, "learning_rate": 0.00029194954128440364, "loss": 3.9873, "step": 624 }, { "epoch": 0.39, "learning_rate": 0.00029188073394495413, "loss": 4.0171, "step": 625 }, { "epoch": 0.39, "learning_rate": 0.00029181192660550457, "loss": 4.1695, "step": 626 }, { "epoch": 0.39, "learning_rate": 0.00029174311926605506, "loss": 4.1384, "step": 627 }, { "epoch": 0.39, "learning_rate": 0.0002916743119266055, "loss": 3.822, "step": 628 }, { "epoch": 0.39, "learning_rate": 0.0002916055045871559, "loss": 3.7661, "step": 629 }, { "epoch": 0.39, "learning_rate": 0.0002915366972477064, "loss": 3.8224, "step": 630 }, { "epoch": 0.39, "learning_rate": 0.00029146788990825685, "loss": 3.7437, "step": 631 }, { "epoch": 0.39, "learning_rate": 0.0002913990825688073, "loss": 3.8119, "step": 632 }, { "epoch": 0.39, "learning_rate": 0.00029133027522935777, "loss": 3.668, "step": 633 }, { "epoch": 0.39, "learning_rate": 0.0002912614678899082, "loss": 3.6149, "step": 634 }, { "epoch": 0.39, "learning_rate": 0.0002911926605504587, "loss": 3.6475, "step": 635 }, { "epoch": 0.39, "learning_rate": 0.0002911238532110091, "loss": 3.5243, "step": 636 }, { "epoch": 0.39, "learning_rate": 0.0002910550458715596, "loss": 3.5894, "step": 637 }, { "epoch": 0.39, "learning_rate": 0.00029098623853211005, "loss": 3.5434, "step": 638 }, { "epoch": 0.39, "learning_rate": 0.00029091743119266054, "loss": 3.5424, "step": 639 }, { "epoch": 0.4, "learning_rate": 0.00029084862385321097, "loss": 3.2515, "step": 640 }, { "epoch": 0.4, "learning_rate": 0.00029077981651376146, "loss": 3.3912, "step": 641 }, { "epoch": 0.4, "learning_rate": 0.0002907110091743119, "loss": 3.2805, "step": 642 }, { "epoch": 0.4, "learning_rate": 0.0002906422018348624, "loss": 3.3486, "step": 643 }, { "epoch": 0.4, "learning_rate": 0.0002905733944954128, "loss": 3.1337, "step": 644 }, { "epoch": 0.4, "learning_rate": 0.00029050458715596325, "loss": 2.9084, "step": 645 }, { "epoch": 0.4, "learning_rate": 0.00029043577981651374, "loss": 2.7774, "step": 646 }, { "epoch": 0.4, "learning_rate": 0.00029036697247706417, "loss": 2.8928, "step": 647 }, { "epoch": 0.4, "learning_rate": 0.00029029816513761466, "loss": 2.6588, "step": 648 }, { "epoch": 0.4, "learning_rate": 0.0002902293577981651, "loss": 2.5163, "step": 649 }, { "epoch": 0.4, "learning_rate": 0.0002901605504587156, "loss": 2.4516, "step": 650 }, { "epoch": 0.4, "learning_rate": 0.000290091743119266, "loss": 5.7477, "step": 651 }, { "epoch": 0.4, "learning_rate": 0.0002900229357798165, "loss": 5.2403, "step": 652 }, { "epoch": 0.4, "learning_rate": 0.00028995412844036694, "loss": 4.5533, "step": 653 }, { "epoch": 0.4, "learning_rate": 0.00028988532110091743, "loss": 4.4385, "step": 654 }, { "epoch": 0.4, "learning_rate": 0.00028981651376146786, "loss": 4.5647, "step": 655 }, { "epoch": 0.4, "learning_rate": 0.00028974770642201835, "loss": 4.3326, "step": 656 }, { "epoch": 0.41, "learning_rate": 0.0002896788990825688, "loss": 4.3062, "step": 657 }, { "epoch": 0.41, "learning_rate": 0.0002896100917431193, "loss": 4.3222, "step": 658 }, { "epoch": 0.41, "learning_rate": 0.0002895412844036697, "loss": 4.2311, "step": 659 }, { "epoch": 0.41, "learning_rate": 0.00028947247706422014, "loss": 4.3512, "step": 660 }, { "epoch": 0.41, "learning_rate": 0.00028940366972477063, "loss": 4.2595, "step": 661 }, { "epoch": 0.41, "learning_rate": 0.00028933486238532106, "loss": 4.3594, "step": 662 }, { "epoch": 0.41, "learning_rate": 0.00028926605504587155, "loss": 4.0522, "step": 663 }, { "epoch": 0.41, "learning_rate": 0.000289197247706422, "loss": 4.1923, "step": 664 }, { "epoch": 0.41, "learning_rate": 0.0002891284403669724, "loss": 4.1089, "step": 665 }, { "epoch": 0.41, "learning_rate": 0.0002890596330275229, "loss": 4.0241, "step": 666 }, { "epoch": 0.41, "learning_rate": 0.00028899082568807334, "loss": 3.9912, "step": 667 }, { "epoch": 0.41, "learning_rate": 0.00028892201834862383, "loss": 4.0207, "step": 668 }, { "epoch": 0.41, "learning_rate": 0.00028885321100917427, "loss": 4.0843, "step": 669 }, { "epoch": 0.41, "learning_rate": 0.00028878440366972475, "loss": 4.1729, "step": 670 }, { "epoch": 0.41, "learning_rate": 0.0002887155963302752, "loss": 4.0149, "step": 671 }, { "epoch": 0.41, "learning_rate": 0.0002886467889908257, "loss": 3.9101, "step": 672 }, { "epoch": 0.42, "learning_rate": 0.0002885779816513761, "loss": 3.8613, "step": 673 }, { "epoch": 0.42, "learning_rate": 0.0002885091743119266, "loss": 3.9378, "step": 674 }, { "epoch": 0.42, "learning_rate": 0.00028844036697247703, "loss": 3.7407, "step": 675 }, { "epoch": 0.42, "learning_rate": 0.00028837155963302747, "loss": 3.8231, "step": 676 }, { "epoch": 0.42, "learning_rate": 0.00028830275229357796, "loss": 3.8906, "step": 677 }, { "epoch": 0.42, "learning_rate": 0.0002882339449541284, "loss": 3.7453, "step": 678 }, { "epoch": 0.42, "learning_rate": 0.0002881651376146789, "loss": 3.7523, "step": 679 }, { "epoch": 0.42, "learning_rate": 0.0002880963302752293, "loss": 3.7658, "step": 680 }, { "epoch": 0.42, "learning_rate": 0.0002880275229357798, "loss": 3.6904, "step": 681 }, { "epoch": 0.42, "learning_rate": 0.00028795871559633024, "loss": 3.6741, "step": 682 }, { "epoch": 0.42, "learning_rate": 0.0002878899082568807, "loss": 3.7134, "step": 683 }, { "epoch": 0.42, "learning_rate": 0.00028782110091743116, "loss": 3.6732, "step": 684 }, { "epoch": 0.42, "learning_rate": 0.00028775229357798165, "loss": 3.424, "step": 685 }, { "epoch": 0.42, "learning_rate": 0.0002876834862385321, "loss": 3.4771, "step": 686 }, { "epoch": 0.42, "learning_rate": 0.00028761467889908257, "loss": 3.5035, "step": 687 }, { "epoch": 0.42, "learning_rate": 0.000287545871559633, "loss": 3.3055, "step": 688 }, { "epoch": 0.43, "learning_rate": 0.0002874770642201835, "loss": 3.4968, "step": 689 }, { "epoch": 0.43, "learning_rate": 0.0002874082568807339, "loss": 3.2907, "step": 690 }, { "epoch": 0.43, "learning_rate": 0.0002873394495412844, "loss": 3.0678, "step": 691 }, { "epoch": 0.43, "learning_rate": 0.00028727064220183485, "loss": 3.2938, "step": 692 }, { "epoch": 0.43, "learning_rate": 0.0002872018348623853, "loss": 3.3574, "step": 693 }, { "epoch": 0.43, "learning_rate": 0.00028713302752293577, "loss": 3.0665, "step": 694 }, { "epoch": 0.43, "learning_rate": 0.0002870642201834862, "loss": 3.056, "step": 695 }, { "epoch": 0.43, "learning_rate": 0.00028699541284403664, "loss": 2.8224, "step": 696 }, { "epoch": 0.43, "learning_rate": 0.00028692660550458713, "loss": 2.6976, "step": 697 }, { "epoch": 0.43, "learning_rate": 0.00028685779816513756, "loss": 2.6324, "step": 698 }, { "epoch": 0.43, "learning_rate": 0.00028678899082568805, "loss": 2.5969, "step": 699 }, { "epoch": 0.43, "learning_rate": 0.0002867201834862385, "loss": 2.3751, "step": 700 }, { "epoch": 0.43, "learning_rate": 0.00028665137614678897, "loss": 5.6773, "step": 701 }, { "epoch": 0.43, "learning_rate": 0.0002865825688073394, "loss": 4.9661, "step": 702 }, { "epoch": 0.43, "learning_rate": 0.0002865137614678899, "loss": 4.5686, "step": 703 }, { "epoch": 0.43, "learning_rate": 0.00028644495412844033, "loss": 4.6135, "step": 704 }, { "epoch": 0.44, "learning_rate": 0.0002863761467889908, "loss": 4.2898, "step": 705 }, { "epoch": 0.44, "learning_rate": 0.00028630733944954125, "loss": 4.2391, "step": 706 }, { "epoch": 0.44, "learning_rate": 0.00028623853211009174, "loss": 4.2093, "step": 707 }, { "epoch": 0.44, "learning_rate": 0.0002861697247706422, "loss": 4.1607, "step": 708 }, { "epoch": 0.44, "learning_rate": 0.0002861009174311926, "loss": 3.9372, "step": 709 }, { "epoch": 0.44, "learning_rate": 0.0002860321100917431, "loss": 4.1236, "step": 710 }, { "epoch": 0.44, "learning_rate": 0.00028596330275229353, "loss": 4.0751, "step": 711 }, { "epoch": 0.44, "learning_rate": 0.000285894495412844, "loss": 4.1068, "step": 712 }, { "epoch": 0.44, "learning_rate": 0.00028582568807339445, "loss": 3.8714, "step": 713 }, { "epoch": 0.44, "learning_rate": 0.00028575688073394494, "loss": 3.9801, "step": 714 }, { "epoch": 0.44, "learning_rate": 0.0002856880733944954, "loss": 4.0199, "step": 715 }, { "epoch": 0.44, "learning_rate": 0.00028561926605504586, "loss": 3.8776, "step": 716 }, { "epoch": 0.44, "learning_rate": 0.0002855504587155963, "loss": 3.9058, "step": 717 }, { "epoch": 0.44, "learning_rate": 0.0002854816513761468, "loss": 3.8545, "step": 718 }, { "epoch": 0.44, "learning_rate": 0.0002854128440366972, "loss": 3.9446, "step": 719 }, { "epoch": 0.44, "learning_rate": 0.0002853440366972477, "loss": 4.0342, "step": 720 }, { "epoch": 0.45, "learning_rate": 0.00028527522935779814, "loss": 3.8863, "step": 721 }, { "epoch": 0.45, "learning_rate": 0.00028520642201834863, "loss": 3.8662, "step": 722 }, { "epoch": 0.45, "learning_rate": 0.00028513761467889907, "loss": 3.8483, "step": 723 }, { "epoch": 0.45, "learning_rate": 0.0002850688073394495, "loss": 3.8917, "step": 724 }, { "epoch": 0.45, "learning_rate": 0.000285, "loss": 3.7865, "step": 725 }, { "epoch": 0.45, "learning_rate": 0.0002849311926605504, "loss": 3.8303, "step": 726 }, { "epoch": 0.45, "learning_rate": 0.00028486238532110086, "loss": 3.6543, "step": 727 }, { "epoch": 0.45, "learning_rate": 0.00028479357798165135, "loss": 3.7781, "step": 728 }, { "epoch": 0.45, "learning_rate": 0.0002847247706422018, "loss": 3.536, "step": 729 }, { "epoch": 0.45, "learning_rate": 0.00028465596330275227, "loss": 3.6209, "step": 730 }, { "epoch": 0.45, "learning_rate": 0.0002845871559633027, "loss": 3.6361, "step": 731 }, { "epoch": 0.45, "learning_rate": 0.0002845183486238532, "loss": 3.6567, "step": 732 }, { "epoch": 0.45, "learning_rate": 0.0002844495412844036, "loss": 3.6093, "step": 733 }, { "epoch": 0.45, "learning_rate": 0.0002843807339449541, "loss": 3.6645, "step": 734 }, { "epoch": 0.45, "learning_rate": 0.00028431192660550455, "loss": 3.559, "step": 735 }, { "epoch": 0.45, "learning_rate": 0.00028424311926605504, "loss": 3.4329, "step": 736 }, { "epoch": 0.45, "learning_rate": 0.00028417431192660547, "loss": 3.5064, "step": 737 }, { "epoch": 0.46, "learning_rate": 0.00028410550458715596, "loss": 3.1375, "step": 738 }, { "epoch": 0.46, "learning_rate": 0.0002840366972477064, "loss": 3.3257, "step": 739 }, { "epoch": 0.46, "learning_rate": 0.0002839678899082568, "loss": 3.2393, "step": 740 }, { "epoch": 0.46, "learning_rate": 0.0002838990825688073, "loss": 3.0156, "step": 741 }, { "epoch": 0.46, "learning_rate": 0.00028383027522935775, "loss": 2.9855, "step": 742 }, { "epoch": 0.46, "learning_rate": 0.00028376146788990824, "loss": 2.9816, "step": 743 }, { "epoch": 0.46, "learning_rate": 0.00028369266055045867, "loss": 2.9994, "step": 744 }, { "epoch": 0.46, "learning_rate": 0.00028362385321100916, "loss": 2.8371, "step": 745 }, { "epoch": 0.46, "learning_rate": 0.0002835550458715596, "loss": 2.6648, "step": 746 }, { "epoch": 0.46, "learning_rate": 0.0002834862385321101, "loss": 2.4887, "step": 747 }, { "epoch": 0.46, "learning_rate": 0.0002834174311926605, "loss": 2.4057, "step": 748 }, { "epoch": 0.46, "learning_rate": 0.000283348623853211, "loss": 2.4427, "step": 749 }, { "epoch": 0.46, "learning_rate": 0.00028327981651376144, "loss": 2.3271, "step": 750 }, { "epoch": 0.46, "learning_rate": 0.00028321100917431193, "loss": 5.6453, "step": 751 }, { "epoch": 0.46, "learning_rate": 0.00028314220183486236, "loss": 5.1783, "step": 752 }, { "epoch": 0.46, "learning_rate": 0.00028307339449541285, "loss": 4.7046, "step": 753 }, { "epoch": 0.47, "learning_rate": 0.0002830045871559633, "loss": 4.6663, "step": 754 }, { "epoch": 0.47, "learning_rate": 0.00028293577981651377, "loss": 4.3295, "step": 755 }, { "epoch": 0.47, "learning_rate": 0.0002828669724770642, "loss": 4.2772, "step": 756 }, { "epoch": 0.47, "learning_rate": 0.00028279816513761464, "loss": 4.1738, "step": 757 }, { "epoch": 0.47, "learning_rate": 0.0002827293577981651, "loss": 4.2217, "step": 758 }, { "epoch": 0.47, "learning_rate": 0.00028266055045871556, "loss": 3.9638, "step": 759 }, { "epoch": 0.47, "learning_rate": 0.000282591743119266, "loss": 3.9321, "step": 760 }, { "epoch": 0.47, "learning_rate": 0.0002825229357798165, "loss": 3.9311, "step": 761 }, { "epoch": 0.47, "learning_rate": 0.0002824541284403669, "loss": 4.0939, "step": 762 }, { "epoch": 0.47, "learning_rate": 0.0002823853211009174, "loss": 3.7752, "step": 763 }, { "epoch": 0.47, "learning_rate": 0.00028231651376146784, "loss": 3.9306, "step": 764 }, { "epoch": 0.47, "learning_rate": 0.00028224770642201833, "loss": 4.0087, "step": 765 }, { "epoch": 0.47, "learning_rate": 0.00028217889908256877, "loss": 3.9266, "step": 766 }, { "epoch": 0.47, "learning_rate": 0.00028211009174311925, "loss": 3.8301, "step": 767 }, { "epoch": 0.47, "learning_rate": 0.0002820412844036697, "loss": 3.7035, "step": 768 }, { "epoch": 0.47, "learning_rate": 0.0002819724770642202, "loss": 3.8257, "step": 769 }, { "epoch": 0.48, "learning_rate": 0.0002819036697247706, "loss": 3.786, "step": 770 }, { "epoch": 0.48, "learning_rate": 0.0002818348623853211, "loss": 3.6439, "step": 771 }, { "epoch": 0.48, "learning_rate": 0.00028176605504587153, "loss": 3.5426, "step": 772 }, { "epoch": 0.48, "learning_rate": 0.00028169724770642197, "loss": 3.6834, "step": 773 }, { "epoch": 0.48, "learning_rate": 0.00028162844036697246, "loss": 3.617, "step": 774 }, { "epoch": 0.48, "learning_rate": 0.0002815596330275229, "loss": 3.5549, "step": 775 }, { "epoch": 0.48, "learning_rate": 0.0002814908256880734, "loss": 3.6517, "step": 776 }, { "epoch": 0.48, "learning_rate": 0.0002814220183486238, "loss": 3.6253, "step": 777 }, { "epoch": 0.48, "learning_rate": 0.0002813532110091743, "loss": 3.5861, "step": 778 }, { "epoch": 0.48, "learning_rate": 0.00028128440366972473, "loss": 3.5194, "step": 779 }, { "epoch": 0.48, "learning_rate": 0.0002812155963302752, "loss": 3.5125, "step": 780 }, { "epoch": 0.48, "learning_rate": 0.00028114678899082566, "loss": 3.4713, "step": 781 }, { "epoch": 0.48, "learning_rate": 0.00028107798165137615, "loss": 3.5415, "step": 782 }, { "epoch": 0.48, "learning_rate": 0.0002810091743119266, "loss": 3.2948, "step": 783 }, { "epoch": 0.48, "learning_rate": 0.00028094036697247707, "loss": 3.7558, "step": 784 }, { "epoch": 0.48, "learning_rate": 0.0002808715596330275, "loss": 3.4264, "step": 785 }, { "epoch": 0.49, "learning_rate": 0.000280802752293578, "loss": 3.5141, "step": 786 }, { "epoch": 0.49, "learning_rate": 0.0002807339449541284, "loss": 3.2201, "step": 787 }, { "epoch": 0.49, "learning_rate": 0.00028066513761467886, "loss": 3.3493, "step": 788 }, { "epoch": 0.49, "learning_rate": 0.0002805963302752293, "loss": 3.3462, "step": 789 }, { "epoch": 0.49, "learning_rate": 0.0002805275229357798, "loss": 3.088, "step": 790 }, { "epoch": 0.49, "learning_rate": 0.0002804587155963302, "loss": 3.1689, "step": 791 }, { "epoch": 0.49, "learning_rate": 0.0002803899082568807, "loss": 3.0794, "step": 792 }, { "epoch": 0.49, "learning_rate": 0.00028032110091743114, "loss": 3.018, "step": 793 }, { "epoch": 0.49, "learning_rate": 0.0002802522935779816, "loss": 2.9667, "step": 794 }, { "epoch": 0.49, "learning_rate": 0.00028018348623853206, "loss": 3.0386, "step": 795 }, { "epoch": 0.49, "learning_rate": 0.00028011467889908255, "loss": 2.8074, "step": 796 }, { "epoch": 0.49, "learning_rate": 0.000280045871559633, "loss": 2.5167, "step": 797 }, { "epoch": 0.49, "learning_rate": 0.00027997706422018347, "loss": 2.6814, "step": 798 }, { "epoch": 0.49, "learning_rate": 0.0002799082568807339, "loss": 2.2415, "step": 799 }, { "epoch": 0.49, "learning_rate": 0.0002798394495412844, "loss": 2.3352, "step": 800 }, { "epoch": 0.49, "learning_rate": 0.00027977064220183483, "loss": 5.8354, "step": 801 }, { "epoch": 0.5, "learning_rate": 0.0002797018348623853, "loss": 5.0938, "step": 802 }, { "epoch": 0.5, "learning_rate": 0.00027963302752293575, "loss": 4.5163, "step": 803 }, { "epoch": 0.5, "learning_rate": 0.0002795642201834862, "loss": 4.4829, "step": 804 }, { "epoch": 0.5, "learning_rate": 0.0002794954128440367, "loss": 4.3322, "step": 805 }, { "epoch": 0.5, "learning_rate": 0.0002794266055045871, "loss": 4.1584, "step": 806 }, { "epoch": 0.5, "learning_rate": 0.0002793577981651376, "loss": 3.9791, "step": 807 }, { "epoch": 0.5, "learning_rate": 0.00027928899082568803, "loss": 3.9365, "step": 808 }, { "epoch": 0.5, "learning_rate": 0.0002792201834862385, "loss": 3.9113, "step": 809 }, { "epoch": 0.5, "learning_rate": 0.00027915137614678895, "loss": 3.8803, "step": 810 }, { "epoch": 0.5, "learning_rate": 0.00027908256880733944, "loss": 3.8092, "step": 811 }, { "epoch": 0.5, "learning_rate": 0.0002790137614678899, "loss": 3.8322, "step": 812 }, { "epoch": 0.5, "learning_rate": 0.00027894495412844036, "loss": 3.7589, "step": 813 }, { "epoch": 0.5, "learning_rate": 0.0002788761467889908, "loss": 3.8974, "step": 814 }, { "epoch": 0.5, "learning_rate": 0.0002788073394495413, "loss": 3.6453, "step": 815 }, { "epoch": 0.5, "learning_rate": 0.0002787385321100917, "loss": 3.7593, "step": 816 }, { "epoch": 0.5, "learning_rate": 0.0002786697247706422, "loss": 3.6785, "step": 817 }, { "epoch": 0.5, "learning_rate": 0.00027860091743119264, "loss": 3.6274, "step": 818 }, { "epoch": 0.51, "learning_rate": 0.00027853211009174313, "loss": 3.6767, "step": 819 }, { "epoch": 0.51, "learning_rate": 0.0002784633027522935, "loss": 3.7273, "step": 820 }, { "epoch": 0.51, "learning_rate": 0.000278394495412844, "loss": 3.6245, "step": 821 }, { "epoch": 0.51, "learning_rate": 0.00027832568807339443, "loss": 3.5374, "step": 822 }, { "epoch": 0.51, "learning_rate": 0.0002782568807339449, "loss": 3.7519, "step": 823 }, { "epoch": 0.51, "learning_rate": 0.00027818807339449536, "loss": 3.6401, "step": 824 }, { "epoch": 0.51, "learning_rate": 0.00027811926605504584, "loss": 3.6166, "step": 825 }, { "epoch": 0.51, "learning_rate": 0.0002780504587155963, "loss": 3.5436, "step": 826 }, { "epoch": 0.51, "learning_rate": 0.00027798165137614677, "loss": 3.5718, "step": 827 }, { "epoch": 0.51, "learning_rate": 0.0002779128440366972, "loss": 3.7381, "step": 828 }, { "epoch": 0.51, "learning_rate": 0.0002778440366972477, "loss": 3.4856, "step": 829 }, { "epoch": 0.51, "learning_rate": 0.0002777752293577981, "loss": 3.5257, "step": 830 }, { "epoch": 0.51, "learning_rate": 0.0002777064220183486, "loss": 3.5684, "step": 831 }, { "epoch": 0.51, "learning_rate": 0.00027763761467889905, "loss": 3.5243, "step": 832 }, { "epoch": 0.51, "learning_rate": 0.00027756880733944953, "loss": 3.2728, "step": 833 }, { "epoch": 0.51, "learning_rate": 0.00027749999999999997, "loss": 3.2295, "step": 834 }, { "epoch": 0.52, "learning_rate": 0.00027743119266055046, "loss": 3.2984, "step": 835 }, { "epoch": 0.52, "learning_rate": 0.0002773623853211009, "loss": 3.212, "step": 836 }, { "epoch": 0.52, "learning_rate": 0.0002772935779816513, "loss": 3.0495, "step": 837 }, { "epoch": 0.52, "learning_rate": 0.0002772247706422018, "loss": 3.044, "step": 838 }, { "epoch": 0.52, "learning_rate": 0.00027715596330275225, "loss": 3.1158, "step": 839 }, { "epoch": 0.52, "learning_rate": 0.00027708715596330274, "loss": 3.0624, "step": 840 }, { "epoch": 0.52, "learning_rate": 0.00027701834862385317, "loss": 2.9915, "step": 841 }, { "epoch": 0.52, "learning_rate": 0.00027694954128440366, "loss": 3.0146, "step": 842 }, { "epoch": 0.52, "learning_rate": 0.0002768807339449541, "loss": 2.9135, "step": 843 }, { "epoch": 0.52, "learning_rate": 0.0002768119266055046, "loss": 2.8511, "step": 844 }, { "epoch": 0.52, "learning_rate": 0.000276743119266055, "loss": 2.7068, "step": 845 }, { "epoch": 0.52, "learning_rate": 0.0002766743119266055, "loss": 2.611, "step": 846 }, { "epoch": 0.52, "learning_rate": 0.00027660550458715594, "loss": 2.5175, "step": 847 }, { "epoch": 0.52, "learning_rate": 0.0002765366972477064, "loss": 2.453, "step": 848 }, { "epoch": 0.52, "learning_rate": 0.00027646788990825686, "loss": 2.2522, "step": 849 }, { "epoch": 0.52, "learning_rate": 0.00027639908256880735, "loss": 2.2374, "step": 850 }, { "epoch": 0.53, "learning_rate": 0.0002763302752293578, "loss": 5.4623, "step": 851 }, { "epoch": 0.53, "learning_rate": 0.0002762614678899082, "loss": 5.1737, "step": 852 }, { "epoch": 0.53, "learning_rate": 0.00027619266055045865, "loss": 4.7505, "step": 853 }, { "epoch": 0.53, "learning_rate": 0.00027612385321100914, "loss": 4.5885, "step": 854 }, { "epoch": 0.53, "learning_rate": 0.0002760550458715596, "loss": 4.3554, "step": 855 }, { "epoch": 0.53, "learning_rate": 0.00027598623853211006, "loss": 4.3703, "step": 856 }, { "epoch": 0.53, "learning_rate": 0.0002759174311926605, "loss": 4.1621, "step": 857 }, { "epoch": 0.53, "learning_rate": 0.000275848623853211, "loss": 3.9429, "step": 858 }, { "epoch": 0.53, "learning_rate": 0.0002757798165137614, "loss": 3.7094, "step": 859 }, { "epoch": 0.53, "learning_rate": 0.0002757110091743119, "loss": 3.8591, "step": 860 }, { "epoch": 0.53, "learning_rate": 0.00027564220183486234, "loss": 3.8113, "step": 861 }, { "epoch": 0.53, "learning_rate": 0.00027557339449541283, "loss": 3.6976, "step": 862 }, { "epoch": 0.53, "learning_rate": 0.00027550458715596326, "loss": 3.6116, "step": 863 }, { "epoch": 0.53, "learning_rate": 0.00027543577981651375, "loss": 3.5698, "step": 864 }, { "epoch": 0.53, "learning_rate": 0.0002753669724770642, "loss": 3.5438, "step": 865 }, { "epoch": 0.53, "learning_rate": 0.0002752981651376147, "loss": 3.6478, "step": 866 }, { "epoch": 0.54, "learning_rate": 0.0002752293577981651, "loss": 3.6013, "step": 867 }, { "epoch": 0.54, "learning_rate": 0.00027516055045871554, "loss": 3.6408, "step": 868 }, { "epoch": 0.54, "learning_rate": 0.00027509174311926603, "loss": 3.5644, "step": 869 }, { "epoch": 0.54, "learning_rate": 0.00027502293577981647, "loss": 3.4828, "step": 870 }, { "epoch": 0.54, "learning_rate": 0.00027495412844036695, "loss": 3.5369, "step": 871 }, { "epoch": 0.54, "learning_rate": 0.0002748853211009174, "loss": 3.5172, "step": 872 }, { "epoch": 0.54, "learning_rate": 0.0002748165137614679, "loss": 3.302, "step": 873 }, { "epoch": 0.54, "learning_rate": 0.0002747477064220183, "loss": 3.4129, "step": 874 }, { "epoch": 0.54, "learning_rate": 0.0002746788990825688, "loss": 3.2188, "step": 875 }, { "epoch": 0.54, "learning_rate": 0.00027461009174311923, "loss": 3.37, "step": 876 }, { "epoch": 0.54, "learning_rate": 0.0002745412844036697, "loss": 3.2573, "step": 877 }, { "epoch": 0.54, "learning_rate": 0.00027447247706422016, "loss": 3.3593, "step": 878 }, { "epoch": 0.54, "learning_rate": 0.00027440366972477064, "loss": 3.3993, "step": 879 }, { "epoch": 0.54, "learning_rate": 0.0002743348623853211, "loss": 3.3669, "step": 880 }, { "epoch": 0.54, "learning_rate": 0.00027426605504587157, "loss": 3.3771, "step": 881 }, { "epoch": 0.54, "learning_rate": 0.000274197247706422, "loss": 3.1646, "step": 882 }, { "epoch": 0.55, "learning_rate": 0.0002741284403669725, "loss": 3.2597, "step": 883 }, { "epoch": 0.55, "learning_rate": 0.00027405963302752287, "loss": 3.0931, "step": 884 }, { "epoch": 0.55, "learning_rate": 0.00027399082568807336, "loss": 3.0211, "step": 885 }, { "epoch": 0.55, "learning_rate": 0.0002739220183486238, "loss": 3.0644, "step": 886 }, { "epoch": 0.55, "learning_rate": 0.0002738532110091743, "loss": 2.9487, "step": 887 }, { "epoch": 0.55, "learning_rate": 0.0002737844036697247, "loss": 2.8871, "step": 888 }, { "epoch": 0.55, "learning_rate": 0.0002737155963302752, "loss": 2.9525, "step": 889 }, { "epoch": 0.55, "learning_rate": 0.00027364678899082564, "loss": 2.8661, "step": 890 }, { "epoch": 0.55, "learning_rate": 0.0002735779816513761, "loss": 2.4642, "step": 891 }, { "epoch": 0.55, "learning_rate": 0.00027350917431192656, "loss": 2.7808, "step": 892 }, { "epoch": 0.55, "learning_rate": 0.00027344036697247705, "loss": 2.8034, "step": 893 }, { "epoch": 0.55, "learning_rate": 0.0002733715596330275, "loss": 2.6838, "step": 894 }, { "epoch": 0.55, "learning_rate": 0.00027330275229357797, "loss": 2.4445, "step": 895 }, { "epoch": 0.55, "learning_rate": 0.0002732339449541284, "loss": 2.375, "step": 896 }, { "epoch": 0.55, "learning_rate": 0.0002731651376146789, "loss": 2.4147, "step": 897 }, { "epoch": 0.55, "learning_rate": 0.00027309633027522933, "loss": 2.159, "step": 898 }, { "epoch": 0.55, "learning_rate": 0.0002730275229357798, "loss": 2.087, "step": 899 }, { "epoch": 0.56, "learning_rate": 0.00027295871559633025, "loss": 1.8963, "step": 900 }, { "epoch": 0.56, "learning_rate": 0.0002728899082568807, "loss": 5.6814, "step": 901 }, { "epoch": 0.56, "learning_rate": 0.00027282110091743117, "loss": 4.8834, "step": 902 }, { "epoch": 0.56, "learning_rate": 0.0002727522935779816, "loss": 4.5176, "step": 903 }, { "epoch": 0.56, "learning_rate": 0.0002726834862385321, "loss": 4.1832, "step": 904 }, { "epoch": 0.56, "learning_rate": 0.00027261467889908253, "loss": 4.2073, "step": 905 }, { "epoch": 0.56, "learning_rate": 0.000272545871559633, "loss": 3.726, "step": 906 }, { "epoch": 0.56, "learning_rate": 0.00027247706422018345, "loss": 3.7203, "step": 907 }, { "epoch": 0.56, "learning_rate": 0.00027240825688073394, "loss": 3.5696, "step": 908 }, { "epoch": 0.56, "learning_rate": 0.0002723394495412844, "loss": 3.5516, "step": 909 }, { "epoch": 0.56, "learning_rate": 0.00027227064220183486, "loss": 3.5932, "step": 910 }, { "epoch": 0.56, "learning_rate": 0.0002722018348623853, "loss": 3.621, "step": 911 }, { "epoch": 0.56, "learning_rate": 0.0002721330275229358, "loss": 3.6297, "step": 912 }, { "epoch": 0.56, "learning_rate": 0.0002720642201834862, "loss": 3.4574, "step": 913 }, { "epoch": 0.56, "learning_rate": 0.0002719954128440367, "loss": 3.429, "step": 914 }, { "epoch": 0.56, "learning_rate": 0.00027192660550458714, "loss": 3.3761, "step": 915 }, { "epoch": 0.57, "learning_rate": 0.0002718577981651376, "loss": 3.2979, "step": 916 }, { "epoch": 0.57, "learning_rate": 0.000271788990825688, "loss": 3.4376, "step": 917 }, { "epoch": 0.57, "learning_rate": 0.0002717201834862385, "loss": 3.4223, "step": 918 }, { "epoch": 0.57, "learning_rate": 0.00027165137614678893, "loss": 3.4307, "step": 919 }, { "epoch": 0.57, "learning_rate": 0.0002715825688073394, "loss": 3.2741, "step": 920 }, { "epoch": 0.57, "learning_rate": 0.00027151376146788986, "loss": 3.229, "step": 921 }, { "epoch": 0.57, "learning_rate": 0.00027144495412844034, "loss": 3.387, "step": 922 }, { "epoch": 0.57, "learning_rate": 0.0002713761467889908, "loss": 3.3286, "step": 923 }, { "epoch": 0.57, "learning_rate": 0.00027130733944954127, "loss": 3.2406, "step": 924 }, { "epoch": 0.57, "learning_rate": 0.0002712385321100917, "loss": 3.0068, "step": 925 }, { "epoch": 0.57, "learning_rate": 0.0002711697247706422, "loss": 3.2257, "step": 926 }, { "epoch": 0.57, "learning_rate": 0.0002711009174311926, "loss": 3.4286, "step": 927 }, { "epoch": 0.57, "learning_rate": 0.0002710321100917431, "loss": 3.1836, "step": 928 }, { "epoch": 0.57, "learning_rate": 0.00027096330275229355, "loss": 3.1256, "step": 929 }, { "epoch": 0.57, "learning_rate": 0.00027089449541284403, "loss": 2.9933, "step": 930 }, { "epoch": 0.57, "learning_rate": 0.00027082568807339447, "loss": 3.0308, "step": 931 }, { "epoch": 0.58, "learning_rate": 0.0002707568807339449, "loss": 2.9226, "step": 932 }, { "epoch": 0.58, "learning_rate": 0.0002706880733944954, "loss": 2.9666, "step": 933 }, { "epoch": 0.58, "learning_rate": 0.0002706192660550458, "loss": 2.8008, "step": 934 }, { "epoch": 0.58, "learning_rate": 0.0002705504587155963, "loss": 2.8944, "step": 935 }, { "epoch": 0.58, "learning_rate": 0.00027048165137614675, "loss": 2.9266, "step": 936 }, { "epoch": 0.58, "learning_rate": 0.00027041284403669724, "loss": 2.8044, "step": 937 }, { "epoch": 0.58, "learning_rate": 0.00027034403669724767, "loss": 2.9476, "step": 938 }, { "epoch": 0.58, "learning_rate": 0.00027027522935779816, "loss": 2.7552, "step": 939 }, { "epoch": 0.58, "learning_rate": 0.0002702064220183486, "loss": 2.7592, "step": 940 }, { "epoch": 0.58, "learning_rate": 0.0002701376146788991, "loss": 2.7915, "step": 941 }, { "epoch": 0.58, "learning_rate": 0.0002700688073394495, "loss": 2.8133, "step": 942 }, { "epoch": 0.58, "learning_rate": 0.00027, "loss": 2.5633, "step": 943 }, { "epoch": 0.58, "learning_rate": 0.00026993119266055044, "loss": 2.4147, "step": 944 }, { "epoch": 0.58, "learning_rate": 0.0002698623853211009, "loss": 2.5834, "step": 945 }, { "epoch": 0.58, "learning_rate": 0.00026979357798165136, "loss": 2.3341, "step": 946 }, { "epoch": 0.58, "learning_rate": 0.00026972477064220185, "loss": 2.5304, "step": 947 }, { "epoch": 0.59, "learning_rate": 0.00026965596330275223, "loss": 2.3964, "step": 948 }, { "epoch": 0.59, "learning_rate": 0.0002695871559633027, "loss": 3.3065, "step": 949 }, { "epoch": 0.59, "learning_rate": 0.00026951834862385315, "loss": 2.194, "step": 950 }, { "epoch": 0.59, "learning_rate": 0.00026944954128440364, "loss": 5.7219, "step": 951 }, { "epoch": 0.59, "learning_rate": 0.0002693807339449541, "loss": 5.0634, "step": 952 }, { "epoch": 0.59, "learning_rate": 0.00026931192660550456, "loss": 4.3777, "step": 953 }, { "epoch": 0.59, "learning_rate": 0.000269243119266055, "loss": 4.4132, "step": 954 }, { "epoch": 0.59, "learning_rate": 0.0002691743119266055, "loss": 4.1163, "step": 955 }, { "epoch": 0.59, "learning_rate": 0.0002691055045871559, "loss": 3.8913, "step": 956 }, { "epoch": 0.59, "learning_rate": 0.0002690366972477064, "loss": 3.8223, "step": 957 }, { "epoch": 0.59, "learning_rate": 0.00026896788990825684, "loss": 3.7676, "step": 958 }, { "epoch": 0.59, "learning_rate": 0.00026889908256880733, "loss": 3.7502, "step": 959 }, { "epoch": 0.59, "learning_rate": 0.00026883027522935776, "loss": 3.5551, "step": 960 }, { "epoch": 0.59, "learning_rate": 0.00026876146788990825, "loss": 3.7311, "step": 961 }, { "epoch": 0.59, "learning_rate": 0.0002686926605504587, "loss": 3.7123, "step": 962 }, { "epoch": 0.59, "learning_rate": 0.0002686238532110092, "loss": 3.4324, "step": 963 }, { "epoch": 0.6, "learning_rate": 0.0002685550458715596, "loss": 3.4605, "step": 964 }, { "epoch": 0.6, "learning_rate": 0.00026848623853211004, "loss": 3.3101, "step": 965 }, { "epoch": 0.6, "learning_rate": 0.00026841743119266053, "loss": 3.4289, "step": 966 }, { "epoch": 0.6, "learning_rate": 0.00026834862385321097, "loss": 3.4608, "step": 967 }, { "epoch": 0.6, "learning_rate": 0.00026827981651376145, "loss": 3.2693, "step": 968 }, { "epoch": 0.6, "learning_rate": 0.0002682110091743119, "loss": 3.1958, "step": 969 }, { "epoch": 0.6, "learning_rate": 0.0002681422018348624, "loss": 3.2383, "step": 970 }, { "epoch": 0.6, "learning_rate": 0.0002680733944954128, "loss": 3.149, "step": 971 }, { "epoch": 0.6, "learning_rate": 0.0002680045871559633, "loss": 3.1712, "step": 972 }, { "epoch": 0.6, "learning_rate": 0.00026793577981651373, "loss": 3.2276, "step": 973 }, { "epoch": 0.6, "learning_rate": 0.0002678669724770642, "loss": 3.1085, "step": 974 }, { "epoch": 0.6, "learning_rate": 0.00026779816513761466, "loss": 3.3517, "step": 975 }, { "epoch": 0.6, "learning_rate": 0.00026772935779816514, "loss": 3.0752, "step": 976 }, { "epoch": 0.6, "learning_rate": 0.0002676605504587156, "loss": 3.1543, "step": 977 }, { "epoch": 0.6, "learning_rate": 0.00026759174311926607, "loss": 3.4293, "step": 978 }, { "epoch": 0.6, "learning_rate": 0.0002675229357798165, "loss": 2.9209, "step": 979 }, { "epoch": 0.6, "learning_rate": 0.00026745412844036693, "loss": 3.3701, "step": 980 }, { "epoch": 0.61, "learning_rate": 0.00026738532110091737, "loss": 3.0162, "step": 981 }, { "epoch": 0.61, "learning_rate": 0.00026731651376146786, "loss": 3.1484, "step": 982 }, { "epoch": 0.61, "learning_rate": 0.0002672477064220183, "loss": 2.8463, "step": 983 }, { "epoch": 0.61, "learning_rate": 0.0002671788990825688, "loss": 2.8031, "step": 984 }, { "epoch": 0.61, "learning_rate": 0.0002671100917431192, "loss": 2.8604, "step": 985 }, { "epoch": 0.61, "learning_rate": 0.0002670412844036697, "loss": 2.7669, "step": 986 }, { "epoch": 0.61, "learning_rate": 0.00026697247706422014, "loss": 2.739, "step": 987 }, { "epoch": 0.61, "learning_rate": 0.0002669036697247706, "loss": 2.846, "step": 988 }, { "epoch": 0.61, "learning_rate": 0.00026683486238532106, "loss": 2.7752, "step": 989 }, { "epoch": 0.61, "learning_rate": 0.00026676605504587155, "loss": 2.6357, "step": 990 }, { "epoch": 0.61, "learning_rate": 0.000266697247706422, "loss": 2.5999, "step": 991 }, { "epoch": 0.61, "learning_rate": 0.00026662844036697247, "loss": 2.4139, "step": 992 }, { "epoch": 0.61, "learning_rate": 0.0002665596330275229, "loss": 2.448, "step": 993 }, { "epoch": 0.61, "learning_rate": 0.0002664908256880734, "loss": 2.3161, "step": 994 }, { "epoch": 0.61, "learning_rate": 0.0002664220183486238, "loss": 2.1951, "step": 995 }, { "epoch": 0.61, "learning_rate": 0.00026635321100917426, "loss": 2.3582, "step": 996 }, { "epoch": 0.62, "learning_rate": 0.00026628440366972475, "loss": 2.4091, "step": 997 }, { "epoch": 0.62, "learning_rate": 0.0002662155963302752, "loss": 2.3661, "step": 998 }, { "epoch": 0.62, "learning_rate": 0.00026614678899082567, "loss": 2.1907, "step": 999 }, { "epoch": 0.62, "learning_rate": 0.0002660779816513761, "loss": 2.2033, "step": 1000 }, { "epoch": 0.62, "eval_bleu": 1.471304930449711e-13, "eval_loss": 4.178153991699219, "eval_runtime": 2571.1504, "eval_samples_per_second": 5.741, "eval_steps_per_second": 0.718, "step": 1000 }, { "epoch": 0.62, "learning_rate": 0.0002660091743119266, "loss": 5.5762, "step": 1001 }, { "epoch": 0.62, "learning_rate": 0.00026594036697247703, "loss": 4.7959, "step": 1002 }, { "epoch": 0.62, "learning_rate": 0.0002658715596330275, "loss": 4.3913, "step": 1003 }, { "epoch": 0.62, "learning_rate": 0.00026580275229357795, "loss": 3.9288, "step": 1004 }, { "epoch": 0.62, "learning_rate": 0.00026573394495412844, "loss": 3.9872, "step": 1005 }, { "epoch": 0.62, "learning_rate": 0.0002656651376146789, "loss": 3.6041, "step": 1006 }, { "epoch": 0.62, "learning_rate": 0.00026559633027522936, "loss": 3.6125, "step": 1007 }, { "epoch": 0.62, "learning_rate": 0.0002655275229357798, "loss": 3.7386, "step": 1008 }, { "epoch": 0.62, "learning_rate": 0.0002654587155963303, "loss": 3.4695, "step": 1009 }, { "epoch": 0.62, "learning_rate": 0.0002653899082568807, "loss": 3.5328, "step": 1010 }, { "epoch": 0.62, "learning_rate": 0.0002653211009174312, "loss": 3.3167, "step": 1011 }, { "epoch": 0.62, "learning_rate": 0.0002652522935779816, "loss": 3.3963, "step": 1012 }, { "epoch": 0.63, "learning_rate": 0.0002651834862385321, "loss": 3.205, "step": 1013 }, { "epoch": 0.63, "learning_rate": 0.0002651146788990825, "loss": 3.3196, "step": 1014 }, { "epoch": 0.63, "learning_rate": 0.000265045871559633, "loss": 3.4569, "step": 1015 }, { "epoch": 0.63, "learning_rate": 0.00026497706422018343, "loss": 3.2782, "step": 1016 }, { "epoch": 0.63, "learning_rate": 0.0002649082568807339, "loss": 3.3713, "step": 1017 }, { "epoch": 0.63, "learning_rate": 0.00026483944954128435, "loss": 3.3488, "step": 1018 }, { "epoch": 0.63, "learning_rate": 0.00026477064220183484, "loss": 3.0157, "step": 1019 }, { "epoch": 0.63, "learning_rate": 0.0002647018348623853, "loss": 3.1181, "step": 1020 }, { "epoch": 0.63, "learning_rate": 0.00026463302752293577, "loss": 3.3601, "step": 1021 }, { "epoch": 0.63, "learning_rate": 0.0002645642201834862, "loss": 3.1228, "step": 1022 }, { "epoch": 0.63, "learning_rate": 0.0002644954128440367, "loss": 3.1386, "step": 1023 }, { "epoch": 0.63, "learning_rate": 0.0002644266055045871, "loss": 3.0566, "step": 1024 }, { "epoch": 0.63, "learning_rate": 0.0002643577981651376, "loss": 3.0509, "step": 1025 }, { "epoch": 0.63, "learning_rate": 0.00026428899082568804, "loss": 2.9676, "step": 1026 }, { "epoch": 0.63, "learning_rate": 0.00026422018348623853, "loss": 2.8082, "step": 1027 }, { "epoch": 0.63, "learning_rate": 0.00026415137614678897, "loss": 2.8932, "step": 1028 }, { "epoch": 0.64, "learning_rate": 0.0002640825688073394, "loss": 3.0243, "step": 1029 }, { "epoch": 0.64, "learning_rate": 0.0002640137614678899, "loss": 2.8623, "step": 1030 }, { "epoch": 0.64, "learning_rate": 0.0002639449541284403, "loss": 2.9837, "step": 1031 }, { "epoch": 0.64, "learning_rate": 0.0002638761467889908, "loss": 2.9066, "step": 1032 }, { "epoch": 0.64, "learning_rate": 0.00026380733944954125, "loss": 2.7663, "step": 1033 }, { "epoch": 0.64, "learning_rate": 0.00026373853211009174, "loss": 2.696, "step": 1034 }, { "epoch": 0.64, "learning_rate": 0.00026366972477064217, "loss": 2.8101, "step": 1035 }, { "epoch": 0.64, "learning_rate": 0.00026360091743119266, "loss": 2.6771, "step": 1036 }, { "epoch": 0.64, "learning_rate": 0.0002635321100917431, "loss": 2.707, "step": 1037 }, { "epoch": 0.64, "learning_rate": 0.0002634633027522936, "loss": 2.5267, "step": 1038 }, { "epoch": 0.64, "learning_rate": 0.000263394495412844, "loss": 2.6367, "step": 1039 }, { "epoch": 0.64, "learning_rate": 0.0002633256880733945, "loss": 2.6826, "step": 1040 }, { "epoch": 0.64, "learning_rate": 0.00026325688073394494, "loss": 2.4771, "step": 1041 }, { "epoch": 0.64, "learning_rate": 0.0002631880733944954, "loss": 2.4592, "step": 1042 }, { "epoch": 0.64, "learning_rate": 0.00026311926605504586, "loss": 2.4441, "step": 1043 }, { "epoch": 0.64, "learning_rate": 0.0002630504587155963, "loss": 2.2774, "step": 1044 }, { "epoch": 0.65, "learning_rate": 0.00026298165137614673, "loss": 2.0834, "step": 1045 }, { "epoch": 0.65, "learning_rate": 0.0002629128440366972, "loss": 1.9322, "step": 1046 }, { "epoch": 0.65, "learning_rate": 0.00026284403669724765, "loss": 2.0929, "step": 1047 }, { "epoch": 0.65, "learning_rate": 0.00026277522935779814, "loss": 2.1183, "step": 1048 }, { "epoch": 0.65, "learning_rate": 0.00026270642201834857, "loss": 1.8789, "step": 1049 }, { "epoch": 0.65, "learning_rate": 0.00026263761467889906, "loss": 1.7628, "step": 1050 }, { "epoch": 0.65, "learning_rate": 0.0002625688073394495, "loss": 4.9101, "step": 1051 }, { "epoch": 0.65, "learning_rate": 0.0002625, "loss": 4.2879, "step": 1052 }, { "epoch": 0.65, "learning_rate": 0.0002624311926605504, "loss": 3.8733, "step": 1053 }, { "epoch": 0.65, "learning_rate": 0.0002623623853211009, "loss": 3.6801, "step": 1054 }, { "epoch": 0.65, "learning_rate": 0.00026229357798165134, "loss": 3.679, "step": 1055 }, { "epoch": 0.65, "learning_rate": 0.00026222477064220183, "loss": 3.5236, "step": 1056 }, { "epoch": 0.65, "learning_rate": 0.00026215596330275226, "loss": 3.5384, "step": 1057 }, { "epoch": 0.65, "learning_rate": 0.00026208715596330275, "loss": 3.3687, "step": 1058 }, { "epoch": 0.65, "learning_rate": 0.0002620183486238532, "loss": 3.4308, "step": 1059 }, { "epoch": 0.65, "learning_rate": 0.0002619495412844037, "loss": 3.2832, "step": 1060 }, { "epoch": 0.65, "learning_rate": 0.0002618807339449541, "loss": 3.0896, "step": 1061 }, { "epoch": 0.66, "learning_rate": 0.00026181192660550454, "loss": 3.347, "step": 1062 }, { "epoch": 0.66, "learning_rate": 0.00026174311926605503, "loss": 3.1642, "step": 1063 }, { "epoch": 0.66, "learning_rate": 0.00026167431192660546, "loss": 3.1044, "step": 1064 }, { "epoch": 0.66, "learning_rate": 0.00026160550458715595, "loss": 3.2826, "step": 1065 }, { "epoch": 0.66, "learning_rate": 0.0002615366972477064, "loss": 3.1107, "step": 1066 }, { "epoch": 0.66, "learning_rate": 0.0002614678899082569, "loss": 3.148, "step": 1067 }, { "epoch": 0.66, "learning_rate": 0.0002613990825688073, "loss": 3.0607, "step": 1068 }, { "epoch": 0.66, "learning_rate": 0.0002613302752293578, "loss": 3.0048, "step": 1069 }, { "epoch": 0.66, "learning_rate": 0.00026126146788990823, "loss": 2.9932, "step": 1070 }, { "epoch": 0.66, "learning_rate": 0.0002611926605504587, "loss": 3.0713, "step": 1071 }, { "epoch": 0.66, "learning_rate": 0.00026112385321100915, "loss": 3.0098, "step": 1072 }, { "epoch": 0.66, "learning_rate": 0.00026105504587155964, "loss": 2.9331, "step": 1073 }, { "epoch": 0.66, "learning_rate": 0.0002609862385321101, "loss": 3.0307, "step": 1074 }, { "epoch": 0.66, "learning_rate": 0.00026091743119266057, "loss": 2.8047, "step": 1075 }, { "epoch": 0.66, "learning_rate": 0.000260848623853211, "loss": 2.8941, "step": 1076 }, { "epoch": 0.66, "learning_rate": 0.00026077981651376143, "loss": 2.8448, "step": 1077 }, { "epoch": 0.67, "learning_rate": 0.00026071100917431187, "loss": 2.7392, "step": 1078 }, { "epoch": 0.67, "learning_rate": 0.00026064220183486236, "loss": 2.8165, "step": 1079 }, { "epoch": 0.67, "learning_rate": 0.0002605733944954128, "loss": 2.721, "step": 1080 }, { "epoch": 0.67, "learning_rate": 0.0002605045871559633, "loss": 2.7159, "step": 1081 }, { "epoch": 0.67, "learning_rate": 0.0002604357798165137, "loss": 2.6376, "step": 1082 }, { "epoch": 0.67, "learning_rate": 0.0002603669724770642, "loss": 2.6103, "step": 1083 }, { "epoch": 0.67, "learning_rate": 0.00026029816513761464, "loss": 2.5928, "step": 1084 }, { "epoch": 0.67, "learning_rate": 0.0002602293577981651, "loss": 2.7037, "step": 1085 }, { "epoch": 0.67, "learning_rate": 0.00026016055045871556, "loss": 2.6811, "step": 1086 }, { "epoch": 0.67, "learning_rate": 0.00026009174311926605, "loss": 2.714, "step": 1087 }, { "epoch": 0.67, "learning_rate": 0.0002600229357798165, "loss": 2.6176, "step": 1088 }, { "epoch": 0.67, "learning_rate": 0.00025995412844036697, "loss": 2.5062, "step": 1089 }, { "epoch": 0.67, "learning_rate": 0.0002598853211009174, "loss": 2.5099, "step": 1090 }, { "epoch": 0.67, "learning_rate": 0.0002598165137614679, "loss": 2.4763, "step": 1091 }, { "epoch": 0.67, "learning_rate": 0.0002597477064220183, "loss": 2.3145, "step": 1092 }, { "epoch": 0.67, "learning_rate": 0.00025967889908256876, "loss": 2.194, "step": 1093 }, { "epoch": 0.68, "learning_rate": 0.00025961009174311925, "loss": 2.333, "step": 1094 }, { "epoch": 0.68, "learning_rate": 0.0002595412844036697, "loss": 2.0515, "step": 1095 }, { "epoch": 0.68, "learning_rate": 0.00025947247706422017, "loss": 1.9804, "step": 1096 }, { "epoch": 0.68, "learning_rate": 0.0002594036697247706, "loss": 1.9431, "step": 1097 }, { "epoch": 0.68, "learning_rate": 0.0002593348623853211, "loss": 2.0674, "step": 1098 }, { "epoch": 0.68, "learning_rate": 0.00025926605504587153, "loss": 1.8007, "step": 1099 }, { "epoch": 0.68, "learning_rate": 0.000259197247706422, "loss": 1.7359, "step": 1100 }, { "epoch": 0.68, "learning_rate": 0.00025912844036697245, "loss": 4.9063, "step": 1101 }, { "epoch": 0.68, "learning_rate": 0.00025905963302752294, "loss": 4.5578, "step": 1102 }, { "epoch": 0.68, "learning_rate": 0.0002589908256880734, "loss": 3.9415, "step": 1103 }, { "epoch": 0.68, "learning_rate": 0.00025892201834862386, "loss": 3.9376, "step": 1104 }, { "epoch": 0.68, "learning_rate": 0.0002588532110091743, "loss": 3.6935, "step": 1105 }, { "epoch": 0.68, "learning_rate": 0.0002587844036697248, "loss": 3.2228, "step": 1106 }, { "epoch": 0.68, "learning_rate": 0.0002587155963302752, "loss": 3.1785, "step": 1107 }, { "epoch": 0.68, "learning_rate": 0.0002586467889908257, "loss": 3.4129, "step": 1108 }, { "epoch": 0.68, "learning_rate": 0.0002585779816513761, "loss": 3.1752, "step": 1109 }, { "epoch": 0.69, "learning_rate": 0.0002585091743119266, "loss": 3.1613, "step": 1110 }, { "epoch": 0.69, "learning_rate": 0.000258440366972477, "loss": 3.1191, "step": 1111 }, { "epoch": 0.69, "learning_rate": 0.0002583715596330275, "loss": 3.0338, "step": 1112 }, { "epoch": 0.69, "learning_rate": 0.00025830275229357793, "loss": 3.106, "step": 1113 }, { "epoch": 0.69, "learning_rate": 0.0002582339449541284, "loss": 3.1221, "step": 1114 }, { "epoch": 0.69, "learning_rate": 0.00025816513761467885, "loss": 2.9862, "step": 1115 }, { "epoch": 0.69, "learning_rate": 0.00025809633027522934, "loss": 2.9024, "step": 1116 }, { "epoch": 0.69, "learning_rate": 0.0002580275229357798, "loss": 3.0392, "step": 1117 }, { "epoch": 0.69, "learning_rate": 0.00025795871559633027, "loss": 2.8382, "step": 1118 }, { "epoch": 0.69, "learning_rate": 0.0002578899082568807, "loss": 2.7565, "step": 1119 }, { "epoch": 0.69, "learning_rate": 0.0002578211009174312, "loss": 2.9379, "step": 1120 }, { "epoch": 0.69, "learning_rate": 0.0002577522935779816, "loss": 2.8578, "step": 1121 }, { "epoch": 0.69, "learning_rate": 0.0002576834862385321, "loss": 2.8066, "step": 1122 }, { "epoch": 0.69, "learning_rate": 0.00025761467889908254, "loss": 2.8392, "step": 1123 }, { "epoch": 0.69, "learning_rate": 0.00025754587155963303, "loss": 2.6414, "step": 1124 }, { "epoch": 0.69, "learning_rate": 0.00025747706422018347, "loss": 2.8674, "step": 1125 }, { "epoch": 0.7, "learning_rate": 0.0002574082568807339, "loss": 2.9095, "step": 1126 }, { "epoch": 0.7, "learning_rate": 0.0002573394495412844, "loss": 2.7973, "step": 1127 }, { "epoch": 0.7, "learning_rate": 0.0002572706422018348, "loss": 2.7233, "step": 1128 }, { "epoch": 0.7, "learning_rate": 0.0002572018348623853, "loss": 2.8347, "step": 1129 }, { "epoch": 0.7, "learning_rate": 0.00025713302752293575, "loss": 2.692, "step": 1130 }, { "epoch": 0.7, "learning_rate": 0.00025706422018348623, "loss": 2.6711, "step": 1131 }, { "epoch": 0.7, "learning_rate": 0.00025699541284403667, "loss": 2.7745, "step": 1132 }, { "epoch": 0.7, "learning_rate": 0.00025692660550458716, "loss": 2.5442, "step": 1133 }, { "epoch": 0.7, "learning_rate": 0.0002568577981651376, "loss": 2.5528, "step": 1134 }, { "epoch": 0.7, "learning_rate": 0.0002567889908256881, "loss": 2.6461, "step": 1135 }, { "epoch": 0.7, "learning_rate": 0.0002567201834862385, "loss": 2.5496, "step": 1136 }, { "epoch": 0.7, "learning_rate": 0.000256651376146789, "loss": 2.5886, "step": 1137 }, { "epoch": 0.7, "learning_rate": 0.00025658256880733944, "loss": 2.2997, "step": 1138 }, { "epoch": 0.7, "learning_rate": 0.0002565137614678899, "loss": 2.2357, "step": 1139 }, { "epoch": 0.7, "learning_rate": 0.00025644495412844036, "loss": 2.333, "step": 1140 }, { "epoch": 0.7, "learning_rate": 0.0002563761467889908, "loss": 2.3503, "step": 1141 }, { "epoch": 0.7, "learning_rate": 0.0002563073394495412, "loss": 2.2457, "step": 1142 }, { "epoch": 0.71, "learning_rate": 0.0002562385321100917, "loss": 2.2465, "step": 1143 }, { "epoch": 0.71, "learning_rate": 0.00025616972477064215, "loss": 2.3449, "step": 1144 }, { "epoch": 0.71, "learning_rate": 0.00025610091743119264, "loss": 1.9791, "step": 1145 }, { "epoch": 0.71, "learning_rate": 0.00025603211009174307, "loss": 2.0701, "step": 1146 }, { "epoch": 0.71, "learning_rate": 0.00025596330275229356, "loss": 2.0319, "step": 1147 }, { "epoch": 0.71, "learning_rate": 0.000255894495412844, "loss": 1.8884, "step": 1148 }, { "epoch": 0.71, "learning_rate": 0.0002558256880733945, "loss": 1.7236, "step": 1149 }, { "epoch": 0.71, "learning_rate": 0.0002557568807339449, "loss": 1.7023, "step": 1150 }, { "epoch": 0.71, "learning_rate": 0.0002556880733944954, "loss": 5.0322, "step": 1151 }, { "epoch": 0.71, "learning_rate": 0.00025561926605504584, "loss": 4.4311, "step": 1152 }, { "epoch": 0.71, "learning_rate": 0.00025555045871559633, "loss": 3.7635, "step": 1153 }, { "epoch": 0.71, "learning_rate": 0.00025548165137614676, "loss": 3.4902, "step": 1154 }, { "epoch": 0.71, "learning_rate": 0.00025541284403669725, "loss": 3.5092, "step": 1155 }, { "epoch": 0.71, "learning_rate": 0.0002553440366972477, "loss": 3.1856, "step": 1156 }, { "epoch": 0.71, "learning_rate": 0.0002552752293577981, "loss": 3.2636, "step": 1157 }, { "epoch": 0.71, "learning_rate": 0.0002552064220183486, "loss": 3.2892, "step": 1158 }, { "epoch": 0.72, "learning_rate": 0.00025513761467889904, "loss": 3.0195, "step": 1159 }, { "epoch": 0.72, "learning_rate": 0.00025506880733944953, "loss": 3.2674, "step": 1160 }, { "epoch": 0.72, "learning_rate": 0.00025499999999999996, "loss": 2.9148, "step": 1161 }, { "epoch": 0.72, "learning_rate": 0.00025493119266055045, "loss": 2.8722, "step": 1162 }, { "epoch": 0.72, "learning_rate": 0.0002548623853211009, "loss": 2.8195, "step": 1163 }, { "epoch": 0.72, "learning_rate": 0.0002547935779816514, "loss": 2.9372, "step": 1164 }, { "epoch": 0.72, "learning_rate": 0.0002547247706422018, "loss": 2.9755, "step": 1165 }, { "epoch": 0.72, "learning_rate": 0.0002546559633027523, "loss": 2.9018, "step": 1166 }, { "epoch": 0.72, "learning_rate": 0.00025458715596330273, "loss": 2.8212, "step": 1167 }, { "epoch": 0.72, "learning_rate": 0.0002545183486238532, "loss": 2.9238, "step": 1168 }, { "epoch": 0.72, "learning_rate": 0.00025444954128440365, "loss": 2.7164, "step": 1169 }, { "epoch": 0.72, "learning_rate": 0.00025438073394495414, "loss": 2.8057, "step": 1170 }, { "epoch": 0.72, "learning_rate": 0.0002543119266055046, "loss": 2.7161, "step": 1171 }, { "epoch": 0.72, "learning_rate": 0.00025424311926605507, "loss": 2.9002, "step": 1172 }, { "epoch": 0.72, "learning_rate": 0.00025417431192660545, "loss": 2.6253, "step": 1173 }, { "epoch": 0.72, "learning_rate": 0.00025410550458715593, "loss": 2.6776, "step": 1174 }, { "epoch": 0.73, "learning_rate": 0.00025403669724770637, "loss": 2.6572, "step": 1175 }, { "epoch": 0.73, "learning_rate": 0.00025396788990825686, "loss": 2.7412, "step": 1176 }, { "epoch": 0.73, "learning_rate": 0.0002538990825688073, "loss": 2.5223, "step": 1177 }, { "epoch": 0.73, "learning_rate": 0.0002538302752293578, "loss": 2.6715, "step": 1178 }, { "epoch": 0.73, "learning_rate": 0.0002537614678899082, "loss": 2.549, "step": 1179 }, { "epoch": 0.73, "learning_rate": 0.0002536926605504587, "loss": 2.656, "step": 1180 }, { "epoch": 0.73, "learning_rate": 0.00025362385321100914, "loss": 2.4706, "step": 1181 }, { "epoch": 0.73, "learning_rate": 0.0002535550458715596, "loss": 2.5941, "step": 1182 }, { "epoch": 0.73, "learning_rate": 0.00025348623853211006, "loss": 2.5448, "step": 1183 }, { "epoch": 0.73, "learning_rate": 0.00025341743119266055, "loss": 2.5535, "step": 1184 }, { "epoch": 0.73, "learning_rate": 0.000253348623853211, "loss": 2.4567, "step": 1185 }, { "epoch": 0.73, "learning_rate": 0.00025327981651376147, "loss": 2.43, "step": 1186 }, { "epoch": 0.73, "learning_rate": 0.0002532110091743119, "loss": 2.4834, "step": 1187 }, { "epoch": 0.73, "learning_rate": 0.0002531422018348624, "loss": 2.5156, "step": 1188 }, { "epoch": 0.73, "learning_rate": 0.0002530733944954128, "loss": 2.3824, "step": 1189 }, { "epoch": 0.73, "learning_rate": 0.00025300458715596326, "loss": 2.3285, "step": 1190 }, { "epoch": 0.74, "learning_rate": 0.00025293577981651375, "loss": 2.2779, "step": 1191 }, { "epoch": 0.74, "learning_rate": 0.0002528669724770642, "loss": 2.1884, "step": 1192 }, { "epoch": 0.74, "learning_rate": 0.00025279816513761467, "loss": 2.3475, "step": 1193 }, { "epoch": 0.74, "learning_rate": 0.0002527293577981651, "loss": 2.2281, "step": 1194 }, { "epoch": 0.74, "learning_rate": 0.0002526605504587156, "loss": 1.9123, "step": 1195 }, { "epoch": 0.74, "learning_rate": 0.00025259174311926603, "loss": 1.9846, "step": 1196 }, { "epoch": 0.74, "learning_rate": 0.0002525229357798165, "loss": 1.8164, "step": 1197 }, { "epoch": 0.74, "learning_rate": 0.00025245412844036695, "loss": 1.9029, "step": 1198 }, { "epoch": 0.74, "learning_rate": 0.00025238532110091744, "loss": 1.8056, "step": 1199 }, { "epoch": 0.74, "learning_rate": 0.00025231651376146787, "loss": 1.703, "step": 1200 }, { "epoch": 0.74, "learning_rate": 0.00025224770642201836, "loss": 4.4119, "step": 1201 }, { "epoch": 0.74, "learning_rate": 0.0002521788990825688, "loss": 3.8685, "step": 1202 }, { "epoch": 0.74, "learning_rate": 0.0002521100917431193, "loss": 3.3704, "step": 1203 }, { "epoch": 0.74, "learning_rate": 0.0002520412844036697, "loss": 3.2853, "step": 1204 }, { "epoch": 0.74, "learning_rate": 0.00025197247706422015, "loss": 3.2298, "step": 1205 }, { "epoch": 0.74, "learning_rate": 0.0002519036697247706, "loss": 3.1027, "step": 1206 }, { "epoch": 0.75, "learning_rate": 0.0002518348623853211, "loss": 2.976, "step": 1207 }, { "epoch": 0.75, "learning_rate": 0.0002517660550458715, "loss": 2.9351, "step": 1208 }, { "epoch": 0.75, "learning_rate": 0.000251697247706422, "loss": 2.8098, "step": 1209 }, { "epoch": 0.75, "learning_rate": 0.00025162844036697243, "loss": 2.8428, "step": 1210 }, { "epoch": 0.75, "learning_rate": 0.0002515596330275229, "loss": 3.0106, "step": 1211 }, { "epoch": 0.75, "learning_rate": 0.00025149082568807335, "loss": 2.9271, "step": 1212 }, { "epoch": 0.75, "learning_rate": 0.00025142201834862384, "loss": 2.7298, "step": 1213 }, { "epoch": 0.75, "learning_rate": 0.0002513532110091743, "loss": 2.9279, "step": 1214 }, { "epoch": 0.75, "learning_rate": 0.00025128440366972476, "loss": 2.7588, "step": 1215 }, { "epoch": 0.75, "learning_rate": 0.0002512155963302752, "loss": 2.8592, "step": 1216 }, { "epoch": 0.75, "learning_rate": 0.0002511467889908257, "loss": 2.8537, "step": 1217 }, { "epoch": 0.75, "learning_rate": 0.0002510779816513761, "loss": 2.7539, "step": 1218 }, { "epoch": 0.75, "learning_rate": 0.0002510091743119266, "loss": 2.6405, "step": 1219 }, { "epoch": 0.75, "learning_rate": 0.00025094036697247704, "loss": 2.5818, "step": 1220 }, { "epoch": 0.75, "learning_rate": 0.0002508715596330275, "loss": 2.6985, "step": 1221 }, { "epoch": 0.75, "learning_rate": 0.00025080275229357797, "loss": 2.5826, "step": 1222 }, { "epoch": 0.75, "learning_rate": 0.0002507339449541284, "loss": 2.7408, "step": 1223 }, { "epoch": 0.76, "learning_rate": 0.0002506651376146789, "loss": 2.7314, "step": 1224 }, { "epoch": 0.76, "learning_rate": 0.0002505963302752293, "loss": 2.507, "step": 1225 }, { "epoch": 0.76, "learning_rate": 0.0002505275229357798, "loss": 2.4916, "step": 1226 }, { "epoch": 0.76, "learning_rate": 0.00025045871559633025, "loss": 2.6332, "step": 1227 }, { "epoch": 0.76, "learning_rate": 0.00025038990825688073, "loss": 2.6837, "step": 1228 }, { "epoch": 0.76, "learning_rate": 0.00025032110091743117, "loss": 2.5342, "step": 1229 }, { "epoch": 0.76, "learning_rate": 0.00025025229357798166, "loss": 2.5196, "step": 1230 }, { "epoch": 0.76, "learning_rate": 0.0002501834862385321, "loss": 2.5154, "step": 1231 }, { "epoch": 0.76, "learning_rate": 0.0002501146788990826, "loss": 2.4692, "step": 1232 }, { "epoch": 0.76, "learning_rate": 0.000250045871559633, "loss": 2.4471, "step": 1233 }, { "epoch": 0.76, "learning_rate": 0.0002499770642201835, "loss": 2.3725, "step": 1234 }, { "epoch": 0.76, "learning_rate": 0.00024990825688073394, "loss": 2.4658, "step": 1235 }, { "epoch": 0.76, "learning_rate": 0.0002498394495412844, "loss": 2.4992, "step": 1236 }, { "epoch": 0.76, "learning_rate": 0.0002497706422018348, "loss": 2.3282, "step": 1237 }, { "epoch": 0.76, "learning_rate": 0.0002497018348623853, "loss": 2.1878, "step": 1238 }, { "epoch": 0.76, "learning_rate": 0.0002496330275229357, "loss": 2.2067, "step": 1239 }, { "epoch": 0.77, "learning_rate": 0.0002495642201834862, "loss": 2.2267, "step": 1240 }, { "epoch": 0.77, "learning_rate": 0.00024949541284403665, "loss": 2.2462, "step": 1241 }, { "epoch": 0.77, "learning_rate": 0.00024942660550458714, "loss": 2.1119, "step": 1242 }, { "epoch": 0.77, "learning_rate": 0.00024935779816513757, "loss": 2.1393, "step": 1243 }, { "epoch": 0.77, "learning_rate": 0.00024928899082568806, "loss": 2.0206, "step": 1244 }, { "epoch": 0.77, "learning_rate": 0.0002492201834862385, "loss": 2.1669, "step": 1245 }, { "epoch": 0.77, "learning_rate": 0.000249151376146789, "loss": 1.8983, "step": 1246 }, { "epoch": 0.77, "learning_rate": 0.0002490825688073394, "loss": 1.821, "step": 1247 }, { "epoch": 0.77, "learning_rate": 0.0002490137614678899, "loss": 1.6637, "step": 1248 }, { "epoch": 0.77, "learning_rate": 0.00024894495412844034, "loss": 1.6363, "step": 1249 }, { "epoch": 0.77, "learning_rate": 0.00024887614678899083, "loss": 1.5859, "step": 1250 }, { "epoch": 0.77, "learning_rate": 0.00024880733944954126, "loss": 4.6515, "step": 1251 }, { "epoch": 0.77, "learning_rate": 0.00024873853211009175, "loss": 4.2094, "step": 1252 }, { "epoch": 0.77, "learning_rate": 0.0002486697247706422, "loss": 3.7558, "step": 1253 }, { "epoch": 0.77, "learning_rate": 0.0002486009174311926, "loss": 3.486, "step": 1254 }, { "epoch": 0.77, "learning_rate": 0.0002485321100917431, "loss": 3.3385, "step": 1255 }, { "epoch": 0.78, "learning_rate": 0.00024846330275229354, "loss": 3.1675, "step": 1256 }, { "epoch": 0.78, "learning_rate": 0.00024839449541284403, "loss": 3.1744, "step": 1257 }, { "epoch": 0.78, "learning_rate": 0.00024832568807339446, "loss": 3.0137, "step": 1258 }, { "epoch": 0.78, "learning_rate": 0.00024825688073394495, "loss": 3.0504, "step": 1259 }, { "epoch": 0.78, "learning_rate": 0.0002481880733944954, "loss": 3.0661, "step": 1260 }, { "epoch": 0.78, "learning_rate": 0.0002481192660550459, "loss": 2.7988, "step": 1261 }, { "epoch": 0.78, "learning_rate": 0.0002480504587155963, "loss": 2.8546, "step": 1262 }, { "epoch": 0.78, "learning_rate": 0.0002479816513761468, "loss": 2.654, "step": 1263 }, { "epoch": 0.78, "learning_rate": 0.00024791284403669723, "loss": 2.6323, "step": 1264 }, { "epoch": 0.78, "learning_rate": 0.0002478440366972477, "loss": 2.7632, "step": 1265 }, { "epoch": 0.78, "learning_rate": 0.00024777522935779815, "loss": 2.8722, "step": 1266 }, { "epoch": 0.78, "learning_rate": 0.00024770642201834864, "loss": 2.7517, "step": 1267 }, { "epoch": 0.78, "learning_rate": 0.0002476376146788991, "loss": 2.8213, "step": 1268 }, { "epoch": 0.78, "learning_rate": 0.0002475688073394495, "loss": 2.6378, "step": 1269 }, { "epoch": 0.78, "learning_rate": 0.00024749999999999994, "loss": 2.6336, "step": 1270 }, { "epoch": 0.78, "learning_rate": 0.00024743119266055043, "loss": 2.6516, "step": 1271 }, { "epoch": 0.79, "learning_rate": 0.00024736238532110087, "loss": 2.6232, "step": 1272 }, { "epoch": 0.79, "learning_rate": 0.00024729357798165136, "loss": 2.6596, "step": 1273 }, { "epoch": 0.79, "learning_rate": 0.0002472247706422018, "loss": 2.6417, "step": 1274 }, { "epoch": 0.79, "learning_rate": 0.0002471559633027523, "loss": 2.7287, "step": 1275 }, { "epoch": 0.79, "learning_rate": 0.0002470871559633027, "loss": 2.4056, "step": 1276 }, { "epoch": 0.79, "learning_rate": 0.0002470183486238532, "loss": 2.6234, "step": 1277 }, { "epoch": 0.79, "learning_rate": 0.00024694954128440363, "loss": 2.5816, "step": 1278 }, { "epoch": 0.79, "learning_rate": 0.0002468807339449541, "loss": 2.415, "step": 1279 }, { "epoch": 0.79, "learning_rate": 0.00024681192660550456, "loss": 2.5448, "step": 1280 }, { "epoch": 0.79, "learning_rate": 0.00024674311926605505, "loss": 2.5468, "step": 1281 }, { "epoch": 0.79, "learning_rate": 0.0002466743119266055, "loss": 2.5486, "step": 1282 }, { "epoch": 0.79, "learning_rate": 0.00024660550458715597, "loss": 2.5176, "step": 1283 }, { "epoch": 0.79, "learning_rate": 0.0002465366972477064, "loss": 2.1955, "step": 1284 }, { "epoch": 0.79, "learning_rate": 0.00024646788990825684, "loss": 2.3487, "step": 1285 }, { "epoch": 0.79, "learning_rate": 0.0002463990825688073, "loss": 2.502, "step": 1286 }, { "epoch": 0.79, "learning_rate": 0.00024633027522935776, "loss": 2.1346, "step": 1287 }, { "epoch": 0.8, "learning_rate": 0.00024626146788990825, "loss": 2.2725, "step": 1288 }, { "epoch": 0.8, "learning_rate": 0.0002461926605504587, "loss": 2.3531, "step": 1289 }, { "epoch": 0.8, "learning_rate": 0.00024612385321100917, "loss": 2.1908, "step": 1290 }, { "epoch": 0.8, "learning_rate": 0.0002460550458715596, "loss": 2.0789, "step": 1291 }, { "epoch": 0.8, "learning_rate": 0.0002459862385321101, "loss": 2.1947, "step": 1292 }, { "epoch": 0.8, "learning_rate": 0.0002459174311926605, "loss": 2.0086, "step": 1293 }, { "epoch": 0.8, "learning_rate": 0.000245848623853211, "loss": 1.9971, "step": 1294 }, { "epoch": 0.8, "learning_rate": 0.00024577981651376145, "loss": 1.8451, "step": 1295 }, { "epoch": 0.8, "learning_rate": 0.00024571100917431194, "loss": 1.756, "step": 1296 }, { "epoch": 0.8, "learning_rate": 0.00024564220183486237, "loss": 1.6671, "step": 1297 }, { "epoch": 0.8, "learning_rate": 0.00024557339449541286, "loss": 1.7778, "step": 1298 }, { "epoch": 0.8, "learning_rate": 0.0002455045871559633, "loss": 1.3684, "step": 1299 }, { "epoch": 0.8, "learning_rate": 0.0002454357798165138, "loss": 1.4714, "step": 1300 }, { "epoch": 0.8, "learning_rate": 0.00024536697247706416, "loss": 4.441, "step": 1301 }, { "epoch": 0.8, "learning_rate": 0.00024529816513761465, "loss": 3.8664, "step": 1302 }, { "epoch": 0.8, "learning_rate": 0.0002452293577981651, "loss": 3.3995, "step": 1303 }, { "epoch": 0.8, "learning_rate": 0.0002451605504587156, "loss": 3.3105, "step": 1304 }, { "epoch": 0.81, "learning_rate": 0.000245091743119266, "loss": 3.1228, "step": 1305 }, { "epoch": 0.81, "learning_rate": 0.0002450229357798165, "loss": 2.9794, "step": 1306 }, { "epoch": 0.81, "learning_rate": 0.00024495412844036693, "loss": 2.9494, "step": 1307 }, { "epoch": 0.81, "learning_rate": 0.0002448853211009174, "loss": 3.0211, "step": 1308 }, { "epoch": 0.81, "learning_rate": 0.00024481651376146785, "loss": 2.9939, "step": 1309 }, { "epoch": 0.81, "learning_rate": 0.00024474770642201834, "loss": 3.0454, "step": 1310 }, { "epoch": 0.81, "learning_rate": 0.0002446788990825688, "loss": 2.7784, "step": 1311 }, { "epoch": 0.81, "learning_rate": 0.00024461009174311926, "loss": 2.8133, "step": 1312 }, { "epoch": 0.81, "learning_rate": 0.0002445412844036697, "loss": 2.7374, "step": 1313 }, { "epoch": 0.81, "learning_rate": 0.0002444724770642202, "loss": 2.6958, "step": 1314 }, { "epoch": 0.81, "learning_rate": 0.0002444036697247706, "loss": 2.6172, "step": 1315 }, { "epoch": 0.81, "learning_rate": 0.0002443348623853211, "loss": 2.6109, "step": 1316 }, { "epoch": 0.81, "learning_rate": 0.00024426605504587154, "loss": 2.7339, "step": 1317 }, { "epoch": 0.81, "learning_rate": 0.000244197247706422, "loss": 2.5767, "step": 1318 }, { "epoch": 0.81, "learning_rate": 0.00024412844036697244, "loss": 2.557, "step": 1319 }, { "epoch": 0.81, "learning_rate": 0.0002440596330275229, "loss": 2.586, "step": 1320 }, { "epoch": 0.82, "learning_rate": 0.00024399082568807336, "loss": 2.4573, "step": 1321 }, { "epoch": 0.82, "learning_rate": 0.00024392201834862382, "loss": 2.5958, "step": 1322 }, { "epoch": 0.82, "learning_rate": 0.00024385321100917428, "loss": 2.6669, "step": 1323 }, { "epoch": 0.82, "learning_rate": 0.00024378440366972474, "loss": 2.3446, "step": 1324 }, { "epoch": 0.82, "learning_rate": 0.0002437155963302752, "loss": 2.4018, "step": 1325 }, { "epoch": 0.82, "learning_rate": 0.00024364678899082567, "loss": 2.6164, "step": 1326 }, { "epoch": 0.82, "learning_rate": 0.00024357798165137613, "loss": 2.5206, "step": 1327 }, { "epoch": 0.82, "learning_rate": 0.0002435091743119266, "loss": 2.4659, "step": 1328 }, { "epoch": 0.82, "learning_rate": 0.00024344036697247705, "loss": 2.1809, "step": 1329 }, { "epoch": 0.82, "learning_rate": 0.0002433715596330275, "loss": 2.2729, "step": 1330 }, { "epoch": 0.82, "learning_rate": 0.00024330275229357797, "loss": 2.4214, "step": 1331 }, { "epoch": 0.82, "learning_rate": 0.00024323394495412843, "loss": 2.4258, "step": 1332 }, { "epoch": 0.82, "learning_rate": 0.00024316513761467887, "loss": 2.5117, "step": 1333 }, { "epoch": 0.82, "learning_rate": 0.00024309633027522933, "loss": 2.1816, "step": 1334 }, { "epoch": 0.82, "learning_rate": 0.0002430275229357798, "loss": 2.2469, "step": 1335 }, { "epoch": 0.82, "learning_rate": 0.00024295871559633025, "loss": 2.3788, "step": 1336 }, { "epoch": 0.83, "learning_rate": 0.00024288990825688071, "loss": 2.0899, "step": 1337 }, { "epoch": 0.83, "learning_rate": 0.00024282110091743118, "loss": 2.263, "step": 1338 }, { "epoch": 0.83, "learning_rate": 0.00024275229357798164, "loss": 2.202, "step": 1339 }, { "epoch": 0.83, "learning_rate": 0.0002426834862385321, "loss": 2.1724, "step": 1340 }, { "epoch": 0.83, "learning_rate": 0.00024261467889908256, "loss": 2.0293, "step": 1341 }, { "epoch": 0.83, "learning_rate": 0.00024254587155963302, "loss": 2.1315, "step": 1342 }, { "epoch": 0.83, "learning_rate": 0.00024247706422018348, "loss": 2.0213, "step": 1343 }, { "epoch": 0.83, "learning_rate": 0.00024240825688073394, "loss": 1.9341, "step": 1344 }, { "epoch": 0.83, "learning_rate": 0.0002423394495412844, "loss": 1.7983, "step": 1345 }, { "epoch": 0.83, "learning_rate": 0.00024227064220183487, "loss": 1.77, "step": 1346 }, { "epoch": 0.83, "learning_rate": 0.00024220183486238533, "loss": 1.8767, "step": 1347 }, { "epoch": 0.83, "learning_rate": 0.00024213302752293576, "loss": 1.6549, "step": 1348 }, { "epoch": 0.83, "learning_rate": 0.0002420642201834862, "loss": 1.5778, "step": 1349 }, { "epoch": 0.83, "learning_rate": 0.00024199541284403666, "loss": 1.5833, "step": 1350 }, { "epoch": 0.83, "learning_rate": 0.00024192660550458712, "loss": 4.6938, "step": 1351 }, { "epoch": 0.83, "learning_rate": 0.00024185779816513758, "loss": 4.0161, "step": 1352 }, { "epoch": 0.84, "learning_rate": 0.00024178899082568804, "loss": 3.5704, "step": 1353 }, { "epoch": 0.84, "learning_rate": 0.0002417201834862385, "loss": 3.1779, "step": 1354 }, { "epoch": 0.84, "learning_rate": 0.00024165137614678896, "loss": 3.0573, "step": 1355 }, { "epoch": 0.84, "learning_rate": 0.00024158256880733942, "loss": 3.1404, "step": 1356 }, { "epoch": 0.84, "learning_rate": 0.00024151376146788989, "loss": 2.7964, "step": 1357 }, { "epoch": 0.84, "learning_rate": 0.00024144495412844035, "loss": 2.9402, "step": 1358 }, { "epoch": 0.84, "learning_rate": 0.0002413761467889908, "loss": 2.7826, "step": 1359 }, { "epoch": 0.84, "learning_rate": 0.00024130733944954127, "loss": 2.7306, "step": 1360 }, { "epoch": 0.84, "learning_rate": 0.00024123853211009173, "loss": 2.8451, "step": 1361 }, { "epoch": 0.84, "learning_rate": 0.0002411697247706422, "loss": 2.6798, "step": 1362 }, { "epoch": 0.84, "learning_rate": 0.00024110091743119265, "loss": 2.7597, "step": 1363 }, { "epoch": 0.84, "learning_rate": 0.00024103211009174311, "loss": 2.8166, "step": 1364 }, { "epoch": 0.84, "learning_rate": 0.00024096330275229355, "loss": 2.745, "step": 1365 }, { "epoch": 0.84, "learning_rate": 0.000240894495412844, "loss": 2.5577, "step": 1366 }, { "epoch": 0.84, "learning_rate": 0.00024082568807339447, "loss": 2.6861, "step": 1367 }, { "epoch": 0.84, "learning_rate": 0.00024075688073394493, "loss": 2.6284, "step": 1368 }, { "epoch": 0.84, "learning_rate": 0.0002406880733944954, "loss": 2.4811, "step": 1369 }, { "epoch": 0.85, "learning_rate": 0.00024061926605504585, "loss": 2.5559, "step": 1370 }, { "epoch": 0.85, "learning_rate": 0.00024055045871559632, "loss": 2.5791, "step": 1371 }, { "epoch": 0.85, "learning_rate": 0.00024048165137614678, "loss": 2.529, "step": 1372 }, { "epoch": 0.85, "learning_rate": 0.00024041284403669724, "loss": 2.4775, "step": 1373 }, { "epoch": 0.85, "learning_rate": 0.0002403440366972477, "loss": 2.5257, "step": 1374 }, { "epoch": 0.85, "learning_rate": 0.00024027522935779816, "loss": 2.5076, "step": 1375 }, { "epoch": 0.85, "learning_rate": 0.00024020642201834862, "loss": 2.4796, "step": 1376 }, { "epoch": 0.85, "learning_rate": 0.00024013761467889908, "loss": 2.3824, "step": 1377 }, { "epoch": 0.85, "learning_rate": 0.00024006880733944954, "loss": 2.2693, "step": 1378 }, { "epoch": 0.85, "learning_rate": 0.00023999999999999998, "loss": 2.3754, "step": 1379 }, { "epoch": 0.85, "learning_rate": 0.00023993119266055044, "loss": 2.3962, "step": 1380 }, { "epoch": 0.85, "learning_rate": 0.00023986238532110087, "loss": 2.4415, "step": 1381 }, { "epoch": 0.85, "learning_rate": 0.00023979357798165134, "loss": 2.3266, "step": 1382 }, { "epoch": 0.85, "learning_rate": 0.0002397247706422018, "loss": 2.2076, "step": 1383 }, { "epoch": 0.85, "learning_rate": 0.00023965596330275226, "loss": 2.1958, "step": 1384 }, { "epoch": 0.85, "learning_rate": 0.00023958715596330272, "loss": 2.3991, "step": 1385 }, { "epoch": 0.86, "learning_rate": 0.00023951834862385318, "loss": 2.0174, "step": 1386 }, { "epoch": 0.86, "learning_rate": 0.00023944954128440364, "loss": 2.1168, "step": 1387 }, { "epoch": 0.86, "learning_rate": 0.0002393807339449541, "loss": 2.0234, "step": 1388 }, { "epoch": 0.86, "learning_rate": 0.00023931192660550456, "loss": 2.2305, "step": 1389 }, { "epoch": 0.86, "learning_rate": 0.00023924311926605503, "loss": 2.2301, "step": 1390 }, { "epoch": 0.86, "learning_rate": 0.0002391743119266055, "loss": 2.1488, "step": 1391 }, { "epoch": 0.86, "learning_rate": 0.00023910550458715595, "loss": 1.9698, "step": 1392 }, { "epoch": 0.86, "learning_rate": 0.0002390366972477064, "loss": 1.8753, "step": 1393 }, { "epoch": 0.86, "learning_rate": 0.00023896788990825687, "loss": 1.9798, "step": 1394 }, { "epoch": 0.86, "learning_rate": 0.00023889908256880733, "loss": 1.822, "step": 1395 }, { "epoch": 0.86, "learning_rate": 0.0002388302752293578, "loss": 1.6806, "step": 1396 }, { "epoch": 0.86, "learning_rate": 0.00023876146788990823, "loss": 1.7254, "step": 1397 }, { "epoch": 0.86, "learning_rate": 0.0002386926605504587, "loss": 1.5683, "step": 1398 }, { "epoch": 0.86, "learning_rate": 0.00023862385321100915, "loss": 1.5895, "step": 1399 }, { "epoch": 0.86, "learning_rate": 0.0002385550458715596, "loss": 1.4108, "step": 1400 }, { "epoch": 0.86, "learning_rate": 0.00023848623853211007, "loss": 4.3234, "step": 1401 }, { "epoch": 0.87, "learning_rate": 0.00023841743119266053, "loss": 3.7324, "step": 1402 }, { "epoch": 0.87, "learning_rate": 0.000238348623853211, "loss": 3.3879, "step": 1403 }, { "epoch": 0.87, "learning_rate": 0.00023827981651376146, "loss": 3.0386, "step": 1404 }, { "epoch": 0.87, "learning_rate": 0.00023821100917431192, "loss": 3.051, "step": 1405 }, { "epoch": 0.87, "learning_rate": 0.00023814220183486238, "loss": 2.7574, "step": 1406 }, { "epoch": 0.87, "learning_rate": 0.00023807339449541284, "loss": 2.788, "step": 1407 }, { "epoch": 0.87, "learning_rate": 0.0002380045871559633, "loss": 2.7725, "step": 1408 }, { "epoch": 0.87, "learning_rate": 0.00023793577981651376, "loss": 2.8618, "step": 1409 }, { "epoch": 0.87, "learning_rate": 0.0002378669724770642, "loss": 2.4982, "step": 1410 }, { "epoch": 0.87, "learning_rate": 0.00023779816513761466, "loss": 2.6049, "step": 1411 }, { "epoch": 0.87, "learning_rate": 0.00023772935779816512, "loss": 2.6398, "step": 1412 }, { "epoch": 0.87, "learning_rate": 0.00023766055045871555, "loss": 2.6199, "step": 1413 }, { "epoch": 0.87, "learning_rate": 0.00023759174311926602, "loss": 2.4028, "step": 1414 }, { "epoch": 0.87, "learning_rate": 0.00023752293577981648, "loss": 2.6199, "step": 1415 }, { "epoch": 0.87, "learning_rate": 0.00023745412844036694, "loss": 2.6152, "step": 1416 }, { "epoch": 0.87, "learning_rate": 0.0002373853211009174, "loss": 2.6174, "step": 1417 }, { "epoch": 0.88, "learning_rate": 0.00023731651376146786, "loss": 2.6818, "step": 1418 }, { "epoch": 0.88, "learning_rate": 0.00023724770642201832, "loss": 2.4989, "step": 1419 }, { "epoch": 0.88, "learning_rate": 0.00023717889908256878, "loss": 2.5147, "step": 1420 }, { "epoch": 0.88, "learning_rate": 0.00023711009174311924, "loss": 2.2742, "step": 1421 }, { "epoch": 0.88, "learning_rate": 0.0002370412844036697, "loss": 2.4624, "step": 1422 }, { "epoch": 0.88, "learning_rate": 0.00023697247706422017, "loss": 2.511, "step": 1423 }, { "epoch": 0.88, "learning_rate": 0.00023690366972477063, "loss": 2.3468, "step": 1424 }, { "epoch": 0.88, "learning_rate": 0.0002368348623853211, "loss": 2.5092, "step": 1425 }, { "epoch": 0.88, "learning_rate": 0.00023676605504587155, "loss": 2.4016, "step": 1426 }, { "epoch": 0.88, "learning_rate": 0.000236697247706422, "loss": 2.3635, "step": 1427 }, { "epoch": 0.88, "learning_rate": 0.00023662844036697247, "loss": 2.4423, "step": 1428 }, { "epoch": 0.88, "learning_rate": 0.0002365596330275229, "loss": 2.4538, "step": 1429 }, { "epoch": 0.88, "learning_rate": 0.00023649082568807337, "loss": 2.3316, "step": 1430 }, { "epoch": 0.88, "learning_rate": 0.00023642201834862383, "loss": 2.2705, "step": 1431 }, { "epoch": 0.88, "learning_rate": 0.0002363532110091743, "loss": 2.219, "step": 1432 }, { "epoch": 0.88, "learning_rate": 0.00023628440366972475, "loss": 2.2657, "step": 1433 }, { "epoch": 0.89, "learning_rate": 0.0002362155963302752, "loss": 2.4393, "step": 1434 }, { "epoch": 0.89, "learning_rate": 0.00023614678899082567, "loss": 2.377, "step": 1435 }, { "epoch": 0.89, "learning_rate": 0.00023607798165137614, "loss": 2.2584, "step": 1436 }, { "epoch": 0.89, "learning_rate": 0.0002360091743119266, "loss": 2.1022, "step": 1437 }, { "epoch": 0.89, "learning_rate": 0.00023594036697247706, "loss": 2.1148, "step": 1438 }, { "epoch": 0.89, "learning_rate": 0.00023587155963302752, "loss": 1.9968, "step": 1439 }, { "epoch": 0.89, "learning_rate": 0.00023580275229357798, "loss": 1.9591, "step": 1440 }, { "epoch": 0.89, "learning_rate": 0.00023573394495412842, "loss": 1.9437, "step": 1441 }, { "epoch": 0.89, "learning_rate": 0.00023566513761467888, "loss": 1.9426, "step": 1442 }, { "epoch": 0.89, "learning_rate": 0.00023559633027522934, "loss": 2.0203, "step": 1443 }, { "epoch": 0.89, "learning_rate": 0.0002355275229357798, "loss": 1.8824, "step": 1444 }, { "epoch": 0.89, "learning_rate": 0.00023545871559633023, "loss": 1.9237, "step": 1445 }, { "epoch": 0.89, "learning_rate": 0.0002353899082568807, "loss": 1.7076, "step": 1446 }, { "epoch": 0.89, "learning_rate": 0.00023532110091743116, "loss": 1.5834, "step": 1447 }, { "epoch": 0.89, "learning_rate": 0.00023525229357798162, "loss": 1.5915, "step": 1448 }, { "epoch": 0.89, "learning_rate": 0.00023518348623853208, "loss": 1.4358, "step": 1449 }, { "epoch": 0.89, "learning_rate": 0.00023511467889908254, "loss": 1.4267, "step": 1450 }, { "epoch": 0.9, "learning_rate": 0.000235045871559633, "loss": 4.0403, "step": 1451 }, { "epoch": 0.9, "learning_rate": 0.00023497706422018346, "loss": 3.6418, "step": 1452 }, { "epoch": 0.9, "learning_rate": 0.00023490825688073392, "loss": 3.1654, "step": 1453 }, { "epoch": 0.9, "learning_rate": 0.00023483944954128438, "loss": 2.7494, "step": 1454 }, { "epoch": 0.9, "learning_rate": 0.00023477064220183485, "loss": 2.7254, "step": 1455 }, { "epoch": 0.9, "learning_rate": 0.0002347018348623853, "loss": 2.7735, "step": 1456 }, { "epoch": 0.9, "learning_rate": 0.00023463302752293577, "loss": 2.7853, "step": 1457 }, { "epoch": 0.9, "learning_rate": 0.00023456422018348623, "loss": 2.7738, "step": 1458 }, { "epoch": 0.9, "learning_rate": 0.0002344954128440367, "loss": 2.6205, "step": 1459 }, { "epoch": 0.9, "learning_rate": 0.00023442660550458715, "loss": 2.657, "step": 1460 }, { "epoch": 0.9, "learning_rate": 0.00023435779816513759, "loss": 2.5686, "step": 1461 }, { "epoch": 0.9, "learning_rate": 0.00023428899082568805, "loss": 2.6935, "step": 1462 }, { "epoch": 0.9, "learning_rate": 0.0002342201834862385, "loss": 2.4596, "step": 1463 }, { "epoch": 0.9, "learning_rate": 0.00023415137614678897, "loss": 2.3145, "step": 1464 }, { "epoch": 0.9, "learning_rate": 0.00023408256880733943, "loss": 2.5441, "step": 1465 }, { "epoch": 0.9, "learning_rate": 0.0002340137614678899, "loss": 2.4676, "step": 1466 }, { "epoch": 0.91, "learning_rate": 0.00023394495412844035, "loss": 2.3969, "step": 1467 }, { "epoch": 0.91, "learning_rate": 0.00023387614678899082, "loss": 2.3363, "step": 1468 }, { "epoch": 0.91, "learning_rate": 0.00023380733944954128, "loss": 2.6365, "step": 1469 }, { "epoch": 0.91, "learning_rate": 0.00023373853211009174, "loss": 2.3824, "step": 1470 }, { "epoch": 0.91, "learning_rate": 0.0002336697247706422, "loss": 2.3885, "step": 1471 }, { "epoch": 0.91, "learning_rate": 0.00023360091743119266, "loss": 2.4727, "step": 1472 }, { "epoch": 0.91, "learning_rate": 0.0002335321100917431, "loss": 2.3845, "step": 1473 }, { "epoch": 0.91, "learning_rate": 0.00023346330275229356, "loss": 2.5182, "step": 1474 }, { "epoch": 0.91, "learning_rate": 0.00023339449541284402, "loss": 2.4093, "step": 1475 }, { "epoch": 0.91, "learning_rate": 0.00023332568807339448, "loss": 2.2637, "step": 1476 }, { "epoch": 0.91, "learning_rate": 0.0002332568807339449, "loss": 2.3533, "step": 1477 }, { "epoch": 0.91, "learning_rate": 0.00023318807339449537, "loss": 2.3644, "step": 1478 }, { "epoch": 0.91, "learning_rate": 0.00023311926605504583, "loss": 2.394, "step": 1479 }, { "epoch": 0.91, "learning_rate": 0.0002330504587155963, "loss": 2.2024, "step": 1480 }, { "epoch": 0.91, "learning_rate": 0.00023298165137614676, "loss": 2.0975, "step": 1481 }, { "epoch": 0.91, "learning_rate": 0.00023291284403669722, "loss": 2.2884, "step": 1482 }, { "epoch": 0.92, "learning_rate": 0.00023284403669724768, "loss": 2.148, "step": 1483 }, { "epoch": 0.92, "learning_rate": 0.00023277522935779814, "loss": 2.1001, "step": 1484 }, { "epoch": 0.92, "learning_rate": 0.0002327064220183486, "loss": 2.0864, "step": 1485 }, { "epoch": 0.92, "learning_rate": 0.00023263761467889906, "loss": 2.061, "step": 1486 }, { "epoch": 0.92, "learning_rate": 0.00023256880733944953, "loss": 2.0502, "step": 1487 }, { "epoch": 0.92, "learning_rate": 0.00023249999999999999, "loss": 2.1436, "step": 1488 }, { "epoch": 0.92, "learning_rate": 0.00023243119266055045, "loss": 1.9573, "step": 1489 }, { "epoch": 0.92, "learning_rate": 0.0002323623853211009, "loss": 1.8843, "step": 1490 }, { "epoch": 0.92, "learning_rate": 0.00023229357798165137, "loss": 1.9595, "step": 1491 }, { "epoch": 0.92, "learning_rate": 0.00023222477064220183, "loss": 1.7707, "step": 1492 }, { "epoch": 0.92, "learning_rate": 0.00023215596330275227, "loss": 1.8516, "step": 1493 }, { "epoch": 0.92, "learning_rate": 0.00023208715596330273, "loss": 1.8104, "step": 1494 }, { "epoch": 0.92, "learning_rate": 0.0002320183486238532, "loss": 1.7862, "step": 1495 }, { "epoch": 0.92, "learning_rate": 0.00023194954128440365, "loss": 1.7488, "step": 1496 }, { "epoch": 0.92, "learning_rate": 0.0002318807339449541, "loss": 1.4077, "step": 1497 }, { "epoch": 0.92, "learning_rate": 0.00023181192660550457, "loss": 1.4606, "step": 1498 }, { "epoch": 0.93, "learning_rate": 0.00023174311926605503, "loss": 1.498, "step": 1499 }, { "epoch": 0.93, "learning_rate": 0.0002316743119266055, "loss": 1.4703, "step": 1500 }, { "epoch": 0.93, "eval_bleu": 1.1928315591476622e-15, "eval_loss": 2.897862195968628, "eval_runtime": 2471.822, "eval_samples_per_second": 5.971, "eval_steps_per_second": 0.746, "step": 1500 }, { "epoch": 0.93, "learning_rate": 0.00023160550458715596, "loss": 3.8798, "step": 1501 }, { "epoch": 0.93, "learning_rate": 0.00023153669724770642, "loss": 3.6521, "step": 1502 }, { "epoch": 0.93, "learning_rate": 0.00023146788990825688, "loss": 3.1413, "step": 1503 }, { "epoch": 0.93, "learning_rate": 0.0002313990825688073, "loss": 2.9996, "step": 1504 }, { "epoch": 0.93, "learning_rate": 0.00023133027522935777, "loss": 2.8918, "step": 1505 }, { "epoch": 0.93, "learning_rate": 0.00023126146788990824, "loss": 2.615, "step": 1506 }, { "epoch": 0.93, "learning_rate": 0.0002311926605504587, "loss": 2.7964, "step": 1507 }, { "epoch": 0.93, "learning_rate": 0.00023112385321100916, "loss": 2.7088, "step": 1508 }, { "epoch": 0.93, "learning_rate": 0.0002310550458715596, "loss": 2.6699, "step": 1509 }, { "epoch": 0.93, "learning_rate": 0.00023098623853211005, "loss": 2.6601, "step": 1510 }, { "epoch": 0.93, "learning_rate": 0.00023091743119266051, "loss": 2.5294, "step": 1511 }, { "epoch": 0.93, "learning_rate": 0.00023084862385321098, "loss": 2.5886, "step": 1512 }, { "epoch": 0.93, "learning_rate": 0.00023077981651376144, "loss": 2.4782, "step": 1513 }, { "epoch": 0.93, "learning_rate": 0.0002307110091743119, "loss": 2.444, "step": 1514 }, { "epoch": 0.94, "learning_rate": 0.00023064220183486236, "loss": 2.2496, "step": 1515 }, { "epoch": 0.94, "learning_rate": 0.00023057339449541282, "loss": 2.3786, "step": 1516 }, { "epoch": 0.94, "learning_rate": 0.00023050458715596328, "loss": 2.3432, "step": 1517 }, { "epoch": 0.94, "learning_rate": 0.00023043577981651374, "loss": 2.6102, "step": 1518 }, { "epoch": 0.94, "learning_rate": 0.0002303669724770642, "loss": 2.42, "step": 1519 }, { "epoch": 0.94, "learning_rate": 0.00023029816513761467, "loss": 2.4603, "step": 1520 }, { "epoch": 0.94, "learning_rate": 0.00023022935779816513, "loss": 2.3453, "step": 1521 }, { "epoch": 0.94, "learning_rate": 0.0002301605504587156, "loss": 2.4801, "step": 1522 }, { "epoch": 0.94, "learning_rate": 0.00023009174311926605, "loss": 2.3845, "step": 1523 }, { "epoch": 0.94, "learning_rate": 0.0002300229357798165, "loss": 2.3283, "step": 1524 }, { "epoch": 0.94, "learning_rate": 0.00022995412844036694, "loss": 2.2907, "step": 1525 }, { "epoch": 0.94, "learning_rate": 0.0002298853211009174, "loss": 2.3028, "step": 1526 }, { "epoch": 0.94, "learning_rate": 0.00022981651376146787, "loss": 2.3779, "step": 1527 }, { "epoch": 0.94, "learning_rate": 0.00022974770642201833, "loss": 2.2853, "step": 1528 }, { "epoch": 0.94, "learning_rate": 0.0002296788990825688, "loss": 2.2455, "step": 1529 }, { "epoch": 0.94, "learning_rate": 0.00022961009174311925, "loss": 2.3359, "step": 1530 }, { "epoch": 0.94, "learning_rate": 0.0002295412844036697, "loss": 2.2759, "step": 1531 }, { "epoch": 0.95, "learning_rate": 0.00022947247706422017, "loss": 2.1927, "step": 1532 }, { "epoch": 0.95, "learning_rate": 0.00022940366972477064, "loss": 2.0256, "step": 1533 }, { "epoch": 0.95, "learning_rate": 0.0002293348623853211, "loss": 2.0893, "step": 1534 }, { "epoch": 0.95, "learning_rate": 0.00022926605504587153, "loss": 1.9414, "step": 1535 }, { "epoch": 0.95, "learning_rate": 0.000229197247706422, "loss": 2.0403, "step": 1536 }, { "epoch": 0.95, "learning_rate": 0.00022912844036697245, "loss": 2.151, "step": 1537 }, { "epoch": 0.95, "learning_rate": 0.00022905963302752291, "loss": 2.0342, "step": 1538 }, { "epoch": 0.95, "learning_rate": 0.00022899082568807338, "loss": 1.7482, "step": 1539 }, { "epoch": 0.95, "learning_rate": 0.00022892201834862384, "loss": 1.8345, "step": 1540 }, { "epoch": 0.95, "learning_rate": 0.00022885321100917427, "loss": 1.8883, "step": 1541 }, { "epoch": 0.95, "learning_rate": 0.00022878440366972473, "loss": 1.7642, "step": 1542 }, { "epoch": 0.95, "learning_rate": 0.0002287155963302752, "loss": 1.7402, "step": 1543 }, { "epoch": 0.95, "learning_rate": 0.00022864678899082565, "loss": 1.7481, "step": 1544 }, { "epoch": 0.95, "learning_rate": 0.00022857798165137612, "loss": 1.7225, "step": 1545 }, { "epoch": 0.95, "learning_rate": 0.00022850917431192658, "loss": 1.6718, "step": 1546 }, { "epoch": 0.95, "learning_rate": 0.00022844036697247704, "loss": 1.434, "step": 1547 }, { "epoch": 0.96, "learning_rate": 0.0002283715596330275, "loss": 1.3589, "step": 1548 }, { "epoch": 0.96, "learning_rate": 0.00022830275229357796, "loss": 1.3906, "step": 1549 }, { "epoch": 0.96, "learning_rate": 0.00022823394495412842, "loss": 1.3383, "step": 1550 }, { "epoch": 0.96, "learning_rate": 0.00022816513761467888, "loss": 3.8618, "step": 1551 }, { "epoch": 0.96, "learning_rate": 0.00022809633027522935, "loss": 3.47, "step": 1552 }, { "epoch": 0.96, "learning_rate": 0.0002280275229357798, "loss": 2.9152, "step": 1553 }, { "epoch": 0.96, "learning_rate": 0.00022795871559633027, "loss": 2.7829, "step": 1554 }, { "epoch": 0.96, "learning_rate": 0.00022788990825688073, "loss": 2.7614, "step": 1555 }, { "epoch": 0.96, "learning_rate": 0.0002278211009174312, "loss": 2.794, "step": 1556 }, { "epoch": 0.96, "learning_rate": 0.00022775229357798162, "loss": 2.6156, "step": 1557 }, { "epoch": 0.96, "learning_rate": 0.00022768348623853209, "loss": 2.6174, "step": 1558 }, { "epoch": 0.96, "learning_rate": 0.00022761467889908255, "loss": 2.6436, "step": 1559 }, { "epoch": 0.96, "learning_rate": 0.000227545871559633, "loss": 2.4479, "step": 1560 }, { "epoch": 0.96, "learning_rate": 0.00022747706422018347, "loss": 2.5513, "step": 1561 }, { "epoch": 0.96, "learning_rate": 0.00022740825688073393, "loss": 2.488, "step": 1562 }, { "epoch": 0.96, "learning_rate": 0.0002273394495412844, "loss": 2.613, "step": 1563 }, { "epoch": 0.97, "learning_rate": 0.00022727064220183485, "loss": 2.4519, "step": 1564 }, { "epoch": 0.97, "learning_rate": 0.00022720183486238531, "loss": 2.4788, "step": 1565 }, { "epoch": 0.97, "learning_rate": 0.00022713302752293575, "loss": 2.2182, "step": 1566 }, { "epoch": 0.97, "learning_rate": 0.0002270642201834862, "loss": 2.4339, "step": 1567 }, { "epoch": 0.97, "learning_rate": 0.00022699541284403667, "loss": 2.4258, "step": 1568 }, { "epoch": 0.97, "learning_rate": 0.00022692660550458713, "loss": 2.3051, "step": 1569 }, { "epoch": 0.97, "learning_rate": 0.0002268577981651376, "loss": 2.4419, "step": 1570 }, { "epoch": 0.97, "learning_rate": 0.00022678899082568806, "loss": 2.4325, "step": 1571 }, { "epoch": 0.97, "learning_rate": 0.00022672018348623852, "loss": 2.325, "step": 1572 }, { "epoch": 0.97, "learning_rate": 0.00022665137614678895, "loss": 2.3338, "step": 1573 }, { "epoch": 0.97, "learning_rate": 0.0002265825688073394, "loss": 2.223, "step": 1574 }, { "epoch": 0.97, "learning_rate": 0.00022651376146788987, "loss": 2.1918, "step": 1575 }, { "epoch": 0.97, "learning_rate": 0.00022644495412844033, "loss": 2.3632, "step": 1576 }, { "epoch": 0.97, "learning_rate": 0.0002263761467889908, "loss": 2.1487, "step": 1577 }, { "epoch": 0.97, "learning_rate": 0.00022630733944954126, "loss": 2.3696, "step": 1578 }, { "epoch": 0.97, "learning_rate": 0.00022623853211009172, "loss": 2.0953, "step": 1579 }, { "epoch": 0.98, "learning_rate": 0.00022616972477064218, "loss": 2.1683, "step": 1580 }, { "epoch": 0.98, "learning_rate": 0.00022610091743119264, "loss": 2.0308, "step": 1581 }, { "epoch": 0.98, "learning_rate": 0.0002260321100917431, "loss": 2.0683, "step": 1582 }, { "epoch": 0.98, "learning_rate": 0.00022596330275229356, "loss": 2.0815, "step": 1583 }, { "epoch": 0.98, "learning_rate": 0.00022589449541284402, "loss": 2.0315, "step": 1584 }, { "epoch": 0.98, "learning_rate": 0.00022582568807339449, "loss": 1.9406, "step": 1585 }, { "epoch": 0.98, "learning_rate": 0.00022575688073394495, "loss": 2.0098, "step": 1586 }, { "epoch": 0.98, "learning_rate": 0.0002256880733944954, "loss": 1.9886, "step": 1587 }, { "epoch": 0.98, "learning_rate": 0.00022561926605504587, "loss": 1.9092, "step": 1588 }, { "epoch": 0.98, "learning_rate": 0.0002255504587155963, "loss": 1.8882, "step": 1589 }, { "epoch": 0.98, "learning_rate": 0.00022548165137614676, "loss": 1.8068, "step": 1590 }, { "epoch": 0.98, "learning_rate": 0.00022541284403669723, "loss": 1.7487, "step": 1591 }, { "epoch": 0.98, "learning_rate": 0.0002253440366972477, "loss": 1.8935, "step": 1592 }, { "epoch": 0.98, "learning_rate": 0.00022527522935779815, "loss": 1.8995, "step": 1593 }, { "epoch": 0.98, "learning_rate": 0.0002252064220183486, "loss": 1.7237, "step": 1594 }, { "epoch": 0.98, "learning_rate": 0.00022513761467889907, "loss": 1.5711, "step": 1595 }, { "epoch": 0.99, "learning_rate": 0.00022506880733944953, "loss": 1.5474, "step": 1596 }, { "epoch": 0.99, "learning_rate": 0.000225, "loss": 1.4504, "step": 1597 }, { "epoch": 0.99, "learning_rate": 0.00022493119266055043, "loss": 1.4231, "step": 1598 }, { "epoch": 0.99, "learning_rate": 0.0002248623853211009, "loss": 1.2094, "step": 1599 }, { "epoch": 0.99, "learning_rate": 0.00022479357798165135, "loss": 1.3736, "step": 1600 }, { "epoch": 0.99, "learning_rate": 0.0002247247706422018, "loss": 3.8124, "step": 1601 }, { "epoch": 0.99, "learning_rate": 0.00022465596330275227, "loss": 3.1417, "step": 1602 }, { "epoch": 0.99, "learning_rate": 0.00022458715596330273, "loss": 2.9373, "step": 1603 }, { "epoch": 0.99, "learning_rate": 0.0002245183486238532, "loss": 2.7386, "step": 1604 }, { "epoch": 0.99, "learning_rate": 0.00022444954128440366, "loss": 2.5797, "step": 1605 }, { "epoch": 0.99, "learning_rate": 0.0002243807339449541, "loss": 2.62, "step": 1606 }, { "epoch": 0.99, "learning_rate": 0.00022431192660550455, "loss": 2.4809, "step": 1607 }, { "epoch": 0.99, "learning_rate": 0.00022424311926605501, "loss": 2.527, "step": 1608 }, { "epoch": 0.99, "learning_rate": 0.00022417431192660547, "loss": 2.4485, "step": 1609 }, { "epoch": 0.99, "learning_rate": 0.00022410550458715594, "loss": 2.4327, "step": 1610 }, { "epoch": 0.99, "learning_rate": 0.0002240366972477064, "loss": 2.3558, "step": 1611 }, { "epoch": 0.99, "learning_rate": 0.00022396788990825686, "loss": 2.2636, "step": 1612 }, { "epoch": 1.0, "learning_rate": 0.00022389908256880732, "loss": 2.158, "step": 1613 }, { "epoch": 1.0, "learning_rate": 0.00022383027522935778, "loss": 2.2312, "step": 1614 }, { "epoch": 1.0, "learning_rate": 0.00022376146788990824, "loss": 1.8881, "step": 1615 }, { "epoch": 1.0, "learning_rate": 0.0002236926605504587, "loss": 1.9816, "step": 1616 }, { "epoch": 1.0, "learning_rate": 0.00022362385321100917, "loss": 1.8794, "step": 1617 }, { "epoch": 1.0, "learning_rate": 0.00022355504587155963, "loss": 1.7875, "step": 1618 }, { "epoch": 1.0, "learning_rate": 0.0002234862385321101, "loss": 1.4404, "step": 1619 }, { "epoch": 1.0, "learning_rate": 0.00022341743119266055, "loss": 1.3255, "step": 1620 }, { "epoch": 1.0, "learning_rate": 0.000223348623853211, "loss": 3.5384, "step": 1621 }, { "epoch": 1.0, "learning_rate": 0.00022327981651376144, "loss": 3.0625, "step": 1622 }, { "epoch": 1.0, "learning_rate": 0.0002232110091743119, "loss": 2.5833, "step": 1623 }, { "epoch": 1.0, "learning_rate": 0.00022314220183486237, "loss": 2.7377, "step": 1624 }, { "epoch": 1.0, "learning_rate": 0.00022307339449541283, "loss": 2.5123, "step": 1625 }, { "epoch": 1.0, "learning_rate": 0.0002230045871559633, "loss": 2.0895, "step": 1626 }, { "epoch": 1.0, "learning_rate": 0.00022293577981651375, "loss": 2.4198, "step": 1627 }, { "epoch": 1.0, "learning_rate": 0.0002228669724770642, "loss": 2.3877, "step": 1628 }, { "epoch": 1.01, "learning_rate": 0.00022279816513761465, "loss": 2.297, "step": 1629 }, { "epoch": 1.01, "learning_rate": 0.0002227293577981651, "loss": 2.1656, "step": 1630 }, { "epoch": 1.01, "learning_rate": 0.00022266055045871557, "loss": 2.0874, "step": 1631 }, { "epoch": 1.01, "learning_rate": 0.00022259174311926603, "loss": 2.0864, "step": 1632 }, { "epoch": 1.01, "learning_rate": 0.0002225229357798165, "loss": 2.1314, "step": 1633 }, { "epoch": 1.01, "learning_rate": 0.00022245412844036695, "loss": 2.1392, "step": 1634 }, { "epoch": 1.01, "learning_rate": 0.00022238532110091741, "loss": 2.0376, "step": 1635 }, { "epoch": 1.01, "learning_rate": 0.00022231651376146787, "loss": 1.9865, "step": 1636 }, { "epoch": 1.01, "learning_rate": 0.00022224770642201834, "loss": 2.0534, "step": 1637 }, { "epoch": 1.01, "learning_rate": 0.00022217889908256877, "loss": 1.9698, "step": 1638 }, { "epoch": 1.01, "learning_rate": 0.00022211009174311923, "loss": 1.954, "step": 1639 }, { "epoch": 1.01, "learning_rate": 0.0002220412844036697, "loss": 1.9595, "step": 1640 }, { "epoch": 1.01, "learning_rate": 0.00022197247706422015, "loss": 1.9459, "step": 1641 }, { "epoch": 1.01, "learning_rate": 0.00022190366972477062, "loss": 1.9686, "step": 1642 }, { "epoch": 1.01, "learning_rate": 0.00022183486238532108, "loss": 1.9225, "step": 1643 }, { "epoch": 1.01, "learning_rate": 0.00022176605504587154, "loss": 2.0311, "step": 1644 }, { "epoch": 1.02, "learning_rate": 0.000221697247706422, "loss": 2.0121, "step": 1645 }, { "epoch": 1.02, "learning_rate": 0.00022162844036697246, "loss": 1.8775, "step": 1646 }, { "epoch": 1.02, "learning_rate": 0.00022155963302752292, "loss": 1.933, "step": 1647 }, { "epoch": 1.02, "learning_rate": 0.00022149082568807338, "loss": 1.823, "step": 1648 }, { "epoch": 1.02, "learning_rate": 0.00022142201834862384, "loss": 1.8175, "step": 1649 }, { "epoch": 1.02, "learning_rate": 0.0002213532110091743, "loss": 1.9192, "step": 1650 }, { "epoch": 1.02, "learning_rate": 0.00022128440366972477, "loss": 1.8093, "step": 1651 }, { "epoch": 1.02, "learning_rate": 0.00022121559633027523, "loss": 1.8638, "step": 1652 }, { "epoch": 1.02, "learning_rate": 0.0002211467889908257, "loss": 1.8102, "step": 1653 }, { "epoch": 1.02, "learning_rate": 0.00022107798165137612, "loss": 1.645, "step": 1654 }, { "epoch": 1.02, "learning_rate": 0.00022100917431192658, "loss": 1.7941, "step": 1655 }, { "epoch": 1.02, "learning_rate": 0.00022094036697247705, "loss": 1.7335, "step": 1656 }, { "epoch": 1.02, "learning_rate": 0.0002208715596330275, "loss": 1.7053, "step": 1657 }, { "epoch": 1.02, "learning_rate": 0.00022080275229357797, "loss": 1.7755, "step": 1658 }, { "epoch": 1.02, "learning_rate": 0.00022073394495412843, "loss": 1.6338, "step": 1659 }, { "epoch": 1.02, "learning_rate": 0.00022066513761467886, "loss": 1.6653, "step": 1660 }, { "epoch": 1.03, "learning_rate": 0.00022059633027522933, "loss": 1.7996, "step": 1661 }, { "epoch": 1.03, "learning_rate": 0.0002205275229357798, "loss": 1.4886, "step": 1662 }, { "epoch": 1.03, "learning_rate": 0.00022045871559633025, "loss": 1.4918, "step": 1663 }, { "epoch": 1.03, "learning_rate": 0.0002203899082568807, "loss": 1.5135, "step": 1664 }, { "epoch": 1.03, "learning_rate": 0.00022032110091743117, "loss": 1.3269, "step": 1665 }, { "epoch": 1.03, "learning_rate": 0.00022025229357798163, "loss": 1.3267, "step": 1666 }, { "epoch": 1.03, "learning_rate": 0.0002201834862385321, "loss": 1.2449, "step": 1667 }, { "epoch": 1.03, "learning_rate": 0.00022011467889908255, "loss": 1.2959, "step": 1668 }, { "epoch": 1.03, "learning_rate": 0.00022004587155963302, "loss": 1.1309, "step": 1669 }, { "epoch": 1.03, "learning_rate": 0.00021997706422018345, "loss": 1.244, "step": 1670 }, { "epoch": 1.03, "learning_rate": 0.0002199082568807339, "loss": 3.6016, "step": 1671 }, { "epoch": 1.03, "learning_rate": 0.00021983944954128437, "loss": 3.0305, "step": 1672 }, { "epoch": 1.03, "learning_rate": 0.00021977064220183483, "loss": 2.8526, "step": 1673 }, { "epoch": 1.03, "learning_rate": 0.0002197018348623853, "loss": 2.6346, "step": 1674 }, { "epoch": 1.03, "learning_rate": 0.00021963302752293576, "loss": 2.5328, "step": 1675 }, { "epoch": 1.03, "learning_rate": 0.00021956422018348622, "loss": 2.3991, "step": 1676 }, { "epoch": 1.04, "learning_rate": 0.00021949541284403668, "loss": 2.2768, "step": 1677 }, { "epoch": 1.04, "learning_rate": 0.00021942660550458714, "loss": 2.3904, "step": 1678 }, { "epoch": 1.04, "learning_rate": 0.0002193577981651376, "loss": 2.3287, "step": 1679 }, { "epoch": 1.04, "learning_rate": 0.00021928899082568806, "loss": 2.1604, "step": 1680 }, { "epoch": 1.04, "learning_rate": 0.00021922018348623852, "loss": 2.0885, "step": 1681 }, { "epoch": 1.04, "learning_rate": 0.00021915137614678898, "loss": 2.2015, "step": 1682 }, { "epoch": 1.04, "learning_rate": 0.00021908256880733945, "loss": 2.1696, "step": 1683 }, { "epoch": 1.04, "learning_rate": 0.0002190137614678899, "loss": 2.2066, "step": 1684 }, { "epoch": 1.04, "learning_rate": 0.00021894495412844037, "loss": 2.1416, "step": 1685 }, { "epoch": 1.04, "learning_rate": 0.0002188761467889908, "loss": 2.0317, "step": 1686 }, { "epoch": 1.04, "learning_rate": 0.00021880733944954126, "loss": 2.0359, "step": 1687 }, { "epoch": 1.04, "learning_rate": 0.00021873853211009173, "loss": 2.1068, "step": 1688 }, { "epoch": 1.04, "learning_rate": 0.0002186697247706422, "loss": 2.0652, "step": 1689 }, { "epoch": 1.04, "learning_rate": 0.00021860091743119265, "loss": 2.0542, "step": 1690 }, { "epoch": 1.04, "learning_rate": 0.00021853211009174308, "loss": 2.0819, "step": 1691 }, { "epoch": 1.04, "learning_rate": 0.00021846330275229354, "loss": 1.9453, "step": 1692 }, { "epoch": 1.05, "learning_rate": 0.000218394495412844, "loss": 2.0384, "step": 1693 }, { "epoch": 1.05, "learning_rate": 0.00021832568807339447, "loss": 2.0665, "step": 1694 }, { "epoch": 1.05, "learning_rate": 0.00021825688073394493, "loss": 1.8885, "step": 1695 }, { "epoch": 1.05, "learning_rate": 0.0002181880733944954, "loss": 1.9032, "step": 1696 }, { "epoch": 1.05, "learning_rate": 0.00021811926605504585, "loss": 1.8915, "step": 1697 }, { "epoch": 1.05, "learning_rate": 0.0002180504587155963, "loss": 1.8936, "step": 1698 }, { "epoch": 1.05, "learning_rate": 0.00021798165137614677, "loss": 1.9411, "step": 1699 }, { "epoch": 1.05, "learning_rate": 0.00021791284403669723, "loss": 1.9526, "step": 1700 }, { "epoch": 1.05, "learning_rate": 0.0002178440366972477, "loss": 1.7593, "step": 1701 }, { "epoch": 1.05, "learning_rate": 0.00021777522935779813, "loss": 1.7568, "step": 1702 }, { "epoch": 1.05, "learning_rate": 0.0002177064220183486, "loss": 1.7143, "step": 1703 }, { "epoch": 1.05, "learning_rate": 0.00021763761467889905, "loss": 1.5789, "step": 1704 }, { "epoch": 1.05, "learning_rate": 0.0002175688073394495, "loss": 1.8545, "step": 1705 }, { "epoch": 1.05, "learning_rate": 0.00021749999999999997, "loss": 1.58, "step": 1706 }, { "epoch": 1.05, "learning_rate": 0.00021743119266055044, "loss": 1.6144, "step": 1707 }, { "epoch": 1.05, "learning_rate": 0.0002173623853211009, "loss": 1.6012, "step": 1708 }, { "epoch": 1.05, "learning_rate": 0.00021729357798165136, "loss": 1.615, "step": 1709 }, { "epoch": 1.06, "learning_rate": 0.00021722477064220182, "loss": 1.6088, "step": 1710 }, { "epoch": 1.06, "learning_rate": 0.00021715596330275228, "loss": 1.4713, "step": 1711 }, { "epoch": 1.06, "learning_rate": 0.00021708715596330274, "loss": 1.3805, "step": 1712 }, { "epoch": 1.06, "learning_rate": 0.0002170183486238532, "loss": 1.4281, "step": 1713 }, { "epoch": 1.06, "learning_rate": 0.00021694954128440366, "loss": 1.4774, "step": 1714 }, { "epoch": 1.06, "learning_rate": 0.00021688073394495413, "loss": 1.2971, "step": 1715 }, { "epoch": 1.06, "learning_rate": 0.0002168119266055046, "loss": 1.1743, "step": 1716 }, { "epoch": 1.06, "learning_rate": 0.00021674311926605505, "loss": 1.184, "step": 1717 }, { "epoch": 1.06, "learning_rate": 0.00021667431192660548, "loss": 1.3324, "step": 1718 }, { "epoch": 1.06, "learning_rate": 0.00021660550458715594, "loss": 1.16, "step": 1719 }, { "epoch": 1.06, "learning_rate": 0.0002165366972477064, "loss": 1.2817, "step": 1720 }, { "epoch": 1.06, "learning_rate": 0.00021646788990825687, "loss": 3.5558, "step": 1721 }, { "epoch": 1.06, "learning_rate": 0.0002163990825688073, "loss": 3.0307, "step": 1722 }, { "epoch": 1.06, "learning_rate": 0.00021633027522935776, "loss": 2.6262, "step": 1723 }, { "epoch": 1.06, "learning_rate": 0.00021626146788990822, "loss": 2.4631, "step": 1724 }, { "epoch": 1.06, "learning_rate": 0.00021619266055045868, "loss": 2.5842, "step": 1725 }, { "epoch": 1.07, "learning_rate": 0.00021612385321100915, "loss": 2.4398, "step": 1726 }, { "epoch": 1.07, "learning_rate": 0.0002160550458715596, "loss": 2.4056, "step": 1727 }, { "epoch": 1.07, "learning_rate": 0.00021598623853211007, "loss": 2.4079, "step": 1728 }, { "epoch": 1.07, "learning_rate": 0.00021591743119266053, "loss": 2.1906, "step": 1729 }, { "epoch": 1.07, "learning_rate": 0.000215848623853211, "loss": 2.2679, "step": 1730 }, { "epoch": 1.07, "learning_rate": 0.00021577981651376145, "loss": 1.9226, "step": 1731 }, { "epoch": 1.07, "learning_rate": 0.0002157110091743119, "loss": 2.0811, "step": 1732 }, { "epoch": 1.07, "learning_rate": 0.00021564220183486237, "loss": 2.2899, "step": 1733 }, { "epoch": 1.07, "learning_rate": 0.0002155733944954128, "loss": 2.0975, "step": 1734 }, { "epoch": 1.07, "learning_rate": 0.00021550458715596327, "loss": 2.0941, "step": 1735 }, { "epoch": 1.07, "learning_rate": 0.00021543577981651373, "loss": 1.97, "step": 1736 }, { "epoch": 1.07, "learning_rate": 0.0002153669724770642, "loss": 2.0479, "step": 1737 }, { "epoch": 1.07, "learning_rate": 0.00021529816513761465, "loss": 2.0814, "step": 1738 }, { "epoch": 1.07, "learning_rate": 0.00021522935779816511, "loss": 1.9018, "step": 1739 }, { "epoch": 1.07, "learning_rate": 0.00021516055045871558, "loss": 2.0003, "step": 1740 }, { "epoch": 1.07, "learning_rate": 0.00021509174311926604, "loss": 1.8782, "step": 1741 }, { "epoch": 1.08, "learning_rate": 0.0002150229357798165, "loss": 1.8627, "step": 1742 }, { "epoch": 1.08, "learning_rate": 0.00021495412844036696, "loss": 1.8832, "step": 1743 }, { "epoch": 1.08, "learning_rate": 0.00021488532110091742, "loss": 1.8778, "step": 1744 }, { "epoch": 1.08, "learning_rate": 0.00021481651376146788, "loss": 1.809, "step": 1745 }, { "epoch": 1.08, "learning_rate": 0.00021474770642201834, "loss": 1.705, "step": 1746 }, { "epoch": 1.08, "learning_rate": 0.0002146788990825688, "loss": 2.0336, "step": 1747 }, { "epoch": 1.08, "learning_rate": 0.00021461009174311927, "loss": 1.9158, "step": 1748 }, { "epoch": 1.08, "learning_rate": 0.00021454128440366973, "loss": 1.9045, "step": 1749 }, { "epoch": 1.08, "learning_rate": 0.00021447247706422016, "loss": 1.9342, "step": 1750 }, { "epoch": 1.08, "learning_rate": 0.00021440366972477062, "loss": 1.9411, "step": 1751 }, { "epoch": 1.08, "learning_rate": 0.00021433486238532108, "loss": 1.9156, "step": 1752 }, { "epoch": 1.08, "learning_rate": 0.00021426605504587155, "loss": 1.8337, "step": 1753 }, { "epoch": 1.08, "learning_rate": 0.00021419724770642198, "loss": 1.5281, "step": 1754 }, { "epoch": 1.08, "learning_rate": 0.00021412844036697244, "loss": 1.6437, "step": 1755 }, { "epoch": 1.08, "learning_rate": 0.0002140596330275229, "loss": 1.6482, "step": 1756 }, { "epoch": 1.08, "learning_rate": 0.00021399082568807336, "loss": 1.6374, "step": 1757 }, { "epoch": 1.09, "learning_rate": 0.00021392201834862382, "loss": 1.6254, "step": 1758 }, { "epoch": 1.09, "learning_rate": 0.00021385321100917429, "loss": 1.59, "step": 1759 }, { "epoch": 1.09, "learning_rate": 0.00021378440366972475, "loss": 1.5096, "step": 1760 }, { "epoch": 1.09, "learning_rate": 0.0002137155963302752, "loss": 1.5776, "step": 1761 }, { "epoch": 1.09, "learning_rate": 0.00021364678899082567, "loss": 1.5149, "step": 1762 }, { "epoch": 1.09, "learning_rate": 0.00021357798165137613, "loss": 1.5024, "step": 1763 }, { "epoch": 1.09, "learning_rate": 0.0002135091743119266, "loss": 1.4055, "step": 1764 }, { "epoch": 1.09, "learning_rate": 0.00021344036697247705, "loss": 1.2917, "step": 1765 }, { "epoch": 1.09, "learning_rate": 0.0002133715596330275, "loss": 1.4198, "step": 1766 }, { "epoch": 1.09, "learning_rate": 0.00021330275229357795, "loss": 1.4071, "step": 1767 }, { "epoch": 1.09, "learning_rate": 0.0002132339449541284, "loss": 1.2486, "step": 1768 }, { "epoch": 1.09, "learning_rate": 0.00021316513761467887, "loss": 1.1579, "step": 1769 }, { "epoch": 1.09, "learning_rate": 0.00021309633027522933, "loss": 1.1932, "step": 1770 }, { "epoch": 1.09, "learning_rate": 0.0002130275229357798, "loss": 3.7279, "step": 1771 }, { "epoch": 1.09, "learning_rate": 0.00021295871559633026, "loss": 3.1071, "step": 1772 }, { "epoch": 1.09, "learning_rate": 0.00021288990825688072, "loss": 2.5656, "step": 1773 }, { "epoch": 1.1, "learning_rate": 0.00021282110091743118, "loss": 2.5555, "step": 1774 }, { "epoch": 1.1, "learning_rate": 0.00021275229357798164, "loss": 2.4096, "step": 1775 }, { "epoch": 1.1, "learning_rate": 0.0002126834862385321, "loss": 2.443, "step": 1776 }, { "epoch": 1.1, "learning_rate": 0.00021261467889908256, "loss": 2.247, "step": 1777 }, { "epoch": 1.1, "learning_rate": 0.00021254587155963302, "loss": 2.4117, "step": 1778 }, { "epoch": 1.1, "learning_rate": 0.00021247706422018348, "loss": 2.2131, "step": 1779 }, { "epoch": 1.1, "learning_rate": 0.00021240825688073395, "loss": 2.2154, "step": 1780 }, { "epoch": 1.1, "learning_rate": 0.0002123394495412844, "loss": 2.1035, "step": 1781 }, { "epoch": 1.1, "learning_rate": 0.00021227064220183484, "loss": 2.099, "step": 1782 }, { "epoch": 1.1, "learning_rate": 0.0002122018348623853, "loss": 1.9619, "step": 1783 }, { "epoch": 1.1, "learning_rate": 0.00021213302752293576, "loss": 2.0422, "step": 1784 }, { "epoch": 1.1, "learning_rate": 0.0002120642201834862, "loss": 1.9492, "step": 1785 }, { "epoch": 1.1, "learning_rate": 0.00021199541284403666, "loss": 2.013, "step": 1786 }, { "epoch": 1.1, "learning_rate": 0.00021192660550458712, "loss": 2.0643, "step": 1787 }, { "epoch": 1.1, "learning_rate": 0.00021185779816513758, "loss": 2.1128, "step": 1788 }, { "epoch": 1.1, "learning_rate": 0.00021178899082568804, "loss": 2.069, "step": 1789 }, { "epoch": 1.1, "learning_rate": 0.0002117201834862385, "loss": 2.1215, "step": 1790 }, { "epoch": 1.11, "learning_rate": 0.00021165137614678897, "loss": 1.9409, "step": 1791 }, { "epoch": 1.11, "learning_rate": 0.00021158256880733943, "loss": 1.8557, "step": 1792 }, { "epoch": 1.11, "learning_rate": 0.0002115137614678899, "loss": 1.7989, "step": 1793 }, { "epoch": 1.11, "learning_rate": 0.00021144495412844035, "loss": 1.9805, "step": 1794 }, { "epoch": 1.11, "learning_rate": 0.0002113761467889908, "loss": 1.8681, "step": 1795 }, { "epoch": 1.11, "learning_rate": 0.00021130733944954127, "loss": 1.8311, "step": 1796 }, { "epoch": 1.11, "learning_rate": 0.00021123853211009173, "loss": 1.8996, "step": 1797 }, { "epoch": 1.11, "learning_rate": 0.00021116972477064217, "loss": 1.9109, "step": 1798 }, { "epoch": 1.11, "learning_rate": 0.00021110091743119263, "loss": 1.8419, "step": 1799 }, { "epoch": 1.11, "learning_rate": 0.0002110321100917431, "loss": 1.7898, "step": 1800 }, { "epoch": 1.11, "learning_rate": 0.00021096330275229355, "loss": 1.6416, "step": 1801 }, { "epoch": 1.11, "learning_rate": 0.000210894495412844, "loss": 1.8661, "step": 1802 }, { "epoch": 1.11, "learning_rate": 0.00021082568807339447, "loss": 1.7898, "step": 1803 }, { "epoch": 1.11, "learning_rate": 0.00021075688073394493, "loss": 1.5966, "step": 1804 }, { "epoch": 1.11, "learning_rate": 0.0002106880733944954, "loss": 1.6136, "step": 1805 }, { "epoch": 1.11, "learning_rate": 0.00021061926605504586, "loss": 1.7709, "step": 1806 }, { "epoch": 1.12, "learning_rate": 0.00021055045871559632, "loss": 1.6369, "step": 1807 }, { "epoch": 1.12, "learning_rate": 0.00021048165137614678, "loss": 1.6701, "step": 1808 }, { "epoch": 1.12, "learning_rate": 0.00021041284403669724, "loss": 1.5804, "step": 1809 }, { "epoch": 1.12, "learning_rate": 0.0002103440366972477, "loss": 1.5023, "step": 1810 }, { "epoch": 1.12, "learning_rate": 0.00021027522935779816, "loss": 1.6007, "step": 1811 }, { "epoch": 1.12, "learning_rate": 0.00021020642201834862, "loss": 1.5211, "step": 1812 }, { "epoch": 1.12, "learning_rate": 0.00021013761467889909, "loss": 1.3998, "step": 1813 }, { "epoch": 1.12, "learning_rate": 0.00021006880733944952, "loss": 1.3186, "step": 1814 }, { "epoch": 1.12, "learning_rate": 0.00020999999999999998, "loss": 1.3886, "step": 1815 }, { "epoch": 1.12, "learning_rate": 0.00020993119266055042, "loss": 1.2077, "step": 1816 }, { "epoch": 1.12, "learning_rate": 0.00020986238532110088, "loss": 1.1515, "step": 1817 }, { "epoch": 1.12, "learning_rate": 0.00020979357798165134, "loss": 1.1892, "step": 1818 }, { "epoch": 1.12, "learning_rate": 0.0002097247706422018, "loss": 1.1482, "step": 1819 }, { "epoch": 1.12, "learning_rate": 0.00020965596330275226, "loss": 1.0993, "step": 1820 }, { "epoch": 1.12, "learning_rate": 0.00020958715596330272, "loss": 3.5628, "step": 1821 }, { "epoch": 1.12, "learning_rate": 0.00020951834862385318, "loss": 3.0245, "step": 1822 }, { "epoch": 1.13, "learning_rate": 0.00020944954128440364, "loss": 2.3985, "step": 1823 }, { "epoch": 1.13, "learning_rate": 0.0002093807339449541, "loss": 2.2958, "step": 1824 }, { "epoch": 1.13, "learning_rate": 0.00020931192660550457, "loss": 2.4073, "step": 1825 }, { "epoch": 1.13, "learning_rate": 0.00020924311926605503, "loss": 2.2435, "step": 1826 }, { "epoch": 1.13, "learning_rate": 0.0002091743119266055, "loss": 2.4422, "step": 1827 }, { "epoch": 1.13, "learning_rate": 0.00020910550458715595, "loss": 2.2574, "step": 1828 }, { "epoch": 1.13, "learning_rate": 0.0002090366972477064, "loss": 2.271, "step": 1829 }, { "epoch": 1.13, "learning_rate": 0.00020896788990825685, "loss": 2.1299, "step": 1830 }, { "epoch": 1.13, "learning_rate": 0.0002088990825688073, "loss": 2.2288, "step": 1831 }, { "epoch": 1.13, "learning_rate": 0.00020883027522935777, "loss": 1.9979, "step": 1832 }, { "epoch": 1.13, "learning_rate": 0.00020876146788990823, "loss": 2.3091, "step": 1833 }, { "epoch": 1.13, "learning_rate": 0.0002086926605504587, "loss": 1.9847, "step": 1834 }, { "epoch": 1.13, "learning_rate": 0.00020862385321100915, "loss": 2.1384, "step": 1835 }, { "epoch": 1.13, "learning_rate": 0.00020855504587155961, "loss": 2.0983, "step": 1836 }, { "epoch": 1.13, "learning_rate": 0.00020848623853211008, "loss": 2.1569, "step": 1837 }, { "epoch": 1.13, "learning_rate": 0.00020841743119266054, "loss": 2.1108, "step": 1838 }, { "epoch": 1.14, "learning_rate": 0.000208348623853211, "loss": 1.9124, "step": 1839 }, { "epoch": 1.14, "learning_rate": 0.00020827981651376146, "loss": 1.9459, "step": 1840 }, { "epoch": 1.14, "learning_rate": 0.00020821100917431192, "loss": 1.995, "step": 1841 }, { "epoch": 1.14, "learning_rate": 0.00020814220183486238, "loss": 1.8326, "step": 1842 }, { "epoch": 1.14, "learning_rate": 0.00020807339449541284, "loss": 1.9383, "step": 1843 }, { "epoch": 1.14, "learning_rate": 0.0002080045871559633, "loss": 1.8695, "step": 1844 }, { "epoch": 1.14, "learning_rate": 0.00020793577981651377, "loss": 1.8381, "step": 1845 }, { "epoch": 1.14, "learning_rate": 0.0002078669724770642, "loss": 1.7778, "step": 1846 }, { "epoch": 1.14, "learning_rate": 0.00020779816513761463, "loss": 1.9175, "step": 1847 }, { "epoch": 1.14, "learning_rate": 0.0002077293577981651, "loss": 1.8504, "step": 1848 }, { "epoch": 1.14, "learning_rate": 0.00020766055045871556, "loss": 1.7752, "step": 1849 }, { "epoch": 1.14, "learning_rate": 0.00020759174311926602, "loss": 1.5618, "step": 1850 }, { "epoch": 1.14, "learning_rate": 0.00020752293577981648, "loss": 1.7225, "step": 1851 }, { "epoch": 1.14, "learning_rate": 0.00020745412844036694, "loss": 1.8193, "step": 1852 }, { "epoch": 1.14, "learning_rate": 0.0002073853211009174, "loss": 1.9269, "step": 1853 }, { "epoch": 1.14, "learning_rate": 0.00020731651376146786, "loss": 1.8738, "step": 1854 }, { "epoch": 1.15, "learning_rate": 0.00020724770642201832, "loss": 1.7606, "step": 1855 }, { "epoch": 1.15, "learning_rate": 0.00020717889908256879, "loss": 1.7587, "step": 1856 }, { "epoch": 1.15, "learning_rate": 0.00020711009174311925, "loss": 1.5626, "step": 1857 }, { "epoch": 1.15, "learning_rate": 0.0002070412844036697, "loss": 1.521, "step": 1858 }, { "epoch": 1.15, "learning_rate": 0.00020697247706422017, "loss": 1.4554, "step": 1859 }, { "epoch": 1.15, "learning_rate": 0.00020690366972477063, "loss": 1.5262, "step": 1860 }, { "epoch": 1.15, "learning_rate": 0.0002068348623853211, "loss": 1.4124, "step": 1861 }, { "epoch": 1.15, "learning_rate": 0.00020676605504587153, "loss": 1.5763, "step": 1862 }, { "epoch": 1.15, "learning_rate": 0.000206697247706422, "loss": 1.3704, "step": 1863 }, { "epoch": 1.15, "learning_rate": 0.00020662844036697245, "loss": 1.3973, "step": 1864 }, { "epoch": 1.15, "learning_rate": 0.0002065596330275229, "loss": 1.4078, "step": 1865 }, { "epoch": 1.15, "learning_rate": 0.00020649082568807337, "loss": 1.2323, "step": 1866 }, { "epoch": 1.15, "learning_rate": 0.00020642201834862383, "loss": 1.3553, "step": 1867 }, { "epoch": 1.15, "learning_rate": 0.0002063532110091743, "loss": 1.159, "step": 1868 }, { "epoch": 1.15, "learning_rate": 0.00020628440366972475, "loss": 1.1942, "step": 1869 }, { "epoch": 1.15, "learning_rate": 0.00020621559633027522, "loss": 1.0791, "step": 1870 }, { "epoch": 1.15, "learning_rate": 0.00020614678899082568, "loss": 3.2909, "step": 1871 }, { "epoch": 1.16, "learning_rate": 0.00020607798165137614, "loss": 2.9312, "step": 1872 }, { "epoch": 1.16, "learning_rate": 0.0002060091743119266, "loss": 2.5817, "step": 1873 }, { "epoch": 1.16, "learning_rate": 0.00020594036697247706, "loss": 2.2504, "step": 1874 }, { "epoch": 1.16, "learning_rate": 0.00020587155963302752, "loss": 2.3404, "step": 1875 }, { "epoch": 1.16, "learning_rate": 0.00020580275229357798, "loss": 2.4065, "step": 1876 }, { "epoch": 1.16, "learning_rate": 0.00020573394495412844, "loss": 2.178, "step": 1877 }, { "epoch": 1.16, "learning_rate": 0.00020566513761467885, "loss": 2.1733, "step": 1878 }, { "epoch": 1.16, "learning_rate": 0.0002055963302752293, "loss": 2.3174, "step": 1879 }, { "epoch": 1.16, "learning_rate": 0.00020552752293577977, "loss": 2.1098, "step": 1880 }, { "epoch": 1.16, "learning_rate": 0.00020545871559633024, "loss": 2.2311, "step": 1881 }, { "epoch": 1.16, "learning_rate": 0.0002053899082568807, "loss": 2.0851, "step": 1882 }, { "epoch": 1.16, "learning_rate": 0.00020532110091743116, "loss": 2.0471, "step": 1883 }, { "epoch": 1.16, "learning_rate": 0.00020525229357798162, "loss": 2.0604, "step": 1884 }, { "epoch": 1.16, "learning_rate": 0.00020518348623853208, "loss": 2.0673, "step": 1885 }, { "epoch": 1.16, "learning_rate": 0.00020511467889908254, "loss": 1.9443, "step": 1886 }, { "epoch": 1.16, "learning_rate": 0.000205045871559633, "loss": 1.9506, "step": 1887 }, { "epoch": 1.17, "learning_rate": 0.00020497706422018346, "loss": 2.0262, "step": 1888 }, { "epoch": 1.17, "learning_rate": 0.00020490825688073393, "loss": 1.9836, "step": 1889 }, { "epoch": 1.17, "learning_rate": 0.0002048394495412844, "loss": 2.0778, "step": 1890 }, { "epoch": 1.17, "learning_rate": 0.00020477064220183485, "loss": 1.9802, "step": 1891 }, { "epoch": 1.17, "learning_rate": 0.0002047018348623853, "loss": 1.8413, "step": 1892 }, { "epoch": 1.17, "learning_rate": 0.00020463302752293577, "loss": 1.9037, "step": 1893 }, { "epoch": 1.17, "learning_rate": 0.0002045642201834862, "loss": 1.8697, "step": 1894 }, { "epoch": 1.17, "learning_rate": 0.00020449541284403667, "loss": 1.8834, "step": 1895 }, { "epoch": 1.17, "learning_rate": 0.00020442660550458713, "loss": 1.7303, "step": 1896 }, { "epoch": 1.17, "learning_rate": 0.0002043577981651376, "loss": 1.861, "step": 1897 }, { "epoch": 1.17, "learning_rate": 0.00020428899082568805, "loss": 1.8614, "step": 1898 }, { "epoch": 1.17, "learning_rate": 0.0002042201834862385, "loss": 1.772, "step": 1899 }, { "epoch": 1.17, "learning_rate": 0.00020415137614678897, "loss": 1.8243, "step": 1900 }, { "epoch": 1.17, "learning_rate": 0.00020408256880733943, "loss": 1.695, "step": 1901 }, { "epoch": 1.17, "learning_rate": 0.0002040137614678899, "loss": 1.6338, "step": 1902 }, { "epoch": 1.17, "learning_rate": 0.00020394495412844036, "loss": 1.8594, "step": 1903 }, { "epoch": 1.18, "learning_rate": 0.00020387614678899082, "loss": 1.6646, "step": 1904 }, { "epoch": 1.18, "learning_rate": 0.00020380733944954128, "loss": 1.5384, "step": 1905 }, { "epoch": 1.18, "learning_rate": 0.00020373853211009174, "loss": 1.6177, "step": 1906 }, { "epoch": 1.18, "learning_rate": 0.0002036697247706422, "loss": 1.5614, "step": 1907 }, { "epoch": 1.18, "learning_rate": 0.00020360091743119266, "loss": 1.5878, "step": 1908 }, { "epoch": 1.18, "learning_rate": 0.00020353211009174312, "loss": 1.5746, "step": 1909 }, { "epoch": 1.18, "learning_rate": 0.00020346330275229353, "loss": 1.5444, "step": 1910 }, { "epoch": 1.18, "learning_rate": 0.000203394495412844, "loss": 1.5805, "step": 1911 }, { "epoch": 1.18, "learning_rate": 0.00020332568807339445, "loss": 1.5324, "step": 1912 }, { "epoch": 1.18, "learning_rate": 0.00020325688073394492, "loss": 1.3703, "step": 1913 }, { "epoch": 1.18, "learning_rate": 0.00020318807339449538, "loss": 1.2171, "step": 1914 }, { "epoch": 1.18, "learning_rate": 0.00020311926605504584, "loss": 1.3561, "step": 1915 }, { "epoch": 1.18, "learning_rate": 0.0002030504587155963, "loss": 1.2731, "step": 1916 }, { "epoch": 1.18, "learning_rate": 0.00020298165137614676, "loss": 1.1354, "step": 1917 }, { "epoch": 1.18, "learning_rate": 0.00020291284403669722, "loss": 1.2504, "step": 1918 }, { "epoch": 1.18, "learning_rate": 0.00020284403669724768, "loss": 1.1603, "step": 1919 }, { "epoch": 1.19, "learning_rate": 0.00020277522935779814, "loss": 1.0721, "step": 1920 }, { "epoch": 1.19, "learning_rate": 0.0002027064220183486, "loss": 3.2151, "step": 1921 }, { "epoch": 1.19, "learning_rate": 0.00020263761467889907, "loss": 2.8365, "step": 1922 }, { "epoch": 1.19, "learning_rate": 0.00020256880733944953, "loss": 2.3569, "step": 1923 }, { "epoch": 1.19, "learning_rate": 0.0002025, "loss": 2.439, "step": 1924 }, { "epoch": 1.19, "learning_rate": 0.00020243119266055045, "loss": 2.2511, "step": 1925 }, { "epoch": 1.19, "learning_rate": 0.00020236238532110088, "loss": 2.1246, "step": 1926 }, { "epoch": 1.19, "learning_rate": 0.00020229357798165135, "loss": 2.1318, "step": 1927 }, { "epoch": 1.19, "learning_rate": 0.0002022247706422018, "loss": 2.1898, "step": 1928 }, { "epoch": 1.19, "learning_rate": 0.00020215596330275227, "loss": 2.0258, "step": 1929 }, { "epoch": 1.19, "learning_rate": 0.00020208715596330273, "loss": 2.0232, "step": 1930 }, { "epoch": 1.19, "learning_rate": 0.0002020183486238532, "loss": 2.1099, "step": 1931 }, { "epoch": 1.19, "learning_rate": 0.00020194954128440365, "loss": 2.1024, "step": 1932 }, { "epoch": 1.19, "learning_rate": 0.0002018807339449541, "loss": 1.9652, "step": 1933 }, { "epoch": 1.19, "learning_rate": 0.00020181192660550457, "loss": 1.7831, "step": 1934 }, { "epoch": 1.19, "learning_rate": 0.00020174311926605504, "loss": 1.9607, "step": 1935 }, { "epoch": 1.2, "learning_rate": 0.0002016743119266055, "loss": 1.9349, "step": 1936 }, { "epoch": 1.2, "learning_rate": 0.00020160550458715596, "loss": 1.9872, "step": 1937 }, { "epoch": 1.2, "learning_rate": 0.00020153669724770642, "loss": 1.8682, "step": 1938 }, { "epoch": 1.2, "learning_rate": 0.00020146788990825688, "loss": 1.8741, "step": 1939 }, { "epoch": 1.2, "learning_rate": 0.00020139908256880734, "loss": 1.9381, "step": 1940 }, { "epoch": 1.2, "learning_rate": 0.0002013302752293578, "loss": 1.8689, "step": 1941 }, { "epoch": 1.2, "learning_rate": 0.0002012614678899082, "loss": 1.8468, "step": 1942 }, { "epoch": 1.2, "learning_rate": 0.00020119266055045867, "loss": 1.8137, "step": 1943 }, { "epoch": 1.2, "learning_rate": 0.00020112385321100913, "loss": 1.6493, "step": 1944 }, { "epoch": 1.2, "learning_rate": 0.0002010550458715596, "loss": 1.7694, "step": 1945 }, { "epoch": 1.2, "learning_rate": 0.00020098623853211006, "loss": 1.7595, "step": 1946 }, { "epoch": 1.2, "learning_rate": 0.00020091743119266052, "loss": 1.7865, "step": 1947 }, { "epoch": 1.2, "learning_rate": 0.00020084862385321098, "loss": 1.9081, "step": 1948 }, { "epoch": 1.2, "learning_rate": 0.00020077981651376144, "loss": 1.7594, "step": 1949 }, { "epoch": 1.2, "learning_rate": 0.0002007110091743119, "loss": 1.6857, "step": 1950 }, { "epoch": 1.2, "learning_rate": 0.00020064220183486236, "loss": 1.7442, "step": 1951 }, { "epoch": 1.2, "learning_rate": 0.00020057339449541282, "loss": 1.6748, "step": 1952 }, { "epoch": 1.21, "learning_rate": 0.00020050458715596328, "loss": 1.6105, "step": 1953 }, { "epoch": 1.21, "learning_rate": 0.00020043577981651375, "loss": 1.6602, "step": 1954 }, { "epoch": 1.21, "learning_rate": 0.0002003669724770642, "loss": 1.6126, "step": 1955 }, { "epoch": 1.21, "learning_rate": 0.00020029816513761467, "loss": 1.639, "step": 1956 }, { "epoch": 1.21, "learning_rate": 0.00020022935779816513, "loss": 1.509, "step": 1957 }, { "epoch": 1.21, "learning_rate": 0.00020016055045871556, "loss": 1.6552, "step": 1958 }, { "epoch": 1.21, "learning_rate": 0.00020009174311926603, "loss": 1.6348, "step": 1959 }, { "epoch": 1.21, "learning_rate": 0.00020002293577981649, "loss": 1.4605, "step": 1960 }, { "epoch": 1.21, "learning_rate": 0.00019995412844036695, "loss": 1.3822, "step": 1961 }, { "epoch": 1.21, "learning_rate": 0.0001998853211009174, "loss": 1.3392, "step": 1962 }, { "epoch": 1.21, "learning_rate": 0.00019981651376146787, "loss": 1.4034, "step": 1963 }, { "epoch": 1.21, "learning_rate": 0.00019974770642201833, "loss": 1.2861, "step": 1964 }, { "epoch": 1.21, "learning_rate": 0.0001996788990825688, "loss": 1.2665, "step": 1965 }, { "epoch": 1.21, "learning_rate": 0.00019961009174311925, "loss": 1.2643, "step": 1966 }, { "epoch": 1.21, "learning_rate": 0.00019954128440366972, "loss": 1.1004, "step": 1967 }, { "epoch": 1.21, "learning_rate": 0.00019947247706422018, "loss": 1.0879, "step": 1968 }, { "epoch": 1.22, "learning_rate": 0.00019940366972477064, "loss": 1.0452, "step": 1969 }, { "epoch": 1.22, "learning_rate": 0.0001993348623853211, "loss": 1.0075, "step": 1970 }, { "epoch": 1.22, "learning_rate": 0.00019926605504587156, "loss": 3.1939, "step": 1971 }, { "epoch": 1.22, "learning_rate": 0.00019919724770642202, "loss": 2.6152, "step": 1972 }, { "epoch": 1.22, "learning_rate": 0.00019912844036697248, "loss": 2.4699, "step": 1973 }, { "epoch": 1.22, "learning_rate": 0.0001990596330275229, "loss": 2.4007, "step": 1974 }, { "epoch": 1.22, "learning_rate": 0.00019899082568807335, "loss": 2.1458, "step": 1975 }, { "epoch": 1.22, "learning_rate": 0.0001989220183486238, "loss": 2.3048, "step": 1976 }, { "epoch": 1.22, "learning_rate": 0.00019885321100917427, "loss": 2.1876, "step": 1977 }, { "epoch": 1.22, "learning_rate": 0.00019878440366972474, "loss": 2.0, "step": 1978 }, { "epoch": 1.22, "learning_rate": 0.0001987155963302752, "loss": 1.9698, "step": 1979 }, { "epoch": 1.22, "learning_rate": 0.00019864678899082566, "loss": 2.104, "step": 1980 }, { "epoch": 1.22, "learning_rate": 0.00019857798165137612, "loss": 2.1339, "step": 1981 }, { "epoch": 1.22, "learning_rate": 0.00019850917431192658, "loss": 2.0213, "step": 1982 }, { "epoch": 1.22, "learning_rate": 0.00019844036697247704, "loss": 1.992, "step": 1983 }, { "epoch": 1.22, "learning_rate": 0.0001983715596330275, "loss": 2.2249, "step": 1984 }, { "epoch": 1.23, "learning_rate": 0.00019830275229357796, "loss": 2.0143, "step": 1985 }, { "epoch": 1.23, "learning_rate": 0.00019823394495412843, "loss": 1.853, "step": 1986 }, { "epoch": 1.23, "learning_rate": 0.00019816513761467889, "loss": 2.0188, "step": 1987 }, { "epoch": 1.23, "learning_rate": 0.00019809633027522935, "loss": 1.884, "step": 1988 }, { "epoch": 1.23, "learning_rate": 0.0001980275229357798, "loss": 1.9637, "step": 1989 }, { "epoch": 1.23, "learning_rate": 0.00019795871559633024, "loss": 1.77, "step": 1990 }, { "epoch": 1.23, "learning_rate": 0.0001978899082568807, "loss": 1.8534, "step": 1991 }, { "epoch": 1.23, "learning_rate": 0.00019782110091743117, "loss": 1.9073, "step": 1992 }, { "epoch": 1.23, "learning_rate": 0.00019775229357798163, "loss": 1.8242, "step": 1993 }, { "epoch": 1.23, "learning_rate": 0.0001976834862385321, "loss": 1.9404, "step": 1994 }, { "epoch": 1.23, "learning_rate": 0.00019761467889908255, "loss": 1.6926, "step": 1995 }, { "epoch": 1.23, "learning_rate": 0.000197545871559633, "loss": 1.8429, "step": 1996 }, { "epoch": 1.23, "learning_rate": 0.00019747706422018347, "loss": 1.8468, "step": 1997 }, { "epoch": 1.23, "learning_rate": 0.00019740825688073393, "loss": 1.7018, "step": 1998 }, { "epoch": 1.23, "learning_rate": 0.0001973394495412844, "loss": 1.7915, "step": 1999 }, { "epoch": 1.23, "learning_rate": 0.00019727064220183486, "loss": 1.6507, "step": 2000 }, { "epoch": 1.23, "eval_bleu": 1.7162192922202172e-16, "eval_loss": 2.2250263690948486, "eval_runtime": 2534.2548, "eval_samples_per_second": 5.824, "eval_steps_per_second": 0.728, "step": 2000 }, { "epoch": 1.24, "learning_rate": 0.00019720183486238532, "loss": 1.709, "step": 2001 }, { "epoch": 1.24, "learning_rate": 0.00019713302752293578, "loss": 1.6782, "step": 2002 }, { "epoch": 1.24, "learning_rate": 0.00019706422018348624, "loss": 1.5424, "step": 2003 }, { "epoch": 1.24, "learning_rate": 0.0001969954128440367, "loss": 1.6469, "step": 2004 }, { "epoch": 1.24, "learning_rate": 0.00019692660550458716, "loss": 1.8118, "step": 2005 }, { "epoch": 1.24, "learning_rate": 0.00019685779816513757, "loss": 1.5385, "step": 2006 }, { "epoch": 1.24, "learning_rate": 0.00019678899082568803, "loss": 1.4475, "step": 2007 }, { "epoch": 1.24, "learning_rate": 0.0001967201834862385, "loss": 1.6445, "step": 2008 }, { "epoch": 1.24, "learning_rate": 0.00019665137614678895, "loss": 1.4671, "step": 2009 }, { "epoch": 1.24, "learning_rate": 0.00019658256880733941, "loss": 1.5471, "step": 2010 }, { "epoch": 1.24, "learning_rate": 0.00019651376146788988, "loss": 1.532, "step": 2011 }, { "epoch": 1.24, "learning_rate": 0.00019644495412844034, "loss": 1.4689, "step": 2012 }, { "epoch": 1.24, "learning_rate": 0.0001963761467889908, "loss": 1.4248, "step": 2013 }, { "epoch": 1.24, "learning_rate": 0.00019630733944954126, "loss": 1.362, "step": 2014 }, { "epoch": 1.24, "learning_rate": 0.00019623853211009172, "loss": 1.3883, "step": 2015 }, { "epoch": 1.24, "learning_rate": 0.00019616972477064218, "loss": 1.2565, "step": 2016 }, { "epoch": 1.25, "learning_rate": 0.00019610091743119264, "loss": 1.2136, "step": 2017 }, { "epoch": 1.25, "learning_rate": 0.0001960321100917431, "loss": 1.1219, "step": 2018 }, { "epoch": 1.25, "learning_rate": 0.00019596330275229357, "loss": 1.0602, "step": 2019 }, { "epoch": 1.25, "learning_rate": 0.00019589449541284403, "loss": 0.9342, "step": 2020 }, { "epoch": 1.25, "learning_rate": 0.0001958256880733945, "loss": 3.1753, "step": 2021 }, { "epoch": 1.25, "learning_rate": 0.00019575688073394492, "loss": 2.5807, "step": 2022 }, { "epoch": 1.25, "learning_rate": 0.00019568807339449538, "loss": 2.4483, "step": 2023 }, { "epoch": 1.25, "learning_rate": 0.00019561926605504585, "loss": 2.1783, "step": 2024 }, { "epoch": 1.25, "learning_rate": 0.0001955504587155963, "loss": 2.2004, "step": 2025 }, { "epoch": 1.25, "learning_rate": 0.00019548165137614677, "loss": 2.2108, "step": 2026 }, { "epoch": 1.25, "learning_rate": 0.00019541284403669723, "loss": 2.2816, "step": 2027 }, { "epoch": 1.25, "learning_rate": 0.0001953440366972477, "loss": 2.0895, "step": 2028 }, { "epoch": 1.25, "learning_rate": 0.00019527522935779815, "loss": 2.143, "step": 2029 }, { "epoch": 1.25, "learning_rate": 0.0001952064220183486, "loss": 2.005, "step": 2030 }, { "epoch": 1.25, "learning_rate": 0.00019513761467889907, "loss": 1.9381, "step": 2031 }, { "epoch": 1.25, "learning_rate": 0.00019506880733944954, "loss": 1.8762, "step": 2032 }, { "epoch": 1.25, "learning_rate": 0.000195, "loss": 2.1054, "step": 2033 }, { "epoch": 1.26, "learning_rate": 0.00019493119266055046, "loss": 1.8601, "step": 2034 }, { "epoch": 1.26, "learning_rate": 0.00019486238532110092, "loss": 1.9703, "step": 2035 }, { "epoch": 1.26, "learning_rate": 0.00019479357798165138, "loss": 2.0773, "step": 2036 }, { "epoch": 1.26, "learning_rate": 0.00019472477064220184, "loss": 1.8942, "step": 2037 }, { "epoch": 1.26, "learning_rate": 0.00019465596330275225, "loss": 1.9202, "step": 2038 }, { "epoch": 1.26, "learning_rate": 0.0001945871559633027, "loss": 1.7541, "step": 2039 }, { "epoch": 1.26, "learning_rate": 0.00019451834862385317, "loss": 1.7267, "step": 2040 }, { "epoch": 1.26, "learning_rate": 0.00019444954128440363, "loss": 1.8747, "step": 2041 }, { "epoch": 1.26, "learning_rate": 0.0001943807339449541, "loss": 2.0165, "step": 2042 }, { "epoch": 1.26, "learning_rate": 0.00019431192660550455, "loss": 1.8279, "step": 2043 }, { "epoch": 1.26, "learning_rate": 0.00019424311926605502, "loss": 1.7813, "step": 2044 }, { "epoch": 1.26, "learning_rate": 0.00019417431192660548, "loss": 1.7252, "step": 2045 }, { "epoch": 1.26, "learning_rate": 0.00019410550458715594, "loss": 1.6401, "step": 2046 }, { "epoch": 1.26, "learning_rate": 0.0001940366972477064, "loss": 1.6962, "step": 2047 }, { "epoch": 1.26, "learning_rate": 0.00019396788990825686, "loss": 1.7297, "step": 2048 }, { "epoch": 1.26, "learning_rate": 0.00019389908256880732, "loss": 1.738, "step": 2049 }, { "epoch": 1.27, "learning_rate": 0.00019383027522935778, "loss": 1.7066, "step": 2050 }, { "epoch": 1.27, "learning_rate": 0.00019376146788990825, "loss": 1.6057, "step": 2051 }, { "epoch": 1.27, "learning_rate": 0.0001936926605504587, "loss": 1.7023, "step": 2052 }, { "epoch": 1.27, "learning_rate": 0.00019362385321100917, "loss": 1.528, "step": 2053 }, { "epoch": 1.27, "learning_rate": 0.0001935550458715596, "loss": 1.5115, "step": 2054 }, { "epoch": 1.27, "learning_rate": 0.00019348623853211006, "loss": 1.4674, "step": 2055 }, { "epoch": 1.27, "learning_rate": 0.00019341743119266052, "loss": 1.3539, "step": 2056 }, { "epoch": 1.27, "learning_rate": 0.00019334862385321099, "loss": 1.6316, "step": 2057 }, { "epoch": 1.27, "learning_rate": 0.00019327981651376145, "loss": 1.7101, "step": 2058 }, { "epoch": 1.27, "learning_rate": 0.0001932110091743119, "loss": 1.4022, "step": 2059 }, { "epoch": 1.27, "learning_rate": 0.00019314220183486237, "loss": 1.5265, "step": 2060 }, { "epoch": 1.27, "learning_rate": 0.00019307339449541283, "loss": 1.3397, "step": 2061 }, { "epoch": 1.27, "learning_rate": 0.0001930045871559633, "loss": 1.4825, "step": 2062 }, { "epoch": 1.27, "learning_rate": 0.00019293577981651375, "loss": 1.3488, "step": 2063 }, { "epoch": 1.27, "learning_rate": 0.00019286697247706421, "loss": 1.3008, "step": 2064 }, { "epoch": 1.27, "learning_rate": 0.00019279816513761468, "loss": 1.227, "step": 2065 }, { "epoch": 1.28, "learning_rate": 0.00019272935779816514, "loss": 1.1107, "step": 2066 }, { "epoch": 1.28, "learning_rate": 0.0001926605504587156, "loss": 1.061, "step": 2067 }, { "epoch": 1.28, "learning_rate": 0.00019259174311926606, "loss": 1.1078, "step": 2068 }, { "epoch": 1.28, "learning_rate": 0.00019252293577981652, "loss": 0.998, "step": 2069 }, { "epoch": 1.28, "learning_rate": 0.00019245412844036693, "loss": 0.9788, "step": 2070 }, { "epoch": 1.28, "learning_rate": 0.0001923853211009174, "loss": 3.155, "step": 2071 }, { "epoch": 1.28, "learning_rate": 0.00019231651376146785, "loss": 2.6992, "step": 2072 }, { "epoch": 1.28, "learning_rate": 0.0001922477064220183, "loss": 2.3502, "step": 2073 }, { "epoch": 1.28, "learning_rate": 0.00019217889908256877, "loss": 2.2841, "step": 2074 }, { "epoch": 1.28, "learning_rate": 0.00019211009174311923, "loss": 2.0852, "step": 2075 }, { "epoch": 1.28, "learning_rate": 0.0001920412844036697, "loss": 2.0464, "step": 2076 }, { "epoch": 1.28, "learning_rate": 0.00019197247706422016, "loss": 2.1334, "step": 2077 }, { "epoch": 1.28, "learning_rate": 0.00019190366972477062, "loss": 2.1189, "step": 2078 }, { "epoch": 1.28, "learning_rate": 0.00019183486238532108, "loss": 2.1277, "step": 2079 }, { "epoch": 1.28, "learning_rate": 0.00019176605504587154, "loss": 2.1875, "step": 2080 }, { "epoch": 1.28, "learning_rate": 0.000191697247706422, "loss": 1.8635, "step": 2081 }, { "epoch": 1.29, "learning_rate": 0.00019162844036697246, "loss": 2.1077, "step": 2082 }, { "epoch": 1.29, "learning_rate": 0.00019155963302752292, "loss": 1.978, "step": 2083 }, { "epoch": 1.29, "learning_rate": 0.00019149082568807339, "loss": 1.9189, "step": 2084 }, { "epoch": 1.29, "learning_rate": 0.00019142201834862385, "loss": 2.0011, "step": 2085 }, { "epoch": 1.29, "learning_rate": 0.00019135321100917428, "loss": 1.7829, "step": 2086 }, { "epoch": 1.29, "learning_rate": 0.00019128440366972474, "loss": 1.8061, "step": 2087 }, { "epoch": 1.29, "learning_rate": 0.0001912155963302752, "loss": 1.9791, "step": 2088 }, { "epoch": 1.29, "learning_rate": 0.00019114678899082566, "loss": 1.9986, "step": 2089 }, { "epoch": 1.29, "learning_rate": 0.00019107798165137613, "loss": 1.7943, "step": 2090 }, { "epoch": 1.29, "learning_rate": 0.0001910091743119266, "loss": 1.8172, "step": 2091 }, { "epoch": 1.29, "learning_rate": 0.00019094036697247705, "loss": 1.8964, "step": 2092 }, { "epoch": 1.29, "learning_rate": 0.0001908715596330275, "loss": 1.7612, "step": 2093 }, { "epoch": 1.29, "learning_rate": 0.00019080275229357797, "loss": 1.6614, "step": 2094 }, { "epoch": 1.29, "learning_rate": 0.00019073394495412843, "loss": 1.794, "step": 2095 }, { "epoch": 1.29, "learning_rate": 0.0001906651376146789, "loss": 1.7443, "step": 2096 }, { "epoch": 1.29, "learning_rate": 0.00019059633027522936, "loss": 1.8229, "step": 2097 }, { "epoch": 1.3, "learning_rate": 0.00019052752293577982, "loss": 1.7122, "step": 2098 }, { "epoch": 1.3, "learning_rate": 0.00019045871559633028, "loss": 1.5759, "step": 2099 }, { "epoch": 1.3, "learning_rate": 0.00019038990825688074, "loss": 1.7618, "step": 2100 }, { "epoch": 1.3, "learning_rate": 0.0001903211009174312, "loss": 1.603, "step": 2101 }, { "epoch": 1.3, "learning_rate": 0.0001902522935779816, "loss": 1.5539, "step": 2102 }, { "epoch": 1.3, "learning_rate": 0.00019018348623853207, "loss": 1.5407, "step": 2103 }, { "epoch": 1.3, "learning_rate": 0.00019011467889908253, "loss": 1.6749, "step": 2104 }, { "epoch": 1.3, "learning_rate": 0.000190045871559633, "loss": 1.5617, "step": 2105 }, { "epoch": 1.3, "learning_rate": 0.00018997706422018345, "loss": 1.6097, "step": 2106 }, { "epoch": 1.3, "learning_rate": 0.00018990825688073391, "loss": 1.6182, "step": 2107 }, { "epoch": 1.3, "learning_rate": 0.00018983944954128437, "loss": 1.5297, "step": 2108 }, { "epoch": 1.3, "learning_rate": 0.00018977064220183484, "loss": 1.6091, "step": 2109 }, { "epoch": 1.3, "learning_rate": 0.0001897018348623853, "loss": 1.4417, "step": 2110 }, { "epoch": 1.3, "learning_rate": 0.00018963302752293576, "loss": 1.2602, "step": 2111 }, { "epoch": 1.3, "learning_rate": 0.00018956422018348622, "loss": 1.4811, "step": 2112 }, { "epoch": 1.3, "learning_rate": 0.00018949541284403668, "loss": 1.372, "step": 2113 }, { "epoch": 1.3, "learning_rate": 0.00018942660550458714, "loss": 1.2413, "step": 2114 }, { "epoch": 1.31, "learning_rate": 0.0001893577981651376, "loss": 1.1568, "step": 2115 }, { "epoch": 1.31, "learning_rate": 0.00018928899082568807, "loss": 1.2316, "step": 2116 }, { "epoch": 1.31, "learning_rate": 0.00018922018348623853, "loss": 1.1584, "step": 2117 }, { "epoch": 1.31, "learning_rate": 0.00018915137614678896, "loss": 1.1388, "step": 2118 }, { "epoch": 1.31, "learning_rate": 0.00018908256880733942, "loss": 1.0518, "step": 2119 }, { "epoch": 1.31, "learning_rate": 0.00018901376146788988, "loss": 1.0085, "step": 2120 }, { "epoch": 1.31, "learning_rate": 0.00018894495412844034, "loss": 3.0042, "step": 2121 }, { "epoch": 1.31, "learning_rate": 0.0001888761467889908, "loss": 2.5236, "step": 2122 }, { "epoch": 1.31, "learning_rate": 0.00018880733944954127, "loss": 2.2717, "step": 2123 }, { "epoch": 1.31, "learning_rate": 0.00018873853211009173, "loss": 2.1288, "step": 2124 }, { "epoch": 1.31, "learning_rate": 0.0001886697247706422, "loss": 2.0028, "step": 2125 }, { "epoch": 1.31, "learning_rate": 0.00018860091743119265, "loss": 2.0841, "step": 2126 }, { "epoch": 1.31, "learning_rate": 0.0001885321100917431, "loss": 2.1967, "step": 2127 }, { "epoch": 1.31, "learning_rate": 0.00018846330275229357, "loss": 1.833, "step": 2128 }, { "epoch": 1.31, "learning_rate": 0.00018839449541284403, "loss": 2.0022, "step": 2129 }, { "epoch": 1.31, "learning_rate": 0.0001883256880733945, "loss": 2.0113, "step": 2130 }, { "epoch": 1.32, "learning_rate": 0.00018825688073394496, "loss": 1.9534, "step": 2131 }, { "epoch": 1.32, "learning_rate": 0.00018818807339449542, "loss": 1.8513, "step": 2132 }, { "epoch": 1.32, "learning_rate": 0.00018811926605504588, "loss": 1.8559, "step": 2133 }, { "epoch": 1.32, "learning_rate": 0.0001880504587155963, "loss": 1.9258, "step": 2134 }, { "epoch": 1.32, "learning_rate": 0.00018798165137614675, "loss": 1.8945, "step": 2135 }, { "epoch": 1.32, "learning_rate": 0.0001879128440366972, "loss": 1.8376, "step": 2136 }, { "epoch": 1.32, "learning_rate": 0.00018784403669724767, "loss": 1.8323, "step": 2137 }, { "epoch": 1.32, "learning_rate": 0.00018777522935779813, "loss": 1.8517, "step": 2138 }, { "epoch": 1.32, "learning_rate": 0.0001877064220183486, "loss": 1.8802, "step": 2139 }, { "epoch": 1.32, "learning_rate": 0.00018763761467889905, "loss": 1.8161, "step": 2140 }, { "epoch": 1.32, "learning_rate": 0.00018756880733944952, "loss": 1.7722, "step": 2141 }, { "epoch": 1.32, "learning_rate": 0.00018749999999999998, "loss": 1.7697, "step": 2142 }, { "epoch": 1.32, "learning_rate": 0.00018743119266055044, "loss": 1.9684, "step": 2143 }, { "epoch": 1.32, "learning_rate": 0.0001873623853211009, "loss": 1.8625, "step": 2144 }, { "epoch": 1.32, "learning_rate": 0.00018729357798165136, "loss": 1.68, "step": 2145 }, { "epoch": 1.32, "learning_rate": 0.00018722477064220182, "loss": 1.7741, "step": 2146 }, { "epoch": 1.33, "learning_rate": 0.00018715596330275228, "loss": 1.8442, "step": 2147 }, { "epoch": 1.33, "learning_rate": 0.00018708715596330274, "loss": 1.7399, "step": 2148 }, { "epoch": 1.33, "learning_rate": 0.0001870183486238532, "loss": 1.7896, "step": 2149 }, { "epoch": 1.33, "learning_rate": 0.00018694954128440367, "loss": 1.7657, "step": 2150 }, { "epoch": 1.33, "learning_rate": 0.0001868807339449541, "loss": 1.7501, "step": 2151 }, { "epoch": 1.33, "learning_rate": 0.00018681192660550456, "loss": 1.6063, "step": 2152 }, { "epoch": 1.33, "learning_rate": 0.00018674311926605502, "loss": 1.5323, "step": 2153 }, { "epoch": 1.33, "learning_rate": 0.00018667431192660548, "loss": 1.5954, "step": 2154 }, { "epoch": 1.33, "learning_rate": 0.00018660550458715595, "loss": 1.4607, "step": 2155 }, { "epoch": 1.33, "learning_rate": 0.0001865366972477064, "loss": 1.6201, "step": 2156 }, { "epoch": 1.33, "learning_rate": 0.00018646788990825687, "loss": 1.3629, "step": 2157 }, { "epoch": 1.33, "learning_rate": 0.00018639908256880733, "loss": 1.3295, "step": 2158 }, { "epoch": 1.33, "learning_rate": 0.0001863302752293578, "loss": 1.2844, "step": 2159 }, { "epoch": 1.33, "learning_rate": 0.00018626146788990825, "loss": 1.3591, "step": 2160 }, { "epoch": 1.33, "learning_rate": 0.00018619266055045871, "loss": 1.3799, "step": 2161 }, { "epoch": 1.33, "learning_rate": 0.00018612385321100918, "loss": 1.4718, "step": 2162 }, { "epoch": 1.34, "learning_rate": 0.00018605504587155964, "loss": 1.3193, "step": 2163 }, { "epoch": 1.34, "learning_rate": 0.0001859862385321101, "loss": 1.2736, "step": 2164 }, { "epoch": 1.34, "learning_rate": 0.00018591743119266056, "loss": 1.2972, "step": 2165 }, { "epoch": 1.34, "learning_rate": 0.00018584862385321102, "loss": 1.2635, "step": 2166 }, { "epoch": 1.34, "learning_rate": 0.00018577981651376143, "loss": 1.1203, "step": 2167 }, { "epoch": 1.34, "learning_rate": 0.0001857110091743119, "loss": 1.0905, "step": 2168 }, { "epoch": 1.34, "learning_rate": 0.00018564220183486235, "loss": 0.9921, "step": 2169 }, { "epoch": 1.34, "learning_rate": 0.0001855733944954128, "loss": 1.0113, "step": 2170 }, { "epoch": 1.34, "learning_rate": 0.00018550458715596327, "loss": 3.0903, "step": 2171 }, { "epoch": 1.34, "learning_rate": 0.00018543577981651373, "loss": 2.6619, "step": 2172 }, { "epoch": 1.34, "learning_rate": 0.0001853669724770642, "loss": 2.2993, "step": 2173 }, { "epoch": 1.34, "learning_rate": 0.00018529816513761466, "loss": 2.1252, "step": 2174 }, { "epoch": 1.34, "learning_rate": 0.00018522935779816512, "loss": 2.1888, "step": 2175 }, { "epoch": 1.34, "learning_rate": 0.00018516055045871558, "loss": 2.1205, "step": 2176 }, { "epoch": 1.34, "learning_rate": 0.00018509174311926604, "loss": 1.9359, "step": 2177 }, { "epoch": 1.34, "learning_rate": 0.0001850229357798165, "loss": 2.0144, "step": 2178 }, { "epoch": 1.35, "learning_rate": 0.00018495412844036696, "loss": 2.1088, "step": 2179 }, { "epoch": 1.35, "learning_rate": 0.00018488532110091742, "loss": 2.1593, "step": 2180 }, { "epoch": 1.35, "learning_rate": 0.00018481651376146789, "loss": 2.0112, "step": 2181 }, { "epoch": 1.35, "learning_rate": 0.00018474770642201835, "loss": 1.8821, "step": 2182 }, { "epoch": 1.35, "learning_rate": 0.00018467889908256878, "loss": 1.8886, "step": 2183 }, { "epoch": 1.35, "learning_rate": 0.00018461009174311924, "loss": 2.0067, "step": 2184 }, { "epoch": 1.35, "learning_rate": 0.0001845412844036697, "loss": 1.814, "step": 2185 }, { "epoch": 1.35, "learning_rate": 0.00018447247706422016, "loss": 1.9341, "step": 2186 }, { "epoch": 1.35, "learning_rate": 0.00018440366972477063, "loss": 1.8397, "step": 2187 }, { "epoch": 1.35, "learning_rate": 0.0001843348623853211, "loss": 1.7073, "step": 2188 }, { "epoch": 1.35, "learning_rate": 0.00018426605504587155, "loss": 1.8368, "step": 2189 }, { "epoch": 1.35, "learning_rate": 0.000184197247706422, "loss": 1.8199, "step": 2190 }, { "epoch": 1.35, "learning_rate": 0.00018412844036697247, "loss": 1.8054, "step": 2191 }, { "epoch": 1.35, "learning_rate": 0.00018405963302752293, "loss": 1.7279, "step": 2192 }, { "epoch": 1.35, "learning_rate": 0.0001839908256880734, "loss": 1.8178, "step": 2193 }, { "epoch": 1.35, "learning_rate": 0.00018392201834862385, "loss": 1.778, "step": 2194 }, { "epoch": 1.35, "learning_rate": 0.00018385321100917432, "loss": 1.6164, "step": 2195 }, { "epoch": 1.36, "learning_rate": 0.00018378440366972478, "loss": 1.653, "step": 2196 }, { "epoch": 1.36, "learning_rate": 0.00018371559633027524, "loss": 1.6927, "step": 2197 }, { "epoch": 1.36, "learning_rate": 0.0001836467889908257, "loss": 1.6417, "step": 2198 }, { "epoch": 1.36, "learning_rate": 0.0001835779816513761, "loss": 1.5684, "step": 2199 }, { "epoch": 1.36, "learning_rate": 0.00018350917431192657, "loss": 1.5989, "step": 2200 }, { "epoch": 1.36, "learning_rate": 0.00018344036697247703, "loss": 1.577, "step": 2201 }, { "epoch": 1.36, "learning_rate": 0.0001833715596330275, "loss": 1.5353, "step": 2202 }, { "epoch": 1.36, "learning_rate": 0.00018330275229357795, "loss": 1.5282, "step": 2203 }, { "epoch": 1.36, "learning_rate": 0.0001832339449541284, "loss": 1.4547, "step": 2204 }, { "epoch": 1.36, "learning_rate": 0.00018316513761467887, "loss": 1.3983, "step": 2205 }, { "epoch": 1.36, "learning_rate": 0.00018309633027522934, "loss": 1.4681, "step": 2206 }, { "epoch": 1.36, "learning_rate": 0.0001830275229357798, "loss": 1.4729, "step": 2207 }, { "epoch": 1.36, "learning_rate": 0.00018295871559633026, "loss": 1.5094, "step": 2208 }, { "epoch": 1.36, "learning_rate": 0.00018288990825688072, "loss": 1.4348, "step": 2209 }, { "epoch": 1.36, "learning_rate": 0.00018282110091743118, "loss": 1.3821, "step": 2210 }, { "epoch": 1.36, "learning_rate": 0.00018275229357798164, "loss": 1.4742, "step": 2211 }, { "epoch": 1.37, "learning_rate": 0.0001826834862385321, "loss": 1.4456, "step": 2212 }, { "epoch": 1.37, "learning_rate": 0.00018261467889908256, "loss": 1.3444, "step": 2213 }, { "epoch": 1.37, "learning_rate": 0.00018254587155963303, "loss": 1.195, "step": 2214 }, { "epoch": 1.37, "learning_rate": 0.00018247706422018346, "loss": 1.4143, "step": 2215 }, { "epoch": 1.37, "learning_rate": 0.00018240825688073392, "loss": 1.2932, "step": 2216 }, { "epoch": 1.37, "learning_rate": 0.00018233944954128438, "loss": 1.1407, "step": 2217 }, { "epoch": 1.37, "learning_rate": 0.00018227064220183484, "loss": 1.0317, "step": 2218 }, { "epoch": 1.37, "learning_rate": 0.0001822018348623853, "loss": 0.9022, "step": 2219 }, { "epoch": 1.37, "learning_rate": 0.00018213302752293577, "loss": 1.1126, "step": 2220 }, { "epoch": 1.37, "learning_rate": 0.00018206422018348623, "loss": 3.0819, "step": 2221 }, { "epoch": 1.37, "learning_rate": 0.0001819954128440367, "loss": 2.5293, "step": 2222 }, { "epoch": 1.37, "learning_rate": 0.00018192660550458715, "loss": 2.3659, "step": 2223 }, { "epoch": 1.37, "learning_rate": 0.0001818577981651376, "loss": 2.2317, "step": 2224 }, { "epoch": 1.37, "learning_rate": 0.00018178899082568807, "loss": 2.1227, "step": 2225 }, { "epoch": 1.37, "learning_rate": 0.00018172018348623853, "loss": 2.0041, "step": 2226 }, { "epoch": 1.37, "learning_rate": 0.000181651376146789, "loss": 2.0501, "step": 2227 }, { "epoch": 1.38, "learning_rate": 0.00018158256880733946, "loss": 1.9934, "step": 2228 }, { "epoch": 1.38, "learning_rate": 0.00018151376146788992, "loss": 1.9805, "step": 2229 }, { "epoch": 1.38, "learning_rate": 0.00018144495412844038, "loss": 1.9281, "step": 2230 }, { "epoch": 1.38, "learning_rate": 0.00018137614678899079, "loss": 2.092, "step": 2231 }, { "epoch": 1.38, "learning_rate": 0.00018130733944954125, "loss": 1.8745, "step": 2232 }, { "epoch": 1.38, "learning_rate": 0.0001812385321100917, "loss": 1.8711, "step": 2233 }, { "epoch": 1.38, "learning_rate": 0.00018116972477064217, "loss": 1.9596, "step": 2234 }, { "epoch": 1.38, "learning_rate": 0.00018110091743119263, "loss": 1.9111, "step": 2235 }, { "epoch": 1.38, "learning_rate": 0.0001810321100917431, "loss": 1.8684, "step": 2236 }, { "epoch": 1.38, "learning_rate": 0.00018096330275229355, "loss": 1.8264, "step": 2237 }, { "epoch": 1.38, "learning_rate": 0.00018089449541284401, "loss": 1.8391, "step": 2238 }, { "epoch": 1.38, "learning_rate": 0.00018082568807339448, "loss": 1.8479, "step": 2239 }, { "epoch": 1.38, "learning_rate": 0.00018075688073394494, "loss": 1.9619, "step": 2240 }, { "epoch": 1.38, "learning_rate": 0.0001806880733944954, "loss": 1.8425, "step": 2241 }, { "epoch": 1.38, "learning_rate": 0.00018061926605504586, "loss": 1.8801, "step": 2242 }, { "epoch": 1.38, "learning_rate": 0.00018055045871559632, "loss": 1.8278, "step": 2243 }, { "epoch": 1.39, "learning_rate": 0.00018048165137614678, "loss": 1.7898, "step": 2244 }, { "epoch": 1.39, "learning_rate": 0.00018041284403669724, "loss": 1.7335, "step": 2245 }, { "epoch": 1.39, "learning_rate": 0.0001803440366972477, "loss": 1.6493, "step": 2246 }, { "epoch": 1.39, "learning_rate": 0.00018027522935779814, "loss": 1.6948, "step": 2247 }, { "epoch": 1.39, "learning_rate": 0.0001802064220183486, "loss": 1.6788, "step": 2248 }, { "epoch": 1.39, "learning_rate": 0.00018013761467889906, "loss": 1.7021, "step": 2249 }, { "epoch": 1.39, "learning_rate": 0.00018006880733944952, "loss": 1.5245, "step": 2250 }, { "epoch": 1.39, "learning_rate": 0.00017999999999999998, "loss": 1.4886, "step": 2251 }, { "epoch": 1.39, "learning_rate": 0.00017993119266055045, "loss": 1.5184, "step": 2252 }, { "epoch": 1.39, "learning_rate": 0.0001798623853211009, "loss": 1.3526, "step": 2253 }, { "epoch": 1.39, "learning_rate": 0.00017979357798165137, "loss": 1.4944, "step": 2254 }, { "epoch": 1.39, "learning_rate": 0.00017972477064220183, "loss": 1.4891, "step": 2255 }, { "epoch": 1.39, "learning_rate": 0.0001796559633027523, "loss": 1.5575, "step": 2256 }, { "epoch": 1.39, "learning_rate": 0.00017958715596330275, "loss": 1.4377, "step": 2257 }, { "epoch": 1.39, "learning_rate": 0.0001795183486238532, "loss": 1.4172, "step": 2258 }, { "epoch": 1.39, "learning_rate": 0.00017944954128440367, "loss": 1.3875, "step": 2259 }, { "epoch": 1.4, "learning_rate": 0.00017938073394495414, "loss": 1.4868, "step": 2260 }, { "epoch": 1.4, "learning_rate": 0.0001793119266055046, "loss": 1.335, "step": 2261 }, { "epoch": 1.4, "learning_rate": 0.00017924311926605506, "loss": 1.3073, "step": 2262 }, { "epoch": 1.4, "learning_rate": 0.00017917431192660547, "loss": 1.238, "step": 2263 }, { "epoch": 1.4, "learning_rate": 0.00017910550458715593, "loss": 1.2034, "step": 2264 }, { "epoch": 1.4, "learning_rate": 0.0001790366972477064, "loss": 1.2099, "step": 2265 }, { "epoch": 1.4, "learning_rate": 0.00017896788990825685, "loss": 1.2746, "step": 2266 }, { "epoch": 1.4, "learning_rate": 0.0001788990825688073, "loss": 1.1214, "step": 2267 }, { "epoch": 1.4, "learning_rate": 0.00017883027522935777, "loss": 1.141, "step": 2268 }, { "epoch": 1.4, "learning_rate": 0.00017876146788990823, "loss": 1.1525, "step": 2269 }, { "epoch": 1.4, "learning_rate": 0.0001786926605504587, "loss": 0.9149, "step": 2270 }, { "epoch": 1.4, "learning_rate": 0.00017862385321100916, "loss": 2.7913, "step": 2271 }, { "epoch": 1.4, "learning_rate": 0.00017855504587155962, "loss": 2.5904, "step": 2272 }, { "epoch": 1.4, "learning_rate": 0.00017848623853211008, "loss": 2.4769, "step": 2273 }, { "epoch": 1.4, "learning_rate": 0.00017841743119266054, "loss": 2.2475, "step": 2274 }, { "epoch": 1.4, "learning_rate": 0.000178348623853211, "loss": 1.9642, "step": 2275 }, { "epoch": 1.4, "learning_rate": 0.00017827981651376146, "loss": 1.957, "step": 2276 }, { "epoch": 1.41, "learning_rate": 0.00017821100917431192, "loss": 2.0239, "step": 2277 }, { "epoch": 1.41, "learning_rate": 0.00017814220183486238, "loss": 2.0304, "step": 2278 }, { "epoch": 1.41, "learning_rate": 0.00017807339449541282, "loss": 2.0484, "step": 2279 }, { "epoch": 1.41, "learning_rate": 0.00017800458715596328, "loss": 1.8238, "step": 2280 }, { "epoch": 1.41, "learning_rate": 0.00017793577981651374, "loss": 1.9024, "step": 2281 }, { "epoch": 1.41, "learning_rate": 0.0001778669724770642, "loss": 1.7931, "step": 2282 }, { "epoch": 1.41, "learning_rate": 0.00017779816513761466, "loss": 1.7485, "step": 2283 }, { "epoch": 1.41, "learning_rate": 0.00017772935779816512, "loss": 1.8179, "step": 2284 }, { "epoch": 1.41, "learning_rate": 0.00017766055045871559, "loss": 1.9105, "step": 2285 }, { "epoch": 1.41, "learning_rate": 0.00017759174311926605, "loss": 1.7327, "step": 2286 }, { "epoch": 1.41, "learning_rate": 0.0001775229357798165, "loss": 1.8671, "step": 2287 }, { "epoch": 1.41, "learning_rate": 0.00017745412844036697, "loss": 1.8943, "step": 2288 }, { "epoch": 1.41, "learning_rate": 0.00017738532110091743, "loss": 1.765, "step": 2289 }, { "epoch": 1.41, "learning_rate": 0.0001773165137614679, "loss": 1.704, "step": 2290 }, { "epoch": 1.41, "learning_rate": 0.00017724770642201835, "loss": 1.7748, "step": 2291 }, { "epoch": 1.41, "learning_rate": 0.00017717889908256882, "loss": 1.74, "step": 2292 }, { "epoch": 1.42, "learning_rate": 0.00017711009174311928, "loss": 1.8968, "step": 2293 }, { "epoch": 1.42, "learning_rate": 0.00017704128440366974, "loss": 1.7546, "step": 2294 }, { "epoch": 1.42, "learning_rate": 0.00017697247706422014, "loss": 1.7636, "step": 2295 }, { "epoch": 1.42, "learning_rate": 0.0001769036697247706, "loss": 1.7031, "step": 2296 }, { "epoch": 1.42, "learning_rate": 0.00017683486238532107, "loss": 1.6796, "step": 2297 }, { "epoch": 1.42, "learning_rate": 0.00017676605504587153, "loss": 1.5201, "step": 2298 }, { "epoch": 1.42, "learning_rate": 0.000176697247706422, "loss": 1.7066, "step": 2299 }, { "epoch": 1.42, "learning_rate": 0.00017662844036697245, "loss": 1.7178, "step": 2300 }, { "epoch": 1.42, "learning_rate": 0.0001765596330275229, "loss": 1.5365, "step": 2301 }, { "epoch": 1.42, "learning_rate": 0.00017649082568807337, "loss": 1.6511, "step": 2302 }, { "epoch": 1.42, "learning_rate": 0.00017642201834862383, "loss": 1.4195, "step": 2303 }, { "epoch": 1.42, "learning_rate": 0.0001763532110091743, "loss": 1.5129, "step": 2304 }, { "epoch": 1.42, "learning_rate": 0.00017628440366972476, "loss": 1.5296, "step": 2305 }, { "epoch": 1.42, "learning_rate": 0.00017621559633027522, "loss": 1.5313, "step": 2306 }, { "epoch": 1.42, "learning_rate": 0.00017614678899082568, "loss": 1.5138, "step": 2307 }, { "epoch": 1.42, "learning_rate": 0.00017607798165137614, "loss": 1.3979, "step": 2308 }, { "epoch": 1.43, "learning_rate": 0.0001760091743119266, "loss": 1.4173, "step": 2309 }, { "epoch": 1.43, "learning_rate": 0.00017594036697247706, "loss": 1.3764, "step": 2310 }, { "epoch": 1.43, "learning_rate": 0.0001758715596330275, "loss": 1.2286, "step": 2311 }, { "epoch": 1.43, "learning_rate": 0.00017580275229357796, "loss": 1.2268, "step": 2312 }, { "epoch": 1.43, "learning_rate": 0.00017573394495412842, "loss": 1.29, "step": 2313 }, { "epoch": 1.43, "learning_rate": 0.00017566513761467888, "loss": 1.2289, "step": 2314 }, { "epoch": 1.43, "learning_rate": 0.00017559633027522934, "loss": 1.2193, "step": 2315 }, { "epoch": 1.43, "learning_rate": 0.0001755275229357798, "loss": 1.1629, "step": 2316 }, { "epoch": 1.43, "learning_rate": 0.00017545871559633027, "loss": 1.0802, "step": 2317 }, { "epoch": 1.43, "learning_rate": 0.00017538990825688073, "loss": 1.0788, "step": 2318 }, { "epoch": 1.43, "learning_rate": 0.0001753211009174312, "loss": 1.044, "step": 2319 }, { "epoch": 1.43, "learning_rate": 0.00017525229357798165, "loss": 0.9904, "step": 2320 }, { "epoch": 1.43, "learning_rate": 0.0001751834862385321, "loss": 2.7566, "step": 2321 }, { "epoch": 1.43, "learning_rate": 0.00017511467889908257, "loss": 2.4943, "step": 2322 }, { "epoch": 1.43, "learning_rate": 0.00017504587155963303, "loss": 2.2526, "step": 2323 }, { "epoch": 1.43, "learning_rate": 0.0001749770642201835, "loss": 2.0902, "step": 2324 }, { "epoch": 1.44, "learning_rate": 0.00017490825688073396, "loss": 2.0279, "step": 2325 }, { "epoch": 1.44, "learning_rate": 0.00017483944954128442, "loss": 2.1295, "step": 2326 }, { "epoch": 1.44, "learning_rate": 0.00017477064220183482, "loss": 2.0603, "step": 2327 }, { "epoch": 1.44, "learning_rate": 0.00017470183486238529, "loss": 2.1415, "step": 2328 }, { "epoch": 1.44, "learning_rate": 0.00017463302752293575, "loss": 1.8661, "step": 2329 }, { "epoch": 1.44, "learning_rate": 0.0001745642201834862, "loss": 1.8926, "step": 2330 }, { "epoch": 1.44, "learning_rate": 0.00017449541284403667, "loss": 1.8352, "step": 2331 }, { "epoch": 1.44, "learning_rate": 0.00017442660550458713, "loss": 1.8973, "step": 2332 }, { "epoch": 1.44, "learning_rate": 0.0001743577981651376, "loss": 1.7612, "step": 2333 }, { "epoch": 1.44, "learning_rate": 0.00017428899082568805, "loss": 1.8255, "step": 2334 }, { "epoch": 1.44, "learning_rate": 0.00017422018348623851, "loss": 1.7843, "step": 2335 }, { "epoch": 1.44, "learning_rate": 0.00017415137614678898, "loss": 1.7908, "step": 2336 }, { "epoch": 1.44, "learning_rate": 0.00017408256880733944, "loss": 1.7631, "step": 2337 }, { "epoch": 1.44, "learning_rate": 0.0001740137614678899, "loss": 1.8242, "step": 2338 }, { "epoch": 1.44, "learning_rate": 0.00017394495412844036, "loss": 1.8142, "step": 2339 }, { "epoch": 1.44, "learning_rate": 0.00017387614678899082, "loss": 1.743, "step": 2340 }, { "epoch": 1.45, "learning_rate": 0.00017380733944954128, "loss": 1.8156, "step": 2341 }, { "epoch": 1.45, "learning_rate": 0.00017373853211009174, "loss": 1.8141, "step": 2342 }, { "epoch": 1.45, "learning_rate": 0.00017366972477064218, "loss": 1.653, "step": 2343 }, { "epoch": 1.45, "learning_rate": 0.00017360091743119264, "loss": 1.7386, "step": 2344 }, { "epoch": 1.45, "learning_rate": 0.0001735321100917431, "loss": 1.6271, "step": 2345 }, { "epoch": 1.45, "learning_rate": 0.00017346330275229356, "loss": 1.5777, "step": 2346 }, { "epoch": 1.45, "learning_rate": 0.00017339449541284402, "loss": 1.5498, "step": 2347 }, { "epoch": 1.45, "learning_rate": 0.00017332568807339448, "loss": 1.6421, "step": 2348 }, { "epoch": 1.45, "learning_rate": 0.00017325688073394494, "loss": 1.667, "step": 2349 }, { "epoch": 1.45, "learning_rate": 0.0001731880733944954, "loss": 1.6007, "step": 2350 }, { "epoch": 1.45, "learning_rate": 0.00017311926605504587, "loss": 1.5331, "step": 2351 }, { "epoch": 1.45, "learning_rate": 0.00017305045871559633, "loss": 1.4682, "step": 2352 }, { "epoch": 1.45, "learning_rate": 0.0001729816513761468, "loss": 1.6398, "step": 2353 }, { "epoch": 1.45, "learning_rate": 0.00017291284403669725, "loss": 1.4617, "step": 2354 }, { "epoch": 1.45, "learning_rate": 0.0001728440366972477, "loss": 1.3861, "step": 2355 }, { "epoch": 1.45, "learning_rate": 0.00017277522935779817, "loss": 1.3548, "step": 2356 }, { "epoch": 1.45, "learning_rate": 0.00017270642201834863, "loss": 1.4008, "step": 2357 }, { "epoch": 1.46, "learning_rate": 0.0001726376146788991, "loss": 1.2727, "step": 2358 }, { "epoch": 1.46, "learning_rate": 0.0001725688073394495, "loss": 1.2984, "step": 2359 }, { "epoch": 1.46, "learning_rate": 0.00017249999999999996, "loss": 1.2728, "step": 2360 }, { "epoch": 1.46, "learning_rate": 0.00017243119266055043, "loss": 1.255, "step": 2361 }, { "epoch": 1.46, "learning_rate": 0.0001723623853211009, "loss": 1.2775, "step": 2362 }, { "epoch": 1.46, "learning_rate": 0.00017229357798165135, "loss": 1.3776, "step": 2363 }, { "epoch": 1.46, "learning_rate": 0.0001722247706422018, "loss": 1.2018, "step": 2364 }, { "epoch": 1.46, "learning_rate": 0.00017215596330275227, "loss": 1.0652, "step": 2365 }, { "epoch": 1.46, "learning_rate": 0.00017208715596330273, "loss": 1.1631, "step": 2366 }, { "epoch": 1.46, "learning_rate": 0.0001720183486238532, "loss": 1.1331, "step": 2367 }, { "epoch": 1.46, "learning_rate": 0.00017194954128440365, "loss": 0.9859, "step": 2368 }, { "epoch": 1.46, "learning_rate": 0.00017188073394495412, "loss": 1.1434, "step": 2369 }, { "epoch": 1.46, "learning_rate": 0.00017181192660550458, "loss": 0.8219, "step": 2370 }, { "epoch": 1.46, "learning_rate": 0.00017174311926605504, "loss": 2.6552, "step": 2371 }, { "epoch": 1.46, "learning_rate": 0.0001716743119266055, "loss": 2.6183, "step": 2372 }, { "epoch": 1.46, "learning_rate": 0.00017160550458715596, "loss": 2.2357, "step": 2373 }, { "epoch": 1.47, "learning_rate": 0.00017153669724770642, "loss": 2.1045, "step": 2374 }, { "epoch": 1.47, "learning_rate": 0.00017146788990825686, "loss": 2.2228, "step": 2375 }, { "epoch": 1.47, "learning_rate": 0.00017139908256880732, "loss": 1.99, "step": 2376 }, { "epoch": 1.47, "learning_rate": 0.00017133027522935778, "loss": 1.8998, "step": 2377 }, { "epoch": 1.47, "learning_rate": 0.00017126146788990824, "loss": 2.0302, "step": 2378 }, { "epoch": 1.47, "learning_rate": 0.0001711926605504587, "loss": 1.7413, "step": 2379 }, { "epoch": 1.47, "learning_rate": 0.00017112385321100916, "loss": 1.6514, "step": 2380 }, { "epoch": 1.47, "learning_rate": 0.00017105504587155962, "loss": 1.9371, "step": 2381 }, { "epoch": 1.47, "learning_rate": 0.00017098623853211009, "loss": 1.7878, "step": 2382 }, { "epoch": 1.47, "learning_rate": 0.00017091743119266055, "loss": 1.8151, "step": 2383 }, { "epoch": 1.47, "learning_rate": 0.000170848623853211, "loss": 1.8037, "step": 2384 }, { "epoch": 1.47, "learning_rate": 0.00017077981651376147, "loss": 1.8958, "step": 2385 }, { "epoch": 1.47, "learning_rate": 0.00017071100917431193, "loss": 1.8613, "step": 2386 }, { "epoch": 1.47, "learning_rate": 0.0001706422018348624, "loss": 1.8085, "step": 2387 }, { "epoch": 1.47, "learning_rate": 0.00017057339449541285, "loss": 1.6826, "step": 2388 }, { "epoch": 1.47, "learning_rate": 0.00017050458715596331, "loss": 1.7469, "step": 2389 }, { "epoch": 1.48, "learning_rate": 0.00017043577981651378, "loss": 1.8051, "step": 2390 }, { "epoch": 1.48, "learning_rate": 0.00017036697247706418, "loss": 1.9276, "step": 2391 }, { "epoch": 1.48, "learning_rate": 0.00017029816513761464, "loss": 1.7462, "step": 2392 }, { "epoch": 1.48, "learning_rate": 0.0001702293577981651, "loss": 1.6551, "step": 2393 }, { "epoch": 1.48, "learning_rate": 0.00017016055045871557, "loss": 1.6347, "step": 2394 }, { "epoch": 1.48, "learning_rate": 0.00017009174311926603, "loss": 1.6097, "step": 2395 }, { "epoch": 1.48, "learning_rate": 0.0001700229357798165, "loss": 1.6692, "step": 2396 }, { "epoch": 1.48, "learning_rate": 0.00016995412844036695, "loss": 1.5917, "step": 2397 }, { "epoch": 1.48, "learning_rate": 0.0001698853211009174, "loss": 1.6417, "step": 2398 }, { "epoch": 1.48, "learning_rate": 0.00016981651376146787, "loss": 1.6644, "step": 2399 }, { "epoch": 1.48, "learning_rate": 0.00016974770642201833, "loss": 1.4968, "step": 2400 }, { "epoch": 1.48, "learning_rate": 0.0001696788990825688, "loss": 1.5209, "step": 2401 }, { "epoch": 1.48, "learning_rate": 0.00016961009174311926, "loss": 1.5565, "step": 2402 }, { "epoch": 1.48, "learning_rate": 0.00016954128440366972, "loss": 1.456, "step": 2403 }, { "epoch": 1.48, "learning_rate": 0.00016947247706422018, "loss": 1.5134, "step": 2404 }, { "epoch": 1.48, "learning_rate": 0.00016940366972477064, "loss": 1.4441, "step": 2405 }, { "epoch": 1.49, "learning_rate": 0.0001693348623853211, "loss": 1.3615, "step": 2406 }, { "epoch": 1.49, "learning_rate": 0.00016926605504587154, "loss": 1.3585, "step": 2407 }, { "epoch": 1.49, "learning_rate": 0.000169197247706422, "loss": 1.4528, "step": 2408 }, { "epoch": 1.49, "learning_rate": 0.00016912844036697246, "loss": 1.3891, "step": 2409 }, { "epoch": 1.49, "learning_rate": 0.00016905963302752292, "loss": 1.3184, "step": 2410 }, { "epoch": 1.49, "learning_rate": 0.00016899082568807338, "loss": 1.35, "step": 2411 }, { "epoch": 1.49, "learning_rate": 0.00016892201834862384, "loss": 1.2573, "step": 2412 }, { "epoch": 1.49, "learning_rate": 0.0001688532110091743, "loss": 1.279, "step": 2413 }, { "epoch": 1.49, "learning_rate": 0.00016878440366972476, "loss": 1.1925, "step": 2414 }, { "epoch": 1.49, "learning_rate": 0.00016871559633027523, "loss": 1.0992, "step": 2415 }, { "epoch": 1.49, "learning_rate": 0.0001686467889908257, "loss": 1.1728, "step": 2416 }, { "epoch": 1.49, "learning_rate": 0.00016857798165137615, "loss": 1.1968, "step": 2417 }, { "epoch": 1.49, "learning_rate": 0.0001685091743119266, "loss": 1.0496, "step": 2418 }, { "epoch": 1.49, "learning_rate": 0.00016844036697247707, "loss": 0.9928, "step": 2419 }, { "epoch": 1.49, "learning_rate": 0.00016837155963302753, "loss": 0.9409, "step": 2420 }, { "epoch": 1.49, "learning_rate": 0.000168302752293578, "loss": 2.8201, "step": 2421 }, { "epoch": 1.5, "learning_rate": 0.00016823394495412843, "loss": 2.5297, "step": 2422 }, { "epoch": 1.5, "learning_rate": 0.00016816513761467886, "loss": 2.2047, "step": 2423 }, { "epoch": 1.5, "learning_rate": 0.00016809633027522932, "loss": 2.2145, "step": 2424 }, { "epoch": 1.5, "learning_rate": 0.00016802752293577978, "loss": 2.1811, "step": 2425 }, { "epoch": 1.5, "learning_rate": 0.00016795871559633025, "loss": 2.0704, "step": 2426 }, { "epoch": 1.5, "learning_rate": 0.0001678899082568807, "loss": 1.9387, "step": 2427 }, { "epoch": 1.5, "learning_rate": 0.00016782110091743117, "loss": 1.8659, "step": 2428 }, { "epoch": 1.5, "learning_rate": 0.00016775229357798163, "loss": 2.0039, "step": 2429 }, { "epoch": 1.5, "learning_rate": 0.0001676834862385321, "loss": 1.8378, "step": 2430 }, { "epoch": 1.5, "learning_rate": 0.00016761467889908255, "loss": 1.9685, "step": 2431 }, { "epoch": 1.5, "learning_rate": 0.000167545871559633, "loss": 1.8136, "step": 2432 }, { "epoch": 1.5, "learning_rate": 0.00016747706422018347, "loss": 1.7035, "step": 2433 }, { "epoch": 1.5, "learning_rate": 0.00016740825688073394, "loss": 1.7615, "step": 2434 }, { "epoch": 1.5, "learning_rate": 0.0001673394495412844, "loss": 1.768, "step": 2435 }, { "epoch": 1.5, "learning_rate": 0.00016727064220183486, "loss": 1.8968, "step": 2436 }, { "epoch": 1.5, "learning_rate": 0.00016720183486238532, "loss": 1.8514, "step": 2437 }, { "epoch": 1.5, "learning_rate": 0.00016713302752293578, "loss": 1.5916, "step": 2438 }, { "epoch": 1.51, "learning_rate": 0.00016706422018348622, "loss": 1.5929, "step": 2439 }, { "epoch": 1.51, "learning_rate": 0.00016699541284403668, "loss": 1.6183, "step": 2440 }, { "epoch": 1.51, "learning_rate": 0.00016692660550458714, "loss": 1.6629, "step": 2441 }, { "epoch": 1.51, "learning_rate": 0.0001668577981651376, "loss": 1.8363, "step": 2442 }, { "epoch": 1.51, "learning_rate": 0.00016678899082568806, "loss": 1.636, "step": 2443 }, { "epoch": 1.51, "learning_rate": 0.00016672018348623852, "loss": 1.7425, "step": 2444 }, { "epoch": 1.51, "learning_rate": 0.00016665137614678898, "loss": 1.6772, "step": 2445 }, { "epoch": 1.51, "learning_rate": 0.00016658256880733944, "loss": 1.675, "step": 2446 }, { "epoch": 1.51, "learning_rate": 0.0001665137614678899, "loss": 1.6034, "step": 2447 }, { "epoch": 1.51, "learning_rate": 0.00016644495412844037, "loss": 1.4182, "step": 2448 }, { "epoch": 1.51, "learning_rate": 0.00016637614678899083, "loss": 1.6199, "step": 2449 }, { "epoch": 1.51, "learning_rate": 0.0001663073394495413, "loss": 1.4876, "step": 2450 }, { "epoch": 1.51, "learning_rate": 0.00016623853211009175, "loss": 1.5222, "step": 2451 }, { "epoch": 1.51, "learning_rate": 0.0001661697247706422, "loss": 1.4074, "step": 2452 }, { "epoch": 1.51, "learning_rate": 0.00016610091743119265, "loss": 1.3419, "step": 2453 }, { "epoch": 1.51, "learning_rate": 0.0001660321100917431, "loss": 1.372, "step": 2454 }, { "epoch": 1.52, "learning_rate": 0.00016596330275229354, "loss": 1.6406, "step": 2455 }, { "epoch": 1.52, "learning_rate": 0.000165894495412844, "loss": 1.4601, "step": 2456 }, { "epoch": 1.52, "learning_rate": 0.00016582568807339446, "loss": 1.5373, "step": 2457 }, { "epoch": 1.52, "learning_rate": 0.00016575688073394493, "loss": 1.3078, "step": 2458 }, { "epoch": 1.52, "learning_rate": 0.00016568807339449539, "loss": 1.3279, "step": 2459 }, { "epoch": 1.52, "learning_rate": 0.00016561926605504585, "loss": 1.1597, "step": 2460 }, { "epoch": 1.52, "learning_rate": 0.0001655504587155963, "loss": 1.1937, "step": 2461 }, { "epoch": 1.52, "learning_rate": 0.00016548165137614677, "loss": 1.2922, "step": 2462 }, { "epoch": 1.52, "learning_rate": 0.00016541284403669723, "loss": 1.0909, "step": 2463 }, { "epoch": 1.52, "learning_rate": 0.0001653440366972477, "loss": 1.086, "step": 2464 }, { "epoch": 1.52, "learning_rate": 0.00016527522935779815, "loss": 1.0827, "step": 2465 }, { "epoch": 1.52, "learning_rate": 0.00016520642201834862, "loss": 1.0222, "step": 2466 }, { "epoch": 1.52, "learning_rate": 0.00016513761467889908, "loss": 1.0251, "step": 2467 }, { "epoch": 1.52, "learning_rate": 0.00016506880733944954, "loss": 0.8553, "step": 2468 }, { "epoch": 1.52, "learning_rate": 0.000165, "loss": 1.0218, "step": 2469 }, { "epoch": 1.52, "learning_rate": 0.00016493119266055046, "loss": 0.9624, "step": 2470 }, { "epoch": 1.53, "learning_rate": 0.0001648623853211009, "loss": 2.7151, "step": 2471 }, { "epoch": 1.53, "learning_rate": 0.00016479357798165136, "loss": 2.4307, "step": 2472 }, { "epoch": 1.53, "learning_rate": 0.00016472477064220182, "loss": 2.171, "step": 2473 }, { "epoch": 1.53, "learning_rate": 0.00016465596330275228, "loss": 1.968, "step": 2474 }, { "epoch": 1.53, "learning_rate": 0.00016458715596330274, "loss": 1.9974, "step": 2475 }, { "epoch": 1.53, "learning_rate": 0.0001645183486238532, "loss": 2.0983, "step": 2476 }, { "epoch": 1.53, "learning_rate": 0.00016444954128440366, "loss": 1.88, "step": 2477 }, { "epoch": 1.53, "learning_rate": 0.00016438073394495412, "loss": 1.8526, "step": 2478 }, { "epoch": 1.53, "learning_rate": 0.00016431192660550458, "loss": 1.9102, "step": 2479 }, { "epoch": 1.53, "learning_rate": 0.00016424311926605505, "loss": 1.8006, "step": 2480 }, { "epoch": 1.53, "learning_rate": 0.0001641743119266055, "loss": 1.7544, "step": 2481 }, { "epoch": 1.53, "learning_rate": 0.00016410550458715597, "loss": 2.0858, "step": 2482 }, { "epoch": 1.53, "learning_rate": 0.00016403669724770643, "loss": 1.7207, "step": 2483 }, { "epoch": 1.53, "learning_rate": 0.0001639678899082569, "loss": 1.8056, "step": 2484 }, { "epoch": 1.53, "learning_rate": 0.00016389908256880733, "loss": 1.6819, "step": 2485 }, { "epoch": 1.53, "learning_rate": 0.0001638302752293578, "loss": 1.7391, "step": 2486 }, { "epoch": 1.54, "learning_rate": 0.00016376146788990822, "loss": 1.6962, "step": 2487 }, { "epoch": 1.54, "learning_rate": 0.00016369266055045868, "loss": 1.7615, "step": 2488 }, { "epoch": 1.54, "learning_rate": 0.00016362385321100914, "loss": 1.7741, "step": 2489 }, { "epoch": 1.54, "learning_rate": 0.0001635550458715596, "loss": 1.792, "step": 2490 }, { "epoch": 1.54, "learning_rate": 0.00016348623853211007, "loss": 1.6932, "step": 2491 }, { "epoch": 1.54, "learning_rate": 0.00016341743119266053, "loss": 1.6707, "step": 2492 }, { "epoch": 1.54, "learning_rate": 0.000163348623853211, "loss": 1.7434, "step": 2493 }, { "epoch": 1.54, "learning_rate": 0.00016327981651376145, "loss": 1.713, "step": 2494 }, { "epoch": 1.54, "learning_rate": 0.0001632110091743119, "loss": 1.6532, "step": 2495 }, { "epoch": 1.54, "learning_rate": 0.00016314220183486237, "loss": 1.5183, "step": 2496 }, { "epoch": 1.54, "learning_rate": 0.00016307339449541283, "loss": 1.5773, "step": 2497 }, { "epoch": 1.54, "learning_rate": 0.0001630045871559633, "loss": 1.5828, "step": 2498 }, { "epoch": 1.54, "learning_rate": 0.00016293577981651376, "loss": 1.539, "step": 2499 }, { "epoch": 1.54, "learning_rate": 0.00016286697247706422, "loss": 1.6791, "step": 2500 }, { "epoch": 1.54, "eval_bleu": 9.104417999122743e-14, "eval_loss": 2.0529630184173584, "eval_runtime": 2550.0568, "eval_samples_per_second": 5.788, "eval_steps_per_second": 0.724, "step": 2500 }, { "epoch": 1.54, "learning_rate": 0.00016279816513761468, "loss": 1.5201, "step": 2501 }, { "epoch": 1.54, "learning_rate": 0.00016272935779816514, "loss": 1.4272, "step": 2502 }, { "epoch": 1.55, "learning_rate": 0.00016266055045871557, "loss": 1.4311, "step": 2503 }, { "epoch": 1.55, "learning_rate": 0.00016259174311926604, "loss": 1.4258, "step": 2504 }, { "epoch": 1.55, "learning_rate": 0.0001625229357798165, "loss": 1.4489, "step": 2505 }, { "epoch": 1.55, "learning_rate": 0.00016245412844036696, "loss": 1.3071, "step": 2506 }, { "epoch": 1.55, "learning_rate": 0.00016238532110091742, "loss": 1.289, "step": 2507 }, { "epoch": 1.55, "learning_rate": 0.00016231651376146788, "loss": 1.4723, "step": 2508 }, { "epoch": 1.55, "learning_rate": 0.00016224770642201834, "loss": 1.325, "step": 2509 }, { "epoch": 1.55, "learning_rate": 0.0001621788990825688, "loss": 1.2669, "step": 2510 }, { "epoch": 1.55, "learning_rate": 0.00016211009174311926, "loss": 1.3747, "step": 2511 }, { "epoch": 1.55, "learning_rate": 0.00016204128440366973, "loss": 1.2665, "step": 2512 }, { "epoch": 1.55, "learning_rate": 0.0001619724770642202, "loss": 1.2726, "step": 2513 }, { "epoch": 1.55, "learning_rate": 0.00016190366972477065, "loss": 0.9968, "step": 2514 }, { "epoch": 1.55, "learning_rate": 0.0001618348623853211, "loss": 1.1225, "step": 2515 }, { "epoch": 1.55, "learning_rate": 0.00016176605504587154, "loss": 1.0416, "step": 2516 }, { "epoch": 1.55, "learning_rate": 0.000161697247706422, "loss": 1.1329, "step": 2517 }, { "epoch": 1.55, "learning_rate": 0.00016162844036697247, "loss": 0.9321, "step": 2518 }, { "epoch": 1.55, "learning_rate": 0.0001615596330275229, "loss": 0.9791, "step": 2519 }, { "epoch": 1.56, "learning_rate": 0.00016149082568807336, "loss": 0.8882, "step": 2520 }, { "epoch": 1.56, "learning_rate": 0.00016142201834862382, "loss": 2.6526, "step": 2521 }, { "epoch": 1.56, "learning_rate": 0.00016135321100917428, "loss": 2.365, "step": 2522 }, { "epoch": 1.56, "learning_rate": 0.00016128440366972475, "loss": 2.2288, "step": 2523 }, { "epoch": 1.56, "learning_rate": 0.0001612155963302752, "loss": 2.0181, "step": 2524 }, { "epoch": 1.56, "learning_rate": 0.00016114678899082567, "loss": 2.0531, "step": 2525 }, { "epoch": 1.56, "learning_rate": 0.00016107798165137613, "loss": 1.8414, "step": 2526 }, { "epoch": 1.56, "learning_rate": 0.0001610091743119266, "loss": 2.1092, "step": 2527 }, { "epoch": 1.56, "learning_rate": 0.00016094036697247705, "loss": 1.8598, "step": 2528 }, { "epoch": 1.56, "learning_rate": 0.0001608715596330275, "loss": 1.9902, "step": 2529 }, { "epoch": 1.56, "learning_rate": 0.00016080275229357797, "loss": 1.6837, "step": 2530 }, { "epoch": 1.56, "learning_rate": 0.00016073394495412844, "loss": 1.8995, "step": 2531 }, { "epoch": 1.56, "learning_rate": 0.0001606651376146789, "loss": 1.7875, "step": 2532 }, { "epoch": 1.56, "learning_rate": 0.00016059633027522936, "loss": 1.7717, "step": 2533 }, { "epoch": 1.56, "learning_rate": 0.00016052752293577982, "loss": 1.8133, "step": 2534 }, { "epoch": 1.56, "learning_rate": 0.00016045871559633025, "loss": 1.8402, "step": 2535 }, { "epoch": 1.57, "learning_rate": 0.00016038990825688071, "loss": 1.7249, "step": 2536 }, { "epoch": 1.57, "learning_rate": 0.00016032110091743118, "loss": 1.7668, "step": 2537 }, { "epoch": 1.57, "learning_rate": 0.00016025229357798164, "loss": 1.7421, "step": 2538 }, { "epoch": 1.57, "learning_rate": 0.0001601834862385321, "loss": 1.5446, "step": 2539 }, { "epoch": 1.57, "learning_rate": 0.00016011467889908256, "loss": 1.7223, "step": 2540 }, { "epoch": 1.57, "learning_rate": 0.00016004587155963302, "loss": 1.7396, "step": 2541 }, { "epoch": 1.57, "learning_rate": 0.00015997706422018348, "loss": 1.8328, "step": 2542 }, { "epoch": 1.57, "learning_rate": 0.00015990825688073394, "loss": 1.7118, "step": 2543 }, { "epoch": 1.57, "learning_rate": 0.0001598394495412844, "loss": 1.6988, "step": 2544 }, { "epoch": 1.57, "learning_rate": 0.00015977064220183487, "loss": 1.4266, "step": 2545 }, { "epoch": 1.57, "learning_rate": 0.00015970183486238533, "loss": 1.5278, "step": 2546 }, { "epoch": 1.57, "learning_rate": 0.00015963302752293576, "loss": 1.484, "step": 2547 }, { "epoch": 1.57, "learning_rate": 0.00015956422018348622, "loss": 1.515, "step": 2548 }, { "epoch": 1.57, "learning_rate": 0.00015949541284403668, "loss": 1.2707, "step": 2549 }, { "epoch": 1.57, "learning_rate": 0.00015942660550458715, "loss": 1.4544, "step": 2550 }, { "epoch": 1.57, "learning_rate": 0.00015935779816513758, "loss": 1.4689, "step": 2551 }, { "epoch": 1.58, "learning_rate": 0.00015928899082568804, "loss": 1.412, "step": 2552 }, { "epoch": 1.58, "learning_rate": 0.0001592201834862385, "loss": 1.4757, "step": 2553 }, { "epoch": 1.58, "learning_rate": 0.00015915137614678896, "loss": 1.3614, "step": 2554 }, { "epoch": 1.58, "learning_rate": 0.00015908256880733942, "loss": 1.4429, "step": 2555 }, { "epoch": 1.58, "learning_rate": 0.00015901376146788989, "loss": 1.4448, "step": 2556 }, { "epoch": 1.58, "learning_rate": 0.00015894495412844035, "loss": 1.2, "step": 2557 }, { "epoch": 1.58, "learning_rate": 0.0001588761467889908, "loss": 1.2744, "step": 2558 }, { "epoch": 1.58, "learning_rate": 0.00015880733944954127, "loss": 1.122, "step": 2559 }, { "epoch": 1.58, "learning_rate": 0.00015873853211009173, "loss": 1.2659, "step": 2560 }, { "epoch": 1.58, "learning_rate": 0.0001586697247706422, "loss": 1.1515, "step": 2561 }, { "epoch": 1.58, "learning_rate": 0.00015860091743119265, "loss": 1.1739, "step": 2562 }, { "epoch": 1.58, "learning_rate": 0.00015853211009174311, "loss": 1.1951, "step": 2563 }, { "epoch": 1.58, "learning_rate": 0.00015846330275229358, "loss": 1.2862, "step": 2564 }, { "epoch": 1.58, "learning_rate": 0.00015839449541284404, "loss": 1.1841, "step": 2565 }, { "epoch": 1.58, "learning_rate": 0.0001583256880733945, "loss": 1.111, "step": 2566 }, { "epoch": 1.58, "learning_rate": 0.00015825688073394493, "loss": 1.049, "step": 2567 }, { "epoch": 1.59, "learning_rate": 0.0001581880733944954, "loss": 0.8997, "step": 2568 }, { "epoch": 1.59, "learning_rate": 0.00015811926605504586, "loss": 0.8725, "step": 2569 }, { "epoch": 1.59, "learning_rate": 0.00015805045871559632, "loss": 0.8984, "step": 2570 }, { "epoch": 1.59, "learning_rate": 0.00015798165137614678, "loss": 2.6616, "step": 2571 }, { "epoch": 1.59, "learning_rate": 0.00015791284403669724, "loss": 2.2496, "step": 2572 }, { "epoch": 1.59, "learning_rate": 0.0001578440366972477, "loss": 2.1663, "step": 2573 }, { "epoch": 1.59, "learning_rate": 0.00015777522935779816, "loss": 1.9776, "step": 2574 }, { "epoch": 1.59, "learning_rate": 0.00015770642201834862, "loss": 1.9404, "step": 2575 }, { "epoch": 1.59, "learning_rate": 0.00015763761467889908, "loss": 2.0084, "step": 2576 }, { "epoch": 1.59, "learning_rate": 0.00015756880733944955, "loss": 1.9548, "step": 2577 }, { "epoch": 1.59, "learning_rate": 0.00015749999999999998, "loss": 1.8605, "step": 2578 }, { "epoch": 1.59, "learning_rate": 0.00015743119266055044, "loss": 1.9351, "step": 2579 }, { "epoch": 1.59, "learning_rate": 0.0001573623853211009, "loss": 1.5703, "step": 2580 }, { "epoch": 1.59, "learning_rate": 0.00015729357798165136, "loss": 1.7759, "step": 2581 }, { "epoch": 1.59, "learning_rate": 0.00015722477064220182, "loss": 1.9455, "step": 2582 }, { "epoch": 1.59, "learning_rate": 0.00015715596330275226, "loss": 1.7536, "step": 2583 }, { "epoch": 1.6, "learning_rate": 0.00015708715596330272, "loss": 1.8178, "step": 2584 }, { "epoch": 1.6, "learning_rate": 0.00015701834862385318, "loss": 1.812, "step": 2585 }, { "epoch": 1.6, "learning_rate": 0.00015694954128440364, "loss": 1.7461, "step": 2586 }, { "epoch": 1.6, "learning_rate": 0.0001568807339449541, "loss": 1.6504, "step": 2587 }, { "epoch": 1.6, "learning_rate": 0.00015681192660550457, "loss": 1.6768, "step": 2588 }, { "epoch": 1.6, "learning_rate": 0.00015674311926605503, "loss": 1.6623, "step": 2589 }, { "epoch": 1.6, "learning_rate": 0.0001566743119266055, "loss": 1.746, "step": 2590 }, { "epoch": 1.6, "learning_rate": 0.00015660550458715595, "loss": 1.5993, "step": 2591 }, { "epoch": 1.6, "learning_rate": 0.0001565366972477064, "loss": 1.6449, "step": 2592 }, { "epoch": 1.6, "learning_rate": 0.00015646788990825687, "loss": 1.6487, "step": 2593 }, { "epoch": 1.6, "learning_rate": 0.00015639908256880733, "loss": 1.6896, "step": 2594 }, { "epoch": 1.6, "learning_rate": 0.0001563302752293578, "loss": 1.6811, "step": 2595 }, { "epoch": 1.6, "learning_rate": 0.00015626146788990826, "loss": 1.5278, "step": 2596 }, { "epoch": 1.6, "learning_rate": 0.00015619266055045872, "loss": 1.5104, "step": 2597 }, { "epoch": 1.6, "learning_rate": 0.00015612385321100918, "loss": 1.602, "step": 2598 }, { "epoch": 1.6, "learning_rate": 0.0001560550458715596, "loss": 1.5557, "step": 2599 }, { "epoch": 1.6, "learning_rate": 0.00015598623853211007, "loss": 1.4968, "step": 2600 }, { "epoch": 1.61, "learning_rate": 0.00015591743119266053, "loss": 1.3723, "step": 2601 }, { "epoch": 1.61, "learning_rate": 0.000155848623853211, "loss": 1.5659, "step": 2602 }, { "epoch": 1.61, "learning_rate": 0.00015577981651376146, "loss": 1.344, "step": 2603 }, { "epoch": 1.61, "learning_rate": 0.00015571100917431192, "loss": 1.4165, "step": 2604 }, { "epoch": 1.61, "learning_rate": 0.00015564220183486238, "loss": 1.3802, "step": 2605 }, { "epoch": 1.61, "learning_rate": 0.00015557339449541284, "loss": 1.3201, "step": 2606 }, { "epoch": 1.61, "learning_rate": 0.0001555045871559633, "loss": 1.3926, "step": 2607 }, { "epoch": 1.61, "learning_rate": 0.00015543577981651376, "loss": 1.2347, "step": 2608 }, { "epoch": 1.61, "learning_rate": 0.0001553669724770642, "loss": 1.3002, "step": 2609 }, { "epoch": 1.61, "learning_rate": 0.00015529816513761466, "loss": 1.2601, "step": 2610 }, { "epoch": 1.61, "learning_rate": 0.00015522935779816512, "loss": 1.426, "step": 2611 }, { "epoch": 1.61, "learning_rate": 0.00015516055045871558, "loss": 1.1477, "step": 2612 }, { "epoch": 1.61, "learning_rate": 0.00015509174311926604, "loss": 0.9914, "step": 2613 }, { "epoch": 1.61, "learning_rate": 0.0001550229357798165, "loss": 1.178, "step": 2614 }, { "epoch": 1.61, "learning_rate": 0.00015495412844036694, "loss": 1.0769, "step": 2615 }, { "epoch": 1.61, "learning_rate": 0.0001548853211009174, "loss": 1.0737, "step": 2616 }, { "epoch": 1.62, "learning_rate": 0.00015481651376146786, "loss": 0.9871, "step": 2617 }, { "epoch": 1.62, "learning_rate": 0.00015474770642201832, "loss": 0.8882, "step": 2618 }, { "epoch": 1.62, "learning_rate": 0.00015467889908256878, "loss": 0.9982, "step": 2619 }, { "epoch": 1.62, "learning_rate": 0.00015461009174311924, "loss": 0.8103, "step": 2620 }, { "epoch": 1.62, "learning_rate": 0.0001545412844036697, "loss": 2.3961, "step": 2621 }, { "epoch": 1.62, "learning_rate": 0.00015447247706422017, "loss": 2.2256, "step": 2622 }, { "epoch": 1.62, "learning_rate": 0.00015440366972477063, "loss": 1.9595, "step": 2623 }, { "epoch": 1.62, "learning_rate": 0.0001543348623853211, "loss": 2.0277, "step": 2624 }, { "epoch": 1.62, "learning_rate": 0.00015426605504587155, "loss": 2.0765, "step": 2625 }, { "epoch": 1.62, "learning_rate": 0.000154197247706422, "loss": 1.9673, "step": 2626 }, { "epoch": 1.62, "learning_rate": 0.00015412844036697247, "loss": 1.7778, "step": 2627 }, { "epoch": 1.62, "learning_rate": 0.00015405963302752293, "loss": 1.9129, "step": 2628 }, { "epoch": 1.62, "learning_rate": 0.0001539908256880734, "loss": 1.8891, "step": 2629 }, { "epoch": 1.62, "learning_rate": 0.00015392201834862386, "loss": 1.7402, "step": 2630 }, { "epoch": 1.62, "learning_rate": 0.0001538532110091743, "loss": 1.7574, "step": 2631 }, { "epoch": 1.62, "learning_rate": 0.00015378440366972475, "loss": 1.7611, "step": 2632 }, { "epoch": 1.63, "learning_rate": 0.00015371559633027521, "loss": 1.8226, "step": 2633 }, { "epoch": 1.63, "learning_rate": 0.00015364678899082568, "loss": 1.5753, "step": 2634 }, { "epoch": 1.63, "learning_rate": 0.00015357798165137614, "loss": 1.7198, "step": 2635 }, { "epoch": 1.63, "learning_rate": 0.0001535091743119266, "loss": 1.6867, "step": 2636 }, { "epoch": 1.63, "learning_rate": 0.00015344036697247706, "loss": 1.722, "step": 2637 }, { "epoch": 1.63, "learning_rate": 0.00015337155963302752, "loss": 1.6694, "step": 2638 }, { "epoch": 1.63, "learning_rate": 0.00015330275229357798, "loss": 1.655, "step": 2639 }, { "epoch": 1.63, "learning_rate": 0.00015323394495412844, "loss": 1.5777, "step": 2640 }, { "epoch": 1.63, "learning_rate": 0.00015316513761467888, "loss": 1.6608, "step": 2641 }, { "epoch": 1.63, "learning_rate": 0.00015309633027522934, "loss": 1.6519, "step": 2642 }, { "epoch": 1.63, "learning_rate": 0.0001530275229357798, "loss": 1.623, "step": 2643 }, { "epoch": 1.63, "learning_rate": 0.00015295871559633026, "loss": 1.4663, "step": 2644 }, { "epoch": 1.63, "learning_rate": 0.00015288990825688072, "loss": 1.6839, "step": 2645 }, { "epoch": 1.63, "learning_rate": 0.00015282110091743118, "loss": 1.4738, "step": 2646 }, { "epoch": 1.63, "learning_rate": 0.00015275229357798162, "loss": 1.5288, "step": 2647 }, { "epoch": 1.63, "learning_rate": 0.00015268348623853208, "loss": 1.5614, "step": 2648 }, { "epoch": 1.64, "learning_rate": 0.00015261467889908254, "loss": 1.3896, "step": 2649 }, { "epoch": 1.64, "learning_rate": 0.000152545871559633, "loss": 1.4337, "step": 2650 }, { "epoch": 1.64, "learning_rate": 0.00015247706422018346, "loss": 1.5167, "step": 2651 }, { "epoch": 1.64, "learning_rate": 0.00015240825688073392, "loss": 1.308, "step": 2652 }, { "epoch": 1.64, "learning_rate": 0.00015233944954128438, "loss": 1.4404, "step": 2653 }, { "epoch": 1.64, "learning_rate": 0.00015227064220183485, "loss": 1.4403, "step": 2654 }, { "epoch": 1.64, "learning_rate": 0.0001522018348623853, "loss": 1.433, "step": 2655 }, { "epoch": 1.64, "learning_rate": 0.00015213302752293577, "loss": 1.3051, "step": 2656 }, { "epoch": 1.64, "learning_rate": 0.00015206422018348623, "loss": 1.2807, "step": 2657 }, { "epoch": 1.64, "learning_rate": 0.0001519954128440367, "loss": 1.3028, "step": 2658 }, { "epoch": 1.64, "learning_rate": 0.00015192660550458715, "loss": 1.1943, "step": 2659 }, { "epoch": 1.64, "learning_rate": 0.00015185779816513761, "loss": 1.2522, "step": 2660 }, { "epoch": 1.64, "learning_rate": 0.00015178899082568808, "loss": 1.1313, "step": 2661 }, { "epoch": 1.64, "learning_rate": 0.00015172018348623854, "loss": 1.1258, "step": 2662 }, { "epoch": 1.64, "learning_rate": 0.00015165137614678897, "loss": 1.0589, "step": 2663 }, { "epoch": 1.64, "learning_rate": 0.00015158256880733943, "loss": 1.1104, "step": 2664 }, { "epoch": 1.65, "learning_rate": 0.0001515137614678899, "loss": 1.011, "step": 2665 }, { "epoch": 1.65, "learning_rate": 0.00015144495412844035, "loss": 0.9751, "step": 2666 }, { "epoch": 1.65, "learning_rate": 0.00015137614678899082, "loss": 0.9338, "step": 2667 }, { "epoch": 1.65, "learning_rate": 0.00015130733944954128, "loss": 0.9432, "step": 2668 }, { "epoch": 1.65, "learning_rate": 0.00015123853211009174, "loss": 0.8753, "step": 2669 }, { "epoch": 1.65, "learning_rate": 0.0001511697247706422, "loss": 0.8526, "step": 2670 }, { "epoch": 1.65, "learning_rate": 0.00015110091743119266, "loss": 2.5274, "step": 2671 }, { "epoch": 1.65, "learning_rate": 0.0001510321100917431, "loss": 2.506, "step": 2672 }, { "epoch": 1.65, "learning_rate": 0.00015096330275229356, "loss": 2.1575, "step": 2673 }, { "epoch": 1.65, "learning_rate": 0.00015089449541284402, "loss": 2.0803, "step": 2674 }, { "epoch": 1.65, "learning_rate": 0.00015082568807339448, "loss": 1.9175, "step": 2675 }, { "epoch": 1.65, "learning_rate": 0.00015075688073394494, "loss": 1.9355, "step": 2676 }, { "epoch": 1.65, "learning_rate": 0.0001506880733944954, "loss": 2.0204, "step": 2677 }, { "epoch": 1.65, "learning_rate": 0.00015061926605504586, "loss": 1.889, "step": 2678 }, { "epoch": 1.65, "learning_rate": 0.0001505504587155963, "loss": 2.0395, "step": 2679 }, { "epoch": 1.65, "learning_rate": 0.00015048165137614676, "loss": 1.8121, "step": 2680 }, { "epoch": 1.65, "learning_rate": 0.00015041284403669722, "loss": 1.6833, "step": 2681 }, { "epoch": 1.66, "learning_rate": 0.00015034403669724768, "loss": 1.7684, "step": 2682 }, { "epoch": 1.66, "learning_rate": 0.00015027522935779814, "loss": 1.5983, "step": 2683 }, { "epoch": 1.66, "learning_rate": 0.0001502064220183486, "loss": 1.7694, "step": 2684 }, { "epoch": 1.66, "learning_rate": 0.00015013761467889906, "loss": 1.7431, "step": 2685 }, { "epoch": 1.66, "learning_rate": 0.00015006880733944953, "loss": 1.7292, "step": 2686 }, { "epoch": 1.66, "learning_rate": 0.00015, "loss": 1.565, "step": 2687 }, { "epoch": 1.66, "learning_rate": 0.00014993119266055045, "loss": 1.6728, "step": 2688 }, { "epoch": 1.66, "learning_rate": 0.0001498623853211009, "loss": 1.6175, "step": 2689 }, { "epoch": 1.66, "learning_rate": 0.00014979357798165137, "loss": 1.627, "step": 2690 }, { "epoch": 1.66, "learning_rate": 0.00014972477064220183, "loss": 1.4694, "step": 2691 }, { "epoch": 1.66, "learning_rate": 0.00014965596330275227, "loss": 1.6295, "step": 2692 }, { "epoch": 1.66, "learning_rate": 0.00014958715596330273, "loss": 1.5165, "step": 2693 }, { "epoch": 1.66, "learning_rate": 0.0001495183486238532, "loss": 1.6623, "step": 2694 }, { "epoch": 1.66, "learning_rate": 0.00014944954128440365, "loss": 1.6055, "step": 2695 }, { "epoch": 1.66, "learning_rate": 0.0001493807339449541, "loss": 1.6107, "step": 2696 }, { "epoch": 1.66, "learning_rate": 0.00014931192660550457, "loss": 1.4241, "step": 2697 }, { "epoch": 1.67, "learning_rate": 0.00014924311926605503, "loss": 1.5645, "step": 2698 }, { "epoch": 1.67, "learning_rate": 0.0001491743119266055, "loss": 1.5751, "step": 2699 }, { "epoch": 1.67, "learning_rate": 0.00014910550458715596, "loss": 1.4571, "step": 2700 }, { "epoch": 1.67, "learning_rate": 0.00014903669724770642, "loss": 1.4935, "step": 2701 }, { "epoch": 1.67, "learning_rate": 0.00014896788990825688, "loss": 1.366, "step": 2702 }, { "epoch": 1.67, "learning_rate": 0.0001488990825688073, "loss": 1.4008, "step": 2703 }, { "epoch": 1.67, "learning_rate": 0.00014883027522935777, "loss": 1.3579, "step": 2704 }, { "epoch": 1.67, "learning_rate": 0.00014876146788990824, "loss": 1.2239, "step": 2705 }, { "epoch": 1.67, "learning_rate": 0.0001486926605504587, "loss": 1.4696, "step": 2706 }, { "epoch": 1.67, "learning_rate": 0.00014862385321100916, "loss": 1.3272, "step": 2707 }, { "epoch": 1.67, "learning_rate": 0.00014855504587155962, "loss": 1.1287, "step": 2708 }, { "epoch": 1.67, "learning_rate": 0.00014848623853211008, "loss": 1.2084, "step": 2709 }, { "epoch": 1.67, "learning_rate": 0.00014841743119266054, "loss": 1.0919, "step": 2710 }, { "epoch": 1.67, "learning_rate": 0.000148348623853211, "loss": 1.1432, "step": 2711 }, { "epoch": 1.67, "learning_rate": 0.00014827981651376146, "loss": 1.1812, "step": 2712 }, { "epoch": 1.67, "learning_rate": 0.00014821100917431193, "loss": 1.2734, "step": 2713 }, { "epoch": 1.68, "learning_rate": 0.0001481422018348624, "loss": 1.1117, "step": 2714 }, { "epoch": 1.68, "learning_rate": 0.00014807339449541285, "loss": 1.1729, "step": 2715 }, { "epoch": 1.68, "learning_rate": 0.00014800458715596328, "loss": 0.9826, "step": 2716 }, { "epoch": 1.68, "learning_rate": 0.00014793577981651374, "loss": 0.8985, "step": 2717 }, { "epoch": 1.68, "learning_rate": 0.0001478669724770642, "loss": 0.8843, "step": 2718 }, { "epoch": 1.68, "learning_rate": 0.00014779816513761467, "loss": 0.9639, "step": 2719 }, { "epoch": 1.68, "learning_rate": 0.00014772935779816513, "loss": 0.9416, "step": 2720 }, { "epoch": 1.68, "learning_rate": 0.0001476605504587156, "loss": 2.504, "step": 2721 }, { "epoch": 1.68, "learning_rate": 0.00014759174311926605, "loss": 2.3732, "step": 2722 }, { "epoch": 1.68, "learning_rate": 0.0001475229357798165, "loss": 1.9512, "step": 2723 }, { "epoch": 1.68, "learning_rate": 0.00014745412844036695, "loss": 1.9638, "step": 2724 }, { "epoch": 1.68, "learning_rate": 0.0001473853211009174, "loss": 1.9552, "step": 2725 }, { "epoch": 1.68, "learning_rate": 0.00014731651376146787, "loss": 1.9798, "step": 2726 }, { "epoch": 1.68, "learning_rate": 0.00014724770642201833, "loss": 1.8179, "step": 2727 }, { "epoch": 1.68, "learning_rate": 0.0001471788990825688, "loss": 1.8957, "step": 2728 }, { "epoch": 1.68, "learning_rate": 0.00014711009174311925, "loss": 1.9607, "step": 2729 }, { "epoch": 1.69, "learning_rate": 0.0001470412844036697, "loss": 1.832, "step": 2730 }, { "epoch": 1.69, "learning_rate": 0.00014697247706422017, "loss": 1.7741, "step": 2731 }, { "epoch": 1.69, "learning_rate": 0.00014690366972477064, "loss": 1.8495, "step": 2732 }, { "epoch": 1.69, "learning_rate": 0.0001468348623853211, "loss": 1.8138, "step": 2733 }, { "epoch": 1.69, "learning_rate": 0.00014676605504587153, "loss": 1.7175, "step": 2734 }, { "epoch": 1.69, "learning_rate": 0.000146697247706422, "loss": 1.8109, "step": 2735 }, { "epoch": 1.69, "learning_rate": 0.00014662844036697245, "loss": 1.6513, "step": 2736 }, { "epoch": 1.69, "learning_rate": 0.00014655963302752291, "loss": 1.7149, "step": 2737 }, { "epoch": 1.69, "learning_rate": 0.00014649082568807338, "loss": 1.6808, "step": 2738 }, { "epoch": 1.69, "learning_rate": 0.00014642201834862384, "loss": 1.6835, "step": 2739 }, { "epoch": 1.69, "learning_rate": 0.0001463532110091743, "loss": 1.7405, "step": 2740 }, { "epoch": 1.69, "learning_rate": 0.00014628440366972476, "loss": 1.5068, "step": 2741 }, { "epoch": 1.69, "learning_rate": 0.00014621559633027522, "loss": 1.6685, "step": 2742 }, { "epoch": 1.69, "learning_rate": 0.00014614678899082568, "loss": 1.5865, "step": 2743 }, { "epoch": 1.69, "learning_rate": 0.00014607798165137614, "loss": 1.6137, "step": 2744 }, { "epoch": 1.69, "learning_rate": 0.0001460091743119266, "loss": 1.4727, "step": 2745 }, { "epoch": 1.7, "learning_rate": 0.00014594036697247707, "loss": 1.5242, "step": 2746 }, { "epoch": 1.7, "learning_rate": 0.00014587155963302753, "loss": 1.4384, "step": 2747 }, { "epoch": 1.7, "learning_rate": 0.00014580275229357796, "loss": 1.5791, "step": 2748 }, { "epoch": 1.7, "learning_rate": 0.00014573394495412842, "loss": 1.3213, "step": 2749 }, { "epoch": 1.7, "learning_rate": 0.00014566513761467888, "loss": 1.5016, "step": 2750 }, { "epoch": 1.7, "learning_rate": 0.00014559633027522935, "loss": 1.3458, "step": 2751 }, { "epoch": 1.7, "learning_rate": 0.0001455275229357798, "loss": 1.4696, "step": 2752 }, { "epoch": 1.7, "learning_rate": 0.00014545871559633027, "loss": 1.4191, "step": 2753 }, { "epoch": 1.7, "learning_rate": 0.00014538990825688073, "loss": 1.1216, "step": 2754 }, { "epoch": 1.7, "learning_rate": 0.0001453211009174312, "loss": 1.331, "step": 2755 }, { "epoch": 1.7, "learning_rate": 0.00014525229357798162, "loss": 1.4856, "step": 2756 }, { "epoch": 1.7, "learning_rate": 0.00014518348623853209, "loss": 1.3681, "step": 2757 }, { "epoch": 1.7, "learning_rate": 0.00014511467889908255, "loss": 1.1923, "step": 2758 }, { "epoch": 1.7, "learning_rate": 0.000145045871559633, "loss": 1.1456, "step": 2759 }, { "epoch": 1.7, "learning_rate": 0.00014497706422018347, "loss": 1.2025, "step": 2760 }, { "epoch": 1.7, "learning_rate": 0.00014490825688073393, "loss": 1.1414, "step": 2761 }, { "epoch": 1.7, "learning_rate": 0.0001448394495412844, "loss": 1.1824, "step": 2762 }, { "epoch": 1.71, "learning_rate": 0.00014477064220183485, "loss": 1.1105, "step": 2763 }, { "epoch": 1.71, "learning_rate": 0.00014470183486238531, "loss": 1.1726, "step": 2764 }, { "epoch": 1.71, "learning_rate": 0.00014463302752293578, "loss": 0.9236, "step": 2765 }, { "epoch": 1.71, "learning_rate": 0.0001445642201834862, "loss": 0.8958, "step": 2766 }, { "epoch": 1.71, "learning_rate": 0.00014449541284403667, "loss": 1.0156, "step": 2767 }, { "epoch": 1.71, "learning_rate": 0.00014442660550458713, "loss": 1.0257, "step": 2768 }, { "epoch": 1.71, "learning_rate": 0.0001443577981651376, "loss": 0.8883, "step": 2769 }, { "epoch": 1.71, "learning_rate": 0.00014428899082568806, "loss": 0.7913, "step": 2770 }, { "epoch": 1.71, "learning_rate": 0.00014422018348623852, "loss": 2.459, "step": 2771 }, { "epoch": 1.71, "learning_rate": 0.00014415137614678898, "loss": 2.1902, "step": 2772 }, { "epoch": 1.71, "learning_rate": 0.00014408256880733944, "loss": 1.8838, "step": 2773 }, { "epoch": 1.71, "learning_rate": 0.0001440137614678899, "loss": 1.9688, "step": 2774 }, { "epoch": 1.71, "learning_rate": 0.00014394495412844036, "loss": 1.9483, "step": 2775 }, { "epoch": 1.71, "learning_rate": 0.00014387614678899082, "loss": 1.7471, "step": 2776 }, { "epoch": 1.71, "learning_rate": 0.00014380733944954128, "loss": 1.7646, "step": 2777 }, { "epoch": 1.71, "learning_rate": 0.00014373853211009175, "loss": 1.7671, "step": 2778 }, { "epoch": 1.72, "learning_rate": 0.0001436697247706422, "loss": 1.7177, "step": 2779 }, { "epoch": 1.72, "learning_rate": 0.00014360091743119264, "loss": 1.6666, "step": 2780 }, { "epoch": 1.72, "learning_rate": 0.0001435321100917431, "loss": 1.8098, "step": 2781 }, { "epoch": 1.72, "learning_rate": 0.00014346330275229356, "loss": 1.6711, "step": 2782 }, { "epoch": 1.72, "learning_rate": 0.00014339449541284402, "loss": 1.6581, "step": 2783 }, { "epoch": 1.72, "learning_rate": 0.00014332568807339449, "loss": 1.8288, "step": 2784 }, { "epoch": 1.72, "learning_rate": 0.00014325688073394495, "loss": 1.6395, "step": 2785 }, { "epoch": 1.72, "learning_rate": 0.0001431880733944954, "loss": 1.5874, "step": 2786 }, { "epoch": 1.72, "learning_rate": 0.00014311926605504587, "loss": 1.6246, "step": 2787 }, { "epoch": 1.72, "learning_rate": 0.0001430504587155963, "loss": 1.794, "step": 2788 }, { "epoch": 1.72, "learning_rate": 0.00014298165137614677, "loss": 1.5566, "step": 2789 }, { "epoch": 1.72, "learning_rate": 0.00014291284403669723, "loss": 1.5721, "step": 2790 }, { "epoch": 1.72, "learning_rate": 0.0001428440366972477, "loss": 1.5802, "step": 2791 }, { "epoch": 1.72, "learning_rate": 0.00014277522935779815, "loss": 1.5288, "step": 2792 }, { "epoch": 1.72, "learning_rate": 0.0001427064220183486, "loss": 1.5479, "step": 2793 }, { "epoch": 1.72, "learning_rate": 0.00014263761467889907, "loss": 1.486, "step": 2794 }, { "epoch": 1.73, "learning_rate": 0.00014256880733944953, "loss": 1.5892, "step": 2795 }, { "epoch": 1.73, "learning_rate": 0.0001425, "loss": 1.4275, "step": 2796 }, { "epoch": 1.73, "learning_rate": 0.00014243119266055043, "loss": 1.6013, "step": 2797 }, { "epoch": 1.73, "learning_rate": 0.0001423623853211009, "loss": 1.6975, "step": 2798 }, { "epoch": 1.73, "learning_rate": 0.00014229357798165135, "loss": 1.4974, "step": 2799 }, { "epoch": 1.73, "learning_rate": 0.0001422247706422018, "loss": 1.4928, "step": 2800 }, { "epoch": 1.73, "learning_rate": 0.00014215596330275227, "loss": 1.2671, "step": 2801 }, { "epoch": 1.73, "learning_rate": 0.00014208715596330273, "loss": 1.3971, "step": 2802 }, { "epoch": 1.73, "learning_rate": 0.0001420183486238532, "loss": 1.3389, "step": 2803 }, { "epoch": 1.73, "learning_rate": 0.00014194954128440366, "loss": 1.3498, "step": 2804 }, { "epoch": 1.73, "learning_rate": 0.00014188073394495412, "loss": 1.3314, "step": 2805 }, { "epoch": 1.73, "learning_rate": 0.00014181192660550458, "loss": 1.2777, "step": 2806 }, { "epoch": 1.73, "learning_rate": 0.00014174311926605504, "loss": 1.2437, "step": 2807 }, { "epoch": 1.73, "learning_rate": 0.0001416743119266055, "loss": 1.2039, "step": 2808 }, { "epoch": 1.73, "learning_rate": 0.00014160550458715596, "loss": 1.2735, "step": 2809 }, { "epoch": 1.73, "learning_rate": 0.00014153669724770642, "loss": 1.1883, "step": 2810 }, { "epoch": 1.74, "learning_rate": 0.00014146788990825689, "loss": 1.1214, "step": 2811 }, { "epoch": 1.74, "learning_rate": 0.00014139908256880732, "loss": 1.012, "step": 2812 }, { "epoch": 1.74, "learning_rate": 0.00014133027522935778, "loss": 1.1341, "step": 2813 }, { "epoch": 1.74, "learning_rate": 0.00014126146788990824, "loss": 1.1077, "step": 2814 }, { "epoch": 1.74, "learning_rate": 0.0001411926605504587, "loss": 1.0662, "step": 2815 }, { "epoch": 1.74, "learning_rate": 0.00014112385321100917, "loss": 0.9541, "step": 2816 }, { "epoch": 1.74, "learning_rate": 0.00014105504587155963, "loss": 0.906, "step": 2817 }, { "epoch": 1.74, "learning_rate": 0.0001409862385321101, "loss": 0.8681, "step": 2818 }, { "epoch": 1.74, "learning_rate": 0.00014091743119266055, "loss": 0.806, "step": 2819 }, { "epoch": 1.74, "learning_rate": 0.00014084862385321098, "loss": 1.0313, "step": 2820 }, { "epoch": 1.74, "learning_rate": 0.00014077981651376144, "loss": 2.4214, "step": 2821 }, { "epoch": 1.74, "learning_rate": 0.0001407110091743119, "loss": 2.2117, "step": 2822 }, { "epoch": 1.74, "learning_rate": 0.00014064220183486237, "loss": 1.9729, "step": 2823 }, { "epoch": 1.74, "learning_rate": 0.00014057339449541283, "loss": 1.9015, "step": 2824 }, { "epoch": 1.74, "learning_rate": 0.0001405045871559633, "loss": 1.8548, "step": 2825 }, { "epoch": 1.74, "learning_rate": 0.00014043577981651375, "loss": 1.8444, "step": 2826 }, { "epoch": 1.75, "learning_rate": 0.0001403669724770642, "loss": 1.8318, "step": 2827 }, { "epoch": 1.75, "learning_rate": 0.00014029816513761465, "loss": 1.7103, "step": 2828 }, { "epoch": 1.75, "learning_rate": 0.0001402293577981651, "loss": 1.869, "step": 2829 }, { "epoch": 1.75, "learning_rate": 0.00014016055045871557, "loss": 1.6276, "step": 2830 }, { "epoch": 1.75, "learning_rate": 0.00014009174311926603, "loss": 1.6946, "step": 2831 }, { "epoch": 1.75, "learning_rate": 0.0001400229357798165, "loss": 1.7185, "step": 2832 }, { "epoch": 1.75, "learning_rate": 0.00013995412844036695, "loss": 1.5489, "step": 2833 }, { "epoch": 1.75, "learning_rate": 0.00013988532110091741, "loss": 1.4284, "step": 2834 }, { "epoch": 1.75, "learning_rate": 0.00013981651376146788, "loss": 1.6394, "step": 2835 }, { "epoch": 1.75, "learning_rate": 0.00013974770642201834, "loss": 1.6487, "step": 2836 }, { "epoch": 1.75, "learning_rate": 0.0001396788990825688, "loss": 1.7531, "step": 2837 }, { "epoch": 1.75, "learning_rate": 0.00013961009174311926, "loss": 1.5455, "step": 2838 }, { "epoch": 1.75, "learning_rate": 0.00013954128440366972, "loss": 1.5839, "step": 2839 }, { "epoch": 1.75, "learning_rate": 0.00013947247706422018, "loss": 1.4558, "step": 2840 }, { "epoch": 1.75, "learning_rate": 0.00013940366972477064, "loss": 1.4974, "step": 2841 }, { "epoch": 1.75, "learning_rate": 0.0001393348623853211, "loss": 1.5057, "step": 2842 }, { "epoch": 1.75, "learning_rate": 0.00013926605504587157, "loss": 1.5581, "step": 2843 }, { "epoch": 1.76, "learning_rate": 0.000139197247706422, "loss": 1.6725, "step": 2844 }, { "epoch": 1.76, "learning_rate": 0.00013912844036697246, "loss": 1.4783, "step": 2845 }, { "epoch": 1.76, "learning_rate": 0.00013905963302752292, "loss": 1.5513, "step": 2846 }, { "epoch": 1.76, "learning_rate": 0.00013899082568807338, "loss": 1.569, "step": 2847 }, { "epoch": 1.76, "learning_rate": 0.00013892201834862384, "loss": 1.3348, "step": 2848 }, { "epoch": 1.76, "learning_rate": 0.0001388532110091743, "loss": 1.4112, "step": 2849 }, { "epoch": 1.76, "learning_rate": 0.00013878440366972477, "loss": 1.4632, "step": 2850 }, { "epoch": 1.76, "learning_rate": 0.00013871559633027523, "loss": 1.411, "step": 2851 }, { "epoch": 1.76, "learning_rate": 0.00013864678899082566, "loss": 1.3533, "step": 2852 }, { "epoch": 1.76, "learning_rate": 0.00013857798165137612, "loss": 1.3169, "step": 2853 }, { "epoch": 1.76, "learning_rate": 0.00013850917431192659, "loss": 1.2661, "step": 2854 }, { "epoch": 1.76, "learning_rate": 0.00013844036697247705, "loss": 1.3256, "step": 2855 }, { "epoch": 1.76, "learning_rate": 0.0001383715596330275, "loss": 1.2034, "step": 2856 }, { "epoch": 1.76, "learning_rate": 0.00013830275229357797, "loss": 1.3393, "step": 2857 }, { "epoch": 1.76, "learning_rate": 0.00013823394495412843, "loss": 1.2033, "step": 2858 }, { "epoch": 1.76, "learning_rate": 0.0001381651376146789, "loss": 1.0357, "step": 2859 }, { "epoch": 1.77, "learning_rate": 0.00013809633027522933, "loss": 1.0992, "step": 2860 }, { "epoch": 1.77, "learning_rate": 0.0001380275229357798, "loss": 1.1742, "step": 2861 }, { "epoch": 1.77, "learning_rate": 0.00013795871559633025, "loss": 1.0391, "step": 2862 }, { "epoch": 1.77, "learning_rate": 0.0001378899082568807, "loss": 0.967, "step": 2863 }, { "epoch": 1.77, "learning_rate": 0.00013782110091743117, "loss": 1.2282, "step": 2864 }, { "epoch": 1.77, "learning_rate": 0.00013775229357798163, "loss": 0.9983, "step": 2865 }, { "epoch": 1.77, "learning_rate": 0.0001376834862385321, "loss": 1.0236, "step": 2866 }, { "epoch": 1.77, "learning_rate": 0.00013761467889908255, "loss": 0.9401, "step": 2867 }, { "epoch": 1.77, "learning_rate": 0.00013754587155963302, "loss": 0.7745, "step": 2868 }, { "epoch": 1.77, "learning_rate": 0.00013747706422018348, "loss": 0.8361, "step": 2869 }, { "epoch": 1.77, "learning_rate": 0.00013740825688073394, "loss": 0.9759, "step": 2870 }, { "epoch": 1.77, "learning_rate": 0.0001373394495412844, "loss": 2.5971, "step": 2871 }, { "epoch": 1.77, "learning_rate": 0.00013727064220183486, "loss": 2.1958, "step": 2872 }, { "epoch": 1.77, "learning_rate": 0.00013720183486238532, "loss": 1.958, "step": 2873 }, { "epoch": 1.77, "learning_rate": 0.00013713302752293578, "loss": 1.8039, "step": 2874 }, { "epoch": 1.77, "learning_rate": 0.00013706422018348624, "loss": 1.859, "step": 2875 }, { "epoch": 1.78, "learning_rate": 0.00013699541284403668, "loss": 1.7978, "step": 2876 }, { "epoch": 1.78, "learning_rate": 0.00013692660550458714, "loss": 1.8082, "step": 2877 }, { "epoch": 1.78, "learning_rate": 0.0001368577981651376, "loss": 1.6707, "step": 2878 }, { "epoch": 1.78, "learning_rate": 0.00013678899082568806, "loss": 1.671, "step": 2879 }, { "epoch": 1.78, "learning_rate": 0.00013672018348623852, "loss": 1.6763, "step": 2880 }, { "epoch": 1.78, "learning_rate": 0.00013665137614678899, "loss": 1.6665, "step": 2881 }, { "epoch": 1.78, "learning_rate": 0.00013658256880733945, "loss": 1.6413, "step": 2882 }, { "epoch": 1.78, "learning_rate": 0.0001365137614678899, "loss": 1.5468, "step": 2883 }, { "epoch": 1.78, "learning_rate": 0.00013644495412844034, "loss": 1.5592, "step": 2884 }, { "epoch": 1.78, "learning_rate": 0.0001363761467889908, "loss": 1.6336, "step": 2885 }, { "epoch": 1.78, "learning_rate": 0.00013630733944954126, "loss": 1.6274, "step": 2886 }, { "epoch": 1.78, "learning_rate": 0.00013623853211009173, "loss": 1.6902, "step": 2887 }, { "epoch": 1.78, "learning_rate": 0.0001361697247706422, "loss": 1.6418, "step": 2888 }, { "epoch": 1.78, "learning_rate": 0.00013610091743119265, "loss": 1.5073, "step": 2889 }, { "epoch": 1.78, "learning_rate": 0.0001360321100917431, "loss": 1.4728, "step": 2890 }, { "epoch": 1.78, "learning_rate": 0.00013596330275229357, "loss": 1.6102, "step": 2891 }, { "epoch": 1.79, "learning_rate": 0.000135894495412844, "loss": 1.6547, "step": 2892 }, { "epoch": 1.79, "learning_rate": 0.00013582568807339447, "loss": 1.6617, "step": 2893 }, { "epoch": 1.79, "learning_rate": 0.00013575688073394493, "loss": 1.5925, "step": 2894 }, { "epoch": 1.79, "learning_rate": 0.0001356880733944954, "loss": 1.3673, "step": 2895 }, { "epoch": 1.79, "learning_rate": 0.00013561926605504585, "loss": 1.4199, "step": 2896 }, { "epoch": 1.79, "learning_rate": 0.0001355504587155963, "loss": 1.4639, "step": 2897 }, { "epoch": 1.79, "learning_rate": 0.00013548165137614677, "loss": 1.4093, "step": 2898 }, { "epoch": 1.79, "learning_rate": 0.00013541284403669723, "loss": 1.3339, "step": 2899 }, { "epoch": 1.79, "learning_rate": 0.0001353440366972477, "loss": 1.3778, "step": 2900 }, { "epoch": 1.79, "learning_rate": 0.00013527522935779816, "loss": 1.3368, "step": 2901 }, { "epoch": 1.79, "learning_rate": 0.00013520642201834862, "loss": 1.4735, "step": 2902 }, { "epoch": 1.79, "learning_rate": 0.00013513761467889908, "loss": 1.2763, "step": 2903 }, { "epoch": 1.79, "learning_rate": 0.00013506880733944954, "loss": 1.2386, "step": 2904 }, { "epoch": 1.79, "learning_rate": 0.000135, "loss": 1.3115, "step": 2905 }, { "epoch": 1.79, "learning_rate": 0.00013493119266055046, "loss": 1.2399, "step": 2906 }, { "epoch": 1.79, "learning_rate": 0.00013486238532110092, "loss": 1.2276, "step": 2907 }, { "epoch": 1.8, "learning_rate": 0.00013479357798165136, "loss": 1.1559, "step": 2908 }, { "epoch": 1.8, "learning_rate": 0.00013472477064220182, "loss": 1.1065, "step": 2909 }, { "epoch": 1.8, "learning_rate": 0.00013465596330275228, "loss": 1.213, "step": 2910 }, { "epoch": 1.8, "learning_rate": 0.00013458715596330274, "loss": 1.0436, "step": 2911 }, { "epoch": 1.8, "learning_rate": 0.0001345183486238532, "loss": 1.1155, "step": 2912 }, { "epoch": 1.8, "learning_rate": 0.00013444954128440366, "loss": 1.0094, "step": 2913 }, { "epoch": 1.8, "learning_rate": 0.00013438073394495413, "loss": 0.9752, "step": 2914 }, { "epoch": 1.8, "learning_rate": 0.0001343119266055046, "loss": 0.9204, "step": 2915 }, { "epoch": 1.8, "learning_rate": 0.00013424311926605502, "loss": 0.9557, "step": 2916 }, { "epoch": 1.8, "learning_rate": 0.00013417431192660548, "loss": 0.8189, "step": 2917 }, { "epoch": 1.8, "learning_rate": 0.00013410550458715594, "loss": 0.9168, "step": 2918 }, { "epoch": 1.8, "learning_rate": 0.0001340366972477064, "loss": 0.7607, "step": 2919 }, { "epoch": 1.8, "learning_rate": 0.00013396788990825687, "loss": 0.9393, "step": 2920 }, { "epoch": 1.8, "learning_rate": 0.00013389908256880733, "loss": 2.4956, "step": 2921 }, { "epoch": 1.8, "learning_rate": 0.0001338302752293578, "loss": 2.2171, "step": 2922 }, { "epoch": 1.8, "learning_rate": 0.00013376146788990825, "loss": 1.8456, "step": 2923 }, { "epoch": 1.8, "learning_rate": 0.00013369266055045868, "loss": 1.7762, "step": 2924 }, { "epoch": 1.81, "learning_rate": 0.00013362385321100915, "loss": 2.0005, "step": 2925 }, { "epoch": 1.81, "learning_rate": 0.0001335550458715596, "loss": 1.8162, "step": 2926 }, { "epoch": 1.81, "learning_rate": 0.00013348623853211007, "loss": 1.8116, "step": 2927 }, { "epoch": 1.81, "learning_rate": 0.00013341743119266053, "loss": 1.7126, "step": 2928 }, { "epoch": 1.81, "learning_rate": 0.000133348623853211, "loss": 1.672, "step": 2929 }, { "epoch": 1.81, "learning_rate": 0.00013327981651376145, "loss": 1.8054, "step": 2930 }, { "epoch": 1.81, "learning_rate": 0.0001332110091743119, "loss": 1.6762, "step": 2931 }, { "epoch": 1.81, "learning_rate": 0.00013314220183486237, "loss": 1.7362, "step": 2932 }, { "epoch": 1.81, "learning_rate": 0.00013307339449541284, "loss": 1.7796, "step": 2933 }, { "epoch": 1.81, "learning_rate": 0.0001330045871559633, "loss": 1.6385, "step": 2934 }, { "epoch": 1.81, "learning_rate": 0.00013293577981651376, "loss": 1.5444, "step": 2935 }, { "epoch": 1.81, "learning_rate": 0.00013286697247706422, "loss": 1.6065, "step": 2936 }, { "epoch": 1.81, "learning_rate": 0.00013279816513761468, "loss": 1.6448, "step": 2937 }, { "epoch": 1.81, "learning_rate": 0.00013272935779816514, "loss": 1.5449, "step": 2938 }, { "epoch": 1.81, "learning_rate": 0.0001326605504587156, "loss": 1.5301, "step": 2939 }, { "epoch": 1.81, "learning_rate": 0.00013259174311926604, "loss": 1.3936, "step": 2940 }, { "epoch": 1.82, "learning_rate": 0.0001325229357798165, "loss": 1.6669, "step": 2941 }, { "epoch": 1.82, "learning_rate": 0.00013245412844036696, "loss": 1.4067, "step": 2942 }, { "epoch": 1.82, "learning_rate": 0.00013238532110091742, "loss": 1.559, "step": 2943 }, { "epoch": 1.82, "learning_rate": 0.00013231651376146788, "loss": 1.6738, "step": 2944 }, { "epoch": 1.82, "learning_rate": 0.00013224770642201834, "loss": 1.5091, "step": 2945 }, { "epoch": 1.82, "learning_rate": 0.0001321788990825688, "loss": 1.5064, "step": 2946 }, { "epoch": 1.82, "learning_rate": 0.00013211009174311927, "loss": 1.3895, "step": 2947 }, { "epoch": 1.82, "learning_rate": 0.0001320412844036697, "loss": 1.4321, "step": 2948 }, { "epoch": 1.82, "learning_rate": 0.00013197247706422016, "loss": 1.4752, "step": 2949 }, { "epoch": 1.82, "learning_rate": 0.00013190366972477062, "loss": 1.3213, "step": 2950 }, { "epoch": 1.82, "learning_rate": 0.00013183486238532108, "loss": 1.3146, "step": 2951 }, { "epoch": 1.82, "learning_rate": 0.00013176605504587155, "loss": 1.3893, "step": 2952 }, { "epoch": 1.82, "learning_rate": 0.000131697247706422, "loss": 1.5642, "step": 2953 }, { "epoch": 1.82, "learning_rate": 0.00013162844036697247, "loss": 1.3631, "step": 2954 }, { "epoch": 1.82, "learning_rate": 0.00013155963302752293, "loss": 1.3061, "step": 2955 }, { "epoch": 1.82, "learning_rate": 0.00013149082568807336, "loss": 1.1956, "step": 2956 }, { "epoch": 1.83, "learning_rate": 0.00013142201834862383, "loss": 1.1796, "step": 2957 }, { "epoch": 1.83, "learning_rate": 0.00013135321100917429, "loss": 1.1048, "step": 2958 }, { "epoch": 1.83, "learning_rate": 0.00013128440366972475, "loss": 1.0927, "step": 2959 }, { "epoch": 1.83, "learning_rate": 0.0001312155963302752, "loss": 1.0363, "step": 2960 }, { "epoch": 1.83, "learning_rate": 0.00013114678899082567, "loss": 1.127, "step": 2961 }, { "epoch": 1.83, "learning_rate": 0.00013107798165137613, "loss": 1.1476, "step": 2962 }, { "epoch": 1.83, "learning_rate": 0.0001310091743119266, "loss": 0.9952, "step": 2963 }, { "epoch": 1.83, "learning_rate": 0.00013094036697247705, "loss": 0.9473, "step": 2964 }, { "epoch": 1.83, "learning_rate": 0.00013087155963302752, "loss": 1.0435, "step": 2965 }, { "epoch": 1.83, "learning_rate": 0.00013080275229357798, "loss": 0.8027, "step": 2966 }, { "epoch": 1.83, "learning_rate": 0.00013073394495412844, "loss": 0.8835, "step": 2967 }, { "epoch": 1.83, "learning_rate": 0.0001306651376146789, "loss": 0.8866, "step": 2968 }, { "epoch": 1.83, "learning_rate": 0.00013059633027522936, "loss": 0.8132, "step": 2969 }, { "epoch": 1.83, "learning_rate": 0.00013052752293577982, "loss": 0.8458, "step": 2970 }, { "epoch": 1.83, "learning_rate": 0.00013045871559633028, "loss": 2.4443, "step": 2971 }, { "epoch": 1.83, "learning_rate": 0.00013038990825688072, "loss": 2.0481, "step": 2972 }, { "epoch": 1.84, "learning_rate": 0.00013032110091743118, "loss": 2.0024, "step": 2973 }, { "epoch": 1.84, "learning_rate": 0.00013025229357798164, "loss": 1.7469, "step": 2974 }, { "epoch": 1.84, "learning_rate": 0.0001301834862385321, "loss": 1.7955, "step": 2975 }, { "epoch": 1.84, "learning_rate": 0.00013011467889908256, "loss": 1.9425, "step": 2976 }, { "epoch": 1.84, "learning_rate": 0.00013004587155963302, "loss": 1.8198, "step": 2977 }, { "epoch": 1.84, "learning_rate": 0.00012997706422018348, "loss": 1.9084, "step": 2978 }, { "epoch": 1.84, "learning_rate": 0.00012990825688073395, "loss": 1.7141, "step": 2979 }, { "epoch": 1.84, "learning_rate": 0.00012983944954128438, "loss": 1.6393, "step": 2980 }, { "epoch": 1.84, "learning_rate": 0.00012977064220183484, "loss": 1.7444, "step": 2981 }, { "epoch": 1.84, "learning_rate": 0.0001297018348623853, "loss": 1.6021, "step": 2982 }, { "epoch": 1.84, "learning_rate": 0.00012963302752293576, "loss": 1.5906, "step": 2983 }, { "epoch": 1.84, "learning_rate": 0.00012956422018348623, "loss": 1.7553, "step": 2984 }, { "epoch": 1.84, "learning_rate": 0.0001294954128440367, "loss": 1.673, "step": 2985 }, { "epoch": 1.84, "learning_rate": 0.00012942660550458715, "loss": 1.5965, "step": 2986 }, { "epoch": 1.84, "learning_rate": 0.0001293577981651376, "loss": 1.7012, "step": 2987 }, { "epoch": 1.84, "learning_rate": 0.00012928899082568804, "loss": 1.6297, "step": 2988 }, { "epoch": 1.84, "learning_rate": 0.0001292201834862385, "loss": 1.5626, "step": 2989 }, { "epoch": 1.85, "learning_rate": 0.00012915137614678897, "loss": 1.6843, "step": 2990 }, { "epoch": 1.85, "learning_rate": 0.00012908256880733943, "loss": 1.5795, "step": 2991 }, { "epoch": 1.85, "learning_rate": 0.0001290137614678899, "loss": 1.6206, "step": 2992 }, { "epoch": 1.85, "learning_rate": 0.00012894495412844035, "loss": 1.5836, "step": 2993 }, { "epoch": 1.85, "learning_rate": 0.0001288761467889908, "loss": 1.4308, "step": 2994 }, { "epoch": 1.85, "learning_rate": 0.00012880733944954127, "loss": 1.675, "step": 2995 }, { "epoch": 1.85, "learning_rate": 0.00012873853211009173, "loss": 1.4567, "step": 2996 }, { "epoch": 1.85, "learning_rate": 0.0001286697247706422, "loss": 1.4889, "step": 2997 }, { "epoch": 1.85, "learning_rate": 0.00012860091743119266, "loss": 1.5249, "step": 2998 }, { "epoch": 1.85, "learning_rate": 0.00012853211009174312, "loss": 1.4043, "step": 2999 }, { "epoch": 1.85, "learning_rate": 0.00012846330275229358, "loss": 1.4587, "step": 3000 }, { "epoch": 1.85, "eval_bleu": 2.0001405343797235e-14, "eval_loss": 1.9121204614639282, "eval_runtime": 2596.1075, "eval_samples_per_second": 5.685, "eval_steps_per_second": 0.711, "step": 3000 }, { "epoch": 1.85, "learning_rate": 0.00012839449541284404, "loss": 1.39, "step": 3001 }, { "epoch": 1.85, "learning_rate": 0.0001283256880733945, "loss": 1.3469, "step": 3002 }, { "epoch": 1.85, "learning_rate": 0.00012825688073394496, "loss": 1.1199, "step": 3003 }, { "epoch": 1.85, "learning_rate": 0.0001281880733944954, "loss": 1.1479, "step": 3004 }, { "epoch": 1.85, "learning_rate": 0.00012811926605504586, "loss": 1.4059, "step": 3005 }, { "epoch": 1.86, "learning_rate": 0.00012805045871559632, "loss": 1.1349, "step": 3006 }, { "epoch": 1.86, "learning_rate": 0.00012798165137614678, "loss": 1.3401, "step": 3007 }, { "epoch": 1.86, "learning_rate": 0.00012791284403669724, "loss": 1.1072, "step": 3008 }, { "epoch": 1.86, "learning_rate": 0.0001278440366972477, "loss": 1.2175, "step": 3009 }, { "epoch": 1.86, "learning_rate": 0.00012777522935779816, "loss": 1.2285, "step": 3010 }, { "epoch": 1.86, "learning_rate": 0.00012770642201834863, "loss": 1.0367, "step": 3011 }, { "epoch": 1.86, "learning_rate": 0.00012763761467889906, "loss": 1.0945, "step": 3012 }, { "epoch": 1.86, "learning_rate": 0.00012756880733944952, "loss": 1.0363, "step": 3013 }, { "epoch": 1.86, "learning_rate": 0.00012749999999999998, "loss": 1.0025, "step": 3014 }, { "epoch": 1.86, "learning_rate": 0.00012743119266055044, "loss": 0.8898, "step": 3015 }, { "epoch": 1.86, "learning_rate": 0.0001273623853211009, "loss": 0.8455, "step": 3016 }, { "epoch": 1.86, "learning_rate": 0.00012729357798165137, "loss": 0.9232, "step": 3017 }, { "epoch": 1.86, "learning_rate": 0.00012722477064220183, "loss": 0.9279, "step": 3018 }, { "epoch": 1.86, "learning_rate": 0.0001271559633027523, "loss": 0.7919, "step": 3019 }, { "epoch": 1.86, "learning_rate": 0.00012708715596330272, "loss": 0.8098, "step": 3020 }, { "epoch": 1.86, "learning_rate": 0.00012701834862385318, "loss": 2.407, "step": 3021 }, { "epoch": 1.87, "learning_rate": 0.00012694954128440365, "loss": 2.0285, "step": 3022 }, { "epoch": 1.87, "learning_rate": 0.0001268807339449541, "loss": 1.9198, "step": 3023 }, { "epoch": 1.87, "learning_rate": 0.00012681192660550457, "loss": 1.8226, "step": 3024 }, { "epoch": 1.87, "learning_rate": 0.00012674311926605503, "loss": 1.5921, "step": 3025 }, { "epoch": 1.87, "learning_rate": 0.0001266743119266055, "loss": 1.6613, "step": 3026 }, { "epoch": 1.87, "learning_rate": 0.00012660550458715595, "loss": 1.8662, "step": 3027 }, { "epoch": 1.87, "learning_rate": 0.0001265366972477064, "loss": 1.6606, "step": 3028 }, { "epoch": 1.87, "learning_rate": 0.00012646788990825687, "loss": 1.6165, "step": 3029 }, { "epoch": 1.87, "learning_rate": 0.00012639908256880734, "loss": 1.7079, "step": 3030 }, { "epoch": 1.87, "learning_rate": 0.0001263302752293578, "loss": 1.5306, "step": 3031 }, { "epoch": 1.87, "learning_rate": 0.00012626146788990826, "loss": 1.6343, "step": 3032 }, { "epoch": 1.87, "learning_rate": 0.00012619266055045872, "loss": 1.5417, "step": 3033 }, { "epoch": 1.87, "learning_rate": 0.00012612385321100918, "loss": 1.4634, "step": 3034 }, { "epoch": 1.87, "learning_rate": 0.00012605504587155964, "loss": 1.5174, "step": 3035 }, { "epoch": 1.87, "learning_rate": 0.00012598623853211008, "loss": 1.5647, "step": 3036 }, { "epoch": 1.87, "learning_rate": 0.00012591743119266054, "loss": 1.6638, "step": 3037 }, { "epoch": 1.88, "learning_rate": 0.000125848623853211, "loss": 1.589, "step": 3038 }, { "epoch": 1.88, "learning_rate": 0.00012577981651376146, "loss": 1.7321, "step": 3039 }, { "epoch": 1.88, "learning_rate": 0.00012571100917431192, "loss": 1.4931, "step": 3040 }, { "epoch": 1.88, "learning_rate": 0.00012564220183486238, "loss": 1.4464, "step": 3041 }, { "epoch": 1.88, "learning_rate": 0.00012557339449541284, "loss": 1.4127, "step": 3042 }, { "epoch": 1.88, "learning_rate": 0.0001255045871559633, "loss": 1.5282, "step": 3043 }, { "epoch": 1.88, "learning_rate": 0.00012543577981651374, "loss": 1.5018, "step": 3044 }, { "epoch": 1.88, "learning_rate": 0.0001253669724770642, "loss": 1.5265, "step": 3045 }, { "epoch": 1.88, "learning_rate": 0.00012529816513761466, "loss": 1.4623, "step": 3046 }, { "epoch": 1.88, "learning_rate": 0.00012522935779816512, "loss": 1.4353, "step": 3047 }, { "epoch": 1.88, "learning_rate": 0.00012516055045871558, "loss": 1.2729, "step": 3048 }, { "epoch": 1.88, "learning_rate": 0.00012509174311926605, "loss": 1.4302, "step": 3049 }, { "epoch": 1.88, "learning_rate": 0.0001250229357798165, "loss": 1.3645, "step": 3050 }, { "epoch": 1.88, "learning_rate": 0.00012495412844036697, "loss": 1.4171, "step": 3051 }, { "epoch": 1.88, "learning_rate": 0.0001248853211009174, "loss": 1.3164, "step": 3052 }, { "epoch": 1.88, "learning_rate": 0.00012481651376146786, "loss": 1.3799, "step": 3053 }, { "epoch": 1.89, "learning_rate": 0.00012474770642201832, "loss": 1.3677, "step": 3054 }, { "epoch": 1.89, "learning_rate": 0.00012467889908256879, "loss": 1.166, "step": 3055 }, { "epoch": 1.89, "learning_rate": 0.00012461009174311925, "loss": 1.2092, "step": 3056 }, { "epoch": 1.89, "learning_rate": 0.0001245412844036697, "loss": 1.1978, "step": 3057 }, { "epoch": 1.89, "learning_rate": 0.00012447247706422017, "loss": 1.2847, "step": 3058 }, { "epoch": 1.89, "learning_rate": 0.00012440366972477063, "loss": 1.0555, "step": 3059 }, { "epoch": 1.89, "learning_rate": 0.0001243348623853211, "loss": 0.9679, "step": 3060 }, { "epoch": 1.89, "learning_rate": 0.00012426605504587155, "loss": 1.1345, "step": 3061 }, { "epoch": 1.89, "learning_rate": 0.00012419724770642201, "loss": 1.111, "step": 3062 }, { "epoch": 1.89, "learning_rate": 0.00012412844036697248, "loss": 1.0165, "step": 3063 }, { "epoch": 1.89, "learning_rate": 0.00012405963302752294, "loss": 1.1669, "step": 3064 }, { "epoch": 1.89, "learning_rate": 0.0001239908256880734, "loss": 1.0485, "step": 3065 }, { "epoch": 1.89, "learning_rate": 0.00012392201834862386, "loss": 0.885, "step": 3066 }, { "epoch": 1.89, "learning_rate": 0.00012385321100917432, "loss": 0.9282, "step": 3067 }, { "epoch": 1.89, "learning_rate": 0.00012378440366972476, "loss": 0.8279, "step": 3068 }, { "epoch": 1.89, "learning_rate": 0.00012371559633027522, "loss": 0.8235, "step": 3069 }, { "epoch": 1.89, "learning_rate": 0.00012364678899082568, "loss": 0.7094, "step": 3070 }, { "epoch": 1.9, "learning_rate": 0.00012357798165137614, "loss": 2.3064, "step": 3071 }, { "epoch": 1.9, "learning_rate": 0.0001235091743119266, "loss": 2.1128, "step": 3072 }, { "epoch": 1.9, "learning_rate": 0.00012344036697247706, "loss": 1.8536, "step": 3073 }, { "epoch": 1.9, "learning_rate": 0.00012337155963302752, "loss": 1.8792, "step": 3074 }, { "epoch": 1.9, "learning_rate": 0.00012330275229357798, "loss": 1.8256, "step": 3075 }, { "epoch": 1.9, "learning_rate": 0.00012323394495412842, "loss": 1.6448, "step": 3076 }, { "epoch": 1.9, "learning_rate": 0.00012316513761467888, "loss": 1.5534, "step": 3077 }, { "epoch": 1.9, "learning_rate": 0.00012309633027522934, "loss": 1.8025, "step": 3078 }, { "epoch": 1.9, "learning_rate": 0.0001230275229357798, "loss": 1.7608, "step": 3079 }, { "epoch": 1.9, "learning_rate": 0.00012295871559633026, "loss": 1.6532, "step": 3080 }, { "epoch": 1.9, "learning_rate": 0.00012288990825688072, "loss": 1.6359, "step": 3081 }, { "epoch": 1.9, "learning_rate": 0.00012282110091743119, "loss": 1.5604, "step": 3082 }, { "epoch": 1.9, "learning_rate": 0.00012275229357798165, "loss": 1.5728, "step": 3083 }, { "epoch": 1.9, "learning_rate": 0.00012268348623853208, "loss": 1.4838, "step": 3084 }, { "epoch": 1.9, "learning_rate": 0.00012261467889908254, "loss": 1.6424, "step": 3085 }, { "epoch": 1.9, "learning_rate": 0.000122545871559633, "loss": 1.6252, "step": 3086 }, { "epoch": 1.91, "learning_rate": 0.00012247706422018347, "loss": 1.5319, "step": 3087 }, { "epoch": 1.91, "learning_rate": 0.00012240825688073393, "loss": 1.5595, "step": 3088 }, { "epoch": 1.91, "learning_rate": 0.0001223394495412844, "loss": 1.5667, "step": 3089 }, { "epoch": 1.91, "learning_rate": 0.00012227064220183485, "loss": 1.4773, "step": 3090 }, { "epoch": 1.91, "learning_rate": 0.0001222018348623853, "loss": 1.4734, "step": 3091 }, { "epoch": 1.91, "learning_rate": 0.00012213302752293577, "loss": 1.4411, "step": 3092 }, { "epoch": 1.91, "learning_rate": 0.00012206422018348622, "loss": 1.4964, "step": 3093 }, { "epoch": 1.91, "learning_rate": 0.00012199541284403668, "loss": 1.4735, "step": 3094 }, { "epoch": 1.91, "learning_rate": 0.00012192660550458714, "loss": 1.5404, "step": 3095 }, { "epoch": 1.91, "learning_rate": 0.0001218577981651376, "loss": 1.3542, "step": 3096 }, { "epoch": 1.91, "learning_rate": 0.00012178899082568806, "loss": 1.4582, "step": 3097 }, { "epoch": 1.91, "learning_rate": 0.00012172018348623853, "loss": 1.3486, "step": 3098 }, { "epoch": 1.91, "learning_rate": 0.00012165137614678899, "loss": 1.2559, "step": 3099 }, { "epoch": 1.91, "learning_rate": 0.00012158256880733943, "loss": 1.2626, "step": 3100 }, { "epoch": 1.91, "learning_rate": 0.0001215137614678899, "loss": 1.332, "step": 3101 }, { "epoch": 1.91, "learning_rate": 0.00012144495412844036, "loss": 1.319, "step": 3102 }, { "epoch": 1.92, "learning_rate": 0.00012137614678899082, "loss": 1.2508, "step": 3103 }, { "epoch": 1.92, "learning_rate": 0.00012130733944954128, "loss": 1.1567, "step": 3104 }, { "epoch": 1.92, "learning_rate": 0.00012123853211009174, "loss": 1.2438, "step": 3105 }, { "epoch": 1.92, "learning_rate": 0.0001211697247706422, "loss": 1.1211, "step": 3106 }, { "epoch": 1.92, "learning_rate": 0.00012110091743119266, "loss": 1.2314, "step": 3107 }, { "epoch": 1.92, "learning_rate": 0.0001210321100917431, "loss": 1.1626, "step": 3108 }, { "epoch": 1.92, "learning_rate": 0.00012096330275229356, "loss": 1.0391, "step": 3109 }, { "epoch": 1.92, "learning_rate": 0.00012089449541284402, "loss": 1.0671, "step": 3110 }, { "epoch": 1.92, "learning_rate": 0.00012082568807339448, "loss": 0.9932, "step": 3111 }, { "epoch": 1.92, "learning_rate": 0.00012075688073394494, "loss": 1.0327, "step": 3112 }, { "epoch": 1.92, "learning_rate": 0.0001206880733944954, "loss": 1.0487, "step": 3113 }, { "epoch": 1.92, "learning_rate": 0.00012061926605504587, "loss": 1.0331, "step": 3114 }, { "epoch": 1.92, "learning_rate": 0.00012055045871559633, "loss": 0.8263, "step": 3115 }, { "epoch": 1.92, "learning_rate": 0.00012048165137614677, "loss": 0.9046, "step": 3116 }, { "epoch": 1.92, "learning_rate": 0.00012041284403669724, "loss": 0.864, "step": 3117 }, { "epoch": 1.92, "learning_rate": 0.0001203440366972477, "loss": 0.8924, "step": 3118 }, { "epoch": 1.93, "learning_rate": 0.00012027522935779816, "loss": 0.7518, "step": 3119 }, { "epoch": 1.93, "learning_rate": 0.00012020642201834862, "loss": 0.6808, "step": 3120 }, { "epoch": 1.93, "learning_rate": 0.00012013761467889908, "loss": 2.3114, "step": 3121 }, { "epoch": 1.93, "learning_rate": 0.00012006880733944954, "loss": 2.2515, "step": 3122 }, { "epoch": 1.93, "learning_rate": 0.00011999999999999999, "loss": 1.7827, "step": 3123 }, { "epoch": 1.93, "learning_rate": 0.00011993119266055044, "loss": 1.7588, "step": 3124 }, { "epoch": 1.93, "learning_rate": 0.0001198623853211009, "loss": 1.742, "step": 3125 }, { "epoch": 1.93, "learning_rate": 0.00011979357798165136, "loss": 1.6805, "step": 3126 }, { "epoch": 1.93, "learning_rate": 0.00011972477064220182, "loss": 1.7548, "step": 3127 }, { "epoch": 1.93, "learning_rate": 0.00011965596330275228, "loss": 1.6756, "step": 3128 }, { "epoch": 1.93, "learning_rate": 0.00011958715596330274, "loss": 1.6954, "step": 3129 }, { "epoch": 1.93, "learning_rate": 0.0001195183486238532, "loss": 1.573, "step": 3130 }, { "epoch": 1.93, "learning_rate": 0.00011944954128440367, "loss": 1.6479, "step": 3131 }, { "epoch": 1.93, "learning_rate": 0.00011938073394495411, "loss": 1.6855, "step": 3132 }, { "epoch": 1.93, "learning_rate": 0.00011931192660550458, "loss": 1.5933, "step": 3133 }, { "epoch": 1.93, "learning_rate": 0.00011924311926605504, "loss": 1.5387, "step": 3134 }, { "epoch": 1.94, "learning_rate": 0.0001191743119266055, "loss": 1.5856, "step": 3135 }, { "epoch": 1.94, "learning_rate": 0.00011910550458715596, "loss": 1.5024, "step": 3136 }, { "epoch": 1.94, "learning_rate": 0.00011903669724770642, "loss": 1.4833, "step": 3137 }, { "epoch": 1.94, "learning_rate": 0.00011896788990825688, "loss": 1.4254, "step": 3138 }, { "epoch": 1.94, "learning_rate": 0.00011889908256880733, "loss": 1.4986, "step": 3139 }, { "epoch": 1.94, "learning_rate": 0.00011883027522935778, "loss": 1.5669, "step": 3140 }, { "epoch": 1.94, "learning_rate": 0.00011876146788990824, "loss": 1.4592, "step": 3141 }, { "epoch": 1.94, "learning_rate": 0.0001186926605504587, "loss": 1.3417, "step": 3142 }, { "epoch": 1.94, "learning_rate": 0.00011862385321100916, "loss": 1.4769, "step": 3143 }, { "epoch": 1.94, "learning_rate": 0.00011855504587155962, "loss": 1.5297, "step": 3144 }, { "epoch": 1.94, "learning_rate": 0.00011848623853211008, "loss": 1.5434, "step": 3145 }, { "epoch": 1.94, "learning_rate": 0.00011841743119266054, "loss": 1.4386, "step": 3146 }, { "epoch": 1.94, "learning_rate": 0.000118348623853211, "loss": 1.4406, "step": 3147 }, { "epoch": 1.94, "learning_rate": 0.00011827981651376145, "loss": 1.4261, "step": 3148 }, { "epoch": 1.94, "learning_rate": 0.00011821100917431191, "loss": 1.3689, "step": 3149 }, { "epoch": 1.94, "learning_rate": 0.00011814220183486238, "loss": 1.2491, "step": 3150 }, { "epoch": 1.94, "learning_rate": 0.00011807339449541284, "loss": 1.3364, "step": 3151 }, { "epoch": 1.95, "learning_rate": 0.0001180045871559633, "loss": 1.256, "step": 3152 }, { "epoch": 1.95, "learning_rate": 0.00011793577981651376, "loss": 1.3198, "step": 3153 }, { "epoch": 1.95, "learning_rate": 0.00011786697247706421, "loss": 1.1744, "step": 3154 }, { "epoch": 1.95, "learning_rate": 0.00011779816513761467, "loss": 1.2297, "step": 3155 }, { "epoch": 1.95, "learning_rate": 0.00011772935779816512, "loss": 1.0893, "step": 3156 }, { "epoch": 1.95, "learning_rate": 0.00011766055045871558, "loss": 1.2357, "step": 3157 }, { "epoch": 1.95, "learning_rate": 0.00011759174311926604, "loss": 1.0637, "step": 3158 }, { "epoch": 1.95, "learning_rate": 0.0001175229357798165, "loss": 0.9954, "step": 3159 }, { "epoch": 1.95, "learning_rate": 0.00011745412844036696, "loss": 1.1149, "step": 3160 }, { "epoch": 1.95, "learning_rate": 0.00011738532110091742, "loss": 1.0899, "step": 3161 }, { "epoch": 1.95, "learning_rate": 0.00011731651376146788, "loss": 0.9103, "step": 3162 }, { "epoch": 1.95, "learning_rate": 0.00011724770642201835, "loss": 1.0167, "step": 3163 }, { "epoch": 1.95, "learning_rate": 0.00011717889908256879, "loss": 0.9564, "step": 3164 }, { "epoch": 1.95, "learning_rate": 0.00011711009174311925, "loss": 0.9899, "step": 3165 }, { "epoch": 1.95, "learning_rate": 0.00011704128440366972, "loss": 0.89, "step": 3166 }, { "epoch": 1.95, "learning_rate": 0.00011697247706422018, "loss": 0.7885, "step": 3167 }, { "epoch": 1.96, "learning_rate": 0.00011690366972477064, "loss": 0.8358, "step": 3168 }, { "epoch": 1.96, "learning_rate": 0.0001168348623853211, "loss": 0.7267, "step": 3169 }, { "epoch": 1.96, "learning_rate": 0.00011676605504587155, "loss": 0.8229, "step": 3170 }, { "epoch": 1.96, "learning_rate": 0.00011669724770642201, "loss": 2.4191, "step": 3171 }, { "epoch": 1.96, "learning_rate": 0.00011662844036697246, "loss": 2.0197, "step": 3172 }, { "epoch": 1.96, "learning_rate": 0.00011655963302752292, "loss": 1.9992, "step": 3173 }, { "epoch": 1.96, "learning_rate": 0.00011649082568807338, "loss": 1.7866, "step": 3174 }, { "epoch": 1.96, "learning_rate": 0.00011642201834862384, "loss": 1.6166, "step": 3175 }, { "epoch": 1.96, "learning_rate": 0.0001163532110091743, "loss": 1.642, "step": 3176 }, { "epoch": 1.96, "learning_rate": 0.00011628440366972476, "loss": 1.7037, "step": 3177 }, { "epoch": 1.96, "learning_rate": 0.00011621559633027522, "loss": 1.6492, "step": 3178 }, { "epoch": 1.96, "learning_rate": 0.00011614678899082569, "loss": 1.6288, "step": 3179 }, { "epoch": 1.96, "learning_rate": 0.00011607798165137613, "loss": 1.5632, "step": 3180 }, { "epoch": 1.96, "learning_rate": 0.0001160091743119266, "loss": 1.5532, "step": 3181 }, { "epoch": 1.96, "learning_rate": 0.00011594036697247706, "loss": 1.7433, "step": 3182 }, { "epoch": 1.96, "learning_rate": 0.00011587155963302752, "loss": 1.5728, "step": 3183 }, { "epoch": 1.97, "learning_rate": 0.00011580275229357798, "loss": 1.7057, "step": 3184 }, { "epoch": 1.97, "learning_rate": 0.00011573394495412844, "loss": 1.5485, "step": 3185 }, { "epoch": 1.97, "learning_rate": 0.00011566513761467889, "loss": 1.528, "step": 3186 }, { "epoch": 1.97, "learning_rate": 0.00011559633027522935, "loss": 1.4964, "step": 3187 }, { "epoch": 1.97, "learning_rate": 0.0001155275229357798, "loss": 1.516, "step": 3188 }, { "epoch": 1.97, "learning_rate": 0.00011545871559633026, "loss": 1.6188, "step": 3189 }, { "epoch": 1.97, "learning_rate": 0.00011538990825688072, "loss": 1.417, "step": 3190 }, { "epoch": 1.97, "learning_rate": 0.00011532110091743118, "loss": 1.437, "step": 3191 }, { "epoch": 1.97, "learning_rate": 0.00011525229357798164, "loss": 1.446, "step": 3192 }, { "epoch": 1.97, "learning_rate": 0.0001151834862385321, "loss": 1.4484, "step": 3193 }, { "epoch": 1.97, "learning_rate": 0.00011511467889908256, "loss": 1.3121, "step": 3194 }, { "epoch": 1.97, "learning_rate": 0.00011504587155963302, "loss": 1.4484, "step": 3195 }, { "epoch": 1.97, "learning_rate": 0.00011497706422018347, "loss": 1.5076, "step": 3196 }, { "epoch": 1.97, "learning_rate": 0.00011490825688073393, "loss": 1.3903, "step": 3197 }, { "epoch": 1.97, "learning_rate": 0.0001148394495412844, "loss": 1.3124, "step": 3198 }, { "epoch": 1.97, "learning_rate": 0.00011477064220183486, "loss": 1.1794, "step": 3199 }, { "epoch": 1.98, "learning_rate": 0.00011470183486238532, "loss": 1.2289, "step": 3200 }, { "epoch": 1.98, "learning_rate": 0.00011463302752293577, "loss": 1.3136, "step": 3201 }, { "epoch": 1.98, "learning_rate": 0.00011456422018348623, "loss": 1.2753, "step": 3202 }, { "epoch": 1.98, "learning_rate": 0.00011449541284403669, "loss": 1.2726, "step": 3203 }, { "epoch": 1.98, "learning_rate": 0.00011442660550458714, "loss": 1.2318, "step": 3204 }, { "epoch": 1.98, "learning_rate": 0.0001143577981651376, "loss": 1.2314, "step": 3205 }, { "epoch": 1.98, "learning_rate": 0.00011428899082568806, "loss": 1.2216, "step": 3206 }, { "epoch": 1.98, "learning_rate": 0.00011422018348623852, "loss": 1.0592, "step": 3207 }, { "epoch": 1.98, "learning_rate": 0.00011415137614678898, "loss": 1.1086, "step": 3208 }, { "epoch": 1.98, "learning_rate": 0.00011408256880733944, "loss": 1.01, "step": 3209 }, { "epoch": 1.98, "learning_rate": 0.0001140137614678899, "loss": 0.9835, "step": 3210 }, { "epoch": 1.98, "learning_rate": 0.00011394495412844036, "loss": 1.0515, "step": 3211 }, { "epoch": 1.98, "learning_rate": 0.00011387614678899081, "loss": 1.0544, "step": 3212 }, { "epoch": 1.98, "learning_rate": 0.00011380733944954127, "loss": 1.0031, "step": 3213 }, { "epoch": 1.98, "learning_rate": 0.00011373853211009173, "loss": 0.8895, "step": 3214 }, { "epoch": 1.98, "learning_rate": 0.0001136697247706422, "loss": 0.869, "step": 3215 }, { "epoch": 1.99, "learning_rate": 0.00011360091743119266, "loss": 0.8822, "step": 3216 }, { "epoch": 1.99, "learning_rate": 0.0001135321100917431, "loss": 0.8238, "step": 3217 }, { "epoch": 1.99, "learning_rate": 0.00011346330275229357, "loss": 0.9088, "step": 3218 }, { "epoch": 1.99, "learning_rate": 0.00011339449541284403, "loss": 0.8089, "step": 3219 }, { "epoch": 1.99, "learning_rate": 0.00011332568807339448, "loss": 0.7532, "step": 3220 }, { "epoch": 1.99, "learning_rate": 0.00011325688073394494, "loss": 2.1718, "step": 3221 }, { "epoch": 1.99, "learning_rate": 0.0001131880733944954, "loss": 1.8052, "step": 3222 }, { "epoch": 1.99, "learning_rate": 0.00011311926605504586, "loss": 1.6693, "step": 3223 }, { "epoch": 1.99, "learning_rate": 0.00011305045871559632, "loss": 1.6312, "step": 3224 }, { "epoch": 1.99, "learning_rate": 0.00011298165137614678, "loss": 1.5112, "step": 3225 }, { "epoch": 1.99, "learning_rate": 0.00011291284403669724, "loss": 1.5097, "step": 3226 }, { "epoch": 1.99, "learning_rate": 0.0001128440366972477, "loss": 1.5781, "step": 3227 }, { "epoch": 1.99, "learning_rate": 0.00011277522935779815, "loss": 1.5083, "step": 3228 }, { "epoch": 1.99, "learning_rate": 0.00011270642201834861, "loss": 1.5071, "step": 3229 }, { "epoch": 1.99, "learning_rate": 0.00011263761467889907, "loss": 1.3595, "step": 3230 }, { "epoch": 1.99, "learning_rate": 0.00011256880733944954, "loss": 1.3151, "step": 3231 }, { "epoch": 1.99, "learning_rate": 0.0001125, "loss": 1.4075, "step": 3232 }, { "epoch": 2.0, "learning_rate": 0.00011243119266055044, "loss": 1.438, "step": 3233 }, { "epoch": 2.0, "learning_rate": 0.0001123623853211009, "loss": 1.3186, "step": 3234 }, { "epoch": 2.0, "learning_rate": 0.00011229357798165137, "loss": 1.1342, "step": 3235 }, { "epoch": 2.0, "learning_rate": 0.00011222477064220183, "loss": 1.0946, "step": 3236 }, { "epoch": 2.0, "learning_rate": 0.00011215596330275228, "loss": 1.0398, "step": 3237 }, { "epoch": 2.0, "learning_rate": 0.00011208715596330274, "loss": 0.9152, "step": 3238 }, { "epoch": 2.0, "learning_rate": 0.0001120183486238532, "loss": 0.8311, "step": 3239 }, { "epoch": 2.0, "learning_rate": 0.00011194954128440366, "loss": 0.8015, "step": 3240 }, { "epoch": 2.0, "learning_rate": 0.00011188073394495412, "loss": 1.8998, "step": 3241 }, { "epoch": 2.0, "learning_rate": 0.00011181192660550458, "loss": 1.6994, "step": 3242 }, { "epoch": 2.0, "learning_rate": 0.00011174311926605504, "loss": 1.449, "step": 3243 }, { "epoch": 2.0, "learning_rate": 0.0001116743119266055, "loss": 1.48, "step": 3244 }, { "epoch": 2.0, "learning_rate": 0.00011160550458715595, "loss": 1.45, "step": 3245 }, { "epoch": 2.0, "learning_rate": 0.00011153669724770641, "loss": 1.3864, "step": 3246 }, { "epoch": 2.0, "learning_rate": 0.00011146788990825688, "loss": 1.2641, "step": 3247 }, { "epoch": 2.0, "learning_rate": 0.00011139908256880732, "loss": 1.3071, "step": 3248 }, { "epoch": 2.01, "learning_rate": 0.00011133027522935778, "loss": 1.2711, "step": 3249 }, { "epoch": 2.01, "learning_rate": 0.00011126146788990825, "loss": 1.1952, "step": 3250 }, { "epoch": 2.01, "learning_rate": 0.00011119266055045871, "loss": 1.1409, "step": 3251 }, { "epoch": 2.01, "learning_rate": 0.00011112385321100917, "loss": 1.2432, "step": 3252 }, { "epoch": 2.01, "learning_rate": 0.00011105504587155962, "loss": 1.3421, "step": 3253 }, { "epoch": 2.01, "learning_rate": 0.00011098623853211008, "loss": 1.1626, "step": 3254 }, { "epoch": 2.01, "learning_rate": 0.00011091743119266054, "loss": 1.2161, "step": 3255 }, { "epoch": 2.01, "learning_rate": 0.000110848623853211, "loss": 1.27, "step": 3256 }, { "epoch": 2.01, "learning_rate": 0.00011077981651376146, "loss": 1.2663, "step": 3257 }, { "epoch": 2.01, "learning_rate": 0.00011071100917431192, "loss": 1.1851, "step": 3258 }, { "epoch": 2.01, "learning_rate": 0.00011064220183486238, "loss": 1.1674, "step": 3259 }, { "epoch": 2.01, "learning_rate": 0.00011057339449541284, "loss": 1.1489, "step": 3260 }, { "epoch": 2.01, "learning_rate": 0.00011050458715596329, "loss": 1.1105, "step": 3261 }, { "epoch": 2.01, "learning_rate": 0.00011043577981651375, "loss": 1.2066, "step": 3262 }, { "epoch": 2.01, "learning_rate": 0.00011036697247706421, "loss": 1.1664, "step": 3263 }, { "epoch": 2.01, "learning_rate": 0.00011029816513761466, "loss": 1.0803, "step": 3264 }, { "epoch": 2.02, "learning_rate": 0.00011022935779816512, "loss": 0.9479, "step": 3265 }, { "epoch": 2.02, "learning_rate": 0.00011016055045871559, "loss": 1.0562, "step": 3266 }, { "epoch": 2.02, "learning_rate": 0.00011009174311926605, "loss": 0.9889, "step": 3267 }, { "epoch": 2.02, "learning_rate": 0.00011002293577981651, "loss": 0.9113, "step": 3268 }, { "epoch": 2.02, "learning_rate": 0.00010995412844036696, "loss": 0.9999, "step": 3269 }, { "epoch": 2.02, "learning_rate": 0.00010988532110091742, "loss": 1.0455, "step": 3270 }, { "epoch": 2.02, "learning_rate": 0.00010981651376146788, "loss": 0.8844, "step": 3271 }, { "epoch": 2.02, "learning_rate": 0.00010974770642201834, "loss": 0.9276, "step": 3272 }, { "epoch": 2.02, "learning_rate": 0.0001096788990825688, "loss": 0.9944, "step": 3273 }, { "epoch": 2.02, "learning_rate": 0.00010961009174311926, "loss": 0.8686, "step": 3274 }, { "epoch": 2.02, "learning_rate": 0.00010954128440366972, "loss": 0.8649, "step": 3275 }, { "epoch": 2.02, "learning_rate": 0.00010947247706422018, "loss": 0.9012, "step": 3276 }, { "epoch": 2.02, "learning_rate": 0.00010940366972477063, "loss": 0.8939, "step": 3277 }, { "epoch": 2.02, "learning_rate": 0.0001093348623853211, "loss": 0.6883, "step": 3278 }, { "epoch": 2.02, "learning_rate": 0.00010926605504587154, "loss": 0.8001, "step": 3279 }, { "epoch": 2.02, "learning_rate": 0.000109197247706422, "loss": 0.8376, "step": 3280 }, { "epoch": 2.03, "learning_rate": 0.00010912844036697246, "loss": 0.7515, "step": 3281 }, { "epoch": 2.03, "learning_rate": 0.00010905963302752292, "loss": 0.7799, "step": 3282 }, { "epoch": 2.03, "learning_rate": 0.00010899082568807339, "loss": 0.815, "step": 3283 }, { "epoch": 2.03, "learning_rate": 0.00010892201834862385, "loss": 0.6693, "step": 3284 }, { "epoch": 2.03, "learning_rate": 0.0001088532110091743, "loss": 0.697, "step": 3285 }, { "epoch": 2.03, "learning_rate": 0.00010878440366972476, "loss": 0.7176, "step": 3286 }, { "epoch": 2.03, "learning_rate": 0.00010871559633027522, "loss": 0.5935, "step": 3287 }, { "epoch": 2.03, "learning_rate": 0.00010864678899082568, "loss": 0.6069, "step": 3288 }, { "epoch": 2.03, "learning_rate": 0.00010857798165137614, "loss": 0.5867, "step": 3289 }, { "epoch": 2.03, "learning_rate": 0.0001085091743119266, "loss": 0.6292, "step": 3290 }, { "epoch": 2.03, "learning_rate": 0.00010844036697247706, "loss": 1.9927, "step": 3291 }, { "epoch": 2.03, "learning_rate": 0.00010837155963302752, "loss": 1.8382, "step": 3292 }, { "epoch": 2.03, "learning_rate": 0.00010830275229357797, "loss": 1.6172, "step": 3293 }, { "epoch": 2.03, "learning_rate": 0.00010823394495412843, "loss": 1.3558, "step": 3294 }, { "epoch": 2.03, "learning_rate": 0.00010816513761467888, "loss": 1.3915, "step": 3295 }, { "epoch": 2.03, "learning_rate": 0.00010809633027522934, "loss": 1.3112, "step": 3296 }, { "epoch": 2.04, "learning_rate": 0.0001080275229357798, "loss": 1.351, "step": 3297 }, { "epoch": 2.04, "learning_rate": 0.00010795871559633026, "loss": 1.5167, "step": 3298 }, { "epoch": 2.04, "learning_rate": 0.00010788990825688073, "loss": 1.4182, "step": 3299 }, { "epoch": 2.04, "learning_rate": 0.00010782110091743119, "loss": 1.2447, "step": 3300 }, { "epoch": 2.04, "learning_rate": 0.00010775229357798163, "loss": 1.2202, "step": 3301 }, { "epoch": 2.04, "learning_rate": 0.0001076834862385321, "loss": 1.2386, "step": 3302 }, { "epoch": 2.04, "learning_rate": 0.00010761467889908256, "loss": 1.2798, "step": 3303 }, { "epoch": 2.04, "learning_rate": 0.00010754587155963302, "loss": 1.2992, "step": 3304 }, { "epoch": 2.04, "learning_rate": 0.00010747706422018348, "loss": 1.2656, "step": 3305 }, { "epoch": 2.04, "learning_rate": 0.00010740825688073394, "loss": 1.187, "step": 3306 }, { "epoch": 2.04, "learning_rate": 0.0001073394495412844, "loss": 1.1373, "step": 3307 }, { "epoch": 2.04, "learning_rate": 0.00010727064220183486, "loss": 1.1892, "step": 3308 }, { "epoch": 2.04, "learning_rate": 0.00010720183486238531, "loss": 1.196, "step": 3309 }, { "epoch": 2.04, "learning_rate": 0.00010713302752293577, "loss": 1.113, "step": 3310 }, { "epoch": 2.04, "learning_rate": 0.00010706422018348622, "loss": 1.0907, "step": 3311 }, { "epoch": 2.04, "learning_rate": 0.00010699541284403668, "loss": 1.0878, "step": 3312 }, { "epoch": 2.05, "learning_rate": 0.00010692660550458714, "loss": 1.0999, "step": 3313 }, { "epoch": 2.05, "learning_rate": 0.0001068577981651376, "loss": 1.0508, "step": 3314 }, { "epoch": 2.05, "learning_rate": 0.00010678899082568807, "loss": 1.1594, "step": 3315 }, { "epoch": 2.05, "learning_rate": 0.00010672018348623853, "loss": 1.0458, "step": 3316 }, { "epoch": 2.05, "learning_rate": 0.00010665137614678897, "loss": 0.9389, "step": 3317 }, { "epoch": 2.05, "learning_rate": 0.00010658256880733944, "loss": 1.0557, "step": 3318 }, { "epoch": 2.05, "learning_rate": 0.0001065137614678899, "loss": 0.924, "step": 3319 }, { "epoch": 2.05, "learning_rate": 0.00010644495412844036, "loss": 1.0157, "step": 3320 }, { "epoch": 2.05, "learning_rate": 0.00010637614678899082, "loss": 0.9298, "step": 3321 }, { "epoch": 2.05, "learning_rate": 0.00010630733944954128, "loss": 0.9338, "step": 3322 }, { "epoch": 2.05, "learning_rate": 0.00010623853211009174, "loss": 0.9585, "step": 3323 }, { "epoch": 2.05, "learning_rate": 0.0001061697247706422, "loss": 0.8398, "step": 3324 }, { "epoch": 2.05, "learning_rate": 0.00010610091743119265, "loss": 0.8221, "step": 3325 }, { "epoch": 2.05, "learning_rate": 0.0001060321100917431, "loss": 0.8746, "step": 3326 }, { "epoch": 2.05, "learning_rate": 0.00010596330275229356, "loss": 0.8275, "step": 3327 }, { "epoch": 2.05, "learning_rate": 0.00010589449541284402, "loss": 0.8398, "step": 3328 }, { "epoch": 2.05, "learning_rate": 0.00010582568807339448, "loss": 0.8013, "step": 3329 }, { "epoch": 2.06, "learning_rate": 0.00010575688073394494, "loss": 0.738, "step": 3330 }, { "epoch": 2.06, "learning_rate": 0.0001056880733944954, "loss": 0.7928, "step": 3331 }, { "epoch": 2.06, "learning_rate": 0.00010561926605504587, "loss": 0.7466, "step": 3332 }, { "epoch": 2.06, "learning_rate": 0.00010555045871559631, "loss": 0.7771, "step": 3333 }, { "epoch": 2.06, "learning_rate": 0.00010548165137614678, "loss": 0.7779, "step": 3334 }, { "epoch": 2.06, "learning_rate": 0.00010541284403669724, "loss": 0.6664, "step": 3335 }, { "epoch": 2.06, "learning_rate": 0.0001053440366972477, "loss": 0.5874, "step": 3336 }, { "epoch": 2.06, "learning_rate": 0.00010527522935779816, "loss": 0.6239, "step": 3337 }, { "epoch": 2.06, "learning_rate": 0.00010520642201834862, "loss": 0.6225, "step": 3338 }, { "epoch": 2.06, "learning_rate": 0.00010513761467889908, "loss": 0.5751, "step": 3339 }, { "epoch": 2.06, "learning_rate": 0.00010506880733944954, "loss": 0.6024, "step": 3340 }, { "epoch": 2.06, "learning_rate": 0.00010499999999999999, "loss": 1.9908, "step": 3341 }, { "epoch": 2.06, "learning_rate": 0.00010493119266055044, "loss": 1.8556, "step": 3342 }, { "epoch": 2.06, "learning_rate": 0.0001048623853211009, "loss": 1.4763, "step": 3343 }, { "epoch": 2.06, "learning_rate": 0.00010479357798165136, "loss": 1.3352, "step": 3344 }, { "epoch": 2.06, "learning_rate": 0.00010472477064220182, "loss": 1.4443, "step": 3345 }, { "epoch": 2.07, "learning_rate": 0.00010465596330275228, "loss": 1.3799, "step": 3346 }, { "epoch": 2.07, "learning_rate": 0.00010458715596330274, "loss": 1.3652, "step": 3347 }, { "epoch": 2.07, "learning_rate": 0.0001045183486238532, "loss": 1.4642, "step": 3348 }, { "epoch": 2.07, "learning_rate": 0.00010444954128440365, "loss": 1.3314, "step": 3349 }, { "epoch": 2.07, "learning_rate": 0.00010438073394495412, "loss": 1.2554, "step": 3350 }, { "epoch": 2.07, "learning_rate": 0.00010431192660550458, "loss": 1.3257, "step": 3351 }, { "epoch": 2.07, "learning_rate": 0.00010424311926605504, "loss": 1.1991, "step": 3352 }, { "epoch": 2.07, "learning_rate": 0.0001041743119266055, "loss": 1.1797, "step": 3353 }, { "epoch": 2.07, "learning_rate": 0.00010410550458715596, "loss": 1.3033, "step": 3354 }, { "epoch": 2.07, "learning_rate": 0.00010403669724770642, "loss": 1.35, "step": 3355 }, { "epoch": 2.07, "learning_rate": 0.00010396788990825688, "loss": 1.2393, "step": 3356 }, { "epoch": 2.07, "learning_rate": 0.00010389908256880732, "loss": 1.2471, "step": 3357 }, { "epoch": 2.07, "learning_rate": 0.00010383027522935778, "loss": 1.2599, "step": 3358 }, { "epoch": 2.07, "learning_rate": 0.00010376146788990824, "loss": 1.2458, "step": 3359 }, { "epoch": 2.07, "learning_rate": 0.0001036926605504587, "loss": 1.0753, "step": 3360 }, { "epoch": 2.07, "learning_rate": 0.00010362385321100916, "loss": 1.1329, "step": 3361 }, { "epoch": 2.08, "learning_rate": 0.00010355504587155962, "loss": 1.134, "step": 3362 }, { "epoch": 2.08, "learning_rate": 0.00010348623853211008, "loss": 1.112, "step": 3363 }, { "epoch": 2.08, "learning_rate": 0.00010341743119266055, "loss": 1.1027, "step": 3364 }, { "epoch": 2.08, "learning_rate": 0.000103348623853211, "loss": 0.9633, "step": 3365 }, { "epoch": 2.08, "learning_rate": 0.00010327981651376145, "loss": 1.0392, "step": 3366 }, { "epoch": 2.08, "learning_rate": 0.00010321100917431192, "loss": 1.1153, "step": 3367 }, { "epoch": 2.08, "learning_rate": 0.00010314220183486238, "loss": 1.0044, "step": 3368 }, { "epoch": 2.08, "learning_rate": 0.00010307339449541284, "loss": 0.9651, "step": 3369 }, { "epoch": 2.08, "learning_rate": 0.0001030045871559633, "loss": 0.9368, "step": 3370 }, { "epoch": 2.08, "learning_rate": 0.00010293577981651376, "loss": 0.8179, "step": 3371 }, { "epoch": 2.08, "learning_rate": 0.00010286697247706422, "loss": 1.0389, "step": 3372 }, { "epoch": 2.08, "learning_rate": 0.00010279816513761466, "loss": 0.8948, "step": 3373 }, { "epoch": 2.08, "learning_rate": 0.00010272935779816512, "loss": 0.8474, "step": 3374 }, { "epoch": 2.08, "learning_rate": 0.00010266055045871558, "loss": 0.9365, "step": 3375 }, { "epoch": 2.08, "learning_rate": 0.00010259174311926604, "loss": 0.9399, "step": 3376 }, { "epoch": 2.08, "learning_rate": 0.0001025229357798165, "loss": 0.818, "step": 3377 }, { "epoch": 2.09, "learning_rate": 0.00010245412844036696, "loss": 0.8144, "step": 3378 }, { "epoch": 2.09, "learning_rate": 0.00010238532110091742, "loss": 0.8105, "step": 3379 }, { "epoch": 2.09, "learning_rate": 0.00010231651376146789, "loss": 0.6731, "step": 3380 }, { "epoch": 2.09, "learning_rate": 0.00010224770642201833, "loss": 0.8096, "step": 3381 }, { "epoch": 2.09, "learning_rate": 0.0001021788990825688, "loss": 0.7844, "step": 3382 }, { "epoch": 2.09, "learning_rate": 0.00010211009174311926, "loss": 0.7096, "step": 3383 }, { "epoch": 2.09, "learning_rate": 0.00010204128440366972, "loss": 0.5881, "step": 3384 }, { "epoch": 2.09, "learning_rate": 0.00010197247706422018, "loss": 0.7115, "step": 3385 }, { "epoch": 2.09, "learning_rate": 0.00010190366972477064, "loss": 0.6648, "step": 3386 }, { "epoch": 2.09, "learning_rate": 0.0001018348623853211, "loss": 0.6421, "step": 3387 }, { "epoch": 2.09, "learning_rate": 0.00010176605504587156, "loss": 0.5482, "step": 3388 }, { "epoch": 2.09, "learning_rate": 0.000101697247706422, "loss": 0.597, "step": 3389 }, { "epoch": 2.09, "learning_rate": 0.00010162844036697246, "loss": 0.7062, "step": 3390 }, { "epoch": 2.09, "learning_rate": 0.00010155963302752292, "loss": 1.9705, "step": 3391 }, { "epoch": 2.09, "learning_rate": 0.00010149082568807338, "loss": 1.6736, "step": 3392 }, { "epoch": 2.09, "learning_rate": 0.00010142201834862384, "loss": 1.641, "step": 3393 }, { "epoch": 2.1, "learning_rate": 0.0001013532110091743, "loss": 1.5655, "step": 3394 }, { "epoch": 2.1, "learning_rate": 0.00010128440366972476, "loss": 1.4845, "step": 3395 }, { "epoch": 2.1, "learning_rate": 0.00010121559633027523, "loss": 1.3657, "step": 3396 }, { "epoch": 2.1, "learning_rate": 0.00010114678899082567, "loss": 1.4253, "step": 3397 }, { "epoch": 2.1, "learning_rate": 0.00010107798165137613, "loss": 1.4804, "step": 3398 }, { "epoch": 2.1, "learning_rate": 0.0001010091743119266, "loss": 1.4445, "step": 3399 }, { "epoch": 2.1, "learning_rate": 0.00010094036697247706, "loss": 1.2404, "step": 3400 }, { "epoch": 2.1, "learning_rate": 0.00010087155963302752, "loss": 1.2933, "step": 3401 }, { "epoch": 2.1, "learning_rate": 0.00010080275229357798, "loss": 1.25, "step": 3402 }, { "epoch": 2.1, "learning_rate": 0.00010073394495412844, "loss": 1.1904, "step": 3403 }, { "epoch": 2.1, "learning_rate": 0.0001006651376146789, "loss": 1.3232, "step": 3404 }, { "epoch": 2.1, "learning_rate": 0.00010059633027522934, "loss": 1.2416, "step": 3405 }, { "epoch": 2.1, "learning_rate": 0.0001005275229357798, "loss": 1.2251, "step": 3406 }, { "epoch": 2.1, "learning_rate": 0.00010045871559633026, "loss": 1.1865, "step": 3407 }, { "epoch": 2.1, "learning_rate": 0.00010038990825688072, "loss": 1.2328, "step": 3408 }, { "epoch": 2.1, "learning_rate": 0.00010032110091743118, "loss": 1.1768, "step": 3409 }, { "epoch": 2.1, "learning_rate": 0.00010025229357798164, "loss": 1.1402, "step": 3410 }, { "epoch": 2.11, "learning_rate": 0.0001001834862385321, "loss": 1.2011, "step": 3411 }, { "epoch": 2.11, "learning_rate": 0.00010011467889908256, "loss": 1.1794, "step": 3412 }, { "epoch": 2.11, "learning_rate": 0.00010004587155963301, "loss": 1.0074, "step": 3413 }, { "epoch": 2.11, "learning_rate": 9.997706422018347e-05, "loss": 1.0444, "step": 3414 }, { "epoch": 2.11, "learning_rate": 9.990825688073394e-05, "loss": 1.1296, "step": 3415 }, { "epoch": 2.11, "learning_rate": 9.98394495412844e-05, "loss": 1.0497, "step": 3416 }, { "epoch": 2.11, "learning_rate": 9.977064220183486e-05, "loss": 1.0577, "step": 3417 }, { "epoch": 2.11, "learning_rate": 9.970183486238532e-05, "loss": 0.9681, "step": 3418 }, { "epoch": 2.11, "learning_rate": 9.963302752293578e-05, "loss": 1.0792, "step": 3419 }, { "epoch": 2.11, "learning_rate": 9.956422018348624e-05, "loss": 1.0182, "step": 3420 }, { "epoch": 2.11, "learning_rate": 9.949541284403668e-05, "loss": 0.9079, "step": 3421 }, { "epoch": 2.11, "learning_rate": 9.942660550458714e-05, "loss": 0.878, "step": 3422 }, { "epoch": 2.11, "learning_rate": 9.93577981651376e-05, "loss": 0.9249, "step": 3423 }, { "epoch": 2.11, "learning_rate": 9.928899082568806e-05, "loss": 0.9437, "step": 3424 }, { "epoch": 2.11, "learning_rate": 9.922018348623852e-05, "loss": 0.9596, "step": 3425 }, { "epoch": 2.11, "learning_rate": 9.915137614678898e-05, "loss": 1.0187, "step": 3426 }, { "epoch": 2.12, "learning_rate": 9.908256880733944e-05, "loss": 0.8151, "step": 3427 }, { "epoch": 2.12, "learning_rate": 9.90137614678899e-05, "loss": 0.8303, "step": 3428 }, { "epoch": 2.12, "learning_rate": 9.894495412844035e-05, "loss": 0.8061, "step": 3429 }, { "epoch": 2.12, "learning_rate": 9.887614678899081e-05, "loss": 0.7894, "step": 3430 }, { "epoch": 2.12, "learning_rate": 9.880733944954127e-05, "loss": 0.7404, "step": 3431 }, { "epoch": 2.12, "learning_rate": 9.873853211009174e-05, "loss": 0.7282, "step": 3432 }, { "epoch": 2.12, "learning_rate": 9.86697247706422e-05, "loss": 0.7364, "step": 3433 }, { "epoch": 2.12, "learning_rate": 9.860091743119266e-05, "loss": 0.7194, "step": 3434 }, { "epoch": 2.12, "learning_rate": 9.853211009174312e-05, "loss": 0.6544, "step": 3435 }, { "epoch": 2.12, "learning_rate": 9.846330275229358e-05, "loss": 0.7254, "step": 3436 }, { "epoch": 2.12, "learning_rate": 9.839449541284402e-05, "loss": 0.6503, "step": 3437 }, { "epoch": 2.12, "learning_rate": 9.832568807339448e-05, "loss": 0.6116, "step": 3438 }, { "epoch": 2.12, "learning_rate": 9.825688073394494e-05, "loss": 0.6194, "step": 3439 }, { "epoch": 2.12, "learning_rate": 9.81880733944954e-05, "loss": 0.6522, "step": 3440 }, { "epoch": 2.12, "learning_rate": 9.811926605504586e-05, "loss": 1.935, "step": 3441 }, { "epoch": 2.12, "learning_rate": 9.805045871559632e-05, "loss": 1.8507, "step": 3442 }, { "epoch": 2.13, "learning_rate": 9.798165137614678e-05, "loss": 1.5835, "step": 3443 }, { "epoch": 2.13, "learning_rate": 9.791284403669724e-05, "loss": 1.3481, "step": 3444 }, { "epoch": 2.13, "learning_rate": 9.784403669724769e-05, "loss": 1.4314, "step": 3445 }, { "epoch": 2.13, "learning_rate": 9.777522935779815e-05, "loss": 1.302, "step": 3446 }, { "epoch": 2.13, "learning_rate": 9.770642201834861e-05, "loss": 1.4149, "step": 3447 }, { "epoch": 2.13, "learning_rate": 9.763761467889908e-05, "loss": 1.2766, "step": 3448 }, { "epoch": 2.13, "learning_rate": 9.756880733944954e-05, "loss": 1.3728, "step": 3449 }, { "epoch": 2.13, "learning_rate": 9.75e-05, "loss": 1.1997, "step": 3450 }, { "epoch": 2.13, "learning_rate": 9.743119266055046e-05, "loss": 1.33, "step": 3451 }, { "epoch": 2.13, "learning_rate": 9.736238532110092e-05, "loss": 1.3506, "step": 3452 }, { "epoch": 2.13, "learning_rate": 9.729357798165135e-05, "loss": 1.2247, "step": 3453 }, { "epoch": 2.13, "learning_rate": 9.722477064220182e-05, "loss": 1.1914, "step": 3454 }, { "epoch": 2.13, "learning_rate": 9.715596330275228e-05, "loss": 1.1132, "step": 3455 }, { "epoch": 2.13, "learning_rate": 9.708715596330274e-05, "loss": 1.139, "step": 3456 }, { "epoch": 2.13, "learning_rate": 9.70183486238532e-05, "loss": 1.1673, "step": 3457 }, { "epoch": 2.13, "learning_rate": 9.694954128440366e-05, "loss": 1.1573, "step": 3458 }, { "epoch": 2.14, "learning_rate": 9.688073394495412e-05, "loss": 1.198, "step": 3459 }, { "epoch": 2.14, "learning_rate": 9.681192660550458e-05, "loss": 1.138, "step": 3460 }, { "epoch": 2.14, "learning_rate": 9.674311926605503e-05, "loss": 1.115, "step": 3461 }, { "epoch": 2.14, "learning_rate": 9.667431192660549e-05, "loss": 1.1485, "step": 3462 }, { "epoch": 2.14, "learning_rate": 9.660550458715595e-05, "loss": 1.0184, "step": 3463 }, { "epoch": 2.14, "learning_rate": 9.653669724770642e-05, "loss": 1.0353, "step": 3464 }, { "epoch": 2.14, "learning_rate": 9.646788990825688e-05, "loss": 1.1045, "step": 3465 }, { "epoch": 2.14, "learning_rate": 9.639908256880734e-05, "loss": 1.0095, "step": 3466 }, { "epoch": 2.14, "learning_rate": 9.63302752293578e-05, "loss": 1.0109, "step": 3467 }, { "epoch": 2.14, "learning_rate": 9.626146788990826e-05, "loss": 1.0251, "step": 3468 }, { "epoch": 2.14, "learning_rate": 9.61926605504587e-05, "loss": 1.0543, "step": 3469 }, { "epoch": 2.14, "learning_rate": 9.612385321100916e-05, "loss": 0.9435, "step": 3470 }, { "epoch": 2.14, "learning_rate": 9.605504587155962e-05, "loss": 0.9223, "step": 3471 }, { "epoch": 2.14, "learning_rate": 9.598623853211008e-05, "loss": 0.9981, "step": 3472 }, { "epoch": 2.14, "learning_rate": 9.591743119266054e-05, "loss": 0.9542, "step": 3473 }, { "epoch": 2.14, "learning_rate": 9.5848623853211e-05, "loss": 0.8352, "step": 3474 }, { "epoch": 2.15, "learning_rate": 9.577981651376146e-05, "loss": 0.8954, "step": 3475 }, { "epoch": 2.15, "learning_rate": 9.571100917431192e-05, "loss": 0.7515, "step": 3476 }, { "epoch": 2.15, "learning_rate": 9.564220183486237e-05, "loss": 0.8236, "step": 3477 }, { "epoch": 2.15, "learning_rate": 9.557339449541283e-05, "loss": 0.902, "step": 3478 }, { "epoch": 2.15, "learning_rate": 9.55045871559633e-05, "loss": 0.6544, "step": 3479 }, { "epoch": 2.15, "learning_rate": 9.543577981651376e-05, "loss": 0.9736, "step": 3480 }, { "epoch": 2.15, "learning_rate": 9.536697247706422e-05, "loss": 0.8371, "step": 3481 }, { "epoch": 2.15, "learning_rate": 9.529816513761468e-05, "loss": 0.7488, "step": 3482 }, { "epoch": 2.15, "learning_rate": 9.522935779816514e-05, "loss": 0.7905, "step": 3483 }, { "epoch": 2.15, "learning_rate": 9.51605504587156e-05, "loss": 0.6648, "step": 3484 }, { "epoch": 2.15, "learning_rate": 9.509174311926603e-05, "loss": 0.7115, "step": 3485 }, { "epoch": 2.15, "learning_rate": 9.50229357798165e-05, "loss": 0.6977, "step": 3486 }, { "epoch": 2.15, "learning_rate": 9.495412844036696e-05, "loss": 0.7411, "step": 3487 }, { "epoch": 2.15, "learning_rate": 9.488532110091742e-05, "loss": 0.6174, "step": 3488 }, { "epoch": 2.15, "learning_rate": 9.481651376146788e-05, "loss": 0.6065, "step": 3489 }, { "epoch": 2.15, "learning_rate": 9.474770642201834e-05, "loss": 0.6839, "step": 3490 }, { "epoch": 2.15, "learning_rate": 9.46788990825688e-05, "loss": 1.9471, "step": 3491 }, { "epoch": 2.16, "learning_rate": 9.461009174311926e-05, "loss": 1.8161, "step": 3492 }, { "epoch": 2.16, "learning_rate": 9.454128440366971e-05, "loss": 1.5657, "step": 3493 }, { "epoch": 2.16, "learning_rate": 9.447247706422017e-05, "loss": 1.3685, "step": 3494 }, { "epoch": 2.16, "learning_rate": 9.440366972477063e-05, "loss": 1.4222, "step": 3495 }, { "epoch": 2.16, "learning_rate": 9.43348623853211e-05, "loss": 1.289, "step": 3496 }, { "epoch": 2.16, "learning_rate": 9.426605504587156e-05, "loss": 1.4978, "step": 3497 }, { "epoch": 2.16, "learning_rate": 9.419724770642202e-05, "loss": 1.2522, "step": 3498 }, { "epoch": 2.16, "learning_rate": 9.412844036697248e-05, "loss": 1.1857, "step": 3499 }, { "epoch": 2.16, "learning_rate": 9.405963302752294e-05, "loss": 1.288, "step": 3500 }, { "epoch": 2.16, "eval_bleu": 4.138692653029469e-15, "eval_loss": 1.8705445528030396, "eval_runtime": 2629.971, "eval_samples_per_second": 5.612, "eval_steps_per_second": 0.702, "step": 3500 }, { "epoch": 2.16, "learning_rate": 9.399082568807337e-05, "loss": 1.2475, "step": 3501 }, { "epoch": 2.16, "learning_rate": 9.392201834862384e-05, "loss": 1.3109, "step": 3502 }, { "epoch": 2.16, "learning_rate": 9.38532110091743e-05, "loss": 1.3751, "step": 3503 }, { "epoch": 2.16, "learning_rate": 9.378440366972476e-05, "loss": 1.1444, "step": 3504 }, { "epoch": 2.16, "learning_rate": 9.371559633027522e-05, "loss": 1.185, "step": 3505 }, { "epoch": 2.16, "learning_rate": 9.364678899082568e-05, "loss": 1.2135, "step": 3506 }, { "epoch": 2.16, "learning_rate": 9.357798165137614e-05, "loss": 1.1444, "step": 3507 }, { "epoch": 2.17, "learning_rate": 9.35091743119266e-05, "loss": 1.0526, "step": 3508 }, { "epoch": 2.17, "learning_rate": 9.344036697247705e-05, "loss": 1.1024, "step": 3509 }, { "epoch": 2.17, "learning_rate": 9.337155963302751e-05, "loss": 1.0709, "step": 3510 }, { "epoch": 2.17, "learning_rate": 9.330275229357797e-05, "loss": 0.9865, "step": 3511 }, { "epoch": 2.17, "learning_rate": 9.323394495412843e-05, "loss": 1.1247, "step": 3512 }, { "epoch": 2.17, "learning_rate": 9.31651376146789e-05, "loss": 1.1548, "step": 3513 }, { "epoch": 2.17, "learning_rate": 9.309633027522936e-05, "loss": 1.207, "step": 3514 }, { "epoch": 2.17, "learning_rate": 9.302752293577982e-05, "loss": 1.0402, "step": 3515 }, { "epoch": 2.17, "learning_rate": 9.295871559633028e-05, "loss": 1.0626, "step": 3516 }, { "epoch": 2.17, "learning_rate": 9.288990825688071e-05, "loss": 1.0654, "step": 3517 }, { "epoch": 2.17, "learning_rate": 9.282110091743117e-05, "loss": 0.9486, "step": 3518 }, { "epoch": 2.17, "learning_rate": 9.275229357798164e-05, "loss": 0.8802, "step": 3519 }, { "epoch": 2.17, "learning_rate": 9.26834862385321e-05, "loss": 1.0842, "step": 3520 }, { "epoch": 2.17, "learning_rate": 9.261467889908256e-05, "loss": 0.8901, "step": 3521 }, { "epoch": 2.17, "learning_rate": 9.254587155963302e-05, "loss": 1.0226, "step": 3522 }, { "epoch": 2.17, "learning_rate": 9.247706422018348e-05, "loss": 0.7663, "step": 3523 }, { "epoch": 2.18, "learning_rate": 9.240825688073394e-05, "loss": 0.8909, "step": 3524 }, { "epoch": 2.18, "learning_rate": 9.233944954128439e-05, "loss": 0.9203, "step": 3525 }, { "epoch": 2.18, "learning_rate": 9.227064220183485e-05, "loss": 0.8367, "step": 3526 }, { "epoch": 2.18, "learning_rate": 9.220183486238531e-05, "loss": 0.6632, "step": 3527 }, { "epoch": 2.18, "learning_rate": 9.213302752293577e-05, "loss": 0.7502, "step": 3528 }, { "epoch": 2.18, "learning_rate": 9.206422018348624e-05, "loss": 0.7843, "step": 3529 }, { "epoch": 2.18, "learning_rate": 9.19954128440367e-05, "loss": 0.7437, "step": 3530 }, { "epoch": 2.18, "learning_rate": 9.192660550458716e-05, "loss": 0.762, "step": 3531 }, { "epoch": 2.18, "learning_rate": 9.185779816513762e-05, "loss": 0.7725, "step": 3532 }, { "epoch": 2.18, "learning_rate": 9.178899082568805e-05, "loss": 0.6965, "step": 3533 }, { "epoch": 2.18, "learning_rate": 9.172018348623851e-05, "loss": 0.7457, "step": 3534 }, { "epoch": 2.18, "learning_rate": 9.165137614678898e-05, "loss": 0.6144, "step": 3535 }, { "epoch": 2.18, "learning_rate": 9.158256880733944e-05, "loss": 0.6706, "step": 3536 }, { "epoch": 2.18, "learning_rate": 9.15137614678899e-05, "loss": 0.6769, "step": 3537 }, { "epoch": 2.18, "learning_rate": 9.144495412844036e-05, "loss": 0.5808, "step": 3538 }, { "epoch": 2.18, "learning_rate": 9.137614678899082e-05, "loss": 0.6593, "step": 3539 }, { "epoch": 2.19, "learning_rate": 9.130733944954128e-05, "loss": 0.7093, "step": 3540 }, { "epoch": 2.19, "learning_rate": 9.123853211009173e-05, "loss": 1.8793, "step": 3541 }, { "epoch": 2.19, "learning_rate": 9.116972477064219e-05, "loss": 1.7687, "step": 3542 }, { "epoch": 2.19, "learning_rate": 9.110091743119265e-05, "loss": 1.4532, "step": 3543 }, { "epoch": 2.19, "learning_rate": 9.103211009174311e-05, "loss": 1.5811, "step": 3544 }, { "epoch": 2.19, "learning_rate": 9.096330275229357e-05, "loss": 1.5387, "step": 3545 }, { "epoch": 2.19, "learning_rate": 9.089449541284404e-05, "loss": 1.35, "step": 3546 }, { "epoch": 2.19, "learning_rate": 9.08256880733945e-05, "loss": 1.355, "step": 3547 }, { "epoch": 2.19, "learning_rate": 9.075688073394496e-05, "loss": 1.2898, "step": 3548 }, { "epoch": 2.19, "learning_rate": 9.068807339449539e-05, "loss": 1.2948, "step": 3549 }, { "epoch": 2.19, "learning_rate": 9.061926605504585e-05, "loss": 1.3558, "step": 3550 }, { "epoch": 2.19, "learning_rate": 9.055045871559632e-05, "loss": 1.2416, "step": 3551 }, { "epoch": 2.19, "learning_rate": 9.048165137614678e-05, "loss": 1.2496, "step": 3552 }, { "epoch": 2.19, "learning_rate": 9.041284403669724e-05, "loss": 1.3016, "step": 3553 }, { "epoch": 2.19, "learning_rate": 9.03440366972477e-05, "loss": 1.0363, "step": 3554 }, { "epoch": 2.19, "learning_rate": 9.027522935779816e-05, "loss": 1.1286, "step": 3555 }, { "epoch": 2.2, "learning_rate": 9.020642201834862e-05, "loss": 1.1571, "step": 3556 }, { "epoch": 2.2, "learning_rate": 9.013761467889907e-05, "loss": 1.1658, "step": 3557 }, { "epoch": 2.2, "learning_rate": 9.006880733944953e-05, "loss": 1.1334, "step": 3558 }, { "epoch": 2.2, "learning_rate": 8.999999999999999e-05, "loss": 1.0564, "step": 3559 }, { "epoch": 2.2, "learning_rate": 8.993119266055045e-05, "loss": 1.227, "step": 3560 }, { "epoch": 2.2, "learning_rate": 8.986238532110091e-05, "loss": 1.2322, "step": 3561 }, { "epoch": 2.2, "learning_rate": 8.979357798165138e-05, "loss": 1.1075, "step": 3562 }, { "epoch": 2.2, "learning_rate": 8.972477064220184e-05, "loss": 1.0504, "step": 3563 }, { "epoch": 2.2, "learning_rate": 8.96559633027523e-05, "loss": 1.1546, "step": 3564 }, { "epoch": 2.2, "learning_rate": 8.958715596330273e-05, "loss": 1.0451, "step": 3565 }, { "epoch": 2.2, "learning_rate": 8.95183486238532e-05, "loss": 1.0315, "step": 3566 }, { "epoch": 2.2, "learning_rate": 8.944954128440366e-05, "loss": 1.054, "step": 3567 }, { "epoch": 2.2, "learning_rate": 8.938073394495412e-05, "loss": 1.0141, "step": 3568 }, { "epoch": 2.2, "learning_rate": 8.931192660550458e-05, "loss": 0.941, "step": 3569 }, { "epoch": 2.2, "learning_rate": 8.924311926605504e-05, "loss": 0.865, "step": 3570 }, { "epoch": 2.2, "learning_rate": 8.91743119266055e-05, "loss": 0.8931, "step": 3571 }, { "epoch": 2.2, "learning_rate": 8.910550458715596e-05, "loss": 0.8717, "step": 3572 }, { "epoch": 2.21, "learning_rate": 8.903669724770641e-05, "loss": 0.977, "step": 3573 }, { "epoch": 2.21, "learning_rate": 8.896788990825687e-05, "loss": 0.8469, "step": 3574 }, { "epoch": 2.21, "learning_rate": 8.889908256880733e-05, "loss": 0.8662, "step": 3575 }, { "epoch": 2.21, "learning_rate": 8.883027522935779e-05, "loss": 0.9046, "step": 3576 }, { "epoch": 2.21, "learning_rate": 8.876146788990825e-05, "loss": 0.7823, "step": 3577 }, { "epoch": 2.21, "learning_rate": 8.869266055045872e-05, "loss": 0.8266, "step": 3578 }, { "epoch": 2.21, "learning_rate": 8.862385321100918e-05, "loss": 0.8557, "step": 3579 }, { "epoch": 2.21, "learning_rate": 8.855504587155964e-05, "loss": 0.7704, "step": 3580 }, { "epoch": 2.21, "learning_rate": 8.848623853211007e-05, "loss": 0.7383, "step": 3581 }, { "epoch": 2.21, "learning_rate": 8.841743119266053e-05, "loss": 0.7076, "step": 3582 }, { "epoch": 2.21, "learning_rate": 8.8348623853211e-05, "loss": 0.667, "step": 3583 }, { "epoch": 2.21, "learning_rate": 8.827981651376146e-05, "loss": 0.7423, "step": 3584 }, { "epoch": 2.21, "learning_rate": 8.821100917431192e-05, "loss": 0.6872, "step": 3585 }, { "epoch": 2.21, "learning_rate": 8.814220183486238e-05, "loss": 0.8133, "step": 3586 }, { "epoch": 2.21, "learning_rate": 8.807339449541284e-05, "loss": 0.5794, "step": 3587 }, { "epoch": 2.21, "learning_rate": 8.80045871559633e-05, "loss": 0.6116, "step": 3588 }, { "epoch": 2.22, "learning_rate": 8.793577981651375e-05, "loss": 0.5995, "step": 3589 }, { "epoch": 2.22, "learning_rate": 8.786697247706421e-05, "loss": 0.6264, "step": 3590 }, { "epoch": 2.22, "learning_rate": 8.779816513761467e-05, "loss": 1.9586, "step": 3591 }, { "epoch": 2.22, "learning_rate": 8.772935779816513e-05, "loss": 1.6907, "step": 3592 }, { "epoch": 2.22, "learning_rate": 8.76605504587156e-05, "loss": 1.4812, "step": 3593 }, { "epoch": 2.22, "learning_rate": 8.759174311926606e-05, "loss": 1.4334, "step": 3594 }, { "epoch": 2.22, "learning_rate": 8.752293577981652e-05, "loss": 1.208, "step": 3595 }, { "epoch": 2.22, "learning_rate": 8.745412844036698e-05, "loss": 1.2686, "step": 3596 }, { "epoch": 2.22, "learning_rate": 8.738532110091741e-05, "loss": 1.3475, "step": 3597 }, { "epoch": 2.22, "learning_rate": 8.731651376146787e-05, "loss": 1.2428, "step": 3598 }, { "epoch": 2.22, "learning_rate": 8.724770642201833e-05, "loss": 1.1171, "step": 3599 }, { "epoch": 2.22, "learning_rate": 8.71788990825688e-05, "loss": 1.2145, "step": 3600 }, { "epoch": 2.22, "learning_rate": 8.711009174311926e-05, "loss": 1.2259, "step": 3601 }, { "epoch": 2.22, "learning_rate": 8.704128440366972e-05, "loss": 1.2641, "step": 3602 }, { "epoch": 2.22, "learning_rate": 8.697247706422018e-05, "loss": 1.1741, "step": 3603 }, { "epoch": 2.22, "learning_rate": 8.690366972477064e-05, "loss": 1.2045, "step": 3604 }, { "epoch": 2.23, "learning_rate": 8.683486238532109e-05, "loss": 1.2254, "step": 3605 }, { "epoch": 2.23, "learning_rate": 8.676605504587155e-05, "loss": 1.2583, "step": 3606 }, { "epoch": 2.23, "learning_rate": 8.669724770642201e-05, "loss": 1.1906, "step": 3607 }, { "epoch": 2.23, "learning_rate": 8.662844036697247e-05, "loss": 1.0979, "step": 3608 }, { "epoch": 2.23, "learning_rate": 8.655963302752293e-05, "loss": 1.1706, "step": 3609 }, { "epoch": 2.23, "learning_rate": 8.64908256880734e-05, "loss": 1.0694, "step": 3610 }, { "epoch": 2.23, "learning_rate": 8.642201834862386e-05, "loss": 1.0504, "step": 3611 }, { "epoch": 2.23, "learning_rate": 8.635321100917432e-05, "loss": 1.0533, "step": 3612 }, { "epoch": 2.23, "learning_rate": 8.628440366972475e-05, "loss": 1.1704, "step": 3613 }, { "epoch": 2.23, "learning_rate": 8.621559633027521e-05, "loss": 1.0594, "step": 3614 }, { "epoch": 2.23, "learning_rate": 8.614678899082567e-05, "loss": 0.9619, "step": 3615 }, { "epoch": 2.23, "learning_rate": 8.607798165137614e-05, "loss": 0.9403, "step": 3616 }, { "epoch": 2.23, "learning_rate": 8.60091743119266e-05, "loss": 0.9973, "step": 3617 }, { "epoch": 2.23, "learning_rate": 8.594036697247706e-05, "loss": 1.0126, "step": 3618 }, { "epoch": 2.23, "learning_rate": 8.587155963302752e-05, "loss": 0.8992, "step": 3619 }, { "epoch": 2.23, "learning_rate": 8.580275229357798e-05, "loss": 0.9369, "step": 3620 }, { "epoch": 2.24, "learning_rate": 8.573394495412843e-05, "loss": 0.9439, "step": 3621 }, { "epoch": 2.24, "learning_rate": 8.566513761467889e-05, "loss": 0.8866, "step": 3622 }, { "epoch": 2.24, "learning_rate": 8.559633027522935e-05, "loss": 0.9114, "step": 3623 }, { "epoch": 2.24, "learning_rate": 8.552752293577981e-05, "loss": 0.8652, "step": 3624 }, { "epoch": 2.24, "learning_rate": 8.545871559633027e-05, "loss": 0.9351, "step": 3625 }, { "epoch": 2.24, "learning_rate": 8.538990825688073e-05, "loss": 0.8621, "step": 3626 }, { "epoch": 2.24, "learning_rate": 8.53211009174312e-05, "loss": 0.8522, "step": 3627 }, { "epoch": 2.24, "learning_rate": 8.525229357798166e-05, "loss": 0.8697, "step": 3628 }, { "epoch": 2.24, "learning_rate": 8.518348623853209e-05, "loss": 0.7722, "step": 3629 }, { "epoch": 2.24, "learning_rate": 8.511467889908255e-05, "loss": 0.7667, "step": 3630 }, { "epoch": 2.24, "learning_rate": 8.504587155963301e-05, "loss": 0.7215, "step": 3631 }, { "epoch": 2.24, "learning_rate": 8.497706422018348e-05, "loss": 0.67, "step": 3632 }, { "epoch": 2.24, "learning_rate": 8.490825688073394e-05, "loss": 0.7736, "step": 3633 }, { "epoch": 2.24, "learning_rate": 8.48394495412844e-05, "loss": 0.6512, "step": 3634 }, { "epoch": 2.24, "learning_rate": 8.477064220183486e-05, "loss": 0.5156, "step": 3635 }, { "epoch": 2.24, "learning_rate": 8.470183486238532e-05, "loss": 0.6425, "step": 3636 }, { "epoch": 2.25, "learning_rate": 8.463302752293577e-05, "loss": 0.5601, "step": 3637 }, { "epoch": 2.25, "learning_rate": 8.456422018348623e-05, "loss": 0.5592, "step": 3638 }, { "epoch": 2.25, "learning_rate": 8.449541284403669e-05, "loss": 0.733, "step": 3639 }, { "epoch": 2.25, "learning_rate": 8.442660550458715e-05, "loss": 0.6353, "step": 3640 }, { "epoch": 2.25, "learning_rate": 8.435779816513761e-05, "loss": 1.9434, "step": 3641 }, { "epoch": 2.25, "learning_rate": 8.428899082568807e-05, "loss": 1.5979, "step": 3642 }, { "epoch": 2.25, "learning_rate": 8.422018348623854e-05, "loss": 1.5136, "step": 3643 }, { "epoch": 2.25, "learning_rate": 8.4151376146789e-05, "loss": 1.4489, "step": 3644 }, { "epoch": 2.25, "learning_rate": 8.408256880733943e-05, "loss": 1.3091, "step": 3645 }, { "epoch": 2.25, "learning_rate": 8.401376146788989e-05, "loss": 1.3172, "step": 3646 }, { "epoch": 2.25, "learning_rate": 8.394495412844035e-05, "loss": 1.3353, "step": 3647 }, { "epoch": 2.25, "learning_rate": 8.387614678899081e-05, "loss": 1.4128, "step": 3648 }, { "epoch": 2.25, "learning_rate": 8.380733944954128e-05, "loss": 1.1792, "step": 3649 }, { "epoch": 2.25, "learning_rate": 8.373853211009174e-05, "loss": 1.353, "step": 3650 }, { "epoch": 2.25, "learning_rate": 8.36697247706422e-05, "loss": 1.2037, "step": 3651 }, { "epoch": 2.25, "learning_rate": 8.360091743119266e-05, "loss": 1.1121, "step": 3652 }, { "epoch": 2.25, "learning_rate": 8.353211009174311e-05, "loss": 1.2247, "step": 3653 }, { "epoch": 2.26, "learning_rate": 8.346330275229357e-05, "loss": 1.1077, "step": 3654 }, { "epoch": 2.26, "learning_rate": 8.339449541284403e-05, "loss": 1.2596, "step": 3655 }, { "epoch": 2.26, "learning_rate": 8.332568807339449e-05, "loss": 1.0809, "step": 3656 }, { "epoch": 2.26, "learning_rate": 8.325688073394495e-05, "loss": 1.2514, "step": 3657 }, { "epoch": 2.26, "learning_rate": 8.318807339449541e-05, "loss": 1.2214, "step": 3658 }, { "epoch": 2.26, "learning_rate": 8.311926605504588e-05, "loss": 1.05, "step": 3659 }, { "epoch": 2.26, "learning_rate": 8.305045871559632e-05, "loss": 1.0321, "step": 3660 }, { "epoch": 2.26, "learning_rate": 8.298165137614677e-05, "loss": 1.1047, "step": 3661 }, { "epoch": 2.26, "learning_rate": 8.291284403669723e-05, "loss": 1.143, "step": 3662 }, { "epoch": 2.26, "learning_rate": 8.284403669724769e-05, "loss": 1.1161, "step": 3663 }, { "epoch": 2.26, "learning_rate": 8.277522935779815e-05, "loss": 1.0904, "step": 3664 }, { "epoch": 2.26, "learning_rate": 8.270642201834862e-05, "loss": 0.9625, "step": 3665 }, { "epoch": 2.26, "learning_rate": 8.263761467889908e-05, "loss": 1.0028, "step": 3666 }, { "epoch": 2.26, "learning_rate": 8.256880733944954e-05, "loss": 0.9684, "step": 3667 }, { "epoch": 2.26, "learning_rate": 8.25e-05, "loss": 1.0138, "step": 3668 }, { "epoch": 2.26, "learning_rate": 8.243119266055045e-05, "loss": 0.8684, "step": 3669 }, { "epoch": 2.27, "learning_rate": 8.236238532110091e-05, "loss": 0.9249, "step": 3670 }, { "epoch": 2.27, "learning_rate": 8.229357798165137e-05, "loss": 0.9015, "step": 3671 }, { "epoch": 2.27, "learning_rate": 8.222477064220183e-05, "loss": 1.019, "step": 3672 }, { "epoch": 2.27, "learning_rate": 8.215596330275229e-05, "loss": 0.8567, "step": 3673 }, { "epoch": 2.27, "learning_rate": 8.208715596330275e-05, "loss": 0.8534, "step": 3674 }, { "epoch": 2.27, "learning_rate": 8.201834862385321e-05, "loss": 0.8346, "step": 3675 }, { "epoch": 2.27, "learning_rate": 8.194954128440366e-05, "loss": 0.7746, "step": 3676 }, { "epoch": 2.27, "learning_rate": 8.188073394495411e-05, "loss": 0.8471, "step": 3677 }, { "epoch": 2.27, "learning_rate": 8.181192660550457e-05, "loss": 0.7325, "step": 3678 }, { "epoch": 2.27, "learning_rate": 8.174311926605503e-05, "loss": 0.7493, "step": 3679 }, { "epoch": 2.27, "learning_rate": 8.16743119266055e-05, "loss": 0.8608, "step": 3680 }, { "epoch": 2.27, "learning_rate": 8.160550458715596e-05, "loss": 0.753, "step": 3681 }, { "epoch": 2.27, "learning_rate": 8.153669724770642e-05, "loss": 0.63, "step": 3682 }, { "epoch": 2.27, "learning_rate": 8.146788990825688e-05, "loss": 0.7141, "step": 3683 }, { "epoch": 2.27, "learning_rate": 8.139908256880734e-05, "loss": 0.6127, "step": 3684 }, { "epoch": 2.27, "learning_rate": 8.133027522935779e-05, "loss": 0.7448, "step": 3685 }, { "epoch": 2.28, "learning_rate": 8.126146788990825e-05, "loss": 0.5936, "step": 3686 }, { "epoch": 2.28, "learning_rate": 8.119266055045871e-05, "loss": 0.6129, "step": 3687 }, { "epoch": 2.28, "learning_rate": 8.112385321100917e-05, "loss": 0.6168, "step": 3688 }, { "epoch": 2.28, "learning_rate": 8.105504587155963e-05, "loss": 0.5678, "step": 3689 }, { "epoch": 2.28, "learning_rate": 8.09862385321101e-05, "loss": 0.6078, "step": 3690 }, { "epoch": 2.28, "learning_rate": 8.091743119266055e-05, "loss": 2.0308, "step": 3691 }, { "epoch": 2.28, "learning_rate": 8.0848623853211e-05, "loss": 1.6064, "step": 3692 }, { "epoch": 2.28, "learning_rate": 8.077981651376145e-05, "loss": 1.5351, "step": 3693 }, { "epoch": 2.28, "learning_rate": 8.071100917431191e-05, "loss": 1.4937, "step": 3694 }, { "epoch": 2.28, "learning_rate": 8.064220183486237e-05, "loss": 1.4196, "step": 3695 }, { "epoch": 2.28, "learning_rate": 8.057339449541283e-05, "loss": 1.382, "step": 3696 }, { "epoch": 2.28, "learning_rate": 8.05045871559633e-05, "loss": 1.2651, "step": 3697 }, { "epoch": 2.28, "learning_rate": 8.043577981651376e-05, "loss": 1.3193, "step": 3698 }, { "epoch": 2.28, "learning_rate": 8.036697247706422e-05, "loss": 1.2006, "step": 3699 }, { "epoch": 2.28, "learning_rate": 8.029816513761468e-05, "loss": 1.2163, "step": 3700 }, { "epoch": 2.28, "learning_rate": 8.022935779816513e-05, "loss": 1.1444, "step": 3701 }, { "epoch": 2.29, "learning_rate": 8.016055045871559e-05, "loss": 1.2515, "step": 3702 }, { "epoch": 2.29, "learning_rate": 8.009174311926605e-05, "loss": 1.1149, "step": 3703 }, { "epoch": 2.29, "learning_rate": 8.002293577981651e-05, "loss": 1.1447, "step": 3704 }, { "epoch": 2.29, "learning_rate": 7.995412844036697e-05, "loss": 1.2943, "step": 3705 }, { "epoch": 2.29, "learning_rate": 7.988532110091743e-05, "loss": 1.28, "step": 3706 }, { "epoch": 2.29, "learning_rate": 7.981651376146788e-05, "loss": 1.0291, "step": 3707 }, { "epoch": 2.29, "learning_rate": 7.974770642201834e-05, "loss": 1.1908, "step": 3708 }, { "epoch": 2.29, "learning_rate": 7.967889908256879e-05, "loss": 1.087, "step": 3709 }, { "epoch": 2.29, "learning_rate": 7.961009174311925e-05, "loss": 1.0814, "step": 3710 }, { "epoch": 2.29, "learning_rate": 7.954128440366971e-05, "loss": 1.0838, "step": 3711 }, { "epoch": 2.29, "learning_rate": 7.947247706422017e-05, "loss": 1.1268, "step": 3712 }, { "epoch": 2.29, "learning_rate": 7.940366972477063e-05, "loss": 1.1201, "step": 3713 }, { "epoch": 2.29, "learning_rate": 7.93348623853211e-05, "loss": 0.9538, "step": 3714 }, { "epoch": 2.29, "learning_rate": 7.926605504587156e-05, "loss": 1.0952, "step": 3715 }, { "epoch": 2.29, "learning_rate": 7.919724770642202e-05, "loss": 0.9965, "step": 3716 }, { "epoch": 2.29, "learning_rate": 7.912844036697247e-05, "loss": 1.0483, "step": 3717 }, { "epoch": 2.3, "learning_rate": 7.905963302752293e-05, "loss": 1.0054, "step": 3718 }, { "epoch": 2.3, "learning_rate": 7.899082568807339e-05, "loss": 0.9281, "step": 3719 }, { "epoch": 2.3, "learning_rate": 7.892201834862385e-05, "loss": 0.9692, "step": 3720 }, { "epoch": 2.3, "learning_rate": 7.885321100917431e-05, "loss": 0.9048, "step": 3721 }, { "epoch": 2.3, "learning_rate": 7.878440366972477e-05, "loss": 0.9481, "step": 3722 }, { "epoch": 2.3, "learning_rate": 7.871559633027522e-05, "loss": 0.8283, "step": 3723 }, { "epoch": 2.3, "learning_rate": 7.864678899082568e-05, "loss": 0.8775, "step": 3724 }, { "epoch": 2.3, "learning_rate": 7.857798165137613e-05, "loss": 0.9424, "step": 3725 }, { "epoch": 2.3, "learning_rate": 7.850917431192659e-05, "loss": 0.7042, "step": 3726 }, { "epoch": 2.3, "learning_rate": 7.844036697247705e-05, "loss": 0.7994, "step": 3727 }, { "epoch": 2.3, "learning_rate": 7.837155963302751e-05, "loss": 0.7188, "step": 3728 }, { "epoch": 2.3, "learning_rate": 7.830275229357797e-05, "loss": 0.7178, "step": 3729 }, { "epoch": 2.3, "learning_rate": 7.823394495412844e-05, "loss": 0.7984, "step": 3730 }, { "epoch": 2.3, "learning_rate": 7.81651376146789e-05, "loss": 0.8928, "step": 3731 }, { "epoch": 2.3, "learning_rate": 7.809633027522936e-05, "loss": 0.7836, "step": 3732 }, { "epoch": 2.3, "learning_rate": 7.80275229357798e-05, "loss": 0.7521, "step": 3733 }, { "epoch": 2.3, "learning_rate": 7.795871559633027e-05, "loss": 0.7392, "step": 3734 }, { "epoch": 2.31, "learning_rate": 7.788990825688073e-05, "loss": 0.6345, "step": 3735 }, { "epoch": 2.31, "learning_rate": 7.782110091743119e-05, "loss": 0.5659, "step": 3736 }, { "epoch": 2.31, "learning_rate": 7.775229357798165e-05, "loss": 0.6035, "step": 3737 }, { "epoch": 2.31, "learning_rate": 7.76834862385321e-05, "loss": 0.5845, "step": 3738 }, { "epoch": 2.31, "learning_rate": 7.761467889908256e-05, "loss": 0.5404, "step": 3739 }, { "epoch": 2.31, "learning_rate": 7.754587155963302e-05, "loss": 0.5461, "step": 3740 }, { "epoch": 2.31, "learning_rate": 7.747706422018347e-05, "loss": 2.0229, "step": 3741 }, { "epoch": 2.31, "learning_rate": 7.740825688073393e-05, "loss": 1.6285, "step": 3742 }, { "epoch": 2.31, "learning_rate": 7.733944954128439e-05, "loss": 1.5521, "step": 3743 }, { "epoch": 2.31, "learning_rate": 7.727064220183485e-05, "loss": 1.4716, "step": 3744 }, { "epoch": 2.31, "learning_rate": 7.720183486238531e-05, "loss": 1.3636, "step": 3745 }, { "epoch": 2.31, "learning_rate": 7.713302752293578e-05, "loss": 1.3513, "step": 3746 }, { "epoch": 2.31, "learning_rate": 7.706422018348624e-05, "loss": 1.2689, "step": 3747 }, { "epoch": 2.31, "learning_rate": 7.69954128440367e-05, "loss": 1.1857, "step": 3748 }, { "epoch": 2.31, "learning_rate": 7.692660550458715e-05, "loss": 1.2468, "step": 3749 }, { "epoch": 2.31, "learning_rate": 7.685779816513761e-05, "loss": 1.0738, "step": 3750 }, { "epoch": 2.32, "learning_rate": 7.678899082568807e-05, "loss": 1.2023, "step": 3751 }, { "epoch": 2.32, "learning_rate": 7.672018348623853e-05, "loss": 1.4627, "step": 3752 }, { "epoch": 2.32, "learning_rate": 7.665137614678899e-05, "loss": 1.1457, "step": 3753 }, { "epoch": 2.32, "learning_rate": 7.658256880733944e-05, "loss": 1.2178, "step": 3754 }, { "epoch": 2.32, "learning_rate": 7.65137614678899e-05, "loss": 1.1336, "step": 3755 }, { "epoch": 2.32, "learning_rate": 7.644495412844036e-05, "loss": 1.1109, "step": 3756 }, { "epoch": 2.32, "learning_rate": 7.637614678899081e-05, "loss": 1.1721, "step": 3757 }, { "epoch": 2.32, "learning_rate": 7.630733944954127e-05, "loss": 1.2285, "step": 3758 }, { "epoch": 2.32, "learning_rate": 7.623853211009173e-05, "loss": 1.2159, "step": 3759 }, { "epoch": 2.32, "learning_rate": 7.616972477064219e-05, "loss": 1.1024, "step": 3760 }, { "epoch": 2.32, "learning_rate": 7.610091743119265e-05, "loss": 1.194, "step": 3761 }, { "epoch": 2.32, "learning_rate": 7.603211009174312e-05, "loss": 1.027, "step": 3762 }, { "epoch": 2.32, "learning_rate": 7.596330275229358e-05, "loss": 1.1311, "step": 3763 }, { "epoch": 2.32, "learning_rate": 7.589449541284404e-05, "loss": 1.0079, "step": 3764 }, { "epoch": 2.32, "learning_rate": 7.582568807339449e-05, "loss": 1.048, "step": 3765 }, { "epoch": 2.32, "learning_rate": 7.575688073394495e-05, "loss": 0.8928, "step": 3766 }, { "epoch": 2.33, "learning_rate": 7.568807339449541e-05, "loss": 0.9437, "step": 3767 }, { "epoch": 2.33, "learning_rate": 7.561926605504587e-05, "loss": 0.9593, "step": 3768 }, { "epoch": 2.33, "learning_rate": 7.555045871559633e-05, "loss": 0.885, "step": 3769 }, { "epoch": 2.33, "learning_rate": 7.548165137614678e-05, "loss": 0.9193, "step": 3770 }, { "epoch": 2.33, "learning_rate": 7.541284403669724e-05, "loss": 0.9879, "step": 3771 }, { "epoch": 2.33, "learning_rate": 7.53440366972477e-05, "loss": 0.9309, "step": 3772 }, { "epoch": 2.33, "learning_rate": 7.527522935779815e-05, "loss": 0.9125, "step": 3773 }, { "epoch": 2.33, "learning_rate": 7.520642201834861e-05, "loss": 0.8467, "step": 3774 }, { "epoch": 2.33, "learning_rate": 7.513761467889907e-05, "loss": 0.829, "step": 3775 }, { "epoch": 2.33, "learning_rate": 7.506880733944953e-05, "loss": 0.8285, "step": 3776 }, { "epoch": 2.33, "learning_rate": 7.5e-05, "loss": 0.818, "step": 3777 }, { "epoch": 2.33, "learning_rate": 7.493119266055045e-05, "loss": 0.8252, "step": 3778 }, { "epoch": 2.33, "learning_rate": 7.486238532110092e-05, "loss": 0.7033, "step": 3779 }, { "epoch": 2.33, "learning_rate": 7.479357798165136e-05, "loss": 0.7164, "step": 3780 }, { "epoch": 2.33, "learning_rate": 7.472477064220182e-05, "loss": 0.6598, "step": 3781 }, { "epoch": 2.33, "learning_rate": 7.465596330275229e-05, "loss": 0.774, "step": 3782 }, { "epoch": 2.34, "learning_rate": 7.458715596330275e-05, "loss": 0.7169, "step": 3783 }, { "epoch": 2.34, "learning_rate": 7.451834862385321e-05, "loss": 0.6549, "step": 3784 }, { "epoch": 2.34, "learning_rate": 7.444954128440366e-05, "loss": 0.6953, "step": 3785 }, { "epoch": 2.34, "learning_rate": 7.438073394495412e-05, "loss": 0.5774, "step": 3786 }, { "epoch": 2.34, "learning_rate": 7.431192660550458e-05, "loss": 0.5607, "step": 3787 }, { "epoch": 2.34, "learning_rate": 7.424311926605504e-05, "loss": 0.5631, "step": 3788 }, { "epoch": 2.34, "learning_rate": 7.41743119266055e-05, "loss": 0.6497, "step": 3789 }, { "epoch": 2.34, "learning_rate": 7.410550458715596e-05, "loss": 0.7251, "step": 3790 }, { "epoch": 2.34, "learning_rate": 7.403669724770642e-05, "loss": 2.0469, "step": 3791 }, { "epoch": 2.34, "learning_rate": 7.396788990825687e-05, "loss": 1.5945, "step": 3792 }, { "epoch": 2.34, "learning_rate": 7.389908256880733e-05, "loss": 1.5784, "step": 3793 }, { "epoch": 2.34, "learning_rate": 7.38302752293578e-05, "loss": 1.2069, "step": 3794 }, { "epoch": 2.34, "learning_rate": 7.376146788990826e-05, "loss": 1.3418, "step": 3795 }, { "epoch": 2.34, "learning_rate": 7.36926605504587e-05, "loss": 1.3097, "step": 3796 }, { "epoch": 2.34, "learning_rate": 7.362385321100916e-05, "loss": 1.3082, "step": 3797 }, { "epoch": 2.34, "learning_rate": 7.355504587155963e-05, "loss": 1.302, "step": 3798 }, { "epoch": 2.35, "learning_rate": 7.348623853211009e-05, "loss": 1.2243, "step": 3799 }, { "epoch": 2.35, "learning_rate": 7.341743119266055e-05, "loss": 1.2325, "step": 3800 }, { "epoch": 2.35, "learning_rate": 7.3348623853211e-05, "loss": 1.1625, "step": 3801 }, { "epoch": 2.35, "learning_rate": 7.327981651376146e-05, "loss": 1.1826, "step": 3802 }, { "epoch": 2.35, "learning_rate": 7.321100917431192e-05, "loss": 1.2711, "step": 3803 }, { "epoch": 2.35, "learning_rate": 7.314220183486238e-05, "loss": 1.1629, "step": 3804 }, { "epoch": 2.35, "learning_rate": 7.307339449541284e-05, "loss": 1.1474, "step": 3805 }, { "epoch": 2.35, "learning_rate": 7.30045871559633e-05, "loss": 1.1057, "step": 3806 }, { "epoch": 2.35, "learning_rate": 7.293577981651376e-05, "loss": 1.1821, "step": 3807 }, { "epoch": 2.35, "learning_rate": 7.286697247706421e-05, "loss": 1.1355, "step": 3808 }, { "epoch": 2.35, "learning_rate": 7.279816513761467e-05, "loss": 1.0072, "step": 3809 }, { "epoch": 2.35, "learning_rate": 7.272935779816513e-05, "loss": 1.0652, "step": 3810 }, { "epoch": 2.35, "learning_rate": 7.26605504587156e-05, "loss": 1.0744, "step": 3811 }, { "epoch": 2.35, "learning_rate": 7.259174311926604e-05, "loss": 1.0275, "step": 3812 }, { "epoch": 2.35, "learning_rate": 7.25229357798165e-05, "loss": 1.1359, "step": 3813 }, { "epoch": 2.35, "learning_rate": 7.245412844036697e-05, "loss": 1.0698, "step": 3814 }, { "epoch": 2.35, "learning_rate": 7.238532110091743e-05, "loss": 0.9387, "step": 3815 }, { "epoch": 2.36, "learning_rate": 7.231651376146789e-05, "loss": 1.0375, "step": 3816 }, { "epoch": 2.36, "learning_rate": 7.224770642201834e-05, "loss": 0.9125, "step": 3817 }, { "epoch": 2.36, "learning_rate": 7.21788990825688e-05, "loss": 0.9368, "step": 3818 }, { "epoch": 2.36, "learning_rate": 7.211009174311926e-05, "loss": 0.9507, "step": 3819 }, { "epoch": 2.36, "learning_rate": 7.204128440366972e-05, "loss": 0.8843, "step": 3820 }, { "epoch": 2.36, "learning_rate": 7.197247706422018e-05, "loss": 0.9185, "step": 3821 }, { "epoch": 2.36, "learning_rate": 7.190366972477064e-05, "loss": 0.8446, "step": 3822 }, { "epoch": 2.36, "learning_rate": 7.18348623853211e-05, "loss": 0.9292, "step": 3823 }, { "epoch": 2.36, "learning_rate": 7.176605504587155e-05, "loss": 0.7549, "step": 3824 }, { "epoch": 2.36, "learning_rate": 7.169724770642201e-05, "loss": 0.7597, "step": 3825 }, { "epoch": 2.36, "learning_rate": 7.162844036697247e-05, "loss": 0.8596, "step": 3826 }, { "epoch": 2.36, "learning_rate": 7.155963302752293e-05, "loss": 0.7265, "step": 3827 }, { "epoch": 2.36, "learning_rate": 7.149082568807338e-05, "loss": 0.6695, "step": 3828 }, { "epoch": 2.36, "learning_rate": 7.142201834862384e-05, "loss": 0.7114, "step": 3829 }, { "epoch": 2.36, "learning_rate": 7.13532110091743e-05, "loss": 0.7298, "step": 3830 }, { "epoch": 2.36, "learning_rate": 7.128440366972477e-05, "loss": 0.6657, "step": 3831 }, { "epoch": 2.37, "learning_rate": 7.121559633027521e-05, "loss": 0.6855, "step": 3832 }, { "epoch": 2.37, "learning_rate": 7.114678899082568e-05, "loss": 0.6794, "step": 3833 }, { "epoch": 2.37, "learning_rate": 7.107798165137614e-05, "loss": 0.6444, "step": 3834 }, { "epoch": 2.37, "learning_rate": 7.10091743119266e-05, "loss": 0.6803, "step": 3835 }, { "epoch": 2.37, "learning_rate": 7.094036697247706e-05, "loss": 0.6631, "step": 3836 }, { "epoch": 2.37, "learning_rate": 7.087155963302752e-05, "loss": 0.6324, "step": 3837 }, { "epoch": 2.37, "learning_rate": 7.080275229357798e-05, "loss": 0.6049, "step": 3838 }, { "epoch": 2.37, "learning_rate": 7.073394495412844e-05, "loss": 0.5095, "step": 3839 }, { "epoch": 2.37, "learning_rate": 7.066513761467889e-05, "loss": 0.6426, "step": 3840 }, { "epoch": 2.37, "learning_rate": 7.059633027522935e-05, "loss": 1.5919, "step": 3841 }, { "epoch": 2.37, "learning_rate": 7.052752293577981e-05, "loss": 1.5759, "step": 3842 }, { "epoch": 2.37, "learning_rate": 7.045871559633027e-05, "loss": 1.5045, "step": 3843 }, { "epoch": 2.37, "learning_rate": 7.038990825688072e-05, "loss": 1.4559, "step": 3844 }, { "epoch": 2.37, "learning_rate": 7.032110091743118e-05, "loss": 1.2657, "step": 3845 }, { "epoch": 2.37, "learning_rate": 7.025229357798164e-05, "loss": 1.318, "step": 3846 }, { "epoch": 2.37, "learning_rate": 7.01834862385321e-05, "loss": 1.2932, "step": 3847 }, { "epoch": 2.38, "learning_rate": 7.011467889908255e-05, "loss": 1.2322, "step": 3848 }, { "epoch": 2.38, "learning_rate": 7.004587155963302e-05, "loss": 1.3297, "step": 3849 }, { "epoch": 2.38, "learning_rate": 6.997706422018348e-05, "loss": 1.1616, "step": 3850 }, { "epoch": 2.38, "learning_rate": 6.990825688073394e-05, "loss": 1.1907, "step": 3851 }, { "epoch": 2.38, "learning_rate": 6.98394495412844e-05, "loss": 1.2661, "step": 3852 }, { "epoch": 2.38, "learning_rate": 6.977064220183486e-05, "loss": 1.1496, "step": 3853 }, { "epoch": 2.38, "learning_rate": 6.970183486238532e-05, "loss": 1.1133, "step": 3854 }, { "epoch": 2.38, "learning_rate": 6.963302752293578e-05, "loss": 1.1594, "step": 3855 }, { "epoch": 2.38, "learning_rate": 6.956422018348623e-05, "loss": 1.14, "step": 3856 }, { "epoch": 2.38, "learning_rate": 6.949541284403669e-05, "loss": 1.1135, "step": 3857 }, { "epoch": 2.38, "learning_rate": 6.942660550458715e-05, "loss": 1.0633, "step": 3858 }, { "epoch": 2.38, "learning_rate": 6.935779816513761e-05, "loss": 1.2905, "step": 3859 }, { "epoch": 2.38, "learning_rate": 6.928899082568806e-05, "loss": 1.0853, "step": 3860 }, { "epoch": 2.38, "learning_rate": 6.922018348623852e-05, "loss": 0.9948, "step": 3861 }, { "epoch": 2.38, "learning_rate": 6.915137614678898e-05, "loss": 0.927, "step": 3862 }, { "epoch": 2.38, "learning_rate": 6.908256880733945e-05, "loss": 0.9915, "step": 3863 }, { "epoch": 2.39, "learning_rate": 6.90137614678899e-05, "loss": 0.927, "step": 3864 }, { "epoch": 2.39, "learning_rate": 6.894495412844035e-05, "loss": 1.1382, "step": 3865 }, { "epoch": 2.39, "learning_rate": 6.887614678899082e-05, "loss": 1.1153, "step": 3866 }, { "epoch": 2.39, "learning_rate": 6.880733944954128e-05, "loss": 1.0171, "step": 3867 }, { "epoch": 2.39, "learning_rate": 6.873853211009174e-05, "loss": 0.9905, "step": 3868 }, { "epoch": 2.39, "learning_rate": 6.86697247706422e-05, "loss": 0.9552, "step": 3869 }, { "epoch": 2.39, "learning_rate": 6.860091743119266e-05, "loss": 0.9987, "step": 3870 }, { "epoch": 2.39, "learning_rate": 6.853211009174312e-05, "loss": 0.8147, "step": 3871 }, { "epoch": 2.39, "learning_rate": 6.846330275229357e-05, "loss": 0.8816, "step": 3872 }, { "epoch": 2.39, "learning_rate": 6.839449541284403e-05, "loss": 0.9317, "step": 3873 }, { "epoch": 2.39, "learning_rate": 6.832568807339449e-05, "loss": 0.9196, "step": 3874 }, { "epoch": 2.39, "learning_rate": 6.825688073394495e-05, "loss": 0.7313, "step": 3875 }, { "epoch": 2.39, "learning_rate": 6.81880733944954e-05, "loss": 0.8846, "step": 3876 }, { "epoch": 2.39, "learning_rate": 6.811926605504586e-05, "loss": 0.8747, "step": 3877 }, { "epoch": 2.39, "learning_rate": 6.805045871559632e-05, "loss": 0.7789, "step": 3878 }, { "epoch": 2.39, "learning_rate": 6.798165137614679e-05, "loss": 0.7371, "step": 3879 }, { "epoch": 2.4, "learning_rate": 6.791284403669723e-05, "loss": 0.8152, "step": 3880 }, { "epoch": 2.4, "learning_rate": 6.78440366972477e-05, "loss": 0.7001, "step": 3881 }, { "epoch": 2.4, "learning_rate": 6.777522935779816e-05, "loss": 0.7366, "step": 3882 }, { "epoch": 2.4, "learning_rate": 6.770642201834862e-05, "loss": 0.7287, "step": 3883 }, { "epoch": 2.4, "learning_rate": 6.763761467889908e-05, "loss": 0.6601, "step": 3884 }, { "epoch": 2.4, "learning_rate": 6.756880733944954e-05, "loss": 0.7124, "step": 3885 }, { "epoch": 2.4, "learning_rate": 6.75e-05, "loss": 0.659, "step": 3886 }, { "epoch": 2.4, "learning_rate": 6.743119266055046e-05, "loss": 0.567, "step": 3887 }, { "epoch": 2.4, "learning_rate": 6.736238532110091e-05, "loss": 0.5553, "step": 3888 }, { "epoch": 2.4, "learning_rate": 6.729357798165137e-05, "loss": 0.522, "step": 3889 }, { "epoch": 2.4, "learning_rate": 6.722477064220183e-05, "loss": 0.5788, "step": 3890 }, { "epoch": 2.4, "learning_rate": 6.71559633027523e-05, "loss": 1.8061, "step": 3891 }, { "epoch": 2.4, "learning_rate": 6.708715596330274e-05, "loss": 1.462, "step": 3892 }, { "epoch": 2.4, "learning_rate": 6.70183486238532e-05, "loss": 1.3906, "step": 3893 }, { "epoch": 2.4, "learning_rate": 6.694954128440366e-05, "loss": 1.3512, "step": 3894 }, { "epoch": 2.4, "learning_rate": 6.688073394495413e-05, "loss": 1.3055, "step": 3895 }, { "epoch": 2.4, "learning_rate": 6.681192660550457e-05, "loss": 1.1685, "step": 3896 }, { "epoch": 2.41, "learning_rate": 6.674311926605503e-05, "loss": 1.2855, "step": 3897 }, { "epoch": 2.41, "learning_rate": 6.66743119266055e-05, "loss": 1.3616, "step": 3898 }, { "epoch": 2.41, "learning_rate": 6.660550458715596e-05, "loss": 1.3278, "step": 3899 }, { "epoch": 2.41, "learning_rate": 6.653669724770642e-05, "loss": 1.2772, "step": 3900 }, { "epoch": 2.41, "learning_rate": 6.646788990825688e-05, "loss": 1.1643, "step": 3901 }, { "epoch": 2.41, "learning_rate": 6.639908256880734e-05, "loss": 1.1848, "step": 3902 }, { "epoch": 2.41, "learning_rate": 6.63302752293578e-05, "loss": 1.179, "step": 3903 }, { "epoch": 2.41, "learning_rate": 6.626146788990825e-05, "loss": 1.0882, "step": 3904 }, { "epoch": 2.41, "learning_rate": 6.619266055045871e-05, "loss": 1.197, "step": 3905 }, { "epoch": 2.41, "learning_rate": 6.612385321100917e-05, "loss": 1.0708, "step": 3906 }, { "epoch": 2.41, "learning_rate": 6.605504587155963e-05, "loss": 1.1216, "step": 3907 }, { "epoch": 2.41, "learning_rate": 6.598623853211008e-05, "loss": 1.1411, "step": 3908 }, { "epoch": 2.41, "learning_rate": 6.591743119266054e-05, "loss": 1.1111, "step": 3909 }, { "epoch": 2.41, "learning_rate": 6.5848623853211e-05, "loss": 1.1304, "step": 3910 }, { "epoch": 2.41, "learning_rate": 6.577981651376146e-05, "loss": 1.0557, "step": 3911 }, { "epoch": 2.41, "learning_rate": 6.571100917431191e-05, "loss": 1.0124, "step": 3912 }, { "epoch": 2.42, "learning_rate": 6.564220183486237e-05, "loss": 1.0451, "step": 3913 }, { "epoch": 2.42, "learning_rate": 6.557339449541284e-05, "loss": 0.9642, "step": 3914 }, { "epoch": 2.42, "learning_rate": 6.55045871559633e-05, "loss": 1.0452, "step": 3915 }, { "epoch": 2.42, "learning_rate": 6.543577981651376e-05, "loss": 0.8989, "step": 3916 }, { "epoch": 2.42, "learning_rate": 6.536697247706422e-05, "loss": 1.001, "step": 3917 }, { "epoch": 2.42, "learning_rate": 6.529816513761468e-05, "loss": 1.017, "step": 3918 }, { "epoch": 2.42, "learning_rate": 6.522935779816514e-05, "loss": 0.86, "step": 3919 }, { "epoch": 2.42, "learning_rate": 6.516055045871559e-05, "loss": 0.9105, "step": 3920 }, { "epoch": 2.42, "learning_rate": 6.509174311926605e-05, "loss": 0.9058, "step": 3921 }, { "epoch": 2.42, "learning_rate": 6.502293577981651e-05, "loss": 0.7566, "step": 3922 }, { "epoch": 2.42, "learning_rate": 6.495412844036697e-05, "loss": 0.8233, "step": 3923 }, { "epoch": 2.42, "learning_rate": 6.488532110091742e-05, "loss": 0.8129, "step": 3924 }, { "epoch": 2.42, "learning_rate": 6.481651376146788e-05, "loss": 0.6511, "step": 3925 }, { "epoch": 2.42, "learning_rate": 6.474770642201834e-05, "loss": 0.7709, "step": 3926 }, { "epoch": 2.42, "learning_rate": 6.46788990825688e-05, "loss": 0.7738, "step": 3927 }, { "epoch": 2.42, "learning_rate": 6.461009174311925e-05, "loss": 0.7771, "step": 3928 }, { "epoch": 2.43, "learning_rate": 6.454128440366971e-05, "loss": 0.7731, "step": 3929 }, { "epoch": 2.43, "learning_rate": 6.447247706422017e-05, "loss": 0.6962, "step": 3930 }, { "epoch": 2.43, "learning_rate": 6.440366972477064e-05, "loss": 0.6278, "step": 3931 }, { "epoch": 2.43, "learning_rate": 6.43348623853211e-05, "loss": 0.619, "step": 3932 }, { "epoch": 2.43, "learning_rate": 6.426605504587156e-05, "loss": 0.6599, "step": 3933 }, { "epoch": 2.43, "learning_rate": 6.419724770642202e-05, "loss": 0.6381, "step": 3934 }, { "epoch": 2.43, "learning_rate": 6.412844036697248e-05, "loss": 0.6831, "step": 3935 }, { "epoch": 2.43, "learning_rate": 6.405963302752293e-05, "loss": 0.6431, "step": 3936 }, { "epoch": 2.43, "learning_rate": 6.399082568807339e-05, "loss": 0.4903, "step": 3937 }, { "epoch": 2.43, "learning_rate": 6.392201834862385e-05, "loss": 0.5067, "step": 3938 }, { "epoch": 2.43, "learning_rate": 6.385321100917431e-05, "loss": 0.5121, "step": 3939 }, { "epoch": 2.43, "learning_rate": 6.378440366972476e-05, "loss": 0.5709, "step": 3940 }, { "epoch": 2.43, "learning_rate": 6.371559633027522e-05, "loss": 1.9488, "step": 3941 }, { "epoch": 2.43, "learning_rate": 6.364678899082568e-05, "loss": 1.7386, "step": 3942 }, { "epoch": 2.43, "learning_rate": 6.357798165137614e-05, "loss": 1.5199, "step": 3943 }, { "epoch": 2.43, "learning_rate": 6.350917431192659e-05, "loss": 1.442, "step": 3944 }, { "epoch": 2.44, "learning_rate": 6.344036697247705e-05, "loss": 1.3376, "step": 3945 }, { "epoch": 2.44, "learning_rate": 6.337155963302751e-05, "loss": 1.376, "step": 3946 }, { "epoch": 2.44, "learning_rate": 6.330275229357798e-05, "loss": 1.2876, "step": 3947 }, { "epoch": 2.44, "learning_rate": 6.323394495412844e-05, "loss": 1.1852, "step": 3948 }, { "epoch": 2.44, "learning_rate": 6.31651376146789e-05, "loss": 1.4057, "step": 3949 }, { "epoch": 2.44, "learning_rate": 6.309633027522936e-05, "loss": 1.2245, "step": 3950 }, { "epoch": 2.44, "learning_rate": 6.302752293577982e-05, "loss": 1.0874, "step": 3951 }, { "epoch": 2.44, "learning_rate": 6.295871559633027e-05, "loss": 1.185, "step": 3952 }, { "epoch": 2.44, "learning_rate": 6.288990825688073e-05, "loss": 1.1653, "step": 3953 }, { "epoch": 2.44, "learning_rate": 6.282110091743119e-05, "loss": 1.1425, "step": 3954 }, { "epoch": 2.44, "learning_rate": 6.275229357798165e-05, "loss": 1.1903, "step": 3955 }, { "epoch": 2.44, "learning_rate": 6.26834862385321e-05, "loss": 1.094, "step": 3956 }, { "epoch": 2.44, "learning_rate": 6.261467889908256e-05, "loss": 1.1175, "step": 3957 }, { "epoch": 2.44, "learning_rate": 6.254587155963302e-05, "loss": 0.9874, "step": 3958 }, { "epoch": 2.44, "learning_rate": 6.247706422018348e-05, "loss": 1.056, "step": 3959 }, { "epoch": 2.44, "learning_rate": 6.240825688073393e-05, "loss": 1.0672, "step": 3960 }, { "epoch": 2.45, "learning_rate": 6.233944954128439e-05, "loss": 1.1297, "step": 3961 }, { "epoch": 2.45, "learning_rate": 6.227064220183485e-05, "loss": 1.0381, "step": 3962 }, { "epoch": 2.45, "learning_rate": 6.220183486238532e-05, "loss": 0.9999, "step": 3963 }, { "epoch": 2.45, "learning_rate": 6.213302752293578e-05, "loss": 0.9969, "step": 3964 }, { "epoch": 2.45, "learning_rate": 6.206422018348624e-05, "loss": 1.1175, "step": 3965 }, { "epoch": 2.45, "learning_rate": 6.19954128440367e-05, "loss": 1.0107, "step": 3966 }, { "epoch": 2.45, "learning_rate": 6.192660550458716e-05, "loss": 1.0708, "step": 3967 }, { "epoch": 2.45, "learning_rate": 6.185779816513761e-05, "loss": 1.0417, "step": 3968 }, { "epoch": 2.45, "learning_rate": 6.178899082568807e-05, "loss": 0.9759, "step": 3969 }, { "epoch": 2.45, "learning_rate": 6.172018348623853e-05, "loss": 0.9711, "step": 3970 }, { "epoch": 2.45, "learning_rate": 6.165137614678899e-05, "loss": 0.9798, "step": 3971 }, { "epoch": 2.45, "learning_rate": 6.158256880733944e-05, "loss": 0.9775, "step": 3972 }, { "epoch": 2.45, "learning_rate": 6.15137614678899e-05, "loss": 0.8265, "step": 3973 }, { "epoch": 2.45, "learning_rate": 6.144495412844036e-05, "loss": 0.7628, "step": 3974 }, { "epoch": 2.45, "learning_rate": 6.137614678899082e-05, "loss": 0.8038, "step": 3975 }, { "epoch": 2.45, "learning_rate": 6.130733944954127e-05, "loss": 0.7388, "step": 3976 }, { "epoch": 2.45, "learning_rate": 6.123853211009173e-05, "loss": 0.8612, "step": 3977 }, { "epoch": 2.46, "learning_rate": 6.11697247706422e-05, "loss": 0.7079, "step": 3978 }, { "epoch": 2.46, "learning_rate": 6.110091743119266e-05, "loss": 0.7777, "step": 3979 }, { "epoch": 2.46, "learning_rate": 6.103211009174311e-05, "loss": 0.7212, "step": 3980 }, { "epoch": 2.46, "learning_rate": 6.096330275229357e-05, "loss": 0.6824, "step": 3981 }, { "epoch": 2.46, "learning_rate": 6.089449541284403e-05, "loss": 0.6714, "step": 3982 }, { "epoch": 2.46, "learning_rate": 6.082568807339449e-05, "loss": 0.6988, "step": 3983 }, { "epoch": 2.46, "learning_rate": 6.075688073394495e-05, "loss": 0.611, "step": 3984 }, { "epoch": 2.46, "learning_rate": 6.068807339449541e-05, "loss": 0.5702, "step": 3985 }, { "epoch": 2.46, "learning_rate": 6.061926605504587e-05, "loss": 0.5123, "step": 3986 }, { "epoch": 2.46, "learning_rate": 6.055045871559633e-05, "loss": 0.584, "step": 3987 }, { "epoch": 2.46, "learning_rate": 6.048165137614678e-05, "loss": 0.5251, "step": 3988 }, { "epoch": 2.46, "learning_rate": 6.041284403669724e-05, "loss": 0.5232, "step": 3989 }, { "epoch": 2.46, "learning_rate": 6.03440366972477e-05, "loss": 0.6507, "step": 3990 }, { "epoch": 2.46, "learning_rate": 6.027522935779816e-05, "loss": 1.8332, "step": 3991 }, { "epoch": 2.46, "learning_rate": 6.020642201834862e-05, "loss": 1.6924, "step": 3992 }, { "epoch": 2.46, "learning_rate": 6.013761467889908e-05, "loss": 1.4632, "step": 3993 }, { "epoch": 2.47, "learning_rate": 6.006880733944954e-05, "loss": 1.3165, "step": 3994 }, { "epoch": 2.47, "learning_rate": 5.9999999999999995e-05, "loss": 1.4082, "step": 3995 }, { "epoch": 2.47, "learning_rate": 5.993119266055045e-05, "loss": 1.3622, "step": 3996 }, { "epoch": 2.47, "learning_rate": 5.986238532110091e-05, "loss": 1.2599, "step": 3997 }, { "epoch": 2.47, "learning_rate": 5.979357798165137e-05, "loss": 1.2599, "step": 3998 }, { "epoch": 2.47, "learning_rate": 5.972477064220183e-05, "loss": 1.3481, "step": 3999 }, { "epoch": 2.47, "learning_rate": 5.965596330275229e-05, "loss": 1.2244, "step": 4000 }, { "epoch": 2.47, "eval_bleu": 3.550386321342364e-14, "eval_loss": 1.794046401977539, "eval_runtime": 2535.6975, "eval_samples_per_second": 5.821, "eval_steps_per_second": 0.728, "step": 4000 }, { "epoch": 2.47, "learning_rate": 5.958715596330275e-05, "loss": 1.2282, "step": 4001 }, { "epoch": 2.47, "learning_rate": 5.951834862385321e-05, "loss": 1.2254, "step": 4002 }, { "epoch": 2.47, "learning_rate": 5.9449541284403665e-05, "loss": 1.2155, "step": 4003 }, { "epoch": 2.47, "learning_rate": 5.938073394495412e-05, "loss": 1.2088, "step": 4004 }, { "epoch": 2.47, "learning_rate": 5.931192660550458e-05, "loss": 1.1898, "step": 4005 }, { "epoch": 2.47, "learning_rate": 5.924311926605504e-05, "loss": 1.1753, "step": 4006 }, { "epoch": 2.47, "learning_rate": 5.91743119266055e-05, "loss": 1.1453, "step": 4007 }, { "epoch": 2.47, "learning_rate": 5.910550458715596e-05, "loss": 1.1096, "step": 4008 }, { "epoch": 2.47, "learning_rate": 5.903669724770642e-05, "loss": 1.0013, "step": 4009 }, { "epoch": 2.48, "learning_rate": 5.896788990825688e-05, "loss": 1.1856, "step": 4010 }, { "epoch": 2.48, "learning_rate": 5.8899082568807334e-05, "loss": 1.0922, "step": 4011 }, { "epoch": 2.48, "learning_rate": 5.883027522935779e-05, "loss": 0.9892, "step": 4012 }, { "epoch": 2.48, "learning_rate": 5.876146788990825e-05, "loss": 1.0373, "step": 4013 }, { "epoch": 2.48, "learning_rate": 5.869266055045871e-05, "loss": 1.0258, "step": 4014 }, { "epoch": 2.48, "learning_rate": 5.862385321100917e-05, "loss": 1.0715, "step": 4015 }, { "epoch": 2.48, "learning_rate": 5.855504587155963e-05, "loss": 0.9778, "step": 4016 }, { "epoch": 2.48, "learning_rate": 5.848623853211009e-05, "loss": 0.8886, "step": 4017 }, { "epoch": 2.48, "learning_rate": 5.841743119266055e-05, "loss": 1.0301, "step": 4018 }, { "epoch": 2.48, "learning_rate": 5.8348623853211004e-05, "loss": 0.9745, "step": 4019 }, { "epoch": 2.48, "learning_rate": 5.827981651376146e-05, "loss": 0.8738, "step": 4020 }, { "epoch": 2.48, "learning_rate": 5.821100917431192e-05, "loss": 0.8567, "step": 4021 }, { "epoch": 2.48, "learning_rate": 5.814220183486238e-05, "loss": 0.8669, "step": 4022 }, { "epoch": 2.48, "learning_rate": 5.807339449541284e-05, "loss": 0.8761, "step": 4023 }, { "epoch": 2.48, "learning_rate": 5.80045871559633e-05, "loss": 0.817, "step": 4024 }, { "epoch": 2.48, "learning_rate": 5.793577981651376e-05, "loss": 0.7875, "step": 4025 }, { "epoch": 2.49, "learning_rate": 5.786697247706422e-05, "loss": 0.8503, "step": 4026 }, { "epoch": 2.49, "learning_rate": 5.7798165137614674e-05, "loss": 0.8447, "step": 4027 }, { "epoch": 2.49, "learning_rate": 5.772935779816513e-05, "loss": 0.7419, "step": 4028 }, { "epoch": 2.49, "learning_rate": 5.766055045871559e-05, "loss": 0.6784, "step": 4029 }, { "epoch": 2.49, "learning_rate": 5.759174311926605e-05, "loss": 0.6791, "step": 4030 }, { "epoch": 2.49, "learning_rate": 5.752293577981651e-05, "loss": 0.6456, "step": 4031 }, { "epoch": 2.49, "learning_rate": 5.745412844036697e-05, "loss": 0.7289, "step": 4032 }, { "epoch": 2.49, "learning_rate": 5.738532110091743e-05, "loss": 0.7594, "step": 4033 }, { "epoch": 2.49, "learning_rate": 5.731651376146788e-05, "loss": 0.5625, "step": 4034 }, { "epoch": 2.49, "learning_rate": 5.7247706422018344e-05, "loss": 0.6974, "step": 4035 }, { "epoch": 2.49, "learning_rate": 5.71788990825688e-05, "loss": 0.5923, "step": 4036 }, { "epoch": 2.49, "learning_rate": 5.711009174311926e-05, "loss": 0.6235, "step": 4037 }, { "epoch": 2.49, "learning_rate": 5.704128440366972e-05, "loss": 0.5517, "step": 4038 }, { "epoch": 2.49, "learning_rate": 5.697247706422018e-05, "loss": 0.5743, "step": 4039 }, { "epoch": 2.49, "learning_rate": 5.690366972477064e-05, "loss": 0.5629, "step": 4040 }, { "epoch": 2.49, "learning_rate": 5.68348623853211e-05, "loss": 1.781, "step": 4041 }, { "epoch": 2.5, "learning_rate": 5.676605504587155e-05, "loss": 1.5586, "step": 4042 }, { "epoch": 2.5, "learning_rate": 5.6697247706422014e-05, "loss": 1.1529, "step": 4043 }, { "epoch": 2.5, "learning_rate": 5.662844036697247e-05, "loss": 1.323, "step": 4044 }, { "epoch": 2.5, "learning_rate": 5.655963302752293e-05, "loss": 1.2483, "step": 4045 }, { "epoch": 2.5, "learning_rate": 5.649082568807339e-05, "loss": 1.3055, "step": 4046 }, { "epoch": 2.5, "learning_rate": 5.642201834862385e-05, "loss": 1.2784, "step": 4047 }, { "epoch": 2.5, "learning_rate": 5.6353211009174307e-05, "loss": 1.189, "step": 4048 }, { "epoch": 2.5, "learning_rate": 5.628440366972477e-05, "loss": 1.231, "step": 4049 }, { "epoch": 2.5, "learning_rate": 5.621559633027522e-05, "loss": 1.2197, "step": 4050 }, { "epoch": 2.5, "learning_rate": 5.6146788990825684e-05, "loss": 1.1427, "step": 4051 }, { "epoch": 2.5, "learning_rate": 5.607798165137614e-05, "loss": 1.0593, "step": 4052 }, { "epoch": 2.5, "learning_rate": 5.60091743119266e-05, "loss": 1.0903, "step": 4053 }, { "epoch": 2.5, "learning_rate": 5.594036697247706e-05, "loss": 1.0469, "step": 4054 }, { "epoch": 2.5, "learning_rate": 5.587155963302752e-05, "loss": 1.1623, "step": 4055 }, { "epoch": 2.5, "learning_rate": 5.5802752293577976e-05, "loss": 1.0757, "step": 4056 }, { "epoch": 2.5, "learning_rate": 5.573394495412844e-05, "loss": 1.1588, "step": 4057 }, { "epoch": 2.5, "learning_rate": 5.566513761467889e-05, "loss": 1.127, "step": 4058 }, { "epoch": 2.51, "learning_rate": 5.5596330275229353e-05, "loss": 0.9677, "step": 4059 }, { "epoch": 2.51, "learning_rate": 5.552752293577981e-05, "loss": 1.0443, "step": 4060 }, { "epoch": 2.51, "learning_rate": 5.545871559633027e-05, "loss": 1.0866, "step": 4061 }, { "epoch": 2.51, "learning_rate": 5.538990825688073e-05, "loss": 1.1641, "step": 4062 }, { "epoch": 2.51, "learning_rate": 5.532110091743119e-05, "loss": 1.075, "step": 4063 }, { "epoch": 2.51, "learning_rate": 5.5252293577981646e-05, "loss": 1.0053, "step": 4064 }, { "epoch": 2.51, "learning_rate": 5.518348623853211e-05, "loss": 0.8862, "step": 4065 }, { "epoch": 2.51, "learning_rate": 5.511467889908256e-05, "loss": 0.9135, "step": 4066 }, { "epoch": 2.51, "learning_rate": 5.504587155963302e-05, "loss": 0.922, "step": 4067 }, { "epoch": 2.51, "learning_rate": 5.497706422018348e-05, "loss": 0.8063, "step": 4068 }, { "epoch": 2.51, "learning_rate": 5.490825688073394e-05, "loss": 0.9106, "step": 4069 }, { "epoch": 2.51, "learning_rate": 5.48394495412844e-05, "loss": 0.829, "step": 4070 }, { "epoch": 2.51, "learning_rate": 5.477064220183486e-05, "loss": 1.0013, "step": 4071 }, { "epoch": 2.51, "learning_rate": 5.4701834862385316e-05, "loss": 0.8004, "step": 4072 }, { "epoch": 2.51, "learning_rate": 5.463302752293577e-05, "loss": 0.8024, "step": 4073 }, { "epoch": 2.51, "learning_rate": 5.456422018348623e-05, "loss": 0.7838, "step": 4074 }, { "epoch": 2.52, "learning_rate": 5.449541284403669e-05, "loss": 0.8898, "step": 4075 }, { "epoch": 2.52, "learning_rate": 5.442660550458715e-05, "loss": 0.7599, "step": 4076 }, { "epoch": 2.52, "learning_rate": 5.435779816513761e-05, "loss": 0.7727, "step": 4077 }, { "epoch": 2.52, "learning_rate": 5.428899082568807e-05, "loss": 0.7514, "step": 4078 }, { "epoch": 2.52, "learning_rate": 5.422018348623853e-05, "loss": 0.7568, "step": 4079 }, { "epoch": 2.52, "learning_rate": 5.4151376146788986e-05, "loss": 0.6024, "step": 4080 }, { "epoch": 2.52, "learning_rate": 5.408256880733944e-05, "loss": 0.5901, "step": 4081 }, { "epoch": 2.52, "learning_rate": 5.40137614678899e-05, "loss": 0.6672, "step": 4082 }, { "epoch": 2.52, "learning_rate": 5.394495412844036e-05, "loss": 0.7881, "step": 4083 }, { "epoch": 2.52, "learning_rate": 5.387614678899082e-05, "loss": 0.5734, "step": 4084 }, { "epoch": 2.52, "learning_rate": 5.380733944954128e-05, "loss": 0.6074, "step": 4085 }, { "epoch": 2.52, "learning_rate": 5.373853211009174e-05, "loss": 0.5452, "step": 4086 }, { "epoch": 2.52, "learning_rate": 5.36697247706422e-05, "loss": 0.5534, "step": 4087 }, { "epoch": 2.52, "learning_rate": 5.3600917431192656e-05, "loss": 0.571, "step": 4088 }, { "epoch": 2.52, "learning_rate": 5.353211009174311e-05, "loss": 0.4237, "step": 4089 }, { "epoch": 2.52, "learning_rate": 5.346330275229357e-05, "loss": 0.5453, "step": 4090 }, { "epoch": 2.53, "learning_rate": 5.339449541284403e-05, "loss": 1.7752, "step": 4091 }, { "epoch": 2.53, "learning_rate": 5.332568807339449e-05, "loss": 1.5751, "step": 4092 }, { "epoch": 2.53, "learning_rate": 5.325688073394495e-05, "loss": 1.3436, "step": 4093 }, { "epoch": 2.53, "learning_rate": 5.318807339449541e-05, "loss": 1.3601, "step": 4094 }, { "epoch": 2.53, "learning_rate": 5.311926605504587e-05, "loss": 1.4734, "step": 4095 }, { "epoch": 2.53, "learning_rate": 5.3050458715596326e-05, "loss": 1.3058, "step": 4096 }, { "epoch": 2.53, "learning_rate": 5.298165137614678e-05, "loss": 1.2182, "step": 4097 }, { "epoch": 2.53, "learning_rate": 5.291284403669724e-05, "loss": 1.2523, "step": 4098 }, { "epoch": 2.53, "learning_rate": 5.28440366972477e-05, "loss": 1.3629, "step": 4099 }, { "epoch": 2.53, "learning_rate": 5.277522935779816e-05, "loss": 1.3259, "step": 4100 }, { "epoch": 2.53, "learning_rate": 5.270642201834862e-05, "loss": 1.3156, "step": 4101 }, { "epoch": 2.53, "learning_rate": 5.263761467889908e-05, "loss": 1.1659, "step": 4102 }, { "epoch": 2.53, "learning_rate": 5.256880733944954e-05, "loss": 1.17, "step": 4103 }, { "epoch": 2.53, "learning_rate": 5.2499999999999995e-05, "loss": 0.9798, "step": 4104 }, { "epoch": 2.53, "learning_rate": 5.243119266055045e-05, "loss": 1.2055, "step": 4105 }, { "epoch": 2.53, "learning_rate": 5.236238532110091e-05, "loss": 1.1754, "step": 4106 }, { "epoch": 2.54, "learning_rate": 5.229357798165137e-05, "loss": 1.0331, "step": 4107 }, { "epoch": 2.54, "learning_rate": 5.222477064220183e-05, "loss": 1.0595, "step": 4108 }, { "epoch": 2.54, "learning_rate": 5.215596330275229e-05, "loss": 1.0183, "step": 4109 }, { "epoch": 2.54, "learning_rate": 5.208715596330275e-05, "loss": 1.0931, "step": 4110 }, { "epoch": 2.54, "learning_rate": 5.201834862385321e-05, "loss": 1.0373, "step": 4111 }, { "epoch": 2.54, "learning_rate": 5.194954128440366e-05, "loss": 0.924, "step": 4112 }, { "epoch": 2.54, "learning_rate": 5.188073394495412e-05, "loss": 0.9767, "step": 4113 }, { "epoch": 2.54, "learning_rate": 5.181192660550458e-05, "loss": 0.9825, "step": 4114 }, { "epoch": 2.54, "learning_rate": 5.174311926605504e-05, "loss": 1.0085, "step": 4115 }, { "epoch": 2.54, "learning_rate": 5.16743119266055e-05, "loss": 0.8032, "step": 4116 }, { "epoch": 2.54, "learning_rate": 5.160550458715596e-05, "loss": 0.9641, "step": 4117 }, { "epoch": 2.54, "learning_rate": 5.153669724770642e-05, "loss": 0.918, "step": 4118 }, { "epoch": 2.54, "learning_rate": 5.146788990825688e-05, "loss": 0.8796, "step": 4119 }, { "epoch": 2.54, "learning_rate": 5.139908256880733e-05, "loss": 0.8951, "step": 4120 }, { "epoch": 2.54, "learning_rate": 5.133027522935779e-05, "loss": 0.8549, "step": 4121 }, { "epoch": 2.54, "learning_rate": 5.126146788990825e-05, "loss": 0.8465, "step": 4122 }, { "epoch": 2.55, "learning_rate": 5.119266055045871e-05, "loss": 0.914, "step": 4123 }, { "epoch": 2.55, "learning_rate": 5.1123853211009167e-05, "loss": 0.7472, "step": 4124 }, { "epoch": 2.55, "learning_rate": 5.105504587155963e-05, "loss": 0.7595, "step": 4125 }, { "epoch": 2.55, "learning_rate": 5.098623853211009e-05, "loss": 0.8311, "step": 4126 }, { "epoch": 2.55, "learning_rate": 5.091743119266055e-05, "loss": 0.7757, "step": 4127 }, { "epoch": 2.55, "learning_rate": 5.0848623853211e-05, "loss": 0.8456, "step": 4128 }, { "epoch": 2.55, "learning_rate": 5.077981651376146e-05, "loss": 0.7343, "step": 4129 }, { "epoch": 2.55, "learning_rate": 5.071100917431192e-05, "loss": 0.7085, "step": 4130 }, { "epoch": 2.55, "learning_rate": 5.064220183486238e-05, "loss": 0.7026, "step": 4131 }, { "epoch": 2.55, "learning_rate": 5.0573394495412836e-05, "loss": 0.6747, "step": 4132 }, { "epoch": 2.55, "learning_rate": 5.05045871559633e-05, "loss": 0.6524, "step": 4133 }, { "epoch": 2.55, "learning_rate": 5.043577981651376e-05, "loss": 0.6328, "step": 4134 }, { "epoch": 2.55, "learning_rate": 5.036697247706422e-05, "loss": 0.5689, "step": 4135 }, { "epoch": 2.55, "learning_rate": 5.029816513761467e-05, "loss": 0.5405, "step": 4136 }, { "epoch": 2.55, "learning_rate": 5.022935779816513e-05, "loss": 0.5766, "step": 4137 }, { "epoch": 2.55, "learning_rate": 5.016055045871559e-05, "loss": 0.6067, "step": 4138 }, { "epoch": 2.55, "learning_rate": 5.009174311926605e-05, "loss": 0.4424, "step": 4139 }, { "epoch": 2.56, "learning_rate": 5.0022935779816506e-05, "loss": 0.5854, "step": 4140 }, { "epoch": 2.56, "learning_rate": 4.995412844036697e-05, "loss": 1.7243, "step": 4141 }, { "epoch": 2.56, "learning_rate": 4.988532110091743e-05, "loss": 1.5123, "step": 4142 }, { "epoch": 2.56, "learning_rate": 4.981651376146789e-05, "loss": 1.4056, "step": 4143 }, { "epoch": 2.56, "learning_rate": 4.974770642201834e-05, "loss": 1.3029, "step": 4144 }, { "epoch": 2.56, "learning_rate": 4.96788990825688e-05, "loss": 1.2879, "step": 4145 }, { "epoch": 2.56, "learning_rate": 4.961009174311926e-05, "loss": 1.1951, "step": 4146 }, { "epoch": 2.56, "learning_rate": 4.954128440366972e-05, "loss": 1.2032, "step": 4147 }, { "epoch": 2.56, "learning_rate": 4.9472477064220176e-05, "loss": 1.1617, "step": 4148 }, { "epoch": 2.56, "learning_rate": 4.940366972477064e-05, "loss": 1.2706, "step": 4149 }, { "epoch": 2.56, "learning_rate": 4.93348623853211e-05, "loss": 1.1968, "step": 4150 }, { "epoch": 2.56, "learning_rate": 4.926605504587156e-05, "loss": 1.1593, "step": 4151 }, { "epoch": 2.56, "learning_rate": 4.919724770642201e-05, "loss": 1.1046, "step": 4152 }, { "epoch": 2.56, "learning_rate": 4.912844036697247e-05, "loss": 1.0335, "step": 4153 }, { "epoch": 2.56, "learning_rate": 4.905963302752293e-05, "loss": 0.9551, "step": 4154 }, { "epoch": 2.56, "learning_rate": 4.899082568807339e-05, "loss": 1.0888, "step": 4155 }, { "epoch": 2.57, "learning_rate": 4.8922018348623846e-05, "loss": 1.1191, "step": 4156 }, { "epoch": 2.57, "learning_rate": 4.885321100917431e-05, "loss": 1.0991, "step": 4157 }, { "epoch": 2.57, "learning_rate": 4.878440366972477e-05, "loss": 1.206, "step": 4158 }, { "epoch": 2.57, "learning_rate": 4.871559633027523e-05, "loss": 1.1113, "step": 4159 }, { "epoch": 2.57, "learning_rate": 4.864678899082568e-05, "loss": 1.0501, "step": 4160 }, { "epoch": 2.57, "learning_rate": 4.857798165137614e-05, "loss": 0.9874, "step": 4161 }, { "epoch": 2.57, "learning_rate": 4.85091743119266e-05, "loss": 1.1126, "step": 4162 }, { "epoch": 2.57, "learning_rate": 4.844036697247706e-05, "loss": 1.0347, "step": 4163 }, { "epoch": 2.57, "learning_rate": 4.8371559633027516e-05, "loss": 0.9851, "step": 4164 }, { "epoch": 2.57, "learning_rate": 4.830275229357798e-05, "loss": 0.9094, "step": 4165 }, { "epoch": 2.57, "learning_rate": 4.823394495412844e-05, "loss": 0.9265, "step": 4166 }, { "epoch": 2.57, "learning_rate": 4.81651376146789e-05, "loss": 0.9256, "step": 4167 }, { "epoch": 2.57, "learning_rate": 4.809633027522935e-05, "loss": 1.0236, "step": 4168 }, { "epoch": 2.57, "learning_rate": 4.802752293577981e-05, "loss": 0.8734, "step": 4169 }, { "epoch": 2.57, "learning_rate": 4.795871559633027e-05, "loss": 0.8474, "step": 4170 }, { "epoch": 2.57, "learning_rate": 4.788990825688073e-05, "loss": 0.91, "step": 4171 }, { "epoch": 2.58, "learning_rate": 4.7821100917431186e-05, "loss": 0.869, "step": 4172 }, { "epoch": 2.58, "learning_rate": 4.775229357798165e-05, "loss": 0.8564, "step": 4173 }, { "epoch": 2.58, "learning_rate": 4.768348623853211e-05, "loss": 0.8149, "step": 4174 }, { "epoch": 2.58, "learning_rate": 4.761467889908257e-05, "loss": 0.7767, "step": 4175 }, { "epoch": 2.58, "learning_rate": 4.754587155963302e-05, "loss": 0.8687, "step": 4176 }, { "epoch": 2.58, "learning_rate": 4.747706422018348e-05, "loss": 0.6592, "step": 4177 }, { "epoch": 2.58, "learning_rate": 4.740825688073394e-05, "loss": 0.7391, "step": 4178 }, { "epoch": 2.58, "learning_rate": 4.73394495412844e-05, "loss": 0.7144, "step": 4179 }, { "epoch": 2.58, "learning_rate": 4.7270642201834855e-05, "loss": 0.6974, "step": 4180 }, { "epoch": 2.58, "learning_rate": 4.720183486238532e-05, "loss": 0.7498, "step": 4181 }, { "epoch": 2.58, "learning_rate": 4.713302752293578e-05, "loss": 0.7182, "step": 4182 }, { "epoch": 2.58, "learning_rate": 4.706422018348624e-05, "loss": 0.6398, "step": 4183 }, { "epoch": 2.58, "learning_rate": 4.699541284403669e-05, "loss": 0.618, "step": 4184 }, { "epoch": 2.58, "learning_rate": 4.692660550458715e-05, "loss": 0.4744, "step": 4185 }, { "epoch": 2.58, "learning_rate": 4.685779816513761e-05, "loss": 0.5971, "step": 4186 }, { "epoch": 2.58, "learning_rate": 4.678899082568807e-05, "loss": 0.5964, "step": 4187 }, { "epoch": 2.59, "learning_rate": 4.6720183486238525e-05, "loss": 0.5752, "step": 4188 }, { "epoch": 2.59, "learning_rate": 4.6651376146788987e-05, "loss": 0.5247, "step": 4189 }, { "epoch": 2.59, "learning_rate": 4.658256880733945e-05, "loss": 0.6126, "step": 4190 }, { "epoch": 2.59, "learning_rate": 4.651376146788991e-05, "loss": 1.6758, "step": 4191 }, { "epoch": 2.59, "learning_rate": 4.644495412844036e-05, "loss": 1.4452, "step": 4192 }, { "epoch": 2.59, "learning_rate": 4.637614678899082e-05, "loss": 1.4002, "step": 4193 }, { "epoch": 2.59, "learning_rate": 4.630733944954128e-05, "loss": 1.3457, "step": 4194 }, { "epoch": 2.59, "learning_rate": 4.623853211009174e-05, "loss": 1.3434, "step": 4195 }, { "epoch": 2.59, "learning_rate": 4.6169724770642195e-05, "loss": 1.2629, "step": 4196 }, { "epoch": 2.59, "learning_rate": 4.6100917431192656e-05, "loss": 1.3153, "step": 4197 }, { "epoch": 2.59, "learning_rate": 4.603211009174312e-05, "loss": 1.2419, "step": 4198 }, { "epoch": 2.59, "learning_rate": 4.596330275229358e-05, "loss": 1.2093, "step": 4199 }, { "epoch": 2.59, "learning_rate": 4.589449541284403e-05, "loss": 1.1671, "step": 4200 }, { "epoch": 2.59, "learning_rate": 4.582568807339449e-05, "loss": 1.1328, "step": 4201 }, { "epoch": 2.59, "learning_rate": 4.575688073394495e-05, "loss": 1.0832, "step": 4202 }, { "epoch": 2.59, "learning_rate": 4.568807339449541e-05, "loss": 1.0791, "step": 4203 }, { "epoch": 2.6, "learning_rate": 4.5619266055045865e-05, "loss": 0.9577, "step": 4204 }, { "epoch": 2.6, "learning_rate": 4.5550458715596326e-05, "loss": 1.0808, "step": 4205 }, { "epoch": 2.6, "learning_rate": 4.548165137614679e-05, "loss": 1.1195, "step": 4206 }, { "epoch": 2.6, "learning_rate": 4.541284403669725e-05, "loss": 0.9716, "step": 4207 }, { "epoch": 2.6, "learning_rate": 4.5344036697247696e-05, "loss": 1.0299, "step": 4208 }, { "epoch": 2.6, "learning_rate": 4.527522935779816e-05, "loss": 0.9451, "step": 4209 }, { "epoch": 2.6, "learning_rate": 4.520642201834862e-05, "loss": 0.8694, "step": 4210 }, { "epoch": 2.6, "learning_rate": 4.513761467889908e-05, "loss": 1.0465, "step": 4211 }, { "epoch": 2.6, "learning_rate": 4.5068807339449535e-05, "loss": 1.0414, "step": 4212 }, { "epoch": 2.6, "learning_rate": 4.4999999999999996e-05, "loss": 0.9537, "step": 4213 }, { "epoch": 2.6, "learning_rate": 4.493119266055046e-05, "loss": 0.9595, "step": 4214 }, { "epoch": 2.6, "learning_rate": 4.486238532110092e-05, "loss": 0.8366, "step": 4215 }, { "epoch": 2.6, "learning_rate": 4.4793577981651366e-05, "loss": 0.8978, "step": 4216 }, { "epoch": 2.6, "learning_rate": 4.472477064220183e-05, "loss": 0.9412, "step": 4217 }, { "epoch": 2.6, "learning_rate": 4.465596330275229e-05, "loss": 1.0137, "step": 4218 }, { "epoch": 2.6, "learning_rate": 4.458715596330275e-05, "loss": 0.8054, "step": 4219 }, { "epoch": 2.6, "learning_rate": 4.4518348623853205e-05, "loss": 0.9263, "step": 4220 }, { "epoch": 2.61, "learning_rate": 4.4449541284403666e-05, "loss": 0.7642, "step": 4221 }, { "epoch": 2.61, "learning_rate": 4.438073394495413e-05, "loss": 0.7591, "step": 4222 }, { "epoch": 2.61, "learning_rate": 4.431192660550459e-05, "loss": 0.8161, "step": 4223 }, { "epoch": 2.61, "learning_rate": 4.4243119266055036e-05, "loss": 0.7657, "step": 4224 }, { "epoch": 2.61, "learning_rate": 4.41743119266055e-05, "loss": 0.9252, "step": 4225 }, { "epoch": 2.61, "learning_rate": 4.410550458715596e-05, "loss": 0.725, "step": 4226 }, { "epoch": 2.61, "learning_rate": 4.403669724770642e-05, "loss": 0.6823, "step": 4227 }, { "epoch": 2.61, "learning_rate": 4.3967889908256874e-05, "loss": 0.7195, "step": 4228 }, { "epoch": 2.61, "learning_rate": 4.3899082568807336e-05, "loss": 0.6832, "step": 4229 }, { "epoch": 2.61, "learning_rate": 4.38302752293578e-05, "loss": 0.7753, "step": 4230 }, { "epoch": 2.61, "learning_rate": 4.376146788990826e-05, "loss": 0.6637, "step": 4231 }, { "epoch": 2.61, "learning_rate": 4.3692660550458706e-05, "loss": 0.6432, "step": 4232 }, { "epoch": 2.61, "learning_rate": 4.362385321100917e-05, "loss": 0.6546, "step": 4233 }, { "epoch": 2.61, "learning_rate": 4.355504587155963e-05, "loss": 0.5591, "step": 4234 }, { "epoch": 2.61, "learning_rate": 4.348623853211009e-05, "loss": 0.477, "step": 4235 }, { "epoch": 2.61, "learning_rate": 4.3417431192660544e-05, "loss": 0.5643, "step": 4236 }, { "epoch": 2.62, "learning_rate": 4.3348623853211006e-05, "loss": 0.5023, "step": 4237 }, { "epoch": 2.62, "learning_rate": 4.327981651376147e-05, "loss": 0.4696, "step": 4238 }, { "epoch": 2.62, "learning_rate": 4.321100917431193e-05, "loss": 0.4686, "step": 4239 }, { "epoch": 2.62, "learning_rate": 4.3142201834862376e-05, "loss": 0.5683, "step": 4240 }, { "epoch": 2.62, "learning_rate": 4.307339449541284e-05, "loss": 1.6947, "step": 4241 }, { "epoch": 2.62, "learning_rate": 4.30045871559633e-05, "loss": 1.579, "step": 4242 }, { "epoch": 2.62, "learning_rate": 4.293577981651376e-05, "loss": 1.3698, "step": 4243 }, { "epoch": 2.62, "learning_rate": 4.2866972477064214e-05, "loss": 1.2538, "step": 4244 }, { "epoch": 2.62, "learning_rate": 4.2798165137614675e-05, "loss": 1.2957, "step": 4245 }, { "epoch": 2.62, "learning_rate": 4.272935779816514e-05, "loss": 1.3994, "step": 4246 }, { "epoch": 2.62, "learning_rate": 4.26605504587156e-05, "loss": 1.1448, "step": 4247 }, { "epoch": 2.62, "learning_rate": 4.2591743119266046e-05, "loss": 1.1225, "step": 4248 }, { "epoch": 2.62, "learning_rate": 4.252293577981651e-05, "loss": 1.1653, "step": 4249 }, { "epoch": 2.62, "learning_rate": 4.245412844036697e-05, "loss": 1.2382, "step": 4250 }, { "epoch": 2.62, "learning_rate": 4.238532110091743e-05, "loss": 1.1891, "step": 4251 }, { "epoch": 2.62, "learning_rate": 4.2316513761467884e-05, "loss": 1.1272, "step": 4252 }, { "epoch": 2.63, "learning_rate": 4.2247706422018345e-05, "loss": 1.0963, "step": 4253 }, { "epoch": 2.63, "learning_rate": 4.2178899082568807e-05, "loss": 1.1827, "step": 4254 }, { "epoch": 2.63, "learning_rate": 4.211009174311927e-05, "loss": 1.0944, "step": 4255 }, { "epoch": 2.63, "learning_rate": 4.2041284403669715e-05, "loss": 0.9591, "step": 4256 }, { "epoch": 2.63, "learning_rate": 4.197247706422018e-05, "loss": 1.0556, "step": 4257 }, { "epoch": 2.63, "learning_rate": 4.190366972477064e-05, "loss": 1.154, "step": 4258 }, { "epoch": 2.63, "learning_rate": 4.18348623853211e-05, "loss": 0.9204, "step": 4259 }, { "epoch": 2.63, "learning_rate": 4.1766055045871554e-05, "loss": 1.0697, "step": 4260 }, { "epoch": 2.63, "learning_rate": 4.1697247706422015e-05, "loss": 0.9823, "step": 4261 }, { "epoch": 2.63, "learning_rate": 4.1628440366972476e-05, "loss": 1.0943, "step": 4262 }, { "epoch": 2.63, "learning_rate": 4.155963302752294e-05, "loss": 1.0037, "step": 4263 }, { "epoch": 2.63, "learning_rate": 4.1490825688073385e-05, "loss": 1.0757, "step": 4264 }, { "epoch": 2.63, "learning_rate": 4.1422018348623847e-05, "loss": 1.0198, "step": 4265 }, { "epoch": 2.63, "learning_rate": 4.135321100917431e-05, "loss": 0.8978, "step": 4266 }, { "epoch": 2.63, "learning_rate": 4.128440366972477e-05, "loss": 0.9337, "step": 4267 }, { "epoch": 2.63, "learning_rate": 4.1215596330275224e-05, "loss": 0.8985, "step": 4268 }, { "epoch": 2.64, "learning_rate": 4.1146788990825685e-05, "loss": 0.9095, "step": 4269 }, { "epoch": 2.64, "learning_rate": 4.1077981651376146e-05, "loss": 0.8912, "step": 4270 }, { "epoch": 2.64, "learning_rate": 4.100917431192661e-05, "loss": 0.7621, "step": 4271 }, { "epoch": 2.64, "learning_rate": 4.0940366972477055e-05, "loss": 0.7871, "step": 4272 }, { "epoch": 2.64, "learning_rate": 4.0871559633027516e-05, "loss": 0.7856, "step": 4273 }, { "epoch": 2.64, "learning_rate": 4.080275229357798e-05, "loss": 0.8142, "step": 4274 }, { "epoch": 2.64, "learning_rate": 4.073394495412844e-05, "loss": 0.8532, "step": 4275 }, { "epoch": 2.64, "learning_rate": 4.0665137614678893e-05, "loss": 0.7337, "step": 4276 }, { "epoch": 2.64, "learning_rate": 4.0596330275229355e-05, "loss": 0.7877, "step": 4277 }, { "epoch": 2.64, "learning_rate": 4.0527522935779816e-05, "loss": 0.751, "step": 4278 }, { "epoch": 2.64, "learning_rate": 4.045871559633028e-05, "loss": 0.6837, "step": 4279 }, { "epoch": 2.64, "learning_rate": 4.0389908256880725e-05, "loss": 0.736, "step": 4280 }, { "epoch": 2.64, "learning_rate": 4.0321100917431186e-05, "loss": 0.6137, "step": 4281 }, { "epoch": 2.64, "learning_rate": 4.025229357798165e-05, "loss": 0.6763, "step": 4282 }, { "epoch": 2.64, "learning_rate": 4.018348623853211e-05, "loss": 0.6719, "step": 4283 }, { "epoch": 2.64, "learning_rate": 4.011467889908256e-05, "loss": 0.6475, "step": 4284 }, { "epoch": 2.65, "learning_rate": 4.0045871559633025e-05, "loss": 0.5891, "step": 4285 }, { "epoch": 2.65, "learning_rate": 3.9977064220183486e-05, "loss": 0.5518, "step": 4286 }, { "epoch": 2.65, "learning_rate": 3.990825688073394e-05, "loss": 0.5259, "step": 4287 }, { "epoch": 2.65, "learning_rate": 3.9839449541284395e-05, "loss": 0.414, "step": 4288 }, { "epoch": 2.65, "learning_rate": 3.9770642201834856e-05, "loss": 0.4134, "step": 4289 }, { "epoch": 2.65, "learning_rate": 3.970183486238532e-05, "loss": 0.5864, "step": 4290 }, { "epoch": 2.65, "learning_rate": 3.963302752293578e-05, "loss": 1.6473, "step": 4291 }, { "epoch": 2.65, "learning_rate": 3.956422018348623e-05, "loss": 1.5565, "step": 4292 }, { "epoch": 2.65, "learning_rate": 3.9495412844036694e-05, "loss": 1.336, "step": 4293 }, { "epoch": 2.65, "learning_rate": 3.9426605504587156e-05, "loss": 1.2156, "step": 4294 }, { "epoch": 2.65, "learning_rate": 3.935779816513761e-05, "loss": 1.2194, "step": 4295 }, { "epoch": 2.65, "learning_rate": 3.9288990825688065e-05, "loss": 1.1811, "step": 4296 }, { "epoch": 2.65, "learning_rate": 3.9220183486238526e-05, "loss": 1.2588, "step": 4297 }, { "epoch": 2.65, "learning_rate": 3.915137614678899e-05, "loss": 1.1802, "step": 4298 }, { "epoch": 2.65, "learning_rate": 3.908256880733945e-05, "loss": 1.1653, "step": 4299 }, { "epoch": 2.65, "learning_rate": 3.90137614678899e-05, "loss": 1.1736, "step": 4300 }, { "epoch": 2.65, "learning_rate": 3.8944954128440364e-05, "loss": 1.2001, "step": 4301 }, { "epoch": 2.66, "learning_rate": 3.8876146788990826e-05, "loss": 1.1908, "step": 4302 }, { "epoch": 2.66, "learning_rate": 3.880733944954128e-05, "loss": 1.0737, "step": 4303 }, { "epoch": 2.66, "learning_rate": 3.8738532110091735e-05, "loss": 1.0277, "step": 4304 }, { "epoch": 2.66, "learning_rate": 3.8669724770642196e-05, "loss": 1.1035, "step": 4305 }, { "epoch": 2.66, "learning_rate": 3.860091743119266e-05, "loss": 1.1159, "step": 4306 }, { "epoch": 2.66, "learning_rate": 3.853211009174312e-05, "loss": 1.0814, "step": 4307 }, { "epoch": 2.66, "learning_rate": 3.846330275229357e-05, "loss": 1.1012, "step": 4308 }, { "epoch": 2.66, "learning_rate": 3.8394495412844034e-05, "loss": 0.9931, "step": 4309 }, { "epoch": 2.66, "learning_rate": 3.8325688073394495e-05, "loss": 0.981, "step": 4310 }, { "epoch": 2.66, "learning_rate": 3.825688073394495e-05, "loss": 1.042, "step": 4311 }, { "epoch": 2.66, "learning_rate": 3.8188073394495404e-05, "loss": 0.9217, "step": 4312 }, { "epoch": 2.66, "learning_rate": 3.8119266055045866e-05, "loss": 1.0243, "step": 4313 }, { "epoch": 2.66, "learning_rate": 3.805045871559633e-05, "loss": 0.9812, "step": 4314 }, { "epoch": 2.66, "learning_rate": 3.798165137614679e-05, "loss": 0.9574, "step": 4315 }, { "epoch": 2.66, "learning_rate": 3.791284403669724e-05, "loss": 0.9598, "step": 4316 }, { "epoch": 2.66, "learning_rate": 3.7844036697247704e-05, "loss": 0.9847, "step": 4317 }, { "epoch": 2.67, "learning_rate": 3.7775229357798165e-05, "loss": 0.9159, "step": 4318 }, { "epoch": 2.67, "learning_rate": 3.770642201834862e-05, "loss": 0.8966, "step": 4319 }, { "epoch": 2.67, "learning_rate": 3.7637614678899074e-05, "loss": 0.9458, "step": 4320 }, { "epoch": 2.67, "learning_rate": 3.7568807339449535e-05, "loss": 0.7979, "step": 4321 }, { "epoch": 2.67, "learning_rate": 3.75e-05, "loss": 0.8373, "step": 4322 }, { "epoch": 2.67, "learning_rate": 3.743119266055046e-05, "loss": 0.7266, "step": 4323 }, { "epoch": 2.67, "learning_rate": 3.736238532110091e-05, "loss": 0.7917, "step": 4324 }, { "epoch": 2.67, "learning_rate": 3.7293577981651374e-05, "loss": 0.8114, "step": 4325 }, { "epoch": 2.67, "learning_rate": 3.722477064220183e-05, "loss": 0.7576, "step": 4326 }, { "epoch": 2.67, "learning_rate": 3.715596330275229e-05, "loss": 0.7017, "step": 4327 }, { "epoch": 2.67, "learning_rate": 3.708715596330275e-05, "loss": 0.6844, "step": 4328 }, { "epoch": 2.67, "learning_rate": 3.701834862385321e-05, "loss": 0.6881, "step": 4329 }, { "epoch": 2.67, "learning_rate": 3.6949541284403667e-05, "loss": 0.6883, "step": 4330 }, { "epoch": 2.67, "learning_rate": 3.688073394495413e-05, "loss": 0.6264, "step": 4331 }, { "epoch": 2.67, "learning_rate": 3.681192660550458e-05, "loss": 0.6504, "step": 4332 }, { "epoch": 2.67, "learning_rate": 3.6743119266055044e-05, "loss": 0.6262, "step": 4333 }, { "epoch": 2.68, "learning_rate": 3.66743119266055e-05, "loss": 0.6282, "step": 4334 }, { "epoch": 2.68, "learning_rate": 3.660550458715596e-05, "loss": 0.5891, "step": 4335 }, { "epoch": 2.68, "learning_rate": 3.653669724770642e-05, "loss": 0.4968, "step": 4336 }, { "epoch": 2.68, "learning_rate": 3.646788990825688e-05, "loss": 0.5388, "step": 4337 }, { "epoch": 2.68, "learning_rate": 3.6399082568807336e-05, "loss": 0.5422, "step": 4338 }, { "epoch": 2.68, "learning_rate": 3.63302752293578e-05, "loss": 0.5698, "step": 4339 }, { "epoch": 2.68, "learning_rate": 3.626146788990825e-05, "loss": 0.5959, "step": 4340 }, { "epoch": 2.68, "learning_rate": 3.6192660550458713e-05, "loss": 1.5995, "step": 4341 }, { "epoch": 2.68, "learning_rate": 3.612385321100917e-05, "loss": 1.4782, "step": 4342 }, { "epoch": 2.68, "learning_rate": 3.605504587155963e-05, "loss": 1.4813, "step": 4343 }, { "epoch": 2.68, "learning_rate": 3.598623853211009e-05, "loss": 1.4207, "step": 4344 }, { "epoch": 2.68, "learning_rate": 3.591743119266055e-05, "loss": 1.1731, "step": 4345 }, { "epoch": 2.68, "learning_rate": 3.5848623853211006e-05, "loss": 1.179, "step": 4346 }, { "epoch": 2.68, "learning_rate": 3.577981651376147e-05, "loss": 1.0944, "step": 4347 }, { "epoch": 2.68, "learning_rate": 3.571100917431192e-05, "loss": 1.1699, "step": 4348 }, { "epoch": 2.68, "learning_rate": 3.564220183486238e-05, "loss": 1.2315, "step": 4349 }, { "epoch": 2.69, "learning_rate": 3.557339449541284e-05, "loss": 1.1727, "step": 4350 }, { "epoch": 2.69, "learning_rate": 3.55045871559633e-05, "loss": 1.0606, "step": 4351 }, { "epoch": 2.69, "learning_rate": 3.543577981651376e-05, "loss": 1.1568, "step": 4352 }, { "epoch": 2.69, "learning_rate": 3.536697247706422e-05, "loss": 1.1571, "step": 4353 }, { "epoch": 2.69, "learning_rate": 3.5298165137614676e-05, "loss": 0.9995, "step": 4354 }, { "epoch": 2.69, "learning_rate": 3.522935779816514e-05, "loss": 1.0495, "step": 4355 }, { "epoch": 2.69, "learning_rate": 3.516055045871559e-05, "loss": 1.0534, "step": 4356 }, { "epoch": 2.69, "learning_rate": 3.509174311926605e-05, "loss": 0.9739, "step": 4357 }, { "epoch": 2.69, "learning_rate": 3.502293577981651e-05, "loss": 0.9951, "step": 4358 }, { "epoch": 2.69, "learning_rate": 3.495412844036697e-05, "loss": 1.0041, "step": 4359 }, { "epoch": 2.69, "learning_rate": 3.488532110091743e-05, "loss": 0.977, "step": 4360 }, { "epoch": 2.69, "learning_rate": 3.481651376146789e-05, "loss": 0.9856, "step": 4361 }, { "epoch": 2.69, "learning_rate": 3.4747706422018346e-05, "loss": 1.0146, "step": 4362 }, { "epoch": 2.69, "learning_rate": 3.467889908256881e-05, "loss": 0.9208, "step": 4363 }, { "epoch": 2.69, "learning_rate": 3.461009174311926e-05, "loss": 0.9545, "step": 4364 }, { "epoch": 2.69, "learning_rate": 3.454128440366972e-05, "loss": 0.9631, "step": 4365 }, { "epoch": 2.7, "learning_rate": 3.447247706422018e-05, "loss": 0.9365, "step": 4366 }, { "epoch": 2.7, "learning_rate": 3.440366972477064e-05, "loss": 1.0486, "step": 4367 }, { "epoch": 2.7, "learning_rate": 3.43348623853211e-05, "loss": 0.788, "step": 4368 }, { "epoch": 2.7, "learning_rate": 3.426605504587156e-05, "loss": 0.814, "step": 4369 }, { "epoch": 2.7, "learning_rate": 3.4197247706422016e-05, "loss": 0.8769, "step": 4370 }, { "epoch": 2.7, "learning_rate": 3.412844036697248e-05, "loss": 0.7888, "step": 4371 }, { "epoch": 2.7, "learning_rate": 3.405963302752293e-05, "loss": 0.8163, "step": 4372 }, { "epoch": 2.7, "learning_rate": 3.399082568807339e-05, "loss": 0.8102, "step": 4373 }, { "epoch": 2.7, "learning_rate": 3.392201834862385e-05, "loss": 0.8255, "step": 4374 }, { "epoch": 2.7, "learning_rate": 3.385321100917431e-05, "loss": 0.7503, "step": 4375 }, { "epoch": 2.7, "learning_rate": 3.378440366972477e-05, "loss": 0.6332, "step": 4376 }, { "epoch": 2.7, "learning_rate": 3.371559633027523e-05, "loss": 0.7194, "step": 4377 }, { "epoch": 2.7, "learning_rate": 3.3646788990825686e-05, "loss": 0.6134, "step": 4378 }, { "epoch": 2.7, "learning_rate": 3.357798165137615e-05, "loss": 0.5928, "step": 4379 }, { "epoch": 2.7, "learning_rate": 3.35091743119266e-05, "loss": 0.691, "step": 4380 }, { "epoch": 2.7, "learning_rate": 3.344036697247706e-05, "loss": 0.6328, "step": 4381 }, { "epoch": 2.7, "learning_rate": 3.337155963302752e-05, "loss": 0.6643, "step": 4382 }, { "epoch": 2.71, "learning_rate": 3.330275229357798e-05, "loss": 0.5593, "step": 4383 }, { "epoch": 2.71, "learning_rate": 3.323394495412844e-05, "loss": 0.5403, "step": 4384 }, { "epoch": 2.71, "learning_rate": 3.31651376146789e-05, "loss": 0.6108, "step": 4385 }, { "epoch": 2.71, "learning_rate": 3.3096330275229355e-05, "loss": 0.5947, "step": 4386 }, { "epoch": 2.71, "learning_rate": 3.302752293577982e-05, "loss": 0.5018, "step": 4387 }, { "epoch": 2.71, "learning_rate": 3.295871559633027e-05, "loss": 0.4964, "step": 4388 }, { "epoch": 2.71, "learning_rate": 3.288990825688073e-05, "loss": 0.5018, "step": 4389 }, { "epoch": 2.71, "learning_rate": 3.282110091743119e-05, "loss": 0.6679, "step": 4390 }, { "epoch": 2.71, "learning_rate": 3.275229357798165e-05, "loss": 1.8382, "step": 4391 }, { "epoch": 2.71, "learning_rate": 3.268348623853211e-05, "loss": 1.597, "step": 4392 }, { "epoch": 2.71, "learning_rate": 3.261467889908257e-05, "loss": 1.3316, "step": 4393 }, { "epoch": 2.71, "learning_rate": 3.2545871559633025e-05, "loss": 1.1517, "step": 4394 }, { "epoch": 2.71, "learning_rate": 3.2477064220183487e-05, "loss": 1.2313, "step": 4395 }, { "epoch": 2.71, "learning_rate": 3.240825688073394e-05, "loss": 1.2299, "step": 4396 }, { "epoch": 2.71, "learning_rate": 3.23394495412844e-05, "loss": 1.1572, "step": 4397 }, { "epoch": 2.71, "learning_rate": 3.227064220183486e-05, "loss": 1.2136, "step": 4398 }, { "epoch": 2.72, "learning_rate": 3.220183486238532e-05, "loss": 1.1076, "step": 4399 }, { "epoch": 2.72, "learning_rate": 3.213302752293578e-05, "loss": 1.1897, "step": 4400 }, { "epoch": 2.72, "learning_rate": 3.206422018348624e-05, "loss": 1.1483, "step": 4401 }, { "epoch": 2.72, "learning_rate": 3.1995412844036695e-05, "loss": 1.0785, "step": 4402 }, { "epoch": 2.72, "learning_rate": 3.1926605504587156e-05, "loss": 0.9209, "step": 4403 }, { "epoch": 2.72, "learning_rate": 3.185779816513761e-05, "loss": 0.9945, "step": 4404 }, { "epoch": 2.72, "learning_rate": 3.178899082568807e-05, "loss": 1.1352, "step": 4405 }, { "epoch": 2.72, "learning_rate": 3.1720183486238527e-05, "loss": 1.0646, "step": 4406 }, { "epoch": 2.72, "learning_rate": 3.165137614678899e-05, "loss": 0.9019, "step": 4407 }, { "epoch": 2.72, "learning_rate": 3.158256880733945e-05, "loss": 0.9739, "step": 4408 }, { "epoch": 2.72, "learning_rate": 3.151376146788991e-05, "loss": 1.0171, "step": 4409 }, { "epoch": 2.72, "learning_rate": 3.1444954128440365e-05, "loss": 1.0374, "step": 4410 }, { "epoch": 2.72, "learning_rate": 3.1376146788990826e-05, "loss": 0.9943, "step": 4411 }, { "epoch": 2.72, "learning_rate": 3.130733944954128e-05, "loss": 0.9283, "step": 4412 }, { "epoch": 2.72, "learning_rate": 3.123853211009174e-05, "loss": 0.9359, "step": 4413 }, { "epoch": 2.72, "learning_rate": 3.1169724770642196e-05, "loss": 0.9158, "step": 4414 }, { "epoch": 2.73, "learning_rate": 3.110091743119266e-05, "loss": 0.848, "step": 4415 }, { "epoch": 2.73, "learning_rate": 3.103211009174312e-05, "loss": 0.9724, "step": 4416 }, { "epoch": 2.73, "learning_rate": 3.096330275229358e-05, "loss": 0.7664, "step": 4417 }, { "epoch": 2.73, "learning_rate": 3.0894495412844035e-05, "loss": 0.9433, "step": 4418 }, { "epoch": 2.73, "learning_rate": 3.0825688073394496e-05, "loss": 0.7457, "step": 4419 }, { "epoch": 2.73, "learning_rate": 3.075688073394495e-05, "loss": 0.8187, "step": 4420 }, { "epoch": 2.73, "learning_rate": 3.068807339449541e-05, "loss": 0.8369, "step": 4421 }, { "epoch": 2.73, "learning_rate": 3.0619266055045866e-05, "loss": 0.9324, "step": 4422 }, { "epoch": 2.73, "learning_rate": 3.055045871559633e-05, "loss": 0.8421, "step": 4423 }, { "epoch": 2.73, "learning_rate": 3.0481651376146785e-05, "loss": 0.8011, "step": 4424 }, { "epoch": 2.73, "learning_rate": 3.0412844036697247e-05, "loss": 0.8359, "step": 4425 }, { "epoch": 2.73, "learning_rate": 3.0344036697247705e-05, "loss": 0.8032, "step": 4426 }, { "epoch": 2.73, "learning_rate": 3.0275229357798166e-05, "loss": 0.6778, "step": 4427 }, { "epoch": 2.73, "learning_rate": 3.020642201834862e-05, "loss": 0.6758, "step": 4428 }, { "epoch": 2.73, "learning_rate": 3.013761467889908e-05, "loss": 0.7229, "step": 4429 }, { "epoch": 2.73, "learning_rate": 3.006880733944954e-05, "loss": 0.6191, "step": 4430 }, { "epoch": 2.74, "learning_rate": 2.9999999999999997e-05, "loss": 0.5859, "step": 4431 }, { "epoch": 2.74, "learning_rate": 2.9931192660550455e-05, "loss": 0.7384, "step": 4432 }, { "epoch": 2.74, "learning_rate": 2.9862385321100917e-05, "loss": 0.5568, "step": 4433 }, { "epoch": 2.74, "learning_rate": 2.9793577981651374e-05, "loss": 0.621, "step": 4434 }, { "epoch": 2.74, "learning_rate": 2.9724770642201832e-05, "loss": 0.4893, "step": 4435 }, { "epoch": 2.74, "learning_rate": 2.965596330275229e-05, "loss": 0.5297, "step": 4436 }, { "epoch": 2.74, "learning_rate": 2.958715596330275e-05, "loss": 0.4829, "step": 4437 }, { "epoch": 2.74, "learning_rate": 2.951834862385321e-05, "loss": 0.4738, "step": 4438 }, { "epoch": 2.74, "learning_rate": 2.9449541284403667e-05, "loss": 0.4318, "step": 4439 }, { "epoch": 2.74, "learning_rate": 2.9380733944954125e-05, "loss": 0.5377, "step": 4440 }, { "epoch": 2.74, "learning_rate": 2.9311926605504586e-05, "loss": 1.7343, "step": 4441 }, { "epoch": 2.74, "learning_rate": 2.9243119266055044e-05, "loss": 1.5264, "step": 4442 }, { "epoch": 2.74, "learning_rate": 2.9174311926605502e-05, "loss": 1.3026, "step": 4443 }, { "epoch": 2.74, "learning_rate": 2.910550458715596e-05, "loss": 1.1369, "step": 4444 }, { "epoch": 2.74, "learning_rate": 2.903669724770642e-05, "loss": 1.3275, "step": 4445 }, { "epoch": 2.74, "learning_rate": 2.896788990825688e-05, "loss": 1.1836, "step": 4446 }, { "epoch": 2.75, "learning_rate": 2.8899082568807337e-05, "loss": 1.1694, "step": 4447 }, { "epoch": 2.75, "learning_rate": 2.8830275229357795e-05, "loss": 1.179, "step": 4448 }, { "epoch": 2.75, "learning_rate": 2.8761467889908256e-05, "loss": 1.0569, "step": 4449 }, { "epoch": 2.75, "learning_rate": 2.8692660550458714e-05, "loss": 1.1042, "step": 4450 }, { "epoch": 2.75, "learning_rate": 2.8623853211009172e-05, "loss": 1.1164, "step": 4451 }, { "epoch": 2.75, "learning_rate": 2.855504587155963e-05, "loss": 1.1172, "step": 4452 }, { "epoch": 2.75, "learning_rate": 2.848623853211009e-05, "loss": 1.0317, "step": 4453 }, { "epoch": 2.75, "learning_rate": 2.841743119266055e-05, "loss": 1.1035, "step": 4454 }, { "epoch": 2.75, "learning_rate": 2.8348623853211007e-05, "loss": 1.0552, "step": 4455 }, { "epoch": 2.75, "learning_rate": 2.8279816513761465e-05, "loss": 1.2677, "step": 4456 }, { "epoch": 2.75, "learning_rate": 2.8211009174311926e-05, "loss": 0.9247, "step": 4457 }, { "epoch": 2.75, "learning_rate": 2.8142201834862384e-05, "loss": 1.042, "step": 4458 }, { "epoch": 2.75, "learning_rate": 2.8073394495412842e-05, "loss": 1.0111, "step": 4459 }, { "epoch": 2.75, "learning_rate": 2.80045871559633e-05, "loss": 0.9646, "step": 4460 }, { "epoch": 2.75, "learning_rate": 2.793577981651376e-05, "loss": 1.032, "step": 4461 }, { "epoch": 2.75, "learning_rate": 2.786697247706422e-05, "loss": 0.8874, "step": 4462 }, { "epoch": 2.75, "learning_rate": 2.7798165137614677e-05, "loss": 0.9166, "step": 4463 }, { "epoch": 2.76, "learning_rate": 2.7729357798165135e-05, "loss": 1.0155, "step": 4464 }, { "epoch": 2.76, "learning_rate": 2.7660550458715596e-05, "loss": 0.9373, "step": 4465 }, { "epoch": 2.76, "learning_rate": 2.7591743119266054e-05, "loss": 0.9618, "step": 4466 }, { "epoch": 2.76, "learning_rate": 2.752293577981651e-05, "loss": 0.9627, "step": 4467 }, { "epoch": 2.76, "learning_rate": 2.745412844036697e-05, "loss": 0.8467, "step": 4468 }, { "epoch": 2.76, "learning_rate": 2.738532110091743e-05, "loss": 0.9347, "step": 4469 }, { "epoch": 2.76, "learning_rate": 2.7316513761467885e-05, "loss": 0.8594, "step": 4470 }, { "epoch": 2.76, "learning_rate": 2.7247706422018347e-05, "loss": 0.8523, "step": 4471 }, { "epoch": 2.76, "learning_rate": 2.7178899082568804e-05, "loss": 0.8178, "step": 4472 }, { "epoch": 2.76, "learning_rate": 2.7110091743119266e-05, "loss": 0.8881, "step": 4473 }, { "epoch": 2.76, "learning_rate": 2.704128440366972e-05, "loss": 0.6914, "step": 4474 }, { "epoch": 2.76, "learning_rate": 2.697247706422018e-05, "loss": 0.7926, "step": 4475 }, { "epoch": 2.76, "learning_rate": 2.690366972477064e-05, "loss": 0.811, "step": 4476 }, { "epoch": 2.76, "learning_rate": 2.68348623853211e-05, "loss": 0.725, "step": 4477 }, { "epoch": 2.76, "learning_rate": 2.6766055045871555e-05, "loss": 0.7814, "step": 4478 }, { "epoch": 2.76, "learning_rate": 2.6697247706422016e-05, "loss": 0.6713, "step": 4479 }, { "epoch": 2.77, "learning_rate": 2.6628440366972474e-05, "loss": 0.6762, "step": 4480 }, { "epoch": 2.77, "learning_rate": 2.6559633027522936e-05, "loss": 0.6274, "step": 4481 }, { "epoch": 2.77, "learning_rate": 2.649082568807339e-05, "loss": 0.6817, "step": 4482 }, { "epoch": 2.77, "learning_rate": 2.642201834862385e-05, "loss": 0.601, "step": 4483 }, { "epoch": 2.77, "learning_rate": 2.635321100917431e-05, "loss": 0.5633, "step": 4484 }, { "epoch": 2.77, "learning_rate": 2.628440366972477e-05, "loss": 0.4726, "step": 4485 }, { "epoch": 2.77, "learning_rate": 2.6215596330275225e-05, "loss": 0.4709, "step": 4486 }, { "epoch": 2.77, "learning_rate": 2.6146788990825686e-05, "loss": 0.5094, "step": 4487 }, { "epoch": 2.77, "learning_rate": 2.6077981651376144e-05, "loss": 0.511, "step": 4488 }, { "epoch": 2.77, "learning_rate": 2.6009174311926605e-05, "loss": 0.4216, "step": 4489 }, { "epoch": 2.77, "learning_rate": 2.594036697247706e-05, "loss": 0.4931, "step": 4490 }, { "epoch": 2.77, "learning_rate": 2.587155963302752e-05, "loss": 1.599, "step": 4491 }, { "epoch": 2.77, "learning_rate": 2.580275229357798e-05, "loss": 1.4101, "step": 4492 }, { "epoch": 2.77, "learning_rate": 2.573394495412844e-05, "loss": 1.3726, "step": 4493 }, { "epoch": 2.77, "learning_rate": 2.5665137614678895e-05, "loss": 1.3044, "step": 4494 }, { "epoch": 2.77, "learning_rate": 2.5596330275229356e-05, "loss": 1.3374, "step": 4495 }, { "epoch": 2.78, "learning_rate": 2.5527522935779814e-05, "loss": 1.196, "step": 4496 }, { "epoch": 2.78, "learning_rate": 2.5458715596330275e-05, "loss": 1.2508, "step": 4497 }, { "epoch": 2.78, "learning_rate": 2.538990825688073e-05, "loss": 1.1229, "step": 4498 }, { "epoch": 2.78, "learning_rate": 2.532110091743119e-05, "loss": 1.0638, "step": 4499 }, { "epoch": 2.78, "learning_rate": 2.525229357798165e-05, "loss": 1.0364, "step": 4500 }, { "epoch": 2.78, "eval_bleu": 1.3741745601573323e-19, "eval_loss": 1.776824951171875, "eval_runtime": 2494.6636, "eval_samples_per_second": 5.917, "eval_steps_per_second": 0.74, "step": 4500 }, { "epoch": 2.78, "learning_rate": 2.518348623853211e-05, "loss": 1.0992, "step": 4501 }, { "epoch": 2.78, "learning_rate": 2.5114678899082565e-05, "loss": 1.1023, "step": 4502 }, { "epoch": 2.78, "learning_rate": 2.5045871559633026e-05, "loss": 1.0038, "step": 4503 }, { "epoch": 2.78, "learning_rate": 2.4977064220183484e-05, "loss": 1.0275, "step": 4504 }, { "epoch": 2.78, "learning_rate": 2.4908256880733945e-05, "loss": 1.1966, "step": 4505 }, { "epoch": 2.78, "learning_rate": 2.48394495412844e-05, "loss": 1.0445, "step": 4506 }, { "epoch": 2.78, "learning_rate": 2.477064220183486e-05, "loss": 0.988, "step": 4507 }, { "epoch": 2.78, "learning_rate": 2.470183486238532e-05, "loss": 0.9769, "step": 4508 }, { "epoch": 2.78, "learning_rate": 2.463302752293578e-05, "loss": 0.9518, "step": 4509 }, { "epoch": 2.78, "learning_rate": 2.4564220183486234e-05, "loss": 1.1336, "step": 4510 }, { "epoch": 2.78, "learning_rate": 2.4495412844036696e-05, "loss": 0.9184, "step": 4511 }, { "epoch": 2.79, "learning_rate": 2.4426605504587154e-05, "loss": 0.846, "step": 4512 }, { "epoch": 2.79, "learning_rate": 2.4357798165137615e-05, "loss": 1.0451, "step": 4513 }, { "epoch": 2.79, "learning_rate": 2.428899082568807e-05, "loss": 0.9108, "step": 4514 }, { "epoch": 2.79, "learning_rate": 2.422018348623853e-05, "loss": 0.8252, "step": 4515 }, { "epoch": 2.79, "learning_rate": 2.415137614678899e-05, "loss": 0.9341, "step": 4516 }, { "epoch": 2.79, "learning_rate": 2.408256880733945e-05, "loss": 0.8154, "step": 4517 }, { "epoch": 2.79, "learning_rate": 2.4013761467889904e-05, "loss": 0.8433, "step": 4518 }, { "epoch": 2.79, "learning_rate": 2.3944954128440366e-05, "loss": 0.8591, "step": 4519 }, { "epoch": 2.79, "learning_rate": 2.3876146788990823e-05, "loss": 0.8198, "step": 4520 }, { "epoch": 2.79, "learning_rate": 2.3807339449541285e-05, "loss": 0.839, "step": 4521 }, { "epoch": 2.79, "learning_rate": 2.373853211009174e-05, "loss": 0.7614, "step": 4522 }, { "epoch": 2.79, "learning_rate": 2.36697247706422e-05, "loss": 0.8214, "step": 4523 }, { "epoch": 2.79, "learning_rate": 2.360091743119266e-05, "loss": 0.7016, "step": 4524 }, { "epoch": 2.79, "learning_rate": 2.353211009174312e-05, "loss": 0.668, "step": 4525 }, { "epoch": 2.79, "learning_rate": 2.3463302752293574e-05, "loss": 0.7033, "step": 4526 }, { "epoch": 2.79, "learning_rate": 2.3394495412844035e-05, "loss": 0.7296, "step": 4527 }, { "epoch": 2.8, "learning_rate": 2.3325688073394493e-05, "loss": 0.6356, "step": 4528 }, { "epoch": 2.8, "learning_rate": 2.3256880733944955e-05, "loss": 0.5682, "step": 4529 }, { "epoch": 2.8, "learning_rate": 2.318807339449541e-05, "loss": 0.649, "step": 4530 }, { "epoch": 2.8, "learning_rate": 2.311926605504587e-05, "loss": 0.6647, "step": 4531 }, { "epoch": 2.8, "learning_rate": 2.3050458715596328e-05, "loss": 0.5715, "step": 4532 }, { "epoch": 2.8, "learning_rate": 2.298165137614679e-05, "loss": 0.5845, "step": 4533 }, { "epoch": 2.8, "learning_rate": 2.2912844036697244e-05, "loss": 0.5445, "step": 4534 }, { "epoch": 2.8, "learning_rate": 2.2844036697247705e-05, "loss": 0.524, "step": 4535 }, { "epoch": 2.8, "learning_rate": 2.2775229357798163e-05, "loss": 0.5271, "step": 4536 }, { "epoch": 2.8, "learning_rate": 2.2706422018348624e-05, "loss": 0.5474, "step": 4537 }, { "epoch": 2.8, "learning_rate": 2.263761467889908e-05, "loss": 0.3952, "step": 4538 }, { "epoch": 2.8, "learning_rate": 2.256880733944954e-05, "loss": 0.4909, "step": 4539 }, { "epoch": 2.8, "learning_rate": 2.2499999999999998e-05, "loss": 0.5566, "step": 4540 }, { "epoch": 2.8, "learning_rate": 2.243119266055046e-05, "loss": 1.7067, "step": 4541 }, { "epoch": 2.8, "learning_rate": 2.2362385321100914e-05, "loss": 1.3893, "step": 4542 }, { "epoch": 2.8, "learning_rate": 2.2293577981651375e-05, "loss": 1.2802, "step": 4543 }, { "epoch": 2.8, "learning_rate": 2.2224770642201833e-05, "loss": 1.1312, "step": 4544 }, { "epoch": 2.81, "learning_rate": 2.2155963302752294e-05, "loss": 1.1352, "step": 4545 }, { "epoch": 2.81, "learning_rate": 2.208715596330275e-05, "loss": 1.2833, "step": 4546 }, { "epoch": 2.81, "learning_rate": 2.201834862385321e-05, "loss": 1.1461, "step": 4547 }, { "epoch": 2.81, "learning_rate": 2.1949541284403668e-05, "loss": 1.1973, "step": 4548 }, { "epoch": 2.81, "learning_rate": 2.188073394495413e-05, "loss": 1.1169, "step": 4549 }, { "epoch": 2.81, "learning_rate": 2.1811926605504584e-05, "loss": 1.0031, "step": 4550 }, { "epoch": 2.81, "learning_rate": 2.1743119266055045e-05, "loss": 1.071, "step": 4551 }, { "epoch": 2.81, "learning_rate": 2.1674311926605503e-05, "loss": 1.2425, "step": 4552 }, { "epoch": 2.81, "learning_rate": 2.1605504587155964e-05, "loss": 0.9873, "step": 4553 }, { "epoch": 2.81, "learning_rate": 2.153669724770642e-05, "loss": 1.0584, "step": 4554 }, { "epoch": 2.81, "learning_rate": 2.146788990825688e-05, "loss": 1.208, "step": 4555 }, { "epoch": 2.81, "learning_rate": 2.1399082568807338e-05, "loss": 1.1719, "step": 4556 }, { "epoch": 2.81, "learning_rate": 2.13302752293578e-05, "loss": 1.0729, "step": 4557 }, { "epoch": 2.81, "learning_rate": 2.1261467889908253e-05, "loss": 1.0485, "step": 4558 }, { "epoch": 2.81, "learning_rate": 2.1192660550458715e-05, "loss": 1.036, "step": 4559 }, { "epoch": 2.81, "learning_rate": 2.1123853211009173e-05, "loss": 0.8543, "step": 4560 }, { "epoch": 2.82, "learning_rate": 2.1055045871559634e-05, "loss": 1.0383, "step": 4561 }, { "epoch": 2.82, "learning_rate": 2.098623853211009e-05, "loss": 0.9774, "step": 4562 }, { "epoch": 2.82, "learning_rate": 2.091743119266055e-05, "loss": 0.9882, "step": 4563 }, { "epoch": 2.82, "learning_rate": 2.0848623853211008e-05, "loss": 0.9675, "step": 4564 }, { "epoch": 2.82, "learning_rate": 2.077981651376147e-05, "loss": 0.9406, "step": 4565 }, { "epoch": 2.82, "learning_rate": 2.0711009174311923e-05, "loss": 0.9818, "step": 4566 }, { "epoch": 2.82, "learning_rate": 2.0642201834862385e-05, "loss": 0.9217, "step": 4567 }, { "epoch": 2.82, "learning_rate": 2.0573394495412842e-05, "loss": 0.8368, "step": 4568 }, { "epoch": 2.82, "learning_rate": 2.0504587155963304e-05, "loss": 0.9413, "step": 4569 }, { "epoch": 2.82, "learning_rate": 2.0435779816513758e-05, "loss": 0.788, "step": 4570 }, { "epoch": 2.82, "learning_rate": 2.036697247706422e-05, "loss": 0.9156, "step": 4571 }, { "epoch": 2.82, "learning_rate": 2.0298165137614677e-05, "loss": 0.8273, "step": 4572 }, { "epoch": 2.82, "learning_rate": 2.022935779816514e-05, "loss": 0.6147, "step": 4573 }, { "epoch": 2.82, "learning_rate": 2.0160550458715593e-05, "loss": 0.7959, "step": 4574 }, { "epoch": 2.82, "learning_rate": 2.0091743119266054e-05, "loss": 0.8803, "step": 4575 }, { "epoch": 2.82, "learning_rate": 2.0022935779816512e-05, "loss": 0.6465, "step": 4576 }, { "epoch": 2.83, "learning_rate": 1.995412844036697e-05, "loss": 0.76, "step": 4577 }, { "epoch": 2.83, "learning_rate": 1.9885321100917428e-05, "loss": 0.7173, "step": 4578 }, { "epoch": 2.83, "learning_rate": 1.981651376146789e-05, "loss": 0.5321, "step": 4579 }, { "epoch": 2.83, "learning_rate": 1.9747706422018347e-05, "loss": 0.6674, "step": 4580 }, { "epoch": 2.83, "learning_rate": 1.9678899082568805e-05, "loss": 0.6767, "step": 4581 }, { "epoch": 2.83, "learning_rate": 1.9610091743119263e-05, "loss": 0.6509, "step": 4582 }, { "epoch": 2.83, "learning_rate": 1.9541284403669724e-05, "loss": 0.6361, "step": 4583 }, { "epoch": 2.83, "learning_rate": 1.9472477064220182e-05, "loss": 0.5187, "step": 4584 }, { "epoch": 2.83, "learning_rate": 1.940366972477064e-05, "loss": 0.5913, "step": 4585 }, { "epoch": 2.83, "learning_rate": 1.9334862385321098e-05, "loss": 0.5777, "step": 4586 }, { "epoch": 2.83, "learning_rate": 1.926605504587156e-05, "loss": 0.5197, "step": 4587 }, { "epoch": 2.83, "learning_rate": 1.9197247706422017e-05, "loss": 0.4075, "step": 4588 }, { "epoch": 2.83, "learning_rate": 1.9128440366972475e-05, "loss": 0.4522, "step": 4589 }, { "epoch": 2.83, "learning_rate": 1.9059633027522933e-05, "loss": 0.4476, "step": 4590 }, { "epoch": 2.83, "learning_rate": 1.8990825688073394e-05, "loss": 1.7788, "step": 4591 }, { "epoch": 2.83, "learning_rate": 1.8922018348623852e-05, "loss": 1.4156, "step": 4592 }, { "epoch": 2.84, "learning_rate": 1.885321100917431e-05, "loss": 1.3459, "step": 4593 }, { "epoch": 2.84, "learning_rate": 1.8784403669724768e-05, "loss": 1.1863, "step": 4594 }, { "epoch": 2.84, "learning_rate": 1.871559633027523e-05, "loss": 1.2492, "step": 4595 }, { "epoch": 2.84, "learning_rate": 1.8646788990825687e-05, "loss": 1.1565, "step": 4596 }, { "epoch": 2.84, "learning_rate": 1.8577981651376145e-05, "loss": 1.3033, "step": 4597 }, { "epoch": 2.84, "learning_rate": 1.8509174311926606e-05, "loss": 1.1299, "step": 4598 }, { "epoch": 2.84, "learning_rate": 1.8440366972477064e-05, "loss": 1.3174, "step": 4599 }, { "epoch": 2.84, "learning_rate": 1.8371559633027522e-05, "loss": 1.005, "step": 4600 }, { "epoch": 2.84, "learning_rate": 1.830275229357798e-05, "loss": 1.1638, "step": 4601 }, { "epoch": 2.84, "learning_rate": 1.823394495412844e-05, "loss": 1.028, "step": 4602 }, { "epoch": 2.84, "learning_rate": 1.81651376146789e-05, "loss": 1.081, "step": 4603 }, { "epoch": 2.84, "learning_rate": 1.8096330275229357e-05, "loss": 1.0723, "step": 4604 }, { "epoch": 2.84, "learning_rate": 1.8027522935779815e-05, "loss": 1.0672, "step": 4605 }, { "epoch": 2.84, "learning_rate": 1.7958715596330276e-05, "loss": 0.9622, "step": 4606 }, { "epoch": 2.84, "learning_rate": 1.7889908256880734e-05, "loss": 0.9519, "step": 4607 }, { "epoch": 2.84, "learning_rate": 1.782110091743119e-05, "loss": 0.912, "step": 4608 }, { "epoch": 2.84, "learning_rate": 1.775229357798165e-05, "loss": 0.8903, "step": 4609 }, { "epoch": 2.85, "learning_rate": 1.768348623853211e-05, "loss": 1.0945, "step": 4610 }, { "epoch": 2.85, "learning_rate": 1.761467889908257e-05, "loss": 0.9083, "step": 4611 }, { "epoch": 2.85, "learning_rate": 1.7545871559633027e-05, "loss": 0.8767, "step": 4612 }, { "epoch": 2.85, "learning_rate": 1.7477064220183484e-05, "loss": 0.9596, "step": 4613 }, { "epoch": 2.85, "learning_rate": 1.7408256880733946e-05, "loss": 1.0027, "step": 4614 }, { "epoch": 2.85, "learning_rate": 1.7339449541284404e-05, "loss": 0.8588, "step": 4615 }, { "epoch": 2.85, "learning_rate": 1.727064220183486e-05, "loss": 0.9063, "step": 4616 }, { "epoch": 2.85, "learning_rate": 1.720183486238532e-05, "loss": 0.7942, "step": 4617 }, { "epoch": 2.85, "learning_rate": 1.713302752293578e-05, "loss": 0.82, "step": 4618 }, { "epoch": 2.85, "learning_rate": 1.706422018348624e-05, "loss": 0.8289, "step": 4619 }, { "epoch": 2.85, "learning_rate": 1.6995412844036696e-05, "loss": 0.8573, "step": 4620 }, { "epoch": 2.85, "learning_rate": 1.6926605504587154e-05, "loss": 0.7957, "step": 4621 }, { "epoch": 2.85, "learning_rate": 1.6857798165137616e-05, "loss": 0.8278, "step": 4622 }, { "epoch": 2.85, "learning_rate": 1.6788990825688073e-05, "loss": 0.8664, "step": 4623 }, { "epoch": 2.85, "learning_rate": 1.672018348623853e-05, "loss": 0.6976, "step": 4624 }, { "epoch": 2.85, "learning_rate": 1.665137614678899e-05, "loss": 0.7771, "step": 4625 }, { "epoch": 2.86, "learning_rate": 1.658256880733945e-05, "loss": 0.6409, "step": 4626 }, { "epoch": 2.86, "learning_rate": 1.651376146788991e-05, "loss": 0.624, "step": 4627 }, { "epoch": 2.86, "learning_rate": 1.6444954128440366e-05, "loss": 0.5996, "step": 4628 }, { "epoch": 2.86, "learning_rate": 1.6376146788990824e-05, "loss": 0.7131, "step": 4629 }, { "epoch": 2.86, "learning_rate": 1.6307339449541285e-05, "loss": 0.6636, "step": 4630 }, { "epoch": 2.86, "learning_rate": 1.6238532110091743e-05, "loss": 0.6364, "step": 4631 }, { "epoch": 2.86, "learning_rate": 1.61697247706422e-05, "loss": 0.5541, "step": 4632 }, { "epoch": 2.86, "learning_rate": 1.610091743119266e-05, "loss": 0.6066, "step": 4633 }, { "epoch": 2.86, "learning_rate": 1.603211009174312e-05, "loss": 0.5724, "step": 4634 }, { "epoch": 2.86, "learning_rate": 1.5963302752293578e-05, "loss": 0.5396, "step": 4635 }, { "epoch": 2.86, "learning_rate": 1.5894495412844036e-05, "loss": 0.5158, "step": 4636 }, { "epoch": 2.86, "learning_rate": 1.5825688073394494e-05, "loss": 0.4567, "step": 4637 }, { "epoch": 2.86, "learning_rate": 1.5756880733944955e-05, "loss": 0.4934, "step": 4638 }, { "epoch": 2.86, "learning_rate": 1.5688073394495413e-05, "loss": 0.5748, "step": 4639 }, { "epoch": 2.86, "learning_rate": 1.561926605504587e-05, "loss": 0.5306, "step": 4640 }, { "epoch": 2.86, "learning_rate": 1.555045871559633e-05, "loss": 1.515, "step": 4641 }, { "epoch": 2.87, "learning_rate": 1.548165137614679e-05, "loss": 1.4593, "step": 4642 }, { "epoch": 2.87, "learning_rate": 1.5412844036697248e-05, "loss": 1.348, "step": 4643 }, { "epoch": 2.87, "learning_rate": 1.5344036697247706e-05, "loss": 1.2722, "step": 4644 }, { "epoch": 2.87, "learning_rate": 1.5275229357798164e-05, "loss": 1.1665, "step": 4645 }, { "epoch": 2.87, "learning_rate": 1.5206422018348623e-05, "loss": 1.2146, "step": 4646 }, { "epoch": 2.87, "learning_rate": 1.5137614678899083e-05, "loss": 1.2046, "step": 4647 }, { "epoch": 2.87, "learning_rate": 1.506880733944954e-05, "loss": 1.1509, "step": 4648 }, { "epoch": 2.87, "learning_rate": 1.4999999999999999e-05, "loss": 1.2011, "step": 4649 }, { "epoch": 2.87, "learning_rate": 1.4931192660550458e-05, "loss": 1.1445, "step": 4650 }, { "epoch": 2.87, "learning_rate": 1.4862385321100916e-05, "loss": 1.0681, "step": 4651 }, { "epoch": 2.87, "learning_rate": 1.4793577981651376e-05, "loss": 1.0935, "step": 4652 }, { "epoch": 2.87, "learning_rate": 1.4724770642201834e-05, "loss": 1.0531, "step": 4653 }, { "epoch": 2.87, "learning_rate": 1.4655963302752293e-05, "loss": 1.1714, "step": 4654 }, { "epoch": 2.87, "learning_rate": 1.4587155963302751e-05, "loss": 1.0124, "step": 4655 }, { "epoch": 2.87, "learning_rate": 1.451834862385321e-05, "loss": 1.0096, "step": 4656 }, { "epoch": 2.87, "learning_rate": 1.4449541284403669e-05, "loss": 1.0099, "step": 4657 }, { "epoch": 2.88, "learning_rate": 1.4380733944954128e-05, "loss": 0.9615, "step": 4658 }, { "epoch": 2.88, "learning_rate": 1.4311926605504586e-05, "loss": 0.9102, "step": 4659 }, { "epoch": 2.88, "learning_rate": 1.4243119266055046e-05, "loss": 1.0185, "step": 4660 }, { "epoch": 2.88, "learning_rate": 1.4174311926605503e-05, "loss": 1.0161, "step": 4661 }, { "epoch": 2.88, "learning_rate": 1.4105504587155963e-05, "loss": 1.0585, "step": 4662 }, { "epoch": 2.88, "learning_rate": 1.4036697247706421e-05, "loss": 0.9831, "step": 4663 }, { "epoch": 2.88, "learning_rate": 1.396788990825688e-05, "loss": 0.9231, "step": 4664 }, { "epoch": 2.88, "learning_rate": 1.3899082568807338e-05, "loss": 0.8305, "step": 4665 }, { "epoch": 2.88, "learning_rate": 1.3830275229357798e-05, "loss": 0.8973, "step": 4666 }, { "epoch": 2.88, "learning_rate": 1.3761467889908256e-05, "loss": 0.9529, "step": 4667 }, { "epoch": 2.88, "learning_rate": 1.3692660550458715e-05, "loss": 0.8387, "step": 4668 }, { "epoch": 2.88, "learning_rate": 1.3623853211009173e-05, "loss": 0.7796, "step": 4669 }, { "epoch": 2.88, "learning_rate": 1.3555045871559633e-05, "loss": 0.8061, "step": 4670 }, { "epoch": 2.88, "learning_rate": 1.348623853211009e-05, "loss": 0.7489, "step": 4671 }, { "epoch": 2.88, "learning_rate": 1.341743119266055e-05, "loss": 0.7882, "step": 4672 }, { "epoch": 2.88, "learning_rate": 1.3348623853211008e-05, "loss": 0.6364, "step": 4673 }, { "epoch": 2.89, "learning_rate": 1.3279816513761468e-05, "loss": 0.7082, "step": 4674 }, { "epoch": 2.89, "learning_rate": 1.3211009174311926e-05, "loss": 0.7344, "step": 4675 }, { "epoch": 2.89, "learning_rate": 1.3142201834862385e-05, "loss": 0.8378, "step": 4676 }, { "epoch": 2.89, "learning_rate": 1.3073394495412843e-05, "loss": 0.7075, "step": 4677 }, { "epoch": 2.89, "learning_rate": 1.3004587155963303e-05, "loss": 0.6722, "step": 4678 }, { "epoch": 2.89, "learning_rate": 1.293577981651376e-05, "loss": 0.5465, "step": 4679 }, { "epoch": 2.89, "learning_rate": 1.286697247706422e-05, "loss": 0.5903, "step": 4680 }, { "epoch": 2.89, "learning_rate": 1.2798165137614678e-05, "loss": 0.6214, "step": 4681 }, { "epoch": 2.89, "learning_rate": 1.2729357798165138e-05, "loss": 0.617, "step": 4682 }, { "epoch": 2.89, "learning_rate": 1.2660550458715595e-05, "loss": 0.5606, "step": 4683 }, { "epoch": 2.89, "learning_rate": 1.2591743119266055e-05, "loss": 0.4589, "step": 4684 }, { "epoch": 2.89, "learning_rate": 1.2522935779816513e-05, "loss": 0.5961, "step": 4685 }, { "epoch": 2.89, "learning_rate": 1.2454128440366973e-05, "loss": 0.6141, "step": 4686 }, { "epoch": 2.89, "learning_rate": 1.238532110091743e-05, "loss": 0.4514, "step": 4687 }, { "epoch": 2.89, "learning_rate": 1.231651376146789e-05, "loss": 0.5933, "step": 4688 }, { "epoch": 2.89, "learning_rate": 1.2247706422018348e-05, "loss": 0.4379, "step": 4689 }, { "epoch": 2.89, "learning_rate": 1.2178899082568807e-05, "loss": 0.5371, "step": 4690 }, { "epoch": 2.9, "learning_rate": 1.2110091743119265e-05, "loss": 1.647, "step": 4691 }, { "epoch": 2.9, "learning_rate": 1.2041284403669725e-05, "loss": 1.3897, "step": 4692 }, { "epoch": 2.9, "learning_rate": 1.1972477064220183e-05, "loss": 1.2873, "step": 4693 }, { "epoch": 2.9, "learning_rate": 1.1903669724770642e-05, "loss": 1.2577, "step": 4694 }, { "epoch": 2.9, "learning_rate": 1.18348623853211e-05, "loss": 1.2482, "step": 4695 }, { "epoch": 2.9, "learning_rate": 1.176605504587156e-05, "loss": 1.2372, "step": 4696 }, { "epoch": 2.9, "learning_rate": 1.1697247706422018e-05, "loss": 1.29, "step": 4697 }, { "epoch": 2.9, "learning_rate": 1.1628440366972477e-05, "loss": 1.0405, "step": 4698 }, { "epoch": 2.9, "learning_rate": 1.1559633027522935e-05, "loss": 1.1388, "step": 4699 }, { "epoch": 2.9, "learning_rate": 1.1490825688073395e-05, "loss": 1.0377, "step": 4700 }, { "epoch": 2.9, "learning_rate": 1.1422018348623853e-05, "loss": 1.1141, "step": 4701 }, { "epoch": 2.9, "learning_rate": 1.1353211009174312e-05, "loss": 1.0284, "step": 4702 }, { "epoch": 2.9, "learning_rate": 1.128440366972477e-05, "loss": 1.0036, "step": 4703 }, { "epoch": 2.9, "learning_rate": 1.121559633027523e-05, "loss": 1.0257, "step": 4704 }, { "epoch": 2.9, "learning_rate": 1.1146788990825688e-05, "loss": 0.9324, "step": 4705 }, { "epoch": 2.9, "learning_rate": 1.1077981651376147e-05, "loss": 1.076, "step": 4706 }, { "epoch": 2.91, "learning_rate": 1.1009174311926605e-05, "loss": 1.0192, "step": 4707 }, { "epoch": 2.91, "learning_rate": 1.0940366972477065e-05, "loss": 1.1122, "step": 4708 }, { "epoch": 2.91, "learning_rate": 1.0871559633027522e-05, "loss": 1.0317, "step": 4709 }, { "epoch": 2.91, "learning_rate": 1.0802752293577982e-05, "loss": 0.8847, "step": 4710 }, { "epoch": 2.91, "learning_rate": 1.073394495412844e-05, "loss": 1.0597, "step": 4711 }, { "epoch": 2.91, "learning_rate": 1.06651376146789e-05, "loss": 1.056, "step": 4712 }, { "epoch": 2.91, "learning_rate": 1.0596330275229357e-05, "loss": 0.9446, "step": 4713 }, { "epoch": 2.91, "learning_rate": 1.0527522935779817e-05, "loss": 0.9083, "step": 4714 }, { "epoch": 2.91, "learning_rate": 1.0458715596330275e-05, "loss": 0.8394, "step": 4715 }, { "epoch": 2.91, "learning_rate": 1.0389908256880734e-05, "loss": 0.8397, "step": 4716 }, { "epoch": 2.91, "learning_rate": 1.0321100917431192e-05, "loss": 0.8365, "step": 4717 }, { "epoch": 2.91, "learning_rate": 1.0252293577981652e-05, "loss": 0.8737, "step": 4718 }, { "epoch": 2.91, "learning_rate": 1.018348623853211e-05, "loss": 0.8334, "step": 4719 }, { "epoch": 2.91, "learning_rate": 1.011467889908257e-05, "loss": 0.9145, "step": 4720 }, { "epoch": 2.91, "learning_rate": 1.0045871559633027e-05, "loss": 0.9948, "step": 4721 }, { "epoch": 2.91, "learning_rate": 9.977064220183485e-06, "loss": 0.7694, "step": 4722 }, { "epoch": 2.92, "learning_rate": 9.908256880733945e-06, "loss": 0.7595, "step": 4723 }, { "epoch": 2.92, "learning_rate": 9.839449541284403e-06, "loss": 0.6905, "step": 4724 }, { "epoch": 2.92, "learning_rate": 9.770642201834862e-06, "loss": 0.6627, "step": 4725 }, { "epoch": 2.92, "learning_rate": 9.70183486238532e-06, "loss": 0.7632, "step": 4726 }, { "epoch": 2.92, "learning_rate": 9.63302752293578e-06, "loss": 0.7008, "step": 4727 }, { "epoch": 2.92, "learning_rate": 9.564220183486237e-06, "loss": 0.6408, "step": 4728 }, { "epoch": 2.92, "learning_rate": 9.495412844036697e-06, "loss": 0.6336, "step": 4729 }, { "epoch": 2.92, "learning_rate": 9.426605504587155e-06, "loss": 0.7248, "step": 4730 }, { "epoch": 2.92, "learning_rate": 9.357798165137614e-06, "loss": 0.6665, "step": 4731 }, { "epoch": 2.92, "learning_rate": 9.288990825688072e-06, "loss": 0.5397, "step": 4732 }, { "epoch": 2.92, "learning_rate": 9.220183486238532e-06, "loss": 0.5865, "step": 4733 }, { "epoch": 2.92, "learning_rate": 9.15137614678899e-06, "loss": 0.6092, "step": 4734 }, { "epoch": 2.92, "learning_rate": 9.08256880733945e-06, "loss": 0.5846, "step": 4735 }, { "epoch": 2.92, "learning_rate": 9.013761467889907e-06, "loss": 0.5967, "step": 4736 }, { "epoch": 2.92, "learning_rate": 8.944954128440367e-06, "loss": 0.5171, "step": 4737 }, { "epoch": 2.92, "learning_rate": 8.876146788990825e-06, "loss": 0.4322, "step": 4738 }, { "epoch": 2.93, "learning_rate": 8.807339449541284e-06, "loss": 0.437, "step": 4739 }, { "epoch": 2.93, "learning_rate": 8.738532110091742e-06, "loss": 0.4653, "step": 4740 }, { "epoch": 2.93, "learning_rate": 8.669724770642202e-06, "loss": 1.597, "step": 4741 }, { "epoch": 2.93, "learning_rate": 8.60091743119266e-06, "loss": 1.6638, "step": 4742 }, { "epoch": 2.93, "learning_rate": 8.53211009174312e-06, "loss": 1.2833, "step": 4743 }, { "epoch": 2.93, "learning_rate": 8.463302752293577e-06, "loss": 1.2478, "step": 4744 }, { "epoch": 2.93, "learning_rate": 8.394495412844037e-06, "loss": 1.1786, "step": 4745 }, { "epoch": 2.93, "learning_rate": 8.325688073394495e-06, "loss": 1.1102, "step": 4746 }, { "epoch": 2.93, "learning_rate": 8.256880733944954e-06, "loss": 1.2521, "step": 4747 }, { "epoch": 2.93, "learning_rate": 8.188073394495412e-06, "loss": 1.193, "step": 4748 }, { "epoch": 2.93, "learning_rate": 8.119266055045872e-06, "loss": 0.9963, "step": 4749 }, { "epoch": 2.93, "learning_rate": 8.05045871559633e-06, "loss": 1.1012, "step": 4750 }, { "epoch": 2.93, "learning_rate": 7.981651376146789e-06, "loss": 1.0408, "step": 4751 }, { "epoch": 2.93, "learning_rate": 7.912844036697247e-06, "loss": 1.0984, "step": 4752 }, { "epoch": 2.93, "learning_rate": 7.844036697247707e-06, "loss": 0.9055, "step": 4753 }, { "epoch": 2.93, "learning_rate": 7.775229357798164e-06, "loss": 0.9933, "step": 4754 }, { "epoch": 2.94, "learning_rate": 7.706422018348624e-06, "loss": 0.9191, "step": 4755 }, { "epoch": 2.94, "learning_rate": 7.637614678899082e-06, "loss": 0.8848, "step": 4756 }, { "epoch": 2.94, "learning_rate": 7.5688073394495415e-06, "loss": 0.9051, "step": 4757 }, { "epoch": 2.94, "learning_rate": 7.499999999999999e-06, "loss": 0.9039, "step": 4758 }, { "epoch": 2.94, "learning_rate": 7.431192660550458e-06, "loss": 1.0504, "step": 4759 }, { "epoch": 2.94, "learning_rate": 7.362385321100917e-06, "loss": 0.9298, "step": 4760 }, { "epoch": 2.94, "learning_rate": 7.2935779816513755e-06, "loss": 0.8676, "step": 4761 }, { "epoch": 2.94, "learning_rate": 7.224770642201834e-06, "loss": 0.9159, "step": 4762 }, { "epoch": 2.94, "learning_rate": 7.155963302752293e-06, "loss": 1.0064, "step": 4763 }, { "epoch": 2.94, "learning_rate": 7.087155963302752e-06, "loss": 0.8841, "step": 4764 }, { "epoch": 2.94, "learning_rate": 7.0183486238532104e-06, "loss": 0.9083, "step": 4765 }, { "epoch": 2.94, "learning_rate": 6.949541284403669e-06, "loss": 0.8359, "step": 4766 }, { "epoch": 2.94, "learning_rate": 6.880733944954128e-06, "loss": 0.9571, "step": 4767 }, { "epoch": 2.94, "learning_rate": 6.811926605504587e-06, "loss": 0.8202, "step": 4768 }, { "epoch": 2.94, "learning_rate": 6.743119266055045e-06, "loss": 0.7132, "step": 4769 }, { "epoch": 2.94, "learning_rate": 6.674311926605504e-06, "loss": 0.8124, "step": 4770 }, { "epoch": 2.94, "learning_rate": 6.605504587155963e-06, "loss": 0.6306, "step": 4771 }, { "epoch": 2.95, "learning_rate": 6.5366972477064216e-06, "loss": 0.8336, "step": 4772 }, { "epoch": 2.95, "learning_rate": 6.46788990825688e-06, "loss": 0.7276, "step": 4773 }, { "epoch": 2.95, "learning_rate": 6.399082568807339e-06, "loss": 0.7068, "step": 4774 }, { "epoch": 2.95, "learning_rate": 6.330275229357798e-06, "loss": 0.7737, "step": 4775 }, { "epoch": 2.95, "learning_rate": 6.2614678899082565e-06, "loss": 0.7396, "step": 4776 }, { "epoch": 2.95, "learning_rate": 6.192660550458715e-06, "loss": 0.6719, "step": 4777 }, { "epoch": 2.95, "learning_rate": 6.123853211009174e-06, "loss": 0.678, "step": 4778 }, { "epoch": 2.95, "learning_rate": 6.055045871559633e-06, "loss": 0.6683, "step": 4779 }, { "epoch": 2.95, "learning_rate": 5.986238532110091e-06, "loss": 0.5581, "step": 4780 }, { "epoch": 2.95, "learning_rate": 5.91743119266055e-06, "loss": 0.4987, "step": 4781 }, { "epoch": 2.95, "learning_rate": 5.848623853211009e-06, "loss": 0.6311, "step": 4782 }, { "epoch": 2.95, "learning_rate": 5.779816513761468e-06, "loss": 0.5812, "step": 4783 }, { "epoch": 2.95, "learning_rate": 5.711009174311926e-06, "loss": 0.5674, "step": 4784 }, { "epoch": 2.95, "learning_rate": 5.642201834862385e-06, "loss": 0.436, "step": 4785 }, { "epoch": 2.95, "learning_rate": 5.573394495412844e-06, "loss": 0.5947, "step": 4786 }, { "epoch": 2.95, "learning_rate": 5.5045871559633025e-06, "loss": 0.4523, "step": 4787 }, { "epoch": 2.96, "learning_rate": 5.435779816513761e-06, "loss": 0.5239, "step": 4788 }, { "epoch": 2.96, "learning_rate": 5.36697247706422e-06, "loss": 0.4447, "step": 4789 }, { "epoch": 2.96, "learning_rate": 5.298165137614679e-06, "loss": 0.5378, "step": 4790 }, { "epoch": 2.96, "learning_rate": 5.229357798165137e-06, "loss": 1.77, "step": 4791 }, { "epoch": 2.96, "learning_rate": 5.160550458715596e-06, "loss": 1.4126, "step": 4792 }, { "epoch": 2.96, "learning_rate": 5.091743119266055e-06, "loss": 1.2132, "step": 4793 }, { "epoch": 2.96, "learning_rate": 5.022935779816514e-06, "loss": 1.263, "step": 4794 }, { "epoch": 2.96, "learning_rate": 4.954128440366972e-06, "loss": 1.3289, "step": 4795 }, { "epoch": 2.96, "learning_rate": 4.885321100917431e-06, "loss": 1.1501, "step": 4796 }, { "epoch": 2.96, "learning_rate": 4.81651376146789e-06, "loss": 1.2185, "step": 4797 }, { "epoch": 2.96, "learning_rate": 4.7477064220183485e-06, "loss": 1.11, "step": 4798 }, { "epoch": 2.96, "learning_rate": 4.678899082568807e-06, "loss": 1.1127, "step": 4799 }, { "epoch": 2.96, "learning_rate": 4.610091743119266e-06, "loss": 1.0948, "step": 4800 }, { "epoch": 2.96, "learning_rate": 4.541284403669725e-06, "loss": 1.0986, "step": 4801 }, { "epoch": 2.96, "learning_rate": 4.4724770642201834e-06, "loss": 1.0068, "step": 4802 }, { "epoch": 2.96, "learning_rate": 4.403669724770642e-06, "loss": 0.9905, "step": 4803 }, { "epoch": 2.97, "learning_rate": 4.334862385321101e-06, "loss": 1.0702, "step": 4804 }, { "epoch": 2.97, "learning_rate": 4.26605504587156e-06, "loss": 1.031, "step": 4805 }, { "epoch": 2.97, "learning_rate": 4.197247706422018e-06, "loss": 1.066, "step": 4806 }, { "epoch": 2.97, "learning_rate": 4.128440366972477e-06, "loss": 0.991, "step": 4807 }, { "epoch": 2.97, "learning_rate": 4.059633027522936e-06, "loss": 0.9512, "step": 4808 }, { "epoch": 2.97, "learning_rate": 3.9908256880733945e-06, "loss": 1.0002, "step": 4809 }, { "epoch": 2.97, "learning_rate": 3.922018348623853e-06, "loss": 0.9506, "step": 4810 }, { "epoch": 2.97, "learning_rate": 3.853211009174312e-06, "loss": 0.8638, "step": 4811 }, { "epoch": 2.97, "learning_rate": 3.7844036697247707e-06, "loss": 0.9749, "step": 4812 }, { "epoch": 2.97, "learning_rate": 3.715596330275229e-06, "loss": 0.9649, "step": 4813 }, { "epoch": 2.97, "learning_rate": 3.6467889908256878e-06, "loss": 0.8234, "step": 4814 }, { "epoch": 2.97, "learning_rate": 3.5779816513761465e-06, "loss": 0.931, "step": 4815 }, { "epoch": 2.97, "learning_rate": 3.5091743119266052e-06, "loss": 0.8197, "step": 4816 }, { "epoch": 2.97, "learning_rate": 3.440366972477064e-06, "loss": 0.8216, "step": 4817 }, { "epoch": 2.97, "learning_rate": 3.3715596330275227e-06, "loss": 0.8401, "step": 4818 }, { "epoch": 2.97, "learning_rate": 3.3027522935779814e-06, "loss": 0.8022, "step": 4819 }, { "epoch": 2.98, "learning_rate": 3.23394495412844e-06, "loss": 0.7684, "step": 4820 }, { "epoch": 2.98, "learning_rate": 3.165137614678899e-06, "loss": 0.9382, "step": 4821 }, { "epoch": 2.98, "learning_rate": 3.0963302752293576e-06, "loss": 0.8536, "step": 4822 }, { "epoch": 2.98, "learning_rate": 3.0275229357798163e-06, "loss": 0.7438, "step": 4823 }, { "epoch": 2.98, "learning_rate": 2.958715596330275e-06, "loss": 0.7, "step": 4824 }, { "epoch": 2.98, "learning_rate": 2.889908256880734e-06, "loss": 0.7312, "step": 4825 }, { "epoch": 2.98, "learning_rate": 2.8211009174311925e-06, "loss": 0.6984, "step": 4826 }, { "epoch": 2.98, "learning_rate": 2.7522935779816512e-06, "loss": 0.7788, "step": 4827 }, { "epoch": 2.98, "learning_rate": 2.68348623853211e-06, "loss": 0.6525, "step": 4828 }, { "epoch": 2.98, "learning_rate": 2.6146788990825687e-06, "loss": 0.7049, "step": 4829 }, { "epoch": 2.98, "learning_rate": 2.5458715596330274e-06, "loss": 0.6016, "step": 4830 }, { "epoch": 2.98, "learning_rate": 2.477064220183486e-06, "loss": 0.6061, "step": 4831 }, { "epoch": 2.98, "learning_rate": 2.408256880733945e-06, "loss": 0.6008, "step": 4832 }, { "epoch": 2.98, "learning_rate": 2.3394495412844036e-06, "loss": 0.506, "step": 4833 }, { "epoch": 2.98, "learning_rate": 2.2706422018348624e-06, "loss": 0.4979, "step": 4834 }, { "epoch": 2.98, "learning_rate": 2.201834862385321e-06, "loss": 0.4849, "step": 4835 }, { "epoch": 2.99, "learning_rate": 2.13302752293578e-06, "loss": 0.468, "step": 4836 }, { "epoch": 2.99, "learning_rate": 2.0642201834862385e-06, "loss": 0.5054, "step": 4837 }, { "epoch": 2.99, "learning_rate": 1.9954128440366973e-06, "loss": 0.5039, "step": 4838 }, { "epoch": 2.99, "learning_rate": 1.926605504587156e-06, "loss": 0.4429, "step": 4839 }, { "epoch": 2.99, "learning_rate": 1.8577981651376145e-06, "loss": 0.4599, "step": 4840 }, { "epoch": 2.99, "learning_rate": 1.7889908256880732e-06, "loss": 1.5099, "step": 4841 }, { "epoch": 2.99, "learning_rate": 1.720183486238532e-06, "loss": 1.2594, "step": 4842 }, { "epoch": 2.99, "learning_rate": 1.6513761467889907e-06, "loss": 1.1815, "step": 4843 }, { "epoch": 2.99, "learning_rate": 1.5825688073394494e-06, "loss": 1.0883, "step": 4844 }, { "epoch": 2.99, "learning_rate": 1.5137614678899082e-06, "loss": 1.1598, "step": 4845 }, { "epoch": 2.99, "learning_rate": 1.444954128440367e-06, "loss": 0.9655, "step": 4846 }, { "epoch": 2.99, "learning_rate": 1.3761467889908256e-06, "loss": 0.8867, "step": 4847 }, { "epoch": 2.99, "learning_rate": 1.3073394495412844e-06, "loss": 0.9597, "step": 4848 }, { "epoch": 2.99, "learning_rate": 1.238532110091743e-06, "loss": 1.0178, "step": 4849 }, { "epoch": 2.99, "learning_rate": 1.1697247706422018e-06, "loss": 0.8696, "step": 4850 }, { "epoch": 2.99, "learning_rate": 1.1009174311926605e-06, "loss": 0.906, "step": 4851 }, { "epoch": 2.99, "learning_rate": 1.0321100917431193e-06, "loss": 0.8132, "step": 4852 }, { "epoch": 3.0, "learning_rate": 9.63302752293578e-07, "loss": 0.8195, "step": 4853 }, { "epoch": 3.0, "learning_rate": 8.944954128440366e-07, "loss": 0.7609, "step": 4854 }, { "epoch": 3.0, "learning_rate": 8.256880733944954e-07, "loss": 0.7066, "step": 4855 }, { "epoch": 3.0, "learning_rate": 7.568807339449541e-07, "loss": 0.5607, "step": 4856 }, { "epoch": 3.0, "learning_rate": 6.880733944954128e-07, "loss": 0.6017, "step": 4857 }, { "epoch": 3.0, "learning_rate": 6.192660550458715e-07, "loss": 0.473, "step": 4858 }, { "epoch": 3.0, "learning_rate": 5.504587155963303e-07, "loss": 0.4602, "step": 4859 }, { "epoch": 3.0, "learning_rate": 4.81651376146789e-07, "loss": 0.4551, "step": 4860 }, { "epoch": 3.0, "step": 4860, "total_flos": 0.0, "train_loss": 2.012981654225308, "train_runtime": 68108.5497, "train_samples_per_second": 9.134, "train_steps_per_second": 0.071 } ], "max_steps": 4860, "num_train_epochs": 3, "total_flos": 0.0, "trial_name": null, "trial_params": null }