{ "best_metric": 0.8935389133627019, "best_model_checkpoint": "xtreme_s_xlsr_t5lephone-small_minds14.en-all/checkpoint-9400", "epoch": 149.99628252788105, "global_step": 10050, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 1.9999999999999996e-07, "loss": 2.6343, "step": 1 }, { "epoch": 0.03, "learning_rate": 3.9999999999999993e-07, "loss": 2.6511, "step": 2 }, { "epoch": 0.04, "learning_rate": 6e-07, "loss": 2.637, "step": 3 }, { "epoch": 0.06, "learning_rate": 7.999999999999999e-07, "loss": 2.6419, "step": 4 }, { "epoch": 0.07, "learning_rate": 1e-06, "loss": 2.6541, "step": 5 }, { "epoch": 0.09, "learning_rate": 1.2e-06, "loss": 2.6411, "step": 6 }, { "epoch": 0.1, "learning_rate": 1.4e-06, "loss": 2.6381, "step": 7 }, { "epoch": 0.12, "learning_rate": 1.5999999999999997e-06, "loss": 2.6385, "step": 8 }, { "epoch": 0.13, "learning_rate": 1.8e-06, "loss": 2.6384, "step": 9 }, { "epoch": 0.15, "learning_rate": 2e-06, "loss": 2.6368, "step": 10 }, { "epoch": 0.16, "learning_rate": 2.1999999999999997e-06, "loss": 2.6385, "step": 11 }, { "epoch": 0.18, "learning_rate": 2.4e-06, "loss": 2.6431, "step": 12 }, { "epoch": 0.19, "learning_rate": 2.5999999999999997e-06, "loss": 2.6428, "step": 13 }, { "epoch": 0.21, "learning_rate": 2.8e-06, "loss": 2.6378, "step": 14 }, { "epoch": 0.22, "learning_rate": 2.9999999999999997e-06, "loss": 2.6362, "step": 15 }, { "epoch": 0.24, "learning_rate": 3.1999999999999994e-06, "loss": 2.6277, "step": 16 }, { "epoch": 0.25, "learning_rate": 3.4e-06, "loss": 2.6533, "step": 17 }, { "epoch": 0.27, "learning_rate": 3.6e-06, "loss": 2.645, "step": 18 }, { "epoch": 0.28, "learning_rate": 3.7999999999999996e-06, "loss": 2.644, "step": 19 }, { "epoch": 0.3, "learning_rate": 4e-06, "loss": 2.6494, "step": 20 }, { "epoch": 0.31, "learning_rate": 4.2e-06, "loss": 2.6442, "step": 21 }, { "epoch": 0.33, "learning_rate": 4.399999999999999e-06, "loss": 2.638, "step": 22 }, { "epoch": 0.34, "learning_rate": 4.599999999999999e-06, "loss": 2.6362, "step": 23 }, { "epoch": 0.36, "learning_rate": 4.8e-06, "loss": 2.6443, "step": 24 }, { "epoch": 0.37, "learning_rate": 4.9999999999999996e-06, "loss": 2.6295, "step": 25 }, { "epoch": 0.39, "learning_rate": 5.199999999999999e-06, "loss": 2.6204, "step": 26 }, { "epoch": 0.4, "learning_rate": 5.399999999999999e-06, "loss": 2.6425, "step": 27 }, { "epoch": 0.42, "learning_rate": 5.6e-06, "loss": 2.6329, "step": 28 }, { "epoch": 0.43, "learning_rate": 5.7999999999999995e-06, "loss": 2.6295, "step": 29 }, { "epoch": 0.45, "learning_rate": 5.999999999999999e-06, "loss": 2.6281, "step": 30 }, { "epoch": 0.46, "learning_rate": 6.199999999999999e-06, "loss": 2.6481, "step": 31 }, { "epoch": 0.48, "learning_rate": 6.399999999999999e-06, "loss": 2.6351, "step": 32 }, { "epoch": 0.49, "learning_rate": 6.599999999999999e-06, "loss": 2.6362, "step": 33 }, { "epoch": 0.51, "learning_rate": 6.8e-06, "loss": 2.6711, "step": 34 }, { "epoch": 0.52, "learning_rate": 7e-06, "loss": 2.6709, "step": 35 }, { "epoch": 0.54, "learning_rate": 7.2e-06, "loss": 2.6567, "step": 36 }, { "epoch": 0.55, "learning_rate": 7.3999999999999995e-06, "loss": 2.6572, "step": 37 }, { "epoch": 0.57, "learning_rate": 7.599999999999999e-06, "loss": 2.6388, "step": 38 }, { "epoch": 0.58, "learning_rate": 7.799999999999998e-06, "loss": 2.6439, "step": 39 }, { "epoch": 0.59, "learning_rate": 8e-06, "loss": 2.6409, "step": 40 }, { "epoch": 0.61, "learning_rate": 8.2e-06, "loss": 2.6417, "step": 41 }, { "epoch": 0.62, "learning_rate": 8.4e-06, "loss": 2.6226, "step": 42 }, { "epoch": 0.64, "learning_rate": 8.599999999999999e-06, "loss": 2.6615, "step": 43 }, { "epoch": 0.65, "learning_rate": 8.799999999999999e-06, "loss": 2.6323, "step": 44 }, { "epoch": 0.67, "learning_rate": 8.999999999999999e-06, "loss": 2.6342, "step": 45 }, { "epoch": 0.68, "learning_rate": 9.199999999999998e-06, "loss": 2.6494, "step": 46 }, { "epoch": 0.7, "learning_rate": 9.399999999999998e-06, "loss": 2.6318, "step": 47 }, { "epoch": 0.71, "learning_rate": 9.6e-06, "loss": 2.6485, "step": 48 }, { "epoch": 0.73, "learning_rate": 9.799999999999998e-06, "loss": 2.6277, "step": 49 }, { "epoch": 0.74, "learning_rate": 9.999999999999999e-06, "loss": 2.644, "step": 50 }, { "epoch": 0.76, "learning_rate": 1.02e-05, "loss": 2.6556, "step": 51 }, { "epoch": 0.77, "learning_rate": 1.0399999999999999e-05, "loss": 2.6486, "step": 52 }, { "epoch": 0.79, "learning_rate": 1.06e-05, "loss": 2.632, "step": 53 }, { "epoch": 0.8, "learning_rate": 1.0799999999999998e-05, "loss": 2.6526, "step": 54 }, { "epoch": 0.82, "learning_rate": 1.1e-05, "loss": 2.6667, "step": 55 }, { "epoch": 0.83, "learning_rate": 1.12e-05, "loss": 2.6432, "step": 56 }, { "epoch": 0.85, "learning_rate": 1.14e-05, "loss": 2.6647, "step": 57 }, { "epoch": 0.86, "learning_rate": 1.1599999999999999e-05, "loss": 2.6317, "step": 58 }, { "epoch": 0.88, "learning_rate": 1.1799999999999999e-05, "loss": 2.6349, "step": 59 }, { "epoch": 0.89, "learning_rate": 1.1999999999999999e-05, "loss": 2.6423, "step": 60 }, { "epoch": 0.91, "learning_rate": 1.2199999999999998e-05, "loss": 2.6268, "step": 61 }, { "epoch": 0.92, "learning_rate": 1.2399999999999998e-05, "loss": 2.6271, "step": 62 }, { "epoch": 0.94, "learning_rate": 1.26e-05, "loss": 2.603, "step": 63 }, { "epoch": 0.95, "learning_rate": 1.2799999999999998e-05, "loss": 2.5958, "step": 64 }, { "epoch": 0.97, "learning_rate": 1.3e-05, "loss": 2.6169, "step": 65 }, { "epoch": 0.98, "learning_rate": 1.3199999999999997e-05, "loss": 2.5735, "step": 66 }, { "epoch": 1.0, "learning_rate": 1.3399999999999999e-05, "loss": 2.6467, "step": 67 }, { "epoch": 1.01, "learning_rate": 1.36e-05, "loss": 3.3148, "step": 68 }, { "epoch": 1.03, "learning_rate": 1.3799999999999998e-05, "loss": 2.6297, "step": 69 }, { "epoch": 1.04, "learning_rate": 1.4e-05, "loss": 2.6577, "step": 70 }, { "epoch": 1.06, "learning_rate": 1.4199999999999998e-05, "loss": 2.6311, "step": 71 }, { "epoch": 1.07, "learning_rate": 1.44e-05, "loss": 2.616, "step": 72 }, { "epoch": 1.09, "learning_rate": 1.4599999999999997e-05, "loss": 2.6987, "step": 73 }, { "epoch": 1.1, "learning_rate": 1.4799999999999999e-05, "loss": 2.6268, "step": 74 }, { "epoch": 1.12, "learning_rate": 1.4999999999999999e-05, "loss": 2.565, "step": 75 }, { "epoch": 1.13, "learning_rate": 1.5199999999999998e-05, "loss": 2.6689, "step": 76 }, { "epoch": 1.15, "learning_rate": 1.5399999999999998e-05, "loss": 2.6792, "step": 77 }, { "epoch": 1.16, "learning_rate": 1.5599999999999996e-05, "loss": 2.5829, "step": 78 }, { "epoch": 1.18, "learning_rate": 1.5799999999999998e-05, "loss": 2.6033, "step": 79 }, { "epoch": 1.19, "learning_rate": 1.6e-05, "loss": 2.5901, "step": 80 }, { "epoch": 1.21, "learning_rate": 1.6199999999999997e-05, "loss": 2.5608, "step": 81 }, { "epoch": 1.22, "learning_rate": 1.64e-05, "loss": 2.5485, "step": 82 }, { "epoch": 1.24, "learning_rate": 1.6599999999999997e-05, "loss": 2.5484, "step": 83 }, { "epoch": 1.25, "learning_rate": 1.68e-05, "loss": 2.602, "step": 84 }, { "epoch": 1.27, "learning_rate": 1.6999999999999996e-05, "loss": 2.6658, "step": 85 }, { "epoch": 1.28, "learning_rate": 1.7199999999999998e-05, "loss": 2.6671, "step": 86 }, { "epoch": 1.3, "learning_rate": 1.74e-05, "loss": 2.676, "step": 87 }, { "epoch": 1.31, "learning_rate": 1.7599999999999998e-05, "loss": 2.6539, "step": 88 }, { "epoch": 1.33, "learning_rate": 1.78e-05, "loss": 2.6765, "step": 89 }, { "epoch": 1.34, "learning_rate": 1.7999999999999997e-05, "loss": 2.6133, "step": 90 }, { "epoch": 1.36, "learning_rate": 1.82e-05, "loss": 2.639, "step": 91 }, { "epoch": 1.37, "learning_rate": 1.8399999999999997e-05, "loss": 2.6448, "step": 92 }, { "epoch": 1.39, "learning_rate": 1.8599999999999998e-05, "loss": 2.6431, "step": 93 }, { "epoch": 1.4, "learning_rate": 1.8799999999999996e-05, "loss": 2.6401, "step": 94 }, { "epoch": 1.42, "learning_rate": 1.9e-05, "loss": 2.6407, "step": 95 }, { "epoch": 1.43, "learning_rate": 1.92e-05, "loss": 2.617, "step": 96 }, { "epoch": 1.45, "learning_rate": 1.9399999999999997e-05, "loss": 2.6529, "step": 97 }, { "epoch": 1.46, "learning_rate": 1.9599999999999995e-05, "loss": 2.6185, "step": 98 }, { "epoch": 1.48, "learning_rate": 1.98e-05, "loss": 2.6339, "step": 99 }, { "epoch": 1.49, "learning_rate": 1.9999999999999998e-05, "loss": 2.6058, "step": 100 }, { "epoch": 1.51, "learning_rate": 2.0199999999999996e-05, "loss": 2.6031, "step": 101 }, { "epoch": 1.52, "learning_rate": 2.04e-05, "loss": 2.6588, "step": 102 }, { "epoch": 1.54, "learning_rate": 2.06e-05, "loss": 2.6377, "step": 103 }, { "epoch": 1.55, "learning_rate": 2.0799999999999997e-05, "loss": 2.6863, "step": 104 }, { "epoch": 1.57, "learning_rate": 2.1e-05, "loss": 2.6617, "step": 105 }, { "epoch": 1.58, "learning_rate": 2.12e-05, "loss": 2.6473, "step": 106 }, { "epoch": 1.59, "learning_rate": 2.14e-05, "loss": 2.6711, "step": 107 }, { "epoch": 1.61, "learning_rate": 2.1599999999999996e-05, "loss": 2.6132, "step": 108 }, { "epoch": 1.62, "learning_rate": 2.1799999999999998e-05, "loss": 2.6237, "step": 109 }, { "epoch": 1.64, "learning_rate": 2.2e-05, "loss": 2.645, "step": 110 }, { "epoch": 1.65, "learning_rate": 2.2199999999999998e-05, "loss": 2.6069, "step": 111 }, { "epoch": 1.67, "learning_rate": 2.24e-05, "loss": 2.642, "step": 112 }, { "epoch": 1.68, "learning_rate": 2.2599999999999997e-05, "loss": 2.6126, "step": 113 }, { "epoch": 1.7, "learning_rate": 2.28e-05, "loss": 2.6048, "step": 114 }, { "epoch": 1.71, "learning_rate": 2.2999999999999997e-05, "loss": 2.6229, "step": 115 }, { "epoch": 1.73, "learning_rate": 2.3199999999999998e-05, "loss": 2.5935, "step": 116 }, { "epoch": 1.74, "learning_rate": 2.34e-05, "loss": 2.5616, "step": 117 }, { "epoch": 1.76, "learning_rate": 2.3599999999999998e-05, "loss": 2.6095, "step": 118 }, { "epoch": 1.77, "learning_rate": 2.38e-05, "loss": 2.5821, "step": 119 }, { "epoch": 1.79, "learning_rate": 2.3999999999999997e-05, "loss": 2.6188, "step": 120 }, { "epoch": 1.8, "learning_rate": 2.42e-05, "loss": 2.6443, "step": 121 }, { "epoch": 1.82, "learning_rate": 2.4399999999999997e-05, "loss": 2.6284, "step": 122 }, { "epoch": 1.83, "learning_rate": 2.4599999999999998e-05, "loss": 2.6273, "step": 123 }, { "epoch": 1.85, "learning_rate": 2.4799999999999996e-05, "loss": 2.6608, "step": 124 }, { "epoch": 1.86, "learning_rate": 2.4999999999999998e-05, "loss": 2.6192, "step": 125 }, { "epoch": 1.88, "learning_rate": 2.52e-05, "loss": 2.6312, "step": 126 }, { "epoch": 1.89, "learning_rate": 2.5399999999999997e-05, "loss": 2.6786, "step": 127 }, { "epoch": 1.91, "learning_rate": 2.5599999999999995e-05, "loss": 2.6266, "step": 128 }, { "epoch": 1.92, "learning_rate": 2.5799999999999997e-05, "loss": 2.6833, "step": 129 }, { "epoch": 1.94, "learning_rate": 2.6e-05, "loss": 2.6481, "step": 130 }, { "epoch": 1.95, "learning_rate": 2.6199999999999996e-05, "loss": 2.5106, "step": 131 }, { "epoch": 1.97, "learning_rate": 2.6399999999999995e-05, "loss": 2.5399, "step": 132 }, { "epoch": 1.98, "learning_rate": 2.66e-05, "loss": 2.5729, "step": 133 }, { "epoch": 2.0, "learning_rate": 2.6799999999999998e-05, "loss": 2.6409, "step": 134 }, { "epoch": 2.01, "learning_rate": 2.6999999999999996e-05, "loss": 3.1924, "step": 135 }, { "epoch": 2.03, "learning_rate": 2.72e-05, "loss": 2.5826, "step": 136 }, { "epoch": 2.04, "learning_rate": 2.74e-05, "loss": 2.6108, "step": 137 }, { "epoch": 2.06, "learning_rate": 2.7599999999999997e-05, "loss": 2.5618, "step": 138 }, { "epoch": 2.07, "learning_rate": 2.7799999999999995e-05, "loss": 2.6187, "step": 139 }, { "epoch": 2.09, "learning_rate": 2.8e-05, "loss": 2.5749, "step": 140 }, { "epoch": 2.1, "learning_rate": 2.8199999999999998e-05, "loss": 2.5772, "step": 141 }, { "epoch": 2.12, "learning_rate": 2.8399999999999996e-05, "loss": 2.6725, "step": 142 }, { "epoch": 2.13, "learning_rate": 2.86e-05, "loss": 2.5364, "step": 143 }, { "epoch": 2.15, "learning_rate": 2.88e-05, "loss": 2.6227, "step": 144 }, { "epoch": 2.16, "learning_rate": 2.8999999999999997e-05, "loss": 2.6355, "step": 145 }, { "epoch": 2.18, "learning_rate": 2.9199999999999995e-05, "loss": 2.4874, "step": 146 }, { "epoch": 2.19, "learning_rate": 2.94e-05, "loss": 2.5832, "step": 147 }, { "epoch": 2.21, "learning_rate": 2.9599999999999998e-05, "loss": 2.5628, "step": 148 }, { "epoch": 2.22, "learning_rate": 2.9799999999999996e-05, "loss": 2.5375, "step": 149 }, { "epoch": 2.24, "learning_rate": 2.9999999999999997e-05, "loss": 2.5401, "step": 150 }, { "epoch": 2.25, "learning_rate": 3.02e-05, "loss": 2.6058, "step": 151 }, { "epoch": 2.27, "learning_rate": 3.0399999999999997e-05, "loss": 2.6267, "step": 152 }, { "epoch": 2.28, "learning_rate": 3.06e-05, "loss": 2.5839, "step": 153 }, { "epoch": 2.3, "learning_rate": 3.0799999999999996e-05, "loss": 2.6529, "step": 154 }, { "epoch": 2.31, "learning_rate": 3.0999999999999995e-05, "loss": 2.6135, "step": 155 }, { "epoch": 2.33, "learning_rate": 3.119999999999999e-05, "loss": 2.6257, "step": 156 }, { "epoch": 2.34, "learning_rate": 3.14e-05, "loss": 2.7242, "step": 157 }, { "epoch": 2.36, "learning_rate": 3.1599999999999996e-05, "loss": 2.6371, "step": 158 }, { "epoch": 2.37, "learning_rate": 3.1799999999999994e-05, "loss": 2.5878, "step": 159 }, { "epoch": 2.39, "learning_rate": 3.2e-05, "loss": 2.6008, "step": 160 }, { "epoch": 2.4, "learning_rate": 3.22e-05, "loss": 2.5713, "step": 161 }, { "epoch": 2.42, "learning_rate": 3.2399999999999995e-05, "loss": 2.6273, "step": 162 }, { "epoch": 2.43, "learning_rate": 3.259999999999999e-05, "loss": 2.5838, "step": 163 }, { "epoch": 2.45, "learning_rate": 3.28e-05, "loss": 2.5846, "step": 164 }, { "epoch": 2.46, "learning_rate": 3.2999999999999996e-05, "loss": 2.562, "step": 165 }, { "epoch": 2.48, "learning_rate": 3.3199999999999994e-05, "loss": 2.5598, "step": 166 }, { "epoch": 2.49, "learning_rate": 3.34e-05, "loss": 2.4896, "step": 167 }, { "epoch": 2.51, "learning_rate": 3.36e-05, "loss": 2.5917, "step": 168 }, { "epoch": 2.52, "learning_rate": 3.3799999999999995e-05, "loss": 2.6609, "step": 169 }, { "epoch": 2.54, "learning_rate": 3.399999999999999e-05, "loss": 2.653, "step": 170 }, { "epoch": 2.55, "learning_rate": 3.42e-05, "loss": 2.6373, "step": 171 }, { "epoch": 2.57, "learning_rate": 3.4399999999999996e-05, "loss": 2.6079, "step": 172 }, { "epoch": 2.58, "learning_rate": 3.4599999999999994e-05, "loss": 2.616, "step": 173 }, { "epoch": 2.59, "learning_rate": 3.48e-05, "loss": 2.6555, "step": 174 }, { "epoch": 2.61, "learning_rate": 3.5e-05, "loss": 2.6044, "step": 175 }, { "epoch": 2.62, "learning_rate": 3.5199999999999995e-05, "loss": 2.6301, "step": 176 }, { "epoch": 2.64, "learning_rate": 3.539999999999999e-05, "loss": 2.6324, "step": 177 }, { "epoch": 2.65, "learning_rate": 3.56e-05, "loss": 2.5571, "step": 178 }, { "epoch": 2.67, "learning_rate": 3.5799999999999996e-05, "loss": 2.5637, "step": 179 }, { "epoch": 2.68, "learning_rate": 3.5999999999999994e-05, "loss": 2.4853, "step": 180 }, { "epoch": 2.7, "learning_rate": 3.62e-05, "loss": 2.5275, "step": 181 }, { "epoch": 2.71, "learning_rate": 3.64e-05, "loss": 2.4241, "step": 182 }, { "epoch": 2.73, "learning_rate": 3.6599999999999995e-05, "loss": 2.4874, "step": 183 }, { "epoch": 2.74, "learning_rate": 3.679999999999999e-05, "loss": 2.6265, "step": 184 }, { "epoch": 2.76, "learning_rate": 3.7e-05, "loss": 2.5432, "step": 185 }, { "epoch": 2.77, "learning_rate": 3.7199999999999996e-05, "loss": 2.6158, "step": 186 }, { "epoch": 2.79, "learning_rate": 3.7399999999999994e-05, "loss": 2.5841, "step": 187 }, { "epoch": 2.8, "learning_rate": 3.759999999999999e-05, "loss": 2.5802, "step": 188 }, { "epoch": 2.82, "learning_rate": 3.78e-05, "loss": 2.5986, "step": 189 }, { "epoch": 2.83, "learning_rate": 3.8e-05, "loss": 2.499, "step": 190 }, { "epoch": 2.85, "learning_rate": 3.8199999999999993e-05, "loss": 2.6219, "step": 191 }, { "epoch": 2.86, "learning_rate": 3.84e-05, "loss": 2.6017, "step": 192 }, { "epoch": 2.88, "learning_rate": 3.86e-05, "loss": 2.5413, "step": 193 }, { "epoch": 2.89, "learning_rate": 3.8799999999999994e-05, "loss": 2.5604, "step": 194 }, { "epoch": 2.91, "learning_rate": 3.9e-05, "loss": 2.5409, "step": 195 }, { "epoch": 2.92, "learning_rate": 3.919999999999999e-05, "loss": 2.5019, "step": 196 }, { "epoch": 2.94, "learning_rate": 3.9399999999999995e-05, "loss": 2.5966, "step": 197 }, { "epoch": 2.95, "learning_rate": 3.96e-05, "loss": 2.5088, "step": 198 }, { "epoch": 2.97, "learning_rate": 3.979999999999999e-05, "loss": 2.5385, "step": 199 }, { "epoch": 2.98, "learning_rate": 3.9999999999999996e-05, "loss": 2.3561, "step": 200 }, { "epoch": 2.98, "eval_accuracy": 0.13338228095937346, "eval_f1": 0.06813158475964522, "eval_loss": 2.5464296340942383, "eval_runtime": 349.0, "eval_samples_per_second": 11.708, "eval_steps_per_second": 0.734, "step": 200 }, { "epoch": 3.0, "learning_rate": 4.02e-05, "loss": 2.4665, "step": 201 }, { "epoch": 3.01, "learning_rate": 4.039999999999999e-05, "loss": 3.2467, "step": 202 }, { "epoch": 3.03, "learning_rate": 4.06e-05, "loss": 2.5004, "step": 203 }, { "epoch": 3.04, "learning_rate": 4.08e-05, "loss": 2.4464, "step": 204 }, { "epoch": 3.06, "learning_rate": 4.0999999999999994e-05, "loss": 2.5131, "step": 205 }, { "epoch": 3.07, "learning_rate": 4.12e-05, "loss": 2.6149, "step": 206 }, { "epoch": 3.09, "learning_rate": 4.14e-05, "loss": 2.4677, "step": 207 }, { "epoch": 3.1, "learning_rate": 4.1599999999999995e-05, "loss": 2.5008, "step": 208 }, { "epoch": 3.12, "learning_rate": 4.18e-05, "loss": 2.5037, "step": 209 }, { "epoch": 3.13, "learning_rate": 4.2e-05, "loss": 2.4629, "step": 210 }, { "epoch": 3.15, "learning_rate": 4.2199999999999996e-05, "loss": 2.4151, "step": 211 }, { "epoch": 3.16, "learning_rate": 4.24e-05, "loss": 2.3735, "step": 212 }, { "epoch": 3.18, "learning_rate": 4.259999999999999e-05, "loss": 2.4441, "step": 213 }, { "epoch": 3.19, "learning_rate": 4.28e-05, "loss": 2.432, "step": 214 }, { "epoch": 3.21, "learning_rate": 4.3e-05, "loss": 2.4883, "step": 215 }, { "epoch": 3.22, "learning_rate": 4.319999999999999e-05, "loss": 2.4319, "step": 216 }, { "epoch": 3.24, "learning_rate": 4.34e-05, "loss": 2.4026, "step": 217 }, { "epoch": 3.25, "learning_rate": 4.3599999999999996e-05, "loss": 2.357, "step": 218 }, { "epoch": 3.27, "learning_rate": 4.3799999999999994e-05, "loss": 2.5772, "step": 219 }, { "epoch": 3.28, "learning_rate": 4.4e-05, "loss": 2.3947, "step": 220 }, { "epoch": 3.3, "learning_rate": 4.42e-05, "loss": 2.4569, "step": 221 }, { "epoch": 3.31, "learning_rate": 4.4399999999999995e-05, "loss": 2.3699, "step": 222 }, { "epoch": 3.33, "learning_rate": 4.46e-05, "loss": 2.7355, "step": 223 }, { "epoch": 3.34, "learning_rate": 4.48e-05, "loss": 2.7759, "step": 224 }, { "epoch": 3.36, "learning_rate": 4.4999999999999996e-05, "loss": 2.6568, "step": 225 }, { "epoch": 3.37, "learning_rate": 4.5199999999999994e-05, "loss": 2.5626, "step": 226 }, { "epoch": 3.39, "learning_rate": 4.539999999999999e-05, "loss": 2.282, "step": 227 }, { "epoch": 3.4, "learning_rate": 4.56e-05, "loss": 2.3982, "step": 228 }, { "epoch": 3.42, "learning_rate": 4.5799999999999995e-05, "loss": 2.4393, "step": 229 }, { "epoch": 3.43, "learning_rate": 4.599999999999999e-05, "loss": 2.5938, "step": 230 }, { "epoch": 3.45, "learning_rate": 4.62e-05, "loss": 2.6461, "step": 231 }, { "epoch": 3.46, "learning_rate": 4.6399999999999996e-05, "loss": 2.327, "step": 232 }, { "epoch": 3.48, "learning_rate": 4.6599999999999994e-05, "loss": 2.3971, "step": 233 }, { "epoch": 3.49, "learning_rate": 4.68e-05, "loss": 2.515, "step": 234 }, { "epoch": 3.51, "learning_rate": 4.7e-05, "loss": 2.505, "step": 235 }, { "epoch": 3.52, "learning_rate": 4.7199999999999995e-05, "loss": 2.5023, "step": 236 }, { "epoch": 3.54, "learning_rate": 4.7399999999999993e-05, "loss": 2.4241, "step": 237 }, { "epoch": 3.55, "learning_rate": 4.76e-05, "loss": 2.4735, "step": 238 }, { "epoch": 3.57, "learning_rate": 4.7799999999999996e-05, "loss": 2.503, "step": 239 }, { "epoch": 3.58, "learning_rate": 4.7999999999999994e-05, "loss": 2.468, "step": 240 }, { "epoch": 3.59, "learning_rate": 4.82e-05, "loss": 2.4701, "step": 241 }, { "epoch": 3.61, "learning_rate": 4.84e-05, "loss": 2.4992, "step": 242 }, { "epoch": 3.62, "learning_rate": 4.8599999999999995e-05, "loss": 2.557, "step": 243 }, { "epoch": 3.64, "learning_rate": 4.8799999999999994e-05, "loss": 2.4359, "step": 244 }, { "epoch": 3.65, "learning_rate": 4.899999999999999e-05, "loss": 2.508, "step": 245 }, { "epoch": 3.67, "learning_rate": 4.9199999999999997e-05, "loss": 2.4404, "step": 246 }, { "epoch": 3.68, "learning_rate": 4.9399999999999995e-05, "loss": 2.2592, "step": 247 }, { "epoch": 3.7, "learning_rate": 4.959999999999999e-05, "loss": 2.4594, "step": 248 }, { "epoch": 3.71, "learning_rate": 4.98e-05, "loss": 2.3305, "step": 249 }, { "epoch": 3.73, "learning_rate": 4.9999999999999996e-05, "loss": 2.3883, "step": 250 }, { "epoch": 3.74, "learning_rate": 5.0199999999999994e-05, "loss": 2.2801, "step": 251 }, { "epoch": 3.76, "learning_rate": 5.04e-05, "loss": 2.4757, "step": 252 }, { "epoch": 3.77, "learning_rate": 5.06e-05, "loss": 2.4579, "step": 253 }, { "epoch": 3.79, "learning_rate": 5.0799999999999995e-05, "loss": 2.3734, "step": 254 }, { "epoch": 3.8, "learning_rate": 5.1e-05, "loss": 2.3411, "step": 255 }, { "epoch": 3.82, "learning_rate": 5.119999999999999e-05, "loss": 2.3687, "step": 256 }, { "epoch": 3.83, "learning_rate": 5.1399999999999996e-05, "loss": 2.4481, "step": 257 }, { "epoch": 3.85, "learning_rate": 5.1599999999999994e-05, "loss": 2.471, "step": 258 }, { "epoch": 3.86, "learning_rate": 5.179999999999999e-05, "loss": 2.4718, "step": 259 }, { "epoch": 3.88, "learning_rate": 5.2e-05, "loss": 2.4932, "step": 260 }, { "epoch": 3.89, "learning_rate": 5.2199999999999995e-05, "loss": 2.3567, "step": 261 }, { "epoch": 3.91, "learning_rate": 5.239999999999999e-05, "loss": 2.3381, "step": 262 }, { "epoch": 3.92, "learning_rate": 5.26e-05, "loss": 2.3708, "step": 263 }, { "epoch": 3.94, "learning_rate": 5.279999999999999e-05, "loss": 2.4944, "step": 264 }, { "epoch": 3.95, "learning_rate": 5.2999999999999994e-05, "loss": 2.3488, "step": 265 }, { "epoch": 3.97, "learning_rate": 5.32e-05, "loss": 2.4171, "step": 266 }, { "epoch": 3.98, "learning_rate": 5.339999999999999e-05, "loss": 2.3411, "step": 267 }, { "epoch": 4.0, "learning_rate": 5.3599999999999995e-05, "loss": 2.3492, "step": 268 }, { "epoch": 4.01, "learning_rate": 5.38e-05, "loss": 3.0346, "step": 269 }, { "epoch": 4.03, "learning_rate": 5.399999999999999e-05, "loss": 2.2454, "step": 270 }, { "epoch": 4.04, "learning_rate": 5.4199999999999996e-05, "loss": 2.2054, "step": 271 }, { "epoch": 4.06, "learning_rate": 5.44e-05, "loss": 2.2441, "step": 272 }, { "epoch": 4.07, "learning_rate": 5.459999999999999e-05, "loss": 2.3293, "step": 273 }, { "epoch": 4.09, "learning_rate": 5.48e-05, "loss": 2.2064, "step": 274 }, { "epoch": 4.1, "learning_rate": 5.499999999999999e-05, "loss": 2.3777, "step": 275 }, { "epoch": 4.12, "learning_rate": 5.519999999999999e-05, "loss": 2.1567, "step": 276 }, { "epoch": 4.13, "learning_rate": 5.54e-05, "loss": 2.1665, "step": 277 }, { "epoch": 4.15, "learning_rate": 5.559999999999999e-05, "loss": 2.249, "step": 278 }, { "epoch": 4.16, "learning_rate": 5.5799999999999994e-05, "loss": 2.1989, "step": 279 }, { "epoch": 4.18, "learning_rate": 5.6e-05, "loss": 2.2401, "step": 280 }, { "epoch": 4.19, "learning_rate": 5.619999999999999e-05, "loss": 2.1298, "step": 281 }, { "epoch": 4.21, "learning_rate": 5.6399999999999995e-05, "loss": 2.2022, "step": 282 }, { "epoch": 4.22, "learning_rate": 5.66e-05, "loss": 2.0053, "step": 283 }, { "epoch": 4.24, "learning_rate": 5.679999999999999e-05, "loss": 2.1011, "step": 284 }, { "epoch": 4.25, "learning_rate": 5.6999999999999996e-05, "loss": 2.0889, "step": 285 }, { "epoch": 4.27, "learning_rate": 5.72e-05, "loss": 2.2418, "step": 286 }, { "epoch": 4.28, "learning_rate": 5.739999999999999e-05, "loss": 2.1132, "step": 287 }, { "epoch": 4.3, "learning_rate": 5.76e-05, "loss": 2.2312, "step": 288 }, { "epoch": 4.31, "learning_rate": 5.78e-05, "loss": 1.9977, "step": 289 }, { "epoch": 4.33, "learning_rate": 5.7999999999999994e-05, "loss": 2.1724, "step": 290 }, { "epoch": 4.34, "learning_rate": 5.82e-05, "loss": 2.1606, "step": 291 }, { "epoch": 4.36, "learning_rate": 5.839999999999999e-05, "loss": 2.1853, "step": 292 }, { "epoch": 4.37, "learning_rate": 5.8599999999999995e-05, "loss": 2.1057, "step": 293 }, { "epoch": 4.39, "learning_rate": 5.88e-05, "loss": 2.0778, "step": 294 }, { "epoch": 4.4, "learning_rate": 5.899999999999999e-05, "loss": 1.967, "step": 295 }, { "epoch": 4.42, "learning_rate": 5.9199999999999996e-05, "loss": 2.3167, "step": 296 }, { "epoch": 4.43, "learning_rate": 5.94e-05, "loss": 2.0744, "step": 297 }, { "epoch": 4.45, "learning_rate": 5.959999999999999e-05, "loss": 2.1545, "step": 298 }, { "epoch": 4.46, "learning_rate": 5.98e-05, "loss": 2.2421, "step": 299 }, { "epoch": 4.48, "learning_rate": 5.9999999999999995e-05, "loss": 1.9392, "step": 300 }, { "epoch": 4.49, "learning_rate": 6.019999999999999e-05, "loss": 1.8914, "step": 301 }, { "epoch": 4.51, "learning_rate": 6.04e-05, "loss": 2.2063, "step": 302 }, { "epoch": 4.52, "learning_rate": 6.0599999999999996e-05, "loss": 2.0361, "step": 303 }, { "epoch": 4.54, "learning_rate": 6.0799999999999994e-05, "loss": 2.1857, "step": 304 }, { "epoch": 4.55, "learning_rate": 6.1e-05, "loss": 1.8373, "step": 305 }, { "epoch": 4.57, "learning_rate": 6.12e-05, "loss": 1.9555, "step": 306 }, { "epoch": 4.58, "learning_rate": 6.139999999999999e-05, "loss": 2.0524, "step": 307 }, { "epoch": 4.59, "learning_rate": 6.159999999999999e-05, "loss": 1.9854, "step": 308 }, { "epoch": 4.61, "learning_rate": 6.18e-05, "loss": 1.9668, "step": 309 }, { "epoch": 4.62, "learning_rate": 6.199999999999999e-05, "loss": 1.8757, "step": 310 }, { "epoch": 4.64, "learning_rate": 6.22e-05, "loss": 1.7739, "step": 311 }, { "epoch": 4.65, "learning_rate": 6.239999999999999e-05, "loss": 1.9759, "step": 312 }, { "epoch": 4.67, "learning_rate": 6.259999999999999e-05, "loss": 1.8669, "step": 313 }, { "epoch": 4.68, "learning_rate": 6.28e-05, "loss": 1.9185, "step": 314 }, { "epoch": 4.7, "learning_rate": 6.299999999999999e-05, "loss": 1.8514, "step": 315 }, { "epoch": 4.71, "learning_rate": 6.319999999999999e-05, "loss": 1.5974, "step": 316 }, { "epoch": 4.73, "learning_rate": 6.34e-05, "loss": 1.6711, "step": 317 }, { "epoch": 4.74, "learning_rate": 6.359999999999999e-05, "loss": 1.8531, "step": 318 }, { "epoch": 4.76, "learning_rate": 6.379999999999999e-05, "loss": 2.0521, "step": 319 }, { "epoch": 4.77, "learning_rate": 6.4e-05, "loss": 1.7659, "step": 320 }, { "epoch": 4.79, "learning_rate": 6.419999999999999e-05, "loss": 1.9653, "step": 321 }, { "epoch": 4.8, "learning_rate": 6.44e-05, "loss": 1.9988, "step": 322 }, { "epoch": 4.82, "learning_rate": 6.459999999999998e-05, "loss": 2.0249, "step": 323 }, { "epoch": 4.83, "learning_rate": 6.479999999999999e-05, "loss": 1.783, "step": 324 }, { "epoch": 4.85, "learning_rate": 6.5e-05, "loss": 2.1492, "step": 325 }, { "epoch": 4.86, "learning_rate": 6.519999999999999e-05, "loss": 1.7947, "step": 326 }, { "epoch": 4.88, "learning_rate": 6.539999999999999e-05, "loss": 1.9124, "step": 327 }, { "epoch": 4.89, "learning_rate": 6.56e-05, "loss": 1.7927, "step": 328 }, { "epoch": 4.91, "learning_rate": 6.579999999999999e-05, "loss": 1.7357, "step": 329 }, { "epoch": 4.92, "learning_rate": 6.599999999999999e-05, "loss": 1.733, "step": 330 }, { "epoch": 4.94, "learning_rate": 6.62e-05, "loss": 2.0315, "step": 331 }, { "epoch": 4.95, "learning_rate": 6.639999999999999e-05, "loss": 1.9838, "step": 332 }, { "epoch": 4.97, "learning_rate": 6.659999999999999e-05, "loss": 1.9308, "step": 333 }, { "epoch": 4.98, "learning_rate": 6.68e-05, "loss": 1.8901, "step": 334 }, { "epoch": 5.0, "learning_rate": 6.699999999999999e-05, "loss": 1.9154, "step": 335 }, { "epoch": 5.01, "learning_rate": 6.72e-05, "loss": 2.1272, "step": 336 }, { "epoch": 5.03, "learning_rate": 6.739999999999998e-05, "loss": 1.7548, "step": 337 }, { "epoch": 5.04, "learning_rate": 6.759999999999999e-05, "loss": 1.6528, "step": 338 }, { "epoch": 5.06, "learning_rate": 6.78e-05, "loss": 1.7716, "step": 339 }, { "epoch": 5.07, "learning_rate": 6.799999999999999e-05, "loss": 1.4649, "step": 340 }, { "epoch": 5.09, "learning_rate": 6.819999999999999e-05, "loss": 1.6351, "step": 341 }, { "epoch": 5.1, "learning_rate": 6.84e-05, "loss": 1.534, "step": 342 }, { "epoch": 5.12, "learning_rate": 6.859999999999999e-05, "loss": 1.6599, "step": 343 }, { "epoch": 5.13, "learning_rate": 6.879999999999999e-05, "loss": 1.6582, "step": 344 }, { "epoch": 5.15, "learning_rate": 6.9e-05, "loss": 1.8148, "step": 345 }, { "epoch": 5.16, "learning_rate": 6.919999999999999e-05, "loss": 1.3586, "step": 346 }, { "epoch": 5.18, "learning_rate": 6.939999999999999e-05, "loss": 1.8631, "step": 347 }, { "epoch": 5.19, "learning_rate": 6.96e-05, "loss": 1.785, "step": 348 }, { "epoch": 5.21, "learning_rate": 6.979999999999999e-05, "loss": 1.913, "step": 349 }, { "epoch": 5.22, "learning_rate": 6.979999999999999e-05, "loss": 1.6482, "step": 350 }, { "epoch": 5.24, "learning_rate": 7e-05, "loss": 1.5916, "step": 351 }, { "epoch": 5.25, "learning_rate": 7.02e-05, "loss": 1.6887, "step": 352 }, { "epoch": 5.27, "learning_rate": 7.039999999999999e-05, "loss": 1.6632, "step": 353 }, { "epoch": 5.28, "learning_rate": 7.06e-05, "loss": 1.4964, "step": 354 }, { "epoch": 5.3, "learning_rate": 7.079999999999999e-05, "loss": 1.8404, "step": 355 }, { "epoch": 5.31, "learning_rate": 7.099999999999999e-05, "loss": 1.6802, "step": 356 }, { "epoch": 5.33, "learning_rate": 7.12e-05, "loss": 1.7256, "step": 357 }, { "epoch": 5.34, "learning_rate": 7.139999999999999e-05, "loss": 1.7233, "step": 358 }, { "epoch": 5.36, "learning_rate": 7.159999999999999e-05, "loss": 1.7826, "step": 359 }, { "epoch": 5.37, "learning_rate": 7.18e-05, "loss": 1.7502, "step": 360 }, { "epoch": 5.39, "learning_rate": 7.199999999999999e-05, "loss": 1.5327, "step": 361 }, { "epoch": 5.4, "learning_rate": 7.219999999999999e-05, "loss": 1.4375, "step": 362 }, { "epoch": 5.42, "learning_rate": 7.24e-05, "loss": 1.6509, "step": 363 }, { "epoch": 5.43, "learning_rate": 7.259999999999999e-05, "loss": 1.628, "step": 364 }, { "epoch": 5.45, "learning_rate": 7.28e-05, "loss": 1.5856, "step": 365 }, { "epoch": 5.46, "learning_rate": 7.3e-05, "loss": 1.4559, "step": 366 }, { "epoch": 5.48, "learning_rate": 7.319999999999999e-05, "loss": 1.7826, "step": 367 }, { "epoch": 5.49, "learning_rate": 7.34e-05, "loss": 1.5844, "step": 368 }, { "epoch": 5.51, "learning_rate": 7.359999999999999e-05, "loss": 1.7874, "step": 369 }, { "epoch": 5.52, "learning_rate": 7.379999999999999e-05, "loss": 1.4583, "step": 370 }, { "epoch": 5.54, "learning_rate": 7.4e-05, "loss": 1.7617, "step": 371 }, { "epoch": 5.55, "learning_rate": 7.419999999999999e-05, "loss": 1.445, "step": 372 }, { "epoch": 5.57, "learning_rate": 7.439999999999999e-05, "loss": 1.5213, "step": 373 }, { "epoch": 5.58, "learning_rate": 7.46e-05, "loss": 1.6538, "step": 374 }, { "epoch": 5.59, "learning_rate": 7.479999999999999e-05, "loss": 1.642, "step": 375 }, { "epoch": 5.61, "learning_rate": 7.5e-05, "loss": 1.5272, "step": 376 }, { "epoch": 5.62, "learning_rate": 7.519999999999998e-05, "loss": 1.615, "step": 377 }, { "epoch": 5.64, "learning_rate": 7.54e-05, "loss": 1.4302, "step": 378 }, { "epoch": 5.65, "learning_rate": 7.56e-05, "loss": 1.8705, "step": 379 }, { "epoch": 5.67, "learning_rate": 7.579999999999999e-05, "loss": 1.7639, "step": 380 }, { "epoch": 5.68, "learning_rate": 7.6e-05, "loss": 1.3727, "step": 381 }, { "epoch": 5.7, "learning_rate": 7.62e-05, "loss": 1.7963, "step": 382 }, { "epoch": 5.71, "learning_rate": 7.639999999999999e-05, "loss": 1.3527, "step": 383 }, { "epoch": 5.73, "learning_rate": 7.66e-05, "loss": 1.3017, "step": 384 }, { "epoch": 5.74, "learning_rate": 7.68e-05, "loss": 1.5047, "step": 385 }, { "epoch": 5.76, "learning_rate": 7.699999999999999e-05, "loss": 1.6173, "step": 386 }, { "epoch": 5.77, "learning_rate": 7.72e-05, "loss": 1.3575, "step": 387 }, { "epoch": 5.79, "learning_rate": 7.74e-05, "loss": 1.6024, "step": 388 }, { "epoch": 5.8, "learning_rate": 7.759999999999999e-05, "loss": 1.3431, "step": 389 }, { "epoch": 5.82, "learning_rate": 7.780000000000001e-05, "loss": 1.6089, "step": 390 }, { "epoch": 5.83, "learning_rate": 7.8e-05, "loss": 1.6595, "step": 391 }, { "epoch": 5.85, "learning_rate": 7.819999999999999e-05, "loss": 1.4502, "step": 392 }, { "epoch": 5.86, "learning_rate": 7.839999999999998e-05, "loss": 1.7518, "step": 393 }, { "epoch": 5.88, "learning_rate": 7.86e-05, "loss": 1.4151, "step": 394 }, { "epoch": 5.89, "learning_rate": 7.879999999999999e-05, "loss": 1.4656, "step": 395 }, { "epoch": 5.91, "learning_rate": 7.899999999999998e-05, "loss": 1.6396, "step": 396 }, { "epoch": 5.92, "learning_rate": 7.92e-05, "loss": 1.3383, "step": 397 }, { "epoch": 5.94, "learning_rate": 7.939999999999999e-05, "loss": 1.5093, "step": 398 }, { "epoch": 5.95, "learning_rate": 7.959999999999998e-05, "loss": 1.3226, "step": 399 }, { "epoch": 5.97, "learning_rate": 7.98e-05, "loss": 1.1851, "step": 400 }, { "epoch": 5.97, "eval_accuracy": 0.5861478218306412, "eval_f1": 0.5583492858181879, "eval_loss": 1.5055691003799438, "eval_runtime": 343.8006, "eval_samples_per_second": 11.885, "eval_steps_per_second": 0.745, "step": 400 }, { "epoch": 5.98, "learning_rate": 7.999999999999999e-05, "loss": 1.3192, "step": 401 }, { "epoch": 6.0, "learning_rate": 8.019999999999998e-05, "loss": 1.6715, "step": 402 }, { "epoch": 6.01, "learning_rate": 8.04e-05, "loss": 1.7022, "step": 403 }, { "epoch": 6.03, "learning_rate": 8.06e-05, "loss": 1.2076, "step": 404 }, { "epoch": 6.04, "learning_rate": 8.079999999999999e-05, "loss": 1.451, "step": 405 }, { "epoch": 6.06, "learning_rate": 8.1e-05, "loss": 1.811, "step": 406 }, { "epoch": 6.07, "learning_rate": 8.12e-05, "loss": 1.6629, "step": 407 }, { "epoch": 6.09, "learning_rate": 8.139999999999999e-05, "loss": 1.3388, "step": 408 }, { "epoch": 6.1, "learning_rate": 8.16e-05, "loss": 1.5293, "step": 409 }, { "epoch": 6.12, "learning_rate": 8.18e-05, "loss": 1.4277, "step": 410 }, { "epoch": 6.13, "learning_rate": 8.199999999999999e-05, "loss": 1.195, "step": 411 }, { "epoch": 6.15, "learning_rate": 8.22e-05, "loss": 1.461, "step": 412 }, { "epoch": 6.16, "learning_rate": 8.24e-05, "loss": 1.5956, "step": 413 }, { "epoch": 6.18, "learning_rate": 8.259999999999999e-05, "loss": 1.2268, "step": 414 }, { "epoch": 6.19, "learning_rate": 8.28e-05, "loss": 1.6036, "step": 415 }, { "epoch": 6.21, "learning_rate": 8.3e-05, "loss": 1.4096, "step": 416 }, { "epoch": 6.22, "learning_rate": 8.319999999999999e-05, "loss": 1.3096, "step": 417 }, { "epoch": 6.24, "learning_rate": 8.34e-05, "loss": 1.1763, "step": 418 }, { "epoch": 6.25, "learning_rate": 8.36e-05, "loss": 1.5893, "step": 419 }, { "epoch": 6.27, "learning_rate": 8.379999999999999e-05, "loss": 1.2182, "step": 420 }, { "epoch": 6.28, "learning_rate": 8.4e-05, "loss": 1.243, "step": 421 }, { "epoch": 6.3, "learning_rate": 8.42e-05, "loss": 1.5265, "step": 422 }, { "epoch": 6.31, "learning_rate": 8.439999999999999e-05, "loss": 1.4368, "step": 423 }, { "epoch": 6.33, "learning_rate": 8.459999999999998e-05, "loss": 1.4227, "step": 424 }, { "epoch": 6.34, "learning_rate": 8.48e-05, "loss": 1.4331, "step": 425 }, { "epoch": 6.36, "learning_rate": 8.499999999999999e-05, "loss": 1.3622, "step": 426 }, { "epoch": 6.37, "learning_rate": 8.519999999999998e-05, "loss": 1.5802, "step": 427 }, { "epoch": 6.39, "learning_rate": 8.54e-05, "loss": 1.4871, "step": 428 }, { "epoch": 6.4, "learning_rate": 8.56e-05, "loss": 1.207, "step": 429 }, { "epoch": 6.42, "learning_rate": 8.579999999999998e-05, "loss": 1.2297, "step": 430 }, { "epoch": 6.43, "learning_rate": 8.6e-05, "loss": 1.5138, "step": 431 }, { "epoch": 6.45, "learning_rate": 8.62e-05, "loss": 1.6429, "step": 432 }, { "epoch": 6.46, "learning_rate": 8.639999999999999e-05, "loss": 1.3145, "step": 433 }, { "epoch": 6.48, "learning_rate": 8.659999999999999e-05, "loss": 1.2543, "step": 434 }, { "epoch": 6.49, "learning_rate": 8.68e-05, "loss": 1.0758, "step": 435 }, { "epoch": 6.51, "learning_rate": 8.699999999999999e-05, "loss": 1.6667, "step": 436 }, { "epoch": 6.52, "learning_rate": 8.719999999999999e-05, "loss": 1.3135, "step": 437 }, { "epoch": 6.54, "learning_rate": 8.74e-05, "loss": 1.3914, "step": 438 }, { "epoch": 6.55, "learning_rate": 8.759999999999999e-05, "loss": 1.3286, "step": 439 }, { "epoch": 6.57, "learning_rate": 8.779999999999999e-05, "loss": 1.3077, "step": 440 }, { "epoch": 6.58, "learning_rate": 8.8e-05, "loss": 1.3647, "step": 441 }, { "epoch": 6.59, "learning_rate": 8.819999999999999e-05, "loss": 1.3532, "step": 442 }, { "epoch": 6.61, "learning_rate": 8.84e-05, "loss": 1.5361, "step": 443 }, { "epoch": 6.62, "learning_rate": 8.86e-05, "loss": 1.2559, "step": 444 }, { "epoch": 6.64, "learning_rate": 8.879999999999999e-05, "loss": 1.4815, "step": 445 }, { "epoch": 6.65, "learning_rate": 8.9e-05, "loss": 0.9722, "step": 446 }, { "epoch": 6.67, "learning_rate": 8.92e-05, "loss": 1.6683, "step": 447 }, { "epoch": 6.68, "learning_rate": 8.939999999999999e-05, "loss": 1.5446, "step": 448 }, { "epoch": 6.7, "learning_rate": 8.96e-05, "loss": 1.5083, "step": 449 }, { "epoch": 6.71, "learning_rate": 8.98e-05, "loss": 1.4544, "step": 450 }, { "epoch": 6.73, "learning_rate": 8.999999999999999e-05, "loss": 1.5404, "step": 451 }, { "epoch": 6.74, "learning_rate": 9.02e-05, "loss": 1.3788, "step": 452 }, { "epoch": 6.76, "learning_rate": 9.039999999999999e-05, "loss": 1.7929, "step": 453 }, { "epoch": 6.77, "learning_rate": 9.059999999999999e-05, "loss": 1.6221, "step": 454 }, { "epoch": 6.79, "learning_rate": 9.079999999999998e-05, "loss": 2.0289, "step": 455 }, { "epoch": 6.8, "learning_rate": 9.099999999999999e-05, "loss": 1.3513, "step": 456 }, { "epoch": 6.82, "learning_rate": 9.12e-05, "loss": 1.1809, "step": 457 }, { "epoch": 6.83, "learning_rate": 9.139999999999999e-05, "loss": 1.659, "step": 458 }, { "epoch": 6.85, "learning_rate": 9.159999999999999e-05, "loss": 1.3772, "step": 459 }, { "epoch": 6.86, "learning_rate": 9.18e-05, "loss": 1.0775, "step": 460 }, { "epoch": 6.88, "learning_rate": 9.199999999999999e-05, "loss": 1.6983, "step": 461 }, { "epoch": 6.89, "learning_rate": 9.219999999999999e-05, "loss": 1.1782, "step": 462 }, { "epoch": 6.91, "learning_rate": 9.24e-05, "loss": 1.3642, "step": 463 }, { "epoch": 6.92, "learning_rate": 9.259999999999999e-05, "loss": 1.1298, "step": 464 }, { "epoch": 6.94, "learning_rate": 9.279999999999999e-05, "loss": 1.3433, "step": 465 }, { "epoch": 6.95, "learning_rate": 9.3e-05, "loss": 1.2165, "step": 466 }, { "epoch": 6.97, "learning_rate": 9.319999999999999e-05, "loss": 1.5146, "step": 467 }, { "epoch": 6.98, "learning_rate": 9.34e-05, "loss": 1.4164, "step": 468 }, { "epoch": 7.0, "learning_rate": 9.36e-05, "loss": 1.2011, "step": 469 }, { "epoch": 7.01, "learning_rate": 9.379999999999999e-05, "loss": 1.6677, "step": 470 }, { "epoch": 7.03, "learning_rate": 9.4e-05, "loss": 1.0573, "step": 471 }, { "epoch": 7.04, "learning_rate": 9.419999999999999e-05, "loss": 1.3889, "step": 472 }, { "epoch": 7.06, "learning_rate": 9.439999999999999e-05, "loss": 1.4316, "step": 473 }, { "epoch": 7.07, "learning_rate": 9.46e-05, "loss": 1.0497, "step": 474 }, { "epoch": 7.09, "learning_rate": 9.479999999999999e-05, "loss": 1.203, "step": 475 }, { "epoch": 7.1, "learning_rate": 9.499999999999999e-05, "loss": 1.543, "step": 476 }, { "epoch": 7.12, "learning_rate": 9.52e-05, "loss": 1.3419, "step": 477 }, { "epoch": 7.13, "learning_rate": 9.539999999999999e-05, "loss": 1.3901, "step": 478 }, { "epoch": 7.15, "learning_rate": 9.559999999999999e-05, "loss": 1.152, "step": 479 }, { "epoch": 7.16, "learning_rate": 9.58e-05, "loss": 1.1519, "step": 480 }, { "epoch": 7.18, "learning_rate": 9.599999999999999e-05, "loss": 1.2613, "step": 481 }, { "epoch": 7.19, "learning_rate": 9.62e-05, "loss": 1.5289, "step": 482 }, { "epoch": 7.21, "learning_rate": 9.64e-05, "loss": 1.3045, "step": 483 }, { "epoch": 7.22, "learning_rate": 9.659999999999999e-05, "loss": 1.1174, "step": 484 }, { "epoch": 7.24, "learning_rate": 9.68e-05, "loss": 0.9389, "step": 485 }, { "epoch": 7.25, "learning_rate": 9.699999999999999e-05, "loss": 1.277, "step": 486 }, { "epoch": 7.27, "learning_rate": 9.719999999999999e-05, "loss": 1.4324, "step": 487 }, { "epoch": 7.28, "learning_rate": 9.74e-05, "loss": 1.3783, "step": 488 }, { "epoch": 7.3, "learning_rate": 9.759999999999999e-05, "loss": 0.9759, "step": 489 }, { "epoch": 7.31, "learning_rate": 9.779999999999999e-05, "loss": 0.9928, "step": 490 }, { "epoch": 7.33, "learning_rate": 9.799999999999998e-05, "loss": 1.0354, "step": 491 }, { "epoch": 7.34, "learning_rate": 9.819999999999999e-05, "loss": 1.4557, "step": 492 }, { "epoch": 7.36, "learning_rate": 9.839999999999999e-05, "loss": 1.3679, "step": 493 }, { "epoch": 7.37, "learning_rate": 9.859999999999998e-05, "loss": 1.3464, "step": 494 }, { "epoch": 7.39, "learning_rate": 9.879999999999999e-05, "loss": 1.1559, "step": 495 }, { "epoch": 7.4, "learning_rate": 9.9e-05, "loss": 1.1564, "step": 496 }, { "epoch": 7.42, "learning_rate": 9.919999999999999e-05, "loss": 0.9569, "step": 497 }, { "epoch": 7.43, "learning_rate": 9.939999999999999e-05, "loss": 1.3608, "step": 498 }, { "epoch": 7.45, "learning_rate": 9.96e-05, "loss": 0.9687, "step": 499 }, { "epoch": 7.46, "learning_rate": 9.979999999999999e-05, "loss": 1.1822, "step": 500 }, { "epoch": 7.48, "learning_rate": 9.999999999999999e-05, "loss": 0.9472, "step": 501 }, { "epoch": 7.49, "learning_rate": 0.0001002, "loss": 1.1478, "step": 502 }, { "epoch": 7.51, "learning_rate": 0.00010039999999999999, "loss": 1.7424, "step": 503 }, { "epoch": 7.52, "learning_rate": 0.00010059999999999999, "loss": 1.2375, "step": 504 }, { "epoch": 7.54, "learning_rate": 0.0001008, "loss": 1.6025, "step": 505 }, { "epoch": 7.55, "learning_rate": 0.00010099999999999999, "loss": 1.0611, "step": 506 }, { "epoch": 7.57, "learning_rate": 0.0001012, "loss": 1.1944, "step": 507 }, { "epoch": 7.58, "learning_rate": 0.0001014, "loss": 1.2099, "step": 508 }, { "epoch": 7.59, "learning_rate": 0.00010159999999999999, "loss": 1.0249, "step": 509 }, { "epoch": 7.61, "learning_rate": 0.00010179999999999998, "loss": 1.1262, "step": 510 }, { "epoch": 7.62, "learning_rate": 0.000102, "loss": 1.1328, "step": 511 }, { "epoch": 7.64, "learning_rate": 0.00010219999999999999, "loss": 0.8573, "step": 512 }, { "epoch": 7.65, "learning_rate": 0.00010239999999999998, "loss": 1.3475, "step": 513 }, { "epoch": 7.67, "learning_rate": 0.0001026, "loss": 1.0557, "step": 514 }, { "epoch": 7.68, "learning_rate": 0.00010279999999999999, "loss": 1.043, "step": 515 }, { "epoch": 7.7, "learning_rate": 0.00010299999999999998, "loss": 1.2895, "step": 516 }, { "epoch": 7.71, "learning_rate": 0.00010319999999999999, "loss": 1.2999, "step": 517 }, { "epoch": 7.73, "learning_rate": 0.00010339999999999999, "loss": 0.8422, "step": 518 }, { "epoch": 7.74, "learning_rate": 0.00010359999999999998, "loss": 1.3048, "step": 519 }, { "epoch": 7.76, "learning_rate": 0.00010379999999999999, "loss": 1.054, "step": 520 }, { "epoch": 7.77, "learning_rate": 0.000104, "loss": 1.0085, "step": 521 }, { "epoch": 7.79, "learning_rate": 0.00010419999999999998, "loss": 1.372, "step": 522 }, { "epoch": 7.8, "learning_rate": 0.00010439999999999999, "loss": 1.2399, "step": 523 }, { "epoch": 7.82, "learning_rate": 0.0001046, "loss": 1.1393, "step": 524 }, { "epoch": 7.83, "learning_rate": 0.00010479999999999999, "loss": 1.1062, "step": 525 }, { "epoch": 7.85, "learning_rate": 0.00010499999999999999, "loss": 1.1683, "step": 526 }, { "epoch": 7.86, "learning_rate": 0.0001052, "loss": 1.4802, "step": 527 }, { "epoch": 7.88, "learning_rate": 0.00010539999999999999, "loss": 0.9131, "step": 528 }, { "epoch": 7.89, "learning_rate": 0.00010559999999999998, "loss": 0.8833, "step": 529 }, { "epoch": 7.91, "learning_rate": 0.0001058, "loss": 0.8876, "step": 530 }, { "epoch": 7.92, "learning_rate": 0.00010599999999999999, "loss": 1.2304, "step": 531 }, { "epoch": 7.94, "learning_rate": 0.00010619999999999998, "loss": 1.0087, "step": 532 }, { "epoch": 7.95, "learning_rate": 0.0001064, "loss": 1.0249, "step": 533 }, { "epoch": 7.97, "learning_rate": 0.00010659999999999999, "loss": 1.4578, "step": 534 }, { "epoch": 7.98, "learning_rate": 0.00010679999999999998, "loss": 0.8898, "step": 535 }, { "epoch": 8.0, "learning_rate": 0.000107, "loss": 1.1781, "step": 536 }, { "epoch": 8.01, "learning_rate": 0.00010719999999999999, "loss": 1.749, "step": 537 }, { "epoch": 8.03, "learning_rate": 0.00010739999999999998, "loss": 0.9843, "step": 538 }, { "epoch": 8.04, "learning_rate": 0.0001076, "loss": 1.1008, "step": 539 }, { "epoch": 8.06, "learning_rate": 0.00010779999999999999, "loss": 1.1893, "step": 540 }, { "epoch": 8.07, "learning_rate": 0.00010799999999999998, "loss": 0.9748, "step": 541 }, { "epoch": 8.09, "learning_rate": 0.0001082, "loss": 1.2305, "step": 542 }, { "epoch": 8.1, "learning_rate": 0.00010839999999999999, "loss": 1.154, "step": 543 }, { "epoch": 8.12, "learning_rate": 0.00010859999999999998, "loss": 1.2386, "step": 544 }, { "epoch": 8.13, "learning_rate": 0.0001088, "loss": 1.2909, "step": 545 }, { "epoch": 8.15, "learning_rate": 0.00010899999999999999, "loss": 1.1747, "step": 546 }, { "epoch": 8.16, "learning_rate": 0.00010919999999999998, "loss": 1.2138, "step": 547 }, { "epoch": 8.18, "learning_rate": 0.00010939999999999998, "loss": 1.421, "step": 548 }, { "epoch": 8.19, "learning_rate": 0.0001096, "loss": 1.2327, "step": 549 }, { "epoch": 8.21, "learning_rate": 0.00010979999999999999, "loss": 1.1603, "step": 550 }, { "epoch": 8.22, "learning_rate": 0.00010999999999999998, "loss": 1.0756, "step": 551 }, { "epoch": 8.24, "learning_rate": 0.0001102, "loss": 0.8484, "step": 552 }, { "epoch": 8.25, "learning_rate": 0.00011039999999999999, "loss": 0.8909, "step": 553 }, { "epoch": 8.27, "learning_rate": 0.00011059999999999998, "loss": 1.1358, "step": 554 }, { "epoch": 8.28, "learning_rate": 0.0001108, "loss": 1.3485, "step": 555 }, { "epoch": 8.3, "learning_rate": 0.00011099999999999999, "loss": 0.9957, "step": 556 }, { "epoch": 8.31, "learning_rate": 0.00011119999999999998, "loss": 1.0312, "step": 557 }, { "epoch": 8.33, "learning_rate": 0.0001114, "loss": 1.3304, "step": 558 }, { "epoch": 8.34, "learning_rate": 0.0001114, "loss": 1.3952, "step": 559 }, { "epoch": 8.36, "learning_rate": 0.00011159999999999999, "loss": 1.0264, "step": 560 }, { "epoch": 8.37, "learning_rate": 0.00011179999999999998, "loss": 1.1699, "step": 561 }, { "epoch": 8.39, "learning_rate": 0.000112, "loss": 1.2597, "step": 562 }, { "epoch": 8.4, "learning_rate": 0.00011219999999999999, "loss": 0.9144, "step": 563 }, { "epoch": 8.42, "learning_rate": 0.00011239999999999998, "loss": 1.4595, "step": 564 }, { "epoch": 8.43, "learning_rate": 0.0001126, "loss": 0.9706, "step": 565 }, { "epoch": 8.45, "learning_rate": 0.00011279999999999999, "loss": 1.5886, "step": 566 }, { "epoch": 8.46, "learning_rate": 0.00011299999999999998, "loss": 0.974, "step": 567 }, { "epoch": 8.48, "learning_rate": 0.0001132, "loss": 1.135, "step": 568 }, { "epoch": 8.49, "learning_rate": 0.00011339999999999999, "loss": 1.0261, "step": 569 }, { "epoch": 8.51, "learning_rate": 0.00011359999999999998, "loss": 1.3876, "step": 570 }, { "epoch": 8.52, "learning_rate": 0.0001138, "loss": 1.1007, "step": 571 }, { "epoch": 8.54, "learning_rate": 0.00011399999999999999, "loss": 1.0671, "step": 572 }, { "epoch": 8.55, "learning_rate": 0.00011419999999999998, "loss": 1.2932, "step": 573 }, { "epoch": 8.57, "learning_rate": 0.0001144, "loss": 1.4211, "step": 574 }, { "epoch": 8.58, "learning_rate": 0.0001146, "loss": 0.9986, "step": 575 }, { "epoch": 8.59, "learning_rate": 0.00011479999999999999, "loss": 1.0899, "step": 576 }, { "epoch": 8.61, "learning_rate": 0.000115, "loss": 0.8065, "step": 577 }, { "epoch": 8.62, "learning_rate": 0.0001152, "loss": 0.6506, "step": 578 }, { "epoch": 8.64, "learning_rate": 0.00011539999999999999, "loss": 0.8245, "step": 579 }, { "epoch": 8.65, "learning_rate": 0.0001156, "loss": 0.948, "step": 580 }, { "epoch": 8.67, "learning_rate": 0.0001158, "loss": 0.8349, "step": 581 }, { "epoch": 8.68, "learning_rate": 0.00011599999999999999, "loss": 0.8581, "step": 582 }, { "epoch": 8.7, "learning_rate": 0.00011619999999999998, "loss": 0.9276, "step": 583 }, { "epoch": 8.71, "learning_rate": 0.0001164, "loss": 1.0779, "step": 584 }, { "epoch": 8.73, "learning_rate": 0.00011659999999999999, "loss": 1.0055, "step": 585 }, { "epoch": 8.74, "learning_rate": 0.00011679999999999998, "loss": 1.0188, "step": 586 }, { "epoch": 8.76, "learning_rate": 0.000117, "loss": 0.9448, "step": 587 }, { "epoch": 8.77, "learning_rate": 0.00011719999999999999, "loss": 0.9065, "step": 588 }, { "epoch": 8.79, "learning_rate": 0.00011739999999999998, "loss": 1.3076, "step": 589 }, { "epoch": 8.8, "learning_rate": 0.0001176, "loss": 1.124, "step": 590 }, { "epoch": 8.82, "learning_rate": 0.00011779999999999999, "loss": 1.0797, "step": 591 }, { "epoch": 8.83, "learning_rate": 0.00011799999999999998, "loss": 0.9384, "step": 592 }, { "epoch": 8.85, "learning_rate": 0.0001182, "loss": 0.8262, "step": 593 }, { "epoch": 8.86, "learning_rate": 0.00011839999999999999, "loss": 1.4642, "step": 594 }, { "epoch": 8.88, "learning_rate": 0.00011859999999999998, "loss": 1.0777, "step": 595 }, { "epoch": 8.89, "learning_rate": 0.0001188, "loss": 1.0487, "step": 596 }, { "epoch": 8.91, "learning_rate": 0.00011899999999999999, "loss": 1.0663, "step": 597 }, { "epoch": 8.92, "learning_rate": 0.00011919999999999998, "loss": 1.3841, "step": 598 }, { "epoch": 8.94, "learning_rate": 0.0001194, "loss": 1.095, "step": 599 }, { "epoch": 8.95, "learning_rate": 0.0001196, "loss": 1.2805, "step": 600 }, { "epoch": 8.95, "eval_accuracy": 0.7043563387175722, "eval_f1": 0.7105830295720407, "eval_loss": 1.139738917350769, "eval_runtime": 343.0859, "eval_samples_per_second": 11.91, "eval_steps_per_second": 0.746, "step": 600 }, { "epoch": 8.97, "learning_rate": 0.00011979999999999998, "loss": 0.944, "step": 601 }, { "epoch": 8.98, "learning_rate": 0.00011999999999999999, "loss": 0.8265, "step": 602 }, { "epoch": 9.0, "learning_rate": 0.0001202, "loss": 0.9694, "step": 603 }, { "epoch": 9.01, "learning_rate": 0.00012039999999999999, "loss": 1.4551, "step": 604 }, { "epoch": 9.03, "learning_rate": 0.00012059999999999999, "loss": 1.2523, "step": 605 }, { "epoch": 9.04, "learning_rate": 0.0001208, "loss": 0.7349, "step": 606 }, { "epoch": 9.06, "learning_rate": 0.00012099999999999999, "loss": 1.3356, "step": 607 }, { "epoch": 9.07, "learning_rate": 0.00012119999999999999, "loss": 1.3264, "step": 608 }, { "epoch": 9.09, "learning_rate": 0.0001214, "loss": 0.9967, "step": 609 }, { "epoch": 9.1, "learning_rate": 0.00012159999999999999, "loss": 1.0618, "step": 610 }, { "epoch": 9.12, "learning_rate": 0.00012179999999999999, "loss": 1.1899, "step": 611 }, { "epoch": 9.13, "learning_rate": 0.000122, "loss": 0.8158, "step": 612 }, { "epoch": 9.15, "learning_rate": 0.0001222, "loss": 1.4054, "step": 613 }, { "epoch": 9.16, "learning_rate": 0.0001224, "loss": 0.9819, "step": 614 }, { "epoch": 9.18, "learning_rate": 0.0001226, "loss": 0.8747, "step": 615 }, { "epoch": 9.19, "learning_rate": 0.00012279999999999998, "loss": 0.989, "step": 616 }, { "epoch": 9.21, "learning_rate": 0.00012299999999999998, "loss": 1.0601, "step": 617 }, { "epoch": 9.22, "learning_rate": 0.00012319999999999999, "loss": 1.0786, "step": 618 }, { "epoch": 9.24, "learning_rate": 0.0001234, "loss": 0.8078, "step": 619 }, { "epoch": 9.25, "learning_rate": 0.0001236, "loss": 1.2362, "step": 620 }, { "epoch": 9.27, "learning_rate": 0.0001238, "loss": 0.7693, "step": 621 }, { "epoch": 9.28, "learning_rate": 0.00012399999999999998, "loss": 0.8017, "step": 622 }, { "epoch": 9.3, "learning_rate": 0.00012419999999999998, "loss": 1.3265, "step": 623 }, { "epoch": 9.31, "learning_rate": 0.0001244, "loss": 0.7762, "step": 624 }, { "epoch": 9.33, "learning_rate": 0.0001246, "loss": 1.02, "step": 625 }, { "epoch": 9.34, "learning_rate": 0.00012479999999999997, "loss": 1.1217, "step": 626 }, { "epoch": 9.36, "learning_rate": 0.000125, "loss": 1.0048, "step": 627 }, { "epoch": 9.37, "learning_rate": 0.00012519999999999998, "loss": 0.9923, "step": 628 }, { "epoch": 9.39, "learning_rate": 0.00012539999999999999, "loss": 0.7986, "step": 629 }, { "epoch": 9.4, "learning_rate": 0.0001256, "loss": 0.8572, "step": 630 }, { "epoch": 9.42, "learning_rate": 0.0001258, "loss": 0.7296, "step": 631 }, { "epoch": 9.43, "learning_rate": 0.00012599999999999997, "loss": 1.1171, "step": 632 }, { "epoch": 9.45, "learning_rate": 0.0001262, "loss": 0.9852, "step": 633 }, { "epoch": 9.46, "learning_rate": 0.00012639999999999998, "loss": 0.9377, "step": 634 }, { "epoch": 9.48, "learning_rate": 0.0001266, "loss": 0.8699, "step": 635 }, { "epoch": 9.49, "learning_rate": 0.0001268, "loss": 0.9284, "step": 636 }, { "epoch": 9.51, "learning_rate": 0.000127, "loss": 0.9991, "step": 637 }, { "epoch": 9.52, "learning_rate": 0.00012719999999999997, "loss": 1.0121, "step": 638 }, { "epoch": 9.54, "learning_rate": 0.0001274, "loss": 1.0221, "step": 639 }, { "epoch": 9.55, "learning_rate": 0.00012759999999999998, "loss": 1.0897, "step": 640 }, { "epoch": 9.57, "learning_rate": 0.0001278, "loss": 1.2973, "step": 641 }, { "epoch": 9.58, "learning_rate": 0.000128, "loss": 1.0074, "step": 642 }, { "epoch": 9.59, "learning_rate": 0.0001282, "loss": 1.2608, "step": 643 }, { "epoch": 9.61, "learning_rate": 0.00012839999999999998, "loss": 1.2781, "step": 644 }, { "epoch": 9.62, "learning_rate": 0.00012859999999999998, "loss": 1.029, "step": 645 }, { "epoch": 9.64, "learning_rate": 0.0001288, "loss": 0.8011, "step": 646 }, { "epoch": 9.65, "learning_rate": 0.000129, "loss": 0.7844, "step": 647 }, { "epoch": 9.67, "learning_rate": 0.00012919999999999997, "loss": 1.3425, "step": 648 }, { "epoch": 9.68, "learning_rate": 0.0001294, "loss": 1.4064, "step": 649 }, { "epoch": 9.7, "learning_rate": 0.00012959999999999998, "loss": 1.0535, "step": 650 }, { "epoch": 9.71, "learning_rate": 0.00012979999999999998, "loss": 1.2251, "step": 651 }, { "epoch": 9.73, "learning_rate": 0.00013, "loss": 1.0629, "step": 652 }, { "epoch": 9.74, "learning_rate": 0.0001302, "loss": 0.9377, "step": 653 }, { "epoch": 9.76, "learning_rate": 0.00013039999999999997, "loss": 1.1107, "step": 654 }, { "epoch": 9.77, "learning_rate": 0.0001306, "loss": 1.4752, "step": 655 }, { "epoch": 9.79, "learning_rate": 0.00013079999999999998, "loss": 1.0769, "step": 656 }, { "epoch": 9.8, "learning_rate": 0.00013099999999999999, "loss": 0.988, "step": 657 }, { "epoch": 9.82, "learning_rate": 0.0001312, "loss": 1.4048, "step": 658 }, { "epoch": 9.83, "learning_rate": 0.0001314, "loss": 1.0237, "step": 659 }, { "epoch": 9.85, "learning_rate": 0.00013159999999999997, "loss": 0.6261, "step": 660 }, { "epoch": 9.86, "learning_rate": 0.0001318, "loss": 0.7488, "step": 661 }, { "epoch": 9.88, "learning_rate": 0.00013199999999999998, "loss": 1.1096, "step": 662 }, { "epoch": 9.89, "learning_rate": 0.0001322, "loss": 0.8872, "step": 663 }, { "epoch": 9.91, "learning_rate": 0.0001324, "loss": 0.9701, "step": 664 }, { "epoch": 9.92, "learning_rate": 0.0001326, "loss": 0.7629, "step": 665 }, { "epoch": 9.94, "learning_rate": 0.00013279999999999998, "loss": 0.8341, "step": 666 }, { "epoch": 9.95, "learning_rate": 0.000133, "loss": 1.316, "step": 667 }, { "epoch": 9.97, "learning_rate": 0.00013319999999999999, "loss": 0.8714, "step": 668 }, { "epoch": 9.98, "learning_rate": 0.0001334, "loss": 0.8048, "step": 669 }, { "epoch": 10.0, "learning_rate": 0.0001336, "loss": 1.276, "step": 670 }, { "epoch": 10.01, "learning_rate": 0.0001338, "loss": 1.2213, "step": 671 }, { "epoch": 10.03, "learning_rate": 0.00013399999999999998, "loss": 0.954, "step": 672 }, { "epoch": 10.04, "learning_rate": 0.0001342, "loss": 0.7978, "step": 673 }, { "epoch": 10.06, "learning_rate": 0.0001344, "loss": 0.9366, "step": 674 }, { "epoch": 10.07, "learning_rate": 0.0001346, "loss": 0.9517, "step": 675 }, { "epoch": 10.09, "learning_rate": 0.00013479999999999997, "loss": 0.8653, "step": 676 }, { "epoch": 10.1, "learning_rate": 0.000135, "loss": 0.8692, "step": 677 }, { "epoch": 10.12, "learning_rate": 0.00013519999999999998, "loss": 0.7885, "step": 678 }, { "epoch": 10.13, "learning_rate": 0.00013539999999999998, "loss": 1.0205, "step": 679 }, { "epoch": 10.15, "learning_rate": 0.0001356, "loss": 0.9433, "step": 680 }, { "epoch": 10.16, "learning_rate": 0.0001358, "loss": 0.769, "step": 681 }, { "epoch": 10.18, "learning_rate": 0.00013599999999999997, "loss": 0.7986, "step": 682 }, { "epoch": 10.19, "learning_rate": 0.0001362, "loss": 0.8684, "step": 683 }, { "epoch": 10.21, "learning_rate": 0.00013639999999999998, "loss": 1.1515, "step": 684 }, { "epoch": 10.22, "learning_rate": 0.00013659999999999999, "loss": 0.6979, "step": 685 }, { "epoch": 10.24, "learning_rate": 0.0001368, "loss": 0.6019, "step": 686 }, { "epoch": 10.25, "learning_rate": 0.000137, "loss": 0.9636, "step": 687 }, { "epoch": 10.27, "learning_rate": 0.00013719999999999997, "loss": 0.9187, "step": 688 }, { "epoch": 10.28, "learning_rate": 0.0001374, "loss": 1.0534, "step": 689 }, { "epoch": 10.3, "learning_rate": 0.00013759999999999998, "loss": 0.5844, "step": 690 }, { "epoch": 10.31, "learning_rate": 0.0001378, "loss": 1.2123, "step": 691 }, { "epoch": 10.33, "learning_rate": 0.000138, "loss": 1.0089, "step": 692 }, { "epoch": 10.34, "learning_rate": 0.0001382, "loss": 1.1607, "step": 693 }, { "epoch": 10.36, "learning_rate": 0.00013839999999999998, "loss": 1.3608, "step": 694 }, { "epoch": 10.37, "learning_rate": 0.0001386, "loss": 0.8186, "step": 695 }, { "epoch": 10.39, "learning_rate": 0.00013879999999999999, "loss": 1.2984, "step": 696 }, { "epoch": 10.4, "learning_rate": 0.000139, "loss": 0.8887, "step": 697 }, { "epoch": 10.42, "learning_rate": 0.0001392, "loss": 1.0305, "step": 698 }, { "epoch": 10.43, "learning_rate": 0.0001394, "loss": 0.8821, "step": 699 }, { "epoch": 10.45, "learning_rate": 0.00013959999999999998, "loss": 0.9324, "step": 700 }, { "epoch": 10.46, "learning_rate": 0.00013979999999999998, "loss": 0.7349, "step": 701 }, { "epoch": 10.48, "learning_rate": 0.00014, "loss": 0.9737, "step": 702 }, { "epoch": 10.49, "learning_rate": 0.0001402, "loss": 0.7634, "step": 703 }, { "epoch": 10.51, "learning_rate": 0.0001404, "loss": 1.2076, "step": 704 }, { "epoch": 10.52, "learning_rate": 0.0001406, "loss": 0.6951, "step": 705 }, { "epoch": 10.54, "learning_rate": 0.00014079999999999998, "loss": 0.8113, "step": 706 }, { "epoch": 10.55, "learning_rate": 0.00014099999999999998, "loss": 0.6317, "step": 707 }, { "epoch": 10.57, "learning_rate": 0.0001412, "loss": 1.0482, "step": 708 }, { "epoch": 10.58, "learning_rate": 0.0001414, "loss": 0.8645, "step": 709 }, { "epoch": 10.59, "learning_rate": 0.00014159999999999997, "loss": 1.0108, "step": 710 }, { "epoch": 10.61, "learning_rate": 0.0001418, "loss": 0.9291, "step": 711 }, { "epoch": 10.62, "learning_rate": 0.00014199999999999998, "loss": 0.7584, "step": 712 }, { "epoch": 10.64, "learning_rate": 0.0001422, "loss": 0.9291, "step": 713 }, { "epoch": 10.65, "learning_rate": 0.0001424, "loss": 0.9126, "step": 714 }, { "epoch": 10.67, "learning_rate": 0.0001426, "loss": 0.7771, "step": 715 }, { "epoch": 10.68, "learning_rate": 0.00014279999999999997, "loss": 1.5661, "step": 716 }, { "epoch": 10.7, "learning_rate": 0.00014299999999999998, "loss": 1.2408, "step": 717 }, { "epoch": 10.71, "learning_rate": 0.00014319999999999998, "loss": 0.7724, "step": 718 }, { "epoch": 10.73, "learning_rate": 0.0001434, "loss": 0.8097, "step": 719 }, { "epoch": 10.74, "learning_rate": 0.0001436, "loss": 0.4914, "step": 720 }, { "epoch": 10.76, "learning_rate": 0.0001438, "loss": 0.8946, "step": 721 }, { "epoch": 10.77, "learning_rate": 0.00014399999999999998, "loss": 0.876, "step": 722 }, { "epoch": 10.79, "learning_rate": 0.00014419999999999998, "loss": 0.8784, "step": 723 }, { "epoch": 10.8, "learning_rate": 0.00014439999999999999, "loss": 1.0133, "step": 724 }, { "epoch": 10.82, "learning_rate": 0.0001446, "loss": 1.1433, "step": 725 }, { "epoch": 10.83, "learning_rate": 0.0001448, "loss": 1.361, "step": 726 }, { "epoch": 10.85, "learning_rate": 0.000145, "loss": 1.006, "step": 727 }, { "epoch": 10.86, "learning_rate": 0.00014519999999999998, "loss": 1.2211, "step": 728 }, { "epoch": 10.88, "learning_rate": 0.00014539999999999998, "loss": 0.9271, "step": 729 }, { "epoch": 10.89, "learning_rate": 0.0001456, "loss": 1.0685, "step": 730 }, { "epoch": 10.91, "learning_rate": 0.0001458, "loss": 1.1139, "step": 731 }, { "epoch": 10.92, "learning_rate": 0.000146, "loss": 0.9508, "step": 732 }, { "epoch": 10.94, "learning_rate": 0.0001462, "loss": 0.6874, "step": 733 }, { "epoch": 10.95, "learning_rate": 0.00014639999999999998, "loss": 1.0912, "step": 734 }, { "epoch": 10.97, "learning_rate": 0.00014659999999999999, "loss": 0.5254, "step": 735 }, { "epoch": 10.98, "learning_rate": 0.0001468, "loss": 1.1663, "step": 736 }, { "epoch": 11.0, "learning_rate": 0.000147, "loss": 0.7879, "step": 737 }, { "epoch": 11.01, "learning_rate": 0.00014719999999999997, "loss": 1.0129, "step": 738 }, { "epoch": 11.03, "learning_rate": 0.00014739999999999998, "loss": 0.8334, "step": 739 }, { "epoch": 11.04, "learning_rate": 0.00014759999999999998, "loss": 0.649, "step": 740 }, { "epoch": 11.06, "learning_rate": 0.0001478, "loss": 1.2923, "step": 741 }, { "epoch": 11.07, "learning_rate": 0.000148, "loss": 0.7388, "step": 742 }, { "epoch": 11.09, "learning_rate": 0.0001482, "loss": 1.1052, "step": 743 }, { "epoch": 11.1, "learning_rate": 0.00014839999999999998, "loss": 0.8788, "step": 744 }, { "epoch": 11.12, "learning_rate": 0.00014859999999999998, "loss": 0.8711, "step": 745 }, { "epoch": 11.13, "learning_rate": 0.00014879999999999998, "loss": 1.0916, "step": 746 }, { "epoch": 11.15, "learning_rate": 0.000149, "loss": 0.9535, "step": 747 }, { "epoch": 11.16, "learning_rate": 0.0001492, "loss": 0.8849, "step": 748 }, { "epoch": 11.18, "learning_rate": 0.0001494, "loss": 0.9825, "step": 749 }, { "epoch": 11.19, "learning_rate": 0.00014959999999999998, "loss": 0.9475, "step": 750 }, { "epoch": 11.21, "learning_rate": 0.00014979999999999998, "loss": 1.3699, "step": 751 }, { "epoch": 11.22, "learning_rate": 0.00015, "loss": 0.7989, "step": 752 }, { "epoch": 11.24, "learning_rate": 0.0001502, "loss": 1.1212, "step": 753 }, { "epoch": 11.25, "learning_rate": 0.00015039999999999997, "loss": 0.7149, "step": 754 }, { "epoch": 11.27, "learning_rate": 0.00015059999999999997, "loss": 0.9158, "step": 755 }, { "epoch": 11.28, "learning_rate": 0.0001508, "loss": 0.7415, "step": 756 }, { "epoch": 11.3, "learning_rate": 0.00015099999999999998, "loss": 0.8255, "step": 757 }, { "epoch": 11.31, "learning_rate": 0.0001512, "loss": 1.0209, "step": 758 }, { "epoch": 11.33, "learning_rate": 0.0001514, "loss": 0.8702, "step": 759 }, { "epoch": 11.34, "learning_rate": 0.00015159999999999997, "loss": 1.0192, "step": 760 }, { "epoch": 11.36, "learning_rate": 0.00015179999999999998, "loss": 0.6197, "step": 761 }, { "epoch": 11.37, "learning_rate": 0.000152, "loss": 0.8326, "step": 762 }, { "epoch": 11.39, "learning_rate": 0.00015219999999999999, "loss": 0.8132, "step": 763 }, { "epoch": 11.4, "learning_rate": 0.0001524, "loss": 0.7399, "step": 764 }, { "epoch": 11.42, "learning_rate": 0.0001526, "loss": 1.057, "step": 765 }, { "epoch": 11.43, "learning_rate": 0.00015279999999999997, "loss": 0.7861, "step": 766 }, { "epoch": 11.45, "learning_rate": 0.00015299999999999998, "loss": 0.9496, "step": 767 }, { "epoch": 11.46, "learning_rate": 0.0001532, "loss": 1.1022, "step": 768 }, { "epoch": 11.48, "learning_rate": 0.0001534, "loss": 0.7708, "step": 769 }, { "epoch": 11.49, "learning_rate": 0.0001536, "loss": 0.5718, "step": 770 }, { "epoch": 11.51, "learning_rate": 0.0001538, "loss": 1.4677, "step": 771 }, { "epoch": 11.52, "learning_rate": 0.00015399999999999998, "loss": 0.9794, "step": 772 }, { "epoch": 11.54, "learning_rate": 0.00015419999999999998, "loss": 1.1667, "step": 773 }, { "epoch": 11.55, "learning_rate": 0.0001544, "loss": 1.3804, "step": 774 }, { "epoch": 11.57, "learning_rate": 0.0001546, "loss": 1.3366, "step": 775 }, { "epoch": 11.58, "learning_rate": 0.0001548, "loss": 0.749, "step": 776 }, { "epoch": 11.59, "learning_rate": 0.000155, "loss": 0.6442, "step": 777 }, { "epoch": 11.61, "learning_rate": 0.00015519999999999998, "loss": 1.1415, "step": 778 }, { "epoch": 11.62, "learning_rate": 0.00015539999999999998, "loss": 1.4601, "step": 779 }, { "epoch": 11.64, "learning_rate": 0.00015560000000000001, "loss": 1.3154, "step": 780 }, { "epoch": 11.65, "learning_rate": 0.0001558, "loss": 0.5728, "step": 781 }, { "epoch": 11.67, "learning_rate": 0.000156, "loss": 0.6963, "step": 782 }, { "epoch": 11.68, "learning_rate": 0.0001562, "loss": 0.8572, "step": 783 }, { "epoch": 11.7, "learning_rate": 0.00015639999999999998, "loss": 0.8292, "step": 784 }, { "epoch": 11.71, "learning_rate": 0.00015659999999999998, "loss": 1.1899, "step": 785 }, { "epoch": 11.73, "learning_rate": 0.00015679999999999996, "loss": 0.6802, "step": 786 }, { "epoch": 11.74, "learning_rate": 0.000157, "loss": 1.5924, "step": 787 }, { "epoch": 11.76, "learning_rate": 0.0001572, "loss": 1.3299, "step": 788 }, { "epoch": 11.77, "learning_rate": 0.00015739999999999998, "loss": 1.2905, "step": 789 }, { "epoch": 11.79, "learning_rate": 0.00015759999999999998, "loss": 0.7761, "step": 790 }, { "epoch": 11.8, "learning_rate": 0.0001578, "loss": 0.7079, "step": 791 }, { "epoch": 11.82, "learning_rate": 0.00015799999999999996, "loss": 1.2024, "step": 792 }, { "epoch": 11.83, "learning_rate": 0.00015819999999999997, "loss": 1.0789, "step": 793 }, { "epoch": 11.85, "learning_rate": 0.0001584, "loss": 0.8125, "step": 794 }, { "epoch": 11.86, "learning_rate": 0.00015859999999999998, "loss": 0.7292, "step": 795 }, { "epoch": 11.88, "learning_rate": 0.00015879999999999998, "loss": 0.7865, "step": 796 }, { "epoch": 11.89, "learning_rate": 0.000159, "loss": 1.002, "step": 797 }, { "epoch": 11.91, "learning_rate": 0.00015919999999999997, "loss": 0.8947, "step": 798 }, { "epoch": 11.92, "learning_rate": 0.00015939999999999997, "loss": 1.1309, "step": 799 }, { "epoch": 11.94, "learning_rate": 0.0001596, "loss": 1.0801, "step": 800 }, { "epoch": 11.94, "eval_accuracy": 0.7197748409202154, "eval_f1": 0.7132262117406672, "eval_loss": 0.9862720370292664, "eval_runtime": 343.503, "eval_samples_per_second": 11.895, "eval_steps_per_second": 0.745, "step": 800 }, { "epoch": 11.95, "learning_rate": 0.00015979999999999998, "loss": 0.7474, "step": 801 }, { "epoch": 11.97, "learning_rate": 0.00015999999999999999, "loss": 0.843, "step": 802 }, { "epoch": 11.98, "learning_rate": 0.0001602, "loss": 0.7314, "step": 803 }, { "epoch": 12.0, "learning_rate": 0.00016039999999999997, "loss": 0.9725, "step": 804 }, { "epoch": 12.01, "learning_rate": 0.00016059999999999997, "loss": 1.4201, "step": 805 }, { "epoch": 12.03, "learning_rate": 0.0001608, "loss": 0.5819, "step": 806 }, { "epoch": 12.04, "learning_rate": 0.00016099999999999998, "loss": 1.0351, "step": 807 }, { "epoch": 12.06, "learning_rate": 0.0001612, "loss": 0.8731, "step": 808 }, { "epoch": 12.07, "learning_rate": 0.0001614, "loss": 0.8561, "step": 809 }, { "epoch": 12.09, "learning_rate": 0.00016159999999999997, "loss": 0.9219, "step": 810 }, { "epoch": 12.1, "learning_rate": 0.00016179999999999998, "loss": 0.7039, "step": 811 }, { "epoch": 12.12, "learning_rate": 0.000162, "loss": 1.0735, "step": 812 }, { "epoch": 12.13, "learning_rate": 0.00016219999999999999, "loss": 1.1316, "step": 813 }, { "epoch": 12.15, "learning_rate": 0.0001624, "loss": 0.83, "step": 814 }, { "epoch": 12.16, "learning_rate": 0.0001626, "loss": 0.6262, "step": 815 }, { "epoch": 12.18, "learning_rate": 0.00016279999999999997, "loss": 0.6751, "step": 816 }, { "epoch": 12.19, "learning_rate": 0.00016299999999999998, "loss": 0.7606, "step": 817 }, { "epoch": 12.21, "learning_rate": 0.0001632, "loss": 0.5516, "step": 818 }, { "epoch": 12.22, "learning_rate": 0.0001634, "loss": 0.8138, "step": 819 }, { "epoch": 12.24, "learning_rate": 0.0001636, "loss": 0.3187, "step": 820 }, { "epoch": 12.25, "learning_rate": 0.0001638, "loss": 0.5061, "step": 821 }, { "epoch": 12.27, "learning_rate": 0.00016399999999999997, "loss": 0.9732, "step": 822 }, { "epoch": 12.28, "learning_rate": 0.00016419999999999998, "loss": 0.8939, "step": 823 }, { "epoch": 12.3, "learning_rate": 0.0001644, "loss": 0.5243, "step": 824 }, { "epoch": 12.31, "learning_rate": 0.0001646, "loss": 0.7636, "step": 825 }, { "epoch": 12.33, "learning_rate": 0.0001648, "loss": 0.8959, "step": 826 }, { "epoch": 12.34, "learning_rate": 0.000165, "loss": 1.1794, "step": 827 }, { "epoch": 12.36, "learning_rate": 0.00016519999999999998, "loss": 0.9718, "step": 828 }, { "epoch": 12.37, "learning_rate": 0.00016539999999999998, "loss": 0.8994, "step": 829 }, { "epoch": 12.39, "learning_rate": 0.0001656, "loss": 0.6492, "step": 830 }, { "epoch": 12.4, "learning_rate": 0.00016579999999999996, "loss": 0.5952, "step": 831 }, { "epoch": 12.42, "learning_rate": 0.000166, "loss": 0.7995, "step": 832 }, { "epoch": 12.43, "learning_rate": 0.0001662, "loss": 1.0975, "step": 833 }, { "epoch": 12.45, "learning_rate": 0.00016639999999999998, "loss": 1.332, "step": 834 }, { "epoch": 12.46, "learning_rate": 0.00016659999999999998, "loss": 1.1362, "step": 835 }, { "epoch": 12.48, "learning_rate": 0.0001668, "loss": 0.7806, "step": 836 }, { "epoch": 12.49, "learning_rate": 0.00016699999999999997, "loss": 0.8691, "step": 837 }, { "epoch": 12.51, "learning_rate": 0.0001672, "loss": 0.8649, "step": 838 }, { "epoch": 12.52, "learning_rate": 0.0001674, "loss": 0.7305, "step": 839 }, { "epoch": 12.54, "learning_rate": 0.00016759999999999998, "loss": 1.0438, "step": 840 }, { "epoch": 12.55, "learning_rate": 0.00016779999999999999, "loss": 0.977, "step": 841 }, { "epoch": 12.57, "learning_rate": 0.000168, "loss": 0.5909, "step": 842 }, { "epoch": 12.58, "learning_rate": 0.00016819999999999997, "loss": 1.1691, "step": 843 }, { "epoch": 12.59, "learning_rate": 0.0001684, "loss": 1.4127, "step": 844 }, { "epoch": 12.61, "learning_rate": 0.0001686, "loss": 1.1452, "step": 845 }, { "epoch": 12.62, "learning_rate": 0.00016879999999999998, "loss": 0.858, "step": 846 }, { "epoch": 12.64, "learning_rate": 0.000169, "loss": 0.9272, "step": 847 }, { "epoch": 12.65, "learning_rate": 0.00016919999999999997, "loss": 0.8182, "step": 848 }, { "epoch": 12.67, "learning_rate": 0.00016939999999999997, "loss": 0.93, "step": 849 }, { "epoch": 12.68, "learning_rate": 0.0001696, "loss": 1.305, "step": 850 }, { "epoch": 12.7, "learning_rate": 0.00016979999999999998, "loss": 0.7852, "step": 851 }, { "epoch": 12.71, "learning_rate": 0.00016999999999999999, "loss": 0.6363, "step": 852 }, { "epoch": 12.73, "learning_rate": 0.0001702, "loss": 0.9209, "step": 853 }, { "epoch": 12.74, "learning_rate": 0.00017039999999999997, "loss": 0.5253, "step": 854 }, { "epoch": 12.76, "learning_rate": 0.00017059999999999997, "loss": 1.0001, "step": 855 }, { "epoch": 12.77, "learning_rate": 0.0001708, "loss": 0.9678, "step": 856 }, { "epoch": 12.79, "learning_rate": 0.00017099999999999998, "loss": 0.639, "step": 857 }, { "epoch": 12.8, "learning_rate": 0.0001712, "loss": 0.8503, "step": 858 }, { "epoch": 12.82, "learning_rate": 0.0001714, "loss": 0.6808, "step": 859 }, { "epoch": 12.83, "learning_rate": 0.00017159999999999997, "loss": 0.7688, "step": 860 }, { "epoch": 12.85, "learning_rate": 0.00017179999999999997, "loss": 1.008, "step": 861 }, { "epoch": 12.86, "learning_rate": 0.000172, "loss": 0.9021, "step": 862 }, { "epoch": 12.88, "learning_rate": 0.00017219999999999998, "loss": 0.9418, "step": 863 }, { "epoch": 12.89, "learning_rate": 0.0001724, "loss": 0.9163, "step": 864 }, { "epoch": 12.91, "learning_rate": 0.0001726, "loss": 1.0017, "step": 865 }, { "epoch": 12.92, "learning_rate": 0.00017279999999999997, "loss": 0.8404, "step": 866 }, { "epoch": 12.94, "learning_rate": 0.00017299999999999998, "loss": 0.6335, "step": 867 }, { "epoch": 12.95, "learning_rate": 0.00017319999999999998, "loss": 0.7219, "step": 868 }, { "epoch": 12.97, "learning_rate": 0.00017339999999999996, "loss": 1.103, "step": 869 }, { "epoch": 12.98, "learning_rate": 0.0001736, "loss": 0.7049, "step": 870 }, { "epoch": 13.0, "learning_rate": 0.0001738, "loss": 0.9147, "step": 871 }, { "epoch": 13.01, "learning_rate": 0.00017399999999999997, "loss": 0.91, "step": 872 }, { "epoch": 13.03, "learning_rate": 0.00017419999999999998, "loss": 0.8884, "step": 873 }, { "epoch": 13.04, "learning_rate": 0.00017439999999999998, "loss": 0.6004, "step": 874 }, { "epoch": 13.06, "learning_rate": 0.00017459999999999996, "loss": 0.5358, "step": 875 }, { "epoch": 13.07, "learning_rate": 0.0001748, "loss": 0.8575, "step": 876 }, { "epoch": 13.09, "learning_rate": 0.000175, "loss": 0.8457, "step": 877 }, { "epoch": 13.1, "learning_rate": 0.00017519999999999998, "loss": 0.7373, "step": 878 }, { "epoch": 13.12, "learning_rate": 0.00017539999999999998, "loss": 0.6553, "step": 879 }, { "epoch": 13.13, "learning_rate": 0.00017559999999999999, "loss": 0.7267, "step": 880 }, { "epoch": 13.15, "learning_rate": 0.00017579999999999996, "loss": 0.6778, "step": 881 }, { "epoch": 13.16, "learning_rate": 0.000176, "loss": 0.4163, "step": 882 }, { "epoch": 13.18, "learning_rate": 0.0001762, "loss": 1.0095, "step": 883 }, { "epoch": 13.19, "learning_rate": 0.00017639999999999998, "loss": 1.2363, "step": 884 }, { "epoch": 13.21, "learning_rate": 0.00017659999999999998, "loss": 0.9183, "step": 885 }, { "epoch": 13.22, "learning_rate": 0.0001768, "loss": 0.8321, "step": 886 }, { "epoch": 13.24, "learning_rate": 0.00017699999999999997, "loss": 0.9908, "step": 887 }, { "epoch": 13.25, "learning_rate": 0.0001772, "loss": 0.7905, "step": 888 }, { "epoch": 13.27, "learning_rate": 0.0001774, "loss": 1.2317, "step": 889 }, { "epoch": 13.28, "learning_rate": 0.00017759999999999998, "loss": 0.8439, "step": 890 }, { "epoch": 13.3, "learning_rate": 0.00017779999999999998, "loss": 1.0982, "step": 891 }, { "epoch": 13.31, "learning_rate": 0.000178, "loss": 0.7464, "step": 892 }, { "epoch": 13.33, "learning_rate": 0.00017819999999999997, "loss": 0.602, "step": 893 }, { "epoch": 13.34, "learning_rate": 0.0001784, "loss": 0.9053, "step": 894 }, { "epoch": 13.36, "learning_rate": 0.0001786, "loss": 0.6464, "step": 895 }, { "epoch": 13.37, "learning_rate": 0.00017879999999999998, "loss": 1.0542, "step": 896 }, { "epoch": 13.39, "learning_rate": 0.000179, "loss": 0.5522, "step": 897 }, { "epoch": 13.4, "learning_rate": 0.0001792, "loss": 0.7257, "step": 898 }, { "epoch": 13.42, "learning_rate": 0.00017939999999999997, "loss": 0.6098, "step": 899 }, { "epoch": 13.43, "learning_rate": 0.0001796, "loss": 0.5454, "step": 900 }, { "epoch": 13.45, "learning_rate": 0.0001798, "loss": 0.7657, "step": 901 }, { "epoch": 13.46, "learning_rate": 0.00017999999999999998, "loss": 0.78, "step": 902 }, { "epoch": 13.48, "learning_rate": 0.0001802, "loss": 0.81, "step": 903 }, { "epoch": 13.49, "learning_rate": 0.0001804, "loss": 0.9637, "step": 904 }, { "epoch": 13.51, "learning_rate": 0.00018059999999999997, "loss": 0.7132, "step": 905 }, { "epoch": 13.52, "learning_rate": 0.00018079999999999998, "loss": 0.9006, "step": 906 }, { "epoch": 13.54, "learning_rate": 0.000181, "loss": 0.7289, "step": 907 }, { "epoch": 13.55, "learning_rate": 0.00018119999999999999, "loss": 0.8941, "step": 908 }, { "epoch": 13.57, "learning_rate": 0.00018119999999999999, "loss": 0.8517, "step": 909 }, { "epoch": 13.58, "learning_rate": 0.0001814, "loss": 0.6031, "step": 910 }, { "epoch": 13.59, "learning_rate": 0.00018159999999999997, "loss": 0.7666, "step": 911 }, { "epoch": 13.61, "learning_rate": 0.00018179999999999997, "loss": 0.8234, "step": 912 }, { "epoch": 13.62, "learning_rate": 0.00018199999999999998, "loss": 1.0899, "step": 913 }, { "epoch": 13.64, "learning_rate": 0.00018219999999999996, "loss": 0.2315, "step": 914 }, { "epoch": 13.65, "learning_rate": 0.0001824, "loss": 0.5115, "step": 915 }, { "epoch": 13.67, "learning_rate": 0.0001826, "loss": 0.9171, "step": 916 }, { "epoch": 13.68, "learning_rate": 0.00018279999999999997, "loss": 1.2401, "step": 917 }, { "epoch": 13.7, "learning_rate": 0.00018299999999999998, "loss": 0.5019, "step": 918 }, { "epoch": 13.71, "learning_rate": 0.00018319999999999998, "loss": 0.6102, "step": 919 }, { "epoch": 13.73, "learning_rate": 0.00018339999999999996, "loss": 0.5548, "step": 920 }, { "epoch": 13.74, "learning_rate": 0.0001836, "loss": 0.8196, "step": 921 }, { "epoch": 13.76, "learning_rate": 0.0001838, "loss": 0.4653, "step": 922 }, { "epoch": 13.77, "learning_rate": 0.00018399999999999997, "loss": 0.6946, "step": 923 }, { "epoch": 13.79, "learning_rate": 0.00018419999999999998, "loss": 0.8383, "step": 924 }, { "epoch": 13.8, "learning_rate": 0.00018439999999999998, "loss": 0.757, "step": 925 }, { "epoch": 13.82, "learning_rate": 0.00018459999999999996, "loss": 1.1668, "step": 926 }, { "epoch": 13.83, "learning_rate": 0.0001848, "loss": 1.2177, "step": 927 }, { "epoch": 13.85, "learning_rate": 0.000185, "loss": 1.1548, "step": 928 }, { "epoch": 13.86, "learning_rate": 0.00018519999999999998, "loss": 0.7392, "step": 929 }, { "epoch": 13.88, "learning_rate": 0.00018539999999999998, "loss": 0.659, "step": 930 }, { "epoch": 13.89, "learning_rate": 0.00018559999999999998, "loss": 0.4055, "step": 931 }, { "epoch": 13.91, "learning_rate": 0.00018579999999999996, "loss": 0.7573, "step": 932 }, { "epoch": 13.92, "learning_rate": 0.000186, "loss": 1.0789, "step": 933 }, { "epoch": 13.94, "learning_rate": 0.0001862, "loss": 1.0103, "step": 934 }, { "epoch": 13.95, "learning_rate": 0.00018639999999999998, "loss": 0.626, "step": 935 }, { "epoch": 13.97, "learning_rate": 0.00018659999999999998, "loss": 0.5344, "step": 936 }, { "epoch": 13.98, "learning_rate": 0.0001868, "loss": 1.0478, "step": 937 }, { "epoch": 14.0, "learning_rate": 0.00018699999999999996, "loss": 1.033, "step": 938 }, { "epoch": 14.01, "learning_rate": 0.0001872, "loss": 1.1783, "step": 939 }, { "epoch": 14.03, "learning_rate": 0.0001874, "loss": 1.0731, "step": 940 }, { "epoch": 14.04, "learning_rate": 0.00018759999999999998, "loss": 0.9886, "step": 941 }, { "epoch": 14.06, "learning_rate": 0.00018779999999999998, "loss": 0.5971, "step": 942 }, { "epoch": 14.07, "learning_rate": 0.000188, "loss": 0.9326, "step": 943 }, { "epoch": 14.09, "learning_rate": 0.00018819999999999997, "loss": 0.9478, "step": 944 }, { "epoch": 14.1, "learning_rate": 0.00018839999999999997, "loss": 0.5437, "step": 945 }, { "epoch": 14.12, "learning_rate": 0.0001886, "loss": 0.9006, "step": 946 }, { "epoch": 14.13, "learning_rate": 0.00018879999999999998, "loss": 1.1322, "step": 947 }, { "epoch": 14.15, "learning_rate": 0.00018899999999999999, "loss": 0.7737, "step": 948 }, { "epoch": 14.16, "learning_rate": 0.0001892, "loss": 0.658, "step": 949 }, { "epoch": 14.18, "learning_rate": 0.00018939999999999997, "loss": 0.8001, "step": 950 }, { "epoch": 14.19, "learning_rate": 0.00018959999999999997, "loss": 1.0102, "step": 951 }, { "epoch": 14.21, "learning_rate": 0.0001898, "loss": 0.6759, "step": 952 }, { "epoch": 14.22, "learning_rate": 0.00018999999999999998, "loss": 0.6328, "step": 953 }, { "epoch": 14.24, "learning_rate": 0.0001902, "loss": 0.4403, "step": 954 }, { "epoch": 14.25, "learning_rate": 0.0001904, "loss": 0.6944, "step": 955 }, { "epoch": 14.27, "learning_rate": 0.00019059999999999997, "loss": 0.5963, "step": 956 }, { "epoch": 14.28, "learning_rate": 0.00019079999999999998, "loss": 0.5515, "step": 957 }, { "epoch": 14.3, "learning_rate": 0.000191, "loss": 0.9895, "step": 958 }, { "epoch": 14.31, "learning_rate": 0.00019119999999999999, "loss": 0.5896, "step": 959 }, { "epoch": 14.33, "learning_rate": 0.0001914, "loss": 0.6857, "step": 960 }, { "epoch": 14.34, "learning_rate": 0.0001916, "loss": 0.9114, "step": 961 }, { "epoch": 14.36, "learning_rate": 0.00019179999999999997, "loss": 0.7469, "step": 962 }, { "epoch": 14.37, "learning_rate": 0.00019199999999999998, "loss": 0.6013, "step": 963 }, { "epoch": 14.39, "learning_rate": 0.0001922, "loss": 1.166, "step": 964 }, { "epoch": 14.4, "learning_rate": 0.0001924, "loss": 0.5838, "step": 965 }, { "epoch": 14.42, "learning_rate": 0.0001926, "loss": 1.0111, "step": 966 }, { "epoch": 14.43, "learning_rate": 0.0001928, "loss": 0.9963, "step": 967 }, { "epoch": 14.45, "learning_rate": 0.00019299999999999997, "loss": 0.9305, "step": 968 }, { "epoch": 14.46, "learning_rate": 0.00019319999999999998, "loss": 0.6325, "step": 969 }, { "epoch": 14.48, "learning_rate": 0.0001934, "loss": 0.6293, "step": 970 }, { "epoch": 14.49, "learning_rate": 0.0001936, "loss": 0.6669, "step": 971 }, { "epoch": 14.51, "learning_rate": 0.0001938, "loss": 0.4591, "step": 972 }, { "epoch": 14.52, "learning_rate": 0.00019399999999999997, "loss": 0.6482, "step": 973 }, { "epoch": 14.54, "learning_rate": 0.00019419999999999998, "loss": 0.5052, "step": 974 }, { "epoch": 14.55, "learning_rate": 0.00019439999999999998, "loss": 0.5585, "step": 975 }, { "epoch": 14.57, "learning_rate": 0.00019459999999999996, "loss": 1.0939, "step": 976 }, { "epoch": 14.58, "learning_rate": 0.0001948, "loss": 0.5625, "step": 977 }, { "epoch": 14.59, "learning_rate": 0.000195, "loss": 0.7092, "step": 978 }, { "epoch": 14.61, "learning_rate": 0.00019519999999999997, "loss": 0.6617, "step": 979 }, { "epoch": 14.62, "learning_rate": 0.00019539999999999998, "loss": 0.4754, "step": 980 }, { "epoch": 14.64, "learning_rate": 0.00019559999999999998, "loss": 0.5685, "step": 981 }, { "epoch": 14.65, "learning_rate": 0.00019579999999999996, "loss": 0.5839, "step": 982 }, { "epoch": 14.67, "learning_rate": 0.00019599999999999997, "loss": 0.6545, "step": 983 }, { "epoch": 14.68, "learning_rate": 0.0001962, "loss": 0.6199, "step": 984 }, { "epoch": 14.7, "learning_rate": 0.00019639999999999998, "loss": 1.2447, "step": 985 }, { "epoch": 14.71, "learning_rate": 0.00019659999999999998, "loss": 0.6477, "step": 986 }, { "epoch": 14.73, "learning_rate": 0.00019679999999999999, "loss": 0.8531, "step": 987 }, { "epoch": 14.74, "learning_rate": 0.00019699999999999996, "loss": 1.2488, "step": 988 }, { "epoch": 14.76, "learning_rate": 0.00019719999999999997, "loss": 1.353, "step": 989 }, { "epoch": 14.77, "learning_rate": 0.0001974, "loss": 0.9964, "step": 990 }, { "epoch": 14.79, "learning_rate": 0.00019759999999999998, "loss": 1.2242, "step": 991 }, { "epoch": 14.8, "learning_rate": 0.00019779999999999998, "loss": 1.2178, "step": 992 }, { "epoch": 14.82, "learning_rate": 0.000198, "loss": 0.9707, "step": 993 }, { "epoch": 14.83, "learning_rate": 0.00019819999999999997, "loss": 1.4878, "step": 994 }, { "epoch": 14.85, "learning_rate": 0.00019839999999999997, "loss": 1.3396, "step": 995 }, { "epoch": 14.86, "learning_rate": 0.0001986, "loss": 1.1865, "step": 996 }, { "epoch": 14.88, "learning_rate": 0.00019879999999999998, "loss": 1.0701, "step": 997 }, { "epoch": 14.89, "learning_rate": 0.00019899999999999999, "loss": 1.0805, "step": 998 }, { "epoch": 14.91, "learning_rate": 0.0001992, "loss": 0.8146, "step": 999 }, { "epoch": 14.92, "learning_rate": 0.00019939999999999997, "loss": 0.9285, "step": 1000 }, { "epoch": 14.92, "eval_accuracy": 0.7139011257953989, "eval_f1": 0.7036851398351651, "eval_loss": 0.991243302822113, "eval_runtime": 344.8445, "eval_samples_per_second": 11.849, "eval_steps_per_second": 0.742, "step": 1000 }, { "epoch": 14.94, "learning_rate": 0.00019959999999999997, "loss": 0.7898, "step": 1001 }, { "epoch": 14.95, "learning_rate": 0.0001998, "loss": 1.1387, "step": 1002 }, { "epoch": 14.97, "learning_rate": 0.00019999999999999998, "loss": 0.2668, "step": 1003 }, { "epoch": 14.98, "learning_rate": 0.0002002, "loss": 0.3939, "step": 1004 }, { "epoch": 15.0, "learning_rate": 0.0002004, "loss": 0.7606, "step": 1005 }, { "epoch": 15.01, "learning_rate": 0.00020059999999999997, "loss": 1.0102, "step": 1006 }, { "epoch": 15.03, "learning_rate": 0.00020079999999999997, "loss": 0.6444, "step": 1007 }, { "epoch": 15.04, "learning_rate": 0.000201, "loss": 1.0351, "step": 1008 }, { "epoch": 15.06, "learning_rate": 0.00020119999999999998, "loss": 0.7079, "step": 1009 }, { "epoch": 15.07, "learning_rate": 0.0002014, "loss": 0.281, "step": 1010 }, { "epoch": 15.09, "learning_rate": 0.0002016, "loss": 0.6687, "step": 1011 }, { "epoch": 15.1, "learning_rate": 0.00020179999999999997, "loss": 0.8258, "step": 1012 }, { "epoch": 15.12, "learning_rate": 0.00020199999999999998, "loss": 0.7249, "step": 1013 }, { "epoch": 15.13, "learning_rate": 0.0002022, "loss": 0.776, "step": 1014 }, { "epoch": 15.15, "learning_rate": 0.0002024, "loss": 0.9554, "step": 1015 }, { "epoch": 15.16, "learning_rate": 0.0002026, "loss": 0.364, "step": 1016 }, { "epoch": 15.18, "learning_rate": 0.0002028, "loss": 0.7906, "step": 1017 }, { "epoch": 15.19, "learning_rate": 0.00020299999999999997, "loss": 0.6387, "step": 1018 }, { "epoch": 15.21, "learning_rate": 0.00020319999999999998, "loss": 0.4794, "step": 1019 }, { "epoch": 15.22, "learning_rate": 0.00020339999999999998, "loss": 1.0287, "step": 1020 }, { "epoch": 15.24, "learning_rate": 0.00020359999999999996, "loss": 0.7316, "step": 1021 }, { "epoch": 15.25, "learning_rate": 0.0002038, "loss": 0.8346, "step": 1022 }, { "epoch": 15.27, "learning_rate": 0.000204, "loss": 1.083, "step": 1023 }, { "epoch": 15.28, "learning_rate": 0.00020419999999999998, "loss": 0.9167, "step": 1024 }, { "epoch": 15.3, "learning_rate": 0.00020439999999999998, "loss": 0.9092, "step": 1025 }, { "epoch": 15.31, "learning_rate": 0.00020459999999999999, "loss": 0.6361, "step": 1026 }, { "epoch": 15.33, "learning_rate": 0.00020479999999999996, "loss": 0.8581, "step": 1027 }, { "epoch": 15.34, "learning_rate": 0.000205, "loss": 1.1408, "step": 1028 }, { "epoch": 15.36, "learning_rate": 0.0002052, "loss": 0.6805, "step": 1029 }, { "epoch": 15.37, "learning_rate": 0.00020539999999999998, "loss": 1.0752, "step": 1030 }, { "epoch": 15.39, "learning_rate": 0.00020559999999999998, "loss": 0.8579, "step": 1031 }, { "epoch": 15.4, "learning_rate": 0.0002058, "loss": 0.7609, "step": 1032 }, { "epoch": 15.42, "learning_rate": 0.00020599999999999997, "loss": 0.6567, "step": 1033 }, { "epoch": 15.43, "learning_rate": 0.0002062, "loss": 0.8744, "step": 1034 }, { "epoch": 15.45, "learning_rate": 0.00020639999999999998, "loss": 1.0182, "step": 1035 }, { "epoch": 15.46, "learning_rate": 0.00020659999999999998, "loss": 0.5767, "step": 1036 }, { "epoch": 15.48, "learning_rate": 0.00020679999999999999, "loss": 0.7308, "step": 1037 }, { "epoch": 15.49, "learning_rate": 0.00020699999999999996, "loss": 0.6098, "step": 1038 }, { "epoch": 15.51, "learning_rate": 0.00020719999999999997, "loss": 1.1058, "step": 1039 }, { "epoch": 15.52, "learning_rate": 0.0002074, "loss": 0.471, "step": 1040 }, { "epoch": 15.54, "learning_rate": 0.00020759999999999998, "loss": 0.9943, "step": 1041 }, { "epoch": 15.55, "learning_rate": 0.00020779999999999998, "loss": 0.598, "step": 1042 }, { "epoch": 15.57, "learning_rate": 0.000208, "loss": 0.8455, "step": 1043 }, { "epoch": 15.58, "learning_rate": 0.00020819999999999996, "loss": 0.7184, "step": 1044 }, { "epoch": 15.59, "learning_rate": 0.00020839999999999997, "loss": 1.0079, "step": 1045 }, { "epoch": 15.61, "learning_rate": 0.0002086, "loss": 0.5931, "step": 1046 }, { "epoch": 15.62, "learning_rate": 0.00020879999999999998, "loss": 0.6176, "step": 1047 }, { "epoch": 15.64, "learning_rate": 0.00020899999999999998, "loss": 0.6809, "step": 1048 }, { "epoch": 15.65, "learning_rate": 0.0002092, "loss": 0.3144, "step": 1049 }, { "epoch": 15.67, "learning_rate": 0.00020939999999999997, "loss": 0.7481, "step": 1050 }, { "epoch": 15.68, "learning_rate": 0.00020959999999999997, "loss": 0.5394, "step": 1051 }, { "epoch": 15.7, "learning_rate": 0.0002098, "loss": 0.6844, "step": 1052 }, { "epoch": 15.71, "learning_rate": 0.00020999999999999998, "loss": 0.6366, "step": 1053 }, { "epoch": 15.73, "learning_rate": 0.0002102, "loss": 0.585, "step": 1054 }, { "epoch": 15.74, "learning_rate": 0.0002104, "loss": 0.6458, "step": 1055 }, { "epoch": 15.76, "learning_rate": 0.00021059999999999997, "loss": 1.1576, "step": 1056 }, { "epoch": 15.77, "learning_rate": 0.00021079999999999997, "loss": 0.7424, "step": 1057 }, { "epoch": 15.79, "learning_rate": 0.00021099999999999998, "loss": 0.8765, "step": 1058 }, { "epoch": 15.8, "learning_rate": 0.00021119999999999996, "loss": 0.6492, "step": 1059 }, { "epoch": 15.82, "learning_rate": 0.0002114, "loss": 0.9297, "step": 1060 }, { "epoch": 15.83, "learning_rate": 0.0002116, "loss": 0.7282, "step": 1061 }, { "epoch": 15.85, "learning_rate": 0.00021179999999999997, "loss": 1.1035, "step": 1062 }, { "epoch": 15.86, "learning_rate": 0.00021199999999999998, "loss": 0.8628, "step": 1063 }, { "epoch": 15.88, "learning_rate": 0.00021219999999999998, "loss": 0.7203, "step": 1064 }, { "epoch": 15.89, "learning_rate": 0.00021239999999999996, "loss": 0.5733, "step": 1065 }, { "epoch": 15.91, "learning_rate": 0.0002126, "loss": 0.6324, "step": 1066 }, { "epoch": 15.92, "learning_rate": 0.0002128, "loss": 0.899, "step": 1067 }, { "epoch": 15.94, "learning_rate": 0.00021299999999999997, "loss": 0.7308, "step": 1068 }, { "epoch": 15.95, "learning_rate": 0.00021319999999999998, "loss": 0.8845, "step": 1069 }, { "epoch": 15.97, "learning_rate": 0.00021339999999999998, "loss": 0.2766, "step": 1070 }, { "epoch": 15.98, "learning_rate": 0.00021359999999999996, "loss": 0.7306, "step": 1071 }, { "epoch": 16.0, "learning_rate": 0.0002138, "loss": 0.9311, "step": 1072 }, { "epoch": 16.01, "learning_rate": 0.000214, "loss": 0.7444, "step": 1073 }, { "epoch": 16.03, "learning_rate": 0.00021419999999999998, "loss": 0.9739, "step": 1074 }, { "epoch": 16.04, "learning_rate": 0.00021439999999999998, "loss": 0.9668, "step": 1075 }, { "epoch": 16.06, "learning_rate": 0.00021459999999999998, "loss": 0.7567, "step": 1076 }, { "epoch": 16.07, "learning_rate": 0.00021479999999999996, "loss": 0.4541, "step": 1077 }, { "epoch": 16.09, "learning_rate": 0.000215, "loss": 0.443, "step": 1078 }, { "epoch": 16.1, "learning_rate": 0.0002152, "loss": 0.4377, "step": 1079 }, { "epoch": 16.12, "learning_rate": 0.00021539999999999998, "loss": 0.4075, "step": 1080 }, { "epoch": 16.13, "learning_rate": 0.00021559999999999998, "loss": 1.0803, "step": 1081 }, { "epoch": 16.15, "learning_rate": 0.0002158, "loss": 0.7278, "step": 1082 }, { "epoch": 16.16, "learning_rate": 0.00021599999999999996, "loss": 0.4957, "step": 1083 }, { "epoch": 16.18, "learning_rate": 0.0002162, "loss": 0.4261, "step": 1084 }, { "epoch": 16.19, "learning_rate": 0.0002164, "loss": 0.3906, "step": 1085 }, { "epoch": 16.21, "learning_rate": 0.00021659999999999998, "loss": 0.7426, "step": 1086 }, { "epoch": 16.22, "learning_rate": 0.00021679999999999998, "loss": 0.6193, "step": 1087 }, { "epoch": 16.24, "learning_rate": 0.000217, "loss": 0.5344, "step": 1088 }, { "epoch": 16.25, "learning_rate": 0.00021719999999999997, "loss": 0.9232, "step": 1089 }, { "epoch": 16.27, "learning_rate": 0.0002174, "loss": 0.5786, "step": 1090 }, { "epoch": 16.28, "learning_rate": 0.0002176, "loss": 0.8235, "step": 1091 }, { "epoch": 16.3, "learning_rate": 0.00021779999999999998, "loss": 0.9143, "step": 1092 }, { "epoch": 16.31, "learning_rate": 0.00021799999999999999, "loss": 0.3326, "step": 1093 }, { "epoch": 16.33, "learning_rate": 0.0002182, "loss": 0.2456, "step": 1094 }, { "epoch": 16.34, "learning_rate": 0.00021839999999999997, "loss": 0.858, "step": 1095 }, { "epoch": 16.36, "learning_rate": 0.00021859999999999997, "loss": 1.022, "step": 1096 }, { "epoch": 16.37, "learning_rate": 0.00021879999999999995, "loss": 0.5284, "step": 1097 }, { "epoch": 16.39, "learning_rate": 0.00021899999999999998, "loss": 0.3627, "step": 1098 }, { "epoch": 16.4, "learning_rate": 0.0002192, "loss": 0.9552, "step": 1099 }, { "epoch": 16.42, "learning_rate": 0.00021939999999999997, "loss": 0.619, "step": 1100 }, { "epoch": 16.43, "learning_rate": 0.00021959999999999997, "loss": 0.2164, "step": 1101 }, { "epoch": 16.45, "learning_rate": 0.00021979999999999998, "loss": 0.7193, "step": 1102 }, { "epoch": 16.46, "learning_rate": 0.00021999999999999995, "loss": 0.7335, "step": 1103 }, { "epoch": 16.48, "learning_rate": 0.00022019999999999999, "loss": 0.367, "step": 1104 }, { "epoch": 16.49, "learning_rate": 0.0002204, "loss": 0.4086, "step": 1105 }, { "epoch": 16.51, "learning_rate": 0.00022059999999999997, "loss": 1.1209, "step": 1106 }, { "epoch": 16.52, "learning_rate": 0.00022079999999999997, "loss": 0.4251, "step": 1107 }, { "epoch": 16.54, "learning_rate": 0.00022099999999999998, "loss": 0.2345, "step": 1108 }, { "epoch": 16.55, "learning_rate": 0.00022119999999999996, "loss": 0.9918, "step": 1109 }, { "epoch": 16.57, "learning_rate": 0.0002214, "loss": 0.5629, "step": 1110 }, { "epoch": 16.58, "learning_rate": 0.0002216, "loss": 0.9618, "step": 1111 }, { "epoch": 16.59, "learning_rate": 0.00022179999999999997, "loss": 0.9031, "step": 1112 }, { "epoch": 16.61, "learning_rate": 0.00022199999999999998, "loss": 0.7487, "step": 1113 }, { "epoch": 16.62, "learning_rate": 0.00022219999999999998, "loss": 0.8206, "step": 1114 }, { "epoch": 16.64, "learning_rate": 0.00022239999999999996, "loss": 0.3871, "step": 1115 }, { "epoch": 16.65, "learning_rate": 0.0002226, "loss": 0.6073, "step": 1116 }, { "epoch": 16.67, "learning_rate": 0.0002228, "loss": 0.3924, "step": 1117 }, { "epoch": 16.68, "learning_rate": 0.00022299999999999997, "loss": 0.6858, "step": 1118 }, { "epoch": 16.7, "learning_rate": 0.00022319999999999998, "loss": 0.51, "step": 1119 }, { "epoch": 16.71, "learning_rate": 0.00022339999999999998, "loss": 0.8494, "step": 1120 }, { "epoch": 16.73, "learning_rate": 0.00022359999999999996, "loss": 0.7321, "step": 1121 }, { "epoch": 16.74, "learning_rate": 0.0002238, "loss": 0.9092, "step": 1122 }, { "epoch": 16.76, "learning_rate": 0.000224, "loss": 0.8845, "step": 1123 }, { "epoch": 16.77, "learning_rate": 0.00022419999999999997, "loss": 1.2335, "step": 1124 }, { "epoch": 16.79, "learning_rate": 0.00022439999999999998, "loss": 1.1761, "step": 1125 }, { "epoch": 16.8, "learning_rate": 0.00022459999999999998, "loss": 1.3628, "step": 1126 }, { "epoch": 16.82, "learning_rate": 0.00022479999999999996, "loss": 0.6865, "step": 1127 }, { "epoch": 16.83, "learning_rate": 0.000225, "loss": 1.1485, "step": 1128 }, { "epoch": 16.85, "learning_rate": 0.0002252, "loss": 0.9444, "step": 1129 }, { "epoch": 16.86, "learning_rate": 0.00022539999999999998, "loss": 0.3779, "step": 1130 }, { "epoch": 16.88, "learning_rate": 0.00022559999999999998, "loss": 0.7367, "step": 1131 }, { "epoch": 16.89, "learning_rate": 0.00022579999999999999, "loss": 0.7024, "step": 1132 }, { "epoch": 16.91, "learning_rate": 0.00022599999999999996, "loss": 0.6302, "step": 1133 }, { "epoch": 16.92, "learning_rate": 0.00022619999999999997, "loss": 0.7474, "step": 1134 }, { "epoch": 16.94, "learning_rate": 0.0002264, "loss": 0.9909, "step": 1135 }, { "epoch": 16.95, "learning_rate": 0.00022659999999999998, "loss": 0.8745, "step": 1136 }, { "epoch": 16.97, "learning_rate": 0.00022679999999999998, "loss": 0.4185, "step": 1137 }, { "epoch": 16.98, "learning_rate": 0.000227, "loss": 0.9698, "step": 1138 }, { "epoch": 17.0, "learning_rate": 0.00022719999999999997, "loss": 1.022, "step": 1139 }, { "epoch": 17.01, "learning_rate": 0.00022739999999999997, "loss": 1.537, "step": 1140 }, { "epoch": 17.03, "learning_rate": 0.0002276, "loss": 0.7912, "step": 1141 }, { "epoch": 17.04, "learning_rate": 0.00022779999999999998, "loss": 0.8698, "step": 1142 }, { "epoch": 17.06, "learning_rate": 0.00022799999999999999, "loss": 0.7288, "step": 1143 }, { "epoch": 17.07, "learning_rate": 0.0002282, "loss": 0.2158, "step": 1144 }, { "epoch": 17.09, "learning_rate": 0.00022839999999999997, "loss": 0.631, "step": 1145 }, { "epoch": 17.1, "learning_rate": 0.00022859999999999997, "loss": 0.8167, "step": 1146 }, { "epoch": 17.12, "learning_rate": 0.0002288, "loss": 0.4729, "step": 1147 }, { "epoch": 17.13, "learning_rate": 0.00022899999999999998, "loss": 0.8643, "step": 1148 }, { "epoch": 17.15, "learning_rate": 0.0002292, "loss": 0.5403, "step": 1149 }, { "epoch": 17.16, "learning_rate": 0.0002294, "loss": 0.7748, "step": 1150 }, { "epoch": 17.18, "learning_rate": 0.00022959999999999997, "loss": 0.9571, "step": 1151 }, { "epoch": 17.19, "learning_rate": 0.00022979999999999997, "loss": 0.5619, "step": 1152 }, { "epoch": 17.21, "learning_rate": 0.00023, "loss": 0.6504, "step": 1153 }, { "epoch": 17.22, "learning_rate": 0.00023019999999999998, "loss": 0.5057, "step": 1154 }, { "epoch": 17.24, "learning_rate": 0.0002304, "loss": 0.481, "step": 1155 }, { "epoch": 17.25, "learning_rate": 0.0002306, "loss": 1.1169, "step": 1156 }, { "epoch": 17.27, "learning_rate": 0.00023079999999999997, "loss": 0.8684, "step": 1157 }, { "epoch": 17.28, "learning_rate": 0.00023099999999999998, "loss": 0.5652, "step": 1158 }, { "epoch": 17.3, "learning_rate": 0.0002312, "loss": 0.9157, "step": 1159 }, { "epoch": 17.31, "learning_rate": 0.0002314, "loss": 0.912, "step": 1160 }, { "epoch": 17.33, "learning_rate": 0.0002316, "loss": 1.064, "step": 1161 }, { "epoch": 17.34, "learning_rate": 0.00023179999999999997, "loss": 0.481, "step": 1162 }, { "epoch": 17.36, "learning_rate": 0.00023199999999999997, "loss": 1.0895, "step": 1163 }, { "epoch": 17.37, "learning_rate": 0.00023219999999999998, "loss": 0.9731, "step": 1164 }, { "epoch": 17.39, "learning_rate": 0.00023239999999999996, "loss": 0.7848, "step": 1165 }, { "epoch": 17.4, "learning_rate": 0.00023259999999999996, "loss": 0.8208, "step": 1166 }, { "epoch": 17.42, "learning_rate": 0.0002328, "loss": 0.3356, "step": 1167 }, { "epoch": 17.43, "learning_rate": 0.00023299999999999997, "loss": 0.5094, "step": 1168 }, { "epoch": 17.45, "learning_rate": 0.00023319999999999998, "loss": 0.5053, "step": 1169 }, { "epoch": 17.46, "learning_rate": 0.00023339999999999998, "loss": 0.6877, "step": 1170 }, { "epoch": 17.48, "learning_rate": 0.00023359999999999996, "loss": 0.4477, "step": 1171 }, { "epoch": 17.49, "learning_rate": 0.00023379999999999996, "loss": 0.5053, "step": 1172 }, { "epoch": 17.51, "learning_rate": 0.000234, "loss": 0.9771, "step": 1173 }, { "epoch": 17.52, "learning_rate": 0.00023419999999999997, "loss": 1.0193, "step": 1174 }, { "epoch": 17.54, "learning_rate": 0.00023439999999999998, "loss": 0.9896, "step": 1175 }, { "epoch": 17.55, "learning_rate": 0.00023459999999999998, "loss": 0.7799, "step": 1176 }, { "epoch": 17.57, "learning_rate": 0.00023479999999999996, "loss": 1.0455, "step": 1177 }, { "epoch": 17.58, "learning_rate": 0.00023499999999999997, "loss": 0.5766, "step": 1178 }, { "epoch": 17.59, "learning_rate": 0.0002352, "loss": 1.0561, "step": 1179 }, { "epoch": 17.61, "learning_rate": 0.00023539999999999998, "loss": 0.6531, "step": 1180 }, { "epoch": 17.62, "learning_rate": 0.00023559999999999998, "loss": 1.1587, "step": 1181 }, { "epoch": 17.64, "learning_rate": 0.00023579999999999999, "loss": 0.3106, "step": 1182 }, { "epoch": 17.65, "learning_rate": 0.00023599999999999996, "loss": 0.9023, "step": 1183 }, { "epoch": 17.67, "learning_rate": 0.00023619999999999997, "loss": 0.5792, "step": 1184 }, { "epoch": 17.68, "learning_rate": 0.0002364, "loss": 0.7693, "step": 1185 }, { "epoch": 17.7, "learning_rate": 0.00023659999999999998, "loss": 0.4481, "step": 1186 }, { "epoch": 17.71, "learning_rate": 0.00023679999999999998, "loss": 0.6329, "step": 1187 }, { "epoch": 17.73, "learning_rate": 0.000237, "loss": 0.4823, "step": 1188 }, { "epoch": 17.74, "learning_rate": 0.00023719999999999997, "loss": 0.3002, "step": 1189 }, { "epoch": 17.76, "learning_rate": 0.00023739999999999997, "loss": 0.8866, "step": 1190 }, { "epoch": 17.77, "learning_rate": 0.0002376, "loss": 0.6895, "step": 1191 }, { "epoch": 17.79, "learning_rate": 0.00023779999999999998, "loss": 1.239, "step": 1192 }, { "epoch": 17.8, "learning_rate": 0.00023799999999999998, "loss": 0.8187, "step": 1193 }, { "epoch": 17.82, "learning_rate": 0.0002382, "loss": 0.6027, "step": 1194 }, { "epoch": 17.83, "learning_rate": 0.00023839999999999997, "loss": 0.7884, "step": 1195 }, { "epoch": 17.85, "learning_rate": 0.00023859999999999997, "loss": 0.9204, "step": 1196 }, { "epoch": 17.86, "learning_rate": 0.0002388, "loss": 0.7918, "step": 1197 }, { "epoch": 17.88, "learning_rate": 0.00023899999999999998, "loss": 0.7283, "step": 1198 }, { "epoch": 17.89, "learning_rate": 0.0002392, "loss": 0.429, "step": 1199 }, { "epoch": 17.91, "learning_rate": 0.0002394, "loss": 0.4164, "step": 1200 }, { "epoch": 17.91, "eval_accuracy": 0.7741067058247675, "eval_f1": 0.7742523404153536, "eval_loss": 0.8225661516189575, "eval_runtime": 343.498, "eval_samples_per_second": 11.895, "eval_steps_per_second": 0.745, "step": 1200 }, { "epoch": 17.92, "learning_rate": 0.00023959999999999997, "loss": 0.2999, "step": 1201 }, { "epoch": 17.94, "learning_rate": 0.00023979999999999997, "loss": 0.6004, "step": 1202 }, { "epoch": 17.95, "learning_rate": 0.00023999999999999998, "loss": 0.4274, "step": 1203 }, { "epoch": 17.97, "learning_rate": 0.00024019999999999996, "loss": 0.3801, "step": 1204 }, { "epoch": 17.98, "learning_rate": 0.0002404, "loss": 0.6226, "step": 1205 }, { "epoch": 18.0, "learning_rate": 0.0002406, "loss": 0.8777, "step": 1206 }, { "epoch": 18.01, "learning_rate": 0.00024079999999999997, "loss": 1.1747, "step": 1207 }, { "epoch": 18.03, "learning_rate": 0.00024099999999999998, "loss": 0.6821, "step": 1208 }, { "epoch": 18.04, "learning_rate": 0.00024119999999999998, "loss": 0.64, "step": 1209 }, { "epoch": 18.06, "learning_rate": 0.00024139999999999996, "loss": 0.4556, "step": 1210 }, { "epoch": 18.07, "learning_rate": 0.0002416, "loss": 0.5366, "step": 1211 }, { "epoch": 18.09, "learning_rate": 0.0002418, "loss": 0.3206, "step": 1212 }, { "epoch": 18.1, "learning_rate": 0.00024199999999999997, "loss": 0.496, "step": 1213 }, { "epoch": 18.12, "learning_rate": 0.00024219999999999998, "loss": 0.9585, "step": 1214 }, { "epoch": 18.13, "learning_rate": 0.00024239999999999998, "loss": 0.331, "step": 1215 }, { "epoch": 18.15, "learning_rate": 0.00024259999999999996, "loss": 0.4689, "step": 1216 }, { "epoch": 18.16, "learning_rate": 0.0002428, "loss": 0.9751, "step": 1217 }, { "epoch": 18.18, "learning_rate": 0.000243, "loss": 0.6266, "step": 1218 }, { "epoch": 18.19, "learning_rate": 0.00024319999999999998, "loss": 0.7254, "step": 1219 }, { "epoch": 18.21, "learning_rate": 0.00024339999999999998, "loss": 0.86, "step": 1220 }, { "epoch": 18.22, "learning_rate": 0.00024359999999999999, "loss": 0.7694, "step": 1221 }, { "epoch": 18.24, "learning_rate": 0.00024379999999999996, "loss": 0.5908, "step": 1222 }, { "epoch": 18.25, "learning_rate": 0.000244, "loss": 0.7398, "step": 1223 }, { "epoch": 18.27, "learning_rate": 0.00024419999999999997, "loss": 0.5606, "step": 1224 }, { "epoch": 18.28, "learning_rate": 0.0002444, "loss": 0.9814, "step": 1225 }, { "epoch": 18.3, "learning_rate": 0.0002446, "loss": 1.1783, "step": 1226 }, { "epoch": 18.31, "learning_rate": 0.0002448, "loss": 0.5309, "step": 1227 }, { "epoch": 18.33, "learning_rate": 0.000245, "loss": 0.5142, "step": 1228 }, { "epoch": 18.34, "learning_rate": 0.0002452, "loss": 0.5806, "step": 1229 }, { "epoch": 18.36, "learning_rate": 0.00024539999999999995, "loss": 0.6274, "step": 1230 }, { "epoch": 18.37, "learning_rate": 0.00024559999999999995, "loss": 0.7508, "step": 1231 }, { "epoch": 18.39, "learning_rate": 0.0002458, "loss": 0.2308, "step": 1232 }, { "epoch": 18.4, "learning_rate": 0.00024599999999999996, "loss": 0.6213, "step": 1233 }, { "epoch": 18.42, "learning_rate": 0.00024619999999999997, "loss": 0.8141, "step": 1234 }, { "epoch": 18.43, "learning_rate": 0.00024639999999999997, "loss": 0.2922, "step": 1235 }, { "epoch": 18.45, "learning_rate": 0.0002466, "loss": 0.5329, "step": 1236 }, { "epoch": 18.46, "learning_rate": 0.0002468, "loss": 0.4933, "step": 1237 }, { "epoch": 18.48, "learning_rate": 0.000247, "loss": 0.5091, "step": 1238 }, { "epoch": 18.49, "learning_rate": 0.0002472, "loss": 0.6618, "step": 1239 }, { "epoch": 18.51, "learning_rate": 0.0002474, "loss": 1.1074, "step": 1240 }, { "epoch": 18.52, "learning_rate": 0.0002476, "loss": 0.3855, "step": 1241 }, { "epoch": 18.54, "learning_rate": 0.00024779999999999995, "loss": 0.5161, "step": 1242 }, { "epoch": 18.55, "learning_rate": 0.00024799999999999996, "loss": 0.7131, "step": 1243 }, { "epoch": 18.57, "learning_rate": 0.00024819999999999996, "loss": 0.6932, "step": 1244 }, { "epoch": 18.58, "learning_rate": 0.00024839999999999997, "loss": 0.8774, "step": 1245 }, { "epoch": 18.59, "learning_rate": 0.00024859999999999997, "loss": 0.5768, "step": 1246 }, { "epoch": 18.61, "learning_rate": 0.0002488, "loss": 0.981, "step": 1247 }, { "epoch": 18.62, "learning_rate": 0.000249, "loss": 1.1701, "step": 1248 }, { "epoch": 18.64, "learning_rate": 0.0002492, "loss": 0.735, "step": 1249 }, { "epoch": 18.65, "learning_rate": 0.0002494, "loss": 0.7628, "step": 1250 }, { "epoch": 18.67, "learning_rate": 0.00024959999999999994, "loss": 1.064, "step": 1251 }, { "epoch": 18.68, "learning_rate": 0.0002498, "loss": 1.0694, "step": 1252 }, { "epoch": 18.7, "learning_rate": 0.00025, "loss": 0.4311, "step": 1253 }, { "epoch": 18.71, "learning_rate": 0.00025019999999999996, "loss": 0.7704, "step": 1254 }, { "epoch": 18.73, "learning_rate": 0.00025039999999999996, "loss": 0.2663, "step": 1255 }, { "epoch": 18.74, "learning_rate": 0.00025059999999999997, "loss": 0.3193, "step": 1256 }, { "epoch": 18.76, "learning_rate": 0.00025079999999999997, "loss": 0.5628, "step": 1257 }, { "epoch": 18.77, "learning_rate": 0.000251, "loss": 0.779, "step": 1258 }, { "epoch": 18.79, "learning_rate": 0.0002512, "loss": 1.1226, "step": 1259 }, { "epoch": 18.8, "learning_rate": 0.0002514, "loss": 0.8199, "step": 1260 }, { "epoch": 18.82, "learning_rate": 0.0002516, "loss": 0.6151, "step": 1261 }, { "epoch": 18.83, "learning_rate": 0.0002518, "loss": 0.5055, "step": 1262 }, { "epoch": 18.85, "learning_rate": 0.00025199999999999995, "loss": 0.1621, "step": 1263 }, { "epoch": 18.86, "learning_rate": 0.0002522, "loss": 0.5277, "step": 1264 }, { "epoch": 18.88, "learning_rate": 0.0002524, "loss": 0.9161, "step": 1265 }, { "epoch": 18.89, "learning_rate": 0.00025259999999999996, "loss": 0.6097, "step": 1266 }, { "epoch": 18.91, "learning_rate": 0.00025279999999999996, "loss": 0.4727, "step": 1267 }, { "epoch": 18.92, "learning_rate": 0.00025299999999999997, "loss": 0.3202, "step": 1268 }, { "epoch": 18.94, "learning_rate": 0.0002532, "loss": 1.0695, "step": 1269 }, { "epoch": 18.95, "learning_rate": 0.0002534, "loss": 0.6529, "step": 1270 }, { "epoch": 18.97, "learning_rate": 0.0002536, "loss": 0.276, "step": 1271 }, { "epoch": 18.98, "learning_rate": 0.0002538, "loss": 0.5075, "step": 1272 }, { "epoch": 19.0, "learning_rate": 0.000254, "loss": 0.6272, "step": 1273 }, { "epoch": 19.01, "learning_rate": 0.0002542, "loss": 1.1173, "step": 1274 }, { "epoch": 19.03, "learning_rate": 0.00025439999999999995, "loss": 0.3821, "step": 1275 }, { "epoch": 19.04, "learning_rate": 0.0002546, "loss": 0.544, "step": 1276 }, { "epoch": 19.06, "learning_rate": 0.0002548, "loss": 0.3951, "step": 1277 }, { "epoch": 19.07, "learning_rate": 0.00025499999999999996, "loss": 0.7183, "step": 1278 }, { "epoch": 19.09, "learning_rate": 0.00025519999999999997, "loss": 0.5846, "step": 1279 }, { "epoch": 19.1, "learning_rate": 0.0002554, "loss": 0.8813, "step": 1280 }, { "epoch": 19.12, "learning_rate": 0.0002556, "loss": 0.2713, "step": 1281 }, { "epoch": 19.13, "learning_rate": 0.0002558, "loss": 0.666, "step": 1282 }, { "epoch": 19.15, "learning_rate": 0.000256, "loss": 0.7438, "step": 1283 }, { "epoch": 19.16, "learning_rate": 0.0002562, "loss": 0.5591, "step": 1284 }, { "epoch": 19.18, "learning_rate": 0.0002564, "loss": 0.4984, "step": 1285 }, { "epoch": 19.19, "learning_rate": 0.00025659999999999995, "loss": 0.6012, "step": 1286 }, { "epoch": 19.21, "learning_rate": 0.00025679999999999995, "loss": 0.6843, "step": 1287 }, { "epoch": 19.22, "learning_rate": 0.00025699999999999996, "loss": 0.7454, "step": 1288 }, { "epoch": 19.24, "learning_rate": 0.00025719999999999996, "loss": 0.5694, "step": 1289 }, { "epoch": 19.25, "learning_rate": 0.00025739999999999997, "loss": 0.7526, "step": 1290 }, { "epoch": 19.27, "learning_rate": 0.0002576, "loss": 0.7195, "step": 1291 }, { "epoch": 19.28, "learning_rate": 0.0002578, "loss": 0.5612, "step": 1292 }, { "epoch": 19.3, "learning_rate": 0.000258, "loss": 0.9403, "step": 1293 }, { "epoch": 19.31, "learning_rate": 0.0002582, "loss": 0.5989, "step": 1294 }, { "epoch": 19.33, "learning_rate": 0.00025839999999999994, "loss": 0.8607, "step": 1295 }, { "epoch": 19.34, "learning_rate": 0.0002586, "loss": 0.5824, "step": 1296 }, { "epoch": 19.36, "learning_rate": 0.0002588, "loss": 0.6549, "step": 1297 }, { "epoch": 19.37, "learning_rate": 0.00025899999999999995, "loss": 0.6932, "step": 1298 }, { "epoch": 19.39, "learning_rate": 0.00025919999999999996, "loss": 0.6864, "step": 1299 }, { "epoch": 19.4, "learning_rate": 0.00025939999999999996, "loss": 0.5762, "step": 1300 }, { "epoch": 19.42, "learning_rate": 0.00025959999999999997, "loss": 0.8634, "step": 1301 }, { "epoch": 19.43, "learning_rate": 0.00025979999999999997, "loss": 0.6391, "step": 1302 }, { "epoch": 19.45, "learning_rate": 0.00026, "loss": 0.7527, "step": 1303 }, { "epoch": 19.46, "learning_rate": 0.0002602, "loss": 0.4871, "step": 1304 }, { "epoch": 19.48, "learning_rate": 0.0002604, "loss": 0.2296, "step": 1305 }, { "epoch": 19.49, "learning_rate": 0.0002606, "loss": 0.282, "step": 1306 }, { "epoch": 19.51, "learning_rate": 0.00026079999999999994, "loss": 0.8365, "step": 1307 }, { "epoch": 19.52, "learning_rate": 0.000261, "loss": 0.5651, "step": 1308 }, { "epoch": 19.54, "learning_rate": 0.0002612, "loss": 0.8462, "step": 1309 }, { "epoch": 19.55, "learning_rate": 0.00026139999999999996, "loss": 0.7099, "step": 1310 }, { "epoch": 19.57, "learning_rate": 0.00026159999999999996, "loss": 0.6501, "step": 1311 }, { "epoch": 19.58, "learning_rate": 0.00026179999999999997, "loss": 0.6466, "step": 1312 }, { "epoch": 19.59, "learning_rate": 0.00026199999999999997, "loss": 0.5846, "step": 1313 }, { "epoch": 19.61, "learning_rate": 0.0002622, "loss": 0.9175, "step": 1314 }, { "epoch": 19.62, "learning_rate": 0.0002624, "loss": 0.4235, "step": 1315 }, { "epoch": 19.64, "learning_rate": 0.0002626, "loss": 0.4004, "step": 1316 }, { "epoch": 19.65, "learning_rate": 0.0002628, "loss": 0.5586, "step": 1317 }, { "epoch": 19.67, "learning_rate": 0.000263, "loss": 0.5125, "step": 1318 }, { "epoch": 19.68, "learning_rate": 0.00026319999999999995, "loss": 0.4819, "step": 1319 }, { "epoch": 19.7, "learning_rate": 0.00026339999999999995, "loss": 0.3895, "step": 1320 }, { "epoch": 19.71, "learning_rate": 0.0002636, "loss": 0.4449, "step": 1321 }, { "epoch": 19.73, "learning_rate": 0.00026379999999999996, "loss": 0.6049, "step": 1322 }, { "epoch": 19.74, "learning_rate": 0.00026399999999999997, "loss": 1.2677, "step": 1323 }, { "epoch": 19.76, "learning_rate": 0.00026419999999999997, "loss": 1.0832, "step": 1324 }, { "epoch": 19.77, "learning_rate": 0.0002644, "loss": 0.7723, "step": 1325 }, { "epoch": 19.79, "learning_rate": 0.0002646, "loss": 1.1757, "step": 1326 }, { "epoch": 19.8, "learning_rate": 0.0002648, "loss": 0.5295, "step": 1327 }, { "epoch": 19.82, "learning_rate": 0.000265, "loss": 0.2148, "step": 1328 }, { "epoch": 19.83, "learning_rate": 0.0002652, "loss": 0.3802, "step": 1329 }, { "epoch": 19.85, "learning_rate": 0.0002654, "loss": 0.3808, "step": 1330 }, { "epoch": 19.86, "learning_rate": 0.00026559999999999995, "loss": 0.6835, "step": 1331 }, { "epoch": 19.88, "learning_rate": 0.00026579999999999996, "loss": 0.7268, "step": 1332 }, { "epoch": 19.89, "learning_rate": 0.000266, "loss": 0.3596, "step": 1333 }, { "epoch": 19.91, "learning_rate": 0.00026619999999999997, "loss": 0.6912, "step": 1334 }, { "epoch": 19.92, "learning_rate": 0.00026639999999999997, "loss": 1.3149, "step": 1335 }, { "epoch": 19.94, "learning_rate": 0.0002666, "loss": 0.7943, "step": 1336 }, { "epoch": 19.95, "learning_rate": 0.0002668, "loss": 1.0217, "step": 1337 }, { "epoch": 19.97, "learning_rate": 0.000267, "loss": 0.6082, "step": 1338 }, { "epoch": 19.98, "learning_rate": 0.0002672, "loss": 0.436, "step": 1339 }, { "epoch": 20.0, "learning_rate": 0.0002674, "loss": 0.7156, "step": 1340 }, { "epoch": 20.01, "learning_rate": 0.0002676, "loss": 0.6298, "step": 1341 }, { "epoch": 20.03, "learning_rate": 0.0002678, "loss": 0.5792, "step": 1342 }, { "epoch": 20.04, "learning_rate": 0.00026799999999999995, "loss": 0.6893, "step": 1343 }, { "epoch": 20.06, "learning_rate": 0.00026819999999999996, "loss": 0.6991, "step": 1344 }, { "epoch": 20.07, "learning_rate": 0.0002684, "loss": 0.7287, "step": 1345 }, { "epoch": 20.09, "learning_rate": 0.00026859999999999997, "loss": 0.4918, "step": 1346 }, { "epoch": 20.1, "learning_rate": 0.0002688, "loss": 0.912, "step": 1347 }, { "epoch": 20.12, "learning_rate": 0.000269, "loss": 0.5828, "step": 1348 }, { "epoch": 20.13, "learning_rate": 0.0002692, "loss": 0.6996, "step": 1349 }, { "epoch": 20.15, "learning_rate": 0.0002694, "loss": 0.46, "step": 1350 }, { "epoch": 20.16, "learning_rate": 0.00026959999999999994, "loss": 0.3978, "step": 1351 }, { "epoch": 20.18, "learning_rate": 0.0002698, "loss": 0.5182, "step": 1352 }, { "epoch": 20.19, "learning_rate": 0.00027, "loss": 0.893, "step": 1353 }, { "epoch": 20.21, "learning_rate": 0.00027019999999999995, "loss": 0.9816, "step": 1354 }, { "epoch": 20.22, "learning_rate": 0.00027039999999999996, "loss": 0.4094, "step": 1355 }, { "epoch": 20.24, "learning_rate": 0.00027059999999999996, "loss": 0.4442, "step": 1356 }, { "epoch": 20.25, "learning_rate": 0.00027079999999999997, "loss": 0.4466, "step": 1357 }, { "epoch": 20.27, "learning_rate": 0.000271, "loss": 0.415, "step": 1358 }, { "epoch": 20.28, "learning_rate": 0.0002712, "loss": 0.9744, "step": 1359 }, { "epoch": 20.3, "learning_rate": 0.0002714, "loss": 0.9508, "step": 1360 }, { "epoch": 20.31, "learning_rate": 0.0002716, "loss": 0.635, "step": 1361 }, { "epoch": 20.33, "learning_rate": 0.0002718, "loss": 0.7858, "step": 1362 }, { "epoch": 20.34, "learning_rate": 0.00027199999999999994, "loss": 1.0854, "step": 1363 }, { "epoch": 20.36, "learning_rate": 0.00027219999999999995, "loss": 0.9812, "step": 1364 }, { "epoch": 20.37, "learning_rate": 0.0002724, "loss": 0.5966, "step": 1365 }, { "epoch": 20.39, "learning_rate": 0.00027259999999999996, "loss": 0.4025, "step": 1366 }, { "epoch": 20.4, "learning_rate": 0.00027279999999999996, "loss": 0.5009, "step": 1367 }, { "epoch": 20.42, "learning_rate": 0.00027299999999999997, "loss": 0.1482, "step": 1368 }, { "epoch": 20.43, "learning_rate": 0.00027319999999999997, "loss": 0.5557, "step": 1369 }, { "epoch": 20.45, "learning_rate": 0.0002734, "loss": 0.6613, "step": 1370 }, { "epoch": 20.46, "learning_rate": 0.0002736, "loss": 0.8169, "step": 1371 }, { "epoch": 20.48, "learning_rate": 0.0002738, "loss": 0.5959, "step": 1372 }, { "epoch": 20.49, "learning_rate": 0.000274, "loss": 0.4948, "step": 1373 }, { "epoch": 20.51, "learning_rate": 0.0002742, "loss": 0.7418, "step": 1374 }, { "epoch": 20.52, "learning_rate": 0.00027439999999999995, "loss": 0.5943, "step": 1375 }, { "epoch": 20.54, "learning_rate": 0.00027459999999999995, "loss": 0.716, "step": 1376 }, { "epoch": 20.55, "learning_rate": 0.0002748, "loss": 0.8324, "step": 1377 }, { "epoch": 20.57, "learning_rate": 0.00027499999999999996, "loss": 0.2354, "step": 1378 }, { "epoch": 20.58, "learning_rate": 0.00027519999999999997, "loss": 0.5413, "step": 1379 }, { "epoch": 20.59, "learning_rate": 0.00027539999999999997, "loss": 0.6385, "step": 1380 }, { "epoch": 20.61, "learning_rate": 0.0002756, "loss": 0.5612, "step": 1381 }, { "epoch": 20.62, "learning_rate": 0.0002758, "loss": 0.9119, "step": 1382 }, { "epoch": 20.64, "learning_rate": 0.000276, "loss": 0.5478, "step": 1383 }, { "epoch": 20.65, "learning_rate": 0.0002762, "loss": 0.5139, "step": 1384 }, { "epoch": 20.67, "learning_rate": 0.0002764, "loss": 0.7378, "step": 1385 }, { "epoch": 20.68, "learning_rate": 0.0002766, "loss": 0.6912, "step": 1386 }, { "epoch": 20.7, "learning_rate": 0.00027679999999999995, "loss": 0.391, "step": 1387 }, { "epoch": 20.71, "learning_rate": 0.00027699999999999996, "loss": 0.3529, "step": 1388 }, { "epoch": 20.73, "learning_rate": 0.0002772, "loss": 0.6294, "step": 1389 }, { "epoch": 20.74, "learning_rate": 0.00027739999999999997, "loss": 0.5305, "step": 1390 }, { "epoch": 20.76, "learning_rate": 0.00027759999999999997, "loss": 0.6153, "step": 1391 }, { "epoch": 20.77, "learning_rate": 0.0002778, "loss": 0.7893, "step": 1392 }, { "epoch": 20.79, "learning_rate": 0.000278, "loss": 0.7969, "step": 1393 }, { "epoch": 20.8, "learning_rate": 0.0002782, "loss": 0.5978, "step": 1394 }, { "epoch": 20.82, "learning_rate": 0.0002784, "loss": 0.7604, "step": 1395 }, { "epoch": 20.83, "learning_rate": 0.00027859999999999994, "loss": 0.8762, "step": 1396 }, { "epoch": 20.85, "learning_rate": 0.0002788, "loss": 0.727, "step": 1397 }, { "epoch": 20.86, "learning_rate": 0.000279, "loss": 0.8943, "step": 1398 }, { "epoch": 20.88, "learning_rate": 0.00027919999999999996, "loss": 0.6312, "step": 1399 }, { "epoch": 20.89, "learning_rate": 0.00027939999999999996, "loss": 0.7669, "step": 1400 }, { "epoch": 20.89, "eval_accuracy": 0.7787567302985805, "eval_f1": 0.778299974393245, "eval_loss": 0.813083827495575, "eval_runtime": 344.4352, "eval_samples_per_second": 11.863, "eval_steps_per_second": 0.743, "step": 1400 }, { "epoch": 20.91, "learning_rate": 0.00027959999999999997, "loss": 0.455, "step": 1401 }, { "epoch": 20.92, "learning_rate": 0.00027979999999999997, "loss": 0.6066, "step": 1402 }, { "epoch": 20.94, "learning_rate": 0.00028, "loss": 0.3945, "step": 1403 }, { "epoch": 20.95, "learning_rate": 0.0002802, "loss": 0.7466, "step": 1404 }, { "epoch": 20.97, "learning_rate": 0.0002804, "loss": 0.5893, "step": 1405 }, { "epoch": 20.98, "learning_rate": 0.0002806, "loss": 0.6915, "step": 1406 }, { "epoch": 21.0, "learning_rate": 0.0002808, "loss": 1.0937, "step": 1407 }, { "epoch": 21.01, "learning_rate": 0.00028099999999999995, "loss": 0.8182, "step": 1408 }, { "epoch": 21.03, "learning_rate": 0.0002812, "loss": 0.9176, "step": 1409 }, { "epoch": 21.04, "learning_rate": 0.00028139999999999996, "loss": 0.8487, "step": 1410 }, { "epoch": 21.06, "learning_rate": 0.00028159999999999996, "loss": 0.4835, "step": 1411 }, { "epoch": 21.07, "learning_rate": 0.00028179999999999997, "loss": 0.9755, "step": 1412 }, { "epoch": 21.09, "learning_rate": 0.00028199999999999997, "loss": 1.315, "step": 1413 }, { "epoch": 21.1, "learning_rate": 0.0002822, "loss": 0.8134, "step": 1414 }, { "epoch": 21.12, "learning_rate": 0.0002824, "loss": 0.7944, "step": 1415 }, { "epoch": 21.13, "learning_rate": 0.0002826, "loss": 0.4791, "step": 1416 }, { "epoch": 21.15, "learning_rate": 0.0002828, "loss": 0.6249, "step": 1417 }, { "epoch": 21.16, "learning_rate": 0.000283, "loss": 0.8417, "step": 1418 }, { "epoch": 21.18, "learning_rate": 0.00028319999999999994, "loss": 0.774, "step": 1419 }, { "epoch": 21.19, "learning_rate": 0.00028339999999999995, "loss": 0.727, "step": 1420 }, { "epoch": 21.21, "learning_rate": 0.0002836, "loss": 0.3706, "step": 1421 }, { "epoch": 21.22, "learning_rate": 0.00028379999999999996, "loss": 0.3887, "step": 1422 }, { "epoch": 21.24, "learning_rate": 0.00028399999999999996, "loss": 0.6382, "step": 1423 }, { "epoch": 21.25, "learning_rate": 0.00028419999999999997, "loss": 0.718, "step": 1424 }, { "epoch": 21.27, "learning_rate": 0.0002844, "loss": 0.8399, "step": 1425 }, { "epoch": 21.28, "learning_rate": 0.0002846, "loss": 0.9529, "step": 1426 }, { "epoch": 21.3, "learning_rate": 0.0002848, "loss": 0.9305, "step": 1427 }, { "epoch": 21.31, "learning_rate": 0.000285, "loss": 0.7231, "step": 1428 }, { "epoch": 21.33, "learning_rate": 0.0002852, "loss": 0.5729, "step": 1429 }, { "epoch": 21.34, "learning_rate": 0.0002854, "loss": 0.5407, "step": 1430 }, { "epoch": 21.36, "learning_rate": 0.00028559999999999995, "loss": 0.7901, "step": 1431 }, { "epoch": 21.37, "learning_rate": 0.00028579999999999995, "loss": 0.575, "step": 1432 }, { "epoch": 21.39, "learning_rate": 0.00028599999999999996, "loss": 0.219, "step": 1433 }, { "epoch": 21.4, "learning_rate": 0.00028619999999999996, "loss": 0.7134, "step": 1434 }, { "epoch": 21.42, "learning_rate": 0.00028639999999999997, "loss": 0.2605, "step": 1435 }, { "epoch": 21.43, "learning_rate": 0.0002866, "loss": 0.5372, "step": 1436 }, { "epoch": 21.45, "learning_rate": 0.0002868, "loss": 0.4434, "step": 1437 }, { "epoch": 21.46, "learning_rate": 0.000287, "loss": 0.774, "step": 1438 }, { "epoch": 21.48, "learning_rate": 0.0002872, "loss": 0.2877, "step": 1439 }, { "epoch": 21.49, "learning_rate": 0.00028739999999999994, "loss": 0.2525, "step": 1440 }, { "epoch": 21.51, "learning_rate": 0.0002876, "loss": 0.5474, "step": 1441 }, { "epoch": 21.52, "learning_rate": 0.0002878, "loss": 0.4075, "step": 1442 }, { "epoch": 21.54, "learning_rate": 0.00028799999999999995, "loss": 0.72, "step": 1443 }, { "epoch": 21.55, "learning_rate": 0.00028819999999999996, "loss": 0.3995, "step": 1444 }, { "epoch": 21.57, "learning_rate": 0.00028839999999999996, "loss": 0.5891, "step": 1445 }, { "epoch": 21.58, "learning_rate": 0.00028859999999999997, "loss": 0.8102, "step": 1446 }, { "epoch": 21.59, "learning_rate": 0.00028879999999999997, "loss": 0.6418, "step": 1447 }, { "epoch": 21.61, "learning_rate": 0.000289, "loss": 0.3104, "step": 1448 }, { "epoch": 21.62, "learning_rate": 0.0002892, "loss": 0.8299, "step": 1449 }, { "epoch": 21.64, "learning_rate": 0.0002894, "loss": 0.2404, "step": 1450 }, { "epoch": 21.65, "learning_rate": 0.0002896, "loss": 0.3771, "step": 1451 }, { "epoch": 21.67, "learning_rate": 0.00028979999999999994, "loss": 0.7731, "step": 1452 }, { "epoch": 21.68, "learning_rate": 0.00029, "loss": 0.2826, "step": 1453 }, { "epoch": 21.7, "learning_rate": 0.0002902, "loss": 0.5406, "step": 1454 }, { "epoch": 21.71, "learning_rate": 0.00029039999999999996, "loss": 0.5265, "step": 1455 }, { "epoch": 21.73, "learning_rate": 0.00029059999999999996, "loss": 0.2196, "step": 1456 }, { "epoch": 21.74, "learning_rate": 0.00029079999999999997, "loss": 0.5047, "step": 1457 }, { "epoch": 21.76, "learning_rate": 0.00029099999999999997, "loss": 0.8224, "step": 1458 }, { "epoch": 21.77, "learning_rate": 0.0002912, "loss": 0.7324, "step": 1459 }, { "epoch": 21.79, "learning_rate": 0.0002914, "loss": 0.9723, "step": 1460 }, { "epoch": 21.8, "learning_rate": 0.0002916, "loss": 0.7277, "step": 1461 }, { "epoch": 21.82, "learning_rate": 0.0002918, "loss": 0.6583, "step": 1462 }, { "epoch": 21.83, "learning_rate": 0.000292, "loss": 0.6693, "step": 1463 }, { "epoch": 21.85, "learning_rate": 0.00029219999999999995, "loss": 0.5129, "step": 1464 }, { "epoch": 21.86, "learning_rate": 0.0002924, "loss": 0.5233, "step": 1465 }, { "epoch": 21.88, "learning_rate": 0.0002926, "loss": 0.2062, "step": 1466 }, { "epoch": 21.89, "learning_rate": 0.00029279999999999996, "loss": 0.9001, "step": 1467 }, { "epoch": 21.91, "learning_rate": 0.00029299999999999997, "loss": 0.3718, "step": 1468 }, { "epoch": 21.92, "learning_rate": 0.00029319999999999997, "loss": 0.5696, "step": 1469 }, { "epoch": 21.94, "learning_rate": 0.0002934, "loss": 0.7076, "step": 1470 }, { "epoch": 21.95, "learning_rate": 0.0002936, "loss": 0.7528, "step": 1471 }, { "epoch": 21.97, "learning_rate": 0.00029379999999999993, "loss": 0.4238, "step": 1472 }, { "epoch": 21.98, "learning_rate": 0.000294, "loss": 0.5624, "step": 1473 }, { "epoch": 22.0, "learning_rate": 0.0002942, "loss": 0.8636, "step": 1474 }, { "epoch": 22.01, "learning_rate": 0.00029439999999999995, "loss": 0.3772, "step": 1475 }, { "epoch": 22.03, "learning_rate": 0.00029459999999999995, "loss": 1.2659, "step": 1476 }, { "epoch": 22.04, "learning_rate": 0.00029479999999999996, "loss": 0.4182, "step": 1477 }, { "epoch": 22.06, "learning_rate": 0.00029499999999999996, "loss": 0.3934, "step": 1478 }, { "epoch": 22.07, "learning_rate": 0.00029519999999999997, "loss": 0.4777, "step": 1479 }, { "epoch": 22.09, "learning_rate": 0.00029539999999999997, "loss": 0.8398, "step": 1480 }, { "epoch": 22.1, "learning_rate": 0.0002956, "loss": 0.7846, "step": 1481 }, { "epoch": 22.12, "learning_rate": 0.0002958, "loss": 0.8802, "step": 1482 }, { "epoch": 22.13, "learning_rate": 0.000296, "loss": 0.4906, "step": 1483 }, { "epoch": 22.15, "learning_rate": 0.00029619999999999994, "loss": 0.5514, "step": 1484 }, { "epoch": 22.16, "learning_rate": 0.0002964, "loss": 0.3834, "step": 1485 }, { "epoch": 22.18, "learning_rate": 0.0002966, "loss": 0.3833, "step": 1486 }, { "epoch": 22.19, "learning_rate": 0.00029679999999999995, "loss": 0.2823, "step": 1487 }, { "epoch": 22.21, "learning_rate": 0.00029699999999999996, "loss": 0.7299, "step": 1488 }, { "epoch": 22.22, "learning_rate": 0.00029719999999999996, "loss": 0.2089, "step": 1489 }, { "epoch": 22.24, "learning_rate": 0.00029739999999999996, "loss": 0.103, "step": 1490 }, { "epoch": 22.25, "learning_rate": 0.00029759999999999997, "loss": 0.4865, "step": 1491 }, { "epoch": 22.27, "learning_rate": 0.0002978, "loss": 0.9131, "step": 1492 }, { "epoch": 22.28, "learning_rate": 0.000298, "loss": 0.4465, "step": 1493 }, { "epoch": 22.3, "learning_rate": 0.0002982, "loss": 0.3508, "step": 1494 }, { "epoch": 22.31, "learning_rate": 0.0002984, "loss": 0.4053, "step": 1495 }, { "epoch": 22.33, "learning_rate": 0.00029859999999999994, "loss": 1.0208, "step": 1496 }, { "epoch": 22.34, "learning_rate": 0.0002988, "loss": 0.3976, "step": 1497 }, { "epoch": 22.36, "learning_rate": 0.000299, "loss": 0.6894, "step": 1498 }, { "epoch": 22.37, "learning_rate": 0.00029919999999999995, "loss": 0.1601, "step": 1499 }, { "epoch": 22.39, "learning_rate": 0.00029939999999999996, "loss": 0.4907, "step": 1500 }, { "epoch": 22.4, "learning_rate": 0.00029959999999999996, "loss": 0.3205, "step": 1501 }, { "epoch": 22.42, "learning_rate": 0.00029979999999999997, "loss": 1.2744, "step": 1502 }, { "epoch": 22.43, "learning_rate": 0.0003, "loss": 0.4245, "step": 1503 }, { "epoch": 22.45, "learning_rate": 0.0002999649122807017, "loss": 0.7102, "step": 1504 }, { "epoch": 22.46, "learning_rate": 0.00029992982456140347, "loss": 1.26, "step": 1505 }, { "epoch": 22.48, "learning_rate": 0.0002998947368421052, "loss": 0.8482, "step": 1506 }, { "epoch": 22.49, "learning_rate": 0.00029985964912280697, "loss": 0.6502, "step": 1507 }, { "epoch": 22.51, "learning_rate": 0.0002998245614035087, "loss": 1.0905, "step": 1508 }, { "epoch": 22.52, "learning_rate": 0.0002997894736842105, "loss": 0.9749, "step": 1509 }, { "epoch": 22.54, "learning_rate": 0.00029975438596491227, "loss": 0.8889, "step": 1510 }, { "epoch": 22.55, "learning_rate": 0.000299719298245614, "loss": 0.9885, "step": 1511 }, { "epoch": 22.57, "learning_rate": 0.00029968421052631577, "loss": 0.9825, "step": 1512 }, { "epoch": 22.58, "learning_rate": 0.0002996491228070175, "loss": 0.5989, "step": 1513 }, { "epoch": 22.59, "learning_rate": 0.00029961403508771926, "loss": 0.6243, "step": 1514 }, { "epoch": 22.61, "learning_rate": 0.000299578947368421, "loss": 0.9003, "step": 1515 }, { "epoch": 22.62, "learning_rate": 0.0002995438596491228, "loss": 0.3962, "step": 1516 }, { "epoch": 22.64, "learning_rate": 0.00029950877192982457, "loss": 0.6765, "step": 1517 }, { "epoch": 22.65, "learning_rate": 0.0002994736842105263, "loss": 0.242, "step": 1518 }, { "epoch": 22.67, "learning_rate": 0.000299438596491228, "loss": 0.456, "step": 1519 }, { "epoch": 22.68, "learning_rate": 0.0002994035087719298, "loss": 0.4331, "step": 1520 }, { "epoch": 22.7, "learning_rate": 0.00029936842105263156, "loss": 0.1624, "step": 1521 }, { "epoch": 22.71, "learning_rate": 0.0002993333333333333, "loss": 0.7405, "step": 1522 }, { "epoch": 22.73, "learning_rate": 0.00029929824561403506, "loss": 0.2843, "step": 1523 }, { "epoch": 22.74, "learning_rate": 0.0002992631578947368, "loss": 0.7121, "step": 1524 }, { "epoch": 22.76, "learning_rate": 0.00029922807017543856, "loss": 1.2846, "step": 1525 }, { "epoch": 22.77, "learning_rate": 0.0002991929824561403, "loss": 0.6734, "step": 1526 }, { "epoch": 22.79, "learning_rate": 0.00029915789473684205, "loss": 1.0198, "step": 1527 }, { "epoch": 22.8, "learning_rate": 0.00029912280701754386, "loss": 0.5129, "step": 1528 }, { "epoch": 22.82, "learning_rate": 0.0002990877192982456, "loss": 0.4385, "step": 1529 }, { "epoch": 22.83, "learning_rate": 0.00029905263157894735, "loss": 0.8057, "step": 1530 }, { "epoch": 22.85, "learning_rate": 0.0002990175438596491, "loss": 1.1243, "step": 1531 }, { "epoch": 22.86, "learning_rate": 0.00029898245614035085, "loss": 1.09, "step": 1532 }, { "epoch": 22.88, "learning_rate": 0.0002989473684210526, "loss": 0.4577, "step": 1533 }, { "epoch": 22.89, "learning_rate": 0.00029891228070175435, "loss": 0.5147, "step": 1534 }, { "epoch": 22.91, "learning_rate": 0.00029887719298245615, "loss": 0.5657, "step": 1535 }, { "epoch": 22.92, "learning_rate": 0.00029884210526315785, "loss": 0.7621, "step": 1536 }, { "epoch": 22.94, "learning_rate": 0.0002988070175438596, "loss": 0.4339, "step": 1537 }, { "epoch": 22.95, "learning_rate": 0.00029877192982456134, "loss": 0.6648, "step": 1538 }, { "epoch": 22.97, "learning_rate": 0.00029873684210526315, "loss": 0.4844, "step": 1539 }, { "epoch": 22.98, "learning_rate": 0.0002987017543859649, "loss": 0.4958, "step": 1540 }, { "epoch": 23.0, "learning_rate": 0.00029866666666666664, "loss": 0.6266, "step": 1541 }, { "epoch": 23.01, "learning_rate": 0.0002986315789473684, "loss": 1.2849, "step": 1542 }, { "epoch": 23.03, "learning_rate": 0.00029859649122807014, "loss": 0.8311, "step": 1543 }, { "epoch": 23.04, "learning_rate": 0.0002985614035087719, "loss": 0.7814, "step": 1544 }, { "epoch": 23.06, "learning_rate": 0.00029852631578947364, "loss": 0.7051, "step": 1545 }, { "epoch": 23.07, "learning_rate": 0.00029849122807017544, "loss": 0.752, "step": 1546 }, { "epoch": 23.09, "learning_rate": 0.0002984561403508772, "loss": 0.4662, "step": 1547 }, { "epoch": 23.1, "learning_rate": 0.00029842105263157894, "loss": 0.4054, "step": 1548 }, { "epoch": 23.12, "learning_rate": 0.0002983859649122807, "loss": 0.9495, "step": 1549 }, { "epoch": 23.13, "learning_rate": 0.00029835087719298244, "loss": 0.4645, "step": 1550 }, { "epoch": 23.15, "learning_rate": 0.0002983157894736842, "loss": 0.9708, "step": 1551 }, { "epoch": 23.16, "learning_rate": 0.00029828070175438593, "loss": 0.5805, "step": 1552 }, { "epoch": 23.18, "learning_rate": 0.0002982456140350877, "loss": 0.6846, "step": 1553 }, { "epoch": 23.19, "learning_rate": 0.00029821052631578943, "loss": 0.7177, "step": 1554 }, { "epoch": 23.21, "learning_rate": 0.0002981754385964912, "loss": 0.764, "step": 1555 }, { "epoch": 23.22, "learning_rate": 0.00029814035087719293, "loss": 0.512, "step": 1556 }, { "epoch": 23.24, "learning_rate": 0.00029810526315789473, "loss": 0.1168, "step": 1557 }, { "epoch": 23.25, "learning_rate": 0.0002980701754385965, "loss": 0.3928, "step": 1558 }, { "epoch": 23.27, "learning_rate": 0.00029803508771929823, "loss": 0.624, "step": 1559 }, { "epoch": 23.28, "learning_rate": 0.000298, "loss": 0.7086, "step": 1560 }, { "epoch": 23.3, "learning_rate": 0.00029796491228070173, "loss": 0.6273, "step": 1561 }, { "epoch": 23.31, "learning_rate": 0.0002979298245614035, "loss": 0.6187, "step": 1562 }, { "epoch": 23.33, "learning_rate": 0.0002978947368421052, "loss": 0.6805, "step": 1563 }, { "epoch": 23.34, "learning_rate": 0.000297859649122807, "loss": 0.5361, "step": 1564 }, { "epoch": 23.36, "learning_rate": 0.0002978245614035088, "loss": 0.6424, "step": 1565 }, { "epoch": 23.37, "learning_rate": 0.0002977894736842105, "loss": 0.1724, "step": 1566 }, { "epoch": 23.39, "learning_rate": 0.0002977543859649122, "loss": 0.936, "step": 1567 }, { "epoch": 23.4, "learning_rate": 0.000297719298245614, "loss": 0.5024, "step": 1568 }, { "epoch": 23.42, "learning_rate": 0.00029768421052631577, "loss": 0.4122, "step": 1569 }, { "epoch": 23.43, "learning_rate": 0.0002976491228070175, "loss": 0.7544, "step": 1570 }, { "epoch": 23.45, "learning_rate": 0.00029761403508771927, "loss": 0.3982, "step": 1571 }, { "epoch": 23.46, "learning_rate": 0.000297578947368421, "loss": 0.2862, "step": 1572 }, { "epoch": 23.48, "learning_rate": 0.00029754385964912277, "loss": 0.4515, "step": 1573 }, { "epoch": 23.49, "learning_rate": 0.0002975087719298245, "loss": 0.1853, "step": 1574 }, { "epoch": 23.51, "learning_rate": 0.00029747368421052627, "loss": 0.7664, "step": 1575 }, { "epoch": 23.52, "learning_rate": 0.00029743859649122807, "loss": 0.6729, "step": 1576 }, { "epoch": 23.54, "learning_rate": 0.0002974035087719298, "loss": 0.3235, "step": 1577 }, { "epoch": 23.55, "learning_rate": 0.00029736842105263157, "loss": 0.5128, "step": 1578 }, { "epoch": 23.57, "learning_rate": 0.0002973333333333333, "loss": 0.8515, "step": 1579 }, { "epoch": 23.58, "learning_rate": 0.00029729824561403506, "loss": 0.8926, "step": 1580 }, { "epoch": 23.59, "learning_rate": 0.0002972631578947368, "loss": 1.0383, "step": 1581 }, { "epoch": 23.61, "learning_rate": 0.00029722807017543856, "loss": 1.0073, "step": 1582 }, { "epoch": 23.62, "learning_rate": 0.00029719298245614036, "loss": 1.0494, "step": 1583 }, { "epoch": 23.64, "learning_rate": 0.00029715789473684206, "loss": 0.7003, "step": 1584 }, { "epoch": 23.65, "learning_rate": 0.0002971228070175438, "loss": 0.5168, "step": 1585 }, { "epoch": 23.67, "learning_rate": 0.00029708771929824556, "loss": 0.6879, "step": 1586 }, { "epoch": 23.68, "learning_rate": 0.00029705263157894736, "loss": 0.2879, "step": 1587 }, { "epoch": 23.7, "learning_rate": 0.0002970175438596491, "loss": 0.8452, "step": 1588 }, { "epoch": 23.71, "learning_rate": 0.00029698245614035086, "loss": 0.2214, "step": 1589 }, { "epoch": 23.73, "learning_rate": 0.0002969473684210526, "loss": 0.5489, "step": 1590 }, { "epoch": 23.74, "learning_rate": 0.00029691228070175435, "loss": 0.4714, "step": 1591 }, { "epoch": 23.76, "learning_rate": 0.0002968771929824561, "loss": 1.0541, "step": 1592 }, { "epoch": 23.77, "learning_rate": 0.00029684210526315785, "loss": 0.8526, "step": 1593 }, { "epoch": 23.79, "learning_rate": 0.00029680701754385965, "loss": 0.5043, "step": 1594 }, { "epoch": 23.8, "learning_rate": 0.0002967719298245614, "loss": 0.4541, "step": 1595 }, { "epoch": 23.82, "learning_rate": 0.00029673684210526315, "loss": 0.4321, "step": 1596 }, { "epoch": 23.83, "learning_rate": 0.00029670175438596485, "loss": 0.7054, "step": 1597 }, { "epoch": 23.85, "learning_rate": 0.00029666666666666665, "loss": 0.8708, "step": 1598 }, { "epoch": 23.86, "learning_rate": 0.0002966315789473684, "loss": 0.343, "step": 1599 }, { "epoch": 23.88, "learning_rate": 0.00029659649122807015, "loss": 0.4606, "step": 1600 }, { "epoch": 23.88, "eval_accuracy": 0.7792462065589819, "eval_f1": 0.7878802780794968, "eval_loss": 0.8314271569252014, "eval_runtime": 344.0688, "eval_samples_per_second": 11.876, "eval_steps_per_second": 0.744, "step": 1600 }, { "epoch": 23.89, "learning_rate": 0.0002965614035087719, "loss": 0.6198, "step": 1601 }, { "epoch": 23.91, "learning_rate": 0.00029652631578947364, "loss": 0.6289, "step": 1602 }, { "epoch": 23.92, "learning_rate": 0.0002964912280701754, "loss": 0.4563, "step": 1603 }, { "epoch": 23.94, "learning_rate": 0.00029645614035087714, "loss": 0.9609, "step": 1604 }, { "epoch": 23.95, "learning_rate": 0.00029642105263157895, "loss": 0.4432, "step": 1605 }, { "epoch": 23.97, "learning_rate": 0.0002963859649122807, "loss": 0.3827, "step": 1606 }, { "epoch": 23.98, "learning_rate": 0.00029635087719298244, "loss": 0.9992, "step": 1607 }, { "epoch": 24.0, "learning_rate": 0.0002963157894736842, "loss": 0.4266, "step": 1608 }, { "epoch": 24.01, "learning_rate": 0.00029628070175438594, "loss": 0.5823, "step": 1609 }, { "epoch": 24.03, "learning_rate": 0.0002962456140350877, "loss": 0.5493, "step": 1610 }, { "epoch": 24.04, "learning_rate": 0.00029621052631578944, "loss": 0.3707, "step": 1611 }, { "epoch": 24.06, "learning_rate": 0.0002961754385964912, "loss": 0.4518, "step": 1612 }, { "epoch": 24.07, "learning_rate": 0.000296140350877193, "loss": 0.7895, "step": 1613 }, { "epoch": 24.09, "learning_rate": 0.00029610526315789474, "loss": 0.5102, "step": 1614 }, { "epoch": 24.1, "learning_rate": 0.00029607017543859643, "loss": 0.6533, "step": 1615 }, { "epoch": 24.12, "learning_rate": 0.00029603508771929824, "loss": 0.6582, "step": 1616 }, { "epoch": 24.13, "learning_rate": 0.000296, "loss": 0.6249, "step": 1617 }, { "epoch": 24.15, "learning_rate": 0.00029596491228070173, "loss": 0.4666, "step": 1618 }, { "epoch": 24.16, "learning_rate": 0.0002959298245614035, "loss": 0.0862, "step": 1619 }, { "epoch": 24.18, "learning_rate": 0.00029589473684210523, "loss": 0.2562, "step": 1620 }, { "epoch": 24.19, "learning_rate": 0.000295859649122807, "loss": 0.5261, "step": 1621 }, { "epoch": 24.21, "learning_rate": 0.00029582456140350873, "loss": 0.5443, "step": 1622 }, { "epoch": 24.22, "learning_rate": 0.0002957894736842105, "loss": 0.2514, "step": 1623 }, { "epoch": 24.24, "learning_rate": 0.0002957543859649123, "loss": 0.501, "step": 1624 }, { "epoch": 24.25, "learning_rate": 0.00029571929824561403, "loss": 0.6602, "step": 1625 }, { "epoch": 24.27, "learning_rate": 0.0002956842105263158, "loss": 1.0655, "step": 1626 }, { "epoch": 24.28, "learning_rate": 0.0002956491228070175, "loss": 1.0767, "step": 1627 }, { "epoch": 24.3, "learning_rate": 0.0002956140350877193, "loss": 1.6448, "step": 1628 }, { "epoch": 24.31, "learning_rate": 0.000295578947368421, "loss": 0.8344, "step": 1629 }, { "epoch": 24.33, "learning_rate": 0.0002955438596491228, "loss": 0.4289, "step": 1630 }, { "epoch": 24.34, "learning_rate": 0.0002955087719298246, "loss": 0.4206, "step": 1631 }, { "epoch": 24.36, "learning_rate": 0.00029547368421052627, "loss": 0.4214, "step": 1632 }, { "epoch": 24.37, "learning_rate": 0.000295438596491228, "loss": 0.8347, "step": 1633 }, { "epoch": 24.39, "learning_rate": 0.00029540350877192977, "loss": 0.4034, "step": 1634 }, { "epoch": 24.4, "learning_rate": 0.00029536842105263157, "loss": 0.4719, "step": 1635 }, { "epoch": 24.42, "learning_rate": 0.0002953333333333333, "loss": 0.3112, "step": 1636 }, { "epoch": 24.43, "learning_rate": 0.00029529824561403507, "loss": 0.7053, "step": 1637 }, { "epoch": 24.45, "learning_rate": 0.0002952631578947368, "loss": 0.2759, "step": 1638 }, { "epoch": 24.46, "learning_rate": 0.00029522807017543857, "loss": 0.3891, "step": 1639 }, { "epoch": 24.48, "learning_rate": 0.0002951929824561403, "loss": 0.2628, "step": 1640 }, { "epoch": 24.49, "learning_rate": 0.00029515789473684206, "loss": 0.4246, "step": 1641 }, { "epoch": 24.51, "learning_rate": 0.00029512280701754387, "loss": 0.8412, "step": 1642 }, { "epoch": 24.52, "learning_rate": 0.0002950877192982456, "loss": 0.5919, "step": 1643 }, { "epoch": 24.54, "learning_rate": 0.00029505263157894736, "loss": 0.5095, "step": 1644 }, { "epoch": 24.55, "learning_rate": 0.00029501754385964906, "loss": 0.7387, "step": 1645 }, { "epoch": 24.57, "learning_rate": 0.00029498245614035086, "loss": 0.3755, "step": 1646 }, { "epoch": 24.58, "learning_rate": 0.0002949473684210526, "loss": 0.3604, "step": 1647 }, { "epoch": 24.59, "learning_rate": 0.00029491228070175436, "loss": 0.3853, "step": 1648 }, { "epoch": 24.61, "learning_rate": 0.0002948771929824561, "loss": 0.4088, "step": 1649 }, { "epoch": 24.62, "learning_rate": 0.00029484210526315786, "loss": 0.652, "step": 1650 }, { "epoch": 24.64, "learning_rate": 0.0002948070175438596, "loss": 0.8167, "step": 1651 }, { "epoch": 24.65, "learning_rate": 0.00029477192982456135, "loss": 0.3586, "step": 1652 }, { "epoch": 24.67, "learning_rate": 0.0002947368421052631, "loss": 0.2884, "step": 1653 }, { "epoch": 24.68, "learning_rate": 0.0002947017543859649, "loss": 0.2747, "step": 1654 }, { "epoch": 24.7, "learning_rate": 0.00029466666666666666, "loss": 0.3327, "step": 1655 }, { "epoch": 24.71, "learning_rate": 0.0002946315789473684, "loss": 0.2504, "step": 1656 }, { "epoch": 24.73, "learning_rate": 0.00029459649122807015, "loss": 0.3425, "step": 1657 }, { "epoch": 24.74, "learning_rate": 0.0002945614035087719, "loss": 0.429, "step": 1658 }, { "epoch": 24.76, "learning_rate": 0.00029452631578947365, "loss": 0.3557, "step": 1659 }, { "epoch": 24.77, "learning_rate": 0.0002944912280701754, "loss": 0.6772, "step": 1660 }, { "epoch": 24.79, "learning_rate": 0.0002944561403508772, "loss": 0.321, "step": 1661 }, { "epoch": 24.8, "learning_rate": 0.00029442105263157895, "loss": 0.7565, "step": 1662 }, { "epoch": 24.82, "learning_rate": 0.00029438596491228065, "loss": 0.2763, "step": 1663 }, { "epoch": 24.83, "learning_rate": 0.0002943508771929824, "loss": 0.8887, "step": 1664 }, { "epoch": 24.85, "learning_rate": 0.0002943157894736842, "loss": 0.5218, "step": 1665 }, { "epoch": 24.86, "learning_rate": 0.00029428070175438595, "loss": 0.2061, "step": 1666 }, { "epoch": 24.88, "learning_rate": 0.0002942456140350877, "loss": 0.2076, "step": 1667 }, { "epoch": 24.89, "learning_rate": 0.00029421052631578944, "loss": 0.3881, "step": 1668 }, { "epoch": 24.91, "learning_rate": 0.0002941754385964912, "loss": 0.5423, "step": 1669 }, { "epoch": 24.92, "learning_rate": 0.00029414035087719294, "loss": 0.2679, "step": 1670 }, { "epoch": 24.94, "learning_rate": 0.0002941052631578947, "loss": 0.3646, "step": 1671 }, { "epoch": 24.95, "learning_rate": 0.0002940701754385965, "loss": 0.5029, "step": 1672 }, { "epoch": 24.97, "learning_rate": 0.00029403508771929824, "loss": 0.3023, "step": 1673 }, { "epoch": 24.98, "learning_rate": 0.000294, "loss": 0.5306, "step": 1674 }, { "epoch": 25.0, "learning_rate": 0.00029396491228070174, "loss": 0.4512, "step": 1675 }, { "epoch": 25.01, "learning_rate": 0.0002939298245614035, "loss": 0.7319, "step": 1676 }, { "epoch": 25.03, "learning_rate": 0.00029389473684210524, "loss": 0.604, "step": 1677 }, { "epoch": 25.04, "learning_rate": 0.000293859649122807, "loss": 0.3086, "step": 1678 }, { "epoch": 25.06, "learning_rate": 0.0002938245614035088, "loss": 0.2008, "step": 1679 }, { "epoch": 25.07, "learning_rate": 0.0002937894736842105, "loss": 0.3837, "step": 1680 }, { "epoch": 25.09, "learning_rate": 0.00029375438596491223, "loss": 0.5445, "step": 1681 }, { "epoch": 25.1, "learning_rate": 0.000293719298245614, "loss": 0.4024, "step": 1682 }, { "epoch": 25.12, "learning_rate": 0.0002936842105263158, "loss": 0.8895, "step": 1683 }, { "epoch": 25.13, "learning_rate": 0.00029364912280701753, "loss": 0.4681, "step": 1684 }, { "epoch": 25.15, "learning_rate": 0.0002936140350877193, "loss": 0.2096, "step": 1685 }, { "epoch": 25.16, "learning_rate": 0.00029357894736842103, "loss": 0.3158, "step": 1686 }, { "epoch": 25.18, "learning_rate": 0.0002935438596491228, "loss": 0.3478, "step": 1687 }, { "epoch": 25.19, "learning_rate": 0.00029350877192982453, "loss": 0.1883, "step": 1688 }, { "epoch": 25.21, "learning_rate": 0.0002934736842105263, "loss": 0.3698, "step": 1689 }, { "epoch": 25.22, "learning_rate": 0.000293438596491228, "loss": 0.4285, "step": 1690 }, { "epoch": 25.24, "learning_rate": 0.00029340350877192983, "loss": 0.373, "step": 1691 }, { "epoch": 25.25, "learning_rate": 0.0002933684210526316, "loss": 0.1485, "step": 1692 }, { "epoch": 25.27, "learning_rate": 0.00029333333333333327, "loss": 0.447, "step": 1693 }, { "epoch": 25.28, "learning_rate": 0.0002932982456140351, "loss": 0.6378, "step": 1694 }, { "epoch": 25.3, "learning_rate": 0.0002932631578947368, "loss": 0.5661, "step": 1695 }, { "epoch": 25.31, "learning_rate": 0.00029322807017543857, "loss": 0.5903, "step": 1696 }, { "epoch": 25.33, "learning_rate": 0.0002931929824561403, "loss": 0.6971, "step": 1697 }, { "epoch": 25.34, "learning_rate": 0.00029315789473684207, "loss": 0.3974, "step": 1698 }, { "epoch": 25.36, "learning_rate": 0.0002931228070175438, "loss": 0.2634, "step": 1699 }, { "epoch": 25.37, "learning_rate": 0.00029308771929824557, "loss": 0.7687, "step": 1700 }, { "epoch": 25.39, "learning_rate": 0.0002930526315789473, "loss": 0.6014, "step": 1701 }, { "epoch": 25.4, "learning_rate": 0.0002930175438596491, "loss": 0.5096, "step": 1702 }, { "epoch": 25.42, "learning_rate": 0.00029298245614035087, "loss": 0.3946, "step": 1703 }, { "epoch": 25.43, "learning_rate": 0.0002929473684210526, "loss": 0.3069, "step": 1704 }, { "epoch": 25.45, "learning_rate": 0.00029291228070175437, "loss": 0.2154, "step": 1705 }, { "epoch": 25.46, "learning_rate": 0.0002928771929824561, "loss": 0.2049, "step": 1706 }, { "epoch": 25.48, "learning_rate": 0.00029284210526315786, "loss": 0.4638, "step": 1707 }, { "epoch": 25.49, "learning_rate": 0.0002928070175438596, "loss": 0.4175, "step": 1708 }, { "epoch": 25.51, "learning_rate": 0.0002927719298245614, "loss": 1.1591, "step": 1709 }, { "epoch": 25.52, "learning_rate": 0.00029273684210526316, "loss": 0.2331, "step": 1710 }, { "epoch": 25.54, "learning_rate": 0.00029270175438596486, "loss": 0.7711, "step": 1711 }, { "epoch": 25.55, "learning_rate": 0.0002926666666666666, "loss": 0.3989, "step": 1712 }, { "epoch": 25.57, "learning_rate": 0.0002926315789473684, "loss": 0.4343, "step": 1713 }, { "epoch": 25.58, "learning_rate": 0.00029259649122807016, "loss": 0.5231, "step": 1714 }, { "epoch": 25.59, "learning_rate": 0.0002925614035087719, "loss": 0.3829, "step": 1715 }, { "epoch": 25.61, "learning_rate": 0.00029252631578947366, "loss": 0.5188, "step": 1716 }, { "epoch": 25.62, "learning_rate": 0.0002924912280701754, "loss": 0.495, "step": 1717 }, { "epoch": 25.64, "learning_rate": 0.00029245614035087715, "loss": 0.1815, "step": 1718 }, { "epoch": 25.65, "learning_rate": 0.0002924210526315789, "loss": 0.2124, "step": 1719 }, { "epoch": 25.67, "learning_rate": 0.0002923859649122807, "loss": 0.372, "step": 1720 }, { "epoch": 25.68, "learning_rate": 0.00029235087719298245, "loss": 0.1183, "step": 1721 }, { "epoch": 25.7, "learning_rate": 0.0002923157894736842, "loss": 0.7579, "step": 1722 }, { "epoch": 25.71, "learning_rate": 0.00029228070175438595, "loss": 0.217, "step": 1723 }, { "epoch": 25.73, "learning_rate": 0.0002922456140350877, "loss": 0.0824, "step": 1724 }, { "epoch": 25.74, "learning_rate": 0.00029221052631578945, "loss": 0.1303, "step": 1725 }, { "epoch": 25.76, "learning_rate": 0.0002921754385964912, "loss": 0.8698, "step": 1726 }, { "epoch": 25.77, "learning_rate": 0.00029214035087719295, "loss": 0.7743, "step": 1727 }, { "epoch": 25.79, "learning_rate": 0.0002921052631578947, "loss": 0.3436, "step": 1728 }, { "epoch": 25.8, "learning_rate": 0.00029207017543859644, "loss": 0.9807, "step": 1729 }, { "epoch": 25.82, "learning_rate": 0.0002920350877192982, "loss": 0.3922, "step": 1730 }, { "epoch": 25.83, "learning_rate": 0.000292, "loss": 0.9146, "step": 1731 }, { "epoch": 25.85, "learning_rate": 0.00029196491228070174, "loss": 0.2695, "step": 1732 }, { "epoch": 25.86, "learning_rate": 0.0002919298245614035, "loss": 0.5178, "step": 1733 }, { "epoch": 25.88, "learning_rate": 0.00029189473684210524, "loss": 0.4618, "step": 1734 }, { "epoch": 25.89, "learning_rate": 0.000291859649122807, "loss": 0.3861, "step": 1735 }, { "epoch": 25.91, "learning_rate": 0.00029182456140350874, "loss": 0.2787, "step": 1736 }, { "epoch": 25.92, "learning_rate": 0.0002917894736842105, "loss": 0.4399, "step": 1737 }, { "epoch": 25.94, "learning_rate": 0.00029175438596491224, "loss": 0.9383, "step": 1738 }, { "epoch": 25.95, "learning_rate": 0.00029171929824561404, "loss": 0.3467, "step": 1739 }, { "epoch": 25.97, "learning_rate": 0.0002916842105263158, "loss": 0.6328, "step": 1740 }, { "epoch": 25.98, "learning_rate": 0.0002916491228070175, "loss": 0.5946, "step": 1741 }, { "epoch": 26.0, "learning_rate": 0.0002916140350877193, "loss": 0.2688, "step": 1742 }, { "epoch": 26.01, "learning_rate": 0.00029157894736842104, "loss": 0.3514, "step": 1743 }, { "epoch": 26.03, "learning_rate": 0.0002915438596491228, "loss": 0.8932, "step": 1744 }, { "epoch": 26.04, "learning_rate": 0.00029150877192982453, "loss": 0.4737, "step": 1745 }, { "epoch": 26.06, "learning_rate": 0.0002914736842105263, "loss": 0.1499, "step": 1746 }, { "epoch": 26.07, "learning_rate": 0.00029143859649122803, "loss": 0.5537, "step": 1747 }, { "epoch": 26.09, "learning_rate": 0.0002914035087719298, "loss": 0.2274, "step": 1748 }, { "epoch": 26.1, "learning_rate": 0.00029136842105263153, "loss": 0.2173, "step": 1749 }, { "epoch": 26.12, "learning_rate": 0.00029133333333333333, "loss": 0.6862, "step": 1750 }, { "epoch": 26.13, "learning_rate": 0.0002912982456140351, "loss": 0.4882, "step": 1751 }, { "epoch": 26.15, "learning_rate": 0.00029126315789473683, "loss": 0.3082, "step": 1752 }, { "epoch": 26.16, "learning_rate": 0.0002912280701754386, "loss": 0.4787, "step": 1753 }, { "epoch": 26.18, "learning_rate": 0.0002911929824561403, "loss": 0.5084, "step": 1754 }, { "epoch": 26.19, "learning_rate": 0.0002911578947368421, "loss": 0.5261, "step": 1755 }, { "epoch": 26.21, "learning_rate": 0.0002911228070175438, "loss": 0.8972, "step": 1756 }, { "epoch": 26.22, "learning_rate": 0.0002910877192982456, "loss": 0.0873, "step": 1757 }, { "epoch": 26.24, "learning_rate": 0.0002910526315789473, "loss": 0.0688, "step": 1758 }, { "epoch": 26.25, "learning_rate": 0.00029101754385964907, "loss": 0.5787, "step": 1759 }, { "epoch": 26.27, "learning_rate": 0.0002909824561403508, "loss": 0.5109, "step": 1760 }, { "epoch": 26.28, "learning_rate": 0.0002909473684210526, "loss": 0.9027, "step": 1761 }, { "epoch": 26.3, "learning_rate": 0.00029091228070175437, "loss": 0.6658, "step": 1762 }, { "epoch": 26.31, "learning_rate": 0.0002908771929824561, "loss": 0.3153, "step": 1763 }, { "epoch": 26.33, "learning_rate": 0.00029084210526315787, "loss": 0.4754, "step": 1764 }, { "epoch": 26.34, "learning_rate": 0.0002908070175438596, "loss": 0.8009, "step": 1765 }, { "epoch": 26.36, "learning_rate": 0.00029077192982456137, "loss": 0.7278, "step": 1766 }, { "epoch": 26.37, "learning_rate": 0.0002907368421052631, "loss": 0.9031, "step": 1767 }, { "epoch": 26.39, "learning_rate": 0.0002907017543859649, "loss": 0.4986, "step": 1768 }, { "epoch": 26.4, "learning_rate": 0.00029066666666666667, "loss": 0.6661, "step": 1769 }, { "epoch": 26.42, "learning_rate": 0.0002906315789473684, "loss": 0.555, "step": 1770 }, { "epoch": 26.43, "learning_rate": 0.00029059649122807016, "loss": 0.5531, "step": 1771 }, { "epoch": 26.45, "learning_rate": 0.0002905614035087719, "loss": 0.3935, "step": 1772 }, { "epoch": 26.46, "learning_rate": 0.00029052631578947366, "loss": 0.8509, "step": 1773 }, { "epoch": 26.48, "learning_rate": 0.0002904912280701754, "loss": 0.3773, "step": 1774 }, { "epoch": 26.49, "learning_rate": 0.00029045614035087716, "loss": 0.4842, "step": 1775 }, { "epoch": 26.51, "learning_rate": 0.0002904210526315789, "loss": 0.5316, "step": 1776 }, { "epoch": 26.52, "learning_rate": 0.00029038596491228066, "loss": 0.3468, "step": 1777 }, { "epoch": 26.54, "learning_rate": 0.0002903508771929824, "loss": 0.6874, "step": 1778 }, { "epoch": 26.55, "learning_rate": 0.0002903157894736842, "loss": 0.4743, "step": 1779 }, { "epoch": 26.57, "learning_rate": 0.00029028070175438596, "loss": 0.551, "step": 1780 }, { "epoch": 26.58, "learning_rate": 0.0002902456140350877, "loss": 0.1915, "step": 1781 }, { "epoch": 26.59, "learning_rate": 0.00029021052631578945, "loss": 0.8392, "step": 1782 }, { "epoch": 26.61, "learning_rate": 0.0002901754385964912, "loss": 0.4326, "step": 1783 }, { "epoch": 26.62, "learning_rate": 0.00029014035087719295, "loss": 0.3187, "step": 1784 }, { "epoch": 26.64, "learning_rate": 0.0002901052631578947, "loss": 0.3209, "step": 1785 }, { "epoch": 26.65, "learning_rate": 0.00029007017543859645, "loss": 0.5229, "step": 1786 }, { "epoch": 26.67, "learning_rate": 0.00029003508771929825, "loss": 0.5639, "step": 1787 }, { "epoch": 26.68, "learning_rate": 0.00029, "loss": 0.2592, "step": 1788 }, { "epoch": 26.7, "learning_rate": 0.0002899649122807017, "loss": 0.5842, "step": 1789 }, { "epoch": 26.71, "learning_rate": 0.00028992982456140345, "loss": 0.3494, "step": 1790 }, { "epoch": 26.73, "learning_rate": 0.00028989473684210525, "loss": 0.3164, "step": 1791 }, { "epoch": 26.74, "learning_rate": 0.000289859649122807, "loss": 0.4838, "step": 1792 }, { "epoch": 26.76, "learning_rate": 0.00028982456140350875, "loss": 0.2019, "step": 1793 }, { "epoch": 26.77, "learning_rate": 0.0002897894736842105, "loss": 0.3821, "step": 1794 }, { "epoch": 26.79, "learning_rate": 0.00028975438596491224, "loss": 0.5468, "step": 1795 }, { "epoch": 26.8, "learning_rate": 0.000289719298245614, "loss": 0.1877, "step": 1796 }, { "epoch": 26.82, "learning_rate": 0.00028968421052631574, "loss": 0.5564, "step": 1797 }, { "epoch": 26.83, "learning_rate": 0.00028964912280701754, "loss": 0.9035, "step": 1798 }, { "epoch": 26.85, "learning_rate": 0.0002896140350877193, "loss": 0.6982, "step": 1799 }, { "epoch": 26.86, "learning_rate": 0.00028957894736842104, "loss": 0.6975, "step": 1800 }, { "epoch": 26.86, "eval_accuracy": 0.793930494371023, "eval_f1": 0.792651690068327, "eval_loss": 0.7666684985160828, "eval_runtime": 344.0857, "eval_samples_per_second": 11.875, "eval_steps_per_second": 0.744, "step": 1800 }, { "epoch": 26.88, "learning_rate": 0.0002895438596491228, "loss": 0.2577, "step": 1801 }, { "epoch": 26.89, "learning_rate": 0.00028950877192982454, "loss": 0.2534, "step": 1802 }, { "epoch": 26.91, "learning_rate": 0.0002894736842105263, "loss": 0.2775, "step": 1803 }, { "epoch": 26.92, "learning_rate": 0.00028943859649122804, "loss": 0.2636, "step": 1804 }, { "epoch": 26.94, "learning_rate": 0.00028940350877192984, "loss": 0.6394, "step": 1805 }, { "epoch": 26.95, "learning_rate": 0.00028936842105263153, "loss": 0.6804, "step": 1806 }, { "epoch": 26.97, "learning_rate": 0.0002893333333333333, "loss": 0.4613, "step": 1807 }, { "epoch": 26.98, "learning_rate": 0.00028929824561403503, "loss": 0.2651, "step": 1808 }, { "epoch": 27.0, "learning_rate": 0.00028926315789473683, "loss": 0.3102, "step": 1809 }, { "epoch": 27.01, "learning_rate": 0.0002892280701754386, "loss": 0.6897, "step": 1810 }, { "epoch": 27.03, "learning_rate": 0.00028919298245614033, "loss": 0.3374, "step": 1811 }, { "epoch": 27.04, "learning_rate": 0.0002891578947368421, "loss": 1.2131, "step": 1812 }, { "epoch": 27.06, "learning_rate": 0.00028912280701754383, "loss": 0.4629, "step": 1813 }, { "epoch": 27.07, "learning_rate": 0.0002890877192982456, "loss": 0.4115, "step": 1814 }, { "epoch": 27.09, "learning_rate": 0.00028905263157894733, "loss": 0.4939, "step": 1815 }, { "epoch": 27.1, "learning_rate": 0.0002890175438596491, "loss": 0.3057, "step": 1816 }, { "epoch": 27.12, "learning_rate": 0.0002889824561403509, "loss": 0.7153, "step": 1817 }, { "epoch": 27.13, "learning_rate": 0.00028894736842105263, "loss": 0.3378, "step": 1818 }, { "epoch": 27.15, "learning_rate": 0.0002889122807017544, "loss": 0.3109, "step": 1819 }, { "epoch": 27.16, "learning_rate": 0.0002888771929824561, "loss": 0.4494, "step": 1820 }, { "epoch": 27.18, "learning_rate": 0.0002888421052631579, "loss": 0.2402, "step": 1821 }, { "epoch": 27.19, "learning_rate": 0.0002888070175438596, "loss": 0.1107, "step": 1822 }, { "epoch": 27.21, "learning_rate": 0.00028877192982456137, "loss": 0.7066, "step": 1823 }, { "epoch": 27.22, "learning_rate": 0.0002887368421052631, "loss": 0.1625, "step": 1824 }, { "epoch": 27.24, "learning_rate": 0.00028870175438596487, "loss": 0.2452, "step": 1825 }, { "epoch": 27.25, "learning_rate": 0.0002886666666666666, "loss": 1.0513, "step": 1826 }, { "epoch": 27.27, "learning_rate": 0.00028863157894736837, "loss": 0.5964, "step": 1827 }, { "epoch": 27.28, "learning_rate": 0.00028859649122807017, "loss": 0.7036, "step": 1828 }, { "epoch": 27.3, "learning_rate": 0.0002885614035087719, "loss": 0.4673, "step": 1829 }, { "epoch": 27.31, "learning_rate": 0.00028852631578947367, "loss": 0.255, "step": 1830 }, { "epoch": 27.33, "learning_rate": 0.0002884912280701754, "loss": 0.4344, "step": 1831 }, { "epoch": 27.34, "learning_rate": 0.00028845614035087716, "loss": 0.2454, "step": 1832 }, { "epoch": 27.36, "learning_rate": 0.0002884210526315789, "loss": 0.6276, "step": 1833 }, { "epoch": 27.37, "learning_rate": 0.00028838596491228066, "loss": 0.3496, "step": 1834 }, { "epoch": 27.39, "learning_rate": 0.00028835087719298247, "loss": 0.249, "step": 1835 }, { "epoch": 27.4, "learning_rate": 0.0002883157894736842, "loss": 0.1035, "step": 1836 }, { "epoch": 27.42, "learning_rate": 0.0002882807017543859, "loss": 0.2411, "step": 1837 }, { "epoch": 27.43, "learning_rate": 0.00028824561403508766, "loss": 0.3164, "step": 1838 }, { "epoch": 27.45, "learning_rate": 0.00028821052631578946, "loss": 0.0571, "step": 1839 }, { "epoch": 27.46, "learning_rate": 0.0002881754385964912, "loss": 0.0659, "step": 1840 }, { "epoch": 27.48, "learning_rate": 0.00028814035087719296, "loss": 0.363, "step": 1841 }, { "epoch": 27.49, "learning_rate": 0.0002881052631578947, "loss": 0.6705, "step": 1842 }, { "epoch": 27.51, "learning_rate": 0.00028807017543859646, "loss": 0.4213, "step": 1843 }, { "epoch": 27.52, "learning_rate": 0.0002880350877192982, "loss": 0.4256, "step": 1844 }, { "epoch": 27.54, "learning_rate": 0.00028799999999999995, "loss": 0.3606, "step": 1845 }, { "epoch": 27.55, "learning_rate": 0.00028796491228070176, "loss": 0.7367, "step": 1846 }, { "epoch": 27.57, "learning_rate": 0.0002879298245614035, "loss": 0.391, "step": 1847 }, { "epoch": 27.58, "learning_rate": 0.00028789473684210525, "loss": 0.7125, "step": 1848 }, { "epoch": 27.59, "learning_rate": 0.000287859649122807, "loss": 0.0586, "step": 1849 }, { "epoch": 27.61, "learning_rate": 0.00028782456140350875, "loss": 0.3911, "step": 1850 }, { "epoch": 27.62, "learning_rate": 0.0002877894736842105, "loss": 0.222, "step": 1851 }, { "epoch": 27.64, "learning_rate": 0.00028775438596491225, "loss": 0.2349, "step": 1852 }, { "epoch": 27.65, "learning_rate": 0.000287719298245614, "loss": 0.1077, "step": 1853 }, { "epoch": 27.67, "learning_rate": 0.00028768421052631575, "loss": 0.1471, "step": 1854 }, { "epoch": 27.68, "learning_rate": 0.0002876491228070175, "loss": 0.4215, "step": 1855 }, { "epoch": 27.7, "learning_rate": 0.00028761403508771924, "loss": 0.3904, "step": 1856 }, { "epoch": 27.71, "learning_rate": 0.00028757894736842105, "loss": 0.4847, "step": 1857 }, { "epoch": 27.73, "learning_rate": 0.0002875438596491228, "loss": 0.4888, "step": 1858 }, { "epoch": 27.74, "learning_rate": 0.00028750877192982454, "loss": 0.7409, "step": 1859 }, { "epoch": 27.76, "learning_rate": 0.0002874736842105263, "loss": 0.3056, "step": 1860 }, { "epoch": 27.77, "learning_rate": 0.00028743859649122804, "loss": 0.4178, "step": 1861 }, { "epoch": 27.79, "learning_rate": 0.0002874035087719298, "loss": 0.2794, "step": 1862 }, { "epoch": 27.8, "learning_rate": 0.00028736842105263154, "loss": 0.2874, "step": 1863 }, { "epoch": 27.82, "learning_rate": 0.0002873333333333333, "loss": 0.6377, "step": 1864 }, { "epoch": 27.83, "learning_rate": 0.0002872982456140351, "loss": 1.002, "step": 1865 }, { "epoch": 27.85, "learning_rate": 0.00028726315789473684, "loss": 0.5173, "step": 1866 }, { "epoch": 27.86, "learning_rate": 0.0002872280701754386, "loss": 0.4654, "step": 1867 }, { "epoch": 27.88, "learning_rate": 0.00028719298245614034, "loss": 0.3534, "step": 1868 }, { "epoch": 27.89, "learning_rate": 0.0002871578947368421, "loss": 0.6233, "step": 1869 }, { "epoch": 27.91, "learning_rate": 0.00028712280701754384, "loss": 0.3434, "step": 1870 }, { "epoch": 27.92, "learning_rate": 0.0002870877192982456, "loss": 0.2129, "step": 1871 }, { "epoch": 27.94, "learning_rate": 0.00028705263157894733, "loss": 0.1183, "step": 1872 }, { "epoch": 27.95, "learning_rate": 0.0002870175438596491, "loss": 0.233, "step": 1873 }, { "epoch": 27.97, "learning_rate": 0.00028698245614035083, "loss": 0.5479, "step": 1874 }, { "epoch": 27.98, "learning_rate": 0.0002869473684210526, "loss": 0.3382, "step": 1875 }, { "epoch": 28.0, "learning_rate": 0.0002869122807017544, "loss": 0.9043, "step": 1876 }, { "epoch": 28.01, "learning_rate": 0.00028687719298245613, "loss": 0.6242, "step": 1877 }, { "epoch": 28.03, "learning_rate": 0.0002868421052631579, "loss": 0.6041, "step": 1878 }, { "epoch": 28.04, "learning_rate": 0.00028680701754385963, "loss": 0.6087, "step": 1879 }, { "epoch": 28.06, "learning_rate": 0.0002867719298245614, "loss": 0.2625, "step": 1880 }, { "epoch": 28.07, "learning_rate": 0.0002867368421052631, "loss": 0.3646, "step": 1881 }, { "epoch": 28.09, "learning_rate": 0.0002867017543859649, "loss": 0.4302, "step": 1882 }, { "epoch": 28.1, "learning_rate": 0.0002866666666666667, "loss": 0.9889, "step": 1883 }, { "epoch": 28.12, "learning_rate": 0.0002866315789473684, "loss": 0.4311, "step": 1884 }, { "epoch": 28.13, "learning_rate": 0.0002865964912280701, "loss": 0.3208, "step": 1885 }, { "epoch": 28.15, "learning_rate": 0.00028656140350877187, "loss": 0.7227, "step": 1886 }, { "epoch": 28.16, "learning_rate": 0.00028652631578947367, "loss": 0.2999, "step": 1887 }, { "epoch": 28.18, "learning_rate": 0.0002864912280701754, "loss": 0.1791, "step": 1888 }, { "epoch": 28.19, "learning_rate": 0.00028645614035087717, "loss": 0.377, "step": 1889 }, { "epoch": 28.21, "learning_rate": 0.0002864210526315789, "loss": 0.1628, "step": 1890 }, { "epoch": 28.22, "learning_rate": 0.00028638596491228067, "loss": 0.3154, "step": 1891 }, { "epoch": 28.24, "learning_rate": 0.0002863508771929824, "loss": 0.1039, "step": 1892 }, { "epoch": 28.25, "learning_rate": 0.00028631578947368417, "loss": 0.2205, "step": 1893 }, { "epoch": 28.27, "learning_rate": 0.00028628070175438597, "loss": 0.8305, "step": 1894 }, { "epoch": 28.28, "learning_rate": 0.0002862456140350877, "loss": 0.2431, "step": 1895 }, { "epoch": 28.3, "learning_rate": 0.00028621052631578947, "loss": 0.2169, "step": 1896 }, { "epoch": 28.31, "learning_rate": 0.0002861754385964912, "loss": 0.2878, "step": 1897 }, { "epoch": 28.33, "learning_rate": 0.00028614035087719296, "loss": 0.3567, "step": 1898 }, { "epoch": 28.34, "learning_rate": 0.0002861052631578947, "loss": 0.7409, "step": 1899 }, { "epoch": 28.36, "learning_rate": 0.00028607017543859646, "loss": 0.3751, "step": 1900 }, { "epoch": 28.37, "learning_rate": 0.0002860350877192982, "loss": 0.154, "step": 1901 }, { "epoch": 28.39, "learning_rate": 0.00028599999999999996, "loss": 0.1643, "step": 1902 }, { "epoch": 28.4, "learning_rate": 0.0002859649122807017, "loss": 0.2874, "step": 1903 }, { "epoch": 28.42, "learning_rate": 0.00028592982456140346, "loss": 0.6538, "step": 1904 }, { "epoch": 28.43, "learning_rate": 0.00028589473684210526, "loss": 0.3018, "step": 1905 }, { "epoch": 28.45, "learning_rate": 0.000285859649122807, "loss": 0.7342, "step": 1906 }, { "epoch": 28.46, "learning_rate": 0.00028582456140350876, "loss": 0.3263, "step": 1907 }, { "epoch": 28.48, "learning_rate": 0.0002857894736842105, "loss": 0.5486, "step": 1908 }, { "epoch": 28.49, "learning_rate": 0.00028575438596491225, "loss": 0.6566, "step": 1909 }, { "epoch": 28.51, "learning_rate": 0.000285719298245614, "loss": 0.645, "step": 1910 }, { "epoch": 28.52, "learning_rate": 0.00028568421052631575, "loss": 0.657, "step": 1911 }, { "epoch": 28.54, "learning_rate": 0.0002856491228070175, "loss": 0.5839, "step": 1912 }, { "epoch": 28.55, "learning_rate": 0.0002856140350877193, "loss": 0.4959, "step": 1913 }, { "epoch": 28.57, "learning_rate": 0.00028557894736842105, "loss": 0.6578, "step": 1914 }, { "epoch": 28.58, "learning_rate": 0.0002855438596491228, "loss": 1.0061, "step": 1915 }, { "epoch": 28.59, "learning_rate": 0.0002855087719298245, "loss": 0.6346, "step": 1916 }, { "epoch": 28.61, "learning_rate": 0.0002854736842105263, "loss": 1.1097, "step": 1917 }, { "epoch": 28.62, "learning_rate": 0.00028543859649122805, "loss": 0.45, "step": 1918 }, { "epoch": 28.64, "learning_rate": 0.0002854035087719298, "loss": 0.2154, "step": 1919 }, { "epoch": 28.65, "learning_rate": 0.00028536842105263155, "loss": 0.4049, "step": 1920 }, { "epoch": 28.67, "learning_rate": 0.0002853333333333333, "loss": 0.742, "step": 1921 }, { "epoch": 28.68, "learning_rate": 0.00028529824561403504, "loss": 0.336, "step": 1922 }, { "epoch": 28.7, "learning_rate": 0.0002852631578947368, "loss": 0.4915, "step": 1923 }, { "epoch": 28.71, "learning_rate": 0.0002852280701754386, "loss": 0.3299, "step": 1924 }, { "epoch": 28.73, "learning_rate": 0.00028519298245614034, "loss": 0.1165, "step": 1925 }, { "epoch": 28.74, "learning_rate": 0.0002851578947368421, "loss": 0.4511, "step": 1926 }, { "epoch": 28.76, "learning_rate": 0.00028512280701754384, "loss": 0.36, "step": 1927 }, { "epoch": 28.77, "learning_rate": 0.0002850877192982456, "loss": 0.9207, "step": 1928 }, { "epoch": 28.79, "learning_rate": 0.00028505263157894734, "loss": 0.573, "step": 1929 }, { "epoch": 28.8, "learning_rate": 0.0002850175438596491, "loss": 0.282, "step": 1930 }, { "epoch": 28.82, "learning_rate": 0.0002849824561403509, "loss": 0.8202, "step": 1931 }, { "epoch": 28.83, "learning_rate": 0.00028494736842105264, "loss": 0.6128, "step": 1932 }, { "epoch": 28.85, "learning_rate": 0.00028491228070175433, "loss": 0.3645, "step": 1933 }, { "epoch": 28.86, "learning_rate": 0.0002848771929824561, "loss": 0.1557, "step": 1934 }, { "epoch": 28.88, "learning_rate": 0.0002848421052631579, "loss": 0.2146, "step": 1935 }, { "epoch": 28.89, "learning_rate": 0.00028480701754385963, "loss": 0.7744, "step": 1936 }, { "epoch": 28.91, "learning_rate": 0.0002847719298245614, "loss": 0.0497, "step": 1937 }, { "epoch": 28.92, "learning_rate": 0.00028473684210526313, "loss": 0.2807, "step": 1938 }, { "epoch": 28.94, "learning_rate": 0.0002847017543859649, "loss": 0.3621, "step": 1939 }, { "epoch": 28.95, "learning_rate": 0.00028466666666666663, "loss": 0.4343, "step": 1940 }, { "epoch": 28.97, "learning_rate": 0.0002846315789473684, "loss": 0.4488, "step": 1941 }, { "epoch": 28.98, "learning_rate": 0.0002845964912280702, "loss": 0.6635, "step": 1942 }, { "epoch": 29.0, "learning_rate": 0.00028456140350877193, "loss": 0.49, "step": 1943 }, { "epoch": 29.01, "learning_rate": 0.0002845263157894737, "loss": 0.6394, "step": 1944 }, { "epoch": 29.03, "learning_rate": 0.00028449122807017543, "loss": 0.289, "step": 1945 }, { "epoch": 29.04, "learning_rate": 0.0002844561403508772, "loss": 0.4145, "step": 1946 }, { "epoch": 29.06, "learning_rate": 0.0002844210526315789, "loss": 0.4305, "step": 1947 }, { "epoch": 29.07, "learning_rate": 0.0002843859649122807, "loss": 0.6245, "step": 1948 }, { "epoch": 29.09, "learning_rate": 0.0002843508771929824, "loss": 0.5812, "step": 1949 }, { "epoch": 29.1, "learning_rate": 0.00028431578947368417, "loss": 0.3968, "step": 1950 }, { "epoch": 29.12, "learning_rate": 0.0002842807017543859, "loss": 0.4733, "step": 1951 }, { "epoch": 29.13, "learning_rate": 0.00028424561403508767, "loss": 0.32, "step": 1952 }, { "epoch": 29.15, "learning_rate": 0.0002842105263157894, "loss": 0.1961, "step": 1953 }, { "epoch": 29.16, "learning_rate": 0.0002841754385964912, "loss": 0.5253, "step": 1954 }, { "epoch": 29.18, "learning_rate": 0.00028414035087719297, "loss": 0.1697, "step": 1955 }, { "epoch": 29.19, "learning_rate": 0.0002841052631578947, "loss": 0.3636, "step": 1956 }, { "epoch": 29.21, "learning_rate": 0.00028407017543859647, "loss": 0.4167, "step": 1957 }, { "epoch": 29.22, "learning_rate": 0.0002840350877192982, "loss": 0.4889, "step": 1958 }, { "epoch": 29.24, "learning_rate": 0.00028399999999999996, "loss": 0.4878, "step": 1959 }, { "epoch": 29.25, "learning_rate": 0.0002839649122807017, "loss": 0.6979, "step": 1960 }, { "epoch": 29.27, "learning_rate": 0.0002839298245614035, "loss": 0.9622, "step": 1961 }, { "epoch": 29.28, "learning_rate": 0.00028389473684210526, "loss": 0.5342, "step": 1962 }, { "epoch": 29.3, "learning_rate": 0.000283859649122807, "loss": 0.3308, "step": 1963 }, { "epoch": 29.31, "learning_rate": 0.0002838245614035087, "loss": 0.3056, "step": 1964 }, { "epoch": 29.33, "learning_rate": 0.0002837894736842105, "loss": 0.3, "step": 1965 }, { "epoch": 29.34, "learning_rate": 0.00028375438596491226, "loss": 0.7208, "step": 1966 }, { "epoch": 29.36, "learning_rate": 0.000283719298245614, "loss": 1.1475, "step": 1967 }, { "epoch": 29.37, "learning_rate": 0.00028368421052631576, "loss": 0.2273, "step": 1968 }, { "epoch": 29.39, "learning_rate": 0.0002836491228070175, "loss": 0.3318, "step": 1969 }, { "epoch": 29.4, "learning_rate": 0.00028361403508771926, "loss": 0.1126, "step": 1970 }, { "epoch": 29.42, "learning_rate": 0.000283578947368421, "loss": 0.2493, "step": 1971 }, { "epoch": 29.43, "learning_rate": 0.0002835438596491228, "loss": 0.1019, "step": 1972 }, { "epoch": 29.45, "learning_rate": 0.00028350877192982456, "loss": 0.3286, "step": 1973 }, { "epoch": 29.46, "learning_rate": 0.0002834736842105263, "loss": 0.0936, "step": 1974 }, { "epoch": 29.48, "learning_rate": 0.00028343859649122805, "loss": 0.7561, "step": 1975 }, { "epoch": 29.49, "learning_rate": 0.0002834035087719298, "loss": 0.2806, "step": 1976 }, { "epoch": 29.51, "learning_rate": 0.00028336842105263155, "loss": 0.4837, "step": 1977 }, { "epoch": 29.52, "learning_rate": 0.0002833333333333333, "loss": 0.4605, "step": 1978 }, { "epoch": 29.54, "learning_rate": 0.00028329824561403505, "loss": 0.7167, "step": 1979 }, { "epoch": 29.55, "learning_rate": 0.00028326315789473685, "loss": 0.3975, "step": 1980 }, { "epoch": 29.57, "learning_rate": 0.00028322807017543855, "loss": 0.3134, "step": 1981 }, { "epoch": 29.58, "learning_rate": 0.0002831929824561403, "loss": 0.4018, "step": 1982 }, { "epoch": 29.59, "learning_rate": 0.0002831578947368421, "loss": 0.1087, "step": 1983 }, { "epoch": 29.61, "learning_rate": 0.00028312280701754385, "loss": 0.793, "step": 1984 }, { "epoch": 29.62, "learning_rate": 0.0002830877192982456, "loss": 0.6217, "step": 1985 }, { "epoch": 29.64, "learning_rate": 0.00028305263157894734, "loss": 0.3012, "step": 1986 }, { "epoch": 29.65, "learning_rate": 0.0002830175438596491, "loss": 0.4654, "step": 1987 }, { "epoch": 29.67, "learning_rate": 0.00028298245614035084, "loss": 0.0225, "step": 1988 }, { "epoch": 29.68, "learning_rate": 0.0002829473684210526, "loss": 0.216, "step": 1989 }, { "epoch": 29.7, "learning_rate": 0.00028291228070175434, "loss": 0.4494, "step": 1990 }, { "epoch": 29.71, "learning_rate": 0.00028287719298245614, "loss": 0.5049, "step": 1991 }, { "epoch": 29.73, "learning_rate": 0.0002828421052631579, "loss": 0.0648, "step": 1992 }, { "epoch": 29.74, "learning_rate": 0.00028280701754385964, "loss": 0.4833, "step": 1993 }, { "epoch": 29.76, "learning_rate": 0.0002827719298245614, "loss": 0.3664, "step": 1994 }, { "epoch": 29.77, "learning_rate": 0.00028273684210526314, "loss": 0.5582, "step": 1995 }, { "epoch": 29.79, "learning_rate": 0.0002827017543859649, "loss": 0.323, "step": 1996 }, { "epoch": 29.8, "learning_rate": 0.00028266666666666663, "loss": 0.5679, "step": 1997 }, { "epoch": 29.82, "learning_rate": 0.0002826315789473684, "loss": 0.2273, "step": 1998 }, { "epoch": 29.83, "learning_rate": 0.00028259649122807013, "loss": 0.6848, "step": 1999 }, { "epoch": 29.85, "learning_rate": 0.0002825614035087719, "loss": 0.9913, "step": 2000 }, { "epoch": 29.85, "eval_accuracy": 0.7706803720019579, "eval_f1": 0.7733909006780546, "eval_loss": 0.9206514954566956, "eval_runtime": 344.174, "eval_samples_per_second": 11.872, "eval_steps_per_second": 0.744, "step": 2000 }, { "epoch": 29.86, "learning_rate": 0.00028252631578947363, "loss": 0.7397, "step": 2001 }, { "epoch": 29.88, "learning_rate": 0.00028249122807017543, "loss": 0.4359, "step": 2002 }, { "epoch": 29.89, "learning_rate": 0.0002824561403508772, "loss": 0.1009, "step": 2003 }, { "epoch": 29.91, "learning_rate": 0.00028242105263157893, "loss": 0.2327, "step": 2004 }, { "epoch": 29.92, "learning_rate": 0.0002823859649122807, "loss": 0.343, "step": 2005 }, { "epoch": 29.94, "learning_rate": 0.00028235087719298243, "loss": 0.3399, "step": 2006 }, { "epoch": 29.95, "learning_rate": 0.0002823157894736842, "loss": 0.5942, "step": 2007 }, { "epoch": 29.97, "learning_rate": 0.0002822807017543859, "loss": 0.5096, "step": 2008 }, { "epoch": 29.98, "learning_rate": 0.00028224561403508773, "loss": 0.2877, "step": 2009 }, { "epoch": 30.0, "learning_rate": 0.0002822105263157895, "loss": 0.1841, "step": 2010 }, { "epoch": 30.01, "learning_rate": 0.0002821754385964912, "loss": 0.0952, "step": 2011 }, { "epoch": 30.03, "learning_rate": 0.0002821403508771929, "loss": 0.6921, "step": 2012 }, { "epoch": 30.04, "learning_rate": 0.0002821052631578947, "loss": 0.3671, "step": 2013 }, { "epoch": 30.06, "learning_rate": 0.00028207017543859647, "loss": 0.3699, "step": 2014 }, { "epoch": 30.07, "learning_rate": 0.0002820350877192982, "loss": 0.4393, "step": 2015 }, { "epoch": 30.09, "learning_rate": 0.00028199999999999997, "loss": 0.0431, "step": 2016 }, { "epoch": 30.1, "learning_rate": 0.0002819649122807017, "loss": 0.4641, "step": 2017 }, { "epoch": 30.12, "learning_rate": 0.00028192982456140347, "loss": 0.4369, "step": 2018 }, { "epoch": 30.13, "learning_rate": 0.0002818947368421052, "loss": 0.1586, "step": 2019 }, { "epoch": 30.15, "learning_rate": 0.000281859649122807, "loss": 0.2343, "step": 2020 }, { "epoch": 30.16, "learning_rate": 0.00028182456140350877, "loss": 0.0312, "step": 2021 }, { "epoch": 30.18, "learning_rate": 0.0002817894736842105, "loss": 0.1787, "step": 2022 }, { "epoch": 30.19, "learning_rate": 0.00028175438596491227, "loss": 0.3184, "step": 2023 }, { "epoch": 30.21, "learning_rate": 0.000281719298245614, "loss": 0.3404, "step": 2024 }, { "epoch": 30.22, "learning_rate": 0.00028168421052631576, "loss": 0.2371, "step": 2025 }, { "epoch": 30.24, "learning_rate": 0.0002816491228070175, "loss": 0.3565, "step": 2026 }, { "epoch": 30.25, "learning_rate": 0.00028161403508771926, "loss": 0.6661, "step": 2027 }, { "epoch": 30.27, "learning_rate": 0.00028157894736842106, "loss": 0.9013, "step": 2028 }, { "epoch": 30.28, "learning_rate": 0.00028154385964912276, "loss": 0.3751, "step": 2029 }, { "epoch": 30.3, "learning_rate": 0.0002815087719298245, "loss": 0.1435, "step": 2030 }, { "epoch": 30.31, "learning_rate": 0.0002814736842105263, "loss": 0.2184, "step": 2031 }, { "epoch": 30.33, "learning_rate": 0.00028143859649122806, "loss": 0.0664, "step": 2032 }, { "epoch": 30.34, "learning_rate": 0.0002814035087719298, "loss": 0.227, "step": 2033 }, { "epoch": 30.36, "learning_rate": 0.00028136842105263156, "loss": 0.3569, "step": 2034 }, { "epoch": 30.37, "learning_rate": 0.0002813333333333333, "loss": 0.455, "step": 2035 }, { "epoch": 30.39, "learning_rate": 0.00028129824561403505, "loss": 0.3361, "step": 2036 }, { "epoch": 30.4, "learning_rate": 0.0002812631578947368, "loss": 0.2786, "step": 2037 }, { "epoch": 30.42, "learning_rate": 0.00028122807017543855, "loss": 0.0724, "step": 2038 }, { "epoch": 30.43, "learning_rate": 0.00028119298245614035, "loss": 0.7161, "step": 2039 }, { "epoch": 30.45, "learning_rate": 0.0002811578947368421, "loss": 0.0976, "step": 2040 }, { "epoch": 30.46, "learning_rate": 0.00028112280701754385, "loss": 0.4322, "step": 2041 }, { "epoch": 30.48, "learning_rate": 0.00028108771929824555, "loss": 0.4597, "step": 2042 }, { "epoch": 30.49, "learning_rate": 0.00028105263157894735, "loss": 0.6184, "step": 2043 }, { "epoch": 30.51, "learning_rate": 0.0002810175438596491, "loss": 0.7813, "step": 2044 }, { "epoch": 30.52, "learning_rate": 0.00028098245614035085, "loss": 0.3278, "step": 2045 }, { "epoch": 30.54, "learning_rate": 0.0002809473684210526, "loss": 0.1911, "step": 2046 }, { "epoch": 30.55, "learning_rate": 0.00028091228070175434, "loss": 0.3555, "step": 2047 }, { "epoch": 30.57, "learning_rate": 0.0002808771929824561, "loss": 0.6221, "step": 2048 }, { "epoch": 30.58, "learning_rate": 0.00028084210526315784, "loss": 0.376, "step": 2049 }, { "epoch": 30.59, "learning_rate": 0.00028080701754385965, "loss": 0.259, "step": 2050 }, { "epoch": 30.61, "learning_rate": 0.0002807719298245614, "loss": 0.3966, "step": 2051 }, { "epoch": 30.62, "learning_rate": 0.00028073684210526314, "loss": 0.5047, "step": 2052 }, { "epoch": 30.64, "learning_rate": 0.0002807017543859649, "loss": 0.4883, "step": 2053 }, { "epoch": 30.65, "learning_rate": 0.00028066666666666664, "loss": 0.0462, "step": 2054 }, { "epoch": 30.67, "learning_rate": 0.0002806315789473684, "loss": 0.152, "step": 2055 }, { "epoch": 30.68, "learning_rate": 0.00028059649122807014, "loss": 0.0942, "step": 2056 }, { "epoch": 30.7, "learning_rate": 0.00028056140350877194, "loss": 0.5002, "step": 2057 }, { "epoch": 30.71, "learning_rate": 0.0002805263157894737, "loss": 0.3678, "step": 2058 }, { "epoch": 30.73, "learning_rate": 0.0002804912280701754, "loss": 0.1678, "step": 2059 }, { "epoch": 30.74, "learning_rate": 0.00028045614035087713, "loss": 0.5979, "step": 2060 }, { "epoch": 30.76, "learning_rate": 0.00028042105263157894, "loss": 0.2498, "step": 2061 }, { "epoch": 30.77, "learning_rate": 0.0002803859649122807, "loss": 0.6051, "step": 2062 }, { "epoch": 30.79, "learning_rate": 0.00028035087719298243, "loss": 0.3782, "step": 2063 }, { "epoch": 30.8, "learning_rate": 0.0002803157894736842, "loss": 0.3381, "step": 2064 }, { "epoch": 30.82, "learning_rate": 0.00028028070175438593, "loss": 0.5742, "step": 2065 }, { "epoch": 30.83, "learning_rate": 0.0002802456140350877, "loss": 0.2356, "step": 2066 }, { "epoch": 30.85, "learning_rate": 0.00028021052631578943, "loss": 0.5509, "step": 2067 }, { "epoch": 30.86, "learning_rate": 0.00028017543859649123, "loss": 0.4965, "step": 2068 }, { "epoch": 30.88, "learning_rate": 0.000280140350877193, "loss": 0.1467, "step": 2069 }, { "epoch": 30.89, "learning_rate": 0.00028010526315789473, "loss": 0.4174, "step": 2070 }, { "epoch": 30.91, "learning_rate": 0.0002800701754385965, "loss": 0.2724, "step": 2071 }, { "epoch": 30.92, "learning_rate": 0.0002800350877192982, "loss": 0.4106, "step": 2072 }, { "epoch": 30.94, "learning_rate": 0.00028, "loss": 0.5703, "step": 2073 }, { "epoch": 30.95, "learning_rate": 0.0002799649122807017, "loss": 0.3324, "step": 2074 }, { "epoch": 30.97, "learning_rate": 0.0002799298245614035, "loss": 0.13, "step": 2075 }, { "epoch": 30.98, "learning_rate": 0.0002798947368421053, "loss": 0.2645, "step": 2076 }, { "epoch": 31.0, "learning_rate": 0.00027985964912280697, "loss": 0.1889, "step": 2077 }, { "epoch": 31.01, "learning_rate": 0.0002798245614035087, "loss": 0.3298, "step": 2078 }, { "epoch": 31.03, "learning_rate": 0.00027978947368421047, "loss": 0.1496, "step": 2079 }, { "epoch": 31.04, "learning_rate": 0.00027975438596491227, "loss": 0.3236, "step": 2080 }, { "epoch": 31.06, "learning_rate": 0.000279719298245614, "loss": 0.1155, "step": 2081 }, { "epoch": 31.07, "learning_rate": 0.00027968421052631577, "loss": 0.1675, "step": 2082 }, { "epoch": 31.09, "learning_rate": 0.0002796491228070175, "loss": 0.2023, "step": 2083 }, { "epoch": 31.1, "learning_rate": 0.00027961403508771927, "loss": 0.35, "step": 2084 }, { "epoch": 31.12, "learning_rate": 0.000279578947368421, "loss": 0.5572, "step": 2085 }, { "epoch": 31.13, "learning_rate": 0.00027954385964912276, "loss": 0.1961, "step": 2086 }, { "epoch": 31.15, "learning_rate": 0.00027950877192982457, "loss": 0.1702, "step": 2087 }, { "epoch": 31.16, "learning_rate": 0.0002794736842105263, "loss": 0.0372, "step": 2088 }, { "epoch": 31.18, "learning_rate": 0.00027943859649122806, "loss": 0.3766, "step": 2089 }, { "epoch": 31.19, "learning_rate": 0.00027940350877192976, "loss": 0.3156, "step": 2090 }, { "epoch": 31.21, "learning_rate": 0.00027936842105263156, "loss": 0.1704, "step": 2091 }, { "epoch": 31.22, "learning_rate": 0.0002793333333333333, "loss": 0.0498, "step": 2092 }, { "epoch": 31.24, "learning_rate": 0.00027929824561403506, "loss": 0.2717, "step": 2093 }, { "epoch": 31.25, "learning_rate": 0.0002792631578947368, "loss": 0.0501, "step": 2094 }, { "epoch": 31.27, "learning_rate": 0.00027922807017543856, "loss": 0.0992, "step": 2095 }, { "epoch": 31.28, "learning_rate": 0.0002791929824561403, "loss": 0.5988, "step": 2096 }, { "epoch": 31.3, "learning_rate": 0.00027915789473684205, "loss": 0.2703, "step": 2097 }, { "epoch": 31.31, "learning_rate": 0.00027912280701754386, "loss": 0.5195, "step": 2098 }, { "epoch": 31.33, "learning_rate": 0.0002790877192982456, "loss": 0.0573, "step": 2099 }, { "epoch": 31.34, "learning_rate": 0.00027905263157894736, "loss": 0.0256, "step": 2100 }, { "epoch": 31.36, "learning_rate": 0.0002790175438596491, "loss": 0.579, "step": 2101 }, { "epoch": 31.37, "learning_rate": 0.00027898245614035085, "loss": 0.0547, "step": 2102 }, { "epoch": 31.39, "learning_rate": 0.0002789473684210526, "loss": 0.0967, "step": 2103 }, { "epoch": 31.4, "learning_rate": 0.00027891228070175435, "loss": 0.258, "step": 2104 }, { "epoch": 31.42, "learning_rate": 0.00027887719298245615, "loss": 0.2852, "step": 2105 }, { "epoch": 31.43, "learning_rate": 0.0002788421052631579, "loss": 0.0816, "step": 2106 }, { "epoch": 31.45, "learning_rate": 0.0002788070175438596, "loss": 0.2592, "step": 2107 }, { "epoch": 31.46, "learning_rate": 0.00027877192982456135, "loss": 0.4082, "step": 2108 }, { "epoch": 31.48, "learning_rate": 0.00027873684210526315, "loss": 0.0363, "step": 2109 }, { "epoch": 31.49, "learning_rate": 0.0002787017543859649, "loss": 0.2759, "step": 2110 }, { "epoch": 31.51, "learning_rate": 0.00027866666666666665, "loss": 0.9859, "step": 2111 }, { "epoch": 31.52, "learning_rate": 0.0002786315789473684, "loss": 0.3908, "step": 2112 }, { "epoch": 31.54, "learning_rate": 0.00027859649122807014, "loss": 0.4191, "step": 2113 }, { "epoch": 31.55, "learning_rate": 0.0002785614035087719, "loss": 0.2121, "step": 2114 }, { "epoch": 31.57, "learning_rate": 0.00027852631578947364, "loss": 0.0456, "step": 2115 }, { "epoch": 31.58, "learning_rate": 0.0002784912280701754, "loss": 0.3328, "step": 2116 }, { "epoch": 31.59, "learning_rate": 0.0002784561403508772, "loss": 0.4704, "step": 2117 }, { "epoch": 31.61, "learning_rate": 0.00027842105263157894, "loss": 0.1809, "step": 2118 }, { "epoch": 31.62, "learning_rate": 0.0002783859649122807, "loss": 0.394, "step": 2119 }, { "epoch": 31.64, "learning_rate": 0.00027835087719298244, "loss": 0.0635, "step": 2120 }, { "epoch": 31.65, "learning_rate": 0.0002783157894736842, "loss": 0.0528, "step": 2121 }, { "epoch": 31.67, "learning_rate": 0.00027828070175438594, "loss": 0.0976, "step": 2122 }, { "epoch": 31.68, "learning_rate": 0.0002782456140350877, "loss": 0.5734, "step": 2123 }, { "epoch": 31.7, "learning_rate": 0.0002782105263157895, "loss": 0.2444, "step": 2124 }, { "epoch": 31.71, "learning_rate": 0.0002781754385964912, "loss": 0.1145, "step": 2125 }, { "epoch": 31.73, "learning_rate": 0.00027814035087719293, "loss": 0.2101, "step": 2126 }, { "epoch": 31.74, "learning_rate": 0.0002781052631578947, "loss": 0.3387, "step": 2127 }, { "epoch": 31.76, "learning_rate": 0.0002780701754385965, "loss": 0.1206, "step": 2128 }, { "epoch": 31.77, "learning_rate": 0.00027803508771929823, "loss": 0.3084, "step": 2129 }, { "epoch": 31.79, "learning_rate": 0.000278, "loss": 0.5995, "step": 2130 }, { "epoch": 31.8, "learning_rate": 0.00027796491228070173, "loss": 0.6344, "step": 2131 }, { "epoch": 31.82, "learning_rate": 0.0002779298245614035, "loss": 0.4073, "step": 2132 }, { "epoch": 31.83, "learning_rate": 0.00027789473684210523, "loss": 0.2271, "step": 2133 }, { "epoch": 31.85, "learning_rate": 0.000277859649122807, "loss": 0.1028, "step": 2134 }, { "epoch": 31.86, "learning_rate": 0.0002778245614035088, "loss": 0.1707, "step": 2135 }, { "epoch": 31.88, "learning_rate": 0.00027778947368421053, "loss": 0.1946, "step": 2136 }, { "epoch": 31.89, "learning_rate": 0.0002777543859649123, "loss": 0.5396, "step": 2137 }, { "epoch": 31.91, "learning_rate": 0.00027771929824561397, "loss": 0.0741, "step": 2138 }, { "epoch": 31.92, "learning_rate": 0.0002776842105263158, "loss": 0.3099, "step": 2139 }, { "epoch": 31.94, "learning_rate": 0.0002776491228070175, "loss": 0.2818, "step": 2140 }, { "epoch": 31.95, "learning_rate": 0.00027761403508771927, "loss": 0.3299, "step": 2141 }, { "epoch": 31.97, "learning_rate": 0.000277578947368421, "loss": 0.4072, "step": 2142 }, { "epoch": 31.98, "learning_rate": 0.00027754385964912277, "loss": 0.0182, "step": 2143 }, { "epoch": 32.0, "learning_rate": 0.0002775087719298245, "loss": 0.6486, "step": 2144 }, { "epoch": 32.01, "learning_rate": 0.00027747368421052627, "loss": 0.6269, "step": 2145 }, { "epoch": 32.03, "learning_rate": 0.00027743859649122807, "loss": 0.353, "step": 2146 }, { "epoch": 32.04, "learning_rate": 0.0002774035087719298, "loss": 0.4854, "step": 2147 }, { "epoch": 32.06, "learning_rate": 0.00027736842105263157, "loss": 0.2553, "step": 2148 }, { "epoch": 32.07, "learning_rate": 0.0002773333333333333, "loss": 0.158, "step": 2149 }, { "epoch": 32.09, "learning_rate": 0.00027729824561403507, "loss": 0.4838, "step": 2150 }, { "epoch": 32.1, "learning_rate": 0.0002772631578947368, "loss": 0.2814, "step": 2151 }, { "epoch": 32.12, "learning_rate": 0.00027722807017543856, "loss": 0.4563, "step": 2152 }, { "epoch": 32.13, "learning_rate": 0.0002771929824561403, "loss": 0.4573, "step": 2153 }, { "epoch": 32.15, "learning_rate": 0.0002771578947368421, "loss": 0.3097, "step": 2154 }, { "epoch": 32.16, "learning_rate": 0.0002771228070175438, "loss": 0.2421, "step": 2155 }, { "epoch": 32.18, "learning_rate": 0.00027708771929824556, "loss": 0.8276, "step": 2156 }, { "epoch": 32.19, "learning_rate": 0.00027705263157894736, "loss": 0.4142, "step": 2157 }, { "epoch": 32.21, "learning_rate": 0.0002770175438596491, "loss": 0.0869, "step": 2158 }, { "epoch": 32.22, "learning_rate": 0.00027698245614035086, "loss": 0.3774, "step": 2159 }, { "epoch": 32.24, "learning_rate": 0.0002769473684210526, "loss": 0.2919, "step": 2160 }, { "epoch": 32.25, "learning_rate": 0.00027691228070175436, "loss": 0.652, "step": 2161 }, { "epoch": 32.27, "learning_rate": 0.0002768771929824561, "loss": 0.2367, "step": 2162 }, { "epoch": 32.28, "learning_rate": 0.00027684210526315785, "loss": 0.7383, "step": 2163 }, { "epoch": 32.3, "learning_rate": 0.0002768070175438596, "loss": 0.4451, "step": 2164 }, { "epoch": 32.31, "learning_rate": 0.0002767719298245614, "loss": 0.2831, "step": 2165 }, { "epoch": 32.33, "learning_rate": 0.00027673684210526315, "loss": 0.1604, "step": 2166 }, { "epoch": 32.34, "learning_rate": 0.0002767017543859649, "loss": 0.0478, "step": 2167 }, { "epoch": 32.36, "learning_rate": 0.00027666666666666665, "loss": 0.2886, "step": 2168 }, { "epoch": 32.37, "learning_rate": 0.0002766315789473684, "loss": 0.625, "step": 2169 }, { "epoch": 32.39, "learning_rate": 0.00027659649122807015, "loss": 0.2271, "step": 2170 }, { "epoch": 32.4, "learning_rate": 0.0002765614035087719, "loss": 0.1946, "step": 2171 }, { "epoch": 32.42, "learning_rate": 0.00027652631578947365, "loss": 0.2989, "step": 2172 }, { "epoch": 32.43, "learning_rate": 0.0002764912280701754, "loss": 0.0619, "step": 2173 }, { "epoch": 32.45, "learning_rate": 0.00027645614035087714, "loss": 0.2763, "step": 2174 }, { "epoch": 32.46, "learning_rate": 0.0002764210526315789, "loss": 0.416, "step": 2175 }, { "epoch": 32.48, "learning_rate": 0.0002763859649122807, "loss": 0.2922, "step": 2176 }, { "epoch": 32.49, "learning_rate": 0.00027635087719298244, "loss": 0.2956, "step": 2177 }, { "epoch": 32.51, "learning_rate": 0.0002763157894736842, "loss": 0.369, "step": 2178 }, { "epoch": 32.52, "learning_rate": 0.00027628070175438594, "loss": 0.4236, "step": 2179 }, { "epoch": 32.54, "learning_rate": 0.0002762456140350877, "loss": 0.4884, "step": 2180 }, { "epoch": 32.55, "learning_rate": 0.00027621052631578944, "loss": 0.2356, "step": 2181 }, { "epoch": 32.57, "learning_rate": 0.0002761754385964912, "loss": 0.4257, "step": 2182 }, { "epoch": 32.58, "learning_rate": 0.000276140350877193, "loss": 0.3091, "step": 2183 }, { "epoch": 32.59, "learning_rate": 0.00027610526315789474, "loss": 0.2393, "step": 2184 }, { "epoch": 32.61, "learning_rate": 0.0002760701754385965, "loss": 0.2166, "step": 2185 }, { "epoch": 32.62, "learning_rate": 0.0002760350877192982, "loss": 0.0775, "step": 2186 }, { "epoch": 32.64, "learning_rate": 0.000276, "loss": 0.2225, "step": 2187 }, { "epoch": 32.65, "learning_rate": 0.00027596491228070174, "loss": 0.2278, "step": 2188 }, { "epoch": 32.67, "learning_rate": 0.0002759298245614035, "loss": 0.2411, "step": 2189 }, { "epoch": 32.68, "learning_rate": 0.00027589473684210523, "loss": 0.9149, "step": 2190 }, { "epoch": 32.7, "learning_rate": 0.000275859649122807, "loss": 0.186, "step": 2191 }, { "epoch": 32.71, "learning_rate": 0.00027582456140350873, "loss": 0.3165, "step": 2192 }, { "epoch": 32.73, "learning_rate": 0.0002757894736842105, "loss": 0.4464, "step": 2193 }, { "epoch": 32.74, "learning_rate": 0.0002757543859649123, "loss": 0.3044, "step": 2194 }, { "epoch": 32.76, "learning_rate": 0.00027571929824561403, "loss": 0.4148, "step": 2195 }, { "epoch": 32.77, "learning_rate": 0.0002756842105263158, "loss": 0.4251, "step": 2196 }, { "epoch": 32.79, "learning_rate": 0.00027564912280701753, "loss": 0.3422, "step": 2197 }, { "epoch": 32.8, "learning_rate": 0.0002756140350877193, "loss": 0.0342, "step": 2198 }, { "epoch": 32.82, "learning_rate": 0.000275578947368421, "loss": 0.2479, "step": 2199 }, { "epoch": 32.83, "learning_rate": 0.0002755438596491228, "loss": 0.2307, "step": 2200 }, { "epoch": 32.83, "eval_accuracy": 0.8086147821830642, "eval_f1": 0.8071662783486067, "eval_loss": 0.7650861740112305, "eval_runtime": 343.8365, "eval_samples_per_second": 11.884, "eval_steps_per_second": 0.745, "step": 2200 }, { "epoch": 32.85, "learning_rate": 0.0002755087719298245, "loss": 0.5653, "step": 2201 }, { "epoch": 32.86, "learning_rate": 0.0002754736842105263, "loss": 0.2916, "step": 2202 }, { "epoch": 32.88, "learning_rate": 0.000275438596491228, "loss": 0.0993, "step": 2203 }, { "epoch": 32.89, "learning_rate": 0.00027540350877192977, "loss": 0.3158, "step": 2204 }, { "epoch": 32.91, "learning_rate": 0.0002753684210526315, "loss": 0.0403, "step": 2205 }, { "epoch": 32.92, "learning_rate": 0.0002753333333333333, "loss": 0.1977, "step": 2206 }, { "epoch": 32.94, "learning_rate": 0.00027529824561403507, "loss": 0.2377, "step": 2207 }, { "epoch": 32.95, "learning_rate": 0.0002752631578947368, "loss": 0.1325, "step": 2208 }, { "epoch": 32.97, "learning_rate": 0.00027522807017543857, "loss": 0.4147, "step": 2209 }, { "epoch": 32.98, "learning_rate": 0.0002751929824561403, "loss": 0.1223, "step": 2210 }, { "epoch": 33.0, "learning_rate": 0.00027515789473684207, "loss": 0.1362, "step": 2211 }, { "epoch": 33.01, "learning_rate": 0.0002751228070175438, "loss": 0.3086, "step": 2212 }, { "epoch": 33.03, "learning_rate": 0.0002750877192982456, "loss": 0.3467, "step": 2213 }, { "epoch": 33.04, "learning_rate": 0.00027505263157894737, "loss": 0.1881, "step": 2214 }, { "epoch": 33.06, "learning_rate": 0.0002750175438596491, "loss": 0.2578, "step": 2215 }, { "epoch": 33.07, "learning_rate": 0.00027498245614035086, "loss": 0.2942, "step": 2216 }, { "epoch": 33.09, "learning_rate": 0.0002749473684210526, "loss": 0.3025, "step": 2217 }, { "epoch": 33.1, "learning_rate": 0.00027491228070175436, "loss": 0.1853, "step": 2218 }, { "epoch": 33.12, "learning_rate": 0.0002748771929824561, "loss": 0.052, "step": 2219 }, { "epoch": 33.13, "learning_rate": 0.00027484210526315786, "loss": 0.3404, "step": 2220 }, { "epoch": 33.15, "learning_rate": 0.0002748070175438596, "loss": 0.0831, "step": 2221 }, { "epoch": 33.16, "learning_rate": 0.00027477192982456136, "loss": 0.1659, "step": 2222 }, { "epoch": 33.18, "learning_rate": 0.0002747368421052631, "loss": 0.0441, "step": 2223 }, { "epoch": 33.19, "learning_rate": 0.0002747017543859649, "loss": 0.2571, "step": 2224 }, { "epoch": 33.21, "learning_rate": 0.00027466666666666666, "loss": 0.3079, "step": 2225 }, { "epoch": 33.22, "learning_rate": 0.0002746315789473684, "loss": 0.3602, "step": 2226 }, { "epoch": 33.24, "learning_rate": 0.00027459649122807015, "loss": 0.1547, "step": 2227 }, { "epoch": 33.25, "learning_rate": 0.0002745614035087719, "loss": 0.3108, "step": 2228 }, { "epoch": 33.27, "learning_rate": 0.00027452631578947365, "loss": 0.6134, "step": 2229 }, { "epoch": 33.28, "learning_rate": 0.0002744912280701754, "loss": 0.3319, "step": 2230 }, { "epoch": 33.3, "learning_rate": 0.0002744561403508772, "loss": 0.5913, "step": 2231 }, { "epoch": 33.31, "learning_rate": 0.00027442105263157895, "loss": 0.4172, "step": 2232 }, { "epoch": 33.33, "learning_rate": 0.0002743859649122807, "loss": 0.5581, "step": 2233 }, { "epoch": 33.34, "learning_rate": 0.0002743508771929824, "loss": 0.1744, "step": 2234 }, { "epoch": 33.36, "learning_rate": 0.0002743157894736842, "loss": 0.3023, "step": 2235 }, { "epoch": 33.37, "learning_rate": 0.00027428070175438595, "loss": 0.9059, "step": 2236 }, { "epoch": 33.39, "learning_rate": 0.0002742456140350877, "loss": 0.1562, "step": 2237 }, { "epoch": 33.4, "learning_rate": 0.00027421052631578945, "loss": 0.2438, "step": 2238 }, { "epoch": 33.42, "learning_rate": 0.0002741754385964912, "loss": 0.5985, "step": 2239 }, { "epoch": 33.43, "learning_rate": 0.00027414035087719294, "loss": 0.021, "step": 2240 }, { "epoch": 33.45, "learning_rate": 0.0002741052631578947, "loss": 0.4208, "step": 2241 }, { "epoch": 33.46, "learning_rate": 0.00027407017543859644, "loss": 0.2522, "step": 2242 }, { "epoch": 33.48, "learning_rate": 0.00027403508771929824, "loss": 0.2324, "step": 2243 }, { "epoch": 33.49, "learning_rate": 0.000274, "loss": 0.235, "step": 2244 }, { "epoch": 33.51, "learning_rate": 0.00027396491228070174, "loss": 0.3669, "step": 2245 }, { "epoch": 33.52, "learning_rate": 0.0002739298245614035, "loss": 0.3819, "step": 2246 }, { "epoch": 33.54, "learning_rate": 0.00027389473684210524, "loss": 0.1008, "step": 2247 }, { "epoch": 33.55, "learning_rate": 0.000273859649122807, "loss": 0.4268, "step": 2248 }, { "epoch": 33.57, "learning_rate": 0.00027382456140350874, "loss": 0.0733, "step": 2249 }, { "epoch": 33.58, "learning_rate": 0.00027378947368421054, "loss": 0.2913, "step": 2250 }, { "epoch": 33.59, "learning_rate": 0.00027375438596491223, "loss": 0.2841, "step": 2251 }, { "epoch": 33.61, "learning_rate": 0.000273719298245614, "loss": 0.0646, "step": 2252 }, { "epoch": 33.62, "learning_rate": 0.00027368421052631573, "loss": 0.4224, "step": 2253 }, { "epoch": 33.64, "learning_rate": 0.00027364912280701753, "loss": 0.1624, "step": 2254 }, { "epoch": 33.65, "learning_rate": 0.0002736140350877193, "loss": 0.1866, "step": 2255 }, { "epoch": 33.67, "learning_rate": 0.00027357894736842103, "loss": 0.0219, "step": 2256 }, { "epoch": 33.68, "learning_rate": 0.0002735438596491228, "loss": 0.123, "step": 2257 }, { "epoch": 33.7, "learning_rate": 0.00027350877192982453, "loss": 0.4814, "step": 2258 }, { "epoch": 33.71, "learning_rate": 0.0002734736842105263, "loss": 0.1772, "step": 2259 }, { "epoch": 33.73, "learning_rate": 0.000273438596491228, "loss": 0.2027, "step": 2260 }, { "epoch": 33.74, "learning_rate": 0.00027340350877192983, "loss": 0.2114, "step": 2261 }, { "epoch": 33.76, "learning_rate": 0.0002733684210526316, "loss": 0.4351, "step": 2262 }, { "epoch": 33.77, "learning_rate": 0.00027333333333333333, "loss": 0.4057, "step": 2263 }, { "epoch": 33.79, "learning_rate": 0.0002732982456140351, "loss": 0.2215, "step": 2264 }, { "epoch": 33.8, "learning_rate": 0.0002732631578947368, "loss": 0.1509, "step": 2265 }, { "epoch": 33.82, "learning_rate": 0.0002732280701754386, "loss": 0.4424, "step": 2266 }, { "epoch": 33.83, "learning_rate": 0.0002731929824561403, "loss": 0.2958, "step": 2267 }, { "epoch": 33.85, "learning_rate": 0.00027315789473684207, "loss": 0.0407, "step": 2268 }, { "epoch": 33.86, "learning_rate": 0.0002731228070175438, "loss": 0.2227, "step": 2269 }, { "epoch": 33.88, "learning_rate": 0.00027308771929824557, "loss": 0.6847, "step": 2270 }, { "epoch": 33.89, "learning_rate": 0.0002730526315789473, "loss": 0.1189, "step": 2271 }, { "epoch": 33.91, "learning_rate": 0.0002730175438596491, "loss": 0.4449, "step": 2272 }, { "epoch": 33.92, "learning_rate": 0.00027298245614035087, "loss": 0.1553, "step": 2273 }, { "epoch": 33.94, "learning_rate": 0.0002729473684210526, "loss": 0.0367, "step": 2274 }, { "epoch": 33.95, "learning_rate": 0.00027291228070175437, "loss": 0.0587, "step": 2275 }, { "epoch": 33.97, "learning_rate": 0.0002728771929824561, "loss": 0.4163, "step": 2276 }, { "epoch": 33.98, "learning_rate": 0.00027284210526315786, "loss": 0.3024, "step": 2277 }, { "epoch": 34.0, "learning_rate": 0.0002728070175438596, "loss": 0.6945, "step": 2278 }, { "epoch": 34.01, "learning_rate": 0.00027277192982456136, "loss": 0.4193, "step": 2279 }, { "epoch": 34.03, "learning_rate": 0.00027273684210526317, "loss": 0.2735, "step": 2280 }, { "epoch": 34.04, "learning_rate": 0.0002727017543859649, "loss": 0.1118, "step": 2281 }, { "epoch": 34.06, "learning_rate": 0.0002726666666666666, "loss": 0.6835, "step": 2282 }, { "epoch": 34.07, "learning_rate": 0.0002726315789473684, "loss": 0.3097, "step": 2283 }, { "epoch": 34.09, "learning_rate": 0.00027259649122807016, "loss": 0.2908, "step": 2284 }, { "epoch": 34.1, "learning_rate": 0.0002725614035087719, "loss": 0.168, "step": 2285 }, { "epoch": 34.12, "learning_rate": 0.00027252631578947366, "loss": 0.3362, "step": 2286 }, { "epoch": 34.13, "learning_rate": 0.0002724912280701754, "loss": 0.2603, "step": 2287 }, { "epoch": 34.15, "learning_rate": 0.00027245614035087716, "loss": 0.0215, "step": 2288 }, { "epoch": 34.16, "learning_rate": 0.0002724210526315789, "loss": 0.0313, "step": 2289 }, { "epoch": 34.18, "learning_rate": 0.00027238596491228065, "loss": 0.0771, "step": 2290 }, { "epoch": 34.19, "learning_rate": 0.00027235087719298246, "loss": 0.067, "step": 2291 }, { "epoch": 34.21, "learning_rate": 0.0002723157894736842, "loss": 0.5942, "step": 2292 }, { "epoch": 34.22, "learning_rate": 0.00027228070175438595, "loss": 0.0307, "step": 2293 }, { "epoch": 34.24, "learning_rate": 0.0002722456140350877, "loss": 0.226, "step": 2294 }, { "epoch": 34.25, "learning_rate": 0.00027221052631578945, "loss": 0.0363, "step": 2295 }, { "epoch": 34.27, "learning_rate": 0.0002721754385964912, "loss": 0.1999, "step": 2296 }, { "epoch": 34.28, "learning_rate": 0.00027214035087719295, "loss": 0.1683, "step": 2297 }, { "epoch": 34.3, "learning_rate": 0.00027210526315789475, "loss": 0.0772, "step": 2298 }, { "epoch": 34.31, "learning_rate": 0.00027207017543859645, "loss": 0.1123, "step": 2299 }, { "epoch": 34.33, "learning_rate": 0.0002720350877192982, "loss": 0.4395, "step": 2300 }, { "epoch": 34.34, "learning_rate": 0.00027199999999999994, "loss": 0.4303, "step": 2301 }, { "epoch": 34.36, "learning_rate": 0.00027196491228070175, "loss": 0.3241, "step": 2302 }, { "epoch": 34.37, "learning_rate": 0.0002719298245614035, "loss": 0.1974, "step": 2303 }, { "epoch": 34.39, "learning_rate": 0.00027189473684210524, "loss": 0.3825, "step": 2304 }, { "epoch": 34.4, "learning_rate": 0.000271859649122807, "loss": 0.1469, "step": 2305 }, { "epoch": 34.42, "learning_rate": 0.00027182456140350874, "loss": 0.2059, "step": 2306 }, { "epoch": 34.43, "learning_rate": 0.0002717894736842105, "loss": 0.026, "step": 2307 }, { "epoch": 34.45, "learning_rate": 0.00027175438596491224, "loss": 0.2546, "step": 2308 }, { "epoch": 34.46, "learning_rate": 0.00027171929824561404, "loss": 0.1752, "step": 2309 }, { "epoch": 34.48, "learning_rate": 0.0002716842105263158, "loss": 0.1128, "step": 2310 }, { "epoch": 34.49, "learning_rate": 0.00027164912280701754, "loss": 0.2363, "step": 2311 }, { "epoch": 34.51, "learning_rate": 0.0002716140350877193, "loss": 0.2964, "step": 2312 }, { "epoch": 34.52, "learning_rate": 0.00027157894736842104, "loss": 0.6012, "step": 2313 }, { "epoch": 34.54, "learning_rate": 0.0002715438596491228, "loss": 0.4373, "step": 2314 }, { "epoch": 34.55, "learning_rate": 0.00027150877192982453, "loss": 0.3563, "step": 2315 }, { "epoch": 34.57, "learning_rate": 0.0002714736842105263, "loss": 0.264, "step": 2316 }, { "epoch": 34.58, "learning_rate": 0.00027143859649122803, "loss": 0.0819, "step": 2317 }, { "epoch": 34.59, "learning_rate": 0.0002714035087719298, "loss": 0.2622, "step": 2318 }, { "epoch": 34.61, "learning_rate": 0.00027136842105263153, "loss": 0.2915, "step": 2319 }, { "epoch": 34.62, "learning_rate": 0.00027133333333333333, "loss": 0.2508, "step": 2320 }, { "epoch": 34.64, "learning_rate": 0.0002712982456140351, "loss": 0.2723, "step": 2321 }, { "epoch": 34.65, "learning_rate": 0.00027126315789473683, "loss": 0.3011, "step": 2322 }, { "epoch": 34.67, "learning_rate": 0.0002712280701754386, "loss": 0.3078, "step": 2323 }, { "epoch": 34.68, "learning_rate": 0.00027119298245614033, "loss": 0.0938, "step": 2324 }, { "epoch": 34.7, "learning_rate": 0.0002711578947368421, "loss": 0.0643, "step": 2325 }, { "epoch": 34.71, "learning_rate": 0.0002711228070175438, "loss": 0.1048, "step": 2326 }, { "epoch": 34.73, "learning_rate": 0.0002710877192982456, "loss": 0.0173, "step": 2327 }, { "epoch": 34.74, "learning_rate": 0.0002710526315789474, "loss": 0.1349, "step": 2328 }, { "epoch": 34.76, "learning_rate": 0.0002710175438596491, "loss": 0.3766, "step": 2329 }, { "epoch": 34.77, "learning_rate": 0.0002709824561403508, "loss": 0.2583, "step": 2330 }, { "epoch": 34.79, "learning_rate": 0.0002709473684210526, "loss": 0.1862, "step": 2331 }, { "epoch": 34.8, "learning_rate": 0.00027091228070175437, "loss": 0.253, "step": 2332 }, { "epoch": 34.82, "learning_rate": 0.0002708771929824561, "loss": 0.7504, "step": 2333 }, { "epoch": 34.83, "learning_rate": 0.00027084210526315787, "loss": 0.6195, "step": 2334 }, { "epoch": 34.85, "learning_rate": 0.0002708070175438596, "loss": 0.163, "step": 2335 }, { "epoch": 34.86, "learning_rate": 0.00027077192982456137, "loss": 0.0618, "step": 2336 }, { "epoch": 34.88, "learning_rate": 0.0002707368421052631, "loss": 0.3675, "step": 2337 }, { "epoch": 34.89, "learning_rate": 0.00027070175438596487, "loss": 0.2927, "step": 2338 }, { "epoch": 34.91, "learning_rate": 0.00027066666666666667, "loss": 0.7439, "step": 2339 }, { "epoch": 34.92, "learning_rate": 0.0002706315789473684, "loss": 0.2834, "step": 2340 }, { "epoch": 34.94, "learning_rate": 0.00027059649122807017, "loss": 0.6386, "step": 2341 }, { "epoch": 34.95, "learning_rate": 0.0002705614035087719, "loss": 0.2333, "step": 2342 }, { "epoch": 34.97, "learning_rate": 0.00027052631578947366, "loss": 0.5232, "step": 2343 }, { "epoch": 34.98, "learning_rate": 0.0002704912280701754, "loss": 0.2471, "step": 2344 }, { "epoch": 35.0, "learning_rate": 0.00027045614035087716, "loss": 1.1787, "step": 2345 }, { "epoch": 35.01, "learning_rate": 0.00027042105263157896, "loss": 0.2669, "step": 2346 }, { "epoch": 35.03, "learning_rate": 0.00027038596491228066, "loss": 0.7957, "step": 2347 }, { "epoch": 35.04, "learning_rate": 0.0002703508771929824, "loss": 0.4517, "step": 2348 }, { "epoch": 35.06, "learning_rate": 0.00027031578947368416, "loss": 0.299, "step": 2349 }, { "epoch": 35.07, "learning_rate": 0.00027028070175438596, "loss": 0.2013, "step": 2350 }, { "epoch": 35.09, "learning_rate": 0.0002702456140350877, "loss": 0.2649, "step": 2351 }, { "epoch": 35.1, "learning_rate": 0.00027021052631578946, "loss": 0.1592, "step": 2352 }, { "epoch": 35.12, "learning_rate": 0.0002701754385964912, "loss": 0.2795, "step": 2353 }, { "epoch": 35.13, "learning_rate": 0.00027014035087719295, "loss": 0.3779, "step": 2354 }, { "epoch": 35.15, "learning_rate": 0.0002701052631578947, "loss": 0.2906, "step": 2355 }, { "epoch": 35.16, "learning_rate": 0.00027007017543859645, "loss": 0.2444, "step": 2356 }, { "epoch": 35.18, "learning_rate": 0.00027003508771929825, "loss": 0.3235, "step": 2357 }, { "epoch": 35.19, "learning_rate": 0.00027, "loss": 0.248, "step": 2358 }, { "epoch": 35.21, "learning_rate": 0.00026996491228070175, "loss": 0.2753, "step": 2359 }, { "epoch": 35.22, "learning_rate": 0.00026992982456140345, "loss": 0.2824, "step": 2360 }, { "epoch": 35.24, "learning_rate": 0.00026989473684210525, "loss": 0.0507, "step": 2361 }, { "epoch": 35.25, "learning_rate": 0.000269859649122807, "loss": 0.1854, "step": 2362 }, { "epoch": 35.27, "learning_rate": 0.00026982456140350875, "loss": 0.2716, "step": 2363 }, { "epoch": 35.28, "learning_rate": 0.0002697894736842105, "loss": 0.8457, "step": 2364 }, { "epoch": 35.3, "learning_rate": 0.00026975438596491224, "loss": 0.3006, "step": 2365 }, { "epoch": 35.31, "learning_rate": 0.000269719298245614, "loss": 0.1396, "step": 2366 }, { "epoch": 35.33, "learning_rate": 0.00026968421052631574, "loss": 0.3765, "step": 2367 }, { "epoch": 35.34, "learning_rate": 0.0002696491228070175, "loss": 0.1906, "step": 2368 }, { "epoch": 35.36, "learning_rate": 0.0002696140350877193, "loss": 0.5598, "step": 2369 }, { "epoch": 35.37, "learning_rate": 0.00026957894736842104, "loss": 0.3803, "step": 2370 }, { "epoch": 35.39, "learning_rate": 0.0002695438596491228, "loss": 0.2419, "step": 2371 }, { "epoch": 35.4, "learning_rate": 0.00026950877192982454, "loss": 0.0922, "step": 2372 }, { "epoch": 35.42, "learning_rate": 0.0002694736842105263, "loss": 0.261, "step": 2373 }, { "epoch": 35.43, "learning_rate": 0.00026943859649122804, "loss": 0.1247, "step": 2374 }, { "epoch": 35.45, "learning_rate": 0.0002694035087719298, "loss": 0.1853, "step": 2375 }, { "epoch": 35.46, "learning_rate": 0.0002693684210526316, "loss": 0.5109, "step": 2376 }, { "epoch": 35.48, "learning_rate": 0.00026933333333333334, "loss": 0.6572, "step": 2377 }, { "epoch": 35.49, "learning_rate": 0.00026929824561403503, "loss": 0.1821, "step": 2378 }, { "epoch": 35.51, "learning_rate": 0.0002692631578947368, "loss": 0.1235, "step": 2379 }, { "epoch": 35.52, "learning_rate": 0.0002692280701754386, "loss": 0.1746, "step": 2380 }, { "epoch": 35.54, "learning_rate": 0.00026919298245614033, "loss": 0.3027, "step": 2381 }, { "epoch": 35.55, "learning_rate": 0.0002691578947368421, "loss": 0.1361, "step": 2382 }, { "epoch": 35.57, "learning_rate": 0.00026912280701754383, "loss": 0.2752, "step": 2383 }, { "epoch": 35.58, "learning_rate": 0.0002690877192982456, "loss": 0.1813, "step": 2384 }, { "epoch": 35.59, "learning_rate": 0.00026905263157894733, "loss": 0.2419, "step": 2385 }, { "epoch": 35.61, "learning_rate": 0.0002690175438596491, "loss": 0.542, "step": 2386 }, { "epoch": 35.62, "learning_rate": 0.0002689824561403509, "loss": 0.2718, "step": 2387 }, { "epoch": 35.64, "learning_rate": 0.00026894736842105263, "loss": 0.3817, "step": 2388 }, { "epoch": 35.65, "learning_rate": 0.0002689122807017544, "loss": 0.0568, "step": 2389 }, { "epoch": 35.67, "learning_rate": 0.0002688771929824561, "loss": 0.212, "step": 2390 }, { "epoch": 35.68, "learning_rate": 0.0002688421052631579, "loss": 0.0527, "step": 2391 }, { "epoch": 35.7, "learning_rate": 0.0002688070175438596, "loss": 0.2184, "step": 2392 }, { "epoch": 35.71, "learning_rate": 0.0002687719298245614, "loss": 0.0476, "step": 2393 }, { "epoch": 35.73, "learning_rate": 0.0002687368421052632, "loss": 0.138, "step": 2394 }, { "epoch": 35.74, "learning_rate": 0.00026870175438596487, "loss": 0.2552, "step": 2395 }, { "epoch": 35.76, "learning_rate": 0.0002686666666666666, "loss": 0.5772, "step": 2396 }, { "epoch": 35.77, "learning_rate": 0.00026863157894736837, "loss": 0.3532, "step": 2397 }, { "epoch": 35.79, "learning_rate": 0.00026859649122807017, "loss": 0.0265, "step": 2398 }, { "epoch": 35.8, "learning_rate": 0.0002685614035087719, "loss": 0.1063, "step": 2399 }, { "epoch": 35.82, "learning_rate": 0.00026852631578947367, "loss": 0.1412, "step": 2400 }, { "epoch": 35.82, "eval_accuracy": 0.8311306901615272, "eval_f1": 0.8352003849989705, "eval_loss": 0.7131851315498352, "eval_runtime": 344.3646, "eval_samples_per_second": 11.865, "eval_steps_per_second": 0.743, "step": 2400 }, { "epoch": 35.83, "learning_rate": 0.0002684912280701754, "loss": 0.3079, "step": 2401 }, { "epoch": 35.85, "learning_rate": 0.00026845614035087717, "loss": 0.4798, "step": 2402 }, { "epoch": 35.86, "learning_rate": 0.0002684210526315789, "loss": 0.058, "step": 2403 }, { "epoch": 35.88, "learning_rate": 0.00026838596491228066, "loss": 0.0388, "step": 2404 }, { "epoch": 35.89, "learning_rate": 0.0002683508771929824, "loss": 0.0393, "step": 2405 }, { "epoch": 35.91, "learning_rate": 0.0002683157894736842, "loss": 0.0151, "step": 2406 }, { "epoch": 35.92, "learning_rate": 0.00026828070175438596, "loss": 0.2027, "step": 2407 }, { "epoch": 35.94, "learning_rate": 0.00026824561403508766, "loss": 0.1488, "step": 2408 }, { "epoch": 35.95, "learning_rate": 0.00026821052631578946, "loss": 0.0274, "step": 2409 }, { "epoch": 35.97, "learning_rate": 0.0002681754385964912, "loss": 0.5102, "step": 2410 }, { "epoch": 35.98, "learning_rate": 0.00026814035087719296, "loss": 0.0618, "step": 2411 }, { "epoch": 36.0, "learning_rate": 0.0002681052631578947, "loss": 0.0711, "step": 2412 }, { "epoch": 36.01, "learning_rate": 0.00026807017543859646, "loss": 0.333, "step": 2413 }, { "epoch": 36.03, "learning_rate": 0.0002680350877192982, "loss": 0.2717, "step": 2414 }, { "epoch": 36.04, "learning_rate": 0.00026799999999999995, "loss": 0.027, "step": 2415 }, { "epoch": 36.06, "learning_rate": 0.0002679649122807017, "loss": 0.2816, "step": 2416 }, { "epoch": 36.07, "learning_rate": 0.0002679298245614035, "loss": 0.2222, "step": 2417 }, { "epoch": 36.09, "learning_rate": 0.00026789473684210526, "loss": 0.2245, "step": 2418 }, { "epoch": 36.1, "learning_rate": 0.000267859649122807, "loss": 0.3236, "step": 2419 }, { "epoch": 36.12, "learning_rate": 0.00026782456140350875, "loss": 0.0227, "step": 2420 }, { "epoch": 36.13, "learning_rate": 0.0002677894736842105, "loss": 0.1237, "step": 2421 }, { "epoch": 36.15, "learning_rate": 0.00026775438596491225, "loss": 0.2968, "step": 2422 }, { "epoch": 36.16, "learning_rate": 0.000267719298245614, "loss": 0.4212, "step": 2423 }, { "epoch": 36.18, "learning_rate": 0.0002676842105263158, "loss": 0.5356, "step": 2424 }, { "epoch": 36.19, "learning_rate": 0.00026764912280701755, "loss": 0.1707, "step": 2425 }, { "epoch": 36.21, "learning_rate": 0.00026761403508771925, "loss": 0.0538, "step": 2426 }, { "epoch": 36.22, "learning_rate": 0.000267578947368421, "loss": 0.5187, "step": 2427 }, { "epoch": 36.24, "learning_rate": 0.0002675438596491228, "loss": 0.1425, "step": 2428 }, { "epoch": 36.25, "learning_rate": 0.00026750877192982455, "loss": 0.2559, "step": 2429 }, { "epoch": 36.27, "learning_rate": 0.0002674736842105263, "loss": 0.2514, "step": 2430 }, { "epoch": 36.28, "learning_rate": 0.00026743859649122804, "loss": 0.3183, "step": 2431 }, { "epoch": 36.3, "learning_rate": 0.0002674035087719298, "loss": 0.4393, "step": 2432 }, { "epoch": 36.31, "learning_rate": 0.00026736842105263154, "loss": 0.2093, "step": 2433 }, { "epoch": 36.33, "learning_rate": 0.0002673333333333333, "loss": 0.3006, "step": 2434 }, { "epoch": 36.34, "learning_rate": 0.0002672982456140351, "loss": 0.1366, "step": 2435 }, { "epoch": 36.36, "learning_rate": 0.00026726315789473684, "loss": 0.1511, "step": 2436 }, { "epoch": 36.37, "learning_rate": 0.0002672280701754386, "loss": 0.3088, "step": 2437 }, { "epoch": 36.39, "learning_rate": 0.00026719298245614034, "loss": 0.2611, "step": 2438 }, { "epoch": 36.4, "learning_rate": 0.0002671578947368421, "loss": 0.0553, "step": 2439 }, { "epoch": 36.42, "learning_rate": 0.00026712280701754384, "loss": 0.0143, "step": 2440 }, { "epoch": 36.43, "learning_rate": 0.0002670877192982456, "loss": 0.2214, "step": 2441 }, { "epoch": 36.45, "learning_rate": 0.00026705263157894733, "loss": 0.3594, "step": 2442 }, { "epoch": 36.46, "learning_rate": 0.0002670175438596491, "loss": 0.0609, "step": 2443 }, { "epoch": 36.48, "learning_rate": 0.00026698245614035083, "loss": 0.1486, "step": 2444 }, { "epoch": 36.49, "learning_rate": 0.0002669473684210526, "loss": 0.033, "step": 2445 }, { "epoch": 36.51, "learning_rate": 0.0002669122807017544, "loss": 0.8025, "step": 2446 }, { "epoch": 36.52, "learning_rate": 0.00026687719298245613, "loss": 0.3945, "step": 2447 }, { "epoch": 36.54, "learning_rate": 0.0002668421052631579, "loss": 0.3908, "step": 2448 }, { "epoch": 36.55, "learning_rate": 0.00026680701754385963, "loss": 0.1322, "step": 2449 }, { "epoch": 36.57, "learning_rate": 0.0002667719298245614, "loss": 0.3136, "step": 2450 }, { "epoch": 36.58, "learning_rate": 0.00026673684210526313, "loss": 0.1262, "step": 2451 }, { "epoch": 36.59, "learning_rate": 0.0002667017543859649, "loss": 0.4639, "step": 2452 }, { "epoch": 36.61, "learning_rate": 0.0002666666666666666, "loss": 0.1867, "step": 2453 }, { "epoch": 36.62, "learning_rate": 0.00026663157894736843, "loss": 0.0463, "step": 2454 }, { "epoch": 36.64, "learning_rate": 0.0002665964912280702, "loss": 0.2519, "step": 2455 }, { "epoch": 36.65, "learning_rate": 0.00026656140350877187, "loss": 0.1951, "step": 2456 }, { "epoch": 36.67, "learning_rate": 0.0002665263157894737, "loss": 0.0287, "step": 2457 }, { "epoch": 36.68, "learning_rate": 0.0002664912280701754, "loss": 0.7845, "step": 2458 }, { "epoch": 36.7, "learning_rate": 0.00026645614035087717, "loss": 0.1309, "step": 2459 }, { "epoch": 36.71, "learning_rate": 0.0002664210526315789, "loss": 0.1365, "step": 2460 }, { "epoch": 36.73, "learning_rate": 0.00026638596491228067, "loss": 0.0312, "step": 2461 }, { "epoch": 36.74, "learning_rate": 0.0002663508771929824, "loss": 0.1098, "step": 2462 }, { "epoch": 36.76, "learning_rate": 0.00026631578947368417, "loss": 0.4963, "step": 2463 }, { "epoch": 36.77, "learning_rate": 0.0002662807017543859, "loss": 0.1178, "step": 2464 }, { "epoch": 36.79, "learning_rate": 0.0002662456140350877, "loss": 0.1513, "step": 2465 }, { "epoch": 36.8, "learning_rate": 0.00026621052631578947, "loss": 0.2865, "step": 2466 }, { "epoch": 36.82, "learning_rate": 0.0002661754385964912, "loss": 0.3721, "step": 2467 }, { "epoch": 36.83, "learning_rate": 0.00026614035087719297, "loss": 0.1291, "step": 2468 }, { "epoch": 36.85, "learning_rate": 0.0002661052631578947, "loss": 0.2043, "step": 2469 }, { "epoch": 36.86, "learning_rate": 0.00026607017543859646, "loss": 0.1493, "step": 2470 }, { "epoch": 36.88, "learning_rate": 0.0002660350877192982, "loss": 0.311, "step": 2471 }, { "epoch": 36.89, "learning_rate": 0.000266, "loss": 0.3776, "step": 2472 }, { "epoch": 36.91, "learning_rate": 0.0002659649122807017, "loss": 0.0136, "step": 2473 }, { "epoch": 36.92, "learning_rate": 0.00026592982456140346, "loss": 0.3089, "step": 2474 }, { "epoch": 36.94, "learning_rate": 0.0002658947368421052, "loss": 0.1018, "step": 2475 }, { "epoch": 36.95, "learning_rate": 0.000265859649122807, "loss": 0.2423, "step": 2476 }, { "epoch": 36.97, "learning_rate": 0.00026582456140350876, "loss": 0.3071, "step": 2477 }, { "epoch": 36.98, "learning_rate": 0.0002657894736842105, "loss": 0.2843, "step": 2478 }, { "epoch": 37.0, "learning_rate": 0.00026575438596491226, "loss": 0.4304, "step": 2479 }, { "epoch": 37.01, "learning_rate": 0.000265719298245614, "loss": 0.2623, "step": 2480 }, { "epoch": 37.03, "learning_rate": 0.00026568421052631575, "loss": 0.5204, "step": 2481 }, { "epoch": 37.04, "learning_rate": 0.0002656491228070175, "loss": 0.181, "step": 2482 }, { "epoch": 37.06, "learning_rate": 0.0002656140350877193, "loss": 0.2256, "step": 2483 }, { "epoch": 37.07, "learning_rate": 0.00026557894736842105, "loss": 0.6583, "step": 2484 }, { "epoch": 37.09, "learning_rate": 0.0002655438596491228, "loss": 0.3567, "step": 2485 }, { "epoch": 37.1, "learning_rate": 0.00026550877192982455, "loss": 0.2477, "step": 2486 }, { "epoch": 37.12, "learning_rate": 0.0002654736842105263, "loss": 0.4258, "step": 2487 }, { "epoch": 37.13, "learning_rate": 0.00026543859649122805, "loss": 0.0732, "step": 2488 }, { "epoch": 37.15, "learning_rate": 0.0002654035087719298, "loss": 0.0374, "step": 2489 }, { "epoch": 37.16, "learning_rate": 0.00026536842105263155, "loss": 0.1289, "step": 2490 }, { "epoch": 37.18, "learning_rate": 0.0002653333333333333, "loss": 0.0477, "step": 2491 }, { "epoch": 37.19, "learning_rate": 0.00026529824561403504, "loss": 0.1553, "step": 2492 }, { "epoch": 37.21, "learning_rate": 0.0002652631578947368, "loss": 0.1792, "step": 2493 }, { "epoch": 37.22, "learning_rate": 0.0002652280701754386, "loss": 0.0681, "step": 2494 }, { "epoch": 37.24, "learning_rate": 0.00026519298245614034, "loss": 0.3027, "step": 2495 }, { "epoch": 37.25, "learning_rate": 0.0002651578947368421, "loss": 0.2969, "step": 2496 }, { "epoch": 37.27, "learning_rate": 0.00026512280701754384, "loss": 0.4292, "step": 2497 }, { "epoch": 37.28, "learning_rate": 0.0002650877192982456, "loss": 0.285, "step": 2498 }, { "epoch": 37.3, "learning_rate": 0.00026505263157894734, "loss": 1.0064, "step": 2499 }, { "epoch": 37.31, "learning_rate": 0.0002650175438596491, "loss": 0.2859, "step": 2500 }, { "epoch": 37.33, "learning_rate": 0.00026498245614035084, "loss": 0.131, "step": 2501 }, { "epoch": 37.34, "learning_rate": 0.00026494736842105264, "loss": 0.3665, "step": 2502 }, { "epoch": 37.36, "learning_rate": 0.0002649122807017544, "loss": 0.4487, "step": 2503 }, { "epoch": 37.37, "learning_rate": 0.0002648771929824561, "loss": 0.722, "step": 2504 }, { "epoch": 37.39, "learning_rate": 0.00026484210526315783, "loss": 1.3793, "step": 2505 }, { "epoch": 37.4, "learning_rate": 0.00026480701754385964, "loss": 0.2841, "step": 2506 }, { "epoch": 37.42, "learning_rate": 0.0002647719298245614, "loss": 0.6317, "step": 2507 }, { "epoch": 37.43, "learning_rate": 0.00026473684210526313, "loss": 0.4639, "step": 2508 }, { "epoch": 37.45, "learning_rate": 0.0002647017543859649, "loss": 0.2349, "step": 2509 }, { "epoch": 37.46, "learning_rate": 0.00026466666666666663, "loss": 0.252, "step": 2510 }, { "epoch": 37.48, "learning_rate": 0.0002646315789473684, "loss": 0.2653, "step": 2511 }, { "epoch": 37.49, "learning_rate": 0.00026459649122807013, "loss": 0.1756, "step": 2512 }, { "epoch": 37.51, "learning_rate": 0.00026456140350877193, "loss": 0.6025, "step": 2513 }, { "epoch": 37.52, "learning_rate": 0.0002645263157894737, "loss": 0.1538, "step": 2514 }, { "epoch": 37.54, "learning_rate": 0.00026449122807017543, "loss": 0.7887, "step": 2515 }, { "epoch": 37.55, "learning_rate": 0.0002644561403508772, "loss": 0.1094, "step": 2516 }, { "epoch": 37.57, "learning_rate": 0.0002644210526315789, "loss": 0.1136, "step": 2517 }, { "epoch": 37.58, "learning_rate": 0.0002643859649122807, "loss": 0.054, "step": 2518 }, { "epoch": 37.59, "learning_rate": 0.0002643508771929824, "loss": 0.4868, "step": 2519 }, { "epoch": 37.61, "learning_rate": 0.0002643157894736842, "loss": 0.2704, "step": 2520 }, { "epoch": 37.62, "learning_rate": 0.0002642807017543859, "loss": 0.0936, "step": 2521 }, { "epoch": 37.64, "learning_rate": 0.00026424561403508767, "loss": 0.7214, "step": 2522 }, { "epoch": 37.65, "learning_rate": 0.0002642105263157894, "loss": 0.0592, "step": 2523 }, { "epoch": 37.67, "learning_rate": 0.0002641754385964912, "loss": 0.0877, "step": 2524 }, { "epoch": 37.68, "learning_rate": 0.00026414035087719297, "loss": 0.2035, "step": 2525 }, { "epoch": 37.7, "learning_rate": 0.0002641052631578947, "loss": 0.0444, "step": 2526 }, { "epoch": 37.71, "learning_rate": 0.00026407017543859647, "loss": 0.1678, "step": 2527 }, { "epoch": 37.73, "learning_rate": 0.0002640350877192982, "loss": 0.037, "step": 2528 }, { "epoch": 37.74, "learning_rate": 0.00026399999999999997, "loss": 0.0603, "step": 2529 }, { "epoch": 37.76, "learning_rate": 0.0002639649122807017, "loss": 0.2909, "step": 2530 }, { "epoch": 37.77, "learning_rate": 0.00026392982456140346, "loss": 0.1873, "step": 2531 }, { "epoch": 37.79, "learning_rate": 0.00026389473684210527, "loss": 0.0805, "step": 2532 }, { "epoch": 37.8, "learning_rate": 0.000263859649122807, "loss": 0.1528, "step": 2533 }, { "epoch": 37.82, "learning_rate": 0.00026382456140350876, "loss": 0.1128, "step": 2534 }, { "epoch": 37.83, "learning_rate": 0.0002637894736842105, "loss": 0.0441, "step": 2535 }, { "epoch": 37.85, "learning_rate": 0.00026375438596491226, "loss": 0.3716, "step": 2536 }, { "epoch": 37.86, "learning_rate": 0.000263719298245614, "loss": 0.2316, "step": 2537 }, { "epoch": 37.88, "learning_rate": 0.00026368421052631576, "loss": 0.397, "step": 2538 }, { "epoch": 37.89, "learning_rate": 0.0002636491228070175, "loss": 0.0878, "step": 2539 }, { "epoch": 37.91, "learning_rate": 0.00026361403508771926, "loss": 0.3564, "step": 2540 }, { "epoch": 37.92, "learning_rate": 0.000263578947368421, "loss": 0.2722, "step": 2541 }, { "epoch": 37.94, "learning_rate": 0.00026354385964912275, "loss": 0.2298, "step": 2542 }, { "epoch": 37.95, "learning_rate": 0.00026350877192982456, "loss": 0.2963, "step": 2543 }, { "epoch": 37.97, "learning_rate": 0.0002634736842105263, "loss": 0.2939, "step": 2544 }, { "epoch": 37.98, "learning_rate": 0.00026343859649122805, "loss": 0.1811, "step": 2545 }, { "epoch": 38.0, "learning_rate": 0.0002634035087719298, "loss": 0.2587, "step": 2546 }, { "epoch": 38.01, "learning_rate": 0.00026336842105263155, "loss": 0.335, "step": 2547 }, { "epoch": 38.03, "learning_rate": 0.0002633333333333333, "loss": 0.3061, "step": 2548 }, { "epoch": 38.04, "learning_rate": 0.00026329824561403505, "loss": 0.2655, "step": 2549 }, { "epoch": 38.06, "learning_rate": 0.00026326315789473685, "loss": 0.2371, "step": 2550 }, { "epoch": 38.07, "learning_rate": 0.0002632280701754386, "loss": 0.5286, "step": 2551 }, { "epoch": 38.09, "learning_rate": 0.0002631929824561403, "loss": 0.3128, "step": 2552 }, { "epoch": 38.1, "learning_rate": 0.00026315789473684205, "loss": 0.0212, "step": 2553 }, { "epoch": 38.12, "learning_rate": 0.00026312280701754385, "loss": 0.5485, "step": 2554 }, { "epoch": 38.13, "learning_rate": 0.0002630877192982456, "loss": 0.2955, "step": 2555 }, { "epoch": 38.15, "learning_rate": 0.00026305263157894735, "loss": 0.0294, "step": 2556 }, { "epoch": 38.16, "learning_rate": 0.0002630175438596491, "loss": 0.2217, "step": 2557 }, { "epoch": 38.18, "learning_rate": 0.00026298245614035084, "loss": 0.2246, "step": 2558 }, { "epoch": 38.19, "learning_rate": 0.0002629473684210526, "loss": 0.4677, "step": 2559 }, { "epoch": 38.21, "learning_rate": 0.00026291228070175434, "loss": 0.1354, "step": 2560 }, { "epoch": 38.22, "learning_rate": 0.00026287719298245614, "loss": 0.0242, "step": 2561 }, { "epoch": 38.24, "learning_rate": 0.0002628421052631579, "loss": 0.1915, "step": 2562 }, { "epoch": 38.25, "learning_rate": 0.00026280701754385964, "loss": 0.494, "step": 2563 }, { "epoch": 38.27, "learning_rate": 0.0002627719298245614, "loss": 0.1895, "step": 2564 }, { "epoch": 38.28, "learning_rate": 0.00026273684210526314, "loss": 0.0574, "step": 2565 }, { "epoch": 38.3, "learning_rate": 0.0002627017543859649, "loss": 0.085, "step": 2566 }, { "epoch": 38.31, "learning_rate": 0.00026266666666666664, "loss": 0.317, "step": 2567 }, { "epoch": 38.33, "learning_rate": 0.0002626315789473684, "loss": 0.1981, "step": 2568 }, { "epoch": 38.34, "learning_rate": 0.00026259649122807013, "loss": 0.2723, "step": 2569 }, { "epoch": 38.36, "learning_rate": 0.0002625614035087719, "loss": 0.4135, "step": 2570 }, { "epoch": 38.37, "learning_rate": 0.00026252631578947363, "loss": 0.3537, "step": 2571 }, { "epoch": 38.39, "learning_rate": 0.00026249122807017543, "loss": 0.223, "step": 2572 }, { "epoch": 38.4, "learning_rate": 0.0002624561403508772, "loss": 0.0888, "step": 2573 }, { "epoch": 38.42, "learning_rate": 0.00026242105263157893, "loss": 0.0888, "step": 2574 }, { "epoch": 38.43, "learning_rate": 0.0002623859649122807, "loss": 0.2686, "step": 2575 }, { "epoch": 38.45, "learning_rate": 0.00026235087719298243, "loss": 0.291, "step": 2576 }, { "epoch": 38.46, "learning_rate": 0.0002623157894736842, "loss": 0.0836, "step": 2577 }, { "epoch": 38.48, "learning_rate": 0.00026228070175438593, "loss": 0.2366, "step": 2578 }, { "epoch": 38.49, "learning_rate": 0.0002622456140350877, "loss": 0.0858, "step": 2579 }, { "epoch": 38.51, "learning_rate": 0.0002622105263157895, "loss": 0.5286, "step": 2580 }, { "epoch": 38.52, "learning_rate": 0.00026217543859649123, "loss": 0.5125, "step": 2581 }, { "epoch": 38.54, "learning_rate": 0.000262140350877193, "loss": 0.0903, "step": 2582 }, { "epoch": 38.55, "learning_rate": 0.0002621052631578947, "loss": 0.457, "step": 2583 }, { "epoch": 38.57, "learning_rate": 0.0002620701754385965, "loss": 0.6476, "step": 2584 }, { "epoch": 38.58, "learning_rate": 0.0002620350877192982, "loss": 0.0249, "step": 2585 }, { "epoch": 38.59, "learning_rate": 0.00026199999999999997, "loss": 0.2783, "step": 2586 }, { "epoch": 38.61, "learning_rate": 0.0002619649122807017, "loss": 0.2174, "step": 2587 }, { "epoch": 38.62, "learning_rate": 0.00026192982456140347, "loss": 0.2787, "step": 2588 }, { "epoch": 38.64, "learning_rate": 0.0002618947368421052, "loss": 0.073, "step": 2589 }, { "epoch": 38.65, "learning_rate": 0.00026185964912280697, "loss": 0.0238, "step": 2590 }, { "epoch": 38.67, "learning_rate": 0.00026182456140350877, "loss": 0.0319, "step": 2591 }, { "epoch": 38.68, "learning_rate": 0.0002617894736842105, "loss": 0.1248, "step": 2592 }, { "epoch": 38.7, "learning_rate": 0.00026175438596491227, "loss": 0.049, "step": 2593 }, { "epoch": 38.71, "learning_rate": 0.000261719298245614, "loss": 0.3386, "step": 2594 }, { "epoch": 38.73, "learning_rate": 0.00026168421052631576, "loss": 0.3199, "step": 2595 }, { "epoch": 38.74, "learning_rate": 0.0002616491228070175, "loss": 0.1528, "step": 2596 }, { "epoch": 38.76, "learning_rate": 0.00026161403508771926, "loss": 0.1884, "step": 2597 }, { "epoch": 38.77, "learning_rate": 0.00026157894736842107, "loss": 0.3862, "step": 2598 }, { "epoch": 38.79, "learning_rate": 0.0002615438596491228, "loss": 0.1399, "step": 2599 }, { "epoch": 38.8, "learning_rate": 0.0002615087719298245, "loss": 0.2141, "step": 2600 }, { "epoch": 38.8, "eval_accuracy": 0.8262359275575134, "eval_f1": 0.8275994930387057, "eval_loss": 0.7550894618034363, "eval_runtime": 345.2152, "eval_samples_per_second": 11.836, "eval_steps_per_second": 0.742, "step": 2600 }, { "epoch": 38.82, "learning_rate": 0.00026147368421052626, "loss": 0.0499, "step": 2601 }, { "epoch": 38.83, "learning_rate": 0.00026143859649122806, "loss": 0.3111, "step": 2602 }, { "epoch": 38.85, "learning_rate": 0.0002614035087719298, "loss": 0.1958, "step": 2603 }, { "epoch": 38.86, "learning_rate": 0.00026136842105263156, "loss": 0.0505, "step": 2604 }, { "epoch": 38.88, "learning_rate": 0.0002613333333333333, "loss": 0.1559, "step": 2605 }, { "epoch": 38.89, "learning_rate": 0.00026129824561403506, "loss": 0.0148, "step": 2606 }, { "epoch": 38.91, "learning_rate": 0.0002612631578947368, "loss": 0.3198, "step": 2607 }, { "epoch": 38.92, "learning_rate": 0.00026122807017543855, "loss": 0.5153, "step": 2608 }, { "epoch": 38.94, "learning_rate": 0.00026119298245614036, "loss": 0.2361, "step": 2609 }, { "epoch": 38.95, "learning_rate": 0.0002611578947368421, "loss": 0.3028, "step": 2610 }, { "epoch": 38.97, "learning_rate": 0.00026112280701754385, "loss": 0.3361, "step": 2611 }, { "epoch": 38.98, "learning_rate": 0.0002610877192982456, "loss": 0.0152, "step": 2612 }, { "epoch": 39.0, "learning_rate": 0.00026105263157894735, "loss": 0.0569, "step": 2613 }, { "epoch": 39.01, "learning_rate": 0.0002610175438596491, "loss": 0.2744, "step": 2614 }, { "epoch": 39.03, "learning_rate": 0.00026098245614035085, "loss": 0.2632, "step": 2615 }, { "epoch": 39.04, "learning_rate": 0.0002609473684210526, "loss": 0.0301, "step": 2616 }, { "epoch": 39.06, "learning_rate": 0.00026091228070175435, "loss": 0.2426, "step": 2617 }, { "epoch": 39.07, "learning_rate": 0.0002608771929824561, "loss": 0.4076, "step": 2618 }, { "epoch": 39.09, "learning_rate": 0.00026084210526315784, "loss": 0.2153, "step": 2619 }, { "epoch": 39.1, "learning_rate": 0.00026080701754385965, "loss": 0.0533, "step": 2620 }, { "epoch": 39.12, "learning_rate": 0.0002607719298245614, "loss": 0.4746, "step": 2621 }, { "epoch": 39.13, "learning_rate": 0.00026073684210526314, "loss": 0.71, "step": 2622 }, { "epoch": 39.15, "learning_rate": 0.0002607017543859649, "loss": 0.2725, "step": 2623 }, { "epoch": 39.16, "learning_rate": 0.00026066666666666664, "loss": 0.1129, "step": 2624 }, { "epoch": 39.18, "learning_rate": 0.0002606315789473684, "loss": 0.2319, "step": 2625 }, { "epoch": 39.19, "learning_rate": 0.00026059649122807014, "loss": 0.1732, "step": 2626 }, { "epoch": 39.21, "learning_rate": 0.0002605614035087719, "loss": 0.3672, "step": 2627 }, { "epoch": 39.22, "learning_rate": 0.0002605263157894737, "loss": 0.2615, "step": 2628 }, { "epoch": 39.24, "learning_rate": 0.00026049122807017544, "loss": 0.2643, "step": 2629 }, { "epoch": 39.25, "learning_rate": 0.0002604561403508772, "loss": 0.3267, "step": 2630 }, { "epoch": 39.27, "learning_rate": 0.0002604210526315789, "loss": 0.5538, "step": 2631 }, { "epoch": 39.28, "learning_rate": 0.0002603859649122807, "loss": 0.1607, "step": 2632 }, { "epoch": 39.3, "learning_rate": 0.00026035087719298244, "loss": 0.3511, "step": 2633 }, { "epoch": 39.31, "learning_rate": 0.0002603157894736842, "loss": 0.0744, "step": 2634 }, { "epoch": 39.33, "learning_rate": 0.00026028070175438593, "loss": 0.4115, "step": 2635 }, { "epoch": 39.34, "learning_rate": 0.0002602456140350877, "loss": 0.189, "step": 2636 }, { "epoch": 39.36, "learning_rate": 0.00026021052631578943, "loss": 0.437, "step": 2637 }, { "epoch": 39.37, "learning_rate": 0.0002601754385964912, "loss": 0.1349, "step": 2638 }, { "epoch": 39.39, "learning_rate": 0.000260140350877193, "loss": 0.1645, "step": 2639 }, { "epoch": 39.4, "learning_rate": 0.00026010526315789473, "loss": 0.7744, "step": 2640 }, { "epoch": 39.42, "learning_rate": 0.0002600701754385965, "loss": 0.0679, "step": 2641 }, { "epoch": 39.43, "learning_rate": 0.00026003508771929823, "loss": 0.1496, "step": 2642 }, { "epoch": 39.45, "learning_rate": 0.00026, "loss": 0.056, "step": 2643 }, { "epoch": 39.46, "learning_rate": 0.0002599649122807017, "loss": 0.3514, "step": 2644 }, { "epoch": 39.48, "learning_rate": 0.0002599298245614035, "loss": 0.112, "step": 2645 }, { "epoch": 39.49, "learning_rate": 0.0002598947368421053, "loss": 0.0599, "step": 2646 }, { "epoch": 39.51, "learning_rate": 0.000259859649122807, "loss": 0.4294, "step": 2647 }, { "epoch": 39.52, "learning_rate": 0.0002598245614035087, "loss": 0.1908, "step": 2648 }, { "epoch": 39.54, "learning_rate": 0.00025978947368421047, "loss": 0.4259, "step": 2649 }, { "epoch": 39.55, "learning_rate": 0.00025975438596491227, "loss": 0.2764, "step": 2650 }, { "epoch": 39.57, "learning_rate": 0.000259719298245614, "loss": 0.2793, "step": 2651 }, { "epoch": 39.58, "learning_rate": 0.00025968421052631577, "loss": 0.2748, "step": 2652 }, { "epoch": 39.59, "learning_rate": 0.0002596491228070175, "loss": 0.0622, "step": 2653 }, { "epoch": 39.61, "learning_rate": 0.00025961403508771927, "loss": 0.2978, "step": 2654 }, { "epoch": 39.62, "learning_rate": 0.000259578947368421, "loss": 0.0851, "step": 2655 }, { "epoch": 39.64, "learning_rate": 0.00025954385964912277, "loss": 0.2829, "step": 2656 }, { "epoch": 39.65, "learning_rate": 0.00025950877192982457, "loss": 0.1534, "step": 2657 }, { "epoch": 39.67, "learning_rate": 0.0002594736842105263, "loss": 0.4885, "step": 2658 }, { "epoch": 39.68, "learning_rate": 0.00025943859649122807, "loss": 0.3212, "step": 2659 }, { "epoch": 39.7, "learning_rate": 0.0002594035087719298, "loss": 0.054, "step": 2660 }, { "epoch": 39.71, "learning_rate": 0.00025936842105263156, "loss": 0.1702, "step": 2661 }, { "epoch": 39.73, "learning_rate": 0.0002593333333333333, "loss": 0.068, "step": 2662 }, { "epoch": 39.74, "learning_rate": 0.00025929824561403506, "loss": 0.4455, "step": 2663 }, { "epoch": 39.76, "learning_rate": 0.0002592631578947368, "loss": 0.5979, "step": 2664 }, { "epoch": 39.77, "learning_rate": 0.00025922807017543856, "loss": 0.1285, "step": 2665 }, { "epoch": 39.79, "learning_rate": 0.0002591929824561403, "loss": 0.259, "step": 2666 }, { "epoch": 39.8, "learning_rate": 0.00025915789473684206, "loss": 0.2704, "step": 2667 }, { "epoch": 39.82, "learning_rate": 0.0002591228070175438, "loss": 0.5382, "step": 2668 }, { "epoch": 39.83, "learning_rate": 0.0002590877192982456, "loss": 0.1596, "step": 2669 }, { "epoch": 39.85, "learning_rate": 0.00025905263157894736, "loss": 0.1808, "step": 2670 }, { "epoch": 39.86, "learning_rate": 0.0002590175438596491, "loss": 0.1089, "step": 2671 }, { "epoch": 39.88, "learning_rate": 0.00025898245614035085, "loss": 0.1954, "step": 2672 }, { "epoch": 39.89, "learning_rate": 0.0002589473684210526, "loss": 0.1062, "step": 2673 }, { "epoch": 39.91, "learning_rate": 0.00025891228070175435, "loss": 0.032, "step": 2674 }, { "epoch": 39.92, "learning_rate": 0.0002588771929824561, "loss": 0.2953, "step": 2675 }, { "epoch": 39.94, "learning_rate": 0.0002588421052631579, "loss": 0.2644, "step": 2676 }, { "epoch": 39.95, "learning_rate": 0.00025880701754385965, "loss": 0.0182, "step": 2677 }, { "epoch": 39.97, "learning_rate": 0.0002587719298245614, "loss": 0.0144, "step": 2678 }, { "epoch": 39.98, "learning_rate": 0.0002587368421052631, "loss": 0.0845, "step": 2679 }, { "epoch": 40.0, "learning_rate": 0.0002587017543859649, "loss": 0.3278, "step": 2680 }, { "epoch": 40.01, "learning_rate": 0.00025866666666666665, "loss": 0.1518, "step": 2681 }, { "epoch": 40.03, "learning_rate": 0.0002586315789473684, "loss": 0.3586, "step": 2682 }, { "epoch": 40.04, "learning_rate": 0.00025859649122807015, "loss": 0.0538, "step": 2683 }, { "epoch": 40.06, "learning_rate": 0.0002585614035087719, "loss": 0.211, "step": 2684 }, { "epoch": 40.07, "learning_rate": 0.00025852631578947364, "loss": 0.0149, "step": 2685 }, { "epoch": 40.09, "learning_rate": 0.0002584912280701754, "loss": 0.0996, "step": 2686 }, { "epoch": 40.1, "learning_rate": 0.0002584561403508772, "loss": 0.0451, "step": 2687 }, { "epoch": 40.12, "learning_rate": 0.00025842105263157894, "loss": 0.1934, "step": 2688 }, { "epoch": 40.13, "learning_rate": 0.0002583859649122807, "loss": 0.1372, "step": 2689 }, { "epoch": 40.15, "learning_rate": 0.00025835087719298244, "loss": 0.3718, "step": 2690 }, { "epoch": 40.16, "learning_rate": 0.0002583157894736842, "loss": 0.0081, "step": 2691 }, { "epoch": 40.18, "learning_rate": 0.00025828070175438594, "loss": 0.4675, "step": 2692 }, { "epoch": 40.19, "learning_rate": 0.0002582456140350877, "loss": 0.0657, "step": 2693 }, { "epoch": 40.21, "learning_rate": 0.0002582105263157895, "loss": 0.0938, "step": 2694 }, { "epoch": 40.22, "learning_rate": 0.00025817543859649124, "loss": 0.3227, "step": 2695 }, { "epoch": 40.24, "learning_rate": 0.00025814035087719293, "loss": 0.4954, "step": 2696 }, { "epoch": 40.25, "learning_rate": 0.0002581052631578947, "loss": 0.0188, "step": 2697 }, { "epoch": 40.27, "learning_rate": 0.0002580701754385965, "loss": 0.1594, "step": 2698 }, { "epoch": 40.28, "learning_rate": 0.00025803508771929823, "loss": 0.2826, "step": 2699 }, { "epoch": 40.3, "learning_rate": 0.000258, "loss": 0.135, "step": 2700 }, { "epoch": 40.31, "learning_rate": 0.00025796491228070173, "loss": 0.038, "step": 2701 }, { "epoch": 40.33, "learning_rate": 0.0002579298245614035, "loss": 0.2259, "step": 2702 }, { "epoch": 40.34, "learning_rate": 0.00025789473684210523, "loss": 0.0391, "step": 2703 }, { "epoch": 40.36, "learning_rate": 0.000257859649122807, "loss": 0.2152, "step": 2704 }, { "epoch": 40.37, "learning_rate": 0.0002578245614035087, "loss": 0.1713, "step": 2705 }, { "epoch": 40.39, "learning_rate": 0.00025778947368421053, "loss": 0.3175, "step": 2706 }, { "epoch": 40.4, "learning_rate": 0.0002577543859649123, "loss": 0.1026, "step": 2707 }, { "epoch": 40.42, "learning_rate": 0.00025771929824561403, "loss": 0.0078, "step": 2708 }, { "epoch": 40.43, "learning_rate": 0.0002576842105263158, "loss": 0.1131, "step": 2709 }, { "epoch": 40.45, "learning_rate": 0.0002576491228070175, "loss": 0.209, "step": 2710 }, { "epoch": 40.46, "learning_rate": 0.0002576140350877193, "loss": 0.161, "step": 2711 }, { "epoch": 40.48, "learning_rate": 0.000257578947368421, "loss": 0.096, "step": 2712 }, { "epoch": 40.49, "learning_rate": 0.00025754385964912277, "loss": 0.1569, "step": 2713 }, { "epoch": 40.51, "learning_rate": 0.0002575087719298245, "loss": 0.1204, "step": 2714 }, { "epoch": 40.52, "learning_rate": 0.00025747368421052627, "loss": 0.3505, "step": 2715 }, { "epoch": 40.54, "learning_rate": 0.000257438596491228, "loss": 0.172, "step": 2716 }, { "epoch": 40.55, "learning_rate": 0.0002574035087719298, "loss": 0.0452, "step": 2717 }, { "epoch": 40.57, "learning_rate": 0.00025736842105263157, "loss": 0.2946, "step": 2718 }, { "epoch": 40.58, "learning_rate": 0.0002573333333333333, "loss": 0.0231, "step": 2719 }, { "epoch": 40.59, "learning_rate": 0.00025729824561403507, "loss": 0.1766, "step": 2720 }, { "epoch": 40.61, "learning_rate": 0.0002572631578947368, "loss": 0.2658, "step": 2721 }, { "epoch": 40.62, "learning_rate": 0.00025722807017543856, "loss": 0.2161, "step": 2722 }, { "epoch": 40.64, "learning_rate": 0.0002571929824561403, "loss": 0.0936, "step": 2723 }, { "epoch": 40.65, "learning_rate": 0.0002571578947368421, "loss": 0.1312, "step": 2724 }, { "epoch": 40.67, "learning_rate": 0.00025712280701754386, "loss": 0.1671, "step": 2725 }, { "epoch": 40.68, "learning_rate": 0.0002570877192982456, "loss": 0.1141, "step": 2726 }, { "epoch": 40.7, "learning_rate": 0.0002570526315789473, "loss": 0.1463, "step": 2727 }, { "epoch": 40.71, "learning_rate": 0.0002570175438596491, "loss": 0.2518, "step": 2728 }, { "epoch": 40.73, "learning_rate": 0.00025698245614035086, "loss": 0.0229, "step": 2729 }, { "epoch": 40.74, "learning_rate": 0.0002569473684210526, "loss": 0.0613, "step": 2730 }, { "epoch": 40.76, "learning_rate": 0.00025691228070175436, "loss": 0.1352, "step": 2731 }, { "epoch": 40.77, "learning_rate": 0.0002568771929824561, "loss": 0.1684, "step": 2732 }, { "epoch": 40.79, "learning_rate": 0.00025684210526315786, "loss": 0.2984, "step": 2733 }, { "epoch": 40.8, "learning_rate": 0.0002568070175438596, "loss": 0.2261, "step": 2734 }, { "epoch": 40.82, "learning_rate": 0.0002567719298245614, "loss": 0.0484, "step": 2735 }, { "epoch": 40.83, "learning_rate": 0.00025673684210526316, "loss": 0.4353, "step": 2736 }, { "epoch": 40.85, "learning_rate": 0.0002567017543859649, "loss": 0.1866, "step": 2737 }, { "epoch": 40.86, "learning_rate": 0.00025666666666666665, "loss": 0.2467, "step": 2738 }, { "epoch": 40.88, "learning_rate": 0.0002566315789473684, "loss": 0.0329, "step": 2739 }, { "epoch": 40.89, "learning_rate": 0.00025659649122807015, "loss": 0.108, "step": 2740 }, { "epoch": 40.91, "learning_rate": 0.0002565614035087719, "loss": 0.025, "step": 2741 }, { "epoch": 40.92, "learning_rate": 0.00025652631578947365, "loss": 0.1324, "step": 2742 }, { "epoch": 40.94, "learning_rate": 0.00025649122807017545, "loss": 0.2477, "step": 2743 }, { "epoch": 40.95, "learning_rate": 0.00025645614035087715, "loss": 0.0161, "step": 2744 }, { "epoch": 40.97, "learning_rate": 0.0002564210526315789, "loss": 0.1783, "step": 2745 }, { "epoch": 40.98, "learning_rate": 0.0002563859649122807, "loss": 0.0191, "step": 2746 }, { "epoch": 41.0, "learning_rate": 0.00025635087719298245, "loss": 0.1988, "step": 2747 }, { "epoch": 41.01, "learning_rate": 0.0002563157894736842, "loss": 0.9461, "step": 2748 }, { "epoch": 41.03, "learning_rate": 0.00025628070175438594, "loss": 0.6253, "step": 2749 }, { "epoch": 41.04, "learning_rate": 0.0002562456140350877, "loss": 0.3213, "step": 2750 }, { "epoch": 41.06, "learning_rate": 0.00025621052631578944, "loss": 0.3665, "step": 2751 }, { "epoch": 41.07, "learning_rate": 0.0002561754385964912, "loss": 0.1163, "step": 2752 }, { "epoch": 41.09, "learning_rate": 0.00025614035087719294, "loss": 0.0786, "step": 2753 }, { "epoch": 41.1, "learning_rate": 0.00025610526315789474, "loss": 0.2727, "step": 2754 }, { "epoch": 41.12, "learning_rate": 0.0002560701754385965, "loss": 0.1635, "step": 2755 }, { "epoch": 41.13, "learning_rate": 0.00025603508771929824, "loss": 0.029, "step": 2756 }, { "epoch": 41.15, "learning_rate": 0.000256, "loss": 0.0872, "step": 2757 }, { "epoch": 41.16, "learning_rate": 0.00025596491228070174, "loss": 0.3625, "step": 2758 }, { "epoch": 41.18, "learning_rate": 0.0002559298245614035, "loss": 0.0278, "step": 2759 }, { "epoch": 41.19, "learning_rate": 0.00025589473684210523, "loss": 0.0368, "step": 2760 }, { "epoch": 41.21, "learning_rate": 0.000255859649122807, "loss": 0.6355, "step": 2761 }, { "epoch": 41.22, "learning_rate": 0.00025582456140350873, "loss": 0.0689, "step": 2762 }, { "epoch": 41.24, "learning_rate": 0.0002557894736842105, "loss": 0.0832, "step": 2763 }, { "epoch": 41.25, "learning_rate": 0.00025575438596491223, "loss": 0.3266, "step": 2764 }, { "epoch": 41.27, "learning_rate": 0.00025571929824561403, "loss": 0.0166, "step": 2765 }, { "epoch": 41.28, "learning_rate": 0.0002556842105263158, "loss": 0.3294, "step": 2766 }, { "epoch": 41.3, "learning_rate": 0.00025564912280701753, "loss": 0.3561, "step": 2767 }, { "epoch": 41.31, "learning_rate": 0.0002556140350877193, "loss": 0.4908, "step": 2768 }, { "epoch": 41.33, "learning_rate": 0.00025557894736842103, "loss": 0.3531, "step": 2769 }, { "epoch": 41.34, "learning_rate": 0.0002555438596491228, "loss": 0.3064, "step": 2770 }, { "epoch": 41.36, "learning_rate": 0.0002555087719298245, "loss": 0.2952, "step": 2771 }, { "epoch": 41.37, "learning_rate": 0.00025547368421052633, "loss": 0.0237, "step": 2772 }, { "epoch": 41.39, "learning_rate": 0.0002554385964912281, "loss": 0.0403, "step": 2773 }, { "epoch": 41.4, "learning_rate": 0.0002554035087719298, "loss": 0.0116, "step": 2774 }, { "epoch": 41.42, "learning_rate": 0.0002553684210526315, "loss": 0.0635, "step": 2775 }, { "epoch": 41.43, "learning_rate": 0.0002553333333333333, "loss": 0.0113, "step": 2776 }, { "epoch": 41.45, "learning_rate": 0.00025529824561403507, "loss": 0.0259, "step": 2777 }, { "epoch": 41.46, "learning_rate": 0.0002552631578947368, "loss": 0.025, "step": 2778 }, { "epoch": 41.48, "learning_rate": 0.00025522807017543857, "loss": 0.1297, "step": 2779 }, { "epoch": 41.49, "learning_rate": 0.0002551929824561403, "loss": 0.4284, "step": 2780 }, { "epoch": 41.51, "learning_rate": 0.00025515789473684207, "loss": 0.2254, "step": 2781 }, { "epoch": 41.52, "learning_rate": 0.0002551228070175438, "loss": 0.246, "step": 2782 }, { "epoch": 41.54, "learning_rate": 0.0002550877192982456, "loss": 0.1542, "step": 2783 }, { "epoch": 41.55, "learning_rate": 0.00025505263157894737, "loss": 0.0179, "step": 2784 }, { "epoch": 41.57, "learning_rate": 0.0002550175438596491, "loss": 0.1614, "step": 2785 }, { "epoch": 41.58, "learning_rate": 0.00025498245614035087, "loss": 0.4609, "step": 2786 }, { "epoch": 41.59, "learning_rate": 0.0002549473684210526, "loss": 0.24, "step": 2787 }, { "epoch": 41.61, "learning_rate": 0.00025491228070175436, "loss": 0.4677, "step": 2788 }, { "epoch": 41.62, "learning_rate": 0.0002548771929824561, "loss": 0.2245, "step": 2789 }, { "epoch": 41.64, "learning_rate": 0.00025484210526315786, "loss": 0.1657, "step": 2790 }, { "epoch": 41.65, "learning_rate": 0.00025480701754385966, "loss": 0.0554, "step": 2791 }, { "epoch": 41.67, "learning_rate": 0.00025477192982456136, "loss": 0.0357, "step": 2792 }, { "epoch": 41.68, "learning_rate": 0.0002547368421052631, "loss": 0.0624, "step": 2793 }, { "epoch": 41.7, "learning_rate": 0.00025470175438596486, "loss": 0.0994, "step": 2794 }, { "epoch": 41.71, "learning_rate": 0.00025466666666666666, "loss": 0.1196, "step": 2795 }, { "epoch": 41.73, "learning_rate": 0.0002546315789473684, "loss": 0.2126, "step": 2796 }, { "epoch": 41.74, "learning_rate": 0.00025459649122807016, "loss": 0.2458, "step": 2797 }, { "epoch": 41.76, "learning_rate": 0.0002545614035087719, "loss": 0.0517, "step": 2798 }, { "epoch": 41.77, "learning_rate": 0.00025452631578947365, "loss": 0.0656, "step": 2799 }, { "epoch": 41.79, "learning_rate": 0.0002544912280701754, "loss": 0.2169, "step": 2800 }, { "epoch": 41.79, "eval_accuracy": 0.8159569260890847, "eval_f1": 0.8147649383825286, "eval_loss": 0.789986789226532, "eval_runtime": 344.2728, "eval_samples_per_second": 11.868, "eval_steps_per_second": 0.744, "step": 2800 }, { "epoch": 41.8, "learning_rate": 0.00025445614035087715, "loss": 0.472, "step": 2801 }, { "epoch": 41.82, "learning_rate": 0.00025442105263157895, "loss": 0.3, "step": 2802 }, { "epoch": 41.83, "learning_rate": 0.0002543859649122807, "loss": 0.2579, "step": 2803 }, { "epoch": 41.85, "learning_rate": 0.00025435087719298245, "loss": 0.0847, "step": 2804 }, { "epoch": 41.86, "learning_rate": 0.00025431578947368415, "loss": 0.014, "step": 2805 }, { "epoch": 41.88, "learning_rate": 0.00025428070175438595, "loss": 0.0145, "step": 2806 }, { "epoch": 41.89, "learning_rate": 0.0002542456140350877, "loss": 0.1619, "step": 2807 }, { "epoch": 41.91, "learning_rate": 0.00025421052631578945, "loss": 0.1608, "step": 2808 }, { "epoch": 41.92, "learning_rate": 0.0002541754385964912, "loss": 0.0117, "step": 2809 }, { "epoch": 41.94, "learning_rate": 0.00025414035087719294, "loss": 0.3642, "step": 2810 }, { "epoch": 41.95, "learning_rate": 0.0002541052631578947, "loss": 0.0423, "step": 2811 }, { "epoch": 41.97, "learning_rate": 0.00025407017543859644, "loss": 0.0056, "step": 2812 }, { "epoch": 41.98, "learning_rate": 0.00025403508771929825, "loss": 0.0841, "step": 2813 }, { "epoch": 42.0, "learning_rate": 0.000254, "loss": 0.2236, "step": 2814 }, { "epoch": 42.01, "learning_rate": 0.00025396491228070174, "loss": 0.0366, "step": 2815 }, { "epoch": 42.03, "learning_rate": 0.0002539298245614035, "loss": 0.1367, "step": 2816 }, { "epoch": 42.04, "learning_rate": 0.00025389473684210524, "loss": 0.0367, "step": 2817 }, { "epoch": 42.06, "learning_rate": 0.000253859649122807, "loss": 0.0964, "step": 2818 }, { "epoch": 42.07, "learning_rate": 0.00025382456140350874, "loss": 0.3533, "step": 2819 }, { "epoch": 42.09, "learning_rate": 0.00025378947368421054, "loss": 0.1794, "step": 2820 }, { "epoch": 42.1, "learning_rate": 0.0002537543859649123, "loss": 0.3442, "step": 2821 }, { "epoch": 42.12, "learning_rate": 0.000253719298245614, "loss": 0.0575, "step": 2822 }, { "epoch": 42.13, "learning_rate": 0.00025368421052631573, "loss": 0.2326, "step": 2823 }, { "epoch": 42.15, "learning_rate": 0.00025364912280701754, "loss": 0.4745, "step": 2824 }, { "epoch": 42.16, "learning_rate": 0.0002536140350877193, "loss": 0.0171, "step": 2825 }, { "epoch": 42.18, "learning_rate": 0.00025357894736842103, "loss": 0.1074, "step": 2826 }, { "epoch": 42.19, "learning_rate": 0.0002535438596491228, "loss": 0.2265, "step": 2827 }, { "epoch": 42.21, "learning_rate": 0.00025350877192982453, "loss": 0.0297, "step": 2828 }, { "epoch": 42.22, "learning_rate": 0.0002534736842105263, "loss": 0.0187, "step": 2829 }, { "epoch": 42.24, "learning_rate": 0.00025343859649122803, "loss": 0.0116, "step": 2830 }, { "epoch": 42.25, "learning_rate": 0.0002534035087719298, "loss": 0.3781, "step": 2831 }, { "epoch": 42.27, "learning_rate": 0.0002533684210526316, "loss": 0.5599, "step": 2832 }, { "epoch": 42.28, "learning_rate": 0.00025333333333333333, "loss": 0.0833, "step": 2833 }, { "epoch": 42.3, "learning_rate": 0.0002532982456140351, "loss": 0.0891, "step": 2834 }, { "epoch": 42.31, "learning_rate": 0.0002532631578947368, "loss": 0.1705, "step": 2835 }, { "epoch": 42.33, "learning_rate": 0.0002532280701754386, "loss": 0.5718, "step": 2836 }, { "epoch": 42.34, "learning_rate": 0.0002531929824561403, "loss": 0.2741, "step": 2837 }, { "epoch": 42.36, "learning_rate": 0.0002531578947368421, "loss": 0.1348, "step": 2838 }, { "epoch": 42.37, "learning_rate": 0.0002531228070175439, "loss": 0.0928, "step": 2839 }, { "epoch": 42.39, "learning_rate": 0.00025308771929824557, "loss": 0.1806, "step": 2840 }, { "epoch": 42.4, "learning_rate": 0.0002530526315789473, "loss": 0.1709, "step": 2841 }, { "epoch": 42.42, "learning_rate": 0.00025301754385964907, "loss": 0.2734, "step": 2842 }, { "epoch": 42.43, "learning_rate": 0.00025298245614035087, "loss": 0.3118, "step": 2843 }, { "epoch": 42.45, "learning_rate": 0.0002529473684210526, "loss": 0.0629, "step": 2844 }, { "epoch": 42.46, "learning_rate": 0.00025291228070175437, "loss": 0.6791, "step": 2845 }, { "epoch": 42.48, "learning_rate": 0.0002528771929824561, "loss": 0.2058, "step": 2846 }, { "epoch": 42.49, "learning_rate": 0.00025284210526315787, "loss": 0.0604, "step": 2847 }, { "epoch": 42.51, "learning_rate": 0.0002528070175438596, "loss": 0.1178, "step": 2848 }, { "epoch": 42.52, "learning_rate": 0.00025277192982456136, "loss": 0.1147, "step": 2849 }, { "epoch": 42.54, "learning_rate": 0.00025273684210526317, "loss": 0.126, "step": 2850 }, { "epoch": 42.55, "learning_rate": 0.0002527017543859649, "loss": 0.0091, "step": 2851 }, { "epoch": 42.57, "learning_rate": 0.00025266666666666666, "loss": 0.1277, "step": 2852 }, { "epoch": 42.58, "learning_rate": 0.00025263157894736836, "loss": 0.5943, "step": 2853 }, { "epoch": 42.59, "learning_rate": 0.00025259649122807016, "loss": 0.1464, "step": 2854 }, { "epoch": 42.61, "learning_rate": 0.0002525614035087719, "loss": 0.6232, "step": 2855 }, { "epoch": 42.62, "learning_rate": 0.00025252631578947366, "loss": 0.2973, "step": 2856 }, { "epoch": 42.64, "learning_rate": 0.0002524912280701754, "loss": 0.1586, "step": 2857 }, { "epoch": 42.65, "learning_rate": 0.00025245614035087716, "loss": 0.0124, "step": 2858 }, { "epoch": 42.67, "learning_rate": 0.0002524210526315789, "loss": 0.0061, "step": 2859 }, { "epoch": 42.68, "learning_rate": 0.00025238596491228065, "loss": 0.015, "step": 2860 }, { "epoch": 42.7, "learning_rate": 0.00025235087719298246, "loss": 0.337, "step": 2861 }, { "epoch": 42.71, "learning_rate": 0.0002523157894736842, "loss": 0.1753, "step": 2862 }, { "epoch": 42.73, "learning_rate": 0.00025228070175438596, "loss": 0.0205, "step": 2863 }, { "epoch": 42.74, "learning_rate": 0.0002522456140350877, "loss": 0.1048, "step": 2864 }, { "epoch": 42.76, "learning_rate": 0.00025221052631578945, "loss": 0.6739, "step": 2865 }, { "epoch": 42.77, "learning_rate": 0.0002521754385964912, "loss": 0.0291, "step": 2866 }, { "epoch": 42.79, "learning_rate": 0.00025214035087719295, "loss": 0.0214, "step": 2867 }, { "epoch": 42.8, "learning_rate": 0.0002521052631578947, "loss": 0.2474, "step": 2868 }, { "epoch": 42.82, "learning_rate": 0.0002520701754385965, "loss": 0.3674, "step": 2869 }, { "epoch": 42.83, "learning_rate": 0.0002520350877192982, "loss": 0.2382, "step": 2870 }, { "epoch": 42.85, "learning_rate": 0.00025199999999999995, "loss": 0.2272, "step": 2871 }, { "epoch": 42.86, "learning_rate": 0.00025196491228070175, "loss": 0.0438, "step": 2872 }, { "epoch": 42.88, "learning_rate": 0.0002519298245614035, "loss": 0.2005, "step": 2873 }, { "epoch": 42.89, "learning_rate": 0.00025189473684210525, "loss": 0.1324, "step": 2874 }, { "epoch": 42.91, "learning_rate": 0.000251859649122807, "loss": 0.1544, "step": 2875 }, { "epoch": 42.92, "learning_rate": 0.00025182456140350874, "loss": 0.1731, "step": 2876 }, { "epoch": 42.94, "learning_rate": 0.0002517894736842105, "loss": 0.0689, "step": 2877 }, { "epoch": 42.95, "learning_rate": 0.00025175438596491224, "loss": 0.114, "step": 2878 }, { "epoch": 42.97, "learning_rate": 0.000251719298245614, "loss": 0.0568, "step": 2879 }, { "epoch": 42.98, "learning_rate": 0.0002516842105263158, "loss": 0.3209, "step": 2880 }, { "epoch": 43.0, "learning_rate": 0.00025164912280701754, "loss": 0.2897, "step": 2881 }, { "epoch": 43.01, "learning_rate": 0.0002516140350877193, "loss": 0.2322, "step": 2882 }, { "epoch": 43.03, "learning_rate": 0.00025157894736842104, "loss": 0.1146, "step": 2883 }, { "epoch": 43.04, "learning_rate": 0.0002515438596491228, "loss": 0.0472, "step": 2884 }, { "epoch": 43.06, "learning_rate": 0.00025150877192982454, "loss": 0.0171, "step": 2885 }, { "epoch": 43.07, "learning_rate": 0.0002514736842105263, "loss": 0.2853, "step": 2886 }, { "epoch": 43.09, "learning_rate": 0.0002514385964912281, "loss": 0.1367, "step": 2887 }, { "epoch": 43.1, "learning_rate": 0.0002514035087719298, "loss": 0.5374, "step": 2888 }, { "epoch": 43.12, "learning_rate": 0.00025136842105263153, "loss": 0.0226, "step": 2889 }, { "epoch": 43.13, "learning_rate": 0.0002513333333333333, "loss": 0.3714, "step": 2890 }, { "epoch": 43.15, "learning_rate": 0.0002512982456140351, "loss": 0.0909, "step": 2891 }, { "epoch": 43.16, "learning_rate": 0.00025126315789473683, "loss": 0.5632, "step": 2892 }, { "epoch": 43.18, "learning_rate": 0.0002512280701754386, "loss": 0.1337, "step": 2893 }, { "epoch": 43.19, "learning_rate": 0.00025119298245614033, "loss": 0.0837, "step": 2894 }, { "epoch": 43.21, "learning_rate": 0.0002511578947368421, "loss": 0.0137, "step": 2895 }, { "epoch": 43.22, "learning_rate": 0.00025112280701754383, "loss": 0.0378, "step": 2896 }, { "epoch": 43.24, "learning_rate": 0.0002510877192982456, "loss": 0.0816, "step": 2897 }, { "epoch": 43.25, "learning_rate": 0.0002510526315789474, "loss": 0.039, "step": 2898 }, { "epoch": 43.27, "learning_rate": 0.00025101754385964913, "loss": 0.4815, "step": 2899 }, { "epoch": 43.28, "learning_rate": 0.0002509824561403509, "loss": 0.0348, "step": 2900 }, { "epoch": 43.3, "learning_rate": 0.00025094736842105257, "loss": 0.3811, "step": 2901 }, { "epoch": 43.31, "learning_rate": 0.0002509122807017544, "loss": 0.1087, "step": 2902 }, { "epoch": 43.33, "learning_rate": 0.0002508771929824561, "loss": 0.0497, "step": 2903 }, { "epoch": 43.34, "learning_rate": 0.00025084210526315787, "loss": 0.1322, "step": 2904 }, { "epoch": 43.36, "learning_rate": 0.0002508070175438596, "loss": 0.2186, "step": 2905 }, { "epoch": 43.37, "learning_rate": 0.00025077192982456137, "loss": 0.1789, "step": 2906 }, { "epoch": 43.39, "learning_rate": 0.0002507368421052631, "loss": 0.3261, "step": 2907 }, { "epoch": 43.4, "learning_rate": 0.00025070175438596487, "loss": 0.0251, "step": 2908 }, { "epoch": 43.42, "learning_rate": 0.00025066666666666667, "loss": 0.3344, "step": 2909 }, { "epoch": 43.43, "learning_rate": 0.0002506315789473684, "loss": 0.2145, "step": 2910 }, { "epoch": 43.45, "learning_rate": 0.00025059649122807017, "loss": 0.2443, "step": 2911 }, { "epoch": 43.46, "learning_rate": 0.0002505614035087719, "loss": 0.1077, "step": 2912 }, { "epoch": 43.48, "learning_rate": 0.00025052631578947367, "loss": 0.0922, "step": 2913 }, { "epoch": 43.49, "learning_rate": 0.0002504912280701754, "loss": 0.2037, "step": 2914 }, { "epoch": 43.51, "learning_rate": 0.00025045614035087716, "loss": 0.1559, "step": 2915 }, { "epoch": 43.52, "learning_rate": 0.0002504210526315789, "loss": 0.1043, "step": 2916 }, { "epoch": 43.54, "learning_rate": 0.0002503859649122807, "loss": 0.2706, "step": 2917 }, { "epoch": 43.55, "learning_rate": 0.0002503508771929824, "loss": 0.0522, "step": 2918 }, { "epoch": 43.57, "learning_rate": 0.00025031578947368416, "loss": 0.0158, "step": 2919 }, { "epoch": 43.58, "learning_rate": 0.00025028070175438596, "loss": 0.3399, "step": 2920 }, { "epoch": 43.59, "learning_rate": 0.0002502456140350877, "loss": 0.1641, "step": 2921 }, { "epoch": 43.61, "learning_rate": 0.00025021052631578946, "loss": 0.3141, "step": 2922 }, { "epoch": 43.62, "learning_rate": 0.0002501754385964912, "loss": 0.0162, "step": 2923 }, { "epoch": 43.64, "learning_rate": 0.00025014035087719296, "loss": 0.561, "step": 2924 }, { "epoch": 43.65, "learning_rate": 0.0002501052631578947, "loss": 0.1644, "step": 2925 }, { "epoch": 43.67, "learning_rate": 0.00025007017543859645, "loss": 0.2219, "step": 2926 }, { "epoch": 43.68, "learning_rate": 0.0002500350877192982, "loss": 0.1341, "step": 2927 }, { "epoch": 43.7, "learning_rate": 0.00025, "loss": 0.33, "step": 2928 }, { "epoch": 43.71, "learning_rate": 0.00024996491228070175, "loss": 0.1807, "step": 2929 }, { "epoch": 43.73, "learning_rate": 0.0002499298245614035, "loss": 0.1551, "step": 2930 }, { "epoch": 43.74, "learning_rate": 0.00024989473684210525, "loss": 0.1855, "step": 2931 }, { "epoch": 43.76, "learning_rate": 0.000249859649122807, "loss": 0.0575, "step": 2932 }, { "epoch": 43.77, "learning_rate": 0.00024982456140350875, "loss": 0.2949, "step": 2933 }, { "epoch": 43.79, "learning_rate": 0.0002497894736842105, "loss": 0.1351, "step": 2934 }, { "epoch": 43.8, "learning_rate": 0.00024975438596491225, "loss": 0.1623, "step": 2935 }, { "epoch": 43.82, "learning_rate": 0.000249719298245614, "loss": 0.3443, "step": 2936 }, { "epoch": 43.83, "learning_rate": 0.00024968421052631574, "loss": 0.1855, "step": 2937 }, { "epoch": 43.85, "learning_rate": 0.0002496491228070175, "loss": 0.498, "step": 2938 }, { "epoch": 43.86, "learning_rate": 0.0002496140350877193, "loss": 0.1217, "step": 2939 }, { "epoch": 43.88, "learning_rate": 0.00024957894736842104, "loss": 0.021, "step": 2940 }, { "epoch": 43.89, "learning_rate": 0.0002495438596491228, "loss": 0.0545, "step": 2941 }, { "epoch": 43.91, "learning_rate": 0.00024950877192982454, "loss": 0.0702, "step": 2942 }, { "epoch": 43.92, "learning_rate": 0.0002494736842105263, "loss": 0.2528, "step": 2943 }, { "epoch": 43.94, "learning_rate": 0.00024943859649122804, "loss": 0.0393, "step": 2944 }, { "epoch": 43.95, "learning_rate": 0.0002494035087719298, "loss": 0.2664, "step": 2945 }, { "epoch": 43.97, "learning_rate": 0.0002493684210526316, "loss": 0.3916, "step": 2946 }, { "epoch": 43.98, "learning_rate": 0.00024933333333333334, "loss": 0.08, "step": 2947 }, { "epoch": 44.0, "learning_rate": 0.0002492982456140351, "loss": 0.07, "step": 2948 }, { "epoch": 44.01, "learning_rate": 0.0002492631578947368, "loss": 0.4027, "step": 2949 }, { "epoch": 44.03, "learning_rate": 0.0002492280701754386, "loss": 0.1744, "step": 2950 }, { "epoch": 44.04, "learning_rate": 0.00024919298245614034, "loss": 0.1237, "step": 2951 }, { "epoch": 44.06, "learning_rate": 0.0002491578947368421, "loss": 0.1386, "step": 2952 }, { "epoch": 44.07, "learning_rate": 0.00024912280701754383, "loss": 0.1239, "step": 2953 }, { "epoch": 44.09, "learning_rate": 0.0002490877192982456, "loss": 0.3292, "step": 2954 }, { "epoch": 44.1, "learning_rate": 0.00024905263157894733, "loss": 0.1744, "step": 2955 }, { "epoch": 44.12, "learning_rate": 0.0002490175438596491, "loss": 0.2993, "step": 2956 }, { "epoch": 44.13, "learning_rate": 0.00024898245614035083, "loss": 0.0331, "step": 2957 }, { "epoch": 44.15, "learning_rate": 0.00024894736842105263, "loss": 0.1431, "step": 2958 }, { "epoch": 44.16, "learning_rate": 0.0002489122807017544, "loss": 0.0802, "step": 2959 }, { "epoch": 44.18, "learning_rate": 0.00024887719298245613, "loss": 0.131, "step": 2960 }, { "epoch": 44.19, "learning_rate": 0.0002488421052631579, "loss": 0.098, "step": 2961 }, { "epoch": 44.21, "learning_rate": 0.0002488070175438596, "loss": 0.0511, "step": 2962 }, { "epoch": 44.22, "learning_rate": 0.0002487719298245614, "loss": 0.2082, "step": 2963 }, { "epoch": 44.24, "learning_rate": 0.0002487368421052631, "loss": 0.0368, "step": 2964 }, { "epoch": 44.25, "learning_rate": 0.0002487017543859649, "loss": 0.0232, "step": 2965 }, { "epoch": 44.27, "learning_rate": 0.0002486666666666666, "loss": 0.1291, "step": 2966 }, { "epoch": 44.28, "learning_rate": 0.00024863157894736837, "loss": 0.015, "step": 2967 }, { "epoch": 44.3, "learning_rate": 0.0002485964912280701, "loss": 0.3891, "step": 2968 }, { "epoch": 44.31, "learning_rate": 0.0002485614035087719, "loss": 0.0984, "step": 2969 }, { "epoch": 44.33, "learning_rate": 0.00024852631578947367, "loss": 0.3002, "step": 2970 }, { "epoch": 44.34, "learning_rate": 0.0002484912280701754, "loss": 0.0087, "step": 2971 }, { "epoch": 44.36, "learning_rate": 0.00024845614035087717, "loss": 0.2016, "step": 2972 }, { "epoch": 44.37, "learning_rate": 0.0002484210526315789, "loss": 0.0873, "step": 2973 }, { "epoch": 44.39, "learning_rate": 0.00024838596491228067, "loss": 0.0506, "step": 2974 }, { "epoch": 44.4, "learning_rate": 0.0002483508771929824, "loss": 0.2401, "step": 2975 }, { "epoch": 44.42, "learning_rate": 0.0002483157894736842, "loss": 0.0794, "step": 2976 }, { "epoch": 44.43, "learning_rate": 0.00024828070175438597, "loss": 0.0242, "step": 2977 }, { "epoch": 44.45, "learning_rate": 0.0002482456140350877, "loss": 0.0321, "step": 2978 }, { "epoch": 44.46, "learning_rate": 0.00024821052631578946, "loss": 0.1454, "step": 2979 }, { "epoch": 44.48, "learning_rate": 0.0002481754385964912, "loss": 0.0272, "step": 2980 }, { "epoch": 44.49, "learning_rate": 0.00024814035087719296, "loss": 0.0047, "step": 2981 }, { "epoch": 44.51, "learning_rate": 0.0002481052631578947, "loss": 0.0876, "step": 2982 }, { "epoch": 44.52, "learning_rate": 0.00024807017543859646, "loss": 0.053, "step": 2983 }, { "epoch": 44.54, "learning_rate": 0.0002480350877192982, "loss": 0.0717, "step": 2984 }, { "epoch": 44.55, "learning_rate": 0.00024799999999999996, "loss": 0.5333, "step": 2985 }, { "epoch": 44.57, "learning_rate": 0.0002479649122807017, "loss": 0.0326, "step": 2986 }, { "epoch": 44.58, "learning_rate": 0.0002479298245614035, "loss": 0.1626, "step": 2987 }, { "epoch": 44.59, "learning_rate": 0.00024789473684210526, "loss": 0.1837, "step": 2988 }, { "epoch": 44.61, "learning_rate": 0.000247859649122807, "loss": 0.3757, "step": 2989 }, { "epoch": 44.62, "learning_rate": 0.00024782456140350875, "loss": 0.065, "step": 2990 }, { "epoch": 44.64, "learning_rate": 0.0002477894736842105, "loss": 0.21, "step": 2991 }, { "epoch": 44.65, "learning_rate": 0.00024775438596491225, "loss": 0.0765, "step": 2992 }, { "epoch": 44.67, "learning_rate": 0.000247719298245614, "loss": 0.1718, "step": 2993 }, { "epoch": 44.68, "learning_rate": 0.00024768421052631575, "loss": 0.0413, "step": 2994 }, { "epoch": 44.7, "learning_rate": 0.00024764912280701755, "loss": 0.0597, "step": 2995 }, { "epoch": 44.71, "learning_rate": 0.0002476140350877193, "loss": 0.2591, "step": 2996 }, { "epoch": 44.73, "learning_rate": 0.000247578947368421, "loss": 0.0086, "step": 2997 }, { "epoch": 44.74, "learning_rate": 0.0002475438596491228, "loss": 0.2637, "step": 2998 }, { "epoch": 44.76, "learning_rate": 0.00024750877192982455, "loss": 0.7918, "step": 2999 }, { "epoch": 44.77, "learning_rate": 0.0002474736842105263, "loss": 0.3942, "step": 3000 }, { "epoch": 44.77, "eval_accuracy": 0.8042094958394518, "eval_f1": 0.8129959248685728, "eval_loss": 0.8620648384094238, "eval_runtime": 344.2906, "eval_samples_per_second": 11.868, "eval_steps_per_second": 0.744, "step": 3000 }, { "epoch": 44.79, "learning_rate": 0.00024743859649122805, "loss": 0.4602, "step": 3001 }, { "epoch": 44.8, "learning_rate": 0.0002474035087719298, "loss": 0.1656, "step": 3002 }, { "epoch": 44.82, "learning_rate": 0.00024736842105263154, "loss": 0.3406, "step": 3003 }, { "epoch": 44.83, "learning_rate": 0.0002473333333333333, "loss": 0.2357, "step": 3004 }, { "epoch": 44.85, "learning_rate": 0.00024729824561403504, "loss": 0.5817, "step": 3005 }, { "epoch": 44.86, "learning_rate": 0.00024726315789473684, "loss": 0.0355, "step": 3006 }, { "epoch": 44.88, "learning_rate": 0.0002472280701754386, "loss": 0.332, "step": 3007 }, { "epoch": 44.89, "learning_rate": 0.00024719298245614034, "loss": 0.0443, "step": 3008 }, { "epoch": 44.91, "learning_rate": 0.0002471578947368421, "loss": 0.3544, "step": 3009 }, { "epoch": 44.92, "learning_rate": 0.00024712280701754384, "loss": 0.2409, "step": 3010 }, { "epoch": 44.94, "learning_rate": 0.0002470877192982456, "loss": 0.025, "step": 3011 }, { "epoch": 44.95, "learning_rate": 0.00024705263157894734, "loss": 0.1748, "step": 3012 }, { "epoch": 44.97, "learning_rate": 0.00024701754385964914, "loss": 0.0428, "step": 3013 }, { "epoch": 44.98, "learning_rate": 0.00024698245614035083, "loss": 0.1191, "step": 3014 }, { "epoch": 45.0, "learning_rate": 0.0002469473684210526, "loss": 0.2148, "step": 3015 }, { "epoch": 45.01, "learning_rate": 0.00024691228070175433, "loss": 0.3834, "step": 3016 }, { "epoch": 45.03, "learning_rate": 0.00024687719298245613, "loss": 0.2043, "step": 3017 }, { "epoch": 45.04, "learning_rate": 0.0002468421052631579, "loss": 0.0231, "step": 3018 }, { "epoch": 45.06, "learning_rate": 0.00024680701754385963, "loss": 0.17, "step": 3019 }, { "epoch": 45.07, "learning_rate": 0.0002467719298245614, "loss": 0.2151, "step": 3020 }, { "epoch": 45.09, "learning_rate": 0.00024673684210526313, "loss": 0.0963, "step": 3021 }, { "epoch": 45.1, "learning_rate": 0.0002467017543859649, "loss": 0.0191, "step": 3022 }, { "epoch": 45.12, "learning_rate": 0.0002466666666666666, "loss": 0.1489, "step": 3023 }, { "epoch": 45.13, "learning_rate": 0.00024663157894736843, "loss": 0.3331, "step": 3024 }, { "epoch": 45.15, "learning_rate": 0.0002465964912280702, "loss": 0.3295, "step": 3025 }, { "epoch": 45.16, "learning_rate": 0.00024656140350877193, "loss": 0.2693, "step": 3026 }, { "epoch": 45.18, "learning_rate": 0.0002465263157894737, "loss": 0.4132, "step": 3027 }, { "epoch": 45.19, "learning_rate": 0.0002464912280701754, "loss": 0.0197, "step": 3028 }, { "epoch": 45.21, "learning_rate": 0.0002464561403508772, "loss": 0.0577, "step": 3029 }, { "epoch": 45.22, "learning_rate": 0.0002464210526315789, "loss": 0.1374, "step": 3030 }, { "epoch": 45.24, "learning_rate": 0.00024638596491228067, "loss": 0.1153, "step": 3031 }, { "epoch": 45.25, "learning_rate": 0.0002463508771929824, "loss": 0.1013, "step": 3032 }, { "epoch": 45.27, "learning_rate": 0.00024631578947368417, "loss": 0.2066, "step": 3033 }, { "epoch": 45.28, "learning_rate": 0.0002462807017543859, "loss": 0.0493, "step": 3034 }, { "epoch": 45.3, "learning_rate": 0.0002462456140350877, "loss": 0.1365, "step": 3035 }, { "epoch": 45.31, "learning_rate": 0.00024621052631578947, "loss": 0.088, "step": 3036 }, { "epoch": 45.33, "learning_rate": 0.0002461754385964912, "loss": 0.0117, "step": 3037 }, { "epoch": 45.34, "learning_rate": 0.00024614035087719297, "loss": 0.0095, "step": 3038 }, { "epoch": 45.36, "learning_rate": 0.0002461052631578947, "loss": 0.1326, "step": 3039 }, { "epoch": 45.37, "learning_rate": 0.00024607017543859646, "loss": 0.1197, "step": 3040 }, { "epoch": 45.39, "learning_rate": 0.0002460350877192982, "loss": 0.029, "step": 3041 }, { "epoch": 45.4, "learning_rate": 0.00024599999999999996, "loss": 0.1934, "step": 3042 }, { "epoch": 45.42, "learning_rate": 0.00024596491228070177, "loss": 0.1238, "step": 3043 }, { "epoch": 45.43, "learning_rate": 0.0002459298245614035, "loss": 0.0603, "step": 3044 }, { "epoch": 45.45, "learning_rate": 0.0002458947368421052, "loss": 0.0289, "step": 3045 }, { "epoch": 45.46, "learning_rate": 0.000245859649122807, "loss": 0.3161, "step": 3046 }, { "epoch": 45.48, "learning_rate": 0.00024582456140350876, "loss": 0.1879, "step": 3047 }, { "epoch": 45.49, "learning_rate": 0.0002457894736842105, "loss": 0.2013, "step": 3048 }, { "epoch": 45.51, "learning_rate": 0.00024575438596491226, "loss": 0.2212, "step": 3049 }, { "epoch": 45.52, "learning_rate": 0.000245719298245614, "loss": 0.4477, "step": 3050 }, { "epoch": 45.54, "learning_rate": 0.00024568421052631576, "loss": 0.2636, "step": 3051 }, { "epoch": 45.55, "learning_rate": 0.0002456491228070175, "loss": 0.3108, "step": 3052 }, { "epoch": 45.57, "learning_rate": 0.00024561403508771925, "loss": 0.2828, "step": 3053 }, { "epoch": 45.58, "learning_rate": 0.00024557894736842106, "loss": 0.0839, "step": 3054 }, { "epoch": 45.59, "learning_rate": 0.0002455438596491228, "loss": 0.1829, "step": 3055 }, { "epoch": 45.61, "learning_rate": 0.00024550877192982455, "loss": 0.0478, "step": 3056 }, { "epoch": 45.62, "learning_rate": 0.0002454736842105263, "loss": 0.2361, "step": 3057 }, { "epoch": 45.64, "learning_rate": 0.00024543859649122805, "loss": 0.1828, "step": 3058 }, { "epoch": 45.65, "learning_rate": 0.0002454035087719298, "loss": 0.245, "step": 3059 }, { "epoch": 45.67, "learning_rate": 0.00024536842105263155, "loss": 0.139, "step": 3060 }, { "epoch": 45.68, "learning_rate": 0.00024533333333333335, "loss": 0.1808, "step": 3061 }, { "epoch": 45.7, "learning_rate": 0.00024529824561403505, "loss": 0.0683, "step": 3062 }, { "epoch": 45.71, "learning_rate": 0.0002452631578947368, "loss": 0.0367, "step": 3063 }, { "epoch": 45.73, "learning_rate": 0.00024522807017543854, "loss": 0.1152, "step": 3064 }, { "epoch": 45.74, "learning_rate": 0.00024519298245614035, "loss": 0.1368, "step": 3065 }, { "epoch": 45.76, "learning_rate": 0.0002451578947368421, "loss": 0.0907, "step": 3066 }, { "epoch": 45.77, "learning_rate": 0.00024512280701754384, "loss": 0.0304, "step": 3067 }, { "epoch": 45.79, "learning_rate": 0.0002450877192982456, "loss": 0.156, "step": 3068 }, { "epoch": 45.8, "learning_rate": 0.00024505263157894734, "loss": 0.3528, "step": 3069 }, { "epoch": 45.82, "learning_rate": 0.0002450175438596491, "loss": 0.0618, "step": 3070 }, { "epoch": 45.83, "learning_rate": 0.00024498245614035084, "loss": 0.2586, "step": 3071 }, { "epoch": 45.85, "learning_rate": 0.00024494736842105264, "loss": 0.2432, "step": 3072 }, { "epoch": 45.86, "learning_rate": 0.0002449122807017544, "loss": 0.1289, "step": 3073 }, { "epoch": 45.88, "learning_rate": 0.00024487719298245614, "loss": 0.0413, "step": 3074 }, { "epoch": 45.89, "learning_rate": 0.0002448421052631579, "loss": 0.228, "step": 3075 }, { "epoch": 45.91, "learning_rate": 0.00024480701754385964, "loss": 0.2158, "step": 3076 }, { "epoch": 45.92, "learning_rate": 0.0002447719298245614, "loss": 0.0989, "step": 3077 }, { "epoch": 45.94, "learning_rate": 0.00024473684210526314, "loss": 0.3633, "step": 3078 }, { "epoch": 45.95, "learning_rate": 0.0002447017543859649, "loss": 0.2802, "step": 3079 }, { "epoch": 45.97, "learning_rate": 0.00024466666666666663, "loss": 0.1715, "step": 3080 }, { "epoch": 45.98, "learning_rate": 0.0002446315789473684, "loss": 0.0474, "step": 3081 }, { "epoch": 46.0, "learning_rate": 0.00024459649122807013, "loss": 0.3234, "step": 3082 }, { "epoch": 46.01, "learning_rate": 0.00024456140350877193, "loss": 0.3223, "step": 3083 }, { "epoch": 46.03, "learning_rate": 0.0002445263157894737, "loss": 0.0872, "step": 3084 }, { "epoch": 46.04, "learning_rate": 0.00024449122807017543, "loss": 0.0712, "step": 3085 }, { "epoch": 46.06, "learning_rate": 0.0002444561403508772, "loss": 0.1009, "step": 3086 }, { "epoch": 46.07, "learning_rate": 0.00024442105263157893, "loss": 0.3466, "step": 3087 }, { "epoch": 46.09, "learning_rate": 0.0002443859649122807, "loss": 0.3214, "step": 3088 }, { "epoch": 46.1, "learning_rate": 0.0002443508771929824, "loss": 0.507, "step": 3089 }, { "epoch": 46.12, "learning_rate": 0.0002443157894736842, "loss": 0.0643, "step": 3090 }, { "epoch": 46.13, "learning_rate": 0.000244280701754386, "loss": 0.0152, "step": 3091 }, { "epoch": 46.15, "learning_rate": 0.0002442456140350877, "loss": 0.0998, "step": 3092 }, { "epoch": 46.16, "learning_rate": 0.0002442105263157894, "loss": 0.0875, "step": 3093 }, { "epoch": 46.18, "learning_rate": 0.00024417543859649117, "loss": 0.1965, "step": 3094 }, { "epoch": 46.19, "learning_rate": 0.00024414035087719297, "loss": 0.124, "step": 3095 }, { "epoch": 46.21, "learning_rate": 0.00024410526315789472, "loss": 0.0267, "step": 3096 }, { "epoch": 46.22, "learning_rate": 0.00024407017543859647, "loss": 0.0135, "step": 3097 }, { "epoch": 46.24, "learning_rate": 0.00024403508771929825, "loss": 0.1685, "step": 3098 }, { "epoch": 46.25, "learning_rate": 0.000244, "loss": 0.2474, "step": 3099 }, { "epoch": 46.27, "learning_rate": 0.00024396491228070172, "loss": 0.1671, "step": 3100 }, { "epoch": 46.28, "learning_rate": 0.00024392982456140347, "loss": 0.1471, "step": 3101 }, { "epoch": 46.3, "learning_rate": 0.00024389473684210524, "loss": 0.2128, "step": 3102 }, { "epoch": 46.31, "learning_rate": 0.000243859649122807, "loss": 0.0124, "step": 3103 }, { "epoch": 46.33, "learning_rate": 0.00024382456140350874, "loss": 0.3404, "step": 3104 }, { "epoch": 46.34, "learning_rate": 0.0002437894736842105, "loss": 0.1251, "step": 3105 }, { "epoch": 46.36, "learning_rate": 0.00024375438596491226, "loss": 0.1597, "step": 3106 }, { "epoch": 46.37, "learning_rate": 0.000243719298245614, "loss": 0.0564, "step": 3107 }, { "epoch": 46.39, "learning_rate": 0.00024368421052631576, "loss": 0.0055, "step": 3108 }, { "epoch": 46.4, "learning_rate": 0.00024364912280701754, "loss": 0.206, "step": 3109 }, { "epoch": 46.42, "learning_rate": 0.00024361403508771929, "loss": 0.1958, "step": 3110 }, { "epoch": 46.43, "learning_rate": 0.00024357894736842103, "loss": 0.2692, "step": 3111 }, { "epoch": 46.45, "learning_rate": 0.00024354385964912278, "loss": 0.045, "step": 3112 }, { "epoch": 46.46, "learning_rate": 0.00024350877192982456, "loss": 0.1451, "step": 3113 }, { "epoch": 46.48, "learning_rate": 0.0002434736842105263, "loss": 0.066, "step": 3114 }, { "epoch": 46.49, "learning_rate": 0.00024343859649122806, "loss": 0.0914, "step": 3115 }, { "epoch": 46.51, "learning_rate": 0.00024340350877192978, "loss": 0.4548, "step": 3116 }, { "epoch": 46.52, "learning_rate": 0.00024336842105263158, "loss": 0.2507, "step": 3117 }, { "epoch": 46.54, "learning_rate": 0.0002433333333333333, "loss": 0.0096, "step": 3118 }, { "epoch": 46.55, "learning_rate": 0.00024329824561403505, "loss": 0.1576, "step": 3119 }, { "epoch": 46.57, "learning_rate": 0.0002432631578947368, "loss": 0.2727, "step": 3120 }, { "epoch": 46.58, "learning_rate": 0.00024322807017543858, "loss": 0.0633, "step": 3121 }, { "epoch": 46.59, "learning_rate": 0.00024319298245614033, "loss": 0.0677, "step": 3122 }, { "epoch": 46.61, "learning_rate": 0.00024315789473684207, "loss": 0.2125, "step": 3123 }, { "epoch": 46.62, "learning_rate": 0.00024312280701754385, "loss": 0.06, "step": 3124 }, { "epoch": 46.64, "learning_rate": 0.0002430877192982456, "loss": 0.0515, "step": 3125 }, { "epoch": 46.65, "learning_rate": 0.00024305263157894735, "loss": 0.0243, "step": 3126 }, { "epoch": 46.67, "learning_rate": 0.0002430175438596491, "loss": 0.1532, "step": 3127 }, { "epoch": 46.68, "learning_rate": 0.00024298245614035087, "loss": 0.2456, "step": 3128 }, { "epoch": 46.7, "learning_rate": 0.00024294736842105262, "loss": 0.034, "step": 3129 }, { "epoch": 46.71, "learning_rate": 0.00024291228070175437, "loss": 0.1093, "step": 3130 }, { "epoch": 46.73, "learning_rate": 0.0002428771929824561, "loss": 0.0393, "step": 3131 }, { "epoch": 46.74, "learning_rate": 0.0002428421052631579, "loss": 0.1327, "step": 3132 }, { "epoch": 46.76, "learning_rate": 0.00024280701754385962, "loss": 0.1199, "step": 3133 }, { "epoch": 46.77, "learning_rate": 0.00024277192982456136, "loss": 0.358, "step": 3134 }, { "epoch": 46.79, "learning_rate": 0.00024273684210526314, "loss": 0.08, "step": 3135 }, { "epoch": 46.8, "learning_rate": 0.0002427017543859649, "loss": 0.3817, "step": 3136 }, { "epoch": 46.82, "learning_rate": 0.00024266666666666664, "loss": 0.0779, "step": 3137 }, { "epoch": 46.83, "learning_rate": 0.0002426315789473684, "loss": 0.1329, "step": 3138 }, { "epoch": 46.85, "learning_rate": 0.00024259649122807016, "loss": 0.1074, "step": 3139 }, { "epoch": 46.86, "learning_rate": 0.0002425614035087719, "loss": 0.1433, "step": 3140 }, { "epoch": 46.88, "learning_rate": 0.00024252631578947366, "loss": 0.1577, "step": 3141 }, { "epoch": 46.89, "learning_rate": 0.0002424912280701754, "loss": 0.3007, "step": 3142 }, { "epoch": 46.91, "learning_rate": 0.00024245614035087719, "loss": 0.2814, "step": 3143 }, { "epoch": 46.92, "learning_rate": 0.00024242105263157893, "loss": 0.3594, "step": 3144 }, { "epoch": 46.94, "learning_rate": 0.00024238596491228068, "loss": 0.1507, "step": 3145 }, { "epoch": 46.95, "learning_rate": 0.00024235087719298246, "loss": 0.1063, "step": 3146 }, { "epoch": 46.97, "learning_rate": 0.0002423157894736842, "loss": 0.2693, "step": 3147 }, { "epoch": 46.98, "learning_rate": 0.00024228070175438593, "loss": 0.112, "step": 3148 }, { "epoch": 47.0, "learning_rate": 0.00024224561403508768, "loss": 0.3829, "step": 3149 }, { "epoch": 47.01, "learning_rate": 0.00024221052631578945, "loss": 0.1906, "step": 3150 }, { "epoch": 47.03, "learning_rate": 0.0002421754385964912, "loss": 0.1587, "step": 3151 }, { "epoch": 47.04, "learning_rate": 0.00024214035087719295, "loss": 0.1186, "step": 3152 }, { "epoch": 47.06, "learning_rate": 0.0002421052631578947, "loss": 0.1939, "step": 3153 }, { "epoch": 47.07, "learning_rate": 0.00024207017543859648, "loss": 0.0562, "step": 3154 }, { "epoch": 47.09, "learning_rate": 0.00024203508771929822, "loss": 0.2518, "step": 3155 }, { "epoch": 47.1, "learning_rate": 0.00024199999999999997, "loss": 0.255, "step": 3156 }, { "epoch": 47.12, "learning_rate": 0.00024196491228070172, "loss": 0.0486, "step": 3157 }, { "epoch": 47.13, "learning_rate": 0.0002419298245614035, "loss": 0.2381, "step": 3158 }, { "epoch": 47.15, "learning_rate": 0.00024189473684210525, "loss": 0.1361, "step": 3159 }, { "epoch": 47.16, "learning_rate": 0.000241859649122807, "loss": 0.157, "step": 3160 }, { "epoch": 47.18, "learning_rate": 0.00024182456140350877, "loss": 0.5923, "step": 3161 }, { "epoch": 47.19, "learning_rate": 0.00024178947368421052, "loss": 0.1404, "step": 3162 }, { "epoch": 47.21, "learning_rate": 0.00024175438596491227, "loss": 0.013, "step": 3163 }, { "epoch": 47.22, "learning_rate": 0.000241719298245614, "loss": 0.0112, "step": 3164 }, { "epoch": 47.24, "learning_rate": 0.0002416842105263158, "loss": 0.2291, "step": 3165 }, { "epoch": 47.25, "learning_rate": 0.00024164912280701752, "loss": 0.1353, "step": 3166 }, { "epoch": 47.27, "learning_rate": 0.00024161403508771926, "loss": 0.0393, "step": 3167 }, { "epoch": 47.28, "learning_rate": 0.000241578947368421, "loss": 0.0808, "step": 3168 }, { "epoch": 47.3, "learning_rate": 0.0002415438596491228, "loss": 0.0112, "step": 3169 }, { "epoch": 47.31, "learning_rate": 0.00024150877192982454, "loss": 0.0361, "step": 3170 }, { "epoch": 47.33, "learning_rate": 0.00024147368421052629, "loss": 0.1137, "step": 3171 }, { "epoch": 47.34, "learning_rate": 0.00024143859649122806, "loss": 0.0466, "step": 3172 }, { "epoch": 47.36, "learning_rate": 0.0002414035087719298, "loss": 0.0181, "step": 3173 }, { "epoch": 47.37, "learning_rate": 0.00024136842105263156, "loss": 0.0182, "step": 3174 }, { "epoch": 47.39, "learning_rate": 0.0002413333333333333, "loss": 0.0145, "step": 3175 }, { "epoch": 47.4, "learning_rate": 0.00024129824561403508, "loss": 0.2759, "step": 3176 }, { "epoch": 47.42, "learning_rate": 0.00024126315789473683, "loss": 0.1418, "step": 3177 }, { "epoch": 47.43, "learning_rate": 0.00024122807017543858, "loss": 0.1672, "step": 3178 }, { "epoch": 47.45, "learning_rate": 0.0002411929824561403, "loss": 0.1402, "step": 3179 }, { "epoch": 47.46, "learning_rate": 0.0002411578947368421, "loss": 0.1563, "step": 3180 }, { "epoch": 47.48, "learning_rate": 0.00024112280701754383, "loss": 0.0914, "step": 3181 }, { "epoch": 47.49, "learning_rate": 0.00024108771929824558, "loss": 0.0067, "step": 3182 }, { "epoch": 47.51, "learning_rate": 0.00024105263157894735, "loss": 0.1717, "step": 3183 }, { "epoch": 47.52, "learning_rate": 0.0002410175438596491, "loss": 0.2388, "step": 3184 }, { "epoch": 47.54, "learning_rate": 0.00024098245614035085, "loss": 0.561, "step": 3185 }, { "epoch": 47.55, "learning_rate": 0.0002409473684210526, "loss": 0.2598, "step": 3186 }, { "epoch": 47.57, "learning_rate": 0.00024091228070175438, "loss": 0.0856, "step": 3187 }, { "epoch": 47.58, "learning_rate": 0.00024087719298245612, "loss": 0.2389, "step": 3188 }, { "epoch": 47.59, "learning_rate": 0.00024084210526315787, "loss": 0.0659, "step": 3189 }, { "epoch": 47.61, "learning_rate": 0.00024080701754385962, "loss": 0.1238, "step": 3190 }, { "epoch": 47.62, "learning_rate": 0.0002407719298245614, "loss": 0.0759, "step": 3191 }, { "epoch": 47.64, "learning_rate": 0.00024073684210526315, "loss": 0.1634, "step": 3192 }, { "epoch": 47.65, "learning_rate": 0.0002407017543859649, "loss": 0.184, "step": 3193 }, { "epoch": 47.67, "learning_rate": 0.00024066666666666662, "loss": 0.2784, "step": 3194 }, { "epoch": 47.68, "learning_rate": 0.00024063157894736842, "loss": 0.1188, "step": 3195 }, { "epoch": 47.7, "learning_rate": 0.00024059649122807014, "loss": 0.0494, "step": 3196 }, { "epoch": 47.71, "learning_rate": 0.0002405614035087719, "loss": 0.0575, "step": 3197 }, { "epoch": 47.73, "learning_rate": 0.00024052631578947367, "loss": 0.2455, "step": 3198 }, { "epoch": 47.74, "learning_rate": 0.00024049122807017541, "loss": 0.1662, "step": 3199 }, { "epoch": 47.76, "learning_rate": 0.00024045614035087716, "loss": 0.2306, "step": 3200 }, { "epoch": 47.76, "eval_accuracy": 0.8252569750367107, "eval_f1": 0.8263631777806041, "eval_loss": 0.6788427233695984, "eval_runtime": 344.1209, "eval_samples_per_second": 11.874, "eval_steps_per_second": 0.744, "step": 3200 }, { "epoch": 47.77, "learning_rate": 0.0002404210526315789, "loss": 0.0692, "step": 3201 }, { "epoch": 47.79, "learning_rate": 0.0002403859649122807, "loss": 0.0733, "step": 3202 }, { "epoch": 47.8, "learning_rate": 0.00024035087719298244, "loss": 0.2759, "step": 3203 }, { "epoch": 47.82, "learning_rate": 0.00024031578947368419, "loss": 0.0444, "step": 3204 }, { "epoch": 47.83, "learning_rate": 0.00024028070175438593, "loss": 0.5526, "step": 3205 }, { "epoch": 47.85, "learning_rate": 0.0002402456140350877, "loss": 0.1555, "step": 3206 }, { "epoch": 47.86, "learning_rate": 0.00024021052631578946, "loss": 0.1475, "step": 3207 }, { "epoch": 47.88, "learning_rate": 0.0002401754385964912, "loss": 0.1136, "step": 3208 }, { "epoch": 47.89, "learning_rate": 0.00024014035087719298, "loss": 0.0093, "step": 3209 }, { "epoch": 47.91, "learning_rate": 0.00024010526315789473, "loss": 0.0347, "step": 3210 }, { "epoch": 47.92, "learning_rate": 0.00024007017543859648, "loss": 0.1611, "step": 3211 }, { "epoch": 47.94, "learning_rate": 0.0002400350877192982, "loss": 0.103, "step": 3212 }, { "epoch": 47.95, "learning_rate": 0.00023999999999999998, "loss": 0.1416, "step": 3213 }, { "epoch": 47.97, "learning_rate": 0.00023996491228070173, "loss": 0.3555, "step": 3214 }, { "epoch": 47.98, "learning_rate": 0.00023992982456140348, "loss": 0.3259, "step": 3215 }, { "epoch": 48.0, "learning_rate": 0.00023989473684210523, "loss": 0.0298, "step": 3216 }, { "epoch": 48.01, "learning_rate": 0.000239859649122807, "loss": 0.3933, "step": 3217 }, { "epoch": 48.03, "learning_rate": 0.00023982456140350875, "loss": 0.0306, "step": 3218 }, { "epoch": 48.04, "learning_rate": 0.0002397894736842105, "loss": 0.242, "step": 3219 }, { "epoch": 48.06, "learning_rate": 0.00023975438596491225, "loss": 0.0583, "step": 3220 }, { "epoch": 48.07, "learning_rate": 0.00023971929824561402, "loss": 0.0489, "step": 3221 }, { "epoch": 48.09, "learning_rate": 0.00023968421052631577, "loss": 0.2804, "step": 3222 }, { "epoch": 48.1, "learning_rate": 0.00023964912280701752, "loss": 0.2151, "step": 3223 }, { "epoch": 48.12, "learning_rate": 0.0002396140350877193, "loss": 0.1241, "step": 3224 }, { "epoch": 48.13, "learning_rate": 0.00023957894736842105, "loss": 0.0905, "step": 3225 }, { "epoch": 48.15, "learning_rate": 0.0002395438596491228, "loss": 0.0301, "step": 3226 }, { "epoch": 48.16, "learning_rate": 0.00023950877192982452, "loss": 0.045, "step": 3227 }, { "epoch": 48.18, "learning_rate": 0.00023947368421052632, "loss": 0.1225, "step": 3228 }, { "epoch": 48.19, "learning_rate": 0.00023943859649122804, "loss": 0.0339, "step": 3229 }, { "epoch": 48.21, "learning_rate": 0.0002394035087719298, "loss": 0.0699, "step": 3230 }, { "epoch": 48.22, "learning_rate": 0.00023936842105263154, "loss": 0.0094, "step": 3231 }, { "epoch": 48.24, "learning_rate": 0.00023933333333333331, "loss": 0.3309, "step": 3232 }, { "epoch": 48.25, "learning_rate": 0.00023929824561403506, "loss": 0.0171, "step": 3233 }, { "epoch": 48.27, "learning_rate": 0.0002392631578947368, "loss": 0.1391, "step": 3234 }, { "epoch": 48.28, "learning_rate": 0.0002392280701754386, "loss": 0.3318, "step": 3235 }, { "epoch": 48.3, "learning_rate": 0.00023919298245614034, "loss": 0.0258, "step": 3236 }, { "epoch": 48.31, "learning_rate": 0.00023915789473684209, "loss": 0.0946, "step": 3237 }, { "epoch": 48.33, "learning_rate": 0.00023912280701754383, "loss": 0.2643, "step": 3238 }, { "epoch": 48.34, "learning_rate": 0.0002390877192982456, "loss": 0.3604, "step": 3239 }, { "epoch": 48.36, "learning_rate": 0.00023905263157894736, "loss": 0.1013, "step": 3240 }, { "epoch": 48.37, "learning_rate": 0.0002390175438596491, "loss": 0.3569, "step": 3241 }, { "epoch": 48.39, "learning_rate": 0.00023898245614035083, "loss": 0.0471, "step": 3242 }, { "epoch": 48.4, "learning_rate": 0.00023894736842105263, "loss": 0.1155, "step": 3243 }, { "epoch": 48.42, "learning_rate": 0.00023891228070175435, "loss": 0.0065, "step": 3244 }, { "epoch": 48.43, "learning_rate": 0.0002388771929824561, "loss": 0.0241, "step": 3245 }, { "epoch": 48.45, "learning_rate": 0.00023884210526315788, "loss": 0.0109, "step": 3246 }, { "epoch": 48.46, "learning_rate": 0.00023880701754385963, "loss": 0.0422, "step": 3247 }, { "epoch": 48.48, "learning_rate": 0.00023877192982456138, "loss": 0.1441, "step": 3248 }, { "epoch": 48.49, "learning_rate": 0.00023873684210526312, "loss": 0.0596, "step": 3249 }, { "epoch": 48.51, "learning_rate": 0.0002387017543859649, "loss": 0.0554, "step": 3250 }, { "epoch": 48.52, "learning_rate": 0.00023866666666666665, "loss": 0.1545, "step": 3251 }, { "epoch": 48.54, "learning_rate": 0.0002386315789473684, "loss": 0.235, "step": 3252 }, { "epoch": 48.55, "learning_rate": 0.00023859649122807015, "loss": 0.2796, "step": 3253 }, { "epoch": 48.57, "learning_rate": 0.00023856140350877192, "loss": 0.1655, "step": 3254 }, { "epoch": 48.58, "learning_rate": 0.00023852631578947367, "loss": 0.0283, "step": 3255 }, { "epoch": 48.59, "learning_rate": 0.00023849122807017542, "loss": 0.15, "step": 3256 }, { "epoch": 48.61, "learning_rate": 0.00023845614035087717, "loss": 0.1791, "step": 3257 }, { "epoch": 48.62, "learning_rate": 0.00023842105263157895, "loss": 0.167, "step": 3258 }, { "epoch": 48.64, "learning_rate": 0.0002383859649122807, "loss": 0.1388, "step": 3259 }, { "epoch": 48.65, "learning_rate": 0.00023835087719298242, "loss": 0.1566, "step": 3260 }, { "epoch": 48.67, "learning_rate": 0.0002383157894736842, "loss": 0.0098, "step": 3261 }, { "epoch": 48.68, "learning_rate": 0.00023828070175438594, "loss": 0.06, "step": 3262 }, { "epoch": 48.7, "learning_rate": 0.0002382456140350877, "loss": 0.2902, "step": 3263 }, { "epoch": 48.71, "learning_rate": 0.00023821052631578944, "loss": 0.0044, "step": 3264 }, { "epoch": 48.73, "learning_rate": 0.00023817543859649121, "loss": 0.3811, "step": 3265 }, { "epoch": 48.74, "learning_rate": 0.00023814035087719296, "loss": 0.5167, "step": 3266 }, { "epoch": 48.76, "learning_rate": 0.0002381052631578947, "loss": 0.1743, "step": 3267 }, { "epoch": 48.77, "learning_rate": 0.00023807017543859646, "loss": 0.0538, "step": 3268 }, { "epoch": 48.79, "learning_rate": 0.00023803508771929824, "loss": 0.4199, "step": 3269 }, { "epoch": 48.8, "learning_rate": 0.00023799999999999998, "loss": 0.2232, "step": 3270 }, { "epoch": 48.82, "learning_rate": 0.00023796491228070173, "loss": 0.1179, "step": 3271 }, { "epoch": 48.83, "learning_rate": 0.0002379298245614035, "loss": 0.2499, "step": 3272 }, { "epoch": 48.85, "learning_rate": 0.00023789473684210526, "loss": 0.2151, "step": 3273 }, { "epoch": 48.86, "learning_rate": 0.000237859649122807, "loss": 0.1367, "step": 3274 }, { "epoch": 48.88, "learning_rate": 0.00023782456140350873, "loss": 0.0687, "step": 3275 }, { "epoch": 48.89, "learning_rate": 0.00023778947368421053, "loss": 0.0195, "step": 3276 }, { "epoch": 48.91, "learning_rate": 0.00023775438596491225, "loss": 0.2018, "step": 3277 }, { "epoch": 48.92, "learning_rate": 0.000237719298245614, "loss": 0.187, "step": 3278 }, { "epoch": 48.94, "learning_rate": 0.00023768421052631575, "loss": 0.1456, "step": 3279 }, { "epoch": 48.95, "learning_rate": 0.00023764912280701753, "loss": 0.152, "step": 3280 }, { "epoch": 48.97, "learning_rate": 0.00023761403508771928, "loss": 0.0065, "step": 3281 }, { "epoch": 48.98, "learning_rate": 0.00023757894736842102, "loss": 0.9299, "step": 3282 }, { "epoch": 49.0, "learning_rate": 0.00023754385964912277, "loss": 0.2614, "step": 3283 }, { "epoch": 49.01, "learning_rate": 0.00023750877192982455, "loss": 0.3121, "step": 3284 }, { "epoch": 49.03, "learning_rate": 0.0002374736842105263, "loss": 0.2731, "step": 3285 }, { "epoch": 49.04, "learning_rate": 0.00023743859649122805, "loss": 0.5073, "step": 3286 }, { "epoch": 49.06, "learning_rate": 0.00023740350877192982, "loss": 0.1915, "step": 3287 }, { "epoch": 49.07, "learning_rate": 0.00023736842105263157, "loss": 0.1298, "step": 3288 }, { "epoch": 49.09, "learning_rate": 0.00023733333333333332, "loss": 0.1686, "step": 3289 }, { "epoch": 49.1, "learning_rate": 0.00023729824561403504, "loss": 0.0814, "step": 3290 }, { "epoch": 49.12, "learning_rate": 0.00023726315789473684, "loss": 0.221, "step": 3291 }, { "epoch": 49.13, "learning_rate": 0.00023722807017543857, "loss": 0.1197, "step": 3292 }, { "epoch": 49.15, "learning_rate": 0.00023719298245614031, "loss": 0.4632, "step": 3293 }, { "epoch": 49.16, "learning_rate": 0.00023715789473684206, "loss": 0.4504, "step": 3294 }, { "epoch": 49.18, "learning_rate": 0.00023712280701754384, "loss": 0.0194, "step": 3295 }, { "epoch": 49.19, "learning_rate": 0.0002370877192982456, "loss": 0.1115, "step": 3296 }, { "epoch": 49.21, "learning_rate": 0.00023705263157894734, "loss": 0.194, "step": 3297 }, { "epoch": 49.22, "learning_rate": 0.0002370175438596491, "loss": 0.1706, "step": 3298 }, { "epoch": 49.24, "learning_rate": 0.00023698245614035086, "loss": 0.2053, "step": 3299 }, { "epoch": 49.25, "learning_rate": 0.0002369473684210526, "loss": 0.0102, "step": 3300 }, { "epoch": 49.27, "learning_rate": 0.00023691228070175436, "loss": 0.1797, "step": 3301 }, { "epoch": 49.28, "learning_rate": 0.00023687719298245614, "loss": 0.1465, "step": 3302 }, { "epoch": 49.3, "learning_rate": 0.00023684210526315788, "loss": 0.2035, "step": 3303 }, { "epoch": 49.31, "learning_rate": 0.00023680701754385963, "loss": 0.266, "step": 3304 }, { "epoch": 49.33, "learning_rate": 0.00023677192982456138, "loss": 0.2117, "step": 3305 }, { "epoch": 49.34, "learning_rate": 0.00023673684210526316, "loss": 0.1285, "step": 3306 }, { "epoch": 49.36, "learning_rate": 0.00023670175438596488, "loss": 0.156, "step": 3307 }, { "epoch": 49.37, "learning_rate": 0.00023666666666666663, "loss": 0.2169, "step": 3308 }, { "epoch": 49.39, "learning_rate": 0.0002366315789473684, "loss": 0.0067, "step": 3309 }, { "epoch": 49.4, "learning_rate": 0.00023659649122807015, "loss": 0.0784, "step": 3310 }, { "epoch": 49.42, "learning_rate": 0.0002365614035087719, "loss": 0.1451, "step": 3311 }, { "epoch": 49.43, "learning_rate": 0.00023652631578947365, "loss": 0.0804, "step": 3312 }, { "epoch": 49.45, "learning_rate": 0.00023649122807017543, "loss": 0.155, "step": 3313 }, { "epoch": 49.46, "learning_rate": 0.00023645614035087717, "loss": 0.1286, "step": 3314 }, { "epoch": 49.48, "learning_rate": 0.00023642105263157892, "loss": 0.0605, "step": 3315 }, { "epoch": 49.49, "learning_rate": 0.00023638596491228067, "loss": 0.0368, "step": 3316 }, { "epoch": 49.51, "learning_rate": 0.00023635087719298245, "loss": 0.3509, "step": 3317 }, { "epoch": 49.52, "learning_rate": 0.0002363157894736842, "loss": 0.226, "step": 3318 }, { "epoch": 49.54, "learning_rate": 0.00023628070175438595, "loss": 0.2064, "step": 3319 }, { "epoch": 49.55, "learning_rate": 0.0002362456140350877, "loss": 0.0506, "step": 3320 }, { "epoch": 49.57, "learning_rate": 0.00023621052631578947, "loss": 0.0928, "step": 3321 }, { "epoch": 49.58, "learning_rate": 0.00023617543859649122, "loss": 0.1743, "step": 3322 }, { "epoch": 49.59, "learning_rate": 0.00023614035087719294, "loss": 0.0956, "step": 3323 }, { "epoch": 49.61, "learning_rate": 0.00023610526315789474, "loss": 0.2717, "step": 3324 }, { "epoch": 49.62, "learning_rate": 0.00023607017543859647, "loss": 0.0131, "step": 3325 }, { "epoch": 49.64, "learning_rate": 0.00023603508771929821, "loss": 0.1284, "step": 3326 }, { "epoch": 49.65, "learning_rate": 0.00023599999999999996, "loss": 0.0224, "step": 3327 }, { "epoch": 49.67, "learning_rate": 0.00023596491228070174, "loss": 0.2971, "step": 3328 }, { "epoch": 49.68, "learning_rate": 0.0002359298245614035, "loss": 0.0894, "step": 3329 }, { "epoch": 49.7, "learning_rate": 0.00023589473684210524, "loss": 0.0096, "step": 3330 }, { "epoch": 49.71, "learning_rate": 0.00023585964912280699, "loss": 0.1581, "step": 3331 }, { "epoch": 49.73, "learning_rate": 0.00023582456140350876, "loss": 0.0086, "step": 3332 }, { "epoch": 49.74, "learning_rate": 0.0002357894736842105, "loss": 0.2042, "step": 3333 }, { "epoch": 49.76, "learning_rate": 0.00023575438596491226, "loss": 0.0336, "step": 3334 }, { "epoch": 49.77, "learning_rate": 0.00023571929824561403, "loss": 0.047, "step": 3335 }, { "epoch": 49.79, "learning_rate": 0.00023568421052631578, "loss": 0.114, "step": 3336 }, { "epoch": 49.8, "learning_rate": 0.00023564912280701753, "loss": 0.0184, "step": 3337 }, { "epoch": 49.82, "learning_rate": 0.00023561403508771925, "loss": 0.2492, "step": 3338 }, { "epoch": 49.83, "learning_rate": 0.00023557894736842106, "loss": 0.0591, "step": 3339 }, { "epoch": 49.85, "learning_rate": 0.00023554385964912278, "loss": 0.0539, "step": 3340 }, { "epoch": 49.86, "learning_rate": 0.00023550877192982453, "loss": 0.0631, "step": 3341 }, { "epoch": 49.88, "learning_rate": 0.00023547368421052628, "loss": 0.0913, "step": 3342 }, { "epoch": 49.89, "learning_rate": 0.00023543859649122805, "loss": 0.1166, "step": 3343 }, { "epoch": 49.91, "learning_rate": 0.0002354035087719298, "loss": 0.0261, "step": 3344 }, { "epoch": 49.92, "learning_rate": 0.00023536842105263155, "loss": 0.4387, "step": 3345 }, { "epoch": 49.94, "learning_rate": 0.00023533333333333333, "loss": 0.1136, "step": 3346 }, { "epoch": 49.95, "learning_rate": 0.00023529824561403507, "loss": 0.0142, "step": 3347 }, { "epoch": 49.97, "learning_rate": 0.00023526315789473682, "loss": 0.1371, "step": 3348 }, { "epoch": 49.98, "learning_rate": 0.00023522807017543857, "loss": 0.0146, "step": 3349 }, { "epoch": 50.0, "learning_rate": 0.00023519298245614035, "loss": 0.006, "step": 3350 }, { "epoch": 50.01, "learning_rate": 0.0002351578947368421, "loss": 0.013, "step": 3351 }, { "epoch": 50.03, "learning_rate": 0.00023512280701754385, "loss": 0.0484, "step": 3352 }, { "epoch": 50.04, "learning_rate": 0.0002350877192982456, "loss": 0.1242, "step": 3353 }, { "epoch": 50.06, "learning_rate": 0.00023505263157894737, "loss": 0.158, "step": 3354 }, { "epoch": 50.07, "learning_rate": 0.0002350175438596491, "loss": 0.2993, "step": 3355 }, { "epoch": 50.09, "learning_rate": 0.00023498245614035084, "loss": 0.035, "step": 3356 }, { "epoch": 50.1, "learning_rate": 0.0002349473684210526, "loss": 0.0617, "step": 3357 }, { "epoch": 50.12, "learning_rate": 0.00023491228070175436, "loss": 0.2287, "step": 3358 }, { "epoch": 50.13, "learning_rate": 0.00023487719298245611, "loss": 0.1619, "step": 3359 }, { "epoch": 50.15, "learning_rate": 0.00023484210526315786, "loss": 0.0349, "step": 3360 }, { "epoch": 50.16, "learning_rate": 0.00023480701754385964, "loss": 0.0262, "step": 3361 }, { "epoch": 50.18, "learning_rate": 0.0002347719298245614, "loss": 0.0156, "step": 3362 }, { "epoch": 50.19, "learning_rate": 0.00023473684210526314, "loss": 0.0146, "step": 3363 }, { "epoch": 50.21, "learning_rate": 0.00023470175438596488, "loss": 0.0343, "step": 3364 }, { "epoch": 50.22, "learning_rate": 0.00023466666666666666, "loss": 0.0789, "step": 3365 }, { "epoch": 50.24, "learning_rate": 0.0002346315789473684, "loss": 0.0122, "step": 3366 }, { "epoch": 50.25, "learning_rate": 0.00023459649122807016, "loss": 0.0105, "step": 3367 }, { "epoch": 50.27, "learning_rate": 0.0002345614035087719, "loss": 0.1957, "step": 3368 }, { "epoch": 50.28, "learning_rate": 0.00023452631578947368, "loss": 0.2115, "step": 3369 }, { "epoch": 50.3, "learning_rate": 0.00023449122807017543, "loss": 0.242, "step": 3370 }, { "epoch": 50.31, "learning_rate": 0.00023445614035087715, "loss": 0.3058, "step": 3371 }, { "epoch": 50.33, "learning_rate": 0.00023442105263157896, "loss": 0.0748, "step": 3372 }, { "epoch": 50.34, "learning_rate": 0.00023438596491228068, "loss": 0.0155, "step": 3373 }, { "epoch": 50.36, "learning_rate": 0.00023435087719298243, "loss": 0.3034, "step": 3374 }, { "epoch": 50.37, "learning_rate": 0.00023431578947368418, "loss": 0.0136, "step": 3375 }, { "epoch": 50.39, "learning_rate": 0.00023428070175438595, "loss": 0.1864, "step": 3376 }, { "epoch": 50.4, "learning_rate": 0.0002342456140350877, "loss": 0.1253, "step": 3377 }, { "epoch": 50.42, "learning_rate": 0.00023421052631578945, "loss": 0.1835, "step": 3378 }, { "epoch": 50.43, "learning_rate": 0.0002341754385964912, "loss": 0.1046, "step": 3379 }, { "epoch": 50.45, "learning_rate": 0.00023414035087719297, "loss": 0.036, "step": 3380 }, { "epoch": 50.46, "learning_rate": 0.00023410526315789472, "loss": 0.0947, "step": 3381 }, { "epoch": 50.48, "learning_rate": 0.00023407017543859647, "loss": 0.0058, "step": 3382 }, { "epoch": 50.49, "learning_rate": 0.00023403508771929822, "loss": 0.1306, "step": 3383 }, { "epoch": 50.51, "learning_rate": 0.000234, "loss": 0.1747, "step": 3384 }, { "epoch": 50.52, "learning_rate": 0.00023396491228070174, "loss": 0.037, "step": 3385 }, { "epoch": 50.54, "learning_rate": 0.00023392982456140347, "loss": 0.2375, "step": 3386 }, { "epoch": 50.55, "learning_rate": 0.00023389473684210527, "loss": 0.4548, "step": 3387 }, { "epoch": 50.57, "learning_rate": 0.000233859649122807, "loss": 0.0223, "step": 3388 }, { "epoch": 50.58, "learning_rate": 0.00023382456140350874, "loss": 0.0359, "step": 3389 }, { "epoch": 50.59, "learning_rate": 0.0002337894736842105, "loss": 0.1929, "step": 3390 }, { "epoch": 50.61, "learning_rate": 0.00023375438596491226, "loss": 0.0979, "step": 3391 }, { "epoch": 50.62, "learning_rate": 0.000233719298245614, "loss": 0.0213, "step": 3392 }, { "epoch": 50.64, "learning_rate": 0.00023368421052631576, "loss": 0.05, "step": 3393 }, { "epoch": 50.65, "learning_rate": 0.0002336491228070175, "loss": 0.2083, "step": 3394 }, { "epoch": 50.67, "learning_rate": 0.0002336140350877193, "loss": 0.5703, "step": 3395 }, { "epoch": 50.68, "learning_rate": 0.00023357894736842104, "loss": 0.0331, "step": 3396 }, { "epoch": 50.7, "learning_rate": 0.00023354385964912278, "loss": 0.0684, "step": 3397 }, { "epoch": 50.71, "learning_rate": 0.00023350877192982456, "loss": 0.1066, "step": 3398 }, { "epoch": 50.73, "learning_rate": 0.0002334736842105263, "loss": 0.1049, "step": 3399 }, { "epoch": 50.74, "learning_rate": 0.00023343859649122806, "loss": 0.0975, "step": 3400 }, { "epoch": 50.74, "eval_accuracy": 0.828928046989721, "eval_f1": 0.8295199623127844, "eval_loss": 0.723612368106842, "eval_runtime": 345.274, "eval_samples_per_second": 11.834, "eval_steps_per_second": 0.741, "step": 3400 }, { "epoch": 50.76, "learning_rate": 0.00023340350877192978, "loss": 0.0765, "step": 3401 }, { "epoch": 50.77, "learning_rate": 0.00023336842105263158, "loss": 0.0704, "step": 3402 }, { "epoch": 50.79, "learning_rate": 0.0002333333333333333, "loss": 0.0318, "step": 3403 }, { "epoch": 50.8, "learning_rate": 0.00023329824561403505, "loss": 0.2962, "step": 3404 }, { "epoch": 50.82, "learning_rate": 0.0002332631578947368, "loss": 0.0897, "step": 3405 }, { "epoch": 50.83, "learning_rate": 0.00023322807017543858, "loss": 0.3142, "step": 3406 }, { "epoch": 50.85, "learning_rate": 0.00023319298245614033, "loss": 0.2082, "step": 3407 }, { "epoch": 50.86, "learning_rate": 0.00023315789473684207, "loss": 0.0214, "step": 3408 }, { "epoch": 50.88, "learning_rate": 0.00023312280701754385, "loss": 0.0919, "step": 3409 }, { "epoch": 50.89, "learning_rate": 0.0002330877192982456, "loss": 0.1899, "step": 3410 }, { "epoch": 50.91, "learning_rate": 0.00023305263157894735, "loss": 0.1403, "step": 3411 }, { "epoch": 50.92, "learning_rate": 0.0002330175438596491, "loss": 0.0434, "step": 3412 }, { "epoch": 50.94, "learning_rate": 0.00023298245614035087, "loss": 0.0165, "step": 3413 }, { "epoch": 50.95, "learning_rate": 0.00023294736842105262, "loss": 0.1556, "step": 3414 }, { "epoch": 50.97, "learning_rate": 0.00023291228070175437, "loss": 0.1885, "step": 3415 }, { "epoch": 50.98, "learning_rate": 0.00023287719298245612, "loss": 0.0331, "step": 3416 }, { "epoch": 51.0, "learning_rate": 0.0002328421052631579, "loss": 0.1439, "step": 3417 }, { "epoch": 51.01, "learning_rate": 0.00023280701754385964, "loss": 0.1991, "step": 3418 }, { "epoch": 51.03, "learning_rate": 0.00023277192982456137, "loss": 0.2616, "step": 3419 }, { "epoch": 51.04, "learning_rate": 0.00023273684210526311, "loss": 0.0325, "step": 3420 }, { "epoch": 51.06, "learning_rate": 0.0002327017543859649, "loss": 0.1062, "step": 3421 }, { "epoch": 51.07, "learning_rate": 0.00023266666666666664, "loss": 0.0539, "step": 3422 }, { "epoch": 51.09, "learning_rate": 0.0002326315789473684, "loss": 0.3147, "step": 3423 }, { "epoch": 51.1, "learning_rate": 0.00023259649122807016, "loss": 0.2051, "step": 3424 }, { "epoch": 51.12, "learning_rate": 0.0002325614035087719, "loss": 0.0063, "step": 3425 }, { "epoch": 51.13, "learning_rate": 0.00023252631578947366, "loss": 0.1891, "step": 3426 }, { "epoch": 51.15, "learning_rate": 0.0002324912280701754, "loss": 0.2393, "step": 3427 }, { "epoch": 51.16, "learning_rate": 0.00023245614035087719, "loss": 0.2608, "step": 3428 }, { "epoch": 51.18, "learning_rate": 0.00023242105263157893, "loss": 0.155, "step": 3429 }, { "epoch": 51.19, "learning_rate": 0.00023238596491228068, "loss": 0.2852, "step": 3430 }, { "epoch": 51.21, "learning_rate": 0.00023235087719298243, "loss": 0.0219, "step": 3431 }, { "epoch": 51.22, "learning_rate": 0.0002323157894736842, "loss": 0.0044, "step": 3432 }, { "epoch": 51.24, "learning_rate": 0.00023228070175438596, "loss": 0.2911, "step": 3433 }, { "epoch": 51.25, "learning_rate": 0.00023224561403508768, "loss": 0.1041, "step": 3434 }, { "epoch": 51.27, "learning_rate": 0.00023221052631578948, "loss": 0.1581, "step": 3435 }, { "epoch": 51.28, "learning_rate": 0.0002321754385964912, "loss": 0.0615, "step": 3436 }, { "epoch": 51.3, "learning_rate": 0.00023214035087719295, "loss": 0.0482, "step": 3437 }, { "epoch": 51.31, "learning_rate": 0.0002321052631578947, "loss": 0.1814, "step": 3438 }, { "epoch": 51.33, "learning_rate": 0.00023207017543859648, "loss": 0.0087, "step": 3439 }, { "epoch": 51.34, "learning_rate": 0.00023203508771929823, "loss": 0.0068, "step": 3440 }, { "epoch": 51.36, "learning_rate": 0.00023199999999999997, "loss": 0.4871, "step": 3441 }, { "epoch": 51.37, "learning_rate": 0.00023196491228070172, "loss": 0.0375, "step": 3442 }, { "epoch": 51.39, "learning_rate": 0.0002319298245614035, "loss": 0.0656, "step": 3443 }, { "epoch": 51.4, "learning_rate": 0.00023189473684210525, "loss": 0.0113, "step": 3444 }, { "epoch": 51.42, "learning_rate": 0.000231859649122807, "loss": 0.0957, "step": 3445 }, { "epoch": 51.43, "learning_rate": 0.00023182456140350875, "loss": 0.0936, "step": 3446 }, { "epoch": 51.45, "learning_rate": 0.00023178947368421052, "loss": 0.2175, "step": 3447 }, { "epoch": 51.46, "learning_rate": 0.00023175438596491227, "loss": 0.1612, "step": 3448 }, { "epoch": 51.48, "learning_rate": 0.000231719298245614, "loss": 0.0313, "step": 3449 }, { "epoch": 51.49, "learning_rate": 0.0002316842105263158, "loss": 0.0096, "step": 3450 }, { "epoch": 51.51, "learning_rate": 0.00023164912280701752, "loss": 0.0376, "step": 3451 }, { "epoch": 51.52, "learning_rate": 0.00023161403508771927, "loss": 0.0071, "step": 3452 }, { "epoch": 51.54, "learning_rate": 0.00023157894736842101, "loss": 0.2736, "step": 3453 }, { "epoch": 51.55, "learning_rate": 0.0002315438596491228, "loss": 0.1893, "step": 3454 }, { "epoch": 51.57, "learning_rate": 0.00023150877192982454, "loss": 0.0625, "step": 3455 }, { "epoch": 51.58, "learning_rate": 0.0002314736842105263, "loss": 0.0211, "step": 3456 }, { "epoch": 51.59, "learning_rate": 0.00023143859649122804, "loss": 0.026, "step": 3457 }, { "epoch": 51.61, "learning_rate": 0.0002314035087719298, "loss": 0.0453, "step": 3458 }, { "epoch": 51.62, "learning_rate": 0.00023136842105263156, "loss": 0.0545, "step": 3459 }, { "epoch": 51.64, "learning_rate": 0.0002313333333333333, "loss": 0.0336, "step": 3460 }, { "epoch": 51.65, "learning_rate": 0.00023129824561403509, "loss": 0.0492, "step": 3461 }, { "epoch": 51.67, "learning_rate": 0.00023126315789473683, "loss": 0.2222, "step": 3462 }, { "epoch": 51.68, "learning_rate": 0.00023122807017543858, "loss": 0.006, "step": 3463 }, { "epoch": 51.7, "learning_rate": 0.00023119298245614033, "loss": 0.0502, "step": 3464 }, { "epoch": 51.71, "learning_rate": 0.0002311578947368421, "loss": 0.0217, "step": 3465 }, { "epoch": 51.73, "learning_rate": 0.00023112280701754386, "loss": 0.4115, "step": 3466 }, { "epoch": 51.74, "learning_rate": 0.00023108771929824558, "loss": 0.0063, "step": 3467 }, { "epoch": 51.76, "learning_rate": 0.00023105263157894733, "loss": 0.386, "step": 3468 }, { "epoch": 51.77, "learning_rate": 0.0002310175438596491, "loss": 0.0952, "step": 3469 }, { "epoch": 51.79, "learning_rate": 0.00023098245614035085, "loss": 0.075, "step": 3470 }, { "epoch": 51.8, "learning_rate": 0.0002309473684210526, "loss": 0.2486, "step": 3471 }, { "epoch": 51.82, "learning_rate": 0.00023091228070175438, "loss": 0.5256, "step": 3472 }, { "epoch": 51.83, "learning_rate": 0.00023087719298245612, "loss": 0.0866, "step": 3473 }, { "epoch": 51.85, "learning_rate": 0.00023084210526315787, "loss": 0.1161, "step": 3474 }, { "epoch": 51.86, "learning_rate": 0.00023080701754385962, "loss": 0.035, "step": 3475 }, { "epoch": 51.88, "learning_rate": 0.0002307719298245614, "loss": 0.0224, "step": 3476 }, { "epoch": 51.89, "learning_rate": 0.00023073684210526315, "loss": 0.0111, "step": 3477 }, { "epoch": 51.91, "learning_rate": 0.0002307017543859649, "loss": 0.0374, "step": 3478 }, { "epoch": 51.92, "learning_rate": 0.00023066666666666664, "loss": 0.0163, "step": 3479 }, { "epoch": 51.94, "learning_rate": 0.00023063157894736842, "loss": 0.1842, "step": 3480 }, { "epoch": 51.95, "learning_rate": 0.00023059649122807017, "loss": 0.2779, "step": 3481 }, { "epoch": 51.97, "learning_rate": 0.0002305614035087719, "loss": 0.0568, "step": 3482 }, { "epoch": 51.98, "learning_rate": 0.00023052631578947364, "loss": 0.2603, "step": 3483 }, { "epoch": 52.0, "learning_rate": 0.00023049122807017542, "loss": 0.126, "step": 3484 }, { "epoch": 52.01, "learning_rate": 0.00023045614035087716, "loss": 0.7035, "step": 3485 }, { "epoch": 52.03, "learning_rate": 0.0002304210526315789, "loss": 0.0054, "step": 3486 }, { "epoch": 52.04, "learning_rate": 0.0002303859649122807, "loss": 0.0937, "step": 3487 }, { "epoch": 52.06, "learning_rate": 0.00023035087719298244, "loss": 0.1501, "step": 3488 }, { "epoch": 52.07, "learning_rate": 0.0002303157894736842, "loss": 0.0795, "step": 3489 }, { "epoch": 52.09, "learning_rate": 0.00023028070175438594, "loss": 0.1251, "step": 3490 }, { "epoch": 52.1, "learning_rate": 0.0002302456140350877, "loss": 0.01, "step": 3491 }, { "epoch": 52.12, "learning_rate": 0.00023021052631578946, "loss": 0.3727, "step": 3492 }, { "epoch": 52.13, "learning_rate": 0.0002301754385964912, "loss": 0.0147, "step": 3493 }, { "epoch": 52.15, "learning_rate": 0.00023014035087719296, "loss": 0.2178, "step": 3494 }, { "epoch": 52.16, "learning_rate": 0.00023010526315789473, "loss": 0.0409, "step": 3495 }, { "epoch": 52.18, "learning_rate": 0.00023007017543859648, "loss": 0.1585, "step": 3496 }, { "epoch": 52.19, "learning_rate": 0.0002300350877192982, "loss": 0.0069, "step": 3497 }, { "epoch": 52.21, "learning_rate": 0.00023, "loss": 0.0145, "step": 3498 }, { "epoch": 52.22, "learning_rate": 0.00022996491228070173, "loss": 0.0131, "step": 3499 }, { "epoch": 52.24, "learning_rate": 0.00022992982456140348, "loss": 0.2495, "step": 3500 }, { "epoch": 52.25, "learning_rate": 0.00022989473684210523, "loss": 0.1867, "step": 3501 }, { "epoch": 52.27, "learning_rate": 0.000229859649122807, "loss": 0.0064, "step": 3502 }, { "epoch": 52.28, "learning_rate": 0.00022982456140350875, "loss": 0.0257, "step": 3503 }, { "epoch": 52.3, "learning_rate": 0.0002297894736842105, "loss": 0.0654, "step": 3504 }, { "epoch": 52.31, "learning_rate": 0.00022975438596491225, "loss": 0.0885, "step": 3505 }, { "epoch": 52.33, "learning_rate": 0.00022971929824561402, "loss": 0.0556, "step": 3506 }, { "epoch": 52.34, "learning_rate": 0.00022968421052631577, "loss": 0.0429, "step": 3507 }, { "epoch": 52.36, "learning_rate": 0.00022964912280701752, "loss": 0.0837, "step": 3508 }, { "epoch": 52.37, "learning_rate": 0.0002296140350877193, "loss": 0.1373, "step": 3509 }, { "epoch": 52.39, "learning_rate": 0.00022957894736842105, "loss": 0.022, "step": 3510 }, { "epoch": 52.4, "learning_rate": 0.0002295438596491228, "loss": 0.356, "step": 3511 }, { "epoch": 52.42, "learning_rate": 0.00022950877192982454, "loss": 0.0119, "step": 3512 }, { "epoch": 52.43, "learning_rate": 0.00022947368421052632, "loss": 0.0376, "step": 3513 }, { "epoch": 52.45, "learning_rate": 0.00022943859649122804, "loss": 0.0049, "step": 3514 }, { "epoch": 52.46, "learning_rate": 0.0002294035087719298, "loss": 0.1981, "step": 3515 }, { "epoch": 52.48, "learning_rate": 0.00022936842105263154, "loss": 0.2462, "step": 3516 }, { "epoch": 52.49, "learning_rate": 0.00022933333333333332, "loss": 0.0036, "step": 3517 }, { "epoch": 52.51, "learning_rate": 0.00022929824561403506, "loss": 0.2211, "step": 3518 }, { "epoch": 52.52, "learning_rate": 0.0002292631578947368, "loss": 0.1351, "step": 3519 }, { "epoch": 52.54, "learning_rate": 0.00022922807017543856, "loss": 0.1374, "step": 3520 }, { "epoch": 52.55, "learning_rate": 0.00022919298245614034, "loss": 0.2577, "step": 3521 }, { "epoch": 52.57, "learning_rate": 0.00022915789473684209, "loss": 0.3107, "step": 3522 }, { "epoch": 52.58, "learning_rate": 0.00022912280701754383, "loss": 0.3881, "step": 3523 }, { "epoch": 52.59, "learning_rate": 0.0002290877192982456, "loss": 0.0885, "step": 3524 }, { "epoch": 52.61, "learning_rate": 0.00022905263157894736, "loss": 0.0183, "step": 3525 }, { "epoch": 52.62, "learning_rate": 0.0002290175438596491, "loss": 0.006, "step": 3526 }, { "epoch": 52.64, "learning_rate": 0.00022898245614035086, "loss": 0.1872, "step": 3527 }, { "epoch": 52.65, "learning_rate": 0.00022894736842105263, "loss": 0.1191, "step": 3528 }, { "epoch": 52.67, "learning_rate": 0.00022891228070175438, "loss": 0.1761, "step": 3529 }, { "epoch": 52.68, "learning_rate": 0.0002288771929824561, "loss": 0.0031, "step": 3530 }, { "epoch": 52.7, "learning_rate": 0.00022884210526315785, "loss": 0.0431, "step": 3531 }, { "epoch": 52.71, "learning_rate": 0.00022880701754385963, "loss": 0.0034, "step": 3532 }, { "epoch": 52.73, "learning_rate": 0.00022877192982456138, "loss": 0.1489, "step": 3533 }, { "epoch": 52.74, "learning_rate": 0.00022873684210526313, "loss": 0.0079, "step": 3534 }, { "epoch": 52.76, "learning_rate": 0.0002287017543859649, "loss": 0.2767, "step": 3535 }, { "epoch": 52.77, "learning_rate": 0.00022866666666666665, "loss": 0.4586, "step": 3536 }, { "epoch": 52.79, "learning_rate": 0.0002286315789473684, "loss": 0.0055, "step": 3537 }, { "epoch": 52.8, "learning_rate": 0.00022859649122807015, "loss": 0.2642, "step": 3538 }, { "epoch": 52.82, "learning_rate": 0.00022856140350877192, "loss": 0.0057, "step": 3539 }, { "epoch": 52.83, "learning_rate": 0.00022852631578947367, "loss": 0.0305, "step": 3540 }, { "epoch": 52.85, "learning_rate": 0.00022849122807017542, "loss": 0.1817, "step": 3541 }, { "epoch": 52.86, "learning_rate": 0.00022845614035087717, "loss": 0.1287, "step": 3542 }, { "epoch": 52.88, "learning_rate": 0.00022842105263157895, "loss": 0.2467, "step": 3543 }, { "epoch": 52.89, "learning_rate": 0.0002283859649122807, "loss": 0.0116, "step": 3544 }, { "epoch": 52.91, "learning_rate": 0.00022835087719298242, "loss": 0.4328, "step": 3545 }, { "epoch": 52.92, "learning_rate": 0.00022831578947368417, "loss": 0.0037, "step": 3546 }, { "epoch": 52.94, "learning_rate": 0.00022828070175438594, "loss": 0.2324, "step": 3547 }, { "epoch": 52.95, "learning_rate": 0.0002282456140350877, "loss": 0.0079, "step": 3548 }, { "epoch": 52.97, "learning_rate": 0.00022821052631578944, "loss": 0.2288, "step": 3549 }, { "epoch": 52.98, "learning_rate": 0.00022817543859649121, "loss": 0.0985, "step": 3550 }, { "epoch": 53.0, "learning_rate": 0.00022814035087719296, "loss": 0.3461, "step": 3551 }, { "epoch": 53.01, "learning_rate": 0.0002281052631578947, "loss": 0.115, "step": 3552 }, { "epoch": 53.03, "learning_rate": 0.00022807017543859646, "loss": 0.1899, "step": 3553 }, { "epoch": 53.04, "learning_rate": 0.00022803508771929824, "loss": 0.0037, "step": 3554 }, { "epoch": 53.06, "learning_rate": 0.00022799999999999999, "loss": 0.0942, "step": 3555 }, { "epoch": 53.07, "learning_rate": 0.00022796491228070173, "loss": 0.1492, "step": 3556 }, { "epoch": 53.09, "learning_rate": 0.00022792982456140348, "loss": 0.014, "step": 3557 }, { "epoch": 53.1, "learning_rate": 0.00022789473684210526, "loss": 0.015, "step": 3558 }, { "epoch": 53.12, "learning_rate": 0.000227859649122807, "loss": 0.0556, "step": 3559 }, { "epoch": 53.13, "learning_rate": 0.00022782456140350876, "loss": 0.0535, "step": 3560 }, { "epoch": 53.15, "learning_rate": 0.00022778947368421053, "loss": 0.0459, "step": 3561 }, { "epoch": 53.16, "learning_rate": 0.00022775438596491225, "loss": 0.5427, "step": 3562 }, { "epoch": 53.18, "learning_rate": 0.000227719298245614, "loss": 0.0204, "step": 3563 }, { "epoch": 53.19, "learning_rate": 0.00022768421052631575, "loss": 0.0051, "step": 3564 }, { "epoch": 53.21, "learning_rate": 0.00022764912280701753, "loss": 0.1251, "step": 3565 }, { "epoch": 53.22, "learning_rate": 0.00022761403508771928, "loss": 0.1806, "step": 3566 }, { "epoch": 53.24, "learning_rate": 0.00022757894736842102, "loss": 0.0308, "step": 3567 }, { "epoch": 53.25, "learning_rate": 0.00022754385964912277, "loss": 0.2411, "step": 3568 }, { "epoch": 53.27, "learning_rate": 0.00022750877192982455, "loss": 0.2055, "step": 3569 }, { "epoch": 53.28, "learning_rate": 0.0002274736842105263, "loss": 0.0065, "step": 3570 }, { "epoch": 53.3, "learning_rate": 0.00022743859649122805, "loss": 0.0063, "step": 3571 }, { "epoch": 53.31, "learning_rate": 0.00022740350877192982, "loss": 0.1127, "step": 3572 }, { "epoch": 53.33, "learning_rate": 0.00022736842105263157, "loss": 0.15, "step": 3573 }, { "epoch": 53.34, "learning_rate": 0.00022733333333333332, "loss": 0.1083, "step": 3574 }, { "epoch": 53.36, "learning_rate": 0.00022729824561403507, "loss": 0.2448, "step": 3575 }, { "epoch": 53.37, "learning_rate": 0.00022726315789473685, "loss": 0.0562, "step": 3576 }, { "epoch": 53.39, "learning_rate": 0.0002272280701754386, "loss": 0.0156, "step": 3577 }, { "epoch": 53.4, "learning_rate": 0.00022719298245614032, "loss": 0.1385, "step": 3578 }, { "epoch": 53.42, "learning_rate": 0.00022715789473684206, "loss": 0.2497, "step": 3579 }, { "epoch": 53.43, "learning_rate": 0.00022712280701754384, "loss": 0.1978, "step": 3580 }, { "epoch": 53.45, "learning_rate": 0.0002270877192982456, "loss": 0.0094, "step": 3581 }, { "epoch": 53.46, "learning_rate": 0.00022705263157894734, "loss": 0.0449, "step": 3582 }, { "epoch": 53.48, "learning_rate": 0.0002270175438596491, "loss": 0.092, "step": 3583 }, { "epoch": 53.49, "learning_rate": 0.00022698245614035086, "loss": 0.0045, "step": 3584 }, { "epoch": 53.51, "learning_rate": 0.0002269473684210526, "loss": 0.034, "step": 3585 }, { "epoch": 53.52, "learning_rate": 0.00022691228070175436, "loss": 0.1851, "step": 3586 }, { "epoch": 53.54, "learning_rate": 0.00022687719298245614, "loss": 0.2542, "step": 3587 }, { "epoch": 53.55, "learning_rate": 0.00022684210526315788, "loss": 0.0296, "step": 3588 }, { "epoch": 53.57, "learning_rate": 0.00022680701754385963, "loss": 0.0841, "step": 3589 }, { "epoch": 53.58, "learning_rate": 0.00022677192982456138, "loss": 0.1696, "step": 3590 }, { "epoch": 53.59, "learning_rate": 0.00022673684210526316, "loss": 0.021, "step": 3591 }, { "epoch": 53.61, "learning_rate": 0.0002267017543859649, "loss": 0.0085, "step": 3592 }, { "epoch": 53.62, "learning_rate": 0.00022666666666666663, "loss": 0.0084, "step": 3593 }, { "epoch": 53.64, "learning_rate": 0.00022663157894736838, "loss": 0.1819, "step": 3594 }, { "epoch": 53.65, "learning_rate": 0.00022659649122807015, "loss": 0.0062, "step": 3595 }, { "epoch": 53.67, "learning_rate": 0.0002265614035087719, "loss": 0.2001, "step": 3596 }, { "epoch": 53.68, "learning_rate": 0.00022652631578947365, "loss": 0.0464, "step": 3597 }, { "epoch": 53.7, "learning_rate": 0.00022649122807017543, "loss": 0.1304, "step": 3598 }, { "epoch": 53.71, "learning_rate": 0.00022645614035087718, "loss": 0.0339, "step": 3599 }, { "epoch": 53.73, "learning_rate": 0.00022642105263157892, "loss": 0.0062, "step": 3600 }, { "epoch": 53.73, "eval_accuracy": 0.8277043563387175, "eval_f1": 0.8286378990021197, "eval_loss": 0.6871868968009949, "eval_runtime": 343.9702, "eval_samples_per_second": 11.879, "eval_steps_per_second": 0.744, "step": 3600 }, { "epoch": 53.74, "learning_rate": 0.00022638596491228067, "loss": 0.1994, "step": 3601 }, { "epoch": 53.76, "learning_rate": 0.00022635087719298245, "loss": 0.0094, "step": 3602 }, { "epoch": 53.77, "learning_rate": 0.0002263157894736842, "loss": 0.0321, "step": 3603 }, { "epoch": 53.79, "learning_rate": 0.00022628070175438595, "loss": 0.2154, "step": 3604 }, { "epoch": 53.8, "learning_rate": 0.0002262456140350877, "loss": 0.0209, "step": 3605 }, { "epoch": 53.82, "learning_rate": 0.00022621052631578947, "loss": 0.159, "step": 3606 }, { "epoch": 53.83, "learning_rate": 0.00022617543859649122, "loss": 0.0349, "step": 3607 }, { "epoch": 53.85, "learning_rate": 0.00022614035087719294, "loss": 0.0212, "step": 3608 }, { "epoch": 53.86, "learning_rate": 0.0002261052631578947, "loss": 0.1698, "step": 3609 }, { "epoch": 53.88, "learning_rate": 0.00022607017543859647, "loss": 0.0174, "step": 3610 }, { "epoch": 53.89, "learning_rate": 0.00022603508771929822, "loss": 0.1267, "step": 3611 }, { "epoch": 53.91, "learning_rate": 0.00022599999999999996, "loss": 0.0366, "step": 3612 }, { "epoch": 53.92, "learning_rate": 0.00022596491228070174, "loss": 0.1204, "step": 3613 }, { "epoch": 53.94, "learning_rate": 0.0002259298245614035, "loss": 0.071, "step": 3614 }, { "epoch": 53.95, "learning_rate": 0.00022589473684210524, "loss": 0.0097, "step": 3615 }, { "epoch": 53.97, "learning_rate": 0.00022585964912280699, "loss": 0.0107, "step": 3616 }, { "epoch": 53.98, "learning_rate": 0.00022582456140350876, "loss": 0.1535, "step": 3617 }, { "epoch": 54.0, "learning_rate": 0.0002257894736842105, "loss": 0.009, "step": 3618 }, { "epoch": 54.01, "learning_rate": 0.00022575438596491226, "loss": 0.0093, "step": 3619 }, { "epoch": 54.03, "learning_rate": 0.000225719298245614, "loss": 0.3425, "step": 3620 }, { "epoch": 54.04, "learning_rate": 0.00022568421052631578, "loss": 0.035, "step": 3621 }, { "epoch": 54.06, "learning_rate": 0.00022564912280701753, "loss": 0.0755, "step": 3622 }, { "epoch": 54.07, "learning_rate": 0.00022561403508771928, "loss": 0.0045, "step": 3623 }, { "epoch": 54.09, "learning_rate": 0.00022557894736842106, "loss": 0.2203, "step": 3624 }, { "epoch": 54.1, "learning_rate": 0.0002255438596491228, "loss": 0.326, "step": 3625 }, { "epoch": 54.12, "learning_rate": 0.00022550877192982453, "loss": 0.0032, "step": 3626 }, { "epoch": 54.13, "learning_rate": 0.00022547368421052628, "loss": 0.1305, "step": 3627 }, { "epoch": 54.15, "learning_rate": 0.00022543859649122805, "loss": 0.2871, "step": 3628 }, { "epoch": 54.16, "learning_rate": 0.0002254035087719298, "loss": 0.0593, "step": 3629 }, { "epoch": 54.18, "learning_rate": 0.00022536842105263155, "loss": 0.022, "step": 3630 }, { "epoch": 54.19, "learning_rate": 0.0002253333333333333, "loss": 0.0904, "step": 3631 }, { "epoch": 54.21, "learning_rate": 0.00022529824561403508, "loss": 0.0036, "step": 3632 }, { "epoch": 54.22, "learning_rate": 0.00022526315789473682, "loss": 0.0114, "step": 3633 }, { "epoch": 54.24, "learning_rate": 0.00022522807017543857, "loss": 0.1628, "step": 3634 }, { "epoch": 54.25, "learning_rate": 0.00022519298245614035, "loss": 0.2939, "step": 3635 }, { "epoch": 54.27, "learning_rate": 0.0002251578947368421, "loss": 0.0156, "step": 3636 }, { "epoch": 54.28, "learning_rate": 0.00022512280701754385, "loss": 0.0028, "step": 3637 }, { "epoch": 54.3, "learning_rate": 0.0002250877192982456, "loss": 0.0056, "step": 3638 }, { "epoch": 54.31, "learning_rate": 0.00022505263157894737, "loss": 0.0847, "step": 3639 }, { "epoch": 54.33, "learning_rate": 0.00022501754385964912, "loss": 0.3336, "step": 3640 }, { "epoch": 54.34, "learning_rate": 0.00022498245614035084, "loss": 0.1036, "step": 3641 }, { "epoch": 54.36, "learning_rate": 0.0002249473684210526, "loss": 0.0032, "step": 3642 }, { "epoch": 54.37, "learning_rate": 0.00022491228070175437, "loss": 0.0345, "step": 3643 }, { "epoch": 54.39, "learning_rate": 0.00022487719298245611, "loss": 0.1888, "step": 3644 }, { "epoch": 54.4, "learning_rate": 0.00022484210526315786, "loss": 0.0094, "step": 3645 }, { "epoch": 54.42, "learning_rate": 0.0002248070175438596, "loss": 0.0244, "step": 3646 }, { "epoch": 54.43, "learning_rate": 0.0002247719298245614, "loss": 0.0999, "step": 3647 }, { "epoch": 54.45, "learning_rate": 0.00022473684210526314, "loss": 0.0718, "step": 3648 }, { "epoch": 54.46, "learning_rate": 0.00022470175438596489, "loss": 0.2473, "step": 3649 }, { "epoch": 54.48, "learning_rate": 0.00022466666666666666, "loss": 0.164, "step": 3650 }, { "epoch": 54.49, "learning_rate": 0.0002246315789473684, "loss": 0.2189, "step": 3651 }, { "epoch": 54.51, "learning_rate": 0.00022459649122807016, "loss": 0.1645, "step": 3652 }, { "epoch": 54.52, "learning_rate": 0.0002245614035087719, "loss": 0.0121, "step": 3653 }, { "epoch": 54.54, "learning_rate": 0.00022452631578947368, "loss": 0.0113, "step": 3654 }, { "epoch": 54.55, "learning_rate": 0.00022449122807017543, "loss": 0.056, "step": 3655 }, { "epoch": 54.57, "learning_rate": 0.00022445614035087715, "loss": 0.1074, "step": 3656 }, { "epoch": 54.58, "learning_rate": 0.0002244210526315789, "loss": 0.2388, "step": 3657 }, { "epoch": 54.59, "learning_rate": 0.00022438596491228068, "loss": 0.0244, "step": 3658 }, { "epoch": 54.61, "learning_rate": 0.00022435087719298243, "loss": 0.0135, "step": 3659 }, { "epoch": 54.62, "learning_rate": 0.00022431578947368418, "loss": 0.004, "step": 3660 }, { "epoch": 54.64, "learning_rate": 0.00022428070175438595, "loss": 0.1477, "step": 3661 }, { "epoch": 54.65, "learning_rate": 0.0002242456140350877, "loss": 0.012, "step": 3662 }, { "epoch": 54.67, "learning_rate": 0.00022421052631578945, "loss": 0.2786, "step": 3663 }, { "epoch": 54.68, "learning_rate": 0.0002241754385964912, "loss": 0.0527, "step": 3664 }, { "epoch": 54.7, "learning_rate": 0.00022414035087719297, "loss": 0.0731, "step": 3665 }, { "epoch": 54.71, "learning_rate": 0.00022410526315789472, "loss": 0.2216, "step": 3666 }, { "epoch": 54.73, "learning_rate": 0.00022407017543859647, "loss": 0.0036, "step": 3667 }, { "epoch": 54.74, "learning_rate": 0.00022403508771929822, "loss": 0.0137, "step": 3668 }, { "epoch": 54.76, "learning_rate": 0.000224, "loss": 0.3671, "step": 3669 }, { "epoch": 54.77, "learning_rate": 0.00022396491228070175, "loss": 0.3146, "step": 3670 }, { "epoch": 54.79, "learning_rate": 0.0002239298245614035, "loss": 0.2034, "step": 3671 }, { "epoch": 54.8, "learning_rate": 0.00022389473684210527, "loss": 0.1474, "step": 3672 }, { "epoch": 54.82, "learning_rate": 0.00022385964912280702, "loss": 0.0591, "step": 3673 }, { "epoch": 54.83, "learning_rate": 0.00022382456140350874, "loss": 0.0043, "step": 3674 }, { "epoch": 54.85, "learning_rate": 0.0002237894736842105, "loss": 0.0173, "step": 3675 }, { "epoch": 54.86, "learning_rate": 0.00022375438596491227, "loss": 0.1988, "step": 3676 }, { "epoch": 54.88, "learning_rate": 0.00022371929824561401, "loss": 0.4382, "step": 3677 }, { "epoch": 54.89, "learning_rate": 0.00022368421052631576, "loss": 0.0258, "step": 3678 }, { "epoch": 54.91, "learning_rate": 0.0002236491228070175, "loss": 0.0638, "step": 3679 }, { "epoch": 54.92, "learning_rate": 0.0002236140350877193, "loss": 0.1948, "step": 3680 }, { "epoch": 54.94, "learning_rate": 0.00022357894736842104, "loss": 0.2229, "step": 3681 }, { "epoch": 54.95, "learning_rate": 0.00022354385964912278, "loss": 0.0038, "step": 3682 }, { "epoch": 54.97, "learning_rate": 0.00022350877192982453, "loss": 0.451, "step": 3683 }, { "epoch": 54.98, "learning_rate": 0.0002234736842105263, "loss": 0.0037, "step": 3684 }, { "epoch": 55.0, "learning_rate": 0.00022343859649122806, "loss": 0.1133, "step": 3685 }, { "epoch": 55.01, "learning_rate": 0.0002234035087719298, "loss": 0.1625, "step": 3686 }, { "epoch": 55.03, "learning_rate": 0.00022336842105263158, "loss": 0.0319, "step": 3687 }, { "epoch": 55.04, "learning_rate": 0.00022333333333333333, "loss": 0.0071, "step": 3688 }, { "epoch": 55.06, "learning_rate": 0.00022329824561403505, "loss": 0.0307, "step": 3689 }, { "epoch": 55.07, "learning_rate": 0.0002232631578947368, "loss": 0.0852, "step": 3690 }, { "epoch": 55.09, "learning_rate": 0.00022322807017543858, "loss": 0.0048, "step": 3691 }, { "epoch": 55.1, "learning_rate": 0.00022319298245614033, "loss": 0.019, "step": 3692 }, { "epoch": 55.12, "learning_rate": 0.00022315789473684208, "loss": 0.0731, "step": 3693 }, { "epoch": 55.13, "learning_rate": 0.00022312280701754382, "loss": 0.129, "step": 3694 }, { "epoch": 55.15, "learning_rate": 0.0002230877192982456, "loss": 0.037, "step": 3695 }, { "epoch": 55.16, "learning_rate": 0.00022305263157894735, "loss": 0.0068, "step": 3696 }, { "epoch": 55.18, "learning_rate": 0.0002230175438596491, "loss": 0.0246, "step": 3697 }, { "epoch": 55.19, "learning_rate": 0.00022298245614035087, "loss": 0.1615, "step": 3698 }, { "epoch": 55.21, "learning_rate": 0.00022294736842105262, "loss": 0.0029, "step": 3699 }, { "epoch": 55.22, "learning_rate": 0.00022291228070175437, "loss": 0.0246, "step": 3700 }, { "epoch": 55.24, "learning_rate": 0.00022287719298245612, "loss": 0.0065, "step": 3701 }, { "epoch": 55.25, "learning_rate": 0.0002228421052631579, "loss": 0.0155, "step": 3702 }, { "epoch": 55.27, "learning_rate": 0.00022280701754385964, "loss": 0.0479, "step": 3703 }, { "epoch": 55.28, "learning_rate": 0.00022277192982456137, "loss": 0.0084, "step": 3704 }, { "epoch": 55.3, "learning_rate": 0.00022273684210526312, "loss": 0.3419, "step": 3705 }, { "epoch": 55.31, "learning_rate": 0.0002227017543859649, "loss": 0.2642, "step": 3706 }, { "epoch": 55.33, "learning_rate": 0.00022266666666666664, "loss": 0.0043, "step": 3707 }, { "epoch": 55.34, "learning_rate": 0.0002226315789473684, "loss": 0.0485, "step": 3708 }, { "epoch": 55.36, "learning_rate": 0.00022259649122807014, "loss": 0.0498, "step": 3709 }, { "epoch": 55.37, "learning_rate": 0.0002225614035087719, "loss": 0.0062, "step": 3710 }, { "epoch": 55.39, "learning_rate": 0.00022252631578947366, "loss": 0.0073, "step": 3711 }, { "epoch": 55.4, "learning_rate": 0.0002224912280701754, "loss": 0.1017, "step": 3712 }, { "epoch": 55.42, "learning_rate": 0.0002224561403508772, "loss": 0.2038, "step": 3713 }, { "epoch": 55.43, "learning_rate": 0.00022242105263157894, "loss": 0.1198, "step": 3714 }, { "epoch": 55.45, "learning_rate": 0.00022238596491228068, "loss": 0.0198, "step": 3715 }, { "epoch": 55.46, "learning_rate": 0.00022235087719298243, "loss": 0.1242, "step": 3716 }, { "epoch": 55.48, "learning_rate": 0.0002223157894736842, "loss": 0.0246, "step": 3717 }, { "epoch": 55.49, "learning_rate": 0.00022228070175438596, "loss": 0.0113, "step": 3718 }, { "epoch": 55.51, "learning_rate": 0.0002222456140350877, "loss": 0.0202, "step": 3719 }, { "epoch": 55.52, "learning_rate": 0.00022221052631578943, "loss": 0.0141, "step": 3720 }, { "epoch": 55.54, "learning_rate": 0.0002221754385964912, "loss": 0.0651, "step": 3721 }, { "epoch": 55.55, "learning_rate": 0.00022214035087719295, "loss": 0.0952, "step": 3722 }, { "epoch": 55.57, "learning_rate": 0.0002221052631578947, "loss": 0.1741, "step": 3723 }, { "epoch": 55.58, "learning_rate": 0.00022207017543859648, "loss": 0.5287, "step": 3724 }, { "epoch": 55.59, "learning_rate": 0.00022203508771929823, "loss": 0.1499, "step": 3725 }, { "epoch": 55.61, "learning_rate": 0.00022199999999999998, "loss": 0.2446, "step": 3726 }, { "epoch": 55.62, "learning_rate": 0.00022196491228070172, "loss": 0.0443, "step": 3727 }, { "epoch": 55.64, "learning_rate": 0.0002219298245614035, "loss": 0.0117, "step": 3728 }, { "epoch": 55.65, "learning_rate": 0.00022189473684210525, "loss": 0.0446, "step": 3729 }, { "epoch": 55.67, "learning_rate": 0.000221859649122807, "loss": 0.1981, "step": 3730 }, { "epoch": 55.68, "learning_rate": 0.00022182456140350875, "loss": 0.0033, "step": 3731 }, { "epoch": 55.7, "learning_rate": 0.00022178947368421052, "loss": 0.1273, "step": 3732 }, { "epoch": 55.71, "learning_rate": 0.00022175438596491227, "loss": 0.0741, "step": 3733 }, { "epoch": 55.73, "learning_rate": 0.00022171929824561402, "loss": 0.0265, "step": 3734 }, { "epoch": 55.74, "learning_rate": 0.0002216842105263158, "loss": 0.0115, "step": 3735 }, { "epoch": 55.76, "learning_rate": 0.00022164912280701754, "loss": 0.0161, "step": 3736 }, { "epoch": 55.77, "learning_rate": 0.00022161403508771927, "loss": 0.0085, "step": 3737 }, { "epoch": 55.79, "learning_rate": 0.00022157894736842101, "loss": 0.2448, "step": 3738 }, { "epoch": 55.8, "learning_rate": 0.0002215438596491228, "loss": 0.0145, "step": 3739 }, { "epoch": 55.82, "learning_rate": 0.00022150877192982454, "loss": 0.0153, "step": 3740 }, { "epoch": 55.83, "learning_rate": 0.0002214736842105263, "loss": 0.018, "step": 3741 }, { "epoch": 55.85, "learning_rate": 0.00022143859649122804, "loss": 0.2306, "step": 3742 }, { "epoch": 55.86, "learning_rate": 0.0002214035087719298, "loss": 0.0034, "step": 3743 }, { "epoch": 55.88, "learning_rate": 0.00022136842105263156, "loss": 0.0116, "step": 3744 }, { "epoch": 55.89, "learning_rate": 0.0002213333333333333, "loss": 0.0537, "step": 3745 }, { "epoch": 55.91, "learning_rate": 0.00022129824561403506, "loss": 0.1457, "step": 3746 }, { "epoch": 55.92, "learning_rate": 0.00022126315789473683, "loss": 0.0771, "step": 3747 }, { "epoch": 55.94, "learning_rate": 0.00022122807017543858, "loss": 0.0036, "step": 3748 }, { "epoch": 55.95, "learning_rate": 0.00022119298245614033, "loss": 0.1992, "step": 3749 }, { "epoch": 55.97, "learning_rate": 0.0002211578947368421, "loss": 0.1269, "step": 3750 }, { "epoch": 55.98, "learning_rate": 0.00022112280701754386, "loss": 0.209, "step": 3751 }, { "epoch": 56.0, "learning_rate": 0.00022108771929824558, "loss": 0.0806, "step": 3752 }, { "epoch": 56.01, "learning_rate": 0.00022105263157894733, "loss": 0.3064, "step": 3753 }, { "epoch": 56.03, "learning_rate": 0.0002210175438596491, "loss": 0.1929, "step": 3754 }, { "epoch": 56.04, "learning_rate": 0.00022098245614035085, "loss": 0.2673, "step": 3755 }, { "epoch": 56.06, "learning_rate": 0.0002209473684210526, "loss": 0.2803, "step": 3756 }, { "epoch": 56.07, "learning_rate": 0.00022091228070175435, "loss": 0.0192, "step": 3757 }, { "epoch": 56.09, "learning_rate": 0.00022087719298245613, "loss": 0.1947, "step": 3758 }, { "epoch": 56.1, "learning_rate": 0.00022084210526315787, "loss": 0.0723, "step": 3759 }, { "epoch": 56.12, "learning_rate": 0.00022080701754385962, "loss": 0.3244, "step": 3760 }, { "epoch": 56.13, "learning_rate": 0.0002207719298245614, "loss": 0.0238, "step": 3761 }, { "epoch": 56.15, "learning_rate": 0.00022073684210526315, "loss": 0.0308, "step": 3762 }, { "epoch": 56.16, "learning_rate": 0.0002207017543859649, "loss": 0.1321, "step": 3763 }, { "epoch": 56.18, "learning_rate": 0.00022066666666666665, "loss": 0.0051, "step": 3764 }, { "epoch": 56.19, "learning_rate": 0.00022063157894736842, "loss": 0.0057, "step": 3765 }, { "epoch": 56.21, "learning_rate": 0.00022059649122807017, "loss": 0.0935, "step": 3766 }, { "epoch": 56.22, "learning_rate": 0.00022056140350877192, "loss": 0.009, "step": 3767 }, { "epoch": 56.24, "learning_rate": 0.00022052631578947364, "loss": 0.0077, "step": 3768 }, { "epoch": 56.25, "learning_rate": 0.00022049122807017542, "loss": 0.0445, "step": 3769 }, { "epoch": 56.27, "learning_rate": 0.00022045614035087717, "loss": 0.1608, "step": 3770 }, { "epoch": 56.28, "learning_rate": 0.00022042105263157891, "loss": 0.1767, "step": 3771 }, { "epoch": 56.3, "learning_rate": 0.00022038596491228066, "loss": 0.0302, "step": 3772 }, { "epoch": 56.31, "learning_rate": 0.00022035087719298244, "loss": 0.1324, "step": 3773 }, { "epoch": 56.33, "learning_rate": 0.0002203157894736842, "loss": 0.0097, "step": 3774 }, { "epoch": 56.34, "learning_rate": 0.00022028070175438594, "loss": 0.3323, "step": 3775 }, { "epoch": 56.36, "learning_rate": 0.0002202456140350877, "loss": 0.1146, "step": 3776 }, { "epoch": 56.37, "learning_rate": 0.00022021052631578946, "loss": 0.1479, "step": 3777 }, { "epoch": 56.39, "learning_rate": 0.0002201754385964912, "loss": 0.1966, "step": 3778 }, { "epoch": 56.4, "learning_rate": 0.00022014035087719296, "loss": 0.0405, "step": 3779 }, { "epoch": 56.42, "learning_rate": 0.00022010526315789473, "loss": 0.3266, "step": 3780 }, { "epoch": 56.43, "learning_rate": 0.00022007017543859648, "loss": 0.0037, "step": 3781 }, { "epoch": 56.45, "learning_rate": 0.00022003508771929823, "loss": 0.089, "step": 3782 }, { "epoch": 56.46, "learning_rate": 0.00021999999999999995, "loss": 0.2181, "step": 3783 }, { "epoch": 56.48, "learning_rate": 0.00021996491228070176, "loss": 0.0876, "step": 3784 }, { "epoch": 56.49, "learning_rate": 0.00021992982456140348, "loss": 0.2992, "step": 3785 }, { "epoch": 56.51, "learning_rate": 0.00021989473684210523, "loss": 0.0213, "step": 3786 }, { "epoch": 56.52, "learning_rate": 0.000219859649122807, "loss": 0.1602, "step": 3787 }, { "epoch": 56.54, "learning_rate": 0.00021982456140350875, "loss": 0.2594, "step": 3788 }, { "epoch": 56.55, "learning_rate": 0.0002197894736842105, "loss": 0.013, "step": 3789 }, { "epoch": 56.57, "learning_rate": 0.00021975438596491225, "loss": 0.041, "step": 3790 }, { "epoch": 56.58, "learning_rate": 0.00021971929824561403, "loss": 0.1021, "step": 3791 }, { "epoch": 56.59, "learning_rate": 0.00021968421052631577, "loss": 0.1141, "step": 3792 }, { "epoch": 56.61, "learning_rate": 0.00021964912280701752, "loss": 0.0101, "step": 3793 }, { "epoch": 56.62, "learning_rate": 0.00021961403508771927, "loss": 0.0539, "step": 3794 }, { "epoch": 56.64, "learning_rate": 0.00021957894736842105, "loss": 0.0914, "step": 3795 }, { "epoch": 56.65, "learning_rate": 0.0002195438596491228, "loss": 0.0729, "step": 3796 }, { "epoch": 56.67, "learning_rate": 0.00021950877192982454, "loss": 0.0327, "step": 3797 }, { "epoch": 56.68, "learning_rate": 0.00021947368421052632, "loss": 0.0187, "step": 3798 }, { "epoch": 56.7, "learning_rate": 0.00021943859649122807, "loss": 0.0103, "step": 3799 }, { "epoch": 56.71, "learning_rate": 0.0002194035087719298, "loss": 0.1781, "step": 3800 }, { "epoch": 56.71, "eval_accuracy": 0.8389623103279491, "eval_f1": 0.8392926046674013, "eval_loss": 0.6989664435386658, "eval_runtime": 344.8439, "eval_samples_per_second": 11.849, "eval_steps_per_second": 0.742, "step": 3800 }, { "epoch": 56.73, "learning_rate": 0.00021936842105263154, "loss": 0.0225, "step": 3801 }, { "epoch": 56.74, "learning_rate": 0.00021933333333333332, "loss": 0.1503, "step": 3802 }, { "epoch": 56.76, "learning_rate": 0.00021929824561403506, "loss": 0.0784, "step": 3803 }, { "epoch": 56.77, "learning_rate": 0.0002192631578947368, "loss": 0.2565, "step": 3804 }, { "epoch": 56.79, "learning_rate": 0.00021922807017543856, "loss": 0.1283, "step": 3805 }, { "epoch": 56.8, "learning_rate": 0.00021919298245614034, "loss": 0.2075, "step": 3806 }, { "epoch": 56.82, "learning_rate": 0.0002191578947368421, "loss": 0.1317, "step": 3807 }, { "epoch": 56.83, "learning_rate": 0.00021912280701754384, "loss": 0.0164, "step": 3808 }, { "epoch": 56.85, "learning_rate": 0.00021908771929824558, "loss": 0.1049, "step": 3809 }, { "epoch": 56.86, "learning_rate": 0.00021905263157894736, "loss": 0.0052, "step": 3810 }, { "epoch": 56.88, "learning_rate": 0.0002190175438596491, "loss": 0.1653, "step": 3811 }, { "epoch": 56.89, "learning_rate": 0.00021898245614035086, "loss": 0.0032, "step": 3812 }, { "epoch": 56.91, "learning_rate": 0.00021894736842105263, "loss": 0.0823, "step": 3813 }, { "epoch": 56.92, "learning_rate": 0.00021891228070175438, "loss": 0.007, "step": 3814 }, { "epoch": 56.94, "learning_rate": 0.0002188771929824561, "loss": 0.0183, "step": 3815 }, { "epoch": 56.95, "learning_rate": 0.00021884210526315785, "loss": 0.0204, "step": 3816 }, { "epoch": 56.97, "learning_rate": 0.00021880701754385963, "loss": 0.0054, "step": 3817 }, { "epoch": 56.98, "learning_rate": 0.00021877192982456138, "loss": 0.0143, "step": 3818 }, { "epoch": 57.0, "learning_rate": 0.00021873684210526313, "loss": 0.2526, "step": 3819 }, { "epoch": 57.01, "learning_rate": 0.00021870175438596488, "loss": 0.4135, "step": 3820 }, { "epoch": 57.03, "learning_rate": 0.00021866666666666665, "loss": 0.1704, "step": 3821 }, { "epoch": 57.04, "learning_rate": 0.0002186315789473684, "loss": 0.1849, "step": 3822 }, { "epoch": 57.06, "learning_rate": 0.00021859649122807015, "loss": 0.1036, "step": 3823 }, { "epoch": 57.07, "learning_rate": 0.00021856140350877192, "loss": 0.0054, "step": 3824 }, { "epoch": 57.09, "learning_rate": 0.00021852631578947367, "loss": 0.0035, "step": 3825 }, { "epoch": 57.1, "learning_rate": 0.00021849122807017542, "loss": 0.1784, "step": 3826 }, { "epoch": 57.12, "learning_rate": 0.00021845614035087717, "loss": 0.0086, "step": 3827 }, { "epoch": 57.13, "learning_rate": 0.00021842105263157895, "loss": 0.2458, "step": 3828 }, { "epoch": 57.15, "learning_rate": 0.0002183859649122807, "loss": 0.1473, "step": 3829 }, { "epoch": 57.16, "learning_rate": 0.00021835087719298244, "loss": 0.0109, "step": 3830 }, { "epoch": 57.18, "learning_rate": 0.00021831578947368417, "loss": 0.0234, "step": 3831 }, { "epoch": 57.19, "learning_rate": 0.00021828070175438597, "loss": 0.1154, "step": 3832 }, { "epoch": 57.21, "learning_rate": 0.0002182456140350877, "loss": 0.0863, "step": 3833 }, { "epoch": 57.22, "learning_rate": 0.00021821052631578944, "loss": 0.0019, "step": 3834 }, { "epoch": 57.24, "learning_rate": 0.00021817543859649122, "loss": 0.0907, "step": 3835 }, { "epoch": 57.25, "learning_rate": 0.00021814035087719296, "loss": 0.0025, "step": 3836 }, { "epoch": 57.27, "learning_rate": 0.0002181052631578947, "loss": 0.16, "step": 3837 }, { "epoch": 57.28, "learning_rate": 0.00021807017543859646, "loss": 0.2068, "step": 3838 }, { "epoch": 57.3, "learning_rate": 0.00021803508771929824, "loss": 0.0291, "step": 3839 }, { "epoch": 57.31, "learning_rate": 0.00021799999999999999, "loss": 0.0851, "step": 3840 }, { "epoch": 57.33, "learning_rate": 0.00021796491228070174, "loss": 0.0864, "step": 3841 }, { "epoch": 57.34, "learning_rate": 0.00021792982456140348, "loss": 0.0652, "step": 3842 }, { "epoch": 57.36, "learning_rate": 0.00021789473684210526, "loss": 0.0539, "step": 3843 }, { "epoch": 57.37, "learning_rate": 0.000217859649122807, "loss": 0.0076, "step": 3844 }, { "epoch": 57.39, "learning_rate": 0.00021782456140350876, "loss": 0.0818, "step": 3845 }, { "epoch": 57.4, "learning_rate": 0.00021778947368421048, "loss": 0.0047, "step": 3846 }, { "epoch": 57.42, "learning_rate": 0.00021775438596491228, "loss": 0.0698, "step": 3847 }, { "epoch": 57.43, "learning_rate": 0.000217719298245614, "loss": 0.002, "step": 3848 }, { "epoch": 57.45, "learning_rate": 0.00021768421052631575, "loss": 0.0138, "step": 3849 }, { "epoch": 57.46, "learning_rate": 0.00021764912280701753, "loss": 0.0896, "step": 3850 }, { "epoch": 57.48, "learning_rate": 0.00021761403508771928, "loss": 0.0029, "step": 3851 }, { "epoch": 57.49, "learning_rate": 0.00021757894736842103, "loss": 0.2649, "step": 3852 }, { "epoch": 57.51, "learning_rate": 0.00021754385964912277, "loss": 0.0521, "step": 3853 }, { "epoch": 57.52, "learning_rate": 0.00021750877192982455, "loss": 0.3472, "step": 3854 }, { "epoch": 57.54, "learning_rate": 0.0002174736842105263, "loss": 0.1763, "step": 3855 }, { "epoch": 57.55, "learning_rate": 0.00021743859649122805, "loss": 0.0379, "step": 3856 }, { "epoch": 57.57, "learning_rate": 0.0002174035087719298, "loss": 0.1569, "step": 3857 }, { "epoch": 57.58, "learning_rate": 0.00021736842105263157, "loss": 0.0211, "step": 3858 }, { "epoch": 57.59, "learning_rate": 0.00021733333333333332, "loss": 0.0072, "step": 3859 }, { "epoch": 57.61, "learning_rate": 0.00021729824561403507, "loss": 0.0209, "step": 3860 }, { "epoch": 57.62, "learning_rate": 0.00021726315789473685, "loss": 0.0629, "step": 3861 }, { "epoch": 57.64, "learning_rate": 0.0002172280701754386, "loss": 0.0572, "step": 3862 }, { "epoch": 57.65, "learning_rate": 0.00021719298245614032, "loss": 0.0058, "step": 3863 }, { "epoch": 57.67, "learning_rate": 0.00021715789473684207, "loss": 0.192, "step": 3864 }, { "epoch": 57.68, "learning_rate": 0.00021712280701754384, "loss": 0.005, "step": 3865 }, { "epoch": 57.7, "learning_rate": 0.0002170877192982456, "loss": 0.0027, "step": 3866 }, { "epoch": 57.71, "learning_rate": 0.00021705263157894734, "loss": 0.0573, "step": 3867 }, { "epoch": 57.73, "learning_rate": 0.0002170175438596491, "loss": 0.0142, "step": 3868 }, { "epoch": 57.74, "learning_rate": 0.00021698245614035086, "loss": 0.0021, "step": 3869 }, { "epoch": 57.76, "learning_rate": 0.0002169473684210526, "loss": 0.0478, "step": 3870 }, { "epoch": 57.77, "learning_rate": 0.00021691228070175436, "loss": 0.0061, "step": 3871 }, { "epoch": 57.79, "learning_rate": 0.0002168771929824561, "loss": 0.0036, "step": 3872 }, { "epoch": 57.8, "learning_rate": 0.00021684210526315789, "loss": 0.0152, "step": 3873 }, { "epoch": 57.82, "learning_rate": 0.00021680701754385963, "loss": 0.0777, "step": 3874 }, { "epoch": 57.83, "learning_rate": 0.00021677192982456138, "loss": 0.0093, "step": 3875 }, { "epoch": 57.85, "learning_rate": 0.00021673684210526316, "loss": 0.2739, "step": 3876 }, { "epoch": 57.86, "learning_rate": 0.0002167017543859649, "loss": 0.1036, "step": 3877 }, { "epoch": 57.88, "learning_rate": 0.00021666666666666666, "loss": 0.0132, "step": 3878 }, { "epoch": 57.89, "learning_rate": 0.00021663157894736838, "loss": 0.0039, "step": 3879 }, { "epoch": 57.91, "learning_rate": 0.00021659649122807018, "loss": 0.0541, "step": 3880 }, { "epoch": 57.92, "learning_rate": 0.0002165614035087719, "loss": 0.0431, "step": 3881 }, { "epoch": 57.94, "learning_rate": 0.00021652631578947365, "loss": 0.1137, "step": 3882 }, { "epoch": 57.95, "learning_rate": 0.0002164912280701754, "loss": 0.0022, "step": 3883 }, { "epoch": 57.97, "learning_rate": 0.00021645614035087718, "loss": 0.1969, "step": 3884 }, { "epoch": 57.98, "learning_rate": 0.00021642105263157893, "loss": 0.0212, "step": 3885 }, { "epoch": 58.0, "learning_rate": 0.00021638596491228067, "loss": 0.3204, "step": 3886 }, { "epoch": 58.01, "learning_rate": 0.00021635087719298245, "loss": 0.0098, "step": 3887 }, { "epoch": 58.03, "learning_rate": 0.0002163157894736842, "loss": 0.1595, "step": 3888 }, { "epoch": 58.04, "learning_rate": 0.00021628070175438595, "loss": 0.086, "step": 3889 }, { "epoch": 58.06, "learning_rate": 0.0002162456140350877, "loss": 0.0183, "step": 3890 }, { "epoch": 58.07, "learning_rate": 0.00021621052631578947, "loss": 0.2086, "step": 3891 }, { "epoch": 58.09, "learning_rate": 0.00021617543859649122, "loss": 0.0196, "step": 3892 }, { "epoch": 58.1, "learning_rate": 0.00021614035087719297, "loss": 0.1816, "step": 3893 }, { "epoch": 58.12, "learning_rate": 0.0002161052631578947, "loss": 0.2019, "step": 3894 }, { "epoch": 58.13, "learning_rate": 0.0002160701754385965, "loss": 0.0304, "step": 3895 }, { "epoch": 58.15, "learning_rate": 0.00021603508771929822, "loss": 0.0164, "step": 3896 }, { "epoch": 58.16, "learning_rate": 0.00021599999999999996, "loss": 0.002, "step": 3897 }, { "epoch": 58.18, "learning_rate": 0.00021596491228070174, "loss": 0.0027, "step": 3898 }, { "epoch": 58.19, "learning_rate": 0.0002159298245614035, "loss": 0.0026, "step": 3899 }, { "epoch": 58.21, "learning_rate": 0.00021589473684210524, "loss": 0.0901, "step": 3900 }, { "epoch": 58.22, "learning_rate": 0.000215859649122807, "loss": 0.0058, "step": 3901 }, { "epoch": 58.24, "learning_rate": 0.00021582456140350876, "loss": 0.0685, "step": 3902 }, { "epoch": 58.25, "learning_rate": 0.0002157894736842105, "loss": 0.0045, "step": 3903 }, { "epoch": 58.27, "learning_rate": 0.00021575438596491226, "loss": 0.2307, "step": 3904 }, { "epoch": 58.28, "learning_rate": 0.000215719298245614, "loss": 0.0077, "step": 3905 }, { "epoch": 58.3, "learning_rate": 0.00021568421052631579, "loss": 0.0029, "step": 3906 }, { "epoch": 58.31, "learning_rate": 0.00021564912280701753, "loss": 0.2945, "step": 3907 }, { "epoch": 58.33, "learning_rate": 0.00021561403508771928, "loss": 0.0025, "step": 3908 }, { "epoch": 58.34, "learning_rate": 0.000215578947368421, "loss": 0.019, "step": 3909 }, { "epoch": 58.36, "learning_rate": 0.0002155438596491228, "loss": 0.0021, "step": 3910 }, { "epoch": 58.37, "learning_rate": 0.00021550877192982453, "loss": 0.0225, "step": 3911 }, { "epoch": 58.39, "learning_rate": 0.00021547368421052628, "loss": 0.0101, "step": 3912 }, { "epoch": 58.4, "learning_rate": 0.00021543859649122805, "loss": 0.0075, "step": 3913 }, { "epoch": 58.42, "learning_rate": 0.0002154035087719298, "loss": 0.0023, "step": 3914 }, { "epoch": 58.43, "learning_rate": 0.00021536842105263155, "loss": 0.0037, "step": 3915 }, { "epoch": 58.45, "learning_rate": 0.0002153333333333333, "loss": 0.0055, "step": 3916 }, { "epoch": 58.46, "learning_rate": 0.00021529824561403508, "loss": 0.0205, "step": 3917 }, { "epoch": 58.48, "learning_rate": 0.00021526315789473682, "loss": 0.2707, "step": 3918 }, { "epoch": 58.49, "learning_rate": 0.00021522807017543857, "loss": 0.0289, "step": 3919 }, { "epoch": 58.51, "learning_rate": 0.00021519298245614032, "loss": 0.0175, "step": 3920 }, { "epoch": 58.52, "learning_rate": 0.0002151578947368421, "loss": 0.1888, "step": 3921 }, { "epoch": 58.54, "learning_rate": 0.00021512280701754385, "loss": 0.0133, "step": 3922 }, { "epoch": 58.55, "learning_rate": 0.0002150877192982456, "loss": 0.1769, "step": 3923 }, { "epoch": 58.57, "learning_rate": 0.00021505263157894737, "loss": 0.0578, "step": 3924 }, { "epoch": 58.58, "learning_rate": 0.00021501754385964912, "loss": 0.1585, "step": 3925 }, { "epoch": 58.59, "learning_rate": 0.00021498245614035087, "loss": 0.0783, "step": 3926 }, { "epoch": 58.61, "learning_rate": 0.0002149473684210526, "loss": 0.0021, "step": 3927 }, { "epoch": 58.62, "learning_rate": 0.00021491228070175437, "loss": 0.0082, "step": 3928 }, { "epoch": 58.64, "learning_rate": 0.00021487719298245612, "loss": 0.0019, "step": 3929 }, { "epoch": 58.65, "learning_rate": 0.00021484210526315786, "loss": 0.0022, "step": 3930 }, { "epoch": 58.67, "learning_rate": 0.0002148070175438596, "loss": 0.0103, "step": 3931 }, { "epoch": 58.68, "learning_rate": 0.0002147719298245614, "loss": 0.297, "step": 3932 }, { "epoch": 58.7, "learning_rate": 0.00021473684210526314, "loss": 0.0833, "step": 3933 }, { "epoch": 58.71, "learning_rate": 0.00021470175438596489, "loss": 0.0789, "step": 3934 }, { "epoch": 58.73, "learning_rate": 0.00021466666666666664, "loss": 0.0016, "step": 3935 }, { "epoch": 58.74, "learning_rate": 0.0002146315789473684, "loss": 0.0414, "step": 3936 }, { "epoch": 58.76, "learning_rate": 0.00021459649122807016, "loss": 0.0163, "step": 3937 }, { "epoch": 58.77, "learning_rate": 0.0002145614035087719, "loss": 0.1671, "step": 3938 }, { "epoch": 58.79, "learning_rate": 0.00021452631578947368, "loss": 0.1753, "step": 3939 }, { "epoch": 58.8, "learning_rate": 0.00021449122807017543, "loss": 0.2833, "step": 3940 }, { "epoch": 58.82, "learning_rate": 0.00021445614035087718, "loss": 0.0499, "step": 3941 }, { "epoch": 58.83, "learning_rate": 0.0002144210526315789, "loss": 0.0969, "step": 3942 }, { "epoch": 58.85, "learning_rate": 0.0002143859649122807, "loss": 0.4685, "step": 3943 }, { "epoch": 58.86, "learning_rate": 0.00021435087719298243, "loss": 0.0489, "step": 3944 }, { "epoch": 58.88, "learning_rate": 0.00021431578947368418, "loss": 0.1016, "step": 3945 }, { "epoch": 58.89, "learning_rate": 0.00021428070175438593, "loss": 0.161, "step": 3946 }, { "epoch": 58.91, "learning_rate": 0.0002142456140350877, "loss": 0.0239, "step": 3947 }, { "epoch": 58.92, "learning_rate": 0.00021421052631578945, "loss": 0.0382, "step": 3948 }, { "epoch": 58.94, "learning_rate": 0.0002141754385964912, "loss": 0.105, "step": 3949 }, { "epoch": 58.95, "learning_rate": 0.00021414035087719298, "loss": 0.5169, "step": 3950 }, { "epoch": 58.97, "learning_rate": 0.00021410526315789472, "loss": 0.1401, "step": 3951 }, { "epoch": 58.98, "learning_rate": 0.00021407017543859647, "loss": 0.4505, "step": 3952 }, { "epoch": 59.0, "learning_rate": 0.00021403508771929822, "loss": 0.2539, "step": 3953 }, { "epoch": 59.01, "learning_rate": 0.000214, "loss": 0.4692, "step": 3954 }, { "epoch": 59.03, "learning_rate": 0.00021396491228070175, "loss": 0.1235, "step": 3955 }, { "epoch": 59.04, "learning_rate": 0.0002139298245614035, "loss": 0.1404, "step": 3956 }, { "epoch": 59.06, "learning_rate": 0.00021389473684210522, "loss": 0.2029, "step": 3957 }, { "epoch": 59.07, "learning_rate": 0.00021385964912280702, "loss": 0.004, "step": 3958 }, { "epoch": 59.09, "learning_rate": 0.00021382456140350874, "loss": 0.2866, "step": 3959 }, { "epoch": 59.1, "learning_rate": 0.0002137894736842105, "loss": 0.1797, "step": 3960 }, { "epoch": 59.12, "learning_rate": 0.00021375438596491227, "loss": 0.0153, "step": 3961 }, { "epoch": 59.13, "learning_rate": 0.00021371929824561401, "loss": 0.0148, "step": 3962 }, { "epoch": 59.15, "learning_rate": 0.00021368421052631576, "loss": 0.0334, "step": 3963 }, { "epoch": 59.16, "learning_rate": 0.0002136491228070175, "loss": 0.1279, "step": 3964 }, { "epoch": 59.18, "learning_rate": 0.0002136140350877193, "loss": 0.1225, "step": 3965 }, { "epoch": 59.19, "learning_rate": 0.00021357894736842104, "loss": 0.2333, "step": 3966 }, { "epoch": 59.21, "learning_rate": 0.00021354385964912279, "loss": 0.0704, "step": 3967 }, { "epoch": 59.22, "learning_rate": 0.00021350877192982453, "loss": 0.0717, "step": 3968 }, { "epoch": 59.24, "learning_rate": 0.0002134736842105263, "loss": 0.026, "step": 3969 }, { "epoch": 59.25, "learning_rate": 0.00021343859649122806, "loss": 0.0112, "step": 3970 }, { "epoch": 59.27, "learning_rate": 0.0002134035087719298, "loss": 0.2346, "step": 3971 }, { "epoch": 59.28, "learning_rate": 0.00021336842105263156, "loss": 0.099, "step": 3972 }, { "epoch": 59.3, "learning_rate": 0.00021333333333333333, "loss": 0.1305, "step": 3973 }, { "epoch": 59.31, "learning_rate": 0.00021329824561403508, "loss": 0.1288, "step": 3974 }, { "epoch": 59.33, "learning_rate": 0.0002132631578947368, "loss": 0.0628, "step": 3975 }, { "epoch": 59.34, "learning_rate": 0.00021322807017543858, "loss": 0.1149, "step": 3976 }, { "epoch": 59.36, "learning_rate": 0.00021319298245614033, "loss": 0.0104, "step": 3977 }, { "epoch": 59.37, "learning_rate": 0.00021315789473684208, "loss": 0.2735, "step": 3978 }, { "epoch": 59.39, "learning_rate": 0.00021312280701754383, "loss": 0.0035, "step": 3979 }, { "epoch": 59.4, "learning_rate": 0.0002130877192982456, "loss": 0.0304, "step": 3980 }, { "epoch": 59.42, "learning_rate": 0.00021305263157894735, "loss": 0.0028, "step": 3981 }, { "epoch": 59.43, "learning_rate": 0.0002130175438596491, "loss": 0.0032, "step": 3982 }, { "epoch": 59.45, "learning_rate": 0.00021298245614035085, "loss": 0.1995, "step": 3983 }, { "epoch": 59.46, "learning_rate": 0.00021294736842105262, "loss": 0.0188, "step": 3984 }, { "epoch": 59.48, "learning_rate": 0.00021291228070175437, "loss": 0.0776, "step": 3985 }, { "epoch": 59.49, "learning_rate": 0.00021287719298245612, "loss": 0.0076, "step": 3986 }, { "epoch": 59.51, "learning_rate": 0.0002128421052631579, "loss": 0.5661, "step": 3987 }, { "epoch": 59.52, "learning_rate": 0.00021280701754385965, "loss": 0.4085, "step": 3988 }, { "epoch": 59.54, "learning_rate": 0.0002127719298245614, "loss": 0.1628, "step": 3989 }, { "epoch": 59.55, "learning_rate": 0.00021273684210526312, "loss": 0.1061, "step": 3990 }, { "epoch": 59.57, "learning_rate": 0.00021270175438596492, "loss": 0.2666, "step": 3991 }, { "epoch": 59.58, "learning_rate": 0.00021266666666666664, "loss": 0.0611, "step": 3992 }, { "epoch": 59.59, "learning_rate": 0.0002126315789473684, "loss": 0.0071, "step": 3993 }, { "epoch": 59.61, "learning_rate": 0.00021259649122807014, "loss": 0.0339, "step": 3994 }, { "epoch": 59.62, "learning_rate": 0.00021256140350877191, "loss": 0.2808, "step": 3995 }, { "epoch": 59.64, "learning_rate": 0.00021252631578947366, "loss": 0.0188, "step": 3996 }, { "epoch": 59.65, "learning_rate": 0.0002124912280701754, "loss": 0.1661, "step": 3997 }, { "epoch": 59.67, "learning_rate": 0.0002124561403508772, "loss": 0.0633, "step": 3998 }, { "epoch": 59.68, "learning_rate": 0.00021242105263157894, "loss": 0.0081, "step": 3999 }, { "epoch": 59.7, "learning_rate": 0.00021238596491228069, "loss": 0.0309, "step": 4000 }, { "epoch": 59.7, "eval_accuracy": 0.8499755261869799, "eval_f1": 0.8496479698093485, "eval_loss": 0.6348404884338379, "eval_runtime": 344.6603, "eval_samples_per_second": 11.855, "eval_steps_per_second": 0.743, "step": 4000 }, { "epoch": 59.71, "learning_rate": 0.00021235087719298243, "loss": 0.004, "step": 4001 }, { "epoch": 59.73, "learning_rate": 0.0002123157894736842, "loss": 0.1196, "step": 4002 }, { "epoch": 59.74, "learning_rate": 0.00021228070175438596, "loss": 0.2379, "step": 4003 }, { "epoch": 59.76, "learning_rate": 0.0002122456140350877, "loss": 0.1041, "step": 4004 }, { "epoch": 59.77, "learning_rate": 0.00021221052631578943, "loss": 0.0147, "step": 4005 }, { "epoch": 59.79, "learning_rate": 0.00021217543859649123, "loss": 0.238, "step": 4006 }, { "epoch": 59.8, "learning_rate": 0.00021214035087719295, "loss": 0.1002, "step": 4007 }, { "epoch": 59.82, "learning_rate": 0.0002121052631578947, "loss": 0.0141, "step": 4008 }, { "epoch": 59.83, "learning_rate": 0.00021207017543859645, "loss": 0.0981, "step": 4009 }, { "epoch": 59.85, "learning_rate": 0.00021203508771929823, "loss": 0.0131, "step": 4010 }, { "epoch": 59.86, "learning_rate": 0.00021199999999999998, "loss": 0.0039, "step": 4011 }, { "epoch": 59.88, "learning_rate": 0.00021196491228070172, "loss": 0.0125, "step": 4012 }, { "epoch": 59.89, "learning_rate": 0.0002119298245614035, "loss": 0.0485, "step": 4013 }, { "epoch": 59.91, "learning_rate": 0.00021189473684210525, "loss": 0.0308, "step": 4014 }, { "epoch": 59.92, "learning_rate": 0.000211859649122807, "loss": 0.0196, "step": 4015 }, { "epoch": 59.94, "learning_rate": 0.00021182456140350875, "loss": 0.0278, "step": 4016 }, { "epoch": 59.95, "learning_rate": 0.00021178947368421052, "loss": 0.0999, "step": 4017 }, { "epoch": 59.97, "learning_rate": 0.00021175438596491227, "loss": 0.3143, "step": 4018 }, { "epoch": 59.98, "learning_rate": 0.00021171929824561402, "loss": 0.0039, "step": 4019 }, { "epoch": 60.0, "learning_rate": 0.00021168421052631577, "loss": 0.0026, "step": 4020 }, { "epoch": 60.01, "learning_rate": 0.00021164912280701755, "loss": 0.0043, "step": 4021 }, { "epoch": 60.03, "learning_rate": 0.00021161403508771927, "loss": 0.0167, "step": 4022 }, { "epoch": 60.04, "learning_rate": 0.00021157894736842102, "loss": 0.0815, "step": 4023 }, { "epoch": 60.06, "learning_rate": 0.0002115438596491228, "loss": 0.0135, "step": 4024 }, { "epoch": 60.07, "learning_rate": 0.00021150877192982454, "loss": 0.0043, "step": 4025 }, { "epoch": 60.09, "learning_rate": 0.0002114736842105263, "loss": 0.206, "step": 4026 }, { "epoch": 60.1, "learning_rate": 0.00021143859649122804, "loss": 0.017, "step": 4027 }, { "epoch": 60.12, "learning_rate": 0.00021140350877192981, "loss": 0.0317, "step": 4028 }, { "epoch": 60.13, "learning_rate": 0.00021136842105263156, "loss": 0.0049, "step": 4029 }, { "epoch": 60.15, "learning_rate": 0.0002113333333333333, "loss": 0.0669, "step": 4030 }, { "epoch": 60.16, "learning_rate": 0.00021129824561403506, "loss": 0.0034, "step": 4031 }, { "epoch": 60.18, "learning_rate": 0.00021126315789473684, "loss": 0.0118, "step": 4032 }, { "epoch": 60.19, "learning_rate": 0.00021122807017543858, "loss": 0.1456, "step": 4033 }, { "epoch": 60.21, "learning_rate": 0.00021119298245614033, "loss": 0.0936, "step": 4034 }, { "epoch": 60.22, "learning_rate": 0.00021115789473684208, "loss": 0.002, "step": 4035 }, { "epoch": 60.24, "learning_rate": 0.00021112280701754386, "loss": 0.0019, "step": 4036 }, { "epoch": 60.25, "learning_rate": 0.0002110877192982456, "loss": 0.0154, "step": 4037 }, { "epoch": 60.27, "learning_rate": 0.00021105263157894733, "loss": 0.1007, "step": 4038 }, { "epoch": 60.28, "learning_rate": 0.00021101754385964913, "loss": 0.002, "step": 4039 }, { "epoch": 60.3, "learning_rate": 0.00021098245614035085, "loss": 0.0145, "step": 4040 }, { "epoch": 60.31, "learning_rate": 0.0002109473684210526, "loss": 0.0021, "step": 4041 }, { "epoch": 60.33, "learning_rate": 0.00021091228070175435, "loss": 0.0255, "step": 4042 }, { "epoch": 60.34, "learning_rate": 0.00021087719298245613, "loss": 0.1407, "step": 4043 }, { "epoch": 60.36, "learning_rate": 0.00021084210526315788, "loss": 0.0017, "step": 4044 }, { "epoch": 60.37, "learning_rate": 0.00021080701754385962, "loss": 0.0023, "step": 4045 }, { "epoch": 60.39, "learning_rate": 0.00021077192982456137, "loss": 0.2124, "step": 4046 }, { "epoch": 60.4, "learning_rate": 0.00021073684210526315, "loss": 0.0597, "step": 4047 }, { "epoch": 60.42, "learning_rate": 0.0002107017543859649, "loss": 0.002, "step": 4048 }, { "epoch": 60.43, "learning_rate": 0.00021066666666666665, "loss": 0.1701, "step": 4049 }, { "epoch": 60.45, "learning_rate": 0.00021063157894736842, "loss": 0.0052, "step": 4050 }, { "epoch": 60.46, "learning_rate": 0.00021059649122807017, "loss": 0.0016, "step": 4051 }, { "epoch": 60.48, "learning_rate": 0.00021056140350877192, "loss": 0.0016, "step": 4052 }, { "epoch": 60.49, "learning_rate": 0.00021052631578947364, "loss": 0.0013, "step": 4053 }, { "epoch": 60.51, "learning_rate": 0.00021049122807017544, "loss": 0.0026, "step": 4054 }, { "epoch": 60.52, "learning_rate": 0.00021045614035087717, "loss": 0.0104, "step": 4055 }, { "epoch": 60.54, "learning_rate": 0.00021042105263157891, "loss": 0.0041, "step": 4056 }, { "epoch": 60.55, "learning_rate": 0.00021038596491228066, "loss": 0.022, "step": 4057 }, { "epoch": 60.57, "learning_rate": 0.00021035087719298244, "loss": 0.3154, "step": 4058 }, { "epoch": 60.58, "learning_rate": 0.0002103157894736842, "loss": 0.0074, "step": 4059 }, { "epoch": 60.59, "learning_rate": 0.00021028070175438594, "loss": 0.0018, "step": 4060 }, { "epoch": 60.61, "learning_rate": 0.0002102456140350877, "loss": 0.1199, "step": 4061 }, { "epoch": 60.62, "learning_rate": 0.00021021052631578946, "loss": 0.0353, "step": 4062 }, { "epoch": 60.64, "learning_rate": 0.0002101754385964912, "loss": 0.011, "step": 4063 }, { "epoch": 60.65, "learning_rate": 0.00021014035087719296, "loss": 0.0122, "step": 4064 }, { "epoch": 60.67, "learning_rate": 0.00021010526315789474, "loss": 0.0019, "step": 4065 }, { "epoch": 60.68, "learning_rate": 0.00021007017543859648, "loss": 0.2162, "step": 4066 }, { "epoch": 60.7, "learning_rate": 0.00021003508771929823, "loss": 0.0015, "step": 4067 }, { "epoch": 60.71, "learning_rate": 0.00020999999999999998, "loss": 0.0029, "step": 4068 }, { "epoch": 60.73, "learning_rate": 0.00020996491228070176, "loss": 0.0676, "step": 4069 }, { "epoch": 60.74, "learning_rate": 0.00020992982456140348, "loss": 0.0236, "step": 4070 }, { "epoch": 60.76, "learning_rate": 0.00020989473684210523, "loss": 0.2489, "step": 4071 }, { "epoch": 60.77, "learning_rate": 0.00020985964912280698, "loss": 0.0363, "step": 4072 }, { "epoch": 60.79, "learning_rate": 0.00020982456140350875, "loss": 0.0253, "step": 4073 }, { "epoch": 60.8, "learning_rate": 0.0002097894736842105, "loss": 0.111, "step": 4074 }, { "epoch": 60.82, "learning_rate": 0.00020975438596491225, "loss": 0.4341, "step": 4075 }, { "epoch": 60.83, "learning_rate": 0.00020971929824561403, "loss": 0.157, "step": 4076 }, { "epoch": 60.85, "learning_rate": 0.00020968421052631577, "loss": 0.0185, "step": 4077 }, { "epoch": 60.86, "learning_rate": 0.00020964912280701752, "loss": 0.1298, "step": 4078 }, { "epoch": 60.88, "learning_rate": 0.00020961403508771927, "loss": 0.1157, "step": 4079 }, { "epoch": 60.89, "learning_rate": 0.00020957894736842105, "loss": 0.0145, "step": 4080 }, { "epoch": 60.91, "learning_rate": 0.0002095438596491228, "loss": 0.0021, "step": 4081 }, { "epoch": 60.92, "learning_rate": 0.00020950877192982455, "loss": 0.0042, "step": 4082 }, { "epoch": 60.94, "learning_rate": 0.0002094736842105263, "loss": 0.1683, "step": 4083 }, { "epoch": 60.95, "learning_rate": 0.00020943859649122807, "loss": 0.3351, "step": 4084 }, { "epoch": 60.97, "learning_rate": 0.00020940350877192982, "loss": 0.2813, "step": 4085 }, { "epoch": 60.98, "learning_rate": 0.00020936842105263154, "loss": 0.0018, "step": 4086 }, { "epoch": 61.0, "learning_rate": 0.00020933333333333334, "loss": 0.1741, "step": 4087 }, { "epoch": 61.01, "learning_rate": 0.00020929824561403507, "loss": 0.0651, "step": 4088 }, { "epoch": 61.03, "learning_rate": 0.00020926315789473681, "loss": 0.2118, "step": 4089 }, { "epoch": 61.04, "learning_rate": 0.00020922807017543856, "loss": 0.003, "step": 4090 }, { "epoch": 61.06, "learning_rate": 0.00020919298245614034, "loss": 0.1208, "step": 4091 }, { "epoch": 61.07, "learning_rate": 0.0002091578947368421, "loss": 0.0036, "step": 4092 }, { "epoch": 61.09, "learning_rate": 0.00020912280701754384, "loss": 0.1213, "step": 4093 }, { "epoch": 61.1, "learning_rate": 0.00020908771929824559, "loss": 0.0017, "step": 4094 }, { "epoch": 61.12, "learning_rate": 0.00020905263157894736, "loss": 0.0023, "step": 4095 }, { "epoch": 61.13, "learning_rate": 0.0002090175438596491, "loss": 0.0016, "step": 4096 }, { "epoch": 61.15, "learning_rate": 0.00020898245614035086, "loss": 0.1565, "step": 4097 }, { "epoch": 61.16, "learning_rate": 0.0002089473684210526, "loss": 0.0819, "step": 4098 }, { "epoch": 61.18, "learning_rate": 0.00020891228070175438, "loss": 0.0196, "step": 4099 }, { "epoch": 61.19, "learning_rate": 0.00020887719298245613, "loss": 0.0111, "step": 4100 }, { "epoch": 61.21, "learning_rate": 0.00020884210526315785, "loss": 0.0028, "step": 4101 }, { "epoch": 61.22, "learning_rate": 0.00020880701754385966, "loss": 0.0065, "step": 4102 }, { "epoch": 61.24, "learning_rate": 0.00020877192982456138, "loss": 0.0015, "step": 4103 }, { "epoch": 61.25, "learning_rate": 0.00020873684210526313, "loss": 0.3828, "step": 4104 }, { "epoch": 61.27, "learning_rate": 0.00020870175438596488, "loss": 0.295, "step": 4105 }, { "epoch": 61.28, "learning_rate": 0.00020866666666666665, "loss": 0.3663, "step": 4106 }, { "epoch": 61.3, "learning_rate": 0.0002086315789473684, "loss": 0.0094, "step": 4107 }, { "epoch": 61.31, "learning_rate": 0.00020859649122807015, "loss": 0.0764, "step": 4108 }, { "epoch": 61.33, "learning_rate": 0.0002085614035087719, "loss": 0.2064, "step": 4109 }, { "epoch": 61.34, "learning_rate": 0.00020852631578947367, "loss": 0.2032, "step": 4110 }, { "epoch": 61.36, "learning_rate": 0.00020849122807017542, "loss": 0.0028, "step": 4111 }, { "epoch": 61.37, "learning_rate": 0.00020845614035087717, "loss": 0.247, "step": 4112 }, { "epoch": 61.39, "learning_rate": 0.00020842105263157895, "loss": 0.0929, "step": 4113 }, { "epoch": 61.4, "learning_rate": 0.0002083859649122807, "loss": 0.1304, "step": 4114 }, { "epoch": 61.42, "learning_rate": 0.00020835087719298245, "loss": 0.0107, "step": 4115 }, { "epoch": 61.43, "learning_rate": 0.00020831578947368417, "loss": 0.0028, "step": 4116 }, { "epoch": 61.45, "learning_rate": 0.00020828070175438597, "loss": 0.0404, "step": 4117 }, { "epoch": 61.46, "learning_rate": 0.0002082456140350877, "loss": 0.0027, "step": 4118 }, { "epoch": 61.48, "learning_rate": 0.00020821052631578944, "loss": 0.0116, "step": 4119 }, { "epoch": 61.49, "learning_rate": 0.0002081754385964912, "loss": 0.1348, "step": 4120 }, { "epoch": 61.51, "learning_rate": 0.00020814035087719296, "loss": 0.2591, "step": 4121 }, { "epoch": 61.52, "learning_rate": 0.00020810526315789471, "loss": 0.0318, "step": 4122 }, { "epoch": 61.54, "learning_rate": 0.00020807017543859646, "loss": 0.004, "step": 4123 }, { "epoch": 61.55, "learning_rate": 0.00020803508771929824, "loss": 0.1941, "step": 4124 }, { "epoch": 61.57, "learning_rate": 0.000208, "loss": 0.315, "step": 4125 }, { "epoch": 61.58, "learning_rate": 0.00020796491228070174, "loss": 0.5173, "step": 4126 }, { "epoch": 61.59, "learning_rate": 0.00020792982456140348, "loss": 0.0103, "step": 4127 }, { "epoch": 61.61, "learning_rate": 0.00020789473684210526, "loss": 0.0171, "step": 4128 }, { "epoch": 61.62, "learning_rate": 0.000207859649122807, "loss": 0.044, "step": 4129 }, { "epoch": 61.64, "learning_rate": 0.00020782456140350876, "loss": 0.1884, "step": 4130 }, { "epoch": 61.65, "learning_rate": 0.0002077894736842105, "loss": 0.0724, "step": 4131 }, { "epoch": 61.67, "learning_rate": 0.00020775438596491228, "loss": 0.1644, "step": 4132 }, { "epoch": 61.68, "learning_rate": 0.00020771929824561403, "loss": 0.0115, "step": 4133 }, { "epoch": 61.7, "learning_rate": 0.00020768421052631575, "loss": 0.0602, "step": 4134 }, { "epoch": 61.71, "learning_rate": 0.0002076491228070175, "loss": 0.1779, "step": 4135 }, { "epoch": 61.73, "learning_rate": 0.00020761403508771928, "loss": 0.3983, "step": 4136 }, { "epoch": 61.74, "learning_rate": 0.00020757894736842103, "loss": 0.034, "step": 4137 }, { "epoch": 61.76, "learning_rate": 0.00020754385964912278, "loss": 0.016, "step": 4138 }, { "epoch": 61.77, "learning_rate": 0.00020750877192982455, "loss": 0.0098, "step": 4139 }, { "epoch": 61.79, "learning_rate": 0.0002074736842105263, "loss": 0.1986, "step": 4140 }, { "epoch": 61.8, "learning_rate": 0.00020743859649122805, "loss": 0.2418, "step": 4141 }, { "epoch": 61.82, "learning_rate": 0.0002074035087719298, "loss": 0.156, "step": 4142 }, { "epoch": 61.83, "learning_rate": 0.00020736842105263157, "loss": 0.0444, "step": 4143 }, { "epoch": 61.85, "learning_rate": 0.00020733333333333332, "loss": 0.2965, "step": 4144 }, { "epoch": 61.86, "learning_rate": 0.00020729824561403507, "loss": 0.0129, "step": 4145 }, { "epoch": 61.88, "learning_rate": 0.00020726315789473682, "loss": 0.1533, "step": 4146 }, { "epoch": 61.89, "learning_rate": 0.0002072280701754386, "loss": 0.0443, "step": 4147 }, { "epoch": 61.91, "learning_rate": 0.00020719298245614034, "loss": 0.2175, "step": 4148 }, { "epoch": 61.92, "learning_rate": 0.00020715789473684207, "loss": 0.0256, "step": 4149 }, { "epoch": 61.94, "learning_rate": 0.00020712280701754387, "loss": 0.2934, "step": 4150 }, { "epoch": 61.95, "learning_rate": 0.0002070877192982456, "loss": 0.0082, "step": 4151 }, { "epoch": 61.97, "learning_rate": 0.00020705263157894734, "loss": 0.2396, "step": 4152 }, { "epoch": 61.98, "learning_rate": 0.0002070175438596491, "loss": 0.0225, "step": 4153 }, { "epoch": 62.0, "learning_rate": 0.00020698245614035086, "loss": 0.0113, "step": 4154 }, { "epoch": 62.01, "learning_rate": 0.0002069473684210526, "loss": 0.0102, "step": 4155 }, { "epoch": 62.03, "learning_rate": 0.00020691228070175436, "loss": 0.1027, "step": 4156 }, { "epoch": 62.04, "learning_rate": 0.0002068771929824561, "loss": 0.0548, "step": 4157 }, { "epoch": 62.06, "learning_rate": 0.0002068421052631579, "loss": 0.0603, "step": 4158 }, { "epoch": 62.07, "learning_rate": 0.00020680701754385964, "loss": 0.1559, "step": 4159 }, { "epoch": 62.09, "learning_rate": 0.00020677192982456138, "loss": 0.0041, "step": 4160 }, { "epoch": 62.1, "learning_rate": 0.00020673684210526316, "loss": 0.2439, "step": 4161 }, { "epoch": 62.12, "learning_rate": 0.0002067017543859649, "loss": 0.055, "step": 4162 }, { "epoch": 62.13, "learning_rate": 0.00020666666666666666, "loss": 0.009, "step": 4163 }, { "epoch": 62.15, "learning_rate": 0.00020663157894736838, "loss": 0.0038, "step": 4164 }, { "epoch": 62.16, "learning_rate": 0.00020659649122807018, "loss": 0.0186, "step": 4165 }, { "epoch": 62.18, "learning_rate": 0.0002065614035087719, "loss": 0.1261, "step": 4166 }, { "epoch": 62.19, "learning_rate": 0.00020652631578947365, "loss": 0.0824, "step": 4167 }, { "epoch": 62.21, "learning_rate": 0.0002064912280701754, "loss": 0.0174, "step": 4168 }, { "epoch": 62.22, "learning_rate": 0.00020645614035087718, "loss": 0.0025, "step": 4169 }, { "epoch": 62.24, "learning_rate": 0.00020642105263157893, "loss": 0.0024, "step": 4170 }, { "epoch": 62.25, "learning_rate": 0.00020638596491228067, "loss": 0.1987, "step": 4171 }, { "epoch": 62.27, "learning_rate": 0.00020635087719298242, "loss": 0.2195, "step": 4172 }, { "epoch": 62.28, "learning_rate": 0.0002063157894736842, "loss": 0.2643, "step": 4173 }, { "epoch": 62.3, "learning_rate": 0.00020628070175438595, "loss": 0.1246, "step": 4174 }, { "epoch": 62.31, "learning_rate": 0.0002062456140350877, "loss": 0.0093, "step": 4175 }, { "epoch": 62.33, "learning_rate": 0.00020621052631578947, "loss": 0.0032, "step": 4176 }, { "epoch": 62.34, "learning_rate": 0.00020617543859649122, "loss": 0.0156, "step": 4177 }, { "epoch": 62.36, "learning_rate": 0.00020614035087719297, "loss": 0.0021, "step": 4178 }, { "epoch": 62.37, "learning_rate": 0.00020610526315789472, "loss": 0.011, "step": 4179 }, { "epoch": 62.39, "learning_rate": 0.0002060701754385965, "loss": 0.0111, "step": 4180 }, { "epoch": 62.4, "learning_rate": 0.00020603508771929824, "loss": 0.0531, "step": 4181 }, { "epoch": 62.42, "learning_rate": 0.00020599999999999997, "loss": 0.0025, "step": 4182 }, { "epoch": 62.43, "learning_rate": 0.00020596491228070171, "loss": 0.0019, "step": 4183 }, { "epoch": 62.45, "learning_rate": 0.0002059298245614035, "loss": 0.1355, "step": 4184 }, { "epoch": 62.46, "learning_rate": 0.00020589473684210524, "loss": 0.1004, "step": 4185 }, { "epoch": 62.48, "learning_rate": 0.000205859649122807, "loss": 0.2385, "step": 4186 }, { "epoch": 62.49, "learning_rate": 0.00020582456140350876, "loss": 0.0027, "step": 4187 }, { "epoch": 62.51, "learning_rate": 0.0002057894736842105, "loss": 0.0032, "step": 4188 }, { "epoch": 62.52, "learning_rate": 0.00020575438596491226, "loss": 0.0206, "step": 4189 }, { "epoch": 62.54, "learning_rate": 0.000205719298245614, "loss": 0.0684, "step": 4190 }, { "epoch": 62.55, "learning_rate": 0.00020568421052631579, "loss": 0.0065, "step": 4191 }, { "epoch": 62.57, "learning_rate": 0.00020564912280701753, "loss": 0.0411, "step": 4192 }, { "epoch": 62.58, "learning_rate": 0.00020561403508771928, "loss": 0.1708, "step": 4193 }, { "epoch": 62.59, "learning_rate": 0.00020557894736842103, "loss": 0.0696, "step": 4194 }, { "epoch": 62.61, "learning_rate": 0.0002055438596491228, "loss": 0.111, "step": 4195 }, { "epoch": 62.62, "learning_rate": 0.00020550877192982456, "loss": 0.2224, "step": 4196 }, { "epoch": 62.64, "learning_rate": 0.00020547368421052628, "loss": 0.0619, "step": 4197 }, { "epoch": 62.65, "learning_rate": 0.00020543859649122803, "loss": 0.1194, "step": 4198 }, { "epoch": 62.67, "learning_rate": 0.0002054035087719298, "loss": 0.1641, "step": 4199 }, { "epoch": 62.68, "learning_rate": 0.00020536842105263155, "loss": 0.0026, "step": 4200 }, { "epoch": 62.68, "eval_accuracy": 0.8565834557023985, "eval_f1": 0.8584555081928308, "eval_loss": 0.6736618876457214, "eval_runtime": 345.5411, "eval_samples_per_second": 11.825, "eval_steps_per_second": 0.741, "step": 4200 }, { "epoch": 62.7, "learning_rate": 0.0002053333333333333, "loss": 0.004, "step": 4201 }, { "epoch": 62.71, "learning_rate": 0.00020529824561403508, "loss": 0.2727, "step": 4202 }, { "epoch": 62.73, "learning_rate": 0.00020526315789473683, "loss": 0.0279, "step": 4203 }, { "epoch": 62.74, "learning_rate": 0.00020522807017543857, "loss": 0.0092, "step": 4204 }, { "epoch": 62.76, "learning_rate": 0.00020519298245614032, "loss": 0.1556, "step": 4205 }, { "epoch": 62.77, "learning_rate": 0.0002051578947368421, "loss": 0.1234, "step": 4206 }, { "epoch": 62.79, "learning_rate": 0.00020512280701754385, "loss": 0.0567, "step": 4207 }, { "epoch": 62.8, "learning_rate": 0.0002050877192982456, "loss": 0.0048, "step": 4208 }, { "epoch": 62.82, "learning_rate": 0.00020505263157894735, "loss": 0.1576, "step": 4209 }, { "epoch": 62.83, "learning_rate": 0.00020501754385964912, "loss": 0.0604, "step": 4210 }, { "epoch": 62.85, "learning_rate": 0.00020498245614035087, "loss": 0.0071, "step": 4211 }, { "epoch": 62.86, "learning_rate": 0.0002049473684210526, "loss": 0.189, "step": 4212 }, { "epoch": 62.88, "learning_rate": 0.0002049122807017544, "loss": 0.0091, "step": 4213 }, { "epoch": 62.89, "learning_rate": 0.00020487719298245612, "loss": 0.1073, "step": 4214 }, { "epoch": 62.91, "learning_rate": 0.00020484210526315787, "loss": 0.0408, "step": 4215 }, { "epoch": 62.92, "learning_rate": 0.00020480701754385961, "loss": 0.1842, "step": 4216 }, { "epoch": 62.94, "learning_rate": 0.0002047719298245614, "loss": 0.0559, "step": 4217 }, { "epoch": 62.95, "learning_rate": 0.00020473684210526314, "loss": 0.0073, "step": 4218 }, { "epoch": 62.97, "learning_rate": 0.0002047017543859649, "loss": 0.071, "step": 4219 }, { "epoch": 62.98, "learning_rate": 0.00020466666666666664, "loss": 0.0027, "step": 4220 }, { "epoch": 63.0, "learning_rate": 0.0002046315789473684, "loss": 0.0257, "step": 4221 }, { "epoch": 63.01, "learning_rate": 0.00020459649122807016, "loss": 0.0126, "step": 4222 }, { "epoch": 63.03, "learning_rate": 0.0002045614035087719, "loss": 0.1263, "step": 4223 }, { "epoch": 63.04, "learning_rate": 0.00020452631578947369, "loss": 0.0039, "step": 4224 }, { "epoch": 63.06, "learning_rate": 0.00020449122807017543, "loss": 0.0579, "step": 4225 }, { "epoch": 63.07, "learning_rate": 0.00020445614035087718, "loss": 0.0764, "step": 4226 }, { "epoch": 63.09, "learning_rate": 0.00020442105263157893, "loss": 0.1427, "step": 4227 }, { "epoch": 63.1, "learning_rate": 0.0002043859649122807, "loss": 0.0591, "step": 4228 }, { "epoch": 63.12, "learning_rate": 0.00020435087719298246, "loss": 0.2312, "step": 4229 }, { "epoch": 63.13, "learning_rate": 0.00020431578947368418, "loss": 0.0047, "step": 4230 }, { "epoch": 63.15, "learning_rate": 0.00020428070175438593, "loss": 0.1699, "step": 4231 }, { "epoch": 63.16, "learning_rate": 0.0002042456140350877, "loss": 0.0909, "step": 4232 }, { "epoch": 63.18, "learning_rate": 0.00020421052631578945, "loss": 0.0058, "step": 4233 }, { "epoch": 63.19, "learning_rate": 0.0002041754385964912, "loss": 0.0023, "step": 4234 }, { "epoch": 63.21, "learning_rate": 0.00020414035087719295, "loss": 0.0038, "step": 4235 }, { "epoch": 63.22, "learning_rate": 0.00020410526315789472, "loss": 0.0051, "step": 4236 }, { "epoch": 63.24, "learning_rate": 0.00020407017543859647, "loss": 0.0402, "step": 4237 }, { "epoch": 63.25, "learning_rate": 0.00020403508771929822, "loss": 0.0693, "step": 4238 }, { "epoch": 63.27, "learning_rate": 0.000204, "loss": 0.0375, "step": 4239 }, { "epoch": 63.28, "learning_rate": 0.00020396491228070175, "loss": 0.0099, "step": 4240 }, { "epoch": 63.3, "learning_rate": 0.0002039298245614035, "loss": 0.194, "step": 4241 }, { "epoch": 63.31, "learning_rate": 0.00020389473684210524, "loss": 0.0048, "step": 4242 }, { "epoch": 63.33, "learning_rate": 0.00020385964912280702, "loss": 0.2181, "step": 4243 }, { "epoch": 63.34, "learning_rate": 0.00020382456140350877, "loss": 0.0991, "step": 4244 }, { "epoch": 63.36, "learning_rate": 0.0002037894736842105, "loss": 0.1155, "step": 4245 }, { "epoch": 63.37, "learning_rate": 0.00020375438596491224, "loss": 0.0227, "step": 4246 }, { "epoch": 63.39, "learning_rate": 0.00020371929824561402, "loss": 0.2667, "step": 4247 }, { "epoch": 63.4, "learning_rate": 0.00020368421052631576, "loss": 0.0338, "step": 4248 }, { "epoch": 63.42, "learning_rate": 0.0002036491228070175, "loss": 0.0102, "step": 4249 }, { "epoch": 63.43, "learning_rate": 0.0002036140350877193, "loss": 0.0171, "step": 4250 }, { "epoch": 63.45, "learning_rate": 0.00020357894736842104, "loss": 0.0062, "step": 4251 }, { "epoch": 63.46, "learning_rate": 0.0002035438596491228, "loss": 0.0692, "step": 4252 }, { "epoch": 63.48, "learning_rate": 0.00020350877192982454, "loss": 0.0126, "step": 4253 }, { "epoch": 63.49, "learning_rate": 0.0002034736842105263, "loss": 0.1374, "step": 4254 }, { "epoch": 63.51, "learning_rate": 0.00020343859649122806, "loss": 0.0052, "step": 4255 }, { "epoch": 63.52, "learning_rate": 0.0002034035087719298, "loss": 0.0199, "step": 4256 }, { "epoch": 63.54, "learning_rate": 0.00020336842105263156, "loss": 0.0057, "step": 4257 }, { "epoch": 63.55, "learning_rate": 0.00020333333333333333, "loss": 0.0557, "step": 4258 }, { "epoch": 63.57, "learning_rate": 0.00020329824561403508, "loss": 0.2364, "step": 4259 }, { "epoch": 63.58, "learning_rate": 0.0002032631578947368, "loss": 0.0175, "step": 4260 }, { "epoch": 63.59, "learning_rate": 0.00020322807017543855, "loss": 0.0065, "step": 4261 }, { "epoch": 63.61, "learning_rate": 0.00020319298245614033, "loss": 0.0044, "step": 4262 }, { "epoch": 63.62, "learning_rate": 0.00020315789473684208, "loss": 0.0074, "step": 4263 }, { "epoch": 63.64, "learning_rate": 0.00020312280701754383, "loss": 0.0929, "step": 4264 }, { "epoch": 63.65, "learning_rate": 0.0002030877192982456, "loss": 0.3596, "step": 4265 }, { "epoch": 63.67, "learning_rate": 0.00020305263157894735, "loss": 0.051, "step": 4266 }, { "epoch": 63.68, "learning_rate": 0.0002030175438596491, "loss": 0.038, "step": 4267 }, { "epoch": 63.7, "learning_rate": 0.00020298245614035085, "loss": 0.0597, "step": 4268 }, { "epoch": 63.71, "learning_rate": 0.00020294736842105262, "loss": 0.1669, "step": 4269 }, { "epoch": 63.73, "learning_rate": 0.00020291228070175437, "loss": 0.0057, "step": 4270 }, { "epoch": 63.74, "learning_rate": 0.00020287719298245612, "loss": 0.0087, "step": 4271 }, { "epoch": 63.76, "learning_rate": 0.00020284210526315787, "loss": 0.2221, "step": 4272 }, { "epoch": 63.77, "learning_rate": 0.00020280701754385965, "loss": 0.0157, "step": 4273 }, { "epoch": 63.79, "learning_rate": 0.0002027719298245614, "loss": 0.2365, "step": 4274 }, { "epoch": 63.8, "learning_rate": 0.00020273684210526314, "loss": 0.01, "step": 4275 }, { "epoch": 63.82, "learning_rate": 0.00020270175438596492, "loss": 0.0646, "step": 4276 }, { "epoch": 63.83, "learning_rate": 0.00020266666666666664, "loss": 0.0485, "step": 4277 }, { "epoch": 63.85, "learning_rate": 0.0002026315789473684, "loss": 0.0316, "step": 4278 }, { "epoch": 63.86, "learning_rate": 0.00020259649122807014, "loss": 0.0029, "step": 4279 }, { "epoch": 63.88, "learning_rate": 0.00020256140350877192, "loss": 0.002, "step": 4280 }, { "epoch": 63.89, "learning_rate": 0.00020252631578947366, "loss": 0.0292, "step": 4281 }, { "epoch": 63.91, "learning_rate": 0.0002024912280701754, "loss": 0.0059, "step": 4282 }, { "epoch": 63.92, "learning_rate": 0.00020245614035087716, "loss": 0.0683, "step": 4283 }, { "epoch": 63.94, "learning_rate": 0.00020242105263157894, "loss": 0.0021, "step": 4284 }, { "epoch": 63.95, "learning_rate": 0.00020238596491228069, "loss": 0.0025, "step": 4285 }, { "epoch": 63.97, "learning_rate": 0.00020235087719298243, "loss": 0.2327, "step": 4286 }, { "epoch": 63.98, "learning_rate": 0.0002023157894736842, "loss": 0.2651, "step": 4287 }, { "epoch": 64.0, "learning_rate": 0.00020228070175438596, "loss": 0.0564, "step": 4288 }, { "epoch": 64.01, "learning_rate": 0.0002022456140350877, "loss": 0.1241, "step": 4289 }, { "epoch": 64.03, "learning_rate": 0.00020221052631578946, "loss": 0.1026, "step": 4290 }, { "epoch": 64.04, "learning_rate": 0.00020217543859649123, "loss": 0.0335, "step": 4291 }, { "epoch": 64.06, "learning_rate": 0.00020214035087719298, "loss": 0.0337, "step": 4292 }, { "epoch": 64.07, "learning_rate": 0.0002021052631578947, "loss": 0.0045, "step": 4293 }, { "epoch": 64.09, "learning_rate": 0.00020207017543859645, "loss": 0.0046, "step": 4294 }, { "epoch": 64.1, "learning_rate": 0.00020203508771929823, "loss": 0.0291, "step": 4295 }, { "epoch": 64.12, "learning_rate": 0.00020199999999999998, "loss": 0.0063, "step": 4296 }, { "epoch": 64.13, "learning_rate": 0.00020196491228070173, "loss": 0.0019, "step": 4297 }, { "epoch": 64.15, "learning_rate": 0.00020192982456140347, "loss": 0.0518, "step": 4298 }, { "epoch": 64.16, "learning_rate": 0.00020189473684210525, "loss": 0.0032, "step": 4299 }, { "epoch": 64.18, "learning_rate": 0.000201859649122807, "loss": 0.0083, "step": 4300 }, { "epoch": 64.19, "learning_rate": 0.00020182456140350875, "loss": 0.0019, "step": 4301 }, { "epoch": 64.21, "learning_rate": 0.00020178947368421052, "loss": 0.0107, "step": 4302 }, { "epoch": 64.22, "learning_rate": 0.00020175438596491227, "loss": 0.0465, "step": 4303 }, { "epoch": 64.24, "learning_rate": 0.00020171929824561402, "loss": 0.0032, "step": 4304 }, { "epoch": 64.25, "learning_rate": 0.00020168421052631577, "loss": 0.0136, "step": 4305 }, { "epoch": 64.27, "learning_rate": 0.00020164912280701755, "loss": 0.1822, "step": 4306 }, { "epoch": 64.28, "learning_rate": 0.0002016140350877193, "loss": 0.022, "step": 4307 }, { "epoch": 64.3, "learning_rate": 0.00020157894736842102, "loss": 0.004, "step": 4308 }, { "epoch": 64.31, "learning_rate": 0.00020154385964912277, "loss": 0.2661, "step": 4309 }, { "epoch": 64.33, "learning_rate": 0.00020150877192982454, "loss": 0.632, "step": 4310 }, { "epoch": 64.34, "learning_rate": 0.0002014736842105263, "loss": 0.0027, "step": 4311 }, { "epoch": 64.36, "learning_rate": 0.00020143859649122804, "loss": 0.0271, "step": 4312 }, { "epoch": 64.37, "learning_rate": 0.00020140350877192981, "loss": 0.0028, "step": 4313 }, { "epoch": 64.39, "learning_rate": 0.00020136842105263156, "loss": 0.0025, "step": 4314 }, { "epoch": 64.4, "learning_rate": 0.0002013333333333333, "loss": 0.0028, "step": 4315 }, { "epoch": 64.42, "learning_rate": 0.00020129824561403506, "loss": 0.029, "step": 4316 }, { "epoch": 64.43, "learning_rate": 0.00020126315789473684, "loss": 0.0315, "step": 4317 }, { "epoch": 64.45, "learning_rate": 0.00020122807017543859, "loss": 0.2121, "step": 4318 }, { "epoch": 64.46, "learning_rate": 0.00020119298245614033, "loss": 0.0234, "step": 4319 }, { "epoch": 64.48, "learning_rate": 0.00020115789473684208, "loss": 0.0018, "step": 4320 }, { "epoch": 64.49, "learning_rate": 0.00020112280701754386, "loss": 0.3627, "step": 4321 }, { "epoch": 64.51, "learning_rate": 0.0002010877192982456, "loss": 0.0109, "step": 4322 }, { "epoch": 64.52, "learning_rate": 0.00020105263157894736, "loss": 0.2671, "step": 4323 }, { "epoch": 64.54, "learning_rate": 0.00020101754385964913, "loss": 0.2699, "step": 4324 }, { "epoch": 64.55, "learning_rate": 0.00020098245614035085, "loss": 0.0016, "step": 4325 }, { "epoch": 64.57, "learning_rate": 0.0002009473684210526, "loss": 0.1817, "step": 4326 }, { "epoch": 64.58, "learning_rate": 0.00020091228070175435, "loss": 0.1634, "step": 4327 }, { "epoch": 64.59, "learning_rate": 0.00020087719298245613, "loss": 0.0018, "step": 4328 }, { "epoch": 64.61, "learning_rate": 0.00020084210526315788, "loss": 0.0023, "step": 4329 }, { "epoch": 64.62, "learning_rate": 0.00020080701754385963, "loss": 0.0434, "step": 4330 }, { "epoch": 64.64, "learning_rate": 0.00020077192982456137, "loss": 0.1258, "step": 4331 }, { "epoch": 64.65, "learning_rate": 0.00020073684210526315, "loss": 0.0021, "step": 4332 }, { "epoch": 64.67, "learning_rate": 0.0002007017543859649, "loss": 0.0041, "step": 4333 }, { "epoch": 64.68, "learning_rate": 0.00020066666666666665, "loss": 0.0288, "step": 4334 }, { "epoch": 64.7, "learning_rate": 0.0002006315789473684, "loss": 0.0029, "step": 4335 }, { "epoch": 64.71, "learning_rate": 0.00020059649122807017, "loss": 0.0031, "step": 4336 }, { "epoch": 64.73, "learning_rate": 0.00020056140350877192, "loss": 0.0202, "step": 4337 }, { "epoch": 64.74, "learning_rate": 0.00020052631578947367, "loss": 0.007, "step": 4338 }, { "epoch": 64.76, "learning_rate": 0.00020049122807017545, "loss": 0.0825, "step": 4339 }, { "epoch": 64.77, "learning_rate": 0.0002004561403508772, "loss": 0.073, "step": 4340 }, { "epoch": 64.79, "learning_rate": 0.00020042105263157892, "loss": 0.0015, "step": 4341 }, { "epoch": 64.8, "learning_rate": 0.00020038596491228066, "loss": 0.1713, "step": 4342 }, { "epoch": 64.82, "learning_rate": 0.00020035087719298244, "loss": 0.1075, "step": 4343 }, { "epoch": 64.83, "learning_rate": 0.0002003157894736842, "loss": 0.0029, "step": 4344 }, { "epoch": 64.85, "learning_rate": 0.00020028070175438594, "loss": 0.0301, "step": 4345 }, { "epoch": 64.86, "learning_rate": 0.0002002456140350877, "loss": 0.0017, "step": 4346 }, { "epoch": 64.88, "learning_rate": 0.00020021052631578946, "loss": 0.0022, "step": 4347 }, { "epoch": 64.89, "learning_rate": 0.0002001754385964912, "loss": 0.3129, "step": 4348 }, { "epoch": 64.91, "learning_rate": 0.00020014035087719296, "loss": 0.101, "step": 4349 }, { "epoch": 64.92, "learning_rate": 0.00020010526315789474, "loss": 0.0029, "step": 4350 }, { "epoch": 64.94, "learning_rate": 0.00020007017543859648, "loss": 0.008, "step": 4351 }, { "epoch": 64.95, "learning_rate": 0.00020003508771929823, "loss": 0.1807, "step": 4352 }, { "epoch": 64.97, "learning_rate": 0.00019999999999999998, "loss": 0.053, "step": 4353 }, { "epoch": 64.98, "learning_rate": 0.00019996491228070176, "loss": 0.1483, "step": 4354 }, { "epoch": 65.0, "learning_rate": 0.0001999298245614035, "loss": 0.0238, "step": 4355 }, { "epoch": 65.01, "learning_rate": 0.00019989473684210523, "loss": 0.0031, "step": 4356 }, { "epoch": 65.03, "learning_rate": 0.00019985964912280698, "loss": 0.0186, "step": 4357 }, { "epoch": 65.04, "learning_rate": 0.00019982456140350875, "loss": 0.04, "step": 4358 }, { "epoch": 65.06, "learning_rate": 0.0001997894736842105, "loss": 0.0081, "step": 4359 }, { "epoch": 65.07, "learning_rate": 0.00019975438596491225, "loss": 0.0024, "step": 4360 }, { "epoch": 65.09, "learning_rate": 0.000199719298245614, "loss": 0.0114, "step": 4361 }, { "epoch": 65.1, "learning_rate": 0.00019968421052631578, "loss": 0.0039, "step": 4362 }, { "epoch": 65.12, "learning_rate": 0.00019964912280701752, "loss": 0.0028, "step": 4363 }, { "epoch": 65.13, "learning_rate": 0.00019961403508771927, "loss": 0.0313, "step": 4364 }, { "epoch": 65.15, "learning_rate": 0.00019957894736842105, "loss": 0.2049, "step": 4365 }, { "epoch": 65.16, "learning_rate": 0.0001995438596491228, "loss": 0.3819, "step": 4366 }, { "epoch": 65.18, "learning_rate": 0.00019950877192982455, "loss": 0.0066, "step": 4367 }, { "epoch": 65.19, "learning_rate": 0.0001994736842105263, "loss": 0.0154, "step": 4368 }, { "epoch": 65.21, "learning_rate": 0.00019943859649122807, "loss": 0.039, "step": 4369 }, { "epoch": 65.22, "learning_rate": 0.00019940350877192982, "loss": 0.0363, "step": 4370 }, { "epoch": 65.24, "learning_rate": 0.00019936842105263154, "loss": 0.1389, "step": 4371 }, { "epoch": 65.25, "learning_rate": 0.0001993333333333333, "loss": 0.0658, "step": 4372 }, { "epoch": 65.27, "learning_rate": 0.00019929824561403507, "loss": 0.2018, "step": 4373 }, { "epoch": 65.28, "learning_rate": 0.00019926315789473682, "loss": 0.0042, "step": 4374 }, { "epoch": 65.3, "learning_rate": 0.00019922807017543856, "loss": 0.0155, "step": 4375 }, { "epoch": 65.31, "learning_rate": 0.00019919298245614034, "loss": 0.1579, "step": 4376 }, { "epoch": 65.33, "learning_rate": 0.0001991578947368421, "loss": 0.4092, "step": 4377 }, { "epoch": 65.34, "learning_rate": 0.00019912280701754384, "loss": 0.0113, "step": 4378 }, { "epoch": 65.36, "learning_rate": 0.00019908771929824559, "loss": 0.3527, "step": 4379 }, { "epoch": 65.37, "learning_rate": 0.00019905263157894736, "loss": 0.0811, "step": 4380 }, { "epoch": 65.39, "learning_rate": 0.0001990175438596491, "loss": 0.0931, "step": 4381 }, { "epoch": 65.4, "learning_rate": 0.00019898245614035086, "loss": 0.1252, "step": 4382 }, { "epoch": 65.42, "learning_rate": 0.0001989473684210526, "loss": 0.263, "step": 4383 }, { "epoch": 65.43, "learning_rate": 0.00019891228070175438, "loss": 0.0018, "step": 4384 }, { "epoch": 65.45, "learning_rate": 0.00019887719298245613, "loss": 0.0345, "step": 4385 }, { "epoch": 65.46, "learning_rate": 0.00019884210526315788, "loss": 0.2768, "step": 4386 }, { "epoch": 65.48, "learning_rate": 0.00019880701754385966, "loss": 0.0059, "step": 4387 }, { "epoch": 65.49, "learning_rate": 0.0001987719298245614, "loss": 0.2509, "step": 4388 }, { "epoch": 65.51, "learning_rate": 0.00019873684210526313, "loss": 0.02, "step": 4389 }, { "epoch": 65.52, "learning_rate": 0.00019870175438596488, "loss": 0.0966, "step": 4390 }, { "epoch": 65.54, "learning_rate": 0.00019866666666666665, "loss": 0.0251, "step": 4391 }, { "epoch": 65.55, "learning_rate": 0.0001986315789473684, "loss": 0.0032, "step": 4392 }, { "epoch": 65.57, "learning_rate": 0.00019859649122807015, "loss": 0.2893, "step": 4393 }, { "epoch": 65.58, "learning_rate": 0.0001985614035087719, "loss": 0.1985, "step": 4394 }, { "epoch": 65.59, "learning_rate": 0.00019852631578947368, "loss": 0.0492, "step": 4395 }, { "epoch": 65.61, "learning_rate": 0.00019849122807017542, "loss": 0.0081, "step": 4396 }, { "epoch": 65.62, "learning_rate": 0.00019845614035087717, "loss": 0.3234, "step": 4397 }, { "epoch": 65.64, "learning_rate": 0.00019842105263157892, "loss": 0.0271, "step": 4398 }, { "epoch": 65.65, "learning_rate": 0.0001983859649122807, "loss": 0.1399, "step": 4399 }, { "epoch": 65.67, "learning_rate": 0.00019835087719298245, "loss": 0.0043, "step": 4400 }, { "epoch": 65.67, "eval_accuracy": 0.8387175721977485, "eval_f1": 0.8415862353144246, "eval_loss": 0.7780405879020691, "eval_runtime": 345.1415, "eval_samples_per_second": 11.839, "eval_steps_per_second": 0.742, "step": 4400 }, { "epoch": 65.68, "learning_rate": 0.0001983157894736842, "loss": 0.1963, "step": 4401 }, { "epoch": 65.7, "learning_rate": 0.00019828070175438597, "loss": 0.3146, "step": 4402 }, { "epoch": 65.71, "learning_rate": 0.00019824561403508772, "loss": 0.0092, "step": 4403 }, { "epoch": 65.73, "learning_rate": 0.00019821052631578944, "loss": 0.0416, "step": 4404 }, { "epoch": 65.74, "learning_rate": 0.0001981754385964912, "loss": 0.2324, "step": 4405 }, { "epoch": 65.76, "learning_rate": 0.00019814035087719297, "loss": 0.1401, "step": 4406 }, { "epoch": 65.77, "learning_rate": 0.00019810526315789471, "loss": 0.0429, "step": 4407 }, { "epoch": 65.79, "learning_rate": 0.00019807017543859646, "loss": 0.1155, "step": 4408 }, { "epoch": 65.8, "learning_rate": 0.0001980350877192982, "loss": 0.1608, "step": 4409 }, { "epoch": 65.82, "learning_rate": 0.000198, "loss": 0.3258, "step": 4410 }, { "epoch": 65.83, "learning_rate": 0.00019796491228070174, "loss": 0.0427, "step": 4411 }, { "epoch": 65.85, "learning_rate": 0.00019792982456140349, "loss": 0.2753, "step": 4412 }, { "epoch": 65.86, "learning_rate": 0.00019789473684210526, "loss": 0.0221, "step": 4413 }, { "epoch": 65.88, "learning_rate": 0.000197859649122807, "loss": 0.0376, "step": 4414 }, { "epoch": 65.89, "learning_rate": 0.00019782456140350876, "loss": 0.0378, "step": 4415 }, { "epoch": 65.91, "learning_rate": 0.0001977894736842105, "loss": 0.0455, "step": 4416 }, { "epoch": 65.92, "learning_rate": 0.00019775438596491228, "loss": 0.0532, "step": 4417 }, { "epoch": 65.94, "learning_rate": 0.00019771929824561403, "loss": 0.1445, "step": 4418 }, { "epoch": 65.95, "learning_rate": 0.00019768421052631575, "loss": 0.035, "step": 4419 }, { "epoch": 65.97, "learning_rate": 0.0001976491228070175, "loss": 0.4898, "step": 4420 }, { "epoch": 65.98, "learning_rate": 0.00019761403508771928, "loss": 0.0462, "step": 4421 }, { "epoch": 66.0, "learning_rate": 0.00019757894736842103, "loss": 0.018, "step": 4422 }, { "epoch": 66.01, "learning_rate": 0.00019754385964912278, "loss": 0.021, "step": 4423 }, { "epoch": 66.03, "learning_rate": 0.00019750877192982453, "loss": 0.1062, "step": 4424 }, { "epoch": 66.04, "learning_rate": 0.0001974736842105263, "loss": 0.2795, "step": 4425 }, { "epoch": 66.06, "learning_rate": 0.00019743859649122805, "loss": 0.0288, "step": 4426 }, { "epoch": 66.07, "learning_rate": 0.0001974035087719298, "loss": 0.0331, "step": 4427 }, { "epoch": 66.09, "learning_rate": 0.00019736842105263157, "loss": 0.0093, "step": 4428 }, { "epoch": 66.1, "learning_rate": 0.00019733333333333332, "loss": 0.0161, "step": 4429 }, { "epoch": 66.12, "learning_rate": 0.00019729824561403507, "loss": 0.0112, "step": 4430 }, { "epoch": 66.13, "learning_rate": 0.00019726315789473682, "loss": 0.0462, "step": 4431 }, { "epoch": 66.15, "learning_rate": 0.0001972280701754386, "loss": 0.0103, "step": 4432 }, { "epoch": 66.16, "learning_rate": 0.00019719298245614035, "loss": 0.0626, "step": 4433 }, { "epoch": 66.18, "learning_rate": 0.0001971578947368421, "loss": 0.0874, "step": 4434 }, { "epoch": 66.19, "learning_rate": 0.00019712280701754382, "loss": 0.0121, "step": 4435 }, { "epoch": 66.21, "learning_rate": 0.00019708771929824562, "loss": 0.2007, "step": 4436 }, { "epoch": 66.22, "learning_rate": 0.00019705263157894734, "loss": 0.0053, "step": 4437 }, { "epoch": 66.24, "learning_rate": 0.0001970175438596491, "loss": 0.0047, "step": 4438 }, { "epoch": 66.25, "learning_rate": 0.00019698245614035087, "loss": 0.0076, "step": 4439 }, { "epoch": 66.27, "learning_rate": 0.00019694736842105261, "loss": 0.1006, "step": 4440 }, { "epoch": 66.28, "learning_rate": 0.00019691228070175436, "loss": 0.0038, "step": 4441 }, { "epoch": 66.3, "learning_rate": 0.0001968771929824561, "loss": 0.0139, "step": 4442 }, { "epoch": 66.31, "learning_rate": 0.0001968421052631579, "loss": 0.0153, "step": 4443 }, { "epoch": 66.33, "learning_rate": 0.00019680701754385964, "loss": 0.087, "step": 4444 }, { "epoch": 66.34, "learning_rate": 0.00019677192982456139, "loss": 0.1041, "step": 4445 }, { "epoch": 66.36, "learning_rate": 0.00019673684210526313, "loss": 0.0709, "step": 4446 }, { "epoch": 66.37, "learning_rate": 0.0001967017543859649, "loss": 0.0218, "step": 4447 }, { "epoch": 66.39, "learning_rate": 0.00019666666666666666, "loss": 0.3569, "step": 4448 }, { "epoch": 66.4, "learning_rate": 0.0001966315789473684, "loss": 0.0036, "step": 4449 }, { "epoch": 66.42, "learning_rate": 0.00019659649122807018, "loss": 0.0239, "step": 4450 }, { "epoch": 66.43, "learning_rate": 0.00019656140350877193, "loss": 0.1154, "step": 4451 }, { "epoch": 66.45, "learning_rate": 0.00019652631578947365, "loss": 0.0184, "step": 4452 }, { "epoch": 66.46, "learning_rate": 0.0001964912280701754, "loss": 0.1561, "step": 4453 }, { "epoch": 66.48, "learning_rate": 0.00019645614035087718, "loss": 0.0059, "step": 4454 }, { "epoch": 66.49, "learning_rate": 0.00019642105263157893, "loss": 0.0033, "step": 4455 }, { "epoch": 66.51, "learning_rate": 0.00019638596491228068, "loss": 0.0037, "step": 4456 }, { "epoch": 66.52, "learning_rate": 0.00019635087719298242, "loss": 0.4539, "step": 4457 }, { "epoch": 66.54, "learning_rate": 0.0001963157894736842, "loss": 0.0782, "step": 4458 }, { "epoch": 66.55, "learning_rate": 0.00019628070175438595, "loss": 0.0233, "step": 4459 }, { "epoch": 66.57, "learning_rate": 0.0001962456140350877, "loss": 0.0077, "step": 4460 }, { "epoch": 66.58, "learning_rate": 0.00019621052631578945, "loss": 0.0778, "step": 4461 }, { "epoch": 66.59, "learning_rate": 0.00019617543859649122, "loss": 0.0108, "step": 4462 }, { "epoch": 66.61, "learning_rate": 0.00019614035087719297, "loss": 0.0633, "step": 4463 }, { "epoch": 66.62, "learning_rate": 0.00019610526315789472, "loss": 0.0154, "step": 4464 }, { "epoch": 66.64, "learning_rate": 0.0001960701754385965, "loss": 0.2093, "step": 4465 }, { "epoch": 66.65, "learning_rate": 0.00019603508771929824, "loss": 0.0022, "step": 4466 }, { "epoch": 66.67, "learning_rate": 0.00019599999999999997, "loss": 0.0282, "step": 4467 }, { "epoch": 66.68, "learning_rate": 0.00019596491228070172, "loss": 0.1057, "step": 4468 }, { "epoch": 66.7, "learning_rate": 0.0001959298245614035, "loss": 0.2186, "step": 4469 }, { "epoch": 66.71, "learning_rate": 0.00019589473684210524, "loss": 0.0053, "step": 4470 }, { "epoch": 66.73, "learning_rate": 0.000195859649122807, "loss": 0.002, "step": 4471 }, { "epoch": 66.74, "learning_rate": 0.00019582456140350874, "loss": 0.0124, "step": 4472 }, { "epoch": 66.76, "learning_rate": 0.0001957894736842105, "loss": 0.0051, "step": 4473 }, { "epoch": 66.77, "learning_rate": 0.00019575438596491226, "loss": 0.0029, "step": 4474 }, { "epoch": 66.79, "learning_rate": 0.000195719298245614, "loss": 0.1849, "step": 4475 }, { "epoch": 66.8, "learning_rate": 0.0001956842105263158, "loss": 0.0463, "step": 4476 }, { "epoch": 66.82, "learning_rate": 0.00019564912280701754, "loss": 0.0748, "step": 4477 }, { "epoch": 66.83, "learning_rate": 0.00019561403508771928, "loss": 0.1394, "step": 4478 }, { "epoch": 66.85, "learning_rate": 0.00019557894736842103, "loss": 0.0029, "step": 4479 }, { "epoch": 66.86, "learning_rate": 0.0001955438596491228, "loss": 0.0054, "step": 4480 }, { "epoch": 66.88, "learning_rate": 0.00019550877192982456, "loss": 0.1783, "step": 4481 }, { "epoch": 66.89, "learning_rate": 0.0001954736842105263, "loss": 0.1708, "step": 4482 }, { "epoch": 66.91, "learning_rate": 0.00019543859649122803, "loss": 0.0019, "step": 4483 }, { "epoch": 66.92, "learning_rate": 0.0001954035087719298, "loss": 0.0016, "step": 4484 }, { "epoch": 66.94, "learning_rate": 0.00019536842105263155, "loss": 0.0361, "step": 4485 }, { "epoch": 66.95, "learning_rate": 0.0001953333333333333, "loss": 0.0388, "step": 4486 }, { "epoch": 66.97, "learning_rate": 0.00019529824561403508, "loss": 0.1492, "step": 4487 }, { "epoch": 66.98, "learning_rate": 0.00019526315789473683, "loss": 0.246, "step": 4488 }, { "epoch": 67.0, "learning_rate": 0.00019522807017543858, "loss": 0.0024, "step": 4489 }, { "epoch": 67.01, "learning_rate": 0.00019519298245614032, "loss": 0.0028, "step": 4490 }, { "epoch": 67.03, "learning_rate": 0.0001951578947368421, "loss": 0.009, "step": 4491 }, { "epoch": 67.04, "learning_rate": 0.00019512280701754385, "loss": 0.0023, "step": 4492 }, { "epoch": 67.06, "learning_rate": 0.0001950877192982456, "loss": 0.0056, "step": 4493 }, { "epoch": 67.07, "learning_rate": 0.00019505263157894735, "loss": 0.0018, "step": 4494 }, { "epoch": 67.09, "learning_rate": 0.00019501754385964912, "loss": 0.0128, "step": 4495 }, { "epoch": 67.1, "learning_rate": 0.00019498245614035087, "loss": 0.0055, "step": 4496 }, { "epoch": 67.12, "learning_rate": 0.00019494736842105262, "loss": 0.0201, "step": 4497 }, { "epoch": 67.13, "learning_rate": 0.00019491228070175434, "loss": 0.0064, "step": 4498 }, { "epoch": 67.15, "learning_rate": 0.00019487719298245614, "loss": 0.0973, "step": 4499 }, { "epoch": 67.16, "learning_rate": 0.00019484210526315787, "loss": 0.0027, "step": 4500 }, { "epoch": 67.18, "learning_rate": 0.00019480701754385961, "loss": 0.1812, "step": 4501 }, { "epoch": 67.19, "learning_rate": 0.0001947719298245614, "loss": 0.0089, "step": 4502 }, { "epoch": 67.21, "learning_rate": 0.00019473684210526314, "loss": 0.0528, "step": 4503 }, { "epoch": 67.22, "learning_rate": 0.0001947017543859649, "loss": 0.002, "step": 4504 }, { "epoch": 67.24, "learning_rate": 0.00019466666666666664, "loss": 0.0464, "step": 4505 }, { "epoch": 67.25, "learning_rate": 0.0001946315789473684, "loss": 0.0053, "step": 4506 }, { "epoch": 67.27, "learning_rate": 0.00019459649122807016, "loss": 0.0055, "step": 4507 }, { "epoch": 67.28, "learning_rate": 0.0001945614035087719, "loss": 0.0269, "step": 4508 }, { "epoch": 67.3, "learning_rate": 0.00019452631578947366, "loss": 0.0141, "step": 4509 }, { "epoch": 67.31, "learning_rate": 0.00019449122807017544, "loss": 0.0025, "step": 4510 }, { "epoch": 67.33, "learning_rate": 0.00019445614035087718, "loss": 0.192, "step": 4511 }, { "epoch": 67.34, "learning_rate": 0.00019442105263157893, "loss": 0.1234, "step": 4512 }, { "epoch": 67.36, "learning_rate": 0.0001943859649122807, "loss": 0.0036, "step": 4513 }, { "epoch": 67.37, "learning_rate": 0.00019435087719298246, "loss": 0.1498, "step": 4514 }, { "epoch": 67.39, "learning_rate": 0.00019431578947368418, "loss": 0.004, "step": 4515 }, { "epoch": 67.4, "learning_rate": 0.00019428070175438593, "loss": 0.0039, "step": 4516 }, { "epoch": 67.42, "learning_rate": 0.0001942456140350877, "loss": 0.1355, "step": 4517 }, { "epoch": 67.43, "learning_rate": 0.00019421052631578945, "loss": 0.0039, "step": 4518 }, { "epoch": 67.45, "learning_rate": 0.0001941754385964912, "loss": 0.0032, "step": 4519 }, { "epoch": 67.46, "learning_rate": 0.00019414035087719295, "loss": 0.0042, "step": 4520 }, { "epoch": 67.48, "learning_rate": 0.00019410526315789473, "loss": 0.0115, "step": 4521 }, { "epoch": 67.49, "learning_rate": 0.00019407017543859647, "loss": 0.2845, "step": 4522 }, { "epoch": 67.51, "learning_rate": 0.00019403508771929822, "loss": 0.0287, "step": 4523 }, { "epoch": 67.52, "learning_rate": 0.00019399999999999997, "loss": 0.0677, "step": 4524 }, { "epoch": 67.54, "learning_rate": 0.00019396491228070175, "loss": 0.1119, "step": 4525 }, { "epoch": 67.55, "learning_rate": 0.0001939298245614035, "loss": 0.0017, "step": 4526 }, { "epoch": 67.57, "learning_rate": 0.00019389473684210525, "loss": 0.5221, "step": 4527 }, { "epoch": 67.58, "learning_rate": 0.00019385964912280702, "loss": 0.2991, "step": 4528 }, { "epoch": 67.59, "learning_rate": 0.00019382456140350877, "loss": 0.0145, "step": 4529 }, { "epoch": 67.61, "learning_rate": 0.00019378947368421052, "loss": 0.0054, "step": 4530 }, { "epoch": 67.62, "learning_rate": 0.00019375438596491224, "loss": 0.0019, "step": 4531 }, { "epoch": 67.64, "learning_rate": 0.00019371929824561402, "loss": 0.002, "step": 4532 }, { "epoch": 67.65, "learning_rate": 0.00019368421052631577, "loss": 0.1062, "step": 4533 }, { "epoch": 67.67, "learning_rate": 0.00019364912280701751, "loss": 0.0275, "step": 4534 }, { "epoch": 67.68, "learning_rate": 0.00019361403508771926, "loss": 0.0017, "step": 4535 }, { "epoch": 67.7, "learning_rate": 0.00019357894736842104, "loss": 0.2389, "step": 4536 }, { "epoch": 67.71, "learning_rate": 0.0001935438596491228, "loss": 0.0711, "step": 4537 }, { "epoch": 67.73, "learning_rate": 0.00019350877192982454, "loss": 0.0034, "step": 4538 }, { "epoch": 67.74, "learning_rate": 0.0001934736842105263, "loss": 0.0016, "step": 4539 }, { "epoch": 67.76, "learning_rate": 0.00019343859649122806, "loss": 0.0042, "step": 4540 }, { "epoch": 67.77, "learning_rate": 0.0001934035087719298, "loss": 0.0104, "step": 4541 }, { "epoch": 67.79, "learning_rate": 0.00019336842105263156, "loss": 0.0176, "step": 4542 }, { "epoch": 67.8, "learning_rate": 0.00019333333333333333, "loss": 0.0078, "step": 4543 }, { "epoch": 67.82, "learning_rate": 0.00019329824561403508, "loss": 0.0097, "step": 4544 }, { "epoch": 67.83, "learning_rate": 0.00019326315789473683, "loss": 0.0031, "step": 4545 }, { "epoch": 67.85, "learning_rate": 0.00019322807017543855, "loss": 0.0199, "step": 4546 }, { "epoch": 67.86, "learning_rate": 0.00019319298245614036, "loss": 0.0096, "step": 4547 }, { "epoch": 67.88, "learning_rate": 0.00019315789473684208, "loss": 0.0373, "step": 4548 }, { "epoch": 67.89, "learning_rate": 0.00019312280701754383, "loss": 0.0832, "step": 4549 }, { "epoch": 67.91, "learning_rate": 0.0001930877192982456, "loss": 0.0047, "step": 4550 }, { "epoch": 67.92, "learning_rate": 0.00019305263157894735, "loss": 0.003, "step": 4551 }, { "epoch": 67.94, "learning_rate": 0.0001930175438596491, "loss": 0.0063, "step": 4552 }, { "epoch": 67.95, "learning_rate": 0.00019298245614035085, "loss": 0.0092, "step": 4553 }, { "epoch": 67.97, "learning_rate": 0.00019294736842105263, "loss": 0.0266, "step": 4554 }, { "epoch": 67.98, "learning_rate": 0.00019291228070175437, "loss": 0.0016, "step": 4555 }, { "epoch": 68.0, "learning_rate": 0.00019287719298245612, "loss": 0.0127, "step": 4556 }, { "epoch": 68.01, "learning_rate": 0.00019284210526315787, "loss": 0.0633, "step": 4557 }, { "epoch": 68.03, "learning_rate": 0.00019280701754385965, "loss": 0.0054, "step": 4558 }, { "epoch": 68.04, "learning_rate": 0.0001927719298245614, "loss": 0.003, "step": 4559 }, { "epoch": 68.06, "learning_rate": 0.00019273684210526315, "loss": 0.1727, "step": 4560 }, { "epoch": 68.07, "learning_rate": 0.00019270175438596487, "loss": 0.0038, "step": 4561 }, { "epoch": 68.09, "learning_rate": 0.00019266666666666667, "loss": 0.0344, "step": 4562 }, { "epoch": 68.1, "learning_rate": 0.0001926315789473684, "loss": 0.0019, "step": 4563 }, { "epoch": 68.12, "learning_rate": 0.00019259649122807014, "loss": 0.0056, "step": 4564 }, { "epoch": 68.13, "learning_rate": 0.00019256140350877192, "loss": 0.0031, "step": 4565 }, { "epoch": 68.15, "learning_rate": 0.00019252631578947366, "loss": 0.002, "step": 4566 }, { "epoch": 68.16, "learning_rate": 0.00019249122807017541, "loss": 0.01, "step": 4567 }, { "epoch": 68.18, "learning_rate": 0.00019245614035087716, "loss": 0.0028, "step": 4568 }, { "epoch": 68.19, "learning_rate": 0.00019242105263157894, "loss": 0.1223, "step": 4569 }, { "epoch": 68.21, "learning_rate": 0.0001923859649122807, "loss": 0.0052, "step": 4570 }, { "epoch": 68.22, "learning_rate": 0.00019235087719298244, "loss": 0.3806, "step": 4571 }, { "epoch": 68.24, "learning_rate": 0.00019231578947368418, "loss": 0.0019, "step": 4572 }, { "epoch": 68.25, "learning_rate": 0.00019228070175438596, "loss": 0.0223, "step": 4573 }, { "epoch": 68.27, "learning_rate": 0.0001922456140350877, "loss": 0.0017, "step": 4574 }, { "epoch": 68.28, "learning_rate": 0.00019221052631578946, "loss": 0.1442, "step": 4575 }, { "epoch": 68.3, "learning_rate": 0.00019217543859649123, "loss": 0.0333, "step": 4576 }, { "epoch": 68.31, "learning_rate": 0.00019214035087719298, "loss": 0.1573, "step": 4577 }, { "epoch": 68.33, "learning_rate": 0.0001921052631578947, "loss": 0.1816, "step": 4578 }, { "epoch": 68.34, "learning_rate": 0.00019207017543859645, "loss": 0.0019, "step": 4579 }, { "epoch": 68.36, "learning_rate": 0.00019203508771929823, "loss": 0.0022, "step": 4580 }, { "epoch": 68.37, "learning_rate": 0.00019199999999999998, "loss": 0.0021, "step": 4581 }, { "epoch": 68.39, "learning_rate": 0.00019196491228070173, "loss": 0.0018, "step": 4582 }, { "epoch": 68.4, "learning_rate": 0.00019192982456140348, "loss": 0.0197, "step": 4583 }, { "epoch": 68.42, "learning_rate": 0.00019189473684210525, "loss": 0.1204, "step": 4584 }, { "epoch": 68.43, "learning_rate": 0.000191859649122807, "loss": 0.0027, "step": 4585 }, { "epoch": 68.45, "learning_rate": 0.00019182456140350875, "loss": 0.194, "step": 4586 }, { "epoch": 68.46, "learning_rate": 0.0001917894736842105, "loss": 0.0022, "step": 4587 }, { "epoch": 68.48, "learning_rate": 0.00019175438596491227, "loss": 0.003, "step": 4588 }, { "epoch": 68.49, "learning_rate": 0.00019171929824561402, "loss": 0.1746, "step": 4589 }, { "epoch": 68.51, "learning_rate": 0.00019168421052631577, "loss": 0.2832, "step": 4590 }, { "epoch": 68.52, "learning_rate": 0.00019164912280701755, "loss": 0.0399, "step": 4591 }, { "epoch": 68.54, "learning_rate": 0.0001916140350877193, "loss": 0.0209, "step": 4592 }, { "epoch": 68.55, "learning_rate": 0.00019157894736842104, "loss": 0.0055, "step": 4593 }, { "epoch": 68.57, "learning_rate": 0.00019154385964912277, "loss": 0.0084, "step": 4594 }, { "epoch": 68.58, "learning_rate": 0.00019150877192982457, "loss": 0.0051, "step": 4595 }, { "epoch": 68.59, "learning_rate": 0.0001914736842105263, "loss": 0.1722, "step": 4596 }, { "epoch": 68.61, "learning_rate": 0.00019143859649122804, "loss": 0.0606, "step": 4597 }, { "epoch": 68.62, "learning_rate": 0.0001914035087719298, "loss": 0.0611, "step": 4598 }, { "epoch": 68.64, "learning_rate": 0.00019136842105263156, "loss": 0.0172, "step": 4599 }, { "epoch": 68.65, "learning_rate": 0.0001913333333333333, "loss": 0.0032, "step": 4600 }, { "epoch": 68.65, "eval_accuracy": 0.846059716103769, "eval_f1": 0.8482206471494375, "eval_loss": 0.689896821975708, "eval_runtime": 344.0746, "eval_samples_per_second": 11.875, "eval_steps_per_second": 0.744, "step": 4600 }, { "epoch": 68.67, "learning_rate": 0.00019129824561403506, "loss": 0.0392, "step": 4601 }, { "epoch": 68.68, "learning_rate": 0.00019126315789473684, "loss": 0.0019, "step": 4602 }, { "epoch": 68.7, "learning_rate": 0.00019122807017543859, "loss": 0.0025, "step": 4603 }, { "epoch": 68.71, "learning_rate": 0.00019119298245614034, "loss": 0.0398, "step": 4604 }, { "epoch": 68.73, "learning_rate": 0.00019115789473684208, "loss": 0.1833, "step": 4605 }, { "epoch": 68.74, "learning_rate": 0.00019112280701754386, "loss": 0.0022, "step": 4606 }, { "epoch": 68.76, "learning_rate": 0.0001910877192982456, "loss": 0.0574, "step": 4607 }, { "epoch": 68.77, "learning_rate": 0.00019105263157894736, "loss": 0.0379, "step": 4608 }, { "epoch": 68.79, "learning_rate": 0.00019101754385964908, "loss": 0.0053, "step": 4609 }, { "epoch": 68.8, "learning_rate": 0.00019098245614035088, "loss": 0.0207, "step": 4610 }, { "epoch": 68.82, "learning_rate": 0.0001909473684210526, "loss": 0.0028, "step": 4611 }, { "epoch": 68.83, "learning_rate": 0.00019091228070175435, "loss": 0.0186, "step": 4612 }, { "epoch": 68.85, "learning_rate": 0.00019087719298245613, "loss": 0.1757, "step": 4613 }, { "epoch": 68.86, "learning_rate": 0.00019084210526315788, "loss": 0.0196, "step": 4614 }, { "epoch": 68.88, "learning_rate": 0.00019080701754385963, "loss": 0.1241, "step": 4615 }, { "epoch": 68.89, "learning_rate": 0.00019077192982456137, "loss": 0.0025, "step": 4616 }, { "epoch": 68.91, "learning_rate": 0.00019073684210526315, "loss": 0.2688, "step": 4617 }, { "epoch": 68.92, "learning_rate": 0.0001907017543859649, "loss": 0.0955, "step": 4618 }, { "epoch": 68.94, "learning_rate": 0.00019066666666666665, "loss": 0.1777, "step": 4619 }, { "epoch": 68.95, "learning_rate": 0.0001906315789473684, "loss": 0.0022, "step": 4620 }, { "epoch": 68.97, "learning_rate": 0.00019059649122807017, "loss": 0.0059, "step": 4621 }, { "epoch": 68.98, "learning_rate": 0.00019056140350877192, "loss": 0.0378, "step": 4622 }, { "epoch": 69.0, "learning_rate": 0.00019052631578947367, "loss": 0.1003, "step": 4623 }, { "epoch": 69.01, "learning_rate": 0.00019049122807017542, "loss": 0.2193, "step": 4624 }, { "epoch": 69.03, "learning_rate": 0.0001904561403508772, "loss": 0.211, "step": 4625 }, { "epoch": 69.04, "learning_rate": 0.00019042105263157892, "loss": 0.1146, "step": 4626 }, { "epoch": 69.06, "learning_rate": 0.00019038596491228067, "loss": 0.0106, "step": 4627 }, { "epoch": 69.07, "learning_rate": 0.00019035087719298244, "loss": 0.2057, "step": 4628 }, { "epoch": 69.09, "learning_rate": 0.0001903157894736842, "loss": 0.011, "step": 4629 }, { "epoch": 69.1, "learning_rate": 0.00019028070175438594, "loss": 0.1871, "step": 4630 }, { "epoch": 69.12, "learning_rate": 0.0001902456140350877, "loss": 0.2694, "step": 4631 }, { "epoch": 69.13, "learning_rate": 0.00019021052631578946, "loss": 0.0786, "step": 4632 }, { "epoch": 69.15, "learning_rate": 0.0001901754385964912, "loss": 0.0609, "step": 4633 }, { "epoch": 69.16, "learning_rate": 0.00019014035087719296, "loss": 0.1266, "step": 4634 }, { "epoch": 69.18, "learning_rate": 0.0001901052631578947, "loss": 0.0047, "step": 4635 }, { "epoch": 69.19, "learning_rate": 0.00019007017543859649, "loss": 0.0186, "step": 4636 }, { "epoch": 69.21, "learning_rate": 0.00019003508771929823, "loss": 0.1044, "step": 4637 }, { "epoch": 69.22, "learning_rate": 0.00018999999999999998, "loss": 0.0125, "step": 4638 }, { "epoch": 69.24, "learning_rate": 0.00018996491228070176, "loss": 0.0122, "step": 4639 }, { "epoch": 69.25, "learning_rate": 0.0001899298245614035, "loss": 0.0034, "step": 4640 }, { "epoch": 69.27, "learning_rate": 0.00018989473684210526, "loss": 0.2012, "step": 4641 }, { "epoch": 69.28, "learning_rate": 0.00018985964912280698, "loss": 0.1219, "step": 4642 }, { "epoch": 69.3, "learning_rate": 0.00018982456140350878, "loss": 0.128, "step": 4643 }, { "epoch": 69.31, "learning_rate": 0.0001897894736842105, "loss": 0.2125, "step": 4644 }, { "epoch": 69.33, "learning_rate": 0.00018975438596491225, "loss": 0.0541, "step": 4645 }, { "epoch": 69.34, "learning_rate": 0.000189719298245614, "loss": 0.0034, "step": 4646 }, { "epoch": 69.36, "learning_rate": 0.00018968421052631578, "loss": 0.1952, "step": 4647 }, { "epoch": 69.37, "learning_rate": 0.00018964912280701753, "loss": 0.1402, "step": 4648 }, { "epoch": 69.39, "learning_rate": 0.00018961403508771927, "loss": 0.0026, "step": 4649 }, { "epoch": 69.4, "learning_rate": 0.00018957894736842105, "loss": 0.0261, "step": 4650 }, { "epoch": 69.42, "learning_rate": 0.0001895438596491228, "loss": 0.0645, "step": 4651 }, { "epoch": 69.43, "learning_rate": 0.00018950877192982455, "loss": 0.0089, "step": 4652 }, { "epoch": 69.45, "learning_rate": 0.0001894736842105263, "loss": 0.0456, "step": 4653 }, { "epoch": 69.46, "learning_rate": 0.00018943859649122807, "loss": 0.0451, "step": 4654 }, { "epoch": 69.48, "learning_rate": 0.00018940350877192982, "loss": 0.1816, "step": 4655 }, { "epoch": 69.49, "learning_rate": 0.00018936842105263157, "loss": 0.0079, "step": 4656 }, { "epoch": 69.51, "learning_rate": 0.0001893333333333333, "loss": 0.0054, "step": 4657 }, { "epoch": 69.52, "learning_rate": 0.0001892982456140351, "loss": 0.0098, "step": 4658 }, { "epoch": 69.54, "learning_rate": 0.00018926315789473682, "loss": 0.1181, "step": 4659 }, { "epoch": 69.55, "learning_rate": 0.00018922807017543856, "loss": 0.0056, "step": 4660 }, { "epoch": 69.57, "learning_rate": 0.00018919298245614031, "loss": 0.0632, "step": 4661 }, { "epoch": 69.58, "learning_rate": 0.0001891578947368421, "loss": 0.0759, "step": 4662 }, { "epoch": 69.59, "learning_rate": 0.00018912280701754384, "loss": 0.0149, "step": 4663 }, { "epoch": 69.61, "learning_rate": 0.0001890877192982456, "loss": 0.0175, "step": 4664 }, { "epoch": 69.62, "learning_rate": 0.00018905263157894736, "loss": 0.0583, "step": 4665 }, { "epoch": 69.64, "learning_rate": 0.0001890175438596491, "loss": 0.0057, "step": 4666 }, { "epoch": 69.65, "learning_rate": 0.00018898245614035086, "loss": 0.023, "step": 4667 }, { "epoch": 69.67, "learning_rate": 0.0001889473684210526, "loss": 0.0054, "step": 4668 }, { "epoch": 69.68, "learning_rate": 0.00018891228070175439, "loss": 0.0018, "step": 4669 }, { "epoch": 69.7, "learning_rate": 0.00018887719298245613, "loss": 0.0502, "step": 4670 }, { "epoch": 69.71, "learning_rate": 0.00018884210526315788, "loss": 0.0018, "step": 4671 }, { "epoch": 69.73, "learning_rate": 0.0001888070175438596, "loss": 0.005, "step": 4672 }, { "epoch": 69.74, "learning_rate": 0.0001887719298245614, "loss": 0.3566, "step": 4673 }, { "epoch": 69.76, "learning_rate": 0.00018873684210526313, "loss": 0.0016, "step": 4674 }, { "epoch": 69.77, "learning_rate": 0.00018870175438596488, "loss": 0.1829, "step": 4675 }, { "epoch": 69.79, "learning_rate": 0.00018866666666666665, "loss": 0.0085, "step": 4676 }, { "epoch": 69.8, "learning_rate": 0.0001886315789473684, "loss": 0.0765, "step": 4677 }, { "epoch": 69.82, "learning_rate": 0.00018859649122807015, "loss": 0.0022, "step": 4678 }, { "epoch": 69.83, "learning_rate": 0.0001885614035087719, "loss": 0.0162, "step": 4679 }, { "epoch": 69.85, "learning_rate": 0.00018852631578947368, "loss": 0.2237, "step": 4680 }, { "epoch": 69.86, "learning_rate": 0.00018849122807017542, "loss": 0.2743, "step": 4681 }, { "epoch": 69.88, "learning_rate": 0.00018845614035087717, "loss": 0.0056, "step": 4682 }, { "epoch": 69.89, "learning_rate": 0.00018842105263157892, "loss": 0.0636, "step": 4683 }, { "epoch": 69.91, "learning_rate": 0.0001883859649122807, "loss": 0.0069, "step": 4684 }, { "epoch": 69.92, "learning_rate": 0.00018835087719298245, "loss": 0.1107, "step": 4685 }, { "epoch": 69.94, "learning_rate": 0.0001883157894736842, "loss": 0.0033, "step": 4686 }, { "epoch": 69.95, "learning_rate": 0.00018828070175438594, "loss": 0.0026, "step": 4687 }, { "epoch": 69.97, "learning_rate": 0.00018824561403508772, "loss": 0.188, "step": 4688 }, { "epoch": 69.98, "learning_rate": 0.00018821052631578947, "loss": 0.1188, "step": 4689 }, { "epoch": 70.0, "learning_rate": 0.0001881754385964912, "loss": 0.0255, "step": 4690 }, { "epoch": 70.01, "learning_rate": 0.00018814035087719297, "loss": 0.0019, "step": 4691 }, { "epoch": 70.03, "learning_rate": 0.00018810526315789472, "loss": 0.2015, "step": 4692 }, { "epoch": 70.04, "learning_rate": 0.00018807017543859646, "loss": 0.0018, "step": 4693 }, { "epoch": 70.06, "learning_rate": 0.0001880350877192982, "loss": 0.0096, "step": 4694 }, { "epoch": 70.07, "learning_rate": 0.000188, "loss": 0.0097, "step": 4695 }, { "epoch": 70.09, "learning_rate": 0.00018796491228070174, "loss": 0.1639, "step": 4696 }, { "epoch": 70.1, "learning_rate": 0.0001879298245614035, "loss": 0.0949, "step": 4697 }, { "epoch": 70.12, "learning_rate": 0.00018789473684210524, "loss": 0.0125, "step": 4698 }, { "epoch": 70.13, "learning_rate": 0.000187859649122807, "loss": 0.0019, "step": 4699 }, { "epoch": 70.15, "learning_rate": 0.00018782456140350876, "loss": 0.0016, "step": 4700 }, { "epoch": 70.16, "learning_rate": 0.0001877894736842105, "loss": 0.0095, "step": 4701 }, { "epoch": 70.18, "learning_rate": 0.00018775438596491228, "loss": 0.0103, "step": 4702 }, { "epoch": 70.19, "learning_rate": 0.00018771929824561403, "loss": 0.0036, "step": 4703 }, { "epoch": 70.21, "learning_rate": 0.00018768421052631578, "loss": 0.0878, "step": 4704 }, { "epoch": 70.22, "learning_rate": 0.0001876491228070175, "loss": 0.1278, "step": 4705 }, { "epoch": 70.24, "learning_rate": 0.0001876140350877193, "loss": 0.0028, "step": 4706 }, { "epoch": 70.25, "learning_rate": 0.00018757894736842103, "loss": 0.0033, "step": 4707 }, { "epoch": 70.27, "learning_rate": 0.00018754385964912278, "loss": 0.0084, "step": 4708 }, { "epoch": 70.28, "learning_rate": 0.00018750877192982453, "loss": 0.002, "step": 4709 }, { "epoch": 70.3, "learning_rate": 0.0001874736842105263, "loss": 0.2291, "step": 4710 }, { "epoch": 70.31, "learning_rate": 0.00018743859649122805, "loss": 0.005, "step": 4711 }, { "epoch": 70.33, "learning_rate": 0.0001874035087719298, "loss": 0.133, "step": 4712 }, { "epoch": 70.34, "learning_rate": 0.00018736842105263158, "loss": 0.0036, "step": 4713 }, { "epoch": 70.36, "learning_rate": 0.00018733333333333332, "loss": 0.1317, "step": 4714 }, { "epoch": 70.37, "learning_rate": 0.00018729824561403507, "loss": 0.0028, "step": 4715 }, { "epoch": 70.39, "learning_rate": 0.00018726315789473682, "loss": 0.02, "step": 4716 }, { "epoch": 70.4, "learning_rate": 0.0001872280701754386, "loss": 0.0013, "step": 4717 }, { "epoch": 70.42, "learning_rate": 0.00018719298245614035, "loss": 0.0013, "step": 4718 }, { "epoch": 70.43, "learning_rate": 0.0001871578947368421, "loss": 0.0015, "step": 4719 }, { "epoch": 70.45, "learning_rate": 0.00018712280701754382, "loss": 0.0062, "step": 4720 }, { "epoch": 70.46, "learning_rate": 0.00018708771929824562, "loss": 0.3317, "step": 4721 }, { "epoch": 70.48, "learning_rate": 0.00018705263157894734, "loss": 0.0073, "step": 4722 }, { "epoch": 70.49, "learning_rate": 0.0001870175438596491, "loss": 0.0042, "step": 4723 }, { "epoch": 70.51, "learning_rate": 0.00018698245614035084, "loss": 0.0138, "step": 4724 }, { "epoch": 70.52, "learning_rate": 0.00018694736842105261, "loss": 0.1278, "step": 4725 }, { "epoch": 70.54, "learning_rate": 0.00018691228070175436, "loss": 0.0046, "step": 4726 }, { "epoch": 70.55, "learning_rate": 0.0001868771929824561, "loss": 0.003, "step": 4727 }, { "epoch": 70.57, "learning_rate": 0.0001868421052631579, "loss": 0.0073, "step": 4728 }, { "epoch": 70.58, "learning_rate": 0.00018680701754385964, "loss": 0.0081, "step": 4729 }, { "epoch": 70.59, "learning_rate": 0.00018677192982456139, "loss": 0.0057, "step": 4730 }, { "epoch": 70.61, "learning_rate": 0.00018673684210526313, "loss": 0.0133, "step": 4731 }, { "epoch": 70.62, "learning_rate": 0.0001867017543859649, "loss": 0.0017, "step": 4732 }, { "epoch": 70.64, "learning_rate": 0.00018666666666666666, "loss": 0.0448, "step": 4733 }, { "epoch": 70.65, "learning_rate": 0.0001866315789473684, "loss": 0.0026, "step": 4734 }, { "epoch": 70.67, "learning_rate": 0.00018659649122807016, "loss": 0.2009, "step": 4735 }, { "epoch": 70.68, "learning_rate": 0.00018656140350877193, "loss": 0.0104, "step": 4736 }, { "epoch": 70.7, "learning_rate": 0.00018652631578947368, "loss": 0.008, "step": 4737 }, { "epoch": 70.71, "learning_rate": 0.0001864912280701754, "loss": 0.0966, "step": 4738 }, { "epoch": 70.73, "learning_rate": 0.00018645614035087718, "loss": 0.0021, "step": 4739 }, { "epoch": 70.74, "learning_rate": 0.00018642105263157893, "loss": 0.0016, "step": 4740 }, { "epoch": 70.76, "learning_rate": 0.00018638596491228068, "loss": 0.1067, "step": 4741 }, { "epoch": 70.77, "learning_rate": 0.00018635087719298243, "loss": 0.0963, "step": 4742 }, { "epoch": 70.79, "learning_rate": 0.0001863157894736842, "loss": 0.0917, "step": 4743 }, { "epoch": 70.8, "learning_rate": 0.00018628070175438595, "loss": 0.0018, "step": 4744 }, { "epoch": 70.82, "learning_rate": 0.0001862456140350877, "loss": 0.183, "step": 4745 }, { "epoch": 70.83, "learning_rate": 0.00018621052631578945, "loss": 0.0012, "step": 4746 }, { "epoch": 70.85, "learning_rate": 0.00018617543859649122, "loss": 0.0013, "step": 4747 }, { "epoch": 70.86, "learning_rate": 0.00018614035087719297, "loss": 0.002, "step": 4748 }, { "epoch": 70.88, "learning_rate": 0.00018610526315789472, "loss": 0.0035, "step": 4749 }, { "epoch": 70.89, "learning_rate": 0.0001860701754385965, "loss": 0.0024, "step": 4750 }, { "epoch": 70.91, "learning_rate": 0.00018603508771929825, "loss": 0.0031, "step": 4751 }, { "epoch": 70.92, "learning_rate": 0.000186, "loss": 0.0021, "step": 4752 }, { "epoch": 70.94, "learning_rate": 0.00018596491228070172, "loss": 0.0054, "step": 4753 }, { "epoch": 70.95, "learning_rate": 0.00018592982456140352, "loss": 0.0037, "step": 4754 }, { "epoch": 70.97, "learning_rate": 0.00018589473684210524, "loss": 0.002, "step": 4755 }, { "epoch": 70.98, "learning_rate": 0.000185859649122807, "loss": 0.0839, "step": 4756 }, { "epoch": 71.0, "learning_rate": 0.00018582456140350874, "loss": 0.1267, "step": 4757 }, { "epoch": 71.01, "learning_rate": 0.00018578947368421051, "loss": 0.0022, "step": 4758 }, { "epoch": 71.03, "learning_rate": 0.00018575438596491226, "loss": 0.001, "step": 4759 }, { "epoch": 71.04, "learning_rate": 0.000185719298245614, "loss": 0.0012, "step": 4760 }, { "epoch": 71.06, "learning_rate": 0.00018568421052631576, "loss": 0.0075, "step": 4761 }, { "epoch": 71.07, "learning_rate": 0.00018564912280701754, "loss": 0.0519, "step": 4762 }, { "epoch": 71.09, "learning_rate": 0.00018561403508771929, "loss": 0.0016, "step": 4763 }, { "epoch": 71.1, "learning_rate": 0.00018557894736842103, "loss": 0.0017, "step": 4764 }, { "epoch": 71.12, "learning_rate": 0.0001855438596491228, "loss": 0.0289, "step": 4765 }, { "epoch": 71.13, "learning_rate": 0.00018550877192982456, "loss": 0.0028, "step": 4766 }, { "epoch": 71.15, "learning_rate": 0.0001854736842105263, "loss": 0.0536, "step": 4767 }, { "epoch": 71.16, "learning_rate": 0.00018543859649122803, "loss": 0.1368, "step": 4768 }, { "epoch": 71.18, "learning_rate": 0.00018540350877192983, "loss": 0.0013, "step": 4769 }, { "epoch": 71.19, "learning_rate": 0.00018536842105263155, "loss": 0.2852, "step": 4770 }, { "epoch": 71.21, "learning_rate": 0.0001853333333333333, "loss": 0.0144, "step": 4771 }, { "epoch": 71.22, "learning_rate": 0.00018529824561403505, "loss": 0.0072, "step": 4772 }, { "epoch": 71.24, "learning_rate": 0.00018526315789473683, "loss": 0.0088, "step": 4773 }, { "epoch": 71.25, "learning_rate": 0.00018522807017543858, "loss": 0.3189, "step": 4774 }, { "epoch": 71.27, "learning_rate": 0.00018519298245614032, "loss": 0.1993, "step": 4775 }, { "epoch": 71.28, "learning_rate": 0.0001851578947368421, "loss": 0.255, "step": 4776 }, { "epoch": 71.3, "learning_rate": 0.00018512280701754385, "loss": 0.0594, "step": 4777 }, { "epoch": 71.31, "learning_rate": 0.0001850877192982456, "loss": 0.0034, "step": 4778 }, { "epoch": 71.33, "learning_rate": 0.00018505263157894735, "loss": 0.014, "step": 4779 }, { "epoch": 71.34, "learning_rate": 0.00018501754385964912, "loss": 0.0016, "step": 4780 }, { "epoch": 71.36, "learning_rate": 0.00018498245614035087, "loss": 0.1977, "step": 4781 }, { "epoch": 71.37, "learning_rate": 0.00018494736842105262, "loss": 0.2658, "step": 4782 }, { "epoch": 71.39, "learning_rate": 0.00018491228070175437, "loss": 0.005, "step": 4783 }, { "epoch": 71.4, "learning_rate": 0.00018487719298245615, "loss": 0.0766, "step": 4784 }, { "epoch": 71.42, "learning_rate": 0.00018484210526315787, "loss": 0.084, "step": 4785 }, { "epoch": 71.43, "learning_rate": 0.00018480701754385962, "loss": 0.0876, "step": 4786 }, { "epoch": 71.45, "learning_rate": 0.00018477192982456136, "loss": 0.002, "step": 4787 }, { "epoch": 71.46, "learning_rate": 0.00018473684210526314, "loss": 0.0182, "step": 4788 }, { "epoch": 71.48, "learning_rate": 0.0001847017543859649, "loss": 0.0067, "step": 4789 }, { "epoch": 71.49, "learning_rate": 0.00018466666666666664, "loss": 0.0028, "step": 4790 }, { "epoch": 71.51, "learning_rate": 0.00018463157894736841, "loss": 0.0689, "step": 4791 }, { "epoch": 71.52, "learning_rate": 0.00018459649122807016, "loss": 0.3814, "step": 4792 }, { "epoch": 71.54, "learning_rate": 0.0001845614035087719, "loss": 0.0075, "step": 4793 }, { "epoch": 71.55, "learning_rate": 0.00018452631578947366, "loss": 0.0132, "step": 4794 }, { "epoch": 71.57, "learning_rate": 0.00018449122807017544, "loss": 0.1036, "step": 4795 }, { "epoch": 71.58, "learning_rate": 0.00018445614035087718, "loss": 0.0515, "step": 4796 }, { "epoch": 71.59, "learning_rate": 0.00018442105263157893, "loss": 0.0664, "step": 4797 }, { "epoch": 71.61, "learning_rate": 0.00018438596491228068, "loss": 0.0349, "step": 4798 }, { "epoch": 71.62, "learning_rate": 0.00018435087719298246, "loss": 0.1953, "step": 4799 }, { "epoch": 71.64, "learning_rate": 0.0001843157894736842, "loss": 0.0302, "step": 4800 }, { "epoch": 71.64, "eval_accuracy": 0.8494860499265786, "eval_f1": 0.8515287106120767, "eval_loss": 0.6812880635261536, "eval_runtime": 344.4321, "eval_samples_per_second": 11.863, "eval_steps_per_second": 0.743, "step": 4800 }, { "epoch": 71.65, "learning_rate": 0.00018428070175438593, "loss": 0.0782, "step": 4801 }, { "epoch": 71.67, "learning_rate": 0.00018424561403508773, "loss": 0.0039, "step": 4802 }, { "epoch": 71.68, "learning_rate": 0.00018421052631578945, "loss": 0.0041, "step": 4803 }, { "epoch": 71.7, "learning_rate": 0.0001841754385964912, "loss": 0.0041, "step": 4804 }, { "epoch": 71.71, "learning_rate": 0.00018414035087719295, "loss": 0.0499, "step": 4805 }, { "epoch": 71.73, "learning_rate": 0.00018410526315789473, "loss": 0.0392, "step": 4806 }, { "epoch": 71.74, "learning_rate": 0.00018407017543859648, "loss": 0.0955, "step": 4807 }, { "epoch": 71.76, "learning_rate": 0.00018403508771929822, "loss": 0.2134, "step": 4808 }, { "epoch": 71.77, "learning_rate": 0.00018399999999999997, "loss": 0.2199, "step": 4809 }, { "epoch": 71.79, "learning_rate": 0.00018396491228070175, "loss": 0.0032, "step": 4810 }, { "epoch": 71.8, "learning_rate": 0.0001839298245614035, "loss": 0.0542, "step": 4811 }, { "epoch": 71.82, "learning_rate": 0.00018389473684210525, "loss": 0.1352, "step": 4812 }, { "epoch": 71.83, "learning_rate": 0.00018385964912280702, "loss": 0.1094, "step": 4813 }, { "epoch": 71.85, "learning_rate": 0.00018382456140350877, "loss": 0.0961, "step": 4814 }, { "epoch": 71.86, "learning_rate": 0.00018378947368421052, "loss": 0.0033, "step": 4815 }, { "epoch": 71.88, "learning_rate": 0.00018375438596491224, "loss": 0.3464, "step": 4816 }, { "epoch": 71.89, "learning_rate": 0.00018371929824561404, "loss": 0.0021, "step": 4817 }, { "epoch": 71.91, "learning_rate": 0.00018368421052631577, "loss": 0.0212, "step": 4818 }, { "epoch": 71.92, "learning_rate": 0.00018364912280701752, "loss": 0.09, "step": 4819 }, { "epoch": 71.94, "learning_rate": 0.00018361403508771926, "loss": 0.1911, "step": 4820 }, { "epoch": 71.95, "learning_rate": 0.00018357894736842104, "loss": 0.0022, "step": 4821 }, { "epoch": 71.97, "learning_rate": 0.0001835438596491228, "loss": 0.0026, "step": 4822 }, { "epoch": 71.98, "learning_rate": 0.00018350877192982454, "loss": 0.003, "step": 4823 }, { "epoch": 72.0, "learning_rate": 0.00018347368421052629, "loss": 0.0518, "step": 4824 }, { "epoch": 72.01, "learning_rate": 0.00018343859649122806, "loss": 0.1218, "step": 4825 }, { "epoch": 72.03, "learning_rate": 0.0001834035087719298, "loss": 0.2565, "step": 4826 }, { "epoch": 72.04, "learning_rate": 0.00018336842105263156, "loss": 0.0015, "step": 4827 }, { "epoch": 72.06, "learning_rate": 0.00018333333333333334, "loss": 0.1193, "step": 4828 }, { "epoch": 72.07, "learning_rate": 0.00018329824561403508, "loss": 0.0566, "step": 4829 }, { "epoch": 72.09, "learning_rate": 0.00018326315789473683, "loss": 0.0022, "step": 4830 }, { "epoch": 72.1, "learning_rate": 0.00018322807017543858, "loss": 0.0022, "step": 4831 }, { "epoch": 72.12, "learning_rate": 0.00018319298245614036, "loss": 0.1068, "step": 4832 }, { "epoch": 72.13, "learning_rate": 0.00018315789473684208, "loss": 0.062, "step": 4833 }, { "epoch": 72.15, "learning_rate": 0.00018312280701754383, "loss": 0.0677, "step": 4834 }, { "epoch": 72.16, "learning_rate": 0.00018308771929824558, "loss": 0.0032, "step": 4835 }, { "epoch": 72.18, "learning_rate": 0.00018305263157894735, "loss": 0.0027, "step": 4836 }, { "epoch": 72.19, "learning_rate": 0.0001830175438596491, "loss": 0.0203, "step": 4837 }, { "epoch": 72.21, "learning_rate": 0.00018298245614035085, "loss": 0.0049, "step": 4838 }, { "epoch": 72.22, "learning_rate": 0.00018294736842105263, "loss": 0.0329, "step": 4839 }, { "epoch": 72.24, "learning_rate": 0.00018291228070175437, "loss": 0.0059, "step": 4840 }, { "epoch": 72.25, "learning_rate": 0.00018287719298245612, "loss": 0.0042, "step": 4841 }, { "epoch": 72.27, "learning_rate": 0.00018284210526315787, "loss": 0.1367, "step": 4842 }, { "epoch": 72.28, "learning_rate": 0.00018280701754385965, "loss": 0.0015, "step": 4843 }, { "epoch": 72.3, "learning_rate": 0.0001827719298245614, "loss": 0.1505, "step": 4844 }, { "epoch": 72.31, "learning_rate": 0.00018273684210526315, "loss": 0.0677, "step": 4845 }, { "epoch": 72.33, "learning_rate": 0.0001827017543859649, "loss": 0.0066, "step": 4846 }, { "epoch": 72.34, "learning_rate": 0.00018266666666666667, "loss": 0.0131, "step": 4847 }, { "epoch": 72.36, "learning_rate": 0.00018263157894736842, "loss": 0.095, "step": 4848 }, { "epoch": 72.37, "learning_rate": 0.00018259649122807014, "loss": 0.0308, "step": 4849 }, { "epoch": 72.39, "learning_rate": 0.0001825614035087719, "loss": 0.0013, "step": 4850 }, { "epoch": 72.4, "learning_rate": 0.00018252631578947367, "loss": 0.1971, "step": 4851 }, { "epoch": 72.42, "learning_rate": 0.00018249122807017541, "loss": 0.0014, "step": 4852 }, { "epoch": 72.43, "learning_rate": 0.00018245614035087716, "loss": 0.0013, "step": 4853 }, { "epoch": 72.45, "learning_rate": 0.00018242105263157894, "loss": 0.0355, "step": 4854 }, { "epoch": 72.46, "learning_rate": 0.0001823859649122807, "loss": 0.0038, "step": 4855 }, { "epoch": 72.48, "learning_rate": 0.00018235087719298244, "loss": 0.0012, "step": 4856 }, { "epoch": 72.49, "learning_rate": 0.00018231578947368419, "loss": 0.102, "step": 4857 }, { "epoch": 72.51, "learning_rate": 0.00018228070175438596, "loss": 0.0082, "step": 4858 }, { "epoch": 72.52, "learning_rate": 0.0001822456140350877, "loss": 0.0481, "step": 4859 }, { "epoch": 72.54, "learning_rate": 0.00018221052631578946, "loss": 0.0025, "step": 4860 }, { "epoch": 72.55, "learning_rate": 0.0001821754385964912, "loss": 0.0018, "step": 4861 }, { "epoch": 72.57, "learning_rate": 0.00018214035087719298, "loss": 0.1083, "step": 4862 }, { "epoch": 72.58, "learning_rate": 0.00018210526315789473, "loss": 0.0437, "step": 4863 }, { "epoch": 72.59, "learning_rate": 0.00018207017543859645, "loss": 0.0088, "step": 4864 }, { "epoch": 72.61, "learning_rate": 0.00018203508771929826, "loss": 0.1357, "step": 4865 }, { "epoch": 72.62, "learning_rate": 0.00018199999999999998, "loss": 0.0158, "step": 4866 }, { "epoch": 72.64, "learning_rate": 0.00018196491228070173, "loss": 0.0188, "step": 4867 }, { "epoch": 72.65, "learning_rate": 0.00018192982456140348, "loss": 0.0024, "step": 4868 }, { "epoch": 72.67, "learning_rate": 0.00018189473684210525, "loss": 0.005, "step": 4869 }, { "epoch": 72.68, "learning_rate": 0.000181859649122807, "loss": 0.0025, "step": 4870 }, { "epoch": 72.7, "learning_rate": 0.00018182456140350875, "loss": 0.0077, "step": 4871 }, { "epoch": 72.71, "learning_rate": 0.0001817894736842105, "loss": 0.0025, "step": 4872 }, { "epoch": 72.73, "learning_rate": 0.00018175438596491227, "loss": 0.058, "step": 4873 }, { "epoch": 72.74, "learning_rate": 0.00018171929824561402, "loss": 0.1914, "step": 4874 }, { "epoch": 72.76, "learning_rate": 0.00018168421052631577, "loss": 0.1722, "step": 4875 }, { "epoch": 72.77, "learning_rate": 0.00018164912280701755, "loss": 0.0266, "step": 4876 }, { "epoch": 72.79, "learning_rate": 0.0001816140350877193, "loss": 0.0278, "step": 4877 }, { "epoch": 72.8, "learning_rate": 0.00018157894736842105, "loss": 0.0495, "step": 4878 }, { "epoch": 72.82, "learning_rate": 0.00018154385964912277, "loss": 0.1768, "step": 4879 }, { "epoch": 72.83, "learning_rate": 0.00018150877192982457, "loss": 0.0619, "step": 4880 }, { "epoch": 72.85, "learning_rate": 0.0001814736842105263, "loss": 0.1561, "step": 4881 }, { "epoch": 72.86, "learning_rate": 0.00018143859649122804, "loss": 0.0668, "step": 4882 }, { "epoch": 72.88, "learning_rate": 0.0001814035087719298, "loss": 0.0052, "step": 4883 }, { "epoch": 72.89, "learning_rate": 0.00018136842105263157, "loss": 0.0246, "step": 4884 }, { "epoch": 72.91, "learning_rate": 0.00018133333333333331, "loss": 0.2206, "step": 4885 }, { "epoch": 72.92, "learning_rate": 0.00018129824561403506, "loss": 0.2973, "step": 4886 }, { "epoch": 72.94, "learning_rate": 0.0001812631578947368, "loss": 0.3662, "step": 4887 }, { "epoch": 72.95, "learning_rate": 0.0001812280701754386, "loss": 0.2203, "step": 4888 }, { "epoch": 72.97, "learning_rate": 0.00018119298245614034, "loss": 0.0179, "step": 4889 }, { "epoch": 72.98, "learning_rate": 0.00018115789473684208, "loss": 0.0022, "step": 4890 }, { "epoch": 73.0, "learning_rate": 0.00018112280701754386, "loss": 0.0622, "step": 4891 }, { "epoch": 73.01, "learning_rate": 0.0001810877192982456, "loss": 0.0017, "step": 4892 }, { "epoch": 73.03, "learning_rate": 0.00018105263157894736, "loss": 0.0018, "step": 4893 }, { "epoch": 73.04, "learning_rate": 0.0001810175438596491, "loss": 0.0528, "step": 4894 }, { "epoch": 73.06, "learning_rate": 0.00018098245614035088, "loss": 0.0023, "step": 4895 }, { "epoch": 73.07, "learning_rate": 0.00018094736842105263, "loss": 0.0043, "step": 4896 }, { "epoch": 73.09, "learning_rate": 0.00018091228070175435, "loss": 0.0218, "step": 4897 }, { "epoch": 73.1, "learning_rate": 0.0001808771929824561, "loss": 0.2016, "step": 4898 }, { "epoch": 73.12, "learning_rate": 0.00018084210526315788, "loss": 0.1458, "step": 4899 }, { "epoch": 73.13, "learning_rate": 0.00018080701754385963, "loss": 0.0088, "step": 4900 }, { "epoch": 73.15, "learning_rate": 0.00018077192982456138, "loss": 0.0022, "step": 4901 }, { "epoch": 73.16, "learning_rate": 0.00018073684210526315, "loss": 0.0228, "step": 4902 }, { "epoch": 73.18, "learning_rate": 0.0001807017543859649, "loss": 0.0058, "step": 4903 }, { "epoch": 73.19, "learning_rate": 0.00018066666666666665, "loss": 0.0074, "step": 4904 }, { "epoch": 73.21, "learning_rate": 0.0001806315789473684, "loss": 0.0103, "step": 4905 }, { "epoch": 73.22, "learning_rate": 0.00018059649122807017, "loss": 0.1562, "step": 4906 }, { "epoch": 73.24, "learning_rate": 0.00018056140350877192, "loss": 0.004, "step": 4907 }, { "epoch": 73.25, "learning_rate": 0.00018052631578947367, "loss": 0.0222, "step": 4908 }, { "epoch": 73.27, "learning_rate": 0.00018049122807017542, "loss": 0.0036, "step": 4909 }, { "epoch": 73.28, "learning_rate": 0.0001804561403508772, "loss": 0.0019, "step": 4910 }, { "epoch": 73.3, "learning_rate": 0.00018042105263157894, "loss": 0.0021, "step": 4911 }, { "epoch": 73.31, "learning_rate": 0.00018038596491228067, "loss": 0.2216, "step": 4912 }, { "epoch": 73.33, "learning_rate": 0.00018035087719298247, "loss": 0.1907, "step": 4913 }, { "epoch": 73.34, "learning_rate": 0.0001803157894736842, "loss": 0.0057, "step": 4914 }, { "epoch": 73.36, "learning_rate": 0.00018028070175438594, "loss": 0.0024, "step": 4915 }, { "epoch": 73.37, "learning_rate": 0.0001802456140350877, "loss": 0.2023, "step": 4916 }, { "epoch": 73.39, "learning_rate": 0.00018021052631578946, "loss": 0.0057, "step": 4917 }, { "epoch": 73.4, "learning_rate": 0.0001801754385964912, "loss": 0.0878, "step": 4918 }, { "epoch": 73.42, "learning_rate": 0.00018014035087719296, "loss": 0.0024, "step": 4919 }, { "epoch": 73.43, "learning_rate": 0.0001801052631578947, "loss": 0.0191, "step": 4920 }, { "epoch": 73.45, "learning_rate": 0.0001800701754385965, "loss": 0.0756, "step": 4921 }, { "epoch": 73.46, "learning_rate": 0.00018003508771929824, "loss": 0.0028, "step": 4922 }, { "epoch": 73.48, "learning_rate": 0.00017999999999999998, "loss": 0.2512, "step": 4923 }, { "epoch": 73.49, "learning_rate": 0.00017996491228070173, "loss": 0.1202, "step": 4924 }, { "epoch": 73.51, "learning_rate": 0.0001799298245614035, "loss": 0.1339, "step": 4925 }, { "epoch": 73.52, "learning_rate": 0.00017989473684210526, "loss": 0.0635, "step": 4926 }, { "epoch": 73.54, "learning_rate": 0.00017985964912280698, "loss": 0.0029, "step": 4927 }, { "epoch": 73.55, "learning_rate": 0.00017982456140350878, "loss": 0.0032, "step": 4928 }, { "epoch": 73.57, "learning_rate": 0.0001797894736842105, "loss": 0.0057, "step": 4929 }, { "epoch": 73.58, "learning_rate": 0.00017975438596491225, "loss": 0.267, "step": 4930 }, { "epoch": 73.59, "learning_rate": 0.000179719298245614, "loss": 0.1825, "step": 4931 }, { "epoch": 73.61, "learning_rate": 0.00017968421052631578, "loss": 0.1034, "step": 4932 }, { "epoch": 73.62, "learning_rate": 0.00017964912280701753, "loss": 0.0088, "step": 4933 }, { "epoch": 73.64, "learning_rate": 0.00017961403508771927, "loss": 0.27, "step": 4934 }, { "epoch": 73.65, "learning_rate": 0.00017957894736842102, "loss": 0.0016, "step": 4935 }, { "epoch": 73.67, "learning_rate": 0.0001795438596491228, "loss": 0.0059, "step": 4936 }, { "epoch": 73.68, "learning_rate": 0.00017950877192982455, "loss": 0.3191, "step": 4937 }, { "epoch": 73.7, "learning_rate": 0.0001794736842105263, "loss": 0.0091, "step": 4938 }, { "epoch": 73.71, "learning_rate": 0.00017943859649122807, "loss": 0.037, "step": 4939 }, { "epoch": 73.73, "learning_rate": 0.00017940350877192982, "loss": 0.0173, "step": 4940 }, { "epoch": 73.74, "learning_rate": 0.00017936842105263157, "loss": 0.0028, "step": 4941 }, { "epoch": 73.76, "learning_rate": 0.00017933333333333332, "loss": 0.0125, "step": 4942 }, { "epoch": 73.77, "learning_rate": 0.0001792982456140351, "loss": 0.0519, "step": 4943 }, { "epoch": 73.79, "learning_rate": 0.00017926315789473684, "loss": 0.3839, "step": 4944 }, { "epoch": 73.8, "learning_rate": 0.00017922807017543857, "loss": 0.0054, "step": 4945 }, { "epoch": 73.82, "learning_rate": 0.00017919298245614031, "loss": 0.036, "step": 4946 }, { "epoch": 73.83, "learning_rate": 0.0001791578947368421, "loss": 0.0084, "step": 4947 }, { "epoch": 73.85, "learning_rate": 0.00017912280701754384, "loss": 0.0497, "step": 4948 }, { "epoch": 73.86, "learning_rate": 0.0001790877192982456, "loss": 0.0131, "step": 4949 }, { "epoch": 73.88, "learning_rate": 0.00017905263157894734, "loss": 0.2431, "step": 4950 }, { "epoch": 73.89, "learning_rate": 0.0001790175438596491, "loss": 0.215, "step": 4951 }, { "epoch": 73.91, "learning_rate": 0.00017898245614035086, "loss": 0.011, "step": 4952 }, { "epoch": 73.92, "learning_rate": 0.0001789473684210526, "loss": 0.0145, "step": 4953 }, { "epoch": 73.94, "learning_rate": 0.00017891228070175439, "loss": 0.0019, "step": 4954 }, { "epoch": 73.95, "learning_rate": 0.00017887719298245613, "loss": 0.0811, "step": 4955 }, { "epoch": 73.97, "learning_rate": 0.00017884210526315788, "loss": 0.008, "step": 4956 }, { "epoch": 73.98, "learning_rate": 0.00017880701754385963, "loss": 0.2649, "step": 4957 }, { "epoch": 74.0, "learning_rate": 0.0001787719298245614, "loss": 0.1351, "step": 4958 }, { "epoch": 74.01, "learning_rate": 0.00017873684210526316, "loss": 0.0034, "step": 4959 }, { "epoch": 74.03, "learning_rate": 0.00017870175438596488, "loss": 0.012, "step": 4960 }, { "epoch": 74.04, "learning_rate": 0.00017866666666666663, "loss": 0.0023, "step": 4961 }, { "epoch": 74.06, "learning_rate": 0.0001786315789473684, "loss": 0.1564, "step": 4962 }, { "epoch": 74.07, "learning_rate": 0.00017859649122807015, "loss": 0.1394, "step": 4963 }, { "epoch": 74.09, "learning_rate": 0.0001785614035087719, "loss": 0.0147, "step": 4964 }, { "epoch": 74.1, "learning_rate": 0.00017852631578947368, "loss": 0.0139, "step": 4965 }, { "epoch": 74.12, "learning_rate": 0.00017849122807017543, "loss": 0.028, "step": 4966 }, { "epoch": 74.13, "learning_rate": 0.00017845614035087717, "loss": 0.0709, "step": 4967 }, { "epoch": 74.15, "learning_rate": 0.00017842105263157892, "loss": 0.0028, "step": 4968 }, { "epoch": 74.16, "learning_rate": 0.0001783859649122807, "loss": 0.0064, "step": 4969 }, { "epoch": 74.18, "learning_rate": 0.00017835087719298245, "loss": 0.005, "step": 4970 }, { "epoch": 74.19, "learning_rate": 0.0001783157894736842, "loss": 0.0279, "step": 4971 }, { "epoch": 74.21, "learning_rate": 0.00017828070175438595, "loss": 0.0143, "step": 4972 }, { "epoch": 74.22, "learning_rate": 0.00017824561403508772, "loss": 0.0258, "step": 4973 }, { "epoch": 74.24, "learning_rate": 0.00017821052631578947, "loss": 0.0602, "step": 4974 }, { "epoch": 74.25, "learning_rate": 0.0001781754385964912, "loss": 0.1954, "step": 4975 }, { "epoch": 74.27, "learning_rate": 0.000178140350877193, "loss": 0.0018, "step": 4976 }, { "epoch": 74.28, "learning_rate": 0.00017810526315789472, "loss": 0.0082, "step": 4977 }, { "epoch": 74.3, "learning_rate": 0.00017807017543859647, "loss": 0.1672, "step": 4978 }, { "epoch": 74.31, "learning_rate": 0.00017803508771929821, "loss": 0.0016, "step": 4979 }, { "epoch": 74.33, "learning_rate": 0.000178, "loss": 0.0091, "step": 4980 }, { "epoch": 74.34, "learning_rate": 0.00017796491228070174, "loss": 0.0312, "step": 4981 }, { "epoch": 74.36, "learning_rate": 0.0001779298245614035, "loss": 0.2499, "step": 4982 }, { "epoch": 74.37, "learning_rate": 0.00017789473684210524, "loss": 0.1338, "step": 4983 }, { "epoch": 74.39, "learning_rate": 0.000177859649122807, "loss": 0.011, "step": 4984 }, { "epoch": 74.4, "learning_rate": 0.00017782456140350876, "loss": 0.0024, "step": 4985 }, { "epoch": 74.42, "learning_rate": 0.0001777894736842105, "loss": 0.0041, "step": 4986 }, { "epoch": 74.43, "learning_rate": 0.00017775438596491226, "loss": 0.0016, "step": 4987 }, { "epoch": 74.45, "learning_rate": 0.00017771929824561403, "loss": 0.0135, "step": 4988 }, { "epoch": 74.46, "learning_rate": 0.00017768421052631578, "loss": 0.0148, "step": 4989 }, { "epoch": 74.48, "learning_rate": 0.00017764912280701753, "loss": 0.0708, "step": 4990 }, { "epoch": 74.49, "learning_rate": 0.0001776140350877193, "loss": 0.083, "step": 4991 }, { "epoch": 74.51, "learning_rate": 0.00017757894736842103, "loss": 0.0161, "step": 4992 }, { "epoch": 74.52, "learning_rate": 0.00017754385964912278, "loss": 0.0177, "step": 4993 }, { "epoch": 74.54, "learning_rate": 0.00017750877192982453, "loss": 0.0022, "step": 4994 }, { "epoch": 74.55, "learning_rate": 0.0001774736842105263, "loss": 0.0189, "step": 4995 }, { "epoch": 74.57, "learning_rate": 0.00017743859649122805, "loss": 0.0014, "step": 4996 }, { "epoch": 74.58, "learning_rate": 0.0001774035087719298, "loss": 0.0093, "step": 4997 }, { "epoch": 74.59, "learning_rate": 0.00017736842105263155, "loss": 0.0283, "step": 4998 }, { "epoch": 74.61, "learning_rate": 0.00017733333333333333, "loss": 0.139, "step": 4999 }, { "epoch": 74.62, "learning_rate": 0.00017729824561403507, "loss": 0.0027, "step": 5000 }, { "epoch": 74.62, "eval_accuracy": 0.8529123837493882, "eval_f1": 0.8530140782623041, "eval_loss": 0.7162572145462036, "eval_runtime": 343.7711, "eval_samples_per_second": 11.886, "eval_steps_per_second": 0.745, "step": 5000 }, { "epoch": 74.64, "learning_rate": 0.00017726315789473682, "loss": 0.0074, "step": 5001 }, { "epoch": 74.65, "learning_rate": 0.0001772280701754386, "loss": 0.0027, "step": 5002 }, { "epoch": 74.67, "learning_rate": 0.00017719298245614035, "loss": 0.0679, "step": 5003 }, { "epoch": 74.68, "learning_rate": 0.0001771578947368421, "loss": 0.0013, "step": 5004 }, { "epoch": 74.7, "learning_rate": 0.00017712280701754384, "loss": 0.0307, "step": 5005 }, { "epoch": 74.71, "learning_rate": 0.00017708771929824562, "loss": 0.0014, "step": 5006 }, { "epoch": 74.73, "learning_rate": 0.00017705263157894737, "loss": 0.0016, "step": 5007 }, { "epoch": 74.74, "learning_rate": 0.00017705263157894737, "loss": 0.3708, "step": 5008 }, { "epoch": 74.76, "learning_rate": 0.0001770175438596491, "loss": 0.0718, "step": 5009 }, { "epoch": 74.77, "learning_rate": 0.00017698245614035084, "loss": 0.0015, "step": 5010 }, { "epoch": 74.79, "learning_rate": 0.00017694736842105262, "loss": 0.0336, "step": 5011 }, { "epoch": 74.8, "learning_rate": 0.00017691228070175436, "loss": 0.0311, "step": 5012 }, { "epoch": 74.82, "learning_rate": 0.0001768771929824561, "loss": 0.0123, "step": 5013 }, { "epoch": 74.83, "learning_rate": 0.00017684210526315786, "loss": 0.0049, "step": 5014 }, { "epoch": 74.85, "learning_rate": 0.00017680701754385964, "loss": 0.0137, "step": 5015 }, { "epoch": 74.86, "learning_rate": 0.0001767719298245614, "loss": 0.0619, "step": 5016 }, { "epoch": 74.88, "learning_rate": 0.00017673684210526314, "loss": 0.1754, "step": 5017 }, { "epoch": 74.89, "learning_rate": 0.0001767017543859649, "loss": 0.0045, "step": 5018 }, { "epoch": 74.91, "learning_rate": 0.00017666666666666666, "loss": 0.0024, "step": 5019 }, { "epoch": 74.92, "learning_rate": 0.0001766315789473684, "loss": 0.0026, "step": 5020 }, { "epoch": 74.94, "learning_rate": 0.00017659649122807016, "loss": 0.1055, "step": 5021 }, { "epoch": 74.95, "learning_rate": 0.00017656140350877193, "loss": 0.004, "step": 5022 }, { "epoch": 74.97, "learning_rate": 0.00017652631578947368, "loss": 0.0023, "step": 5023 }, { "epoch": 74.98, "learning_rate": 0.0001764912280701754, "loss": 0.0035, "step": 5024 }, { "epoch": 75.0, "learning_rate": 0.00017645614035087715, "loss": 0.0064, "step": 5025 }, { "epoch": 75.01, "learning_rate": 0.00017642105263157893, "loss": 0.0107, "step": 5026 }, { "epoch": 75.03, "learning_rate": 0.00017638596491228068, "loss": 0.0037, "step": 5027 }, { "epoch": 75.04, "learning_rate": 0.00017635087719298243, "loss": 0.0015, "step": 5028 }, { "epoch": 75.06, "learning_rate": 0.0001763157894736842, "loss": 0.0016, "step": 5029 }, { "epoch": 75.07, "learning_rate": 0.00017628070175438595, "loss": 0.1804, "step": 5030 }, { "epoch": 75.09, "learning_rate": 0.0001762456140350877, "loss": 0.0013, "step": 5031 }, { "epoch": 75.1, "learning_rate": 0.00017621052631578945, "loss": 0.0021, "step": 5032 }, { "epoch": 75.12, "learning_rate": 0.00017617543859649122, "loss": 0.0022, "step": 5033 }, { "epoch": 75.13, "learning_rate": 0.00017614035087719297, "loss": 0.0011, "step": 5034 }, { "epoch": 75.15, "learning_rate": 0.00017610526315789472, "loss": 0.2328, "step": 5035 }, { "epoch": 75.16, "learning_rate": 0.00017607017543859647, "loss": 0.2112, "step": 5036 }, { "epoch": 75.18, "learning_rate": 0.00017603508771929825, "loss": 0.0083, "step": 5037 }, { "epoch": 75.19, "learning_rate": 0.000176, "loss": 0.2025, "step": 5038 }, { "epoch": 75.21, "learning_rate": 0.00017596491228070174, "loss": 0.0229, "step": 5039 }, { "epoch": 75.22, "learning_rate": 0.00017592982456140352, "loss": 0.0018, "step": 5040 }, { "epoch": 75.24, "learning_rate": 0.00017589473684210524, "loss": 0.0022, "step": 5041 }, { "epoch": 75.25, "learning_rate": 0.000175859649122807, "loss": 0.0017, "step": 5042 }, { "epoch": 75.27, "learning_rate": 0.00017582456140350874, "loss": 0.0017, "step": 5043 }, { "epoch": 75.28, "learning_rate": 0.00017578947368421052, "loss": 0.1587, "step": 5044 }, { "epoch": 75.3, "learning_rate": 0.00017575438596491226, "loss": 0.0028, "step": 5045 }, { "epoch": 75.31, "learning_rate": 0.000175719298245614, "loss": 0.0041, "step": 5046 }, { "epoch": 75.33, "learning_rate": 0.00017568421052631576, "loss": 0.0012, "step": 5047 }, { "epoch": 75.34, "learning_rate": 0.00017564912280701754, "loss": 0.0014, "step": 5048 }, { "epoch": 75.36, "learning_rate": 0.00017561403508771929, "loss": 0.1035, "step": 5049 }, { "epoch": 75.37, "learning_rate": 0.00017557894736842103, "loss": 0.0893, "step": 5050 }, { "epoch": 75.39, "learning_rate": 0.00017554385964912278, "loss": 0.162, "step": 5051 }, { "epoch": 75.4, "learning_rate": 0.00017550877192982456, "loss": 0.0009, "step": 5052 }, { "epoch": 75.42, "learning_rate": 0.0001754736842105263, "loss": 0.001, "step": 5053 }, { "epoch": 75.43, "learning_rate": 0.00017543859649122806, "loss": 0.001, "step": 5054 }, { "epoch": 75.45, "learning_rate": 0.00017540350877192983, "loss": 0.0011, "step": 5055 }, { "epoch": 75.46, "learning_rate": 0.00017536842105263158, "loss": 0.1141, "step": 5056 }, { "epoch": 75.48, "learning_rate": 0.0001753333333333333, "loss": 0.0036, "step": 5057 }, { "epoch": 75.49, "learning_rate": 0.00017529824561403505, "loss": 0.001, "step": 5058 }, { "epoch": 75.51, "learning_rate": 0.00017526315789473683, "loss": 0.16, "step": 5059 }, { "epoch": 75.52, "learning_rate": 0.00017522807017543858, "loss": 0.2575, "step": 5060 }, { "epoch": 75.54, "learning_rate": 0.00017519298245614033, "loss": 0.0013, "step": 5061 }, { "epoch": 75.55, "learning_rate": 0.00017515789473684207, "loss": 0.1232, "step": 5062 }, { "epoch": 75.57, "learning_rate": 0.00017512280701754385, "loss": 0.0014, "step": 5063 }, { "epoch": 75.58, "learning_rate": 0.0001750877192982456, "loss": 0.0102, "step": 5064 }, { "epoch": 75.59, "learning_rate": 0.00017505263157894735, "loss": 0.1149, "step": 5065 }, { "epoch": 75.61, "learning_rate": 0.00017501754385964912, "loss": 0.1935, "step": 5066 }, { "epoch": 75.62, "learning_rate": 0.00017498245614035087, "loss": 0.0018, "step": 5067 }, { "epoch": 75.64, "learning_rate": 0.00017494736842105262, "loss": 0.1331, "step": 5068 }, { "epoch": 75.65, "learning_rate": 0.00017491228070175437, "loss": 0.1212, "step": 5069 }, { "epoch": 75.67, "learning_rate": 0.00017487719298245615, "loss": 0.1023, "step": 5070 }, { "epoch": 75.68, "learning_rate": 0.0001748421052631579, "loss": 0.1183, "step": 5071 }, { "epoch": 75.7, "learning_rate": 0.00017480701754385962, "loss": 0.145, "step": 5072 }, { "epoch": 75.71, "learning_rate": 0.00017477192982456137, "loss": 0.0044, "step": 5073 }, { "epoch": 75.73, "learning_rate": 0.00017473684210526314, "loss": 0.0016, "step": 5074 }, { "epoch": 75.74, "learning_rate": 0.0001747017543859649, "loss": 0.0024, "step": 5075 }, { "epoch": 75.76, "learning_rate": 0.00017466666666666664, "loss": 0.0338, "step": 5076 }, { "epoch": 75.77, "learning_rate": 0.00017463157894736841, "loss": 0.0053, "step": 5077 }, { "epoch": 75.79, "learning_rate": 0.00017459649122807016, "loss": 0.0742, "step": 5078 }, { "epoch": 75.8, "learning_rate": 0.0001745614035087719, "loss": 0.0017, "step": 5079 }, { "epoch": 75.82, "learning_rate": 0.00017452631578947366, "loss": 0.0229, "step": 5080 }, { "epoch": 75.83, "learning_rate": 0.00017449122807017544, "loss": 0.0323, "step": 5081 }, { "epoch": 75.85, "learning_rate": 0.00017445614035087719, "loss": 0.009, "step": 5082 }, { "epoch": 75.86, "learning_rate": 0.00017442105263157893, "loss": 0.0308, "step": 5083 }, { "epoch": 75.88, "learning_rate": 0.00017438596491228068, "loss": 0.1732, "step": 5084 }, { "epoch": 75.89, "learning_rate": 0.00017435087719298246, "loss": 0.0183, "step": 5085 }, { "epoch": 75.91, "learning_rate": 0.0001743157894736842, "loss": 0.0027, "step": 5086 }, { "epoch": 75.92, "learning_rate": 0.00017428070175438593, "loss": 0.0059, "step": 5087 }, { "epoch": 75.94, "learning_rate": 0.00017424561403508768, "loss": 0.0011, "step": 5088 }, { "epoch": 75.95, "learning_rate": 0.00017421052631578945, "loss": 0.0154, "step": 5089 }, { "epoch": 75.97, "learning_rate": 0.0001741754385964912, "loss": 0.2746, "step": 5090 }, { "epoch": 75.98, "learning_rate": 0.00017414035087719295, "loss": 0.009, "step": 5091 }, { "epoch": 76.0, "learning_rate": 0.00017410526315789473, "loss": 0.1161, "step": 5092 }, { "epoch": 76.01, "learning_rate": 0.00017407017543859648, "loss": 0.0044, "step": 5093 }, { "epoch": 76.03, "learning_rate": 0.00017403508771929823, "loss": 0.0122, "step": 5094 }, { "epoch": 76.04, "learning_rate": 0.00017399999999999997, "loss": 0.0024, "step": 5095 }, { "epoch": 76.06, "learning_rate": 0.00017396491228070175, "loss": 0.017, "step": 5096 }, { "epoch": 76.07, "learning_rate": 0.0001739298245614035, "loss": 0.3844, "step": 5097 }, { "epoch": 76.09, "learning_rate": 0.00017389473684210525, "loss": 0.0014, "step": 5098 }, { "epoch": 76.1, "learning_rate": 0.000173859649122807, "loss": 0.0806, "step": 5099 }, { "epoch": 76.12, "learning_rate": 0.00017382456140350877, "loss": 0.0194, "step": 5100 }, { "epoch": 76.13, "learning_rate": 0.00017378947368421052, "loss": 0.1099, "step": 5101 }, { "epoch": 76.15, "learning_rate": 0.00017375438596491227, "loss": 0.0016, "step": 5102 }, { "epoch": 76.16, "learning_rate": 0.00017371929824561405, "loss": 0.0011, "step": 5103 }, { "epoch": 76.18, "learning_rate": 0.0001736842105263158, "loss": 0.0017, "step": 5104 }, { "epoch": 76.19, "learning_rate": 0.00017364912280701752, "loss": 0.0024, "step": 5105 }, { "epoch": 76.21, "learning_rate": 0.00017361403508771926, "loss": 0.0012, "step": 5106 }, { "epoch": 76.22, "learning_rate": 0.00017357894736842104, "loss": 0.0047, "step": 5107 }, { "epoch": 76.24, "learning_rate": 0.0001735438596491228, "loss": 0.001, "step": 5108 }, { "epoch": 76.25, "learning_rate": 0.00017350877192982454, "loss": 0.3858, "step": 5109 }, { "epoch": 76.27, "learning_rate": 0.0001734736842105263, "loss": 0.0133, "step": 5110 }, { "epoch": 76.28, "learning_rate": 0.00017343859649122806, "loss": 0.0053, "step": 5111 }, { "epoch": 76.3, "learning_rate": 0.0001734035087719298, "loss": 0.0933, "step": 5112 }, { "epoch": 76.31, "learning_rate": 0.00017336842105263156, "loss": 0.023, "step": 5113 }, { "epoch": 76.33, "learning_rate": 0.0001733333333333333, "loss": 0.0009, "step": 5114 }, { "epoch": 76.34, "learning_rate": 0.00017329824561403508, "loss": 0.0033, "step": 5115 }, { "epoch": 76.36, "learning_rate": 0.00017326315789473683, "loss": 0.0283, "step": 5116 }, { "epoch": 76.37, "learning_rate": 0.00017322807017543858, "loss": 0.0076, "step": 5117 }, { "epoch": 76.39, "learning_rate": 0.00017319298245614036, "loss": 0.0467, "step": 5118 }, { "epoch": 76.4, "learning_rate": 0.0001731578947368421, "loss": 0.0849, "step": 5119 }, { "epoch": 76.42, "learning_rate": 0.00017312280701754383, "loss": 0.0441, "step": 5120 }, { "epoch": 76.43, "learning_rate": 0.00017308771929824558, "loss": 0.0016, "step": 5121 }, { "epoch": 76.45, "learning_rate": 0.00017305263157894735, "loss": 0.0011, "step": 5122 }, { "epoch": 76.46, "learning_rate": 0.0001730175438596491, "loss": 0.1175, "step": 5123 }, { "epoch": 76.48, "learning_rate": 0.00017298245614035085, "loss": 0.0439, "step": 5124 }, { "epoch": 76.49, "learning_rate": 0.0001729473684210526, "loss": 0.002, "step": 5125 }, { "epoch": 76.51, "learning_rate": 0.00017291228070175438, "loss": 0.2141, "step": 5126 }, { "epoch": 76.52, "learning_rate": 0.00017287719298245612, "loss": 0.0033, "step": 5127 }, { "epoch": 76.54, "learning_rate": 0.00017284210526315787, "loss": 0.0052, "step": 5128 }, { "epoch": 76.55, "learning_rate": 0.00017280701754385965, "loss": 0.001, "step": 5129 }, { "epoch": 76.57, "learning_rate": 0.0001727719298245614, "loss": 0.1329, "step": 5130 }, { "epoch": 76.58, "learning_rate": 0.00017273684210526315, "loss": 0.1306, "step": 5131 }, { "epoch": 76.59, "learning_rate": 0.0001727017543859649, "loss": 0.0259, "step": 5132 }, { "epoch": 76.61, "learning_rate": 0.00017266666666666667, "loss": 0.0014, "step": 5133 }, { "epoch": 76.62, "learning_rate": 0.00017263157894736842, "loss": 0.0735, "step": 5134 }, { "epoch": 76.64, "learning_rate": 0.00017259649122807014, "loss": 0.001, "step": 5135 }, { "epoch": 76.65, "learning_rate": 0.0001725614035087719, "loss": 0.0068, "step": 5136 }, { "epoch": 76.67, "learning_rate": 0.00017252631578947367, "loss": 0.113, "step": 5137 }, { "epoch": 76.68, "learning_rate": 0.00017249122807017542, "loss": 0.1599, "step": 5138 }, { "epoch": 76.7, "learning_rate": 0.00017245614035087716, "loss": 0.0022, "step": 5139 }, { "epoch": 76.71, "learning_rate": 0.00017242105263157894, "loss": 0.0012, "step": 5140 }, { "epoch": 76.73, "learning_rate": 0.0001723859649122807, "loss": 0.0692, "step": 5141 }, { "epoch": 76.74, "learning_rate": 0.00017235087719298244, "loss": 0.0012, "step": 5142 }, { "epoch": 76.76, "learning_rate": 0.00017231578947368419, "loss": 0.0655, "step": 5143 }, { "epoch": 76.77, "learning_rate": 0.00017228070175438596, "loss": 0.1018, "step": 5144 }, { "epoch": 76.79, "learning_rate": 0.0001722456140350877, "loss": 0.0011, "step": 5145 }, { "epoch": 76.8, "learning_rate": 0.00017221052631578946, "loss": 0.0603, "step": 5146 }, { "epoch": 76.82, "learning_rate": 0.0001721754385964912, "loss": 0.1936, "step": 5147 }, { "epoch": 76.83, "learning_rate": 0.00017214035087719298, "loss": 0.0032, "step": 5148 }, { "epoch": 76.85, "learning_rate": 0.00017210526315789473, "loss": 0.003, "step": 5149 }, { "epoch": 76.86, "learning_rate": 0.00017207017543859648, "loss": 0.0038, "step": 5150 }, { "epoch": 76.88, "learning_rate": 0.0001720350877192982, "loss": 0.1508, "step": 5151 }, { "epoch": 76.89, "learning_rate": 0.000172, "loss": 0.001, "step": 5152 }, { "epoch": 76.91, "learning_rate": 0.00017196491228070173, "loss": 0.0362, "step": 5153 }, { "epoch": 76.92, "learning_rate": 0.00017192982456140348, "loss": 0.0024, "step": 5154 }, { "epoch": 76.94, "learning_rate": 0.00017189473684210525, "loss": 0.0012, "step": 5155 }, { "epoch": 76.95, "learning_rate": 0.000171859649122807, "loss": 0.0012, "step": 5156 }, { "epoch": 76.97, "learning_rate": 0.00017182456140350875, "loss": 0.0092, "step": 5157 }, { "epoch": 76.98, "learning_rate": 0.0001717894736842105, "loss": 0.0012, "step": 5158 }, { "epoch": 77.0, "learning_rate": 0.00017175438596491228, "loss": 0.0331, "step": 5159 }, { "epoch": 77.01, "learning_rate": 0.00017171929824561402, "loss": 0.0255, "step": 5160 }, { "epoch": 77.03, "learning_rate": 0.00017168421052631577, "loss": 0.0015, "step": 5161 }, { "epoch": 77.04, "learning_rate": 0.00017164912280701752, "loss": 0.1901, "step": 5162 }, { "epoch": 77.06, "learning_rate": 0.0001716140350877193, "loss": 0.0017, "step": 5163 }, { "epoch": 77.07, "learning_rate": 0.00017157894736842105, "loss": 0.013, "step": 5164 }, { "epoch": 77.09, "learning_rate": 0.0001715438596491228, "loss": 0.0037, "step": 5165 }, { "epoch": 77.1, "learning_rate": 0.00017150877192982457, "loss": 0.0039, "step": 5166 }, { "epoch": 77.12, "learning_rate": 0.00017147368421052632, "loss": 0.0437, "step": 5167 }, { "epoch": 77.13, "learning_rate": 0.00017143859649122804, "loss": 0.1028, "step": 5168 }, { "epoch": 77.15, "learning_rate": 0.0001714035087719298, "loss": 0.0809, "step": 5169 }, { "epoch": 77.16, "learning_rate": 0.00017136842105263157, "loss": 0.0092, "step": 5170 }, { "epoch": 77.18, "learning_rate": 0.00017133333333333331, "loss": 0.0021, "step": 5171 }, { "epoch": 77.19, "learning_rate": 0.00017129824561403506, "loss": 0.0011, "step": 5172 }, { "epoch": 77.21, "learning_rate": 0.0001712631578947368, "loss": 0.1418, "step": 5173 }, { "epoch": 77.22, "learning_rate": 0.0001712280701754386, "loss": 0.0012, "step": 5174 }, { "epoch": 77.24, "learning_rate": 0.00017119298245614034, "loss": 0.0011, "step": 5175 }, { "epoch": 77.25, "learning_rate": 0.00017115789473684209, "loss": 0.1556, "step": 5176 }, { "epoch": 77.27, "learning_rate": 0.00017112280701754383, "loss": 0.02, "step": 5177 }, { "epoch": 77.28, "learning_rate": 0.0001710877192982456, "loss": 0.0541, "step": 5178 }, { "epoch": 77.3, "learning_rate": 0.00017105263157894736, "loss": 0.0024, "step": 5179 }, { "epoch": 77.31, "learning_rate": 0.0001710175438596491, "loss": 0.0144, "step": 5180 }, { "epoch": 77.33, "learning_rate": 0.00017098245614035088, "loss": 0.0672, "step": 5181 }, { "epoch": 77.34, "learning_rate": 0.00017094736842105263, "loss": 0.0022, "step": 5182 }, { "epoch": 77.36, "learning_rate": 0.00017091228070175435, "loss": 0.1882, "step": 5183 }, { "epoch": 77.37, "learning_rate": 0.0001708771929824561, "loss": 0.0506, "step": 5184 }, { "epoch": 77.39, "learning_rate": 0.00017084210526315788, "loss": 0.0947, "step": 5185 }, { "epoch": 77.4, "learning_rate": 0.00017080701754385963, "loss": 0.0038, "step": 5186 }, { "epoch": 77.42, "learning_rate": 0.00017077192982456138, "loss": 0.1022, "step": 5187 }, { "epoch": 77.43, "learning_rate": 0.00017073684210526313, "loss": 0.0033, "step": 5188 }, { "epoch": 77.45, "learning_rate": 0.0001707017543859649, "loss": 0.0016, "step": 5189 }, { "epoch": 77.46, "learning_rate": 0.00017066666666666665, "loss": 0.0544, "step": 5190 }, { "epoch": 77.48, "learning_rate": 0.0001706315789473684, "loss": 0.0466, "step": 5191 }, { "epoch": 77.49, "learning_rate": 0.00017059649122807017, "loss": 0.0066, "step": 5192 }, { "epoch": 77.51, "learning_rate": 0.00017056140350877192, "loss": 0.1766, "step": 5193 }, { "epoch": 77.52, "learning_rate": 0.00017052631578947367, "loss": 0.0044, "step": 5194 }, { "epoch": 77.54, "learning_rate": 0.00017049122807017542, "loss": 0.1594, "step": 5195 }, { "epoch": 77.55, "learning_rate": 0.0001704561403508772, "loss": 0.002, "step": 5196 }, { "epoch": 77.57, "learning_rate": 0.00017042105263157895, "loss": 0.5177, "step": 5197 }, { "epoch": 77.58, "learning_rate": 0.0001703859649122807, "loss": 0.007, "step": 5198 }, { "epoch": 77.59, "learning_rate": 0.00017035087719298242, "loss": 0.0725, "step": 5199 }, { "epoch": 77.61, "learning_rate": 0.00017031578947368422, "loss": 0.1165, "step": 5200 }, { "epoch": 77.61, "eval_accuracy": 0.8595203132648067, "eval_f1": 0.860270309660957, "eval_loss": 0.6248674988746643, "eval_runtime": 345.784, "eval_samples_per_second": 11.817, "eval_steps_per_second": 0.74, "step": 5200 }, { "epoch": 77.62, "learning_rate": 0.00017028070175438594, "loss": 0.011, "step": 5201 }, { "epoch": 77.64, "learning_rate": 0.0001702456140350877, "loss": 0.0015, "step": 5202 }, { "epoch": 77.65, "learning_rate": 0.00017021052631578947, "loss": 0.0022, "step": 5203 }, { "epoch": 77.67, "learning_rate": 0.00017017543859649121, "loss": 0.0012, "step": 5204 }, { "epoch": 77.68, "learning_rate": 0.00017014035087719296, "loss": 0.0163, "step": 5205 }, { "epoch": 77.7, "learning_rate": 0.0001701052631578947, "loss": 0.0082, "step": 5206 }, { "epoch": 77.71, "learning_rate": 0.0001700701754385965, "loss": 0.0025, "step": 5207 }, { "epoch": 77.73, "learning_rate": 0.00017003508771929824, "loss": 0.0022, "step": 5208 }, { "epoch": 77.74, "learning_rate": 0.00016999999999999999, "loss": 0.0288, "step": 5209 }, { "epoch": 77.76, "learning_rate": 0.00016996491228070173, "loss": 0.1087, "step": 5210 }, { "epoch": 77.77, "learning_rate": 0.0001699298245614035, "loss": 0.0049, "step": 5211 }, { "epoch": 77.79, "learning_rate": 0.00016989473684210526, "loss": 0.0052, "step": 5212 }, { "epoch": 77.8, "learning_rate": 0.000169859649122807, "loss": 0.0126, "step": 5213 }, { "epoch": 77.82, "learning_rate": 0.00016982456140350873, "loss": 0.0048, "step": 5214 }, { "epoch": 77.83, "learning_rate": 0.00016978947368421053, "loss": 0.0034, "step": 5215 }, { "epoch": 77.85, "learning_rate": 0.00016975438596491225, "loss": 0.1069, "step": 5216 }, { "epoch": 77.86, "learning_rate": 0.000169719298245614, "loss": 0.0046, "step": 5217 }, { "epoch": 77.88, "learning_rate": 0.00016968421052631578, "loss": 0.0169, "step": 5218 }, { "epoch": 77.89, "learning_rate": 0.00016964912280701753, "loss": 0.2265, "step": 5219 }, { "epoch": 77.91, "learning_rate": 0.00016961403508771928, "loss": 0.002, "step": 5220 }, { "epoch": 77.92, "learning_rate": 0.00016957894736842102, "loss": 0.0123, "step": 5221 }, { "epoch": 77.94, "learning_rate": 0.0001695438596491228, "loss": 0.0356, "step": 5222 }, { "epoch": 77.95, "learning_rate": 0.00016950877192982455, "loss": 0.0099, "step": 5223 }, { "epoch": 77.97, "learning_rate": 0.0001694736842105263, "loss": 0.003, "step": 5224 }, { "epoch": 77.98, "learning_rate": 0.00016943859649122805, "loss": 0.0033, "step": 5225 }, { "epoch": 78.0, "learning_rate": 0.00016940350877192982, "loss": 0.1931, "step": 5226 }, { "epoch": 78.01, "learning_rate": 0.00016936842105263157, "loss": 0.2121, "step": 5227 }, { "epoch": 78.03, "learning_rate": 0.00016933333333333332, "loss": 0.0295, "step": 5228 }, { "epoch": 78.04, "learning_rate": 0.0001692982456140351, "loss": 0.0095, "step": 5229 }, { "epoch": 78.06, "learning_rate": 0.00016926315789473684, "loss": 0.1848, "step": 5230 }, { "epoch": 78.07, "learning_rate": 0.00016922807017543857, "loss": 0.179, "step": 5231 }, { "epoch": 78.09, "learning_rate": 0.00016919298245614032, "loss": 0.002, "step": 5232 }, { "epoch": 78.1, "learning_rate": 0.0001691578947368421, "loss": 0.0021, "step": 5233 }, { "epoch": 78.12, "learning_rate": 0.00016912280701754384, "loss": 0.001, "step": 5234 }, { "epoch": 78.13, "learning_rate": 0.0001690877192982456, "loss": 0.0018, "step": 5235 }, { "epoch": 78.15, "learning_rate": 0.00016905263157894734, "loss": 0.0209, "step": 5236 }, { "epoch": 78.16, "learning_rate": 0.0001690175438596491, "loss": 0.003, "step": 5237 }, { "epoch": 78.18, "learning_rate": 0.00016898245614035086, "loss": 0.0016, "step": 5238 }, { "epoch": 78.19, "learning_rate": 0.0001689473684210526, "loss": 0.0861, "step": 5239 }, { "epoch": 78.21, "learning_rate": 0.0001689122807017544, "loss": 0.3714, "step": 5240 }, { "epoch": 78.22, "learning_rate": 0.00016887719298245614, "loss": 0.0318, "step": 5241 }, { "epoch": 78.24, "learning_rate": 0.00016884210526315788, "loss": 0.0144, "step": 5242 }, { "epoch": 78.25, "learning_rate": 0.00016880701754385963, "loss": 0.0178, "step": 5243 }, { "epoch": 78.27, "learning_rate": 0.0001687719298245614, "loss": 0.0019, "step": 5244 }, { "epoch": 78.28, "learning_rate": 0.00016873684210526316, "loss": 0.0023, "step": 5245 }, { "epoch": 78.3, "learning_rate": 0.0001687017543859649, "loss": 0.1122, "step": 5246 }, { "epoch": 78.31, "learning_rate": 0.00016866666666666663, "loss": 0.0778, "step": 5247 }, { "epoch": 78.33, "learning_rate": 0.0001686315789473684, "loss": 0.0029, "step": 5248 }, { "epoch": 78.34, "learning_rate": 0.00016859649122807015, "loss": 0.0016, "step": 5249 }, { "epoch": 78.36, "learning_rate": 0.0001685614035087719, "loss": 0.1838, "step": 5250 }, { "epoch": 78.37, "learning_rate": 0.00016852631578947365, "loss": 0.0189, "step": 5251 }, { "epoch": 78.39, "learning_rate": 0.00016849122807017543, "loss": 0.0798, "step": 5252 }, { "epoch": 78.4, "learning_rate": 0.00016845614035087718, "loss": 0.0018, "step": 5253 }, { "epoch": 78.42, "learning_rate": 0.00016842105263157892, "loss": 0.0105, "step": 5254 }, { "epoch": 78.43, "learning_rate": 0.0001683859649122807, "loss": 0.0016, "step": 5255 }, { "epoch": 78.45, "learning_rate": 0.00016835087719298245, "loss": 0.0053, "step": 5256 }, { "epoch": 78.46, "learning_rate": 0.0001683157894736842, "loss": 0.0009, "step": 5257 }, { "epoch": 78.48, "learning_rate": 0.00016828070175438595, "loss": 0.0009, "step": 5258 }, { "epoch": 78.49, "learning_rate": 0.00016824561403508772, "loss": 0.0011, "step": 5259 }, { "epoch": 78.51, "learning_rate": 0.00016821052631578947, "loss": 0.1651, "step": 5260 }, { "epoch": 78.52, "learning_rate": 0.00016817543859649122, "loss": 0.0027, "step": 5261 }, { "epoch": 78.54, "learning_rate": 0.00016814035087719294, "loss": 0.0896, "step": 5262 }, { "epoch": 78.55, "learning_rate": 0.00016810526315789474, "loss": 0.1429, "step": 5263 }, { "epoch": 78.57, "learning_rate": 0.00016807017543859647, "loss": 0.0048, "step": 5264 }, { "epoch": 78.58, "learning_rate": 0.00016803508771929821, "loss": 0.0124, "step": 5265 }, { "epoch": 78.59, "learning_rate": 0.000168, "loss": 0.0183, "step": 5266 }, { "epoch": 78.61, "learning_rate": 0.00016796491228070174, "loss": 0.0027, "step": 5267 }, { "epoch": 78.62, "learning_rate": 0.0001679298245614035, "loss": 0.0021, "step": 5268 }, { "epoch": 78.64, "learning_rate": 0.00016789473684210524, "loss": 0.0305, "step": 5269 }, { "epoch": 78.65, "learning_rate": 0.000167859649122807, "loss": 0.0382, "step": 5270 }, { "epoch": 78.67, "learning_rate": 0.00016782456140350876, "loss": 0.0012, "step": 5271 }, { "epoch": 78.68, "learning_rate": 0.0001677894736842105, "loss": 0.0018, "step": 5272 }, { "epoch": 78.7, "learning_rate": 0.00016775438596491226, "loss": 0.0016, "step": 5273 }, { "epoch": 78.71, "learning_rate": 0.00016771929824561404, "loss": 0.0149, "step": 5274 }, { "epoch": 78.73, "learning_rate": 0.00016768421052631578, "loss": 0.0011, "step": 5275 }, { "epoch": 78.74, "learning_rate": 0.00016764912280701753, "loss": 0.2443, "step": 5276 }, { "epoch": 78.76, "learning_rate": 0.00016761403508771925, "loss": 0.0069, "step": 5277 }, { "epoch": 78.77, "learning_rate": 0.00016757894736842106, "loss": 0.0438, "step": 5278 }, { "epoch": 78.79, "learning_rate": 0.00016754385964912278, "loss": 0.002, "step": 5279 }, { "epoch": 78.8, "learning_rate": 0.00016750877192982453, "loss": 0.0701, "step": 5280 }, { "epoch": 78.82, "learning_rate": 0.0001674736842105263, "loss": 0.0552, "step": 5281 }, { "epoch": 78.83, "learning_rate": 0.00016743859649122805, "loss": 0.1124, "step": 5282 }, { "epoch": 78.85, "learning_rate": 0.0001674035087719298, "loss": 0.0138, "step": 5283 }, { "epoch": 78.86, "learning_rate": 0.00016736842105263155, "loss": 0.0011, "step": 5284 }, { "epoch": 78.88, "learning_rate": 0.00016733333333333333, "loss": 0.0077, "step": 5285 }, { "epoch": 78.89, "learning_rate": 0.00016729824561403507, "loss": 0.0048, "step": 5286 }, { "epoch": 78.91, "learning_rate": 0.00016726315789473682, "loss": 0.0017, "step": 5287 }, { "epoch": 78.92, "learning_rate": 0.00016722807017543857, "loss": 0.1648, "step": 5288 }, { "epoch": 78.94, "learning_rate": 0.00016719298245614035, "loss": 0.1637, "step": 5289 }, { "epoch": 78.95, "learning_rate": 0.0001671578947368421, "loss": 0.0021, "step": 5290 }, { "epoch": 78.97, "learning_rate": 0.00016712280701754385, "loss": 0.0633, "step": 5291 }, { "epoch": 78.98, "learning_rate": 0.00016708771929824562, "loss": 0.0014, "step": 5292 }, { "epoch": 79.0, "learning_rate": 0.00016705263157894737, "loss": 0.0012, "step": 5293 }, { "epoch": 79.01, "learning_rate": 0.00016701754385964912, "loss": 0.0053, "step": 5294 }, { "epoch": 79.03, "learning_rate": 0.00016698245614035084, "loss": 0.0032, "step": 5295 }, { "epoch": 79.04, "learning_rate": 0.00016694736842105262, "loss": 0.0259, "step": 5296 }, { "epoch": 79.06, "learning_rate": 0.00016691228070175437, "loss": 0.0016, "step": 5297 }, { "epoch": 79.07, "learning_rate": 0.00016687719298245611, "loss": 0.0043, "step": 5298 }, { "epoch": 79.09, "learning_rate": 0.00016684210526315786, "loss": 0.099, "step": 5299 }, { "epoch": 79.1, "learning_rate": 0.00016680701754385964, "loss": 0.0156, "step": 5300 }, { "epoch": 79.12, "learning_rate": 0.0001667719298245614, "loss": 0.0023, "step": 5301 }, { "epoch": 79.13, "learning_rate": 0.00016673684210526314, "loss": 0.0013, "step": 5302 }, { "epoch": 79.15, "learning_rate": 0.0001667017543859649, "loss": 0.147, "step": 5303 }, { "epoch": 79.16, "learning_rate": 0.00016666666666666666, "loss": 0.0026, "step": 5304 }, { "epoch": 79.18, "learning_rate": 0.0001666315789473684, "loss": 0.1411, "step": 5305 }, { "epoch": 79.19, "learning_rate": 0.00016659649122807016, "loss": 0.0008, "step": 5306 }, { "epoch": 79.21, "learning_rate": 0.00016656140350877193, "loss": 0.002, "step": 5307 }, { "epoch": 79.22, "learning_rate": 0.00016652631578947368, "loss": 0.0238, "step": 5308 }, { "epoch": 79.24, "learning_rate": 0.00016649122807017543, "loss": 0.0234, "step": 5309 }, { "epoch": 79.25, "learning_rate": 0.00016645614035087715, "loss": 0.0023, "step": 5310 }, { "epoch": 79.27, "learning_rate": 0.00016642105263157896, "loss": 0.0031, "step": 5311 }, { "epoch": 79.28, "learning_rate": 0.00016638596491228068, "loss": 0.0033, "step": 5312 }, { "epoch": 79.3, "learning_rate": 0.00016635087719298243, "loss": 0.0009, "step": 5313 }, { "epoch": 79.31, "learning_rate": 0.00016631578947368418, "loss": 0.0379, "step": 5314 }, { "epoch": 79.33, "learning_rate": 0.00016628070175438595, "loss": 0.001, "step": 5315 }, { "epoch": 79.34, "learning_rate": 0.0001662456140350877, "loss": 0.0029, "step": 5316 }, { "epoch": 79.36, "learning_rate": 0.00016621052631578945, "loss": 0.0009, "step": 5317 }, { "epoch": 79.37, "learning_rate": 0.00016617543859649123, "loss": 0.0497, "step": 5318 }, { "epoch": 79.39, "learning_rate": 0.00016614035087719297, "loss": 0.0067, "step": 5319 }, { "epoch": 79.4, "learning_rate": 0.00016610526315789472, "loss": 0.001, "step": 5320 }, { "epoch": 79.42, "learning_rate": 0.00016607017543859647, "loss": 0.0771, "step": 5321 }, { "epoch": 79.43, "learning_rate": 0.00016603508771929825, "loss": 0.0084, "step": 5322 }, { "epoch": 79.45, "learning_rate": 0.000166, "loss": 0.0012, "step": 5323 }, { "epoch": 79.46, "learning_rate": 0.00016596491228070175, "loss": 0.0063, "step": 5324 }, { "epoch": 79.48, "learning_rate": 0.00016592982456140347, "loss": 0.0082, "step": 5325 }, { "epoch": 79.49, "learning_rate": 0.00016589473684210527, "loss": 0.0008, "step": 5326 }, { "epoch": 79.51, "learning_rate": 0.000165859649122807, "loss": 0.0708, "step": 5327 }, { "epoch": 79.52, "learning_rate": 0.00016582456140350874, "loss": 0.2766, "step": 5328 }, { "epoch": 79.54, "learning_rate": 0.00016578947368421052, "loss": 0.1932, "step": 5329 }, { "epoch": 79.55, "learning_rate": 0.00016575438596491226, "loss": 0.0024, "step": 5330 }, { "epoch": 79.57, "learning_rate": 0.00016571929824561401, "loss": 0.001, "step": 5331 }, { "epoch": 79.58, "learning_rate": 0.00016568421052631576, "loss": 0.026, "step": 5332 }, { "epoch": 79.59, "learning_rate": 0.00016564912280701754, "loss": 0.0014, "step": 5333 }, { "epoch": 79.61, "learning_rate": 0.0001656140350877193, "loss": 0.0027, "step": 5334 }, { "epoch": 79.62, "learning_rate": 0.00016557894736842104, "loss": 0.0038, "step": 5335 }, { "epoch": 79.64, "learning_rate": 0.00016554385964912278, "loss": 0.0042, "step": 5336 }, { "epoch": 79.65, "learning_rate": 0.00016550877192982456, "loss": 0.0009, "step": 5337 }, { "epoch": 79.67, "learning_rate": 0.0001654736842105263, "loss": 0.0024, "step": 5338 }, { "epoch": 79.68, "learning_rate": 0.00016543859649122806, "loss": 0.0009, "step": 5339 }, { "epoch": 79.7, "learning_rate": 0.0001654035087719298, "loss": 0.2441, "step": 5340 }, { "epoch": 79.71, "learning_rate": 0.00016536842105263158, "loss": 0.0387, "step": 5341 }, { "epoch": 79.73, "learning_rate": 0.0001653333333333333, "loss": 0.001, "step": 5342 }, { "epoch": 79.74, "learning_rate": 0.00016529824561403505, "loss": 0.0024, "step": 5343 }, { "epoch": 79.76, "learning_rate": 0.00016526315789473683, "loss": 0.0043, "step": 5344 }, { "epoch": 79.77, "learning_rate": 0.00016522807017543858, "loss": 0.0826, "step": 5345 }, { "epoch": 79.79, "learning_rate": 0.00016519298245614033, "loss": 0.0019, "step": 5346 }, { "epoch": 79.8, "learning_rate": 0.00016515789473684208, "loss": 0.0067, "step": 5347 }, { "epoch": 79.82, "learning_rate": 0.00016512280701754385, "loss": 0.1593, "step": 5348 }, { "epoch": 79.83, "learning_rate": 0.0001650877192982456, "loss": 0.0102, "step": 5349 }, { "epoch": 79.85, "learning_rate": 0.00016505263157894735, "loss": 0.0048, "step": 5350 }, { "epoch": 79.86, "learning_rate": 0.0001650175438596491, "loss": 0.0009, "step": 5351 }, { "epoch": 79.88, "learning_rate": 0.00016498245614035087, "loss": 0.1238, "step": 5352 }, { "epoch": 79.89, "learning_rate": 0.00016494736842105262, "loss": 0.0311, "step": 5353 }, { "epoch": 79.91, "learning_rate": 0.00016491228070175437, "loss": 0.0013, "step": 5354 }, { "epoch": 79.92, "learning_rate": 0.00016487719298245615, "loss": 0.0785, "step": 5355 }, { "epoch": 79.94, "learning_rate": 0.0001648421052631579, "loss": 0.2155, "step": 5356 }, { "epoch": 79.95, "learning_rate": 0.00016480701754385964, "loss": 0.2146, "step": 5357 }, { "epoch": 79.97, "learning_rate": 0.00016477192982456137, "loss": 0.1953, "step": 5358 }, { "epoch": 79.98, "learning_rate": 0.00016473684210526317, "loss": 0.0021, "step": 5359 }, { "epoch": 80.0, "learning_rate": 0.0001647017543859649, "loss": 0.0479, "step": 5360 }, { "epoch": 80.01, "learning_rate": 0.00016466666666666664, "loss": 0.0028, "step": 5361 }, { "epoch": 80.03, "learning_rate": 0.0001646315789473684, "loss": 0.001, "step": 5362 }, { "epoch": 80.04, "learning_rate": 0.00016459649122807016, "loss": 0.1739, "step": 5363 }, { "epoch": 80.06, "learning_rate": 0.0001645614035087719, "loss": 0.1624, "step": 5364 }, { "epoch": 80.07, "learning_rate": 0.00016452631578947366, "loss": 0.1035, "step": 5365 }, { "epoch": 80.09, "learning_rate": 0.00016449122807017544, "loss": 0.0018, "step": 5366 }, { "epoch": 80.1, "learning_rate": 0.0001644561403508772, "loss": 0.1251, "step": 5367 }, { "epoch": 80.12, "learning_rate": 0.00016442105263157894, "loss": 0.0059, "step": 5368 }, { "epoch": 80.13, "learning_rate": 0.00016438596491228068, "loss": 0.3782, "step": 5369 }, { "epoch": 80.15, "learning_rate": 0.00016435087719298246, "loss": 0.0023, "step": 5370 }, { "epoch": 80.16, "learning_rate": 0.0001643157894736842, "loss": 0.0096, "step": 5371 }, { "epoch": 80.18, "learning_rate": 0.00016428070175438596, "loss": 0.3213, "step": 5372 }, { "epoch": 80.19, "learning_rate": 0.00016424561403508768, "loss": 0.0009, "step": 5373 }, { "epoch": 80.21, "learning_rate": 0.00016421052631578948, "loss": 0.0033, "step": 5374 }, { "epoch": 80.22, "learning_rate": 0.0001641754385964912, "loss": 0.001, "step": 5375 }, { "epoch": 80.24, "learning_rate": 0.00016414035087719295, "loss": 0.0185, "step": 5376 }, { "epoch": 80.25, "learning_rate": 0.0001641052631578947, "loss": 0.0059, "step": 5377 }, { "epoch": 80.27, "learning_rate": 0.00016407017543859648, "loss": 0.0022, "step": 5378 }, { "epoch": 80.28, "learning_rate": 0.00016403508771929823, "loss": 0.0538, "step": 5379 }, { "epoch": 80.3, "learning_rate": 0.00016399999999999997, "loss": 0.0096, "step": 5380 }, { "epoch": 80.31, "learning_rate": 0.00016396491228070175, "loss": 0.0257, "step": 5381 }, { "epoch": 80.33, "learning_rate": 0.0001639298245614035, "loss": 0.0072, "step": 5382 }, { "epoch": 80.34, "learning_rate": 0.00016389473684210525, "loss": 0.1744, "step": 5383 }, { "epoch": 80.36, "learning_rate": 0.000163859649122807, "loss": 0.0038, "step": 5384 }, { "epoch": 80.37, "learning_rate": 0.00016382456140350877, "loss": 0.0013, "step": 5385 }, { "epoch": 80.39, "learning_rate": 0.00016378947368421052, "loss": 0.032, "step": 5386 }, { "epoch": 80.4, "learning_rate": 0.00016375438596491227, "loss": 0.001, "step": 5387 }, { "epoch": 80.42, "learning_rate": 0.000163719298245614, "loss": 0.0909, "step": 5388 }, { "epoch": 80.43, "learning_rate": 0.0001636842105263158, "loss": 0.0019, "step": 5389 }, { "epoch": 80.45, "learning_rate": 0.00016364912280701752, "loss": 0.0483, "step": 5390 }, { "epoch": 80.46, "learning_rate": 0.00016361403508771927, "loss": 0.2005, "step": 5391 }, { "epoch": 80.48, "learning_rate": 0.00016357894736842104, "loss": 0.0697, "step": 5392 }, { "epoch": 80.49, "learning_rate": 0.0001635438596491228, "loss": 0.001, "step": 5393 }, { "epoch": 80.51, "learning_rate": 0.00016350877192982454, "loss": 0.0186, "step": 5394 }, { "epoch": 80.52, "learning_rate": 0.0001634736842105263, "loss": 0.1185, "step": 5395 }, { "epoch": 80.54, "learning_rate": 0.00016343859649122806, "loss": 0.243, "step": 5396 }, { "epoch": 80.55, "learning_rate": 0.0001634035087719298, "loss": 0.0291, "step": 5397 }, { "epoch": 80.57, "learning_rate": 0.00016336842105263156, "loss": 0.0045, "step": 5398 }, { "epoch": 80.58, "learning_rate": 0.0001633333333333333, "loss": 0.0055, "step": 5399 }, { "epoch": 80.59, "learning_rate": 0.00016329824561403509, "loss": 0.0021, "step": 5400 }, { "epoch": 80.59, "eval_accuracy": 0.8578071463534018, "eval_f1": 0.8587756014640304, "eval_loss": 0.6747012138366699, "eval_runtime": 344.6737, "eval_samples_per_second": 11.855, "eval_steps_per_second": 0.743, "step": 5400 }, { "epoch": 80.61, "learning_rate": 0.00016326315789473683, "loss": 0.0016, "step": 5401 }, { "epoch": 80.62, "learning_rate": 0.00016322807017543858, "loss": 0.0357, "step": 5402 }, { "epoch": 80.64, "learning_rate": 0.00016319298245614036, "loss": 0.0355, "step": 5403 }, { "epoch": 80.65, "learning_rate": 0.0001631578947368421, "loss": 0.0025, "step": 5404 }, { "epoch": 80.67, "learning_rate": 0.00016312280701754386, "loss": 0.005, "step": 5405 }, { "epoch": 80.68, "learning_rate": 0.00016308771929824558, "loss": 0.002, "step": 5406 }, { "epoch": 80.7, "learning_rate": 0.00016305263157894738, "loss": 0.0022, "step": 5407 }, { "epoch": 80.71, "learning_rate": 0.0001630175438596491, "loss": 0.2577, "step": 5408 }, { "epoch": 80.73, "learning_rate": 0.00016298245614035085, "loss": 0.0013, "step": 5409 }, { "epoch": 80.74, "learning_rate": 0.0001629473684210526, "loss": 0.007, "step": 5410 }, { "epoch": 80.76, "learning_rate": 0.00016291228070175438, "loss": 0.0017, "step": 5411 }, { "epoch": 80.77, "learning_rate": 0.00016287719298245613, "loss": 0.0038, "step": 5412 }, { "epoch": 80.79, "learning_rate": 0.00016284210526315787, "loss": 0.2622, "step": 5413 }, { "epoch": 80.8, "learning_rate": 0.00016280701754385962, "loss": 0.0013, "step": 5414 }, { "epoch": 80.82, "learning_rate": 0.0001627719298245614, "loss": 0.1833, "step": 5415 }, { "epoch": 80.83, "learning_rate": 0.00016273684210526315, "loss": 0.066, "step": 5416 }, { "epoch": 80.85, "learning_rate": 0.0001627017543859649, "loss": 0.0032, "step": 5417 }, { "epoch": 80.86, "learning_rate": 0.00016266666666666667, "loss": 0.0973, "step": 5418 }, { "epoch": 80.88, "learning_rate": 0.00016263157894736842, "loss": 0.0088, "step": 5419 }, { "epoch": 80.89, "learning_rate": 0.00016259649122807017, "loss": 0.001, "step": 5420 }, { "epoch": 80.91, "learning_rate": 0.0001625614035087719, "loss": 0.1095, "step": 5421 }, { "epoch": 80.92, "learning_rate": 0.0001625263157894737, "loss": 0.0488, "step": 5422 }, { "epoch": 80.94, "learning_rate": 0.00016249122807017542, "loss": 0.0919, "step": 5423 }, { "epoch": 80.95, "learning_rate": 0.00016245614035087716, "loss": 0.1362, "step": 5424 }, { "epoch": 80.97, "learning_rate": 0.00016242105263157891, "loss": 0.0035, "step": 5425 }, { "epoch": 80.98, "learning_rate": 0.0001623859649122807, "loss": 0.0581, "step": 5426 }, { "epoch": 81.0, "learning_rate": 0.00016235087719298244, "loss": 0.2841, "step": 5427 }, { "epoch": 81.01, "learning_rate": 0.0001623157894736842, "loss": 0.3199, "step": 5428 }, { "epoch": 81.03, "learning_rate": 0.00016228070175438596, "loss": 0.0018, "step": 5429 }, { "epoch": 81.04, "learning_rate": 0.0001622456140350877, "loss": 0.1194, "step": 5430 }, { "epoch": 81.06, "learning_rate": 0.00016221052631578946, "loss": 0.2508, "step": 5431 }, { "epoch": 81.07, "learning_rate": 0.0001621754385964912, "loss": 0.0017, "step": 5432 }, { "epoch": 81.09, "learning_rate": 0.00016214035087719299, "loss": 0.0014, "step": 5433 }, { "epoch": 81.1, "learning_rate": 0.00016210526315789473, "loss": 0.0689, "step": 5434 }, { "epoch": 81.12, "learning_rate": 0.00016207017543859648, "loss": 0.0054, "step": 5435 }, { "epoch": 81.13, "learning_rate": 0.0001620350877192982, "loss": 0.0099, "step": 5436 }, { "epoch": 81.15, "learning_rate": 0.000162, "loss": 0.122, "step": 5437 }, { "epoch": 81.16, "learning_rate": 0.00016196491228070173, "loss": 0.0013, "step": 5438 }, { "epoch": 81.18, "learning_rate": 0.00016192982456140348, "loss": 0.0066, "step": 5439 }, { "epoch": 81.19, "learning_rate": 0.00016189473684210523, "loss": 0.013, "step": 5440 }, { "epoch": 81.21, "learning_rate": 0.000161859649122807, "loss": 0.1842, "step": 5441 }, { "epoch": 81.22, "learning_rate": 0.00016182456140350875, "loss": 0.0014, "step": 5442 }, { "epoch": 81.24, "learning_rate": 0.0001617894736842105, "loss": 0.0029, "step": 5443 }, { "epoch": 81.25, "learning_rate": 0.00016175438596491228, "loss": 0.001, "step": 5444 }, { "epoch": 81.27, "learning_rate": 0.00016171929824561402, "loss": 0.0012, "step": 5445 }, { "epoch": 81.28, "learning_rate": 0.00016168421052631577, "loss": 0.002, "step": 5446 }, { "epoch": 81.3, "learning_rate": 0.00016164912280701752, "loss": 0.004, "step": 5447 }, { "epoch": 81.31, "learning_rate": 0.0001616140350877193, "loss": 0.0059, "step": 5448 }, { "epoch": 81.33, "learning_rate": 0.00016157894736842105, "loss": 0.1543, "step": 5449 }, { "epoch": 81.34, "learning_rate": 0.0001615438596491228, "loss": 0.0173, "step": 5450 }, { "epoch": 81.36, "learning_rate": 0.00016150877192982454, "loss": 0.0082, "step": 5451 }, { "epoch": 81.37, "learning_rate": 0.00016147368421052632, "loss": 0.107, "step": 5452 }, { "epoch": 81.39, "learning_rate": 0.00016143859649122807, "loss": 0.001, "step": 5453 }, { "epoch": 81.4, "learning_rate": 0.0001614035087719298, "loss": 0.0013, "step": 5454 }, { "epoch": 81.42, "learning_rate": 0.00016136842105263157, "loss": 0.0149, "step": 5455 }, { "epoch": 81.43, "learning_rate": 0.00016133333333333332, "loss": 0.0018, "step": 5456 }, { "epoch": 81.45, "learning_rate": 0.00016129824561403506, "loss": 0.0016, "step": 5457 }, { "epoch": 81.46, "learning_rate": 0.0001612631578947368, "loss": 0.0064, "step": 5458 }, { "epoch": 81.48, "learning_rate": 0.0001612280701754386, "loss": 0.0017, "step": 5459 }, { "epoch": 81.49, "learning_rate": 0.00016119298245614034, "loss": 0.0033, "step": 5460 }, { "epoch": 81.51, "learning_rate": 0.0001611578947368421, "loss": 0.0052, "step": 5461 }, { "epoch": 81.52, "learning_rate": 0.00016112280701754384, "loss": 0.2818, "step": 5462 }, { "epoch": 81.54, "learning_rate": 0.0001610877192982456, "loss": 0.0061, "step": 5463 }, { "epoch": 81.55, "learning_rate": 0.00016105263157894736, "loss": 0.0035, "step": 5464 }, { "epoch": 81.57, "learning_rate": 0.0001610175438596491, "loss": 0.0019, "step": 5465 }, { "epoch": 81.58, "learning_rate": 0.00016098245614035088, "loss": 0.1041, "step": 5466 }, { "epoch": 81.59, "learning_rate": 0.00016094736842105263, "loss": 0.0522, "step": 5467 }, { "epoch": 81.61, "learning_rate": 0.00016091228070175438, "loss": 0.0039, "step": 5468 }, { "epoch": 81.62, "learning_rate": 0.0001608771929824561, "loss": 0.0032, "step": 5469 }, { "epoch": 81.64, "learning_rate": 0.0001608421052631579, "loss": 0.001, "step": 5470 }, { "epoch": 81.65, "learning_rate": 0.00016080701754385963, "loss": 0.0013, "step": 5471 }, { "epoch": 81.67, "learning_rate": 0.00016077192982456138, "loss": 0.0008, "step": 5472 }, { "epoch": 81.68, "learning_rate": 0.00016073684210526313, "loss": 0.0009, "step": 5473 }, { "epoch": 81.7, "learning_rate": 0.0001607017543859649, "loss": 0.0013, "step": 5474 }, { "epoch": 81.71, "learning_rate": 0.00016066666666666665, "loss": 0.001, "step": 5475 }, { "epoch": 81.73, "learning_rate": 0.0001606315789473684, "loss": 0.0009, "step": 5476 }, { "epoch": 81.74, "learning_rate": 0.00016059649122807015, "loss": 0.0008, "step": 5477 }, { "epoch": 81.76, "learning_rate": 0.00016056140350877192, "loss": 0.0014, "step": 5478 }, { "epoch": 81.77, "learning_rate": 0.00016052631578947367, "loss": 0.0065, "step": 5479 }, { "epoch": 81.79, "learning_rate": 0.00016049122807017542, "loss": 0.0279, "step": 5480 }, { "epoch": 81.8, "learning_rate": 0.0001604561403508772, "loss": 0.0008, "step": 5481 }, { "epoch": 81.82, "learning_rate": 0.00016042105263157895, "loss": 0.001, "step": 5482 }, { "epoch": 81.83, "learning_rate": 0.0001603859649122807, "loss": 0.0019, "step": 5483 }, { "epoch": 81.85, "learning_rate": 0.00016035087719298242, "loss": 0.0048, "step": 5484 }, { "epoch": 81.86, "learning_rate": 0.00016031578947368422, "loss": 0.0362, "step": 5485 }, { "epoch": 81.88, "learning_rate": 0.00016028070175438594, "loss": 0.009, "step": 5486 }, { "epoch": 81.89, "learning_rate": 0.0001602456140350877, "loss": 0.0007, "step": 5487 }, { "epoch": 81.91, "learning_rate": 0.00016021052631578944, "loss": 0.0149, "step": 5488 }, { "epoch": 81.92, "learning_rate": 0.00016017543859649121, "loss": 0.0784, "step": 5489 }, { "epoch": 81.94, "learning_rate": 0.00016014035087719296, "loss": 0.0009, "step": 5490 }, { "epoch": 81.95, "learning_rate": 0.0001601052631578947, "loss": 0.005, "step": 5491 }, { "epoch": 81.97, "learning_rate": 0.0001600701754385965, "loss": 0.0018, "step": 5492 }, { "epoch": 81.98, "learning_rate": 0.00016003508771929824, "loss": 0.0299, "step": 5493 }, { "epoch": 82.0, "learning_rate": 0.00015999999999999999, "loss": 0.2323, "step": 5494 }, { "epoch": 82.01, "learning_rate": 0.00015996491228070173, "loss": 0.0014, "step": 5495 }, { "epoch": 82.03, "learning_rate": 0.0001599298245614035, "loss": 0.0014, "step": 5496 }, { "epoch": 82.04, "learning_rate": 0.00015989473684210526, "loss": 0.0885, "step": 5497 }, { "epoch": 82.06, "learning_rate": 0.000159859649122807, "loss": 0.0017, "step": 5498 }, { "epoch": 82.07, "learning_rate": 0.00015982456140350876, "loss": 0.0009, "step": 5499 }, { "epoch": 82.09, "learning_rate": 0.00015978947368421053, "loss": 0.0017, "step": 5500 }, { "epoch": 82.1, "learning_rate": 0.00015975438596491228, "loss": 0.0016, "step": 5501 }, { "epoch": 82.12, "learning_rate": 0.000159719298245614, "loss": 0.1006, "step": 5502 }, { "epoch": 82.13, "learning_rate": 0.00015968421052631575, "loss": 0.0051, "step": 5503 }, { "epoch": 82.15, "learning_rate": 0.00015964912280701753, "loss": 0.0014, "step": 5504 }, { "epoch": 82.16, "learning_rate": 0.00015961403508771928, "loss": 0.0016, "step": 5505 }, { "epoch": 82.18, "learning_rate": 0.00015957894736842103, "loss": 0.0018, "step": 5506 }, { "epoch": 82.19, "learning_rate": 0.0001595438596491228, "loss": 0.0018, "step": 5507 }, { "epoch": 82.21, "learning_rate": 0.00015950877192982455, "loss": 0.0027, "step": 5508 }, { "epoch": 82.22, "learning_rate": 0.0001594736842105263, "loss": 0.0009, "step": 5509 }, { "epoch": 82.24, "learning_rate": 0.00015943859649122805, "loss": 0.0029, "step": 5510 }, { "epoch": 82.25, "learning_rate": 0.00015940350877192982, "loss": 0.0521, "step": 5511 }, { "epoch": 82.27, "learning_rate": 0.00015936842105263157, "loss": 0.019, "step": 5512 }, { "epoch": 82.28, "learning_rate": 0.00015933333333333332, "loss": 0.0095, "step": 5513 }, { "epoch": 82.3, "learning_rate": 0.00015929824561403507, "loss": 0.0915, "step": 5514 }, { "epoch": 82.31, "learning_rate": 0.00015926315789473685, "loss": 0.0017, "step": 5515 }, { "epoch": 82.33, "learning_rate": 0.0001592280701754386, "loss": 0.0024, "step": 5516 }, { "epoch": 82.34, "learning_rate": 0.00015919298245614032, "loss": 0.0162, "step": 5517 }, { "epoch": 82.36, "learning_rate": 0.00015915789473684212, "loss": 0.0009, "step": 5518 }, { "epoch": 82.37, "learning_rate": 0.00015912280701754384, "loss": 0.0009, "step": 5519 }, { "epoch": 82.39, "learning_rate": 0.0001590877192982456, "loss": 0.0008, "step": 5520 }, { "epoch": 82.4, "learning_rate": 0.00015905263157894734, "loss": 0.0008, "step": 5521 }, { "epoch": 82.42, "learning_rate": 0.00015901754385964911, "loss": 0.0081, "step": 5522 }, { "epoch": 82.43, "learning_rate": 0.00015898245614035086, "loss": 0.001, "step": 5523 }, { "epoch": 82.45, "learning_rate": 0.0001589473684210526, "loss": 0.0013, "step": 5524 }, { "epoch": 82.46, "learning_rate": 0.00015891228070175436, "loss": 0.0824, "step": 5525 }, { "epoch": 82.48, "learning_rate": 0.00015887719298245614, "loss": 0.0008, "step": 5526 }, { "epoch": 82.49, "learning_rate": 0.00015884210526315789, "loss": 0.0013, "step": 5527 }, { "epoch": 82.51, "learning_rate": 0.00015880701754385963, "loss": 0.0014, "step": 5528 }, { "epoch": 82.52, "learning_rate": 0.0001587719298245614, "loss": 0.0571, "step": 5529 }, { "epoch": 82.54, "learning_rate": 0.00015873684210526316, "loss": 0.0397, "step": 5530 }, { "epoch": 82.55, "learning_rate": 0.0001587017543859649, "loss": 0.0006, "step": 5531 }, { "epoch": 82.57, "learning_rate": 0.00015866666666666663, "loss": 0.106, "step": 5532 }, { "epoch": 82.58, "learning_rate": 0.00015863157894736843, "loss": 0.0012, "step": 5533 }, { "epoch": 82.59, "learning_rate": 0.00015859649122807015, "loss": 0.0023, "step": 5534 }, { "epoch": 82.61, "learning_rate": 0.0001585614035087719, "loss": 0.0007, "step": 5535 }, { "epoch": 82.62, "learning_rate": 0.00015852631578947365, "loss": 0.0007, "step": 5536 }, { "epoch": 82.64, "learning_rate": 0.00015849122807017543, "loss": 0.0006, "step": 5537 }, { "epoch": 82.65, "learning_rate": 0.00015845614035087718, "loss": 0.0947, "step": 5538 }, { "epoch": 82.67, "learning_rate": 0.00015842105263157892, "loss": 0.0804, "step": 5539 }, { "epoch": 82.68, "learning_rate": 0.00015838596491228067, "loss": 0.0006, "step": 5540 }, { "epoch": 82.7, "learning_rate": 0.00015835087719298245, "loss": 0.002, "step": 5541 }, { "epoch": 82.71, "learning_rate": 0.0001583157894736842, "loss": 0.0091, "step": 5542 }, { "epoch": 82.73, "learning_rate": 0.00015828070175438595, "loss": 0.0725, "step": 5543 }, { "epoch": 82.74, "learning_rate": 0.00015824561403508772, "loss": 0.0369, "step": 5544 }, { "epoch": 82.76, "learning_rate": 0.00015821052631578947, "loss": 0.0008, "step": 5545 }, { "epoch": 82.77, "learning_rate": 0.00015817543859649122, "loss": 0.2917, "step": 5546 }, { "epoch": 82.79, "learning_rate": 0.00015814035087719297, "loss": 0.3772, "step": 5547 }, { "epoch": 82.8, "learning_rate": 0.00015810526315789475, "loss": 0.0007, "step": 5548 }, { "epoch": 82.82, "learning_rate": 0.00015807017543859647, "loss": 0.1434, "step": 5549 }, { "epoch": 82.83, "learning_rate": 0.00015803508771929822, "loss": 0.0369, "step": 5550 }, { "epoch": 82.85, "learning_rate": 0.00015799999999999996, "loss": 0.0007, "step": 5551 }, { "epoch": 82.86, "learning_rate": 0.00015796491228070174, "loss": 0.0007, "step": 5552 }, { "epoch": 82.88, "learning_rate": 0.0001579298245614035, "loss": 0.0009, "step": 5553 }, { "epoch": 82.89, "learning_rate": 0.00015789473684210524, "loss": 0.0011, "step": 5554 }, { "epoch": 82.91, "learning_rate": 0.00015785964912280701, "loss": 0.0009, "step": 5555 }, { "epoch": 82.92, "learning_rate": 0.00015782456140350876, "loss": 0.0047, "step": 5556 }, { "epoch": 82.94, "learning_rate": 0.0001577894736842105, "loss": 0.0011, "step": 5557 }, { "epoch": 82.95, "learning_rate": 0.00015775438596491226, "loss": 0.0142, "step": 5558 }, { "epoch": 82.97, "learning_rate": 0.00015771929824561404, "loss": 0.0384, "step": 5559 }, { "epoch": 82.98, "learning_rate": 0.00015768421052631578, "loss": 0.0017, "step": 5560 }, { "epoch": 83.0, "learning_rate": 0.00015764912280701753, "loss": 0.0011, "step": 5561 }, { "epoch": 83.01, "learning_rate": 0.00015761403508771928, "loss": 0.0043, "step": 5562 }, { "epoch": 83.03, "learning_rate": 0.00015757894736842106, "loss": 0.0012, "step": 5563 }, { "epoch": 83.04, "learning_rate": 0.0001575438596491228, "loss": 0.1857, "step": 5564 }, { "epoch": 83.06, "learning_rate": 0.00015750877192982453, "loss": 0.1551, "step": 5565 }, { "epoch": 83.07, "learning_rate": 0.00015747368421052633, "loss": 0.0013, "step": 5566 }, { "epoch": 83.09, "learning_rate": 0.00015743859649122805, "loss": 0.0018, "step": 5567 }, { "epoch": 83.1, "learning_rate": 0.0001574035087719298, "loss": 0.0416, "step": 5568 }, { "epoch": 83.12, "learning_rate": 0.00015736842105263155, "loss": 0.0016, "step": 5569 }, { "epoch": 83.13, "learning_rate": 0.00015733333333333333, "loss": 0.0565, "step": 5570 }, { "epoch": 83.15, "learning_rate": 0.00015729824561403508, "loss": 0.0044, "step": 5571 }, { "epoch": 83.16, "learning_rate": 0.00015726315789473682, "loss": 0.0031, "step": 5572 }, { "epoch": 83.18, "learning_rate": 0.00015722807017543857, "loss": 0.0062, "step": 5573 }, { "epoch": 83.19, "learning_rate": 0.00015719298245614035, "loss": 0.0015, "step": 5574 }, { "epoch": 83.21, "learning_rate": 0.0001571578947368421, "loss": 0.0162, "step": 5575 }, { "epoch": 83.22, "learning_rate": 0.00015712280701754385, "loss": 0.0023, "step": 5576 }, { "epoch": 83.24, "learning_rate": 0.0001570877192982456, "loss": 0.1509, "step": 5577 }, { "epoch": 83.25, "learning_rate": 0.00015705263157894737, "loss": 0.0032, "step": 5578 }, { "epoch": 83.27, "learning_rate": 0.00015701754385964912, "loss": 0.0601, "step": 5579 }, { "epoch": 83.28, "learning_rate": 0.00015698245614035084, "loss": 0.0017, "step": 5580 }, { "epoch": 83.3, "learning_rate": 0.00015694736842105264, "loss": 0.0017, "step": 5581 }, { "epoch": 83.31, "learning_rate": 0.00015691228070175437, "loss": 0.0074, "step": 5582 }, { "epoch": 83.33, "learning_rate": 0.00015687719298245612, "loss": 0.2237, "step": 5583 }, { "epoch": 83.34, "learning_rate": 0.00015684210526315786, "loss": 0.0026, "step": 5584 }, { "epoch": 83.36, "learning_rate": 0.00015680701754385964, "loss": 0.0321, "step": 5585 }, { "epoch": 83.37, "learning_rate": 0.0001567719298245614, "loss": 0.0013, "step": 5586 }, { "epoch": 83.39, "learning_rate": 0.00015673684210526314, "loss": 0.0012, "step": 5587 }, { "epoch": 83.4, "learning_rate": 0.00015670175438596489, "loss": 0.0011, "step": 5588 }, { "epoch": 83.42, "learning_rate": 0.00015666666666666666, "loss": 0.0089, "step": 5589 }, { "epoch": 83.43, "learning_rate": 0.0001566315789473684, "loss": 0.0007, "step": 5590 }, { "epoch": 83.45, "learning_rate": 0.00015659649122807016, "loss": 0.0008, "step": 5591 }, { "epoch": 83.46, "learning_rate": 0.00015656140350877194, "loss": 0.0024, "step": 5592 }, { "epoch": 83.48, "learning_rate": 0.00015652631578947368, "loss": 0.0008, "step": 5593 }, { "epoch": 83.49, "learning_rate": 0.00015649122807017543, "loss": 0.0019, "step": 5594 }, { "epoch": 83.51, "learning_rate": 0.00015645614035087718, "loss": 0.2801, "step": 5595 }, { "epoch": 83.52, "learning_rate": 0.00015642105263157896, "loss": 0.0012, "step": 5596 }, { "epoch": 83.54, "learning_rate": 0.00015638596491228068, "loss": 0.0034, "step": 5597 }, { "epoch": 83.55, "learning_rate": 0.00015635087719298243, "loss": 0.0846, "step": 5598 }, { "epoch": 83.57, "learning_rate": 0.00015631578947368418, "loss": 0.0549, "step": 5599 }, { "epoch": 83.58, "learning_rate": 0.00015628070175438595, "loss": 0.2558, "step": 5600 }, { "epoch": 83.58, "eval_accuracy": 0.8580518844836026, "eval_f1": 0.8581217984766242, "eval_loss": 0.7513839602470398, "eval_runtime": 345.0011, "eval_samples_per_second": 11.843, "eval_steps_per_second": 0.742, "step": 5600 }, { "epoch": 83.59, "learning_rate": 0.0001562456140350877, "loss": 0.0009, "step": 5601 }, { "epoch": 83.61, "learning_rate": 0.00015621052631578945, "loss": 0.0996, "step": 5602 }, { "epoch": 83.62, "learning_rate": 0.0001561754385964912, "loss": 0.0023, "step": 5603 }, { "epoch": 83.64, "learning_rate": 0.00015614035087719297, "loss": 0.0013, "step": 5604 }, { "epoch": 83.65, "learning_rate": 0.00015610526315789472, "loss": 0.0011, "step": 5605 }, { "epoch": 83.67, "learning_rate": 0.00015607017543859647, "loss": 0.0021, "step": 5606 }, { "epoch": 83.68, "learning_rate": 0.00015603508771929825, "loss": 0.267, "step": 5607 }, { "epoch": 83.7, "learning_rate": 0.000156, "loss": 0.0036, "step": 5608 }, { "epoch": 83.71, "learning_rate": 0.00015596491228070175, "loss": 0.0013, "step": 5609 }, { "epoch": 83.73, "learning_rate": 0.0001559298245614035, "loss": 0.0077, "step": 5610 }, { "epoch": 83.74, "learning_rate": 0.00015589473684210527, "loss": 0.0034, "step": 5611 }, { "epoch": 83.76, "learning_rate": 0.00015585964912280702, "loss": 0.0024, "step": 5612 }, { "epoch": 83.77, "learning_rate": 0.00015582456140350874, "loss": 0.1785, "step": 5613 }, { "epoch": 83.79, "learning_rate": 0.0001557894736842105, "loss": 0.0032, "step": 5614 }, { "epoch": 83.8, "learning_rate": 0.00015575438596491227, "loss": 0.0021, "step": 5615 }, { "epoch": 83.82, "learning_rate": 0.00015571929824561401, "loss": 0.4422, "step": 5616 }, { "epoch": 83.83, "learning_rate": 0.00015568421052631576, "loss": 0.0062, "step": 5617 }, { "epoch": 83.85, "learning_rate": 0.00015564912280701754, "loss": 0.3256, "step": 5618 }, { "epoch": 83.86, "learning_rate": 0.0001556140350877193, "loss": 0.0058, "step": 5619 }, { "epoch": 83.88, "learning_rate": 0.00015557894736842104, "loss": 0.0015, "step": 5620 }, { "epoch": 83.89, "learning_rate": 0.00015554385964912279, "loss": 0.0021, "step": 5621 }, { "epoch": 83.91, "learning_rate": 0.00015550877192982456, "loss": 0.3244, "step": 5622 }, { "epoch": 83.92, "learning_rate": 0.0001554736842105263, "loss": 0.002, "step": 5623 }, { "epoch": 83.94, "learning_rate": 0.00015543859649122806, "loss": 0.0073, "step": 5624 }, { "epoch": 83.95, "learning_rate": 0.0001554035087719298, "loss": 0.034, "step": 5625 }, { "epoch": 83.97, "learning_rate": 0.00015536842105263158, "loss": 0.0015, "step": 5626 }, { "epoch": 83.98, "learning_rate": 0.00015533333333333333, "loss": 0.0368, "step": 5627 }, { "epoch": 84.0, "learning_rate": 0.00015529824561403505, "loss": 0.0769, "step": 5628 }, { "epoch": 84.01, "learning_rate": 0.00015526315789473686, "loss": 0.0027, "step": 5629 }, { "epoch": 84.03, "learning_rate": 0.00015522807017543858, "loss": 0.1014, "step": 5630 }, { "epoch": 84.04, "learning_rate": 0.00015519298245614033, "loss": 0.0016, "step": 5631 }, { "epoch": 84.06, "learning_rate": 0.00015515789473684208, "loss": 0.0019, "step": 5632 }, { "epoch": 84.07, "learning_rate": 0.00015512280701754385, "loss": 0.0227, "step": 5633 }, { "epoch": 84.09, "learning_rate": 0.0001550877192982456, "loss": 0.0014, "step": 5634 }, { "epoch": 84.1, "learning_rate": 0.00015505263157894735, "loss": 0.0755, "step": 5635 }, { "epoch": 84.12, "learning_rate": 0.0001550175438596491, "loss": 0.0014, "step": 5636 }, { "epoch": 84.13, "learning_rate": 0.00015498245614035087, "loss": 0.0031, "step": 5637 }, { "epoch": 84.15, "learning_rate": 0.00015494736842105262, "loss": 0.107, "step": 5638 }, { "epoch": 84.16, "learning_rate": 0.00015491228070175437, "loss": 0.3357, "step": 5639 }, { "epoch": 84.18, "learning_rate": 0.00015487719298245612, "loss": 0.0045, "step": 5640 }, { "epoch": 84.19, "learning_rate": 0.0001548421052631579, "loss": 0.0018, "step": 5641 }, { "epoch": 84.21, "learning_rate": 0.00015480701754385965, "loss": 0.0163, "step": 5642 }, { "epoch": 84.22, "learning_rate": 0.00015477192982456137, "loss": 0.3467, "step": 5643 }, { "epoch": 84.24, "learning_rate": 0.00015473684210526317, "loss": 0.0168, "step": 5644 }, { "epoch": 84.25, "learning_rate": 0.0001547017543859649, "loss": 0.0943, "step": 5645 }, { "epoch": 84.27, "learning_rate": 0.00015466666666666664, "loss": 0.002, "step": 5646 }, { "epoch": 84.28, "learning_rate": 0.0001546315789473684, "loss": 0.193, "step": 5647 }, { "epoch": 84.3, "learning_rate": 0.00015459649122807017, "loss": 0.0045, "step": 5648 }, { "epoch": 84.31, "learning_rate": 0.00015456140350877191, "loss": 0.0017, "step": 5649 }, { "epoch": 84.33, "learning_rate": 0.00015452631578947366, "loss": 0.4212, "step": 5650 }, { "epoch": 84.34, "learning_rate": 0.0001544912280701754, "loss": 0.0033, "step": 5651 }, { "epoch": 84.36, "learning_rate": 0.0001544561403508772, "loss": 0.0022, "step": 5652 }, { "epoch": 84.37, "learning_rate": 0.00015442105263157894, "loss": 0.0021, "step": 5653 }, { "epoch": 84.39, "learning_rate": 0.00015438596491228068, "loss": 0.0087, "step": 5654 }, { "epoch": 84.4, "learning_rate": 0.00015435087719298246, "loss": 0.0464, "step": 5655 }, { "epoch": 84.42, "learning_rate": 0.0001543157894736842, "loss": 0.0063, "step": 5656 }, { "epoch": 84.43, "learning_rate": 0.00015428070175438596, "loss": 0.0176, "step": 5657 }, { "epoch": 84.45, "learning_rate": 0.0001542456140350877, "loss": 0.3616, "step": 5658 }, { "epoch": 84.46, "learning_rate": 0.00015421052631578948, "loss": 0.0088, "step": 5659 }, { "epoch": 84.48, "learning_rate": 0.00015417543859649123, "loss": 0.0016, "step": 5660 }, { "epoch": 84.49, "learning_rate": 0.00015414035087719295, "loss": 0.0014, "step": 5661 }, { "epoch": 84.51, "learning_rate": 0.0001541052631578947, "loss": 0.0018, "step": 5662 }, { "epoch": 84.52, "learning_rate": 0.00015407017543859648, "loss": 0.0014, "step": 5663 }, { "epoch": 84.54, "learning_rate": 0.00015403508771929823, "loss": 0.0066, "step": 5664 }, { "epoch": 84.55, "learning_rate": 0.00015399999999999998, "loss": 0.0012, "step": 5665 }, { "epoch": 84.57, "learning_rate": 0.00015396491228070172, "loss": 0.0026, "step": 5666 }, { "epoch": 84.58, "learning_rate": 0.0001539298245614035, "loss": 0.0016, "step": 5667 }, { "epoch": 84.59, "learning_rate": 0.00015389473684210525, "loss": 0.0063, "step": 5668 }, { "epoch": 84.61, "learning_rate": 0.000153859649122807, "loss": 0.0022, "step": 5669 }, { "epoch": 84.62, "learning_rate": 0.00015382456140350877, "loss": 0.0018, "step": 5670 }, { "epoch": 84.64, "learning_rate": 0.00015378947368421052, "loss": 0.1457, "step": 5671 }, { "epoch": 84.65, "learning_rate": 0.00015375438596491227, "loss": 0.0018, "step": 5672 }, { "epoch": 84.67, "learning_rate": 0.00015371929824561402, "loss": 0.0831, "step": 5673 }, { "epoch": 84.68, "learning_rate": 0.0001536842105263158, "loss": 0.0848, "step": 5674 }, { "epoch": 84.7, "learning_rate": 0.00015364912280701754, "loss": 0.1043, "step": 5675 }, { "epoch": 84.71, "learning_rate": 0.00015361403508771927, "loss": 0.01, "step": 5676 }, { "epoch": 84.73, "learning_rate": 0.00015357894736842102, "loss": 0.0043, "step": 5677 }, { "epoch": 84.74, "learning_rate": 0.0001535438596491228, "loss": 0.0017, "step": 5678 }, { "epoch": 84.76, "learning_rate": 0.00015350877192982454, "loss": 0.0034, "step": 5679 }, { "epoch": 84.77, "learning_rate": 0.0001534736842105263, "loss": 0.0042, "step": 5680 }, { "epoch": 84.79, "learning_rate": 0.00015343859649122806, "loss": 0.0725, "step": 5681 }, { "epoch": 84.8, "learning_rate": 0.0001534035087719298, "loss": 0.0264, "step": 5682 }, { "epoch": 84.82, "learning_rate": 0.00015336842105263156, "loss": 0.0028, "step": 5683 }, { "epoch": 84.83, "learning_rate": 0.0001533333333333333, "loss": 0.6024, "step": 5684 }, { "epoch": 84.85, "learning_rate": 0.0001532982456140351, "loss": 0.0025, "step": 5685 }, { "epoch": 84.86, "learning_rate": 0.00015326315789473684, "loss": 0.0011, "step": 5686 }, { "epoch": 84.88, "learning_rate": 0.00015322807017543858, "loss": 0.0021, "step": 5687 }, { "epoch": 84.89, "learning_rate": 0.00015319298245614033, "loss": 0.001, "step": 5688 }, { "epoch": 84.91, "learning_rate": 0.0001531578947368421, "loss": 0.0023, "step": 5689 }, { "epoch": 84.92, "learning_rate": 0.00015312280701754386, "loss": 0.0969, "step": 5690 }, { "epoch": 84.94, "learning_rate": 0.00015308771929824558, "loss": 0.0009, "step": 5691 }, { "epoch": 84.95, "learning_rate": 0.00015305263157894738, "loss": 0.0018, "step": 5692 }, { "epoch": 84.97, "learning_rate": 0.0001530175438596491, "loss": 0.0348, "step": 5693 }, { "epoch": 84.98, "learning_rate": 0.00015298245614035085, "loss": 0.006, "step": 5694 }, { "epoch": 85.0, "learning_rate": 0.0001529473684210526, "loss": 0.0012, "step": 5695 }, { "epoch": 85.01, "learning_rate": 0.00015291228070175438, "loss": 0.0085, "step": 5696 }, { "epoch": 85.03, "learning_rate": 0.00015287719298245613, "loss": 0.0019, "step": 5697 }, { "epoch": 85.04, "learning_rate": 0.00015284210526315788, "loss": 0.0055, "step": 5698 }, { "epoch": 85.06, "learning_rate": 0.00015280701754385962, "loss": 0.1146, "step": 5699 }, { "epoch": 85.07, "learning_rate": 0.0001527719298245614, "loss": 0.0261, "step": 5700 }, { "epoch": 85.09, "learning_rate": 0.00015273684210526315, "loss": 0.0082, "step": 5701 }, { "epoch": 85.1, "learning_rate": 0.0001527017543859649, "loss": 0.001, "step": 5702 }, { "epoch": 85.12, "learning_rate": 0.00015266666666666665, "loss": 0.0011, "step": 5703 }, { "epoch": 85.13, "learning_rate": 0.00015263157894736842, "loss": 0.0084, "step": 5704 }, { "epoch": 85.15, "learning_rate": 0.00015259649122807017, "loss": 0.005, "step": 5705 }, { "epoch": 85.16, "learning_rate": 0.00015256140350877192, "loss": 0.0011, "step": 5706 }, { "epoch": 85.18, "learning_rate": 0.0001525263157894737, "loss": 0.0007, "step": 5707 }, { "epoch": 85.19, "learning_rate": 0.00015249122807017544, "loss": 0.0018, "step": 5708 }, { "epoch": 85.21, "learning_rate": 0.00015245614035087717, "loss": 0.0009, "step": 5709 }, { "epoch": 85.22, "learning_rate": 0.00015242105263157891, "loss": 0.001, "step": 5710 }, { "epoch": 85.24, "learning_rate": 0.0001523859649122807, "loss": 0.0112, "step": 5711 }, { "epoch": 85.25, "learning_rate": 0.00015235087719298244, "loss": 0.0023, "step": 5712 }, { "epoch": 85.27, "learning_rate": 0.0001523157894736842, "loss": 0.0017, "step": 5713 }, { "epoch": 85.28, "learning_rate": 0.00015228070175438594, "loss": 0.0008, "step": 5714 }, { "epoch": 85.3, "learning_rate": 0.0001522456140350877, "loss": 0.0268, "step": 5715 }, { "epoch": 85.31, "learning_rate": 0.00015221052631578946, "loss": 0.0033, "step": 5716 }, { "epoch": 85.33, "learning_rate": 0.0001521754385964912, "loss": 0.0422, "step": 5717 }, { "epoch": 85.34, "learning_rate": 0.00015214035087719299, "loss": 0.0048, "step": 5718 }, { "epoch": 85.36, "learning_rate": 0.00015210526315789473, "loss": 0.005, "step": 5719 }, { "epoch": 85.37, "learning_rate": 0.00015207017543859648, "loss": 0.0054, "step": 5720 }, { "epoch": 85.39, "learning_rate": 0.00015203508771929823, "loss": 0.1974, "step": 5721 }, { "epoch": 85.4, "learning_rate": 0.000152, "loss": 0.0008, "step": 5722 }, { "epoch": 85.42, "learning_rate": 0.00015196491228070176, "loss": 0.0007, "step": 5723 }, { "epoch": 85.43, "learning_rate": 0.00015192982456140348, "loss": 0.0016, "step": 5724 }, { "epoch": 85.45, "learning_rate": 0.00015189473684210523, "loss": 0.0007, "step": 5725 }, { "epoch": 85.46, "learning_rate": 0.000151859649122807, "loss": 0.0115, "step": 5726 }, { "epoch": 85.48, "learning_rate": 0.00015182456140350875, "loss": 0.0009, "step": 5727 }, { "epoch": 85.49, "learning_rate": 0.0001517894736842105, "loss": 0.0008, "step": 5728 }, { "epoch": 85.51, "learning_rate": 0.00015175438596491228, "loss": 0.001, "step": 5729 }, { "epoch": 85.52, "learning_rate": 0.00015171929824561403, "loss": 0.0024, "step": 5730 }, { "epoch": 85.54, "learning_rate": 0.00015168421052631577, "loss": 0.0352, "step": 5731 }, { "epoch": 85.55, "learning_rate": 0.00015164912280701752, "loss": 0.0773, "step": 5732 }, { "epoch": 85.57, "learning_rate": 0.0001516140350877193, "loss": 0.0067, "step": 5733 }, { "epoch": 85.58, "learning_rate": 0.00015157894736842105, "loss": 0.001, "step": 5734 }, { "epoch": 85.59, "learning_rate": 0.0001515438596491228, "loss": 0.0658, "step": 5735 }, { "epoch": 85.61, "learning_rate": 0.00015150877192982455, "loss": 0.0008, "step": 5736 }, { "epoch": 85.62, "learning_rate": 0.00015147368421052632, "loss": 0.0021, "step": 5737 }, { "epoch": 85.64, "learning_rate": 0.00015143859649122807, "loss": 0.0005, "step": 5738 }, { "epoch": 85.65, "learning_rate": 0.0001514035087719298, "loss": 0.0008, "step": 5739 }, { "epoch": 85.67, "learning_rate": 0.00015136842105263154, "loss": 0.0009, "step": 5740 }, { "epoch": 85.68, "learning_rate": 0.00015133333333333332, "loss": 0.0006, "step": 5741 }, { "epoch": 85.7, "learning_rate": 0.00015129824561403507, "loss": 0.0015, "step": 5742 }, { "epoch": 85.71, "learning_rate": 0.00015126315789473681, "loss": 0.0008, "step": 5743 }, { "epoch": 85.73, "learning_rate": 0.0001512280701754386, "loss": 0.0008, "step": 5744 }, { "epoch": 85.74, "learning_rate": 0.00015119298245614034, "loss": 0.0009, "step": 5745 }, { "epoch": 85.76, "learning_rate": 0.0001511578947368421, "loss": 0.0076, "step": 5746 }, { "epoch": 85.77, "learning_rate": 0.00015112280701754384, "loss": 0.0253, "step": 5747 }, { "epoch": 85.79, "learning_rate": 0.0001510877192982456, "loss": 0.0017, "step": 5748 }, { "epoch": 85.8, "learning_rate": 0.00015105263157894736, "loss": 0.0044, "step": 5749 }, { "epoch": 85.82, "learning_rate": 0.0001510175438596491, "loss": 0.0056, "step": 5750 }, { "epoch": 85.83, "learning_rate": 0.00015098245614035086, "loss": 0.0009, "step": 5751 }, { "epoch": 85.85, "learning_rate": 0.00015094736842105263, "loss": 0.0039, "step": 5752 }, { "epoch": 85.86, "learning_rate": 0.00015091228070175438, "loss": 0.0027, "step": 5753 }, { "epoch": 85.88, "learning_rate": 0.00015087719298245613, "loss": 0.0222, "step": 5754 }, { "epoch": 85.89, "learning_rate": 0.0001508421052631579, "loss": 0.1151, "step": 5755 }, { "epoch": 85.91, "learning_rate": 0.00015080701754385963, "loss": 0.322, "step": 5756 }, { "epoch": 85.92, "learning_rate": 0.00015077192982456138, "loss": 0.0006, "step": 5757 }, { "epoch": 85.94, "learning_rate": 0.00015073684210526313, "loss": 0.0007, "step": 5758 }, { "epoch": 85.95, "learning_rate": 0.0001507017543859649, "loss": 0.0091, "step": 5759 }, { "epoch": 85.97, "learning_rate": 0.00015066666666666665, "loss": 0.1359, "step": 5760 }, { "epoch": 85.98, "learning_rate": 0.0001506315789473684, "loss": 0.0006, "step": 5761 }, { "epoch": 86.0, "learning_rate": 0.00015059649122807015, "loss": 0.0034, "step": 5762 }, { "epoch": 86.01, "learning_rate": 0.00015056140350877193, "loss": 0.0012, "step": 5763 }, { "epoch": 86.03, "learning_rate": 0.00015052631578947367, "loss": 0.0997, "step": 5764 }, { "epoch": 86.04, "learning_rate": 0.00015049122807017542, "loss": 0.2033, "step": 5765 }, { "epoch": 86.06, "learning_rate": 0.00015045614035087717, "loss": 0.0006, "step": 5766 }, { "epoch": 86.07, "learning_rate": 0.00015042105263157895, "loss": 0.4448, "step": 5767 }, { "epoch": 86.09, "learning_rate": 0.0001503859649122807, "loss": 0.0009, "step": 5768 }, { "epoch": 86.1, "learning_rate": 0.00015035087719298244, "loss": 0.0669, "step": 5769 }, { "epoch": 86.12, "learning_rate": 0.00015031578947368422, "loss": 0.055, "step": 5770 }, { "epoch": 86.13, "learning_rate": 0.00015028070175438597, "loss": 0.0017, "step": 5771 }, { "epoch": 86.15, "learning_rate": 0.0001502456140350877, "loss": 0.0013, "step": 5772 }, { "epoch": 86.16, "learning_rate": 0.00015021052631578944, "loss": 0.0008, "step": 5773 }, { "epoch": 86.18, "learning_rate": 0.00015017543859649122, "loss": 0.0009, "step": 5774 }, { "epoch": 86.19, "learning_rate": 0.00015014035087719296, "loss": 0.0008, "step": 5775 }, { "epoch": 86.21, "learning_rate": 0.0001501052631578947, "loss": 0.1733, "step": 5776 }, { "epoch": 86.22, "learning_rate": 0.00015007017543859646, "loss": 0.0011, "step": 5777 }, { "epoch": 86.24, "learning_rate": 0.00015003508771929824, "loss": 0.0011, "step": 5778 }, { "epoch": 86.25, "learning_rate": 0.00015, "loss": 0.1676, "step": 5779 }, { "epoch": 86.27, "learning_rate": 0.00014996491228070174, "loss": 0.0156, "step": 5780 }, { "epoch": 86.28, "learning_rate": 0.00014992982456140348, "loss": 0.0034, "step": 5781 }, { "epoch": 86.3, "learning_rate": 0.00014989473684210526, "loss": 0.0028, "step": 5782 }, { "epoch": 86.31, "learning_rate": 0.000149859649122807, "loss": 0.0805, "step": 5783 }, { "epoch": 86.33, "learning_rate": 0.00014982456140350876, "loss": 0.0009, "step": 5784 }, { "epoch": 86.34, "learning_rate": 0.0001497894736842105, "loss": 0.001, "step": 5785 }, { "epoch": 86.36, "learning_rate": 0.00014975438596491228, "loss": 0.0014, "step": 5786 }, { "epoch": 86.37, "learning_rate": 0.000149719298245614, "loss": 0.0013, "step": 5787 }, { "epoch": 86.39, "learning_rate": 0.00014968421052631578, "loss": 0.004, "step": 5788 }, { "epoch": 86.4, "learning_rate": 0.00014964912280701753, "loss": 0.0011, "step": 5789 }, { "epoch": 86.42, "learning_rate": 0.00014961403508771928, "loss": 0.3142, "step": 5790 }, { "epoch": 86.43, "learning_rate": 0.00014957894736842103, "loss": 0.0419, "step": 5791 }, { "epoch": 86.45, "learning_rate": 0.0001495438596491228, "loss": 0.0111, "step": 5792 }, { "epoch": 86.46, "learning_rate": 0.00014950877192982455, "loss": 0.008, "step": 5793 }, { "epoch": 86.48, "learning_rate": 0.0001494736842105263, "loss": 0.0049, "step": 5794 }, { "epoch": 86.49, "learning_rate": 0.00014943859649122808, "loss": 0.1487, "step": 5795 }, { "epoch": 86.51, "learning_rate": 0.0001494035087719298, "loss": 0.0008, "step": 5796 }, { "epoch": 86.52, "learning_rate": 0.00014936842105263157, "loss": 0.0008, "step": 5797 }, { "epoch": 86.54, "learning_rate": 0.00014933333333333332, "loss": 0.001, "step": 5798 }, { "epoch": 86.55, "learning_rate": 0.00014929824561403507, "loss": 0.0466, "step": 5799 }, { "epoch": 86.57, "learning_rate": 0.00014926315789473682, "loss": 0.0162, "step": 5800 }, { "epoch": 86.57, "eval_accuracy": 0.8663729809104258, "eval_f1": 0.8667231212452812, "eval_loss": 0.6782434582710266, "eval_runtime": 344.5523, "eval_samples_per_second": 11.859, "eval_steps_per_second": 0.743, "step": 5800 }, { "epoch": 86.58, "learning_rate": 0.0001492280701754386, "loss": 0.1697, "step": 5801 }, { "epoch": 86.59, "learning_rate": 0.00014919298245614034, "loss": 0.0398, "step": 5802 }, { "epoch": 86.61, "learning_rate": 0.0001491578947368421, "loss": 0.0209, "step": 5803 }, { "epoch": 86.62, "learning_rate": 0.00014912280701754384, "loss": 0.001, "step": 5804 }, { "epoch": 86.64, "learning_rate": 0.0001490877192982456, "loss": 0.0625, "step": 5805 }, { "epoch": 86.65, "learning_rate": 0.00014905263157894737, "loss": 0.0015, "step": 5806 }, { "epoch": 86.67, "learning_rate": 0.00014901754385964912, "loss": 0.1041, "step": 5807 }, { "epoch": 86.68, "learning_rate": 0.00014898245614035086, "loss": 0.0991, "step": 5808 }, { "epoch": 86.7, "learning_rate": 0.0001489473684210526, "loss": 0.0031, "step": 5809 }, { "epoch": 86.71, "learning_rate": 0.0001489122807017544, "loss": 0.0017, "step": 5810 }, { "epoch": 86.73, "learning_rate": 0.0001488771929824561, "loss": 0.0084, "step": 5811 }, { "epoch": 86.74, "learning_rate": 0.00014884210526315789, "loss": 0.0009, "step": 5812 }, { "epoch": 86.76, "learning_rate": 0.00014880701754385964, "loss": 0.0012, "step": 5813 }, { "epoch": 86.77, "learning_rate": 0.00014877192982456138, "loss": 0.0019, "step": 5814 }, { "epoch": 86.79, "learning_rate": 0.00014873684210526313, "loss": 0.0017, "step": 5815 }, { "epoch": 86.8, "learning_rate": 0.0001487017543859649, "loss": 0.0127, "step": 5816 }, { "epoch": 86.82, "learning_rate": 0.00014866666666666666, "loss": 0.0011, "step": 5817 }, { "epoch": 86.83, "learning_rate": 0.0001486315789473684, "loss": 0.0011, "step": 5818 }, { "epoch": 86.85, "learning_rate": 0.00014859649122807018, "loss": 0.2392, "step": 5819 }, { "epoch": 86.86, "learning_rate": 0.0001485614035087719, "loss": 0.0805, "step": 5820 }, { "epoch": 86.88, "learning_rate": 0.00014852631578947368, "loss": 0.0014, "step": 5821 }, { "epoch": 86.89, "learning_rate": 0.00014849122807017543, "loss": 0.0017, "step": 5822 }, { "epoch": 86.91, "learning_rate": 0.00014845614035087718, "loss": 0.3464, "step": 5823 }, { "epoch": 86.92, "learning_rate": 0.00014842105263157893, "loss": 0.0017, "step": 5824 }, { "epoch": 86.94, "learning_rate": 0.0001483859649122807, "loss": 0.0026, "step": 5825 }, { "epoch": 86.95, "learning_rate": 0.00014835087719298242, "loss": 0.0041, "step": 5826 }, { "epoch": 86.97, "learning_rate": 0.0001483157894736842, "loss": 0.0068, "step": 5827 }, { "epoch": 86.98, "learning_rate": 0.00014828070175438595, "loss": 0.0137, "step": 5828 }, { "epoch": 87.0, "learning_rate": 0.0001482456140350877, "loss": 0.1182, "step": 5829 }, { "epoch": 87.01, "learning_rate": 0.00014821052631578947, "loss": 0.0045, "step": 5830 }, { "epoch": 87.03, "learning_rate": 0.00014817543859649122, "loss": 0.0103, "step": 5831 }, { "epoch": 87.04, "learning_rate": 0.00014814035087719297, "loss": 0.0048, "step": 5832 }, { "epoch": 87.06, "learning_rate": 0.00014810526315789472, "loss": 0.0839, "step": 5833 }, { "epoch": 87.07, "learning_rate": 0.0001480701754385965, "loss": 0.0019, "step": 5834 }, { "epoch": 87.09, "learning_rate": 0.00014803508771929822, "loss": 0.0032, "step": 5835 }, { "epoch": 87.1, "learning_rate": 0.000148, "loss": 0.0013, "step": 5836 }, { "epoch": 87.12, "learning_rate": 0.00014796491228070174, "loss": 0.0122, "step": 5837 }, { "epoch": 87.13, "learning_rate": 0.0001479298245614035, "loss": 0.024, "step": 5838 }, { "epoch": 87.15, "learning_rate": 0.00014789473684210524, "loss": 0.1766, "step": 5839 }, { "epoch": 87.16, "learning_rate": 0.00014785964912280701, "loss": 0.0011, "step": 5840 }, { "epoch": 87.18, "learning_rate": 0.00014782456140350876, "loss": 0.1159, "step": 5841 }, { "epoch": 87.19, "learning_rate": 0.0001477894736842105, "loss": 0.0019, "step": 5842 }, { "epoch": 87.21, "learning_rate": 0.0001477543859649123, "loss": 0.0011, "step": 5843 }, { "epoch": 87.22, "learning_rate": 0.000147719298245614, "loss": 0.0013, "step": 5844 }, { "epoch": 87.24, "learning_rate": 0.00014768421052631579, "loss": 0.0013, "step": 5845 }, { "epoch": 87.25, "learning_rate": 0.00014764912280701753, "loss": 0.0241, "step": 5846 }, { "epoch": 87.27, "learning_rate": 0.00014761403508771928, "loss": 0.0051, "step": 5847 }, { "epoch": 87.28, "learning_rate": 0.00014757894736842103, "loss": 0.0034, "step": 5848 }, { "epoch": 87.3, "learning_rate": 0.0001475438596491228, "loss": 0.0059, "step": 5849 }, { "epoch": 87.31, "learning_rate": 0.00014750877192982453, "loss": 0.002, "step": 5850 }, { "epoch": 87.33, "learning_rate": 0.0001474736842105263, "loss": 0.079, "step": 5851 }, { "epoch": 87.34, "learning_rate": 0.00014743859649122805, "loss": 0.0011, "step": 5852 }, { "epoch": 87.36, "learning_rate": 0.0001474035087719298, "loss": 0.0013, "step": 5853 }, { "epoch": 87.37, "learning_rate": 0.00014736842105263155, "loss": 0.004, "step": 5854 }, { "epoch": 87.39, "learning_rate": 0.00014733333333333333, "loss": 0.23, "step": 5855 }, { "epoch": 87.4, "learning_rate": 0.00014729824561403508, "loss": 0.0115, "step": 5856 }, { "epoch": 87.42, "learning_rate": 0.00014726315789473683, "loss": 0.0328, "step": 5857 }, { "epoch": 87.43, "learning_rate": 0.0001472280701754386, "loss": 0.0018, "step": 5858 }, { "epoch": 87.45, "learning_rate": 0.00014719298245614032, "loss": 0.0015, "step": 5859 }, { "epoch": 87.46, "learning_rate": 0.0001471578947368421, "loss": 0.0015, "step": 5860 }, { "epoch": 87.48, "learning_rate": 0.00014712280701754385, "loss": 0.0011, "step": 5861 }, { "epoch": 87.49, "learning_rate": 0.0001470877192982456, "loss": 0.0487, "step": 5862 }, { "epoch": 87.51, "learning_rate": 0.00014705263157894734, "loss": 0.0065, "step": 5863 }, { "epoch": 87.52, "learning_rate": 0.00014701754385964912, "loss": 0.0877, "step": 5864 }, { "epoch": 87.54, "learning_rate": 0.00014698245614035087, "loss": 0.0022, "step": 5865 }, { "epoch": 87.55, "learning_rate": 0.00014694736842105262, "loss": 0.0031, "step": 5866 }, { "epoch": 87.57, "learning_rate": 0.0001469122807017544, "loss": 0.2098, "step": 5867 }, { "epoch": 87.58, "learning_rate": 0.00014687719298245612, "loss": 0.002, "step": 5868 }, { "epoch": 87.59, "learning_rate": 0.0001468421052631579, "loss": 0.0015, "step": 5869 }, { "epoch": 87.61, "learning_rate": 0.00014680701754385964, "loss": 0.0051, "step": 5870 }, { "epoch": 87.62, "learning_rate": 0.0001467719298245614, "loss": 0.0025, "step": 5871 }, { "epoch": 87.64, "learning_rate": 0.00014673684210526314, "loss": 0.0078, "step": 5872 }, { "epoch": 87.65, "learning_rate": 0.00014670175438596491, "loss": 0.0011, "step": 5873 }, { "epoch": 87.67, "learning_rate": 0.00014666666666666664, "loss": 0.0079, "step": 5874 }, { "epoch": 87.68, "learning_rate": 0.0001466315789473684, "loss": 0.0046, "step": 5875 }, { "epoch": 87.7, "learning_rate": 0.00014659649122807016, "loss": 0.0021, "step": 5876 }, { "epoch": 87.71, "learning_rate": 0.0001465614035087719, "loss": 0.0013, "step": 5877 }, { "epoch": 87.73, "learning_rate": 0.00014652631578947366, "loss": 0.0009, "step": 5878 }, { "epoch": 87.74, "learning_rate": 0.00014649122807017543, "loss": 0.0011, "step": 5879 }, { "epoch": 87.76, "learning_rate": 0.00014645614035087718, "loss": 0.0012, "step": 5880 }, { "epoch": 87.77, "learning_rate": 0.00014642105263157893, "loss": 0.0009, "step": 5881 }, { "epoch": 87.79, "learning_rate": 0.0001463859649122807, "loss": 0.0008, "step": 5882 }, { "epoch": 87.8, "learning_rate": 0.00014635087719298243, "loss": 0.0012, "step": 5883 }, { "epoch": 87.82, "learning_rate": 0.0001463157894736842, "loss": 0.075, "step": 5884 }, { "epoch": 87.83, "learning_rate": 0.00014628070175438595, "loss": 0.003, "step": 5885 }, { "epoch": 87.85, "learning_rate": 0.0001462456140350877, "loss": 0.0025, "step": 5886 }, { "epoch": 87.86, "learning_rate": 0.00014621052631578945, "loss": 0.001, "step": 5887 }, { "epoch": 87.88, "learning_rate": 0.00014617543859649123, "loss": 0.0007, "step": 5888 }, { "epoch": 87.89, "learning_rate": 0.00014614035087719298, "loss": 0.0009, "step": 5889 }, { "epoch": 87.91, "learning_rate": 0.00014610526315789472, "loss": 0.0394, "step": 5890 }, { "epoch": 87.92, "learning_rate": 0.00014607017543859647, "loss": 0.0007, "step": 5891 }, { "epoch": 87.94, "learning_rate": 0.00014603508771929822, "loss": 0.0369, "step": 5892 }, { "epoch": 87.95, "learning_rate": 0.000146, "loss": 0.0059, "step": 5893 }, { "epoch": 87.97, "learning_rate": 0.00014596491228070175, "loss": 0.0013, "step": 5894 }, { "epoch": 87.98, "learning_rate": 0.0001459298245614035, "loss": 0.2778, "step": 5895 }, { "epoch": 88.0, "learning_rate": 0.00014589473684210524, "loss": 0.0017, "step": 5896 }, { "epoch": 88.01, "learning_rate": 0.00014585964912280702, "loss": 0.0066, "step": 5897 }, { "epoch": 88.03, "learning_rate": 0.00014582456140350874, "loss": 0.0257, "step": 5898 }, { "epoch": 88.04, "learning_rate": 0.00014578947368421052, "loss": 0.0007, "step": 5899 }, { "epoch": 88.06, "learning_rate": 0.00014575438596491227, "loss": 0.1894, "step": 5900 }, { "epoch": 88.07, "learning_rate": 0.00014571929824561402, "loss": 0.1666, "step": 5901 }, { "epoch": 88.09, "learning_rate": 0.00014568421052631576, "loss": 0.0976, "step": 5902 }, { "epoch": 88.1, "learning_rate": 0.00014564912280701754, "loss": 0.0007, "step": 5903 }, { "epoch": 88.12, "learning_rate": 0.0001456140350877193, "loss": 0.0009, "step": 5904 }, { "epoch": 88.13, "learning_rate": 0.00014557894736842104, "loss": 0.0785, "step": 5905 }, { "epoch": 88.15, "learning_rate": 0.0001455438596491228, "loss": 0.1406, "step": 5906 }, { "epoch": 88.16, "learning_rate": 0.00014550877192982454, "loss": 0.0008, "step": 5907 }, { "epoch": 88.18, "learning_rate": 0.0001454736842105263, "loss": 0.0007, "step": 5908 }, { "epoch": 88.19, "learning_rate": 0.00014543859649122806, "loss": 0.0007, "step": 5909 }, { "epoch": 88.21, "learning_rate": 0.0001454035087719298, "loss": 0.1557, "step": 5910 }, { "epoch": 88.22, "learning_rate": 0.00014536842105263156, "loss": 0.0011, "step": 5911 }, { "epoch": 88.24, "learning_rate": 0.00014533333333333333, "loss": 0.2107, "step": 5912 }, { "epoch": 88.25, "learning_rate": 0.00014529824561403508, "loss": 0.2468, "step": 5913 }, { "epoch": 88.27, "learning_rate": 0.00014526315789473683, "loss": 0.0007, "step": 5914 }, { "epoch": 88.28, "learning_rate": 0.00014522807017543858, "loss": 0.0032, "step": 5915 }, { "epoch": 88.3, "learning_rate": 0.00014519298245614033, "loss": 0.002, "step": 5916 }, { "epoch": 88.31, "learning_rate": 0.0001451578947368421, "loss": 0.0035, "step": 5917 }, { "epoch": 88.33, "learning_rate": 0.00014512280701754385, "loss": 0.0014, "step": 5918 }, { "epoch": 88.34, "learning_rate": 0.0001450877192982456, "loss": 0.0042, "step": 5919 }, { "epoch": 88.36, "learning_rate": 0.00014505263157894735, "loss": 0.0015, "step": 5920 }, { "epoch": 88.37, "learning_rate": 0.00014501754385964913, "loss": 0.0027, "step": 5921 }, { "epoch": 88.39, "learning_rate": 0.00014498245614035085, "loss": 0.2759, "step": 5922 }, { "epoch": 88.4, "learning_rate": 0.00014494736842105262, "loss": 0.0025, "step": 5923 }, { "epoch": 88.42, "learning_rate": 0.00014491228070175437, "loss": 0.0016, "step": 5924 }, { "epoch": 88.43, "learning_rate": 0.00014487719298245612, "loss": 0.0013, "step": 5925 }, { "epoch": 88.45, "learning_rate": 0.00014484210526315787, "loss": 0.0033, "step": 5926 }, { "epoch": 88.46, "learning_rate": 0.00014480701754385965, "loss": 0.0038, "step": 5927 }, { "epoch": 88.48, "learning_rate": 0.0001447719298245614, "loss": 0.1503, "step": 5928 }, { "epoch": 88.49, "learning_rate": 0.00014473684210526314, "loss": 0.0013, "step": 5929 }, { "epoch": 88.51, "learning_rate": 0.00014470175438596492, "loss": 0.0019, "step": 5930 }, { "epoch": 88.52, "learning_rate": 0.00014466666666666664, "loss": 0.0517, "step": 5931 }, { "epoch": 88.54, "learning_rate": 0.00014463157894736842, "loss": 0.0095, "step": 5932 }, { "epoch": 88.55, "learning_rate": 0.00014459649122807017, "loss": 0.0308, "step": 5933 }, { "epoch": 88.57, "learning_rate": 0.00014456140350877191, "loss": 0.0025, "step": 5934 }, { "epoch": 88.58, "learning_rate": 0.00014452631578947366, "loss": 0.0047, "step": 5935 }, { "epoch": 88.59, "learning_rate": 0.00014449122807017544, "loss": 0.0013, "step": 5936 }, { "epoch": 88.61, "learning_rate": 0.0001444561403508772, "loss": 0.004, "step": 5937 }, { "epoch": 88.62, "learning_rate": 0.00014442105263157894, "loss": 0.0042, "step": 5938 }, { "epoch": 88.64, "learning_rate": 0.00014438596491228069, "loss": 0.2497, "step": 5939 }, { "epoch": 88.65, "learning_rate": 0.00014435087719298243, "loss": 0.0088, "step": 5940 }, { "epoch": 88.67, "learning_rate": 0.00014431578947368418, "loss": 0.0128, "step": 5941 }, { "epoch": 88.68, "learning_rate": 0.00014428070175438596, "loss": 0.0015, "step": 5942 }, { "epoch": 88.7, "learning_rate": 0.0001442456140350877, "loss": 0.0049, "step": 5943 }, { "epoch": 88.71, "learning_rate": 0.00014421052631578946, "loss": 0.0013, "step": 5944 }, { "epoch": 88.73, "learning_rate": 0.00014417543859649123, "loss": 0.0062, "step": 5945 }, { "epoch": 88.74, "learning_rate": 0.00014414035087719295, "loss": 0.0102, "step": 5946 }, { "epoch": 88.76, "learning_rate": 0.00014410526315789473, "loss": 0.1041, "step": 5947 }, { "epoch": 88.77, "learning_rate": 0.00014407017543859648, "loss": 0.0318, "step": 5948 }, { "epoch": 88.79, "learning_rate": 0.00014403508771929823, "loss": 0.1817, "step": 5949 }, { "epoch": 88.8, "learning_rate": 0.00014399999999999998, "loss": 0.0012, "step": 5950 }, { "epoch": 88.82, "learning_rate": 0.00014396491228070175, "loss": 0.2596, "step": 5951 }, { "epoch": 88.83, "learning_rate": 0.0001439298245614035, "loss": 0.0012, "step": 5952 }, { "epoch": 88.85, "learning_rate": 0.00014389473684210525, "loss": 0.162, "step": 5953 }, { "epoch": 88.86, "learning_rate": 0.000143859649122807, "loss": 0.0988, "step": 5954 }, { "epoch": 88.88, "learning_rate": 0.00014382456140350875, "loss": 0.0008, "step": 5955 }, { "epoch": 88.89, "learning_rate": 0.00014378947368421052, "loss": 0.0078, "step": 5956 }, { "epoch": 88.91, "learning_rate": 0.00014375438596491227, "loss": 0.0186, "step": 5957 }, { "epoch": 88.92, "learning_rate": 0.00014371929824561402, "loss": 0.0009, "step": 5958 }, { "epoch": 88.94, "learning_rate": 0.00014368421052631577, "loss": 0.0046, "step": 5959 }, { "epoch": 88.95, "learning_rate": 0.00014364912280701755, "loss": 0.0011, "step": 5960 }, { "epoch": 88.97, "learning_rate": 0.0001436140350877193, "loss": 0.001, "step": 5961 }, { "epoch": 88.98, "learning_rate": 0.00014357894736842104, "loss": 0.0008, "step": 5962 }, { "epoch": 89.0, "learning_rate": 0.0001435438596491228, "loss": 0.0851, "step": 5963 }, { "epoch": 89.01, "learning_rate": 0.00014350877192982454, "loss": 0.0031, "step": 5964 }, { "epoch": 89.03, "learning_rate": 0.0001434736842105263, "loss": 0.0015, "step": 5965 }, { "epoch": 89.04, "learning_rate": 0.00014343859649122807, "loss": 0.2782, "step": 5966 }, { "epoch": 89.06, "learning_rate": 0.00014340350877192981, "loss": 0.0386, "step": 5967 }, { "epoch": 89.07, "learning_rate": 0.00014336842105263156, "loss": 0.2128, "step": 5968 }, { "epoch": 89.09, "learning_rate": 0.00014333333333333334, "loss": 0.001, "step": 5969 }, { "epoch": 89.1, "learning_rate": 0.00014329824561403506, "loss": 0.0344, "step": 5970 }, { "epoch": 89.12, "learning_rate": 0.00014326315789473684, "loss": 0.001, "step": 5971 }, { "epoch": 89.13, "learning_rate": 0.00014322807017543859, "loss": 0.0008, "step": 5972 }, { "epoch": 89.15, "learning_rate": 0.00014319298245614033, "loss": 0.0195, "step": 5973 }, { "epoch": 89.16, "learning_rate": 0.00014315789473684208, "loss": 0.0012, "step": 5974 }, { "epoch": 89.18, "learning_rate": 0.00014312280701754386, "loss": 0.0016, "step": 5975 }, { "epoch": 89.19, "learning_rate": 0.0001430877192982456, "loss": 0.0053, "step": 5976 }, { "epoch": 89.21, "learning_rate": 0.00014305263157894736, "loss": 0.0014, "step": 5977 }, { "epoch": 89.22, "learning_rate": 0.0001430175438596491, "loss": 0.1234, "step": 5978 }, { "epoch": 89.24, "learning_rate": 0.00014298245614035085, "loss": 0.0009, "step": 5979 }, { "epoch": 89.25, "learning_rate": 0.00014294736842105263, "loss": 0.0274, "step": 5980 }, { "epoch": 89.27, "learning_rate": 0.00014291228070175438, "loss": 0.0013, "step": 5981 }, { "epoch": 89.28, "learning_rate": 0.00014287719298245613, "loss": 0.0067, "step": 5982 }, { "epoch": 89.3, "learning_rate": 0.00014284210526315788, "loss": 0.0061, "step": 5983 }, { "epoch": 89.31, "learning_rate": 0.00014280701754385965, "loss": 0.1048, "step": 5984 }, { "epoch": 89.33, "learning_rate": 0.0001427719298245614, "loss": 0.2524, "step": 5985 }, { "epoch": 89.34, "learning_rate": 0.00014273684210526315, "loss": 0.0997, "step": 5986 }, { "epoch": 89.36, "learning_rate": 0.0001427017543859649, "loss": 0.0029, "step": 5987 }, { "epoch": 89.37, "learning_rate": 0.00014266666666666665, "loss": 0.1637, "step": 5988 }, { "epoch": 89.39, "learning_rate": 0.0001426315789473684, "loss": 0.0022, "step": 5989 }, { "epoch": 89.4, "learning_rate": 0.00014259649122807017, "loss": 0.0015, "step": 5990 }, { "epoch": 89.42, "learning_rate": 0.00014256140350877192, "loss": 0.0012, "step": 5991 }, { "epoch": 89.43, "learning_rate": 0.00014252631578947367, "loss": 0.1172, "step": 5992 }, { "epoch": 89.45, "learning_rate": 0.00014249122807017545, "loss": 0.1637, "step": 5993 }, { "epoch": 89.46, "learning_rate": 0.00014245614035087717, "loss": 0.0271, "step": 5994 }, { "epoch": 89.48, "learning_rate": 0.00014242105263157894, "loss": 0.0028, "step": 5995 }, { "epoch": 89.49, "learning_rate": 0.0001423859649122807, "loss": 0.2502, "step": 5996 }, { "epoch": 89.51, "learning_rate": 0.00014235087719298244, "loss": 0.0021, "step": 5997 }, { "epoch": 89.52, "learning_rate": 0.0001423157894736842, "loss": 0.1202, "step": 5998 }, { "epoch": 89.54, "learning_rate": 0.00014228070175438596, "loss": 0.1288, "step": 5999 }, { "epoch": 89.55, "learning_rate": 0.00014224561403508771, "loss": 0.1929, "step": 6000 }, { "epoch": 89.55, "eval_accuracy": 0.860009789525208, "eval_f1": 0.8615169079479937, "eval_loss": 0.6371402144432068, "eval_runtime": 345.6404, "eval_samples_per_second": 11.822, "eval_steps_per_second": 0.741, "step": 6000 }, { "epoch": 89.57, "learning_rate": 0.00014221052631578946, "loss": 0.0023, "step": 6001 }, { "epoch": 89.58, "learning_rate": 0.0001421754385964912, "loss": 0.1027, "step": 6002 }, { "epoch": 89.59, "learning_rate": 0.00014214035087719296, "loss": 0.0391, "step": 6003 }, { "epoch": 89.61, "learning_rate": 0.0001421052631578947, "loss": 0.0332, "step": 6004 }, { "epoch": 89.62, "learning_rate": 0.00014207017543859648, "loss": 0.0038, "step": 6005 }, { "epoch": 89.64, "learning_rate": 0.00014203508771929823, "loss": 0.0185, "step": 6006 }, { "epoch": 89.65, "learning_rate": 0.00014199999999999998, "loss": 0.0101, "step": 6007 }, { "epoch": 89.67, "learning_rate": 0.00014196491228070176, "loss": 0.0023, "step": 6008 }, { "epoch": 89.68, "learning_rate": 0.0001419298245614035, "loss": 0.0232, "step": 6009 }, { "epoch": 89.7, "learning_rate": 0.00014189473684210526, "loss": 0.1696, "step": 6010 }, { "epoch": 89.71, "learning_rate": 0.000141859649122807, "loss": 0.0579, "step": 6011 }, { "epoch": 89.73, "learning_rate": 0.00014182456140350875, "loss": 0.0025, "step": 6012 }, { "epoch": 89.74, "learning_rate": 0.0001417894736842105, "loss": 0.0084, "step": 6013 }, { "epoch": 89.76, "learning_rate": 0.00014175438596491228, "loss": 0.0014, "step": 6014 }, { "epoch": 89.77, "learning_rate": 0.00014171929824561403, "loss": 0.0051, "step": 6015 }, { "epoch": 89.79, "learning_rate": 0.00014168421052631578, "loss": 0.0147, "step": 6016 }, { "epoch": 89.8, "learning_rate": 0.00014164912280701752, "loss": 0.0327, "step": 6017 }, { "epoch": 89.82, "learning_rate": 0.00014161403508771927, "loss": 0.0069, "step": 6018 }, { "epoch": 89.83, "learning_rate": 0.00014157894736842105, "loss": 0.011, "step": 6019 }, { "epoch": 89.85, "learning_rate": 0.0001415438596491228, "loss": 0.016, "step": 6020 }, { "epoch": 89.86, "learning_rate": 0.00014150877192982455, "loss": 0.0099, "step": 6021 }, { "epoch": 89.88, "learning_rate": 0.0001414736842105263, "loss": 0.0036, "step": 6022 }, { "epoch": 89.89, "learning_rate": 0.00014143859649122807, "loss": 0.0856, "step": 6023 }, { "epoch": 89.91, "learning_rate": 0.00014140350877192982, "loss": 0.0012, "step": 6024 }, { "epoch": 89.92, "learning_rate": 0.00014136842105263157, "loss": 0.0026, "step": 6025 }, { "epoch": 89.94, "learning_rate": 0.00014133333333333332, "loss": 0.225, "step": 6026 }, { "epoch": 89.95, "learning_rate": 0.00014129824561403507, "loss": 0.2369, "step": 6027 }, { "epoch": 89.97, "learning_rate": 0.00014126315789473681, "loss": 0.0025, "step": 6028 }, { "epoch": 89.98, "learning_rate": 0.0001412280701754386, "loss": 0.0009, "step": 6029 }, { "epoch": 90.0, "learning_rate": 0.00014119298245614034, "loss": 0.0011, "step": 6030 }, { "epoch": 90.01, "learning_rate": 0.0001411578947368421, "loss": 0.0779, "step": 6031 }, { "epoch": 90.03, "learning_rate": 0.00014112280701754386, "loss": 0.127, "step": 6032 }, { "epoch": 90.04, "learning_rate": 0.0001410877192982456, "loss": 0.1497, "step": 6033 }, { "epoch": 90.06, "learning_rate": 0.00014105263157894736, "loss": 0.0013, "step": 6034 }, { "epoch": 90.07, "learning_rate": 0.0001410175438596491, "loss": 0.0189, "step": 6035 }, { "epoch": 90.09, "learning_rate": 0.00014098245614035086, "loss": 0.2014, "step": 6036 }, { "epoch": 90.1, "learning_rate": 0.0001409473684210526, "loss": 0.0027, "step": 6037 }, { "epoch": 90.12, "learning_rate": 0.00014091228070175438, "loss": 0.0085, "step": 6038 }, { "epoch": 90.13, "learning_rate": 0.00014087719298245613, "loss": 0.0017, "step": 6039 }, { "epoch": 90.15, "learning_rate": 0.00014084210526315788, "loss": 0.0009, "step": 6040 }, { "epoch": 90.16, "learning_rate": 0.00014080701754385963, "loss": 0.0031, "step": 6041 }, { "epoch": 90.18, "learning_rate": 0.00014077192982456138, "loss": 0.0009, "step": 6042 }, { "epoch": 90.19, "learning_rate": 0.00014073684210526315, "loss": 0.0007, "step": 6043 }, { "epoch": 90.21, "learning_rate": 0.0001407017543859649, "loss": 0.0053, "step": 6044 }, { "epoch": 90.22, "learning_rate": 0.00014066666666666665, "loss": 0.0176, "step": 6045 }, { "epoch": 90.24, "learning_rate": 0.0001406315789473684, "loss": 0.0015, "step": 6046 }, { "epoch": 90.25, "learning_rate": 0.00014059649122807018, "loss": 0.0021, "step": 6047 }, { "epoch": 90.27, "learning_rate": 0.00014056140350877193, "loss": 0.0071, "step": 6048 }, { "epoch": 90.28, "learning_rate": 0.00014052631578947367, "loss": 0.0008, "step": 6049 }, { "epoch": 90.3, "learning_rate": 0.00014049122807017542, "loss": 0.0057, "step": 6050 }, { "epoch": 90.31, "learning_rate": 0.00014045614035087717, "loss": 0.0154, "step": 6051 }, { "epoch": 90.33, "learning_rate": 0.00014042105263157892, "loss": 0.001, "step": 6052 }, { "epoch": 90.34, "learning_rate": 0.0001403859649122807, "loss": 0.001, "step": 6053 }, { "epoch": 90.36, "learning_rate": 0.00014035087719298245, "loss": 0.0235, "step": 6054 }, { "epoch": 90.37, "learning_rate": 0.0001403157894736842, "loss": 0.0128, "step": 6055 }, { "epoch": 90.39, "learning_rate": 0.00014028070175438597, "loss": 0.0008, "step": 6056 }, { "epoch": 90.4, "learning_rate": 0.0001402456140350877, "loss": 0.0027, "step": 6057 }, { "epoch": 90.42, "learning_rate": 0.00014021052631578947, "loss": 0.0024, "step": 6058 }, { "epoch": 90.43, "learning_rate": 0.00014017543859649122, "loss": 0.0008, "step": 6059 }, { "epoch": 90.45, "learning_rate": 0.00014014035087719297, "loss": 0.0984, "step": 6060 }, { "epoch": 90.46, "learning_rate": 0.00014010526315789471, "loss": 0.0008, "step": 6061 }, { "epoch": 90.48, "learning_rate": 0.0001400701754385965, "loss": 0.0008, "step": 6062 }, { "epoch": 90.49, "learning_rate": 0.00014003508771929824, "loss": 0.0664, "step": 6063 }, { "epoch": 90.51, "learning_rate": 0.00014, "loss": 0.0387, "step": 6064 }, { "epoch": 90.52, "learning_rate": 0.00013996491228070174, "loss": 0.0026, "step": 6065 }, { "epoch": 90.54, "learning_rate": 0.00013992982456140349, "loss": 0.003, "step": 6066 }, { "epoch": 90.55, "learning_rate": 0.00013989473684210523, "loss": 0.0011, "step": 6067 }, { "epoch": 90.57, "learning_rate": 0.000139859649122807, "loss": 0.0022, "step": 6068 }, { "epoch": 90.58, "learning_rate": 0.00013982456140350876, "loss": 0.001, "step": 6069 }, { "epoch": 90.59, "learning_rate": 0.0001397894736842105, "loss": 0.0246, "step": 6070 }, { "epoch": 90.61, "learning_rate": 0.00013975438596491228, "loss": 0.0157, "step": 6071 }, { "epoch": 90.62, "learning_rate": 0.00013971929824561403, "loss": 0.001, "step": 6072 }, { "epoch": 90.64, "learning_rate": 0.00013968421052631578, "loss": 0.0108, "step": 6073 }, { "epoch": 90.65, "learning_rate": 0.00013964912280701753, "loss": 0.0009, "step": 6074 }, { "epoch": 90.67, "learning_rate": 0.00013961403508771928, "loss": 0.0008, "step": 6075 }, { "epoch": 90.68, "learning_rate": 0.00013957894736842103, "loss": 0.0034, "step": 6076 }, { "epoch": 90.7, "learning_rate": 0.0001395438596491228, "loss": 0.0009, "step": 6077 }, { "epoch": 90.71, "learning_rate": 0.00013950877192982455, "loss": 0.0557, "step": 6078 }, { "epoch": 90.73, "learning_rate": 0.0001394736842105263, "loss": 0.0007, "step": 6079 }, { "epoch": 90.74, "learning_rate": 0.00013943859649122808, "loss": 0.0019, "step": 6080 }, { "epoch": 90.76, "learning_rate": 0.0001394035087719298, "loss": 0.0069, "step": 6081 }, { "epoch": 90.77, "learning_rate": 0.00013936842105263157, "loss": 0.0013, "step": 6082 }, { "epoch": 90.79, "learning_rate": 0.00013933333333333332, "loss": 0.0486, "step": 6083 }, { "epoch": 90.8, "learning_rate": 0.00013929824561403507, "loss": 0.0448, "step": 6084 }, { "epoch": 90.82, "learning_rate": 0.00013926315789473682, "loss": 0.0008, "step": 6085 }, { "epoch": 90.83, "learning_rate": 0.0001392280701754386, "loss": 0.0208, "step": 6086 }, { "epoch": 90.85, "learning_rate": 0.00013919298245614035, "loss": 0.0014, "step": 6087 }, { "epoch": 90.86, "learning_rate": 0.0001391578947368421, "loss": 0.258, "step": 6088 }, { "epoch": 90.88, "learning_rate": 0.00013912280701754384, "loss": 0.0007, "step": 6089 }, { "epoch": 90.89, "learning_rate": 0.0001390877192982456, "loss": 0.0034, "step": 6090 }, { "epoch": 90.91, "learning_rate": 0.00013905263157894734, "loss": 0.001, "step": 6091 }, { "epoch": 90.92, "learning_rate": 0.00013901754385964912, "loss": 0.0009, "step": 6092 }, { "epoch": 90.94, "learning_rate": 0.00013898245614035086, "loss": 0.0006, "step": 6093 }, { "epoch": 90.95, "learning_rate": 0.00013894736842105261, "loss": 0.0012, "step": 6094 }, { "epoch": 90.97, "learning_rate": 0.0001389122807017544, "loss": 0.001, "step": 6095 }, { "epoch": 90.98, "learning_rate": 0.00013887719298245614, "loss": 0.001, "step": 6096 }, { "epoch": 91.0, "learning_rate": 0.0001388421052631579, "loss": 0.0011, "step": 6097 }, { "epoch": 91.01, "learning_rate": 0.00013880701754385964, "loss": 0.0022, "step": 6098 }, { "epoch": 91.03, "learning_rate": 0.00013877192982456138, "loss": 0.0029, "step": 6099 }, { "epoch": 91.04, "learning_rate": 0.00013873684210526313, "loss": 0.0463, "step": 6100 }, { "epoch": 91.06, "learning_rate": 0.0001387017543859649, "loss": 0.0864, "step": 6101 }, { "epoch": 91.07, "learning_rate": 0.00013866666666666666, "loss": 0.001, "step": 6102 }, { "epoch": 91.09, "learning_rate": 0.0001386315789473684, "loss": 0.0009, "step": 6103 }, { "epoch": 91.1, "learning_rate": 0.00013859649122807016, "loss": 0.0079, "step": 6104 }, { "epoch": 91.12, "learning_rate": 0.0001385614035087719, "loss": 0.0109, "step": 6105 }, { "epoch": 91.13, "learning_rate": 0.00013852631578947368, "loss": 0.0017, "step": 6106 }, { "epoch": 91.15, "learning_rate": 0.00013849122807017543, "loss": 0.0009, "step": 6107 }, { "epoch": 91.16, "learning_rate": 0.00013845614035087718, "loss": 0.0028, "step": 6108 }, { "epoch": 91.18, "learning_rate": 0.00013842105263157893, "loss": 0.0007, "step": 6109 }, { "epoch": 91.19, "learning_rate": 0.0001383859649122807, "loss": 0.0013, "step": 6110 }, { "epoch": 91.21, "learning_rate": 0.00013835087719298245, "loss": 0.0051, "step": 6111 }, { "epoch": 91.22, "learning_rate": 0.0001383157894736842, "loss": 0.0016, "step": 6112 }, { "epoch": 91.24, "learning_rate": 0.00013828070175438595, "loss": 0.0112, "step": 6113 }, { "epoch": 91.25, "learning_rate": 0.0001382456140350877, "loss": 0.0008, "step": 6114 }, { "epoch": 91.27, "learning_rate": 0.00013821052631578945, "loss": 0.1127, "step": 6115 }, { "epoch": 91.28, "learning_rate": 0.00013817543859649122, "loss": 0.0011, "step": 6116 }, { "epoch": 91.3, "learning_rate": 0.00013814035087719297, "loss": 0.0027, "step": 6117 }, { "epoch": 91.31, "learning_rate": 0.00013810526315789472, "loss": 0.0007, "step": 6118 }, { "epoch": 91.33, "learning_rate": 0.0001380701754385965, "loss": 0.0555, "step": 6119 }, { "epoch": 91.34, "learning_rate": 0.00013803508771929824, "loss": 0.0025, "step": 6120 }, { "epoch": 91.36, "learning_rate": 0.000138, "loss": 0.0009, "step": 6121 }, { "epoch": 91.37, "learning_rate": 0.00013796491228070174, "loss": 0.0059, "step": 6122 }, { "epoch": 91.39, "learning_rate": 0.0001379298245614035, "loss": 0.0016, "step": 6123 }, { "epoch": 91.4, "learning_rate": 0.00013789473684210524, "loss": 0.0484, "step": 6124 }, { "epoch": 91.42, "learning_rate": 0.00013785964912280702, "loss": 0.0589, "step": 6125 }, { "epoch": 91.43, "learning_rate": 0.00013782456140350876, "loss": 0.0498, "step": 6126 }, { "epoch": 91.45, "learning_rate": 0.0001377894736842105, "loss": 0.0007, "step": 6127 }, { "epoch": 91.46, "learning_rate": 0.00013775438596491226, "loss": 0.0008, "step": 6128 }, { "epoch": 91.48, "learning_rate": 0.000137719298245614, "loss": 0.0011, "step": 6129 }, { "epoch": 91.49, "learning_rate": 0.00013768421052631576, "loss": 0.0008, "step": 6130 }, { "epoch": 91.51, "learning_rate": 0.00013764912280701754, "loss": 0.001, "step": 6131 }, { "epoch": 91.52, "learning_rate": 0.00013761403508771928, "loss": 0.0006, "step": 6132 }, { "epoch": 91.54, "learning_rate": 0.00013757894736842103, "loss": 0.0101, "step": 6133 }, { "epoch": 91.55, "learning_rate": 0.0001375438596491228, "loss": 0.0091, "step": 6134 }, { "epoch": 91.57, "learning_rate": 0.00013750877192982456, "loss": 0.0023, "step": 6135 }, { "epoch": 91.58, "learning_rate": 0.0001374736842105263, "loss": 0.001, "step": 6136 }, { "epoch": 91.59, "learning_rate": 0.00013743859649122806, "loss": 0.0009, "step": 6137 }, { "epoch": 91.61, "learning_rate": 0.0001374035087719298, "loss": 0.0023, "step": 6138 }, { "epoch": 91.62, "learning_rate": 0.00013736842105263155, "loss": 0.0014, "step": 6139 }, { "epoch": 91.64, "learning_rate": 0.00013733333333333333, "loss": 0.0016, "step": 6140 }, { "epoch": 91.65, "learning_rate": 0.00013729824561403508, "loss": 0.0045, "step": 6141 }, { "epoch": 91.67, "learning_rate": 0.00013726315789473683, "loss": 0.2162, "step": 6142 }, { "epoch": 91.68, "learning_rate": 0.0001372280701754386, "loss": 0.0006, "step": 6143 }, { "epoch": 91.7, "learning_rate": 0.00013719298245614035, "loss": 0.0014, "step": 6144 }, { "epoch": 91.71, "learning_rate": 0.0001371578947368421, "loss": 0.0006, "step": 6145 }, { "epoch": 91.73, "learning_rate": 0.00013712280701754385, "loss": 0.0007, "step": 6146 }, { "epoch": 91.74, "learning_rate": 0.0001370877192982456, "loss": 0.0077, "step": 6147 }, { "epoch": 91.76, "learning_rate": 0.00013705263157894735, "loss": 0.0008, "step": 6148 }, { "epoch": 91.77, "learning_rate": 0.00013701754385964912, "loss": 0.004, "step": 6149 }, { "epoch": 91.79, "learning_rate": 0.00013698245614035087, "loss": 0.0006, "step": 6150 }, { "epoch": 91.8, "learning_rate": 0.00013694736842105262, "loss": 0.1144, "step": 6151 }, { "epoch": 91.82, "learning_rate": 0.00013691228070175437, "loss": 0.0006, "step": 6152 }, { "epoch": 91.83, "learning_rate": 0.00013687719298245612, "loss": 0.0028, "step": 6153 }, { "epoch": 91.85, "learning_rate": 0.00013684210526315787, "loss": 0.0038, "step": 6154 }, { "epoch": 91.86, "learning_rate": 0.00013680701754385964, "loss": 0.0016, "step": 6155 }, { "epoch": 91.88, "learning_rate": 0.0001367719298245614, "loss": 0.0007, "step": 6156 }, { "epoch": 91.89, "learning_rate": 0.00013673684210526314, "loss": 0.0006, "step": 6157 }, { "epoch": 91.91, "learning_rate": 0.00013670175438596491, "loss": 0.001, "step": 6158 }, { "epoch": 91.92, "learning_rate": 0.00013666666666666666, "loss": 0.0008, "step": 6159 }, { "epoch": 91.94, "learning_rate": 0.0001366315789473684, "loss": 0.0173, "step": 6160 }, { "epoch": 91.95, "learning_rate": 0.00013659649122807016, "loss": 0.0082, "step": 6161 }, { "epoch": 91.97, "learning_rate": 0.0001365614035087719, "loss": 0.0005, "step": 6162 }, { "epoch": 91.98, "learning_rate": 0.00013652631578947366, "loss": 0.0015, "step": 6163 }, { "epoch": 92.0, "learning_rate": 0.00013649122807017543, "loss": 0.117, "step": 6164 }, { "epoch": 92.01, "learning_rate": 0.00013645614035087718, "loss": 0.0019, "step": 6165 }, { "epoch": 92.03, "learning_rate": 0.00013642105263157893, "loss": 0.0015, "step": 6166 }, { "epoch": 92.04, "learning_rate": 0.00013638596491228068, "loss": 0.0028, "step": 6167 }, { "epoch": 92.06, "learning_rate": 0.00013635087719298246, "loss": 0.0007, "step": 6168 }, { "epoch": 92.07, "learning_rate": 0.0001363157894736842, "loss": 0.0005, "step": 6169 }, { "epoch": 92.09, "learning_rate": 0.00013628070175438595, "loss": 0.0019, "step": 6170 }, { "epoch": 92.1, "learning_rate": 0.0001362456140350877, "loss": 0.0083, "step": 6171 }, { "epoch": 92.12, "learning_rate": 0.00013621052631578945, "loss": 0.0007, "step": 6172 }, { "epoch": 92.13, "learning_rate": 0.00013617543859649123, "loss": 0.0006, "step": 6173 }, { "epoch": 92.15, "learning_rate": 0.00013614035087719298, "loss": 0.0821, "step": 6174 }, { "epoch": 92.16, "learning_rate": 0.00013610526315789473, "loss": 0.0006, "step": 6175 }, { "epoch": 92.18, "learning_rate": 0.00013607017543859647, "loss": 0.0115, "step": 6176 }, { "epoch": 92.19, "learning_rate": 0.00013603508771929822, "loss": 0.0006, "step": 6177 }, { "epoch": 92.21, "learning_rate": 0.00013599999999999997, "loss": 0.0011, "step": 6178 }, { "epoch": 92.22, "learning_rate": 0.00013596491228070175, "loss": 0.0006, "step": 6179 }, { "epoch": 92.24, "learning_rate": 0.0001359298245614035, "loss": 0.0005, "step": 6180 }, { "epoch": 92.25, "learning_rate": 0.00013589473684210525, "loss": 0.0007, "step": 6181 }, { "epoch": 92.27, "learning_rate": 0.00013585964912280702, "loss": 0.0055, "step": 6182 }, { "epoch": 92.28, "learning_rate": 0.00013582456140350877, "loss": 0.0007, "step": 6183 }, { "epoch": 92.3, "learning_rate": 0.00013578947368421052, "loss": 0.0107, "step": 6184 }, { "epoch": 92.31, "learning_rate": 0.00013575438596491227, "loss": 0.0112, "step": 6185 }, { "epoch": 92.33, "learning_rate": 0.00013571929824561402, "loss": 0.0007, "step": 6186 }, { "epoch": 92.34, "learning_rate": 0.00013568421052631577, "loss": 0.0005, "step": 6187 }, { "epoch": 92.36, "learning_rate": 0.00013564912280701754, "loss": 0.0006, "step": 6188 }, { "epoch": 92.37, "learning_rate": 0.0001356140350877193, "loss": 0.0073, "step": 6189 }, { "epoch": 92.39, "learning_rate": 0.00013557894736842104, "loss": 0.0006, "step": 6190 }, { "epoch": 92.4, "learning_rate": 0.0001355438596491228, "loss": 0.0006, "step": 6191 }, { "epoch": 92.42, "learning_rate": 0.00013550877192982456, "loss": 0.0006, "step": 6192 }, { "epoch": 92.43, "learning_rate": 0.0001354736842105263, "loss": 0.0005, "step": 6193 }, { "epoch": 92.45, "learning_rate": 0.00013543859649122806, "loss": 0.0007, "step": 6194 }, { "epoch": 92.46, "learning_rate": 0.0001354035087719298, "loss": 0.0005, "step": 6195 }, { "epoch": 92.48, "learning_rate": 0.00013536842105263156, "loss": 0.1313, "step": 6196 }, { "epoch": 92.49, "learning_rate": 0.00013533333333333333, "loss": 0.0005, "step": 6197 }, { "epoch": 92.51, "learning_rate": 0.00013529824561403508, "loss": 0.0802, "step": 6198 }, { "epoch": 92.52, "learning_rate": 0.00013526315789473683, "loss": 0.0005, "step": 6199 }, { "epoch": 92.54, "learning_rate": 0.00013522807017543858, "loss": 0.0621, "step": 6200 }, { "epoch": 92.54, "eval_accuracy": 0.8607440039158101, "eval_f1": 0.8599750596640915, "eval_loss": 0.8078528642654419, "eval_runtime": 344.8499, "eval_samples_per_second": 11.849, "eval_steps_per_second": 0.742, "step": 6200 }, { "epoch": 92.55, "learning_rate": 0.00013519298245614033, "loss": 0.0008, "step": 6201 }, { "epoch": 92.57, "learning_rate": 0.00013515789473684208, "loss": 0.0007, "step": 6202 }, { "epoch": 92.58, "learning_rate": 0.00013512280701754385, "loss": 0.001, "step": 6203 }, { "epoch": 92.59, "learning_rate": 0.0001350877192982456, "loss": 0.0007, "step": 6204 }, { "epoch": 92.61, "learning_rate": 0.00013505263157894735, "loss": 0.0006, "step": 6205 }, { "epoch": 92.62, "learning_rate": 0.00013501754385964913, "loss": 0.0015, "step": 6206 }, { "epoch": 92.64, "learning_rate": 0.00013498245614035088, "loss": 0.0006, "step": 6207 }, { "epoch": 92.65, "learning_rate": 0.00013494736842105262, "loss": 0.0005, "step": 6208 }, { "epoch": 92.67, "learning_rate": 0.00013491228070175437, "loss": 0.0017, "step": 6209 }, { "epoch": 92.68, "learning_rate": 0.00013487719298245612, "loss": 0.0016, "step": 6210 }, { "epoch": 92.7, "learning_rate": 0.00013484210526315787, "loss": 0.0011, "step": 6211 }, { "epoch": 92.71, "learning_rate": 0.00013480701754385965, "loss": 0.0957, "step": 6212 }, { "epoch": 92.73, "learning_rate": 0.0001347719298245614, "loss": 0.0011, "step": 6213 }, { "epoch": 92.74, "learning_rate": 0.00013473684210526314, "loss": 0.0005, "step": 6214 }, { "epoch": 92.76, "learning_rate": 0.0001347017543859649, "loss": 0.0006, "step": 6215 }, { "epoch": 92.77, "learning_rate": 0.00013466666666666667, "loss": 0.0032, "step": 6216 }, { "epoch": 92.79, "learning_rate": 0.0001346315789473684, "loss": 0.0005, "step": 6217 }, { "epoch": 92.8, "learning_rate": 0.00013459649122807017, "loss": 0.0136, "step": 6218 }, { "epoch": 92.82, "learning_rate": 0.00013456140350877192, "loss": 0.0005, "step": 6219 }, { "epoch": 92.83, "learning_rate": 0.00013452631578947366, "loss": 0.0005, "step": 6220 }, { "epoch": 92.85, "learning_rate": 0.00013449122807017544, "loss": 0.0007, "step": 6221 }, { "epoch": 92.86, "learning_rate": 0.0001344561403508772, "loss": 0.0007, "step": 6222 }, { "epoch": 92.88, "learning_rate": 0.00013442105263157894, "loss": 0.0016, "step": 6223 }, { "epoch": 92.89, "learning_rate": 0.0001343859649122807, "loss": 0.0035, "step": 6224 }, { "epoch": 92.91, "learning_rate": 0.00013435087719298244, "loss": 0.0029, "step": 6225 }, { "epoch": 92.92, "learning_rate": 0.00013431578947368418, "loss": 0.0985, "step": 6226 }, { "epoch": 92.94, "learning_rate": 0.00013428070175438596, "loss": 0.0301, "step": 6227 }, { "epoch": 92.95, "learning_rate": 0.0001342456140350877, "loss": 0.0009, "step": 6228 }, { "epoch": 92.97, "learning_rate": 0.00013421052631578946, "loss": 0.0006, "step": 6229 }, { "epoch": 92.98, "learning_rate": 0.0001341754385964912, "loss": 0.0004, "step": 6230 }, { "epoch": 93.0, "learning_rate": 0.00013414035087719298, "loss": 0.1966, "step": 6231 }, { "epoch": 93.01, "learning_rate": 0.00013410526315789473, "loss": 0.0009, "step": 6232 }, { "epoch": 93.03, "learning_rate": 0.00013407017543859648, "loss": 0.0005, "step": 6233 }, { "epoch": 93.04, "learning_rate": 0.00013403508771929823, "loss": 0.0006, "step": 6234 }, { "epoch": 93.06, "learning_rate": 0.00013399999999999998, "loss": 0.1749, "step": 6235 }, { "epoch": 93.07, "learning_rate": 0.00013396491228070175, "loss": 0.0007, "step": 6236 }, { "epoch": 93.09, "learning_rate": 0.0001339298245614035, "loss": 0.0009, "step": 6237 }, { "epoch": 93.1, "learning_rate": 0.00013389473684210525, "loss": 0.0663, "step": 6238 }, { "epoch": 93.12, "learning_rate": 0.000133859649122807, "loss": 0.0005, "step": 6239 }, { "epoch": 93.13, "learning_rate": 0.00013382456140350878, "loss": 0.0692, "step": 6240 }, { "epoch": 93.15, "learning_rate": 0.0001337894736842105, "loss": 0.001, "step": 6241 }, { "epoch": 93.16, "learning_rate": 0.00013375438596491227, "loss": 0.0006, "step": 6242 }, { "epoch": 93.18, "learning_rate": 0.00013371929824561402, "loss": 0.0683, "step": 6243 }, { "epoch": 93.19, "learning_rate": 0.00013368421052631577, "loss": 0.0006, "step": 6244 }, { "epoch": 93.21, "learning_rate": 0.00013364912280701755, "loss": 0.002, "step": 6245 }, { "epoch": 93.22, "learning_rate": 0.0001336140350877193, "loss": 0.0018, "step": 6246 }, { "epoch": 93.24, "learning_rate": 0.00013357894736842104, "loss": 0.0647, "step": 6247 }, { "epoch": 93.25, "learning_rate": 0.0001335438596491228, "loss": 0.0004, "step": 6248 }, { "epoch": 93.27, "learning_rate": 0.00013350877192982454, "loss": 0.0005, "step": 6249 }, { "epoch": 93.28, "learning_rate": 0.0001334736842105263, "loss": 0.0004, "step": 6250 }, { "epoch": 93.3, "learning_rate": 0.00013343859649122807, "loss": 0.0005, "step": 6251 }, { "epoch": 93.31, "learning_rate": 0.00013340350877192982, "loss": 0.2038, "step": 6252 }, { "epoch": 93.33, "learning_rate": 0.00013336842105263156, "loss": 0.0026, "step": 6253 }, { "epoch": 93.34, "learning_rate": 0.0001333333333333333, "loss": 0.0317, "step": 6254 }, { "epoch": 93.36, "learning_rate": 0.0001332982456140351, "loss": 0.0086, "step": 6255 }, { "epoch": 93.37, "learning_rate": 0.00013326315789473684, "loss": 0.0007, "step": 6256 }, { "epoch": 93.39, "learning_rate": 0.00013322807017543859, "loss": 0.0007, "step": 6257 }, { "epoch": 93.4, "learning_rate": 0.00013319298245614033, "loss": 0.0006, "step": 6258 }, { "epoch": 93.42, "learning_rate": 0.00013315789473684208, "loss": 0.0013, "step": 6259 }, { "epoch": 93.43, "learning_rate": 0.00013312280701754386, "loss": 0.0005, "step": 6260 }, { "epoch": 93.45, "learning_rate": 0.0001330877192982456, "loss": 0.0021, "step": 6261 }, { "epoch": 93.46, "learning_rate": 0.00013305263157894736, "loss": 0.0004, "step": 6262 }, { "epoch": 93.48, "learning_rate": 0.0001330175438596491, "loss": 0.0004, "step": 6263 }, { "epoch": 93.49, "learning_rate": 0.00013298245614035085, "loss": 0.0005, "step": 6264 }, { "epoch": 93.51, "learning_rate": 0.0001329473684210526, "loss": 0.0008, "step": 6265 }, { "epoch": 93.52, "learning_rate": 0.00013291228070175438, "loss": 0.0006, "step": 6266 }, { "epoch": 93.54, "learning_rate": 0.00013287719298245613, "loss": 0.0008, "step": 6267 }, { "epoch": 93.55, "learning_rate": 0.00013284210526315788, "loss": 0.0007, "step": 6268 }, { "epoch": 93.57, "learning_rate": 0.00013280701754385965, "loss": 0.0006, "step": 6269 }, { "epoch": 93.58, "learning_rate": 0.0001327719298245614, "loss": 0.0005, "step": 6270 }, { "epoch": 93.59, "learning_rate": 0.00013273684210526315, "loss": 0.0024, "step": 6271 }, { "epoch": 93.61, "learning_rate": 0.0001327017543859649, "loss": 0.0011, "step": 6272 }, { "epoch": 93.62, "learning_rate": 0.00013266666666666665, "loss": 0.0007, "step": 6273 }, { "epoch": 93.64, "learning_rate": 0.0001326315789473684, "loss": 0.001, "step": 6274 }, { "epoch": 93.65, "learning_rate": 0.00013259649122807017, "loss": 0.0005, "step": 6275 }, { "epoch": 93.67, "learning_rate": 0.00013256140350877192, "loss": 0.0005, "step": 6276 }, { "epoch": 93.68, "learning_rate": 0.00013252631578947367, "loss": 0.0384, "step": 6277 }, { "epoch": 93.7, "learning_rate": 0.00013249122807017542, "loss": 0.0009, "step": 6278 }, { "epoch": 93.71, "learning_rate": 0.0001324561403508772, "loss": 0.0007, "step": 6279 }, { "epoch": 93.73, "learning_rate": 0.00013242105263157892, "loss": 0.0027, "step": 6280 }, { "epoch": 93.74, "learning_rate": 0.0001323859649122807, "loss": 0.0005, "step": 6281 }, { "epoch": 93.76, "learning_rate": 0.00013235087719298244, "loss": 0.0039, "step": 6282 }, { "epoch": 93.77, "learning_rate": 0.0001323157894736842, "loss": 0.0004, "step": 6283 }, { "epoch": 93.79, "learning_rate": 0.00013228070175438597, "loss": 0.001, "step": 6284 }, { "epoch": 93.8, "learning_rate": 0.00013224561403508771, "loss": 0.001, "step": 6285 }, { "epoch": 93.82, "learning_rate": 0.00013221052631578946, "loss": 0.1333, "step": 6286 }, { "epoch": 93.83, "learning_rate": 0.0001321754385964912, "loss": 0.0008, "step": 6287 }, { "epoch": 93.85, "learning_rate": 0.00013214035087719296, "loss": 0.0007, "step": 6288 }, { "epoch": 93.86, "learning_rate": 0.0001321052631578947, "loss": 0.0018, "step": 6289 }, { "epoch": 93.88, "learning_rate": 0.00013207017543859649, "loss": 0.1526, "step": 6290 }, { "epoch": 93.89, "learning_rate": 0.00013203508771929823, "loss": 0.0005, "step": 6291 }, { "epoch": 93.91, "learning_rate": 0.00013199999999999998, "loss": 0.0031, "step": 6292 }, { "epoch": 93.92, "learning_rate": 0.00013196491228070173, "loss": 0.0005, "step": 6293 }, { "epoch": 93.94, "learning_rate": 0.0001319298245614035, "loss": 0.0926, "step": 6294 }, { "epoch": 93.95, "learning_rate": 0.00013189473684210526, "loss": 0.0006, "step": 6295 }, { "epoch": 93.97, "learning_rate": 0.000131859649122807, "loss": 0.0005, "step": 6296 }, { "epoch": 93.98, "learning_rate": 0.00013182456140350875, "loss": 0.0016, "step": 6297 }, { "epoch": 94.0, "learning_rate": 0.0001317894736842105, "loss": 0.0028, "step": 6298 }, { "epoch": 94.01, "learning_rate": 0.00013175438596491228, "loss": 0.0095, "step": 6299 }, { "epoch": 94.03, "learning_rate": 0.00013171929824561403, "loss": 0.0258, "step": 6300 }, { "epoch": 94.04, "learning_rate": 0.00013168421052631578, "loss": 0.0004, "step": 6301 }, { "epoch": 94.06, "learning_rate": 0.00013164912280701753, "loss": 0.0005, "step": 6302 }, { "epoch": 94.07, "learning_rate": 0.0001316140350877193, "loss": 0.0438, "step": 6303 }, { "epoch": 94.09, "learning_rate": 0.00013157894736842102, "loss": 0.0025, "step": 6304 }, { "epoch": 94.1, "learning_rate": 0.0001315438596491228, "loss": 0.0166, "step": 6305 }, { "epoch": 94.12, "learning_rate": 0.00013150877192982455, "loss": 0.0006, "step": 6306 }, { "epoch": 94.13, "learning_rate": 0.0001314736842105263, "loss": 0.0005, "step": 6307 }, { "epoch": 94.15, "learning_rate": 0.00013143859649122807, "loss": 0.0006, "step": 6308 }, { "epoch": 94.16, "learning_rate": 0.00013140350877192982, "loss": 0.0017, "step": 6309 }, { "epoch": 94.18, "learning_rate": 0.00013136842105263157, "loss": 0.0004, "step": 6310 }, { "epoch": 94.19, "learning_rate": 0.00013133333333333332, "loss": 0.0006, "step": 6311 }, { "epoch": 94.21, "learning_rate": 0.00013129824561403507, "loss": 0.0006, "step": 6312 }, { "epoch": 94.22, "learning_rate": 0.00013126315789473682, "loss": 0.0004, "step": 6313 }, { "epoch": 94.24, "learning_rate": 0.0001312280701754386, "loss": 0.0004, "step": 6314 }, { "epoch": 94.25, "learning_rate": 0.00013119298245614034, "loss": 0.0006, "step": 6315 }, { "epoch": 94.27, "learning_rate": 0.0001311578947368421, "loss": 0.0004, "step": 6316 }, { "epoch": 94.28, "learning_rate": 0.00013112280701754384, "loss": 0.05, "step": 6317 }, { "epoch": 94.3, "learning_rate": 0.00013108771929824561, "loss": 0.0743, "step": 6318 }, { "epoch": 94.31, "learning_rate": 0.00013105263157894736, "loss": 0.0017, "step": 6319 }, { "epoch": 94.33, "learning_rate": 0.0001310175438596491, "loss": 0.1355, "step": 6320 }, { "epoch": 94.34, "learning_rate": 0.00013098245614035086, "loss": 0.0004, "step": 6321 }, { "epoch": 94.36, "learning_rate": 0.0001309473684210526, "loss": 0.0004, "step": 6322 }, { "epoch": 94.37, "learning_rate": 0.00013091228070175438, "loss": 0.0094, "step": 6323 }, { "epoch": 94.39, "learning_rate": 0.00013087719298245613, "loss": 0.0006, "step": 6324 }, { "epoch": 94.4, "learning_rate": 0.00013084210526315788, "loss": 0.0006, "step": 6325 }, { "epoch": 94.42, "learning_rate": 0.00013080701754385963, "loss": 0.0016, "step": 6326 }, { "epoch": 94.43, "learning_rate": 0.0001307719298245614, "loss": 0.0007, "step": 6327 }, { "epoch": 94.45, "learning_rate": 0.00013073684210526313, "loss": 0.0006, "step": 6328 }, { "epoch": 94.46, "learning_rate": 0.0001307017543859649, "loss": 0.0005, "step": 6329 }, { "epoch": 94.48, "learning_rate": 0.00013066666666666665, "loss": 0.0007, "step": 6330 }, { "epoch": 94.49, "learning_rate": 0.0001306315789473684, "loss": 0.0004, "step": 6331 }, { "epoch": 94.51, "learning_rate": 0.00013059649122807018, "loss": 0.0312, "step": 6332 }, { "epoch": 94.52, "learning_rate": 0.00013056140350877193, "loss": 0.0006, "step": 6333 }, { "epoch": 94.54, "learning_rate": 0.00013052631578947368, "loss": 0.0689, "step": 6334 }, { "epoch": 94.55, "learning_rate": 0.00013049122807017542, "loss": 0.0004, "step": 6335 }, { "epoch": 94.57, "learning_rate": 0.00013045614035087717, "loss": 0.002, "step": 6336 }, { "epoch": 94.58, "learning_rate": 0.00013042105263157892, "loss": 0.0004, "step": 6337 }, { "epoch": 94.59, "learning_rate": 0.0001303859649122807, "loss": 0.0005, "step": 6338 }, { "epoch": 94.61, "learning_rate": 0.00013035087719298245, "loss": 0.0005, "step": 6339 }, { "epoch": 94.62, "learning_rate": 0.0001303157894736842, "loss": 0.0027, "step": 6340 }, { "epoch": 94.64, "learning_rate": 0.00013028070175438594, "loss": 0.0174, "step": 6341 }, { "epoch": 94.65, "learning_rate": 0.00013024561403508772, "loss": 0.0006, "step": 6342 }, { "epoch": 94.67, "learning_rate": 0.00013021052631578944, "loss": 0.0006, "step": 6343 }, { "epoch": 94.68, "learning_rate": 0.00013017543859649122, "loss": 0.0004, "step": 6344 }, { "epoch": 94.7, "learning_rate": 0.00013014035087719297, "loss": 0.0829, "step": 6345 }, { "epoch": 94.71, "learning_rate": 0.00013010526315789472, "loss": 0.0004, "step": 6346 }, { "epoch": 94.73, "learning_rate": 0.0001300701754385965, "loss": 0.0004, "step": 6347 }, { "epoch": 94.74, "learning_rate": 0.00013003508771929824, "loss": 0.0714, "step": 6348 }, { "epoch": 94.76, "learning_rate": 0.00013, "loss": 0.0005, "step": 6349 }, { "epoch": 94.77, "learning_rate": 0.00012996491228070174, "loss": 0.0005, "step": 6350 }, { "epoch": 94.79, "learning_rate": 0.0001299298245614035, "loss": 0.048, "step": 6351 }, { "epoch": 94.8, "learning_rate": 0.00012989473684210523, "loss": 0.0013, "step": 6352 }, { "epoch": 94.82, "learning_rate": 0.000129859649122807, "loss": 0.0238, "step": 6353 }, { "epoch": 94.83, "learning_rate": 0.00012982456140350876, "loss": 0.0007, "step": 6354 }, { "epoch": 94.85, "learning_rate": 0.0001297894736842105, "loss": 0.0733, "step": 6355 }, { "epoch": 94.86, "learning_rate": 0.00012975438596491228, "loss": 0.0005, "step": 6356 }, { "epoch": 94.88, "learning_rate": 0.00012971929824561403, "loss": 0.0747, "step": 6357 }, { "epoch": 94.89, "learning_rate": 0.00012968421052631578, "loss": 0.0004, "step": 6358 }, { "epoch": 94.91, "learning_rate": 0.00012964912280701753, "loss": 0.0004, "step": 6359 }, { "epoch": 94.92, "learning_rate": 0.00012961403508771928, "loss": 0.0004, "step": 6360 }, { "epoch": 94.94, "learning_rate": 0.00012957894736842103, "loss": 0.0108, "step": 6361 }, { "epoch": 94.95, "learning_rate": 0.0001295438596491228, "loss": 0.0044, "step": 6362 }, { "epoch": 94.97, "learning_rate": 0.00012950877192982455, "loss": 0.0004, "step": 6363 }, { "epoch": 94.98, "learning_rate": 0.0001294736842105263, "loss": 0.0004, "step": 6364 }, { "epoch": 95.0, "learning_rate": 0.00012943859649122805, "loss": 0.0027, "step": 6365 }, { "epoch": 95.01, "learning_rate": 0.00012940350877192983, "loss": 0.0074, "step": 6366 }, { "epoch": 95.03, "learning_rate": 0.00012936842105263155, "loss": 0.0005, "step": 6367 }, { "epoch": 95.04, "learning_rate": 0.00012933333333333332, "loss": 0.0004, "step": 6368 }, { "epoch": 95.06, "learning_rate": 0.00012929824561403507, "loss": 0.0004, "step": 6369 }, { "epoch": 95.07, "learning_rate": 0.00012926315789473682, "loss": 0.0162, "step": 6370 }, { "epoch": 95.09, "learning_rate": 0.0001292280701754386, "loss": 0.0012, "step": 6371 }, { "epoch": 95.1, "learning_rate": 0.00012919298245614035, "loss": 0.0006, "step": 6372 }, { "epoch": 95.12, "learning_rate": 0.0001291578947368421, "loss": 0.0157, "step": 6373 }, { "epoch": 95.13, "learning_rate": 0.00012912280701754384, "loss": 0.0005, "step": 6374 }, { "epoch": 95.15, "learning_rate": 0.00012908771929824562, "loss": 0.0015, "step": 6375 }, { "epoch": 95.16, "learning_rate": 0.00012905263157894734, "loss": 0.0004, "step": 6376 }, { "epoch": 95.18, "learning_rate": 0.00012901754385964912, "loss": 0.1115, "step": 6377 }, { "epoch": 95.19, "learning_rate": 0.00012898245614035087, "loss": 0.0723, "step": 6378 }, { "epoch": 95.21, "learning_rate": 0.00012894736842105261, "loss": 0.0145, "step": 6379 }, { "epoch": 95.22, "learning_rate": 0.00012891228070175436, "loss": 0.0005, "step": 6380 }, { "epoch": 95.24, "learning_rate": 0.00012887719298245614, "loss": 0.1661, "step": 6381 }, { "epoch": 95.25, "learning_rate": 0.0001288421052631579, "loss": 0.0004, "step": 6382 }, { "epoch": 95.27, "learning_rate": 0.00012880701754385964, "loss": 0.001, "step": 6383 }, { "epoch": 95.28, "learning_rate": 0.00012877192982456139, "loss": 0.0004, "step": 6384 }, { "epoch": 95.3, "learning_rate": 0.00012873684210526313, "loss": 0.2419, "step": 6385 }, { "epoch": 95.31, "learning_rate": 0.0001287017543859649, "loss": 0.0008, "step": 6386 }, { "epoch": 95.33, "learning_rate": 0.00012866666666666666, "loss": 0.2924, "step": 6387 }, { "epoch": 95.34, "learning_rate": 0.0001286315789473684, "loss": 0.0004, "step": 6388 }, { "epoch": 95.36, "learning_rate": 0.00012859649122807016, "loss": 0.1491, "step": 6389 }, { "epoch": 95.37, "learning_rate": 0.00012856140350877193, "loss": 0.0007, "step": 6390 }, { "epoch": 95.39, "learning_rate": 0.00012852631578947365, "loss": 0.0036, "step": 6391 }, { "epoch": 95.4, "learning_rate": 0.00012849122807017543, "loss": 0.0036, "step": 6392 }, { "epoch": 95.42, "learning_rate": 0.00012845614035087718, "loss": 0.0024, "step": 6393 }, { "epoch": 95.43, "learning_rate": 0.00012842105263157893, "loss": 0.0925, "step": 6394 }, { "epoch": 95.45, "learning_rate": 0.0001283859649122807, "loss": 0.0011, "step": 6395 }, { "epoch": 95.46, "learning_rate": 0.00012835087719298245, "loss": 0.0024, "step": 6396 }, { "epoch": 95.48, "learning_rate": 0.0001283157894736842, "loss": 0.0005, "step": 6397 }, { "epoch": 95.49, "learning_rate": 0.00012828070175438595, "loss": 0.0031, "step": 6398 }, { "epoch": 95.51, "learning_rate": 0.00012824561403508773, "loss": 0.0012, "step": 6399 }, { "epoch": 95.52, "learning_rate": 0.00012821052631578945, "loss": 0.0017, "step": 6400 }, { "epoch": 95.52, "eval_accuracy": 0.8668624571708272, "eval_f1": 0.8678242564825082, "eval_loss": 0.7071595191955566, "eval_runtime": 345.1767, "eval_samples_per_second": 11.837, "eval_steps_per_second": 0.742, "step": 6400 }, { "epoch": 95.54, "learning_rate": 0.00012817543859649122, "loss": 0.0011, "step": 6401 }, { "epoch": 95.55, "learning_rate": 0.00012814035087719297, "loss": 0.1979, "step": 6402 }, { "epoch": 95.57, "learning_rate": 0.00012810526315789472, "loss": 0.205, "step": 6403 }, { "epoch": 95.58, "learning_rate": 0.00012807017543859647, "loss": 0.2207, "step": 6404 }, { "epoch": 95.59, "learning_rate": 0.00012803508771929825, "loss": 0.0012, "step": 6405 }, { "epoch": 95.61, "learning_rate": 0.000128, "loss": 0.0026, "step": 6406 }, { "epoch": 95.62, "learning_rate": 0.00012796491228070174, "loss": 0.0007, "step": 6407 }, { "epoch": 95.64, "learning_rate": 0.0001279298245614035, "loss": 0.0007, "step": 6408 }, { "epoch": 95.65, "learning_rate": 0.00012789473684210524, "loss": 0.0006, "step": 6409 }, { "epoch": 95.67, "learning_rate": 0.00012785964912280702, "loss": 0.0007, "step": 6410 }, { "epoch": 95.68, "learning_rate": 0.00012782456140350877, "loss": 0.0044, "step": 6411 }, { "epoch": 95.7, "learning_rate": 0.00012778947368421051, "loss": 0.0106, "step": 6412 }, { "epoch": 95.71, "learning_rate": 0.00012775438596491226, "loss": 0.0007, "step": 6413 }, { "epoch": 95.73, "learning_rate": 0.00012771929824561404, "loss": 0.0008, "step": 6414 }, { "epoch": 95.74, "learning_rate": 0.00012768421052631576, "loss": 0.0084, "step": 6415 }, { "epoch": 95.76, "learning_rate": 0.00012764912280701754, "loss": 0.0012, "step": 6416 }, { "epoch": 95.77, "learning_rate": 0.00012761403508771928, "loss": 0.0544, "step": 6417 }, { "epoch": 95.79, "learning_rate": 0.00012757894736842103, "loss": 0.01, "step": 6418 }, { "epoch": 95.8, "learning_rate": 0.0001275438596491228, "loss": 0.0022, "step": 6419 }, { "epoch": 95.82, "learning_rate": 0.00012750877192982456, "loss": 0.1372, "step": 6420 }, { "epoch": 95.83, "learning_rate": 0.0001274736842105263, "loss": 0.0036, "step": 6421 }, { "epoch": 95.85, "learning_rate": 0.00012743859649122806, "loss": 0.0005, "step": 6422 }, { "epoch": 95.86, "learning_rate": 0.00012740350877192983, "loss": 0.0006, "step": 6423 }, { "epoch": 95.88, "learning_rate": 0.00012736842105263155, "loss": 0.0088, "step": 6424 }, { "epoch": 95.89, "learning_rate": 0.00012733333333333333, "loss": 0.0007, "step": 6425 }, { "epoch": 95.91, "learning_rate": 0.00012729824561403508, "loss": 0.1873, "step": 6426 }, { "epoch": 95.92, "learning_rate": 0.00012726315789473683, "loss": 0.0013, "step": 6427 }, { "epoch": 95.94, "learning_rate": 0.00012722807017543858, "loss": 0.0006, "step": 6428 }, { "epoch": 95.95, "learning_rate": 0.00012719298245614035, "loss": 0.0006, "step": 6429 }, { "epoch": 95.97, "learning_rate": 0.00012715789473684207, "loss": 0.0028, "step": 6430 }, { "epoch": 95.98, "learning_rate": 0.00012712280701754385, "loss": 0.0006, "step": 6431 }, { "epoch": 96.0, "learning_rate": 0.0001270877192982456, "loss": 0.359, "step": 6432 }, { "epoch": 96.01, "learning_rate": 0.00012705263157894735, "loss": 0.0027, "step": 6433 }, { "epoch": 96.03, "learning_rate": 0.00012701754385964912, "loss": 0.0006, "step": 6434 }, { "epoch": 96.04, "learning_rate": 0.00012698245614035087, "loss": 0.0006, "step": 6435 }, { "epoch": 96.06, "learning_rate": 0.00012694736842105262, "loss": 0.1297, "step": 6436 }, { "epoch": 96.07, "learning_rate": 0.00012691228070175437, "loss": 0.0008, "step": 6437 }, { "epoch": 96.09, "learning_rate": 0.00012687719298245614, "loss": 0.0014, "step": 6438 }, { "epoch": 96.1, "learning_rate": 0.00012684210526315787, "loss": 0.045, "step": 6439 }, { "epoch": 96.12, "learning_rate": 0.00012680701754385964, "loss": 0.0009, "step": 6440 }, { "epoch": 96.13, "learning_rate": 0.0001267719298245614, "loss": 0.0013, "step": 6441 }, { "epoch": 96.15, "learning_rate": 0.00012673684210526314, "loss": 0.002, "step": 6442 }, { "epoch": 96.16, "learning_rate": 0.0001267017543859649, "loss": 0.0005, "step": 6443 }, { "epoch": 96.18, "learning_rate": 0.00012666666666666666, "loss": 0.1275, "step": 6444 }, { "epoch": 96.19, "learning_rate": 0.0001266315789473684, "loss": 0.0005, "step": 6445 }, { "epoch": 96.21, "learning_rate": 0.00012659649122807016, "loss": 0.0006, "step": 6446 }, { "epoch": 96.22, "learning_rate": 0.00012656140350877194, "loss": 0.0136, "step": 6447 }, { "epoch": 96.24, "learning_rate": 0.00012652631578947366, "loss": 0.038, "step": 6448 }, { "epoch": 96.25, "learning_rate": 0.00012649122807017544, "loss": 0.0005, "step": 6449 }, { "epoch": 96.27, "learning_rate": 0.00012645614035087718, "loss": 0.003, "step": 6450 }, { "epoch": 96.28, "learning_rate": 0.00012642105263157893, "loss": 0.0006, "step": 6451 }, { "epoch": 96.3, "learning_rate": 0.00012638596491228068, "loss": 0.0449, "step": 6452 }, { "epoch": 96.31, "learning_rate": 0.00012635087719298246, "loss": 0.1171, "step": 6453 }, { "epoch": 96.33, "learning_rate": 0.00012631578947368418, "loss": 0.0005, "step": 6454 }, { "epoch": 96.34, "learning_rate": 0.00012628070175438596, "loss": 0.0008, "step": 6455 }, { "epoch": 96.36, "learning_rate": 0.0001262456140350877, "loss": 0.0006, "step": 6456 }, { "epoch": 96.37, "learning_rate": 0.00012621052631578945, "loss": 0.0016, "step": 6457 }, { "epoch": 96.39, "learning_rate": 0.00012617543859649123, "loss": 0.0027, "step": 6458 }, { "epoch": 96.4, "learning_rate": 0.00012614035087719298, "loss": 0.1313, "step": 6459 }, { "epoch": 96.42, "learning_rate": 0.00012610526315789473, "loss": 0.0006, "step": 6460 }, { "epoch": 96.43, "learning_rate": 0.00012607017543859648, "loss": 0.0006, "step": 6461 }, { "epoch": 96.45, "learning_rate": 0.00012603508771929825, "loss": 0.0048, "step": 6462 }, { "epoch": 96.46, "learning_rate": 0.00012599999999999997, "loss": 0.0032, "step": 6463 }, { "epoch": 96.48, "learning_rate": 0.00012596491228070175, "loss": 0.0005, "step": 6464 }, { "epoch": 96.49, "learning_rate": 0.0001259298245614035, "loss": 0.0802, "step": 6465 }, { "epoch": 96.51, "learning_rate": 0.00012589473684210525, "loss": 0.0009, "step": 6466 }, { "epoch": 96.52, "learning_rate": 0.000125859649122807, "loss": 0.0037, "step": 6467 }, { "epoch": 96.54, "learning_rate": 0.00012582456140350877, "loss": 0.0009, "step": 6468 }, { "epoch": 96.55, "learning_rate": 0.00012578947368421052, "loss": 0.0008, "step": 6469 }, { "epoch": 96.57, "learning_rate": 0.00012575438596491227, "loss": 0.0007, "step": 6470 }, { "epoch": 96.58, "learning_rate": 0.00012571929824561404, "loss": 0.0011, "step": 6471 }, { "epoch": 96.59, "learning_rate": 0.00012568421052631577, "loss": 0.0257, "step": 6472 }, { "epoch": 96.61, "learning_rate": 0.00012564912280701754, "loss": 0.014, "step": 6473 }, { "epoch": 96.62, "learning_rate": 0.0001256140350877193, "loss": 0.0006, "step": 6474 }, { "epoch": 96.64, "learning_rate": 0.00012557894736842104, "loss": 0.0185, "step": 6475 }, { "epoch": 96.65, "learning_rate": 0.0001255438596491228, "loss": 0.0005, "step": 6476 }, { "epoch": 96.67, "learning_rate": 0.00012550877192982456, "loss": 0.0009, "step": 6477 }, { "epoch": 96.68, "learning_rate": 0.00012547368421052629, "loss": 0.0007, "step": 6478 }, { "epoch": 96.7, "learning_rate": 0.00012543859649122806, "loss": 0.0401, "step": 6479 }, { "epoch": 96.71, "learning_rate": 0.0001254035087719298, "loss": 0.0007, "step": 6480 }, { "epoch": 96.73, "learning_rate": 0.00012536842105263156, "loss": 0.0008, "step": 6481 }, { "epoch": 96.74, "learning_rate": 0.00012533333333333334, "loss": 0.0511, "step": 6482 }, { "epoch": 96.76, "learning_rate": 0.00012529824561403508, "loss": 0.0219, "step": 6483 }, { "epoch": 96.77, "learning_rate": 0.00012526315789473683, "loss": 0.0034, "step": 6484 }, { "epoch": 96.79, "learning_rate": 0.00012522807017543858, "loss": 0.0456, "step": 6485 }, { "epoch": 96.8, "learning_rate": 0.00012519298245614036, "loss": 0.0006, "step": 6486 }, { "epoch": 96.82, "learning_rate": 0.00012515789473684208, "loss": 0.0041, "step": 6487 }, { "epoch": 96.83, "learning_rate": 0.00012512280701754385, "loss": 0.0008, "step": 6488 }, { "epoch": 96.85, "learning_rate": 0.0001250877192982456, "loss": 0.0006, "step": 6489 }, { "epoch": 96.86, "learning_rate": 0.00012505263157894735, "loss": 0.0044, "step": 6490 }, { "epoch": 96.88, "learning_rate": 0.0001250175438596491, "loss": 0.0006, "step": 6491 }, { "epoch": 96.89, "learning_rate": 0.00012498245614035088, "loss": 0.0007, "step": 6492 }, { "epoch": 96.91, "learning_rate": 0.00012494736842105263, "loss": 0.0006, "step": 6493 }, { "epoch": 96.92, "learning_rate": 0.00012491228070175437, "loss": 0.0076, "step": 6494 }, { "epoch": 96.94, "learning_rate": 0.00012487719298245612, "loss": 0.3258, "step": 6495 }, { "epoch": 96.95, "learning_rate": 0.00012484210526315787, "loss": 0.0007, "step": 6496 }, { "epoch": 96.97, "learning_rate": 0.00012480701754385965, "loss": 0.0005, "step": 6497 }, { "epoch": 96.98, "learning_rate": 0.0001247719298245614, "loss": 0.0013, "step": 6498 }, { "epoch": 97.0, "learning_rate": 0.00012473684210526315, "loss": 0.1592, "step": 6499 }, { "epoch": 97.01, "learning_rate": 0.0001247017543859649, "loss": 0.0008, "step": 6500 }, { "epoch": 97.03, "learning_rate": 0.00012466666666666667, "loss": 0.0006, "step": 6501 }, { "epoch": 97.04, "learning_rate": 0.0001246315789473684, "loss": 0.1534, "step": 6502 }, { "epoch": 97.06, "learning_rate": 0.00012459649122807017, "loss": 0.0008, "step": 6503 }, { "epoch": 97.07, "learning_rate": 0.00012456140350877192, "loss": 0.001, "step": 6504 }, { "epoch": 97.09, "learning_rate": 0.00012452631578947367, "loss": 0.0008, "step": 6505 }, { "epoch": 97.1, "learning_rate": 0.00012449122807017541, "loss": 0.0243, "step": 6506 }, { "epoch": 97.12, "learning_rate": 0.0001244561403508772, "loss": 0.0334, "step": 6507 }, { "epoch": 97.13, "learning_rate": 0.00012442105263157894, "loss": 0.0012, "step": 6508 }, { "epoch": 97.15, "learning_rate": 0.0001243859649122807, "loss": 0.0018, "step": 6509 }, { "epoch": 97.16, "learning_rate": 0.00012435087719298246, "loss": 0.001, "step": 6510 }, { "epoch": 97.18, "learning_rate": 0.00012431578947368419, "loss": 0.0021, "step": 6511 }, { "epoch": 97.19, "learning_rate": 0.00012428070175438596, "loss": 0.0018, "step": 6512 }, { "epoch": 97.21, "learning_rate": 0.0001242456140350877, "loss": 0.0435, "step": 6513 }, { "epoch": 97.22, "learning_rate": 0.00012421052631578946, "loss": 0.1776, "step": 6514 }, { "epoch": 97.24, "learning_rate": 0.0001241754385964912, "loss": 0.0017, "step": 6515 }, { "epoch": 97.25, "learning_rate": 0.00012414035087719298, "loss": 0.0035, "step": 6516 }, { "epoch": 97.27, "learning_rate": 0.00012410526315789473, "loss": 0.1954, "step": 6517 }, { "epoch": 97.28, "learning_rate": 0.00012407017543859648, "loss": 0.1197, "step": 6518 }, { "epoch": 97.3, "learning_rate": 0.00012403508771929823, "loss": 0.0009, "step": 6519 }, { "epoch": 97.31, "learning_rate": 0.00012399999999999998, "loss": 0.0014, "step": 6520 }, { "epoch": 97.33, "learning_rate": 0.00012396491228070175, "loss": 0.0012, "step": 6521 }, { "epoch": 97.34, "learning_rate": 0.0001239298245614035, "loss": 0.1418, "step": 6522 }, { "epoch": 97.36, "learning_rate": 0.00012389473684210525, "loss": 0.0008, "step": 6523 }, { "epoch": 97.37, "learning_rate": 0.000123859649122807, "loss": 0.0011, "step": 6524 }, { "epoch": 97.39, "learning_rate": 0.00012382456140350878, "loss": 0.1914, "step": 6525 }, { "epoch": 97.4, "learning_rate": 0.0001237894736842105, "loss": 0.0006, "step": 6526 }, { "epoch": 97.42, "learning_rate": 0.00012375438596491227, "loss": 0.0008, "step": 6527 }, { "epoch": 97.43, "learning_rate": 0.00012371929824561402, "loss": 0.0015, "step": 6528 }, { "epoch": 97.45, "learning_rate": 0.00012368421052631577, "loss": 0.0129, "step": 6529 }, { "epoch": 97.46, "learning_rate": 0.00012364912280701752, "loss": 0.0008, "step": 6530 }, { "epoch": 97.48, "learning_rate": 0.0001236140350877193, "loss": 0.1816, "step": 6531 }, { "epoch": 97.49, "learning_rate": 0.00012357894736842104, "loss": 0.2985, "step": 6532 }, { "epoch": 97.51, "learning_rate": 0.0001235438596491228, "loss": 0.1943, "step": 6533 }, { "epoch": 97.52, "learning_rate": 0.00012350877192982457, "loss": 0.0019, "step": 6534 }, { "epoch": 97.54, "learning_rate": 0.0001234736842105263, "loss": 0.0129, "step": 6535 }, { "epoch": 97.55, "learning_rate": 0.00012343859649122807, "loss": 0.3213, "step": 6536 }, { "epoch": 97.57, "learning_rate": 0.00012340350877192982, "loss": 0.0094, "step": 6537 }, { "epoch": 97.58, "learning_rate": 0.00012336842105263156, "loss": 0.0682, "step": 6538 }, { "epoch": 97.59, "learning_rate": 0.0001233333333333333, "loss": 0.0008, "step": 6539 }, { "epoch": 97.61, "learning_rate": 0.0001232982456140351, "loss": 0.0007, "step": 6540 }, { "epoch": 97.62, "learning_rate": 0.00012326315789473684, "loss": 0.0007, "step": 6541 }, { "epoch": 97.64, "learning_rate": 0.0001232280701754386, "loss": 0.0043, "step": 6542 }, { "epoch": 97.65, "learning_rate": 0.00012319298245614034, "loss": 0.0012, "step": 6543 }, { "epoch": 97.67, "learning_rate": 0.00012315789473684208, "loss": 0.0501, "step": 6544 }, { "epoch": 97.68, "learning_rate": 0.00012312280701754386, "loss": 0.0713, "step": 6545 }, { "epoch": 97.7, "learning_rate": 0.0001230877192982456, "loss": 0.0011, "step": 6546 }, { "epoch": 97.71, "learning_rate": 0.00012305263157894736, "loss": 0.0039, "step": 6547 }, { "epoch": 97.73, "learning_rate": 0.0001230175438596491, "loss": 0.0008, "step": 6548 }, { "epoch": 97.74, "learning_rate": 0.00012298245614035088, "loss": 0.0011, "step": 6549 }, { "epoch": 97.76, "learning_rate": 0.0001229473684210526, "loss": 0.0009, "step": 6550 }, { "epoch": 97.77, "learning_rate": 0.00012291228070175438, "loss": 0.001, "step": 6551 }, { "epoch": 97.79, "learning_rate": 0.00012287719298245613, "loss": 0.0026, "step": 6552 }, { "epoch": 97.8, "learning_rate": 0.00012284210526315788, "loss": 0.2741, "step": 6553 }, { "epoch": 97.82, "learning_rate": 0.00012280701754385963, "loss": 0.0012, "step": 6554 }, { "epoch": 97.83, "learning_rate": 0.0001227719298245614, "loss": 0.0008, "step": 6555 }, { "epoch": 97.85, "learning_rate": 0.00012273684210526315, "loss": 0.0007, "step": 6556 }, { "epoch": 97.86, "learning_rate": 0.0001227017543859649, "loss": 0.0024, "step": 6557 }, { "epoch": 97.88, "learning_rate": 0.00012266666666666668, "loss": 0.0008, "step": 6558 }, { "epoch": 97.89, "learning_rate": 0.0001226315789473684, "loss": 0.0038, "step": 6559 }, { "epoch": 97.91, "learning_rate": 0.00012259649122807017, "loss": 0.0148, "step": 6560 }, { "epoch": 97.92, "learning_rate": 0.00012256140350877192, "loss": 0.0027, "step": 6561 }, { "epoch": 97.94, "learning_rate": 0.00012252631578947367, "loss": 0.2488, "step": 6562 }, { "epoch": 97.95, "learning_rate": 0.00012249122807017542, "loss": 0.002, "step": 6563 }, { "epoch": 97.97, "learning_rate": 0.0001224561403508772, "loss": 0.001, "step": 6564 }, { "epoch": 97.98, "learning_rate": 0.00012242105263157894, "loss": 0.0031, "step": 6565 }, { "epoch": 98.0, "learning_rate": 0.0001223859649122807, "loss": 0.0342, "step": 6566 }, { "epoch": 98.01, "learning_rate": 0.00012235087719298244, "loss": 0.0302, "step": 6567 }, { "epoch": 98.03, "learning_rate": 0.0001223157894736842, "loss": 0.0006, "step": 6568 }, { "epoch": 98.04, "learning_rate": 0.00012228070175438597, "loss": 0.0008, "step": 6569 }, { "epoch": 98.06, "learning_rate": 0.00012224561403508772, "loss": 0.0014, "step": 6570 }, { "epoch": 98.07, "learning_rate": 0.00012221052631578946, "loss": 0.0008, "step": 6571 }, { "epoch": 98.09, "learning_rate": 0.0001221754385964912, "loss": 0.0008, "step": 6572 }, { "epoch": 98.1, "learning_rate": 0.000122140350877193, "loss": 0.0027, "step": 6573 }, { "epoch": 98.12, "learning_rate": 0.0001221052631578947, "loss": 0.0007, "step": 6574 }, { "epoch": 98.13, "learning_rate": 0.00012207017543859649, "loss": 0.001, "step": 6575 }, { "epoch": 98.15, "learning_rate": 0.00012203508771929824, "loss": 0.001, "step": 6576 }, { "epoch": 98.16, "learning_rate": 0.000122, "loss": 0.0014, "step": 6577 }, { "epoch": 98.18, "learning_rate": 0.00012196491228070173, "loss": 0.1129, "step": 6578 }, { "epoch": 98.19, "learning_rate": 0.0001219298245614035, "loss": 0.0015, "step": 6579 }, { "epoch": 98.21, "learning_rate": 0.00012189473684210524, "loss": 0.0031, "step": 6580 }, { "epoch": 98.22, "learning_rate": 0.000121859649122807, "loss": 0.0012, "step": 6581 }, { "epoch": 98.24, "learning_rate": 0.00012182456140350877, "loss": 0.0009, "step": 6582 }, { "epoch": 98.25, "learning_rate": 0.00012178947368421052, "loss": 0.0543, "step": 6583 }, { "epoch": 98.27, "learning_rate": 0.00012175438596491228, "loss": 0.0008, "step": 6584 }, { "epoch": 98.28, "learning_rate": 0.00012171929824561403, "loss": 0.0016, "step": 6585 }, { "epoch": 98.3, "learning_rate": 0.00012168421052631579, "loss": 0.0021, "step": 6586 }, { "epoch": 98.31, "learning_rate": 0.00012164912280701753, "loss": 0.2339, "step": 6587 }, { "epoch": 98.33, "learning_rate": 0.00012161403508771929, "loss": 0.0009, "step": 6588 }, { "epoch": 98.34, "learning_rate": 0.00012157894736842104, "loss": 0.0053, "step": 6589 }, { "epoch": 98.36, "learning_rate": 0.0001215438596491228, "loss": 0.001, "step": 6590 }, { "epoch": 98.37, "learning_rate": 0.00012150877192982455, "loss": 0.1344, "step": 6591 }, { "epoch": 98.39, "learning_rate": 0.00012147368421052631, "loss": 0.2065, "step": 6592 }, { "epoch": 98.4, "learning_rate": 0.00012143859649122805, "loss": 0.0011, "step": 6593 }, { "epoch": 98.42, "learning_rate": 0.00012140350877192981, "loss": 0.0007, "step": 6594 }, { "epoch": 98.43, "learning_rate": 0.00012136842105263157, "loss": 0.1541, "step": 6595 }, { "epoch": 98.45, "learning_rate": 0.00012133333333333332, "loss": 0.0015, "step": 6596 }, { "epoch": 98.46, "learning_rate": 0.00012129824561403508, "loss": 0.0012, "step": 6597 }, { "epoch": 98.48, "learning_rate": 0.00012126315789473683, "loss": 0.0543, "step": 6598 }, { "epoch": 98.49, "learning_rate": 0.00012122807017543859, "loss": 0.001, "step": 6599 }, { "epoch": 98.51, "learning_rate": 0.00012119298245614034, "loss": 0.0008, "step": 6600 }, { "epoch": 98.51, "eval_accuracy": 0.8541360744003916, "eval_f1": 0.8572461579693889, "eval_loss": 0.7322733402252197, "eval_runtime": 344.5259, "eval_samples_per_second": 11.86, "eval_steps_per_second": 0.743, "step": 6600 }, { "epoch": 98.52, "learning_rate": 0.0001211578947368421, "loss": 0.0381, "step": 6601 }, { "epoch": 98.54, "learning_rate": 0.00012112280701754384, "loss": 0.0028, "step": 6602 }, { "epoch": 98.55, "learning_rate": 0.0001210877192982456, "loss": 0.2096, "step": 6603 }, { "epoch": 98.57, "learning_rate": 0.00012105263157894735, "loss": 0.1052, "step": 6604 }, { "epoch": 98.58, "learning_rate": 0.00012101754385964911, "loss": 0.277, "step": 6605 }, { "epoch": 98.59, "learning_rate": 0.00012098245614035086, "loss": 0.0104, "step": 6606 }, { "epoch": 98.61, "learning_rate": 0.00012094736842105262, "loss": 0.0007, "step": 6607 }, { "epoch": 98.62, "learning_rate": 0.00012091228070175439, "loss": 0.004, "step": 6608 }, { "epoch": 98.64, "learning_rate": 0.00012087719298245613, "loss": 0.2461, "step": 6609 }, { "epoch": 98.65, "learning_rate": 0.0001208421052631579, "loss": 0.0009, "step": 6610 }, { "epoch": 98.67, "learning_rate": 0.00012080701754385963, "loss": 0.0634, "step": 6611 }, { "epoch": 98.68, "learning_rate": 0.0001207719298245614, "loss": 0.0097, "step": 6612 }, { "epoch": 98.7, "learning_rate": 0.00012073684210526314, "loss": 0.0934, "step": 6613 }, { "epoch": 98.71, "learning_rate": 0.0001207017543859649, "loss": 0.001, "step": 6614 }, { "epoch": 98.73, "learning_rate": 0.00012066666666666665, "loss": 0.0013, "step": 6615 }, { "epoch": 98.74, "learning_rate": 0.00012063157894736842, "loss": 0.0033, "step": 6616 }, { "epoch": 98.76, "learning_rate": 0.00012059649122807015, "loss": 0.0016, "step": 6617 }, { "epoch": 98.77, "learning_rate": 0.00012056140350877191, "loss": 0.023, "step": 6618 }, { "epoch": 98.79, "learning_rate": 0.00012052631578947368, "loss": 0.0018, "step": 6619 }, { "epoch": 98.8, "learning_rate": 0.00012049122807017543, "loss": 0.1118, "step": 6620 }, { "epoch": 98.82, "learning_rate": 0.00012045614035087719, "loss": 0.0055, "step": 6621 }, { "epoch": 98.83, "learning_rate": 0.00012042105263157894, "loss": 0.011, "step": 6622 }, { "epoch": 98.85, "learning_rate": 0.0001203859649122807, "loss": 0.0015, "step": 6623 }, { "epoch": 98.86, "learning_rate": 0.00012035087719298245, "loss": 0.2383, "step": 6624 }, { "epoch": 98.88, "learning_rate": 0.00012031578947368421, "loss": 0.0013, "step": 6625 }, { "epoch": 98.89, "learning_rate": 0.00012028070175438595, "loss": 0.0012, "step": 6626 }, { "epoch": 98.91, "learning_rate": 0.00012024561403508771, "loss": 0.0179, "step": 6627 }, { "epoch": 98.92, "learning_rate": 0.00012021052631578946, "loss": 0.0349, "step": 6628 }, { "epoch": 98.94, "learning_rate": 0.00012017543859649122, "loss": 0.0045, "step": 6629 }, { "epoch": 98.95, "learning_rate": 0.00012014035087719297, "loss": 0.0787, "step": 6630 }, { "epoch": 98.97, "learning_rate": 0.00012010526315789473, "loss": 0.0008, "step": 6631 }, { "epoch": 98.98, "learning_rate": 0.00012007017543859649, "loss": 0.1505, "step": 6632 }, { "epoch": 99.0, "learning_rate": 0.00012003508771929824, "loss": 0.0029, "step": 6633 }, { "epoch": 99.01, "learning_rate": 0.00011999999999999999, "loss": 0.0435, "step": 6634 }, { "epoch": 99.03, "learning_rate": 0.00011996491228070174, "loss": 0.0363, "step": 6635 }, { "epoch": 99.04, "learning_rate": 0.0001199298245614035, "loss": 0.0013, "step": 6636 }, { "epoch": 99.06, "learning_rate": 0.00011989473684210525, "loss": 0.177, "step": 6637 }, { "epoch": 99.07, "learning_rate": 0.00011985964912280701, "loss": 0.001, "step": 6638 }, { "epoch": 99.09, "learning_rate": 0.00011982456140350876, "loss": 0.0011, "step": 6639 }, { "epoch": 99.1, "learning_rate": 0.00011978947368421052, "loss": 0.0017, "step": 6640 }, { "epoch": 99.12, "learning_rate": 0.00011975438596491226, "loss": 0.0028, "step": 6641 }, { "epoch": 99.13, "learning_rate": 0.00011971929824561402, "loss": 0.0211, "step": 6642 }, { "epoch": 99.15, "learning_rate": 0.00011968421052631577, "loss": 0.0025, "step": 6643 }, { "epoch": 99.16, "learning_rate": 0.00011964912280701753, "loss": 0.001, "step": 6644 }, { "epoch": 99.18, "learning_rate": 0.0001196140350877193, "loss": 0.0008, "step": 6645 }, { "epoch": 99.19, "learning_rate": 0.00011957894736842104, "loss": 0.0013, "step": 6646 }, { "epoch": 99.21, "learning_rate": 0.0001195438596491228, "loss": 0.0011, "step": 6647 }, { "epoch": 99.22, "learning_rate": 0.00011950877192982455, "loss": 0.3039, "step": 6648 }, { "epoch": 99.24, "learning_rate": 0.00011947368421052632, "loss": 0.0019, "step": 6649 }, { "epoch": 99.25, "learning_rate": 0.00011943859649122805, "loss": 0.1022, "step": 6650 }, { "epoch": 99.27, "learning_rate": 0.00011940350877192981, "loss": 0.0957, "step": 6651 }, { "epoch": 99.28, "learning_rate": 0.00011936842105263156, "loss": 0.1441, "step": 6652 }, { "epoch": 99.3, "learning_rate": 0.00011933333333333332, "loss": 0.2018, "step": 6653 }, { "epoch": 99.31, "learning_rate": 0.00011929824561403507, "loss": 0.0094, "step": 6654 }, { "epoch": 99.33, "learning_rate": 0.00011926315789473684, "loss": 0.0008, "step": 6655 }, { "epoch": 99.34, "learning_rate": 0.00011922807017543858, "loss": 0.0024, "step": 6656 }, { "epoch": 99.36, "learning_rate": 0.00011919298245614035, "loss": 0.0005, "step": 6657 }, { "epoch": 99.37, "learning_rate": 0.0001191578947368421, "loss": 0.0171, "step": 6658 }, { "epoch": 99.39, "learning_rate": 0.00011912280701754384, "loss": 0.0042, "step": 6659 }, { "epoch": 99.4, "learning_rate": 0.00011908771929824561, "loss": 0.1014, "step": 6660 }, { "epoch": 99.42, "learning_rate": 0.00011905263157894736, "loss": 0.0596, "step": 6661 }, { "epoch": 99.43, "learning_rate": 0.00011901754385964912, "loss": 0.0012, "step": 6662 }, { "epoch": 99.45, "learning_rate": 0.00011898245614035087, "loss": 0.001, "step": 6663 }, { "epoch": 99.46, "learning_rate": 0.00011894736842105263, "loss": 0.0005, "step": 6664 }, { "epoch": 99.48, "learning_rate": 0.00011891228070175436, "loss": 0.0008, "step": 6665 }, { "epoch": 99.49, "learning_rate": 0.00011887719298245613, "loss": 0.0008, "step": 6666 }, { "epoch": 99.51, "learning_rate": 0.00011884210526315788, "loss": 0.0011, "step": 6667 }, { "epoch": 99.52, "learning_rate": 0.00011880701754385964, "loss": 0.0267, "step": 6668 }, { "epoch": 99.54, "learning_rate": 0.00011877192982456139, "loss": 0.2269, "step": 6669 }, { "epoch": 99.55, "learning_rate": 0.00011873684210526315, "loss": 0.0203, "step": 6670 }, { "epoch": 99.57, "learning_rate": 0.00011870175438596491, "loss": 0.0008, "step": 6671 }, { "epoch": 99.58, "learning_rate": 0.00011866666666666666, "loss": 0.0025, "step": 6672 }, { "epoch": 99.59, "learning_rate": 0.00011863157894736842, "loss": 0.001, "step": 6673 }, { "epoch": 99.61, "learning_rate": 0.00011859649122807016, "loss": 0.0006, "step": 6674 }, { "epoch": 99.62, "learning_rate": 0.00011856140350877192, "loss": 0.0052, "step": 6675 }, { "epoch": 99.64, "learning_rate": 0.00011852631578947367, "loss": 0.0008, "step": 6676 }, { "epoch": 99.65, "learning_rate": 0.00011849122807017543, "loss": 0.0006, "step": 6677 }, { "epoch": 99.67, "learning_rate": 0.00011845614035087718, "loss": 0.1664, "step": 6678 }, { "epoch": 99.68, "learning_rate": 0.00011842105263157894, "loss": 0.0015, "step": 6679 }, { "epoch": 99.7, "learning_rate": 0.00011838596491228069, "loss": 0.0011, "step": 6680 }, { "epoch": 99.71, "learning_rate": 0.00011835087719298244, "loss": 0.0046, "step": 6681 }, { "epoch": 99.73, "learning_rate": 0.0001183157894736842, "loss": 0.0009, "step": 6682 }, { "epoch": 99.74, "learning_rate": 0.00011828070175438595, "loss": 0.0322, "step": 6683 }, { "epoch": 99.76, "learning_rate": 0.00011824561403508771, "loss": 0.0009, "step": 6684 }, { "epoch": 99.77, "learning_rate": 0.00011821052631578946, "loss": 0.0012, "step": 6685 }, { "epoch": 99.79, "learning_rate": 0.00011817543859649122, "loss": 0.0545, "step": 6686 }, { "epoch": 99.8, "learning_rate": 0.00011814035087719297, "loss": 0.0012, "step": 6687 }, { "epoch": 99.82, "learning_rate": 0.00011810526315789474, "loss": 0.0016, "step": 6688 }, { "epoch": 99.83, "learning_rate": 0.00011807017543859647, "loss": 0.007, "step": 6689 }, { "epoch": 99.85, "learning_rate": 0.00011803508771929823, "loss": 0.0365, "step": 6690 }, { "epoch": 99.86, "learning_rate": 0.00011799999999999998, "loss": 0.1005, "step": 6691 }, { "epoch": 99.88, "learning_rate": 0.00011796491228070174, "loss": 0.001, "step": 6692 }, { "epoch": 99.89, "learning_rate": 0.00011792982456140349, "loss": 0.0126, "step": 6693 }, { "epoch": 99.91, "learning_rate": 0.00011789473684210525, "loss": 0.0256, "step": 6694 }, { "epoch": 99.92, "learning_rate": 0.00011785964912280702, "loss": 0.1166, "step": 6695 }, { "epoch": 99.94, "learning_rate": 0.00011782456140350877, "loss": 0.0006, "step": 6696 }, { "epoch": 99.95, "learning_rate": 0.00011778947368421053, "loss": 0.0009, "step": 6697 }, { "epoch": 99.97, "learning_rate": 0.00011775438596491226, "loss": 0.0018, "step": 6698 }, { "epoch": 99.98, "learning_rate": 0.00011771929824561403, "loss": 0.001, "step": 6699 }, { "epoch": 100.0, "learning_rate": 0.00011768421052631577, "loss": 0.0008, "step": 6700 }, { "epoch": 100.01, "learning_rate": 0.00011764912280701754, "loss": 0.0034, "step": 6701 }, { "epoch": 100.03, "learning_rate": 0.00011761403508771929, "loss": 0.0061, "step": 6702 }, { "epoch": 100.04, "learning_rate": 0.00011757894736842105, "loss": 0.0006, "step": 6703 }, { "epoch": 100.06, "learning_rate": 0.0001175438596491228, "loss": 0.0007, "step": 6704 }, { "epoch": 100.07, "learning_rate": 0.00011750877192982455, "loss": 0.0068, "step": 6705 }, { "epoch": 100.09, "learning_rate": 0.0001174736842105263, "loss": 0.1123, "step": 6706 }, { "epoch": 100.1, "learning_rate": 0.00011743859649122806, "loss": 0.0011, "step": 6707 }, { "epoch": 100.12, "learning_rate": 0.00011740350877192982, "loss": 0.0603, "step": 6708 }, { "epoch": 100.13, "learning_rate": 0.00011736842105263157, "loss": 0.0008, "step": 6709 }, { "epoch": 100.15, "learning_rate": 0.00011733333333333333, "loss": 0.0006, "step": 6710 }, { "epoch": 100.16, "learning_rate": 0.00011729824561403508, "loss": 0.0008, "step": 6711 }, { "epoch": 100.18, "learning_rate": 0.00011726315789473684, "loss": 0.2151, "step": 6712 }, { "epoch": 100.19, "learning_rate": 0.00011722807017543858, "loss": 0.0026, "step": 6713 }, { "epoch": 100.21, "learning_rate": 0.00011719298245614034, "loss": 0.0011, "step": 6714 }, { "epoch": 100.22, "learning_rate": 0.00011715789473684209, "loss": 0.0007, "step": 6715 }, { "epoch": 100.24, "learning_rate": 0.00011712280701754385, "loss": 0.0009, "step": 6716 }, { "epoch": 100.25, "learning_rate": 0.0001170877192982456, "loss": 0.0048, "step": 6717 }, { "epoch": 100.27, "learning_rate": 0.00011705263157894736, "loss": 0.1761, "step": 6718 }, { "epoch": 100.28, "learning_rate": 0.00011701754385964911, "loss": 0.0009, "step": 6719 }, { "epoch": 100.3, "learning_rate": 0.00011698245614035087, "loss": 0.0514, "step": 6720 }, { "epoch": 100.31, "learning_rate": 0.00011694736842105263, "loss": 0.063, "step": 6721 }, { "epoch": 100.33, "learning_rate": 0.00011691228070175437, "loss": 0.0164, "step": 6722 }, { "epoch": 100.34, "learning_rate": 0.00011687719298245613, "loss": 0.0022, "step": 6723 }, { "epoch": 100.36, "learning_rate": 0.00011684210526315788, "loss": 0.0007, "step": 6724 }, { "epoch": 100.37, "learning_rate": 0.00011680701754385964, "loss": 0.0019, "step": 6725 }, { "epoch": 100.39, "learning_rate": 0.00011677192982456139, "loss": 0.0429, "step": 6726 }, { "epoch": 100.4, "learning_rate": 0.00011673684210526315, "loss": 0.0109, "step": 6727 }, { "epoch": 100.42, "learning_rate": 0.00011670175438596489, "loss": 0.0123, "step": 6728 }, { "epoch": 100.43, "learning_rate": 0.00011666666666666665, "loss": 0.0008, "step": 6729 }, { "epoch": 100.45, "learning_rate": 0.0001166315789473684, "loss": 0.0429, "step": 6730 }, { "epoch": 100.46, "learning_rate": 0.00011659649122807016, "loss": 0.3201, "step": 6731 }, { "epoch": 100.48, "learning_rate": 0.00011656140350877193, "loss": 0.0026, "step": 6732 }, { "epoch": 100.49, "learning_rate": 0.00011652631578947367, "loss": 0.002, "step": 6733 }, { "epoch": 100.51, "learning_rate": 0.00011649122807017544, "loss": 0.017, "step": 6734 }, { "epoch": 100.52, "learning_rate": 0.00011645614035087719, "loss": 0.0023, "step": 6735 }, { "epoch": 100.54, "learning_rate": 0.00011642105263157895, "loss": 0.0273, "step": 6736 }, { "epoch": 100.55, "learning_rate": 0.00011638596491228068, "loss": 0.0007, "step": 6737 }, { "epoch": 100.57, "learning_rate": 0.00011635087719298245, "loss": 0.0015, "step": 6738 }, { "epoch": 100.58, "learning_rate": 0.0001163157894736842, "loss": 0.0036, "step": 6739 }, { "epoch": 100.59, "learning_rate": 0.00011628070175438596, "loss": 0.0009, "step": 6740 }, { "epoch": 100.61, "learning_rate": 0.0001162456140350877, "loss": 0.0008, "step": 6741 }, { "epoch": 100.62, "learning_rate": 0.00011621052631578947, "loss": 0.0743, "step": 6742 }, { "epoch": 100.64, "learning_rate": 0.00011617543859649122, "loss": 0.0029, "step": 6743 }, { "epoch": 100.65, "learning_rate": 0.00011614035087719298, "loss": 0.0018, "step": 6744 }, { "epoch": 100.67, "learning_rate": 0.00011610526315789474, "loss": 0.0005, "step": 6745 }, { "epoch": 100.68, "learning_rate": 0.00011607017543859648, "loss": 0.0006, "step": 6746 }, { "epoch": 100.7, "learning_rate": 0.00011603508771929824, "loss": 0.0709, "step": 6747 }, { "epoch": 100.71, "learning_rate": 0.00011599999999999999, "loss": 0.0006, "step": 6748 }, { "epoch": 100.73, "learning_rate": 0.00011596491228070175, "loss": 0.135, "step": 6749 }, { "epoch": 100.74, "learning_rate": 0.0001159298245614035, "loss": 0.0049, "step": 6750 }, { "epoch": 100.76, "learning_rate": 0.00011589473684210526, "loss": 0.0007, "step": 6751 }, { "epoch": 100.77, "learning_rate": 0.000115859649122807, "loss": 0.0038, "step": 6752 }, { "epoch": 100.79, "learning_rate": 0.00011582456140350876, "loss": 0.0005, "step": 6753 }, { "epoch": 100.8, "learning_rate": 0.00011578947368421051, "loss": 0.0006, "step": 6754 }, { "epoch": 100.82, "learning_rate": 0.00011575438596491227, "loss": 0.0157, "step": 6755 }, { "epoch": 100.83, "learning_rate": 0.00011571929824561402, "loss": 0.0022, "step": 6756 }, { "epoch": 100.85, "learning_rate": 0.00011568421052631578, "loss": 0.0006, "step": 6757 }, { "epoch": 100.86, "learning_rate": 0.00011564912280701754, "loss": 0.2283, "step": 6758 }, { "epoch": 100.88, "learning_rate": 0.00011561403508771929, "loss": 0.0035, "step": 6759 }, { "epoch": 100.89, "learning_rate": 0.00011557894736842105, "loss": 0.001, "step": 6760 }, { "epoch": 100.91, "learning_rate": 0.00011554385964912279, "loss": 0.0009, "step": 6761 }, { "epoch": 100.92, "learning_rate": 0.00011550877192982455, "loss": 0.0014, "step": 6762 }, { "epoch": 100.94, "learning_rate": 0.0001154736842105263, "loss": 0.0028, "step": 6763 }, { "epoch": 100.95, "learning_rate": 0.00011543859649122806, "loss": 0.0009, "step": 6764 }, { "epoch": 100.97, "learning_rate": 0.00011540350877192981, "loss": 0.0024, "step": 6765 }, { "epoch": 100.98, "learning_rate": 0.00011536842105263157, "loss": 0.0009, "step": 6766 }, { "epoch": 101.0, "learning_rate": 0.00011533333333333332, "loss": 0.0269, "step": 6767 }, { "epoch": 101.01, "learning_rate": 0.00011529824561403508, "loss": 0.0018, "step": 6768 }, { "epoch": 101.03, "learning_rate": 0.00011526315789473682, "loss": 0.002, "step": 6769 }, { "epoch": 101.04, "learning_rate": 0.00011522807017543858, "loss": 0.1631, "step": 6770 }, { "epoch": 101.06, "learning_rate": 0.00011519298245614034, "loss": 0.0057, "step": 6771 }, { "epoch": 101.07, "learning_rate": 0.0001151578947368421, "loss": 0.0009, "step": 6772 }, { "epoch": 101.09, "learning_rate": 0.00011512280701754386, "loss": 0.1869, "step": 6773 }, { "epoch": 101.1, "learning_rate": 0.0001150877192982456, "loss": 0.0018, "step": 6774 }, { "epoch": 101.12, "learning_rate": 0.00011505263157894737, "loss": 0.0007, "step": 6775 }, { "epoch": 101.13, "learning_rate": 0.0001150175438596491, "loss": 0.0008, "step": 6776 }, { "epoch": 101.15, "learning_rate": 0.00011498245614035086, "loss": 0.0088, "step": 6777 }, { "epoch": 101.16, "learning_rate": 0.00011494736842105261, "loss": 0.0016, "step": 6778 }, { "epoch": 101.18, "learning_rate": 0.00011491228070175438, "loss": 0.0006, "step": 6779 }, { "epoch": 101.19, "learning_rate": 0.00011487719298245612, "loss": 0.0011, "step": 6780 }, { "epoch": 101.21, "learning_rate": 0.00011484210526315789, "loss": 0.0407, "step": 6781 }, { "epoch": 101.22, "learning_rate": 0.00011480701754385965, "loss": 0.0213, "step": 6782 }, { "epoch": 101.24, "learning_rate": 0.0001147719298245614, "loss": 0.0024, "step": 6783 }, { "epoch": 101.25, "learning_rate": 0.00011473684210526316, "loss": 0.0057, "step": 6784 }, { "epoch": 101.27, "learning_rate": 0.0001147017543859649, "loss": 0.0007, "step": 6785 }, { "epoch": 101.28, "learning_rate": 0.00011466666666666666, "loss": 0.0972, "step": 6786 }, { "epoch": 101.3, "learning_rate": 0.0001146315789473684, "loss": 0.1075, "step": 6787 }, { "epoch": 101.31, "learning_rate": 0.00011459649122807017, "loss": 0.0015, "step": 6788 }, { "epoch": 101.33, "learning_rate": 0.00011456140350877192, "loss": 0.0007, "step": 6789 }, { "epoch": 101.34, "learning_rate": 0.00011452631578947368, "loss": 0.0006, "step": 6790 }, { "epoch": 101.36, "learning_rate": 0.00011449122807017543, "loss": 0.0019, "step": 6791 }, { "epoch": 101.37, "learning_rate": 0.00011445614035087719, "loss": 0.0008, "step": 6792 }, { "epoch": 101.39, "learning_rate": 0.00011442105263157893, "loss": 0.2029, "step": 6793 }, { "epoch": 101.4, "learning_rate": 0.00011438596491228069, "loss": 0.0061, "step": 6794 }, { "epoch": 101.42, "learning_rate": 0.00011435087719298245, "loss": 0.0006, "step": 6795 }, { "epoch": 101.43, "learning_rate": 0.0001143157894736842, "loss": 0.0015, "step": 6796 }, { "epoch": 101.45, "learning_rate": 0.00011428070175438596, "loss": 0.0066, "step": 6797 }, { "epoch": 101.46, "learning_rate": 0.00011424561403508771, "loss": 0.0121, "step": 6798 }, { "epoch": 101.48, "learning_rate": 0.00011421052631578947, "loss": 0.006, "step": 6799 }, { "epoch": 101.49, "learning_rate": 0.00011417543859649121, "loss": 0.1655, "step": 6800 }, { "epoch": 101.49, "eval_accuracy": 0.8504650024473813, "eval_f1": 0.8521261710057196, "eval_loss": 0.6953144073486328, "eval_runtime": 343.8957, "eval_samples_per_second": 11.882, "eval_steps_per_second": 0.744, "step": 6800 }, { "epoch": 101.51, "learning_rate": 0.00011414035087719297, "loss": 0.004, "step": 6801 }, { "epoch": 101.52, "learning_rate": 0.00011410526315789472, "loss": 0.1086, "step": 6802 }, { "epoch": 101.54, "learning_rate": 0.00011407017543859648, "loss": 0.0871, "step": 6803 }, { "epoch": 101.55, "learning_rate": 0.00011403508771929823, "loss": 0.071, "step": 6804 }, { "epoch": 101.57, "learning_rate": 0.00011399999999999999, "loss": 0.075, "step": 6805 }, { "epoch": 101.58, "learning_rate": 0.00011396491228070174, "loss": 0.0009, "step": 6806 }, { "epoch": 101.59, "learning_rate": 0.0001139298245614035, "loss": 0.0031, "step": 6807 }, { "epoch": 101.61, "learning_rate": 0.00011389473684210527, "loss": 0.0009, "step": 6808 }, { "epoch": 101.62, "learning_rate": 0.000113859649122807, "loss": 0.0013, "step": 6809 }, { "epoch": 101.64, "learning_rate": 0.00011382456140350876, "loss": 0.0874, "step": 6810 }, { "epoch": 101.65, "learning_rate": 0.00011378947368421051, "loss": 0.0005, "step": 6811 }, { "epoch": 101.67, "learning_rate": 0.00011375438596491227, "loss": 0.02, "step": 6812 }, { "epoch": 101.68, "learning_rate": 0.00011371929824561402, "loss": 0.0116, "step": 6813 }, { "epoch": 101.7, "learning_rate": 0.00011368421052631579, "loss": 0.001, "step": 6814 }, { "epoch": 101.71, "learning_rate": 0.00011364912280701753, "loss": 0.0017, "step": 6815 }, { "epoch": 101.73, "learning_rate": 0.0001136140350877193, "loss": 0.0016, "step": 6816 }, { "epoch": 101.74, "learning_rate": 0.00011357894736842103, "loss": 0.0042, "step": 6817 }, { "epoch": 101.76, "learning_rate": 0.0001135438596491228, "loss": 0.0054, "step": 6818 }, { "epoch": 101.77, "learning_rate": 0.00011350877192982454, "loss": 0.0103, "step": 6819 }, { "epoch": 101.79, "learning_rate": 0.0001134736842105263, "loss": 0.0009, "step": 6820 }, { "epoch": 101.8, "learning_rate": 0.00011343859649122807, "loss": 0.0088, "step": 6821 }, { "epoch": 101.82, "learning_rate": 0.00011340350877192982, "loss": 0.0007, "step": 6822 }, { "epoch": 101.83, "learning_rate": 0.00011336842105263158, "loss": 0.0122, "step": 6823 }, { "epoch": 101.85, "learning_rate": 0.00011333333333333331, "loss": 0.0046, "step": 6824 }, { "epoch": 101.86, "learning_rate": 0.00011329824561403508, "loss": 0.0005, "step": 6825 }, { "epoch": 101.88, "learning_rate": 0.00011326315789473683, "loss": 0.001, "step": 6826 }, { "epoch": 101.89, "learning_rate": 0.00011322807017543859, "loss": 0.0014, "step": 6827 }, { "epoch": 101.91, "learning_rate": 0.00011319298245614034, "loss": 0.0048, "step": 6828 }, { "epoch": 101.92, "learning_rate": 0.0001131578947368421, "loss": 0.0039, "step": 6829 }, { "epoch": 101.94, "learning_rate": 0.00011312280701754385, "loss": 0.0007, "step": 6830 }, { "epoch": 101.95, "learning_rate": 0.00011308771929824561, "loss": 0.1849, "step": 6831 }, { "epoch": 101.97, "learning_rate": 0.00011305263157894735, "loss": 0.0843, "step": 6832 }, { "epoch": 101.98, "learning_rate": 0.00011301754385964911, "loss": 0.0008, "step": 6833 }, { "epoch": 102.0, "learning_rate": 0.00011298245614035087, "loss": 0.0574, "step": 6834 }, { "epoch": 102.01, "learning_rate": 0.00011294736842105262, "loss": 0.0702, "step": 6835 }, { "epoch": 102.03, "learning_rate": 0.00011291228070175438, "loss": 0.0056, "step": 6836 }, { "epoch": 102.04, "learning_rate": 0.00011287719298245613, "loss": 0.0005, "step": 6837 }, { "epoch": 102.06, "learning_rate": 0.00011284210526315789, "loss": 0.0851, "step": 6838 }, { "epoch": 102.07, "learning_rate": 0.00011280701754385964, "loss": 0.0006, "step": 6839 }, { "epoch": 102.09, "learning_rate": 0.0001127719298245614, "loss": 0.0006, "step": 6840 }, { "epoch": 102.1, "learning_rate": 0.00011273684210526314, "loss": 0.0008, "step": 6841 }, { "epoch": 102.12, "learning_rate": 0.0001127017543859649, "loss": 0.0299, "step": 6842 }, { "epoch": 102.13, "learning_rate": 0.00011266666666666665, "loss": 0.0005, "step": 6843 }, { "epoch": 102.15, "learning_rate": 0.00011263157894736841, "loss": 0.0631, "step": 6844 }, { "epoch": 102.16, "learning_rate": 0.00011259649122807017, "loss": 0.0007, "step": 6845 }, { "epoch": 102.18, "learning_rate": 0.00011256140350877192, "loss": 0.0005, "step": 6846 }, { "epoch": 102.19, "learning_rate": 0.00011252631578947369, "loss": 0.0008, "step": 6847 }, { "epoch": 102.21, "learning_rate": 0.00011249122807017542, "loss": 0.0005, "step": 6848 }, { "epoch": 102.22, "learning_rate": 0.00011245614035087718, "loss": 0.0006, "step": 6849 }, { "epoch": 102.24, "learning_rate": 0.00011242105263157893, "loss": 0.0006, "step": 6850 }, { "epoch": 102.25, "learning_rate": 0.0001123859649122807, "loss": 0.0006, "step": 6851 }, { "epoch": 102.27, "learning_rate": 0.00011235087719298244, "loss": 0.0018, "step": 6852 }, { "epoch": 102.28, "learning_rate": 0.0001123157894736842, "loss": 0.0006, "step": 6853 }, { "epoch": 102.3, "learning_rate": 0.00011228070175438595, "loss": 0.0616, "step": 6854 }, { "epoch": 102.31, "learning_rate": 0.00011224561403508772, "loss": 0.0104, "step": 6855 }, { "epoch": 102.33, "learning_rate": 0.00011221052631578945, "loss": 0.0321, "step": 6856 }, { "epoch": 102.34, "learning_rate": 0.00011217543859649121, "loss": 0.0013, "step": 6857 }, { "epoch": 102.36, "learning_rate": 0.00011214035087719298, "loss": 0.0983, "step": 6858 }, { "epoch": 102.37, "learning_rate": 0.00011210526315789472, "loss": 0.0005, "step": 6859 }, { "epoch": 102.39, "learning_rate": 0.00011207017543859649, "loss": 0.0901, "step": 6860 }, { "epoch": 102.4, "learning_rate": 0.00011203508771929824, "loss": 0.078, "step": 6861 }, { "epoch": 102.42, "learning_rate": 0.000112, "loss": 0.0034, "step": 6862 }, { "epoch": 102.43, "learning_rate": 0.00011196491228070175, "loss": 0.0006, "step": 6863 }, { "epoch": 102.45, "learning_rate": 0.00011192982456140351, "loss": 0.0079, "step": 6864 }, { "epoch": 102.46, "learning_rate": 0.00011189473684210524, "loss": 0.0381, "step": 6865 }, { "epoch": 102.48, "learning_rate": 0.00011185964912280701, "loss": 0.0005, "step": 6866 }, { "epoch": 102.49, "learning_rate": 0.00011182456140350876, "loss": 0.0005, "step": 6867 }, { "epoch": 102.51, "learning_rate": 0.00011178947368421052, "loss": 0.0006, "step": 6868 }, { "epoch": 102.52, "learning_rate": 0.00011175438596491227, "loss": 0.0259, "step": 6869 }, { "epoch": 102.54, "learning_rate": 0.00011171929824561403, "loss": 0.0227, "step": 6870 }, { "epoch": 102.55, "learning_rate": 0.00011168421052631579, "loss": 0.0005, "step": 6871 }, { "epoch": 102.57, "learning_rate": 0.00011164912280701753, "loss": 0.0165, "step": 6872 }, { "epoch": 102.58, "learning_rate": 0.00011161403508771929, "loss": 0.0009, "step": 6873 }, { "epoch": 102.59, "learning_rate": 0.00011157894736842104, "loss": 0.0006, "step": 6874 }, { "epoch": 102.61, "learning_rate": 0.0001115438596491228, "loss": 0.0014, "step": 6875 }, { "epoch": 102.62, "learning_rate": 0.00011150877192982455, "loss": 0.2293, "step": 6876 }, { "epoch": 102.64, "learning_rate": 0.00011147368421052631, "loss": 0.0006, "step": 6877 }, { "epoch": 102.65, "learning_rate": 0.00011143859649122806, "loss": 0.0031, "step": 6878 }, { "epoch": 102.67, "learning_rate": 0.00011140350877192982, "loss": 0.0006, "step": 6879 }, { "epoch": 102.68, "learning_rate": 0.00011136842105263156, "loss": 0.0005, "step": 6880 }, { "epoch": 102.7, "learning_rate": 0.00011133333333333332, "loss": 0.0006, "step": 6881 }, { "epoch": 102.71, "learning_rate": 0.00011129824561403507, "loss": 0.0009, "step": 6882 }, { "epoch": 102.73, "learning_rate": 0.00011126315789473683, "loss": 0.0055, "step": 6883 }, { "epoch": 102.74, "learning_rate": 0.0001112280701754386, "loss": 0.029, "step": 6884 }, { "epoch": 102.76, "learning_rate": 0.00011119298245614034, "loss": 0.0482, "step": 6885 }, { "epoch": 102.77, "learning_rate": 0.0001111578947368421, "loss": 0.0178, "step": 6886 }, { "epoch": 102.79, "learning_rate": 0.00011112280701754385, "loss": 0.0007, "step": 6887 }, { "epoch": 102.8, "learning_rate": 0.0001110877192982456, "loss": 0.0005, "step": 6888 }, { "epoch": 102.82, "learning_rate": 0.00011105263157894735, "loss": 0.0006, "step": 6889 }, { "epoch": 102.83, "learning_rate": 0.00011101754385964911, "loss": 0.0014, "step": 6890 }, { "epoch": 102.85, "learning_rate": 0.00011098245614035086, "loss": 0.0006, "step": 6891 }, { "epoch": 102.86, "learning_rate": 0.00011094736842105262, "loss": 0.0005, "step": 6892 }, { "epoch": 102.88, "learning_rate": 0.00011091228070175437, "loss": 0.0545, "step": 6893 }, { "epoch": 102.89, "learning_rate": 0.00011087719298245614, "loss": 0.018, "step": 6894 }, { "epoch": 102.91, "learning_rate": 0.0001108421052631579, "loss": 0.0625, "step": 6895 }, { "epoch": 102.92, "learning_rate": 0.00011080701754385963, "loss": 0.0008, "step": 6896 }, { "epoch": 102.94, "learning_rate": 0.0001107719298245614, "loss": 0.0009, "step": 6897 }, { "epoch": 102.95, "learning_rate": 0.00011073684210526314, "loss": 0.0486, "step": 6898 }, { "epoch": 102.97, "learning_rate": 0.0001107017543859649, "loss": 0.0005, "step": 6899 }, { "epoch": 102.98, "learning_rate": 0.00011066666666666666, "loss": 0.0007, "step": 6900 }, { "epoch": 103.0, "learning_rate": 0.00011063157894736842, "loss": 0.0005, "step": 6901 }, { "epoch": 103.01, "learning_rate": 0.00011059649122807017, "loss": 0.0022, "step": 6902 }, { "epoch": 103.03, "learning_rate": 0.00011056140350877193, "loss": 0.0014, "step": 6903 }, { "epoch": 103.04, "learning_rate": 0.00011052631578947366, "loss": 0.001, "step": 6904 }, { "epoch": 103.06, "learning_rate": 0.00011049122807017543, "loss": 0.0007, "step": 6905 }, { "epoch": 103.07, "learning_rate": 0.00011045614035087717, "loss": 0.0019, "step": 6906 }, { "epoch": 103.09, "learning_rate": 0.00011042105263157894, "loss": 0.0005, "step": 6907 }, { "epoch": 103.1, "learning_rate": 0.0001103859649122807, "loss": 0.0007, "step": 6908 }, { "epoch": 103.12, "learning_rate": 0.00011035087719298245, "loss": 0.0418, "step": 6909 }, { "epoch": 103.13, "learning_rate": 0.00011031578947368421, "loss": 0.0005, "step": 6910 }, { "epoch": 103.15, "learning_rate": 0.00011028070175438596, "loss": 0.0019, "step": 6911 }, { "epoch": 103.16, "learning_rate": 0.00011024561403508771, "loss": 0.0004, "step": 6912 }, { "epoch": 103.18, "learning_rate": 0.00011021052631578946, "loss": 0.0004, "step": 6913 }, { "epoch": 103.19, "learning_rate": 0.00011017543859649122, "loss": 0.0004, "step": 6914 }, { "epoch": 103.21, "learning_rate": 0.00011014035087719297, "loss": 0.0005, "step": 6915 }, { "epoch": 103.22, "learning_rate": 0.00011010526315789473, "loss": 0.0005, "step": 6916 }, { "epoch": 103.24, "learning_rate": 0.00011007017543859648, "loss": 0.0011, "step": 6917 }, { "epoch": 103.25, "learning_rate": 0.00011003508771929824, "loss": 0.0006, "step": 6918 }, { "epoch": 103.27, "learning_rate": 0.00010999999999999998, "loss": 0.0006, "step": 6919 }, { "epoch": 103.28, "learning_rate": 0.00010996491228070174, "loss": 0.0004, "step": 6920 }, { "epoch": 103.3, "learning_rate": 0.0001099298245614035, "loss": 0.0006, "step": 6921 }, { "epoch": 103.31, "learning_rate": 0.00010989473684210525, "loss": 0.0006, "step": 6922 }, { "epoch": 103.33, "learning_rate": 0.00010985964912280701, "loss": 0.0007, "step": 6923 }, { "epoch": 103.34, "learning_rate": 0.00010982456140350876, "loss": 0.008, "step": 6924 }, { "epoch": 103.36, "learning_rate": 0.00010978947368421052, "loss": 0.0014, "step": 6925 }, { "epoch": 103.37, "learning_rate": 0.00010975438596491227, "loss": 0.0016, "step": 6926 }, { "epoch": 103.39, "learning_rate": 0.00010971929824561403, "loss": 0.0005, "step": 6927 }, { "epoch": 103.4, "learning_rate": 0.00010968421052631577, "loss": 0.0005, "step": 6928 }, { "epoch": 103.42, "learning_rate": 0.00010964912280701753, "loss": 0.0724, "step": 6929 }, { "epoch": 103.43, "learning_rate": 0.00010961403508771928, "loss": 0.0007, "step": 6930 }, { "epoch": 103.45, "learning_rate": 0.00010957894736842104, "loss": 0.0006, "step": 6931 }, { "epoch": 103.46, "learning_rate": 0.00010954385964912279, "loss": 0.0149, "step": 6932 }, { "epoch": 103.48, "learning_rate": 0.00010950877192982455, "loss": 0.0024, "step": 6933 }, { "epoch": 103.49, "learning_rate": 0.00010947368421052632, "loss": 0.1264, "step": 6934 }, { "epoch": 103.51, "learning_rate": 0.00010943859649122805, "loss": 0.0011, "step": 6935 }, { "epoch": 103.52, "learning_rate": 0.00010940350877192981, "loss": 0.0013, "step": 6936 }, { "epoch": 103.54, "learning_rate": 0.00010936842105263156, "loss": 0.0004, "step": 6937 }, { "epoch": 103.55, "learning_rate": 0.00010933333333333333, "loss": 0.1629, "step": 6938 }, { "epoch": 103.57, "learning_rate": 0.00010929824561403507, "loss": 0.0004, "step": 6939 }, { "epoch": 103.58, "learning_rate": 0.00010926315789473684, "loss": 0.0007, "step": 6940 }, { "epoch": 103.59, "learning_rate": 0.00010922807017543859, "loss": 0.0009, "step": 6941 }, { "epoch": 103.61, "learning_rate": 0.00010919298245614035, "loss": 0.0004, "step": 6942 }, { "epoch": 103.62, "learning_rate": 0.00010915789473684208, "loss": 0.0004, "step": 6943 }, { "epoch": 103.64, "learning_rate": 0.00010912280701754385, "loss": 0.0004, "step": 6944 }, { "epoch": 103.65, "learning_rate": 0.00010908771929824561, "loss": 0.0004, "step": 6945 }, { "epoch": 103.67, "learning_rate": 0.00010905263157894736, "loss": 0.0215, "step": 6946 }, { "epoch": 103.68, "learning_rate": 0.00010901754385964912, "loss": 0.0005, "step": 6947 }, { "epoch": 103.7, "learning_rate": 0.00010898245614035087, "loss": 0.0006, "step": 6948 }, { "epoch": 103.71, "learning_rate": 0.00010894736842105263, "loss": 0.0005, "step": 6949 }, { "epoch": 103.73, "learning_rate": 0.00010891228070175438, "loss": 0.0006, "step": 6950 }, { "epoch": 103.74, "learning_rate": 0.00010887719298245614, "loss": 0.0006, "step": 6951 }, { "epoch": 103.76, "learning_rate": 0.00010884210526315788, "loss": 0.0238, "step": 6952 }, { "epoch": 103.77, "learning_rate": 0.00010880701754385964, "loss": 0.0102, "step": 6953 }, { "epoch": 103.79, "learning_rate": 0.00010877192982456139, "loss": 0.0005, "step": 6954 }, { "epoch": 103.8, "learning_rate": 0.00010873684210526315, "loss": 0.0006, "step": 6955 }, { "epoch": 103.82, "learning_rate": 0.0001087017543859649, "loss": 0.0011, "step": 6956 }, { "epoch": 103.83, "learning_rate": 0.00010866666666666666, "loss": 0.0007, "step": 6957 }, { "epoch": 103.85, "learning_rate": 0.00010863157894736842, "loss": 0.0612, "step": 6958 }, { "epoch": 103.86, "learning_rate": 0.00010859649122807016, "loss": 0.0005, "step": 6959 }, { "epoch": 103.88, "learning_rate": 0.00010856140350877192, "loss": 0.0006, "step": 6960 }, { "epoch": 103.89, "learning_rate": 0.00010852631578947367, "loss": 0.0005, "step": 6961 }, { "epoch": 103.91, "learning_rate": 0.00010849122807017543, "loss": 0.0165, "step": 6962 }, { "epoch": 103.92, "learning_rate": 0.00010845614035087718, "loss": 0.0005, "step": 6963 }, { "epoch": 103.94, "learning_rate": 0.00010842105263157894, "loss": 0.0006, "step": 6964 }, { "epoch": 103.95, "learning_rate": 0.00010838596491228069, "loss": 0.0004, "step": 6965 }, { "epoch": 103.97, "learning_rate": 0.00010835087719298245, "loss": 0.0009, "step": 6966 }, { "epoch": 103.98, "learning_rate": 0.00010831578947368419, "loss": 0.0008, "step": 6967 }, { "epoch": 104.0, "learning_rate": 0.00010828070175438595, "loss": 0.0005, "step": 6968 }, { "epoch": 104.01, "learning_rate": 0.0001082456140350877, "loss": 0.0032, "step": 6969 }, { "epoch": 104.03, "learning_rate": 0.00010821052631578946, "loss": 0.0005, "step": 6970 }, { "epoch": 104.04, "learning_rate": 0.00010817543859649122, "loss": 0.0006, "step": 6971 }, { "epoch": 104.06, "learning_rate": 0.00010814035087719297, "loss": 0.0052, "step": 6972 }, { "epoch": 104.07, "learning_rate": 0.00010810526315789474, "loss": 0.0004, "step": 6973 }, { "epoch": 104.09, "learning_rate": 0.00010807017543859648, "loss": 0.0004, "step": 6974 }, { "epoch": 104.1, "learning_rate": 0.00010803508771929825, "loss": 0.0005, "step": 6975 }, { "epoch": 104.12, "learning_rate": 0.00010799999999999998, "loss": 0.0102, "step": 6976 }, { "epoch": 104.13, "learning_rate": 0.00010796491228070174, "loss": 0.0004, "step": 6977 }, { "epoch": 104.15, "learning_rate": 0.0001079298245614035, "loss": 0.0003, "step": 6978 }, { "epoch": 104.16, "learning_rate": 0.00010789473684210526, "loss": 0.0009, "step": 6979 }, { "epoch": 104.18, "learning_rate": 0.000107859649122807, "loss": 0.0116, "step": 6980 }, { "epoch": 104.19, "learning_rate": 0.00010782456140350877, "loss": 0.0003, "step": 6981 }, { "epoch": 104.21, "learning_rate": 0.0001077894736842105, "loss": 0.0005, "step": 6982 }, { "epoch": 104.22, "learning_rate": 0.00010775438596491226, "loss": 0.0074, "step": 6983 }, { "epoch": 104.24, "learning_rate": 0.00010771929824561403, "loss": 0.0005, "step": 6984 }, { "epoch": 104.25, "learning_rate": 0.00010768421052631578, "loss": 0.0004, "step": 6985 }, { "epoch": 104.27, "learning_rate": 0.00010764912280701754, "loss": 0.0471, "step": 6986 }, { "epoch": 104.28, "learning_rate": 0.00010761403508771929, "loss": 0.0005, "step": 6987 }, { "epoch": 104.3, "learning_rate": 0.00010757894736842105, "loss": 0.0229, "step": 6988 }, { "epoch": 104.31, "learning_rate": 0.0001075438596491228, "loss": 0.0003, "step": 6989 }, { "epoch": 104.33, "learning_rate": 0.00010750877192982456, "loss": 0.0005, "step": 6990 }, { "epoch": 104.34, "learning_rate": 0.0001074736842105263, "loss": 0.0004, "step": 6991 }, { "epoch": 104.36, "learning_rate": 0.00010743859649122806, "loss": 0.0005, "step": 6992 }, { "epoch": 104.37, "learning_rate": 0.0001074035087719298, "loss": 0.1992, "step": 6993 }, { "epoch": 104.39, "learning_rate": 0.00010736842105263157, "loss": 0.0003, "step": 6994 }, { "epoch": 104.4, "learning_rate": 0.00010733333333333332, "loss": 0.0005, "step": 6995 }, { "epoch": 104.42, "learning_rate": 0.00010729824561403508, "loss": 0.0018, "step": 6996 }, { "epoch": 104.43, "learning_rate": 0.00010726315789473684, "loss": 0.0399, "step": 6997 }, { "epoch": 104.45, "learning_rate": 0.00010722807017543859, "loss": 0.0004, "step": 6998 }, { "epoch": 104.46, "learning_rate": 0.00010719298245614035, "loss": 0.1261, "step": 6999 }, { "epoch": 104.48, "learning_rate": 0.00010715789473684209, "loss": 0.01, "step": 7000 }, { "epoch": 104.48, "eval_accuracy": 0.8673519334312286, "eval_f1": 0.866532551783554, "eval_loss": 0.7149230241775513, "eval_runtime": 346.4155, "eval_samples_per_second": 11.795, "eval_steps_per_second": 0.739, "step": 7000 }, { "epoch": 104.49, "learning_rate": 0.00010712280701754385, "loss": 0.0006, "step": 7001 }, { "epoch": 104.51, "learning_rate": 0.0001070877192982456, "loss": 0.0004, "step": 7002 }, { "epoch": 104.52, "learning_rate": 0.00010705263157894736, "loss": 0.0005, "step": 7003 }, { "epoch": 104.54, "learning_rate": 0.00010701754385964911, "loss": 0.0046, "step": 7004 }, { "epoch": 104.55, "learning_rate": 0.00010698245614035087, "loss": 0.0004, "step": 7005 }, { "epoch": 104.57, "learning_rate": 0.00010694736842105261, "loss": 0.0004, "step": 7006 }, { "epoch": 104.58, "learning_rate": 0.00010691228070175437, "loss": 0.0004, "step": 7007 }, { "epoch": 104.59, "learning_rate": 0.00010687719298245613, "loss": 0.0015, "step": 7008 }, { "epoch": 104.61, "learning_rate": 0.00010684210526315788, "loss": 0.0003, "step": 7009 }, { "epoch": 104.62, "learning_rate": 0.00010680701754385964, "loss": 0.0004, "step": 7010 }, { "epoch": 104.64, "learning_rate": 0.00010677192982456139, "loss": 0.0011, "step": 7011 }, { "epoch": 104.65, "learning_rate": 0.00010673684210526316, "loss": 0.0004, "step": 7012 }, { "epoch": 104.67, "learning_rate": 0.0001067017543859649, "loss": 0.0004, "step": 7013 }, { "epoch": 104.68, "learning_rate": 0.00010666666666666667, "loss": 0.0218, "step": 7014 }, { "epoch": 104.7, "learning_rate": 0.0001066315789473684, "loss": 0.0021, "step": 7015 }, { "epoch": 104.71, "learning_rate": 0.00010659649122807016, "loss": 0.0008, "step": 7016 }, { "epoch": 104.73, "learning_rate": 0.00010656140350877191, "loss": 0.0005, "step": 7017 }, { "epoch": 104.74, "learning_rate": 0.00010652631578947368, "loss": 0.0005, "step": 7018 }, { "epoch": 104.76, "learning_rate": 0.00010649122807017542, "loss": 0.0005, "step": 7019 }, { "epoch": 104.77, "learning_rate": 0.00010645614035087719, "loss": 0.0004, "step": 7020 }, { "epoch": 104.79, "learning_rate": 0.00010642105263157895, "loss": 0.0005, "step": 7021 }, { "epoch": 104.8, "learning_rate": 0.0001063859649122807, "loss": 0.0157, "step": 7022 }, { "epoch": 104.82, "learning_rate": 0.00010635087719298246, "loss": 0.0004, "step": 7023 }, { "epoch": 104.83, "learning_rate": 0.0001063157894736842, "loss": 0.0014, "step": 7024 }, { "epoch": 104.85, "learning_rate": 0.00010628070175438596, "loss": 0.0004, "step": 7025 }, { "epoch": 104.86, "learning_rate": 0.0001062456140350877, "loss": 0.0005, "step": 7026 }, { "epoch": 104.88, "learning_rate": 0.00010621052631578947, "loss": 0.0149, "step": 7027 }, { "epoch": 104.89, "learning_rate": 0.00010617543859649122, "loss": 0.062, "step": 7028 }, { "epoch": 104.91, "learning_rate": 0.00010614035087719298, "loss": 0.0003, "step": 7029 }, { "epoch": 104.92, "learning_rate": 0.00010610526315789471, "loss": 0.0006, "step": 7030 }, { "epoch": 104.94, "learning_rate": 0.00010607017543859648, "loss": 0.0006, "step": 7031 }, { "epoch": 104.95, "learning_rate": 0.00010603508771929823, "loss": 0.0004, "step": 7032 }, { "epoch": 104.97, "learning_rate": 0.00010599999999999999, "loss": 0.3033, "step": 7033 }, { "epoch": 104.98, "learning_rate": 0.00010596491228070175, "loss": 0.0003, "step": 7034 }, { "epoch": 105.0, "learning_rate": 0.0001059298245614035, "loss": 0.1008, "step": 7035 }, { "epoch": 105.01, "learning_rate": 0.00010589473684210526, "loss": 0.0013, "step": 7036 }, { "epoch": 105.03, "learning_rate": 0.00010585964912280701, "loss": 0.0004, "step": 7037 }, { "epoch": 105.04, "learning_rate": 0.00010582456140350877, "loss": 0.0014, "step": 7038 }, { "epoch": 105.06, "learning_rate": 0.00010578947368421051, "loss": 0.0006, "step": 7039 }, { "epoch": 105.07, "learning_rate": 0.00010575438596491227, "loss": 0.0003, "step": 7040 }, { "epoch": 105.09, "learning_rate": 0.00010571929824561402, "loss": 0.0016, "step": 7041 }, { "epoch": 105.1, "learning_rate": 0.00010568421052631578, "loss": 0.0015, "step": 7042 }, { "epoch": 105.12, "learning_rate": 0.00010564912280701753, "loss": 0.0004, "step": 7043 }, { "epoch": 105.13, "learning_rate": 0.00010561403508771929, "loss": 0.0005, "step": 7044 }, { "epoch": 105.15, "learning_rate": 0.00010557894736842104, "loss": 0.0004, "step": 7045 }, { "epoch": 105.16, "learning_rate": 0.0001055438596491228, "loss": 0.0015, "step": 7046 }, { "epoch": 105.18, "learning_rate": 0.00010550877192982457, "loss": 0.0004, "step": 7047 }, { "epoch": 105.19, "learning_rate": 0.0001054736842105263, "loss": 0.0322, "step": 7048 }, { "epoch": 105.21, "learning_rate": 0.00010543859649122806, "loss": 0.0088, "step": 7049 }, { "epoch": 105.22, "learning_rate": 0.00010540350877192981, "loss": 0.0003, "step": 7050 }, { "epoch": 105.24, "learning_rate": 0.00010536842105263157, "loss": 0.0005, "step": 7051 }, { "epoch": 105.25, "learning_rate": 0.00010533333333333332, "loss": 0.0082, "step": 7052 }, { "epoch": 105.27, "learning_rate": 0.00010529824561403509, "loss": 0.0004, "step": 7053 }, { "epoch": 105.28, "learning_rate": 0.00010526315789473682, "loss": 0.0005, "step": 7054 }, { "epoch": 105.3, "learning_rate": 0.00010522807017543858, "loss": 0.0004, "step": 7055 }, { "epoch": 105.31, "learning_rate": 0.00010519298245614033, "loss": 0.2876, "step": 7056 }, { "epoch": 105.33, "learning_rate": 0.0001051578947368421, "loss": 0.0004, "step": 7057 }, { "epoch": 105.34, "learning_rate": 0.00010512280701754386, "loss": 0.0005, "step": 7058 }, { "epoch": 105.36, "learning_rate": 0.0001050877192982456, "loss": 0.0005, "step": 7059 }, { "epoch": 105.37, "learning_rate": 0.00010505263157894737, "loss": 0.0408, "step": 7060 }, { "epoch": 105.39, "learning_rate": 0.00010501754385964912, "loss": 0.0003, "step": 7061 }, { "epoch": 105.4, "learning_rate": 0.00010498245614035088, "loss": 0.0004, "step": 7062 }, { "epoch": 105.42, "learning_rate": 0.00010494736842105261, "loss": 0.0004, "step": 7063 }, { "epoch": 105.43, "learning_rate": 0.00010491228070175438, "loss": 0.0342, "step": 7064 }, { "epoch": 105.45, "learning_rate": 0.00010487719298245613, "loss": 0.0008, "step": 7065 }, { "epoch": 105.46, "learning_rate": 0.00010484210526315789, "loss": 0.0004, "step": 7066 }, { "epoch": 105.48, "learning_rate": 0.00010480701754385964, "loss": 0.0004, "step": 7067 }, { "epoch": 105.49, "learning_rate": 0.0001047719298245614, "loss": 0.0004, "step": 7068 }, { "epoch": 105.51, "learning_rate": 0.00010473684210526315, "loss": 0.0005, "step": 7069 }, { "epoch": 105.52, "learning_rate": 0.00010470175438596491, "loss": 0.044, "step": 7070 }, { "epoch": 105.54, "learning_rate": 0.00010466666666666667, "loss": 0.0004, "step": 7071 }, { "epoch": 105.55, "learning_rate": 0.00010463157894736841, "loss": 0.0051, "step": 7072 }, { "epoch": 105.57, "learning_rate": 0.00010459649122807017, "loss": 0.0007, "step": 7073 }, { "epoch": 105.58, "learning_rate": 0.00010456140350877192, "loss": 0.0009, "step": 7074 }, { "epoch": 105.59, "learning_rate": 0.00010452631578947368, "loss": 0.0004, "step": 7075 }, { "epoch": 105.61, "learning_rate": 0.00010449122807017543, "loss": 0.0009, "step": 7076 }, { "epoch": 105.62, "learning_rate": 0.00010445614035087719, "loss": 0.0007, "step": 7077 }, { "epoch": 105.64, "learning_rate": 0.00010442105263157893, "loss": 0.0501, "step": 7078 }, { "epoch": 105.65, "learning_rate": 0.00010438596491228069, "loss": 0.1193, "step": 7079 }, { "epoch": 105.67, "learning_rate": 0.00010435087719298244, "loss": 0.0006, "step": 7080 }, { "epoch": 105.68, "learning_rate": 0.0001043157894736842, "loss": 0.0005, "step": 7081 }, { "epoch": 105.7, "learning_rate": 0.00010428070175438595, "loss": 0.0393, "step": 7082 }, { "epoch": 105.71, "learning_rate": 0.00010424561403508771, "loss": 0.0015, "step": 7083 }, { "epoch": 105.73, "learning_rate": 0.00010421052631578947, "loss": 0.0004, "step": 7084 }, { "epoch": 105.74, "learning_rate": 0.00010417543859649122, "loss": 0.0006, "step": 7085 }, { "epoch": 105.76, "learning_rate": 0.00010414035087719298, "loss": 0.0011, "step": 7086 }, { "epoch": 105.77, "learning_rate": 0.00010410526315789472, "loss": 0.0026, "step": 7087 }, { "epoch": 105.79, "learning_rate": 0.00010407017543859648, "loss": 0.0008, "step": 7088 }, { "epoch": 105.8, "learning_rate": 0.00010403508771929823, "loss": 0.0011, "step": 7089 }, { "epoch": 105.82, "learning_rate": 0.000104, "loss": 0.0729, "step": 7090 }, { "epoch": 105.83, "learning_rate": 0.00010396491228070174, "loss": 0.0147, "step": 7091 }, { "epoch": 105.85, "learning_rate": 0.0001039298245614035, "loss": 0.0004, "step": 7092 }, { "epoch": 105.86, "learning_rate": 0.00010389473684210525, "loss": 0.0007, "step": 7093 }, { "epoch": 105.88, "learning_rate": 0.00010385964912280702, "loss": 0.0004, "step": 7094 }, { "epoch": 105.89, "learning_rate": 0.00010382456140350875, "loss": 0.0019, "step": 7095 }, { "epoch": 105.91, "learning_rate": 0.00010378947368421051, "loss": 0.0011, "step": 7096 }, { "epoch": 105.92, "learning_rate": 0.00010375438596491228, "loss": 0.0003, "step": 7097 }, { "epoch": 105.94, "learning_rate": 0.00010371929824561402, "loss": 0.0016, "step": 7098 }, { "epoch": 105.95, "learning_rate": 0.00010368421052631579, "loss": 0.313, "step": 7099 }, { "epoch": 105.97, "learning_rate": 0.00010364912280701754, "loss": 0.0122, "step": 7100 }, { "epoch": 105.98, "learning_rate": 0.0001036140350877193, "loss": 0.0004, "step": 7101 }, { "epoch": 106.0, "learning_rate": 0.00010357894736842103, "loss": 0.0011, "step": 7102 }, { "epoch": 106.01, "learning_rate": 0.0001035438596491228, "loss": 0.0025, "step": 7103 }, { "epoch": 106.03, "learning_rate": 0.00010350877192982454, "loss": 0.0007, "step": 7104 }, { "epoch": 106.04, "learning_rate": 0.0001034736842105263, "loss": 0.0091, "step": 7105 }, { "epoch": 106.06, "learning_rate": 0.00010343859649122806, "loss": 0.0014, "step": 7106 }, { "epoch": 106.07, "learning_rate": 0.00010340350877192982, "loss": 0.0006, "step": 7107 }, { "epoch": 106.09, "learning_rate": 0.00010336842105263158, "loss": 0.0014, "step": 7108 }, { "epoch": 106.1, "learning_rate": 0.00010333333333333333, "loss": 0.0007, "step": 7109 }, { "epoch": 106.12, "learning_rate": 0.00010329824561403509, "loss": 0.0005, "step": 7110 }, { "epoch": 106.13, "learning_rate": 0.00010326315789473683, "loss": 0.0054, "step": 7111 }, { "epoch": 106.15, "learning_rate": 0.00010322807017543859, "loss": 0.0003, "step": 7112 }, { "epoch": 106.16, "learning_rate": 0.00010319298245614034, "loss": 0.0121, "step": 7113 }, { "epoch": 106.18, "learning_rate": 0.0001031578947368421, "loss": 0.0257, "step": 7114 }, { "epoch": 106.19, "learning_rate": 0.00010312280701754385, "loss": 0.0006, "step": 7115 }, { "epoch": 106.21, "learning_rate": 0.00010308771929824561, "loss": 0.0004, "step": 7116 }, { "epoch": 106.22, "learning_rate": 0.00010305263157894736, "loss": 0.0145, "step": 7117 }, { "epoch": 106.24, "learning_rate": 0.00010301754385964912, "loss": 0.0004, "step": 7118 }, { "epoch": 106.25, "learning_rate": 0.00010298245614035086, "loss": 0.0669, "step": 7119 }, { "epoch": 106.27, "learning_rate": 0.00010294736842105262, "loss": 0.0009, "step": 7120 }, { "epoch": 106.28, "learning_rate": 0.00010291228070175438, "loss": 0.0004, "step": 7121 }, { "epoch": 106.3, "learning_rate": 0.00010287719298245613, "loss": 0.0005, "step": 7122 }, { "epoch": 106.31, "learning_rate": 0.00010284210526315789, "loss": 0.0005, "step": 7123 }, { "epoch": 106.33, "learning_rate": 0.00010280701754385964, "loss": 0.0003, "step": 7124 }, { "epoch": 106.34, "learning_rate": 0.0001027719298245614, "loss": 0.0004, "step": 7125 }, { "epoch": 106.36, "learning_rate": 0.00010273684210526314, "loss": 0.0439, "step": 7126 }, { "epoch": 106.37, "learning_rate": 0.0001027017543859649, "loss": 0.0006, "step": 7127 }, { "epoch": 106.39, "learning_rate": 0.00010266666666666665, "loss": 0.1396, "step": 7128 }, { "epoch": 106.4, "learning_rate": 0.00010263157894736841, "loss": 0.0007, "step": 7129 }, { "epoch": 106.42, "learning_rate": 0.00010259649122807016, "loss": 0.0177, "step": 7130 }, { "epoch": 106.43, "learning_rate": 0.00010256140350877192, "loss": 0.0004, "step": 7131 }, { "epoch": 106.45, "learning_rate": 0.00010252631578947367, "loss": 0.0007, "step": 7132 }, { "epoch": 106.46, "learning_rate": 0.00010249122807017543, "loss": 0.0005, "step": 7133 }, { "epoch": 106.48, "learning_rate": 0.0001024561403508772, "loss": 0.0004, "step": 7134 }, { "epoch": 106.49, "learning_rate": 0.00010242105263157893, "loss": 0.1185, "step": 7135 }, { "epoch": 106.51, "learning_rate": 0.0001023859649122807, "loss": 0.0004, "step": 7136 }, { "epoch": 106.52, "learning_rate": 0.00010235087719298244, "loss": 0.0011, "step": 7137 }, { "epoch": 106.54, "learning_rate": 0.0001023157894736842, "loss": 0.0003, "step": 7138 }, { "epoch": 106.55, "learning_rate": 0.00010228070175438595, "loss": 0.0006, "step": 7139 }, { "epoch": 106.57, "learning_rate": 0.00010224561403508772, "loss": 0.0004, "step": 7140 }, { "epoch": 106.58, "learning_rate": 0.00010221052631578947, "loss": 0.0005, "step": 7141 }, { "epoch": 106.59, "learning_rate": 0.00010217543859649123, "loss": 0.0237, "step": 7142 }, { "epoch": 106.61, "learning_rate": 0.00010214035087719296, "loss": 0.0056, "step": 7143 }, { "epoch": 106.62, "learning_rate": 0.00010210526315789473, "loss": 0.0004, "step": 7144 }, { "epoch": 106.64, "learning_rate": 0.00010207017543859647, "loss": 0.0005, "step": 7145 }, { "epoch": 106.65, "learning_rate": 0.00010203508771929824, "loss": 0.0004, "step": 7146 }, { "epoch": 106.67, "learning_rate": 0.000102, "loss": 0.0004, "step": 7147 }, { "epoch": 106.68, "learning_rate": 0.00010196491228070175, "loss": 0.0003, "step": 7148 }, { "epoch": 106.7, "learning_rate": 0.00010192982456140351, "loss": 0.1856, "step": 7149 }, { "epoch": 106.71, "learning_rate": 0.00010189473684210525, "loss": 0.0004, "step": 7150 }, { "epoch": 106.73, "learning_rate": 0.00010185964912280701, "loss": 0.0004, "step": 7151 }, { "epoch": 106.74, "learning_rate": 0.00010182456140350876, "loss": 0.0004, "step": 7152 }, { "epoch": 106.76, "learning_rate": 0.00010178947368421052, "loss": 0.0005, "step": 7153 }, { "epoch": 106.77, "learning_rate": 0.00010175438596491227, "loss": 0.0337, "step": 7154 }, { "epoch": 106.79, "learning_rate": 0.00010171929824561403, "loss": 0.0004, "step": 7155 }, { "epoch": 106.8, "learning_rate": 0.00010168421052631578, "loss": 0.0031, "step": 7156 }, { "epoch": 106.82, "learning_rate": 0.00010164912280701754, "loss": 0.0008, "step": 7157 }, { "epoch": 106.83, "learning_rate": 0.00010161403508771928, "loss": 0.0005, "step": 7158 }, { "epoch": 106.85, "learning_rate": 0.00010157894736842104, "loss": 0.0005, "step": 7159 }, { "epoch": 106.86, "learning_rate": 0.0001015438596491228, "loss": 0.0004, "step": 7160 }, { "epoch": 106.88, "learning_rate": 0.00010150877192982455, "loss": 0.0003, "step": 7161 }, { "epoch": 106.89, "learning_rate": 0.00010147368421052631, "loss": 0.0009, "step": 7162 }, { "epoch": 106.91, "learning_rate": 0.00010143859649122806, "loss": 0.0054, "step": 7163 }, { "epoch": 106.92, "learning_rate": 0.00010140350877192982, "loss": 0.0003, "step": 7164 }, { "epoch": 106.94, "learning_rate": 0.00010136842105263157, "loss": 0.0005, "step": 7165 }, { "epoch": 106.95, "learning_rate": 0.00010133333333333332, "loss": 0.0004, "step": 7166 }, { "epoch": 106.97, "learning_rate": 0.00010129824561403507, "loss": 0.0015, "step": 7167 }, { "epoch": 106.98, "learning_rate": 0.00010126315789473683, "loss": 0.0294, "step": 7168 }, { "epoch": 107.0, "learning_rate": 0.00010122807017543858, "loss": 0.0004, "step": 7169 }, { "epoch": 107.01, "learning_rate": 0.00010119298245614034, "loss": 0.0877, "step": 7170 }, { "epoch": 107.03, "learning_rate": 0.0001011578947368421, "loss": 0.0004, "step": 7171 }, { "epoch": 107.04, "learning_rate": 0.00010112280701754385, "loss": 0.0005, "step": 7172 }, { "epoch": 107.06, "learning_rate": 0.00010108771929824562, "loss": 0.0004, "step": 7173 }, { "epoch": 107.07, "learning_rate": 0.00010105263157894735, "loss": 0.0003, "step": 7174 }, { "epoch": 107.09, "learning_rate": 0.00010101754385964911, "loss": 0.0008, "step": 7175 }, { "epoch": 107.1, "learning_rate": 0.00010098245614035086, "loss": 0.0104, "step": 7176 }, { "epoch": 107.12, "learning_rate": 0.00010094736842105263, "loss": 0.0695, "step": 7177 }, { "epoch": 107.13, "learning_rate": 0.00010091228070175437, "loss": 0.0003, "step": 7178 }, { "epoch": 107.15, "learning_rate": 0.00010087719298245614, "loss": 0.1677, "step": 7179 }, { "epoch": 107.16, "learning_rate": 0.00010084210526315789, "loss": 0.1828, "step": 7180 }, { "epoch": 107.18, "learning_rate": 0.00010080701754385965, "loss": 0.0051, "step": 7181 }, { "epoch": 107.19, "learning_rate": 0.00010077192982456138, "loss": 0.0006, "step": 7182 }, { "epoch": 107.21, "learning_rate": 0.00010073684210526314, "loss": 0.0007, "step": 7183 }, { "epoch": 107.22, "learning_rate": 0.00010070175438596491, "loss": 0.0192, "step": 7184 }, { "epoch": 107.24, "learning_rate": 0.00010066666666666666, "loss": 0.0003, "step": 7185 }, { "epoch": 107.25, "learning_rate": 0.00010063157894736842, "loss": 0.0004, "step": 7186 }, { "epoch": 107.27, "learning_rate": 0.00010059649122807017, "loss": 0.1569, "step": 7187 }, { "epoch": 107.28, "learning_rate": 0.00010056140350877193, "loss": 0.0006, "step": 7188 }, { "epoch": 107.3, "learning_rate": 0.00010052631578947368, "loss": 0.193, "step": 7189 }, { "epoch": 107.31, "learning_rate": 0.00010049122807017543, "loss": 0.1177, "step": 7190 }, { "epoch": 107.33, "learning_rate": 0.00010045614035087718, "loss": 0.0047, "step": 7191 }, { "epoch": 107.34, "learning_rate": 0.00010042105263157894, "loss": 0.0005, "step": 7192 }, { "epoch": 107.36, "learning_rate": 0.00010038596491228069, "loss": 0.0005, "step": 7193 }, { "epoch": 107.37, "learning_rate": 0.00010035087719298245, "loss": 0.0005, "step": 7194 }, { "epoch": 107.39, "learning_rate": 0.0001003157894736842, "loss": 0.0082, "step": 7195 }, { "epoch": 107.4, "learning_rate": 0.00010028070175438596, "loss": 0.084, "step": 7196 }, { "epoch": 107.42, "learning_rate": 0.00010024561403508772, "loss": 0.0384, "step": 7197 }, { "epoch": 107.43, "learning_rate": 0.00010021052631578946, "loss": 0.0004, "step": 7198 }, { "epoch": 107.45, "learning_rate": 0.00010017543859649122, "loss": 0.2362, "step": 7199 }, { "epoch": 107.46, "learning_rate": 0.00010014035087719297, "loss": 0.0135, "step": 7200 }, { "epoch": 107.46, "eval_accuracy": 0.8487518355359766, "eval_f1": 0.8522686889090442, "eval_loss": 0.8989996314048767, "eval_runtime": 344.41, "eval_samples_per_second": 11.864, "eval_steps_per_second": 0.743, "step": 7200 }, { "epoch": 107.48, "learning_rate": 0.00010010526315789473, "loss": 0.0007, "step": 7201 }, { "epoch": 107.49, "learning_rate": 0.00010007017543859648, "loss": 0.0007, "step": 7202 }, { "epoch": 107.51, "learning_rate": 0.00010003508771929824, "loss": 0.0039, "step": 7203 }, { "epoch": 107.52, "learning_rate": 9.999999999999999e-05, "loss": 0.0029, "step": 7204 }, { "epoch": 107.54, "learning_rate": 9.996491228070175e-05, "loss": 0.0005, "step": 7205 }, { "epoch": 107.55, "learning_rate": 9.992982456140349e-05, "loss": 0.0929, "step": 7206 }, { "epoch": 107.57, "learning_rate": 9.989473684210525e-05, "loss": 0.0011, "step": 7207 }, { "epoch": 107.58, "learning_rate": 9.9859649122807e-05, "loss": 0.0015, "step": 7208 }, { "epoch": 107.59, "learning_rate": 9.982456140350876e-05, "loss": 0.1055, "step": 7209 }, { "epoch": 107.61, "learning_rate": 9.978947368421052e-05, "loss": 0.0126, "step": 7210 }, { "epoch": 107.62, "learning_rate": 9.975438596491227e-05, "loss": 0.0004, "step": 7211 }, { "epoch": 107.64, "learning_rate": 9.971929824561404e-05, "loss": 0.0492, "step": 7212 }, { "epoch": 107.65, "learning_rate": 9.968421052631577e-05, "loss": 0.0005, "step": 7213 }, { "epoch": 107.67, "learning_rate": 9.964912280701753e-05, "loss": 0.0005, "step": 7214 }, { "epoch": 107.68, "learning_rate": 9.961403508771928e-05, "loss": 0.0004, "step": 7215 }, { "epoch": 107.7, "learning_rate": 9.957894736842104e-05, "loss": 0.0005, "step": 7216 }, { "epoch": 107.71, "learning_rate": 9.954385964912279e-05, "loss": 0.1363, "step": 7217 }, { "epoch": 107.73, "learning_rate": 9.950877192982456e-05, "loss": 0.0007, "step": 7218 }, { "epoch": 107.74, "learning_rate": 9.94736842105263e-05, "loss": 0.0005, "step": 7219 }, { "epoch": 107.76, "learning_rate": 9.943859649122807e-05, "loss": 0.0035, "step": 7220 }, { "epoch": 107.77, "learning_rate": 9.940350877192983e-05, "loss": 0.0008, "step": 7221 }, { "epoch": 107.79, "learning_rate": 9.936842105263156e-05, "loss": 0.0004, "step": 7222 }, { "epoch": 107.8, "learning_rate": 9.933333333333333e-05, "loss": 0.0004, "step": 7223 }, { "epoch": 107.82, "learning_rate": 9.929824561403508e-05, "loss": 0.2155, "step": 7224 }, { "epoch": 107.83, "learning_rate": 9.926315789473684e-05, "loss": 0.0007, "step": 7225 }, { "epoch": 107.85, "learning_rate": 9.922807017543859e-05, "loss": 0.2681, "step": 7226 }, { "epoch": 107.86, "learning_rate": 9.919298245614035e-05, "loss": 0.0009, "step": 7227 }, { "epoch": 107.88, "learning_rate": 9.91578947368421e-05, "loss": 0.2484, "step": 7228 }, { "epoch": 107.89, "learning_rate": 9.912280701754386e-05, "loss": 0.0004, "step": 7229 }, { "epoch": 107.91, "learning_rate": 9.90877192982456e-05, "loss": 0.0012, "step": 7230 }, { "epoch": 107.92, "learning_rate": 9.905263157894736e-05, "loss": 0.0004, "step": 7231 }, { "epoch": 107.94, "learning_rate": 9.90175438596491e-05, "loss": 0.0005, "step": 7232 }, { "epoch": 107.95, "learning_rate": 9.898245614035087e-05, "loss": 0.0005, "step": 7233 }, { "epoch": 107.97, "learning_rate": 9.894736842105263e-05, "loss": 0.0008, "step": 7234 }, { "epoch": 107.98, "learning_rate": 9.891228070175438e-05, "loss": 0.2469, "step": 7235 }, { "epoch": 108.0, "learning_rate": 9.887719298245614e-05, "loss": 0.0005, "step": 7236 }, { "epoch": 108.01, "learning_rate": 9.884210526315788e-05, "loss": 0.0026, "step": 7237 }, { "epoch": 108.03, "learning_rate": 9.880701754385964e-05, "loss": 0.0469, "step": 7238 }, { "epoch": 108.04, "learning_rate": 9.877192982456139e-05, "loss": 0.0003, "step": 7239 }, { "epoch": 108.06, "learning_rate": 9.873684210526315e-05, "loss": 0.0006, "step": 7240 }, { "epoch": 108.07, "learning_rate": 9.87017543859649e-05, "loss": 0.0007, "step": 7241 }, { "epoch": 108.09, "learning_rate": 9.866666666666666e-05, "loss": 0.0048, "step": 7242 }, { "epoch": 108.1, "learning_rate": 9.863157894736841e-05, "loss": 0.0006, "step": 7243 }, { "epoch": 108.12, "learning_rate": 9.859649122807017e-05, "loss": 0.0006, "step": 7244 }, { "epoch": 108.13, "learning_rate": 9.856140350877191e-05, "loss": 0.0004, "step": 7245 }, { "epoch": 108.15, "learning_rate": 9.852631578947367e-05, "loss": 0.0757, "step": 7246 }, { "epoch": 108.16, "learning_rate": 9.849122807017543e-05, "loss": 0.0004, "step": 7247 }, { "epoch": 108.18, "learning_rate": 9.845614035087718e-05, "loss": 0.0097, "step": 7248 }, { "epoch": 108.19, "learning_rate": 9.842105263157894e-05, "loss": 0.0004, "step": 7249 }, { "epoch": 108.21, "learning_rate": 9.838596491228069e-05, "loss": 0.0033, "step": 7250 }, { "epoch": 108.22, "learning_rate": 9.835087719298245e-05, "loss": 0.0004, "step": 7251 }, { "epoch": 108.24, "learning_rate": 9.83157894736842e-05, "loss": 0.0004, "step": 7252 }, { "epoch": 108.25, "learning_rate": 9.828070175438597e-05, "loss": 0.1304, "step": 7253 }, { "epoch": 108.27, "learning_rate": 9.82456140350877e-05, "loss": 0.0005, "step": 7254 }, { "epoch": 108.28, "learning_rate": 9.821052631578946e-05, "loss": 0.0037, "step": 7255 }, { "epoch": 108.3, "learning_rate": 9.817543859649121e-05, "loss": 0.0031, "step": 7256 }, { "epoch": 108.31, "learning_rate": 9.814035087719297e-05, "loss": 0.1553, "step": 7257 }, { "epoch": 108.33, "learning_rate": 9.810526315789472e-05, "loss": 0.0009, "step": 7258 }, { "epoch": 108.34, "learning_rate": 9.807017543859649e-05, "loss": 0.0005, "step": 7259 }, { "epoch": 108.36, "learning_rate": 9.803508771929825e-05, "loss": 0.0206, "step": 7260 }, { "epoch": 108.37, "learning_rate": 9.799999999999998e-05, "loss": 0.0004, "step": 7261 }, { "epoch": 108.39, "learning_rate": 9.796491228070175e-05, "loss": 0.0011, "step": 7262 }, { "epoch": 108.4, "learning_rate": 9.79298245614035e-05, "loss": 0.0066, "step": 7263 }, { "epoch": 108.42, "learning_rate": 9.789473684210526e-05, "loss": 0.0004, "step": 7264 }, { "epoch": 108.43, "learning_rate": 9.7859649122807e-05, "loss": 0.0007, "step": 7265 }, { "epoch": 108.45, "learning_rate": 9.782456140350877e-05, "loss": 0.0005, "step": 7266 }, { "epoch": 108.46, "learning_rate": 9.778947368421052e-05, "loss": 0.0004, "step": 7267 }, { "epoch": 108.48, "learning_rate": 9.775438596491228e-05, "loss": 0.0006, "step": 7268 }, { "epoch": 108.49, "learning_rate": 9.771929824561401e-05, "loss": 0.0005, "step": 7269 }, { "epoch": 108.51, "learning_rate": 9.768421052631578e-05, "loss": 0.0004, "step": 7270 }, { "epoch": 108.52, "learning_rate": 9.764912280701754e-05, "loss": 0.0018, "step": 7271 }, { "epoch": 108.54, "learning_rate": 9.761403508771929e-05, "loss": 0.2924, "step": 7272 }, { "epoch": 108.55, "learning_rate": 9.757894736842105e-05, "loss": 0.0004, "step": 7273 }, { "epoch": 108.57, "learning_rate": 9.75438596491228e-05, "loss": 0.0018, "step": 7274 }, { "epoch": 108.58, "learning_rate": 9.750877192982456e-05, "loss": 0.0012, "step": 7275 }, { "epoch": 108.59, "learning_rate": 9.747368421052631e-05, "loss": 0.0125, "step": 7276 }, { "epoch": 108.61, "learning_rate": 9.743859649122807e-05, "loss": 0.0004, "step": 7277 }, { "epoch": 108.62, "learning_rate": 9.740350877192981e-05, "loss": 0.0005, "step": 7278 }, { "epoch": 108.64, "learning_rate": 9.736842105263157e-05, "loss": 0.0007, "step": 7279 }, { "epoch": 108.65, "learning_rate": 9.733333333333332e-05, "loss": 0.0005, "step": 7280 }, { "epoch": 108.67, "learning_rate": 9.729824561403508e-05, "loss": 0.0003, "step": 7281 }, { "epoch": 108.68, "learning_rate": 9.726315789473683e-05, "loss": 0.0005, "step": 7282 }, { "epoch": 108.7, "learning_rate": 9.722807017543859e-05, "loss": 0.0014, "step": 7283 }, { "epoch": 108.71, "learning_rate": 9.719298245614035e-05, "loss": 0.0005, "step": 7284 }, { "epoch": 108.73, "learning_rate": 9.715789473684209e-05, "loss": 0.0026, "step": 7285 }, { "epoch": 108.74, "learning_rate": 9.712280701754385e-05, "loss": 0.0128, "step": 7286 }, { "epoch": 108.76, "learning_rate": 9.70877192982456e-05, "loss": 0.0005, "step": 7287 }, { "epoch": 108.77, "learning_rate": 9.705263157894736e-05, "loss": 0.0045, "step": 7288 }, { "epoch": 108.79, "learning_rate": 9.701754385964911e-05, "loss": 0.0012, "step": 7289 }, { "epoch": 108.8, "learning_rate": 9.698245614035087e-05, "loss": 0.0269, "step": 7290 }, { "epoch": 108.82, "learning_rate": 9.694736842105262e-05, "loss": 0.0005, "step": 7291 }, { "epoch": 108.83, "learning_rate": 9.691228070175439e-05, "loss": 0.0006, "step": 7292 }, { "epoch": 108.85, "learning_rate": 9.687719298245612e-05, "loss": 0.0005, "step": 7293 }, { "epoch": 108.86, "learning_rate": 9.684210526315788e-05, "loss": 0.0004, "step": 7294 }, { "epoch": 108.88, "learning_rate": 9.680701754385963e-05, "loss": 0.0004, "step": 7295 }, { "epoch": 108.89, "learning_rate": 9.67719298245614e-05, "loss": 0.0004, "step": 7296 }, { "epoch": 108.91, "learning_rate": 9.673684210526316e-05, "loss": 0.0004, "step": 7297 }, { "epoch": 108.92, "learning_rate": 9.67017543859649e-05, "loss": 0.0051, "step": 7298 }, { "epoch": 108.94, "learning_rate": 9.666666666666667e-05, "loss": 0.0005, "step": 7299 }, { "epoch": 108.95, "learning_rate": 9.663157894736842e-05, "loss": 0.0012, "step": 7300 }, { "epoch": 108.97, "learning_rate": 9.659649122807018e-05, "loss": 0.0003, "step": 7301 }, { "epoch": 108.98, "learning_rate": 9.656140350877191e-05, "loss": 0.0017, "step": 7302 }, { "epoch": 109.0, "learning_rate": 9.652631578947368e-05, "loss": 0.0251, "step": 7303 }, { "epoch": 109.01, "learning_rate": 9.649122807017542e-05, "loss": 0.0009, "step": 7304 }, { "epoch": 109.03, "learning_rate": 9.645614035087719e-05, "loss": 0.0003, "step": 7305 }, { "epoch": 109.04, "learning_rate": 9.642105263157894e-05, "loss": 0.1369, "step": 7306 }, { "epoch": 109.06, "learning_rate": 9.63859649122807e-05, "loss": 0.0092, "step": 7307 }, { "epoch": 109.07, "learning_rate": 9.635087719298243e-05, "loss": 0.0006, "step": 7308 }, { "epoch": 109.09, "learning_rate": 9.63157894736842e-05, "loss": 0.1566, "step": 7309 }, { "epoch": 109.1, "learning_rate": 9.628070175438596e-05, "loss": 0.0004, "step": 7310 }, { "epoch": 109.12, "learning_rate": 9.624561403508771e-05, "loss": 0.0004, "step": 7311 }, { "epoch": 109.13, "learning_rate": 9.621052631578947e-05, "loss": 0.0006, "step": 7312 }, { "epoch": 109.15, "learning_rate": 9.617543859649122e-05, "loss": 0.0004, "step": 7313 }, { "epoch": 109.16, "learning_rate": 9.614035087719298e-05, "loss": 0.0004, "step": 7314 }, { "epoch": 109.18, "learning_rate": 9.610526315789473e-05, "loss": 0.0004, "step": 7315 }, { "epoch": 109.19, "learning_rate": 9.607017543859649e-05, "loss": 0.0004, "step": 7316 }, { "epoch": 109.21, "learning_rate": 9.603508771929823e-05, "loss": 0.0009, "step": 7317 }, { "epoch": 109.22, "learning_rate": 9.599999999999999e-05, "loss": 0.0004, "step": 7318 }, { "epoch": 109.24, "learning_rate": 9.596491228070174e-05, "loss": 0.0004, "step": 7319 }, { "epoch": 109.25, "learning_rate": 9.59298245614035e-05, "loss": 0.0003, "step": 7320 }, { "epoch": 109.27, "learning_rate": 9.589473684210525e-05, "loss": 0.0003, "step": 7321 }, { "epoch": 109.28, "learning_rate": 9.585964912280701e-05, "loss": 0.0005, "step": 7322 }, { "epoch": 109.3, "learning_rate": 9.582456140350877e-05, "loss": 0.0004, "step": 7323 }, { "epoch": 109.31, "learning_rate": 9.578947368421052e-05, "loss": 0.0005, "step": 7324 }, { "epoch": 109.33, "learning_rate": 9.575438596491228e-05, "loss": 0.0004, "step": 7325 }, { "epoch": 109.34, "learning_rate": 9.571929824561402e-05, "loss": 0.0006, "step": 7326 }, { "epoch": 109.36, "learning_rate": 9.568421052631578e-05, "loss": 0.0004, "step": 7327 }, { "epoch": 109.37, "learning_rate": 9.564912280701753e-05, "loss": 0.0004, "step": 7328 }, { "epoch": 109.39, "learning_rate": 9.561403508771929e-05, "loss": 0.286, "step": 7329 }, { "epoch": 109.4, "learning_rate": 9.557894736842104e-05, "loss": 0.0004, "step": 7330 }, { "epoch": 109.42, "learning_rate": 9.55438596491228e-05, "loss": 0.0005, "step": 7331 }, { "epoch": 109.43, "learning_rate": 9.550877192982454e-05, "loss": 0.0003, "step": 7332 }, { "epoch": 109.45, "learning_rate": 9.54736842105263e-05, "loss": 0.0005, "step": 7333 }, { "epoch": 109.46, "learning_rate": 9.543859649122806e-05, "loss": 0.0005, "step": 7334 }, { "epoch": 109.48, "learning_rate": 9.540350877192981e-05, "loss": 0.0004, "step": 7335 }, { "epoch": 109.49, "learning_rate": 9.536842105263158e-05, "loss": 0.1862, "step": 7336 }, { "epoch": 109.51, "learning_rate": 9.533333333333332e-05, "loss": 0.0016, "step": 7337 }, { "epoch": 109.52, "learning_rate": 9.529824561403509e-05, "loss": 0.0007, "step": 7338 }, { "epoch": 109.54, "learning_rate": 9.526315789473684e-05, "loss": 0.0004, "step": 7339 }, { "epoch": 109.55, "learning_rate": 9.52280701754386e-05, "loss": 0.0475, "step": 7340 }, { "epoch": 109.57, "learning_rate": 9.519298245614033e-05, "loss": 0.0006, "step": 7341 }, { "epoch": 109.58, "learning_rate": 9.51578947368421e-05, "loss": 0.0012, "step": 7342 }, { "epoch": 109.59, "learning_rate": 9.512280701754384e-05, "loss": 0.0007, "step": 7343 }, { "epoch": 109.61, "learning_rate": 9.50877192982456e-05, "loss": 0.0071, "step": 7344 }, { "epoch": 109.62, "learning_rate": 9.505263157894735e-05, "loss": 0.0008, "step": 7345 }, { "epoch": 109.64, "learning_rate": 9.501754385964912e-05, "loss": 0.0005, "step": 7346 }, { "epoch": 109.65, "learning_rate": 9.498245614035088e-05, "loss": 0.0004, "step": 7347 }, { "epoch": 109.67, "learning_rate": 9.494736842105263e-05, "loss": 0.0324, "step": 7348 }, { "epoch": 109.68, "learning_rate": 9.491228070175439e-05, "loss": 0.0005, "step": 7349 }, { "epoch": 109.7, "learning_rate": 9.487719298245613e-05, "loss": 0.0459, "step": 7350 }, { "epoch": 109.71, "learning_rate": 9.484210526315789e-05, "loss": 0.0004, "step": 7351 }, { "epoch": 109.73, "learning_rate": 9.480701754385964e-05, "loss": 0.0005, "step": 7352 }, { "epoch": 109.74, "learning_rate": 9.47719298245614e-05, "loss": 0.0036, "step": 7353 }, { "epoch": 109.76, "learning_rate": 9.473684210526315e-05, "loss": 0.0005, "step": 7354 }, { "epoch": 109.77, "learning_rate": 9.470175438596491e-05, "loss": 0.0005, "step": 7355 }, { "epoch": 109.79, "learning_rate": 9.466666666666665e-05, "loss": 0.0013, "step": 7356 }, { "epoch": 109.8, "learning_rate": 9.463157894736841e-05, "loss": 0.0005, "step": 7357 }, { "epoch": 109.82, "learning_rate": 9.459649122807016e-05, "loss": 0.0004, "step": 7358 }, { "epoch": 109.83, "learning_rate": 9.456140350877192e-05, "loss": 0.0004, "step": 7359 }, { "epoch": 109.85, "learning_rate": 9.452631578947368e-05, "loss": 0.0005, "step": 7360 }, { "epoch": 109.86, "learning_rate": 9.449122807017543e-05, "loss": 0.0005, "step": 7361 }, { "epoch": 109.88, "learning_rate": 9.445614035087719e-05, "loss": 0.0005, "step": 7362 }, { "epoch": 109.89, "learning_rate": 9.442105263157894e-05, "loss": 0.0006, "step": 7363 }, { "epoch": 109.91, "learning_rate": 9.43859649122807e-05, "loss": 0.0004, "step": 7364 }, { "epoch": 109.92, "learning_rate": 9.435087719298244e-05, "loss": 0.0005, "step": 7365 }, { "epoch": 109.94, "learning_rate": 9.43157894736842e-05, "loss": 0.3628, "step": 7366 }, { "epoch": 109.95, "learning_rate": 9.428070175438595e-05, "loss": 0.0009, "step": 7367 }, { "epoch": 109.97, "learning_rate": 9.424561403508771e-05, "loss": 0.0006, "step": 7368 }, { "epoch": 109.98, "learning_rate": 9.421052631578946e-05, "loss": 0.0004, "step": 7369 }, { "epoch": 110.0, "learning_rate": 9.417543859649122e-05, "loss": 0.0581, "step": 7370 }, { "epoch": 110.01, "learning_rate": 9.414035087719297e-05, "loss": 0.0037, "step": 7371 }, { "epoch": 110.03, "learning_rate": 9.410526315789473e-05, "loss": 0.0004, "step": 7372 }, { "epoch": 110.04, "learning_rate": 9.407017543859648e-05, "loss": 0.0005, "step": 7373 }, { "epoch": 110.06, "learning_rate": 9.403508771929823e-05, "loss": 0.0006, "step": 7374 }, { "epoch": 110.07, "learning_rate": 9.4e-05, "loss": 0.0005, "step": 7375 }, { "epoch": 110.09, "learning_rate": 9.396491228070174e-05, "loss": 0.0007, "step": 7376 }, { "epoch": 110.1, "learning_rate": 9.39298245614035e-05, "loss": 0.0004, "step": 7377 }, { "epoch": 110.12, "learning_rate": 9.389473684210525e-05, "loss": 0.1453, "step": 7378 }, { "epoch": 110.13, "learning_rate": 9.385964912280702e-05, "loss": 0.0018, "step": 7379 }, { "epoch": 110.15, "learning_rate": 9.382456140350875e-05, "loss": 0.0004, "step": 7380 }, { "epoch": 110.16, "learning_rate": 9.378947368421051e-05, "loss": 0.0005, "step": 7381 }, { "epoch": 110.18, "learning_rate": 9.375438596491226e-05, "loss": 0.0008, "step": 7382 }, { "epoch": 110.19, "learning_rate": 9.371929824561403e-05, "loss": 0.0005, "step": 7383 }, { "epoch": 110.21, "learning_rate": 9.368421052631579e-05, "loss": 0.0007, "step": 7384 }, { "epoch": 110.22, "learning_rate": 9.364912280701754e-05, "loss": 0.0005, "step": 7385 }, { "epoch": 110.24, "learning_rate": 9.36140350877193e-05, "loss": 0.0006, "step": 7386 }, { "epoch": 110.25, "learning_rate": 9.357894736842105e-05, "loss": 0.0199, "step": 7387 }, { "epoch": 110.27, "learning_rate": 9.354385964912281e-05, "loss": 0.0981, "step": 7388 }, { "epoch": 110.28, "learning_rate": 9.350877192982455e-05, "loss": 0.0014, "step": 7389 }, { "epoch": 110.3, "learning_rate": 9.347368421052631e-05, "loss": 0.0004, "step": 7390 }, { "epoch": 110.31, "learning_rate": 9.343859649122806e-05, "loss": 0.0005, "step": 7391 }, { "epoch": 110.33, "learning_rate": 9.340350877192982e-05, "loss": 0.001, "step": 7392 }, { "epoch": 110.34, "learning_rate": 9.336842105263157e-05, "loss": 0.0007, "step": 7393 }, { "epoch": 110.36, "learning_rate": 9.333333333333333e-05, "loss": 0.0005, "step": 7394 }, { "epoch": 110.37, "learning_rate": 9.329824561403508e-05, "loss": 0.003, "step": 7395 }, { "epoch": 110.39, "learning_rate": 9.326315789473684e-05, "loss": 0.0005, "step": 7396 }, { "epoch": 110.4, "learning_rate": 9.322807017543859e-05, "loss": 0.0031, "step": 7397 }, { "epoch": 110.42, "learning_rate": 9.319298245614034e-05, "loss": 0.1359, "step": 7398 }, { "epoch": 110.43, "learning_rate": 9.31578947368421e-05, "loss": 0.0018, "step": 7399 }, { "epoch": 110.45, "learning_rate": 9.312280701754385e-05, "loss": 0.0056, "step": 7400 }, { "epoch": 110.45, "eval_accuracy": 0.8663729809104258, "eval_f1": 0.8672745803593204, "eval_loss": 0.7320371270179749, "eval_runtime": 345.8375, "eval_samples_per_second": 11.815, "eval_steps_per_second": 0.74, "step": 7400 }, { "epoch": 110.46, "learning_rate": 9.308771929824561e-05, "loss": 0.0007, "step": 7401 }, { "epoch": 110.48, "learning_rate": 9.305263157894736e-05, "loss": 0.0004, "step": 7402 }, { "epoch": 110.49, "learning_rate": 9.301754385964912e-05, "loss": 0.0004, "step": 7403 }, { "epoch": 110.51, "learning_rate": 9.298245614035086e-05, "loss": 0.0007, "step": 7404 }, { "epoch": 110.52, "learning_rate": 9.294736842105262e-05, "loss": 0.0004, "step": 7405 }, { "epoch": 110.54, "learning_rate": 9.291228070175437e-05, "loss": 0.0021, "step": 7406 }, { "epoch": 110.55, "learning_rate": 9.287719298245613e-05, "loss": 0.0136, "step": 7407 }, { "epoch": 110.57, "learning_rate": 9.284210526315788e-05, "loss": 0.072, "step": 7408 }, { "epoch": 110.58, "learning_rate": 9.280701754385964e-05, "loss": 0.0006, "step": 7409 }, { "epoch": 110.59, "learning_rate": 9.27719298245614e-05, "loss": 0.0004, "step": 7410 }, { "epoch": 110.61, "learning_rate": 9.273684210526315e-05, "loss": 0.2028, "step": 7411 }, { "epoch": 110.62, "learning_rate": 9.270175438596492e-05, "loss": 0.0009, "step": 7412 }, { "epoch": 110.64, "learning_rate": 9.266666666666665e-05, "loss": 0.0004, "step": 7413 }, { "epoch": 110.65, "learning_rate": 9.263157894736841e-05, "loss": 0.0006, "step": 7414 }, { "epoch": 110.67, "learning_rate": 9.259649122807016e-05, "loss": 0.0006, "step": 7415 }, { "epoch": 110.68, "learning_rate": 9.256140350877192e-05, "loss": 0.0008, "step": 7416 }, { "epoch": 110.7, "learning_rate": 9.252631578947367e-05, "loss": 0.0008, "step": 7417 }, { "epoch": 110.71, "learning_rate": 9.249122807017544e-05, "loss": 0.0013, "step": 7418 }, { "epoch": 110.73, "learning_rate": 9.245614035087718e-05, "loss": 0.0007, "step": 7419 }, { "epoch": 110.74, "learning_rate": 9.242105263157893e-05, "loss": 0.0007, "step": 7420 }, { "epoch": 110.76, "learning_rate": 9.238596491228068e-05, "loss": 0.0004, "step": 7421 }, { "epoch": 110.77, "learning_rate": 9.235087719298244e-05, "loss": 0.1436, "step": 7422 }, { "epoch": 110.79, "learning_rate": 9.231578947368421e-05, "loss": 0.0007, "step": 7423 }, { "epoch": 110.8, "learning_rate": 9.228070175438596e-05, "loss": 0.0032, "step": 7424 }, { "epoch": 110.82, "learning_rate": 9.224561403508772e-05, "loss": 0.0004, "step": 7425 }, { "epoch": 110.83, "learning_rate": 9.221052631578947e-05, "loss": 0.0006, "step": 7426 }, { "epoch": 110.85, "learning_rate": 9.217543859649123e-05, "loss": 0.0038, "step": 7427 }, { "epoch": 110.86, "learning_rate": 9.214035087719296e-05, "loss": 0.0006, "step": 7428 }, { "epoch": 110.88, "learning_rate": 9.210526315789473e-05, "loss": 0.0011, "step": 7429 }, { "epoch": 110.89, "learning_rate": 9.207017543859648e-05, "loss": 0.0004, "step": 7430 }, { "epoch": 110.91, "learning_rate": 9.203508771929824e-05, "loss": 0.003, "step": 7431 }, { "epoch": 110.92, "learning_rate": 9.199999999999999e-05, "loss": 0.0009, "step": 7432 }, { "epoch": 110.94, "learning_rate": 9.196491228070175e-05, "loss": 0.0006, "step": 7433 }, { "epoch": 110.95, "learning_rate": 9.192982456140351e-05, "loss": 0.0026, "step": 7434 }, { "epoch": 110.97, "learning_rate": 9.189473684210526e-05, "loss": 0.0029, "step": 7435 }, { "epoch": 110.98, "learning_rate": 9.185964912280702e-05, "loss": 0.0007, "step": 7436 }, { "epoch": 111.0, "learning_rate": 9.182456140350876e-05, "loss": 0.0004, "step": 7437 }, { "epoch": 111.01, "learning_rate": 9.178947368421052e-05, "loss": 0.0081, "step": 7438 }, { "epoch": 111.03, "learning_rate": 9.175438596491227e-05, "loss": 0.0344, "step": 7439 }, { "epoch": 111.04, "learning_rate": 9.171929824561403e-05, "loss": 0.0006, "step": 7440 }, { "epoch": 111.06, "learning_rate": 9.168421052631578e-05, "loss": 0.002, "step": 7441 }, { "epoch": 111.07, "learning_rate": 9.164912280701754e-05, "loss": 0.0005, "step": 7442 }, { "epoch": 111.09, "learning_rate": 9.161403508771929e-05, "loss": 0.1795, "step": 7443 }, { "epoch": 111.1, "learning_rate": 9.157894736842104e-05, "loss": 0.0006, "step": 7444 }, { "epoch": 111.12, "learning_rate": 9.154385964912279e-05, "loss": 0.1568, "step": 7445 }, { "epoch": 111.13, "learning_rate": 9.150877192982455e-05, "loss": 0.0005, "step": 7446 }, { "epoch": 111.15, "learning_rate": 9.147368421052631e-05, "loss": 0.0023, "step": 7447 }, { "epoch": 111.16, "learning_rate": 9.143859649122806e-05, "loss": 0.0008, "step": 7448 }, { "epoch": 111.18, "learning_rate": 9.140350877192982e-05, "loss": 0.0029, "step": 7449 }, { "epoch": 111.19, "learning_rate": 9.136842105263157e-05, "loss": 0.0005, "step": 7450 }, { "epoch": 111.21, "learning_rate": 9.133333333333334e-05, "loss": 0.0005, "step": 7451 }, { "epoch": 111.22, "learning_rate": 9.129824561403507e-05, "loss": 0.0669, "step": 7452 }, { "epoch": 111.24, "learning_rate": 9.126315789473683e-05, "loss": 0.0004, "step": 7453 }, { "epoch": 111.25, "learning_rate": 9.122807017543858e-05, "loss": 0.0005, "step": 7454 }, { "epoch": 111.27, "learning_rate": 9.119298245614034e-05, "loss": 0.0004, "step": 7455 }, { "epoch": 111.28, "learning_rate": 9.115789473684209e-05, "loss": 0.0004, "step": 7456 }, { "epoch": 111.3, "learning_rate": 9.112280701754386e-05, "loss": 0.0012, "step": 7457 }, { "epoch": 111.31, "learning_rate": 9.10877192982456e-05, "loss": 0.0005, "step": 7458 }, { "epoch": 111.33, "learning_rate": 9.105263157894737e-05, "loss": 0.0006, "step": 7459 }, { "epoch": 111.34, "learning_rate": 9.101754385964913e-05, "loss": 0.0266, "step": 7460 }, { "epoch": 111.36, "learning_rate": 9.098245614035086e-05, "loss": 0.0007, "step": 7461 }, { "epoch": 111.37, "learning_rate": 9.094736842105263e-05, "loss": 0.0006, "step": 7462 }, { "epoch": 111.39, "learning_rate": 9.091228070175437e-05, "loss": 0.0004, "step": 7463 }, { "epoch": 111.4, "learning_rate": 9.087719298245614e-05, "loss": 0.0004, "step": 7464 }, { "epoch": 111.42, "learning_rate": 9.084210526315789e-05, "loss": 0.0004, "step": 7465 }, { "epoch": 111.43, "learning_rate": 9.080701754385965e-05, "loss": 0.0008, "step": 7466 }, { "epoch": 111.45, "learning_rate": 9.077192982456138e-05, "loss": 0.0007, "step": 7467 }, { "epoch": 111.46, "learning_rate": 9.073684210526315e-05, "loss": 0.0005, "step": 7468 }, { "epoch": 111.48, "learning_rate": 9.07017543859649e-05, "loss": 0.0004, "step": 7469 }, { "epoch": 111.49, "learning_rate": 9.066666666666666e-05, "loss": 0.0004, "step": 7470 }, { "epoch": 111.51, "learning_rate": 9.06315789473684e-05, "loss": 0.0129, "step": 7471 }, { "epoch": 111.52, "learning_rate": 9.059649122807017e-05, "loss": 0.002, "step": 7472 }, { "epoch": 111.54, "learning_rate": 9.056140350877193e-05, "loss": 0.0004, "step": 7473 }, { "epoch": 111.55, "learning_rate": 9.052631578947368e-05, "loss": 0.0004, "step": 7474 }, { "epoch": 111.57, "learning_rate": 9.049122807017544e-05, "loss": 0.0006, "step": 7475 }, { "epoch": 111.58, "learning_rate": 9.045614035087718e-05, "loss": 0.0004, "step": 7476 }, { "epoch": 111.59, "learning_rate": 9.042105263157894e-05, "loss": 0.001, "step": 7477 }, { "epoch": 111.61, "learning_rate": 9.038596491228069e-05, "loss": 0.0004, "step": 7478 }, { "epoch": 111.62, "learning_rate": 9.035087719298245e-05, "loss": 0.0007, "step": 7479 }, { "epoch": 111.64, "learning_rate": 9.03157894736842e-05, "loss": 0.0004, "step": 7480 }, { "epoch": 111.65, "learning_rate": 9.028070175438596e-05, "loss": 0.0005, "step": 7481 }, { "epoch": 111.67, "learning_rate": 9.024561403508771e-05, "loss": 0.0073, "step": 7482 }, { "epoch": 111.68, "learning_rate": 9.021052631578947e-05, "loss": 0.0004, "step": 7483 }, { "epoch": 111.7, "learning_rate": 9.017543859649123e-05, "loss": 0.0004, "step": 7484 }, { "epoch": 111.71, "learning_rate": 9.014035087719297e-05, "loss": 0.0154, "step": 7485 }, { "epoch": 111.73, "learning_rate": 9.010526315789473e-05, "loss": 0.003, "step": 7486 }, { "epoch": 111.74, "learning_rate": 9.007017543859648e-05, "loss": 0.0004, "step": 7487 }, { "epoch": 111.76, "learning_rate": 9.003508771929824e-05, "loss": 0.0004, "step": 7488 }, { "epoch": 111.77, "learning_rate": 8.999999999999999e-05, "loss": 0.0466, "step": 7489 }, { "epoch": 111.79, "learning_rate": 8.996491228070175e-05, "loss": 0.0004, "step": 7490 }, { "epoch": 111.8, "learning_rate": 8.992982456140349e-05, "loss": 0.0006, "step": 7491 }, { "epoch": 111.82, "learning_rate": 8.989473684210525e-05, "loss": 0.1431, "step": 7492 }, { "epoch": 111.83, "learning_rate": 8.9859649122807e-05, "loss": 0.0005, "step": 7493 }, { "epoch": 111.85, "learning_rate": 8.982456140350876e-05, "loss": 0.0008, "step": 7494 }, { "epoch": 111.86, "learning_rate": 8.978947368421051e-05, "loss": 0.0003, "step": 7495 }, { "epoch": 111.88, "learning_rate": 8.975438596491227e-05, "loss": 0.0004, "step": 7496 }, { "epoch": 111.89, "learning_rate": 8.971929824561404e-05, "loss": 0.0014, "step": 7497 }, { "epoch": 111.91, "learning_rate": 8.968421052631579e-05, "loss": 0.0004, "step": 7498 }, { "epoch": 111.92, "learning_rate": 8.964912280701755e-05, "loss": 0.0004, "step": 7499 }, { "epoch": 111.94, "learning_rate": 8.961403508771928e-05, "loss": 0.4048, "step": 7500 }, { "epoch": 111.95, "learning_rate": 8.957894736842105e-05, "loss": 0.0005, "step": 7501 }, { "epoch": 111.97, "learning_rate": 8.95438596491228e-05, "loss": 0.0007, "step": 7502 }, { "epoch": 111.98, "learning_rate": 8.950877192982456e-05, "loss": 0.0003, "step": 7503 }, { "epoch": 112.0, "learning_rate": 8.94736842105263e-05, "loss": 0.0005, "step": 7504 }, { "epoch": 112.01, "learning_rate": 8.943859649122807e-05, "loss": 0.0006, "step": 7505 }, { "epoch": 112.03, "learning_rate": 8.940350877192982e-05, "loss": 0.0004, "step": 7506 }, { "epoch": 112.04, "learning_rate": 8.936842105263158e-05, "loss": 0.0005, "step": 7507 }, { "epoch": 112.06, "learning_rate": 8.933333333333331e-05, "loss": 0.0004, "step": 7508 }, { "epoch": 112.07, "learning_rate": 8.929824561403508e-05, "loss": 0.0005, "step": 7509 }, { "epoch": 112.09, "learning_rate": 8.926315789473684e-05, "loss": 0.001, "step": 7510 }, { "epoch": 112.1, "learning_rate": 8.922807017543859e-05, "loss": 0.0007, "step": 7511 }, { "epoch": 112.12, "learning_rate": 8.919298245614035e-05, "loss": 0.0019, "step": 7512 }, { "epoch": 112.13, "learning_rate": 8.91578947368421e-05, "loss": 0.002, "step": 7513 }, { "epoch": 112.15, "learning_rate": 8.912280701754386e-05, "loss": 0.0004, "step": 7514 }, { "epoch": 112.16, "learning_rate": 8.90877192982456e-05, "loss": 0.0005, "step": 7515 }, { "epoch": 112.18, "learning_rate": 8.905263157894736e-05, "loss": 0.0004, "step": 7516 }, { "epoch": 112.19, "learning_rate": 8.901754385964911e-05, "loss": 0.0071, "step": 7517 }, { "epoch": 112.21, "learning_rate": 8.898245614035087e-05, "loss": 0.0004, "step": 7518 }, { "epoch": 112.22, "learning_rate": 8.894736842105262e-05, "loss": 0.0008, "step": 7519 }, { "epoch": 112.24, "learning_rate": 8.891228070175438e-05, "loss": 0.0004, "step": 7520 }, { "epoch": 112.25, "learning_rate": 8.887719298245613e-05, "loss": 0.0005, "step": 7521 }, { "epoch": 112.27, "learning_rate": 8.884210526315789e-05, "loss": 0.0009, "step": 7522 }, { "epoch": 112.28, "learning_rate": 8.880701754385965e-05, "loss": 0.0004, "step": 7523 }, { "epoch": 112.3, "learning_rate": 8.877192982456139e-05, "loss": 0.0004, "step": 7524 }, { "epoch": 112.31, "learning_rate": 8.873684210526315e-05, "loss": 0.0005, "step": 7525 }, { "epoch": 112.33, "learning_rate": 8.87017543859649e-05, "loss": 0.0004, "step": 7526 }, { "epoch": 112.34, "learning_rate": 8.866666666666666e-05, "loss": 0.0005, "step": 7527 }, { "epoch": 112.36, "learning_rate": 8.863157894736841e-05, "loss": 0.0006, "step": 7528 }, { "epoch": 112.37, "learning_rate": 8.859649122807017e-05, "loss": 0.0008, "step": 7529 }, { "epoch": 112.39, "learning_rate": 8.856140350877192e-05, "loss": 0.0005, "step": 7530 }, { "epoch": 112.4, "learning_rate": 8.852631578947368e-05, "loss": 0.0005, "step": 7531 }, { "epoch": 112.42, "learning_rate": 8.849122807017542e-05, "loss": 0.0004, "step": 7532 }, { "epoch": 112.43, "learning_rate": 8.845614035087718e-05, "loss": 0.0004, "step": 7533 }, { "epoch": 112.45, "learning_rate": 8.842105263157893e-05, "loss": 0.0003, "step": 7534 }, { "epoch": 112.46, "learning_rate": 8.83859649122807e-05, "loss": 0.0011, "step": 7535 }, { "epoch": 112.48, "learning_rate": 8.835087719298246e-05, "loss": 0.0003, "step": 7536 }, { "epoch": 112.49, "learning_rate": 8.83157894736842e-05, "loss": 0.0102, "step": 7537 }, { "epoch": 112.51, "learning_rate": 8.828070175438597e-05, "loss": 0.0007, "step": 7538 }, { "epoch": 112.52, "learning_rate": 8.82456140350877e-05, "loss": 0.0003, "step": 7539 }, { "epoch": 112.54, "learning_rate": 8.821052631578946e-05, "loss": 0.0003, "step": 7540 }, { "epoch": 112.55, "learning_rate": 8.817543859649121e-05, "loss": 0.0004, "step": 7541 }, { "epoch": 112.57, "learning_rate": 8.814035087719298e-05, "loss": 0.0003, "step": 7542 }, { "epoch": 112.58, "learning_rate": 8.810526315789472e-05, "loss": 0.0011, "step": 7543 }, { "epoch": 112.59, "learning_rate": 8.807017543859649e-05, "loss": 0.0056, "step": 7544 }, { "epoch": 112.61, "learning_rate": 8.803508771929824e-05, "loss": 0.0765, "step": 7545 }, { "epoch": 112.62, "learning_rate": 8.8e-05, "loss": 0.0003, "step": 7546 }, { "epoch": 112.64, "learning_rate": 8.796491228070176e-05, "loss": 0.0005, "step": 7547 }, { "epoch": 112.65, "learning_rate": 8.79298245614035e-05, "loss": 0.0004, "step": 7548 }, { "epoch": 112.67, "learning_rate": 8.789473684210526e-05, "loss": 0.0004, "step": 7549 }, { "epoch": 112.68, "learning_rate": 8.7859649122807e-05, "loss": 0.0005, "step": 7550 }, { "epoch": 112.7, "learning_rate": 8.782456140350877e-05, "loss": 0.0004, "step": 7551 }, { "epoch": 112.71, "learning_rate": 8.778947368421052e-05, "loss": 0.0004, "step": 7552 }, { "epoch": 112.73, "learning_rate": 8.775438596491228e-05, "loss": 0.0005, "step": 7553 }, { "epoch": 112.74, "learning_rate": 8.771929824561403e-05, "loss": 0.0005, "step": 7554 }, { "epoch": 112.76, "learning_rate": 8.768421052631579e-05, "loss": 0.0005, "step": 7555 }, { "epoch": 112.77, "learning_rate": 8.764912280701753e-05, "loss": 0.0003, "step": 7556 }, { "epoch": 112.79, "learning_rate": 8.761403508771929e-05, "loss": 0.0042, "step": 7557 }, { "epoch": 112.8, "learning_rate": 8.757894736842104e-05, "loss": 0.0004, "step": 7558 }, { "epoch": 112.82, "learning_rate": 8.75438596491228e-05, "loss": 0.0007, "step": 7559 }, { "epoch": 112.83, "learning_rate": 8.750877192982456e-05, "loss": 0.0003, "step": 7560 }, { "epoch": 112.85, "learning_rate": 8.747368421052631e-05, "loss": 0.0005, "step": 7561 }, { "epoch": 112.86, "learning_rate": 8.743859649122807e-05, "loss": 0.0005, "step": 7562 }, { "epoch": 112.88, "learning_rate": 8.740350877192981e-05, "loss": 0.0003, "step": 7563 }, { "epoch": 112.89, "learning_rate": 8.736842105263157e-05, "loss": 0.0005, "step": 7564 }, { "epoch": 112.91, "learning_rate": 8.733333333333332e-05, "loss": 0.0011, "step": 7565 }, { "epoch": 112.92, "learning_rate": 8.729824561403508e-05, "loss": 0.0009, "step": 7566 }, { "epoch": 112.94, "learning_rate": 8.726315789473683e-05, "loss": 0.0004, "step": 7567 }, { "epoch": 112.95, "learning_rate": 8.722807017543859e-05, "loss": 0.097, "step": 7568 }, { "epoch": 112.97, "learning_rate": 8.719298245614034e-05, "loss": 0.0004, "step": 7569 }, { "epoch": 112.98, "learning_rate": 8.71578947368421e-05, "loss": 0.0003, "step": 7570 }, { "epoch": 113.0, "learning_rate": 8.712280701754384e-05, "loss": 0.0004, "step": 7571 }, { "epoch": 113.01, "learning_rate": 8.70877192982456e-05, "loss": 0.0076, "step": 7572 }, { "epoch": 113.03, "learning_rate": 8.705263157894736e-05, "loss": 0.0003, "step": 7573 }, { "epoch": 113.04, "learning_rate": 8.701754385964911e-05, "loss": 0.0057, "step": 7574 }, { "epoch": 113.06, "learning_rate": 8.698245614035087e-05, "loss": 0.0005, "step": 7575 }, { "epoch": 113.07, "learning_rate": 8.694736842105262e-05, "loss": 0.0037, "step": 7576 }, { "epoch": 113.09, "learning_rate": 8.691228070175439e-05, "loss": 0.2041, "step": 7577 }, { "epoch": 113.1, "learning_rate": 8.687719298245613e-05, "loss": 0.0026, "step": 7578 }, { "epoch": 113.12, "learning_rate": 8.68421052631579e-05, "loss": 0.0009, "step": 7579 }, { "epoch": 113.13, "learning_rate": 8.680701754385963e-05, "loss": 0.0004, "step": 7580 }, { "epoch": 113.15, "learning_rate": 8.67719298245614e-05, "loss": 0.0008, "step": 7581 }, { "epoch": 113.16, "learning_rate": 8.673684210526314e-05, "loss": 0.0005, "step": 7582 }, { "epoch": 113.18, "learning_rate": 8.67017543859649e-05, "loss": 0.0003, "step": 7583 }, { "epoch": 113.19, "learning_rate": 8.666666666666665e-05, "loss": 0.0112, "step": 7584 }, { "epoch": 113.21, "learning_rate": 8.663157894736842e-05, "loss": 0.0011, "step": 7585 }, { "epoch": 113.22, "learning_rate": 8.659649122807018e-05, "loss": 0.0004, "step": 7586 }, { "epoch": 113.24, "learning_rate": 8.656140350877191e-05, "loss": 0.0005, "step": 7587 }, { "epoch": 113.25, "learning_rate": 8.652631578947368e-05, "loss": 0.0005, "step": 7588 }, { "epoch": 113.27, "learning_rate": 8.649122807017543e-05, "loss": 0.0046, "step": 7589 }, { "epoch": 113.28, "learning_rate": 8.645614035087719e-05, "loss": 0.0006, "step": 7590 }, { "epoch": 113.3, "learning_rate": 8.642105263157894e-05, "loss": 0.0848, "step": 7591 }, { "epoch": 113.31, "learning_rate": 8.63859649122807e-05, "loss": 0.0004, "step": 7592 }, { "epoch": 113.33, "learning_rate": 8.635087719298245e-05, "loss": 0.0401, "step": 7593 }, { "epoch": 113.34, "learning_rate": 8.631578947368421e-05, "loss": 0.2326, "step": 7594 }, { "epoch": 113.36, "learning_rate": 8.628070175438595e-05, "loss": 0.0007, "step": 7595 }, { "epoch": 113.37, "learning_rate": 8.624561403508771e-05, "loss": 0.0007, "step": 7596 }, { "epoch": 113.39, "learning_rate": 8.621052631578947e-05, "loss": 0.0086, "step": 7597 }, { "epoch": 113.4, "learning_rate": 8.617543859649122e-05, "loss": 0.0022, "step": 7598 }, { "epoch": 113.42, "learning_rate": 8.614035087719298e-05, "loss": 0.0006, "step": 7599 }, { "epoch": 113.43, "learning_rate": 8.610526315789473e-05, "loss": 0.0023, "step": 7600 }, { "epoch": 113.43, "eval_accuracy": 0.8705335291238375, "eval_f1": 0.8699635174956661, "eval_loss": 0.7108378410339355, "eval_runtime": 344.4393, "eval_samples_per_second": 11.863, "eval_steps_per_second": 0.743, "step": 7600 }, { "epoch": 113.45, "learning_rate": 8.607017543859649e-05, "loss": 0.0003, "step": 7601 }, { "epoch": 113.46, "learning_rate": 8.603508771929824e-05, "loss": 0.1926, "step": 7602 }, { "epoch": 113.48, "learning_rate": 8.6e-05, "loss": 0.0004, "step": 7603 }, { "epoch": 113.49, "learning_rate": 8.596491228070174e-05, "loss": 0.0003, "step": 7604 }, { "epoch": 113.51, "learning_rate": 8.59298245614035e-05, "loss": 0.0006, "step": 7605 }, { "epoch": 113.52, "learning_rate": 8.589473684210525e-05, "loss": 0.0004, "step": 7606 }, { "epoch": 113.54, "learning_rate": 8.585964912280701e-05, "loss": 0.0003, "step": 7607 }, { "epoch": 113.55, "learning_rate": 8.582456140350876e-05, "loss": 0.0006, "step": 7608 }, { "epoch": 113.57, "learning_rate": 8.578947368421052e-05, "loss": 0.0024, "step": 7609 }, { "epoch": 113.58, "learning_rate": 8.575438596491229e-05, "loss": 0.0123, "step": 7610 }, { "epoch": 113.59, "learning_rate": 8.571929824561402e-05, "loss": 0.0013, "step": 7611 }, { "epoch": 113.61, "learning_rate": 8.568421052631578e-05, "loss": 0.0004, "step": 7612 }, { "epoch": 113.62, "learning_rate": 8.564912280701753e-05, "loss": 0.1429, "step": 7613 }, { "epoch": 113.64, "learning_rate": 8.56140350877193e-05, "loss": 0.0117, "step": 7614 }, { "epoch": 113.65, "learning_rate": 8.557894736842104e-05, "loss": 0.0003, "step": 7615 }, { "epoch": 113.67, "learning_rate": 8.55438596491228e-05, "loss": 0.0345, "step": 7616 }, { "epoch": 113.68, "learning_rate": 8.550877192982455e-05, "loss": 0.0004, "step": 7617 }, { "epoch": 113.7, "learning_rate": 8.547368421052632e-05, "loss": 0.0004, "step": 7618 }, { "epoch": 113.71, "learning_rate": 8.543859649122805e-05, "loss": 0.0004, "step": 7619 }, { "epoch": 113.73, "learning_rate": 8.540350877192981e-05, "loss": 0.0003, "step": 7620 }, { "epoch": 113.74, "learning_rate": 8.536842105263156e-05, "loss": 0.0003, "step": 7621 }, { "epoch": 113.76, "learning_rate": 8.533333333333332e-05, "loss": 0.0004, "step": 7622 }, { "epoch": 113.77, "learning_rate": 8.529824561403509e-05, "loss": 0.0003, "step": 7623 }, { "epoch": 113.79, "learning_rate": 8.526315789473684e-05, "loss": 0.0011, "step": 7624 }, { "epoch": 113.8, "learning_rate": 8.52280701754386e-05, "loss": 0.0003, "step": 7625 }, { "epoch": 113.82, "learning_rate": 8.519298245614035e-05, "loss": 0.0003, "step": 7626 }, { "epoch": 113.83, "learning_rate": 8.515789473684211e-05, "loss": 0.0004, "step": 7627 }, { "epoch": 113.85, "learning_rate": 8.512280701754384e-05, "loss": 0.0003, "step": 7628 }, { "epoch": 113.86, "learning_rate": 8.508771929824561e-05, "loss": 0.0007, "step": 7629 }, { "epoch": 113.88, "learning_rate": 8.505263157894736e-05, "loss": 0.0004, "step": 7630 }, { "epoch": 113.89, "learning_rate": 8.501754385964912e-05, "loss": 0.0186, "step": 7631 }, { "epoch": 113.91, "learning_rate": 8.498245614035087e-05, "loss": 0.0004, "step": 7632 }, { "epoch": 113.92, "learning_rate": 8.494736842105263e-05, "loss": 0.0003, "step": 7633 }, { "epoch": 113.94, "learning_rate": 8.491228070175436e-05, "loss": 0.0003, "step": 7634 }, { "epoch": 113.95, "learning_rate": 8.487719298245613e-05, "loss": 0.0003, "step": 7635 }, { "epoch": 113.97, "learning_rate": 8.484210526315789e-05, "loss": 0.0005, "step": 7636 }, { "epoch": 113.98, "learning_rate": 8.480701754385964e-05, "loss": 0.0005, "step": 7637 }, { "epoch": 114.0, "learning_rate": 8.47719298245614e-05, "loss": 0.0003, "step": 7638 }, { "epoch": 114.01, "learning_rate": 8.473684210526315e-05, "loss": 0.0042, "step": 7639 }, { "epoch": 114.03, "learning_rate": 8.470175438596491e-05, "loss": 0.0004, "step": 7640 }, { "epoch": 114.04, "learning_rate": 8.466666666666666e-05, "loss": 0.0004, "step": 7641 }, { "epoch": 114.06, "learning_rate": 8.463157894736842e-05, "loss": 0.0256, "step": 7642 }, { "epoch": 114.07, "learning_rate": 8.459649122807016e-05, "loss": 0.0015, "step": 7643 }, { "epoch": 114.09, "learning_rate": 8.456140350877192e-05, "loss": 0.0005, "step": 7644 }, { "epoch": 114.1, "learning_rate": 8.452631578947367e-05, "loss": 0.0006, "step": 7645 }, { "epoch": 114.12, "learning_rate": 8.449122807017543e-05, "loss": 0.0003, "step": 7646 }, { "epoch": 114.13, "learning_rate": 8.44561403508772e-05, "loss": 0.0003, "step": 7647 }, { "epoch": 114.15, "learning_rate": 8.442105263157894e-05, "loss": 0.0051, "step": 7648 }, { "epoch": 114.16, "learning_rate": 8.43859649122807e-05, "loss": 0.0266, "step": 7649 }, { "epoch": 114.18, "learning_rate": 8.435087719298245e-05, "loss": 0.0007, "step": 7650 }, { "epoch": 114.19, "learning_rate": 8.43157894736842e-05, "loss": 0.097, "step": 7651 }, { "epoch": 114.21, "learning_rate": 8.428070175438595e-05, "loss": 0.0003, "step": 7652 }, { "epoch": 114.22, "learning_rate": 8.424561403508771e-05, "loss": 0.0006, "step": 7653 }, { "epoch": 114.24, "learning_rate": 8.421052631578946e-05, "loss": 0.2054, "step": 7654 }, { "epoch": 114.25, "learning_rate": 8.417543859649122e-05, "loss": 0.0003, "step": 7655 }, { "epoch": 114.27, "learning_rate": 8.414035087719297e-05, "loss": 0.0113, "step": 7656 }, { "epoch": 114.28, "learning_rate": 8.410526315789474e-05, "loss": 0.0003, "step": 7657 }, { "epoch": 114.3, "learning_rate": 8.407017543859647e-05, "loss": 0.1907, "step": 7658 }, { "epoch": 114.31, "learning_rate": 8.403508771929823e-05, "loss": 0.0003, "step": 7659 }, { "epoch": 114.33, "learning_rate": 8.4e-05, "loss": 0.0004, "step": 7660 }, { "epoch": 114.34, "learning_rate": 8.396491228070174e-05, "loss": 0.0004, "step": 7661 }, { "epoch": 114.36, "learning_rate": 8.39298245614035e-05, "loss": 0.0003, "step": 7662 }, { "epoch": 114.37, "learning_rate": 8.389473684210526e-05, "loss": 0.0192, "step": 7663 }, { "epoch": 114.39, "learning_rate": 8.385964912280702e-05, "loss": 0.0004, "step": 7664 }, { "epoch": 114.4, "learning_rate": 8.382456140350877e-05, "loss": 0.0003, "step": 7665 }, { "epoch": 114.42, "learning_rate": 8.378947368421053e-05, "loss": 0.0018, "step": 7666 }, { "epoch": 114.43, "learning_rate": 8.375438596491226e-05, "loss": 0.0003, "step": 7667 }, { "epoch": 114.45, "learning_rate": 8.371929824561403e-05, "loss": 0.0003, "step": 7668 }, { "epoch": 114.46, "learning_rate": 8.368421052631578e-05, "loss": 0.0003, "step": 7669 }, { "epoch": 114.48, "learning_rate": 8.364912280701754e-05, "loss": 0.0003, "step": 7670 }, { "epoch": 114.49, "learning_rate": 8.361403508771929e-05, "loss": 0.0004, "step": 7671 }, { "epoch": 114.51, "learning_rate": 8.357894736842105e-05, "loss": 0.0003, "step": 7672 }, { "epoch": 114.52, "learning_rate": 8.354385964912281e-05, "loss": 0.0003, "step": 7673 }, { "epoch": 114.54, "learning_rate": 8.350877192982456e-05, "loss": 0.0003, "step": 7674 }, { "epoch": 114.55, "learning_rate": 8.347368421052631e-05, "loss": 0.0629, "step": 7675 }, { "epoch": 114.57, "learning_rate": 8.343859649122806e-05, "loss": 0.0005, "step": 7676 }, { "epoch": 114.58, "learning_rate": 8.340350877192982e-05, "loss": 0.1229, "step": 7677 }, { "epoch": 114.59, "learning_rate": 8.336842105263157e-05, "loss": 0.0004, "step": 7678 }, { "epoch": 114.61, "learning_rate": 8.333333333333333e-05, "loss": 0.0455, "step": 7679 }, { "epoch": 114.62, "learning_rate": 8.329824561403508e-05, "loss": 0.2398, "step": 7680 }, { "epoch": 114.64, "learning_rate": 8.326315789473684e-05, "loss": 0.0346, "step": 7681 }, { "epoch": 114.65, "learning_rate": 8.322807017543858e-05, "loss": 0.0005, "step": 7682 }, { "epoch": 114.67, "learning_rate": 8.319298245614034e-05, "loss": 0.0003, "step": 7683 }, { "epoch": 114.68, "learning_rate": 8.315789473684209e-05, "loss": 0.0004, "step": 7684 }, { "epoch": 114.7, "learning_rate": 8.312280701754385e-05, "loss": 0.0018, "step": 7685 }, { "epoch": 114.71, "learning_rate": 8.308771929824561e-05, "loss": 0.0004, "step": 7686 }, { "epoch": 114.73, "learning_rate": 8.305263157894736e-05, "loss": 0.0023, "step": 7687 }, { "epoch": 114.74, "learning_rate": 8.301754385964912e-05, "loss": 0.0005, "step": 7688 }, { "epoch": 114.76, "learning_rate": 8.298245614035087e-05, "loss": 0.0005, "step": 7689 }, { "epoch": 114.77, "learning_rate": 8.294736842105263e-05, "loss": 0.0004, "step": 7690 }, { "epoch": 114.79, "learning_rate": 8.291228070175437e-05, "loss": 0.0004, "step": 7691 }, { "epoch": 114.8, "learning_rate": 8.287719298245613e-05, "loss": 0.0004, "step": 7692 }, { "epoch": 114.82, "learning_rate": 8.284210526315788e-05, "loss": 0.0004, "step": 7693 }, { "epoch": 114.83, "learning_rate": 8.280701754385964e-05, "loss": 0.0004, "step": 7694 }, { "epoch": 114.85, "learning_rate": 8.277192982456139e-05, "loss": 0.0003, "step": 7695 }, { "epoch": 114.86, "learning_rate": 8.273684210526315e-05, "loss": 0.1994, "step": 7696 }, { "epoch": 114.88, "learning_rate": 8.27017543859649e-05, "loss": 0.0004, "step": 7697 }, { "epoch": 114.89, "learning_rate": 8.266666666666665e-05, "loss": 0.0005, "step": 7698 }, { "epoch": 114.91, "learning_rate": 8.263157894736841e-05, "loss": 0.0003, "step": 7699 }, { "epoch": 114.92, "learning_rate": 8.259649122807016e-05, "loss": 0.0004, "step": 7700 }, { "epoch": 114.94, "learning_rate": 8.256140350877193e-05, "loss": 0.0004, "step": 7701 }, { "epoch": 114.95, "learning_rate": 8.252631578947367e-05, "loss": 0.0044, "step": 7702 }, { "epoch": 114.97, "learning_rate": 8.249122807017544e-05, "loss": 0.003, "step": 7703 }, { "epoch": 114.98, "learning_rate": 8.245614035087719e-05, "loss": 0.0004, "step": 7704 }, { "epoch": 115.0, "learning_rate": 8.242105263157895e-05, "loss": 0.0003, "step": 7705 }, { "epoch": 115.01, "learning_rate": 8.238596491228068e-05, "loss": 0.0008, "step": 7706 }, { "epoch": 115.03, "learning_rate": 8.235087719298245e-05, "loss": 0.0015, "step": 7707 }, { "epoch": 115.04, "learning_rate": 8.23157894736842e-05, "loss": 0.0003, "step": 7708 }, { "epoch": 115.06, "learning_rate": 8.228070175438596e-05, "loss": 0.0005, "step": 7709 }, { "epoch": 115.07, "learning_rate": 8.224561403508772e-05, "loss": 0.0029, "step": 7710 }, { "epoch": 115.09, "learning_rate": 8.221052631578947e-05, "loss": 0.0004, "step": 7711 }, { "epoch": 115.1, "learning_rate": 8.217543859649123e-05, "loss": 0.0007, "step": 7712 }, { "epoch": 115.12, "learning_rate": 8.214035087719298e-05, "loss": 0.0005, "step": 7713 }, { "epoch": 115.13, "learning_rate": 8.210526315789474e-05, "loss": 0.0004, "step": 7714 }, { "epoch": 115.15, "learning_rate": 8.207017543859648e-05, "loss": 0.0004, "step": 7715 }, { "epoch": 115.16, "learning_rate": 8.203508771929824e-05, "loss": 0.0008, "step": 7716 }, { "epoch": 115.18, "learning_rate": 8.199999999999999e-05, "loss": 0.1419, "step": 7717 }, { "epoch": 115.19, "learning_rate": 8.196491228070175e-05, "loss": 0.079, "step": 7718 }, { "epoch": 115.21, "learning_rate": 8.19298245614035e-05, "loss": 0.0003, "step": 7719 }, { "epoch": 115.22, "learning_rate": 8.189473684210526e-05, "loss": 0.0013, "step": 7720 }, { "epoch": 115.24, "learning_rate": 8.1859649122807e-05, "loss": 0.0007, "step": 7721 }, { "epoch": 115.25, "learning_rate": 8.182456140350876e-05, "loss": 0.0007, "step": 7722 }, { "epoch": 115.27, "learning_rate": 8.178947368421052e-05, "loss": 0.0003, "step": 7723 }, { "epoch": 115.28, "learning_rate": 8.175438596491227e-05, "loss": 0.0003, "step": 7724 }, { "epoch": 115.3, "learning_rate": 8.171929824561403e-05, "loss": 0.0004, "step": 7725 }, { "epoch": 115.31, "learning_rate": 8.168421052631578e-05, "loss": 0.0004, "step": 7726 }, { "epoch": 115.33, "learning_rate": 8.164912280701754e-05, "loss": 0.0043, "step": 7727 }, { "epoch": 115.34, "learning_rate": 8.161403508771929e-05, "loss": 0.0025, "step": 7728 }, { "epoch": 115.36, "learning_rate": 8.157894736842105e-05, "loss": 0.0004, "step": 7729 }, { "epoch": 115.37, "learning_rate": 8.154385964912279e-05, "loss": 0.055, "step": 7730 }, { "epoch": 115.39, "learning_rate": 8.150877192982455e-05, "loss": 0.0006, "step": 7731 }, { "epoch": 115.4, "learning_rate": 8.14736842105263e-05, "loss": 0.0004, "step": 7732 }, { "epoch": 115.42, "learning_rate": 8.143859649122806e-05, "loss": 0.0003, "step": 7733 }, { "epoch": 115.43, "learning_rate": 8.140350877192981e-05, "loss": 0.0011, "step": 7734 }, { "epoch": 115.45, "learning_rate": 8.136842105263157e-05, "loss": 0.006, "step": 7735 }, { "epoch": 115.46, "learning_rate": 8.133333333333334e-05, "loss": 0.0004, "step": 7736 }, { "epoch": 115.48, "learning_rate": 8.129824561403508e-05, "loss": 0.0711, "step": 7737 }, { "epoch": 115.49, "learning_rate": 8.126315789473685e-05, "loss": 0.0136, "step": 7738 }, { "epoch": 115.51, "learning_rate": 8.122807017543858e-05, "loss": 0.0005, "step": 7739 }, { "epoch": 115.52, "learning_rate": 8.119298245614034e-05, "loss": 0.0004, "step": 7740 }, { "epoch": 115.54, "learning_rate": 8.11578947368421e-05, "loss": 0.0004, "step": 7741 }, { "epoch": 115.55, "learning_rate": 8.112280701754386e-05, "loss": 0.0003, "step": 7742 }, { "epoch": 115.57, "learning_rate": 8.10877192982456e-05, "loss": 0.0004, "step": 7743 }, { "epoch": 115.58, "learning_rate": 8.105263157894737e-05, "loss": 0.0028, "step": 7744 }, { "epoch": 115.59, "learning_rate": 8.10175438596491e-05, "loss": 0.0022, "step": 7745 }, { "epoch": 115.61, "learning_rate": 8.098245614035086e-05, "loss": 0.0003, "step": 7746 }, { "epoch": 115.62, "learning_rate": 8.094736842105261e-05, "loss": 0.0004, "step": 7747 }, { "epoch": 115.64, "learning_rate": 8.091228070175438e-05, "loss": 0.0004, "step": 7748 }, { "epoch": 115.65, "learning_rate": 8.087719298245614e-05, "loss": 0.0003, "step": 7749 }, { "epoch": 115.67, "learning_rate": 8.084210526315789e-05, "loss": 0.0003, "step": 7750 }, { "epoch": 115.68, "learning_rate": 8.080701754385965e-05, "loss": 0.0003, "step": 7751 }, { "epoch": 115.7, "learning_rate": 8.07719298245614e-05, "loss": 0.0003, "step": 7752 }, { "epoch": 115.71, "learning_rate": 8.073684210526316e-05, "loss": 0.0003, "step": 7753 }, { "epoch": 115.73, "learning_rate": 8.07017543859649e-05, "loss": 0.0003, "step": 7754 }, { "epoch": 115.74, "learning_rate": 8.066666666666666e-05, "loss": 0.0003, "step": 7755 }, { "epoch": 115.76, "learning_rate": 8.06315789473684e-05, "loss": 0.0003, "step": 7756 }, { "epoch": 115.77, "learning_rate": 8.059649122807017e-05, "loss": 0.0017, "step": 7757 }, { "epoch": 115.79, "learning_rate": 8.056140350877192e-05, "loss": 0.001, "step": 7758 }, { "epoch": 115.8, "learning_rate": 8.052631578947368e-05, "loss": 0.0004, "step": 7759 }, { "epoch": 115.82, "learning_rate": 8.049122807017544e-05, "loss": 0.0004, "step": 7760 }, { "epoch": 115.83, "learning_rate": 8.045614035087719e-05, "loss": 0.0263, "step": 7761 }, { "epoch": 115.85, "learning_rate": 8.042105263157895e-05, "loss": 0.0004, "step": 7762 }, { "epoch": 115.86, "learning_rate": 8.038596491228069e-05, "loss": 0.0003, "step": 7763 }, { "epoch": 115.88, "learning_rate": 8.035087719298245e-05, "loss": 0.0004, "step": 7764 }, { "epoch": 115.89, "learning_rate": 8.03157894736842e-05, "loss": 0.0003, "step": 7765 }, { "epoch": 115.91, "learning_rate": 8.028070175438596e-05, "loss": 0.0015, "step": 7766 }, { "epoch": 115.92, "learning_rate": 8.024561403508771e-05, "loss": 0.0004, "step": 7767 }, { "epoch": 115.94, "learning_rate": 8.021052631578947e-05, "loss": 0.0003, "step": 7768 }, { "epoch": 115.95, "learning_rate": 8.017543859649121e-05, "loss": 0.0005, "step": 7769 }, { "epoch": 115.97, "learning_rate": 8.014035087719297e-05, "loss": 0.0004, "step": 7770 }, { "epoch": 115.98, "learning_rate": 8.010526315789472e-05, "loss": 0.0026, "step": 7771 }, { "epoch": 116.0, "learning_rate": 8.007017543859648e-05, "loss": 0.0012, "step": 7772 }, { "epoch": 116.01, "learning_rate": 8.003508771929824e-05, "loss": 0.0028, "step": 7773 }, { "epoch": 116.03, "learning_rate": 7.999999999999999e-05, "loss": 0.0005, "step": 7774 }, { "epoch": 116.04, "learning_rate": 7.996491228070176e-05, "loss": 0.0403, "step": 7775 }, { "epoch": 116.06, "learning_rate": 7.99298245614035e-05, "loss": 0.0003, "step": 7776 }, { "epoch": 116.07, "learning_rate": 7.989473684210527e-05, "loss": 0.0007, "step": 7777 }, { "epoch": 116.09, "learning_rate": 7.9859649122807e-05, "loss": 0.0003, "step": 7778 }, { "epoch": 116.1, "learning_rate": 7.982456140350876e-05, "loss": 0.0004, "step": 7779 }, { "epoch": 116.12, "learning_rate": 7.978947368421051e-05, "loss": 0.0011, "step": 7780 }, { "epoch": 116.13, "learning_rate": 7.975438596491228e-05, "loss": 0.0004, "step": 7781 }, { "epoch": 116.15, "learning_rate": 7.971929824561402e-05, "loss": 0.0003, "step": 7782 }, { "epoch": 116.16, "learning_rate": 7.968421052631579e-05, "loss": 0.0004, "step": 7783 }, { "epoch": 116.18, "learning_rate": 7.964912280701753e-05, "loss": 0.0037, "step": 7784 }, { "epoch": 116.19, "learning_rate": 7.96140350877193e-05, "loss": 0.0003, "step": 7785 }, { "epoch": 116.21, "learning_rate": 7.957894736842106e-05, "loss": 0.0003, "step": 7786 }, { "epoch": 116.22, "learning_rate": 7.95438596491228e-05, "loss": 0.0406, "step": 7787 }, { "epoch": 116.24, "learning_rate": 7.950877192982456e-05, "loss": 0.0003, "step": 7788 }, { "epoch": 116.25, "learning_rate": 7.94736842105263e-05, "loss": 0.0003, "step": 7789 }, { "epoch": 116.27, "learning_rate": 7.943859649122807e-05, "loss": 0.0103, "step": 7790 }, { "epoch": 116.28, "learning_rate": 7.940350877192982e-05, "loss": 0.0003, "step": 7791 }, { "epoch": 116.3, "learning_rate": 7.936842105263158e-05, "loss": 0.0003, "step": 7792 }, { "epoch": 116.31, "learning_rate": 7.933333333333331e-05, "loss": 0.0003, "step": 7793 }, { "epoch": 116.33, "learning_rate": 7.929824561403508e-05, "loss": 0.0004, "step": 7794 }, { "epoch": 116.34, "learning_rate": 7.926315789473683e-05, "loss": 0.0003, "step": 7795 }, { "epoch": 116.36, "learning_rate": 7.922807017543859e-05, "loss": 0.0003, "step": 7796 }, { "epoch": 116.37, "learning_rate": 7.919298245614034e-05, "loss": 0.0003, "step": 7797 }, { "epoch": 116.39, "learning_rate": 7.91578947368421e-05, "loss": 0.0027, "step": 7798 }, { "epoch": 116.4, "learning_rate": 7.912280701754386e-05, "loss": 0.0003, "step": 7799 }, { "epoch": 116.42, "learning_rate": 7.908771929824561e-05, "loss": 0.0025, "step": 7800 }, { "epoch": 116.42, "eval_accuracy": 0.8820362212432697, "eval_f1": 0.8817875307257204, "eval_loss": 0.6464086174964905, "eval_runtime": 346.6255, "eval_samples_per_second": 11.788, "eval_steps_per_second": 0.739, "step": 7800 }, { "epoch": 116.43, "learning_rate": 7.905263157894737e-05, "loss": 0.0002, "step": 7801 }, { "epoch": 116.45, "learning_rate": 7.901754385964911e-05, "loss": 0.0003, "step": 7802 }, { "epoch": 116.46, "learning_rate": 7.898245614035087e-05, "loss": 0.0003, "step": 7803 }, { "epoch": 116.48, "learning_rate": 7.894736842105262e-05, "loss": 0.0003, "step": 7804 }, { "epoch": 116.49, "learning_rate": 7.891228070175438e-05, "loss": 0.0005, "step": 7805 }, { "epoch": 116.51, "learning_rate": 7.887719298245613e-05, "loss": 0.0004, "step": 7806 }, { "epoch": 116.52, "learning_rate": 7.884210526315789e-05, "loss": 0.0003, "step": 7807 }, { "epoch": 116.54, "learning_rate": 7.880701754385964e-05, "loss": 0.0076, "step": 7808 }, { "epoch": 116.55, "learning_rate": 7.87719298245614e-05, "loss": 0.0132, "step": 7809 }, { "epoch": 116.57, "learning_rate": 7.873684210526317e-05, "loss": 0.0007, "step": 7810 }, { "epoch": 116.58, "learning_rate": 7.87017543859649e-05, "loss": 0.0003, "step": 7811 }, { "epoch": 116.59, "learning_rate": 7.866666666666666e-05, "loss": 0.0003, "step": 7812 }, { "epoch": 116.61, "learning_rate": 7.863157894736841e-05, "loss": 0.0003, "step": 7813 }, { "epoch": 116.62, "learning_rate": 7.859649122807017e-05, "loss": 0.0002, "step": 7814 }, { "epoch": 116.64, "learning_rate": 7.856140350877192e-05, "loss": 0.0003, "step": 7815 }, { "epoch": 116.65, "learning_rate": 7.852631578947369e-05, "loss": 0.0263, "step": 7816 }, { "epoch": 116.67, "learning_rate": 7.849122807017542e-05, "loss": 0.0082, "step": 7817 }, { "epoch": 116.68, "learning_rate": 7.845614035087718e-05, "loss": 0.0004, "step": 7818 }, { "epoch": 116.7, "learning_rate": 7.842105263157893e-05, "loss": 0.0004, "step": 7819 }, { "epoch": 116.71, "learning_rate": 7.83859649122807e-05, "loss": 0.0003, "step": 7820 }, { "epoch": 116.73, "learning_rate": 7.835087719298244e-05, "loss": 0.0003, "step": 7821 }, { "epoch": 116.74, "learning_rate": 7.83157894736842e-05, "loss": 0.0003, "step": 7822 }, { "epoch": 116.76, "learning_rate": 7.828070175438597e-05, "loss": 0.0003, "step": 7823 }, { "epoch": 116.77, "learning_rate": 7.824561403508772e-05, "loss": 0.0003, "step": 7824 }, { "epoch": 116.79, "learning_rate": 7.821052631578948e-05, "loss": 0.0002, "step": 7825 }, { "epoch": 116.8, "learning_rate": 7.817543859649121e-05, "loss": 0.0003, "step": 7826 }, { "epoch": 116.82, "learning_rate": 7.814035087719298e-05, "loss": 0.0002, "step": 7827 }, { "epoch": 116.83, "learning_rate": 7.810526315789473e-05, "loss": 0.0003, "step": 7828 }, { "epoch": 116.85, "learning_rate": 7.807017543859649e-05, "loss": 0.001, "step": 7829 }, { "epoch": 116.86, "learning_rate": 7.803508771929824e-05, "loss": 0.0007, "step": 7830 }, { "epoch": 116.88, "learning_rate": 7.8e-05, "loss": 0.0003, "step": 7831 }, { "epoch": 116.89, "learning_rate": 7.796491228070175e-05, "loss": 0.0004, "step": 7832 }, { "epoch": 116.91, "learning_rate": 7.792982456140351e-05, "loss": 0.0002, "step": 7833 }, { "epoch": 116.92, "learning_rate": 7.789473684210524e-05, "loss": 0.0145, "step": 7834 }, { "epoch": 116.94, "learning_rate": 7.785964912280701e-05, "loss": 0.0039, "step": 7835 }, { "epoch": 116.95, "learning_rate": 7.782456140350877e-05, "loss": 0.0012, "step": 7836 }, { "epoch": 116.97, "learning_rate": 7.778947368421052e-05, "loss": 0.0005, "step": 7837 }, { "epoch": 116.98, "learning_rate": 7.775438596491228e-05, "loss": 0.0003, "step": 7838 }, { "epoch": 117.0, "learning_rate": 7.771929824561403e-05, "loss": 0.0109, "step": 7839 }, { "epoch": 117.01, "learning_rate": 7.768421052631579e-05, "loss": 0.0004, "step": 7840 }, { "epoch": 117.03, "learning_rate": 7.764912280701753e-05, "loss": 0.0003, "step": 7841 }, { "epoch": 117.04, "learning_rate": 7.761403508771929e-05, "loss": 0.0003, "step": 7842 }, { "epoch": 117.06, "learning_rate": 7.757894736842104e-05, "loss": 0.2597, "step": 7843 }, { "epoch": 117.07, "learning_rate": 7.75438596491228e-05, "loss": 0.0002, "step": 7844 }, { "epoch": 117.09, "learning_rate": 7.750877192982455e-05, "loss": 0.0002, "step": 7845 }, { "epoch": 117.1, "learning_rate": 7.747368421052631e-05, "loss": 0.0003, "step": 7846 }, { "epoch": 117.12, "learning_rate": 7.743859649122806e-05, "loss": 0.0002, "step": 7847 }, { "epoch": 117.13, "learning_rate": 7.740350877192982e-05, "loss": 0.0002, "step": 7848 }, { "epoch": 117.15, "learning_rate": 7.736842105263159e-05, "loss": 0.0002, "step": 7849 }, { "epoch": 117.16, "learning_rate": 7.733333333333332e-05, "loss": 0.0172, "step": 7850 }, { "epoch": 117.18, "learning_rate": 7.729824561403508e-05, "loss": 0.0003, "step": 7851 }, { "epoch": 117.19, "learning_rate": 7.726315789473683e-05, "loss": 0.0009, "step": 7852 }, { "epoch": 117.21, "learning_rate": 7.72280701754386e-05, "loss": 0.0002, "step": 7853 }, { "epoch": 117.22, "learning_rate": 7.719298245614034e-05, "loss": 0.0002, "step": 7854 }, { "epoch": 117.24, "learning_rate": 7.71578947368421e-05, "loss": 0.0003, "step": 7855 }, { "epoch": 117.25, "learning_rate": 7.712280701754385e-05, "loss": 0.0021, "step": 7856 }, { "epoch": 117.27, "learning_rate": 7.708771929824562e-05, "loss": 0.0101, "step": 7857 }, { "epoch": 117.28, "learning_rate": 7.705263157894735e-05, "loss": 0.0003, "step": 7858 }, { "epoch": 117.3, "learning_rate": 7.701754385964911e-05, "loss": 0.1783, "step": 7859 }, { "epoch": 117.31, "learning_rate": 7.698245614035086e-05, "loss": 0.0002, "step": 7860 }, { "epoch": 117.33, "learning_rate": 7.694736842105262e-05, "loss": 0.1317, "step": 7861 }, { "epoch": 117.34, "learning_rate": 7.691228070175439e-05, "loss": 0.0003, "step": 7862 }, { "epoch": 117.36, "learning_rate": 7.687719298245614e-05, "loss": 0.0003, "step": 7863 }, { "epoch": 117.37, "learning_rate": 7.68421052631579e-05, "loss": 0.0002, "step": 7864 }, { "epoch": 117.39, "learning_rate": 7.680701754385963e-05, "loss": 0.0005, "step": 7865 }, { "epoch": 117.4, "learning_rate": 7.67719298245614e-05, "loss": 0.0003, "step": 7866 }, { "epoch": 117.42, "learning_rate": 7.673684210526314e-05, "loss": 0.0003, "step": 7867 }, { "epoch": 117.43, "learning_rate": 7.67017543859649e-05, "loss": 0.0005, "step": 7868 }, { "epoch": 117.45, "learning_rate": 7.666666666666666e-05, "loss": 0.0005, "step": 7869 }, { "epoch": 117.46, "learning_rate": 7.663157894736842e-05, "loss": 0.0002, "step": 7870 }, { "epoch": 117.48, "learning_rate": 7.659649122807017e-05, "loss": 0.0003, "step": 7871 }, { "epoch": 117.49, "learning_rate": 7.656140350877193e-05, "loss": 0.0002, "step": 7872 }, { "epoch": 117.51, "learning_rate": 7.652631578947369e-05, "loss": 0.0003, "step": 7873 }, { "epoch": 117.52, "learning_rate": 7.649122807017543e-05, "loss": 0.012, "step": 7874 }, { "epoch": 117.54, "learning_rate": 7.645614035087719e-05, "loss": 0.0005, "step": 7875 }, { "epoch": 117.55, "learning_rate": 7.642105263157894e-05, "loss": 0.0002, "step": 7876 }, { "epoch": 117.57, "learning_rate": 7.63859649122807e-05, "loss": 0.1101, "step": 7877 }, { "epoch": 117.58, "learning_rate": 7.635087719298245e-05, "loss": 0.048, "step": 7878 }, { "epoch": 117.59, "learning_rate": 7.631578947368421e-05, "loss": 0.0002, "step": 7879 }, { "epoch": 117.61, "learning_rate": 7.628070175438596e-05, "loss": 0.0004, "step": 7880 }, { "epoch": 117.62, "learning_rate": 7.624561403508772e-05, "loss": 0.0004, "step": 7881 }, { "epoch": 117.64, "learning_rate": 7.621052631578946e-05, "loss": 0.0018, "step": 7882 }, { "epoch": 117.65, "learning_rate": 7.617543859649122e-05, "loss": 0.0002, "step": 7883 }, { "epoch": 117.67, "learning_rate": 7.614035087719297e-05, "loss": 0.001, "step": 7884 }, { "epoch": 117.68, "learning_rate": 7.610526315789473e-05, "loss": 0.0003, "step": 7885 }, { "epoch": 117.7, "learning_rate": 7.607017543859649e-05, "loss": 0.0004, "step": 7886 }, { "epoch": 117.71, "learning_rate": 7.603508771929824e-05, "loss": 0.0002, "step": 7887 }, { "epoch": 117.73, "learning_rate": 7.6e-05, "loss": 0.0065, "step": 7888 }, { "epoch": 117.74, "learning_rate": 7.596491228070174e-05, "loss": 0.0003, "step": 7889 }, { "epoch": 117.76, "learning_rate": 7.59298245614035e-05, "loss": 0.0003, "step": 7890 }, { "epoch": 117.77, "learning_rate": 7.589473684210525e-05, "loss": 0.0003, "step": 7891 }, { "epoch": 117.79, "learning_rate": 7.585964912280701e-05, "loss": 0.0004, "step": 7892 }, { "epoch": 117.8, "learning_rate": 7.582456140350876e-05, "loss": 0.0922, "step": 7893 }, { "epoch": 117.82, "learning_rate": 7.578947368421052e-05, "loss": 0.0002, "step": 7894 }, { "epoch": 117.83, "learning_rate": 7.575438596491227e-05, "loss": 0.0011, "step": 7895 }, { "epoch": 117.85, "learning_rate": 7.571929824561404e-05, "loss": 0.0003, "step": 7896 }, { "epoch": 117.86, "learning_rate": 7.568421052631577e-05, "loss": 0.0011, "step": 7897 }, { "epoch": 117.88, "learning_rate": 7.564912280701753e-05, "loss": 0.0003, "step": 7898 }, { "epoch": 117.89, "learning_rate": 7.56140350877193e-05, "loss": 0.0002, "step": 7899 }, { "epoch": 117.91, "learning_rate": 7.557894736842104e-05, "loss": 0.1365, "step": 7900 }, { "epoch": 117.92, "learning_rate": 7.55438596491228e-05, "loss": 0.0007, "step": 7901 }, { "epoch": 117.94, "learning_rate": 7.550877192982455e-05, "loss": 0.0003, "step": 7902 }, { "epoch": 117.95, "learning_rate": 7.547368421052632e-05, "loss": 0.0003, "step": 7903 }, { "epoch": 117.97, "learning_rate": 7.543859649122807e-05, "loss": 0.0002, "step": 7904 }, { "epoch": 117.98, "learning_rate": 7.540350877192981e-05, "loss": 0.0002, "step": 7905 }, { "epoch": 118.0, "learning_rate": 7.536842105263156e-05, "loss": 0.0023, "step": 7906 }, { "epoch": 118.01, "learning_rate": 7.533333333333333e-05, "loss": 0.0019, "step": 7907 }, { "epoch": 118.03, "learning_rate": 7.529824561403507e-05, "loss": 0.0003, "step": 7908 }, { "epoch": 118.04, "learning_rate": 7.526315789473684e-05, "loss": 0.0003, "step": 7909 }, { "epoch": 118.06, "learning_rate": 7.522807017543859e-05, "loss": 0.0929, "step": 7910 }, { "epoch": 118.07, "learning_rate": 7.519298245614035e-05, "loss": 0.0003, "step": 7911 }, { "epoch": 118.09, "learning_rate": 7.515789473684211e-05, "loss": 0.0003, "step": 7912 }, { "epoch": 118.1, "learning_rate": 7.512280701754385e-05, "loss": 0.0003, "step": 7913 }, { "epoch": 118.12, "learning_rate": 7.508771929824561e-05, "loss": 0.0003, "step": 7914 }, { "epoch": 118.13, "learning_rate": 7.505263157894736e-05, "loss": 0.0005, "step": 7915 }, { "epoch": 118.15, "learning_rate": 7.501754385964912e-05, "loss": 0.0003, "step": 7916 }, { "epoch": 118.16, "learning_rate": 7.498245614035087e-05, "loss": 0.0002, "step": 7917 }, { "epoch": 118.18, "learning_rate": 7.494736842105263e-05, "loss": 0.0004, "step": 7918 }, { "epoch": 118.19, "learning_rate": 7.491228070175438e-05, "loss": 0.0237, "step": 7919 }, { "epoch": 118.21, "learning_rate": 7.487719298245614e-05, "loss": 0.0017, "step": 7920 }, { "epoch": 118.22, "learning_rate": 7.484210526315789e-05, "loss": 0.0004, "step": 7921 }, { "epoch": 118.24, "learning_rate": 7.480701754385964e-05, "loss": 0.0038, "step": 7922 }, { "epoch": 118.25, "learning_rate": 7.47719298245614e-05, "loss": 0.0003, "step": 7923 }, { "epoch": 118.27, "learning_rate": 7.473684210526315e-05, "loss": 0.2341, "step": 7924 }, { "epoch": 118.28, "learning_rate": 7.47017543859649e-05, "loss": 0.0003, "step": 7925 }, { "epoch": 118.3, "learning_rate": 7.466666666666666e-05, "loss": 0.0003, "step": 7926 }, { "epoch": 118.31, "learning_rate": 7.463157894736841e-05, "loss": 0.0008, "step": 7927 }, { "epoch": 118.33, "learning_rate": 7.459649122807017e-05, "loss": 0.0004, "step": 7928 }, { "epoch": 118.34, "learning_rate": 7.456140350877192e-05, "loss": 0.0003, "step": 7929 }, { "epoch": 118.36, "learning_rate": 7.452631578947368e-05, "loss": 0.0003, "step": 7930 }, { "epoch": 118.37, "learning_rate": 7.449122807017543e-05, "loss": 0.0003, "step": 7931 }, { "epoch": 118.39, "learning_rate": 7.44561403508772e-05, "loss": 0.0002, "step": 7932 }, { "epoch": 118.4, "learning_rate": 7.442105263157894e-05, "loss": 0.0003, "step": 7933 }, { "epoch": 118.42, "learning_rate": 7.438596491228069e-05, "loss": 0.0002, "step": 7934 }, { "epoch": 118.43, "learning_rate": 7.435087719298245e-05, "loss": 0.0367, "step": 7935 }, { "epoch": 118.45, "learning_rate": 7.43157894736842e-05, "loss": 0.0005, "step": 7936 }, { "epoch": 118.46, "learning_rate": 7.428070175438595e-05, "loss": 0.0003, "step": 7937 }, { "epoch": 118.48, "learning_rate": 7.424561403508771e-05, "loss": 0.0002, "step": 7938 }, { "epoch": 118.49, "learning_rate": 7.421052631578946e-05, "loss": 0.0002, "step": 7939 }, { "epoch": 118.51, "learning_rate": 7.417543859649121e-05, "loss": 0.0004, "step": 7940 }, { "epoch": 118.52, "learning_rate": 7.414035087719297e-05, "loss": 0.001, "step": 7941 }, { "epoch": 118.54, "learning_rate": 7.410526315789474e-05, "loss": 0.0004, "step": 7942 }, { "epoch": 118.55, "learning_rate": 7.407017543859649e-05, "loss": 0.0642, "step": 7943 }, { "epoch": 118.57, "learning_rate": 7.403508771929825e-05, "loss": 0.0002, "step": 7944 }, { "epoch": 118.58, "learning_rate": 7.4e-05, "loss": 0.0374, "step": 7945 }, { "epoch": 118.59, "learning_rate": 7.396491228070175e-05, "loss": 0.0003, "step": 7946 }, { "epoch": 118.61, "learning_rate": 7.392982456140351e-05, "loss": 0.0002, "step": 7947 }, { "epoch": 118.62, "learning_rate": 7.389473684210526e-05, "loss": 0.0003, "step": 7948 }, { "epoch": 118.64, "learning_rate": 7.3859649122807e-05, "loss": 0.0004, "step": 7949 }, { "epoch": 118.65, "learning_rate": 7.382456140350877e-05, "loss": 0.0002, "step": 7950 }, { "epoch": 118.67, "learning_rate": 7.378947368421052e-05, "loss": 0.2289, "step": 7951 }, { "epoch": 118.68, "learning_rate": 7.375438596491226e-05, "loss": 0.0002, "step": 7952 }, { "epoch": 118.7, "learning_rate": 7.371929824561403e-05, "loss": 0.0003, "step": 7953 }, { "epoch": 118.71, "learning_rate": 7.368421052631578e-05, "loss": 0.0003, "step": 7954 }, { "epoch": 118.73, "learning_rate": 7.364912280701754e-05, "loss": 0.0003, "step": 7955 }, { "epoch": 118.74, "learning_rate": 7.36140350877193e-05, "loss": 0.0003, "step": 7956 }, { "epoch": 118.76, "learning_rate": 7.357894736842105e-05, "loss": 0.0021, "step": 7957 }, { "epoch": 118.77, "learning_rate": 7.35438596491228e-05, "loss": 0.0127, "step": 7958 }, { "epoch": 118.79, "learning_rate": 7.350877192982456e-05, "loss": 0.0003, "step": 7959 }, { "epoch": 118.8, "learning_rate": 7.347368421052631e-05, "loss": 0.1613, "step": 7960 }, { "epoch": 118.82, "learning_rate": 7.343859649122806e-05, "loss": 0.0002, "step": 7961 }, { "epoch": 118.83, "learning_rate": 7.340350877192982e-05, "loss": 0.0003, "step": 7962 }, { "epoch": 118.85, "learning_rate": 7.336842105263157e-05, "loss": 0.0003, "step": 7963 }, { "epoch": 118.86, "learning_rate": 7.333333333333332e-05, "loss": 0.1024, "step": 7964 }, { "epoch": 118.88, "learning_rate": 7.329824561403508e-05, "loss": 0.0002, "step": 7965 }, { "epoch": 118.89, "learning_rate": 7.326315789473683e-05, "loss": 0.0003, "step": 7966 }, { "epoch": 118.91, "learning_rate": 7.322807017543859e-05, "loss": 0.0904, "step": 7967 }, { "epoch": 118.92, "learning_rate": 7.319298245614035e-05, "loss": 0.0003, "step": 7968 }, { "epoch": 118.94, "learning_rate": 7.31578947368421e-05, "loss": 0.0006, "step": 7969 }, { "epoch": 118.95, "learning_rate": 7.312280701754385e-05, "loss": 0.0004, "step": 7970 }, { "epoch": 118.97, "learning_rate": 7.308771929824561e-05, "loss": 0.0002, "step": 7971 }, { "epoch": 118.98, "learning_rate": 7.305263157894736e-05, "loss": 0.0002, "step": 7972 }, { "epoch": 119.0, "learning_rate": 7.301754385964911e-05, "loss": 0.057, "step": 7973 }, { "epoch": 119.01, "learning_rate": 7.298245614035087e-05, "loss": 0.0006, "step": 7974 }, { "epoch": 119.03, "learning_rate": 7.294736842105262e-05, "loss": 0.0041, "step": 7975 }, { "epoch": 119.04, "learning_rate": 7.291228070175437e-05, "loss": 0.0006, "step": 7976 }, { "epoch": 119.06, "learning_rate": 7.287719298245613e-05, "loss": 0.0002, "step": 7977 }, { "epoch": 119.07, "learning_rate": 7.284210526315788e-05, "loss": 0.2424, "step": 7978 }, { "epoch": 119.09, "learning_rate": 7.280701754385964e-05, "loss": 0.0006, "step": 7979 }, { "epoch": 119.1, "learning_rate": 7.27719298245614e-05, "loss": 0.1771, "step": 7980 }, { "epoch": 119.12, "learning_rate": 7.273684210526316e-05, "loss": 0.0004, "step": 7981 }, { "epoch": 119.13, "learning_rate": 7.27017543859649e-05, "loss": 0.0022, "step": 7982 }, { "epoch": 119.15, "learning_rate": 7.266666666666667e-05, "loss": 0.0002, "step": 7983 }, { "epoch": 119.16, "learning_rate": 7.263157894736842e-05, "loss": 0.0002, "step": 7984 }, { "epoch": 119.18, "learning_rate": 7.259649122807016e-05, "loss": 0.0003, "step": 7985 }, { "epoch": 119.19, "learning_rate": 7.256140350877193e-05, "loss": 0.0003, "step": 7986 }, { "epoch": 119.21, "learning_rate": 7.252631578947368e-05, "loss": 0.0191, "step": 7987 }, { "epoch": 119.22, "learning_rate": 7.249122807017542e-05, "loss": 0.0004, "step": 7988 }, { "epoch": 119.24, "learning_rate": 7.245614035087719e-05, "loss": 0.0003, "step": 7989 }, { "epoch": 119.25, "learning_rate": 7.242105263157894e-05, "loss": 0.0003, "step": 7990 }, { "epoch": 119.27, "learning_rate": 7.23859649122807e-05, "loss": 0.0003, "step": 7991 }, { "epoch": 119.28, "learning_rate": 7.235087719298246e-05, "loss": 0.0003, "step": 7992 }, { "epoch": 119.3, "learning_rate": 7.231578947368421e-05, "loss": 0.0003, "step": 7993 }, { "epoch": 119.31, "learning_rate": 7.228070175438596e-05, "loss": 0.0003, "step": 7994 }, { "epoch": 119.33, "learning_rate": 7.224561403508772e-05, "loss": 0.0002, "step": 7995 }, { "epoch": 119.34, "learning_rate": 7.221052631578947e-05, "loss": 0.0003, "step": 7996 }, { "epoch": 119.36, "learning_rate": 7.217543859649122e-05, "loss": 0.0003, "step": 7997 }, { "epoch": 119.37, "learning_rate": 7.214035087719298e-05, "loss": 0.0007, "step": 7998 }, { "epoch": 119.39, "learning_rate": 7.210526315789473e-05, "loss": 0.0004, "step": 7999 }, { "epoch": 119.4, "learning_rate": 7.207017543859648e-05, "loss": 0.0003, "step": 8000 }, { "epoch": 119.4, "eval_accuracy": 0.8712677435144396, "eval_f1": 0.8705504000781962, "eval_loss": 0.698488175868988, "eval_runtime": 343.8989, "eval_samples_per_second": 11.881, "eval_steps_per_second": 0.744, "step": 8000 }, { "epoch": 119.42, "learning_rate": 7.203508771929824e-05, "loss": 0.0003, "step": 8001 }, { "epoch": 119.43, "learning_rate": 7.199999999999999e-05, "loss": 0.0019, "step": 8002 }, { "epoch": 119.45, "learning_rate": 7.196491228070175e-05, "loss": 0.0002, "step": 8003 }, { "epoch": 119.46, "learning_rate": 7.19298245614035e-05, "loss": 0.0014, "step": 8004 }, { "epoch": 119.48, "learning_rate": 7.189473684210526e-05, "loss": 0.0002, "step": 8005 }, { "epoch": 119.49, "learning_rate": 7.185964912280701e-05, "loss": 0.0025, "step": 8006 }, { "epoch": 119.51, "learning_rate": 7.182456140350877e-05, "loss": 0.0003, "step": 8007 }, { "epoch": 119.52, "learning_rate": 7.178947368421052e-05, "loss": 0.1452, "step": 8008 }, { "epoch": 119.54, "learning_rate": 7.175438596491227e-05, "loss": 0.0008, "step": 8009 }, { "epoch": 119.55, "learning_rate": 7.171929824561403e-05, "loss": 0.0003, "step": 8010 }, { "epoch": 119.57, "learning_rate": 7.168421052631578e-05, "loss": 0.0041, "step": 8011 }, { "epoch": 119.58, "learning_rate": 7.164912280701753e-05, "loss": 0.0018, "step": 8012 }, { "epoch": 119.59, "learning_rate": 7.161403508771929e-05, "loss": 0.0003, "step": 8013 }, { "epoch": 119.61, "learning_rate": 7.157894736842104e-05, "loss": 0.0003, "step": 8014 }, { "epoch": 119.62, "learning_rate": 7.15438596491228e-05, "loss": 0.107, "step": 8015 }, { "epoch": 119.64, "learning_rate": 7.150877192982455e-05, "loss": 0.0005, "step": 8016 }, { "epoch": 119.65, "learning_rate": 7.147368421052631e-05, "loss": 0.0003, "step": 8017 }, { "epoch": 119.67, "learning_rate": 7.143859649122806e-05, "loss": 0.0003, "step": 8018 }, { "epoch": 119.68, "learning_rate": 7.140350877192983e-05, "loss": 0.0003, "step": 8019 }, { "epoch": 119.7, "learning_rate": 7.136842105263157e-05, "loss": 0.0003, "step": 8020 }, { "epoch": 119.71, "learning_rate": 7.133333333333332e-05, "loss": 0.0006, "step": 8021 }, { "epoch": 119.73, "learning_rate": 7.129824561403509e-05, "loss": 0.0002, "step": 8022 }, { "epoch": 119.74, "learning_rate": 7.126315789473683e-05, "loss": 0.0003, "step": 8023 }, { "epoch": 119.76, "learning_rate": 7.122807017543858e-05, "loss": 0.0003, "step": 8024 }, { "epoch": 119.77, "learning_rate": 7.119298245614035e-05, "loss": 0.0004, "step": 8025 }, { "epoch": 119.79, "learning_rate": 7.11578947368421e-05, "loss": 0.0003, "step": 8026 }, { "epoch": 119.8, "learning_rate": 7.112280701754386e-05, "loss": 0.0003, "step": 8027 }, { "epoch": 119.82, "learning_rate": 7.10877192982456e-05, "loss": 0.0029, "step": 8028 }, { "epoch": 119.83, "learning_rate": 7.105263157894735e-05, "loss": 0.0003, "step": 8029 }, { "epoch": 119.85, "learning_rate": 7.101754385964912e-05, "loss": 0.0003, "step": 8030 }, { "epoch": 119.86, "learning_rate": 7.098245614035088e-05, "loss": 0.0008, "step": 8031 }, { "epoch": 119.88, "learning_rate": 7.094736842105263e-05, "loss": 0.0009, "step": 8032 }, { "epoch": 119.89, "learning_rate": 7.091228070175438e-05, "loss": 0.0009, "step": 8033 }, { "epoch": 119.91, "learning_rate": 7.087719298245614e-05, "loss": 0.0003, "step": 8034 }, { "epoch": 119.92, "learning_rate": 7.084210526315789e-05, "loss": 0.0002, "step": 8035 }, { "epoch": 119.94, "learning_rate": 7.080701754385964e-05, "loss": 0.0003, "step": 8036 }, { "epoch": 119.95, "learning_rate": 7.07719298245614e-05, "loss": 0.0011, "step": 8037 }, { "epoch": 119.97, "learning_rate": 7.073684210526315e-05, "loss": 0.0002, "step": 8038 }, { "epoch": 119.98, "learning_rate": 7.070175438596491e-05, "loss": 0.0066, "step": 8039 }, { "epoch": 120.0, "learning_rate": 7.066666666666666e-05, "loss": 0.0071, "step": 8040 }, { "epoch": 120.01, "learning_rate": 7.063157894736841e-05, "loss": 0.0009, "step": 8041 }, { "epoch": 120.03, "learning_rate": 7.059649122807017e-05, "loss": 0.0003, "step": 8042 }, { "epoch": 120.04, "learning_rate": 7.056140350877193e-05, "loss": 0.0008, "step": 8043 }, { "epoch": 120.06, "learning_rate": 7.052631578947368e-05, "loss": 0.0004, "step": 8044 }, { "epoch": 120.07, "learning_rate": 7.049122807017543e-05, "loss": 0.0003, "step": 8045 }, { "epoch": 120.09, "learning_rate": 7.045614035087719e-05, "loss": 0.001, "step": 8046 }, { "epoch": 120.1, "learning_rate": 7.042105263157894e-05, "loss": 0.0188, "step": 8047 }, { "epoch": 120.12, "learning_rate": 7.038596491228069e-05, "loss": 0.0004, "step": 8048 }, { "epoch": 120.13, "learning_rate": 7.035087719298245e-05, "loss": 0.0003, "step": 8049 }, { "epoch": 120.15, "learning_rate": 7.03157894736842e-05, "loss": 0.0002, "step": 8050 }, { "epoch": 120.16, "learning_rate": 7.028070175438596e-05, "loss": 0.0002, "step": 8051 }, { "epoch": 120.18, "learning_rate": 7.024561403508771e-05, "loss": 0.0003, "step": 8052 }, { "epoch": 120.19, "learning_rate": 7.021052631578946e-05, "loss": 0.0003, "step": 8053 }, { "epoch": 120.21, "learning_rate": 7.017543859649122e-05, "loss": 0.0003, "step": 8054 }, { "epoch": 120.22, "learning_rate": 7.014035087719299e-05, "loss": 0.0003, "step": 8055 }, { "epoch": 120.24, "learning_rate": 7.010526315789473e-05, "loss": 0.2835, "step": 8056 }, { "epoch": 120.25, "learning_rate": 7.007017543859648e-05, "loss": 0.0002, "step": 8057 }, { "epoch": 120.27, "learning_rate": 7.003508771929825e-05, "loss": 0.0003, "step": 8058 }, { "epoch": 120.28, "learning_rate": 7e-05, "loss": 0.0004, "step": 8059 }, { "epoch": 120.3, "learning_rate": 6.996491228070174e-05, "loss": 0.0002, "step": 8060 }, { "epoch": 120.31, "learning_rate": 6.99298245614035e-05, "loss": 0.0003, "step": 8061 }, { "epoch": 120.33, "learning_rate": 6.989473684210525e-05, "loss": 0.0003, "step": 8062 }, { "epoch": 120.34, "learning_rate": 6.985964912280702e-05, "loss": 0.0009, "step": 8063 }, { "epoch": 120.36, "learning_rate": 6.982456140350876e-05, "loss": 0.0044, "step": 8064 }, { "epoch": 120.37, "learning_rate": 6.978947368421051e-05, "loss": 0.0004, "step": 8065 }, { "epoch": 120.39, "learning_rate": 6.975438596491228e-05, "loss": 0.0003, "step": 8066 }, { "epoch": 120.4, "learning_rate": 6.971929824561404e-05, "loss": 0.0003, "step": 8067 }, { "epoch": 120.42, "learning_rate": 6.968421052631579e-05, "loss": 0.0003, "step": 8068 }, { "epoch": 120.43, "learning_rate": 6.964912280701754e-05, "loss": 0.0015, "step": 8069 }, { "epoch": 120.45, "learning_rate": 6.96140350877193e-05, "loss": 0.0004, "step": 8070 }, { "epoch": 120.46, "learning_rate": 6.957894736842105e-05, "loss": 0.0004, "step": 8071 }, { "epoch": 120.48, "learning_rate": 6.95438596491228e-05, "loss": 0.0004, "step": 8072 }, { "epoch": 120.49, "learning_rate": 6.950877192982456e-05, "loss": 0.0011, "step": 8073 }, { "epoch": 120.51, "learning_rate": 6.947368421052631e-05, "loss": 0.0021, "step": 8074 }, { "epoch": 120.52, "learning_rate": 6.943859649122807e-05, "loss": 0.0526, "step": 8075 }, { "epoch": 120.54, "learning_rate": 6.940350877192982e-05, "loss": 0.006, "step": 8076 }, { "epoch": 120.55, "learning_rate": 6.936842105263157e-05, "loss": 0.0005, "step": 8077 }, { "epoch": 120.57, "learning_rate": 6.933333333333333e-05, "loss": 0.0004, "step": 8078 }, { "epoch": 120.58, "learning_rate": 6.929824561403508e-05, "loss": 0.0048, "step": 8079 }, { "epoch": 120.59, "learning_rate": 6.926315789473684e-05, "loss": 0.0005, "step": 8080 }, { "epoch": 120.61, "learning_rate": 6.922807017543859e-05, "loss": 0.0004, "step": 8081 }, { "epoch": 120.62, "learning_rate": 6.919298245614035e-05, "loss": 0.0125, "step": 8082 }, { "epoch": 120.64, "learning_rate": 6.91578947368421e-05, "loss": 0.0353, "step": 8083 }, { "epoch": 120.65, "learning_rate": 6.912280701754385e-05, "loss": 0.015, "step": 8084 }, { "epoch": 120.67, "learning_rate": 6.908771929824561e-05, "loss": 0.0004, "step": 8085 }, { "epoch": 120.68, "learning_rate": 6.905263157894736e-05, "loss": 0.0691, "step": 8086 }, { "epoch": 120.7, "learning_rate": 6.901754385964912e-05, "loss": 0.0005, "step": 8087 }, { "epoch": 120.71, "learning_rate": 6.898245614035087e-05, "loss": 0.0014, "step": 8088 }, { "epoch": 120.73, "learning_rate": 6.894736842105262e-05, "loss": 0.3338, "step": 8089 }, { "epoch": 120.74, "learning_rate": 6.891228070175438e-05, "loss": 0.0003, "step": 8090 }, { "epoch": 120.76, "learning_rate": 6.887719298245613e-05, "loss": 0.0003, "step": 8091 }, { "epoch": 120.77, "learning_rate": 6.884210526315788e-05, "loss": 0.0003, "step": 8092 }, { "epoch": 120.79, "learning_rate": 6.880701754385964e-05, "loss": 0.0298, "step": 8093 }, { "epoch": 120.8, "learning_rate": 6.87719298245614e-05, "loss": 0.001, "step": 8094 }, { "epoch": 120.82, "learning_rate": 6.873684210526315e-05, "loss": 0.0004, "step": 8095 }, { "epoch": 120.83, "learning_rate": 6.87017543859649e-05, "loss": 0.0004, "step": 8096 }, { "epoch": 120.85, "learning_rate": 6.866666666666666e-05, "loss": 0.0509, "step": 8097 }, { "epoch": 120.86, "learning_rate": 6.863157894736841e-05, "loss": 0.1401, "step": 8098 }, { "epoch": 120.88, "learning_rate": 6.859649122807018e-05, "loss": 0.0007, "step": 8099 }, { "epoch": 120.89, "learning_rate": 6.856140350877192e-05, "loss": 0.0005, "step": 8100 }, { "epoch": 120.91, "learning_rate": 6.852631578947367e-05, "loss": 0.0005, "step": 8101 }, { "epoch": 120.92, "learning_rate": 6.849122807017544e-05, "loss": 0.001, "step": 8102 }, { "epoch": 120.94, "learning_rate": 6.845614035087718e-05, "loss": 0.0003, "step": 8103 }, { "epoch": 120.95, "learning_rate": 6.842105263157893e-05, "loss": 0.174, "step": 8104 }, { "epoch": 120.97, "learning_rate": 6.83859649122807e-05, "loss": 0.001, "step": 8105 }, { "epoch": 120.98, "learning_rate": 6.835087719298246e-05, "loss": 0.0006, "step": 8106 }, { "epoch": 121.0, "learning_rate": 6.83157894736842e-05, "loss": 0.0003, "step": 8107 }, { "epoch": 121.01, "learning_rate": 6.828070175438596e-05, "loss": 0.0004, "step": 8108 }, { "epoch": 121.03, "learning_rate": 6.824561403508772e-05, "loss": 0.0005, "step": 8109 }, { "epoch": 121.04, "learning_rate": 6.821052631578947e-05, "loss": 0.0006, "step": 8110 }, { "epoch": 121.06, "learning_rate": 6.817543859649123e-05, "loss": 0.0008, "step": 8111 }, { "epoch": 121.07, "learning_rate": 6.814035087719298e-05, "loss": 0.0035, "step": 8112 }, { "epoch": 121.09, "learning_rate": 6.810526315789473e-05, "loss": 0.0006, "step": 8113 }, { "epoch": 121.1, "learning_rate": 6.807017543859649e-05, "loss": 0.0008, "step": 8114 }, { "epoch": 121.12, "learning_rate": 6.803508771929824e-05, "loss": 0.001, "step": 8115 }, { "epoch": 121.13, "learning_rate": 6.799999999999999e-05, "loss": 0.0007, "step": 8116 }, { "epoch": 121.15, "learning_rate": 6.796491228070175e-05, "loss": 0.0006, "step": 8117 }, { "epoch": 121.16, "learning_rate": 6.792982456140351e-05, "loss": 0.0007, "step": 8118 }, { "epoch": 121.18, "learning_rate": 6.789473684210526e-05, "loss": 0.0272, "step": 8119 }, { "epoch": 121.19, "learning_rate": 6.785964912280701e-05, "loss": 0.0264, "step": 8120 }, { "epoch": 121.21, "learning_rate": 6.782456140350877e-05, "loss": 0.0007, "step": 8121 }, { "epoch": 121.22, "learning_rate": 6.778947368421052e-05, "loss": 0.0007, "step": 8122 }, { "epoch": 121.24, "learning_rate": 6.775438596491228e-05, "loss": 0.0009, "step": 8123 }, { "epoch": 121.25, "learning_rate": 6.771929824561403e-05, "loss": 0.0006, "step": 8124 }, { "epoch": 121.27, "learning_rate": 6.768421052631578e-05, "loss": 0.0005, "step": 8125 }, { "epoch": 121.28, "learning_rate": 6.764912280701754e-05, "loss": 0.0193, "step": 8126 }, { "epoch": 121.3, "learning_rate": 6.761403508771929e-05, "loss": 0.0092, "step": 8127 }, { "epoch": 121.31, "learning_rate": 6.757894736842104e-05, "loss": 0.0099, "step": 8128 }, { "epoch": 121.33, "learning_rate": 6.75438596491228e-05, "loss": 0.0004, "step": 8129 }, { "epoch": 121.34, "learning_rate": 6.750877192982456e-05, "loss": 0.0005, "step": 8130 }, { "epoch": 121.36, "learning_rate": 6.747368421052631e-05, "loss": 0.0005, "step": 8131 }, { "epoch": 121.37, "learning_rate": 6.743859649122806e-05, "loss": 0.0004, "step": 8132 }, { "epoch": 121.39, "learning_rate": 6.740350877192982e-05, "loss": 0.001, "step": 8133 }, { "epoch": 121.4, "learning_rate": 6.736842105263157e-05, "loss": 0.0005, "step": 8134 }, { "epoch": 121.42, "learning_rate": 6.733333333333333e-05, "loss": 0.0004, "step": 8135 }, { "epoch": 121.43, "learning_rate": 6.729824561403508e-05, "loss": 0.0005, "step": 8136 }, { "epoch": 121.45, "learning_rate": 6.726315789473683e-05, "loss": 0.4882, "step": 8137 }, { "epoch": 121.46, "learning_rate": 6.72280701754386e-05, "loss": 0.0033, "step": 8138 }, { "epoch": 121.48, "learning_rate": 6.719298245614034e-05, "loss": 0.0003, "step": 8139 }, { "epoch": 121.49, "learning_rate": 6.715789473684209e-05, "loss": 0.0005, "step": 8140 }, { "epoch": 121.51, "learning_rate": 6.712280701754385e-05, "loss": 0.0008, "step": 8141 }, { "epoch": 121.52, "learning_rate": 6.70877192982456e-05, "loss": 0.0311, "step": 8142 }, { "epoch": 121.54, "learning_rate": 6.705263157894737e-05, "loss": 0.0003, "step": 8143 }, { "epoch": 121.55, "learning_rate": 6.701754385964911e-05, "loss": 0.0004, "step": 8144 }, { "epoch": 121.57, "learning_rate": 6.698245614035088e-05, "loss": 0.0012, "step": 8145 }, { "epoch": 121.58, "learning_rate": 6.694736842105263e-05, "loss": 0.0004, "step": 8146 }, { "epoch": 121.59, "learning_rate": 6.691228070175439e-05, "loss": 0.0003, "step": 8147 }, { "epoch": 121.61, "learning_rate": 6.687719298245614e-05, "loss": 0.0031, "step": 8148 }, { "epoch": 121.62, "learning_rate": 6.684210526315789e-05, "loss": 0.0031, "step": 8149 }, { "epoch": 121.64, "learning_rate": 6.680701754385965e-05, "loss": 0.0003, "step": 8150 }, { "epoch": 121.65, "learning_rate": 6.67719298245614e-05, "loss": 0.0003, "step": 8151 }, { "epoch": 121.67, "learning_rate": 6.673684210526315e-05, "loss": 0.0006, "step": 8152 }, { "epoch": 121.68, "learning_rate": 6.670175438596491e-05, "loss": 0.0005, "step": 8153 }, { "epoch": 121.7, "learning_rate": 6.666666666666666e-05, "loss": 0.0005, "step": 8154 }, { "epoch": 121.71, "learning_rate": 6.663157894736842e-05, "loss": 0.0006, "step": 8155 }, { "epoch": 121.73, "learning_rate": 6.659649122807017e-05, "loss": 0.0005, "step": 8156 }, { "epoch": 121.74, "learning_rate": 6.656140350877193e-05, "loss": 0.0004, "step": 8157 }, { "epoch": 121.76, "learning_rate": 6.652631578947368e-05, "loss": 0.0384, "step": 8158 }, { "epoch": 121.77, "learning_rate": 6.649122807017543e-05, "loss": 0.0011, "step": 8159 }, { "epoch": 121.79, "learning_rate": 6.645614035087719e-05, "loss": 0.0004, "step": 8160 }, { "epoch": 121.8, "learning_rate": 6.642105263157894e-05, "loss": 0.0032, "step": 8161 }, { "epoch": 121.82, "learning_rate": 6.63859649122807e-05, "loss": 0.0018, "step": 8162 }, { "epoch": 121.83, "learning_rate": 6.635087719298245e-05, "loss": 0.0003, "step": 8163 }, { "epoch": 121.85, "learning_rate": 6.63157894736842e-05, "loss": 0.0392, "step": 8164 }, { "epoch": 121.86, "learning_rate": 6.628070175438596e-05, "loss": 0.0005, "step": 8165 }, { "epoch": 121.88, "learning_rate": 6.624561403508771e-05, "loss": 0.0014, "step": 8166 }, { "epoch": 121.89, "learning_rate": 6.621052631578946e-05, "loss": 0.0004, "step": 8167 }, { "epoch": 121.91, "learning_rate": 6.617543859649122e-05, "loss": 0.0008, "step": 8168 }, { "epoch": 121.92, "learning_rate": 6.614035087719298e-05, "loss": 0.0003, "step": 8169 }, { "epoch": 121.94, "learning_rate": 6.610526315789473e-05, "loss": 0.001, "step": 8170 }, { "epoch": 121.95, "learning_rate": 6.607017543859648e-05, "loss": 0.0003, "step": 8171 }, { "epoch": 121.97, "learning_rate": 6.603508771929824e-05, "loss": 0.0003, "step": 8172 }, { "epoch": 121.98, "learning_rate": 6.599999999999999e-05, "loss": 0.0005, "step": 8173 }, { "epoch": 122.0, "learning_rate": 6.596491228070175e-05, "loss": 0.0004, "step": 8174 }, { "epoch": 122.01, "learning_rate": 6.59298245614035e-05, "loss": 0.0211, "step": 8175 }, { "epoch": 122.03, "learning_rate": 6.589473684210525e-05, "loss": 0.0003, "step": 8176 }, { "epoch": 122.04, "learning_rate": 6.585964912280701e-05, "loss": 0.002, "step": 8177 }, { "epoch": 122.06, "learning_rate": 6.582456140350876e-05, "loss": 0.0005, "step": 8178 }, { "epoch": 122.07, "learning_rate": 6.578947368421051e-05, "loss": 0.0003, "step": 8179 }, { "epoch": 122.09, "learning_rate": 6.575438596491227e-05, "loss": 0.0008, "step": 8180 }, { "epoch": 122.1, "learning_rate": 6.571929824561404e-05, "loss": 0.0044, "step": 8181 }, { "epoch": 122.12, "learning_rate": 6.568421052631578e-05, "loss": 0.0005, "step": 8182 }, { "epoch": 122.13, "learning_rate": 6.564912280701753e-05, "loss": 0.0003, "step": 8183 }, { "epoch": 122.15, "learning_rate": 6.56140350877193e-05, "loss": 0.0019, "step": 8184 }, { "epoch": 122.16, "learning_rate": 6.557894736842104e-05, "loss": 0.0017, "step": 8185 }, { "epoch": 122.18, "learning_rate": 6.554385964912281e-05, "loss": 0.0003, "step": 8186 }, { "epoch": 122.19, "learning_rate": 6.550877192982456e-05, "loss": 0.0005, "step": 8187 }, { "epoch": 122.21, "learning_rate": 6.54736842105263e-05, "loss": 0.0005, "step": 8188 }, { "epoch": 122.22, "learning_rate": 6.543859649122807e-05, "loss": 0.0096, "step": 8189 }, { "epoch": 122.24, "learning_rate": 6.540350877192982e-05, "loss": 0.0004, "step": 8190 }, { "epoch": 122.25, "learning_rate": 6.536842105263156e-05, "loss": 0.0006, "step": 8191 }, { "epoch": 122.27, "learning_rate": 6.533333333333333e-05, "loss": 0.0184, "step": 8192 }, { "epoch": 122.28, "learning_rate": 6.529824561403509e-05, "loss": 0.0003, "step": 8193 }, { "epoch": 122.3, "learning_rate": 6.526315789473684e-05, "loss": 0.0005, "step": 8194 }, { "epoch": 122.31, "learning_rate": 6.522807017543859e-05, "loss": 0.0003, "step": 8195 }, { "epoch": 122.33, "learning_rate": 6.519298245614035e-05, "loss": 0.0007, "step": 8196 }, { "epoch": 122.34, "learning_rate": 6.51578947368421e-05, "loss": 0.004, "step": 8197 }, { "epoch": 122.36, "learning_rate": 6.512280701754386e-05, "loss": 0.0003, "step": 8198 }, { "epoch": 122.37, "learning_rate": 6.508771929824561e-05, "loss": 0.0003, "step": 8199 }, { "epoch": 122.39, "learning_rate": 6.505263157894736e-05, "loss": 0.0048, "step": 8200 }, { "epoch": 122.39, "eval_accuracy": 0.8739598629466471, "eval_f1": 0.8764769996728009, "eval_loss": 0.6620244979858398, "eval_runtime": 344.6899, "eval_samples_per_second": 11.854, "eval_steps_per_second": 0.743, "step": 8200 }, { "epoch": 122.4, "learning_rate": 6.501754385964912e-05, "loss": 0.001, "step": 8201 }, { "epoch": 122.42, "learning_rate": 6.498245614035087e-05, "loss": 0.0004, "step": 8202 }, { "epoch": 122.43, "learning_rate": 6.494736842105262e-05, "loss": 0.0022, "step": 8203 }, { "epoch": 122.45, "learning_rate": 6.491228070175438e-05, "loss": 0.0004, "step": 8204 }, { "epoch": 122.46, "learning_rate": 6.487719298245614e-05, "loss": 0.0034, "step": 8205 }, { "epoch": 122.48, "learning_rate": 6.484210526315789e-05, "loss": 0.0013, "step": 8206 }, { "epoch": 122.49, "learning_rate": 6.480701754385964e-05, "loss": 0.0004, "step": 8207 }, { "epoch": 122.51, "learning_rate": 6.47719298245614e-05, "loss": 0.0004, "step": 8208 }, { "epoch": 122.52, "learning_rate": 6.473684210526315e-05, "loss": 0.0003, "step": 8209 }, { "epoch": 122.54, "learning_rate": 6.470175438596491e-05, "loss": 0.0003, "step": 8210 }, { "epoch": 122.55, "learning_rate": 6.466666666666666e-05, "loss": 0.0003, "step": 8211 }, { "epoch": 122.57, "learning_rate": 6.463157894736841e-05, "loss": 0.0003, "step": 8212 }, { "epoch": 122.58, "learning_rate": 6.459649122807017e-05, "loss": 0.1371, "step": 8213 }, { "epoch": 122.59, "learning_rate": 6.456140350877192e-05, "loss": 0.0026, "step": 8214 }, { "epoch": 122.61, "learning_rate": 6.452631578947367e-05, "loss": 0.0022, "step": 8215 }, { "epoch": 122.62, "learning_rate": 6.449122807017543e-05, "loss": 0.031, "step": 8216 }, { "epoch": 122.64, "learning_rate": 6.445614035087718e-05, "loss": 0.0003, "step": 8217 }, { "epoch": 122.65, "learning_rate": 6.442105263157894e-05, "loss": 0.0004, "step": 8218 }, { "epoch": 122.67, "learning_rate": 6.438596491228069e-05, "loss": 0.177, "step": 8219 }, { "epoch": 122.68, "learning_rate": 6.435087719298246e-05, "loss": 0.0004, "step": 8220 }, { "epoch": 122.7, "learning_rate": 6.43157894736842e-05, "loss": 0.0004, "step": 8221 }, { "epoch": 122.71, "learning_rate": 6.428070175438597e-05, "loss": 0.0004, "step": 8222 }, { "epoch": 122.73, "learning_rate": 6.424561403508772e-05, "loss": 0.0004, "step": 8223 }, { "epoch": 122.74, "learning_rate": 6.421052631578946e-05, "loss": 0.0002, "step": 8224 }, { "epoch": 122.76, "learning_rate": 6.417543859649123e-05, "loss": 0.0923, "step": 8225 }, { "epoch": 122.77, "learning_rate": 6.414035087719297e-05, "loss": 0.0009, "step": 8226 }, { "epoch": 122.79, "learning_rate": 6.410526315789472e-05, "loss": 0.0002, "step": 8227 }, { "epoch": 122.8, "learning_rate": 6.407017543859649e-05, "loss": 0.0006, "step": 8228 }, { "epoch": 122.82, "learning_rate": 6.403508771929823e-05, "loss": 0.0004, "step": 8229 }, { "epoch": 122.83, "learning_rate": 6.4e-05, "loss": 0.0003, "step": 8230 }, { "epoch": 122.85, "learning_rate": 6.396491228070175e-05, "loss": 0.0004, "step": 8231 }, { "epoch": 122.86, "learning_rate": 6.392982456140351e-05, "loss": 0.0664, "step": 8232 }, { "epoch": 122.88, "learning_rate": 6.389473684210526e-05, "loss": 0.0004, "step": 8233 }, { "epoch": 122.89, "learning_rate": 6.385964912280702e-05, "loss": 0.0002, "step": 8234 }, { "epoch": 122.91, "learning_rate": 6.382456140350877e-05, "loss": 0.0003, "step": 8235 }, { "epoch": 122.92, "learning_rate": 6.378947368421052e-05, "loss": 0.0003, "step": 8236 }, { "epoch": 122.94, "learning_rate": 6.375438596491228e-05, "loss": 0.0005, "step": 8237 }, { "epoch": 122.95, "learning_rate": 6.371929824561403e-05, "loss": 0.0081, "step": 8238 }, { "epoch": 122.97, "learning_rate": 6.368421052631578e-05, "loss": 0.0003, "step": 8239 }, { "epoch": 122.98, "learning_rate": 6.364912280701754e-05, "loss": 0.0004, "step": 8240 }, { "epoch": 123.0, "learning_rate": 6.361403508771929e-05, "loss": 0.0003, "step": 8241 }, { "epoch": 123.01, "learning_rate": 6.357894736842104e-05, "loss": 0.0003, "step": 8242 }, { "epoch": 123.03, "learning_rate": 6.35438596491228e-05, "loss": 0.0003, "step": 8243 }, { "epoch": 123.04, "learning_rate": 6.350877192982456e-05, "loss": 0.0072, "step": 8244 }, { "epoch": 123.06, "learning_rate": 6.347368421052631e-05, "loss": 0.0005, "step": 8245 }, { "epoch": 123.07, "learning_rate": 6.343859649122807e-05, "loss": 0.0003, "step": 8246 }, { "epoch": 123.09, "learning_rate": 6.340350877192982e-05, "loss": 0.0003, "step": 8247 }, { "epoch": 123.1, "learning_rate": 6.336842105263157e-05, "loss": 0.0015, "step": 8248 }, { "epoch": 123.12, "learning_rate": 6.333333333333333e-05, "loss": 0.0003, "step": 8249 }, { "epoch": 123.13, "learning_rate": 6.329824561403508e-05, "loss": 0.0004, "step": 8250 }, { "epoch": 123.15, "learning_rate": 6.326315789473683e-05, "loss": 0.0003, "step": 8251 }, { "epoch": 123.16, "learning_rate": 6.322807017543859e-05, "loss": 0.1065, "step": 8252 }, { "epoch": 123.18, "learning_rate": 6.319298245614034e-05, "loss": 0.0002, "step": 8253 }, { "epoch": 123.19, "learning_rate": 6.315789473684209e-05, "loss": 0.0003, "step": 8254 }, { "epoch": 123.21, "learning_rate": 6.312280701754385e-05, "loss": 0.0321, "step": 8255 }, { "epoch": 123.22, "learning_rate": 6.308771929824561e-05, "loss": 0.0003, "step": 8256 }, { "epoch": 123.24, "learning_rate": 6.305263157894736e-05, "loss": 0.0003, "step": 8257 }, { "epoch": 123.25, "learning_rate": 6.301754385964913e-05, "loss": 0.0003, "step": 8258 }, { "epoch": 123.27, "learning_rate": 6.298245614035087e-05, "loss": 0.0003, "step": 8259 }, { "epoch": 123.28, "learning_rate": 6.294736842105262e-05, "loss": 0.0002, "step": 8260 }, { "epoch": 123.3, "learning_rate": 6.291228070175439e-05, "loss": 0.0002, "step": 8261 }, { "epoch": 123.31, "learning_rate": 6.287719298245613e-05, "loss": 0.0003, "step": 8262 }, { "epoch": 123.33, "learning_rate": 6.284210526315788e-05, "loss": 0.0004, "step": 8263 }, { "epoch": 123.34, "learning_rate": 6.280701754385965e-05, "loss": 0.0018, "step": 8264 }, { "epoch": 123.36, "learning_rate": 6.27719298245614e-05, "loss": 0.0003, "step": 8265 }, { "epoch": 123.37, "learning_rate": 6.273684210526314e-05, "loss": 0.0003, "step": 8266 }, { "epoch": 123.39, "learning_rate": 6.27017543859649e-05, "loss": 0.0002, "step": 8267 }, { "epoch": 123.4, "learning_rate": 6.266666666666667e-05, "loss": 0.0226, "step": 8268 }, { "epoch": 123.42, "learning_rate": 6.263157894736842e-05, "loss": 0.0003, "step": 8269 }, { "epoch": 123.43, "learning_rate": 6.259649122807018e-05, "loss": 0.0127, "step": 8270 }, { "epoch": 123.45, "learning_rate": 6.256140350877193e-05, "loss": 0.0004, "step": 8271 }, { "epoch": 123.46, "learning_rate": 6.252631578947368e-05, "loss": 0.0003, "step": 8272 }, { "epoch": 123.48, "learning_rate": 6.249122807017544e-05, "loss": 0.0003, "step": 8273 }, { "epoch": 123.49, "learning_rate": 6.245614035087719e-05, "loss": 0.0002, "step": 8274 }, { "epoch": 123.51, "learning_rate": 6.242105263157894e-05, "loss": 0.0004, "step": 8275 }, { "epoch": 123.52, "learning_rate": 6.23859649122807e-05, "loss": 0.0002, "step": 8276 }, { "epoch": 123.54, "learning_rate": 6.235087719298245e-05, "loss": 0.0003, "step": 8277 }, { "epoch": 123.55, "learning_rate": 6.23157894736842e-05, "loss": 0.0004, "step": 8278 }, { "epoch": 123.57, "learning_rate": 6.228070175438596e-05, "loss": 0.0002, "step": 8279 }, { "epoch": 123.58, "learning_rate": 6.224561403508771e-05, "loss": 0.0003, "step": 8280 }, { "epoch": 123.59, "learning_rate": 6.221052631578947e-05, "loss": 0.0018, "step": 8281 }, { "epoch": 123.61, "learning_rate": 6.217543859649123e-05, "loss": 0.0004, "step": 8282 }, { "epoch": 123.62, "learning_rate": 6.214035087719298e-05, "loss": 0.0003, "step": 8283 }, { "epoch": 123.64, "learning_rate": 6.210526315789473e-05, "loss": 0.0025, "step": 8284 }, { "epoch": 123.65, "learning_rate": 6.207017543859649e-05, "loss": 0.0004, "step": 8285 }, { "epoch": 123.67, "learning_rate": 6.203508771929824e-05, "loss": 0.0005, "step": 8286 }, { "epoch": 123.68, "learning_rate": 6.199999999999999e-05, "loss": 0.0003, "step": 8287 }, { "epoch": 123.7, "learning_rate": 6.196491228070175e-05, "loss": 0.0003, "step": 8288 }, { "epoch": 123.71, "learning_rate": 6.19298245614035e-05, "loss": 0.0253, "step": 8289 }, { "epoch": 123.73, "learning_rate": 6.189473684210525e-05, "loss": 0.0003, "step": 8290 }, { "epoch": 123.74, "learning_rate": 6.185964912280701e-05, "loss": 0.0003, "step": 8291 }, { "epoch": 123.76, "learning_rate": 6.182456140350876e-05, "loss": 0.0003, "step": 8292 }, { "epoch": 123.77, "learning_rate": 6.178947368421052e-05, "loss": 0.0007, "step": 8293 }, { "epoch": 123.79, "learning_rate": 6.175438596491228e-05, "loss": 0.0003, "step": 8294 }, { "epoch": 123.8, "learning_rate": 6.171929824561403e-05, "loss": 0.0003, "step": 8295 }, { "epoch": 123.82, "learning_rate": 6.168421052631578e-05, "loss": 0.0003, "step": 8296 }, { "epoch": 123.83, "learning_rate": 6.164912280701754e-05, "loss": 0.0003, "step": 8297 }, { "epoch": 123.85, "learning_rate": 6.16140350877193e-05, "loss": 0.0003, "step": 8298 }, { "epoch": 123.86, "learning_rate": 6.157894736842104e-05, "loss": 0.0003, "step": 8299 }, { "epoch": 123.88, "learning_rate": 6.15438596491228e-05, "loss": 0.0003, "step": 8300 }, { "epoch": 123.89, "learning_rate": 6.150877192982455e-05, "loss": 0.0002, "step": 8301 }, { "epoch": 123.91, "learning_rate": 6.14736842105263e-05, "loss": 0.0002, "step": 8302 }, { "epoch": 123.92, "learning_rate": 6.143859649122806e-05, "loss": 0.0003, "step": 8303 }, { "epoch": 123.94, "learning_rate": 6.140350877192981e-05, "loss": 0.0003, "step": 8304 }, { "epoch": 123.95, "learning_rate": 6.136842105263158e-05, "loss": 0.0003, "step": 8305 }, { "epoch": 123.97, "learning_rate": 6.133333333333334e-05, "loss": 0.0003, "step": 8306 }, { "epoch": 123.98, "learning_rate": 6.129824561403509e-05, "loss": 0.0003, "step": 8307 }, { "epoch": 124.0, "learning_rate": 6.126315789473684e-05, "loss": 0.0002, "step": 8308 }, { "epoch": 124.01, "learning_rate": 6.12280701754386e-05, "loss": 0.0016, "step": 8309 }, { "epoch": 124.03, "learning_rate": 6.119298245614035e-05, "loss": 0.0002, "step": 8310 }, { "epoch": 124.04, "learning_rate": 6.11578947368421e-05, "loss": 0.0002, "step": 8311 }, { "epoch": 124.06, "learning_rate": 6.112280701754386e-05, "loss": 0.0002, "step": 8312 }, { "epoch": 124.07, "learning_rate": 6.10877192982456e-05, "loss": 0.0003, "step": 8313 }, { "epoch": 124.09, "learning_rate": 6.105263157894736e-05, "loss": 0.0004, "step": 8314 }, { "epoch": 124.1, "learning_rate": 6.101754385964912e-05, "loss": 0.0002, "step": 8315 }, { "epoch": 124.12, "learning_rate": 6.0982456140350866e-05, "loss": 0.0003, "step": 8316 }, { "epoch": 124.13, "learning_rate": 6.094736842105262e-05, "loss": 0.0003, "step": 8317 }, { "epoch": 124.15, "learning_rate": 6.0912280701754384e-05, "loss": 0.0002, "step": 8318 }, { "epoch": 124.16, "learning_rate": 6.087719298245614e-05, "loss": 0.0003, "step": 8319 }, { "epoch": 124.18, "learning_rate": 6.0842105263157895e-05, "loss": 0.0003, "step": 8320 }, { "epoch": 124.19, "learning_rate": 6.0807017543859644e-05, "loss": 0.0035, "step": 8321 }, { "epoch": 124.21, "learning_rate": 6.07719298245614e-05, "loss": 0.0002, "step": 8322 }, { "epoch": 124.22, "learning_rate": 6.0736842105263155e-05, "loss": 0.0031, "step": 8323 }, { "epoch": 124.24, "learning_rate": 6.0701754385964904e-05, "loss": 0.1618, "step": 8324 }, { "epoch": 124.25, "learning_rate": 6.066666666666666e-05, "loss": 0.0002, "step": 8325 }, { "epoch": 124.27, "learning_rate": 6.0631578947368415e-05, "loss": 0.0002, "step": 8326 }, { "epoch": 124.28, "learning_rate": 6.059649122807017e-05, "loss": 0.0002, "step": 8327 }, { "epoch": 124.3, "learning_rate": 6.056140350877192e-05, "loss": 0.0004, "step": 8328 }, { "epoch": 124.31, "learning_rate": 6.0526315789473675e-05, "loss": 0.0002, "step": 8329 }, { "epoch": 124.33, "learning_rate": 6.049122807017543e-05, "loss": 0.0002, "step": 8330 }, { "epoch": 124.34, "learning_rate": 6.045614035087719e-05, "loss": 0.0003, "step": 8331 }, { "epoch": 124.36, "learning_rate": 6.042105263157895e-05, "loss": 0.0002, "step": 8332 }, { "epoch": 124.37, "learning_rate": 6.03859649122807e-05, "loss": 0.0002, "step": 8333 }, { "epoch": 124.39, "learning_rate": 6.035087719298245e-05, "loss": 0.0003, "step": 8334 }, { "epoch": 124.4, "learning_rate": 6.031578947368421e-05, "loss": 0.0009, "step": 8335 }, { "epoch": 124.42, "learning_rate": 6.028070175438596e-05, "loss": 0.0004, "step": 8336 }, { "epoch": 124.43, "learning_rate": 6.024561403508771e-05, "loss": 0.0003, "step": 8337 }, { "epoch": 124.45, "learning_rate": 6.021052631578947e-05, "loss": 0.0004, "step": 8338 }, { "epoch": 124.46, "learning_rate": 6.0175438596491224e-05, "loss": 0.0003, "step": 8339 }, { "epoch": 124.48, "learning_rate": 6.014035087719297e-05, "loss": 0.0002, "step": 8340 }, { "epoch": 124.49, "learning_rate": 6.010526315789473e-05, "loss": 0.008, "step": 8341 }, { "epoch": 124.51, "learning_rate": 6.0070175438596484e-05, "loss": 0.0002, "step": 8342 }, { "epoch": 124.52, "learning_rate": 6.0035087719298246e-05, "loss": 0.0002, "step": 8343 }, { "epoch": 124.54, "learning_rate": 5.9999999999999995e-05, "loss": 0.0002, "step": 8344 }, { "epoch": 124.55, "learning_rate": 5.996491228070175e-05, "loss": 0.0002, "step": 8345 }, { "epoch": 124.57, "learning_rate": 5.9929824561403506e-05, "loss": 0.0002, "step": 8346 }, { "epoch": 124.58, "learning_rate": 5.989473684210526e-05, "loss": 0.0005, "step": 8347 }, { "epoch": 124.59, "learning_rate": 5.985964912280701e-05, "loss": 0.0002, "step": 8348 }, { "epoch": 124.61, "learning_rate": 5.9824561403508766e-05, "loss": 0.0002, "step": 8349 }, { "epoch": 124.62, "learning_rate": 5.978947368421052e-05, "loss": 0.0003, "step": 8350 }, { "epoch": 124.64, "learning_rate": 5.975438596491228e-05, "loss": 0.0004, "step": 8351 }, { "epoch": 124.65, "learning_rate": 5.9719298245614026e-05, "loss": 0.0002, "step": 8352 }, { "epoch": 124.67, "learning_rate": 5.968421052631578e-05, "loss": 0.0002, "step": 8353 }, { "epoch": 124.68, "learning_rate": 5.964912280701754e-05, "loss": 0.0061, "step": 8354 }, { "epoch": 124.7, "learning_rate": 5.961403508771929e-05, "loss": 0.0002, "step": 8355 }, { "epoch": 124.71, "learning_rate": 5.957894736842105e-05, "loss": 0.0002, "step": 8356 }, { "epoch": 124.73, "learning_rate": 5.9543859649122803e-05, "loss": 0.0002, "step": 8357 }, { "epoch": 124.74, "learning_rate": 5.950877192982456e-05, "loss": 0.0003, "step": 8358 }, { "epoch": 124.76, "learning_rate": 5.9473684210526315e-05, "loss": 0.0002, "step": 8359 }, { "epoch": 124.77, "learning_rate": 5.943859649122806e-05, "loss": 0.0002, "step": 8360 }, { "epoch": 124.79, "learning_rate": 5.940350877192982e-05, "loss": 0.0098, "step": 8361 }, { "epoch": 124.8, "learning_rate": 5.9368421052631574e-05, "loss": 0.0003, "step": 8362 }, { "epoch": 124.82, "learning_rate": 5.933333333333333e-05, "loss": 0.0002, "step": 8363 }, { "epoch": 124.83, "learning_rate": 5.929824561403508e-05, "loss": 0.0034, "step": 8364 }, { "epoch": 124.85, "learning_rate": 5.9263157894736834e-05, "loss": 0.0003, "step": 8365 }, { "epoch": 124.86, "learning_rate": 5.922807017543859e-05, "loss": 0.0002, "step": 8366 }, { "epoch": 124.88, "learning_rate": 5.9192982456140345e-05, "loss": 0.0004, "step": 8367 }, { "epoch": 124.89, "learning_rate": 5.91578947368421e-05, "loss": 0.0002, "step": 8368 }, { "epoch": 124.91, "learning_rate": 5.9122807017543856e-05, "loss": 0.0003, "step": 8369 }, { "epoch": 124.92, "learning_rate": 5.908771929824561e-05, "loss": 0.0002, "step": 8370 }, { "epoch": 124.94, "learning_rate": 5.905263157894737e-05, "loss": 0.0003, "step": 8371 }, { "epoch": 124.95, "learning_rate": 5.9017543859649116e-05, "loss": 0.0002, "step": 8372 }, { "epoch": 124.97, "learning_rate": 5.898245614035087e-05, "loss": 0.0002, "step": 8373 }, { "epoch": 124.98, "learning_rate": 5.894736842105263e-05, "loss": 0.0002, "step": 8374 }, { "epoch": 125.0, "learning_rate": 5.891228070175438e-05, "loss": 0.0002, "step": 8375 }, { "epoch": 125.01, "learning_rate": 5.887719298245613e-05, "loss": 0.0003, "step": 8376 }, { "epoch": 125.03, "learning_rate": 5.884210526315789e-05, "loss": 0.0006, "step": 8377 }, { "epoch": 125.04, "learning_rate": 5.880701754385964e-05, "loss": 0.0002, "step": 8378 }, { "epoch": 125.06, "learning_rate": 5.87719298245614e-05, "loss": 0.0002, "step": 8379 }, { "epoch": 125.07, "learning_rate": 5.873684210526315e-05, "loss": 0.0002, "step": 8380 }, { "epoch": 125.09, "learning_rate": 5.870175438596491e-05, "loss": 0.1178, "step": 8381 }, { "epoch": 125.1, "learning_rate": 5.8666666666666665e-05, "loss": 0.0002, "step": 8382 }, { "epoch": 125.12, "learning_rate": 5.863157894736842e-05, "loss": 0.0002, "step": 8383 }, { "epoch": 125.13, "learning_rate": 5.859649122807017e-05, "loss": 0.0003, "step": 8384 }, { "epoch": 125.15, "learning_rate": 5.8561403508771925e-05, "loss": 0.0006, "step": 8385 }, { "epoch": 125.16, "learning_rate": 5.852631578947368e-05, "loss": 0.0002, "step": 8386 }, { "epoch": 125.18, "learning_rate": 5.8491228070175436e-05, "loss": 0.0027, "step": 8387 }, { "epoch": 125.19, "learning_rate": 5.8456140350877185e-05, "loss": 0.0004, "step": 8388 }, { "epoch": 125.21, "learning_rate": 5.842105263157894e-05, "loss": 0.0002, "step": 8389 }, { "epoch": 125.22, "learning_rate": 5.8385964912280696e-05, "loss": 0.0004, "step": 8390 }, { "epoch": 125.24, "learning_rate": 5.8350877192982445e-05, "loss": 0.0002, "step": 8391 }, { "epoch": 125.25, "learning_rate": 5.83157894736842e-05, "loss": 0.0002, "step": 8392 }, { "epoch": 125.27, "learning_rate": 5.828070175438596e-05, "loss": 0.0002, "step": 8393 }, { "epoch": 125.28, "learning_rate": 5.824561403508772e-05, "loss": 0.0007, "step": 8394 }, { "epoch": 125.3, "learning_rate": 5.8210526315789474e-05, "loss": 0.0002, "step": 8395 }, { "epoch": 125.31, "learning_rate": 5.817543859649122e-05, "loss": 0.0003, "step": 8396 }, { "epoch": 125.33, "learning_rate": 5.814035087719298e-05, "loss": 0.0009, "step": 8397 }, { "epoch": 125.34, "learning_rate": 5.8105263157894734e-05, "loss": 0.0002, "step": 8398 }, { "epoch": 125.36, "learning_rate": 5.807017543859649e-05, "loss": 0.0003, "step": 8399 }, { "epoch": 125.37, "learning_rate": 5.803508771929824e-05, "loss": 0.2335, "step": 8400 }, { "epoch": 125.37, "eval_accuracy": 0.8827704356338718, "eval_f1": 0.8831895482310441, "eval_loss": 0.6515348553657532, "eval_runtime": 348.7658, "eval_samples_per_second": 11.716, "eval_steps_per_second": 0.734, "step": 8400 }, { "epoch": 125.39, "learning_rate": 5.7999999999999994e-05, "loss": 0.0005, "step": 8401 }, { "epoch": 125.4, "learning_rate": 5.796491228070175e-05, "loss": 0.0002, "step": 8402 }, { "epoch": 125.42, "learning_rate": 5.79298245614035e-05, "loss": 0.0002, "step": 8403 }, { "epoch": 125.43, "learning_rate": 5.7894736842105253e-05, "loss": 0.0002, "step": 8404 }, { "epoch": 125.45, "learning_rate": 5.785964912280701e-05, "loss": 0.0003, "step": 8405 }, { "epoch": 125.46, "learning_rate": 5.782456140350877e-05, "loss": 0.0002, "step": 8406 }, { "epoch": 125.48, "learning_rate": 5.778947368421053e-05, "loss": 0.0008, "step": 8407 }, { "epoch": 125.49, "learning_rate": 5.7754385964912276e-05, "loss": 0.0002, "step": 8408 }, { "epoch": 125.51, "learning_rate": 5.771929824561403e-05, "loss": 0.0002, "step": 8409 }, { "epoch": 125.52, "learning_rate": 5.768421052631579e-05, "loss": 0.0029, "step": 8410 }, { "epoch": 125.54, "learning_rate": 5.764912280701754e-05, "loss": 0.0002, "step": 8411 }, { "epoch": 125.55, "learning_rate": 5.761403508771929e-05, "loss": 0.0044, "step": 8412 }, { "epoch": 125.57, "learning_rate": 5.757894736842105e-05, "loss": 0.0002, "step": 8413 }, { "epoch": 125.58, "learning_rate": 5.75438596491228e-05, "loss": 0.0003, "step": 8414 }, { "epoch": 125.59, "learning_rate": 5.750877192982455e-05, "loss": 0.0002, "step": 8415 }, { "epoch": 125.61, "learning_rate": 5.7473684210526307e-05, "loss": 0.0002, "step": 8416 }, { "epoch": 125.62, "learning_rate": 5.743859649122806e-05, "loss": 0.0199, "step": 8417 }, { "epoch": 125.64, "learning_rate": 5.7403508771929824e-05, "loss": 0.0004, "step": 8418 }, { "epoch": 125.65, "learning_rate": 5.736842105263158e-05, "loss": 0.0621, "step": 8419 }, { "epoch": 125.67, "learning_rate": 5.733333333333333e-05, "loss": 0.0003, "step": 8420 }, { "epoch": 125.68, "learning_rate": 5.7298245614035084e-05, "loss": 0.0002, "step": 8421 }, { "epoch": 125.7, "learning_rate": 5.726315789473684e-05, "loss": 0.0002, "step": 8422 }, { "epoch": 125.71, "learning_rate": 5.7228070175438595e-05, "loss": 0.0008, "step": 8423 }, { "epoch": 125.73, "learning_rate": 5.7192982456140344e-05, "loss": 0.0002, "step": 8424 }, { "epoch": 125.74, "learning_rate": 5.71578947368421e-05, "loss": 0.0014, "step": 8425 }, { "epoch": 125.76, "learning_rate": 5.7122807017543855e-05, "loss": 0.0848, "step": 8426 }, { "epoch": 125.77, "learning_rate": 5.7087719298245604e-05, "loss": 0.0004, "step": 8427 }, { "epoch": 125.79, "learning_rate": 5.705263157894736e-05, "loss": 0.0003, "step": 8428 }, { "epoch": 125.8, "learning_rate": 5.7017543859649115e-05, "loss": 0.0092, "step": 8429 }, { "epoch": 125.82, "learning_rate": 5.698245614035087e-05, "loss": 0.0004, "step": 8430 }, { "epoch": 125.83, "learning_rate": 5.694736842105263e-05, "loss": 0.0072, "step": 8431 }, { "epoch": 125.85, "learning_rate": 5.691228070175438e-05, "loss": 0.0002, "step": 8432 }, { "epoch": 125.86, "learning_rate": 5.687719298245614e-05, "loss": 0.0003, "step": 8433 }, { "epoch": 125.88, "learning_rate": 5.684210526315789e-05, "loss": 0.0002, "step": 8434 }, { "epoch": 125.89, "learning_rate": 5.680701754385965e-05, "loss": 0.0003, "step": 8435 }, { "epoch": 125.91, "learning_rate": 5.67719298245614e-05, "loss": 0.0002, "step": 8436 }, { "epoch": 125.92, "learning_rate": 5.673684210526315e-05, "loss": 0.0002, "step": 8437 }, { "epoch": 125.94, "learning_rate": 5.670175438596491e-05, "loss": 0.0012, "step": 8438 }, { "epoch": 125.95, "learning_rate": 5.666666666666666e-05, "loss": 0.0005, "step": 8439 }, { "epoch": 125.97, "learning_rate": 5.663157894736841e-05, "loss": 0.0003, "step": 8440 }, { "epoch": 125.98, "learning_rate": 5.659649122807017e-05, "loss": 0.0003, "step": 8441 }, { "epoch": 126.0, "learning_rate": 5.6561403508771924e-05, "loss": 0.0002, "step": 8442 }, { "epoch": 126.01, "learning_rate": 5.652631578947367e-05, "loss": 0.0003, "step": 8443 }, { "epoch": 126.03, "learning_rate": 5.6491228070175435e-05, "loss": 0.0004, "step": 8444 }, { "epoch": 126.04, "learning_rate": 5.645614035087719e-05, "loss": 0.0143, "step": 8445 }, { "epoch": 126.06, "learning_rate": 5.6421052631578946e-05, "loss": 0.0083, "step": 8446 }, { "epoch": 126.07, "learning_rate": 5.63859649122807e-05, "loss": 0.0032, "step": 8447 }, { "epoch": 126.09, "learning_rate": 5.635087719298245e-05, "loss": 0.0002, "step": 8448 }, { "epoch": 126.1, "learning_rate": 5.6315789473684206e-05, "loss": 0.0004, "step": 8449 }, { "epoch": 126.12, "learning_rate": 5.628070175438596e-05, "loss": 0.0003, "step": 8450 }, { "epoch": 126.13, "learning_rate": 5.624561403508771e-05, "loss": 0.0002, "step": 8451 }, { "epoch": 126.15, "learning_rate": 5.6210526315789466e-05, "loss": 0.0003, "step": 8452 }, { "epoch": 126.16, "learning_rate": 5.617543859649122e-05, "loss": 0.0002, "step": 8453 }, { "epoch": 126.18, "learning_rate": 5.614035087719298e-05, "loss": 0.0013, "step": 8454 }, { "epoch": 126.19, "learning_rate": 5.6105263157894726e-05, "loss": 0.0013, "step": 8455 }, { "epoch": 126.21, "learning_rate": 5.607017543859649e-05, "loss": 0.0003, "step": 8456 }, { "epoch": 126.22, "learning_rate": 5.6035087719298244e-05, "loss": 0.0002, "step": 8457 }, { "epoch": 126.24, "learning_rate": 5.6e-05, "loss": 0.0002, "step": 8458 }, { "epoch": 126.25, "learning_rate": 5.5964912280701755e-05, "loss": 0.0002, "step": 8459 }, { "epoch": 126.27, "learning_rate": 5.5929824561403503e-05, "loss": 0.0002, "step": 8460 }, { "epoch": 126.28, "learning_rate": 5.589473684210526e-05, "loss": 0.001, "step": 8461 }, { "epoch": 126.3, "learning_rate": 5.5859649122807015e-05, "loss": 0.001, "step": 8462 }, { "epoch": 126.31, "learning_rate": 5.582456140350876e-05, "loss": 0.0002, "step": 8463 }, { "epoch": 126.33, "learning_rate": 5.578947368421052e-05, "loss": 0.0002, "step": 8464 }, { "epoch": 126.34, "learning_rate": 5.5754385964912274e-05, "loss": 0.0002, "step": 8465 }, { "epoch": 126.36, "learning_rate": 5.571929824561403e-05, "loss": 0.0002, "step": 8466 }, { "epoch": 126.37, "learning_rate": 5.568421052631578e-05, "loss": 0.0003, "step": 8467 }, { "epoch": 126.39, "learning_rate": 5.5649122807017534e-05, "loss": 0.0002, "step": 8468 }, { "epoch": 126.4, "learning_rate": 5.56140350877193e-05, "loss": 0.0003, "step": 8469 }, { "epoch": 126.42, "learning_rate": 5.557894736842105e-05, "loss": 0.0002, "step": 8470 }, { "epoch": 126.43, "learning_rate": 5.55438596491228e-05, "loss": 0.0366, "step": 8471 }, { "epoch": 126.45, "learning_rate": 5.5508771929824557e-05, "loss": 0.0002, "step": 8472 }, { "epoch": 126.46, "learning_rate": 5.547368421052631e-05, "loss": 0.0002, "step": 8473 }, { "epoch": 126.48, "learning_rate": 5.543859649122807e-05, "loss": 0.0004, "step": 8474 }, { "epoch": 126.49, "learning_rate": 5.5403508771929816e-05, "loss": 0.0002, "step": 8475 }, { "epoch": 126.51, "learning_rate": 5.536842105263157e-05, "loss": 0.0003, "step": 8476 }, { "epoch": 126.52, "learning_rate": 5.533333333333333e-05, "loss": 0.0003, "step": 8477 }, { "epoch": 126.54, "learning_rate": 5.529824561403508e-05, "loss": 0.0002, "step": 8478 }, { "epoch": 126.55, "learning_rate": 5.526315789473683e-05, "loss": 0.0002, "step": 8479 }, { "epoch": 126.57, "learning_rate": 5.522807017543859e-05, "loss": 0.0002, "step": 8480 }, { "epoch": 126.58, "learning_rate": 5.519298245614035e-05, "loss": 0.0002, "step": 8481 }, { "epoch": 126.59, "learning_rate": 5.5157894736842105e-05, "loss": 0.0005, "step": 8482 }, { "epoch": 126.61, "learning_rate": 5.5122807017543854e-05, "loss": 0.0002, "step": 8483 }, { "epoch": 126.62, "learning_rate": 5.508771929824561e-05, "loss": 0.0003, "step": 8484 }, { "epoch": 126.64, "learning_rate": 5.5052631578947365e-05, "loss": 0.0002, "step": 8485 }, { "epoch": 126.65, "learning_rate": 5.501754385964912e-05, "loss": 0.0002, "step": 8486 }, { "epoch": 126.67, "learning_rate": 5.498245614035087e-05, "loss": 0.0002, "step": 8487 }, { "epoch": 126.68, "learning_rate": 5.4947368421052625e-05, "loss": 0.0002, "step": 8488 }, { "epoch": 126.7, "learning_rate": 5.491228070175438e-05, "loss": 0.0003, "step": 8489 }, { "epoch": 126.71, "learning_rate": 5.4877192982456136e-05, "loss": 0.0002, "step": 8490 }, { "epoch": 126.73, "learning_rate": 5.4842105263157885e-05, "loss": 0.0002, "step": 8491 }, { "epoch": 126.74, "learning_rate": 5.480701754385964e-05, "loss": 0.0003, "step": 8492 }, { "epoch": 126.76, "learning_rate": 5.4771929824561396e-05, "loss": 0.2602, "step": 8493 }, { "epoch": 126.77, "learning_rate": 5.473684210526316e-05, "loss": 0.0003, "step": 8494 }, { "epoch": 126.79, "learning_rate": 5.470175438596491e-05, "loss": 0.0002, "step": 8495 }, { "epoch": 126.8, "learning_rate": 5.466666666666666e-05, "loss": 0.0003, "step": 8496 }, { "epoch": 126.82, "learning_rate": 5.463157894736842e-05, "loss": 0.0137, "step": 8497 }, { "epoch": 126.83, "learning_rate": 5.4596491228070174e-05, "loss": 0.2638, "step": 8498 }, { "epoch": 126.85, "learning_rate": 5.456140350877192e-05, "loss": 0.0002, "step": 8499 }, { "epoch": 126.86, "learning_rate": 5.452631578947368e-05, "loss": 0.0003, "step": 8500 }, { "epoch": 126.88, "learning_rate": 5.4491228070175434e-05, "loss": 0.0002, "step": 8501 }, { "epoch": 126.89, "learning_rate": 5.445614035087719e-05, "loss": 0.0003, "step": 8502 }, { "epoch": 126.91, "learning_rate": 5.442105263157894e-05, "loss": 0.0003, "step": 8503 }, { "epoch": 126.92, "learning_rate": 5.4385964912280694e-05, "loss": 0.0051, "step": 8504 }, { "epoch": 126.94, "learning_rate": 5.435087719298245e-05, "loss": 0.0003, "step": 8505 }, { "epoch": 126.95, "learning_rate": 5.431578947368421e-05, "loss": 0.0003, "step": 8506 }, { "epoch": 126.97, "learning_rate": 5.428070175438596e-05, "loss": 0.0007, "step": 8507 }, { "epoch": 126.98, "learning_rate": 5.4245614035087716e-05, "loss": 0.0003, "step": 8508 }, { "epoch": 127.0, "learning_rate": 5.421052631578947e-05, "loss": 0.0002, "step": 8509 }, { "epoch": 127.01, "learning_rate": 5.417543859649123e-05, "loss": 0.0003, "step": 8510 }, { "epoch": 127.03, "learning_rate": 5.4140350877192976e-05, "loss": 0.0003, "step": 8511 }, { "epoch": 127.04, "learning_rate": 5.410526315789473e-05, "loss": 0.0003, "step": 8512 }, { "epoch": 127.06, "learning_rate": 5.407017543859649e-05, "loss": 0.0002, "step": 8513 }, { "epoch": 127.07, "learning_rate": 5.403508771929824e-05, "loss": 0.0004, "step": 8514 }, { "epoch": 127.09, "learning_rate": 5.399999999999999e-05, "loss": 0.0002, "step": 8515 }, { "epoch": 127.1, "learning_rate": 5.396491228070175e-05, "loss": 0.0003, "step": 8516 }, { "epoch": 127.12, "learning_rate": 5.39298245614035e-05, "loss": 0.0004, "step": 8517 }, { "epoch": 127.13, "learning_rate": 5.389473684210525e-05, "loss": 0.0003, "step": 8518 }, { "epoch": 127.15, "learning_rate": 5.3859649122807013e-05, "loss": 0.0004, "step": 8519 }, { "epoch": 127.16, "learning_rate": 5.382456140350877e-05, "loss": 0.0003, "step": 8520 }, { "epoch": 127.18, "learning_rate": 5.3789473684210525e-05, "loss": 0.0003, "step": 8521 }, { "epoch": 127.19, "learning_rate": 5.375438596491228e-05, "loss": 0.0003, "step": 8522 }, { "epoch": 127.21, "learning_rate": 5.371929824561403e-05, "loss": 0.0004, "step": 8523 }, { "epoch": 127.22, "learning_rate": 5.3684210526315784e-05, "loss": 0.1857, "step": 8524 }, { "epoch": 127.24, "learning_rate": 5.364912280701754e-05, "loss": 0.0003, "step": 8525 }, { "epoch": 127.25, "learning_rate": 5.3614035087719296e-05, "loss": 0.0003, "step": 8526 }, { "epoch": 127.27, "learning_rate": 5.3578947368421044e-05, "loss": 0.0005, "step": 8527 }, { "epoch": 127.28, "learning_rate": 5.35438596491228e-05, "loss": 0.0003, "step": 8528 }, { "epoch": 127.3, "learning_rate": 5.3508771929824555e-05, "loss": 0.0003, "step": 8529 }, { "epoch": 127.31, "learning_rate": 5.3473684210526304e-05, "loss": 0.0004, "step": 8530 }, { "epoch": 127.33, "learning_rate": 5.3438596491228067e-05, "loss": 0.0003, "step": 8531 }, { "epoch": 127.34, "learning_rate": 5.340350877192982e-05, "loss": 0.1111, "step": 8532 }, { "epoch": 127.36, "learning_rate": 5.336842105263158e-05, "loss": 0.0005, "step": 8533 }, { "epoch": 127.37, "learning_rate": 5.333333333333333e-05, "loss": 0.0003, "step": 8534 }, { "epoch": 127.39, "learning_rate": 5.329824561403508e-05, "loss": 0.0002, "step": 8535 }, { "epoch": 127.4, "learning_rate": 5.326315789473684e-05, "loss": 0.0004, "step": 8536 }, { "epoch": 127.42, "learning_rate": 5.322807017543859e-05, "loss": 0.0017, "step": 8537 }, { "epoch": 127.43, "learning_rate": 5.319298245614035e-05, "loss": 0.0004, "step": 8538 }, { "epoch": 127.45, "learning_rate": 5.31578947368421e-05, "loss": 0.0003, "step": 8539 }, { "epoch": 127.46, "learning_rate": 5.312280701754385e-05, "loss": 0.0003, "step": 8540 }, { "epoch": 127.48, "learning_rate": 5.308771929824561e-05, "loss": 0.0003, "step": 8541 }, { "epoch": 127.49, "learning_rate": 5.305263157894736e-05, "loss": 0.0003, "step": 8542 }, { "epoch": 127.51, "learning_rate": 5.301754385964911e-05, "loss": 0.0003, "step": 8543 }, { "epoch": 127.52, "learning_rate": 5.2982456140350875e-05, "loss": 0.0016, "step": 8544 }, { "epoch": 127.54, "learning_rate": 5.294736842105263e-05, "loss": 0.0003, "step": 8545 }, { "epoch": 127.55, "learning_rate": 5.2912280701754386e-05, "loss": 0.0004, "step": 8546 }, { "epoch": 127.57, "learning_rate": 5.2877192982456135e-05, "loss": 0.0003, "step": 8547 }, { "epoch": 127.58, "learning_rate": 5.284210526315789e-05, "loss": 0.0004, "step": 8548 }, { "epoch": 127.59, "learning_rate": 5.2807017543859646e-05, "loss": 0.0014, "step": 8549 }, { "epoch": 127.61, "learning_rate": 5.27719298245614e-05, "loss": 0.0004, "step": 8550 }, { "epoch": 127.62, "learning_rate": 5.273684210526315e-05, "loss": 0.0132, "step": 8551 }, { "epoch": 127.64, "learning_rate": 5.2701754385964906e-05, "loss": 0.0012, "step": 8552 }, { "epoch": 127.65, "learning_rate": 5.266666666666666e-05, "loss": 0.1389, "step": 8553 }, { "epoch": 127.67, "learning_rate": 5.263157894736841e-05, "loss": 0.0003, "step": 8554 }, { "epoch": 127.68, "learning_rate": 5.2596491228070166e-05, "loss": 0.0417, "step": 8555 }, { "epoch": 127.7, "learning_rate": 5.256140350877193e-05, "loss": 0.0004, "step": 8556 }, { "epoch": 127.71, "learning_rate": 5.2526315789473684e-05, "loss": 0.0003, "step": 8557 }, { "epoch": 127.73, "learning_rate": 5.249122807017544e-05, "loss": 0.0003, "step": 8558 }, { "epoch": 127.74, "learning_rate": 5.245614035087719e-05, "loss": 0.0005, "step": 8559 }, { "epoch": 127.76, "learning_rate": 5.2421052631578944e-05, "loss": 0.0002, "step": 8560 }, { "epoch": 127.77, "learning_rate": 5.23859649122807e-05, "loss": 0.0005, "step": 8561 }, { "epoch": 127.79, "learning_rate": 5.2350877192982455e-05, "loss": 0.0003, "step": 8562 }, { "epoch": 127.8, "learning_rate": 5.2315789473684204e-05, "loss": 0.0007, "step": 8563 }, { "epoch": 127.82, "learning_rate": 5.228070175438596e-05, "loss": 0.0002, "step": 8564 }, { "epoch": 127.83, "learning_rate": 5.2245614035087715e-05, "loss": 0.0004, "step": 8565 }, { "epoch": 127.85, "learning_rate": 5.2210526315789463e-05, "loss": 0.0015, "step": 8566 }, { "epoch": 127.86, "learning_rate": 5.217543859649122e-05, "loss": 0.0009, "step": 8567 }, { "epoch": 127.88, "learning_rate": 5.2140350877192975e-05, "loss": 0.0003, "step": 8568 }, { "epoch": 127.89, "learning_rate": 5.210526315789474e-05, "loss": 0.3038, "step": 8569 }, { "epoch": 127.91, "learning_rate": 5.207017543859649e-05, "loss": 0.0003, "step": 8570 }, { "epoch": 127.92, "learning_rate": 5.203508771929824e-05, "loss": 0.0012, "step": 8571 }, { "epoch": 127.94, "learning_rate": 5.2e-05, "loss": 0.0003, "step": 8572 }, { "epoch": 127.95, "learning_rate": 5.196491228070175e-05, "loss": 0.0003, "step": 8573 }, { "epoch": 127.97, "learning_rate": 5.192982456140351e-05, "loss": 0.0003, "step": 8574 }, { "epoch": 127.98, "learning_rate": 5.189473684210526e-05, "loss": 0.1019, "step": 8575 }, { "epoch": 128.0, "learning_rate": 5.185964912280701e-05, "loss": 0.0004, "step": 8576 }, { "epoch": 128.01, "learning_rate": 5.182456140350877e-05, "loss": 0.0004, "step": 8577 }, { "epoch": 128.03, "learning_rate": 5.1789473684210517e-05, "loss": 0.0119, "step": 8578 }, { "epoch": 128.04, "learning_rate": 5.175438596491227e-05, "loss": 0.0004, "step": 8579 }, { "epoch": 128.06, "learning_rate": 5.171929824561403e-05, "loss": 0.0018, "step": 8580 }, { "epoch": 128.07, "learning_rate": 5.168421052631579e-05, "loss": 0.078, "step": 8581 }, { "epoch": 128.09, "learning_rate": 5.1649122807017546e-05, "loss": 0.0004, "step": 8582 }, { "epoch": 128.1, "learning_rate": 5.1614035087719294e-05, "loss": 0.0005, "step": 8583 }, { "epoch": 128.12, "learning_rate": 5.157894736842105e-05, "loss": 0.2706, "step": 8584 }, { "epoch": 128.13, "learning_rate": 5.1543859649122805e-05, "loss": 0.0003, "step": 8585 }, { "epoch": 128.15, "learning_rate": 5.150877192982456e-05, "loss": 0.0099, "step": 8586 }, { "epoch": 128.16, "learning_rate": 5.147368421052631e-05, "loss": 0.0003, "step": 8587 }, { "epoch": 128.18, "learning_rate": 5.1438596491228065e-05, "loss": 0.0438, "step": 8588 }, { "epoch": 128.19, "learning_rate": 5.140350877192982e-05, "loss": 0.0003, "step": 8589 }, { "epoch": 128.21, "learning_rate": 5.136842105263157e-05, "loss": 0.0004, "step": 8590 }, { "epoch": 128.22, "learning_rate": 5.1333333333333325e-05, "loss": 0.0004, "step": 8591 }, { "epoch": 128.24, "learning_rate": 5.129824561403508e-05, "loss": 0.0004, "step": 8592 }, { "epoch": 128.25, "learning_rate": 5.1263157894736836e-05, "loss": 0.0004, "step": 8593 }, { "epoch": 128.27, "learning_rate": 5.12280701754386e-05, "loss": 0.0684, "step": 8594 }, { "epoch": 128.28, "learning_rate": 5.119298245614035e-05, "loss": 0.001, "step": 8595 }, { "epoch": 128.3, "learning_rate": 5.11578947368421e-05, "loss": 0.0004, "step": 8596 }, { "epoch": 128.31, "learning_rate": 5.112280701754386e-05, "loss": 0.0004, "step": 8597 }, { "epoch": 128.33, "learning_rate": 5.1087719298245614e-05, "loss": 0.0007, "step": 8598 }, { "epoch": 128.34, "learning_rate": 5.105263157894736e-05, "loss": 0.0036, "step": 8599 }, { "epoch": 128.36, "learning_rate": 5.101754385964912e-05, "loss": 0.0005, "step": 8600 }, { "epoch": 128.36, "eval_accuracy": 0.8761625061184533, "eval_f1": 0.8776023752481238, "eval_loss": 0.6961445808410645, "eval_runtime": 345.4659, "eval_samples_per_second": 11.828, "eval_steps_per_second": 0.741, "step": 8600 }, { "epoch": 128.37, "learning_rate": 5.0982456140350874e-05, "loss": 0.0066, "step": 8601 }, { "epoch": 128.39, "learning_rate": 5.094736842105262e-05, "loss": 0.0003, "step": 8602 }, { "epoch": 128.4, "learning_rate": 5.091228070175438e-05, "loss": 0.0004, "step": 8603 }, { "epoch": 128.42, "learning_rate": 5.0877192982456134e-05, "loss": 0.0009, "step": 8604 }, { "epoch": 128.43, "learning_rate": 5.084210526315789e-05, "loss": 0.0017, "step": 8605 }, { "epoch": 128.45, "learning_rate": 5.080701754385964e-05, "loss": 0.0003, "step": 8606 }, { "epoch": 128.46, "learning_rate": 5.07719298245614e-05, "loss": 0.0008, "step": 8607 }, { "epoch": 128.48, "learning_rate": 5.0736842105263156e-05, "loss": 0.0005, "step": 8608 }, { "epoch": 128.49, "learning_rate": 5.070175438596491e-05, "loss": 0.0004, "step": 8609 }, { "epoch": 128.51, "learning_rate": 5.066666666666666e-05, "loss": 0.0004, "step": 8610 }, { "epoch": 128.52, "learning_rate": 5.0631578947368416e-05, "loss": 0.0007, "step": 8611 }, { "epoch": 128.54, "learning_rate": 5.059649122807017e-05, "loss": 0.0006, "step": 8612 }, { "epoch": 128.55, "learning_rate": 5.056140350877193e-05, "loss": 0.0003, "step": 8613 }, { "epoch": 128.57, "learning_rate": 5.0526315789473676e-05, "loss": 0.0003, "step": 8614 }, { "epoch": 128.58, "learning_rate": 5.049122807017543e-05, "loss": 0.0002, "step": 8615 }, { "epoch": 128.59, "learning_rate": 5.045614035087719e-05, "loss": 0.0004, "step": 8616 }, { "epoch": 128.61, "learning_rate": 5.042105263157894e-05, "loss": 0.0194, "step": 8617 }, { "epoch": 128.62, "learning_rate": 5.038596491228069e-05, "loss": 0.0003, "step": 8618 }, { "epoch": 128.64, "learning_rate": 5.0350877192982454e-05, "loss": 0.001, "step": 8619 }, { "epoch": 128.65, "learning_rate": 5.031578947368421e-05, "loss": 0.0004, "step": 8620 }, { "epoch": 128.67, "learning_rate": 5.0280701754385965e-05, "loss": 0.0152, "step": 8621 }, { "epoch": 128.68, "learning_rate": 5.0245614035087714e-05, "loss": 0.0003, "step": 8622 }, { "epoch": 128.7, "learning_rate": 5.021052631578947e-05, "loss": 0.0003, "step": 8623 }, { "epoch": 128.71, "learning_rate": 5.0175438596491225e-05, "loss": 0.0004, "step": 8624 }, { "epoch": 128.73, "learning_rate": 5.014035087719298e-05, "loss": 0.0003, "step": 8625 }, { "epoch": 128.74, "learning_rate": 5.010526315789473e-05, "loss": 0.0013, "step": 8626 }, { "epoch": 128.76, "learning_rate": 5.0070175438596485e-05, "loss": 0.0003, "step": 8627 }, { "epoch": 128.77, "learning_rate": 5.003508771929824e-05, "loss": 0.0002, "step": 8628 }, { "epoch": 128.79, "learning_rate": 4.9999999999999996e-05, "loss": 0.0003, "step": 8629 }, { "epoch": 128.8, "learning_rate": 4.9964912280701744e-05, "loss": 0.0003, "step": 8630 }, { "epoch": 128.82, "learning_rate": 4.99298245614035e-05, "loss": 0.0007, "step": 8631 }, { "epoch": 128.83, "learning_rate": 4.989473684210526e-05, "loss": 0.0075, "step": 8632 }, { "epoch": 128.85, "learning_rate": 4.985964912280702e-05, "loss": 0.0009, "step": 8633 }, { "epoch": 128.86, "learning_rate": 4.982456140350877e-05, "loss": 0.0003, "step": 8634 }, { "epoch": 128.88, "learning_rate": 4.978947368421052e-05, "loss": 0.0003, "step": 8635 }, { "epoch": 128.89, "learning_rate": 4.975438596491228e-05, "loss": 0.0004, "step": 8636 }, { "epoch": 128.91, "learning_rate": 4.971929824561403e-05, "loss": 0.0004, "step": 8637 }, { "epoch": 128.92, "learning_rate": 4.968421052631578e-05, "loss": 0.0004, "step": 8638 }, { "epoch": 128.94, "learning_rate": 4.964912280701754e-05, "loss": 0.0003, "step": 8639 }, { "epoch": 128.95, "learning_rate": 4.961403508771929e-05, "loss": 0.0004, "step": 8640 }, { "epoch": 128.97, "learning_rate": 4.957894736842105e-05, "loss": 0.0004, "step": 8641 }, { "epoch": 128.98, "learning_rate": 4.95438596491228e-05, "loss": 0.0004, "step": 8642 }, { "epoch": 129.0, "learning_rate": 4.950877192982455e-05, "loss": 0.0004, "step": 8643 }, { "epoch": 129.01, "learning_rate": 4.9473684210526315e-05, "loss": 0.0003, "step": 8644 }, { "epoch": 129.03, "learning_rate": 4.943859649122807e-05, "loss": 0.0003, "step": 8645 }, { "epoch": 129.04, "learning_rate": 4.940350877192982e-05, "loss": 0.0002, "step": 8646 }, { "epoch": 129.06, "learning_rate": 4.9368421052631575e-05, "loss": 0.0003, "step": 8647 }, { "epoch": 129.07, "learning_rate": 4.933333333333333e-05, "loss": 0.0957, "step": 8648 }, { "epoch": 129.09, "learning_rate": 4.9298245614035086e-05, "loss": 0.0003, "step": 8649 }, { "epoch": 129.1, "learning_rate": 4.9263157894736835e-05, "loss": 0.0003, "step": 8650 }, { "epoch": 129.12, "learning_rate": 4.922807017543859e-05, "loss": 0.0009, "step": 8651 }, { "epoch": 129.13, "learning_rate": 4.9192982456140346e-05, "loss": 0.0003, "step": 8652 }, { "epoch": 129.15, "learning_rate": 4.91578947368421e-05, "loss": 0.0002, "step": 8653 }, { "epoch": 129.16, "learning_rate": 4.912280701754385e-05, "loss": 0.0002, "step": 8654 }, { "epoch": 129.18, "learning_rate": 4.9087719298245606e-05, "loss": 0.0002, "step": 8655 }, { "epoch": 129.19, "learning_rate": 4.905263157894736e-05, "loss": 0.0004, "step": 8656 }, { "epoch": 129.21, "learning_rate": 4.9017543859649124e-05, "loss": 0.0003, "step": 8657 }, { "epoch": 129.22, "learning_rate": 4.898245614035087e-05, "loss": 0.0003, "step": 8658 }, { "epoch": 129.24, "learning_rate": 4.894736842105263e-05, "loss": 0.0002, "step": 8659 }, { "epoch": 129.25, "learning_rate": 4.8912280701754384e-05, "loss": 0.0003, "step": 8660 }, { "epoch": 129.27, "learning_rate": 4.887719298245614e-05, "loss": 0.0005, "step": 8661 }, { "epoch": 129.28, "learning_rate": 4.884210526315789e-05, "loss": 0.1397, "step": 8662 }, { "epoch": 129.3, "learning_rate": 4.8807017543859644e-05, "loss": 0.0002, "step": 8663 }, { "epoch": 129.31, "learning_rate": 4.87719298245614e-05, "loss": 0.001, "step": 8664 }, { "epoch": 129.33, "learning_rate": 4.8736842105263155e-05, "loss": 0.0003, "step": 8665 }, { "epoch": 129.34, "learning_rate": 4.8701754385964904e-05, "loss": 0.0003, "step": 8666 }, { "epoch": 129.36, "learning_rate": 4.866666666666666e-05, "loss": 0.0002, "step": 8667 }, { "epoch": 129.37, "learning_rate": 4.8631578947368415e-05, "loss": 0.0002, "step": 8668 }, { "epoch": 129.39, "learning_rate": 4.859649122807018e-05, "loss": 0.0004, "step": 8669 }, { "epoch": 129.4, "learning_rate": 4.8561403508771926e-05, "loss": 0.0003, "step": 8670 }, { "epoch": 129.42, "learning_rate": 4.852631578947368e-05, "loss": 0.0003, "step": 8671 }, { "epoch": 129.43, "learning_rate": 4.849122807017544e-05, "loss": 0.0003, "step": 8672 }, { "epoch": 129.45, "learning_rate": 4.845614035087719e-05, "loss": 0.0002, "step": 8673 }, { "epoch": 129.46, "learning_rate": 4.842105263157894e-05, "loss": 0.0004, "step": 8674 }, { "epoch": 129.48, "learning_rate": 4.83859649122807e-05, "loss": 0.0006, "step": 8675 }, { "epoch": 129.49, "learning_rate": 4.835087719298245e-05, "loss": 0.0077, "step": 8676 }, { "epoch": 129.51, "learning_rate": 4.831578947368421e-05, "loss": 0.0002, "step": 8677 }, { "epoch": 129.52, "learning_rate": 4.828070175438596e-05, "loss": 0.0004, "step": 8678 }, { "epoch": 129.54, "learning_rate": 4.824561403508771e-05, "loss": 0.0003, "step": 8679 }, { "epoch": 129.55, "learning_rate": 4.821052631578947e-05, "loss": 0.0004, "step": 8680 }, { "epoch": 129.57, "learning_rate": 4.817543859649122e-05, "loss": 0.0004, "step": 8681 }, { "epoch": 129.58, "learning_rate": 4.814035087719298e-05, "loss": 0.0004, "step": 8682 }, { "epoch": 129.59, "learning_rate": 4.8105263157894735e-05, "loss": 0.1145, "step": 8683 }, { "epoch": 129.61, "learning_rate": 4.807017543859649e-05, "loss": 0.0003, "step": 8684 }, { "epoch": 129.62, "learning_rate": 4.8035087719298246e-05, "loss": 0.1198, "step": 8685 }, { "epoch": 129.64, "learning_rate": 4.7999999999999994e-05, "loss": 0.0003, "step": 8686 }, { "epoch": 129.65, "learning_rate": 4.796491228070175e-05, "loss": 0.0002, "step": 8687 }, { "epoch": 129.67, "learning_rate": 4.7929824561403506e-05, "loss": 0.0003, "step": 8688 }, { "epoch": 129.68, "learning_rate": 4.789473684210526e-05, "loss": 0.0002, "step": 8689 }, { "epoch": 129.7, "learning_rate": 4.785964912280701e-05, "loss": 0.0005, "step": 8690 }, { "epoch": 129.71, "learning_rate": 4.7824561403508765e-05, "loss": 0.0006, "step": 8691 }, { "epoch": 129.73, "learning_rate": 4.778947368421052e-05, "loss": 0.0003, "step": 8692 }, { "epoch": 129.74, "learning_rate": 4.775438596491227e-05, "loss": 0.0003, "step": 8693 }, { "epoch": 129.76, "learning_rate": 4.771929824561403e-05, "loss": 0.0003, "step": 8694 }, { "epoch": 129.77, "learning_rate": 4.768421052631579e-05, "loss": 0.0003, "step": 8695 }, { "epoch": 129.79, "learning_rate": 4.764912280701754e-05, "loss": 0.0003, "step": 8696 }, { "epoch": 129.8, "learning_rate": 4.76140350877193e-05, "loss": 0.03, "step": 8697 }, { "epoch": 129.82, "learning_rate": 4.757894736842105e-05, "loss": 0.0003, "step": 8698 }, { "epoch": 129.83, "learning_rate": 4.75438596491228e-05, "loss": 0.0003, "step": 8699 }, { "epoch": 129.85, "learning_rate": 4.750877192982456e-05, "loss": 0.0003, "step": 8700 }, { "epoch": 129.86, "learning_rate": 4.7473684210526314e-05, "loss": 0.0004, "step": 8701 }, { "epoch": 129.88, "learning_rate": 4.743859649122806e-05, "loss": 0.0003, "step": 8702 }, { "epoch": 129.89, "learning_rate": 4.740350877192982e-05, "loss": 0.0002, "step": 8703 }, { "epoch": 129.91, "learning_rate": 4.7368421052631574e-05, "loss": 0.0003, "step": 8704 }, { "epoch": 129.92, "learning_rate": 4.733333333333332e-05, "loss": 0.0004, "step": 8705 }, { "epoch": 129.94, "learning_rate": 4.729824561403508e-05, "loss": 0.0012, "step": 8706 }, { "epoch": 129.95, "learning_rate": 4.726315789473684e-05, "loss": 0.0003, "step": 8707 }, { "epoch": 129.97, "learning_rate": 4.7228070175438596e-05, "loss": 0.0024, "step": 8708 }, { "epoch": 129.98, "learning_rate": 4.719298245614035e-05, "loss": 0.0003, "step": 8709 }, { "epoch": 130.0, "learning_rate": 4.71578947368421e-05, "loss": 0.0003, "step": 8710 }, { "epoch": 130.01, "learning_rate": 4.7122807017543856e-05, "loss": 0.0006, "step": 8711 }, { "epoch": 130.03, "learning_rate": 4.708771929824561e-05, "loss": 0.0012, "step": 8712 }, { "epoch": 130.04, "learning_rate": 4.705263157894737e-05, "loss": 0.0002, "step": 8713 }, { "epoch": 130.06, "learning_rate": 4.7017543859649116e-05, "loss": 0.0005, "step": 8714 }, { "epoch": 130.07, "learning_rate": 4.698245614035087e-05, "loss": 0.0003, "step": 8715 }, { "epoch": 130.09, "learning_rate": 4.694736842105263e-05, "loss": 0.0003, "step": 8716 }, { "epoch": 130.1, "learning_rate": 4.6912280701754376e-05, "loss": 0.0004, "step": 8717 }, { "epoch": 130.12, "learning_rate": 4.687719298245613e-05, "loss": 0.0058, "step": 8718 }, { "epoch": 130.13, "learning_rate": 4.6842105263157894e-05, "loss": 0.0002, "step": 8719 }, { "epoch": 130.15, "learning_rate": 4.680701754385965e-05, "loss": 0.0009, "step": 8720 }, { "epoch": 130.16, "learning_rate": 4.6771929824561405e-05, "loss": 0.0003, "step": 8721 }, { "epoch": 130.18, "learning_rate": 4.6736842105263154e-05, "loss": 0.0002, "step": 8722 }, { "epoch": 130.19, "learning_rate": 4.670175438596491e-05, "loss": 0.1649, "step": 8723 }, { "epoch": 130.21, "learning_rate": 4.6666666666666665e-05, "loss": 0.0003, "step": 8724 }, { "epoch": 130.22, "learning_rate": 4.663157894736842e-05, "loss": 0.0006, "step": 8725 }, { "epoch": 130.24, "learning_rate": 4.659649122807017e-05, "loss": 0.0003, "step": 8726 }, { "epoch": 130.25, "learning_rate": 4.6561403508771925e-05, "loss": 0.0003, "step": 8727 }, { "epoch": 130.27, "learning_rate": 4.652631578947368e-05, "loss": 0.0002, "step": 8728 }, { "epoch": 130.28, "learning_rate": 4.649122807017543e-05, "loss": 0.0008, "step": 8729 }, { "epoch": 130.3, "learning_rate": 4.6456140350877185e-05, "loss": 0.0003, "step": 8730 }, { "epoch": 130.31, "learning_rate": 4.642105263157894e-05, "loss": 0.0005, "step": 8731 }, { "epoch": 130.33, "learning_rate": 4.63859649122807e-05, "loss": 0.0003, "step": 8732 }, { "epoch": 130.34, "learning_rate": 4.635087719298246e-05, "loss": 0.0004, "step": 8733 }, { "epoch": 130.36, "learning_rate": 4.631578947368421e-05, "loss": 0.2316, "step": 8734 }, { "epoch": 130.37, "learning_rate": 4.628070175438596e-05, "loss": 0.0004, "step": 8735 }, { "epoch": 130.39, "learning_rate": 4.624561403508772e-05, "loss": 0.0003, "step": 8736 }, { "epoch": 130.4, "learning_rate": 4.621052631578947e-05, "loss": 0.0002, "step": 8737 }, { "epoch": 130.42, "learning_rate": 4.617543859649122e-05, "loss": 0.0002, "step": 8738 }, { "epoch": 130.43, "learning_rate": 4.614035087719298e-05, "loss": 0.0881, "step": 8739 }, { "epoch": 130.45, "learning_rate": 4.610526315789473e-05, "loss": 0.0137, "step": 8740 }, { "epoch": 130.46, "learning_rate": 4.607017543859648e-05, "loss": 0.0003, "step": 8741 }, { "epoch": 130.48, "learning_rate": 4.603508771929824e-05, "loss": 0.0003, "step": 8742 }, { "epoch": 130.49, "learning_rate": 4.599999999999999e-05, "loss": 0.0003, "step": 8743 }, { "epoch": 130.51, "learning_rate": 4.5964912280701756e-05, "loss": 0.0003, "step": 8744 }, { "epoch": 130.52, "learning_rate": 4.592982456140351e-05, "loss": 0.0003, "step": 8745 }, { "epoch": 130.54, "learning_rate": 4.589473684210526e-05, "loss": 0.0013, "step": 8746 }, { "epoch": 130.55, "learning_rate": 4.5859649122807015e-05, "loss": 0.0004, "step": 8747 }, { "epoch": 130.57, "learning_rate": 4.582456140350877e-05, "loss": 0.0003, "step": 8748 }, { "epoch": 130.58, "learning_rate": 4.578947368421052e-05, "loss": 0.0011, "step": 8749 }, { "epoch": 130.59, "learning_rate": 4.5754385964912275e-05, "loss": 0.0002, "step": 8750 }, { "epoch": 130.61, "learning_rate": 4.571929824561403e-05, "loss": 0.0004, "step": 8751 }, { "epoch": 130.62, "learning_rate": 4.5684210526315786e-05, "loss": 0.0002, "step": 8752 }, { "epoch": 130.64, "learning_rate": 4.5649122807017535e-05, "loss": 0.0005, "step": 8753 }, { "epoch": 130.65, "learning_rate": 4.561403508771929e-05, "loss": 0.0003, "step": 8754 }, { "epoch": 130.67, "learning_rate": 4.5578947368421046e-05, "loss": 0.0003, "step": 8755 }, { "epoch": 130.68, "learning_rate": 4.55438596491228e-05, "loss": 0.0003, "step": 8756 }, { "epoch": 130.7, "learning_rate": 4.5508771929824564e-05, "loss": 0.0004, "step": 8757 }, { "epoch": 130.71, "learning_rate": 4.547368421052631e-05, "loss": 0.0004, "step": 8758 }, { "epoch": 130.73, "learning_rate": 4.543859649122807e-05, "loss": 0.0003, "step": 8759 }, { "epoch": 130.74, "learning_rate": 4.5403508771929824e-05, "loss": 0.0366, "step": 8760 }, { "epoch": 130.76, "learning_rate": 4.536842105263157e-05, "loss": 0.0002, "step": 8761 }, { "epoch": 130.77, "learning_rate": 4.533333333333333e-05, "loss": 0.0003, "step": 8762 }, { "epoch": 130.79, "learning_rate": 4.5298245614035084e-05, "loss": 0.0003, "step": 8763 }, { "epoch": 130.8, "learning_rate": 4.526315789473684e-05, "loss": 0.0003, "step": 8764 }, { "epoch": 130.82, "learning_rate": 4.522807017543859e-05, "loss": 0.0002, "step": 8765 }, { "epoch": 130.83, "learning_rate": 4.5192982456140344e-05, "loss": 0.0002, "step": 8766 }, { "epoch": 130.85, "learning_rate": 4.51578947368421e-05, "loss": 0.0065, "step": 8767 }, { "epoch": 130.86, "learning_rate": 4.5122807017543855e-05, "loss": 0.0002, "step": 8768 }, { "epoch": 130.88, "learning_rate": 4.508771929824562e-05, "loss": 0.0002, "step": 8769 }, { "epoch": 130.89, "learning_rate": 4.5052631578947366e-05, "loss": 0.0002, "step": 8770 }, { "epoch": 130.91, "learning_rate": 4.501754385964912e-05, "loss": 0.0002, "step": 8771 }, { "epoch": 130.92, "learning_rate": 4.498245614035088e-05, "loss": 0.0002, "step": 8772 }, { "epoch": 130.94, "learning_rate": 4.4947368421052626e-05, "loss": 0.0002, "step": 8773 }, { "epoch": 130.95, "learning_rate": 4.491228070175438e-05, "loss": 0.0003, "step": 8774 }, { "epoch": 130.97, "learning_rate": 4.487719298245614e-05, "loss": 0.0003, "step": 8775 }, { "epoch": 130.98, "learning_rate": 4.484210526315789e-05, "loss": 0.0007, "step": 8776 }, { "epoch": 131.0, "learning_rate": 4.480701754385964e-05, "loss": 0.0003, "step": 8777 }, { "epoch": 131.01, "learning_rate": 4.47719298245614e-05, "loss": 0.0003, "step": 8778 }, { "epoch": 131.03, "learning_rate": 4.473684210526315e-05, "loss": 0.0042, "step": 8779 }, { "epoch": 131.04, "learning_rate": 4.470175438596491e-05, "loss": 0.0002, "step": 8780 }, { "epoch": 131.06, "learning_rate": 4.466666666666666e-05, "loss": 0.0003, "step": 8781 }, { "epoch": 131.07, "learning_rate": 4.463157894736842e-05, "loss": 0.0002, "step": 8782 }, { "epoch": 131.09, "learning_rate": 4.4596491228070175e-05, "loss": 0.0002, "step": 8783 }, { "epoch": 131.1, "learning_rate": 4.456140350877193e-05, "loss": 0.0003, "step": 8784 }, { "epoch": 131.12, "learning_rate": 4.452631578947368e-05, "loss": 0.0004, "step": 8785 }, { "epoch": 131.13, "learning_rate": 4.4491228070175435e-05, "loss": 0.0002, "step": 8786 }, { "epoch": 131.15, "learning_rate": 4.445614035087719e-05, "loss": 0.0003, "step": 8787 }, { "epoch": 131.16, "learning_rate": 4.4421052631578946e-05, "loss": 0.0003, "step": 8788 }, { "epoch": 131.18, "learning_rate": 4.4385964912280695e-05, "loss": 0.0003, "step": 8789 }, { "epoch": 131.19, "learning_rate": 4.435087719298245e-05, "loss": 0.0013, "step": 8790 }, { "epoch": 131.21, "learning_rate": 4.4315789473684206e-05, "loss": 0.0467, "step": 8791 }, { "epoch": 131.22, "learning_rate": 4.428070175438596e-05, "loss": 0.0002, "step": 8792 }, { "epoch": 131.24, "learning_rate": 4.424561403508771e-05, "loss": 0.0002, "step": 8793 }, { "epoch": 131.25, "learning_rate": 4.4210526315789466e-05, "loss": 0.0003, "step": 8794 }, { "epoch": 131.27, "learning_rate": 4.417543859649123e-05, "loss": 0.0003, "step": 8795 }, { "epoch": 131.28, "learning_rate": 4.4140350877192983e-05, "loss": 0.0009, "step": 8796 }, { "epoch": 131.3, "learning_rate": 4.410526315789473e-05, "loss": 0.0005, "step": 8797 }, { "epoch": 131.31, "learning_rate": 4.407017543859649e-05, "loss": 0.0003, "step": 8798 }, { "epoch": 131.33, "learning_rate": 4.403508771929824e-05, "loss": 0.0019, "step": 8799 }, { "epoch": 131.34, "learning_rate": 4.4e-05, "loss": 0.0003, "step": 8800 }, { "epoch": 131.34, "eval_accuracy": 0.8881546744982868, "eval_f1": 0.887829841036129, "eval_loss": 0.5989698171615601, "eval_runtime": 343.81, "eval_samples_per_second": 11.884, "eval_steps_per_second": 0.745, "step": 8800 }, { "epoch": 131.36, "learning_rate": 4.396491228070175e-05, "loss": 0.0003, "step": 8801 }, { "epoch": 131.37, "learning_rate": 4.39298245614035e-05, "loss": 0.0002, "step": 8802 }, { "epoch": 131.39, "learning_rate": 4.389473684210526e-05, "loss": 0.0002, "step": 8803 }, { "epoch": 131.4, "learning_rate": 4.3859649122807014e-05, "loss": 0.079, "step": 8804 }, { "epoch": 131.42, "learning_rate": 4.382456140350876e-05, "loss": 0.0002, "step": 8805 }, { "epoch": 131.43, "learning_rate": 4.378947368421052e-05, "loss": 0.0009, "step": 8806 }, { "epoch": 131.45, "learning_rate": 4.375438596491228e-05, "loss": 0.0003, "step": 8807 }, { "epoch": 131.46, "learning_rate": 4.3719298245614037e-05, "loss": 0.0005, "step": 8808 }, { "epoch": 131.48, "learning_rate": 4.3684210526315785e-05, "loss": 0.0002, "step": 8809 }, { "epoch": 131.49, "learning_rate": 4.364912280701754e-05, "loss": 0.0003, "step": 8810 }, { "epoch": 131.51, "learning_rate": 4.3614035087719296e-05, "loss": 0.003, "step": 8811 }, { "epoch": 131.52, "learning_rate": 4.357894736842105e-05, "loss": 0.0004, "step": 8812 }, { "epoch": 131.54, "learning_rate": 4.35438596491228e-05, "loss": 0.0002, "step": 8813 }, { "epoch": 131.55, "learning_rate": 4.3508771929824556e-05, "loss": 0.0002, "step": 8814 }, { "epoch": 131.57, "learning_rate": 4.347368421052631e-05, "loss": 0.0003, "step": 8815 }, { "epoch": 131.58, "learning_rate": 4.343859649122807e-05, "loss": 0.0003, "step": 8816 }, { "epoch": 131.59, "learning_rate": 4.3403508771929816e-05, "loss": 0.0004, "step": 8817 }, { "epoch": 131.61, "learning_rate": 4.336842105263157e-05, "loss": 0.0003, "step": 8818 }, { "epoch": 131.62, "learning_rate": 4.333333333333333e-05, "loss": 0.0003, "step": 8819 }, { "epoch": 131.64, "learning_rate": 4.329824561403509e-05, "loss": 0.0004, "step": 8820 }, { "epoch": 131.65, "learning_rate": 4.326315789473684e-05, "loss": 0.0003, "step": 8821 }, { "epoch": 131.67, "learning_rate": 4.3228070175438594e-05, "loss": 0.0002, "step": 8822 }, { "epoch": 131.68, "learning_rate": 4.319298245614035e-05, "loss": 0.009, "step": 8823 }, { "epoch": 131.7, "learning_rate": 4.3157894736842105e-05, "loss": 0.0358, "step": 8824 }, { "epoch": 131.71, "learning_rate": 4.3122807017543854e-05, "loss": 0.0002, "step": 8825 }, { "epoch": 131.73, "learning_rate": 4.308771929824561e-05, "loss": 0.0003, "step": 8826 }, { "epoch": 131.74, "learning_rate": 4.3052631578947365e-05, "loss": 0.1321, "step": 8827 }, { "epoch": 131.76, "learning_rate": 4.301754385964912e-05, "loss": 0.0002, "step": 8828 }, { "epoch": 131.77, "learning_rate": 4.298245614035087e-05, "loss": 0.0002, "step": 8829 }, { "epoch": 131.79, "learning_rate": 4.2947368421052625e-05, "loss": 0.0002, "step": 8830 }, { "epoch": 131.8, "learning_rate": 4.291228070175438e-05, "loss": 0.0002, "step": 8831 }, { "epoch": 131.82, "learning_rate": 4.287719298245614e-05, "loss": 0.0039, "step": 8832 }, { "epoch": 131.83, "learning_rate": 4.284210526315789e-05, "loss": 0.0006, "step": 8833 }, { "epoch": 131.85, "learning_rate": 4.280701754385965e-05, "loss": 0.0003, "step": 8834 }, { "epoch": 131.86, "learning_rate": 4.27719298245614e-05, "loss": 0.0007, "step": 8835 }, { "epoch": 131.88, "learning_rate": 4.273684210526316e-05, "loss": 0.0002, "step": 8836 }, { "epoch": 131.89, "learning_rate": 4.270175438596491e-05, "loss": 0.0002, "step": 8837 }, { "epoch": 131.91, "learning_rate": 4.266666666666666e-05, "loss": 0.0003, "step": 8838 }, { "epoch": 131.92, "learning_rate": 4.263157894736842e-05, "loss": 0.0003, "step": 8839 }, { "epoch": 131.94, "learning_rate": 4.2596491228070174e-05, "loss": 0.0007, "step": 8840 }, { "epoch": 131.95, "learning_rate": 4.256140350877192e-05, "loss": 0.0003, "step": 8841 }, { "epoch": 131.97, "learning_rate": 4.252631578947368e-05, "loss": 0.0002, "step": 8842 }, { "epoch": 131.98, "learning_rate": 4.2491228070175433e-05, "loss": 0.0002, "step": 8843 }, { "epoch": 132.0, "learning_rate": 4.245614035087718e-05, "loss": 0.0003, "step": 8844 }, { "epoch": 132.01, "learning_rate": 4.2421052631578945e-05, "loss": 0.0459, "step": 8845 }, { "epoch": 132.03, "learning_rate": 4.23859649122807e-05, "loss": 0.0036, "step": 8846 }, { "epoch": 132.04, "learning_rate": 4.2350877192982456e-05, "loss": 0.2391, "step": 8847 }, { "epoch": 132.06, "learning_rate": 4.231578947368421e-05, "loss": 0.0002, "step": 8848 }, { "epoch": 132.07, "learning_rate": 4.228070175438596e-05, "loss": 0.0002, "step": 8849 }, { "epoch": 132.09, "learning_rate": 4.2245614035087716e-05, "loss": 0.0002, "step": 8850 }, { "epoch": 132.1, "learning_rate": 4.221052631578947e-05, "loss": 0.0003, "step": 8851 }, { "epoch": 132.12, "learning_rate": 4.217543859649123e-05, "loss": 0.0002, "step": 8852 }, { "epoch": 132.13, "learning_rate": 4.2140350877192975e-05, "loss": 0.0003, "step": 8853 }, { "epoch": 132.15, "learning_rate": 4.210526315789473e-05, "loss": 0.0002, "step": 8854 }, { "epoch": 132.16, "learning_rate": 4.2070175438596487e-05, "loss": 0.0003, "step": 8855 }, { "epoch": 132.18, "learning_rate": 4.2035087719298235e-05, "loss": 0.0002, "step": 8856 }, { "epoch": 132.19, "learning_rate": 4.2e-05, "loss": 0.0002, "step": 8857 }, { "epoch": 132.21, "learning_rate": 4.196491228070175e-05, "loss": 0.0002, "step": 8858 }, { "epoch": 132.22, "learning_rate": 4.192982456140351e-05, "loss": 0.0742, "step": 8859 }, { "epoch": 132.24, "learning_rate": 4.1894736842105264e-05, "loss": 0.0009, "step": 8860 }, { "epoch": 132.25, "learning_rate": 4.185964912280701e-05, "loss": 0.0268, "step": 8861 }, { "epoch": 132.27, "learning_rate": 4.182456140350877e-05, "loss": 0.0003, "step": 8862 }, { "epoch": 132.28, "learning_rate": 4.1789473684210524e-05, "loss": 0.0003, "step": 8863 }, { "epoch": 132.3, "learning_rate": 4.175438596491228e-05, "loss": 0.0065, "step": 8864 }, { "epoch": 132.31, "learning_rate": 4.171929824561403e-05, "loss": 0.0002, "step": 8865 }, { "epoch": 132.33, "learning_rate": 4.1684210526315784e-05, "loss": 0.0004, "step": 8866 }, { "epoch": 132.34, "learning_rate": 4.164912280701754e-05, "loss": 0.0506, "step": 8867 }, { "epoch": 132.36, "learning_rate": 4.161403508771929e-05, "loss": 0.0004, "step": 8868 }, { "epoch": 132.37, "learning_rate": 4.1578947368421044e-05, "loss": 0.0002, "step": 8869 }, { "epoch": 132.39, "learning_rate": 4.1543859649122806e-05, "loss": 0.0002, "step": 8870 }, { "epoch": 132.4, "learning_rate": 4.150877192982456e-05, "loss": 0.0006, "step": 8871 }, { "epoch": 132.42, "learning_rate": 4.147368421052632e-05, "loss": 0.0002, "step": 8872 }, { "epoch": 132.43, "learning_rate": 4.1438596491228066e-05, "loss": 0.0002, "step": 8873 }, { "epoch": 132.45, "learning_rate": 4.140350877192982e-05, "loss": 0.0003, "step": 8874 }, { "epoch": 132.46, "learning_rate": 4.136842105263158e-05, "loss": 0.0003, "step": 8875 }, { "epoch": 132.48, "learning_rate": 4.1333333333333326e-05, "loss": 0.0238, "step": 8876 }, { "epoch": 132.49, "learning_rate": 4.129824561403508e-05, "loss": 0.0003, "step": 8877 }, { "epoch": 132.51, "learning_rate": 4.126315789473684e-05, "loss": 0.0002, "step": 8878 }, { "epoch": 132.52, "learning_rate": 4.122807017543859e-05, "loss": 0.0006, "step": 8879 }, { "epoch": 132.54, "learning_rate": 4.119298245614034e-05, "loss": 0.0003, "step": 8880 }, { "epoch": 132.55, "learning_rate": 4.11578947368421e-05, "loss": 0.0003, "step": 8881 }, { "epoch": 132.57, "learning_rate": 4.112280701754386e-05, "loss": 0.0003, "step": 8882 }, { "epoch": 132.58, "learning_rate": 4.1087719298245615e-05, "loss": 0.0002, "step": 8883 }, { "epoch": 132.59, "learning_rate": 4.105263157894737e-05, "loss": 0.0003, "step": 8884 }, { "epoch": 132.61, "learning_rate": 4.101754385964912e-05, "loss": 0.0002, "step": 8885 }, { "epoch": 132.62, "learning_rate": 4.0982456140350875e-05, "loss": 0.0002, "step": 8886 }, { "epoch": 132.64, "learning_rate": 4.094736842105263e-05, "loss": 0.0003, "step": 8887 }, { "epoch": 132.65, "learning_rate": 4.091228070175438e-05, "loss": 0.0002, "step": 8888 }, { "epoch": 132.67, "learning_rate": 4.0877192982456135e-05, "loss": 0.0002, "step": 8889 }, { "epoch": 132.68, "learning_rate": 4.084210526315789e-05, "loss": 0.0003, "step": 8890 }, { "epoch": 132.7, "learning_rate": 4.0807017543859646e-05, "loss": 0.0003, "step": 8891 }, { "epoch": 132.71, "learning_rate": 4.0771929824561395e-05, "loss": 0.0002, "step": 8892 }, { "epoch": 132.73, "learning_rate": 4.073684210526315e-05, "loss": 0.0002, "step": 8893 }, { "epoch": 132.74, "learning_rate": 4.0701754385964906e-05, "loss": 0.0003, "step": 8894 }, { "epoch": 132.76, "learning_rate": 4.066666666666667e-05, "loss": 0.0003, "step": 8895 }, { "epoch": 132.77, "learning_rate": 4.0631578947368424e-05, "loss": 0.0002, "step": 8896 }, { "epoch": 132.79, "learning_rate": 4.059649122807017e-05, "loss": 0.0002, "step": 8897 }, { "epoch": 132.8, "learning_rate": 4.056140350877193e-05, "loss": 0.0062, "step": 8898 }, { "epoch": 132.82, "learning_rate": 4.0526315789473684e-05, "loss": 0.0178, "step": 8899 }, { "epoch": 132.83, "learning_rate": 4.049122807017543e-05, "loss": 0.0005, "step": 8900 }, { "epoch": 132.85, "learning_rate": 4.045614035087719e-05, "loss": 0.0003, "step": 8901 }, { "epoch": 132.86, "learning_rate": 4.0421052631578943e-05, "loss": 0.0003, "step": 8902 }, { "epoch": 132.88, "learning_rate": 4.03859649122807e-05, "loss": 0.0002, "step": 8903 }, { "epoch": 132.89, "learning_rate": 4.035087719298245e-05, "loss": 0.0002, "step": 8904 }, { "epoch": 132.91, "learning_rate": 4.03157894736842e-05, "loss": 0.0003, "step": 8905 }, { "epoch": 132.92, "learning_rate": 4.028070175438596e-05, "loss": 0.0002, "step": 8906 }, { "epoch": 132.94, "learning_rate": 4.024561403508772e-05, "loss": 0.0039, "step": 8907 }, { "epoch": 132.95, "learning_rate": 4.021052631578948e-05, "loss": 0.0004, "step": 8908 }, { "epoch": 132.97, "learning_rate": 4.0175438596491226e-05, "loss": 0.0048, "step": 8909 }, { "epoch": 132.98, "learning_rate": 4.014035087719298e-05, "loss": 0.0053, "step": 8910 }, { "epoch": 133.0, "learning_rate": 4.0105263157894737e-05, "loss": 0.0002, "step": 8911 }, { "epoch": 133.01, "learning_rate": 4.0070175438596485e-05, "loss": 0.0002, "step": 8912 }, { "epoch": 133.03, "learning_rate": 4.003508771929824e-05, "loss": 0.0002, "step": 8913 }, { "epoch": 133.04, "learning_rate": 3.9999999999999996e-05, "loss": 0.0003, "step": 8914 }, { "epoch": 133.06, "learning_rate": 3.996491228070175e-05, "loss": 0.0011, "step": 8915 }, { "epoch": 133.07, "learning_rate": 3.99298245614035e-05, "loss": 0.0002, "step": 8916 }, { "epoch": 133.09, "learning_rate": 3.9894736842105256e-05, "loss": 0.0004, "step": 8917 }, { "epoch": 133.1, "learning_rate": 3.985964912280701e-05, "loss": 0.0003, "step": 8918 }, { "epoch": 133.12, "learning_rate": 3.982456140350877e-05, "loss": 0.0002, "step": 8919 }, { "epoch": 133.13, "learning_rate": 3.978947368421053e-05, "loss": 0.0002, "step": 8920 }, { "epoch": 133.15, "learning_rate": 3.975438596491228e-05, "loss": 0.0002, "step": 8921 }, { "epoch": 133.16, "learning_rate": 3.9719298245614034e-05, "loss": 0.0348, "step": 8922 }, { "epoch": 133.18, "learning_rate": 3.968421052631579e-05, "loss": 0.0002, "step": 8923 }, { "epoch": 133.19, "learning_rate": 3.964912280701754e-05, "loss": 0.0002, "step": 8924 }, { "epoch": 133.21, "learning_rate": 3.9614035087719294e-05, "loss": 0.0002, "step": 8925 }, { "epoch": 133.22, "learning_rate": 3.957894736842105e-05, "loss": 0.0017, "step": 8926 }, { "epoch": 133.24, "learning_rate": 3.9543859649122805e-05, "loss": 0.0003, "step": 8927 }, { "epoch": 133.25, "learning_rate": 3.9508771929824554e-05, "loss": 0.0003, "step": 8928 }, { "epoch": 133.27, "learning_rate": 3.947368421052631e-05, "loss": 0.0003, "step": 8929 }, { "epoch": 133.28, "learning_rate": 3.9438596491228065e-05, "loss": 0.0002, "step": 8930 }, { "epoch": 133.3, "learning_rate": 3.940350877192982e-05, "loss": 0.0002, "step": 8931 }, { "epoch": 133.31, "learning_rate": 3.936842105263158e-05, "loss": 0.0002, "step": 8932 }, { "epoch": 133.33, "learning_rate": 3.933333333333333e-05, "loss": 0.0003, "step": 8933 }, { "epoch": 133.34, "learning_rate": 3.929824561403509e-05, "loss": 0.0003, "step": 8934 }, { "epoch": 133.36, "learning_rate": 3.926315789473684e-05, "loss": 0.0003, "step": 8935 }, { "epoch": 133.37, "learning_rate": 3.922807017543859e-05, "loss": 0.0002, "step": 8936 }, { "epoch": 133.39, "learning_rate": 3.919298245614035e-05, "loss": 0.0002, "step": 8937 }, { "epoch": 133.4, "learning_rate": 3.91578947368421e-05, "loss": 0.0003, "step": 8938 }, { "epoch": 133.42, "learning_rate": 3.912280701754386e-05, "loss": 0.0002, "step": 8939 }, { "epoch": 133.43, "learning_rate": 3.908771929824561e-05, "loss": 0.0002, "step": 8940 }, { "epoch": 133.45, "learning_rate": 3.905263157894736e-05, "loss": 0.0374, "step": 8941 }, { "epoch": 133.46, "learning_rate": 3.901754385964912e-05, "loss": 0.0002, "step": 8942 }, { "epoch": 133.48, "learning_rate": 3.8982456140350874e-05, "loss": 0.0004, "step": 8943 }, { "epoch": 133.49, "learning_rate": 3.894736842105262e-05, "loss": 0.0002, "step": 8944 }, { "epoch": 133.51, "learning_rate": 3.8912280701754385e-05, "loss": 0.173, "step": 8945 }, { "epoch": 133.52, "learning_rate": 3.887719298245614e-05, "loss": 0.0002, "step": 8946 }, { "epoch": 133.54, "learning_rate": 3.8842105263157896e-05, "loss": 0.0002, "step": 8947 }, { "epoch": 133.55, "learning_rate": 3.8807017543859645e-05, "loss": 0.0006, "step": 8948 }, { "epoch": 133.57, "learning_rate": 3.87719298245614e-05, "loss": 0.0002, "step": 8949 }, { "epoch": 133.58, "learning_rate": 3.8736842105263156e-05, "loss": 0.0002, "step": 8950 }, { "epoch": 133.59, "learning_rate": 3.870175438596491e-05, "loss": 0.0002, "step": 8951 }, { "epoch": 133.61, "learning_rate": 3.866666666666666e-05, "loss": 0.0002, "step": 8952 }, { "epoch": 133.62, "learning_rate": 3.8631578947368416e-05, "loss": 0.0002, "step": 8953 }, { "epoch": 133.64, "learning_rate": 3.859649122807017e-05, "loss": 0.0003, "step": 8954 }, { "epoch": 133.65, "learning_rate": 3.856140350877193e-05, "loss": 0.0003, "step": 8955 }, { "epoch": 133.67, "learning_rate": 3.8526315789473676e-05, "loss": 0.0003, "step": 8956 }, { "epoch": 133.68, "learning_rate": 3.849122807017543e-05, "loss": 0.0003, "step": 8957 }, { "epoch": 133.7, "learning_rate": 3.8456140350877193e-05, "loss": 0.0002, "step": 8958 }, { "epoch": 133.71, "learning_rate": 3.842105263157895e-05, "loss": 0.0002, "step": 8959 }, { "epoch": 133.73, "learning_rate": 3.83859649122807e-05, "loss": 0.0002, "step": 8960 }, { "epoch": 133.74, "learning_rate": 3.835087719298245e-05, "loss": 0.0729, "step": 8961 }, { "epoch": 133.76, "learning_rate": 3.831578947368421e-05, "loss": 0.0002, "step": 8962 }, { "epoch": 133.77, "learning_rate": 3.8280701754385964e-05, "loss": 0.0002, "step": 8963 }, { "epoch": 133.79, "learning_rate": 3.824561403508771e-05, "loss": 0.0002, "step": 8964 }, { "epoch": 133.8, "learning_rate": 3.821052631578947e-05, "loss": 0.0002, "step": 8965 }, { "epoch": 133.82, "learning_rate": 3.8175438596491224e-05, "loss": 0.0399, "step": 8966 }, { "epoch": 133.83, "learning_rate": 3.814035087719298e-05, "loss": 0.0002, "step": 8967 }, { "epoch": 133.85, "learning_rate": 3.810526315789473e-05, "loss": 0.0005, "step": 8968 }, { "epoch": 133.86, "learning_rate": 3.8070175438596484e-05, "loss": 0.0002, "step": 8969 }, { "epoch": 133.88, "learning_rate": 3.8035087719298247e-05, "loss": 0.0002, "step": 8970 }, { "epoch": 133.89, "learning_rate": 3.8e-05, "loss": 0.0002, "step": 8971 }, { "epoch": 133.91, "learning_rate": 3.796491228070175e-05, "loss": 0.0002, "step": 8972 }, { "epoch": 133.92, "learning_rate": 3.7929824561403506e-05, "loss": 0.0004, "step": 8973 }, { "epoch": 133.94, "learning_rate": 3.789473684210526e-05, "loss": 0.0011, "step": 8974 }, { "epoch": 133.95, "learning_rate": 3.785964912280702e-05, "loss": 0.0014, "step": 8975 }, { "epoch": 133.97, "learning_rate": 3.7824561403508766e-05, "loss": 0.0002, "step": 8976 }, { "epoch": 133.98, "learning_rate": 3.778947368421052e-05, "loss": 0.0002, "step": 8977 }, { "epoch": 134.0, "learning_rate": 3.775438596491228e-05, "loss": 0.0002, "step": 8978 }, { "epoch": 134.01, "learning_rate": 3.771929824561403e-05, "loss": 0.0004, "step": 8979 }, { "epoch": 134.03, "learning_rate": 3.768421052631578e-05, "loss": 0.0003, "step": 8980 }, { "epoch": 134.04, "learning_rate": 3.764912280701754e-05, "loss": 0.0003, "step": 8981 }, { "epoch": 134.06, "learning_rate": 3.761403508771929e-05, "loss": 0.0002, "step": 8982 }, { "epoch": 134.07, "learning_rate": 3.7578947368421055e-05, "loss": 0.0002, "step": 8983 }, { "epoch": 134.09, "learning_rate": 3.7543859649122804e-05, "loss": 0.0002, "step": 8984 }, { "epoch": 134.1, "learning_rate": 3.750877192982456e-05, "loss": 0.0003, "step": 8985 }, { "epoch": 134.12, "learning_rate": 3.7473684210526315e-05, "loss": 0.0003, "step": 8986 }, { "epoch": 134.13, "learning_rate": 3.743859649122807e-05, "loss": 0.0002, "step": 8987 }, { "epoch": 134.15, "learning_rate": 3.740350877192982e-05, "loss": 0.0003, "step": 8988 }, { "epoch": 134.16, "learning_rate": 3.7368421052631575e-05, "loss": 0.0002, "step": 8989 }, { "epoch": 134.18, "learning_rate": 3.733333333333333e-05, "loss": 0.0002, "step": 8990 }, { "epoch": 134.19, "learning_rate": 3.7298245614035086e-05, "loss": 0.0002, "step": 8991 }, { "epoch": 134.21, "learning_rate": 3.726315789473684e-05, "loss": 0.0004, "step": 8992 }, { "epoch": 134.22, "learning_rate": 3.72280701754386e-05, "loss": 0.0002, "step": 8993 }, { "epoch": 134.24, "learning_rate": 3.7192982456140346e-05, "loss": 0.0002, "step": 8994 }, { "epoch": 134.25, "learning_rate": 3.71578947368421e-05, "loss": 0.0002, "step": 8995 }, { "epoch": 134.27, "learning_rate": 3.712280701754386e-05, "loss": 0.0002, "step": 8996 }, { "epoch": 134.28, "learning_rate": 3.7087719298245606e-05, "loss": 0.0002, "step": 8997 }, { "epoch": 134.3, "learning_rate": 3.705263157894737e-05, "loss": 0.001, "step": 8998 }, { "epoch": 134.31, "learning_rate": 3.7017543859649124e-05, "loss": 0.0002, "step": 8999 }, { "epoch": 134.33, "learning_rate": 3.698245614035087e-05, "loss": 0.0002, "step": 9000 }, { "epoch": 134.33, "eval_accuracy": 0.8888888888888888, "eval_f1": 0.8887363564634383, "eval_loss": 0.6235546469688416, "eval_runtime": 344.0974, "eval_samples_per_second": 11.875, "eval_steps_per_second": 0.744, "step": 9000 }, { "epoch": 134.34, "learning_rate": 3.694736842105263e-05, "loss": 0.0051, "step": 9001 }, { "epoch": 134.36, "learning_rate": 3.6912280701754384e-05, "loss": 0.0002, "step": 9002 }, { "epoch": 134.37, "learning_rate": 3.687719298245613e-05, "loss": 0.0003, "step": 9003 }, { "epoch": 134.39, "learning_rate": 3.684210526315789e-05, "loss": 0.0002, "step": 9004 }, { "epoch": 134.4, "learning_rate": 3.680701754385965e-05, "loss": 0.0002, "step": 9005 }, { "epoch": 134.42, "learning_rate": 3.67719298245614e-05, "loss": 0.0002, "step": 9006 }, { "epoch": 134.43, "learning_rate": 3.6736842105263155e-05, "loss": 0.0002, "step": 9007 }, { "epoch": 134.45, "learning_rate": 3.670175438596491e-05, "loss": 0.0003, "step": 9008 }, { "epoch": 134.46, "learning_rate": 3.666666666666666e-05, "loss": 0.0003, "step": 9009 }, { "epoch": 134.48, "learning_rate": 3.6631578947368414e-05, "loss": 0.139, "step": 9010 }, { "epoch": 134.49, "learning_rate": 3.659649122807018e-05, "loss": 0.0002, "step": 9011 }, { "epoch": 134.51, "learning_rate": 3.6561403508771926e-05, "loss": 0.0003, "step": 9012 }, { "epoch": 134.52, "learning_rate": 3.652631578947368e-05, "loss": 0.0002, "step": 9013 }, { "epoch": 134.54, "learning_rate": 3.649122807017544e-05, "loss": 0.0002, "step": 9014 }, { "epoch": 134.55, "learning_rate": 3.6456140350877185e-05, "loss": 0.0002, "step": 9015 }, { "epoch": 134.57, "learning_rate": 3.642105263157894e-05, "loss": 0.0002, "step": 9016 }, { "epoch": 134.58, "learning_rate": 3.63859649122807e-05, "loss": 0.0002, "step": 9017 }, { "epoch": 134.59, "learning_rate": 3.635087719298245e-05, "loss": 0.0004, "step": 9018 }, { "epoch": 134.61, "learning_rate": 3.631578947368421e-05, "loss": 0.0007, "step": 9019 }, { "epoch": 134.62, "learning_rate": 3.628070175438596e-05, "loss": 0.0002, "step": 9020 }, { "epoch": 134.64, "learning_rate": 3.624561403508771e-05, "loss": 0.0566, "step": 9021 }, { "epoch": 134.65, "learning_rate": 3.621052631578947e-05, "loss": 0.0002, "step": 9022 }, { "epoch": 134.67, "learning_rate": 3.617543859649123e-05, "loss": 0.0002, "step": 9023 }, { "epoch": 134.68, "learning_rate": 3.614035087719298e-05, "loss": 0.0002, "step": 9024 }, { "epoch": 134.7, "learning_rate": 3.6105263157894734e-05, "loss": 0.0005, "step": 9025 }, { "epoch": 134.71, "learning_rate": 3.607017543859649e-05, "loss": 0.0002, "step": 9026 }, { "epoch": 134.73, "learning_rate": 3.603508771929824e-05, "loss": 0.0002, "step": 9027 }, { "epoch": 134.74, "learning_rate": 3.5999999999999994e-05, "loss": 0.0002, "step": 9028 }, { "epoch": 134.76, "learning_rate": 3.596491228070175e-05, "loss": 0.0003, "step": 9029 }, { "epoch": 134.77, "learning_rate": 3.5929824561403505e-05, "loss": 0.0003, "step": 9030 }, { "epoch": 134.79, "learning_rate": 3.589473684210526e-05, "loss": 0.0002, "step": 9031 }, { "epoch": 134.8, "learning_rate": 3.5859649122807016e-05, "loss": 0.0005, "step": 9032 }, { "epoch": 134.82, "learning_rate": 3.5824561403508765e-05, "loss": 0.0002, "step": 9033 }, { "epoch": 134.83, "learning_rate": 3.578947368421052e-05, "loss": 0.0002, "step": 9034 }, { "epoch": 134.85, "learning_rate": 3.5754385964912276e-05, "loss": 0.0002, "step": 9035 }, { "epoch": 134.86, "learning_rate": 3.571929824561403e-05, "loss": 0.0002, "step": 9036 }, { "epoch": 134.88, "learning_rate": 3.568421052631579e-05, "loss": 0.0002, "step": 9037 }, { "epoch": 134.89, "learning_rate": 3.564912280701754e-05, "loss": 0.0002, "step": 9038 }, { "epoch": 134.91, "learning_rate": 3.561403508771929e-05, "loss": 0.0002, "step": 9039 }, { "epoch": 134.92, "learning_rate": 3.557894736842105e-05, "loss": 0.0002, "step": 9040 }, { "epoch": 134.94, "learning_rate": 3.55438596491228e-05, "loss": 0.0002, "step": 9041 }, { "epoch": 134.95, "learning_rate": 3.550877192982456e-05, "loss": 0.0003, "step": 9042 }, { "epoch": 134.97, "learning_rate": 3.5473684210526314e-05, "loss": 0.0002, "step": 9043 }, { "epoch": 134.98, "learning_rate": 3.543859649122807e-05, "loss": 0.0002, "step": 9044 }, { "epoch": 135.0, "learning_rate": 3.540350877192982e-05, "loss": 0.0002, "step": 9045 }, { "epoch": 135.01, "learning_rate": 3.5368421052631574e-05, "loss": 0.0003, "step": 9046 }, { "epoch": 135.03, "learning_rate": 3.533333333333333e-05, "loss": 0.0002, "step": 9047 }, { "epoch": 135.04, "learning_rate": 3.5298245614035085e-05, "loss": 0.0644, "step": 9048 }, { "epoch": 135.06, "learning_rate": 3.526315789473684e-05, "loss": 0.0002, "step": 9049 }, { "epoch": 135.07, "learning_rate": 3.5228070175438596e-05, "loss": 0.0002, "step": 9050 }, { "epoch": 135.09, "learning_rate": 3.5192982456140345e-05, "loss": 0.0006, "step": 9051 }, { "epoch": 135.1, "learning_rate": 3.51578947368421e-05, "loss": 0.0004, "step": 9052 }, { "epoch": 135.12, "learning_rate": 3.5122807017543856e-05, "loss": 0.0002, "step": 9053 }, { "epoch": 135.13, "learning_rate": 3.508771929824561e-05, "loss": 0.0004, "step": 9054 }, { "epoch": 135.15, "learning_rate": 3.505263157894737e-05, "loss": 0.0006, "step": 9055 }, { "epoch": 135.16, "learning_rate": 3.501754385964912e-05, "loss": 0.0002, "step": 9056 }, { "epoch": 135.18, "learning_rate": 3.498245614035087e-05, "loss": 0.0002, "step": 9057 }, { "epoch": 135.19, "learning_rate": 3.494736842105263e-05, "loss": 0.0002, "step": 9058 }, { "epoch": 135.21, "learning_rate": 3.491228070175438e-05, "loss": 0.0002, "step": 9059 }, { "epoch": 135.22, "learning_rate": 3.487719298245614e-05, "loss": 0.0829, "step": 9060 }, { "epoch": 135.24, "learning_rate": 3.4842105263157894e-05, "loss": 0.0002, "step": 9061 }, { "epoch": 135.25, "learning_rate": 3.480701754385965e-05, "loss": 0.0005, "step": 9062 }, { "epoch": 135.27, "learning_rate": 3.47719298245614e-05, "loss": 0.0003, "step": 9063 }, { "epoch": 135.28, "learning_rate": 3.4736842105263153e-05, "loss": 0.0002, "step": 9064 }, { "epoch": 135.3, "learning_rate": 3.470175438596491e-05, "loss": 0.0003, "step": 9065 }, { "epoch": 135.31, "learning_rate": 3.4666666666666665e-05, "loss": 0.0002, "step": 9066 }, { "epoch": 135.33, "learning_rate": 3.463157894736842e-05, "loss": 0.0004, "step": 9067 }, { "epoch": 135.34, "learning_rate": 3.4596491228070176e-05, "loss": 0.0002, "step": 9068 }, { "epoch": 135.36, "learning_rate": 3.4561403508771924e-05, "loss": 0.0002, "step": 9069 }, { "epoch": 135.37, "learning_rate": 3.452631578947368e-05, "loss": 0.001, "step": 9070 }, { "epoch": 135.39, "learning_rate": 3.4491228070175436e-05, "loss": 0.0002, "step": 9071 }, { "epoch": 135.4, "learning_rate": 3.445614035087719e-05, "loss": 0.0002, "step": 9072 }, { "epoch": 135.42, "learning_rate": 3.442105263157894e-05, "loss": 0.0002, "step": 9073 }, { "epoch": 135.43, "learning_rate": 3.43859649122807e-05, "loss": 0.0002, "step": 9074 }, { "epoch": 135.45, "learning_rate": 3.435087719298245e-05, "loss": 0.0002, "step": 9075 }, { "epoch": 135.46, "learning_rate": 3.4315789473684207e-05, "loss": 0.0002, "step": 9076 }, { "epoch": 135.48, "learning_rate": 3.428070175438596e-05, "loss": 0.0002, "step": 9077 }, { "epoch": 135.49, "learning_rate": 3.424561403508772e-05, "loss": 0.0002, "step": 9078 }, { "epoch": 135.51, "learning_rate": 3.4210526315789466e-05, "loss": 0.0002, "step": 9079 }, { "epoch": 135.52, "learning_rate": 3.417543859649123e-05, "loss": 0.0002, "step": 9080 }, { "epoch": 135.54, "learning_rate": 3.414035087719298e-05, "loss": 0.0002, "step": 9081 }, { "epoch": 135.55, "learning_rate": 3.410526315789473e-05, "loss": 0.0002, "step": 9082 }, { "epoch": 135.57, "learning_rate": 3.407017543859649e-05, "loss": 0.0002, "step": 9083 }, { "epoch": 135.58, "learning_rate": 3.4035087719298244e-05, "loss": 0.0002, "step": 9084 }, { "epoch": 135.59, "learning_rate": 3.399999999999999e-05, "loss": 0.0007, "step": 9085 }, { "epoch": 135.61, "learning_rate": 3.3964912280701755e-05, "loss": 0.0005, "step": 9086 }, { "epoch": 135.62, "learning_rate": 3.3929824561403504e-05, "loss": 0.0002, "step": 9087 }, { "epoch": 135.64, "learning_rate": 3.389473684210526e-05, "loss": 0.0002, "step": 9088 }, { "epoch": 135.65, "learning_rate": 3.3859649122807015e-05, "loss": 0.0002, "step": 9089 }, { "epoch": 135.67, "learning_rate": 3.382456140350877e-05, "loss": 0.0002, "step": 9090 }, { "epoch": 135.68, "learning_rate": 3.378947368421052e-05, "loss": 0.0006, "step": 9091 }, { "epoch": 135.7, "learning_rate": 3.375438596491228e-05, "loss": 0.0002, "step": 9092 }, { "epoch": 135.71, "learning_rate": 3.371929824561403e-05, "loss": 0.1007, "step": 9093 }, { "epoch": 135.73, "learning_rate": 3.3684210526315786e-05, "loss": 0.0003, "step": 9094 }, { "epoch": 135.74, "learning_rate": 3.364912280701754e-05, "loss": 0.0002, "step": 9095 }, { "epoch": 135.76, "learning_rate": 3.36140350877193e-05, "loss": 0.0002, "step": 9096 }, { "epoch": 135.77, "learning_rate": 3.3578947368421046e-05, "loss": 0.0002, "step": 9097 }, { "epoch": 135.79, "learning_rate": 3.35438596491228e-05, "loss": 0.0138, "step": 9098 }, { "epoch": 135.8, "learning_rate": 3.350877192982456e-05, "loss": 0.0002, "step": 9099 }, { "epoch": 135.82, "learning_rate": 3.347368421052631e-05, "loss": 0.0002, "step": 9100 }, { "epoch": 135.83, "learning_rate": 3.343859649122807e-05, "loss": 0.0618, "step": 9101 }, { "epoch": 135.85, "learning_rate": 3.3403508771929824e-05, "loss": 0.0002, "step": 9102 }, { "epoch": 135.86, "learning_rate": 3.336842105263157e-05, "loss": 0.0002, "step": 9103 }, { "epoch": 135.88, "learning_rate": 3.333333333333333e-05, "loss": 0.0002, "step": 9104 }, { "epoch": 135.89, "learning_rate": 3.3298245614035084e-05, "loss": 0.0002, "step": 9105 }, { "epoch": 135.91, "learning_rate": 3.326315789473684e-05, "loss": 0.0003, "step": 9106 }, { "epoch": 135.92, "learning_rate": 3.3228070175438595e-05, "loss": 0.0002, "step": 9107 }, { "epoch": 135.94, "learning_rate": 3.319298245614035e-05, "loss": 0.0002, "step": 9108 }, { "epoch": 135.95, "learning_rate": 3.31578947368421e-05, "loss": 0.0054, "step": 9109 }, { "epoch": 135.97, "learning_rate": 3.3122807017543855e-05, "loss": 0.0002, "step": 9110 }, { "epoch": 135.98, "learning_rate": 3.308771929824561e-05, "loss": 0.0002, "step": 9111 }, { "epoch": 136.0, "learning_rate": 3.3052631578947366e-05, "loss": 0.0002, "step": 9112 }, { "epoch": 136.01, "learning_rate": 3.301754385964912e-05, "loss": 0.0008, "step": 9113 }, { "epoch": 136.03, "learning_rate": 3.298245614035088e-05, "loss": 0.0002, "step": 9114 }, { "epoch": 136.04, "learning_rate": 3.2947368421052626e-05, "loss": 0.0002, "step": 9115 }, { "epoch": 136.06, "learning_rate": 3.291228070175438e-05, "loss": 0.0002, "step": 9116 }, { "epoch": 136.07, "learning_rate": 3.287719298245614e-05, "loss": 0.0002, "step": 9117 }, { "epoch": 136.09, "learning_rate": 3.284210526315789e-05, "loss": 0.0845, "step": 9118 }, { "epoch": 136.1, "learning_rate": 3.280701754385965e-05, "loss": 0.0002, "step": 9119 }, { "epoch": 136.12, "learning_rate": 3.2771929824561403e-05, "loss": 0.0002, "step": 9120 }, { "epoch": 136.13, "learning_rate": 3.273684210526315e-05, "loss": 0.0002, "step": 9121 }, { "epoch": 136.15, "learning_rate": 3.270175438596491e-05, "loss": 0.0019, "step": 9122 }, { "epoch": 136.16, "learning_rate": 3.266666666666666e-05, "loss": 0.0002, "step": 9123 }, { "epoch": 136.18, "learning_rate": 3.263157894736842e-05, "loss": 0.0002, "step": 9124 }, { "epoch": 136.19, "learning_rate": 3.2596491228070174e-05, "loss": 0.0002, "step": 9125 }, { "epoch": 136.21, "learning_rate": 3.256140350877193e-05, "loss": 0.1942, "step": 9126 }, { "epoch": 136.22, "learning_rate": 3.252631578947368e-05, "loss": 0.0002, "step": 9127 }, { "epoch": 136.24, "learning_rate": 3.2491228070175434e-05, "loss": 0.0002, "step": 9128 }, { "epoch": 136.25, "learning_rate": 3.245614035087719e-05, "loss": 0.0002, "step": 9129 }, { "epoch": 136.27, "learning_rate": 3.2421052631578945e-05, "loss": 0.0002, "step": 9130 }, { "epoch": 136.28, "learning_rate": 3.23859649122807e-05, "loss": 0.0007, "step": 9131 }, { "epoch": 136.3, "learning_rate": 3.2350877192982457e-05, "loss": 0.0002, "step": 9132 }, { "epoch": 136.31, "learning_rate": 3.2315789473684205e-05, "loss": 0.0002, "step": 9133 }, { "epoch": 136.33, "learning_rate": 3.228070175438596e-05, "loss": 0.0002, "step": 9134 }, { "epoch": 136.34, "learning_rate": 3.2245614035087716e-05, "loss": 0.0002, "step": 9135 }, { "epoch": 136.36, "learning_rate": 3.221052631578947e-05, "loss": 0.0002, "step": 9136 }, { "epoch": 136.37, "learning_rate": 3.217543859649123e-05, "loss": 0.0002, "step": 9137 }, { "epoch": 136.39, "learning_rate": 3.214035087719298e-05, "loss": 0.0002, "step": 9138 }, { "epoch": 136.4, "learning_rate": 3.210526315789473e-05, "loss": 0.0002, "step": 9139 }, { "epoch": 136.42, "learning_rate": 3.207017543859649e-05, "loss": 0.0002, "step": 9140 }, { "epoch": 136.43, "learning_rate": 3.203508771929824e-05, "loss": 0.0002, "step": 9141 }, { "epoch": 136.45, "learning_rate": 3.2e-05, "loss": 0.0004, "step": 9142 }, { "epoch": 136.46, "learning_rate": 3.1964912280701754e-05, "loss": 0.0007, "step": 9143 }, { "epoch": 136.48, "learning_rate": 3.192982456140351e-05, "loss": 0.0018, "step": 9144 }, { "epoch": 136.49, "learning_rate": 3.189473684210526e-05, "loss": 0.0002, "step": 9145 }, { "epoch": 136.51, "learning_rate": 3.1859649122807014e-05, "loss": 0.0003, "step": 9146 }, { "epoch": 136.52, "learning_rate": 3.182456140350877e-05, "loss": 0.0002, "step": 9147 }, { "epoch": 136.54, "learning_rate": 3.178947368421052e-05, "loss": 0.0002, "step": 9148 }, { "epoch": 136.55, "learning_rate": 3.175438596491228e-05, "loss": 0.0002, "step": 9149 }, { "epoch": 136.57, "learning_rate": 3.1719298245614036e-05, "loss": 0.19, "step": 9150 }, { "epoch": 136.58, "learning_rate": 3.1684210526315785e-05, "loss": 0.0011, "step": 9151 }, { "epoch": 136.59, "learning_rate": 3.164912280701754e-05, "loss": 0.0005, "step": 9152 }, { "epoch": 136.61, "learning_rate": 3.1614035087719296e-05, "loss": 0.0002, "step": 9153 }, { "epoch": 136.62, "learning_rate": 3.1578947368421045e-05, "loss": 0.0002, "step": 9154 }, { "epoch": 136.64, "learning_rate": 3.154385964912281e-05, "loss": 0.0002, "step": 9155 }, { "epoch": 136.65, "learning_rate": 3.150877192982456e-05, "loss": 0.0042, "step": 9156 }, { "epoch": 136.67, "learning_rate": 3.147368421052631e-05, "loss": 0.0002, "step": 9157 }, { "epoch": 136.68, "learning_rate": 3.143859649122807e-05, "loss": 0.0016, "step": 9158 }, { "epoch": 136.7, "learning_rate": 3.140350877192982e-05, "loss": 0.0003, "step": 9159 }, { "epoch": 136.71, "learning_rate": 3.136842105263157e-05, "loss": 0.0002, "step": 9160 }, { "epoch": 136.73, "learning_rate": 3.1333333333333334e-05, "loss": 0.0006, "step": 9161 }, { "epoch": 136.74, "learning_rate": 3.129824561403509e-05, "loss": 0.0002, "step": 9162 }, { "epoch": 136.76, "learning_rate": 3.126315789473684e-05, "loss": 0.0002, "step": 9163 }, { "epoch": 136.77, "learning_rate": 3.1228070175438594e-05, "loss": 0.0015, "step": 9164 }, { "epoch": 136.79, "learning_rate": 3.119298245614035e-05, "loss": 0.0002, "step": 9165 }, { "epoch": 136.8, "learning_rate": 3.11578947368421e-05, "loss": 0.0027, "step": 9166 }, { "epoch": 136.82, "learning_rate": 3.1122807017543854e-05, "loss": 0.0004, "step": 9167 }, { "epoch": 136.83, "learning_rate": 3.1087719298245616e-05, "loss": 0.0005, "step": 9168 }, { "epoch": 136.85, "learning_rate": 3.1052631578947365e-05, "loss": 0.0003, "step": 9169 }, { "epoch": 136.86, "learning_rate": 3.101754385964912e-05, "loss": 0.0002, "step": 9170 }, { "epoch": 136.88, "learning_rate": 3.0982456140350876e-05, "loss": 0.0002, "step": 9171 }, { "epoch": 136.89, "learning_rate": 3.0947368421052625e-05, "loss": 0.0002, "step": 9172 }, { "epoch": 136.91, "learning_rate": 3.091228070175438e-05, "loss": 0.0002, "step": 9173 }, { "epoch": 136.92, "learning_rate": 3.087719298245614e-05, "loss": 0.0053, "step": 9174 }, { "epoch": 136.94, "learning_rate": 3.084210526315789e-05, "loss": 0.0007, "step": 9175 }, { "epoch": 136.95, "learning_rate": 3.080701754385965e-05, "loss": 0.0002, "step": 9176 }, { "epoch": 136.97, "learning_rate": 3.07719298245614e-05, "loss": 0.0003, "step": 9177 }, { "epoch": 136.98, "learning_rate": 3.073684210526315e-05, "loss": 0.0002, "step": 9178 }, { "epoch": 137.0, "learning_rate": 3.070175438596491e-05, "loss": 0.0002, "step": 9179 }, { "epoch": 137.01, "learning_rate": 3.066666666666667e-05, "loss": 0.0855, "step": 9180 }, { "epoch": 137.03, "learning_rate": 3.063157894736842e-05, "loss": 0.0021, "step": 9181 }, { "epoch": 137.04, "learning_rate": 3.059649122807017e-05, "loss": 0.0239, "step": 9182 }, { "epoch": 137.06, "learning_rate": 3.056140350877193e-05, "loss": 0.0002, "step": 9183 }, { "epoch": 137.07, "learning_rate": 3.052631578947368e-05, "loss": 0.0002, "step": 9184 }, { "epoch": 137.09, "learning_rate": 3.0491228070175433e-05, "loss": 0.0031, "step": 9185 }, { "epoch": 137.1, "learning_rate": 3.0456140350877192e-05, "loss": 0.0011, "step": 9186 }, { "epoch": 137.12, "learning_rate": 3.0421052631578948e-05, "loss": 0.0002, "step": 9187 }, { "epoch": 137.13, "learning_rate": 3.03859649122807e-05, "loss": 0.0182, "step": 9188 }, { "epoch": 137.15, "learning_rate": 3.0350877192982452e-05, "loss": 0.0387, "step": 9189 }, { "epoch": 137.16, "learning_rate": 3.0315789473684208e-05, "loss": 0.0002, "step": 9190 }, { "epoch": 137.18, "learning_rate": 3.028070175438596e-05, "loss": 0.0002, "step": 9191 }, { "epoch": 137.19, "learning_rate": 3.0245614035087715e-05, "loss": 0.0002, "step": 9192 }, { "epoch": 137.21, "learning_rate": 3.0210526315789474e-05, "loss": 0.0002, "step": 9193 }, { "epoch": 137.22, "learning_rate": 3.0175438596491226e-05, "loss": 0.0003, "step": 9194 }, { "epoch": 137.24, "learning_rate": 3.014035087719298e-05, "loss": 0.0002, "step": 9195 }, { "epoch": 137.25, "learning_rate": 3.0105263157894734e-05, "loss": 0.0215, "step": 9196 }, { "epoch": 137.27, "learning_rate": 3.0070175438596486e-05, "loss": 0.0002, "step": 9197 }, { "epoch": 137.28, "learning_rate": 3.0035087719298242e-05, "loss": 0.0002, "step": 9198 }, { "epoch": 137.3, "learning_rate": 2.9999999999999997e-05, "loss": 0.0002, "step": 9199 }, { "epoch": 137.31, "learning_rate": 2.9964912280701753e-05, "loss": 0.002, "step": 9200 }, { "epoch": 137.31, "eval_accuracy": 0.8844836025452766, "eval_f1": 0.8846804745314188, "eval_loss": 0.667094349861145, "eval_runtime": 345.8776, "eval_samples_per_second": 11.813, "eval_steps_per_second": 0.74, "step": 9200 }, { "epoch": 137.33, "learning_rate": 2.9929824561403505e-05, "loss": 0.0002, "step": 9201 }, { "epoch": 137.34, "learning_rate": 2.989473684210526e-05, "loss": 0.0002, "step": 9202 }, { "epoch": 137.36, "learning_rate": 2.9859649122807013e-05, "loss": 0.0007, "step": 9203 }, { "epoch": 137.37, "learning_rate": 2.982456140350877e-05, "loss": 0.0002, "step": 9204 }, { "epoch": 137.39, "learning_rate": 2.9789473684210524e-05, "loss": 0.0002, "step": 9205 }, { "epoch": 137.4, "learning_rate": 2.975438596491228e-05, "loss": 0.0003, "step": 9206 }, { "epoch": 137.42, "learning_rate": 2.971929824561403e-05, "loss": 0.0002, "step": 9207 }, { "epoch": 137.43, "learning_rate": 2.9684210526315787e-05, "loss": 0.1348, "step": 9208 }, { "epoch": 137.45, "learning_rate": 2.964912280701754e-05, "loss": 0.0002, "step": 9209 }, { "epoch": 137.46, "learning_rate": 2.9614035087719295e-05, "loss": 0.0002, "step": 9210 }, { "epoch": 137.48, "learning_rate": 2.957894736842105e-05, "loss": 0.0002, "step": 9211 }, { "epoch": 137.49, "learning_rate": 2.9543859649122806e-05, "loss": 0.0002, "step": 9212 }, { "epoch": 137.51, "learning_rate": 2.9508771929824558e-05, "loss": 0.0002, "step": 9213 }, { "epoch": 137.52, "learning_rate": 2.9473684210526314e-05, "loss": 0.0002, "step": 9214 }, { "epoch": 137.54, "learning_rate": 2.9438596491228066e-05, "loss": 0.0002, "step": 9215 }, { "epoch": 137.55, "learning_rate": 2.940350877192982e-05, "loss": 0.0002, "step": 9216 }, { "epoch": 137.57, "learning_rate": 2.9368421052631574e-05, "loss": 0.067, "step": 9217 }, { "epoch": 137.58, "learning_rate": 2.9333333333333333e-05, "loss": 0.0002, "step": 9218 }, { "epoch": 137.59, "learning_rate": 2.9298245614035085e-05, "loss": 0.0002, "step": 9219 }, { "epoch": 137.61, "learning_rate": 2.926315789473684e-05, "loss": 0.0002, "step": 9220 }, { "epoch": 137.62, "learning_rate": 2.9228070175438592e-05, "loss": 0.0002, "step": 9221 }, { "epoch": 137.64, "learning_rate": 2.9192982456140348e-05, "loss": 0.0002, "step": 9222 }, { "epoch": 137.65, "learning_rate": 2.91578947368421e-05, "loss": 0.0002, "step": 9223 }, { "epoch": 137.67, "learning_rate": 2.912280701754386e-05, "loss": 0.0008, "step": 9224 }, { "epoch": 137.68, "learning_rate": 2.908771929824561e-05, "loss": 0.0002, "step": 9225 }, { "epoch": 137.7, "learning_rate": 2.9052631578947367e-05, "loss": 0.0771, "step": 9226 }, { "epoch": 137.71, "learning_rate": 2.901754385964912e-05, "loss": 0.0005, "step": 9227 }, { "epoch": 137.73, "learning_rate": 2.8982456140350875e-05, "loss": 0.0002, "step": 9228 }, { "epoch": 137.74, "learning_rate": 2.8947368421052627e-05, "loss": 0.0006, "step": 9229 }, { "epoch": 137.76, "learning_rate": 2.8912280701754386e-05, "loss": 0.0002, "step": 9230 }, { "epoch": 137.77, "learning_rate": 2.8877192982456138e-05, "loss": 0.0002, "step": 9231 }, { "epoch": 137.79, "learning_rate": 2.8842105263157893e-05, "loss": 0.0002, "step": 9232 }, { "epoch": 137.8, "learning_rate": 2.8807017543859646e-05, "loss": 0.0002, "step": 9233 }, { "epoch": 137.82, "learning_rate": 2.87719298245614e-05, "loss": 0.0002, "step": 9234 }, { "epoch": 137.83, "learning_rate": 2.8736842105263153e-05, "loss": 0.0002, "step": 9235 }, { "epoch": 137.85, "learning_rate": 2.8701754385964912e-05, "loss": 0.0208, "step": 9236 }, { "epoch": 137.86, "learning_rate": 2.8666666666666664e-05, "loss": 0.1129, "step": 9237 }, { "epoch": 137.88, "learning_rate": 2.863157894736842e-05, "loss": 0.0002, "step": 9238 }, { "epoch": 137.89, "learning_rate": 2.8596491228070172e-05, "loss": 0.0002, "step": 9239 }, { "epoch": 137.91, "learning_rate": 2.8561403508771928e-05, "loss": 0.0002, "step": 9240 }, { "epoch": 137.92, "learning_rate": 2.852631578947368e-05, "loss": 0.0003, "step": 9241 }, { "epoch": 137.94, "learning_rate": 2.8491228070175435e-05, "loss": 0.0004, "step": 9242 }, { "epoch": 137.95, "learning_rate": 2.845614035087719e-05, "loss": 0.0002, "step": 9243 }, { "epoch": 137.97, "learning_rate": 2.8421052631578946e-05, "loss": 0.0002, "step": 9244 }, { "epoch": 137.98, "learning_rate": 2.83859649122807e-05, "loss": 0.0002, "step": 9245 }, { "epoch": 138.0, "learning_rate": 2.8350877192982454e-05, "loss": 0.0002, "step": 9246 }, { "epoch": 138.01, "learning_rate": 2.8315789473684206e-05, "loss": 0.0003, "step": 9247 }, { "epoch": 138.03, "learning_rate": 2.8280701754385962e-05, "loss": 0.0003, "step": 9248 }, { "epoch": 138.04, "learning_rate": 2.8245614035087717e-05, "loss": 0.0002, "step": 9249 }, { "epoch": 138.06, "learning_rate": 2.8210526315789473e-05, "loss": 0.0004, "step": 9250 }, { "epoch": 138.07, "learning_rate": 2.8175438596491225e-05, "loss": 0.0002, "step": 9251 }, { "epoch": 138.09, "learning_rate": 2.814035087719298e-05, "loss": 0.0002, "step": 9252 }, { "epoch": 138.1, "learning_rate": 2.8105263157894733e-05, "loss": 0.0016, "step": 9253 }, { "epoch": 138.12, "learning_rate": 2.807017543859649e-05, "loss": 0.0002, "step": 9254 }, { "epoch": 138.13, "learning_rate": 2.8035087719298244e-05, "loss": 0.0002, "step": 9255 }, { "epoch": 138.15, "learning_rate": 2.8e-05, "loss": 0.0002, "step": 9256 }, { "epoch": 138.16, "learning_rate": 2.7964912280701752e-05, "loss": 0.0002, "step": 9257 }, { "epoch": 138.18, "learning_rate": 2.7929824561403507e-05, "loss": 0.0001, "step": 9258 }, { "epoch": 138.19, "learning_rate": 2.789473684210526e-05, "loss": 0.019, "step": 9259 }, { "epoch": 138.21, "learning_rate": 2.7859649122807015e-05, "loss": 0.0002, "step": 9260 }, { "epoch": 138.22, "learning_rate": 2.7824561403508767e-05, "loss": 0.0003, "step": 9261 }, { "epoch": 138.24, "learning_rate": 2.7789473684210526e-05, "loss": 0.0002, "step": 9262 }, { "epoch": 138.25, "learning_rate": 2.7754385964912278e-05, "loss": 0.0003, "step": 9263 }, { "epoch": 138.27, "learning_rate": 2.7719298245614034e-05, "loss": 0.0002, "step": 9264 }, { "epoch": 138.28, "learning_rate": 2.7684210526315786e-05, "loss": 0.1113, "step": 9265 }, { "epoch": 138.3, "learning_rate": 2.764912280701754e-05, "loss": 0.0003, "step": 9266 }, { "epoch": 138.31, "learning_rate": 2.7614035087719294e-05, "loss": 0.0002, "step": 9267 }, { "epoch": 138.33, "learning_rate": 2.7578947368421053e-05, "loss": 0.0002, "step": 9268 }, { "epoch": 138.34, "learning_rate": 2.7543859649122805e-05, "loss": 0.0002, "step": 9269 }, { "epoch": 138.36, "learning_rate": 2.750877192982456e-05, "loss": 0.0002, "step": 9270 }, { "epoch": 138.37, "learning_rate": 2.7473684210526313e-05, "loss": 0.0002, "step": 9271 }, { "epoch": 138.39, "learning_rate": 2.7438596491228068e-05, "loss": 0.0002, "step": 9272 }, { "epoch": 138.4, "learning_rate": 2.740350877192982e-05, "loss": 0.0736, "step": 9273 }, { "epoch": 138.42, "learning_rate": 2.736842105263158e-05, "loss": 0.0002, "step": 9274 }, { "epoch": 138.43, "learning_rate": 2.733333333333333e-05, "loss": 0.0002, "step": 9275 }, { "epoch": 138.45, "learning_rate": 2.7298245614035087e-05, "loss": 0.0002, "step": 9276 }, { "epoch": 138.46, "learning_rate": 2.726315789473684e-05, "loss": 0.0002, "step": 9277 }, { "epoch": 138.48, "learning_rate": 2.7228070175438595e-05, "loss": 0.0002, "step": 9278 }, { "epoch": 138.49, "learning_rate": 2.7192982456140347e-05, "loss": 0.0002, "step": 9279 }, { "epoch": 138.51, "learning_rate": 2.7157894736842106e-05, "loss": 0.0002, "step": 9280 }, { "epoch": 138.52, "learning_rate": 2.7122807017543858e-05, "loss": 0.0001, "step": 9281 }, { "epoch": 138.54, "learning_rate": 2.7087719298245613e-05, "loss": 0.0002, "step": 9282 }, { "epoch": 138.55, "learning_rate": 2.7052631578947366e-05, "loss": 0.0003, "step": 9283 }, { "epoch": 138.57, "learning_rate": 2.701754385964912e-05, "loss": 0.0003, "step": 9284 }, { "epoch": 138.58, "learning_rate": 2.6982456140350873e-05, "loss": 0.0005, "step": 9285 }, { "epoch": 138.59, "learning_rate": 2.6947368421052626e-05, "loss": 0.0002, "step": 9286 }, { "epoch": 138.61, "learning_rate": 2.6912280701754384e-05, "loss": 0.0002, "step": 9287 }, { "epoch": 138.62, "learning_rate": 2.687719298245614e-05, "loss": 0.0003, "step": 9288 }, { "epoch": 138.64, "learning_rate": 2.6842105263157892e-05, "loss": 0.0002, "step": 9289 }, { "epoch": 138.65, "learning_rate": 2.6807017543859648e-05, "loss": 0.0002, "step": 9290 }, { "epoch": 138.67, "learning_rate": 2.67719298245614e-05, "loss": 0.0002, "step": 9291 }, { "epoch": 138.68, "learning_rate": 2.6736842105263152e-05, "loss": 0.001, "step": 9292 }, { "epoch": 138.7, "learning_rate": 2.670175438596491e-05, "loss": 0.0002, "step": 9293 }, { "epoch": 138.71, "learning_rate": 2.6666666666666667e-05, "loss": 0.0002, "step": 9294 }, { "epoch": 138.73, "learning_rate": 2.663157894736842e-05, "loss": 0.0006, "step": 9295 }, { "epoch": 138.74, "learning_rate": 2.6596491228070174e-05, "loss": 0.0002, "step": 9296 }, { "epoch": 138.76, "learning_rate": 2.6561403508771926e-05, "loss": 0.0002, "step": 9297 }, { "epoch": 138.77, "learning_rate": 2.652631578947368e-05, "loss": 0.0002, "step": 9298 }, { "epoch": 138.79, "learning_rate": 2.6491228070175438e-05, "loss": 0.0002, "step": 9299 }, { "epoch": 138.8, "learning_rate": 2.6456140350877193e-05, "loss": 0.0002, "step": 9300 }, { "epoch": 138.82, "learning_rate": 2.6421052631578945e-05, "loss": 0.0003, "step": 9301 }, { "epoch": 138.83, "learning_rate": 2.63859649122807e-05, "loss": 0.0002, "step": 9302 }, { "epoch": 138.85, "learning_rate": 2.6350877192982453e-05, "loss": 0.0097, "step": 9303 }, { "epoch": 138.86, "learning_rate": 2.6315789473684205e-05, "loss": 0.0002, "step": 9304 }, { "epoch": 138.88, "learning_rate": 2.6280701754385964e-05, "loss": 0.0002, "step": 9305 }, { "epoch": 138.89, "learning_rate": 2.624561403508772e-05, "loss": 0.0002, "step": 9306 }, { "epoch": 138.91, "learning_rate": 2.6210526315789472e-05, "loss": 0.0002, "step": 9307 }, { "epoch": 138.92, "learning_rate": 2.6175438596491227e-05, "loss": 0.0002, "step": 9308 }, { "epoch": 138.94, "learning_rate": 2.614035087719298e-05, "loss": 0.0002, "step": 9309 }, { "epoch": 138.95, "learning_rate": 2.6105263157894732e-05, "loss": 0.0002, "step": 9310 }, { "epoch": 138.97, "learning_rate": 2.6070175438596487e-05, "loss": 0.0002, "step": 9311 }, { "epoch": 138.98, "learning_rate": 2.6035087719298246e-05, "loss": 0.0002, "step": 9312 }, { "epoch": 139.0, "learning_rate": 2.6e-05, "loss": 0.0002, "step": 9313 }, { "epoch": 139.01, "learning_rate": 2.5964912280701754e-05, "loss": 0.0005, "step": 9314 }, { "epoch": 139.03, "learning_rate": 2.5929824561403506e-05, "loss": 0.0003, "step": 9315 }, { "epoch": 139.04, "learning_rate": 2.5894736842105258e-05, "loss": 0.0002, "step": 9316 }, { "epoch": 139.06, "learning_rate": 2.5859649122807014e-05, "loss": 0.0002, "step": 9317 }, { "epoch": 139.07, "learning_rate": 2.5824561403508773e-05, "loss": 0.0017, "step": 9318 }, { "epoch": 139.09, "learning_rate": 2.5789473684210525e-05, "loss": 0.0002, "step": 9319 }, { "epoch": 139.1, "learning_rate": 2.575438596491228e-05, "loss": 0.0002, "step": 9320 }, { "epoch": 139.12, "learning_rate": 2.5719298245614033e-05, "loss": 0.0002, "step": 9321 }, { "epoch": 139.13, "learning_rate": 2.5684210526315785e-05, "loss": 0.0002, "step": 9322 }, { "epoch": 139.15, "learning_rate": 2.564912280701754e-05, "loss": 0.0002, "step": 9323 }, { "epoch": 139.16, "learning_rate": 2.56140350877193e-05, "loss": 0.0001, "step": 9324 }, { "epoch": 139.18, "learning_rate": 2.557894736842105e-05, "loss": 0.0002, "step": 9325 }, { "epoch": 139.19, "learning_rate": 2.5543859649122807e-05, "loss": 0.0002, "step": 9326 }, { "epoch": 139.21, "learning_rate": 2.550877192982456e-05, "loss": 0.0002, "step": 9327 }, { "epoch": 139.22, "learning_rate": 2.547368421052631e-05, "loss": 0.0002, "step": 9328 }, { "epoch": 139.24, "learning_rate": 2.5438596491228067e-05, "loss": 0.0002, "step": 9329 }, { "epoch": 139.25, "learning_rate": 2.540350877192982e-05, "loss": 0.0002, "step": 9330 }, { "epoch": 139.27, "learning_rate": 2.5368421052631578e-05, "loss": 0.0002, "step": 9331 }, { "epoch": 139.28, "learning_rate": 2.533333333333333e-05, "loss": 0.0002, "step": 9332 }, { "epoch": 139.3, "learning_rate": 2.5298245614035086e-05, "loss": 0.0002, "step": 9333 }, { "epoch": 139.31, "learning_rate": 2.5263157894736838e-05, "loss": 0.0002, "step": 9334 }, { "epoch": 139.33, "learning_rate": 2.5228070175438593e-05, "loss": 0.0002, "step": 9335 }, { "epoch": 139.34, "learning_rate": 2.5192982456140346e-05, "loss": 0.0002, "step": 9336 }, { "epoch": 139.36, "learning_rate": 2.5157894736842105e-05, "loss": 0.0002, "step": 9337 }, { "epoch": 139.37, "learning_rate": 2.5122807017543857e-05, "loss": 0.0002, "step": 9338 }, { "epoch": 139.39, "learning_rate": 2.5087719298245612e-05, "loss": 0.0002, "step": 9339 }, { "epoch": 139.4, "learning_rate": 2.5052631578947364e-05, "loss": 0.0002, "step": 9340 }, { "epoch": 139.42, "learning_rate": 2.501754385964912e-05, "loss": 0.0002, "step": 9341 }, { "epoch": 139.43, "learning_rate": 2.4982456140350872e-05, "loss": 0.0002, "step": 9342 }, { "epoch": 139.45, "learning_rate": 2.494736842105263e-05, "loss": 0.0002, "step": 9343 }, { "epoch": 139.46, "learning_rate": 2.4912280701754383e-05, "loss": 0.0003, "step": 9344 }, { "epoch": 139.48, "learning_rate": 2.487719298245614e-05, "loss": 0.0002, "step": 9345 }, { "epoch": 139.49, "learning_rate": 2.484210526315789e-05, "loss": 0.0002, "step": 9346 }, { "epoch": 139.51, "learning_rate": 2.4807017543859647e-05, "loss": 0.0002, "step": 9347 }, { "epoch": 139.52, "learning_rate": 2.47719298245614e-05, "loss": 0.0002, "step": 9348 }, { "epoch": 139.54, "learning_rate": 2.4736842105263158e-05, "loss": 0.0002, "step": 9349 }, { "epoch": 139.55, "learning_rate": 2.470175438596491e-05, "loss": 0.0002, "step": 9350 }, { "epoch": 139.57, "learning_rate": 2.4666666666666665e-05, "loss": 0.0015, "step": 9351 }, { "epoch": 139.58, "learning_rate": 2.4631578947368418e-05, "loss": 0.0007, "step": 9352 }, { "epoch": 139.59, "learning_rate": 2.4596491228070173e-05, "loss": 0.0002, "step": 9353 }, { "epoch": 139.61, "learning_rate": 2.4561403508771925e-05, "loss": 0.0002, "step": 9354 }, { "epoch": 139.62, "learning_rate": 2.452631578947368e-05, "loss": 0.0002, "step": 9355 }, { "epoch": 139.64, "learning_rate": 2.4491228070175436e-05, "loss": 0.0002, "step": 9356 }, { "epoch": 139.65, "learning_rate": 2.4456140350877192e-05, "loss": 0.0002, "step": 9357 }, { "epoch": 139.67, "learning_rate": 2.4421052631578944e-05, "loss": 0.0002, "step": 9358 }, { "epoch": 139.68, "learning_rate": 2.43859649122807e-05, "loss": 0.0002, "step": 9359 }, { "epoch": 139.7, "learning_rate": 2.4350877192982452e-05, "loss": 0.0002, "step": 9360 }, { "epoch": 139.71, "learning_rate": 2.4315789473684207e-05, "loss": 0.0002, "step": 9361 }, { "epoch": 139.73, "learning_rate": 2.4280701754385963e-05, "loss": 0.0002, "step": 9362 }, { "epoch": 139.74, "learning_rate": 2.424561403508772e-05, "loss": 0.0002, "step": 9363 }, { "epoch": 139.76, "learning_rate": 2.421052631578947e-05, "loss": 0.0002, "step": 9364 }, { "epoch": 139.77, "learning_rate": 2.4175438596491226e-05, "loss": 0.0002, "step": 9365 }, { "epoch": 139.79, "learning_rate": 2.414035087719298e-05, "loss": 0.0251, "step": 9366 }, { "epoch": 139.8, "learning_rate": 2.4105263157894734e-05, "loss": 0.0002, "step": 9367 }, { "epoch": 139.82, "learning_rate": 2.407017543859649e-05, "loss": 0.0118, "step": 9368 }, { "epoch": 139.83, "learning_rate": 2.4035087719298245e-05, "loss": 0.2565, "step": 9369 }, { "epoch": 139.85, "learning_rate": 2.3999999999999997e-05, "loss": 0.0004, "step": 9370 }, { "epoch": 139.86, "learning_rate": 2.3964912280701753e-05, "loss": 0.0002, "step": 9371 }, { "epoch": 139.88, "learning_rate": 2.3929824561403505e-05, "loss": 0.0003, "step": 9372 }, { "epoch": 139.89, "learning_rate": 2.389473684210526e-05, "loss": 0.0002, "step": 9373 }, { "epoch": 139.91, "learning_rate": 2.3859649122807016e-05, "loss": 0.0033, "step": 9374 }, { "epoch": 139.92, "learning_rate": 2.382456140350877e-05, "loss": 0.0002, "step": 9375 }, { "epoch": 139.94, "learning_rate": 2.3789473684210524e-05, "loss": 0.0002, "step": 9376 }, { "epoch": 139.95, "learning_rate": 2.375438596491228e-05, "loss": 0.0002, "step": 9377 }, { "epoch": 139.97, "learning_rate": 2.371929824561403e-05, "loss": 0.0001, "step": 9378 }, { "epoch": 139.98, "learning_rate": 2.3684210526315787e-05, "loss": 0.0002, "step": 9379 }, { "epoch": 140.0, "learning_rate": 2.364912280701754e-05, "loss": 0.0002, "step": 9380 }, { "epoch": 140.01, "learning_rate": 2.3614035087719298e-05, "loss": 0.0002, "step": 9381 }, { "epoch": 140.03, "learning_rate": 2.357894736842105e-05, "loss": 0.0002, "step": 9382 }, { "epoch": 140.04, "learning_rate": 2.3543859649122806e-05, "loss": 0.0002, "step": 9383 }, { "epoch": 140.06, "learning_rate": 2.3508771929824558e-05, "loss": 0.0002, "step": 9384 }, { "epoch": 140.07, "learning_rate": 2.3473684210526314e-05, "loss": 0.0032, "step": 9385 }, { "epoch": 140.09, "learning_rate": 2.3438596491228066e-05, "loss": 0.0002, "step": 9386 }, { "epoch": 140.1, "learning_rate": 2.3403508771929825e-05, "loss": 0.0002, "step": 9387 }, { "epoch": 140.12, "learning_rate": 2.3368421052631577e-05, "loss": 0.0002, "step": 9388 }, { "epoch": 140.13, "learning_rate": 2.3333333333333332e-05, "loss": 0.0005, "step": 9389 }, { "epoch": 140.15, "learning_rate": 2.3298245614035085e-05, "loss": 0.0003, "step": 9390 }, { "epoch": 140.16, "learning_rate": 2.326315789473684e-05, "loss": 0.0002, "step": 9391 }, { "epoch": 140.18, "learning_rate": 2.3228070175438592e-05, "loss": 0.0007, "step": 9392 }, { "epoch": 140.19, "learning_rate": 2.319298245614035e-05, "loss": 0.0002, "step": 9393 }, { "epoch": 140.21, "learning_rate": 2.3157894736842103e-05, "loss": 0.0003, "step": 9394 }, { "epoch": 140.22, "learning_rate": 2.312280701754386e-05, "loss": 0.0002, "step": 9395 }, { "epoch": 140.24, "learning_rate": 2.308771929824561e-05, "loss": 0.0014, "step": 9396 }, { "epoch": 140.25, "learning_rate": 2.3052631578947367e-05, "loss": 0.0002, "step": 9397 }, { "epoch": 140.27, "learning_rate": 2.301754385964912e-05, "loss": 0.0002, "step": 9398 }, { "epoch": 140.28, "learning_rate": 2.2982456140350878e-05, "loss": 0.0002, "step": 9399 }, { "epoch": 140.3, "learning_rate": 2.294736842105263e-05, "loss": 0.0002, "step": 9400 }, { "epoch": 140.3, "eval_accuracy": 0.8935389133627019, "eval_f1": 0.8931026363559365, "eval_loss": 0.5969720482826233, "eval_runtime": 345.1416, "eval_samples_per_second": 11.839, "eval_steps_per_second": 0.742, "step": 9400 }, { "epoch": 140.31, "learning_rate": 2.2912280701754386e-05, "loss": 0.0002, "step": 9401 }, { "epoch": 140.33, "learning_rate": 2.2877192982456138e-05, "loss": 0.0002, "step": 9402 }, { "epoch": 140.34, "learning_rate": 2.2842105263157893e-05, "loss": 0.0182, "step": 9403 }, { "epoch": 140.36, "learning_rate": 2.2807017543859645e-05, "loss": 0.1147, "step": 9404 }, { "epoch": 140.37, "learning_rate": 2.27719298245614e-05, "loss": 0.0002, "step": 9405 }, { "epoch": 140.39, "learning_rate": 2.2736842105263157e-05, "loss": 0.0019, "step": 9406 }, { "epoch": 140.4, "learning_rate": 2.2701754385964912e-05, "loss": 0.0002, "step": 9407 }, { "epoch": 140.42, "learning_rate": 2.2666666666666664e-05, "loss": 0.0543, "step": 9408 }, { "epoch": 140.43, "learning_rate": 2.263157894736842e-05, "loss": 0.0002, "step": 9409 }, { "epoch": 140.45, "learning_rate": 2.2596491228070172e-05, "loss": 0.0002, "step": 9410 }, { "epoch": 140.46, "learning_rate": 2.2561403508771928e-05, "loss": 0.0002, "step": 9411 }, { "epoch": 140.48, "learning_rate": 2.2526315789473683e-05, "loss": 0.0002, "step": 9412 }, { "epoch": 140.49, "learning_rate": 2.249122807017544e-05, "loss": 0.0002, "step": 9413 }, { "epoch": 140.51, "learning_rate": 2.245614035087719e-05, "loss": 0.0002, "step": 9414 }, { "epoch": 140.52, "learning_rate": 2.2421052631578946e-05, "loss": 0.0002, "step": 9415 }, { "epoch": 140.54, "learning_rate": 2.23859649122807e-05, "loss": 0.0001, "step": 9416 }, { "epoch": 140.55, "learning_rate": 2.2350877192982454e-05, "loss": 0.0002, "step": 9417 }, { "epoch": 140.57, "learning_rate": 2.231578947368421e-05, "loss": 0.0002, "step": 9418 }, { "epoch": 140.58, "learning_rate": 2.2280701754385965e-05, "loss": 0.0002, "step": 9419 }, { "epoch": 140.59, "learning_rate": 2.2245614035087717e-05, "loss": 0.0002, "step": 9420 }, { "epoch": 140.61, "learning_rate": 2.2210526315789473e-05, "loss": 0.0002, "step": 9421 }, { "epoch": 140.62, "learning_rate": 2.2175438596491225e-05, "loss": 0.0002, "step": 9422 }, { "epoch": 140.64, "learning_rate": 2.214035087719298e-05, "loss": 0.0002, "step": 9423 }, { "epoch": 140.65, "learning_rate": 2.2105263157894733e-05, "loss": 0.0002, "step": 9424 }, { "epoch": 140.67, "learning_rate": 2.2070175438596492e-05, "loss": 0.0002, "step": 9425 }, { "epoch": 140.68, "learning_rate": 2.2035087719298244e-05, "loss": 0.0002, "step": 9426 }, { "epoch": 140.7, "learning_rate": 2.2e-05, "loss": 0.0003, "step": 9427 }, { "epoch": 140.71, "learning_rate": 2.196491228070175e-05, "loss": 0.0002, "step": 9428 }, { "epoch": 140.73, "learning_rate": 2.1929824561403507e-05, "loss": 0.0002, "step": 9429 }, { "epoch": 140.74, "learning_rate": 2.189473684210526e-05, "loss": 0.0002, "step": 9430 }, { "epoch": 140.76, "learning_rate": 2.1859649122807018e-05, "loss": 0.0003, "step": 9431 }, { "epoch": 140.77, "learning_rate": 2.182456140350877e-05, "loss": 0.0002, "step": 9432 }, { "epoch": 140.79, "learning_rate": 2.1789473684210526e-05, "loss": 0.0002, "step": 9433 }, { "epoch": 140.8, "learning_rate": 2.1754385964912278e-05, "loss": 0.0064, "step": 9434 }, { "epoch": 140.82, "learning_rate": 2.1719298245614034e-05, "loss": 0.0004, "step": 9435 }, { "epoch": 140.83, "learning_rate": 2.1684210526315786e-05, "loss": 0.0002, "step": 9436 }, { "epoch": 140.85, "learning_rate": 2.1649122807017545e-05, "loss": 0.0002, "step": 9437 }, { "epoch": 140.86, "learning_rate": 2.1614035087719297e-05, "loss": 0.0002, "step": 9438 }, { "epoch": 140.88, "learning_rate": 2.1578947368421053e-05, "loss": 0.0001, "step": 9439 }, { "epoch": 140.89, "learning_rate": 2.1543859649122805e-05, "loss": 0.0014, "step": 9440 }, { "epoch": 140.91, "learning_rate": 2.150877192982456e-05, "loss": 0.0654, "step": 9441 }, { "epoch": 140.92, "learning_rate": 2.1473684210526312e-05, "loss": 0.0002, "step": 9442 }, { "epoch": 140.94, "learning_rate": 2.143859649122807e-05, "loss": 0.0004, "step": 9443 }, { "epoch": 140.95, "learning_rate": 2.1403508771929824e-05, "loss": 0.0003, "step": 9444 }, { "epoch": 140.97, "learning_rate": 2.136842105263158e-05, "loss": 0.0002, "step": 9445 }, { "epoch": 140.98, "learning_rate": 2.133333333333333e-05, "loss": 0.0002, "step": 9446 }, { "epoch": 141.0, "learning_rate": 2.1298245614035087e-05, "loss": 0.0002, "step": 9447 }, { "epoch": 141.01, "learning_rate": 2.126315789473684e-05, "loss": 0.0003, "step": 9448 }, { "epoch": 141.03, "learning_rate": 2.122807017543859e-05, "loss": 0.0002, "step": 9449 }, { "epoch": 141.04, "learning_rate": 2.119298245614035e-05, "loss": 0.0002, "step": 9450 }, { "epoch": 141.06, "learning_rate": 2.1157894736842106e-05, "loss": 0.0002, "step": 9451 }, { "epoch": 141.07, "learning_rate": 2.1122807017543858e-05, "loss": 0.0002, "step": 9452 }, { "epoch": 141.09, "learning_rate": 2.1087719298245613e-05, "loss": 0.1253, "step": 9453 }, { "epoch": 141.1, "learning_rate": 2.1052631578947366e-05, "loss": 0.0002, "step": 9454 }, { "epoch": 141.12, "learning_rate": 2.1017543859649118e-05, "loss": 0.0002, "step": 9455 }, { "epoch": 141.13, "learning_rate": 2.0982456140350877e-05, "loss": 0.0002, "step": 9456 }, { "epoch": 141.15, "learning_rate": 2.0947368421052632e-05, "loss": 0.1106, "step": 9457 }, { "epoch": 141.16, "learning_rate": 2.0912280701754384e-05, "loss": 0.0002, "step": 9458 }, { "epoch": 141.18, "learning_rate": 2.087719298245614e-05, "loss": 0.0002, "step": 9459 }, { "epoch": 141.19, "learning_rate": 2.0842105263157892e-05, "loss": 0.0002, "step": 9460 }, { "epoch": 141.21, "learning_rate": 2.0807017543859644e-05, "loss": 0.0002, "step": 9461 }, { "epoch": 141.22, "learning_rate": 2.0771929824561403e-05, "loss": 0.0002, "step": 9462 }, { "epoch": 141.24, "learning_rate": 2.073684210526316e-05, "loss": 0.0007, "step": 9463 }, { "epoch": 141.25, "learning_rate": 2.070175438596491e-05, "loss": 0.0002, "step": 9464 }, { "epoch": 141.27, "learning_rate": 2.0666666666666663e-05, "loss": 0.0003, "step": 9465 }, { "epoch": 141.28, "learning_rate": 2.063157894736842e-05, "loss": 0.0561, "step": 9466 }, { "epoch": 141.3, "learning_rate": 2.059649122807017e-05, "loss": 0.0002, "step": 9467 }, { "epoch": 141.31, "learning_rate": 2.056140350877193e-05, "loss": 0.0003, "step": 9468 }, { "epoch": 141.33, "learning_rate": 2.0526315789473685e-05, "loss": 0.0002, "step": 9469 }, { "epoch": 141.34, "learning_rate": 2.0491228070175437e-05, "loss": 0.0002, "step": 9470 }, { "epoch": 141.36, "learning_rate": 2.045614035087719e-05, "loss": 0.0002, "step": 9471 }, { "epoch": 141.37, "learning_rate": 2.0421052631578945e-05, "loss": 0.0002, "step": 9472 }, { "epoch": 141.39, "learning_rate": 2.0385964912280697e-05, "loss": 0.0002, "step": 9473 }, { "epoch": 141.4, "learning_rate": 2.0350877192982453e-05, "loss": 0.0002, "step": 9474 }, { "epoch": 141.42, "learning_rate": 2.0315789473684212e-05, "loss": 0.0002, "step": 9475 }, { "epoch": 141.43, "learning_rate": 2.0280701754385964e-05, "loss": 0.0002, "step": 9476 }, { "epoch": 141.45, "learning_rate": 2.0245614035087716e-05, "loss": 0.0002, "step": 9477 }, { "epoch": 141.46, "learning_rate": 2.0210526315789472e-05, "loss": 0.0018, "step": 9478 }, { "epoch": 141.48, "learning_rate": 2.0175438596491224e-05, "loss": 0.0002, "step": 9479 }, { "epoch": 141.49, "learning_rate": 2.014035087719298e-05, "loss": 0.0002, "step": 9480 }, { "epoch": 141.51, "learning_rate": 2.010526315789474e-05, "loss": 0.0002, "step": 9481 }, { "epoch": 141.52, "learning_rate": 2.007017543859649e-05, "loss": 0.0002, "step": 9482 }, { "epoch": 141.54, "learning_rate": 2.0035087719298243e-05, "loss": 0.0002, "step": 9483 }, { "epoch": 141.55, "learning_rate": 1.9999999999999998e-05, "loss": 0.0002, "step": 9484 }, { "epoch": 141.57, "learning_rate": 1.996491228070175e-05, "loss": 0.0002, "step": 9485 }, { "epoch": 141.58, "learning_rate": 1.9929824561403506e-05, "loss": 0.0002, "step": 9486 }, { "epoch": 141.59, "learning_rate": 1.9894736842105265e-05, "loss": 0.0002, "step": 9487 }, { "epoch": 141.61, "learning_rate": 1.9859649122807017e-05, "loss": 0.0002, "step": 9488 }, { "epoch": 141.62, "learning_rate": 1.982456140350877e-05, "loss": 0.0002, "step": 9489 }, { "epoch": 141.64, "learning_rate": 1.9789473684210525e-05, "loss": 0.0002, "step": 9490 }, { "epoch": 141.65, "learning_rate": 1.9754385964912277e-05, "loss": 0.0002, "step": 9491 }, { "epoch": 141.67, "learning_rate": 1.9719298245614033e-05, "loss": 0.0002, "step": 9492 }, { "epoch": 141.68, "learning_rate": 1.968421052631579e-05, "loss": 0.0002, "step": 9493 }, { "epoch": 141.7, "learning_rate": 1.9649122807017544e-05, "loss": 0.0002, "step": 9494 }, { "epoch": 141.71, "learning_rate": 1.9614035087719296e-05, "loss": 0.0002, "step": 9495 }, { "epoch": 141.73, "learning_rate": 1.957894736842105e-05, "loss": 0.0002, "step": 9496 }, { "epoch": 141.74, "learning_rate": 1.9543859649122804e-05, "loss": 0.0002, "step": 9497 }, { "epoch": 141.76, "learning_rate": 1.950877192982456e-05, "loss": 0.0005, "step": 9498 }, { "epoch": 141.77, "learning_rate": 1.947368421052631e-05, "loss": 0.0002, "step": 9499 }, { "epoch": 141.79, "learning_rate": 1.943859649122807e-05, "loss": 0.0002, "step": 9500 }, { "epoch": 141.8, "learning_rate": 1.9403508771929822e-05, "loss": 0.0002, "step": 9501 }, { "epoch": 141.82, "learning_rate": 1.9368421052631578e-05, "loss": 0.001, "step": 9502 }, { "epoch": 141.83, "learning_rate": 1.933333333333333e-05, "loss": 0.0002, "step": 9503 }, { "epoch": 141.85, "learning_rate": 1.9298245614035086e-05, "loss": 0.0014, "step": 9504 }, { "epoch": 141.86, "learning_rate": 1.9263157894736838e-05, "loss": 0.0002, "step": 9505 }, { "epoch": 141.88, "learning_rate": 1.9228070175438597e-05, "loss": 0.0002, "step": 9506 }, { "epoch": 141.89, "learning_rate": 1.919298245614035e-05, "loss": 0.0002, "step": 9507 }, { "epoch": 141.91, "learning_rate": 1.9157894736842104e-05, "loss": 0.0002, "step": 9508 }, { "epoch": 141.92, "learning_rate": 1.9122807017543857e-05, "loss": 0.0001, "step": 9509 }, { "epoch": 141.94, "learning_rate": 1.9087719298245612e-05, "loss": 0.0002, "step": 9510 }, { "epoch": 141.95, "learning_rate": 1.9052631578947364e-05, "loss": 0.0002, "step": 9511 }, { "epoch": 141.97, "learning_rate": 1.9017543859649123e-05, "loss": 0.0002, "step": 9512 }, { "epoch": 141.98, "learning_rate": 1.8982456140350875e-05, "loss": 0.0002, "step": 9513 }, { "epoch": 142.0, "learning_rate": 1.894736842105263e-05, "loss": 0.0014, "step": 9514 }, { "epoch": 142.01, "learning_rate": 1.8912280701754383e-05, "loss": 0.0002, "step": 9515 }, { "epoch": 142.03, "learning_rate": 1.887719298245614e-05, "loss": 0.0002, "step": 9516 }, { "epoch": 142.04, "learning_rate": 1.884210526315789e-05, "loss": 0.0002, "step": 9517 }, { "epoch": 142.06, "learning_rate": 1.8807017543859646e-05, "loss": 0.0002, "step": 9518 }, { "epoch": 142.07, "learning_rate": 1.8771929824561402e-05, "loss": 0.0002, "step": 9519 }, { "epoch": 142.09, "learning_rate": 1.8736842105263158e-05, "loss": 0.0003, "step": 9520 }, { "epoch": 142.1, "learning_rate": 1.870175438596491e-05, "loss": 0.0041, "step": 9521 }, { "epoch": 142.12, "learning_rate": 1.8666666666666665e-05, "loss": 0.0002, "step": 9522 }, { "epoch": 142.13, "learning_rate": 1.863157894736842e-05, "loss": 0.0011, "step": 9523 }, { "epoch": 142.15, "learning_rate": 1.8596491228070173e-05, "loss": 0.0002, "step": 9524 }, { "epoch": 142.16, "learning_rate": 1.856140350877193e-05, "loss": 0.0002, "step": 9525 }, { "epoch": 142.18, "learning_rate": 1.8526315789473684e-05, "loss": 0.0002, "step": 9526 }, { "epoch": 142.19, "learning_rate": 1.8491228070175436e-05, "loss": 0.0002, "step": 9527 }, { "epoch": 142.21, "learning_rate": 1.8456140350877192e-05, "loss": 0.0002, "step": 9528 }, { "epoch": 142.22, "learning_rate": 1.8421052631578944e-05, "loss": 0.0002, "step": 9529 }, { "epoch": 142.24, "learning_rate": 1.83859649122807e-05, "loss": 0.0003, "step": 9530 }, { "epoch": 142.25, "learning_rate": 1.8350877192982455e-05, "loss": 0.008, "step": 9531 }, { "epoch": 142.27, "learning_rate": 1.8315789473684207e-05, "loss": 0.0002, "step": 9532 }, { "epoch": 142.28, "learning_rate": 1.8280701754385963e-05, "loss": 0.0002, "step": 9533 }, { "epoch": 142.3, "learning_rate": 1.824561403508772e-05, "loss": 0.0002, "step": 9534 }, { "epoch": 142.31, "learning_rate": 1.821052631578947e-05, "loss": 0.0002, "step": 9535 }, { "epoch": 142.33, "learning_rate": 1.8175438596491226e-05, "loss": 0.0004, "step": 9536 }, { "epoch": 142.34, "learning_rate": 1.814035087719298e-05, "loss": 0.0002, "step": 9537 }, { "epoch": 142.36, "learning_rate": 1.8105263157894734e-05, "loss": 0.0004, "step": 9538 }, { "epoch": 142.37, "learning_rate": 1.807017543859649e-05, "loss": 0.0002, "step": 9539 }, { "epoch": 142.39, "learning_rate": 1.8035087719298245e-05, "loss": 0.0004, "step": 9540 }, { "epoch": 142.4, "learning_rate": 1.7999999999999997e-05, "loss": 0.0002, "step": 9541 }, { "epoch": 142.42, "learning_rate": 1.7964912280701753e-05, "loss": 0.0002, "step": 9542 }, { "epoch": 142.43, "learning_rate": 1.7929824561403508e-05, "loss": 0.0002, "step": 9543 }, { "epoch": 142.45, "learning_rate": 1.789473684210526e-05, "loss": 0.0058, "step": 9544 }, { "epoch": 142.46, "learning_rate": 1.7859649122807016e-05, "loss": 0.0002, "step": 9545 }, { "epoch": 142.48, "learning_rate": 1.782456140350877e-05, "loss": 0.0003, "step": 9546 }, { "epoch": 142.49, "learning_rate": 1.7789473684210524e-05, "loss": 0.0002, "step": 9547 }, { "epoch": 142.51, "learning_rate": 1.775438596491228e-05, "loss": 0.0005, "step": 9548 }, { "epoch": 142.52, "learning_rate": 1.7719298245614035e-05, "loss": 0.0001, "step": 9549 }, { "epoch": 142.54, "learning_rate": 1.7684210526315787e-05, "loss": 0.0003, "step": 9550 }, { "epoch": 142.55, "learning_rate": 1.7649122807017542e-05, "loss": 0.0002, "step": 9551 }, { "epoch": 142.57, "learning_rate": 1.7614035087719298e-05, "loss": 0.0003, "step": 9552 }, { "epoch": 142.58, "learning_rate": 1.757894736842105e-05, "loss": 0.0002, "step": 9553 }, { "epoch": 142.59, "learning_rate": 1.7543859649122806e-05, "loss": 0.0002, "step": 9554 }, { "epoch": 142.61, "learning_rate": 1.750877192982456e-05, "loss": 0.0002, "step": 9555 }, { "epoch": 142.62, "learning_rate": 1.7473684210526313e-05, "loss": 0.0585, "step": 9556 }, { "epoch": 142.64, "learning_rate": 1.743859649122807e-05, "loss": 0.0004, "step": 9557 }, { "epoch": 142.65, "learning_rate": 1.7403508771929825e-05, "loss": 0.0002, "step": 9558 }, { "epoch": 142.67, "learning_rate": 1.7368421052631577e-05, "loss": 0.0002, "step": 9559 }, { "epoch": 142.68, "learning_rate": 1.7333333333333332e-05, "loss": 0.0002, "step": 9560 }, { "epoch": 142.7, "learning_rate": 1.7298245614035088e-05, "loss": 0.0002, "step": 9561 }, { "epoch": 142.71, "learning_rate": 1.726315789473684e-05, "loss": 0.0002, "step": 9562 }, { "epoch": 142.73, "learning_rate": 1.7228070175438596e-05, "loss": 0.0002, "step": 9563 }, { "epoch": 142.74, "learning_rate": 1.719298245614035e-05, "loss": 0.0002, "step": 9564 }, { "epoch": 142.76, "learning_rate": 1.7157894736842103e-05, "loss": 0.0002, "step": 9565 }, { "epoch": 142.77, "learning_rate": 1.712280701754386e-05, "loss": 0.0328, "step": 9566 }, { "epoch": 142.79, "learning_rate": 1.7087719298245614e-05, "loss": 0.0002, "step": 9567 }, { "epoch": 142.8, "learning_rate": 1.7052631578947367e-05, "loss": 0.0002, "step": 9568 }, { "epoch": 142.82, "learning_rate": 1.7017543859649122e-05, "loss": 0.0002, "step": 9569 }, { "epoch": 142.83, "learning_rate": 1.6982456140350878e-05, "loss": 0.0002, "step": 9570 }, { "epoch": 142.85, "learning_rate": 1.694736842105263e-05, "loss": 0.0002, "step": 9571 }, { "epoch": 142.86, "learning_rate": 1.6912280701754385e-05, "loss": 0.0002, "step": 9572 }, { "epoch": 142.88, "learning_rate": 1.687719298245614e-05, "loss": 0.0002, "step": 9573 }, { "epoch": 142.89, "learning_rate": 1.6842105263157893e-05, "loss": 0.0001, "step": 9574 }, { "epoch": 142.91, "learning_rate": 1.680701754385965e-05, "loss": 0.0001, "step": 9575 }, { "epoch": 142.92, "learning_rate": 1.67719298245614e-05, "loss": 0.0002, "step": 9576 }, { "epoch": 142.94, "learning_rate": 1.6736842105263156e-05, "loss": 0.0002, "step": 9577 }, { "epoch": 142.95, "learning_rate": 1.6701754385964912e-05, "loss": 0.0002, "step": 9578 }, { "epoch": 142.97, "learning_rate": 1.6666666666666664e-05, "loss": 0.0002, "step": 9579 }, { "epoch": 142.98, "learning_rate": 1.663157894736842e-05, "loss": 0.0002, "step": 9580 }, { "epoch": 143.0, "learning_rate": 1.6596491228070175e-05, "loss": 0.0002, "step": 9581 }, { "epoch": 143.01, "learning_rate": 1.6561403508771927e-05, "loss": 0.0002, "step": 9582 }, { "epoch": 143.03, "learning_rate": 1.6526315789473683e-05, "loss": 0.0002, "step": 9583 }, { "epoch": 143.04, "learning_rate": 1.649122807017544e-05, "loss": 0.0018, "step": 9584 }, { "epoch": 143.06, "learning_rate": 1.645614035087719e-05, "loss": 0.0002, "step": 9585 }, { "epoch": 143.07, "learning_rate": 1.6421052631578946e-05, "loss": 0.0169, "step": 9586 }, { "epoch": 143.09, "learning_rate": 1.6385964912280702e-05, "loss": 0.0004, "step": 9587 }, { "epoch": 143.1, "learning_rate": 1.6350877192982454e-05, "loss": 0.0002, "step": 9588 }, { "epoch": 143.12, "learning_rate": 1.631578947368421e-05, "loss": 0.0002, "step": 9589 }, { "epoch": 143.13, "learning_rate": 1.6280701754385965e-05, "loss": 0.0002, "step": 9590 }, { "epoch": 143.15, "learning_rate": 1.6245614035087717e-05, "loss": 0.0002, "step": 9591 }, { "epoch": 143.16, "learning_rate": 1.6210526315789473e-05, "loss": 0.0002, "step": 9592 }, { "epoch": 143.18, "learning_rate": 1.6175438596491228e-05, "loss": 0.0002, "step": 9593 }, { "epoch": 143.19, "learning_rate": 1.614035087719298e-05, "loss": 0.0002, "step": 9594 }, { "epoch": 143.21, "learning_rate": 1.6105263157894736e-05, "loss": 0.0002, "step": 9595 }, { "epoch": 143.22, "learning_rate": 1.607017543859649e-05, "loss": 0.0002, "step": 9596 }, { "epoch": 143.24, "learning_rate": 1.6035087719298244e-05, "loss": 0.0865, "step": 9597 }, { "epoch": 143.25, "learning_rate": 1.6e-05, "loss": 0.0002, "step": 9598 }, { "epoch": 143.27, "learning_rate": 1.5964912280701755e-05, "loss": 0.0003, "step": 9599 }, { "epoch": 143.28, "learning_rate": 1.5929824561403507e-05, "loss": 0.0002, "step": 9600 }, { "epoch": 143.28, "eval_accuracy": 0.8913362701908958, "eval_f1": 0.8906088969378417, "eval_loss": 0.6095054745674133, "eval_runtime": 344.9289, "eval_samples_per_second": 11.846, "eval_steps_per_second": 0.742, "step": 9600 }, { "epoch": 143.3, "learning_rate": 1.589473684210526e-05, "loss": 0.0002, "step": 9601 }, { "epoch": 143.31, "learning_rate": 1.5859649122807018e-05, "loss": 0.0002, "step": 9602 }, { "epoch": 143.33, "learning_rate": 1.582456140350877e-05, "loss": 0.0002, "step": 9603 }, { "epoch": 143.34, "learning_rate": 1.5789473684210522e-05, "loss": 0.0002, "step": 9604 }, { "epoch": 143.36, "learning_rate": 1.575438596491228e-05, "loss": 0.0002, "step": 9605 }, { "epoch": 143.37, "learning_rate": 1.5719298245614034e-05, "loss": 0.0002, "step": 9606 }, { "epoch": 143.39, "learning_rate": 1.5684210526315786e-05, "loss": 0.0002, "step": 9607 }, { "epoch": 143.4, "learning_rate": 1.5649122807017545e-05, "loss": 0.0006, "step": 9608 }, { "epoch": 143.42, "learning_rate": 1.5614035087719297e-05, "loss": 0.0002, "step": 9609 }, { "epoch": 143.43, "learning_rate": 1.557894736842105e-05, "loss": 0.0002, "step": 9610 }, { "epoch": 143.45, "learning_rate": 1.5543859649122808e-05, "loss": 0.1441, "step": 9611 }, { "epoch": 143.46, "learning_rate": 1.550877192982456e-05, "loss": 0.0002, "step": 9612 }, { "epoch": 143.48, "learning_rate": 1.5473684210526312e-05, "loss": 0.0002, "step": 9613 }, { "epoch": 143.49, "learning_rate": 1.543859649122807e-05, "loss": 0.0002, "step": 9614 }, { "epoch": 143.51, "learning_rate": 1.5403508771929823e-05, "loss": 0.0002, "step": 9615 }, { "epoch": 143.52, "learning_rate": 1.5368421052631576e-05, "loss": 0.0002, "step": 9616 }, { "epoch": 143.54, "learning_rate": 1.5333333333333334e-05, "loss": 0.0002, "step": 9617 }, { "epoch": 143.55, "learning_rate": 1.5298245614035087e-05, "loss": 0.0002, "step": 9618 }, { "epoch": 143.57, "learning_rate": 1.526315789473684e-05, "loss": 0.0002, "step": 9619 }, { "epoch": 143.58, "learning_rate": 1.5228070175438596e-05, "loss": 0.0002, "step": 9620 }, { "epoch": 143.59, "learning_rate": 1.519298245614035e-05, "loss": 0.0003, "step": 9621 }, { "epoch": 143.61, "learning_rate": 1.5157894736842104e-05, "loss": 0.0002, "step": 9622 }, { "epoch": 143.62, "learning_rate": 1.5122807017543858e-05, "loss": 0.0003, "step": 9623 }, { "epoch": 143.64, "learning_rate": 1.5087719298245613e-05, "loss": 0.0002, "step": 9624 }, { "epoch": 143.65, "learning_rate": 1.5052631578947367e-05, "loss": 0.0002, "step": 9625 }, { "epoch": 143.67, "learning_rate": 1.5017543859649121e-05, "loss": 0.0002, "step": 9626 }, { "epoch": 143.68, "learning_rate": 1.4982456140350876e-05, "loss": 0.0007, "step": 9627 }, { "epoch": 143.7, "learning_rate": 1.494736842105263e-05, "loss": 0.0002, "step": 9628 }, { "epoch": 143.71, "learning_rate": 1.4912280701754384e-05, "loss": 0.0002, "step": 9629 }, { "epoch": 143.73, "learning_rate": 1.487719298245614e-05, "loss": 0.0002, "step": 9630 }, { "epoch": 143.74, "learning_rate": 1.4842105263157894e-05, "loss": 0.0002, "step": 9631 }, { "epoch": 143.76, "learning_rate": 1.4807017543859647e-05, "loss": 0.0002, "step": 9632 }, { "epoch": 143.77, "learning_rate": 1.4771929824561403e-05, "loss": 0.0002, "step": 9633 }, { "epoch": 143.79, "learning_rate": 1.4736842105263157e-05, "loss": 0.0002, "step": 9634 }, { "epoch": 143.8, "learning_rate": 1.470175438596491e-05, "loss": 0.0003, "step": 9635 }, { "epoch": 143.82, "learning_rate": 1.4666666666666666e-05, "loss": 0.1042, "step": 9636 }, { "epoch": 143.83, "learning_rate": 1.463157894736842e-05, "loss": 0.0003, "step": 9637 }, { "epoch": 143.85, "learning_rate": 1.4596491228070174e-05, "loss": 0.0002, "step": 9638 }, { "epoch": 143.86, "learning_rate": 1.456140350877193e-05, "loss": 0.0002, "step": 9639 }, { "epoch": 143.88, "learning_rate": 1.4526315789473683e-05, "loss": 0.0002, "step": 9640 }, { "epoch": 143.89, "learning_rate": 1.4491228070175437e-05, "loss": 0.0002, "step": 9641 }, { "epoch": 143.91, "learning_rate": 1.4456140350877193e-05, "loss": 0.0002, "step": 9642 }, { "epoch": 143.92, "learning_rate": 1.4421052631578947e-05, "loss": 0.0002, "step": 9643 }, { "epoch": 143.94, "learning_rate": 1.43859649122807e-05, "loss": 0.0001, "step": 9644 }, { "epoch": 143.95, "learning_rate": 1.4350877192982456e-05, "loss": 0.0002, "step": 9645 }, { "epoch": 143.97, "learning_rate": 1.431578947368421e-05, "loss": 0.0002, "step": 9646 }, { "epoch": 143.98, "learning_rate": 1.4280701754385964e-05, "loss": 0.0945, "step": 9647 }, { "epoch": 144.0, "learning_rate": 1.4245614035087718e-05, "loss": 0.0022, "step": 9648 }, { "epoch": 144.01, "learning_rate": 1.4210526315789473e-05, "loss": 0.0151, "step": 9649 }, { "epoch": 144.03, "learning_rate": 1.4175438596491227e-05, "loss": 0.0002, "step": 9650 }, { "epoch": 144.04, "learning_rate": 1.4140350877192981e-05, "loss": 0.0002, "step": 9651 }, { "epoch": 144.06, "learning_rate": 1.4105263157894737e-05, "loss": 0.0002, "step": 9652 }, { "epoch": 144.07, "learning_rate": 1.407017543859649e-05, "loss": 0.0001, "step": 9653 }, { "epoch": 144.09, "learning_rate": 1.4035087719298244e-05, "loss": 0.0002, "step": 9654 }, { "epoch": 144.1, "learning_rate": 1.4e-05, "loss": 0.0003, "step": 9655 }, { "epoch": 144.12, "learning_rate": 1.3964912280701754e-05, "loss": 0.0001, "step": 9656 }, { "epoch": 144.13, "learning_rate": 1.3929824561403508e-05, "loss": 0.0002, "step": 9657 }, { "epoch": 144.15, "learning_rate": 1.3894736842105263e-05, "loss": 0.0001, "step": 9658 }, { "epoch": 144.16, "learning_rate": 1.3859649122807017e-05, "loss": 0.0002, "step": 9659 }, { "epoch": 144.18, "learning_rate": 1.382456140350877e-05, "loss": 0.0002, "step": 9660 }, { "epoch": 144.19, "learning_rate": 1.3789473684210526e-05, "loss": 0.0002, "step": 9661 }, { "epoch": 144.21, "learning_rate": 1.375438596491228e-05, "loss": 0.0002, "step": 9662 }, { "epoch": 144.22, "learning_rate": 1.3719298245614034e-05, "loss": 0.0002, "step": 9663 }, { "epoch": 144.24, "learning_rate": 1.368421052631579e-05, "loss": 0.0002, "step": 9664 }, { "epoch": 144.25, "learning_rate": 1.3649122807017543e-05, "loss": 0.0002, "step": 9665 }, { "epoch": 144.27, "learning_rate": 1.3614035087719297e-05, "loss": 0.0002, "step": 9666 }, { "epoch": 144.28, "learning_rate": 1.3578947368421053e-05, "loss": 0.0002, "step": 9667 }, { "epoch": 144.3, "learning_rate": 1.3543859649122807e-05, "loss": 0.0003, "step": 9668 }, { "epoch": 144.31, "learning_rate": 1.350877192982456e-05, "loss": 0.0002, "step": 9669 }, { "epoch": 144.33, "learning_rate": 1.3473684210526313e-05, "loss": 0.0004, "step": 9670 }, { "epoch": 144.34, "learning_rate": 1.343859649122807e-05, "loss": 0.0002, "step": 9671 }, { "epoch": 144.36, "learning_rate": 1.3403508771929824e-05, "loss": 0.0098, "step": 9672 }, { "epoch": 144.37, "learning_rate": 1.3368421052631576e-05, "loss": 0.0002, "step": 9673 }, { "epoch": 144.39, "learning_rate": 1.3333333333333333e-05, "loss": 0.0002, "step": 9674 }, { "epoch": 144.4, "learning_rate": 1.3298245614035087e-05, "loss": 0.0002, "step": 9675 }, { "epoch": 144.42, "learning_rate": 1.326315789473684e-05, "loss": 0.0001, "step": 9676 }, { "epoch": 144.43, "learning_rate": 1.3228070175438597e-05, "loss": 0.0001, "step": 9677 }, { "epoch": 144.45, "learning_rate": 1.319298245614035e-05, "loss": 0.0002, "step": 9678 }, { "epoch": 144.46, "learning_rate": 1.3157894736842103e-05, "loss": 0.0068, "step": 9679 }, { "epoch": 144.48, "learning_rate": 1.312280701754386e-05, "loss": 0.0002, "step": 9680 }, { "epoch": 144.49, "learning_rate": 1.3087719298245614e-05, "loss": 0.0005, "step": 9681 }, { "epoch": 144.51, "learning_rate": 1.3052631578947366e-05, "loss": 0.0002, "step": 9682 }, { "epoch": 144.52, "learning_rate": 1.3017543859649123e-05, "loss": 0.0002, "step": 9683 }, { "epoch": 144.54, "learning_rate": 1.2982456140350877e-05, "loss": 0.0002, "step": 9684 }, { "epoch": 144.55, "learning_rate": 1.2947368421052629e-05, "loss": 0.049, "step": 9685 }, { "epoch": 144.57, "learning_rate": 1.2912280701754386e-05, "loss": 0.0002, "step": 9686 }, { "epoch": 144.58, "learning_rate": 1.287719298245614e-05, "loss": 0.3249, "step": 9687 }, { "epoch": 144.59, "learning_rate": 1.2842105263157892e-05, "loss": 0.0002, "step": 9688 }, { "epoch": 144.61, "learning_rate": 1.280701754385965e-05, "loss": 0.0002, "step": 9689 }, { "epoch": 144.62, "learning_rate": 1.2771929824561404e-05, "loss": 0.0002, "step": 9690 }, { "epoch": 144.64, "learning_rate": 1.2736842105263156e-05, "loss": 0.0002, "step": 9691 }, { "epoch": 144.65, "learning_rate": 1.270175438596491e-05, "loss": 0.0001, "step": 9692 }, { "epoch": 144.67, "learning_rate": 1.2666666666666665e-05, "loss": 0.0002, "step": 9693 }, { "epoch": 144.68, "learning_rate": 1.2631578947368419e-05, "loss": 0.0002, "step": 9694 }, { "epoch": 144.7, "learning_rate": 1.2596491228070173e-05, "loss": 0.0002, "step": 9695 }, { "epoch": 144.71, "learning_rate": 1.2561403508771928e-05, "loss": 0.0002, "step": 9696 }, { "epoch": 144.73, "learning_rate": 1.2526315789473682e-05, "loss": 0.0002, "step": 9697 }, { "epoch": 144.74, "learning_rate": 1.2491228070175436e-05, "loss": 0.0001, "step": 9698 }, { "epoch": 144.76, "learning_rate": 1.2456140350877192e-05, "loss": 0.0012, "step": 9699 }, { "epoch": 144.77, "learning_rate": 1.2421052631578946e-05, "loss": 0.0013, "step": 9700 }, { "epoch": 144.79, "learning_rate": 1.23859649122807e-05, "loss": 0.0001, "step": 9701 }, { "epoch": 144.8, "learning_rate": 1.2350877192982455e-05, "loss": 0.0005, "step": 9702 }, { "epoch": 144.82, "learning_rate": 1.2315789473684209e-05, "loss": 0.0002, "step": 9703 }, { "epoch": 144.83, "learning_rate": 1.2280701754385963e-05, "loss": 0.0002, "step": 9704 }, { "epoch": 144.85, "learning_rate": 1.2245614035087718e-05, "loss": 0.0002, "step": 9705 }, { "epoch": 144.86, "learning_rate": 1.2210526315789472e-05, "loss": 0.0003, "step": 9706 }, { "epoch": 144.88, "learning_rate": 1.2175438596491226e-05, "loss": 0.0002, "step": 9707 }, { "epoch": 144.89, "learning_rate": 1.2140350877192981e-05, "loss": 0.0002, "step": 9708 }, { "epoch": 144.91, "learning_rate": 1.2105263157894735e-05, "loss": 0.0002, "step": 9709 }, { "epoch": 144.92, "learning_rate": 1.207017543859649e-05, "loss": 0.0002, "step": 9710 }, { "epoch": 144.94, "learning_rate": 1.2035087719298245e-05, "loss": 0.0002, "step": 9711 }, { "epoch": 144.95, "learning_rate": 1.1999999999999999e-05, "loss": 0.0002, "step": 9712 }, { "epoch": 144.97, "learning_rate": 1.1964912280701752e-05, "loss": 0.0002, "step": 9713 }, { "epoch": 144.98, "learning_rate": 1.1929824561403508e-05, "loss": 0.0002, "step": 9714 }, { "epoch": 145.0, "learning_rate": 1.1894736842105262e-05, "loss": 0.0002, "step": 9715 }, { "epoch": 145.01, "learning_rate": 1.1859649122807016e-05, "loss": 0.0002, "step": 9716 }, { "epoch": 145.03, "learning_rate": 1.182456140350877e-05, "loss": 0.0542, "step": 9717 }, { "epoch": 145.04, "learning_rate": 1.1789473684210525e-05, "loss": 0.0002, "step": 9718 }, { "epoch": 145.06, "learning_rate": 1.1754385964912279e-05, "loss": 0.0002, "step": 9719 }, { "epoch": 145.07, "learning_rate": 1.1719298245614033e-05, "loss": 0.0002, "step": 9720 }, { "epoch": 145.09, "learning_rate": 1.1684210526315788e-05, "loss": 0.0002, "step": 9721 }, { "epoch": 145.1, "learning_rate": 1.1649122807017542e-05, "loss": 0.0002, "step": 9722 }, { "epoch": 145.12, "learning_rate": 1.1614035087719296e-05, "loss": 0.0002, "step": 9723 }, { "epoch": 145.13, "learning_rate": 1.1578947368421052e-05, "loss": 0.0002, "step": 9724 }, { "epoch": 145.15, "learning_rate": 1.1543859649122806e-05, "loss": 0.0002, "step": 9725 }, { "epoch": 145.16, "learning_rate": 1.150877192982456e-05, "loss": 0.0002, "step": 9726 }, { "epoch": 145.18, "learning_rate": 1.1473684210526315e-05, "loss": 0.0002, "step": 9727 }, { "epoch": 145.19, "learning_rate": 1.1438596491228069e-05, "loss": 0.0002, "step": 9728 }, { "epoch": 145.21, "learning_rate": 1.1403508771929823e-05, "loss": 0.0002, "step": 9729 }, { "epoch": 145.22, "learning_rate": 1.1368421052631578e-05, "loss": 0.0002, "step": 9730 }, { "epoch": 145.24, "learning_rate": 1.1333333333333332e-05, "loss": 0.0002, "step": 9731 }, { "epoch": 145.25, "learning_rate": 1.1298245614035086e-05, "loss": 0.0002, "step": 9732 }, { "epoch": 145.27, "learning_rate": 1.1263157894736842e-05, "loss": 0.0004, "step": 9733 }, { "epoch": 145.28, "learning_rate": 1.1228070175438595e-05, "loss": 0.0002, "step": 9734 }, { "epoch": 145.3, "learning_rate": 1.119298245614035e-05, "loss": 0.0002, "step": 9735 }, { "epoch": 145.31, "learning_rate": 1.1157894736842105e-05, "loss": 0.0009, "step": 9736 }, { "epoch": 145.33, "learning_rate": 1.1122807017543859e-05, "loss": 0.0002, "step": 9737 }, { "epoch": 145.34, "learning_rate": 1.1087719298245613e-05, "loss": 0.0002, "step": 9738 }, { "epoch": 145.36, "learning_rate": 1.1052631578947366e-05, "loss": 0.0002, "step": 9739 }, { "epoch": 145.37, "learning_rate": 1.1017543859649122e-05, "loss": 0.0002, "step": 9740 }, { "epoch": 145.39, "learning_rate": 1.0982456140350876e-05, "loss": 0.0002, "step": 9741 }, { "epoch": 145.4, "learning_rate": 1.094736842105263e-05, "loss": 0.0002, "step": 9742 }, { "epoch": 145.42, "learning_rate": 1.0912280701754385e-05, "loss": 0.0002, "step": 9743 }, { "epoch": 145.43, "learning_rate": 1.0877192982456139e-05, "loss": 0.0002, "step": 9744 }, { "epoch": 145.45, "learning_rate": 1.0842105263157893e-05, "loss": 0.0002, "step": 9745 }, { "epoch": 145.46, "learning_rate": 1.0807017543859648e-05, "loss": 0.0002, "step": 9746 }, { "epoch": 145.48, "learning_rate": 1.0771929824561402e-05, "loss": 0.0002, "step": 9747 }, { "epoch": 145.49, "learning_rate": 1.0736842105263156e-05, "loss": 0.0002, "step": 9748 }, { "epoch": 145.51, "learning_rate": 1.0701754385964912e-05, "loss": 0.0002, "step": 9749 }, { "epoch": 145.52, "learning_rate": 1.0666666666666666e-05, "loss": 0.0006, "step": 9750 }, { "epoch": 145.54, "learning_rate": 1.063157894736842e-05, "loss": 0.0002, "step": 9751 }, { "epoch": 145.55, "learning_rate": 1.0596491228070175e-05, "loss": 0.0002, "step": 9752 }, { "epoch": 145.57, "learning_rate": 1.0561403508771929e-05, "loss": 0.0002, "step": 9753 }, { "epoch": 145.58, "learning_rate": 1.0526315789473683e-05, "loss": 0.0002, "step": 9754 }, { "epoch": 145.59, "learning_rate": 1.0491228070175438e-05, "loss": 0.0002, "step": 9755 }, { "epoch": 145.61, "learning_rate": 1.0456140350877192e-05, "loss": 0.0002, "step": 9756 }, { "epoch": 145.62, "learning_rate": 1.0421052631578946e-05, "loss": 0.0002, "step": 9757 }, { "epoch": 145.64, "learning_rate": 1.0385964912280702e-05, "loss": 0.0002, "step": 9758 }, { "epoch": 145.65, "learning_rate": 1.0350877192982455e-05, "loss": 0.0002, "step": 9759 }, { "epoch": 145.67, "learning_rate": 1.031578947368421e-05, "loss": 0.0001, "step": 9760 }, { "epoch": 145.68, "learning_rate": 1.0280701754385965e-05, "loss": 0.0002, "step": 9761 }, { "epoch": 145.7, "learning_rate": 1.0245614035087719e-05, "loss": 0.0004, "step": 9762 }, { "epoch": 145.71, "learning_rate": 1.0210526315789473e-05, "loss": 0.0005, "step": 9763 }, { "epoch": 145.73, "learning_rate": 1.0175438596491226e-05, "loss": 0.0002, "step": 9764 }, { "epoch": 145.74, "learning_rate": 1.0140350877192982e-05, "loss": 0.0002, "step": 9765 }, { "epoch": 145.76, "learning_rate": 1.0105263157894736e-05, "loss": 0.0002, "step": 9766 }, { "epoch": 145.77, "learning_rate": 1.007017543859649e-05, "loss": 0.0002, "step": 9767 }, { "epoch": 145.79, "learning_rate": 1.0035087719298245e-05, "loss": 0.0001, "step": 9768 }, { "epoch": 145.8, "learning_rate": 9.999999999999999e-06, "loss": 0.0002, "step": 9769 }, { "epoch": 145.82, "learning_rate": 9.964912280701753e-06, "loss": 0.0003, "step": 9770 }, { "epoch": 145.83, "learning_rate": 9.929824561403509e-06, "loss": 0.0002, "step": 9771 }, { "epoch": 145.85, "learning_rate": 9.894736842105262e-06, "loss": 0.0002, "step": 9772 }, { "epoch": 145.86, "learning_rate": 9.859649122807016e-06, "loss": 0.0009, "step": 9773 }, { "epoch": 145.88, "learning_rate": 9.824561403508772e-06, "loss": 0.0002, "step": 9774 }, { "epoch": 145.89, "learning_rate": 9.789473684210526e-06, "loss": 0.0002, "step": 9775 }, { "epoch": 145.91, "learning_rate": 9.75438596491228e-06, "loss": 0.0802, "step": 9776 }, { "epoch": 145.92, "learning_rate": 9.719298245614035e-06, "loss": 0.029, "step": 9777 }, { "epoch": 145.94, "learning_rate": 9.684210526315789e-06, "loss": 0.0002, "step": 9778 }, { "epoch": 145.95, "learning_rate": 9.649122807017543e-06, "loss": 0.0002, "step": 9779 }, { "epoch": 145.97, "learning_rate": 9.614035087719298e-06, "loss": 0.0002, "step": 9780 }, { "epoch": 145.98, "learning_rate": 9.578947368421052e-06, "loss": 0.0001, "step": 9781 }, { "epoch": 146.0, "learning_rate": 9.543859649122806e-06, "loss": 0.0002, "step": 9782 }, { "epoch": 146.01, "learning_rate": 9.508771929824562e-06, "loss": 0.0002, "step": 9783 }, { "epoch": 146.03, "learning_rate": 9.473684210526315e-06, "loss": 0.0002, "step": 9784 }, { "epoch": 146.04, "learning_rate": 9.43859649122807e-06, "loss": 0.0003, "step": 9785 }, { "epoch": 146.06, "learning_rate": 9.403508771929823e-06, "loss": 0.0002, "step": 9786 }, { "epoch": 146.07, "learning_rate": 9.368421052631579e-06, "loss": 0.0002, "step": 9787 }, { "epoch": 146.09, "learning_rate": 9.333333333333333e-06, "loss": 0.0002, "step": 9788 }, { "epoch": 146.1, "learning_rate": 9.298245614035086e-06, "loss": 0.0002, "step": 9789 }, { "epoch": 146.12, "learning_rate": 9.263157894736842e-06, "loss": 0.0002, "step": 9790 }, { "epoch": 146.13, "learning_rate": 9.228070175438596e-06, "loss": 0.0002, "step": 9791 }, { "epoch": 146.15, "learning_rate": 9.19298245614035e-06, "loss": 0.0002, "step": 9792 }, { "epoch": 146.16, "learning_rate": 9.157894736842104e-06, "loss": 0.0002, "step": 9793 }, { "epoch": 146.18, "learning_rate": 9.12280701754386e-06, "loss": 0.0002, "step": 9794 }, { "epoch": 146.19, "learning_rate": 9.087719298245613e-06, "loss": 0.0002, "step": 9795 }, { "epoch": 146.21, "learning_rate": 9.052631578947367e-06, "loss": 0.0002, "step": 9796 }, { "epoch": 146.22, "learning_rate": 9.017543859649122e-06, "loss": 0.0004, "step": 9797 }, { "epoch": 146.24, "learning_rate": 8.982456140350876e-06, "loss": 0.0002, "step": 9798 }, { "epoch": 146.25, "learning_rate": 8.94736842105263e-06, "loss": 0.0001, "step": 9799 }, { "epoch": 146.27, "learning_rate": 8.912280701754386e-06, "loss": 0.0002, "step": 9800 }, { "epoch": 146.27, "eval_accuracy": 0.8913362701908958, "eval_f1": 0.8909876245610147, "eval_loss": 0.6056233644485474, "eval_runtime": 345.3935, "eval_samples_per_second": 11.83, "eval_steps_per_second": 0.741, "step": 9800 }, { "epoch": 146.28, "learning_rate": 8.87719298245614e-06, "loss": 0.0002, "step": 9801 }, { "epoch": 146.3, "learning_rate": 8.842105263157893e-06, "loss": 0.0002, "step": 9802 }, { "epoch": 146.31, "learning_rate": 8.807017543859649e-06, "loss": 0.0002, "step": 9803 }, { "epoch": 146.33, "learning_rate": 8.771929824561403e-06, "loss": 0.0002, "step": 9804 }, { "epoch": 146.34, "learning_rate": 8.736842105263157e-06, "loss": 0.0002, "step": 9805 }, { "epoch": 146.36, "learning_rate": 8.701754385964912e-06, "loss": 0.0696, "step": 9806 }, { "epoch": 146.37, "learning_rate": 8.666666666666666e-06, "loss": 0.0002, "step": 9807 }, { "epoch": 146.39, "learning_rate": 8.63157894736842e-06, "loss": 0.0002, "step": 9808 }, { "epoch": 146.4, "learning_rate": 8.596491228070176e-06, "loss": 0.0002, "step": 9809 }, { "epoch": 146.42, "learning_rate": 8.56140350877193e-06, "loss": 0.0002, "step": 9810 }, { "epoch": 146.43, "learning_rate": 8.526315789473683e-06, "loss": 0.0002, "step": 9811 }, { "epoch": 146.45, "learning_rate": 8.491228070175439e-06, "loss": 0.0002, "step": 9812 }, { "epoch": 146.46, "learning_rate": 8.456140350877193e-06, "loss": 0.0002, "step": 9813 }, { "epoch": 146.48, "learning_rate": 8.421052631578947e-06, "loss": 0.0002, "step": 9814 }, { "epoch": 146.49, "learning_rate": 8.3859649122807e-06, "loss": 0.1118, "step": 9815 }, { "epoch": 146.51, "learning_rate": 8.350877192982456e-06, "loss": 0.0002, "step": 9816 }, { "epoch": 146.52, "learning_rate": 8.31578947368421e-06, "loss": 0.0014, "step": 9817 }, { "epoch": 146.54, "learning_rate": 8.280701754385964e-06, "loss": 0.0001, "step": 9818 }, { "epoch": 146.55, "learning_rate": 8.24561403508772e-06, "loss": 0.0002, "step": 9819 }, { "epoch": 146.57, "learning_rate": 8.210526315789473e-06, "loss": 0.0002, "step": 9820 }, { "epoch": 146.58, "learning_rate": 8.175438596491227e-06, "loss": 0.0005, "step": 9821 }, { "epoch": 146.59, "learning_rate": 8.140350877192983e-06, "loss": 0.0003, "step": 9822 }, { "epoch": 146.61, "learning_rate": 8.105263157894736e-06, "loss": 0.0002, "step": 9823 }, { "epoch": 146.62, "learning_rate": 8.07017543859649e-06, "loss": 0.0002, "step": 9824 }, { "epoch": 146.64, "learning_rate": 8.035087719298246e-06, "loss": 0.0002, "step": 9825 }, { "epoch": 146.65, "learning_rate": 8e-06, "loss": 0.0002, "step": 9826 }, { "epoch": 146.67, "learning_rate": 7.964912280701753e-06, "loss": 0.0012, "step": 9827 }, { "epoch": 146.68, "learning_rate": 7.929824561403509e-06, "loss": 0.0002, "step": 9828 }, { "epoch": 146.7, "learning_rate": 7.894736842105261e-06, "loss": 0.0003, "step": 9829 }, { "epoch": 146.71, "learning_rate": 7.859649122807017e-06, "loss": 0.0002, "step": 9830 }, { "epoch": 146.73, "learning_rate": 7.824561403508772e-06, "loss": 0.0002, "step": 9831 }, { "epoch": 146.74, "learning_rate": 7.789473684210524e-06, "loss": 0.0002, "step": 9832 }, { "epoch": 146.76, "learning_rate": 7.75438596491228e-06, "loss": 0.0002, "step": 9833 }, { "epoch": 146.77, "learning_rate": 7.719298245614036e-06, "loss": 0.0002, "step": 9834 }, { "epoch": 146.79, "learning_rate": 7.684210526315788e-06, "loss": 0.0002, "step": 9835 }, { "epoch": 146.8, "learning_rate": 7.649122807017543e-06, "loss": 0.0002, "step": 9836 }, { "epoch": 146.82, "learning_rate": 7.614035087719298e-06, "loss": 0.0002, "step": 9837 }, { "epoch": 146.83, "learning_rate": 7.578947368421052e-06, "loss": 0.0004, "step": 9838 }, { "epoch": 146.85, "learning_rate": 7.543859649122807e-06, "loss": 0.0002, "step": 9839 }, { "epoch": 146.86, "learning_rate": 7.5087719298245605e-06, "loss": 0.0002, "step": 9840 }, { "epoch": 146.88, "learning_rate": 7.473684210526315e-06, "loss": 0.0002, "step": 9841 }, { "epoch": 146.89, "learning_rate": 7.43859649122807e-06, "loss": 0.0002, "step": 9842 }, { "epoch": 146.91, "learning_rate": 7.403508771929824e-06, "loss": 0.0001, "step": 9843 }, { "epoch": 146.92, "learning_rate": 7.3684210526315784e-06, "loss": 0.0001, "step": 9844 }, { "epoch": 146.94, "learning_rate": 7.333333333333333e-06, "loss": 0.0002, "step": 9845 }, { "epoch": 146.95, "learning_rate": 7.298245614035087e-06, "loss": 0.0004, "step": 9846 }, { "epoch": 146.97, "learning_rate": 7.263157894736842e-06, "loss": 0.0003, "step": 9847 }, { "epoch": 146.98, "learning_rate": 7.228070175438596e-06, "loss": 0.0001, "step": 9848 }, { "epoch": 147.0, "learning_rate": 7.19298245614035e-06, "loss": 0.0002, "step": 9849 }, { "epoch": 147.01, "learning_rate": 7.157894736842105e-06, "loss": 0.0002, "step": 9850 }, { "epoch": 147.03, "learning_rate": 7.122807017543859e-06, "loss": 0.0002, "step": 9851 }, { "epoch": 147.04, "learning_rate": 7.0877192982456136e-06, "loss": 0.0002, "step": 9852 }, { "epoch": 147.06, "learning_rate": 7.052631578947368e-06, "loss": 0.0002, "step": 9853 }, { "epoch": 147.07, "learning_rate": 7.017543859649122e-06, "loss": 0.0002, "step": 9854 }, { "epoch": 147.09, "learning_rate": 6.982456140350877e-06, "loss": 0.0002, "step": 9855 }, { "epoch": 147.1, "learning_rate": 6.9473684210526315e-06, "loss": 0.0002, "step": 9856 }, { "epoch": 147.12, "learning_rate": 6.912280701754385e-06, "loss": 0.0002, "step": 9857 }, { "epoch": 147.13, "learning_rate": 6.87719298245614e-06, "loss": 0.0002, "step": 9858 }, { "epoch": 147.15, "learning_rate": 6.842105263157895e-06, "loss": 0.0002, "step": 9859 }, { "epoch": 147.16, "learning_rate": 6.807017543859649e-06, "loss": 0.0002, "step": 9860 }, { "epoch": 147.18, "learning_rate": 6.771929824561403e-06, "loss": 0.0002, "step": 9861 }, { "epoch": 147.19, "learning_rate": 6.736842105263156e-06, "loss": 0.0002, "step": 9862 }, { "epoch": 147.21, "learning_rate": 6.701754385964912e-06, "loss": 0.0002, "step": 9863 }, { "epoch": 147.22, "learning_rate": 6.666666666666667e-06, "loss": 0.0002, "step": 9864 }, { "epoch": 147.24, "learning_rate": 6.63157894736842e-06, "loss": 0.0004, "step": 9865 }, { "epoch": 147.25, "learning_rate": 6.596491228070175e-06, "loss": 0.0002, "step": 9866 }, { "epoch": 147.27, "learning_rate": 6.56140350877193e-06, "loss": 0.0002, "step": 9867 }, { "epoch": 147.28, "learning_rate": 6.526315789473683e-06, "loss": 0.0002, "step": 9868 }, { "epoch": 147.3, "learning_rate": 6.4912280701754385e-06, "loss": 0.0001, "step": 9869 }, { "epoch": 147.31, "learning_rate": 6.456140350877193e-06, "loss": 0.0002, "step": 9870 }, { "epoch": 147.33, "learning_rate": 6.421052631578946e-06, "loss": 0.0002, "step": 9871 }, { "epoch": 147.34, "learning_rate": 6.385964912280702e-06, "loss": 0.0004, "step": 9872 }, { "epoch": 147.36, "learning_rate": 6.350877192982455e-06, "loss": 0.0002, "step": 9873 }, { "epoch": 147.37, "learning_rate": 6.3157894736842095e-06, "loss": 0.0002, "step": 9874 }, { "epoch": 147.39, "learning_rate": 6.280701754385964e-06, "loss": 0.0002, "step": 9875 }, { "epoch": 147.4, "learning_rate": 6.245614035087718e-06, "loss": 0.0002, "step": 9876 }, { "epoch": 147.42, "learning_rate": 6.210526315789473e-06, "loss": 0.0002, "step": 9877 }, { "epoch": 147.43, "learning_rate": 6.1754385964912275e-06, "loss": 0.0002, "step": 9878 }, { "epoch": 147.45, "learning_rate": 6.140350877192981e-06, "loss": 0.0002, "step": 9879 }, { "epoch": 147.46, "learning_rate": 6.105263157894736e-06, "loss": 0.0002, "step": 9880 }, { "epoch": 147.48, "learning_rate": 6.070175438596491e-06, "loss": 0.0002, "step": 9881 }, { "epoch": 147.49, "learning_rate": 6.035087719298245e-06, "loss": 0.0002, "step": 9882 }, { "epoch": 147.51, "learning_rate": 5.999999999999999e-06, "loss": 0.0217, "step": 9883 }, { "epoch": 147.52, "learning_rate": 5.964912280701754e-06, "loss": 0.0003, "step": 9884 }, { "epoch": 147.54, "learning_rate": 5.929824561403508e-06, "loss": 0.0002, "step": 9885 }, { "epoch": 147.55, "learning_rate": 5.894736842105263e-06, "loss": 0.0002, "step": 9886 }, { "epoch": 147.57, "learning_rate": 5.8596491228070164e-06, "loss": 0.0001, "step": 9887 }, { "epoch": 147.58, "learning_rate": 5.824561403508771e-06, "loss": 0.0003, "step": 9888 }, { "epoch": 147.59, "learning_rate": 5.789473684210526e-06, "loss": 0.0002, "step": 9889 }, { "epoch": 147.61, "learning_rate": 5.75438596491228e-06, "loss": 0.0005, "step": 9890 }, { "epoch": 147.62, "learning_rate": 5.719298245614034e-06, "loss": 0.0002, "step": 9891 }, { "epoch": 147.64, "learning_rate": 5.684210526315789e-06, "loss": 0.0004, "step": 9892 }, { "epoch": 147.65, "learning_rate": 5.649122807017543e-06, "loss": 0.0002, "step": 9893 }, { "epoch": 147.67, "learning_rate": 5.614035087719298e-06, "loss": 0.0001, "step": 9894 }, { "epoch": 147.68, "learning_rate": 5.578947368421052e-06, "loss": 0.0002, "step": 9895 }, { "epoch": 147.7, "learning_rate": 5.543859649122806e-06, "loss": 0.0002, "step": 9896 }, { "epoch": 147.71, "learning_rate": 5.508771929824561e-06, "loss": 0.0002, "step": 9897 }, { "epoch": 147.73, "learning_rate": 5.473684210526315e-06, "loss": 0.0001, "step": 9898 }, { "epoch": 147.74, "learning_rate": 5.4385964912280695e-06, "loss": 0.0002, "step": 9899 }, { "epoch": 147.76, "learning_rate": 5.403508771929824e-06, "loss": 0.0002, "step": 9900 }, { "epoch": 147.77, "learning_rate": 5.368421052631578e-06, "loss": 0.0004, "step": 9901 }, { "epoch": 147.79, "learning_rate": 5.333333333333333e-06, "loss": 0.0039, "step": 9902 }, { "epoch": 147.8, "learning_rate": 5.2982456140350875e-06, "loss": 0.0002, "step": 9903 }, { "epoch": 147.82, "learning_rate": 5.263157894736841e-06, "loss": 0.0002, "step": 9904 }, { "epoch": 147.83, "learning_rate": 5.228070175438596e-06, "loss": 0.0002, "step": 9905 }, { "epoch": 147.85, "learning_rate": 5.192982456140351e-06, "loss": 0.0002, "step": 9906 }, { "epoch": 147.86, "learning_rate": 5.157894736842105e-06, "loss": 0.0001, "step": 9907 }, { "epoch": 147.88, "learning_rate": 5.122807017543859e-06, "loss": 0.0092, "step": 9908 }, { "epoch": 147.89, "learning_rate": 5.087719298245613e-06, "loss": 0.0002, "step": 9909 }, { "epoch": 147.91, "learning_rate": 5.052631578947368e-06, "loss": 0.0002, "step": 9910 }, { "epoch": 147.92, "learning_rate": 5.017543859649123e-06, "loss": 0.0002, "step": 9911 }, { "epoch": 147.94, "learning_rate": 4.9824561403508765e-06, "loss": 0.0002, "step": 9912 }, { "epoch": 147.95, "learning_rate": 4.947368421052631e-06, "loss": 0.0002, "step": 9913 }, { "epoch": 147.97, "learning_rate": 4.912280701754386e-06, "loss": 0.0002, "step": 9914 }, { "epoch": 147.98, "learning_rate": 4.87719298245614e-06, "loss": 0.0002, "step": 9915 }, { "epoch": 148.0, "learning_rate": 4.8421052631578945e-06, "loss": 0.0002, "step": 9916 }, { "epoch": 148.01, "learning_rate": 4.807017543859649e-06, "loss": 0.0004, "step": 9917 }, { "epoch": 148.03, "learning_rate": 4.771929824561403e-06, "loss": 0.0002, "step": 9918 }, { "epoch": 148.04, "learning_rate": 4.736842105263158e-06, "loss": 0.0002, "step": 9919 }, { "epoch": 148.06, "learning_rate": 4.701754385964912e-06, "loss": 0.0002, "step": 9920 }, { "epoch": 148.07, "learning_rate": 4.666666666666666e-06, "loss": 0.0002, "step": 9921 }, { "epoch": 148.09, "learning_rate": 4.631578947368421e-06, "loss": 0.0002, "step": 9922 }, { "epoch": 148.1, "learning_rate": 4.596491228070175e-06, "loss": 0.0002, "step": 9923 }, { "epoch": 148.12, "learning_rate": 4.56140350877193e-06, "loss": 0.0003, "step": 9924 }, { "epoch": 148.13, "learning_rate": 4.5263157894736834e-06, "loss": 0.0001, "step": 9925 }, { "epoch": 148.15, "learning_rate": 4.491228070175438e-06, "loss": 0.0003, "step": 9926 }, { "epoch": 148.16, "learning_rate": 4.456140350877193e-06, "loss": 0.0002, "step": 9927 }, { "epoch": 148.18, "learning_rate": 4.421052631578947e-06, "loss": 0.0002, "step": 9928 }, { "epoch": 148.19, "learning_rate": 4.3859649122807014e-06, "loss": 0.0002, "step": 9929 }, { "epoch": 148.21, "learning_rate": 4.350877192982456e-06, "loss": 0.0006, "step": 9930 }, { "epoch": 148.22, "learning_rate": 4.31578947368421e-06, "loss": 0.0001, "step": 9931 }, { "epoch": 148.24, "learning_rate": 4.280701754385965e-06, "loss": 0.0002, "step": 9932 }, { "epoch": 148.25, "learning_rate": 4.280701754385965e-06, "loss": 0.1554, "step": 9933 }, { "epoch": 148.27, "learning_rate": 4.245614035087719e-06, "loss": 0.0004, "step": 9934 }, { "epoch": 148.28, "learning_rate": 4.210526315789473e-06, "loss": 0.0002, "step": 9935 }, { "epoch": 148.3, "learning_rate": 4.175438596491228e-06, "loss": 0.0001, "step": 9936 }, { "epoch": 148.31, "learning_rate": 4.140350877192982e-06, "loss": 0.0002, "step": 9937 }, { "epoch": 148.33, "learning_rate": 4.1052631578947365e-06, "loss": 0.0002, "step": 9938 }, { "epoch": 148.34, "learning_rate": 4.070175438596491e-06, "loss": 0.0002, "step": 9939 }, { "epoch": 148.36, "learning_rate": 4.035087719298245e-06, "loss": 0.0002, "step": 9940 }, { "epoch": 148.37, "learning_rate": 4e-06, "loss": 0.0003, "step": 9941 }, { "epoch": 148.39, "learning_rate": 3.9649122807017545e-06, "loss": 0.0002, "step": 9942 }, { "epoch": 148.4, "learning_rate": 3.929824561403508e-06, "loss": 0.0002, "step": 9943 }, { "epoch": 148.42, "learning_rate": 3.894736842105262e-06, "loss": 0.0001, "step": 9944 }, { "epoch": 148.43, "learning_rate": 3.859649122807018e-06, "loss": 0.0014, "step": 9945 }, { "epoch": 148.45, "learning_rate": 3.824561403508772e-06, "loss": 0.0005, "step": 9946 }, { "epoch": 148.46, "learning_rate": 3.789473684210526e-06, "loss": 0.0002, "step": 9947 }, { "epoch": 148.48, "learning_rate": 3.7543859649122802e-06, "loss": 0.0002, "step": 9948 }, { "epoch": 148.49, "learning_rate": 3.719298245614035e-06, "loss": 0.0002, "step": 9949 }, { "epoch": 148.51, "learning_rate": 3.6842105263157892e-06, "loss": 0.0002, "step": 9950 }, { "epoch": 148.52, "learning_rate": 3.6491228070175435e-06, "loss": 0.0002, "step": 9951 }, { "epoch": 148.54, "learning_rate": 3.614035087719298e-06, "loss": 0.0001, "step": 9952 }, { "epoch": 148.55, "learning_rate": 3.5789473684210525e-06, "loss": 0.0002, "step": 9953 }, { "epoch": 148.57, "learning_rate": 3.5438596491228068e-06, "loss": 0.003, "step": 9954 }, { "epoch": 148.58, "learning_rate": 3.508771929824561e-06, "loss": 0.0002, "step": 9955 }, { "epoch": 148.59, "learning_rate": 3.4736842105263158e-06, "loss": 0.0002, "step": 9956 }, { "epoch": 148.61, "learning_rate": 3.43859649122807e-06, "loss": 0.0182, "step": 9957 }, { "epoch": 148.62, "learning_rate": 3.4035087719298243e-06, "loss": 0.0002, "step": 9958 }, { "epoch": 148.64, "learning_rate": 3.368421052631578e-06, "loss": 0.0002, "step": 9959 }, { "epoch": 148.65, "learning_rate": 3.3333333333333333e-06, "loss": 0.0002, "step": 9960 }, { "epoch": 148.67, "learning_rate": 3.2982456140350876e-06, "loss": 0.0002, "step": 9961 }, { "epoch": 148.68, "learning_rate": 3.2631578947368415e-06, "loss": 0.0002, "step": 9962 }, { "epoch": 148.7, "learning_rate": 3.2280701754385966e-06, "loss": 0.0002, "step": 9963 }, { "epoch": 148.71, "learning_rate": 3.192982456140351e-06, "loss": 0.0002, "step": 9964 }, { "epoch": 148.73, "learning_rate": 3.1578947368421047e-06, "loss": 0.0002, "step": 9965 }, { "epoch": 148.74, "learning_rate": 3.122807017543859e-06, "loss": 0.0002, "step": 9966 }, { "epoch": 148.76, "learning_rate": 3.0877192982456137e-06, "loss": 0.0002, "step": 9967 }, { "epoch": 148.77, "learning_rate": 3.052631578947368e-06, "loss": 0.0002, "step": 9968 }, { "epoch": 148.79, "learning_rate": 3.0175438596491223e-06, "loss": 0.0002, "step": 9969 }, { "epoch": 148.8, "learning_rate": 2.982456140350877e-06, "loss": 0.0002, "step": 9970 }, { "epoch": 148.82, "learning_rate": 2.9473684210526313e-06, "loss": 0.0002, "step": 9971 }, { "epoch": 148.83, "learning_rate": 2.9122807017543856e-06, "loss": 0.0002, "step": 9972 }, { "epoch": 148.85, "learning_rate": 2.87719298245614e-06, "loss": 0.0002, "step": 9973 }, { "epoch": 148.86, "learning_rate": 2.8421052631578946e-06, "loss": 0.0002, "step": 9974 }, { "epoch": 148.88, "learning_rate": 2.807017543859649e-06, "loss": 0.0002, "step": 9975 }, { "epoch": 148.89, "learning_rate": 2.771929824561403e-06, "loss": 0.0002, "step": 9976 }, { "epoch": 148.91, "learning_rate": 2.7368421052631574e-06, "loss": 0.0001, "step": 9977 }, { "epoch": 148.92, "learning_rate": 2.701754385964912e-06, "loss": 0.0002, "step": 9978 }, { "epoch": 148.94, "learning_rate": 2.6666666666666664e-06, "loss": 0.0019, "step": 9979 }, { "epoch": 148.95, "learning_rate": 2.6315789473684207e-06, "loss": 0.0003, "step": 9980 }, { "epoch": 148.97, "learning_rate": 2.5964912280701754e-06, "loss": 0.0002, "step": 9981 }, { "epoch": 148.98, "learning_rate": 2.5614035087719297e-06, "loss": 0.0002, "step": 9982 }, { "epoch": 149.0, "learning_rate": 2.526315789473684e-06, "loss": 0.0001, "step": 9983 }, { "epoch": 149.01, "learning_rate": 2.4912280701754382e-06, "loss": 0.0002, "step": 9984 }, { "epoch": 149.03, "learning_rate": 2.456140350877193e-06, "loss": 0.0977, "step": 9985 }, { "epoch": 149.04, "learning_rate": 2.4210526315789472e-06, "loss": 0.0002, "step": 9986 }, { "epoch": 149.06, "learning_rate": 2.3859649122807015e-06, "loss": 0.0002, "step": 9987 }, { "epoch": 149.07, "learning_rate": 2.350877192982456e-06, "loss": 0.0002, "step": 9988 }, { "epoch": 149.09, "learning_rate": 2.3157894736842105e-06, "loss": 0.0002, "step": 9989 }, { "epoch": 149.1, "learning_rate": 2.280701754385965e-06, "loss": 0.0002, "step": 9990 }, { "epoch": 149.12, "learning_rate": 2.245614035087719e-06, "loss": 0.0002, "step": 9991 }, { "epoch": 149.13, "learning_rate": 2.2105263157894734e-06, "loss": 0.0002, "step": 9992 }, { "epoch": 149.15, "learning_rate": 2.175438596491228e-06, "loss": 0.0002, "step": 9993 }, { "epoch": 149.16, "learning_rate": 2.1403508771929824e-06, "loss": 0.1007, "step": 9994 }, { "epoch": 149.18, "learning_rate": 2.1052631578947366e-06, "loss": 0.0002, "step": 9995 }, { "epoch": 149.19, "learning_rate": 2.070175438596491e-06, "loss": 0.0002, "step": 9996 }, { "epoch": 149.21, "learning_rate": 2.0350877192982456e-06, "loss": 0.0002, "step": 9997 }, { "epoch": 149.22, "learning_rate": 2e-06, "loss": 0.0002, "step": 9998 }, { "epoch": 149.24, "learning_rate": 1.964912280701754e-06, "loss": 0.0002, "step": 9999 }, { "epoch": 149.25, "learning_rate": 1.929824561403509e-06, "loss": 0.0002, "step": 10000 }, { "epoch": 149.25, "eval_accuracy": 0.8920704845814978, "eval_f1": 0.8917851618940525, "eval_loss": 0.5978561639785767, "eval_runtime": 345.0023, "eval_samples_per_second": 11.843, "eval_steps_per_second": 0.742, "step": 10000 }, { "epoch": 149.27, "learning_rate": 1.894736842105263e-06, "loss": 0.0002, "step": 10001 }, { "epoch": 149.28, "learning_rate": 1.8596491228070175e-06, "loss": 0.0001, "step": 10002 }, { "epoch": 149.3, "learning_rate": 1.8245614035087718e-06, "loss": 0.0001, "step": 10003 }, { "epoch": 149.31, "learning_rate": 1.7894736842105262e-06, "loss": 0.0002, "step": 10004 }, { "epoch": 149.33, "learning_rate": 1.7543859649122805e-06, "loss": 0.0002, "step": 10005 }, { "epoch": 149.34, "learning_rate": 1.719298245614035e-06, "loss": 0.0002, "step": 10006 }, { "epoch": 149.36, "learning_rate": 1.684210526315789e-06, "loss": 0.0002, "step": 10007 }, { "epoch": 149.37, "learning_rate": 1.6491228070175438e-06, "loss": 0.0002, "step": 10008 }, { "epoch": 149.39, "learning_rate": 1.6140350877192983e-06, "loss": 0.0001, "step": 10009 }, { "epoch": 149.4, "learning_rate": 1.5789473684210524e-06, "loss": 0.0002, "step": 10010 }, { "epoch": 149.42, "learning_rate": 1.5438596491228069e-06, "loss": 0.0003, "step": 10011 }, { "epoch": 149.43, "learning_rate": 1.5087719298245611e-06, "loss": 0.0002, "step": 10012 }, { "epoch": 149.45, "learning_rate": 1.4736842105263156e-06, "loss": 0.0002, "step": 10013 }, { "epoch": 149.46, "learning_rate": 1.43859649122807e-06, "loss": 0.0027, "step": 10014 }, { "epoch": 149.48, "learning_rate": 1.4035087719298244e-06, "loss": 0.0002, "step": 10015 }, { "epoch": 149.49, "learning_rate": 1.3684210526315787e-06, "loss": 0.0001, "step": 10016 }, { "epoch": 149.51, "learning_rate": 1.3333333333333332e-06, "loss": 0.0002, "step": 10017 }, { "epoch": 149.52, "learning_rate": 1.2982456140350877e-06, "loss": 0.0002, "step": 10018 }, { "epoch": 149.54, "learning_rate": 1.263157894736842e-06, "loss": 0.0002, "step": 10019 }, { "epoch": 149.55, "learning_rate": 1.2280701754385965e-06, "loss": 0.0002, "step": 10020 }, { "epoch": 149.57, "learning_rate": 1.1929824561403508e-06, "loss": 0.0002, "step": 10021 }, { "epoch": 149.58, "learning_rate": 1.1578947368421053e-06, "loss": 0.0002, "step": 10022 }, { "epoch": 149.59, "learning_rate": 1.1228070175438595e-06, "loss": 0.0002, "step": 10023 }, { "epoch": 149.61, "learning_rate": 1.087719298245614e-06, "loss": 0.0002, "step": 10024 }, { "epoch": 149.62, "learning_rate": 1.0526315789473683e-06, "loss": 0.0002, "step": 10025 }, { "epoch": 149.64, "learning_rate": 1.0175438596491228e-06, "loss": 0.0002, "step": 10026 }, { "epoch": 149.65, "learning_rate": 9.82456140350877e-07, "loss": 0.0002, "step": 10027 }, { "epoch": 149.67, "learning_rate": 9.473684210526315e-07, "loss": 0.0002, "step": 10028 }, { "epoch": 149.68, "learning_rate": 9.122807017543859e-07, "loss": 0.0003, "step": 10029 }, { "epoch": 149.7, "learning_rate": 8.771929824561403e-07, "loss": 0.0025, "step": 10030 }, { "epoch": 149.71, "learning_rate": 8.421052631578945e-07, "loss": 0.0769, "step": 10031 }, { "epoch": 149.73, "learning_rate": 8.070175438596491e-07, "loss": 0.0002, "step": 10032 }, { "epoch": 149.74, "learning_rate": 7.719298245614034e-07, "loss": 0.0001, "step": 10033 }, { "epoch": 149.76, "learning_rate": 7.368421052631578e-07, "loss": 0.0003, "step": 10034 }, { "epoch": 149.77, "learning_rate": 7.017543859649122e-07, "loss": 0.0002, "step": 10035 }, { "epoch": 149.79, "learning_rate": 6.666666666666666e-07, "loss": 0.0001, "step": 10036 }, { "epoch": 149.8, "learning_rate": 6.31578947368421e-07, "loss": 0.0002, "step": 10037 }, { "epoch": 149.82, "learning_rate": 5.964912280701754e-07, "loss": 0.0002, "step": 10038 }, { "epoch": 149.83, "learning_rate": 5.614035087719298e-07, "loss": 0.0002, "step": 10039 }, { "epoch": 149.85, "learning_rate": 5.263157894736842e-07, "loss": 0.0002, "step": 10040 }, { "epoch": 149.86, "learning_rate": 4.912280701754385e-07, "loss": 0.0002, "step": 10041 }, { "epoch": 149.88, "learning_rate": 4.5614035087719294e-07, "loss": 0.0003, "step": 10042 }, { "epoch": 149.89, "learning_rate": 4.210526315789473e-07, "loss": 0.0002, "step": 10043 }, { "epoch": 149.91, "learning_rate": 3.859649122807017e-07, "loss": 0.0002, "step": 10044 }, { "epoch": 149.92, "learning_rate": 3.508771929824561e-07, "loss": 0.0145, "step": 10045 }, { "epoch": 149.94, "learning_rate": 3.157894736842105e-07, "loss": 0.0002, "step": 10046 }, { "epoch": 149.95, "learning_rate": 2.807017543859649e-07, "loss": 0.2392, "step": 10047 }, { "epoch": 149.97, "learning_rate": 2.456140350877193e-07, "loss": 0.0002, "step": 10048 }, { "epoch": 149.98, "learning_rate": 2.1052631578947364e-07, "loss": 0.0002, "step": 10049 }, { "epoch": 150.0, "learning_rate": 1.7543859649122805e-07, "loss": 0.0001, "step": 10050 }, { "epoch": 150.0, "step": 10050, "total_flos": 1.3732582771233128e+20, "train_loss": 0.2665309856276014, "train_runtime": 44149.9371, "train_samples_per_second": 7.308, "train_steps_per_second": 0.228 } ], "max_steps": 10050, "num_train_epochs": 150, "total_flos": 1.3732582771233128e+20, "trial_name": null, "trial_params": null }