diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,54340 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 19.027484143763214, + "global_step": 18000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 5e-05, + "loss": 8.7413, + "step": 2 + }, + { + "epoch": 0.0, + "learning_rate": 5e-05, + "loss": 7.938, + "step": 4 + }, + { + "epoch": 0.01, + "learning_rate": 5e-05, + "loss": 8.3007, + "step": 6 + }, + { + "epoch": 0.01, + "learning_rate": 4.999471458773785e-05, + "loss": 7.1444, + "step": 8 + }, + { + "epoch": 0.01, + "learning_rate": 4.998942917547569e-05, + "loss": 9.6766, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984143763213534e-05, + "loss": 5.474, + "step": 12 + }, + { + "epoch": 0.01, + "learning_rate": 4.997885835095137e-05, + "loss": 4.2579, + "step": 14 + }, + { + "epoch": 0.02, + "learning_rate": 4.9973572938689226e-05, + "loss": 3.893, + "step": 16 + }, + { + "epoch": 0.02, + "learning_rate": 4.9968287526427065e-05, + "loss": 4.3634, + "step": 18 + }, + { + "epoch": 0.02, + "learning_rate": 4.996300211416491e-05, + "loss": 4.1838, + "step": 20 + }, + { + "epoch": 0.02, + "learning_rate": 4.995771670190275e-05, + "loss": 3.5253, + "step": 22 + }, + { + "epoch": 0.03, + "learning_rate": 4.9952431289640597e-05, + "loss": 3.3387, + "step": 24 + }, + { + "epoch": 0.03, + "learning_rate": 4.9947145877378436e-05, + "loss": 2.9198, + "step": 26 + }, + { + "epoch": 0.03, + "learning_rate": 4.994186046511628e-05, + "loss": 2.8243, + "step": 28 + }, + { + "epoch": 0.03, + "learning_rate": 4.993657505285412e-05, + "loss": 2.5279, + "step": 30 + }, + { + "epoch": 0.03, + "learning_rate": 4.993128964059197e-05, + "loss": 2.3727, + "step": 32 + }, + { + "epoch": 0.04, + "learning_rate": 4.992600422832981e-05, + "loss": 2.4728, + "step": 34 + }, + { + "epoch": 0.04, + "learning_rate": 4.992071881606766e-05, + "loss": 2.4637, + "step": 36 + }, + { + "epoch": 0.04, + "learning_rate": 4.99154334038055e-05, + "loss": 2.4645, + "step": 38 + }, + { + "epoch": 0.04, + "learning_rate": 4.9910147991543345e-05, + "loss": 2.4394, + "step": 40 + }, + { + "epoch": 0.04, + "learning_rate": 4.9904862579281184e-05, + "loss": 2.5194, + "step": 42 + }, + { + "epoch": 0.05, + "learning_rate": 4.989957716701903e-05, + "loss": 2.9867, + "step": 44 + }, + { + "epoch": 0.05, + "learning_rate": 4.989429175475687e-05, + "loss": 2.2137, + "step": 46 + }, + { + "epoch": 0.05, + "learning_rate": 4.9889006342494715e-05, + "loss": 2.6259, + "step": 48 + }, + { + "epoch": 0.05, + "learning_rate": 4.9883720930232555e-05, + "loss": 2.768, + "step": 50 + }, + { + "epoch": 0.05, + "learning_rate": 4.987843551797041e-05, + "loss": 1.9139, + "step": 52 + }, + { + "epoch": 0.06, + "learning_rate": 4.987315010570825e-05, + "loss": 1.8541, + "step": 54 + }, + { + "epoch": 0.06, + "learning_rate": 4.986786469344609e-05, + "loss": 1.9566, + "step": 56 + }, + { + "epoch": 0.06, + "learning_rate": 4.986257928118393e-05, + "loss": 1.7323, + "step": 58 + }, + { + "epoch": 0.06, + "learning_rate": 4.985729386892178e-05, + "loss": 1.8061, + "step": 60 + }, + { + "epoch": 0.07, + "learning_rate": 4.9852008456659624e-05, + "loss": 1.8183, + "step": 62 + }, + { + "epoch": 0.07, + "learning_rate": 4.9846723044397464e-05, + "loss": 1.5695, + "step": 64 + }, + { + "epoch": 0.07, + "learning_rate": 4.984143763213531e-05, + "loss": 1.5084, + "step": 66 + }, + { + "epoch": 0.07, + "learning_rate": 4.983615221987315e-05, + "loss": 1.5239, + "step": 68 + }, + { + "epoch": 0.07, + "learning_rate": 4.9830866807611e-05, + "loss": 1.1437, + "step": 70 + }, + { + "epoch": 0.08, + "learning_rate": 4.982558139534884e-05, + "loss": 1.7891, + "step": 72 + }, + { + "epoch": 0.08, + "learning_rate": 4.982029598308669e-05, + "loss": 1.8291, + "step": 74 + }, + { + "epoch": 0.08, + "learning_rate": 4.9815010570824526e-05, + "loss": 1.6036, + "step": 76 + }, + { + "epoch": 0.08, + "learning_rate": 4.980972515856237e-05, + "loss": 1.2715, + "step": 78 + }, + { + "epoch": 0.08, + "learning_rate": 4.980443974630021e-05, + "loss": 1.1591, + "step": 80 + }, + { + "epoch": 0.09, + "learning_rate": 4.979915433403806e-05, + "loss": 1.6209, + "step": 82 + }, + { + "epoch": 0.09, + "learning_rate": 4.97938689217759e-05, + "loss": 1.478, + "step": 84 + }, + { + "epoch": 0.09, + "learning_rate": 4.978858350951374e-05, + "loss": 0.986, + "step": 86 + }, + { + "epoch": 0.09, + "learning_rate": 4.978329809725159e-05, + "loss": 1.5824, + "step": 88 + }, + { + "epoch": 0.1, + "learning_rate": 4.9778012684989435e-05, + "loss": 1.3657, + "step": 90 + }, + { + "epoch": 0.1, + "learning_rate": 4.9772727272727275e-05, + "loss": 1.3425, + "step": 92 + }, + { + "epoch": 0.1, + "learning_rate": 4.976744186046512e-05, + "loss": 1.1693, + "step": 94 + }, + { + "epoch": 0.1, + "learning_rate": 4.976215644820296e-05, + "loss": 1.3152, + "step": 96 + }, + { + "epoch": 0.1, + "learning_rate": 4.9756871035940806e-05, + "loss": 0.9633, + "step": 98 + }, + { + "epoch": 0.11, + "learning_rate": 4.9751585623678645e-05, + "loss": 1.0875, + "step": 100 + }, + { + "epoch": 0.11, + "learning_rate": 4.974630021141649e-05, + "loss": 0.9981, + "step": 102 + }, + { + "epoch": 0.11, + "learning_rate": 4.974101479915434e-05, + "loss": 1.003, + "step": 104 + }, + { + "epoch": 0.11, + "learning_rate": 4.9735729386892183e-05, + "loss": 1.1275, + "step": 106 + }, + { + "epoch": 0.11, + "learning_rate": 4.973044397463002e-05, + "loss": 1.2824, + "step": 108 + }, + { + "epoch": 0.12, + "learning_rate": 4.972515856236787e-05, + "loss": 1.2133, + "step": 110 + }, + { + "epoch": 0.12, + "learning_rate": 4.971987315010571e-05, + "loss": 1.1642, + "step": 112 + }, + { + "epoch": 0.12, + "learning_rate": 4.9714587737843554e-05, + "loss": 1.0921, + "step": 114 + }, + { + "epoch": 0.12, + "learning_rate": 4.97093023255814e-05, + "loss": 1.3712, + "step": 116 + }, + { + "epoch": 0.12, + "learning_rate": 4.970401691331924e-05, + "loss": 1.1114, + "step": 118 + }, + { + "epoch": 0.13, + "learning_rate": 4.9698731501057085e-05, + "loss": 1.1718, + "step": 120 + }, + { + "epoch": 0.13, + "learning_rate": 4.9693446088794925e-05, + "loss": 1.0484, + "step": 122 + }, + { + "epoch": 0.13, + "learning_rate": 4.968816067653278e-05, + "loss": 0.8344, + "step": 124 + }, + { + "epoch": 0.13, + "learning_rate": 4.968287526427062e-05, + "loss": 1.4522, + "step": 126 + }, + { + "epoch": 0.14, + "learning_rate": 4.967758985200846e-05, + "loss": 2.1993, + "step": 128 + }, + { + "epoch": 0.14, + "learning_rate": 4.96723044397463e-05, + "loss": 1.0746, + "step": 130 + }, + { + "epoch": 0.14, + "learning_rate": 4.966701902748415e-05, + "loss": 0.9988, + "step": 132 + }, + { + "epoch": 0.14, + "learning_rate": 4.966173361522199e-05, + "loss": 1.04, + "step": 134 + }, + { + "epoch": 0.14, + "learning_rate": 4.9656448202959834e-05, + "loss": 1.434, + "step": 136 + }, + { + "epoch": 0.15, + "learning_rate": 4.965116279069767e-05, + "loss": 1.0161, + "step": 138 + }, + { + "epoch": 0.15, + "learning_rate": 4.964587737843552e-05, + "loss": 1.3142, + "step": 140 + }, + { + "epoch": 0.15, + "learning_rate": 4.9640591966173365e-05, + "loss": 1.1679, + "step": 142 + }, + { + "epoch": 0.15, + "learning_rate": 4.963530655391121e-05, + "loss": 1.3562, + "step": 144 + }, + { + "epoch": 0.15, + "learning_rate": 4.963002114164905e-05, + "loss": 1.2236, + "step": 146 + }, + { + "epoch": 0.16, + "learning_rate": 4.9624735729386896e-05, + "loss": 1.2391, + "step": 148 + }, + { + "epoch": 0.16, + "learning_rate": 4.9619450317124736e-05, + "loss": 1.1668, + "step": 150 + }, + { + "epoch": 0.16, + "learning_rate": 4.961416490486258e-05, + "loss": 1.3492, + "step": 152 + }, + { + "epoch": 0.16, + "learning_rate": 4.960887949260042e-05, + "loss": 0.8301, + "step": 154 + }, + { + "epoch": 0.16, + "learning_rate": 4.960359408033827e-05, + "loss": 1.2563, + "step": 156 + }, + { + "epoch": 0.17, + "learning_rate": 4.959830866807611e-05, + "loss": 0.9544, + "step": 158 + }, + { + "epoch": 0.17, + "learning_rate": 4.959302325581396e-05, + "loss": 1.0004, + "step": 160 + }, + { + "epoch": 0.17, + "learning_rate": 4.95877378435518e-05, + "loss": 1.2484, + "step": 162 + }, + { + "epoch": 0.17, + "learning_rate": 4.9582452431289645e-05, + "loss": 1.2366, + "step": 164 + }, + { + "epoch": 0.18, + "learning_rate": 4.957716701902749e-05, + "loss": 0.9897, + "step": 166 + }, + { + "epoch": 0.18, + "learning_rate": 4.957188160676533e-05, + "loss": 1.0405, + "step": 168 + }, + { + "epoch": 0.18, + "learning_rate": 4.9566596194503176e-05, + "loss": 0.9499, + "step": 170 + }, + { + "epoch": 0.18, + "learning_rate": 4.9561310782241015e-05, + "loss": 1.1503, + "step": 172 + }, + { + "epoch": 0.18, + "learning_rate": 4.955602536997886e-05, + "loss": 0.9555, + "step": 174 + }, + { + "epoch": 0.19, + "learning_rate": 4.95507399577167e-05, + "loss": 0.9089, + "step": 176 + }, + { + "epoch": 0.19, + "learning_rate": 4.9545454545454553e-05, + "loss": 1.3379, + "step": 178 + }, + { + "epoch": 0.19, + "learning_rate": 4.954016913319239e-05, + "loss": 0.8154, + "step": 180 + }, + { + "epoch": 0.19, + "learning_rate": 4.953488372093024e-05, + "loss": 0.669, + "step": 182 + }, + { + "epoch": 0.19, + "learning_rate": 4.952959830866808e-05, + "loss": 0.9411, + "step": 184 + }, + { + "epoch": 0.2, + "learning_rate": 4.9524312896405924e-05, + "loss": 1.1204, + "step": 186 + }, + { + "epoch": 0.2, + "learning_rate": 4.9519027484143763e-05, + "loss": 1.1171, + "step": 188 + }, + { + "epoch": 0.2, + "learning_rate": 4.951374207188161e-05, + "loss": 1.167, + "step": 190 + }, + { + "epoch": 0.2, + "learning_rate": 4.950845665961945e-05, + "loss": 0.85, + "step": 192 + }, + { + "epoch": 0.21, + "learning_rate": 4.9503171247357295e-05, + "loss": 0.9862, + "step": 194 + }, + { + "epoch": 0.21, + "learning_rate": 4.949788583509514e-05, + "loss": 1.0706, + "step": 196 + }, + { + "epoch": 0.21, + "learning_rate": 4.949260042283299e-05, + "loss": 0.9666, + "step": 198 + }, + { + "epoch": 0.21, + "learning_rate": 4.9487315010570826e-05, + "loss": 0.9068, + "step": 200 + }, + { + "epoch": 0.21, + "learning_rate": 4.948202959830867e-05, + "loss": 0.7309, + "step": 202 + }, + { + "epoch": 0.22, + "learning_rate": 4.947674418604651e-05, + "loss": 1.3316, + "step": 204 + }, + { + "epoch": 0.22, + "learning_rate": 4.947145877378436e-05, + "loss": 1.0254, + "step": 206 + }, + { + "epoch": 0.22, + "learning_rate": 4.94661733615222e-05, + "loss": 0.8078, + "step": 208 + }, + { + "epoch": 0.22, + "learning_rate": 4.946088794926004e-05, + "loss": 0.9885, + "step": 210 + }, + { + "epoch": 0.22, + "learning_rate": 4.945560253699789e-05, + "loss": 1.3213, + "step": 212 + }, + { + "epoch": 0.23, + "learning_rate": 4.9450317124735735e-05, + "loss": 1.0121, + "step": 214 + }, + { + "epoch": 0.23, + "learning_rate": 4.9445031712473574e-05, + "loss": 1.2225, + "step": 216 + }, + { + "epoch": 0.23, + "learning_rate": 4.943974630021142e-05, + "loss": 1.207, + "step": 218 + }, + { + "epoch": 0.23, + "learning_rate": 4.9434460887949266e-05, + "loss": 0.6599, + "step": 220 + }, + { + "epoch": 0.23, + "learning_rate": 4.9429175475687106e-05, + "loss": 0.9893, + "step": 222 + }, + { + "epoch": 0.24, + "learning_rate": 4.942389006342495e-05, + "loss": 0.8, + "step": 224 + }, + { + "epoch": 0.24, + "learning_rate": 4.941860465116279e-05, + "loss": 1.1942, + "step": 226 + }, + { + "epoch": 0.24, + "learning_rate": 4.941331923890064e-05, + "loss": 1.3698, + "step": 228 + }, + { + "epoch": 0.24, + "learning_rate": 4.9408033826638476e-05, + "loss": 1.0476, + "step": 230 + }, + { + "epoch": 0.25, + "learning_rate": 4.940274841437633e-05, + "loss": 1.1916, + "step": 232 + }, + { + "epoch": 0.25, + "learning_rate": 4.939746300211417e-05, + "loss": 1.2326, + "step": 234 + }, + { + "epoch": 0.25, + "learning_rate": 4.9392177589852015e-05, + "loss": 0.9399, + "step": 236 + }, + { + "epoch": 0.25, + "learning_rate": 4.9386892177589854e-05, + "loss": 1.2254, + "step": 238 + }, + { + "epoch": 0.25, + "learning_rate": 4.93816067653277e-05, + "loss": 0.9529, + "step": 240 + }, + { + "epoch": 0.26, + "learning_rate": 4.937632135306554e-05, + "loss": 1.0299, + "step": 242 + }, + { + "epoch": 0.26, + "learning_rate": 4.9371035940803385e-05, + "loss": 0.7292, + "step": 244 + }, + { + "epoch": 0.26, + "learning_rate": 4.9365750528541225e-05, + "loss": 0.9186, + "step": 246 + }, + { + "epoch": 0.26, + "learning_rate": 4.936046511627907e-05, + "loss": 0.7604, + "step": 248 + }, + { + "epoch": 0.26, + "learning_rate": 4.935517970401692e-05, + "loss": 0.8169, + "step": 250 + }, + { + "epoch": 0.27, + "learning_rate": 4.934989429175476e-05, + "loss": 0.973, + "step": 252 + }, + { + "epoch": 0.27, + "learning_rate": 4.93446088794926e-05, + "loss": 0.8682, + "step": 254 + }, + { + "epoch": 0.27, + "learning_rate": 4.933932346723045e-05, + "loss": 0.9603, + "step": 256 + }, + { + "epoch": 0.27, + "learning_rate": 4.933403805496829e-05, + "loss": 1.1217, + "step": 258 + }, + { + "epoch": 0.27, + "learning_rate": 4.9328752642706133e-05, + "loss": 1.0628, + "step": 260 + }, + { + "epoch": 0.28, + "learning_rate": 4.932346723044397e-05, + "loss": 1.0257, + "step": 262 + }, + { + "epoch": 0.28, + "learning_rate": 4.931818181818182e-05, + "loss": 0.8398, + "step": 264 + }, + { + "epoch": 0.28, + "learning_rate": 4.9312896405919665e-05, + "loss": 0.9168, + "step": 266 + }, + { + "epoch": 0.28, + "learning_rate": 4.930761099365751e-05, + "loss": 0.8925, + "step": 268 + }, + { + "epoch": 0.29, + "learning_rate": 4.930232558139535e-05, + "loss": 0.7816, + "step": 270 + }, + { + "epoch": 0.29, + "learning_rate": 4.9297040169133196e-05, + "loss": 1.0841, + "step": 272 + }, + { + "epoch": 0.29, + "learning_rate": 4.929175475687104e-05, + "loss": 0.9992, + "step": 274 + }, + { + "epoch": 0.29, + "learning_rate": 4.928646934460888e-05, + "loss": 1.2984, + "step": 276 + }, + { + "epoch": 0.29, + "learning_rate": 4.928118393234673e-05, + "loss": 1.2014, + "step": 278 + }, + { + "epoch": 0.3, + "learning_rate": 4.927589852008457e-05, + "loss": 0.8883, + "step": 280 + }, + { + "epoch": 0.3, + "learning_rate": 4.927061310782241e-05, + "loss": 1.1356, + "step": 282 + }, + { + "epoch": 0.3, + "learning_rate": 4.926532769556025e-05, + "loss": 0.9082, + "step": 284 + }, + { + "epoch": 0.3, + "learning_rate": 4.9260042283298105e-05, + "loss": 0.7397, + "step": 286 + }, + { + "epoch": 0.3, + "learning_rate": 4.9254756871035944e-05, + "loss": 1.0028, + "step": 288 + }, + { + "epoch": 0.31, + "learning_rate": 4.924947145877379e-05, + "loss": 0.8124, + "step": 290 + }, + { + "epoch": 0.31, + "learning_rate": 4.924418604651163e-05, + "loss": 0.9205, + "step": 292 + }, + { + "epoch": 0.31, + "learning_rate": 4.9238900634249476e-05, + "loss": 1.2545, + "step": 294 + }, + { + "epoch": 0.31, + "learning_rate": 4.9233615221987315e-05, + "loss": 0.9391, + "step": 296 + }, + { + "epoch": 0.32, + "learning_rate": 4.922832980972516e-05, + "loss": 0.9877, + "step": 298 + }, + { + "epoch": 0.32, + "learning_rate": 4.9223044397463e-05, + "loss": 0.7637, + "step": 300 + }, + { + "epoch": 0.32, + "learning_rate": 4.9217758985200846e-05, + "loss": 1.0561, + "step": 302 + }, + { + "epoch": 0.32, + "learning_rate": 4.921247357293869e-05, + "loss": 0.6914, + "step": 304 + }, + { + "epoch": 0.32, + "learning_rate": 4.920718816067654e-05, + "loss": 0.933, + "step": 306 + }, + { + "epoch": 0.33, + "learning_rate": 4.920190274841438e-05, + "loss": 0.916, + "step": 308 + }, + { + "epoch": 0.33, + "learning_rate": 4.9196617336152224e-05, + "loss": 0.9402, + "step": 310 + }, + { + "epoch": 0.33, + "learning_rate": 4.919133192389006e-05, + "loss": 0.8271, + "step": 312 + }, + { + "epoch": 0.33, + "learning_rate": 4.918604651162791e-05, + "loss": 0.7607, + "step": 314 + }, + { + "epoch": 0.33, + "learning_rate": 4.918076109936575e-05, + "loss": 0.7434, + "step": 316 + }, + { + "epoch": 0.34, + "learning_rate": 4.9175475687103595e-05, + "loss": 0.8802, + "step": 318 + }, + { + "epoch": 0.34, + "learning_rate": 4.917019027484144e-05, + "loss": 1.3608, + "step": 320 + }, + { + "epoch": 0.34, + "learning_rate": 4.916490486257929e-05, + "loss": 1.0817, + "step": 322 + }, + { + "epoch": 0.34, + "learning_rate": 4.9159619450317126e-05, + "loss": 0.9358, + "step": 324 + }, + { + "epoch": 0.34, + "learning_rate": 4.915433403805497e-05, + "loss": 0.6262, + "step": 326 + }, + { + "epoch": 0.35, + "learning_rate": 4.914904862579282e-05, + "loss": 1.1558, + "step": 328 + }, + { + "epoch": 0.35, + "learning_rate": 4.914376321353066e-05, + "loss": 0.9856, + "step": 330 + }, + { + "epoch": 0.35, + "learning_rate": 4.9138477801268503e-05, + "loss": 0.9328, + "step": 332 + }, + { + "epoch": 0.35, + "learning_rate": 4.913319238900634e-05, + "loss": 0.9733, + "step": 334 + }, + { + "epoch": 0.36, + "learning_rate": 4.912790697674419e-05, + "loss": 0.9509, + "step": 336 + }, + { + "epoch": 0.36, + "learning_rate": 4.912262156448203e-05, + "loss": 1.201, + "step": 338 + }, + { + "epoch": 0.36, + "learning_rate": 4.911733615221988e-05, + "loss": 1.1361, + "step": 340 + }, + { + "epoch": 0.36, + "learning_rate": 4.911205073995772e-05, + "loss": 0.8874, + "step": 342 + }, + { + "epoch": 0.36, + "learning_rate": 4.9106765327695566e-05, + "loss": 0.7402, + "step": 344 + }, + { + "epoch": 0.37, + "learning_rate": 4.9101479915433406e-05, + "loss": 0.957, + "step": 346 + }, + { + "epoch": 0.37, + "learning_rate": 4.909619450317125e-05, + "loss": 0.7774, + "step": 348 + }, + { + "epoch": 0.37, + "learning_rate": 4.909090909090909e-05, + "loss": 0.8865, + "step": 350 + }, + { + "epoch": 0.37, + "learning_rate": 4.908562367864694e-05, + "loss": 0.8735, + "step": 352 + }, + { + "epoch": 0.37, + "learning_rate": 4.9080338266384776e-05, + "loss": 0.7073, + "step": 354 + }, + { + "epoch": 0.38, + "learning_rate": 4.907505285412262e-05, + "loss": 0.6819, + "step": 356 + }, + { + "epoch": 0.38, + "learning_rate": 4.906976744186046e-05, + "loss": 1.3813, + "step": 358 + }, + { + "epoch": 0.38, + "learning_rate": 4.9064482029598314e-05, + "loss": 0.8954, + "step": 360 + }, + { + "epoch": 0.38, + "learning_rate": 4.9059196617336154e-05, + "loss": 0.758, + "step": 362 + }, + { + "epoch": 0.38, + "learning_rate": 4.9053911205074e-05, + "loss": 1.0526, + "step": 364 + }, + { + "epoch": 0.39, + "learning_rate": 4.904862579281184e-05, + "loss": 0.7349, + "step": 366 + }, + { + "epoch": 0.39, + "learning_rate": 4.9043340380549685e-05, + "loss": 0.8793, + "step": 368 + }, + { + "epoch": 0.39, + "learning_rate": 4.903805496828753e-05, + "loss": 0.5134, + "step": 370 + }, + { + "epoch": 0.39, + "learning_rate": 4.903276955602537e-05, + "loss": 0.6381, + "step": 372 + }, + { + "epoch": 0.4, + "learning_rate": 4.9027484143763217e-05, + "loss": 0.949, + "step": 374 + }, + { + "epoch": 0.4, + "learning_rate": 4.9022198731501056e-05, + "loss": 0.8732, + "step": 376 + }, + { + "epoch": 0.4, + "learning_rate": 4.90169133192389e-05, + "loss": 0.6792, + "step": 378 + }, + { + "epoch": 0.4, + "learning_rate": 4.901162790697675e-05, + "loss": 0.8423, + "step": 380 + }, + { + "epoch": 0.4, + "learning_rate": 4.9006342494714594e-05, + "loss": 0.9052, + "step": 382 + }, + { + "epoch": 0.41, + "learning_rate": 4.900105708245243e-05, + "loss": 0.8757, + "step": 384 + }, + { + "epoch": 0.41, + "learning_rate": 4.899577167019028e-05, + "loss": 1.0981, + "step": 386 + }, + { + "epoch": 0.41, + "learning_rate": 4.899048625792812e-05, + "loss": 1.1139, + "step": 388 + }, + { + "epoch": 0.41, + "learning_rate": 4.8985200845665965e-05, + "loss": 0.8369, + "step": 390 + }, + { + "epoch": 0.41, + "learning_rate": 4.8979915433403804e-05, + "loss": 0.8961, + "step": 392 + }, + { + "epoch": 0.42, + "learning_rate": 4.897463002114165e-05, + "loss": 1.1247, + "step": 394 + }, + { + "epoch": 0.42, + "learning_rate": 4.8969344608879496e-05, + "loss": 0.7586, + "step": 396 + }, + { + "epoch": 0.42, + "learning_rate": 4.896405919661734e-05, + "loss": 0.8276, + "step": 398 + }, + { + "epoch": 0.42, + "learning_rate": 4.895877378435518e-05, + "loss": 0.7506, + "step": 400 + }, + { + "epoch": 0.42, + "learning_rate": 4.895348837209303e-05, + "loss": 0.6963, + "step": 402 + }, + { + "epoch": 0.43, + "learning_rate": 4.894820295983087e-05, + "loss": 0.7005, + "step": 404 + }, + { + "epoch": 0.43, + "learning_rate": 4.894291754756871e-05, + "loss": 0.9046, + "step": 406 + }, + { + "epoch": 0.43, + "learning_rate": 4.893763213530655e-05, + "loss": 0.8376, + "step": 408 + }, + { + "epoch": 0.43, + "learning_rate": 4.89323467230444e-05, + "loss": 0.7533, + "step": 410 + }, + { + "epoch": 0.44, + "learning_rate": 4.892706131078224e-05, + "loss": 0.7952, + "step": 412 + }, + { + "epoch": 0.44, + "learning_rate": 4.892177589852009e-05, + "loss": 0.8707, + "step": 414 + }, + { + "epoch": 0.44, + "learning_rate": 4.891649048625793e-05, + "loss": 0.9241, + "step": 416 + }, + { + "epoch": 0.44, + "learning_rate": 4.8911205073995776e-05, + "loss": 0.8749, + "step": 418 + }, + { + "epoch": 0.44, + "learning_rate": 4.8905919661733615e-05, + "loss": 0.8629, + "step": 420 + }, + { + "epoch": 0.45, + "learning_rate": 4.890063424947146e-05, + "loss": 1.1447, + "step": 422 + }, + { + "epoch": 0.45, + "learning_rate": 4.889534883720931e-05, + "loss": 0.7138, + "step": 424 + }, + { + "epoch": 0.45, + "learning_rate": 4.8890063424947146e-05, + "loss": 0.6883, + "step": 426 + }, + { + "epoch": 0.45, + "learning_rate": 4.888477801268499e-05, + "loss": 0.7576, + "step": 428 + }, + { + "epoch": 0.45, + "learning_rate": 4.887949260042283e-05, + "loss": 0.6091, + "step": 430 + }, + { + "epoch": 0.46, + "learning_rate": 4.8874207188160684e-05, + "loss": 0.7623, + "step": 432 + }, + { + "epoch": 0.46, + "learning_rate": 4.8868921775898524e-05, + "loss": 0.7938, + "step": 434 + }, + { + "epoch": 0.46, + "learning_rate": 4.886363636363637e-05, + "loss": 0.9892, + "step": 436 + }, + { + "epoch": 0.46, + "learning_rate": 4.885835095137421e-05, + "loss": 0.9199, + "step": 438 + }, + { + "epoch": 0.47, + "learning_rate": 4.8853065539112055e-05, + "loss": 1.0223, + "step": 440 + }, + { + "epoch": 0.47, + "learning_rate": 4.8847780126849894e-05, + "loss": 0.8448, + "step": 442 + }, + { + "epoch": 0.47, + "learning_rate": 4.884249471458774e-05, + "loss": 0.7734, + "step": 444 + }, + { + "epoch": 0.47, + "learning_rate": 4.883720930232558e-05, + "loss": 1.2691, + "step": 446 + }, + { + "epoch": 0.47, + "learning_rate": 4.8831923890063426e-05, + "loss": 0.9567, + "step": 448 + }, + { + "epoch": 0.48, + "learning_rate": 4.882663847780127e-05, + "loss": 0.9065, + "step": 450 + }, + { + "epoch": 0.48, + "learning_rate": 4.882135306553912e-05, + "loss": 0.706, + "step": 452 + }, + { + "epoch": 0.48, + "learning_rate": 4.881606765327696e-05, + "loss": 0.8382, + "step": 454 + }, + { + "epoch": 0.48, + "learning_rate": 4.88107822410148e-05, + "loss": 0.8763, + "step": 456 + }, + { + "epoch": 0.48, + "learning_rate": 4.880549682875264e-05, + "loss": 0.6008, + "step": 458 + }, + { + "epoch": 0.49, + "learning_rate": 4.880021141649049e-05, + "loss": 0.9091, + "step": 460 + }, + { + "epoch": 0.49, + "learning_rate": 4.879492600422833e-05, + "loss": 1.0063, + "step": 462 + }, + { + "epoch": 0.49, + "learning_rate": 4.8789640591966174e-05, + "loss": 0.6134, + "step": 464 + }, + { + "epoch": 0.49, + "learning_rate": 4.878435517970401e-05, + "loss": 0.9105, + "step": 466 + }, + { + "epoch": 0.49, + "learning_rate": 4.8779069767441866e-05, + "loss": 0.8555, + "step": 468 + }, + { + "epoch": 0.5, + "learning_rate": 4.8773784355179705e-05, + "loss": 1.0469, + "step": 470 + }, + { + "epoch": 0.5, + "learning_rate": 4.876849894291755e-05, + "loss": 1.0043, + "step": 472 + }, + { + "epoch": 0.5, + "learning_rate": 4.876321353065539e-05, + "loss": 0.8886, + "step": 474 + }, + { + "epoch": 0.5, + "learning_rate": 4.875792811839324e-05, + "loss": 0.8378, + "step": 476 + }, + { + "epoch": 0.51, + "learning_rate": 4.875264270613108e-05, + "loss": 0.8491, + "step": 478 + }, + { + "epoch": 0.51, + "learning_rate": 4.874735729386892e-05, + "loss": 0.8847, + "step": 480 + }, + { + "epoch": 0.51, + "learning_rate": 4.874207188160677e-05, + "loss": 0.8345, + "step": 482 + }, + { + "epoch": 0.51, + "learning_rate": 4.873678646934461e-05, + "loss": 0.882, + "step": 484 + }, + { + "epoch": 0.51, + "learning_rate": 4.873150105708246e-05, + "loss": 1.0436, + "step": 486 + }, + { + "epoch": 0.52, + "learning_rate": 4.87262156448203e-05, + "loss": 0.9272, + "step": 488 + }, + { + "epoch": 0.52, + "learning_rate": 4.8720930232558146e-05, + "loss": 0.8267, + "step": 490 + }, + { + "epoch": 0.52, + "learning_rate": 4.8715644820295985e-05, + "loss": 0.8139, + "step": 492 + }, + { + "epoch": 0.52, + "learning_rate": 4.871035940803383e-05, + "loss": 0.9702, + "step": 494 + }, + { + "epoch": 0.52, + "learning_rate": 4.870507399577167e-05, + "loss": 0.9904, + "step": 496 + }, + { + "epoch": 0.53, + "learning_rate": 4.8699788583509516e-05, + "loss": 0.8216, + "step": 498 + }, + { + "epoch": 0.53, + "learning_rate": 4.8694503171247356e-05, + "loss": 0.8989, + "step": 500 + }, + { + "epoch": 0.53, + "eval_cer": 0.08070675406098604, + "eval_loss": 0.7687897086143494, + "eval_runtime": 128.8472, + "eval_samples_per_second": 6.527, + "eval_steps_per_second": 0.823, + "step": 500 + }, + { + "epoch": 0.53, + "learning_rate": 4.86892177589852e-05, + "loss": 1.0833, + "step": 502 + }, + { + "epoch": 0.53, + "learning_rate": 4.868393234672305e-05, + "loss": 0.8538, + "step": 504 + }, + { + "epoch": 0.53, + "learning_rate": 4.8678646934460894e-05, + "loss": 0.6459, + "step": 506 + }, + { + "epoch": 0.54, + "learning_rate": 4.867336152219873e-05, + "loss": 0.6752, + "step": 508 + }, + { + "epoch": 0.54, + "learning_rate": 4.866807610993658e-05, + "loss": 0.7075, + "step": 510 + }, + { + "epoch": 0.54, + "learning_rate": 4.866279069767442e-05, + "loss": 0.7817, + "step": 512 + }, + { + "epoch": 0.54, + "learning_rate": 4.8657505285412264e-05, + "loss": 0.8261, + "step": 514 + }, + { + "epoch": 0.55, + "learning_rate": 4.8652219873150104e-05, + "loss": 0.8365, + "step": 516 + }, + { + "epoch": 0.55, + "learning_rate": 4.864693446088795e-05, + "loss": 0.7488, + "step": 518 + }, + { + "epoch": 0.55, + "learning_rate": 4.8641649048625796e-05, + "loss": 1.0128, + "step": 520 + }, + { + "epoch": 0.55, + "learning_rate": 4.863636363636364e-05, + "loss": 1.1424, + "step": 522 + }, + { + "epoch": 0.55, + "learning_rate": 4.863107822410148e-05, + "loss": 0.8287, + "step": 524 + }, + { + "epoch": 0.56, + "learning_rate": 4.862579281183933e-05, + "loss": 0.718, + "step": 526 + }, + { + "epoch": 0.56, + "learning_rate": 4.8620507399577167e-05, + "loss": 0.5674, + "step": 528 + }, + { + "epoch": 0.56, + "learning_rate": 4.861522198731501e-05, + "loss": 0.8469, + "step": 530 + }, + { + "epoch": 0.56, + "learning_rate": 4.860993657505286e-05, + "loss": 1.1801, + "step": 532 + }, + { + "epoch": 0.56, + "learning_rate": 4.86046511627907e-05, + "loss": 0.7818, + "step": 534 + }, + { + "epoch": 0.57, + "learning_rate": 4.8599365750528544e-05, + "loss": 0.5959, + "step": 536 + }, + { + "epoch": 0.57, + "learning_rate": 4.859408033826638e-05, + "loss": 0.8464, + "step": 538 + }, + { + "epoch": 0.57, + "learning_rate": 4.8588794926004236e-05, + "loss": 0.711, + "step": 540 + }, + { + "epoch": 0.57, + "learning_rate": 4.8583509513742075e-05, + "loss": 0.5881, + "step": 542 + }, + { + "epoch": 0.58, + "learning_rate": 4.857822410147992e-05, + "loss": 0.7966, + "step": 544 + }, + { + "epoch": 0.58, + "learning_rate": 4.857293868921776e-05, + "loss": 0.542, + "step": 546 + }, + { + "epoch": 0.58, + "learning_rate": 4.856765327695561e-05, + "loss": 0.8169, + "step": 548 + }, + { + "epoch": 0.58, + "learning_rate": 4.8562367864693446e-05, + "loss": 0.9783, + "step": 550 + }, + { + "epoch": 0.58, + "learning_rate": 4.855708245243129e-05, + "loss": 0.7638, + "step": 552 + }, + { + "epoch": 0.59, + "learning_rate": 4.855179704016913e-05, + "loss": 0.8195, + "step": 554 + }, + { + "epoch": 0.59, + "learning_rate": 4.854651162790698e-05, + "loss": 1.0137, + "step": 556 + }, + { + "epoch": 0.59, + "learning_rate": 4.8541226215644824e-05, + "loss": 1.0664, + "step": 558 + }, + { + "epoch": 0.59, + "learning_rate": 4.853594080338267e-05, + "loss": 0.7189, + "step": 560 + }, + { + "epoch": 0.59, + "learning_rate": 4.853065539112051e-05, + "loss": 0.7912, + "step": 562 + }, + { + "epoch": 0.6, + "learning_rate": 4.8525369978858355e-05, + "loss": 0.7657, + "step": 564 + }, + { + "epoch": 0.6, + "learning_rate": 4.8520084566596194e-05, + "loss": 0.7469, + "step": 566 + }, + { + "epoch": 0.6, + "learning_rate": 4.851479915433404e-05, + "loss": 0.6355, + "step": 568 + }, + { + "epoch": 0.6, + "learning_rate": 4.850951374207188e-05, + "loss": 0.7281, + "step": 570 + }, + { + "epoch": 0.6, + "learning_rate": 4.8504228329809726e-05, + "loss": 0.8989, + "step": 572 + }, + { + "epoch": 0.61, + "learning_rate": 4.849894291754757e-05, + "loss": 0.837, + "step": 574 + }, + { + "epoch": 0.61, + "learning_rate": 4.849365750528542e-05, + "loss": 1.067, + "step": 576 + }, + { + "epoch": 0.61, + "learning_rate": 4.848837209302326e-05, + "loss": 0.7942, + "step": 578 + }, + { + "epoch": 0.61, + "learning_rate": 4.84830866807611e-05, + "loss": 0.8782, + "step": 580 + }, + { + "epoch": 0.62, + "learning_rate": 4.847780126849894e-05, + "loss": 0.6836, + "step": 582 + }, + { + "epoch": 0.62, + "learning_rate": 4.847251585623679e-05, + "loss": 0.8591, + "step": 584 + }, + { + "epoch": 0.62, + "learning_rate": 4.8467230443974635e-05, + "loss": 1.1122, + "step": 586 + }, + { + "epoch": 0.62, + "learning_rate": 4.8461945031712474e-05, + "loss": 1.2, + "step": 588 + }, + { + "epoch": 0.62, + "learning_rate": 4.845665961945032e-05, + "loss": 0.6635, + "step": 590 + }, + { + "epoch": 0.63, + "learning_rate": 4.845137420718816e-05, + "loss": 0.7363, + "step": 592 + }, + { + "epoch": 0.63, + "learning_rate": 4.844608879492601e-05, + "loss": 0.6953, + "step": 594 + }, + { + "epoch": 0.63, + "learning_rate": 4.844080338266385e-05, + "loss": 0.8619, + "step": 596 + }, + { + "epoch": 0.63, + "learning_rate": 4.84355179704017e-05, + "loss": 0.8771, + "step": 598 + }, + { + "epoch": 0.63, + "learning_rate": 4.843023255813954e-05, + "loss": 0.83, + "step": 600 + }, + { + "epoch": 0.64, + "learning_rate": 4.842494714587738e-05, + "loss": 0.6184, + "step": 602 + }, + { + "epoch": 0.64, + "learning_rate": 4.841966173361522e-05, + "loss": 0.6461, + "step": 604 + }, + { + "epoch": 0.64, + "learning_rate": 4.841437632135307e-05, + "loss": 0.9024, + "step": 606 + }, + { + "epoch": 0.64, + "learning_rate": 4.840909090909091e-05, + "loss": 0.8576, + "step": 608 + }, + { + "epoch": 0.64, + "learning_rate": 4.840380549682875e-05, + "loss": 0.4654, + "step": 610 + }, + { + "epoch": 0.65, + "learning_rate": 4.83985200845666e-05, + "loss": 0.6721, + "step": 612 + }, + { + "epoch": 0.65, + "learning_rate": 4.8393234672304445e-05, + "loss": 0.7993, + "step": 614 + }, + { + "epoch": 0.65, + "learning_rate": 4.8387949260042285e-05, + "loss": 0.6995, + "step": 616 + }, + { + "epoch": 0.65, + "learning_rate": 4.838266384778013e-05, + "loss": 0.767, + "step": 618 + }, + { + "epoch": 0.66, + "learning_rate": 4.837737843551797e-05, + "loss": 0.6822, + "step": 620 + }, + { + "epoch": 0.66, + "learning_rate": 4.8372093023255816e-05, + "loss": 0.7364, + "step": 622 + }, + { + "epoch": 0.66, + "learning_rate": 4.8366807610993655e-05, + "loss": 0.8019, + "step": 624 + }, + { + "epoch": 0.66, + "learning_rate": 4.83615221987315e-05, + "loss": 0.9001, + "step": 626 + }, + { + "epoch": 0.66, + "learning_rate": 4.835623678646935e-05, + "loss": 1.0015, + "step": 628 + }, + { + "epoch": 0.67, + "learning_rate": 4.8350951374207194e-05, + "loss": 1.0959, + "step": 630 + }, + { + "epoch": 0.67, + "learning_rate": 4.834566596194503e-05, + "loss": 0.7717, + "step": 632 + }, + { + "epoch": 0.67, + "learning_rate": 4.834038054968288e-05, + "loss": 0.5083, + "step": 634 + }, + { + "epoch": 0.67, + "learning_rate": 4.8335095137420725e-05, + "loss": 0.8368, + "step": 636 + }, + { + "epoch": 0.67, + "learning_rate": 4.8329809725158564e-05, + "loss": 0.5508, + "step": 638 + }, + { + "epoch": 0.68, + "learning_rate": 4.832452431289641e-05, + "loss": 0.7068, + "step": 640 + }, + { + "epoch": 0.68, + "learning_rate": 4.831923890063425e-05, + "loss": 0.6711, + "step": 642 + }, + { + "epoch": 0.68, + "learning_rate": 4.8313953488372096e-05, + "loss": 0.9935, + "step": 644 + }, + { + "epoch": 0.68, + "learning_rate": 4.8308668076109935e-05, + "loss": 0.609, + "step": 646 + }, + { + "epoch": 0.68, + "learning_rate": 4.830338266384779e-05, + "loss": 0.7019, + "step": 648 + }, + { + "epoch": 0.69, + "learning_rate": 4.829809725158563e-05, + "loss": 1.0631, + "step": 650 + }, + { + "epoch": 0.69, + "learning_rate": 4.829281183932347e-05, + "loss": 1.0473, + "step": 652 + }, + { + "epoch": 0.69, + "learning_rate": 4.828752642706131e-05, + "loss": 1.1643, + "step": 654 + }, + { + "epoch": 0.69, + "learning_rate": 4.828224101479916e-05, + "loss": 0.7424, + "step": 656 + }, + { + "epoch": 0.7, + "learning_rate": 4.8276955602537e-05, + "loss": 0.8219, + "step": 658 + }, + { + "epoch": 0.7, + "learning_rate": 4.8271670190274844e-05, + "loss": 0.8203, + "step": 660 + }, + { + "epoch": 0.7, + "learning_rate": 4.826638477801268e-05, + "loss": 0.8933, + "step": 662 + }, + { + "epoch": 0.7, + "learning_rate": 4.826109936575053e-05, + "loss": 0.5901, + "step": 664 + }, + { + "epoch": 0.7, + "learning_rate": 4.8255813953488375e-05, + "loss": 0.5379, + "step": 666 + }, + { + "epoch": 0.71, + "learning_rate": 4.825052854122622e-05, + "loss": 0.7992, + "step": 668 + }, + { + "epoch": 0.71, + "learning_rate": 4.824524312896406e-05, + "loss": 0.8626, + "step": 670 + }, + { + "epoch": 0.71, + "learning_rate": 4.823995771670191e-05, + "loss": 0.7633, + "step": 672 + }, + { + "epoch": 0.71, + "learning_rate": 4.8234672304439746e-05, + "loss": 1.0405, + "step": 674 + }, + { + "epoch": 0.71, + "learning_rate": 4.822938689217759e-05, + "loss": 0.6841, + "step": 676 + }, + { + "epoch": 0.72, + "learning_rate": 4.822410147991543e-05, + "loss": 0.7965, + "step": 678 + }, + { + "epoch": 0.72, + "learning_rate": 4.821881606765328e-05, + "loss": 0.7564, + "step": 680 + }, + { + "epoch": 0.72, + "learning_rate": 4.8213530655391123e-05, + "loss": 0.6262, + "step": 682 + }, + { + "epoch": 0.72, + "learning_rate": 4.820824524312897e-05, + "loss": 0.6553, + "step": 684 + }, + { + "epoch": 0.73, + "learning_rate": 4.820295983086681e-05, + "loss": 0.6584, + "step": 686 + }, + { + "epoch": 0.73, + "learning_rate": 4.8197674418604655e-05, + "loss": 0.9452, + "step": 688 + }, + { + "epoch": 0.73, + "learning_rate": 4.81923890063425e-05, + "loss": 0.574, + "step": 690 + }, + { + "epoch": 0.73, + "learning_rate": 4.818710359408034e-05, + "loss": 0.7583, + "step": 692 + }, + { + "epoch": 0.73, + "learning_rate": 4.8181818181818186e-05, + "loss": 0.7767, + "step": 694 + }, + { + "epoch": 0.74, + "learning_rate": 4.8176532769556026e-05, + "loss": 0.8234, + "step": 696 + }, + { + "epoch": 0.74, + "learning_rate": 4.817124735729387e-05, + "loss": 1.0249, + "step": 698 + }, + { + "epoch": 0.74, + "learning_rate": 4.816596194503171e-05, + "loss": 1.0602, + "step": 700 + }, + { + "epoch": 0.74, + "learning_rate": 4.8160676532769564e-05, + "loss": 0.981, + "step": 702 + }, + { + "epoch": 0.74, + "learning_rate": 4.81553911205074e-05, + "loss": 0.8407, + "step": 704 + }, + { + "epoch": 0.75, + "learning_rate": 4.815010570824525e-05, + "loss": 0.7522, + "step": 706 + }, + { + "epoch": 0.75, + "learning_rate": 4.814482029598309e-05, + "loss": 0.7369, + "step": 708 + }, + { + "epoch": 0.75, + "learning_rate": 4.8139534883720934e-05, + "loss": 0.6982, + "step": 710 + }, + { + "epoch": 0.75, + "learning_rate": 4.8134249471458774e-05, + "loss": 0.5641, + "step": 712 + }, + { + "epoch": 0.75, + "learning_rate": 4.812896405919662e-05, + "loss": 0.9017, + "step": 714 + }, + { + "epoch": 0.76, + "learning_rate": 4.812367864693446e-05, + "loss": 0.9177, + "step": 716 + }, + { + "epoch": 0.76, + "learning_rate": 4.8118393234672305e-05, + "loss": 0.9034, + "step": 718 + }, + { + "epoch": 0.76, + "learning_rate": 4.811310782241015e-05, + "loss": 0.7752, + "step": 720 + }, + { + "epoch": 0.76, + "learning_rate": 4.8107822410148e-05, + "loss": 0.7535, + "step": 722 + }, + { + "epoch": 0.77, + "learning_rate": 4.8102536997885836e-05, + "loss": 0.7685, + "step": 724 + }, + { + "epoch": 0.77, + "learning_rate": 4.809725158562368e-05, + "loss": 0.9903, + "step": 726 + }, + { + "epoch": 0.77, + "learning_rate": 4.809196617336152e-05, + "loss": 0.9556, + "step": 728 + }, + { + "epoch": 0.77, + "learning_rate": 4.808668076109937e-05, + "loss": 0.7408, + "step": 730 + }, + { + "epoch": 0.77, + "learning_rate": 4.808139534883721e-05, + "loss": 0.9776, + "step": 732 + }, + { + "epoch": 0.78, + "learning_rate": 4.807610993657505e-05, + "loss": 0.6268, + "step": 734 + }, + { + "epoch": 0.78, + "learning_rate": 4.80708245243129e-05, + "loss": 0.7483, + "step": 736 + }, + { + "epoch": 0.78, + "learning_rate": 4.8065539112050745e-05, + "loss": 0.5685, + "step": 738 + }, + { + "epoch": 0.78, + "learning_rate": 4.8060253699788585e-05, + "loss": 0.6607, + "step": 740 + }, + { + "epoch": 0.78, + "learning_rate": 4.805496828752643e-05, + "loss": 0.6576, + "step": 742 + }, + { + "epoch": 0.79, + "learning_rate": 4.804968287526428e-05, + "loss": 0.7757, + "step": 744 + }, + { + "epoch": 0.79, + "learning_rate": 4.8044397463002116e-05, + "loss": 0.4771, + "step": 746 + }, + { + "epoch": 0.79, + "learning_rate": 4.803911205073996e-05, + "loss": 0.7673, + "step": 748 + }, + { + "epoch": 0.79, + "learning_rate": 4.80338266384778e-05, + "loss": 0.5479, + "step": 750 + }, + { + "epoch": 0.79, + "learning_rate": 4.802854122621565e-05, + "loss": 0.8368, + "step": 752 + }, + { + "epoch": 0.8, + "learning_rate": 4.802325581395349e-05, + "loss": 0.7432, + "step": 754 + }, + { + "epoch": 0.8, + "learning_rate": 4.801797040169134e-05, + "loss": 0.9759, + "step": 756 + }, + { + "epoch": 0.8, + "learning_rate": 4.801268498942918e-05, + "loss": 0.5834, + "step": 758 + }, + { + "epoch": 0.8, + "learning_rate": 4.8007399577167025e-05, + "loss": 0.6704, + "step": 760 + }, + { + "epoch": 0.81, + "learning_rate": 4.8002114164904864e-05, + "loss": 0.6211, + "step": 762 + }, + { + "epoch": 0.81, + "learning_rate": 4.799682875264271e-05, + "loss": 0.7186, + "step": 764 + }, + { + "epoch": 0.81, + "learning_rate": 4.799154334038055e-05, + "loss": 0.6583, + "step": 766 + }, + { + "epoch": 0.81, + "learning_rate": 4.7986257928118396e-05, + "loss": 0.5375, + "step": 768 + }, + { + "epoch": 0.81, + "learning_rate": 4.7980972515856235e-05, + "loss": 0.7341, + "step": 770 + }, + { + "epoch": 0.82, + "learning_rate": 4.797568710359408e-05, + "loss": 0.5711, + "step": 772 + }, + { + "epoch": 0.82, + "learning_rate": 4.797040169133193e-05, + "loss": 0.5584, + "step": 774 + }, + { + "epoch": 0.82, + "learning_rate": 4.796511627906977e-05, + "loss": 0.5109, + "step": 776 + }, + { + "epoch": 0.82, + "learning_rate": 4.795983086680761e-05, + "loss": 0.5721, + "step": 778 + }, + { + "epoch": 0.82, + "learning_rate": 4.795454545454546e-05, + "loss": 0.6839, + "step": 780 + }, + { + "epoch": 0.83, + "learning_rate": 4.79492600422833e-05, + "loss": 0.5016, + "step": 782 + }, + { + "epoch": 0.83, + "learning_rate": 4.7943974630021144e-05, + "loss": 1.2874, + "step": 784 + }, + { + "epoch": 0.83, + "learning_rate": 4.793868921775898e-05, + "loss": 0.6109, + "step": 786 + }, + { + "epoch": 0.83, + "learning_rate": 4.793340380549683e-05, + "loss": 0.7601, + "step": 788 + }, + { + "epoch": 0.84, + "learning_rate": 4.7928118393234675e-05, + "loss": 0.9023, + "step": 790 + }, + { + "epoch": 0.84, + "learning_rate": 4.792283298097252e-05, + "loss": 0.6537, + "step": 792 + }, + { + "epoch": 0.84, + "learning_rate": 4.791754756871036e-05, + "loss": 0.6189, + "step": 794 + }, + { + "epoch": 0.84, + "learning_rate": 4.7912262156448206e-05, + "loss": 0.9409, + "step": 796 + }, + { + "epoch": 0.84, + "learning_rate": 4.790697674418605e-05, + "loss": 0.7473, + "step": 798 + }, + { + "epoch": 0.85, + "learning_rate": 4.790169133192389e-05, + "loss": 0.6643, + "step": 800 + }, + { + "epoch": 0.85, + "learning_rate": 4.789640591966174e-05, + "loss": 0.6447, + "step": 802 + }, + { + "epoch": 0.85, + "learning_rate": 4.789112050739958e-05, + "loss": 0.5678, + "step": 804 + }, + { + "epoch": 0.85, + "learning_rate": 4.788583509513742e-05, + "loss": 0.5642, + "step": 806 + }, + { + "epoch": 0.85, + "learning_rate": 4.788054968287526e-05, + "loss": 0.7571, + "step": 808 + }, + { + "epoch": 0.86, + "learning_rate": 4.7875264270613115e-05, + "loss": 0.7114, + "step": 810 + }, + { + "epoch": 0.86, + "learning_rate": 4.7869978858350955e-05, + "loss": 0.7462, + "step": 812 + }, + { + "epoch": 0.86, + "learning_rate": 4.78646934460888e-05, + "loss": 0.8739, + "step": 814 + }, + { + "epoch": 0.86, + "learning_rate": 4.785940803382664e-05, + "loss": 0.9708, + "step": 816 + }, + { + "epoch": 0.86, + "learning_rate": 4.7854122621564486e-05, + "loss": 0.757, + "step": 818 + }, + { + "epoch": 0.87, + "learning_rate": 4.7848837209302325e-05, + "loss": 0.6415, + "step": 820 + }, + { + "epoch": 0.87, + "learning_rate": 4.784355179704017e-05, + "loss": 0.5284, + "step": 822 + }, + { + "epoch": 0.87, + "learning_rate": 4.783826638477801e-05, + "loss": 0.9145, + "step": 824 + }, + { + "epoch": 0.87, + "learning_rate": 4.783298097251586e-05, + "loss": 0.6378, + "step": 826 + }, + { + "epoch": 0.88, + "learning_rate": 4.78276955602537e-05, + "loss": 0.7644, + "step": 828 + }, + { + "epoch": 0.88, + "learning_rate": 4.782241014799155e-05, + "loss": 0.7083, + "step": 830 + }, + { + "epoch": 0.88, + "learning_rate": 4.781712473572939e-05, + "loss": 0.5737, + "step": 832 + }, + { + "epoch": 0.88, + "learning_rate": 4.7811839323467234e-05, + "loss": 0.9134, + "step": 834 + }, + { + "epoch": 0.88, + "learning_rate": 4.7806553911205073e-05, + "loss": 0.7197, + "step": 836 + }, + { + "epoch": 0.89, + "learning_rate": 4.780126849894292e-05, + "loss": 0.7081, + "step": 838 + }, + { + "epoch": 0.89, + "learning_rate": 4.7795983086680766e-05, + "loss": 0.6583, + "step": 840 + }, + { + "epoch": 0.89, + "learning_rate": 4.7790697674418605e-05, + "loss": 0.5343, + "step": 842 + }, + { + "epoch": 0.89, + "learning_rate": 4.778541226215645e-05, + "loss": 0.4942, + "step": 844 + }, + { + "epoch": 0.89, + "learning_rate": 4.77801268498943e-05, + "loss": 0.6151, + "step": 846 + }, + { + "epoch": 0.9, + "learning_rate": 4.7774841437632136e-05, + "loss": 0.6031, + "step": 848 + }, + { + "epoch": 0.9, + "learning_rate": 4.776955602536998e-05, + "loss": 0.8094, + "step": 850 + }, + { + "epoch": 0.9, + "learning_rate": 4.776427061310783e-05, + "loss": 0.6032, + "step": 852 + }, + { + "epoch": 0.9, + "learning_rate": 4.775898520084567e-05, + "loss": 0.9748, + "step": 854 + }, + { + "epoch": 0.9, + "learning_rate": 4.7753699788583514e-05, + "loss": 0.8298, + "step": 856 + }, + { + "epoch": 0.91, + "learning_rate": 4.774841437632135e-05, + "loss": 0.8524, + "step": 858 + }, + { + "epoch": 0.91, + "learning_rate": 4.77431289640592e-05, + "loss": 0.671, + "step": 860 + }, + { + "epoch": 0.91, + "learning_rate": 4.773784355179704e-05, + "loss": 0.6802, + "step": 862 + }, + { + "epoch": 0.91, + "learning_rate": 4.773255813953489e-05, + "loss": 0.8394, + "step": 864 + }, + { + "epoch": 0.92, + "learning_rate": 4.772727272727273e-05, + "loss": 0.6247, + "step": 866 + }, + { + "epoch": 0.92, + "learning_rate": 4.7721987315010577e-05, + "loss": 0.6984, + "step": 868 + }, + { + "epoch": 0.92, + "learning_rate": 4.7716701902748416e-05, + "loss": 0.732, + "step": 870 + }, + { + "epoch": 0.92, + "learning_rate": 4.771141649048626e-05, + "loss": 0.6115, + "step": 872 + }, + { + "epoch": 0.92, + "learning_rate": 4.77061310782241e-05, + "loss": 0.7653, + "step": 874 + }, + { + "epoch": 0.93, + "learning_rate": 4.770084566596195e-05, + "loss": 0.6636, + "step": 876 + }, + { + "epoch": 0.93, + "learning_rate": 4.7695560253699787e-05, + "loss": 0.9399, + "step": 878 + }, + { + "epoch": 0.93, + "learning_rate": 4.769027484143763e-05, + "loss": 0.4535, + "step": 880 + }, + { + "epoch": 0.93, + "learning_rate": 4.768498942917548e-05, + "loss": 0.5472, + "step": 882 + }, + { + "epoch": 0.93, + "learning_rate": 4.7679704016913325e-05, + "loss": 0.8799, + "step": 884 + }, + { + "epoch": 0.94, + "learning_rate": 4.7674418604651164e-05, + "loss": 0.694, + "step": 886 + }, + { + "epoch": 0.94, + "learning_rate": 4.766913319238901e-05, + "loss": 0.9077, + "step": 888 + }, + { + "epoch": 0.94, + "learning_rate": 4.766384778012685e-05, + "loss": 0.6647, + "step": 890 + }, + { + "epoch": 0.94, + "learning_rate": 4.7658562367864695e-05, + "loss": 0.7828, + "step": 892 + }, + { + "epoch": 0.95, + "learning_rate": 4.765327695560254e-05, + "loss": 0.7554, + "step": 894 + }, + { + "epoch": 0.95, + "learning_rate": 4.764799154334038e-05, + "loss": 0.683, + "step": 896 + }, + { + "epoch": 0.95, + "learning_rate": 4.764270613107823e-05, + "loss": 0.6435, + "step": 898 + }, + { + "epoch": 0.95, + "learning_rate": 4.763742071881607e-05, + "loss": 0.8451, + "step": 900 + }, + { + "epoch": 0.95, + "learning_rate": 4.763213530655392e-05, + "loss": 0.9021, + "step": 902 + }, + { + "epoch": 0.96, + "learning_rate": 4.762684989429176e-05, + "loss": 0.5666, + "step": 904 + }, + { + "epoch": 0.96, + "learning_rate": 4.7621564482029604e-05, + "loss": 0.7458, + "step": 906 + }, + { + "epoch": 0.96, + "learning_rate": 4.7616279069767444e-05, + "loss": 0.8112, + "step": 908 + }, + { + "epoch": 0.96, + "learning_rate": 4.761099365750529e-05, + "loss": 0.7296, + "step": 910 + }, + { + "epoch": 0.96, + "learning_rate": 4.760570824524313e-05, + "loss": 0.6413, + "step": 912 + }, + { + "epoch": 0.97, + "learning_rate": 4.7600422832980975e-05, + "loss": 0.8478, + "step": 914 + }, + { + "epoch": 0.97, + "learning_rate": 4.7595137420718814e-05, + "loss": 0.7438, + "step": 916 + }, + { + "epoch": 0.97, + "learning_rate": 4.758985200845667e-05, + "loss": 0.9292, + "step": 918 + }, + { + "epoch": 0.97, + "learning_rate": 4.7584566596194506e-05, + "loss": 0.6936, + "step": 920 + }, + { + "epoch": 0.97, + "learning_rate": 4.757928118393235e-05, + "loss": 0.6346, + "step": 922 + }, + { + "epoch": 0.98, + "learning_rate": 4.757399577167019e-05, + "loss": 0.6186, + "step": 924 + }, + { + "epoch": 0.98, + "learning_rate": 4.756871035940804e-05, + "loss": 0.6978, + "step": 926 + }, + { + "epoch": 0.98, + "learning_rate": 4.756342494714588e-05, + "loss": 0.7146, + "step": 928 + }, + { + "epoch": 0.98, + "learning_rate": 4.755813953488372e-05, + "loss": 0.655, + "step": 930 + }, + { + "epoch": 0.99, + "learning_rate": 4.755285412262156e-05, + "loss": 0.7053, + "step": 932 + }, + { + "epoch": 0.99, + "learning_rate": 4.754756871035941e-05, + "loss": 0.5712, + "step": 934 + }, + { + "epoch": 0.99, + "learning_rate": 4.7542283298097254e-05, + "loss": 0.6625, + "step": 936 + }, + { + "epoch": 0.99, + "learning_rate": 4.75369978858351e-05, + "loss": 0.7332, + "step": 938 + }, + { + "epoch": 0.99, + "learning_rate": 4.753171247357294e-05, + "loss": 0.6703, + "step": 940 + }, + { + "epoch": 1.0, + "learning_rate": 4.7526427061310786e-05, + "loss": 0.6486, + "step": 942 + }, + { + "epoch": 1.0, + "learning_rate": 4.7521141649048625e-05, + "loss": 0.729, + "step": 944 + }, + { + "epoch": 1.0, + "learning_rate": 4.751585623678647e-05, + "loss": 0.7503, + "step": 946 + }, + { + "epoch": 1.0, + "learning_rate": 4.751057082452432e-05, + "loss": 0.6571, + "step": 948 + }, + { + "epoch": 1.0, + "learning_rate": 4.7505285412262157e-05, + "loss": 0.5435, + "step": 950 + }, + { + "epoch": 1.01, + "learning_rate": 4.75e-05, + "loss": 0.4819, + "step": 952 + }, + { + "epoch": 1.01, + "learning_rate": 4.749471458773785e-05, + "loss": 0.6185, + "step": 954 + }, + { + "epoch": 1.01, + "learning_rate": 4.7489429175475695e-05, + "loss": 0.8085, + "step": 956 + }, + { + "epoch": 1.01, + "learning_rate": 4.7484143763213534e-05, + "loss": 0.681, + "step": 958 + }, + { + "epoch": 1.01, + "learning_rate": 4.747885835095138e-05, + "loss": 1.0466, + "step": 960 + }, + { + "epoch": 1.02, + "learning_rate": 4.747357293868922e-05, + "loss": 0.6033, + "step": 962 + }, + { + "epoch": 1.02, + "learning_rate": 4.7468287526427065e-05, + "loss": 0.7136, + "step": 964 + }, + { + "epoch": 1.02, + "learning_rate": 4.7463002114164905e-05, + "loss": 0.5318, + "step": 966 + }, + { + "epoch": 1.02, + "learning_rate": 4.745771670190275e-05, + "loss": 0.4115, + "step": 968 + }, + { + "epoch": 1.03, + "learning_rate": 4.745243128964059e-05, + "loss": 0.643, + "step": 970 + }, + { + "epoch": 1.03, + "learning_rate": 4.744714587737844e-05, + "loss": 0.5568, + "step": 972 + }, + { + "epoch": 1.03, + "learning_rate": 4.744186046511628e-05, + "loss": 0.8738, + "step": 974 + }, + { + "epoch": 1.03, + "learning_rate": 4.743657505285413e-05, + "loss": 0.9258, + "step": 976 + }, + { + "epoch": 1.03, + "learning_rate": 4.743128964059197e-05, + "loss": 0.583, + "step": 978 + }, + { + "epoch": 1.04, + "learning_rate": 4.7426004228329814e-05, + "loss": 0.5308, + "step": 980 + }, + { + "epoch": 1.04, + "learning_rate": 4.742071881606765e-05, + "loss": 0.9331, + "step": 982 + }, + { + "epoch": 1.04, + "learning_rate": 4.74154334038055e-05, + "loss": 0.6287, + "step": 984 + }, + { + "epoch": 1.04, + "learning_rate": 4.741014799154334e-05, + "loss": 0.4476, + "step": 986 + }, + { + "epoch": 1.04, + "learning_rate": 4.7404862579281184e-05, + "loss": 0.5554, + "step": 988 + }, + { + "epoch": 1.05, + "learning_rate": 4.739957716701903e-05, + "loss": 0.4947, + "step": 990 + }, + { + "epoch": 1.05, + "learning_rate": 4.7394291754756876e-05, + "loss": 0.4979, + "step": 992 + }, + { + "epoch": 1.05, + "learning_rate": 4.7389006342494716e-05, + "loss": 0.6792, + "step": 994 + }, + { + "epoch": 1.05, + "learning_rate": 4.738372093023256e-05, + "loss": 0.8503, + "step": 996 + }, + { + "epoch": 1.05, + "learning_rate": 4.73784355179704e-05, + "loss": 0.7678, + "step": 998 + }, + { + "epoch": 1.06, + "learning_rate": 4.737315010570825e-05, + "loss": 0.6055, + "step": 1000 + }, + { + "epoch": 1.06, + "eval_cer": 0.08395554288971217, + "eval_loss": 0.5935352444648743, + "eval_runtime": 130.5287, + "eval_samples_per_second": 6.443, + "eval_steps_per_second": 0.812, + "step": 1000 + }, + { + "epoch": 1.06, + "learning_rate": 4.736786469344609e-05, + "loss": 0.6435, + "step": 1002 + }, + { + "epoch": 1.06, + "learning_rate": 4.736257928118393e-05, + "loss": 0.4943, + "step": 1004 + }, + { + "epoch": 1.06, + "learning_rate": 4.735729386892178e-05, + "loss": 0.6907, + "step": 1006 + }, + { + "epoch": 1.07, + "learning_rate": 4.7352008456659625e-05, + "loss": 0.8161, + "step": 1008 + }, + { + "epoch": 1.07, + "learning_rate": 4.734672304439747e-05, + "loss": 0.7885, + "step": 1010 + }, + { + "epoch": 1.07, + "learning_rate": 4.734143763213531e-05, + "loss": 0.6831, + "step": 1012 + }, + { + "epoch": 1.07, + "learning_rate": 4.7336152219873156e-05, + "loss": 0.5823, + "step": 1014 + }, + { + "epoch": 1.07, + "learning_rate": 4.7330866807610995e-05, + "loss": 0.6312, + "step": 1016 + }, + { + "epoch": 1.08, + "learning_rate": 4.732558139534884e-05, + "loss": 0.5614, + "step": 1018 + }, + { + "epoch": 1.08, + "learning_rate": 4.732029598308668e-05, + "loss": 0.6951, + "step": 1020 + }, + { + "epoch": 1.08, + "learning_rate": 4.7315010570824527e-05, + "loss": 0.7978, + "step": 1022 + }, + { + "epoch": 1.08, + "learning_rate": 4.7309725158562366e-05, + "loss": 0.4999, + "step": 1024 + }, + { + "epoch": 1.08, + "learning_rate": 4.730443974630021e-05, + "loss": 0.5096, + "step": 1026 + }, + { + "epoch": 1.09, + "learning_rate": 4.729915433403806e-05, + "loss": 0.4208, + "step": 1028 + }, + { + "epoch": 1.09, + "learning_rate": 4.7293868921775904e-05, + "loss": 0.6277, + "step": 1030 + }, + { + "epoch": 1.09, + "learning_rate": 4.728858350951374e-05, + "loss": 0.7658, + "step": 1032 + }, + { + "epoch": 1.09, + "learning_rate": 4.728329809725159e-05, + "loss": 0.7402, + "step": 1034 + }, + { + "epoch": 1.1, + "learning_rate": 4.727801268498943e-05, + "loss": 0.5968, + "step": 1036 + }, + { + "epoch": 1.1, + "learning_rate": 4.7272727272727275e-05, + "loss": 0.5667, + "step": 1038 + }, + { + "epoch": 1.1, + "learning_rate": 4.7267441860465114e-05, + "loss": 0.5122, + "step": 1040 + }, + { + "epoch": 1.1, + "learning_rate": 4.726215644820296e-05, + "loss": 0.5836, + "step": 1042 + }, + { + "epoch": 1.1, + "learning_rate": 4.7256871035940806e-05, + "loss": 0.5438, + "step": 1044 + }, + { + "epoch": 1.11, + "learning_rate": 4.725158562367865e-05, + "loss": 0.5334, + "step": 1046 + }, + { + "epoch": 1.11, + "learning_rate": 4.724630021141649e-05, + "loss": 0.5546, + "step": 1048 + }, + { + "epoch": 1.11, + "learning_rate": 4.724101479915434e-05, + "loss": 0.63, + "step": 1050 + }, + { + "epoch": 1.11, + "learning_rate": 4.723572938689218e-05, + "loss": 0.4592, + "step": 1052 + }, + { + "epoch": 1.11, + "learning_rate": 4.723044397463002e-05, + "loss": 0.3419, + "step": 1054 + }, + { + "epoch": 1.12, + "learning_rate": 4.722515856236787e-05, + "loss": 0.5825, + "step": 1056 + }, + { + "epoch": 1.12, + "learning_rate": 4.721987315010571e-05, + "loss": 0.5554, + "step": 1058 + }, + { + "epoch": 1.12, + "learning_rate": 4.7214587737843554e-05, + "loss": 0.5521, + "step": 1060 + }, + { + "epoch": 1.12, + "learning_rate": 4.7209302325581394e-05, + "loss": 0.6327, + "step": 1062 + }, + { + "epoch": 1.12, + "learning_rate": 4.7204016913319246e-05, + "loss": 0.7493, + "step": 1064 + }, + { + "epoch": 1.13, + "learning_rate": 4.7198731501057086e-05, + "loss": 0.6406, + "step": 1066 + }, + { + "epoch": 1.13, + "learning_rate": 4.719344608879493e-05, + "loss": 0.6063, + "step": 1068 + }, + { + "epoch": 1.13, + "learning_rate": 4.718816067653277e-05, + "loss": 0.7856, + "step": 1070 + }, + { + "epoch": 1.13, + "learning_rate": 4.718287526427062e-05, + "loss": 0.9146, + "step": 1072 + }, + { + "epoch": 1.14, + "learning_rate": 4.7177589852008456e-05, + "loss": 0.9914, + "step": 1074 + }, + { + "epoch": 1.14, + "learning_rate": 4.71723044397463e-05, + "loss": 0.6502, + "step": 1076 + }, + { + "epoch": 1.14, + "learning_rate": 4.716701902748414e-05, + "loss": 0.5753, + "step": 1078 + }, + { + "epoch": 1.14, + "learning_rate": 4.716173361522199e-05, + "loss": 0.6573, + "step": 1080 + }, + { + "epoch": 1.14, + "learning_rate": 4.7156448202959834e-05, + "loss": 0.6994, + "step": 1082 + }, + { + "epoch": 1.15, + "learning_rate": 4.715116279069768e-05, + "loss": 0.4771, + "step": 1084 + }, + { + "epoch": 1.15, + "learning_rate": 4.714587737843552e-05, + "loss": 0.4544, + "step": 1086 + }, + { + "epoch": 1.15, + "learning_rate": 4.7140591966173365e-05, + "loss": 0.5647, + "step": 1088 + }, + { + "epoch": 1.15, + "learning_rate": 4.7135306553911205e-05, + "loss": 0.6566, + "step": 1090 + }, + { + "epoch": 1.15, + "learning_rate": 4.713002114164905e-05, + "loss": 0.5029, + "step": 1092 + }, + { + "epoch": 1.16, + "learning_rate": 4.712473572938689e-05, + "loss": 0.6242, + "step": 1094 + }, + { + "epoch": 1.16, + "learning_rate": 4.7119450317124736e-05, + "loss": 0.5858, + "step": 1096 + }, + { + "epoch": 1.16, + "learning_rate": 4.711416490486258e-05, + "loss": 0.4173, + "step": 1098 + }, + { + "epoch": 1.16, + "learning_rate": 4.710887949260043e-05, + "loss": 0.4745, + "step": 1100 + }, + { + "epoch": 1.16, + "learning_rate": 4.710359408033827e-05, + "loss": 0.6373, + "step": 1102 + }, + { + "epoch": 1.17, + "learning_rate": 4.7098308668076113e-05, + "loss": 0.6338, + "step": 1104 + }, + { + "epoch": 1.17, + "learning_rate": 4.709302325581396e-05, + "loss": 0.502, + "step": 1106 + }, + { + "epoch": 1.17, + "learning_rate": 4.70877378435518e-05, + "loss": 0.5438, + "step": 1108 + }, + { + "epoch": 1.17, + "learning_rate": 4.7082452431289645e-05, + "loss": 0.4672, + "step": 1110 + }, + { + "epoch": 1.18, + "learning_rate": 4.7077167019027484e-05, + "loss": 0.3756, + "step": 1112 + }, + { + "epoch": 1.18, + "learning_rate": 4.707188160676533e-05, + "loss": 0.5381, + "step": 1114 + }, + { + "epoch": 1.18, + "learning_rate": 4.706659619450317e-05, + "loss": 0.4322, + "step": 1116 + }, + { + "epoch": 1.18, + "learning_rate": 4.706131078224102e-05, + "loss": 0.5286, + "step": 1118 + }, + { + "epoch": 1.18, + "learning_rate": 4.705602536997886e-05, + "loss": 0.3846, + "step": 1120 + }, + { + "epoch": 1.19, + "learning_rate": 4.705073995771671e-05, + "loss": 0.6808, + "step": 1122 + }, + { + "epoch": 1.19, + "learning_rate": 4.704545454545455e-05, + "loss": 0.501, + "step": 1124 + }, + { + "epoch": 1.19, + "learning_rate": 4.704016913319239e-05, + "loss": 0.514, + "step": 1126 + }, + { + "epoch": 1.19, + "learning_rate": 4.703488372093023e-05, + "loss": 0.6151, + "step": 1128 + }, + { + "epoch": 1.19, + "learning_rate": 4.702959830866808e-05, + "loss": 0.6041, + "step": 1130 + }, + { + "epoch": 1.2, + "learning_rate": 4.702431289640592e-05, + "loss": 0.6237, + "step": 1132 + }, + { + "epoch": 1.2, + "learning_rate": 4.7019027484143764e-05, + "loss": 0.4988, + "step": 1134 + }, + { + "epoch": 1.2, + "learning_rate": 4.701374207188161e-05, + "loss": 0.5912, + "step": 1136 + }, + { + "epoch": 1.2, + "learning_rate": 4.7008456659619456e-05, + "loss": 0.6301, + "step": 1138 + }, + { + "epoch": 1.21, + "learning_rate": 4.7003171247357295e-05, + "loss": 0.5094, + "step": 1140 + }, + { + "epoch": 1.21, + "learning_rate": 4.699788583509514e-05, + "loss": 0.6045, + "step": 1142 + }, + { + "epoch": 1.21, + "learning_rate": 4.699260042283298e-05, + "loss": 0.5534, + "step": 1144 + }, + { + "epoch": 1.21, + "learning_rate": 4.6987315010570826e-05, + "loss": 0.649, + "step": 1146 + }, + { + "epoch": 1.21, + "learning_rate": 4.6982029598308666e-05, + "loss": 0.4673, + "step": 1148 + }, + { + "epoch": 1.22, + "learning_rate": 4.697674418604651e-05, + "loss": 0.6251, + "step": 1150 + }, + { + "epoch": 1.22, + "learning_rate": 4.697145877378436e-05, + "loss": 0.6932, + "step": 1152 + }, + { + "epoch": 1.22, + "learning_rate": 4.6966173361522204e-05, + "loss": 0.7142, + "step": 1154 + }, + { + "epoch": 1.22, + "learning_rate": 4.696088794926004e-05, + "loss": 0.5804, + "step": 1156 + }, + { + "epoch": 1.22, + "learning_rate": 4.695560253699789e-05, + "loss": 0.8723, + "step": 1158 + }, + { + "epoch": 1.23, + "learning_rate": 4.6950317124735735e-05, + "loss": 0.4948, + "step": 1160 + }, + { + "epoch": 1.23, + "learning_rate": 4.6945031712473575e-05, + "loss": 0.5645, + "step": 1162 + }, + { + "epoch": 1.23, + "learning_rate": 4.693974630021142e-05, + "loss": 0.4606, + "step": 1164 + }, + { + "epoch": 1.23, + "learning_rate": 4.693446088794926e-05, + "loss": 0.5359, + "step": 1166 + }, + { + "epoch": 1.23, + "learning_rate": 4.6929175475687106e-05, + "loss": 0.4132, + "step": 1168 + }, + { + "epoch": 1.24, + "learning_rate": 4.6923890063424945e-05, + "loss": 0.6268, + "step": 1170 + }, + { + "epoch": 1.24, + "learning_rate": 4.69186046511628e-05, + "loss": 0.5611, + "step": 1172 + }, + { + "epoch": 1.24, + "learning_rate": 4.691331923890064e-05, + "loss": 0.6392, + "step": 1174 + }, + { + "epoch": 1.24, + "learning_rate": 4.6908033826638483e-05, + "loss": 0.8275, + "step": 1176 + }, + { + "epoch": 1.25, + "learning_rate": 4.690274841437632e-05, + "loss": 0.7045, + "step": 1178 + }, + { + "epoch": 1.25, + "learning_rate": 4.689746300211417e-05, + "loss": 0.7826, + "step": 1180 + }, + { + "epoch": 1.25, + "learning_rate": 4.689217758985201e-05, + "loss": 0.7238, + "step": 1182 + }, + { + "epoch": 1.25, + "learning_rate": 4.6886892177589854e-05, + "loss": 0.4435, + "step": 1184 + }, + { + "epoch": 1.25, + "learning_rate": 4.6881606765327693e-05, + "loss": 0.5589, + "step": 1186 + }, + { + "epoch": 1.26, + "learning_rate": 4.687632135306554e-05, + "loss": 0.5764, + "step": 1188 + }, + { + "epoch": 1.26, + "learning_rate": 4.6871035940803386e-05, + "loss": 0.4692, + "step": 1190 + }, + { + "epoch": 1.26, + "learning_rate": 4.686575052854123e-05, + "loss": 0.4907, + "step": 1192 + }, + { + "epoch": 1.26, + "learning_rate": 4.686046511627907e-05, + "loss": 0.5897, + "step": 1194 + }, + { + "epoch": 1.26, + "learning_rate": 4.685517970401692e-05, + "loss": 0.4332, + "step": 1196 + }, + { + "epoch": 1.27, + "learning_rate": 4.6849894291754756e-05, + "loss": 0.3469, + "step": 1198 + }, + { + "epoch": 1.27, + "learning_rate": 4.68446088794926e-05, + "loss": 0.592, + "step": 1200 + }, + { + "epoch": 1.27, + "learning_rate": 4.683932346723044e-05, + "loss": 0.6763, + "step": 1202 + }, + { + "epoch": 1.27, + "learning_rate": 4.683403805496829e-05, + "loss": 0.9583, + "step": 1204 + }, + { + "epoch": 1.27, + "learning_rate": 4.6828752642706134e-05, + "loss": 0.5109, + "step": 1206 + }, + { + "epoch": 1.28, + "learning_rate": 4.682346723044398e-05, + "loss": 0.631, + "step": 1208 + }, + { + "epoch": 1.28, + "learning_rate": 4.681818181818182e-05, + "loss": 0.7334, + "step": 1210 + }, + { + "epoch": 1.28, + "learning_rate": 4.6812896405919665e-05, + "loss": 0.6342, + "step": 1212 + }, + { + "epoch": 1.28, + "learning_rate": 4.680761099365751e-05, + "loss": 0.6196, + "step": 1214 + }, + { + "epoch": 1.29, + "learning_rate": 4.680232558139535e-05, + "loss": 0.5315, + "step": 1216 + }, + { + "epoch": 1.29, + "learning_rate": 4.6797040169133196e-05, + "loss": 0.5387, + "step": 1218 + }, + { + "epoch": 1.29, + "learning_rate": 4.6791754756871036e-05, + "loss": 0.8501, + "step": 1220 + }, + { + "epoch": 1.29, + "learning_rate": 4.678646934460888e-05, + "loss": 0.8125, + "step": 1222 + }, + { + "epoch": 1.29, + "learning_rate": 4.678118393234672e-05, + "loss": 0.6331, + "step": 1224 + }, + { + "epoch": 1.3, + "learning_rate": 4.6775898520084574e-05, + "loss": 0.5622, + "step": 1226 + }, + { + "epoch": 1.3, + "learning_rate": 4.677061310782241e-05, + "loss": 0.5663, + "step": 1228 + }, + { + "epoch": 1.3, + "learning_rate": 4.676532769556026e-05, + "loss": 0.5199, + "step": 1230 + }, + { + "epoch": 1.3, + "learning_rate": 4.67600422832981e-05, + "loss": 0.8869, + "step": 1232 + }, + { + "epoch": 1.3, + "learning_rate": 4.6754756871035945e-05, + "loss": 0.6617, + "step": 1234 + }, + { + "epoch": 1.31, + "learning_rate": 4.6749471458773784e-05, + "loss": 0.4335, + "step": 1236 + }, + { + "epoch": 1.31, + "learning_rate": 4.674418604651163e-05, + "loss": 0.8475, + "step": 1238 + }, + { + "epoch": 1.31, + "learning_rate": 4.673890063424947e-05, + "loss": 0.5508, + "step": 1240 + }, + { + "epoch": 1.31, + "learning_rate": 4.6733615221987315e-05, + "loss": 0.6348, + "step": 1242 + }, + { + "epoch": 1.32, + "learning_rate": 4.672832980972516e-05, + "loss": 0.6159, + "step": 1244 + }, + { + "epoch": 1.32, + "learning_rate": 4.672304439746301e-05, + "loss": 0.477, + "step": 1246 + }, + { + "epoch": 1.32, + "learning_rate": 4.671775898520085e-05, + "loss": 0.5092, + "step": 1248 + }, + { + "epoch": 1.32, + "learning_rate": 4.671247357293869e-05, + "loss": 0.5635, + "step": 1250 + }, + { + "epoch": 1.32, + "learning_rate": 4.670718816067653e-05, + "loss": 0.8466, + "step": 1252 + }, + { + "epoch": 1.33, + "learning_rate": 4.670190274841438e-05, + "loss": 0.5669, + "step": 1254 + }, + { + "epoch": 1.33, + "learning_rate": 4.6696617336152224e-05, + "loss": 0.6561, + "step": 1256 + }, + { + "epoch": 1.33, + "learning_rate": 4.6691331923890063e-05, + "loss": 0.7537, + "step": 1258 + }, + { + "epoch": 1.33, + "learning_rate": 4.668604651162791e-05, + "loss": 0.5566, + "step": 1260 + }, + { + "epoch": 1.33, + "learning_rate": 4.6680761099365756e-05, + "loss": 1.1829, + "step": 1262 + }, + { + "epoch": 1.34, + "learning_rate": 4.6675475687103595e-05, + "loss": 0.8556, + "step": 1264 + }, + { + "epoch": 1.34, + "learning_rate": 4.667019027484144e-05, + "loss": 0.6861, + "step": 1266 + }, + { + "epoch": 1.34, + "learning_rate": 4.666490486257929e-05, + "loss": 0.634, + "step": 1268 + }, + { + "epoch": 1.34, + "learning_rate": 4.6659619450317126e-05, + "loss": 0.6609, + "step": 1270 + }, + { + "epoch": 1.34, + "learning_rate": 4.665433403805497e-05, + "loss": 0.6384, + "step": 1272 + }, + { + "epoch": 1.35, + "learning_rate": 4.664904862579281e-05, + "loss": 0.4699, + "step": 1274 + }, + { + "epoch": 1.35, + "learning_rate": 4.664376321353066e-05, + "loss": 0.6589, + "step": 1276 + }, + { + "epoch": 1.35, + "learning_rate": 4.66384778012685e-05, + "loss": 0.6271, + "step": 1278 + }, + { + "epoch": 1.35, + "learning_rate": 4.663319238900635e-05, + "loss": 0.7925, + "step": 1280 + }, + { + "epoch": 1.36, + "learning_rate": 4.662790697674419e-05, + "loss": 0.7247, + "step": 1282 + }, + { + "epoch": 1.36, + "learning_rate": 4.6622621564482035e-05, + "loss": 0.5185, + "step": 1284 + }, + { + "epoch": 1.36, + "learning_rate": 4.6617336152219874e-05, + "loss": 0.5875, + "step": 1286 + }, + { + "epoch": 1.36, + "learning_rate": 4.661205073995772e-05, + "loss": 0.6124, + "step": 1288 + }, + { + "epoch": 1.36, + "learning_rate": 4.660676532769556e-05, + "loss": 0.7029, + "step": 1290 + }, + { + "epoch": 1.37, + "learning_rate": 4.6601479915433406e-05, + "loss": 0.5547, + "step": 1292 + }, + { + "epoch": 1.37, + "learning_rate": 4.6596194503171245e-05, + "loss": 0.8943, + "step": 1294 + }, + { + "epoch": 1.37, + "learning_rate": 4.659090909090909e-05, + "loss": 0.5128, + "step": 1296 + }, + { + "epoch": 1.37, + "learning_rate": 4.658562367864694e-05, + "loss": 0.5178, + "step": 1298 + }, + { + "epoch": 1.37, + "learning_rate": 4.658033826638478e-05, + "loss": 0.4883, + "step": 1300 + }, + { + "epoch": 1.38, + "learning_rate": 4.657505285412262e-05, + "loss": 0.7211, + "step": 1302 + }, + { + "epoch": 1.38, + "learning_rate": 4.656976744186047e-05, + "loss": 0.4854, + "step": 1304 + }, + { + "epoch": 1.38, + "learning_rate": 4.656448202959831e-05, + "loss": 0.5831, + "step": 1306 + }, + { + "epoch": 1.38, + "learning_rate": 4.6559196617336154e-05, + "loss": 0.4782, + "step": 1308 + }, + { + "epoch": 1.38, + "learning_rate": 4.6553911205074e-05, + "loss": 0.6382, + "step": 1310 + }, + { + "epoch": 1.39, + "learning_rate": 4.654862579281184e-05, + "loss": 0.3921, + "step": 1312 + }, + { + "epoch": 1.39, + "learning_rate": 4.6543340380549685e-05, + "loss": 0.4697, + "step": 1314 + }, + { + "epoch": 1.39, + "learning_rate": 4.653805496828753e-05, + "loss": 0.8628, + "step": 1316 + }, + { + "epoch": 1.39, + "learning_rate": 4.653276955602537e-05, + "loss": 0.6685, + "step": 1318 + }, + { + "epoch": 1.4, + "learning_rate": 4.652748414376322e-05, + "loss": 0.7783, + "step": 1320 + }, + { + "epoch": 1.4, + "learning_rate": 4.652219873150106e-05, + "loss": 0.4967, + "step": 1322 + }, + { + "epoch": 1.4, + "learning_rate": 4.65169133192389e-05, + "loss": 0.7527, + "step": 1324 + }, + { + "epoch": 1.4, + "learning_rate": 4.651162790697675e-05, + "loss": 0.9232, + "step": 1326 + }, + { + "epoch": 1.4, + "learning_rate": 4.650634249471459e-05, + "loss": 0.7172, + "step": 1328 + }, + { + "epoch": 1.41, + "learning_rate": 4.6501057082452433e-05, + "loss": 0.7406, + "step": 1330 + }, + { + "epoch": 1.41, + "learning_rate": 4.649577167019027e-05, + "loss": 0.5102, + "step": 1332 + }, + { + "epoch": 1.41, + "learning_rate": 4.6490486257928126e-05, + "loss": 0.7775, + "step": 1334 + }, + { + "epoch": 1.41, + "learning_rate": 4.6485200845665965e-05, + "loss": 0.6671, + "step": 1336 + }, + { + "epoch": 1.41, + "learning_rate": 4.647991543340381e-05, + "loss": 0.726, + "step": 1338 + }, + { + "epoch": 1.42, + "learning_rate": 4.647463002114165e-05, + "loss": 0.7379, + "step": 1340 + }, + { + "epoch": 1.42, + "learning_rate": 4.6469344608879496e-05, + "loss": 0.5184, + "step": 1342 + }, + { + "epoch": 1.42, + "learning_rate": 4.6464059196617336e-05, + "loss": 0.774, + "step": 1344 + }, + { + "epoch": 1.42, + "learning_rate": 4.645877378435518e-05, + "loss": 0.7082, + "step": 1346 + }, + { + "epoch": 1.42, + "learning_rate": 4.645348837209302e-05, + "loss": 0.655, + "step": 1348 + }, + { + "epoch": 1.43, + "learning_rate": 4.644820295983087e-05, + "loss": 0.5688, + "step": 1350 + }, + { + "epoch": 1.43, + "learning_rate": 4.644291754756871e-05, + "loss": 0.5803, + "step": 1352 + }, + { + "epoch": 1.43, + "learning_rate": 4.643763213530656e-05, + "loss": 0.5136, + "step": 1354 + }, + { + "epoch": 1.43, + "learning_rate": 4.64323467230444e-05, + "loss": 0.5327, + "step": 1356 + }, + { + "epoch": 1.44, + "learning_rate": 4.6427061310782244e-05, + "loss": 0.3571, + "step": 1358 + }, + { + "epoch": 1.44, + "learning_rate": 4.6421775898520084e-05, + "loss": 0.61, + "step": 1360 + }, + { + "epoch": 1.44, + "learning_rate": 4.641649048625793e-05, + "loss": 0.6218, + "step": 1362 + }, + { + "epoch": 1.44, + "learning_rate": 4.6411205073995776e-05, + "loss": 0.5032, + "step": 1364 + }, + { + "epoch": 1.44, + "learning_rate": 4.6405919661733615e-05, + "loss": 0.6186, + "step": 1366 + }, + { + "epoch": 1.45, + "learning_rate": 4.640063424947146e-05, + "loss": 0.5374, + "step": 1368 + }, + { + "epoch": 1.45, + "learning_rate": 4.639534883720931e-05, + "loss": 0.5657, + "step": 1370 + }, + { + "epoch": 1.45, + "learning_rate": 4.639006342494715e-05, + "loss": 0.6641, + "step": 1372 + }, + { + "epoch": 1.45, + "learning_rate": 4.638477801268499e-05, + "loss": 0.6886, + "step": 1374 + }, + { + "epoch": 1.45, + "learning_rate": 4.637949260042284e-05, + "loss": 0.7777, + "step": 1376 + }, + { + "epoch": 1.46, + "learning_rate": 4.637420718816068e-05, + "loss": 0.5609, + "step": 1378 + }, + { + "epoch": 1.46, + "learning_rate": 4.6368921775898524e-05, + "loss": 0.6644, + "step": 1380 + }, + { + "epoch": 1.46, + "learning_rate": 4.636363636363636e-05, + "loss": 0.6055, + "step": 1382 + }, + { + "epoch": 1.46, + "learning_rate": 4.635835095137421e-05, + "loss": 0.579, + "step": 1384 + }, + { + "epoch": 1.47, + "learning_rate": 4.635306553911205e-05, + "loss": 0.6299, + "step": 1386 + }, + { + "epoch": 1.47, + "learning_rate": 4.63477801268499e-05, + "loss": 0.3263, + "step": 1388 + }, + { + "epoch": 1.47, + "learning_rate": 4.634249471458774e-05, + "loss": 0.4689, + "step": 1390 + }, + { + "epoch": 1.47, + "learning_rate": 4.633720930232559e-05, + "loss": 0.596, + "step": 1392 + }, + { + "epoch": 1.47, + "learning_rate": 4.6331923890063426e-05, + "loss": 0.6729, + "step": 1394 + }, + { + "epoch": 1.48, + "learning_rate": 4.632663847780127e-05, + "loss": 0.6163, + "step": 1396 + }, + { + "epoch": 1.48, + "learning_rate": 4.632135306553911e-05, + "loss": 0.5728, + "step": 1398 + }, + { + "epoch": 1.48, + "learning_rate": 4.631606765327696e-05, + "loss": 0.5581, + "step": 1400 + }, + { + "epoch": 1.48, + "learning_rate": 4.63107822410148e-05, + "loss": 0.5471, + "step": 1402 + }, + { + "epoch": 1.48, + "learning_rate": 4.630549682875264e-05, + "loss": 0.6324, + "step": 1404 + }, + { + "epoch": 1.49, + "learning_rate": 4.630021141649049e-05, + "loss": 0.4537, + "step": 1406 + }, + { + "epoch": 1.49, + "learning_rate": 4.6294926004228335e-05, + "loss": 0.5605, + "step": 1408 + }, + { + "epoch": 1.49, + "learning_rate": 4.6289640591966174e-05, + "loss": 0.3762, + "step": 1410 + }, + { + "epoch": 1.49, + "learning_rate": 4.628435517970402e-05, + "loss": 0.4534, + "step": 1412 + }, + { + "epoch": 1.49, + "learning_rate": 4.627906976744186e-05, + "loss": 0.4939, + "step": 1414 + }, + { + "epoch": 1.5, + "learning_rate": 4.6273784355179706e-05, + "loss": 0.5225, + "step": 1416 + }, + { + "epoch": 1.5, + "learning_rate": 4.626849894291755e-05, + "loss": 0.3952, + "step": 1418 + }, + { + "epoch": 1.5, + "learning_rate": 4.626321353065539e-05, + "loss": 0.4838, + "step": 1420 + }, + { + "epoch": 1.5, + "learning_rate": 4.625792811839324e-05, + "loss": 0.5493, + "step": 1422 + }, + { + "epoch": 1.51, + "learning_rate": 4.625264270613108e-05, + "loss": 0.7322, + "step": 1424 + }, + { + "epoch": 1.51, + "learning_rate": 4.624735729386893e-05, + "loss": 0.6912, + "step": 1426 + }, + { + "epoch": 1.51, + "learning_rate": 4.624207188160677e-05, + "loss": 0.5979, + "step": 1428 + }, + { + "epoch": 1.51, + "learning_rate": 4.6236786469344614e-05, + "loss": 0.5229, + "step": 1430 + }, + { + "epoch": 1.51, + "learning_rate": 4.6231501057082454e-05, + "loss": 0.5489, + "step": 1432 + }, + { + "epoch": 1.52, + "learning_rate": 4.62262156448203e-05, + "loss": 0.6757, + "step": 1434 + }, + { + "epoch": 1.52, + "learning_rate": 4.622093023255814e-05, + "loss": 0.8744, + "step": 1436 + }, + { + "epoch": 1.52, + "learning_rate": 4.6215644820295985e-05, + "loss": 0.6652, + "step": 1438 + }, + { + "epoch": 1.52, + "learning_rate": 4.6210359408033824e-05, + "loss": 0.7747, + "step": 1440 + }, + { + "epoch": 1.52, + "learning_rate": 4.620507399577168e-05, + "loss": 0.8047, + "step": 1442 + }, + { + "epoch": 1.53, + "learning_rate": 4.6199788583509517e-05, + "loss": 0.6374, + "step": 1444 + }, + { + "epoch": 1.53, + "learning_rate": 4.619450317124736e-05, + "loss": 0.8522, + "step": 1446 + }, + { + "epoch": 1.53, + "learning_rate": 4.61892177589852e-05, + "loss": 1.1201, + "step": 1448 + }, + { + "epoch": 1.53, + "learning_rate": 4.618393234672305e-05, + "loss": 0.7091, + "step": 1450 + }, + { + "epoch": 1.53, + "learning_rate": 4.617864693446089e-05, + "loss": 0.6903, + "step": 1452 + }, + { + "epoch": 1.54, + "learning_rate": 4.617336152219873e-05, + "loss": 0.7191, + "step": 1454 + }, + { + "epoch": 1.54, + "learning_rate": 4.616807610993657e-05, + "loss": 0.6447, + "step": 1456 + }, + { + "epoch": 1.54, + "learning_rate": 4.616279069767442e-05, + "loss": 0.5796, + "step": 1458 + }, + { + "epoch": 1.54, + "learning_rate": 4.6157505285412265e-05, + "loss": 0.4474, + "step": 1460 + }, + { + "epoch": 1.55, + "learning_rate": 4.615221987315011e-05, + "loss": 0.6297, + "step": 1462 + }, + { + "epoch": 1.55, + "learning_rate": 4.614693446088795e-05, + "loss": 0.7815, + "step": 1464 + }, + { + "epoch": 1.55, + "learning_rate": 4.6141649048625796e-05, + "loss": 0.4299, + "step": 1466 + }, + { + "epoch": 1.55, + "learning_rate": 4.6136363636363635e-05, + "loss": 0.6843, + "step": 1468 + }, + { + "epoch": 1.55, + "learning_rate": 4.613107822410148e-05, + "loss": 0.3767, + "step": 1470 + }, + { + "epoch": 1.56, + "learning_rate": 4.612579281183933e-05, + "loss": 0.6761, + "step": 1472 + }, + { + "epoch": 1.56, + "learning_rate": 4.612050739957717e-05, + "loss": 0.9297, + "step": 1474 + }, + { + "epoch": 1.56, + "learning_rate": 4.611522198731501e-05, + "loss": 0.8386, + "step": 1476 + }, + { + "epoch": 1.56, + "learning_rate": 4.610993657505286e-05, + "loss": 0.8722, + "step": 1478 + }, + { + "epoch": 1.56, + "learning_rate": 4.6104651162790705e-05, + "loss": 0.8477, + "step": 1480 + }, + { + "epoch": 1.57, + "learning_rate": 4.6099365750528544e-05, + "loss": 0.5865, + "step": 1482 + }, + { + "epoch": 1.57, + "learning_rate": 4.609408033826639e-05, + "loss": 0.7178, + "step": 1484 + }, + { + "epoch": 1.57, + "learning_rate": 4.608879492600423e-05, + "loss": 0.5507, + "step": 1486 + }, + { + "epoch": 1.57, + "learning_rate": 4.6083509513742076e-05, + "loss": 0.5724, + "step": 1488 + }, + { + "epoch": 1.58, + "learning_rate": 4.6078224101479915e-05, + "loss": 0.5513, + "step": 1490 + }, + { + "epoch": 1.58, + "learning_rate": 4.607293868921776e-05, + "loss": 0.7183, + "step": 1492 + }, + { + "epoch": 1.58, + "learning_rate": 4.60676532769556e-05, + "loss": 0.4628, + "step": 1494 + }, + { + "epoch": 1.58, + "learning_rate": 4.606236786469345e-05, + "loss": 0.4151, + "step": 1496 + }, + { + "epoch": 1.58, + "learning_rate": 4.605708245243129e-05, + "loss": 0.665, + "step": 1498 + }, + { + "epoch": 1.59, + "learning_rate": 4.605179704016914e-05, + "loss": 0.4722, + "step": 1500 + }, + { + "epoch": 1.59, + "eval_cer": 0.06184098033627814, + "eval_loss": 0.5636065006256104, + "eval_runtime": 127.4465, + "eval_samples_per_second": 6.599, + "eval_steps_per_second": 0.832, + "step": 1500 + }, + { + "epoch": 1.59, + "learning_rate": 4.604651162790698e-05, + "loss": 0.4211, + "step": 1502 + }, + { + "epoch": 1.59, + "learning_rate": 4.6041226215644824e-05, + "loss": 0.7062, + "step": 1504 + }, + { + "epoch": 1.59, + "learning_rate": 4.603594080338266e-05, + "loss": 0.5417, + "step": 1506 + }, + { + "epoch": 1.59, + "learning_rate": 4.603065539112051e-05, + "loss": 0.6403, + "step": 1508 + }, + { + "epoch": 1.6, + "learning_rate": 4.602536997885835e-05, + "loss": 0.6087, + "step": 1510 + }, + { + "epoch": 1.6, + "learning_rate": 4.6020084566596194e-05, + "loss": 0.5924, + "step": 1512 + }, + { + "epoch": 1.6, + "learning_rate": 4.601479915433404e-05, + "loss": 0.457, + "step": 1514 + }, + { + "epoch": 1.6, + "learning_rate": 4.600951374207189e-05, + "loss": 0.5073, + "step": 1516 + }, + { + "epoch": 1.6, + "learning_rate": 4.6004228329809726e-05, + "loss": 0.4704, + "step": 1518 + }, + { + "epoch": 1.61, + "learning_rate": 4.599894291754757e-05, + "loss": 0.6511, + "step": 1520 + }, + { + "epoch": 1.61, + "learning_rate": 4.599365750528541e-05, + "loss": 0.5483, + "step": 1522 + }, + { + "epoch": 1.61, + "learning_rate": 4.598837209302326e-05, + "loss": 0.5997, + "step": 1524 + }, + { + "epoch": 1.61, + "learning_rate": 4.59830866807611e-05, + "loss": 0.4576, + "step": 1526 + }, + { + "epoch": 1.62, + "learning_rate": 4.597780126849894e-05, + "loss": 0.6991, + "step": 1528 + }, + { + "epoch": 1.62, + "learning_rate": 4.597251585623679e-05, + "loss": 0.7717, + "step": 1530 + }, + { + "epoch": 1.62, + "learning_rate": 4.5967230443974635e-05, + "loss": 0.6123, + "step": 1532 + }, + { + "epoch": 1.62, + "learning_rate": 4.596194503171248e-05, + "loss": 0.5343, + "step": 1534 + }, + { + "epoch": 1.62, + "learning_rate": 4.595665961945032e-05, + "loss": 0.4572, + "step": 1536 + }, + { + "epoch": 1.63, + "learning_rate": 4.5951374207188166e-05, + "loss": 0.8137, + "step": 1538 + }, + { + "epoch": 1.63, + "learning_rate": 4.5946088794926005e-05, + "loss": 0.4088, + "step": 1540 + }, + { + "epoch": 1.63, + "learning_rate": 4.594080338266385e-05, + "loss": 0.5861, + "step": 1542 + }, + { + "epoch": 1.63, + "learning_rate": 4.593551797040169e-05, + "loss": 0.5733, + "step": 1544 + }, + { + "epoch": 1.63, + "learning_rate": 4.593023255813954e-05, + "loss": 0.6435, + "step": 1546 + }, + { + "epoch": 1.64, + "learning_rate": 4.5924947145877376e-05, + "loss": 0.4275, + "step": 1548 + }, + { + "epoch": 1.64, + "learning_rate": 4.591966173361523e-05, + "loss": 0.4566, + "step": 1550 + }, + { + "epoch": 1.64, + "learning_rate": 4.591437632135307e-05, + "loss": 0.5268, + "step": 1552 + }, + { + "epoch": 1.64, + "learning_rate": 4.5909090909090914e-05, + "loss": 0.4623, + "step": 1554 + }, + { + "epoch": 1.64, + "learning_rate": 4.5903805496828754e-05, + "loss": 0.5294, + "step": 1556 + }, + { + "epoch": 1.65, + "learning_rate": 4.58985200845666e-05, + "loss": 0.3979, + "step": 1558 + }, + { + "epoch": 1.65, + "learning_rate": 4.589323467230444e-05, + "loss": 0.5877, + "step": 1560 + }, + { + "epoch": 1.65, + "learning_rate": 4.5887949260042285e-05, + "loss": 0.6169, + "step": 1562 + }, + { + "epoch": 1.65, + "learning_rate": 4.5882663847780124e-05, + "loss": 0.6217, + "step": 1564 + }, + { + "epoch": 1.66, + "learning_rate": 4.587737843551797e-05, + "loss": 0.5586, + "step": 1566 + }, + { + "epoch": 1.66, + "learning_rate": 4.5872093023255816e-05, + "loss": 0.8468, + "step": 1568 + }, + { + "epoch": 1.66, + "learning_rate": 4.586680761099366e-05, + "loss": 0.673, + "step": 1570 + }, + { + "epoch": 1.66, + "learning_rate": 4.58615221987315e-05, + "loss": 0.6664, + "step": 1572 + }, + { + "epoch": 1.66, + "learning_rate": 4.585623678646935e-05, + "loss": 0.644, + "step": 1574 + }, + { + "epoch": 1.67, + "learning_rate": 4.5850951374207194e-05, + "loss": 0.7479, + "step": 1576 + }, + { + "epoch": 1.67, + "learning_rate": 4.584566596194503e-05, + "loss": 0.6967, + "step": 1578 + }, + { + "epoch": 1.67, + "learning_rate": 4.584038054968288e-05, + "loss": 0.4362, + "step": 1580 + }, + { + "epoch": 1.67, + "learning_rate": 4.583509513742072e-05, + "loss": 0.6123, + "step": 1582 + }, + { + "epoch": 1.67, + "learning_rate": 4.5829809725158565e-05, + "loss": 0.6258, + "step": 1584 + }, + { + "epoch": 1.68, + "learning_rate": 4.582452431289641e-05, + "loss": 0.6673, + "step": 1586 + }, + { + "epoch": 1.68, + "learning_rate": 4.581923890063426e-05, + "loss": 0.7197, + "step": 1588 + }, + { + "epoch": 1.68, + "learning_rate": 4.5813953488372096e-05, + "loss": 0.6184, + "step": 1590 + }, + { + "epoch": 1.68, + "learning_rate": 4.580866807610994e-05, + "loss": 0.4795, + "step": 1592 + }, + { + "epoch": 1.68, + "learning_rate": 4.580338266384778e-05, + "loss": 0.5485, + "step": 1594 + }, + { + "epoch": 1.69, + "learning_rate": 4.579809725158563e-05, + "loss": 0.6806, + "step": 1596 + }, + { + "epoch": 1.69, + "learning_rate": 4.579281183932347e-05, + "loss": 0.735, + "step": 1598 + }, + { + "epoch": 1.69, + "learning_rate": 4.578752642706131e-05, + "loss": 0.6628, + "step": 1600 + }, + { + "epoch": 1.69, + "learning_rate": 4.578224101479915e-05, + "loss": 0.7737, + "step": 1602 + }, + { + "epoch": 1.7, + "learning_rate": 4.5776955602537005e-05, + "loss": 0.7288, + "step": 1604 + }, + { + "epoch": 1.7, + "learning_rate": 4.5771670190274844e-05, + "loss": 0.4968, + "step": 1606 + }, + { + "epoch": 1.7, + "learning_rate": 4.576638477801269e-05, + "loss": 0.4477, + "step": 1608 + }, + { + "epoch": 1.7, + "learning_rate": 4.576109936575053e-05, + "loss": 0.5333, + "step": 1610 + }, + { + "epoch": 1.7, + "learning_rate": 4.5755813953488375e-05, + "loss": 0.4593, + "step": 1612 + }, + { + "epoch": 1.71, + "learning_rate": 4.5750528541226215e-05, + "loss": 0.516, + "step": 1614 + }, + { + "epoch": 1.71, + "learning_rate": 4.574524312896406e-05, + "loss": 0.5752, + "step": 1616 + }, + { + "epoch": 1.71, + "learning_rate": 4.57399577167019e-05, + "loss": 0.3994, + "step": 1618 + }, + { + "epoch": 1.71, + "learning_rate": 4.5734672304439746e-05, + "loss": 0.4733, + "step": 1620 + }, + { + "epoch": 1.71, + "learning_rate": 4.572938689217759e-05, + "loss": 0.5311, + "step": 1622 + }, + { + "epoch": 1.72, + "learning_rate": 4.572410147991544e-05, + "loss": 0.7522, + "step": 1624 + }, + { + "epoch": 1.72, + "learning_rate": 4.571881606765328e-05, + "loss": 0.5047, + "step": 1626 + }, + { + "epoch": 1.72, + "learning_rate": 4.5713530655391124e-05, + "loss": 0.6271, + "step": 1628 + }, + { + "epoch": 1.72, + "learning_rate": 4.570824524312897e-05, + "loss": 0.5289, + "step": 1630 + }, + { + "epoch": 1.73, + "learning_rate": 4.570295983086681e-05, + "loss": 0.5299, + "step": 1632 + }, + { + "epoch": 1.73, + "learning_rate": 4.5697674418604655e-05, + "loss": 0.6072, + "step": 1634 + }, + { + "epoch": 1.73, + "learning_rate": 4.5692389006342494e-05, + "loss": 0.5663, + "step": 1636 + }, + { + "epoch": 1.73, + "learning_rate": 4.568710359408034e-05, + "loss": 0.366, + "step": 1638 + }, + { + "epoch": 1.73, + "learning_rate": 4.5681818181818186e-05, + "loss": 0.5508, + "step": 1640 + }, + { + "epoch": 1.74, + "learning_rate": 4.567653276955603e-05, + "loss": 0.3564, + "step": 1642 + }, + { + "epoch": 1.74, + "learning_rate": 4.567124735729387e-05, + "loss": 0.8547, + "step": 1644 + }, + { + "epoch": 1.74, + "learning_rate": 4.566596194503172e-05, + "loss": 0.7649, + "step": 1646 + }, + { + "epoch": 1.74, + "learning_rate": 4.566067653276956e-05, + "loss": 0.866, + "step": 1648 + }, + { + "epoch": 1.74, + "learning_rate": 4.56553911205074e-05, + "loss": 0.3577, + "step": 1650 + }, + { + "epoch": 1.75, + "learning_rate": 4.565010570824524e-05, + "loss": 0.4708, + "step": 1652 + }, + { + "epoch": 1.75, + "learning_rate": 4.564482029598309e-05, + "loss": 0.5484, + "step": 1654 + }, + { + "epoch": 1.75, + "learning_rate": 4.563953488372093e-05, + "loss": 0.5227, + "step": 1656 + }, + { + "epoch": 1.75, + "learning_rate": 4.563424947145878e-05, + "loss": 0.4581, + "step": 1658 + }, + { + "epoch": 1.75, + "learning_rate": 4.562896405919662e-05, + "loss": 0.4865, + "step": 1660 + }, + { + "epoch": 1.76, + "learning_rate": 4.5623678646934466e-05, + "loss": 0.4035, + "step": 1662 + }, + { + "epoch": 1.76, + "learning_rate": 4.5618393234672305e-05, + "loss": 0.445, + "step": 1664 + }, + { + "epoch": 1.76, + "learning_rate": 4.561310782241015e-05, + "loss": 0.3442, + "step": 1666 + }, + { + "epoch": 1.76, + "learning_rate": 4.560782241014799e-05, + "loss": 0.5735, + "step": 1668 + }, + { + "epoch": 1.77, + "learning_rate": 4.560253699788584e-05, + "loss": 0.4627, + "step": 1670 + }, + { + "epoch": 1.77, + "learning_rate": 4.5597251585623676e-05, + "loss": 0.7131, + "step": 1672 + }, + { + "epoch": 1.77, + "learning_rate": 4.559196617336152e-05, + "loss": 0.4328, + "step": 1674 + }, + { + "epoch": 1.77, + "learning_rate": 4.558668076109937e-05, + "loss": 0.5691, + "step": 1676 + }, + { + "epoch": 1.77, + "learning_rate": 4.5581395348837214e-05, + "loss": 0.6523, + "step": 1678 + }, + { + "epoch": 1.78, + "learning_rate": 4.5576109936575053e-05, + "loss": 0.6022, + "step": 1680 + }, + { + "epoch": 1.78, + "learning_rate": 4.55708245243129e-05, + "loss": 0.7375, + "step": 1682 + }, + { + "epoch": 1.78, + "learning_rate": 4.5565539112050746e-05, + "loss": 0.6585, + "step": 1684 + }, + { + "epoch": 1.78, + "learning_rate": 4.5560253699788585e-05, + "loss": 0.8858, + "step": 1686 + }, + { + "epoch": 1.78, + "learning_rate": 4.555496828752643e-05, + "loss": 0.67, + "step": 1688 + }, + { + "epoch": 1.79, + "learning_rate": 4.554968287526427e-05, + "loss": 0.7282, + "step": 1690 + }, + { + "epoch": 1.79, + "learning_rate": 4.5544397463002116e-05, + "loss": 0.6527, + "step": 1692 + }, + { + "epoch": 1.79, + "learning_rate": 4.553911205073996e-05, + "loss": 0.4792, + "step": 1694 + }, + { + "epoch": 1.79, + "learning_rate": 4.553382663847781e-05, + "loss": 0.745, + "step": 1696 + }, + { + "epoch": 1.79, + "learning_rate": 4.552854122621565e-05, + "loss": 0.6185, + "step": 1698 + }, + { + "epoch": 1.8, + "learning_rate": 4.5523255813953494e-05, + "loss": 0.446, + "step": 1700 + }, + { + "epoch": 1.8, + "learning_rate": 4.551797040169133e-05, + "loss": 0.5001, + "step": 1702 + }, + { + "epoch": 1.8, + "learning_rate": 4.551268498942918e-05, + "loss": 0.375, + "step": 1704 + }, + { + "epoch": 1.8, + "learning_rate": 4.550739957716702e-05, + "loss": 0.3736, + "step": 1706 + }, + { + "epoch": 1.81, + "learning_rate": 4.5502114164904864e-05, + "loss": 0.5857, + "step": 1708 + }, + { + "epoch": 1.81, + "learning_rate": 4.5496828752642704e-05, + "loss": 0.4895, + "step": 1710 + }, + { + "epoch": 1.81, + "learning_rate": 4.549154334038055e-05, + "loss": 0.6071, + "step": 1712 + }, + { + "epoch": 1.81, + "learning_rate": 4.5486257928118396e-05, + "loss": 0.5467, + "step": 1714 + }, + { + "epoch": 1.81, + "learning_rate": 4.548097251585624e-05, + "loss": 0.511, + "step": 1716 + }, + { + "epoch": 1.82, + "learning_rate": 4.547568710359408e-05, + "loss": 0.8558, + "step": 1718 + }, + { + "epoch": 1.82, + "learning_rate": 4.547040169133193e-05, + "loss": 0.8073, + "step": 1720 + }, + { + "epoch": 1.82, + "learning_rate": 4.5465116279069766e-05, + "loss": 0.6995, + "step": 1722 + }, + { + "epoch": 1.82, + "learning_rate": 4.545983086680761e-05, + "loss": 0.798, + "step": 1724 + }, + { + "epoch": 1.82, + "learning_rate": 4.545454545454546e-05, + "loss": 0.4806, + "step": 1726 + }, + { + "epoch": 1.83, + "learning_rate": 4.54492600422833e-05, + "loss": 0.4374, + "step": 1728 + }, + { + "epoch": 1.83, + "learning_rate": 4.5443974630021144e-05, + "loss": 0.7523, + "step": 1730 + }, + { + "epoch": 1.83, + "learning_rate": 4.543868921775899e-05, + "loss": 0.5914, + "step": 1732 + }, + { + "epoch": 1.83, + "learning_rate": 4.543340380549683e-05, + "loss": 0.4302, + "step": 1734 + }, + { + "epoch": 1.84, + "learning_rate": 4.5428118393234675e-05, + "loss": 0.7145, + "step": 1736 + }, + { + "epoch": 1.84, + "learning_rate": 4.542283298097252e-05, + "loss": 0.3885, + "step": 1738 + }, + { + "epoch": 1.84, + "learning_rate": 4.541754756871036e-05, + "loss": 0.5144, + "step": 1740 + }, + { + "epoch": 1.84, + "learning_rate": 4.541226215644821e-05, + "loss": 0.6347, + "step": 1742 + }, + { + "epoch": 1.84, + "learning_rate": 4.5406976744186046e-05, + "loss": 0.6682, + "step": 1744 + }, + { + "epoch": 1.85, + "learning_rate": 4.540169133192389e-05, + "loss": 0.422, + "step": 1746 + }, + { + "epoch": 1.85, + "learning_rate": 4.539640591966173e-05, + "loss": 0.417, + "step": 1748 + }, + { + "epoch": 1.85, + "learning_rate": 4.5391120507399584e-05, + "loss": 0.4943, + "step": 1750 + }, + { + "epoch": 1.85, + "learning_rate": 4.5385835095137423e-05, + "loss": 0.6295, + "step": 1752 + }, + { + "epoch": 1.85, + "learning_rate": 4.538054968287527e-05, + "loss": 0.6961, + "step": 1754 + }, + { + "epoch": 1.86, + "learning_rate": 4.537526427061311e-05, + "loss": 0.6064, + "step": 1756 + }, + { + "epoch": 1.86, + "learning_rate": 4.5369978858350955e-05, + "loss": 0.5825, + "step": 1758 + }, + { + "epoch": 1.86, + "learning_rate": 4.5364693446088794e-05, + "loss": 0.5187, + "step": 1760 + }, + { + "epoch": 1.86, + "learning_rate": 4.535940803382664e-05, + "loss": 0.7391, + "step": 1762 + }, + { + "epoch": 1.86, + "learning_rate": 4.535412262156448e-05, + "loss": 0.7237, + "step": 1764 + }, + { + "epoch": 1.87, + "learning_rate": 4.5348837209302326e-05, + "loss": 0.6358, + "step": 1766 + }, + { + "epoch": 1.87, + "learning_rate": 4.534355179704017e-05, + "loss": 0.4581, + "step": 1768 + }, + { + "epoch": 1.87, + "learning_rate": 4.533826638477802e-05, + "loss": 0.3613, + "step": 1770 + }, + { + "epoch": 1.87, + "learning_rate": 4.533298097251586e-05, + "loss": 0.7856, + "step": 1772 + }, + { + "epoch": 1.88, + "learning_rate": 4.53276955602537e-05, + "loss": 0.4416, + "step": 1774 + }, + { + "epoch": 1.88, + "learning_rate": 4.532241014799154e-05, + "loss": 0.5197, + "step": 1776 + }, + { + "epoch": 1.88, + "learning_rate": 4.531712473572939e-05, + "loss": 0.536, + "step": 1778 + }, + { + "epoch": 1.88, + "learning_rate": 4.5311839323467234e-05, + "loss": 0.5705, + "step": 1780 + }, + { + "epoch": 1.88, + "learning_rate": 4.5306553911205074e-05, + "loss": 0.4337, + "step": 1782 + }, + { + "epoch": 1.89, + "learning_rate": 4.530126849894292e-05, + "loss": 0.4389, + "step": 1784 + }, + { + "epoch": 1.89, + "learning_rate": 4.5295983086680766e-05, + "loss": 0.5438, + "step": 1786 + }, + { + "epoch": 1.89, + "learning_rate": 4.5290697674418605e-05, + "loss": 0.4809, + "step": 1788 + }, + { + "epoch": 1.89, + "learning_rate": 4.528541226215645e-05, + "loss": 0.4446, + "step": 1790 + }, + { + "epoch": 1.89, + "learning_rate": 4.52801268498943e-05, + "loss": 0.7674, + "step": 1792 + }, + { + "epoch": 1.9, + "learning_rate": 4.5274841437632136e-05, + "loss": 0.4956, + "step": 1794 + }, + { + "epoch": 1.9, + "learning_rate": 4.526955602536998e-05, + "loss": 0.5586, + "step": 1796 + }, + { + "epoch": 1.9, + "learning_rate": 4.526427061310782e-05, + "loss": 0.5171, + "step": 1798 + }, + { + "epoch": 1.9, + "learning_rate": 4.525898520084567e-05, + "loss": 0.6569, + "step": 1800 + }, + { + "epoch": 1.9, + "learning_rate": 4.525369978858351e-05, + "loss": 0.4957, + "step": 1802 + }, + { + "epoch": 1.91, + "learning_rate": 4.524841437632136e-05, + "loss": 0.4242, + "step": 1804 + }, + { + "epoch": 1.91, + "learning_rate": 4.52431289640592e-05, + "loss": 0.422, + "step": 1806 + }, + { + "epoch": 1.91, + "learning_rate": 4.5237843551797045e-05, + "loss": 0.4738, + "step": 1808 + }, + { + "epoch": 1.91, + "learning_rate": 4.5232558139534885e-05, + "loss": 0.4787, + "step": 1810 + }, + { + "epoch": 1.92, + "learning_rate": 4.522727272727273e-05, + "loss": 0.4131, + "step": 1812 + }, + { + "epoch": 1.92, + "learning_rate": 4.522198731501057e-05, + "loss": 0.5565, + "step": 1814 + }, + { + "epoch": 1.92, + "learning_rate": 4.5216701902748416e-05, + "loss": 0.401, + "step": 1816 + }, + { + "epoch": 1.92, + "learning_rate": 4.5211416490486255e-05, + "loss": 0.4694, + "step": 1818 + }, + { + "epoch": 1.92, + "learning_rate": 4.52061310782241e-05, + "loss": 0.4531, + "step": 1820 + }, + { + "epoch": 1.93, + "learning_rate": 4.520084566596195e-05, + "loss": 0.5779, + "step": 1822 + }, + { + "epoch": 1.93, + "learning_rate": 4.5195560253699794e-05, + "loss": 0.5072, + "step": 1824 + }, + { + "epoch": 1.93, + "learning_rate": 4.519027484143763e-05, + "loss": 0.7621, + "step": 1826 + }, + { + "epoch": 1.93, + "learning_rate": 4.518498942917548e-05, + "loss": 0.45, + "step": 1828 + }, + { + "epoch": 1.93, + "learning_rate": 4.517970401691332e-05, + "loss": 0.663, + "step": 1830 + }, + { + "epoch": 1.94, + "learning_rate": 4.5174418604651164e-05, + "loss": 0.4742, + "step": 1832 + }, + { + "epoch": 1.94, + "learning_rate": 4.516913319238901e-05, + "loss": 0.7337, + "step": 1834 + }, + { + "epoch": 1.94, + "learning_rate": 4.516384778012685e-05, + "loss": 0.3277, + "step": 1836 + }, + { + "epoch": 1.94, + "learning_rate": 4.5158562367864696e-05, + "loss": 0.4131, + "step": 1838 + }, + { + "epoch": 1.95, + "learning_rate": 4.515327695560254e-05, + "loss": 0.5652, + "step": 1840 + }, + { + "epoch": 1.95, + "learning_rate": 4.514799154334039e-05, + "loss": 0.6251, + "step": 1842 + }, + { + "epoch": 1.95, + "learning_rate": 4.514270613107823e-05, + "loss": 0.428, + "step": 1844 + }, + { + "epoch": 1.95, + "learning_rate": 4.513742071881607e-05, + "loss": 0.3634, + "step": 1846 + }, + { + "epoch": 1.95, + "learning_rate": 4.513213530655391e-05, + "loss": 0.4997, + "step": 1848 + }, + { + "epoch": 1.96, + "learning_rate": 4.512684989429176e-05, + "loss": 0.4841, + "step": 1850 + }, + { + "epoch": 1.96, + "learning_rate": 4.51215644820296e-05, + "loss": 0.3301, + "step": 1852 + }, + { + "epoch": 1.96, + "learning_rate": 4.5116279069767444e-05, + "loss": 0.4506, + "step": 1854 + }, + { + "epoch": 1.96, + "learning_rate": 4.511099365750528e-05, + "loss": 0.4708, + "step": 1856 + }, + { + "epoch": 1.96, + "learning_rate": 4.5105708245243136e-05, + "loss": 0.613, + "step": 1858 + }, + { + "epoch": 1.97, + "learning_rate": 4.5100422832980975e-05, + "loss": 0.534, + "step": 1860 + }, + { + "epoch": 1.97, + "learning_rate": 4.509513742071882e-05, + "loss": 0.6985, + "step": 1862 + }, + { + "epoch": 1.97, + "learning_rate": 4.508985200845666e-05, + "loss": 0.6257, + "step": 1864 + }, + { + "epoch": 1.97, + "learning_rate": 4.5084566596194507e-05, + "loss": 0.61, + "step": 1866 + }, + { + "epoch": 1.97, + "learning_rate": 4.5079281183932346e-05, + "loss": 0.4835, + "step": 1868 + }, + { + "epoch": 1.98, + "learning_rate": 4.507399577167019e-05, + "loss": 0.6178, + "step": 1870 + }, + { + "epoch": 1.98, + "learning_rate": 4.506871035940803e-05, + "loss": 0.4642, + "step": 1872 + }, + { + "epoch": 1.98, + "learning_rate": 4.506342494714588e-05, + "loss": 0.616, + "step": 1874 + }, + { + "epoch": 1.98, + "learning_rate": 4.505813953488372e-05, + "loss": 0.6076, + "step": 1876 + }, + { + "epoch": 1.99, + "learning_rate": 4.505285412262157e-05, + "loss": 0.5103, + "step": 1878 + }, + { + "epoch": 1.99, + "learning_rate": 4.504756871035941e-05, + "loss": 0.5121, + "step": 1880 + }, + { + "epoch": 1.99, + "learning_rate": 4.5042283298097255e-05, + "loss": 0.3769, + "step": 1882 + }, + { + "epoch": 1.99, + "learning_rate": 4.5036997885835094e-05, + "loss": 0.3962, + "step": 1884 + }, + { + "epoch": 1.99, + "learning_rate": 4.503171247357294e-05, + "loss": 0.3739, + "step": 1886 + }, + { + "epoch": 2.0, + "learning_rate": 4.5026427061310786e-05, + "loss": 0.3313, + "step": 1888 + }, + { + "epoch": 2.0, + "learning_rate": 4.5021141649048625e-05, + "loss": 0.463, + "step": 1890 + }, + { + "epoch": 2.0, + "learning_rate": 4.501585623678647e-05, + "loss": 0.5002, + "step": 1892 + }, + { + "epoch": 2.0, + "learning_rate": 4.501057082452432e-05, + "loss": 0.3315, + "step": 1894 + }, + { + "epoch": 2.0, + "learning_rate": 4.5005285412262164e-05, + "loss": 0.339, + "step": 1896 + }, + { + "epoch": 2.01, + "learning_rate": 4.5e-05, + "loss": 0.468, + "step": 1898 + }, + { + "epoch": 2.01, + "learning_rate": 4.499471458773785e-05, + "loss": 0.4289, + "step": 1900 + }, + { + "epoch": 2.01, + "learning_rate": 4.498942917547569e-05, + "loss": 0.4292, + "step": 1902 + }, + { + "epoch": 2.01, + "learning_rate": 4.4984143763213534e-05, + "loss": 0.609, + "step": 1904 + }, + { + "epoch": 2.01, + "learning_rate": 4.4978858350951374e-05, + "loss": 0.4683, + "step": 1906 + }, + { + "epoch": 2.02, + "learning_rate": 4.497357293868922e-05, + "loss": 0.4534, + "step": 1908 + }, + { + "epoch": 2.02, + "learning_rate": 4.496828752642706e-05, + "loss": 0.5481, + "step": 1910 + }, + { + "epoch": 2.02, + "learning_rate": 4.496300211416491e-05, + "loss": 0.4727, + "step": 1912 + }, + { + "epoch": 2.02, + "learning_rate": 4.495771670190275e-05, + "loss": 0.4213, + "step": 1914 + }, + { + "epoch": 2.03, + "learning_rate": 4.49524312896406e-05, + "loss": 0.4907, + "step": 1916 + }, + { + "epoch": 2.03, + "learning_rate": 4.4947145877378436e-05, + "loss": 0.5339, + "step": 1918 + }, + { + "epoch": 2.03, + "learning_rate": 4.494186046511628e-05, + "loss": 0.4394, + "step": 1920 + }, + { + "epoch": 2.03, + "learning_rate": 4.493657505285412e-05, + "loss": 0.5539, + "step": 1922 + }, + { + "epoch": 2.03, + "learning_rate": 4.493128964059197e-05, + "loss": 0.4241, + "step": 1924 + }, + { + "epoch": 2.04, + "learning_rate": 4.492600422832981e-05, + "loss": 0.4557, + "step": 1926 + }, + { + "epoch": 2.04, + "learning_rate": 4.492071881606765e-05, + "loss": 0.4038, + "step": 1928 + }, + { + "epoch": 2.04, + "learning_rate": 4.49154334038055e-05, + "loss": 0.4505, + "step": 1930 + }, + { + "epoch": 2.04, + "learning_rate": 4.4910147991543345e-05, + "loss": 0.4646, + "step": 1932 + }, + { + "epoch": 2.04, + "learning_rate": 4.4904862579281184e-05, + "loss": 0.401, + "step": 1934 + }, + { + "epoch": 2.05, + "learning_rate": 4.489957716701903e-05, + "loss": 0.3632, + "step": 1936 + }, + { + "epoch": 2.05, + "learning_rate": 4.489429175475687e-05, + "loss": 0.3835, + "step": 1938 + }, + { + "epoch": 2.05, + "learning_rate": 4.4889006342494716e-05, + "loss": 0.5795, + "step": 1940 + }, + { + "epoch": 2.05, + "learning_rate": 4.488372093023256e-05, + "loss": 0.4528, + "step": 1942 + }, + { + "epoch": 2.05, + "learning_rate": 4.48784355179704e-05, + "loss": 0.4788, + "step": 1944 + }, + { + "epoch": 2.06, + "learning_rate": 4.487315010570825e-05, + "loss": 0.3118, + "step": 1946 + }, + { + "epoch": 2.06, + "learning_rate": 4.486786469344609e-05, + "loss": 0.548, + "step": 1948 + }, + { + "epoch": 2.06, + "learning_rate": 4.486257928118394e-05, + "loss": 0.5891, + "step": 1950 + }, + { + "epoch": 2.06, + "learning_rate": 4.485729386892178e-05, + "loss": 0.5405, + "step": 1952 + }, + { + "epoch": 2.07, + "learning_rate": 4.4852008456659625e-05, + "loss": 0.3774, + "step": 1954 + }, + { + "epoch": 2.07, + "learning_rate": 4.4846723044397464e-05, + "loss": 0.6053, + "step": 1956 + }, + { + "epoch": 2.07, + "learning_rate": 4.484143763213531e-05, + "loss": 0.557, + "step": 1958 + }, + { + "epoch": 2.07, + "learning_rate": 4.483615221987315e-05, + "loss": 0.3202, + "step": 1960 + }, + { + "epoch": 2.07, + "learning_rate": 4.4830866807610995e-05, + "loss": 0.4831, + "step": 1962 + }, + { + "epoch": 2.08, + "learning_rate": 4.4825581395348835e-05, + "loss": 0.3305, + "step": 1964 + }, + { + "epoch": 2.08, + "learning_rate": 4.482029598308669e-05, + "loss": 0.2836, + "step": 1966 + }, + { + "epoch": 2.08, + "learning_rate": 4.481501057082453e-05, + "loss": 0.3986, + "step": 1968 + }, + { + "epoch": 2.08, + "learning_rate": 4.480972515856237e-05, + "loss": 0.3521, + "step": 1970 + }, + { + "epoch": 2.08, + "learning_rate": 4.480443974630021e-05, + "loss": 0.5026, + "step": 1972 + }, + { + "epoch": 2.09, + "learning_rate": 4.479915433403806e-05, + "loss": 0.287, + "step": 1974 + }, + { + "epoch": 2.09, + "learning_rate": 4.47938689217759e-05, + "loss": 0.3817, + "step": 1976 + }, + { + "epoch": 2.09, + "learning_rate": 4.4788583509513744e-05, + "loss": 0.4855, + "step": 1978 + }, + { + "epoch": 2.09, + "learning_rate": 4.478329809725158e-05, + "loss": 0.4728, + "step": 1980 + }, + { + "epoch": 2.1, + "learning_rate": 4.477801268498943e-05, + "loss": 0.3979, + "step": 1982 + }, + { + "epoch": 2.1, + "learning_rate": 4.4772727272727275e-05, + "loss": 0.3166, + "step": 1984 + }, + { + "epoch": 2.1, + "learning_rate": 4.476744186046512e-05, + "loss": 0.5586, + "step": 1986 + }, + { + "epoch": 2.1, + "learning_rate": 4.476215644820296e-05, + "loss": 0.5657, + "step": 1988 + }, + { + "epoch": 2.1, + "learning_rate": 4.4756871035940806e-05, + "loss": 0.4058, + "step": 1990 + }, + { + "epoch": 2.11, + "learning_rate": 4.475158562367865e-05, + "loss": 0.33, + "step": 1992 + }, + { + "epoch": 2.11, + "learning_rate": 4.474630021141649e-05, + "loss": 0.3553, + "step": 1994 + }, + { + "epoch": 2.11, + "learning_rate": 4.474101479915434e-05, + "loss": 0.3877, + "step": 1996 + }, + { + "epoch": 2.11, + "learning_rate": 4.473572938689218e-05, + "loss": 0.481, + "step": 1998 + }, + { + "epoch": 2.11, + "learning_rate": 4.473044397463002e-05, + "loss": 0.4098, + "step": 2000 + }, + { + "epoch": 2.11, + "eval_cer": 0.054830436021658595, + "eval_loss": 0.4985881447792053, + "eval_runtime": 132.0888, + "eval_samples_per_second": 6.367, + "eval_steps_per_second": 0.802, + "step": 2000 + }, + { + "epoch": 2.12, + "learning_rate": 4.472515856236787e-05, + "loss": 0.482, + "step": 2002 + }, + { + "epoch": 2.12, + "learning_rate": 4.4719873150105715e-05, + "loss": 0.3529, + "step": 2004 + }, + { + "epoch": 2.12, + "learning_rate": 4.4714587737843555e-05, + "loss": 0.4144, + "step": 2006 + }, + { + "epoch": 2.12, + "learning_rate": 4.47093023255814e-05, + "loss": 0.2902, + "step": 2008 + }, + { + "epoch": 2.12, + "learning_rate": 4.470401691331924e-05, + "loss": 0.4057, + "step": 2010 + }, + { + "epoch": 2.13, + "learning_rate": 4.4698731501057086e-05, + "loss": 0.4743, + "step": 2012 + }, + { + "epoch": 2.13, + "learning_rate": 4.4693446088794925e-05, + "loss": 0.3409, + "step": 2014 + }, + { + "epoch": 2.13, + "learning_rate": 4.468816067653277e-05, + "loss": 0.4337, + "step": 2016 + }, + { + "epoch": 2.13, + "learning_rate": 4.468287526427061e-05, + "loss": 0.5333, + "step": 2018 + }, + { + "epoch": 2.14, + "learning_rate": 4.467758985200846e-05, + "loss": 0.2719, + "step": 2020 + }, + { + "epoch": 2.14, + "learning_rate": 4.46723044397463e-05, + "loss": 0.4625, + "step": 2022 + }, + { + "epoch": 2.14, + "learning_rate": 4.466701902748415e-05, + "loss": 0.2435, + "step": 2024 + }, + { + "epoch": 2.14, + "learning_rate": 4.466173361522199e-05, + "loss": 0.4023, + "step": 2026 + }, + { + "epoch": 2.14, + "learning_rate": 4.4656448202959834e-05, + "loss": 0.4343, + "step": 2028 + }, + { + "epoch": 2.15, + "learning_rate": 4.465116279069767e-05, + "loss": 0.3526, + "step": 2030 + }, + { + "epoch": 2.15, + "learning_rate": 4.464587737843552e-05, + "loss": 0.4709, + "step": 2032 + }, + { + "epoch": 2.15, + "learning_rate": 4.464059196617336e-05, + "loss": 0.5203, + "step": 2034 + }, + { + "epoch": 2.15, + "learning_rate": 4.4635306553911205e-05, + "loss": 0.7966, + "step": 2036 + }, + { + "epoch": 2.15, + "learning_rate": 4.463002114164905e-05, + "loss": 0.5275, + "step": 2038 + }, + { + "epoch": 2.16, + "learning_rate": 4.46247357293869e-05, + "loss": 0.4834, + "step": 2040 + }, + { + "epoch": 2.16, + "learning_rate": 4.4619450317124736e-05, + "loss": 0.6763, + "step": 2042 + }, + { + "epoch": 2.16, + "learning_rate": 4.461416490486258e-05, + "loss": 0.5105, + "step": 2044 + }, + { + "epoch": 2.16, + "learning_rate": 4.460887949260043e-05, + "loss": 0.3415, + "step": 2046 + }, + { + "epoch": 2.16, + "learning_rate": 4.460359408033827e-05, + "loss": 0.5119, + "step": 2048 + }, + { + "epoch": 2.17, + "learning_rate": 4.4598308668076114e-05, + "loss": 0.3535, + "step": 2050 + }, + { + "epoch": 2.17, + "learning_rate": 4.459302325581395e-05, + "loss": 0.3661, + "step": 2052 + }, + { + "epoch": 2.17, + "learning_rate": 4.45877378435518e-05, + "loss": 0.4584, + "step": 2054 + }, + { + "epoch": 2.17, + "learning_rate": 4.4582452431289645e-05, + "loss": 0.5095, + "step": 2056 + }, + { + "epoch": 2.18, + "learning_rate": 4.457716701902749e-05, + "loss": 0.7405, + "step": 2058 + }, + { + "epoch": 2.18, + "learning_rate": 4.457188160676533e-05, + "loss": 0.412, + "step": 2060 + }, + { + "epoch": 2.18, + "learning_rate": 4.4566596194503176e-05, + "loss": 0.3918, + "step": 2062 + }, + { + "epoch": 2.18, + "learning_rate": 4.4561310782241016e-05, + "loss": 0.4374, + "step": 2064 + }, + { + "epoch": 2.18, + "learning_rate": 4.455602536997886e-05, + "loss": 0.2783, + "step": 2066 + }, + { + "epoch": 2.19, + "learning_rate": 4.45507399577167e-05, + "loss": 0.5863, + "step": 2068 + }, + { + "epoch": 2.19, + "learning_rate": 4.454545454545455e-05, + "loss": 0.3459, + "step": 2070 + }, + { + "epoch": 2.19, + "learning_rate": 4.4540169133192386e-05, + "loss": 0.3781, + "step": 2072 + }, + { + "epoch": 2.19, + "learning_rate": 4.453488372093024e-05, + "loss": 0.2417, + "step": 2074 + }, + { + "epoch": 2.19, + "learning_rate": 4.452959830866808e-05, + "loss": 0.3893, + "step": 2076 + }, + { + "epoch": 2.2, + "learning_rate": 4.4524312896405925e-05, + "loss": 0.4426, + "step": 2078 + }, + { + "epoch": 2.2, + "learning_rate": 4.4519027484143764e-05, + "loss": 0.372, + "step": 2080 + }, + { + "epoch": 2.2, + "learning_rate": 4.451374207188161e-05, + "loss": 0.5169, + "step": 2082 + }, + { + "epoch": 2.2, + "learning_rate": 4.450845665961945e-05, + "loss": 0.484, + "step": 2084 + }, + { + "epoch": 2.21, + "learning_rate": 4.4503171247357295e-05, + "loss": 0.3746, + "step": 2086 + }, + { + "epoch": 2.21, + "learning_rate": 4.4497885835095135e-05, + "loss": 0.3725, + "step": 2088 + }, + { + "epoch": 2.21, + "learning_rate": 4.449260042283298e-05, + "loss": 0.4112, + "step": 2090 + }, + { + "epoch": 2.21, + "learning_rate": 4.448731501057083e-05, + "loss": 0.3573, + "step": 2092 + }, + { + "epoch": 2.21, + "learning_rate": 4.448202959830867e-05, + "loss": 0.6657, + "step": 2094 + }, + { + "epoch": 2.22, + "learning_rate": 4.447674418604651e-05, + "loss": 0.4883, + "step": 2096 + }, + { + "epoch": 2.22, + "learning_rate": 4.447145877378436e-05, + "loss": 0.4407, + "step": 2098 + }, + { + "epoch": 2.22, + "learning_rate": 4.4466173361522204e-05, + "loss": 0.4828, + "step": 2100 + }, + { + "epoch": 2.22, + "learning_rate": 4.4460887949260043e-05, + "loss": 0.4325, + "step": 2102 + }, + { + "epoch": 2.22, + "learning_rate": 4.445560253699789e-05, + "loss": 0.4517, + "step": 2104 + }, + { + "epoch": 2.23, + "learning_rate": 4.445031712473573e-05, + "loss": 0.3728, + "step": 2106 + }, + { + "epoch": 2.23, + "learning_rate": 4.4445031712473575e-05, + "loss": 0.4571, + "step": 2108 + }, + { + "epoch": 2.23, + "learning_rate": 4.443974630021142e-05, + "loss": 0.5384, + "step": 2110 + }, + { + "epoch": 2.23, + "learning_rate": 4.443446088794927e-05, + "loss": 0.7312, + "step": 2112 + }, + { + "epoch": 2.23, + "learning_rate": 4.4429175475687106e-05, + "loss": 0.4921, + "step": 2114 + }, + { + "epoch": 2.24, + "learning_rate": 4.442389006342495e-05, + "loss": 0.3399, + "step": 2116 + }, + { + "epoch": 2.24, + "learning_rate": 4.441860465116279e-05, + "loss": 0.4376, + "step": 2118 + }, + { + "epoch": 2.24, + "learning_rate": 4.441331923890064e-05, + "loss": 0.3879, + "step": 2120 + }, + { + "epoch": 2.24, + "learning_rate": 4.440803382663848e-05, + "loss": 0.272, + "step": 2122 + }, + { + "epoch": 2.25, + "learning_rate": 4.440274841437632e-05, + "loss": 0.5572, + "step": 2124 + }, + { + "epoch": 2.25, + "learning_rate": 4.439746300211416e-05, + "loss": 0.4251, + "step": 2126 + }, + { + "epoch": 2.25, + "learning_rate": 4.4392177589852015e-05, + "loss": 0.4728, + "step": 2128 + }, + { + "epoch": 2.25, + "learning_rate": 4.4386892177589854e-05, + "loss": 0.6148, + "step": 2130 + }, + { + "epoch": 2.25, + "learning_rate": 4.43816067653277e-05, + "loss": 0.4623, + "step": 2132 + }, + { + "epoch": 2.26, + "learning_rate": 4.437632135306554e-05, + "loss": 0.4297, + "step": 2134 + }, + { + "epoch": 2.26, + "learning_rate": 4.4371035940803386e-05, + "loss": 0.2851, + "step": 2136 + }, + { + "epoch": 2.26, + "learning_rate": 4.4365750528541225e-05, + "loss": 0.4021, + "step": 2138 + }, + { + "epoch": 2.26, + "learning_rate": 4.436046511627907e-05, + "loss": 0.3805, + "step": 2140 + }, + { + "epoch": 2.26, + "learning_rate": 4.435517970401691e-05, + "loss": 0.3594, + "step": 2142 + }, + { + "epoch": 2.27, + "learning_rate": 4.4349894291754756e-05, + "loss": 0.2067, + "step": 2144 + }, + { + "epoch": 2.27, + "learning_rate": 4.43446088794926e-05, + "loss": 0.4969, + "step": 2146 + }, + { + "epoch": 2.27, + "learning_rate": 4.433932346723045e-05, + "loss": 0.3155, + "step": 2148 + }, + { + "epoch": 2.27, + "learning_rate": 4.433403805496829e-05, + "loss": 0.4758, + "step": 2150 + }, + { + "epoch": 2.27, + "learning_rate": 4.4328752642706134e-05, + "loss": 0.2722, + "step": 2152 + }, + { + "epoch": 2.28, + "learning_rate": 4.432346723044398e-05, + "loss": 0.5713, + "step": 2154 + }, + { + "epoch": 2.28, + "learning_rate": 4.431818181818182e-05, + "loss": 0.4699, + "step": 2156 + }, + { + "epoch": 2.28, + "learning_rate": 4.4312896405919665e-05, + "loss": 0.4833, + "step": 2158 + }, + { + "epoch": 2.28, + "learning_rate": 4.4307610993657505e-05, + "loss": 0.2738, + "step": 2160 + }, + { + "epoch": 2.29, + "learning_rate": 4.430232558139535e-05, + "loss": 0.5175, + "step": 2162 + }, + { + "epoch": 2.29, + "learning_rate": 4.42970401691332e-05, + "loss": 0.3853, + "step": 2164 + }, + { + "epoch": 2.29, + "learning_rate": 4.429175475687104e-05, + "loss": 0.519, + "step": 2166 + }, + { + "epoch": 2.29, + "learning_rate": 4.428646934460888e-05, + "loss": 0.4999, + "step": 2168 + }, + { + "epoch": 2.29, + "learning_rate": 4.428118393234673e-05, + "loss": 0.2495, + "step": 2170 + }, + { + "epoch": 2.3, + "learning_rate": 4.427589852008457e-05, + "loss": 0.4331, + "step": 2172 + }, + { + "epoch": 2.3, + "learning_rate": 4.4270613107822413e-05, + "loss": 0.4553, + "step": 2174 + }, + { + "epoch": 2.3, + "learning_rate": 4.426532769556025e-05, + "loss": 0.4201, + "step": 2176 + }, + { + "epoch": 2.3, + "learning_rate": 4.42600422832981e-05, + "loss": 0.567, + "step": 2178 + }, + { + "epoch": 2.3, + "learning_rate": 4.425475687103594e-05, + "loss": 0.2782, + "step": 2180 + }, + { + "epoch": 2.31, + "learning_rate": 4.424947145877379e-05, + "loss": 0.4802, + "step": 2182 + }, + { + "epoch": 2.31, + "learning_rate": 4.424418604651163e-05, + "loss": 0.3771, + "step": 2184 + }, + { + "epoch": 2.31, + "learning_rate": 4.4238900634249476e-05, + "loss": 0.2445, + "step": 2186 + }, + { + "epoch": 2.31, + "learning_rate": 4.4233615221987316e-05, + "loss": 0.4321, + "step": 2188 + }, + { + "epoch": 2.32, + "learning_rate": 4.422832980972516e-05, + "loss": 0.3165, + "step": 2190 + }, + { + "epoch": 2.32, + "learning_rate": 4.4223044397463e-05, + "loss": 0.4181, + "step": 2192 + }, + { + "epoch": 2.32, + "learning_rate": 4.421775898520085e-05, + "loss": 0.3771, + "step": 2194 + }, + { + "epoch": 2.32, + "learning_rate": 4.421247357293869e-05, + "loss": 0.4594, + "step": 2196 + }, + { + "epoch": 2.32, + "learning_rate": 4.420718816067653e-05, + "loss": 0.357, + "step": 2198 + }, + { + "epoch": 2.33, + "learning_rate": 4.420190274841438e-05, + "loss": 0.3837, + "step": 2200 + }, + { + "epoch": 2.33, + "learning_rate": 4.4196617336152224e-05, + "loss": 0.3, + "step": 2202 + }, + { + "epoch": 2.33, + "learning_rate": 4.4191331923890064e-05, + "loss": 0.5577, + "step": 2204 + }, + { + "epoch": 2.33, + "learning_rate": 4.418604651162791e-05, + "loss": 0.7798, + "step": 2206 + }, + { + "epoch": 2.33, + "learning_rate": 4.4180761099365756e-05, + "loss": 0.3581, + "step": 2208 + }, + { + "epoch": 2.34, + "learning_rate": 4.4175475687103595e-05, + "loss": 0.5661, + "step": 2210 + }, + { + "epoch": 2.34, + "learning_rate": 4.417019027484144e-05, + "loss": 0.4161, + "step": 2212 + }, + { + "epoch": 2.34, + "learning_rate": 4.416490486257928e-05, + "loss": 0.3675, + "step": 2214 + }, + { + "epoch": 2.34, + "learning_rate": 4.4159619450317126e-05, + "loss": 0.5372, + "step": 2216 + }, + { + "epoch": 2.34, + "learning_rate": 4.415433403805497e-05, + "loss": 0.5398, + "step": 2218 + }, + { + "epoch": 2.35, + "learning_rate": 4.414904862579282e-05, + "loss": 0.3739, + "step": 2220 + }, + { + "epoch": 2.35, + "learning_rate": 4.414376321353066e-05, + "loss": 0.5355, + "step": 2222 + }, + { + "epoch": 2.35, + "learning_rate": 4.4138477801268504e-05, + "loss": 0.5392, + "step": 2224 + }, + { + "epoch": 2.35, + "learning_rate": 4.413319238900634e-05, + "loss": 0.4832, + "step": 2226 + }, + { + "epoch": 2.36, + "learning_rate": 4.412790697674419e-05, + "loss": 0.4194, + "step": 2228 + }, + { + "epoch": 2.36, + "learning_rate": 4.412262156448203e-05, + "loss": 0.5088, + "step": 2230 + }, + { + "epoch": 2.36, + "learning_rate": 4.4117336152219875e-05, + "loss": 0.401, + "step": 2232 + }, + { + "epoch": 2.36, + "learning_rate": 4.4112050739957714e-05, + "loss": 0.5165, + "step": 2234 + }, + { + "epoch": 2.36, + "learning_rate": 4.410676532769557e-05, + "loss": 0.4715, + "step": 2236 + }, + { + "epoch": 2.37, + "learning_rate": 4.4101479915433406e-05, + "loss": 0.3201, + "step": 2238 + }, + { + "epoch": 2.37, + "learning_rate": 4.409619450317125e-05, + "loss": 0.4222, + "step": 2240 + }, + { + "epoch": 2.37, + "learning_rate": 4.409090909090909e-05, + "loss": 0.3206, + "step": 2242 + }, + { + "epoch": 2.37, + "learning_rate": 4.408562367864694e-05, + "loss": 0.3141, + "step": 2244 + }, + { + "epoch": 2.37, + "learning_rate": 4.408033826638478e-05, + "loss": 0.5056, + "step": 2246 + }, + { + "epoch": 2.38, + "learning_rate": 4.407505285412262e-05, + "loss": 0.4578, + "step": 2248 + }, + { + "epoch": 2.38, + "learning_rate": 4.406976744186047e-05, + "loss": 0.3295, + "step": 2250 + }, + { + "epoch": 2.38, + "learning_rate": 4.406448202959831e-05, + "loss": 0.3376, + "step": 2252 + }, + { + "epoch": 2.38, + "learning_rate": 4.4059196617336154e-05, + "loss": 0.4404, + "step": 2254 + }, + { + "epoch": 2.38, + "learning_rate": 4.4053911205074e-05, + "loss": 0.3081, + "step": 2256 + }, + { + "epoch": 2.39, + "learning_rate": 4.404862579281184e-05, + "loss": 0.2706, + "step": 2258 + }, + { + "epoch": 2.39, + "learning_rate": 4.4043340380549686e-05, + "loss": 0.5501, + "step": 2260 + }, + { + "epoch": 2.39, + "learning_rate": 4.403805496828753e-05, + "loss": 0.3977, + "step": 2262 + }, + { + "epoch": 2.39, + "learning_rate": 4.403276955602537e-05, + "loss": 0.3854, + "step": 2264 + }, + { + "epoch": 2.4, + "learning_rate": 4.402748414376322e-05, + "loss": 0.5107, + "step": 2266 + }, + { + "epoch": 2.4, + "learning_rate": 4.4022198731501056e-05, + "loss": 0.3551, + "step": 2268 + }, + { + "epoch": 2.4, + "learning_rate": 4.40169133192389e-05, + "loss": 0.376, + "step": 2270 + }, + { + "epoch": 2.4, + "learning_rate": 4.401162790697675e-05, + "loss": 0.3767, + "step": 2272 + }, + { + "epoch": 2.4, + "learning_rate": 4.4006342494714594e-05, + "loss": 0.3593, + "step": 2274 + }, + { + "epoch": 2.41, + "learning_rate": 4.4001057082452434e-05, + "loss": 0.5396, + "step": 2276 + }, + { + "epoch": 2.41, + "learning_rate": 4.399577167019028e-05, + "loss": 0.4324, + "step": 2278 + }, + { + "epoch": 2.41, + "learning_rate": 4.399048625792812e-05, + "loss": 0.0967, + "step": 2280 + }, + { + "epoch": 2.41, + "learning_rate": 4.3985200845665965e-05, + "loss": 0.4564, + "step": 2282 + }, + { + "epoch": 2.41, + "learning_rate": 4.3979915433403804e-05, + "loss": 0.3536, + "step": 2284 + }, + { + "epoch": 2.42, + "learning_rate": 4.397463002114165e-05, + "loss": 0.5021, + "step": 2286 + }, + { + "epoch": 2.42, + "learning_rate": 4.396934460887949e-05, + "loss": 0.555, + "step": 2288 + }, + { + "epoch": 2.42, + "learning_rate": 4.396405919661734e-05, + "loss": 0.5308, + "step": 2290 + }, + { + "epoch": 2.42, + "learning_rate": 4.395877378435518e-05, + "loss": 0.4026, + "step": 2292 + }, + { + "epoch": 2.42, + "learning_rate": 4.395348837209303e-05, + "loss": 0.5569, + "step": 2294 + }, + { + "epoch": 2.43, + "learning_rate": 4.394820295983087e-05, + "loss": 0.5554, + "step": 2296 + }, + { + "epoch": 2.43, + "learning_rate": 4.394291754756871e-05, + "loss": 0.5833, + "step": 2298 + }, + { + "epoch": 2.43, + "learning_rate": 4.393763213530655e-05, + "loss": 0.5731, + "step": 2300 + }, + { + "epoch": 2.43, + "learning_rate": 4.39323467230444e-05, + "loss": 0.2924, + "step": 2302 + }, + { + "epoch": 2.44, + "learning_rate": 4.3927061310782245e-05, + "loss": 0.3258, + "step": 2304 + }, + { + "epoch": 2.44, + "learning_rate": 4.3921775898520084e-05, + "loss": 0.4652, + "step": 2306 + }, + { + "epoch": 2.44, + "learning_rate": 4.391649048625793e-05, + "loss": 0.4098, + "step": 2308 + }, + { + "epoch": 2.44, + "learning_rate": 4.3911205073995776e-05, + "loss": 0.4637, + "step": 2310 + }, + { + "epoch": 2.44, + "learning_rate": 4.390591966173362e-05, + "loss": 0.5208, + "step": 2312 + }, + { + "epoch": 2.45, + "learning_rate": 4.390063424947146e-05, + "loss": 0.3701, + "step": 2314 + }, + { + "epoch": 2.45, + "learning_rate": 4.389534883720931e-05, + "loss": 0.3613, + "step": 2316 + }, + { + "epoch": 2.45, + "learning_rate": 4.389006342494715e-05, + "loss": 0.5958, + "step": 2318 + }, + { + "epoch": 2.45, + "learning_rate": 4.388477801268499e-05, + "loss": 0.3932, + "step": 2320 + }, + { + "epoch": 2.45, + "learning_rate": 4.387949260042283e-05, + "loss": 0.5768, + "step": 2322 + }, + { + "epoch": 2.46, + "learning_rate": 4.387420718816068e-05, + "loss": 0.6017, + "step": 2324 + }, + { + "epoch": 2.46, + "learning_rate": 4.3868921775898524e-05, + "loss": 0.4225, + "step": 2326 + }, + { + "epoch": 2.46, + "learning_rate": 4.386363636363637e-05, + "loss": 0.5372, + "step": 2328 + }, + { + "epoch": 2.46, + "learning_rate": 4.385835095137421e-05, + "loss": 0.4131, + "step": 2330 + }, + { + "epoch": 2.47, + "learning_rate": 4.3853065539112056e-05, + "loss": 0.5359, + "step": 2332 + }, + { + "epoch": 2.47, + "learning_rate": 4.3847780126849895e-05, + "loss": 0.2422, + "step": 2334 + }, + { + "epoch": 2.47, + "learning_rate": 4.384249471458774e-05, + "loss": 0.3424, + "step": 2336 + }, + { + "epoch": 2.47, + "learning_rate": 4.383720930232558e-05, + "loss": 0.4612, + "step": 2338 + }, + { + "epoch": 2.47, + "learning_rate": 4.3831923890063426e-05, + "loss": 0.3877, + "step": 2340 + }, + { + "epoch": 2.48, + "learning_rate": 4.3826638477801266e-05, + "loss": 0.7339, + "step": 2342 + }, + { + "epoch": 2.48, + "learning_rate": 4.382135306553912e-05, + "loss": 0.4984, + "step": 2344 + }, + { + "epoch": 2.48, + "learning_rate": 4.381606765327696e-05, + "loss": 0.6876, + "step": 2346 + }, + { + "epoch": 2.48, + "learning_rate": 4.3810782241014804e-05, + "loss": 0.4758, + "step": 2348 + }, + { + "epoch": 2.48, + "learning_rate": 4.380549682875264e-05, + "loss": 0.324, + "step": 2350 + }, + { + "epoch": 2.49, + "learning_rate": 4.380021141649049e-05, + "loss": 0.4365, + "step": 2352 + }, + { + "epoch": 2.49, + "learning_rate": 4.379492600422833e-05, + "loss": 0.3447, + "step": 2354 + }, + { + "epoch": 2.49, + "learning_rate": 4.3789640591966174e-05, + "loss": 0.3975, + "step": 2356 + }, + { + "epoch": 2.49, + "learning_rate": 4.378435517970402e-05, + "loss": 0.3833, + "step": 2358 + }, + { + "epoch": 2.49, + "learning_rate": 4.377906976744186e-05, + "loss": 0.386, + "step": 2360 + }, + { + "epoch": 2.5, + "learning_rate": 4.3773784355179706e-05, + "loss": 0.5437, + "step": 2362 + }, + { + "epoch": 2.5, + "learning_rate": 4.376849894291755e-05, + "loss": 0.4142, + "step": 2364 + }, + { + "epoch": 2.5, + "learning_rate": 4.37632135306554e-05, + "loss": 0.4588, + "step": 2366 + }, + { + "epoch": 2.5, + "learning_rate": 4.375792811839324e-05, + "loss": 0.3383, + "step": 2368 + }, + { + "epoch": 2.51, + "learning_rate": 4.375264270613108e-05, + "loss": 0.3192, + "step": 2370 + }, + { + "epoch": 2.51, + "learning_rate": 4.374735729386892e-05, + "loss": 0.5349, + "step": 2372 + }, + { + "epoch": 2.51, + "learning_rate": 4.374207188160677e-05, + "loss": 0.2711, + "step": 2374 + }, + { + "epoch": 2.51, + "learning_rate": 4.373678646934461e-05, + "loss": 0.3692, + "step": 2376 + }, + { + "epoch": 2.51, + "learning_rate": 4.3731501057082454e-05, + "loss": 0.2674, + "step": 2378 + }, + { + "epoch": 2.52, + "learning_rate": 4.372621564482029e-05, + "loss": 0.3274, + "step": 2380 + }, + { + "epoch": 2.52, + "learning_rate": 4.3720930232558146e-05, + "loss": 0.4498, + "step": 2382 + }, + { + "epoch": 2.52, + "learning_rate": 4.3715644820295985e-05, + "loss": 0.4294, + "step": 2384 + }, + { + "epoch": 2.52, + "learning_rate": 4.371035940803383e-05, + "loss": 0.4257, + "step": 2386 + }, + { + "epoch": 2.52, + "learning_rate": 4.370507399577167e-05, + "loss": 0.4678, + "step": 2388 + }, + { + "epoch": 2.53, + "learning_rate": 4.369978858350952e-05, + "loss": 0.3635, + "step": 2390 + }, + { + "epoch": 2.53, + "learning_rate": 4.3694503171247356e-05, + "loss": 0.5947, + "step": 2392 + }, + { + "epoch": 2.53, + "learning_rate": 4.36892177589852e-05, + "loss": 0.4308, + "step": 2394 + }, + { + "epoch": 2.53, + "learning_rate": 4.368393234672304e-05, + "loss": 0.3585, + "step": 2396 + }, + { + "epoch": 2.53, + "learning_rate": 4.367864693446089e-05, + "loss": 0.844, + "step": 2398 + }, + { + "epoch": 2.54, + "learning_rate": 4.3673361522198734e-05, + "loss": 0.4748, + "step": 2400 + }, + { + "epoch": 2.54, + "learning_rate": 4.366807610993658e-05, + "loss": 0.3876, + "step": 2402 + }, + { + "epoch": 2.54, + "learning_rate": 4.366279069767442e-05, + "loss": 0.4596, + "step": 2404 + }, + { + "epoch": 2.54, + "learning_rate": 4.3657505285412265e-05, + "loss": 0.2919, + "step": 2406 + }, + { + "epoch": 2.55, + "learning_rate": 4.3652219873150104e-05, + "loss": 0.4279, + "step": 2408 + }, + { + "epoch": 2.55, + "learning_rate": 4.364693446088795e-05, + "loss": 0.3172, + "step": 2410 + }, + { + "epoch": 2.55, + "learning_rate": 4.3641649048625796e-05, + "loss": 0.3116, + "step": 2412 + }, + { + "epoch": 2.55, + "learning_rate": 4.3636363636363636e-05, + "loss": 0.4824, + "step": 2414 + }, + { + "epoch": 2.55, + "learning_rate": 4.363107822410148e-05, + "loss": 0.3576, + "step": 2416 + }, + { + "epoch": 2.56, + "learning_rate": 4.362579281183933e-05, + "loss": 0.28, + "step": 2418 + }, + { + "epoch": 2.56, + "learning_rate": 4.3620507399577174e-05, + "loss": 0.3368, + "step": 2420 + }, + { + "epoch": 2.56, + "learning_rate": 4.361522198731501e-05, + "loss": 0.399, + "step": 2422 + }, + { + "epoch": 2.56, + "learning_rate": 4.360993657505286e-05, + "loss": 0.3882, + "step": 2424 + }, + { + "epoch": 2.56, + "learning_rate": 4.36046511627907e-05, + "loss": 0.3533, + "step": 2426 + }, + { + "epoch": 2.57, + "learning_rate": 4.3599365750528544e-05, + "loss": 0.4115, + "step": 2428 + }, + { + "epoch": 2.57, + "learning_rate": 4.3594080338266384e-05, + "loss": 0.4398, + "step": 2430 + }, + { + "epoch": 2.57, + "learning_rate": 4.358879492600423e-05, + "loss": 0.3063, + "step": 2432 + }, + { + "epoch": 2.57, + "learning_rate": 4.358350951374207e-05, + "loss": 0.3138, + "step": 2434 + }, + { + "epoch": 2.58, + "learning_rate": 4.357822410147992e-05, + "loss": 0.3835, + "step": 2436 + }, + { + "epoch": 2.58, + "learning_rate": 4.357293868921776e-05, + "loss": 0.493, + "step": 2438 + }, + { + "epoch": 2.58, + "learning_rate": 4.356765327695561e-05, + "loss": 0.4659, + "step": 2440 + }, + { + "epoch": 2.58, + "learning_rate": 4.3562367864693447e-05, + "loss": 0.4226, + "step": 2442 + }, + { + "epoch": 2.58, + "learning_rate": 4.355708245243129e-05, + "loss": 0.3218, + "step": 2444 + }, + { + "epoch": 2.59, + "learning_rate": 4.355179704016913e-05, + "loss": 0.271, + "step": 2446 + }, + { + "epoch": 2.59, + "learning_rate": 4.354651162790698e-05, + "loss": 0.4356, + "step": 2448 + }, + { + "epoch": 2.59, + "learning_rate": 4.354122621564482e-05, + "loss": 0.4688, + "step": 2450 + }, + { + "epoch": 2.59, + "learning_rate": 4.353594080338266e-05, + "loss": 0.4016, + "step": 2452 + }, + { + "epoch": 2.59, + "learning_rate": 4.353065539112051e-05, + "loss": 0.4853, + "step": 2454 + }, + { + "epoch": 2.6, + "learning_rate": 4.3525369978858355e-05, + "loss": 0.4232, + "step": 2456 + }, + { + "epoch": 2.6, + "learning_rate": 4.3520084566596195e-05, + "loss": 0.2393, + "step": 2458 + }, + { + "epoch": 2.6, + "learning_rate": 4.351479915433404e-05, + "loss": 0.3287, + "step": 2460 + }, + { + "epoch": 2.6, + "learning_rate": 4.350951374207189e-05, + "loss": 0.3494, + "step": 2462 + }, + { + "epoch": 2.6, + "learning_rate": 4.3504228329809726e-05, + "loss": 0.5148, + "step": 2464 + }, + { + "epoch": 2.61, + "learning_rate": 4.349894291754757e-05, + "loss": 0.4271, + "step": 2466 + }, + { + "epoch": 2.61, + "learning_rate": 4.349365750528541e-05, + "loss": 0.2264, + "step": 2468 + }, + { + "epoch": 2.61, + "learning_rate": 4.348837209302326e-05, + "loss": 0.3973, + "step": 2470 + }, + { + "epoch": 2.61, + "learning_rate": 4.3483086680761104e-05, + "loss": 0.3798, + "step": 2472 + }, + { + "epoch": 2.62, + "learning_rate": 4.347780126849895e-05, + "loss": 0.4355, + "step": 2474 + }, + { + "epoch": 2.62, + "learning_rate": 4.347251585623679e-05, + "loss": 0.5765, + "step": 2476 + }, + { + "epoch": 2.62, + "learning_rate": 4.3467230443974635e-05, + "loss": 0.4032, + "step": 2478 + }, + { + "epoch": 2.62, + "learning_rate": 4.3461945031712474e-05, + "loss": 0.348, + "step": 2480 + }, + { + "epoch": 2.62, + "learning_rate": 4.345665961945032e-05, + "loss": 0.2763, + "step": 2482 + }, + { + "epoch": 2.63, + "learning_rate": 4.345137420718816e-05, + "loss": 0.4957, + "step": 2484 + }, + { + "epoch": 2.63, + "learning_rate": 4.3446088794926006e-05, + "loss": 0.3058, + "step": 2486 + }, + { + "epoch": 2.63, + "learning_rate": 4.3440803382663845e-05, + "loss": 0.4072, + "step": 2488 + }, + { + "epoch": 2.63, + "learning_rate": 4.34355179704017e-05, + "loss": 0.3913, + "step": 2490 + }, + { + "epoch": 2.63, + "learning_rate": 4.343023255813954e-05, + "loss": 0.2427, + "step": 2492 + }, + { + "epoch": 2.64, + "learning_rate": 4.342494714587738e-05, + "loss": 0.3805, + "step": 2494 + }, + { + "epoch": 2.64, + "learning_rate": 4.341966173361522e-05, + "loss": 0.3677, + "step": 2496 + }, + { + "epoch": 2.64, + "learning_rate": 4.341437632135307e-05, + "loss": 0.3233, + "step": 2498 + }, + { + "epoch": 2.64, + "learning_rate": 4.340909090909091e-05, + "loss": 0.3522, + "step": 2500 + }, + { + "epoch": 2.64, + "eval_cer": 0.0402393844400114, + "eval_loss": 0.47415897250175476, + "eval_runtime": 131.0076, + "eval_samples_per_second": 6.419, + "eval_steps_per_second": 0.809, + "step": 2500 + }, + { + "epoch": 2.64, + "learning_rate": 4.3403805496828754e-05, + "loss": 0.258, + "step": 2502 + }, + { + "epoch": 2.65, + "learning_rate": 4.339852008456659e-05, + "loss": 0.3387, + "step": 2504 + }, + { + "epoch": 2.65, + "learning_rate": 4.339323467230444e-05, + "loss": 0.3951, + "step": 2506 + }, + { + "epoch": 2.65, + "learning_rate": 4.3387949260042285e-05, + "loss": 0.5066, + "step": 2508 + }, + { + "epoch": 2.65, + "learning_rate": 4.338266384778013e-05, + "loss": 0.3595, + "step": 2510 + }, + { + "epoch": 2.66, + "learning_rate": 4.337737843551797e-05, + "loss": 0.3599, + "step": 2512 + }, + { + "epoch": 2.66, + "learning_rate": 4.337209302325582e-05, + "loss": 0.3667, + "step": 2514 + }, + { + "epoch": 2.66, + "learning_rate": 4.336680761099366e-05, + "loss": 0.4277, + "step": 2516 + }, + { + "epoch": 2.66, + "learning_rate": 4.33615221987315e-05, + "loss": 0.5447, + "step": 2518 + }, + { + "epoch": 2.66, + "learning_rate": 4.335623678646935e-05, + "loss": 0.339, + "step": 2520 + }, + { + "epoch": 2.67, + "learning_rate": 4.335095137420719e-05, + "loss": 0.4673, + "step": 2522 + }, + { + "epoch": 2.67, + "learning_rate": 4.334566596194503e-05, + "loss": 0.408, + "step": 2524 + }, + { + "epoch": 2.67, + "learning_rate": 4.334038054968288e-05, + "loss": 0.3587, + "step": 2526 + }, + { + "epoch": 2.67, + "learning_rate": 4.3335095137420725e-05, + "loss": 0.464, + "step": 2528 + }, + { + "epoch": 2.67, + "learning_rate": 4.3329809725158565e-05, + "loss": 0.3021, + "step": 2530 + }, + { + "epoch": 2.68, + "learning_rate": 4.332452431289641e-05, + "loss": 0.4235, + "step": 2532 + }, + { + "epoch": 2.68, + "learning_rate": 4.331923890063425e-05, + "loss": 0.413, + "step": 2534 + }, + { + "epoch": 2.68, + "learning_rate": 4.3313953488372096e-05, + "loss": 0.3187, + "step": 2536 + }, + { + "epoch": 2.68, + "learning_rate": 4.3308668076109935e-05, + "loss": 0.4356, + "step": 2538 + }, + { + "epoch": 2.68, + "learning_rate": 4.330338266384778e-05, + "loss": 0.4303, + "step": 2540 + }, + { + "epoch": 2.69, + "learning_rate": 4.329809725158562e-05, + "loss": 0.4085, + "step": 2542 + }, + { + "epoch": 2.69, + "learning_rate": 4.3292811839323474e-05, + "loss": 0.4981, + "step": 2544 + }, + { + "epoch": 2.69, + "learning_rate": 4.328752642706131e-05, + "loss": 0.6279, + "step": 2546 + }, + { + "epoch": 2.69, + "learning_rate": 4.328224101479916e-05, + "loss": 0.5337, + "step": 2548 + }, + { + "epoch": 2.7, + "learning_rate": 4.3276955602537e-05, + "loss": 0.3601, + "step": 2550 + }, + { + "epoch": 2.7, + "learning_rate": 4.3271670190274844e-05, + "loss": 0.4367, + "step": 2552 + }, + { + "epoch": 2.7, + "learning_rate": 4.3266384778012684e-05, + "loss": 0.276, + "step": 2554 + }, + { + "epoch": 2.7, + "learning_rate": 4.326109936575053e-05, + "loss": 0.3276, + "step": 2556 + }, + { + "epoch": 2.7, + "learning_rate": 4.325581395348837e-05, + "loss": 0.4869, + "step": 2558 + }, + { + "epoch": 2.71, + "learning_rate": 4.3250528541226215e-05, + "loss": 0.468, + "step": 2560 + }, + { + "epoch": 2.71, + "learning_rate": 4.324524312896406e-05, + "loss": 0.3476, + "step": 2562 + }, + { + "epoch": 2.71, + "learning_rate": 4.323995771670191e-05, + "loss": 0.4213, + "step": 2564 + }, + { + "epoch": 2.71, + "learning_rate": 4.3234672304439746e-05, + "loss": 0.2384, + "step": 2566 + }, + { + "epoch": 2.71, + "learning_rate": 4.322938689217759e-05, + "loss": 0.4982, + "step": 2568 + }, + { + "epoch": 2.72, + "learning_rate": 4.322410147991544e-05, + "loss": 0.372, + "step": 2570 + }, + { + "epoch": 2.72, + "learning_rate": 4.321881606765328e-05, + "loss": 0.3857, + "step": 2572 + }, + { + "epoch": 2.72, + "learning_rate": 4.3213530655391124e-05, + "loss": 0.3346, + "step": 2574 + }, + { + "epoch": 2.72, + "learning_rate": 4.320824524312896e-05, + "loss": 0.4945, + "step": 2576 + }, + { + "epoch": 2.73, + "learning_rate": 4.320295983086681e-05, + "loss": 0.4013, + "step": 2578 + }, + { + "epoch": 2.73, + "learning_rate": 4.3197674418604655e-05, + "loss": 0.2965, + "step": 2580 + }, + { + "epoch": 2.73, + "learning_rate": 4.31923890063425e-05, + "loss": 0.3815, + "step": 2582 + }, + { + "epoch": 2.73, + "learning_rate": 4.318710359408034e-05, + "loss": 0.2204, + "step": 2584 + }, + { + "epoch": 2.73, + "learning_rate": 4.318181818181819e-05, + "loss": 0.3671, + "step": 2586 + }, + { + "epoch": 2.74, + "learning_rate": 4.3176532769556026e-05, + "loss": 0.5415, + "step": 2588 + }, + { + "epoch": 2.74, + "learning_rate": 4.317124735729387e-05, + "loss": 0.3768, + "step": 2590 + }, + { + "epoch": 2.74, + "learning_rate": 4.316596194503171e-05, + "loss": 0.377, + "step": 2592 + }, + { + "epoch": 2.74, + "learning_rate": 4.316067653276956e-05, + "loss": 0.5208, + "step": 2594 + }, + { + "epoch": 2.74, + "learning_rate": 4.31553911205074e-05, + "loss": 0.3114, + "step": 2596 + }, + { + "epoch": 2.75, + "learning_rate": 4.315010570824525e-05, + "loss": 0.2648, + "step": 2598 + }, + { + "epoch": 2.75, + "learning_rate": 4.314482029598309e-05, + "loss": 0.2037, + "step": 2600 + }, + { + "epoch": 2.75, + "learning_rate": 4.3139534883720935e-05, + "loss": 0.2728, + "step": 2602 + }, + { + "epoch": 2.75, + "learning_rate": 4.3134249471458774e-05, + "loss": 0.5251, + "step": 2604 + }, + { + "epoch": 2.75, + "learning_rate": 4.312896405919662e-05, + "loss": 0.4707, + "step": 2606 + }, + { + "epoch": 2.76, + "learning_rate": 4.312367864693446e-05, + "loss": 0.2391, + "step": 2608 + }, + { + "epoch": 2.76, + "learning_rate": 4.3118393234672305e-05, + "loss": 0.3286, + "step": 2610 + }, + { + "epoch": 2.76, + "learning_rate": 4.3113107822410145e-05, + "loss": 0.2599, + "step": 2612 + }, + { + "epoch": 2.76, + "learning_rate": 4.310782241014799e-05, + "loss": 0.5016, + "step": 2614 + }, + { + "epoch": 2.77, + "learning_rate": 4.310253699788584e-05, + "loss": 0.4022, + "step": 2616 + }, + { + "epoch": 2.77, + "learning_rate": 4.309725158562368e-05, + "loss": 0.4263, + "step": 2618 + }, + { + "epoch": 2.77, + "learning_rate": 4.309196617336152e-05, + "loss": 0.4461, + "step": 2620 + }, + { + "epoch": 2.77, + "learning_rate": 4.308668076109937e-05, + "loss": 0.3174, + "step": 2622 + }, + { + "epoch": 2.77, + "learning_rate": 4.3081395348837214e-05, + "loss": 0.4512, + "step": 2624 + }, + { + "epoch": 2.78, + "learning_rate": 4.3076109936575054e-05, + "loss": 0.3774, + "step": 2626 + }, + { + "epoch": 2.78, + "learning_rate": 4.30708245243129e-05, + "loss": 0.3699, + "step": 2628 + }, + { + "epoch": 2.78, + "learning_rate": 4.306553911205074e-05, + "loss": 0.4952, + "step": 2630 + }, + { + "epoch": 2.78, + "learning_rate": 4.3060253699788585e-05, + "loss": 0.3127, + "step": 2632 + }, + { + "epoch": 2.78, + "learning_rate": 4.305496828752643e-05, + "loss": 0.3005, + "step": 2634 + }, + { + "epoch": 2.79, + "learning_rate": 4.304968287526428e-05, + "loss": 0.5216, + "step": 2636 + }, + { + "epoch": 2.79, + "learning_rate": 4.3044397463002116e-05, + "loss": 0.1992, + "step": 2638 + }, + { + "epoch": 2.79, + "learning_rate": 4.303911205073996e-05, + "loss": 0.3832, + "step": 2640 + }, + { + "epoch": 2.79, + "learning_rate": 4.30338266384778e-05, + "loss": 0.3892, + "step": 2642 + }, + { + "epoch": 2.79, + "learning_rate": 4.302854122621565e-05, + "loss": 0.4152, + "step": 2644 + }, + { + "epoch": 2.8, + "learning_rate": 4.302325581395349e-05, + "loss": 0.3142, + "step": 2646 + }, + { + "epoch": 2.8, + "learning_rate": 4.301797040169133e-05, + "loss": 0.381, + "step": 2648 + }, + { + "epoch": 2.8, + "learning_rate": 4.301268498942917e-05, + "loss": 0.4858, + "step": 2650 + }, + { + "epoch": 2.8, + "learning_rate": 4.3007399577167025e-05, + "loss": 0.4228, + "step": 2652 + }, + { + "epoch": 2.81, + "learning_rate": 4.3002114164904865e-05, + "loss": 0.3344, + "step": 2654 + }, + { + "epoch": 2.81, + "learning_rate": 4.299682875264271e-05, + "loss": 0.3139, + "step": 2656 + }, + { + "epoch": 2.81, + "learning_rate": 4.299154334038055e-05, + "loss": 0.4021, + "step": 2658 + }, + { + "epoch": 2.81, + "learning_rate": 4.2986257928118396e-05, + "loss": 0.2641, + "step": 2660 + }, + { + "epoch": 2.81, + "learning_rate": 4.2980972515856235e-05, + "loss": 0.2865, + "step": 2662 + }, + { + "epoch": 2.82, + "learning_rate": 4.297568710359408e-05, + "loss": 0.4632, + "step": 2664 + }, + { + "epoch": 2.82, + "learning_rate": 4.297040169133193e-05, + "loss": 0.4016, + "step": 2666 + }, + { + "epoch": 2.82, + "learning_rate": 4.296511627906977e-05, + "loss": 0.3713, + "step": 2668 + }, + { + "epoch": 2.82, + "learning_rate": 4.295983086680761e-05, + "loss": 0.5221, + "step": 2670 + }, + { + "epoch": 2.82, + "learning_rate": 4.295454545454546e-05, + "loss": 0.398, + "step": 2672 + }, + { + "epoch": 2.83, + "learning_rate": 4.29492600422833e-05, + "loss": 0.4723, + "step": 2674 + }, + { + "epoch": 2.83, + "learning_rate": 4.2943974630021144e-05, + "loss": 0.441, + "step": 2676 + }, + { + "epoch": 2.83, + "learning_rate": 4.293868921775899e-05, + "loss": 0.3097, + "step": 2678 + }, + { + "epoch": 2.83, + "learning_rate": 4.293340380549683e-05, + "loss": 0.4571, + "step": 2680 + }, + { + "epoch": 2.84, + "learning_rate": 4.2928118393234676e-05, + "loss": 0.2796, + "step": 2682 + }, + { + "epoch": 2.84, + "learning_rate": 4.2922832980972515e-05, + "loss": 0.2998, + "step": 2684 + }, + { + "epoch": 2.84, + "learning_rate": 4.291754756871036e-05, + "loss": 0.3923, + "step": 2686 + }, + { + "epoch": 2.84, + "learning_rate": 4.291226215644821e-05, + "loss": 0.2452, + "step": 2688 + }, + { + "epoch": 2.84, + "learning_rate": 4.290697674418605e-05, + "loss": 0.4391, + "step": 2690 + }, + { + "epoch": 2.85, + "learning_rate": 4.290169133192389e-05, + "loss": 0.4308, + "step": 2692 + }, + { + "epoch": 2.85, + "learning_rate": 4.289640591966174e-05, + "loss": 0.2987, + "step": 2694 + }, + { + "epoch": 2.85, + "learning_rate": 4.289112050739958e-05, + "loss": 0.6786, + "step": 2696 + }, + { + "epoch": 2.85, + "learning_rate": 4.2885835095137424e-05, + "loss": 0.645, + "step": 2698 + }, + { + "epoch": 2.85, + "learning_rate": 4.288054968287526e-05, + "loss": 0.6325, + "step": 2700 + }, + { + "epoch": 2.86, + "learning_rate": 4.287526427061311e-05, + "loss": 0.3546, + "step": 2702 + }, + { + "epoch": 2.86, + "learning_rate": 4.286997885835095e-05, + "loss": 0.3041, + "step": 2704 + }, + { + "epoch": 2.86, + "learning_rate": 4.28646934460888e-05, + "loss": 0.3767, + "step": 2706 + }, + { + "epoch": 2.86, + "learning_rate": 4.285940803382664e-05, + "loss": 0.3425, + "step": 2708 + }, + { + "epoch": 2.86, + "learning_rate": 4.2854122621564486e-05, + "loss": 0.3014, + "step": 2710 + }, + { + "epoch": 2.87, + "learning_rate": 4.2848837209302326e-05, + "loss": 0.3421, + "step": 2712 + }, + { + "epoch": 2.87, + "learning_rate": 4.284355179704017e-05, + "loss": 0.3349, + "step": 2714 + }, + { + "epoch": 2.87, + "learning_rate": 4.283826638477801e-05, + "loss": 0.2842, + "step": 2716 + }, + { + "epoch": 2.87, + "learning_rate": 4.283298097251586e-05, + "loss": 0.4648, + "step": 2718 + }, + { + "epoch": 2.88, + "learning_rate": 4.28276955602537e-05, + "loss": 0.2628, + "step": 2720 + }, + { + "epoch": 2.88, + "learning_rate": 4.282241014799154e-05, + "loss": 0.3664, + "step": 2722 + }, + { + "epoch": 2.88, + "learning_rate": 4.281712473572939e-05, + "loss": 0.4732, + "step": 2724 + }, + { + "epoch": 2.88, + "learning_rate": 4.2811839323467235e-05, + "loss": 0.3712, + "step": 2726 + }, + { + "epoch": 2.88, + "learning_rate": 4.2806553911205074e-05, + "loss": 0.4412, + "step": 2728 + }, + { + "epoch": 2.89, + "learning_rate": 4.280126849894292e-05, + "loss": 0.2067, + "step": 2730 + }, + { + "epoch": 2.89, + "learning_rate": 4.2795983086680766e-05, + "loss": 0.3256, + "step": 2732 + }, + { + "epoch": 2.89, + "learning_rate": 4.2790697674418605e-05, + "loss": 0.2648, + "step": 2734 + }, + { + "epoch": 2.89, + "learning_rate": 4.278541226215645e-05, + "loss": 0.2814, + "step": 2736 + }, + { + "epoch": 2.89, + "learning_rate": 4.278012684989429e-05, + "loss": 0.4367, + "step": 2738 + }, + { + "epoch": 2.9, + "learning_rate": 4.277484143763214e-05, + "loss": 0.3559, + "step": 2740 + }, + { + "epoch": 2.9, + "learning_rate": 4.276955602536998e-05, + "loss": 0.6629, + "step": 2742 + }, + { + "epoch": 2.9, + "learning_rate": 4.276427061310783e-05, + "loss": 0.5756, + "step": 2744 + }, + { + "epoch": 2.9, + "learning_rate": 4.275898520084567e-05, + "loss": 0.337, + "step": 2746 + }, + { + "epoch": 2.9, + "learning_rate": 4.2753699788583514e-05, + "loss": 0.577, + "step": 2748 + }, + { + "epoch": 2.91, + "learning_rate": 4.2748414376321353e-05, + "loss": 0.4437, + "step": 2750 + }, + { + "epoch": 2.91, + "learning_rate": 4.27431289640592e-05, + "loss": 0.2032, + "step": 2752 + }, + { + "epoch": 2.91, + "learning_rate": 4.273784355179704e-05, + "loss": 0.4406, + "step": 2754 + }, + { + "epoch": 2.91, + "learning_rate": 4.2732558139534885e-05, + "loss": 0.3424, + "step": 2756 + }, + { + "epoch": 2.92, + "learning_rate": 4.2727272727272724e-05, + "loss": 0.4671, + "step": 2758 + }, + { + "epoch": 2.92, + "learning_rate": 4.272198731501058e-05, + "loss": 0.3583, + "step": 2760 + }, + { + "epoch": 2.92, + "learning_rate": 4.2716701902748416e-05, + "loss": 0.5143, + "step": 2762 + }, + { + "epoch": 2.92, + "learning_rate": 4.271141649048626e-05, + "loss": 0.4685, + "step": 2764 + }, + { + "epoch": 2.92, + "learning_rate": 4.27061310782241e-05, + "loss": 0.7, + "step": 2766 + }, + { + "epoch": 2.93, + "learning_rate": 4.270084566596195e-05, + "loss": 0.5151, + "step": 2768 + }, + { + "epoch": 2.93, + "learning_rate": 4.269556025369979e-05, + "loss": 0.4323, + "step": 2770 + }, + { + "epoch": 2.93, + "learning_rate": 4.269027484143763e-05, + "loss": 0.4289, + "step": 2772 + }, + { + "epoch": 2.93, + "learning_rate": 4.268498942917548e-05, + "loss": 0.2696, + "step": 2774 + }, + { + "epoch": 2.93, + "learning_rate": 4.267970401691332e-05, + "loss": 0.649, + "step": 2776 + }, + { + "epoch": 2.94, + "learning_rate": 4.2674418604651164e-05, + "loss": 0.3215, + "step": 2778 + }, + { + "epoch": 2.94, + "learning_rate": 4.266913319238901e-05, + "loss": 0.3971, + "step": 2780 + }, + { + "epoch": 2.94, + "learning_rate": 4.2663847780126857e-05, + "loss": 0.4493, + "step": 2782 + }, + { + "epoch": 2.94, + "learning_rate": 4.2658562367864696e-05, + "loss": 0.523, + "step": 2784 + }, + { + "epoch": 2.95, + "learning_rate": 4.265327695560254e-05, + "loss": 0.3435, + "step": 2786 + }, + { + "epoch": 2.95, + "learning_rate": 4.264799154334038e-05, + "loss": 0.2757, + "step": 2788 + }, + { + "epoch": 2.95, + "learning_rate": 4.264270613107823e-05, + "loss": 0.4671, + "step": 2790 + }, + { + "epoch": 2.95, + "learning_rate": 4.2637420718816066e-05, + "loss": 0.3897, + "step": 2792 + }, + { + "epoch": 2.95, + "learning_rate": 4.263213530655391e-05, + "loss": 0.2445, + "step": 2794 + }, + { + "epoch": 2.96, + "learning_rate": 4.262684989429176e-05, + "loss": 0.3071, + "step": 2796 + }, + { + "epoch": 2.96, + "learning_rate": 4.2621564482029605e-05, + "loss": 0.3959, + "step": 2798 + }, + { + "epoch": 2.96, + "learning_rate": 4.2616279069767444e-05, + "loss": 0.3327, + "step": 2800 + }, + { + "epoch": 2.96, + "learning_rate": 4.261099365750529e-05, + "loss": 0.3544, + "step": 2802 + }, + { + "epoch": 2.96, + "learning_rate": 4.260570824524313e-05, + "loss": 0.4166, + "step": 2804 + }, + { + "epoch": 2.97, + "learning_rate": 4.2600422832980975e-05, + "loss": 0.3214, + "step": 2806 + }, + { + "epoch": 2.97, + "learning_rate": 4.2595137420718815e-05, + "loss": 0.5656, + "step": 2808 + }, + { + "epoch": 2.97, + "learning_rate": 4.258985200845666e-05, + "loss": 0.3529, + "step": 2810 + }, + { + "epoch": 2.97, + "learning_rate": 4.25845665961945e-05, + "loss": 0.5777, + "step": 2812 + }, + { + "epoch": 2.97, + "learning_rate": 4.257928118393235e-05, + "loss": 0.3313, + "step": 2814 + }, + { + "epoch": 2.98, + "learning_rate": 4.257399577167019e-05, + "loss": 0.3668, + "step": 2816 + }, + { + "epoch": 2.98, + "learning_rate": 4.256871035940804e-05, + "loss": 0.3936, + "step": 2818 + }, + { + "epoch": 2.98, + "learning_rate": 4.256342494714588e-05, + "loss": 0.3968, + "step": 2820 + }, + { + "epoch": 2.98, + "learning_rate": 4.2558139534883724e-05, + "loss": 0.6001, + "step": 2822 + }, + { + "epoch": 2.99, + "learning_rate": 4.255285412262156e-05, + "loss": 0.4649, + "step": 2824 + }, + { + "epoch": 2.99, + "learning_rate": 4.254756871035941e-05, + "loss": 0.3453, + "step": 2826 + }, + { + "epoch": 2.99, + "learning_rate": 4.2542283298097255e-05, + "loss": 0.4322, + "step": 2828 + }, + { + "epoch": 2.99, + "learning_rate": 4.2536997885835094e-05, + "loss": 0.4257, + "step": 2830 + }, + { + "epoch": 2.99, + "learning_rate": 4.253171247357294e-05, + "loss": 0.2827, + "step": 2832 + }, + { + "epoch": 3.0, + "learning_rate": 4.2526427061310786e-05, + "loss": 0.4138, + "step": 2834 + }, + { + "epoch": 3.0, + "learning_rate": 4.252114164904863e-05, + "loss": 0.3991, + "step": 2836 + }, + { + "epoch": 3.0, + "learning_rate": 4.251585623678647e-05, + "loss": 0.3762, + "step": 2838 + }, + { + "epoch": 3.0, + "learning_rate": 4.251057082452432e-05, + "loss": 0.3813, + "step": 2840 + }, + { + "epoch": 3.0, + "learning_rate": 4.250528541226216e-05, + "loss": 0.4144, + "step": 2842 + }, + { + "epoch": 3.01, + "learning_rate": 4.25e-05, + "loss": 0.3277, + "step": 2844 + }, + { + "epoch": 3.01, + "learning_rate": 4.249471458773784e-05, + "loss": 0.4051, + "step": 2846 + }, + { + "epoch": 3.01, + "learning_rate": 4.248942917547569e-05, + "loss": 0.4003, + "step": 2848 + }, + { + "epoch": 3.01, + "learning_rate": 4.2484143763213534e-05, + "loss": 0.3624, + "step": 2850 + }, + { + "epoch": 3.01, + "learning_rate": 4.247885835095138e-05, + "loss": 0.4119, + "step": 2852 + }, + { + "epoch": 3.02, + "learning_rate": 4.247357293868922e-05, + "loss": 0.6358, + "step": 2854 + }, + { + "epoch": 3.02, + "learning_rate": 4.2468287526427066e-05, + "loss": 0.5912, + "step": 2856 + }, + { + "epoch": 3.02, + "learning_rate": 4.2463002114164905e-05, + "loss": 0.295, + "step": 2858 + }, + { + "epoch": 3.02, + "learning_rate": 4.245771670190275e-05, + "loss": 0.3205, + "step": 2860 + }, + { + "epoch": 3.03, + "learning_rate": 4.245243128964059e-05, + "loss": 0.3077, + "step": 2862 + }, + { + "epoch": 3.03, + "learning_rate": 4.2447145877378437e-05, + "loss": 0.259, + "step": 2864 + }, + { + "epoch": 3.03, + "learning_rate": 4.2441860465116276e-05, + "loss": 0.4562, + "step": 2866 + }, + { + "epoch": 3.03, + "learning_rate": 4.243657505285413e-05, + "loss": 0.2321, + "step": 2868 + }, + { + "epoch": 3.03, + "learning_rate": 4.243128964059197e-05, + "loss": 0.539, + "step": 2870 + }, + { + "epoch": 3.04, + "learning_rate": 4.2426004228329814e-05, + "loss": 0.3658, + "step": 2872 + }, + { + "epoch": 3.04, + "learning_rate": 4.242071881606765e-05, + "loss": 0.2908, + "step": 2874 + }, + { + "epoch": 3.04, + "learning_rate": 4.24154334038055e-05, + "loss": 0.262, + "step": 2876 + }, + { + "epoch": 3.04, + "learning_rate": 4.241014799154334e-05, + "loss": 0.3137, + "step": 2878 + }, + { + "epoch": 3.04, + "learning_rate": 4.2404862579281185e-05, + "loss": 0.2519, + "step": 2880 + }, + { + "epoch": 3.05, + "learning_rate": 4.239957716701903e-05, + "loss": 0.3046, + "step": 2882 + }, + { + "epoch": 3.05, + "learning_rate": 4.239429175475687e-05, + "loss": 0.5082, + "step": 2884 + }, + { + "epoch": 3.05, + "learning_rate": 4.2389006342494716e-05, + "loss": 0.2263, + "step": 2886 + }, + { + "epoch": 3.05, + "learning_rate": 4.238372093023256e-05, + "loss": 0.4031, + "step": 2888 + }, + { + "epoch": 3.05, + "learning_rate": 4.237843551797041e-05, + "loss": 0.3983, + "step": 2890 + }, + { + "epoch": 3.06, + "learning_rate": 4.237315010570825e-05, + "loss": 0.2879, + "step": 2892 + }, + { + "epoch": 3.06, + "learning_rate": 4.2367864693446094e-05, + "loss": 0.2856, + "step": 2894 + }, + { + "epoch": 3.06, + "learning_rate": 4.236257928118393e-05, + "loss": 0.2928, + "step": 2896 + }, + { + "epoch": 3.06, + "learning_rate": 4.235729386892178e-05, + "loss": 0.2686, + "step": 2898 + }, + { + "epoch": 3.07, + "learning_rate": 4.235200845665962e-05, + "loss": 0.4004, + "step": 2900 + }, + { + "epoch": 3.07, + "learning_rate": 4.2346723044397464e-05, + "loss": 0.455, + "step": 2902 + }, + { + "epoch": 3.07, + "learning_rate": 4.234143763213531e-05, + "loss": 0.5315, + "step": 2904 + }, + { + "epoch": 3.07, + "learning_rate": 4.2336152219873156e-05, + "loss": 0.3951, + "step": 2906 + }, + { + "epoch": 3.07, + "learning_rate": 4.2330866807610996e-05, + "loss": 0.3007, + "step": 2908 + }, + { + "epoch": 3.08, + "learning_rate": 4.232558139534884e-05, + "loss": 0.3168, + "step": 2910 + }, + { + "epoch": 3.08, + "learning_rate": 4.232029598308668e-05, + "loss": 0.3028, + "step": 2912 + }, + { + "epoch": 3.08, + "learning_rate": 4.231501057082453e-05, + "loss": 0.1548, + "step": 2914 + }, + { + "epoch": 3.08, + "learning_rate": 4.2309725158562366e-05, + "loss": 0.2496, + "step": 2916 + }, + { + "epoch": 3.08, + "learning_rate": 4.230443974630021e-05, + "loss": 0.4231, + "step": 2918 + }, + { + "epoch": 3.09, + "learning_rate": 4.229915433403805e-05, + "loss": 0.2385, + "step": 2920 + }, + { + "epoch": 3.09, + "learning_rate": 4.2293868921775904e-05, + "loss": 0.342, + "step": 2922 + }, + { + "epoch": 3.09, + "learning_rate": 4.2288583509513744e-05, + "loss": 0.2755, + "step": 2924 + }, + { + "epoch": 3.09, + "learning_rate": 4.228329809725159e-05, + "loss": 0.2918, + "step": 2926 + }, + { + "epoch": 3.1, + "learning_rate": 4.227801268498943e-05, + "loss": 0.3105, + "step": 2928 + }, + { + "epoch": 3.1, + "learning_rate": 4.2272727272727275e-05, + "loss": 0.1969, + "step": 2930 + }, + { + "epoch": 3.1, + "learning_rate": 4.226744186046512e-05, + "loss": 0.3114, + "step": 2932 + }, + { + "epoch": 3.1, + "learning_rate": 4.226215644820296e-05, + "loss": 0.2758, + "step": 2934 + }, + { + "epoch": 3.1, + "learning_rate": 4.2256871035940807e-05, + "loss": 0.2677, + "step": 2936 + }, + { + "epoch": 3.11, + "learning_rate": 4.2251585623678646e-05, + "loss": 0.4188, + "step": 2938 + }, + { + "epoch": 3.11, + "learning_rate": 4.224630021141649e-05, + "loss": 0.2955, + "step": 2940 + }, + { + "epoch": 3.11, + "learning_rate": 4.224101479915434e-05, + "loss": 0.3511, + "step": 2942 + }, + { + "epoch": 3.11, + "learning_rate": 4.2235729386892184e-05, + "loss": 0.2193, + "step": 2944 + }, + { + "epoch": 3.11, + "learning_rate": 4.223044397463002e-05, + "loss": 0.2371, + "step": 2946 + }, + { + "epoch": 3.12, + "learning_rate": 4.222515856236787e-05, + "loss": 0.3929, + "step": 2948 + }, + { + "epoch": 3.12, + "learning_rate": 4.221987315010571e-05, + "loss": 0.4396, + "step": 2950 + }, + { + "epoch": 3.12, + "learning_rate": 4.2214587737843555e-05, + "loss": 0.5568, + "step": 2952 + }, + { + "epoch": 3.12, + "learning_rate": 4.2209302325581394e-05, + "loss": 0.4471, + "step": 2954 + }, + { + "epoch": 3.12, + "learning_rate": 4.220401691331924e-05, + "loss": 0.4105, + "step": 2956 + }, + { + "epoch": 3.13, + "learning_rate": 4.2198731501057086e-05, + "loss": 0.5462, + "step": 2958 + }, + { + "epoch": 3.13, + "learning_rate": 4.219344608879493e-05, + "loss": 0.5864, + "step": 2960 + }, + { + "epoch": 3.13, + "learning_rate": 4.218816067653277e-05, + "loss": 0.5403, + "step": 2962 + }, + { + "epoch": 3.13, + "learning_rate": 4.218287526427062e-05, + "loss": 0.611, + "step": 2964 + }, + { + "epoch": 3.14, + "learning_rate": 4.217758985200846e-05, + "loss": 0.4517, + "step": 2966 + }, + { + "epoch": 3.14, + "learning_rate": 4.21723044397463e-05, + "loss": 0.3851, + "step": 2968 + }, + { + "epoch": 3.14, + "learning_rate": 4.216701902748414e-05, + "loss": 0.3653, + "step": 2970 + }, + { + "epoch": 3.14, + "learning_rate": 4.216173361522199e-05, + "loss": 0.4052, + "step": 2972 + }, + { + "epoch": 3.14, + "learning_rate": 4.215644820295983e-05, + "loss": 0.3847, + "step": 2974 + }, + { + "epoch": 3.15, + "learning_rate": 4.215116279069768e-05, + "loss": 0.2637, + "step": 2976 + }, + { + "epoch": 3.15, + "learning_rate": 4.214587737843552e-05, + "loss": 0.3769, + "step": 2978 + }, + { + "epoch": 3.15, + "learning_rate": 4.2140591966173366e-05, + "loss": 0.3022, + "step": 2980 + }, + { + "epoch": 3.15, + "learning_rate": 4.2135306553911205e-05, + "loss": 0.3418, + "step": 2982 + }, + { + "epoch": 3.15, + "learning_rate": 4.213002114164905e-05, + "loss": 0.2043, + "step": 2984 + }, + { + "epoch": 3.16, + "learning_rate": 4.21247357293869e-05, + "loss": 0.263, + "step": 2986 + }, + { + "epoch": 3.16, + "learning_rate": 4.2119450317124736e-05, + "loss": 0.2247, + "step": 2988 + }, + { + "epoch": 3.16, + "learning_rate": 4.211416490486258e-05, + "loss": 0.2779, + "step": 2990 + }, + { + "epoch": 3.16, + "learning_rate": 4.210887949260042e-05, + "loss": 0.274, + "step": 2992 + }, + { + "epoch": 3.16, + "learning_rate": 4.210359408033827e-05, + "loss": 0.2576, + "step": 2994 + }, + { + "epoch": 3.17, + "learning_rate": 4.2098308668076114e-05, + "loss": 0.4279, + "step": 2996 + }, + { + "epoch": 3.17, + "learning_rate": 4.209302325581396e-05, + "loss": 0.2541, + "step": 2998 + }, + { + "epoch": 3.17, + "learning_rate": 4.20877378435518e-05, + "loss": 0.3729, + "step": 3000 + }, + { + "epoch": 3.17, + "eval_cer": 0.048674836135651184, + "eval_loss": 0.49270230531692505, + "eval_runtime": 130.1947, + "eval_samples_per_second": 6.46, + "eval_steps_per_second": 0.814, + "step": 3000 + }, + { + "epoch": 3.17, + "learning_rate": 4.2082452431289645e-05, + "loss": 0.3319, + "step": 3002 + }, + { + "epoch": 3.18, + "learning_rate": 4.2077167019027485e-05, + "loss": 0.3086, + "step": 3004 + }, + { + "epoch": 3.18, + "learning_rate": 4.207188160676533e-05, + "loss": 0.3868, + "step": 3006 + }, + { + "epoch": 3.18, + "learning_rate": 4.206659619450317e-05, + "loss": 0.3581, + "step": 3008 + }, + { + "epoch": 3.18, + "learning_rate": 4.2061310782241016e-05, + "loss": 0.2487, + "step": 3010 + }, + { + "epoch": 3.18, + "learning_rate": 4.205602536997886e-05, + "loss": 0.38, + "step": 3012 + }, + { + "epoch": 3.19, + "learning_rate": 4.205073995771671e-05, + "loss": 0.4104, + "step": 3014 + }, + { + "epoch": 3.19, + "learning_rate": 4.204545454545455e-05, + "loss": 0.251, + "step": 3016 + }, + { + "epoch": 3.19, + "learning_rate": 4.204016913319239e-05, + "loss": 0.2777, + "step": 3018 + }, + { + "epoch": 3.19, + "learning_rate": 4.203488372093023e-05, + "loss": 0.2662, + "step": 3020 + }, + { + "epoch": 3.19, + "learning_rate": 4.202959830866808e-05, + "loss": 0.2533, + "step": 3022 + }, + { + "epoch": 3.2, + "learning_rate": 4.202431289640592e-05, + "loss": 0.3681, + "step": 3024 + }, + { + "epoch": 3.2, + "learning_rate": 4.2019027484143764e-05, + "loss": 0.4595, + "step": 3026 + }, + { + "epoch": 3.2, + "learning_rate": 4.20137420718816e-05, + "loss": 0.358, + "step": 3028 + }, + { + "epoch": 3.2, + "learning_rate": 4.2008456659619456e-05, + "loss": 0.2452, + "step": 3030 + }, + { + "epoch": 3.21, + "learning_rate": 4.2003171247357295e-05, + "loss": 0.5741, + "step": 3032 + }, + { + "epoch": 3.21, + "learning_rate": 4.199788583509514e-05, + "loss": 0.652, + "step": 3034 + }, + { + "epoch": 3.21, + "learning_rate": 4.199260042283298e-05, + "loss": 0.2136, + "step": 3036 + }, + { + "epoch": 3.21, + "learning_rate": 4.198731501057083e-05, + "loss": 0.3405, + "step": 3038 + }, + { + "epoch": 3.21, + "learning_rate": 4.198202959830867e-05, + "loss": 0.5211, + "step": 3040 + }, + { + "epoch": 3.22, + "learning_rate": 4.197674418604651e-05, + "loss": 0.2128, + "step": 3042 + }, + { + "epoch": 3.22, + "learning_rate": 4.197145877378436e-05, + "loss": 0.3436, + "step": 3044 + }, + { + "epoch": 3.22, + "learning_rate": 4.19661733615222e-05, + "loss": 0.3668, + "step": 3046 + }, + { + "epoch": 3.22, + "learning_rate": 4.1960887949260044e-05, + "loss": 0.2641, + "step": 3048 + }, + { + "epoch": 3.22, + "learning_rate": 4.195560253699789e-05, + "loss": 0.2484, + "step": 3050 + }, + { + "epoch": 3.23, + "learning_rate": 4.1950317124735736e-05, + "loss": 0.3322, + "step": 3052 + }, + { + "epoch": 3.23, + "learning_rate": 4.1945031712473575e-05, + "loss": 0.3741, + "step": 3054 + }, + { + "epoch": 3.23, + "learning_rate": 4.193974630021142e-05, + "loss": 0.5316, + "step": 3056 + }, + { + "epoch": 3.23, + "learning_rate": 4.193446088794926e-05, + "loss": 0.2144, + "step": 3058 + }, + { + "epoch": 3.23, + "learning_rate": 4.1929175475687106e-05, + "loss": 0.4243, + "step": 3060 + }, + { + "epoch": 3.24, + "learning_rate": 4.1923890063424946e-05, + "loss": 0.3349, + "step": 3062 + }, + { + "epoch": 3.24, + "learning_rate": 4.191860465116279e-05, + "loss": 0.3093, + "step": 3064 + }, + { + "epoch": 3.24, + "learning_rate": 4.191331923890063e-05, + "loss": 0.2343, + "step": 3066 + }, + { + "epoch": 3.24, + "learning_rate": 4.1908033826638484e-05, + "loss": 0.2864, + "step": 3068 + }, + { + "epoch": 3.25, + "learning_rate": 4.190274841437632e-05, + "loss": 0.2337, + "step": 3070 + }, + { + "epoch": 3.25, + "learning_rate": 4.189746300211417e-05, + "loss": 0.333, + "step": 3072 + }, + { + "epoch": 3.25, + "learning_rate": 4.189217758985201e-05, + "loss": 0.2783, + "step": 3074 + }, + { + "epoch": 3.25, + "learning_rate": 4.1886892177589855e-05, + "loss": 0.3432, + "step": 3076 + }, + { + "epoch": 3.25, + "learning_rate": 4.1881606765327694e-05, + "loss": 0.3868, + "step": 3078 + }, + { + "epoch": 3.26, + "learning_rate": 4.187632135306554e-05, + "loss": 0.3249, + "step": 3080 + }, + { + "epoch": 3.26, + "learning_rate": 4.187103594080338e-05, + "loss": 0.3919, + "step": 3082 + }, + { + "epoch": 3.26, + "learning_rate": 4.1865750528541225e-05, + "loss": 0.2175, + "step": 3084 + }, + { + "epoch": 3.26, + "learning_rate": 4.186046511627907e-05, + "loss": 0.3022, + "step": 3086 + }, + { + "epoch": 3.26, + "learning_rate": 4.185517970401692e-05, + "loss": 0.2926, + "step": 3088 + }, + { + "epoch": 3.27, + "learning_rate": 4.184989429175476e-05, + "loss": 0.2434, + "step": 3090 + }, + { + "epoch": 3.27, + "learning_rate": 4.18446088794926e-05, + "loss": 0.2615, + "step": 3092 + }, + { + "epoch": 3.27, + "learning_rate": 4.183932346723045e-05, + "loss": 0.3064, + "step": 3094 + }, + { + "epoch": 3.27, + "learning_rate": 4.183403805496829e-05, + "loss": 0.281, + "step": 3096 + }, + { + "epoch": 3.27, + "learning_rate": 4.1828752642706134e-05, + "loss": 0.2042, + "step": 3098 + }, + { + "epoch": 3.28, + "learning_rate": 4.182346723044397e-05, + "loss": 0.4044, + "step": 3100 + }, + { + "epoch": 3.28, + "learning_rate": 4.181818181818182e-05, + "loss": 0.266, + "step": 3102 + }, + { + "epoch": 3.28, + "learning_rate": 4.1812896405919666e-05, + "loss": 0.2419, + "step": 3104 + }, + { + "epoch": 3.28, + "learning_rate": 4.180761099365751e-05, + "loss": 0.2929, + "step": 3106 + }, + { + "epoch": 3.29, + "learning_rate": 4.180232558139535e-05, + "loss": 0.4756, + "step": 3108 + }, + { + "epoch": 3.29, + "learning_rate": 4.17970401691332e-05, + "loss": 0.3084, + "step": 3110 + }, + { + "epoch": 3.29, + "learning_rate": 4.1791754756871036e-05, + "loss": 0.3543, + "step": 3112 + }, + { + "epoch": 3.29, + "learning_rate": 4.178646934460888e-05, + "loss": 0.393, + "step": 3114 + }, + { + "epoch": 3.29, + "learning_rate": 4.178118393234672e-05, + "loss": 0.3009, + "step": 3116 + }, + { + "epoch": 3.3, + "learning_rate": 4.177589852008457e-05, + "loss": 0.185, + "step": 3118 + }, + { + "epoch": 3.3, + "learning_rate": 4.177061310782241e-05, + "loss": 0.2683, + "step": 3120 + }, + { + "epoch": 3.3, + "learning_rate": 4.176532769556026e-05, + "loss": 0.5429, + "step": 3122 + }, + { + "epoch": 3.3, + "learning_rate": 4.17600422832981e-05, + "loss": 0.3401, + "step": 3124 + }, + { + "epoch": 3.3, + "learning_rate": 4.1754756871035945e-05, + "loss": 0.3918, + "step": 3126 + }, + { + "epoch": 3.31, + "learning_rate": 4.1749471458773784e-05, + "loss": 0.3553, + "step": 3128 + }, + { + "epoch": 3.31, + "learning_rate": 4.174418604651163e-05, + "loss": 0.2473, + "step": 3130 + }, + { + "epoch": 3.31, + "learning_rate": 4.173890063424947e-05, + "loss": 0.4602, + "step": 3132 + }, + { + "epoch": 3.31, + "learning_rate": 4.1733615221987316e-05, + "loss": 0.4051, + "step": 3134 + }, + { + "epoch": 3.32, + "learning_rate": 4.172832980972516e-05, + "loss": 0.2363, + "step": 3136 + }, + { + "epoch": 3.32, + "learning_rate": 4.1723044397463e-05, + "loss": 0.2591, + "step": 3138 + }, + { + "epoch": 3.32, + "learning_rate": 4.171775898520085e-05, + "loss": 0.3065, + "step": 3140 + }, + { + "epoch": 3.32, + "learning_rate": 4.171247357293869e-05, + "loss": 0.2056, + "step": 3142 + }, + { + "epoch": 3.32, + "learning_rate": 4.170718816067653e-05, + "loss": 0.302, + "step": 3144 + }, + { + "epoch": 3.33, + "learning_rate": 4.170190274841438e-05, + "loss": 0.3368, + "step": 3146 + }, + { + "epoch": 3.33, + "learning_rate": 4.1696617336152225e-05, + "loss": 0.3408, + "step": 3148 + }, + { + "epoch": 3.33, + "learning_rate": 4.1691331923890064e-05, + "loss": 0.3338, + "step": 3150 + }, + { + "epoch": 3.33, + "learning_rate": 4.168604651162791e-05, + "loss": 0.4434, + "step": 3152 + }, + { + "epoch": 3.33, + "learning_rate": 4.168076109936575e-05, + "loss": 0.4222, + "step": 3154 + }, + { + "epoch": 3.34, + "learning_rate": 4.1675475687103595e-05, + "loss": 0.3405, + "step": 3156 + }, + { + "epoch": 3.34, + "learning_rate": 4.167019027484144e-05, + "loss": 0.3749, + "step": 3158 + }, + { + "epoch": 3.34, + "learning_rate": 4.166490486257929e-05, + "loss": 0.2047, + "step": 3160 + }, + { + "epoch": 3.34, + "learning_rate": 4.165961945031713e-05, + "loss": 0.3287, + "step": 3162 + }, + { + "epoch": 3.34, + "learning_rate": 4.165433403805497e-05, + "loss": 0.3589, + "step": 3164 + }, + { + "epoch": 3.35, + "learning_rate": 4.164904862579281e-05, + "loss": 0.6135, + "step": 3166 + }, + { + "epoch": 3.35, + "learning_rate": 4.164376321353066e-05, + "loss": 0.8384, + "step": 3168 + }, + { + "epoch": 3.35, + "learning_rate": 4.16384778012685e-05, + "loss": 0.3048, + "step": 3170 + }, + { + "epoch": 3.35, + "learning_rate": 4.1633192389006343e-05, + "loss": 0.4265, + "step": 3172 + }, + { + "epoch": 3.36, + "learning_rate": 4.162790697674418e-05, + "loss": 0.4554, + "step": 3174 + }, + { + "epoch": 3.36, + "learning_rate": 4.1622621564482036e-05, + "loss": 0.3637, + "step": 3176 + }, + { + "epoch": 3.36, + "learning_rate": 4.1617336152219875e-05, + "loss": 0.5761, + "step": 3178 + }, + { + "epoch": 3.36, + "learning_rate": 4.161205073995772e-05, + "loss": 0.2125, + "step": 3180 + }, + { + "epoch": 3.36, + "learning_rate": 4.160676532769556e-05, + "loss": 0.2687, + "step": 3182 + }, + { + "epoch": 3.37, + "learning_rate": 4.1601479915433406e-05, + "loss": 0.4152, + "step": 3184 + }, + { + "epoch": 3.37, + "learning_rate": 4.1596194503171246e-05, + "loss": 0.3966, + "step": 3186 + }, + { + "epoch": 3.37, + "learning_rate": 4.159090909090909e-05, + "loss": 0.3472, + "step": 3188 + }, + { + "epoch": 3.37, + "learning_rate": 4.158562367864694e-05, + "loss": 0.2772, + "step": 3190 + }, + { + "epoch": 3.37, + "learning_rate": 4.158033826638478e-05, + "loss": 0.2744, + "step": 3192 + }, + { + "epoch": 3.38, + "learning_rate": 4.157505285412262e-05, + "loss": 0.3092, + "step": 3194 + }, + { + "epoch": 3.38, + "learning_rate": 4.156976744186047e-05, + "loss": 0.4398, + "step": 3196 + }, + { + "epoch": 3.38, + "learning_rate": 4.1564482029598315e-05, + "loss": 0.1722, + "step": 3198 + }, + { + "epoch": 3.38, + "learning_rate": 4.1559196617336154e-05, + "loss": 0.4008, + "step": 3200 + }, + { + "epoch": 3.38, + "learning_rate": 4.1553911205074e-05, + "loss": 0.3137, + "step": 3202 + }, + { + "epoch": 3.39, + "learning_rate": 4.154862579281184e-05, + "loss": 0.4953, + "step": 3204 + }, + { + "epoch": 3.39, + "learning_rate": 4.1543340380549686e-05, + "loss": 0.2774, + "step": 3206 + }, + { + "epoch": 3.39, + "learning_rate": 4.1538054968287525e-05, + "loss": 0.3924, + "step": 3208 + }, + { + "epoch": 3.39, + "learning_rate": 4.153276955602537e-05, + "loss": 0.3354, + "step": 3210 + }, + { + "epoch": 3.4, + "learning_rate": 4.152748414376322e-05, + "loss": 0.2924, + "step": 3212 + }, + { + "epoch": 3.4, + "learning_rate": 4.152219873150106e-05, + "loss": 0.4382, + "step": 3214 + }, + { + "epoch": 3.4, + "learning_rate": 4.15169133192389e-05, + "loss": 0.2454, + "step": 3216 + }, + { + "epoch": 3.4, + "learning_rate": 4.151162790697675e-05, + "loss": 0.3126, + "step": 3218 + }, + { + "epoch": 3.4, + "learning_rate": 4.150634249471459e-05, + "loss": 0.5072, + "step": 3220 + }, + { + "epoch": 3.41, + "learning_rate": 4.1501057082452434e-05, + "loss": 0.1802, + "step": 3222 + }, + { + "epoch": 3.41, + "learning_rate": 4.149577167019027e-05, + "loss": 0.2585, + "step": 3224 + }, + { + "epoch": 3.41, + "learning_rate": 4.149048625792812e-05, + "loss": 0.3149, + "step": 3226 + }, + { + "epoch": 3.41, + "learning_rate": 4.148520084566596e-05, + "loss": 0.2631, + "step": 3228 + }, + { + "epoch": 3.41, + "learning_rate": 4.147991543340381e-05, + "loss": 0.3179, + "step": 3230 + }, + { + "epoch": 3.42, + "learning_rate": 4.147463002114165e-05, + "loss": 0.2586, + "step": 3232 + }, + { + "epoch": 3.42, + "learning_rate": 4.14693446088795e-05, + "loss": 0.3983, + "step": 3234 + }, + { + "epoch": 3.42, + "learning_rate": 4.1464059196617336e-05, + "loss": 0.2693, + "step": 3236 + }, + { + "epoch": 3.42, + "learning_rate": 4.145877378435518e-05, + "loss": 0.1979, + "step": 3238 + }, + { + "epoch": 3.42, + "learning_rate": 4.145348837209302e-05, + "loss": 0.2388, + "step": 3240 + }, + { + "epoch": 3.43, + "learning_rate": 4.144820295983087e-05, + "loss": 0.361, + "step": 3242 + }, + { + "epoch": 3.43, + "learning_rate": 4.1442917547568713e-05, + "loss": 0.2067, + "step": 3244 + }, + { + "epoch": 3.43, + "learning_rate": 4.143763213530655e-05, + "loss": 0.2476, + "step": 3246 + }, + { + "epoch": 3.43, + "learning_rate": 4.14323467230444e-05, + "loss": 0.4077, + "step": 3248 + }, + { + "epoch": 3.44, + "learning_rate": 4.1427061310782245e-05, + "loss": 0.255, + "step": 3250 + }, + { + "epoch": 3.44, + "learning_rate": 4.142177589852009e-05, + "loss": 0.3061, + "step": 3252 + }, + { + "epoch": 3.44, + "learning_rate": 4.141649048625793e-05, + "loss": 0.6814, + "step": 3254 + }, + { + "epoch": 3.44, + "learning_rate": 4.1411205073995776e-05, + "loss": 0.3628, + "step": 3256 + }, + { + "epoch": 3.44, + "learning_rate": 4.1405919661733616e-05, + "loss": 0.2991, + "step": 3258 + }, + { + "epoch": 3.45, + "learning_rate": 4.140063424947146e-05, + "loss": 0.3783, + "step": 3260 + }, + { + "epoch": 3.45, + "learning_rate": 4.13953488372093e-05, + "loss": 0.2028, + "step": 3262 + }, + { + "epoch": 3.45, + "learning_rate": 4.139006342494715e-05, + "loss": 0.2912, + "step": 3264 + }, + { + "epoch": 3.45, + "learning_rate": 4.138477801268499e-05, + "loss": 0.4558, + "step": 3266 + }, + { + "epoch": 3.45, + "learning_rate": 4.137949260042284e-05, + "loss": 0.4464, + "step": 3268 + }, + { + "epoch": 3.46, + "learning_rate": 4.137420718816068e-05, + "loss": 0.5572, + "step": 3270 + }, + { + "epoch": 3.46, + "learning_rate": 4.1368921775898524e-05, + "loss": 0.3313, + "step": 3272 + }, + { + "epoch": 3.46, + "learning_rate": 4.1363636363636364e-05, + "loss": 0.27, + "step": 3274 + }, + { + "epoch": 3.46, + "learning_rate": 4.135835095137421e-05, + "loss": 0.5401, + "step": 3276 + }, + { + "epoch": 3.47, + "learning_rate": 4.135306553911205e-05, + "loss": 0.2565, + "step": 3278 + }, + { + "epoch": 3.47, + "learning_rate": 4.1347780126849895e-05, + "loss": 0.3559, + "step": 3280 + }, + { + "epoch": 3.47, + "learning_rate": 4.1342494714587734e-05, + "loss": 0.3827, + "step": 3282 + }, + { + "epoch": 3.47, + "learning_rate": 4.133720930232559e-05, + "loss": 0.2983, + "step": 3284 + }, + { + "epoch": 3.47, + "learning_rate": 4.1331923890063427e-05, + "loss": 0.4047, + "step": 3286 + }, + { + "epoch": 3.48, + "learning_rate": 4.132663847780127e-05, + "loss": 0.4661, + "step": 3288 + }, + { + "epoch": 3.48, + "learning_rate": 4.132135306553911e-05, + "loss": 0.623, + "step": 3290 + }, + { + "epoch": 3.48, + "learning_rate": 4.131606765327696e-05, + "loss": 0.3762, + "step": 3292 + }, + { + "epoch": 3.48, + "learning_rate": 4.13107822410148e-05, + "loss": 0.3287, + "step": 3294 + }, + { + "epoch": 3.48, + "learning_rate": 4.130549682875264e-05, + "loss": 0.5006, + "step": 3296 + }, + { + "epoch": 3.49, + "learning_rate": 4.130021141649049e-05, + "loss": 0.3024, + "step": 3298 + }, + { + "epoch": 3.49, + "learning_rate": 4.129492600422833e-05, + "loss": 0.2122, + "step": 3300 + }, + { + "epoch": 3.49, + "learning_rate": 4.1289640591966175e-05, + "loss": 0.7112, + "step": 3302 + }, + { + "epoch": 3.49, + "learning_rate": 4.128435517970402e-05, + "loss": 0.1781, + "step": 3304 + }, + { + "epoch": 3.49, + "learning_rate": 4.127906976744187e-05, + "loss": 0.3929, + "step": 3306 + }, + { + "epoch": 3.5, + "learning_rate": 4.1273784355179706e-05, + "loss": 0.3355, + "step": 3308 + }, + { + "epoch": 3.5, + "learning_rate": 4.126849894291755e-05, + "loss": 0.3383, + "step": 3310 + }, + { + "epoch": 3.5, + "learning_rate": 4.126321353065539e-05, + "loss": 0.3993, + "step": 3312 + }, + { + "epoch": 3.5, + "learning_rate": 4.125792811839324e-05, + "loss": 0.2688, + "step": 3314 + }, + { + "epoch": 3.51, + "learning_rate": 4.125264270613108e-05, + "loss": 0.2217, + "step": 3316 + }, + { + "epoch": 3.51, + "learning_rate": 4.124735729386892e-05, + "loss": 0.262, + "step": 3318 + }, + { + "epoch": 3.51, + "learning_rate": 4.124207188160677e-05, + "loss": 0.3199, + "step": 3320 + }, + { + "epoch": 3.51, + "learning_rate": 4.1236786469344615e-05, + "loss": 0.2936, + "step": 3322 + }, + { + "epoch": 3.51, + "learning_rate": 4.1231501057082454e-05, + "loss": 0.2725, + "step": 3324 + }, + { + "epoch": 3.52, + "learning_rate": 4.12262156448203e-05, + "loss": 0.2933, + "step": 3326 + }, + { + "epoch": 3.52, + "learning_rate": 4.122093023255814e-05, + "loss": 0.392, + "step": 3328 + }, + { + "epoch": 3.52, + "learning_rate": 4.1215644820295986e-05, + "loss": 0.2794, + "step": 3330 + }, + { + "epoch": 3.52, + "learning_rate": 4.1210359408033825e-05, + "loss": 0.2913, + "step": 3332 + }, + { + "epoch": 3.52, + "learning_rate": 4.120507399577167e-05, + "loss": 0.3144, + "step": 3334 + }, + { + "epoch": 3.53, + "learning_rate": 4.119978858350951e-05, + "loss": 0.2582, + "step": 3336 + }, + { + "epoch": 3.53, + "learning_rate": 4.119450317124736e-05, + "loss": 0.2666, + "step": 3338 + }, + { + "epoch": 3.53, + "learning_rate": 4.11892177589852e-05, + "loss": 0.3842, + "step": 3340 + }, + { + "epoch": 3.53, + "learning_rate": 4.118393234672305e-05, + "loss": 0.5303, + "step": 3342 + }, + { + "epoch": 3.53, + "learning_rate": 4.117864693446089e-05, + "loss": 0.3226, + "step": 3344 + }, + { + "epoch": 3.54, + "learning_rate": 4.1173361522198734e-05, + "loss": 0.2795, + "step": 3346 + }, + { + "epoch": 3.54, + "learning_rate": 4.116807610993657e-05, + "loss": 0.3647, + "step": 3348 + }, + { + "epoch": 3.54, + "learning_rate": 4.116279069767442e-05, + "loss": 0.3534, + "step": 3350 + }, + { + "epoch": 3.54, + "learning_rate": 4.1157505285412265e-05, + "loss": 0.4055, + "step": 3352 + }, + { + "epoch": 3.55, + "learning_rate": 4.1152219873150104e-05, + "loss": 0.5529, + "step": 3354 + }, + { + "epoch": 3.55, + "learning_rate": 4.114693446088795e-05, + "loss": 0.5072, + "step": 3356 + }, + { + "epoch": 3.55, + "learning_rate": 4.1141649048625797e-05, + "loss": 0.2841, + "step": 3358 + }, + { + "epoch": 3.55, + "learning_rate": 4.113636363636364e-05, + "loss": 0.3891, + "step": 3360 + }, + { + "epoch": 3.55, + "learning_rate": 4.113107822410148e-05, + "loss": 0.3782, + "step": 3362 + }, + { + "epoch": 3.56, + "learning_rate": 4.112579281183933e-05, + "loss": 0.27, + "step": 3364 + }, + { + "epoch": 3.56, + "learning_rate": 4.112050739957717e-05, + "loss": 0.2478, + "step": 3366 + }, + { + "epoch": 3.56, + "learning_rate": 4.111522198731501e-05, + "loss": 0.3285, + "step": 3368 + }, + { + "epoch": 3.56, + "learning_rate": 4.110993657505285e-05, + "loss": 0.3204, + "step": 3370 + }, + { + "epoch": 3.56, + "learning_rate": 4.11046511627907e-05, + "loss": 0.3976, + "step": 3372 + }, + { + "epoch": 3.57, + "learning_rate": 4.1099365750528545e-05, + "loss": 0.3353, + "step": 3374 + }, + { + "epoch": 3.57, + "learning_rate": 4.109408033826639e-05, + "loss": 0.5545, + "step": 3376 + }, + { + "epoch": 3.57, + "learning_rate": 4.108879492600423e-05, + "loss": 0.3212, + "step": 3378 + }, + { + "epoch": 3.57, + "learning_rate": 4.1083509513742076e-05, + "loss": 0.4366, + "step": 3380 + }, + { + "epoch": 3.58, + "learning_rate": 4.1078224101479915e-05, + "loss": 0.2358, + "step": 3382 + }, + { + "epoch": 3.58, + "learning_rate": 4.107293868921776e-05, + "loss": 0.285, + "step": 3384 + }, + { + "epoch": 3.58, + "learning_rate": 4.10676532769556e-05, + "loss": 0.2965, + "step": 3386 + }, + { + "epoch": 3.58, + "learning_rate": 4.106236786469345e-05, + "loss": 0.2111, + "step": 3388 + }, + { + "epoch": 3.58, + "learning_rate": 4.1057082452431286e-05, + "loss": 0.2055, + "step": 3390 + }, + { + "epoch": 3.59, + "learning_rate": 4.105179704016914e-05, + "loss": 0.3141, + "step": 3392 + }, + { + "epoch": 3.59, + "learning_rate": 4.104651162790698e-05, + "loss": 0.2548, + "step": 3394 + }, + { + "epoch": 3.59, + "learning_rate": 4.1041226215644824e-05, + "loss": 0.1756, + "step": 3396 + }, + { + "epoch": 3.59, + "learning_rate": 4.1035940803382664e-05, + "loss": 0.1975, + "step": 3398 + }, + { + "epoch": 3.59, + "learning_rate": 4.103065539112051e-05, + "loss": 0.4897, + "step": 3400 + }, + { + "epoch": 3.6, + "learning_rate": 4.1025369978858356e-05, + "loss": 0.243, + "step": 3402 + }, + { + "epoch": 3.6, + "learning_rate": 4.1020084566596195e-05, + "loss": 0.2601, + "step": 3404 + }, + { + "epoch": 3.6, + "learning_rate": 4.101479915433404e-05, + "loss": 0.3227, + "step": 3406 + }, + { + "epoch": 3.6, + "learning_rate": 4.100951374207188e-05, + "loss": 0.3445, + "step": 3408 + }, + { + "epoch": 3.6, + "learning_rate": 4.1004228329809726e-05, + "loss": 0.2215, + "step": 3410 + }, + { + "epoch": 3.61, + "learning_rate": 4.099894291754757e-05, + "loss": 0.2449, + "step": 3412 + }, + { + "epoch": 3.61, + "learning_rate": 4.099365750528542e-05, + "loss": 0.3551, + "step": 3414 + }, + { + "epoch": 3.61, + "learning_rate": 4.098837209302326e-05, + "loss": 0.1306, + "step": 3416 + }, + { + "epoch": 3.61, + "learning_rate": 4.0983086680761104e-05, + "loss": 0.2045, + "step": 3418 + }, + { + "epoch": 3.62, + "learning_rate": 4.097780126849894e-05, + "loss": 0.1964, + "step": 3420 + }, + { + "epoch": 3.62, + "learning_rate": 4.097251585623679e-05, + "loss": 0.3248, + "step": 3422 + }, + { + "epoch": 3.62, + "learning_rate": 4.096723044397463e-05, + "loss": 0.4956, + "step": 3424 + }, + { + "epoch": 3.62, + "learning_rate": 4.0961945031712474e-05, + "loss": 0.2128, + "step": 3426 + }, + { + "epoch": 3.62, + "learning_rate": 4.095665961945032e-05, + "loss": 0.354, + "step": 3428 + }, + { + "epoch": 3.63, + "learning_rate": 4.0951374207188167e-05, + "loss": 0.2535, + "step": 3430 + }, + { + "epoch": 3.63, + "learning_rate": 4.0946088794926006e-05, + "loss": 0.3954, + "step": 3432 + }, + { + "epoch": 3.63, + "learning_rate": 4.094080338266385e-05, + "loss": 0.4083, + "step": 3434 + }, + { + "epoch": 3.63, + "learning_rate": 4.093551797040169e-05, + "loss": 0.2527, + "step": 3436 + }, + { + "epoch": 3.63, + "learning_rate": 4.093023255813954e-05, + "loss": 0.2535, + "step": 3438 + }, + { + "epoch": 3.64, + "learning_rate": 4.0924947145877377e-05, + "loss": 0.261, + "step": 3440 + }, + { + "epoch": 3.64, + "learning_rate": 4.091966173361522e-05, + "loss": 0.4259, + "step": 3442 + }, + { + "epoch": 3.64, + "learning_rate": 4.091437632135306e-05, + "loss": 0.4179, + "step": 3444 + }, + { + "epoch": 3.64, + "learning_rate": 4.0909090909090915e-05, + "loss": 0.2675, + "step": 3446 + }, + { + "epoch": 3.64, + "learning_rate": 4.0903805496828754e-05, + "loss": 0.4809, + "step": 3448 + }, + { + "epoch": 3.65, + "learning_rate": 4.08985200845666e-05, + "loss": 0.2123, + "step": 3450 + }, + { + "epoch": 3.65, + "learning_rate": 4.089323467230444e-05, + "loss": 0.1502, + "step": 3452 + }, + { + "epoch": 3.65, + "learning_rate": 4.0887949260042285e-05, + "loss": 0.2021, + "step": 3454 + }, + { + "epoch": 3.65, + "learning_rate": 4.088266384778013e-05, + "loss": 0.296, + "step": 3456 + }, + { + "epoch": 3.66, + "learning_rate": 4.087737843551797e-05, + "loss": 0.3827, + "step": 3458 + }, + { + "epoch": 3.66, + "learning_rate": 4.087209302325582e-05, + "loss": 0.3757, + "step": 3460 + }, + { + "epoch": 3.66, + "learning_rate": 4.0866807610993656e-05, + "loss": 0.2955, + "step": 3462 + }, + { + "epoch": 3.66, + "learning_rate": 4.08615221987315e-05, + "loss": 0.2052, + "step": 3464 + }, + { + "epoch": 3.66, + "learning_rate": 4.085623678646935e-05, + "loss": 0.211, + "step": 3466 + }, + { + "epoch": 3.67, + "learning_rate": 4.0850951374207194e-05, + "loss": 0.3555, + "step": 3468 + }, + { + "epoch": 3.67, + "learning_rate": 4.0845665961945034e-05, + "loss": 0.3841, + "step": 3470 + }, + { + "epoch": 3.67, + "learning_rate": 4.084038054968288e-05, + "loss": 0.3611, + "step": 3472 + }, + { + "epoch": 3.67, + "learning_rate": 4.083509513742072e-05, + "loss": 0.4419, + "step": 3474 + }, + { + "epoch": 3.67, + "learning_rate": 4.0829809725158565e-05, + "loss": 0.5077, + "step": 3476 + }, + { + "epoch": 3.68, + "learning_rate": 4.0824524312896404e-05, + "loss": 0.2802, + "step": 3478 + }, + { + "epoch": 3.68, + "learning_rate": 4.081923890063425e-05, + "loss": 0.5795, + "step": 3480 + }, + { + "epoch": 3.68, + "learning_rate": 4.0813953488372096e-05, + "loss": 0.2565, + "step": 3482 + }, + { + "epoch": 3.68, + "learning_rate": 4.080866807610994e-05, + "loss": 0.2815, + "step": 3484 + }, + { + "epoch": 3.68, + "learning_rate": 4.080338266384778e-05, + "loss": 0.5439, + "step": 3486 + }, + { + "epoch": 3.69, + "learning_rate": 4.079809725158563e-05, + "loss": 0.2885, + "step": 3488 + }, + { + "epoch": 3.69, + "learning_rate": 4.079281183932347e-05, + "loss": 0.295, + "step": 3490 + }, + { + "epoch": 3.69, + "learning_rate": 4.078752642706131e-05, + "loss": 0.2716, + "step": 3492 + }, + { + "epoch": 3.69, + "learning_rate": 4.078224101479915e-05, + "loss": 0.423, + "step": 3494 + }, + { + "epoch": 3.7, + "learning_rate": 4.0776955602537e-05, + "loss": 0.229, + "step": 3496 + }, + { + "epoch": 3.7, + "learning_rate": 4.077167019027484e-05, + "loss": 0.3066, + "step": 3498 + }, + { + "epoch": 3.7, + "learning_rate": 4.076638477801269e-05, + "loss": 0.2746, + "step": 3500 + }, + { + "epoch": 3.7, + "eval_cer": 0.031917925334853235, + "eval_loss": 0.4219053387641907, + "eval_runtime": 130.4096, + "eval_samples_per_second": 6.449, + "eval_steps_per_second": 0.813, + "step": 3500 + }, + { + "epoch": 3.7, + "learning_rate": 4.076109936575053e-05, + "loss": 0.4058, + "step": 3502 + }, + { + "epoch": 3.7, + "learning_rate": 4.0755813953488376e-05, + "loss": 0.1374, + "step": 3504 + }, + { + "epoch": 3.71, + "learning_rate": 4.0750528541226215e-05, + "loss": 0.1596, + "step": 3506 + }, + { + "epoch": 3.71, + "learning_rate": 4.074524312896406e-05, + "loss": 0.3841, + "step": 3508 + }, + { + "epoch": 3.71, + "learning_rate": 4.073995771670191e-05, + "loss": 0.326, + "step": 3510 + }, + { + "epoch": 3.71, + "learning_rate": 4.073467230443975e-05, + "loss": 0.2823, + "step": 3512 + }, + { + "epoch": 3.71, + "learning_rate": 4.072938689217759e-05, + "loss": 0.3303, + "step": 3514 + }, + { + "epoch": 3.72, + "learning_rate": 4.072410147991543e-05, + "loss": 0.3175, + "step": 3516 + }, + { + "epoch": 3.72, + "learning_rate": 4.0718816067653285e-05, + "loss": 0.2827, + "step": 3518 + }, + { + "epoch": 3.72, + "learning_rate": 4.0713530655391124e-05, + "loss": 0.3925, + "step": 3520 + }, + { + "epoch": 3.72, + "learning_rate": 4.070824524312897e-05, + "loss": 0.315, + "step": 3522 + }, + { + "epoch": 3.73, + "learning_rate": 4.070295983086681e-05, + "loss": 0.318, + "step": 3524 + }, + { + "epoch": 3.73, + "learning_rate": 4.0697674418604655e-05, + "loss": 0.3343, + "step": 3526 + }, + { + "epoch": 3.73, + "learning_rate": 4.0692389006342495e-05, + "loss": 0.2611, + "step": 3528 + }, + { + "epoch": 3.73, + "learning_rate": 4.068710359408034e-05, + "loss": 0.1885, + "step": 3530 + }, + { + "epoch": 3.73, + "learning_rate": 4.068181818181818e-05, + "loss": 0.4439, + "step": 3532 + }, + { + "epoch": 3.74, + "learning_rate": 4.0676532769556026e-05, + "loss": 0.1693, + "step": 3534 + }, + { + "epoch": 3.74, + "learning_rate": 4.067124735729387e-05, + "loss": 0.2422, + "step": 3536 + }, + { + "epoch": 3.74, + "learning_rate": 4.066596194503172e-05, + "loss": 0.2957, + "step": 3538 + }, + { + "epoch": 3.74, + "learning_rate": 4.066067653276956e-05, + "loss": 0.4195, + "step": 3540 + }, + { + "epoch": 3.74, + "learning_rate": 4.0655391120507404e-05, + "loss": 0.2689, + "step": 3542 + }, + { + "epoch": 3.75, + "learning_rate": 4.065010570824524e-05, + "loss": 0.2917, + "step": 3544 + }, + { + "epoch": 3.75, + "learning_rate": 4.064482029598309e-05, + "loss": 0.1926, + "step": 3546 + }, + { + "epoch": 3.75, + "learning_rate": 4.063953488372093e-05, + "loss": 0.2167, + "step": 3548 + }, + { + "epoch": 3.75, + "learning_rate": 4.0634249471458774e-05, + "loss": 0.2325, + "step": 3550 + }, + { + "epoch": 3.75, + "learning_rate": 4.0628964059196614e-05, + "loss": 0.2004, + "step": 3552 + }, + { + "epoch": 3.76, + "learning_rate": 4.0623678646934466e-05, + "loss": 0.2498, + "step": 3554 + }, + { + "epoch": 3.76, + "learning_rate": 4.0618393234672306e-05, + "loss": 0.2831, + "step": 3556 + }, + { + "epoch": 3.76, + "learning_rate": 4.061310782241015e-05, + "loss": 0.2611, + "step": 3558 + }, + { + "epoch": 3.76, + "learning_rate": 4.060782241014799e-05, + "loss": 0.2673, + "step": 3560 + }, + { + "epoch": 3.77, + "learning_rate": 4.060253699788584e-05, + "loss": 0.1925, + "step": 3562 + }, + { + "epoch": 3.77, + "learning_rate": 4.059725158562368e-05, + "loss": 0.2026, + "step": 3564 + }, + { + "epoch": 3.77, + "learning_rate": 4.059196617336152e-05, + "loss": 0.2491, + "step": 3566 + }, + { + "epoch": 3.77, + "learning_rate": 4.058668076109937e-05, + "loss": 0.1843, + "step": 3568 + }, + { + "epoch": 3.77, + "learning_rate": 4.058139534883721e-05, + "loss": 0.2792, + "step": 3570 + }, + { + "epoch": 3.78, + "learning_rate": 4.057610993657506e-05, + "loss": 0.3438, + "step": 3572 + }, + { + "epoch": 3.78, + "learning_rate": 4.05708245243129e-05, + "loss": 0.2346, + "step": 3574 + }, + { + "epoch": 3.78, + "learning_rate": 4.0565539112050746e-05, + "loss": 0.2819, + "step": 3576 + }, + { + "epoch": 3.78, + "learning_rate": 4.0560253699788585e-05, + "loss": 0.2978, + "step": 3578 + }, + { + "epoch": 3.78, + "learning_rate": 4.055496828752643e-05, + "loss": 0.4547, + "step": 3580 + }, + { + "epoch": 3.79, + "learning_rate": 4.054968287526427e-05, + "loss": 0.3207, + "step": 3582 + }, + { + "epoch": 3.79, + "learning_rate": 4.054439746300212e-05, + "loss": 0.2616, + "step": 3584 + }, + { + "epoch": 3.79, + "learning_rate": 4.0539112050739956e-05, + "loss": 0.3679, + "step": 3586 + }, + { + "epoch": 3.79, + "learning_rate": 4.05338266384778e-05, + "loss": 0.2208, + "step": 3588 + }, + { + "epoch": 3.79, + "learning_rate": 4.052854122621565e-05, + "loss": 0.2534, + "step": 3590 + }, + { + "epoch": 3.8, + "learning_rate": 4.0523255813953494e-05, + "loss": 0.2538, + "step": 3592 + }, + { + "epoch": 3.8, + "learning_rate": 4.0517970401691333e-05, + "loss": 0.3186, + "step": 3594 + }, + { + "epoch": 3.8, + "learning_rate": 4.051268498942918e-05, + "loss": 0.216, + "step": 3596 + }, + { + "epoch": 3.8, + "learning_rate": 4.050739957716702e-05, + "loss": 0.252, + "step": 3598 + }, + { + "epoch": 3.81, + "learning_rate": 4.0502114164904865e-05, + "loss": 0.3972, + "step": 3600 + }, + { + "epoch": 3.81, + "learning_rate": 4.0496828752642704e-05, + "loss": 0.3036, + "step": 3602 + }, + { + "epoch": 3.81, + "learning_rate": 4.049154334038055e-05, + "loss": 0.3883, + "step": 3604 + }, + { + "epoch": 3.81, + "learning_rate": 4.0486257928118396e-05, + "loss": 0.2229, + "step": 3606 + }, + { + "epoch": 3.81, + "learning_rate": 4.048097251585624e-05, + "loss": 0.359, + "step": 3608 + }, + { + "epoch": 3.82, + "learning_rate": 4.047568710359408e-05, + "loss": 0.2468, + "step": 3610 + }, + { + "epoch": 3.82, + "learning_rate": 4.047040169133193e-05, + "loss": 0.2458, + "step": 3612 + }, + { + "epoch": 3.82, + "learning_rate": 4.046511627906977e-05, + "loss": 0.2324, + "step": 3614 + }, + { + "epoch": 3.82, + "learning_rate": 4.045983086680761e-05, + "loss": 0.2602, + "step": 3616 + }, + { + "epoch": 3.82, + "learning_rate": 4.045454545454546e-05, + "loss": 0.2921, + "step": 3618 + }, + { + "epoch": 3.83, + "learning_rate": 4.04492600422833e-05, + "loss": 0.3894, + "step": 3620 + }, + { + "epoch": 3.83, + "learning_rate": 4.0443974630021144e-05, + "loss": 0.3837, + "step": 3622 + }, + { + "epoch": 3.83, + "learning_rate": 4.0438689217758984e-05, + "loss": 0.1566, + "step": 3624 + }, + { + "epoch": 3.83, + "learning_rate": 4.0433403805496836e-05, + "loss": 0.2718, + "step": 3626 + }, + { + "epoch": 3.84, + "learning_rate": 4.0428118393234676e-05, + "loss": 0.2338, + "step": 3628 + }, + { + "epoch": 3.84, + "learning_rate": 4.042283298097252e-05, + "loss": 0.2583, + "step": 3630 + }, + { + "epoch": 3.84, + "learning_rate": 4.041754756871036e-05, + "loss": 0.4098, + "step": 3632 + }, + { + "epoch": 3.84, + "learning_rate": 4.041226215644821e-05, + "loss": 0.3291, + "step": 3634 + }, + { + "epoch": 3.84, + "learning_rate": 4.0406976744186046e-05, + "loss": 0.3875, + "step": 3636 + }, + { + "epoch": 3.85, + "learning_rate": 4.040169133192389e-05, + "loss": 0.3359, + "step": 3638 + }, + { + "epoch": 3.85, + "learning_rate": 4.039640591966173e-05, + "loss": 0.545, + "step": 3640 + }, + { + "epoch": 3.85, + "learning_rate": 4.039112050739958e-05, + "loss": 0.2262, + "step": 3642 + }, + { + "epoch": 3.85, + "learning_rate": 4.0385835095137424e-05, + "loss": 0.2983, + "step": 3644 + }, + { + "epoch": 3.85, + "learning_rate": 4.038054968287527e-05, + "loss": 0.3009, + "step": 3646 + }, + { + "epoch": 3.86, + "learning_rate": 4.037526427061311e-05, + "loss": 0.3872, + "step": 3648 + }, + { + "epoch": 3.86, + "learning_rate": 4.0369978858350955e-05, + "loss": 0.5378, + "step": 3650 + }, + { + "epoch": 3.86, + "learning_rate": 4.0364693446088795e-05, + "loss": 0.3241, + "step": 3652 + }, + { + "epoch": 3.86, + "learning_rate": 4.035940803382664e-05, + "loss": 0.2517, + "step": 3654 + }, + { + "epoch": 3.86, + "learning_rate": 4.035412262156448e-05, + "loss": 0.3658, + "step": 3656 + }, + { + "epoch": 3.87, + "learning_rate": 4.0348837209302326e-05, + "loss": 0.2965, + "step": 3658 + }, + { + "epoch": 3.87, + "learning_rate": 4.034355179704017e-05, + "loss": 0.2556, + "step": 3660 + }, + { + "epoch": 3.87, + "learning_rate": 4.033826638477802e-05, + "loss": 0.2858, + "step": 3662 + }, + { + "epoch": 3.87, + "learning_rate": 4.033298097251586e-05, + "loss": 0.2957, + "step": 3664 + }, + { + "epoch": 3.88, + "learning_rate": 4.0327695560253703e-05, + "loss": 0.2999, + "step": 3666 + }, + { + "epoch": 3.88, + "learning_rate": 4.032241014799155e-05, + "loss": 0.3098, + "step": 3668 + }, + { + "epoch": 3.88, + "learning_rate": 4.031712473572939e-05, + "loss": 0.2422, + "step": 3670 + }, + { + "epoch": 3.88, + "learning_rate": 4.0311839323467235e-05, + "loss": 0.2735, + "step": 3672 + }, + { + "epoch": 3.88, + "learning_rate": 4.0306553911205074e-05, + "loss": 0.3498, + "step": 3674 + }, + { + "epoch": 3.89, + "learning_rate": 4.030126849894292e-05, + "loss": 0.2948, + "step": 3676 + }, + { + "epoch": 3.89, + "learning_rate": 4.029598308668076e-05, + "loss": 0.2371, + "step": 3678 + }, + { + "epoch": 3.89, + "learning_rate": 4.029069767441861e-05, + "loss": 0.3495, + "step": 3680 + }, + { + "epoch": 3.89, + "learning_rate": 4.028541226215645e-05, + "loss": 0.2426, + "step": 3682 + }, + { + "epoch": 3.89, + "learning_rate": 4.02801268498943e-05, + "loss": 0.4343, + "step": 3684 + }, + { + "epoch": 3.9, + "learning_rate": 4.027484143763214e-05, + "loss": 0.3005, + "step": 3686 + }, + { + "epoch": 3.9, + "learning_rate": 4.026955602536998e-05, + "loss": 0.233, + "step": 3688 + }, + { + "epoch": 3.9, + "learning_rate": 4.026427061310782e-05, + "loss": 0.3673, + "step": 3690 + }, + { + "epoch": 3.9, + "learning_rate": 4.025898520084567e-05, + "loss": 0.1899, + "step": 3692 + }, + { + "epoch": 3.9, + "learning_rate": 4.025369978858351e-05, + "loss": 0.2641, + "step": 3694 + }, + { + "epoch": 3.91, + "learning_rate": 4.0248414376321354e-05, + "loss": 0.2289, + "step": 3696 + }, + { + "epoch": 3.91, + "learning_rate": 4.02431289640592e-05, + "loss": 0.1489, + "step": 3698 + }, + { + "epoch": 3.91, + "learning_rate": 4.0237843551797046e-05, + "loss": 0.3439, + "step": 3700 + }, + { + "epoch": 3.91, + "learning_rate": 4.0232558139534885e-05, + "loss": 0.2439, + "step": 3702 + }, + { + "epoch": 3.92, + "learning_rate": 4.022727272727273e-05, + "loss": 0.3292, + "step": 3704 + }, + { + "epoch": 3.92, + "learning_rate": 4.022198731501057e-05, + "loss": 0.1504, + "step": 3706 + }, + { + "epoch": 3.92, + "learning_rate": 4.0216701902748416e-05, + "loss": 0.3573, + "step": 3708 + }, + { + "epoch": 3.92, + "learning_rate": 4.0211416490486256e-05, + "loss": 0.2399, + "step": 3710 + }, + { + "epoch": 3.92, + "learning_rate": 4.02061310782241e-05, + "loss": 0.2406, + "step": 3712 + }, + { + "epoch": 3.93, + "learning_rate": 4.020084566596195e-05, + "loss": 0.2586, + "step": 3714 + }, + { + "epoch": 3.93, + "learning_rate": 4.0195560253699794e-05, + "loss": 0.4283, + "step": 3716 + }, + { + "epoch": 3.93, + "learning_rate": 4.019027484143763e-05, + "loss": 0.2388, + "step": 3718 + }, + { + "epoch": 3.93, + "learning_rate": 4.018498942917548e-05, + "loss": 0.4452, + "step": 3720 + }, + { + "epoch": 3.93, + "learning_rate": 4.0179704016913325e-05, + "loss": 0.2854, + "step": 3722 + }, + { + "epoch": 3.94, + "learning_rate": 4.0174418604651165e-05, + "loss": 0.2051, + "step": 3724 + }, + { + "epoch": 3.94, + "learning_rate": 4.016913319238901e-05, + "loss": 0.339, + "step": 3726 + }, + { + "epoch": 3.94, + "learning_rate": 4.016384778012685e-05, + "loss": 0.1278, + "step": 3728 + }, + { + "epoch": 3.94, + "learning_rate": 4.0158562367864696e-05, + "loss": 0.2469, + "step": 3730 + }, + { + "epoch": 3.95, + "learning_rate": 4.0153276955602535e-05, + "loss": 0.2045, + "step": 3732 + }, + { + "epoch": 3.95, + "learning_rate": 4.014799154334038e-05, + "loss": 0.2847, + "step": 3734 + }, + { + "epoch": 3.95, + "learning_rate": 4.014270613107823e-05, + "loss": 0.1441, + "step": 3736 + }, + { + "epoch": 3.95, + "learning_rate": 4.0137420718816073e-05, + "loss": 0.3645, + "step": 3738 + }, + { + "epoch": 3.95, + "learning_rate": 4.013213530655391e-05, + "loss": 0.2791, + "step": 3740 + }, + { + "epoch": 3.96, + "learning_rate": 4.012684989429176e-05, + "loss": 0.3352, + "step": 3742 + }, + { + "epoch": 3.96, + "learning_rate": 4.01215644820296e-05, + "loss": 0.4004, + "step": 3744 + }, + { + "epoch": 3.96, + "learning_rate": 4.0116279069767444e-05, + "loss": 0.2803, + "step": 3746 + }, + { + "epoch": 3.96, + "learning_rate": 4.0110993657505283e-05, + "loss": 0.3235, + "step": 3748 + }, + { + "epoch": 3.96, + "learning_rate": 4.010570824524313e-05, + "loss": 0.2786, + "step": 3750 + }, + { + "epoch": 3.97, + "learning_rate": 4.010042283298097e-05, + "loss": 0.277, + "step": 3752 + }, + { + "epoch": 3.97, + "learning_rate": 4.009513742071882e-05, + "loss": 0.3252, + "step": 3754 + }, + { + "epoch": 3.97, + "learning_rate": 4.008985200845666e-05, + "loss": 0.361, + "step": 3756 + }, + { + "epoch": 3.97, + "learning_rate": 4.008456659619451e-05, + "loss": 0.3615, + "step": 3758 + }, + { + "epoch": 3.97, + "learning_rate": 4.0079281183932346e-05, + "loss": 0.3047, + "step": 3760 + }, + { + "epoch": 3.98, + "learning_rate": 4.007399577167019e-05, + "loss": 0.3798, + "step": 3762 + }, + { + "epoch": 3.98, + "learning_rate": 4.006871035940803e-05, + "loss": 0.3093, + "step": 3764 + }, + { + "epoch": 3.98, + "learning_rate": 4.006342494714588e-05, + "loss": 0.2643, + "step": 3766 + }, + { + "epoch": 3.98, + "learning_rate": 4.0058139534883724e-05, + "loss": 0.4188, + "step": 3768 + }, + { + "epoch": 3.99, + "learning_rate": 4.005285412262156e-05, + "loss": 0.3424, + "step": 3770 + }, + { + "epoch": 3.99, + "learning_rate": 4.004756871035941e-05, + "loss": 0.2737, + "step": 3772 + }, + { + "epoch": 3.99, + "learning_rate": 4.0042283298097255e-05, + "loss": 0.2672, + "step": 3774 + }, + { + "epoch": 3.99, + "learning_rate": 4.00369978858351e-05, + "loss": 0.2018, + "step": 3776 + }, + { + "epoch": 3.99, + "learning_rate": 4.003171247357294e-05, + "loss": 0.1879, + "step": 3778 + }, + { + "epoch": 4.0, + "learning_rate": 4.0026427061310787e-05, + "loss": 0.2752, + "step": 3780 + }, + { + "epoch": 4.0, + "learning_rate": 4.0021141649048626e-05, + "loss": 0.304, + "step": 3782 + }, + { + "epoch": 4.0, + "learning_rate": 4.001585623678647e-05, + "loss": 0.1757, + "step": 3784 + }, + { + "epoch": 4.0, + "learning_rate": 4.001057082452431e-05, + "loss": 0.2514, + "step": 3786 + }, + { + "epoch": 4.0, + "learning_rate": 4.000528541226216e-05, + "loss": 0.2022, + "step": 3788 + }, + { + "epoch": 4.01, + "learning_rate": 4e-05, + "loss": 0.2014, + "step": 3790 + }, + { + "epoch": 4.01, + "learning_rate": 3.999471458773785e-05, + "loss": 0.1431, + "step": 3792 + }, + { + "epoch": 4.01, + "learning_rate": 3.998942917547569e-05, + "loss": 0.2986, + "step": 3794 + }, + { + "epoch": 4.01, + "learning_rate": 3.9984143763213535e-05, + "loss": 0.2922, + "step": 3796 + }, + { + "epoch": 4.01, + "learning_rate": 3.9978858350951374e-05, + "loss": 0.323, + "step": 3798 + }, + { + "epoch": 4.02, + "learning_rate": 3.997357293868922e-05, + "loss": 0.1974, + "step": 3800 + }, + { + "epoch": 4.02, + "learning_rate": 3.996828752642706e-05, + "loss": 0.2474, + "step": 3802 + }, + { + "epoch": 4.02, + "learning_rate": 3.9963002114164905e-05, + "loss": 0.3273, + "step": 3804 + }, + { + "epoch": 4.02, + "learning_rate": 3.9957716701902745e-05, + "loss": 0.2557, + "step": 3806 + }, + { + "epoch": 4.03, + "learning_rate": 3.99524312896406e-05, + "loss": 0.1653, + "step": 3808 + }, + { + "epoch": 4.03, + "learning_rate": 3.994714587737844e-05, + "loss": 0.2223, + "step": 3810 + }, + { + "epoch": 4.03, + "learning_rate": 3.994186046511628e-05, + "loss": 0.186, + "step": 3812 + }, + { + "epoch": 4.03, + "learning_rate": 3.993657505285412e-05, + "loss": 0.1906, + "step": 3814 + }, + { + "epoch": 4.03, + "learning_rate": 3.993128964059197e-05, + "loss": 0.2265, + "step": 3816 + }, + { + "epoch": 4.04, + "learning_rate": 3.992600422832981e-05, + "loss": 0.2011, + "step": 3818 + }, + { + "epoch": 4.04, + "learning_rate": 3.9920718816067654e-05, + "loss": 0.2358, + "step": 3820 + }, + { + "epoch": 4.04, + "learning_rate": 3.99154334038055e-05, + "loss": 0.1286, + "step": 3822 + }, + { + "epoch": 4.04, + "learning_rate": 3.991014799154334e-05, + "loss": 0.2227, + "step": 3824 + }, + { + "epoch": 4.04, + "learning_rate": 3.9904862579281185e-05, + "loss": 0.1696, + "step": 3826 + }, + { + "epoch": 4.05, + "learning_rate": 3.989957716701903e-05, + "loss": 0.2811, + "step": 3828 + }, + { + "epoch": 4.05, + "learning_rate": 3.989429175475688e-05, + "loss": 0.2175, + "step": 3830 + }, + { + "epoch": 4.05, + "learning_rate": 3.9889006342494716e-05, + "loss": 0.5743, + "step": 3832 + }, + { + "epoch": 4.05, + "learning_rate": 3.988372093023256e-05, + "loss": 0.6482, + "step": 3834 + }, + { + "epoch": 4.05, + "learning_rate": 3.98784355179704e-05, + "loss": 0.4804, + "step": 3836 + }, + { + "epoch": 4.06, + "learning_rate": 3.987315010570825e-05, + "loss": 0.505, + "step": 3838 + }, + { + "epoch": 4.06, + "learning_rate": 3.986786469344609e-05, + "loss": 0.6236, + "step": 3840 + }, + { + "epoch": 4.06, + "learning_rate": 3.986257928118393e-05, + "loss": 0.3889, + "step": 3842 + }, + { + "epoch": 4.06, + "learning_rate": 3.985729386892178e-05, + "loss": 0.1109, + "step": 3844 + }, + { + "epoch": 4.07, + "learning_rate": 3.9852008456659625e-05, + "loss": 0.4023, + "step": 3846 + }, + { + "epoch": 4.07, + "learning_rate": 3.9846723044397464e-05, + "loss": 0.4235, + "step": 3848 + }, + { + "epoch": 4.07, + "learning_rate": 3.984143763213531e-05, + "loss": 0.3534, + "step": 3850 + }, + { + "epoch": 4.07, + "learning_rate": 3.983615221987315e-05, + "loss": 0.3166, + "step": 3852 + }, + { + "epoch": 4.07, + "learning_rate": 3.9830866807610996e-05, + "loss": 0.2571, + "step": 3854 + }, + { + "epoch": 4.08, + "learning_rate": 3.9825581395348835e-05, + "loss": 0.4292, + "step": 3856 + }, + { + "epoch": 4.08, + "learning_rate": 3.982029598308668e-05, + "loss": 0.3876, + "step": 3858 + }, + { + "epoch": 4.08, + "learning_rate": 3.981501057082452e-05, + "loss": 0.4129, + "step": 3860 + }, + { + "epoch": 4.08, + "learning_rate": 3.980972515856237e-05, + "loss": 0.1996, + "step": 3862 + }, + { + "epoch": 4.08, + "learning_rate": 3.980443974630021e-05, + "loss": 0.2858, + "step": 3864 + }, + { + "epoch": 4.09, + "learning_rate": 3.979915433403806e-05, + "loss": 0.33, + "step": 3866 + }, + { + "epoch": 4.09, + "learning_rate": 3.97938689217759e-05, + "loss": 0.1376, + "step": 3868 + }, + { + "epoch": 4.09, + "learning_rate": 3.9788583509513744e-05, + "loss": 0.1673, + "step": 3870 + }, + { + "epoch": 4.09, + "learning_rate": 3.978329809725159e-05, + "loss": 0.172, + "step": 3872 + }, + { + "epoch": 4.1, + "learning_rate": 3.977801268498943e-05, + "loss": 0.2426, + "step": 3874 + }, + { + "epoch": 4.1, + "learning_rate": 3.9772727272727275e-05, + "loss": 0.3028, + "step": 3876 + }, + { + "epoch": 4.1, + "learning_rate": 3.9767441860465115e-05, + "loss": 0.2281, + "step": 3878 + }, + { + "epoch": 4.1, + "learning_rate": 3.976215644820296e-05, + "loss": 0.1447, + "step": 3880 + }, + { + "epoch": 4.1, + "learning_rate": 3.975687103594081e-05, + "loss": 0.2487, + "step": 3882 + }, + { + "epoch": 4.11, + "learning_rate": 3.975158562367865e-05, + "loss": 0.1818, + "step": 3884 + }, + { + "epoch": 4.11, + "learning_rate": 3.974630021141649e-05, + "loss": 0.1965, + "step": 3886 + }, + { + "epoch": 4.11, + "learning_rate": 3.974101479915434e-05, + "loss": 0.2904, + "step": 3888 + }, + { + "epoch": 4.11, + "learning_rate": 3.973572938689218e-05, + "loss": 0.3242, + "step": 3890 + }, + { + "epoch": 4.11, + "learning_rate": 3.9730443974630024e-05, + "loss": 0.263, + "step": 3892 + }, + { + "epoch": 4.12, + "learning_rate": 3.972515856236786e-05, + "loss": 0.1676, + "step": 3894 + }, + { + "epoch": 4.12, + "learning_rate": 3.971987315010571e-05, + "loss": 0.1705, + "step": 3896 + }, + { + "epoch": 4.12, + "learning_rate": 3.9714587737843555e-05, + "loss": 0.2202, + "step": 3898 + }, + { + "epoch": 4.12, + "learning_rate": 3.97093023255814e-05, + "loss": 0.2569, + "step": 3900 + }, + { + "epoch": 4.12, + "learning_rate": 3.970401691331924e-05, + "loss": 0.1988, + "step": 3902 + }, + { + "epoch": 4.13, + "learning_rate": 3.9698731501057086e-05, + "loss": 0.3049, + "step": 3904 + }, + { + "epoch": 4.13, + "learning_rate": 3.9693446088794926e-05, + "loss": 0.1223, + "step": 3906 + }, + { + "epoch": 4.13, + "learning_rate": 3.968816067653277e-05, + "loss": 0.2061, + "step": 3908 + }, + { + "epoch": 4.13, + "learning_rate": 3.968287526427061e-05, + "loss": 0.1779, + "step": 3910 + }, + { + "epoch": 4.14, + "learning_rate": 3.967758985200846e-05, + "loss": 0.2722, + "step": 3912 + }, + { + "epoch": 4.14, + "learning_rate": 3.9672304439746296e-05, + "loss": 0.2599, + "step": 3914 + }, + { + "epoch": 4.14, + "learning_rate": 3.966701902748415e-05, + "loss": 0.1343, + "step": 3916 + }, + { + "epoch": 4.14, + "learning_rate": 3.966173361522199e-05, + "loss": 0.2221, + "step": 3918 + }, + { + "epoch": 4.14, + "learning_rate": 3.9656448202959834e-05, + "loss": 0.2081, + "step": 3920 + }, + { + "epoch": 4.15, + "learning_rate": 3.9651162790697674e-05, + "loss": 0.2064, + "step": 3922 + }, + { + "epoch": 4.15, + "learning_rate": 3.964587737843552e-05, + "loss": 0.2185, + "step": 3924 + }, + { + "epoch": 4.15, + "learning_rate": 3.9640591966173366e-05, + "loss": 0.2235, + "step": 3926 + }, + { + "epoch": 4.15, + "learning_rate": 3.9635306553911205e-05, + "loss": 0.205, + "step": 3928 + }, + { + "epoch": 4.15, + "learning_rate": 3.963002114164905e-05, + "loss": 0.1873, + "step": 3930 + }, + { + "epoch": 4.16, + "learning_rate": 3.962473572938689e-05, + "loss": 0.1276, + "step": 3932 + }, + { + "epoch": 4.16, + "learning_rate": 3.961945031712474e-05, + "loss": 0.2165, + "step": 3934 + }, + { + "epoch": 4.16, + "learning_rate": 3.961416490486258e-05, + "loss": 0.1211, + "step": 3936 + }, + { + "epoch": 4.16, + "learning_rate": 3.960887949260043e-05, + "loss": 0.1964, + "step": 3938 + }, + { + "epoch": 4.16, + "learning_rate": 3.960359408033827e-05, + "loss": 0.3566, + "step": 3940 + }, + { + "epoch": 4.17, + "learning_rate": 3.9598308668076114e-05, + "loss": 0.2372, + "step": 3942 + }, + { + "epoch": 4.17, + "learning_rate": 3.959302325581395e-05, + "loss": 0.2184, + "step": 3944 + }, + { + "epoch": 4.17, + "learning_rate": 3.95877378435518e-05, + "loss": 0.1324, + "step": 3946 + }, + { + "epoch": 4.17, + "learning_rate": 3.958245243128964e-05, + "loss": 0.1911, + "step": 3948 + }, + { + "epoch": 4.18, + "learning_rate": 3.9577167019027485e-05, + "loss": 0.3218, + "step": 3950 + }, + { + "epoch": 4.18, + "learning_rate": 3.957188160676533e-05, + "loss": 0.1894, + "step": 3952 + }, + { + "epoch": 4.18, + "learning_rate": 3.956659619450318e-05, + "loss": 0.323, + "step": 3954 + }, + { + "epoch": 4.18, + "learning_rate": 3.9561310782241016e-05, + "loss": 0.3028, + "step": 3956 + }, + { + "epoch": 4.18, + "learning_rate": 3.955602536997886e-05, + "loss": 0.2063, + "step": 3958 + }, + { + "epoch": 4.19, + "learning_rate": 3.95507399577167e-05, + "loss": 0.3204, + "step": 3960 + }, + { + "epoch": 4.19, + "learning_rate": 3.954545454545455e-05, + "loss": 0.1358, + "step": 3962 + }, + { + "epoch": 4.19, + "learning_rate": 3.954016913319239e-05, + "loss": 0.3048, + "step": 3964 + }, + { + "epoch": 4.19, + "learning_rate": 3.953488372093023e-05, + "loss": 0.2386, + "step": 3966 + }, + { + "epoch": 4.19, + "learning_rate": 3.952959830866807e-05, + "loss": 0.2359, + "step": 3968 + }, + { + "epoch": 4.2, + "learning_rate": 3.9524312896405925e-05, + "loss": 0.1386, + "step": 3970 + }, + { + "epoch": 4.2, + "learning_rate": 3.9519027484143764e-05, + "loss": 0.1509, + "step": 3972 + }, + { + "epoch": 4.2, + "learning_rate": 3.951374207188161e-05, + "loss": 0.1746, + "step": 3974 + }, + { + "epoch": 4.2, + "learning_rate": 3.950845665961945e-05, + "loss": 0.2711, + "step": 3976 + }, + { + "epoch": 4.21, + "learning_rate": 3.9503171247357296e-05, + "loss": 0.2591, + "step": 3978 + }, + { + "epoch": 4.21, + "learning_rate": 3.949788583509514e-05, + "loss": 0.3, + "step": 3980 + }, + { + "epoch": 4.21, + "learning_rate": 3.949260042283298e-05, + "loss": 0.2035, + "step": 3982 + }, + { + "epoch": 4.21, + "learning_rate": 3.948731501057083e-05, + "loss": 0.2351, + "step": 3984 + }, + { + "epoch": 4.21, + "learning_rate": 3.9482029598308666e-05, + "loss": 0.1863, + "step": 3986 + }, + { + "epoch": 4.22, + "learning_rate": 3.947674418604652e-05, + "loss": 0.1715, + "step": 3988 + }, + { + "epoch": 4.22, + "learning_rate": 3.947145877378436e-05, + "loss": 0.1884, + "step": 3990 + }, + { + "epoch": 4.22, + "learning_rate": 3.9466173361522205e-05, + "loss": 0.4106, + "step": 3992 + }, + { + "epoch": 4.22, + "learning_rate": 3.9460887949260044e-05, + "loss": 0.3041, + "step": 3994 + }, + { + "epoch": 4.22, + "learning_rate": 3.945560253699789e-05, + "loss": 0.22, + "step": 3996 + }, + { + "epoch": 4.23, + "learning_rate": 3.945031712473573e-05, + "loss": 0.2508, + "step": 3998 + }, + { + "epoch": 4.23, + "learning_rate": 3.9445031712473575e-05, + "loss": 0.1456, + "step": 4000 + }, + { + "epoch": 4.23, + "eval_cer": 0.039897406668566546, + "eval_loss": 0.5804613828659058, + "eval_runtime": 129.4688, + "eval_samples_per_second": 6.496, + "eval_steps_per_second": 0.819, + "step": 4000 + }, + { + "epoch": 4.23, + "learning_rate": 3.9439746300211415e-05, + "loss": 0.2934, + "step": 4002 + }, + { + "epoch": 4.23, + "learning_rate": 3.943446088794926e-05, + "loss": 0.1868, + "step": 4004 + }, + { + "epoch": 4.23, + "learning_rate": 3.942917547568711e-05, + "loss": 0.2808, + "step": 4006 + }, + { + "epoch": 4.24, + "learning_rate": 3.942389006342495e-05, + "loss": 0.1501, + "step": 4008 + }, + { + "epoch": 4.24, + "learning_rate": 3.941860465116279e-05, + "loss": 0.241, + "step": 4010 + }, + { + "epoch": 4.24, + "learning_rate": 3.941331923890064e-05, + "loss": 0.1349, + "step": 4012 + }, + { + "epoch": 4.24, + "learning_rate": 3.940803382663848e-05, + "loss": 0.3111, + "step": 4014 + }, + { + "epoch": 4.25, + "learning_rate": 3.940274841437632e-05, + "loss": 0.2869, + "step": 4016 + }, + { + "epoch": 4.25, + "learning_rate": 3.939746300211416e-05, + "loss": 0.2255, + "step": 4018 + }, + { + "epoch": 4.25, + "learning_rate": 3.939217758985201e-05, + "loss": 0.2188, + "step": 4020 + }, + { + "epoch": 4.25, + "learning_rate": 3.9386892177589855e-05, + "loss": 0.1902, + "step": 4022 + }, + { + "epoch": 4.25, + "learning_rate": 3.93816067653277e-05, + "loss": 0.1701, + "step": 4024 + }, + { + "epoch": 4.26, + "learning_rate": 3.937632135306554e-05, + "loss": 0.1584, + "step": 4026 + }, + { + "epoch": 4.26, + "learning_rate": 3.9371035940803386e-05, + "loss": 0.2237, + "step": 4028 + }, + { + "epoch": 4.26, + "learning_rate": 3.9365750528541225e-05, + "loss": 0.398, + "step": 4030 + }, + { + "epoch": 4.26, + "learning_rate": 3.936046511627907e-05, + "loss": 0.1038, + "step": 4032 + }, + { + "epoch": 4.26, + "learning_rate": 3.935517970401692e-05, + "loss": 0.1975, + "step": 4034 + }, + { + "epoch": 4.27, + "learning_rate": 3.934989429175476e-05, + "loss": 0.2825, + "step": 4036 + }, + { + "epoch": 4.27, + "learning_rate": 3.93446088794926e-05, + "loss": 0.1508, + "step": 4038 + }, + { + "epoch": 4.27, + "learning_rate": 3.933932346723044e-05, + "loss": 0.1505, + "step": 4040 + }, + { + "epoch": 4.27, + "learning_rate": 3.9334038054968295e-05, + "loss": 0.2344, + "step": 4042 + }, + { + "epoch": 4.27, + "learning_rate": 3.9328752642706134e-05, + "loss": 0.1584, + "step": 4044 + }, + { + "epoch": 4.28, + "learning_rate": 3.932346723044398e-05, + "loss": 0.2375, + "step": 4046 + }, + { + "epoch": 4.28, + "learning_rate": 3.931818181818182e-05, + "loss": 0.3126, + "step": 4048 + }, + { + "epoch": 4.28, + "learning_rate": 3.9312896405919666e-05, + "loss": 0.2596, + "step": 4050 + }, + { + "epoch": 4.28, + "learning_rate": 3.9307610993657505e-05, + "loss": 0.1258, + "step": 4052 + }, + { + "epoch": 4.29, + "learning_rate": 3.930232558139535e-05, + "loss": 0.2411, + "step": 4054 + }, + { + "epoch": 4.29, + "learning_rate": 3.929704016913319e-05, + "loss": 0.2466, + "step": 4056 + }, + { + "epoch": 4.29, + "learning_rate": 3.9291754756871036e-05, + "loss": 0.125, + "step": 4058 + }, + { + "epoch": 4.29, + "learning_rate": 3.928646934460888e-05, + "loss": 0.1596, + "step": 4060 + }, + { + "epoch": 4.29, + "learning_rate": 3.928118393234673e-05, + "loss": 0.2621, + "step": 4062 + }, + { + "epoch": 4.3, + "learning_rate": 3.927589852008457e-05, + "loss": 0.208, + "step": 4064 + }, + { + "epoch": 4.3, + "learning_rate": 3.9270613107822414e-05, + "loss": 0.2743, + "step": 4066 + }, + { + "epoch": 4.3, + "learning_rate": 3.926532769556025e-05, + "loss": 0.3313, + "step": 4068 + }, + { + "epoch": 4.3, + "learning_rate": 3.92600422832981e-05, + "loss": 0.1685, + "step": 4070 + }, + { + "epoch": 4.3, + "learning_rate": 3.925475687103594e-05, + "loss": 0.2687, + "step": 4072 + }, + { + "epoch": 4.31, + "learning_rate": 3.9249471458773785e-05, + "loss": 0.3284, + "step": 4074 + }, + { + "epoch": 4.31, + "learning_rate": 3.924418604651163e-05, + "loss": 0.2718, + "step": 4076 + }, + { + "epoch": 4.31, + "learning_rate": 3.923890063424948e-05, + "loss": 0.2013, + "step": 4078 + }, + { + "epoch": 4.31, + "learning_rate": 3.9233615221987316e-05, + "loss": 0.2228, + "step": 4080 + }, + { + "epoch": 4.32, + "learning_rate": 3.922832980972516e-05, + "loss": 0.5322, + "step": 4082 + }, + { + "epoch": 4.32, + "learning_rate": 3.9223044397463e-05, + "loss": 0.36, + "step": 4084 + }, + { + "epoch": 4.32, + "learning_rate": 3.921775898520085e-05, + "loss": 0.1814, + "step": 4086 + }, + { + "epoch": 4.32, + "learning_rate": 3.9212473572938693e-05, + "loss": 0.2867, + "step": 4088 + }, + { + "epoch": 4.32, + "learning_rate": 3.920718816067653e-05, + "loss": 0.2947, + "step": 4090 + }, + { + "epoch": 4.33, + "learning_rate": 3.920190274841438e-05, + "loss": 0.3523, + "step": 4092 + }, + { + "epoch": 4.33, + "learning_rate": 3.919661733615222e-05, + "loss": 0.1549, + "step": 4094 + }, + { + "epoch": 4.33, + "learning_rate": 3.919133192389007e-05, + "loss": 0.3549, + "step": 4096 + }, + { + "epoch": 4.33, + "learning_rate": 3.918604651162791e-05, + "loss": 0.3409, + "step": 4098 + }, + { + "epoch": 4.33, + "learning_rate": 3.9180761099365756e-05, + "loss": 0.2373, + "step": 4100 + }, + { + "epoch": 4.34, + "learning_rate": 3.9175475687103596e-05, + "loss": 0.2428, + "step": 4102 + }, + { + "epoch": 4.34, + "learning_rate": 3.917019027484144e-05, + "loss": 0.2634, + "step": 4104 + }, + { + "epoch": 4.34, + "learning_rate": 3.916490486257928e-05, + "loss": 0.1302, + "step": 4106 + }, + { + "epoch": 4.34, + "learning_rate": 3.915961945031713e-05, + "loss": 0.26, + "step": 4108 + }, + { + "epoch": 4.34, + "learning_rate": 3.9154334038054966e-05, + "loss": 0.217, + "step": 4110 + }, + { + "epoch": 4.35, + "learning_rate": 3.914904862579281e-05, + "loss": 0.1351, + "step": 4112 + }, + { + "epoch": 4.35, + "learning_rate": 3.914376321353066e-05, + "loss": 0.2058, + "step": 4114 + }, + { + "epoch": 4.35, + "learning_rate": 3.9138477801268504e-05, + "loss": 0.1843, + "step": 4116 + }, + { + "epoch": 4.35, + "learning_rate": 3.9133192389006344e-05, + "loss": 0.4963, + "step": 4118 + }, + { + "epoch": 4.36, + "learning_rate": 3.912790697674419e-05, + "loss": 0.2972, + "step": 4120 + }, + { + "epoch": 4.36, + "learning_rate": 3.912262156448203e-05, + "loss": 0.2613, + "step": 4122 + }, + { + "epoch": 4.36, + "learning_rate": 3.9117336152219875e-05, + "loss": 0.1996, + "step": 4124 + }, + { + "epoch": 4.36, + "learning_rate": 3.9112050739957714e-05, + "loss": 0.3973, + "step": 4126 + }, + { + "epoch": 4.36, + "learning_rate": 3.910676532769556e-05, + "loss": 0.149, + "step": 4128 + }, + { + "epoch": 4.37, + "learning_rate": 3.9101479915433406e-05, + "loss": 0.1527, + "step": 4130 + }, + { + "epoch": 4.37, + "learning_rate": 3.909619450317125e-05, + "loss": 0.2652, + "step": 4132 + }, + { + "epoch": 4.37, + "learning_rate": 3.909090909090909e-05, + "loss": 0.2499, + "step": 4134 + }, + { + "epoch": 4.37, + "learning_rate": 3.908562367864694e-05, + "loss": 0.2321, + "step": 4136 + }, + { + "epoch": 4.37, + "learning_rate": 3.9080338266384784e-05, + "loss": 0.2166, + "step": 4138 + }, + { + "epoch": 4.38, + "learning_rate": 3.907505285412262e-05, + "loss": 0.2361, + "step": 4140 + }, + { + "epoch": 4.38, + "learning_rate": 3.906976744186047e-05, + "loss": 0.3066, + "step": 4142 + }, + { + "epoch": 4.38, + "learning_rate": 3.906448202959831e-05, + "loss": 0.1623, + "step": 4144 + }, + { + "epoch": 4.38, + "learning_rate": 3.9059196617336155e-05, + "loss": 0.1691, + "step": 4146 + }, + { + "epoch": 4.38, + "learning_rate": 3.9053911205073994e-05, + "loss": 0.2238, + "step": 4148 + }, + { + "epoch": 4.39, + "learning_rate": 3.904862579281185e-05, + "loss": 0.2006, + "step": 4150 + }, + { + "epoch": 4.39, + "learning_rate": 3.9043340380549686e-05, + "loss": 0.2972, + "step": 4152 + }, + { + "epoch": 4.39, + "learning_rate": 3.903805496828753e-05, + "loss": 0.3729, + "step": 4154 + }, + { + "epoch": 4.39, + "learning_rate": 3.903276955602537e-05, + "loss": 0.3234, + "step": 4156 + }, + { + "epoch": 4.4, + "learning_rate": 3.902748414376322e-05, + "loss": 0.1333, + "step": 4158 + }, + { + "epoch": 4.4, + "learning_rate": 3.902219873150106e-05, + "loss": 0.2893, + "step": 4160 + }, + { + "epoch": 4.4, + "learning_rate": 3.90169133192389e-05, + "loss": 0.1391, + "step": 4162 + }, + { + "epoch": 4.4, + "learning_rate": 3.901162790697674e-05, + "loss": 0.1938, + "step": 4164 + }, + { + "epoch": 4.4, + "learning_rate": 3.900634249471459e-05, + "loss": 0.1412, + "step": 4166 + }, + { + "epoch": 4.41, + "learning_rate": 3.9001057082452434e-05, + "loss": 0.3682, + "step": 4168 + }, + { + "epoch": 4.41, + "learning_rate": 3.899577167019028e-05, + "loss": 0.1981, + "step": 4170 + }, + { + "epoch": 4.41, + "learning_rate": 3.899048625792812e-05, + "loss": 0.1542, + "step": 4172 + }, + { + "epoch": 4.41, + "learning_rate": 3.8985200845665966e-05, + "loss": 0.1952, + "step": 4174 + }, + { + "epoch": 4.41, + "learning_rate": 3.8979915433403805e-05, + "loss": 0.234, + "step": 4176 + }, + { + "epoch": 4.42, + "learning_rate": 3.897463002114165e-05, + "loss": 0.159, + "step": 4178 + }, + { + "epoch": 4.42, + "learning_rate": 3.896934460887949e-05, + "loss": 0.2722, + "step": 4180 + }, + { + "epoch": 4.42, + "learning_rate": 3.8964059196617336e-05, + "loss": 0.2511, + "step": 4182 + }, + { + "epoch": 4.42, + "learning_rate": 3.895877378435518e-05, + "loss": 0.2582, + "step": 4184 + }, + { + "epoch": 4.42, + "learning_rate": 3.895348837209303e-05, + "loss": 0.2339, + "step": 4186 + }, + { + "epoch": 4.43, + "learning_rate": 3.894820295983087e-05, + "loss": 0.1998, + "step": 4188 + }, + { + "epoch": 4.43, + "learning_rate": 3.8942917547568714e-05, + "loss": 0.1863, + "step": 4190 + }, + { + "epoch": 4.43, + "learning_rate": 3.893763213530656e-05, + "loss": 0.3524, + "step": 4192 + }, + { + "epoch": 4.43, + "learning_rate": 3.89323467230444e-05, + "loss": 0.3165, + "step": 4194 + }, + { + "epoch": 4.44, + "learning_rate": 3.8927061310782245e-05, + "loss": 0.2742, + "step": 4196 + }, + { + "epoch": 4.44, + "learning_rate": 3.8921775898520084e-05, + "loss": 0.1488, + "step": 4198 + }, + { + "epoch": 4.44, + "learning_rate": 3.891649048625793e-05, + "loss": 0.3678, + "step": 4200 + }, + { + "epoch": 4.44, + "learning_rate": 3.891120507399577e-05, + "loss": 0.2203, + "step": 4202 + }, + { + "epoch": 4.44, + "learning_rate": 3.890591966173362e-05, + "loss": 0.2065, + "step": 4204 + }, + { + "epoch": 4.45, + "learning_rate": 3.890063424947146e-05, + "loss": 0.184, + "step": 4206 + }, + { + "epoch": 4.45, + "learning_rate": 3.889534883720931e-05, + "loss": 0.1449, + "step": 4208 + }, + { + "epoch": 4.45, + "learning_rate": 3.889006342494715e-05, + "loss": 0.2047, + "step": 4210 + }, + { + "epoch": 4.45, + "learning_rate": 3.888477801268499e-05, + "loss": 0.3436, + "step": 4212 + }, + { + "epoch": 4.45, + "learning_rate": 3.887949260042283e-05, + "loss": 0.2107, + "step": 4214 + }, + { + "epoch": 4.46, + "learning_rate": 3.887420718816068e-05, + "loss": 0.1966, + "step": 4216 + }, + { + "epoch": 4.46, + "learning_rate": 3.886892177589852e-05, + "loss": 0.2369, + "step": 4218 + }, + { + "epoch": 4.46, + "learning_rate": 3.8863636363636364e-05, + "loss": 0.2596, + "step": 4220 + }, + { + "epoch": 4.46, + "learning_rate": 3.885835095137421e-05, + "loss": 0.2708, + "step": 4222 + }, + { + "epoch": 4.47, + "learning_rate": 3.8853065539112056e-05, + "loss": 0.3761, + "step": 4224 + }, + { + "epoch": 4.47, + "learning_rate": 3.8847780126849895e-05, + "loss": 0.3693, + "step": 4226 + }, + { + "epoch": 4.47, + "learning_rate": 3.884249471458774e-05, + "loss": 0.3575, + "step": 4228 + }, + { + "epoch": 4.47, + "learning_rate": 3.883720930232558e-05, + "loss": 0.4008, + "step": 4230 + }, + { + "epoch": 4.47, + "learning_rate": 3.883192389006343e-05, + "loss": 0.1546, + "step": 4232 + }, + { + "epoch": 4.48, + "learning_rate": 3.8826638477801266e-05, + "loss": 0.2317, + "step": 4234 + }, + { + "epoch": 4.48, + "learning_rate": 3.882135306553911e-05, + "loss": 0.1259, + "step": 4236 + }, + { + "epoch": 4.48, + "learning_rate": 3.881606765327696e-05, + "loss": 0.2638, + "step": 4238 + }, + { + "epoch": 4.48, + "learning_rate": 3.8810782241014804e-05, + "loss": 0.3114, + "step": 4240 + }, + { + "epoch": 4.48, + "learning_rate": 3.8805496828752643e-05, + "loss": 0.4307, + "step": 4242 + }, + { + "epoch": 4.49, + "learning_rate": 3.880021141649049e-05, + "loss": 0.2683, + "step": 4244 + }, + { + "epoch": 4.49, + "learning_rate": 3.8794926004228336e-05, + "loss": 0.2929, + "step": 4246 + }, + { + "epoch": 4.49, + "learning_rate": 3.8789640591966175e-05, + "loss": 0.2354, + "step": 4248 + }, + { + "epoch": 4.49, + "learning_rate": 3.878435517970402e-05, + "loss": 0.1789, + "step": 4250 + }, + { + "epoch": 4.49, + "learning_rate": 3.877906976744186e-05, + "loss": 0.2649, + "step": 4252 + }, + { + "epoch": 4.5, + "learning_rate": 3.8773784355179706e-05, + "loss": 0.234, + "step": 4254 + }, + { + "epoch": 4.5, + "learning_rate": 3.8768498942917546e-05, + "loss": 0.2865, + "step": 4256 + }, + { + "epoch": 4.5, + "learning_rate": 3.87632135306554e-05, + "loss": 0.2417, + "step": 4258 + }, + { + "epoch": 4.5, + "learning_rate": 3.875792811839324e-05, + "loss": 0.1494, + "step": 4260 + }, + { + "epoch": 4.51, + "learning_rate": 3.8752642706131084e-05, + "loss": 0.3175, + "step": 4262 + }, + { + "epoch": 4.51, + "learning_rate": 3.874735729386892e-05, + "loss": 0.3808, + "step": 4264 + }, + { + "epoch": 4.51, + "learning_rate": 3.874207188160677e-05, + "loss": 0.2946, + "step": 4266 + }, + { + "epoch": 4.51, + "learning_rate": 3.873678646934461e-05, + "loss": 0.3407, + "step": 4268 + }, + { + "epoch": 4.51, + "learning_rate": 3.8731501057082454e-05, + "loss": 0.2248, + "step": 4270 + }, + { + "epoch": 4.52, + "learning_rate": 3.8726215644820294e-05, + "loss": 0.2135, + "step": 4272 + }, + { + "epoch": 4.52, + "learning_rate": 3.872093023255814e-05, + "loss": 0.2039, + "step": 4274 + }, + { + "epoch": 4.52, + "learning_rate": 3.8715644820295986e-05, + "loss": 0.2309, + "step": 4276 + }, + { + "epoch": 4.52, + "learning_rate": 3.871035940803383e-05, + "loss": 0.3951, + "step": 4278 + }, + { + "epoch": 4.52, + "learning_rate": 3.870507399577167e-05, + "loss": 0.296, + "step": 4280 + }, + { + "epoch": 4.53, + "learning_rate": 3.869978858350952e-05, + "loss": 0.2853, + "step": 4282 + }, + { + "epoch": 4.53, + "learning_rate": 3.8694503171247357e-05, + "loss": 0.1644, + "step": 4284 + }, + { + "epoch": 4.53, + "learning_rate": 3.86892177589852e-05, + "loss": 0.2342, + "step": 4286 + }, + { + "epoch": 4.53, + "learning_rate": 3.868393234672304e-05, + "loss": 0.1685, + "step": 4288 + }, + { + "epoch": 4.53, + "learning_rate": 3.867864693446089e-05, + "loss": 0.1452, + "step": 4290 + }, + { + "epoch": 4.54, + "learning_rate": 3.8673361522198734e-05, + "loss": 0.2645, + "step": 4292 + }, + { + "epoch": 4.54, + "learning_rate": 3.866807610993658e-05, + "loss": 0.2565, + "step": 4294 + }, + { + "epoch": 4.54, + "learning_rate": 3.866279069767442e-05, + "loss": 0.0879, + "step": 4296 + }, + { + "epoch": 4.54, + "learning_rate": 3.8657505285412265e-05, + "loss": 0.2067, + "step": 4298 + }, + { + "epoch": 4.55, + "learning_rate": 3.865221987315011e-05, + "loss": 0.163, + "step": 4300 + }, + { + "epoch": 4.55, + "learning_rate": 3.864693446088795e-05, + "loss": 0.2396, + "step": 4302 + }, + { + "epoch": 4.55, + "learning_rate": 3.86416490486258e-05, + "loss": 0.1791, + "step": 4304 + }, + { + "epoch": 4.55, + "learning_rate": 3.8636363636363636e-05, + "loss": 0.263, + "step": 4306 + }, + { + "epoch": 4.55, + "learning_rate": 3.863107822410148e-05, + "loss": 0.2548, + "step": 4308 + }, + { + "epoch": 4.56, + "learning_rate": 3.862579281183932e-05, + "loss": 0.3144, + "step": 4310 + }, + { + "epoch": 4.56, + "learning_rate": 3.8620507399577174e-05, + "loss": 0.1509, + "step": 4312 + }, + { + "epoch": 4.56, + "learning_rate": 3.8615221987315014e-05, + "loss": 0.254, + "step": 4314 + }, + { + "epoch": 4.56, + "learning_rate": 3.860993657505286e-05, + "loss": 0.2525, + "step": 4316 + }, + { + "epoch": 4.56, + "learning_rate": 3.86046511627907e-05, + "loss": 0.3747, + "step": 4318 + }, + { + "epoch": 4.57, + "learning_rate": 3.8599365750528545e-05, + "loss": 0.3409, + "step": 4320 + }, + { + "epoch": 4.57, + "learning_rate": 3.8594080338266384e-05, + "loss": 0.1191, + "step": 4322 + }, + { + "epoch": 4.57, + "learning_rate": 3.858879492600423e-05, + "loss": 0.2891, + "step": 4324 + }, + { + "epoch": 4.57, + "learning_rate": 3.858350951374207e-05, + "loss": 0.1214, + "step": 4326 + }, + { + "epoch": 4.58, + "learning_rate": 3.8578224101479916e-05, + "loss": 0.2835, + "step": 4328 + }, + { + "epoch": 4.58, + "learning_rate": 3.857293868921776e-05, + "loss": 0.2172, + "step": 4330 + }, + { + "epoch": 4.58, + "learning_rate": 3.856765327695561e-05, + "loss": 0.1076, + "step": 4332 + }, + { + "epoch": 4.58, + "learning_rate": 3.856236786469345e-05, + "loss": 0.2779, + "step": 4334 + }, + { + "epoch": 4.58, + "learning_rate": 3.855708245243129e-05, + "loss": 0.2579, + "step": 4336 + }, + { + "epoch": 4.59, + "learning_rate": 3.855179704016913e-05, + "loss": 0.2079, + "step": 4338 + }, + { + "epoch": 4.59, + "learning_rate": 3.854651162790698e-05, + "loss": 0.4571, + "step": 4340 + }, + { + "epoch": 4.59, + "learning_rate": 3.8541226215644824e-05, + "loss": 0.304, + "step": 4342 + }, + { + "epoch": 4.59, + "learning_rate": 3.8535940803382664e-05, + "loss": 0.2778, + "step": 4344 + }, + { + "epoch": 4.59, + "learning_rate": 3.853065539112051e-05, + "loss": 0.1885, + "step": 4346 + }, + { + "epoch": 4.6, + "learning_rate": 3.8525369978858356e-05, + "loss": 0.2225, + "step": 4348 + }, + { + "epoch": 4.6, + "learning_rate": 3.8520084566596195e-05, + "loss": 0.2183, + "step": 4350 + }, + { + "epoch": 4.6, + "learning_rate": 3.851479915433404e-05, + "loss": 0.2757, + "step": 4352 + }, + { + "epoch": 4.6, + "learning_rate": 3.850951374207189e-05, + "loss": 0.4415, + "step": 4354 + }, + { + "epoch": 4.6, + "learning_rate": 3.8504228329809727e-05, + "loss": 0.2638, + "step": 4356 + }, + { + "epoch": 4.61, + "learning_rate": 3.849894291754757e-05, + "loss": 0.2034, + "step": 4358 + }, + { + "epoch": 4.61, + "learning_rate": 3.849365750528541e-05, + "loss": 0.261, + "step": 4360 + }, + { + "epoch": 4.61, + "learning_rate": 3.848837209302326e-05, + "loss": 0.2194, + "step": 4362 + }, + { + "epoch": 4.61, + "learning_rate": 3.84830866807611e-05, + "loss": 0.2475, + "step": 4364 + }, + { + "epoch": 4.62, + "learning_rate": 3.847780126849895e-05, + "loss": 0.289, + "step": 4366 + }, + { + "epoch": 4.62, + "learning_rate": 3.847251585623679e-05, + "loss": 0.1766, + "step": 4368 + }, + { + "epoch": 4.62, + "learning_rate": 3.8467230443974635e-05, + "loss": 0.2849, + "step": 4370 + }, + { + "epoch": 4.62, + "learning_rate": 3.8461945031712475e-05, + "loss": 0.2087, + "step": 4372 + }, + { + "epoch": 4.62, + "learning_rate": 3.845665961945032e-05, + "loss": 0.1763, + "step": 4374 + }, + { + "epoch": 4.63, + "learning_rate": 3.845137420718816e-05, + "loss": 0.2629, + "step": 4376 + }, + { + "epoch": 4.63, + "learning_rate": 3.8446088794926006e-05, + "loss": 0.2354, + "step": 4378 + }, + { + "epoch": 4.63, + "learning_rate": 3.8440803382663845e-05, + "loss": 0.2312, + "step": 4380 + }, + { + "epoch": 4.63, + "learning_rate": 3.843551797040169e-05, + "loss": 0.2316, + "step": 4382 + }, + { + "epoch": 4.63, + "learning_rate": 3.843023255813954e-05, + "loss": 0.3674, + "step": 4384 + }, + { + "epoch": 4.64, + "learning_rate": 3.8424947145877384e-05, + "loss": 0.1878, + "step": 4386 + }, + { + "epoch": 4.64, + "learning_rate": 3.841966173361522e-05, + "loss": 0.1896, + "step": 4388 + }, + { + "epoch": 4.64, + "learning_rate": 3.841437632135307e-05, + "loss": 0.3298, + "step": 4390 + }, + { + "epoch": 4.64, + "learning_rate": 3.840909090909091e-05, + "loss": 0.1981, + "step": 4392 + }, + { + "epoch": 4.64, + "learning_rate": 3.8403805496828754e-05, + "loss": 0.2604, + "step": 4394 + }, + { + "epoch": 4.65, + "learning_rate": 3.83985200845666e-05, + "loss": 0.1946, + "step": 4396 + }, + { + "epoch": 4.65, + "learning_rate": 3.839323467230444e-05, + "loss": 0.2386, + "step": 4398 + }, + { + "epoch": 4.65, + "learning_rate": 3.8387949260042286e-05, + "loss": 0.1483, + "step": 4400 + }, + { + "epoch": 4.65, + "learning_rate": 3.8382663847780125e-05, + "loss": 0.2855, + "step": 4402 + }, + { + "epoch": 4.66, + "learning_rate": 3.837737843551798e-05, + "loss": 0.186, + "step": 4404 + }, + { + "epoch": 4.66, + "learning_rate": 3.837209302325582e-05, + "loss": 0.215, + "step": 4406 + }, + { + "epoch": 4.66, + "learning_rate": 3.836680761099366e-05, + "loss": 0.1128, + "step": 4408 + }, + { + "epoch": 4.66, + "learning_rate": 3.83615221987315e-05, + "loss": 0.1597, + "step": 4410 + }, + { + "epoch": 4.66, + "learning_rate": 3.835623678646935e-05, + "loss": 0.2013, + "step": 4412 + }, + { + "epoch": 4.67, + "learning_rate": 3.835095137420719e-05, + "loss": 0.2183, + "step": 4414 + }, + { + "epoch": 4.67, + "learning_rate": 3.8345665961945034e-05, + "loss": 0.3207, + "step": 4416 + }, + { + "epoch": 4.67, + "learning_rate": 3.834038054968287e-05, + "loss": 0.2389, + "step": 4418 + }, + { + "epoch": 4.67, + "learning_rate": 3.833509513742072e-05, + "loss": 0.3231, + "step": 4420 + }, + { + "epoch": 4.67, + "learning_rate": 3.8329809725158565e-05, + "loss": 0.2078, + "step": 4422 + }, + { + "epoch": 4.68, + "learning_rate": 3.832452431289641e-05, + "loss": 0.3902, + "step": 4424 + }, + { + "epoch": 4.68, + "learning_rate": 3.831923890063425e-05, + "loss": 0.1898, + "step": 4426 + }, + { + "epoch": 4.68, + "learning_rate": 3.8313953488372097e-05, + "loss": 0.2032, + "step": 4428 + }, + { + "epoch": 4.68, + "learning_rate": 3.8308668076109936e-05, + "loss": 0.065, + "step": 4430 + }, + { + "epoch": 4.68, + "learning_rate": 3.830338266384778e-05, + "loss": 0.197, + "step": 4432 + }, + { + "epoch": 4.69, + "learning_rate": 3.829809725158562e-05, + "loss": 0.244, + "step": 4434 + }, + { + "epoch": 4.69, + "learning_rate": 3.829281183932347e-05, + "loss": 0.3045, + "step": 4436 + }, + { + "epoch": 4.69, + "learning_rate": 3.8287526427061307e-05, + "loss": 0.2582, + "step": 4438 + }, + { + "epoch": 4.69, + "learning_rate": 3.828224101479916e-05, + "loss": 0.2491, + "step": 4440 + }, + { + "epoch": 4.7, + "learning_rate": 3.8276955602537e-05, + "loss": 0.2893, + "step": 4442 + }, + { + "epoch": 4.7, + "learning_rate": 3.8271670190274845e-05, + "loss": 0.2808, + "step": 4444 + }, + { + "epoch": 4.7, + "learning_rate": 3.8266384778012684e-05, + "loss": 0.2402, + "step": 4446 + }, + { + "epoch": 4.7, + "learning_rate": 3.826109936575053e-05, + "loss": 0.328, + "step": 4448 + }, + { + "epoch": 4.7, + "learning_rate": 3.8255813953488376e-05, + "loss": 0.1825, + "step": 4450 + }, + { + "epoch": 4.71, + "learning_rate": 3.8250528541226215e-05, + "loss": 0.2451, + "step": 4452 + }, + { + "epoch": 4.71, + "learning_rate": 3.824524312896406e-05, + "loss": 0.2322, + "step": 4454 + }, + { + "epoch": 4.71, + "learning_rate": 3.82399577167019e-05, + "loss": 0.239, + "step": 4456 + }, + { + "epoch": 4.71, + "learning_rate": 3.8234672304439754e-05, + "loss": 0.4527, + "step": 4458 + }, + { + "epoch": 4.71, + "learning_rate": 3.822938689217759e-05, + "loss": 0.3426, + "step": 4460 + }, + { + "epoch": 4.72, + "learning_rate": 3.822410147991544e-05, + "loss": 0.1954, + "step": 4462 + }, + { + "epoch": 4.72, + "learning_rate": 3.821881606765328e-05, + "loss": 0.2278, + "step": 4464 + }, + { + "epoch": 4.72, + "learning_rate": 3.8213530655391124e-05, + "loss": 0.1666, + "step": 4466 + }, + { + "epoch": 4.72, + "learning_rate": 3.8208245243128964e-05, + "loss": 0.137, + "step": 4468 + }, + { + "epoch": 4.73, + "learning_rate": 3.820295983086681e-05, + "loss": 0.292, + "step": 4470 + }, + { + "epoch": 4.73, + "learning_rate": 3.819767441860465e-05, + "loss": 0.2981, + "step": 4472 + }, + { + "epoch": 4.73, + "learning_rate": 3.8192389006342495e-05, + "loss": 0.3711, + "step": 4474 + }, + { + "epoch": 4.73, + "learning_rate": 3.818710359408034e-05, + "loss": 0.2092, + "step": 4476 + }, + { + "epoch": 4.73, + "learning_rate": 3.818181818181819e-05, + "loss": 0.2499, + "step": 4478 + }, + { + "epoch": 4.74, + "learning_rate": 3.8176532769556026e-05, + "loss": 0.2701, + "step": 4480 + }, + { + "epoch": 4.74, + "learning_rate": 3.817124735729387e-05, + "loss": 0.1771, + "step": 4482 + }, + { + "epoch": 4.74, + "learning_rate": 3.816596194503171e-05, + "loss": 0.2258, + "step": 4484 + }, + { + "epoch": 4.74, + "learning_rate": 3.816067653276956e-05, + "loss": 0.4113, + "step": 4486 + }, + { + "epoch": 4.74, + "learning_rate": 3.81553911205074e-05, + "loss": 0.1089, + "step": 4488 + }, + { + "epoch": 4.75, + "learning_rate": 3.815010570824524e-05, + "loss": 0.1204, + "step": 4490 + }, + { + "epoch": 4.75, + "learning_rate": 3.814482029598309e-05, + "loss": 0.1931, + "step": 4492 + }, + { + "epoch": 4.75, + "learning_rate": 3.8139534883720935e-05, + "loss": 0.1632, + "step": 4494 + }, + { + "epoch": 4.75, + "learning_rate": 3.8134249471458775e-05, + "loss": 0.2525, + "step": 4496 + }, + { + "epoch": 4.75, + "learning_rate": 3.812896405919662e-05, + "loss": 0.1257, + "step": 4498 + }, + { + "epoch": 4.76, + "learning_rate": 3.812367864693446e-05, + "loss": 0.3264, + "step": 4500 + }, + { + "epoch": 4.76, + "eval_cer": 0.030265032772869762, + "eval_loss": 0.3518345057964325, + "eval_runtime": 127.9997, + "eval_samples_per_second": 6.57, + "eval_steps_per_second": 0.828, + "step": 4500 + }, + { + "epoch": 4.76, + "learning_rate": 3.8118393234672306e-05, + "loss": 0.1576, + "step": 4502 + }, + { + "epoch": 4.76, + "learning_rate": 3.811310782241015e-05, + "loss": 0.1078, + "step": 4504 + }, + { + "epoch": 4.76, + "learning_rate": 3.810782241014799e-05, + "loss": 0.2641, + "step": 4506 + }, + { + "epoch": 4.77, + "learning_rate": 3.810253699788584e-05, + "loss": 0.2368, + "step": 4508 + }, + { + "epoch": 4.77, + "learning_rate": 3.809725158562368e-05, + "loss": 0.218, + "step": 4510 + }, + { + "epoch": 4.77, + "learning_rate": 3.809196617336153e-05, + "loss": 0.3123, + "step": 4512 + }, + { + "epoch": 4.77, + "learning_rate": 3.808668076109937e-05, + "loss": 0.1478, + "step": 4514 + }, + { + "epoch": 4.77, + "learning_rate": 3.8081395348837215e-05, + "loss": 0.2465, + "step": 4516 + }, + { + "epoch": 4.78, + "learning_rate": 3.8076109936575054e-05, + "loss": 0.1555, + "step": 4518 + }, + { + "epoch": 4.78, + "learning_rate": 3.80708245243129e-05, + "loss": 0.139, + "step": 4520 + }, + { + "epoch": 4.78, + "learning_rate": 3.806553911205074e-05, + "loss": 0.2661, + "step": 4522 + }, + { + "epoch": 4.78, + "learning_rate": 3.8060253699788585e-05, + "loss": 0.1964, + "step": 4524 + }, + { + "epoch": 4.78, + "learning_rate": 3.8054968287526425e-05, + "loss": 0.1876, + "step": 4526 + }, + { + "epoch": 4.79, + "learning_rate": 3.804968287526427e-05, + "loss": 0.2145, + "step": 4528 + }, + { + "epoch": 4.79, + "learning_rate": 3.804439746300212e-05, + "loss": 0.2774, + "step": 4530 + }, + { + "epoch": 4.79, + "learning_rate": 3.803911205073996e-05, + "loss": 0.1954, + "step": 4532 + }, + { + "epoch": 4.79, + "learning_rate": 3.80338266384778e-05, + "loss": 0.2758, + "step": 4534 + }, + { + "epoch": 4.79, + "learning_rate": 3.802854122621565e-05, + "loss": 0.2895, + "step": 4536 + }, + { + "epoch": 4.8, + "learning_rate": 3.802325581395349e-05, + "loss": 0.2015, + "step": 4538 + }, + { + "epoch": 4.8, + "learning_rate": 3.8017970401691334e-05, + "loss": 0.1902, + "step": 4540 + }, + { + "epoch": 4.8, + "learning_rate": 3.801268498942917e-05, + "loss": 0.2945, + "step": 4542 + }, + { + "epoch": 4.8, + "learning_rate": 3.800739957716702e-05, + "loss": 0.3151, + "step": 4544 + }, + { + "epoch": 4.81, + "learning_rate": 3.8002114164904865e-05, + "loss": 0.2132, + "step": 4546 + }, + { + "epoch": 4.81, + "learning_rate": 3.799682875264271e-05, + "loss": 0.1585, + "step": 4548 + }, + { + "epoch": 4.81, + "learning_rate": 3.799154334038055e-05, + "loss": 0.3348, + "step": 4550 + }, + { + "epoch": 4.81, + "learning_rate": 3.7986257928118396e-05, + "loss": 0.1911, + "step": 4552 + }, + { + "epoch": 4.81, + "learning_rate": 3.7980972515856236e-05, + "loss": 0.2412, + "step": 4554 + }, + { + "epoch": 4.82, + "learning_rate": 3.797568710359408e-05, + "loss": 0.2531, + "step": 4556 + }, + { + "epoch": 4.82, + "learning_rate": 3.797040169133193e-05, + "loss": 0.2128, + "step": 4558 + }, + { + "epoch": 4.82, + "learning_rate": 3.796511627906977e-05, + "loss": 0.288, + "step": 4560 + }, + { + "epoch": 4.82, + "learning_rate": 3.795983086680761e-05, + "loss": 0.2211, + "step": 4562 + }, + { + "epoch": 4.82, + "learning_rate": 3.795454545454545e-05, + "loss": 0.1853, + "step": 4564 + }, + { + "epoch": 4.83, + "learning_rate": 3.7949260042283305e-05, + "loss": 0.2396, + "step": 4566 + }, + { + "epoch": 4.83, + "learning_rate": 3.7943974630021145e-05, + "loss": 0.163, + "step": 4568 + }, + { + "epoch": 4.83, + "learning_rate": 3.793868921775899e-05, + "loss": 0.2536, + "step": 4570 + }, + { + "epoch": 4.83, + "learning_rate": 3.793340380549683e-05, + "loss": 0.2265, + "step": 4572 + }, + { + "epoch": 4.84, + "learning_rate": 3.7928118393234676e-05, + "loss": 0.1727, + "step": 4574 + }, + { + "epoch": 4.84, + "learning_rate": 3.7922832980972515e-05, + "loss": 0.1687, + "step": 4576 + }, + { + "epoch": 4.84, + "learning_rate": 3.791754756871036e-05, + "loss": 0.3511, + "step": 4578 + }, + { + "epoch": 4.84, + "learning_rate": 3.79122621564482e-05, + "loss": 0.1654, + "step": 4580 + }, + { + "epoch": 4.84, + "learning_rate": 3.790697674418605e-05, + "loss": 0.2532, + "step": 4582 + }, + { + "epoch": 4.85, + "learning_rate": 3.790169133192389e-05, + "loss": 0.2507, + "step": 4584 + }, + { + "epoch": 4.85, + "learning_rate": 3.789640591966174e-05, + "loss": 0.1839, + "step": 4586 + }, + { + "epoch": 4.85, + "learning_rate": 3.789112050739958e-05, + "loss": 0.2172, + "step": 4588 + }, + { + "epoch": 4.85, + "learning_rate": 3.7885835095137424e-05, + "loss": 0.1444, + "step": 4590 + }, + { + "epoch": 4.85, + "learning_rate": 3.7880549682875263e-05, + "loss": 0.1246, + "step": 4592 + }, + { + "epoch": 4.86, + "learning_rate": 3.787526427061311e-05, + "loss": 0.1763, + "step": 4594 + }, + { + "epoch": 4.86, + "learning_rate": 3.786997885835095e-05, + "loss": 0.2812, + "step": 4596 + }, + { + "epoch": 4.86, + "learning_rate": 3.7864693446088795e-05, + "loss": 0.2248, + "step": 4598 + }, + { + "epoch": 4.86, + "learning_rate": 3.785940803382664e-05, + "loss": 0.137, + "step": 4600 + }, + { + "epoch": 4.86, + "learning_rate": 3.785412262156449e-05, + "loss": 0.3015, + "step": 4602 + }, + { + "epoch": 4.87, + "learning_rate": 3.7848837209302326e-05, + "loss": 0.1787, + "step": 4604 + }, + { + "epoch": 4.87, + "learning_rate": 3.784355179704017e-05, + "loss": 0.2139, + "step": 4606 + }, + { + "epoch": 4.87, + "learning_rate": 3.783826638477802e-05, + "loss": 0.1668, + "step": 4608 + }, + { + "epoch": 4.87, + "learning_rate": 3.783298097251586e-05, + "loss": 0.215, + "step": 4610 + }, + { + "epoch": 4.88, + "learning_rate": 3.7827695560253704e-05, + "loss": 0.1996, + "step": 4612 + }, + { + "epoch": 4.88, + "learning_rate": 3.782241014799154e-05, + "loss": 0.208, + "step": 4614 + }, + { + "epoch": 4.88, + "learning_rate": 3.781712473572939e-05, + "loss": 0.3617, + "step": 4616 + }, + { + "epoch": 4.88, + "learning_rate": 3.781183932346723e-05, + "loss": 0.2004, + "step": 4618 + }, + { + "epoch": 4.88, + "learning_rate": 3.780655391120508e-05, + "loss": 0.2921, + "step": 4620 + }, + { + "epoch": 4.89, + "learning_rate": 3.780126849894292e-05, + "loss": 0.1529, + "step": 4622 + }, + { + "epoch": 4.89, + "learning_rate": 3.7795983086680766e-05, + "loss": 0.1936, + "step": 4624 + }, + { + "epoch": 4.89, + "learning_rate": 3.7790697674418606e-05, + "loss": 0.1113, + "step": 4626 + }, + { + "epoch": 4.89, + "learning_rate": 3.778541226215645e-05, + "loss": 0.3297, + "step": 4628 + }, + { + "epoch": 4.89, + "learning_rate": 3.778012684989429e-05, + "loss": 0.2202, + "step": 4630 + }, + { + "epoch": 4.9, + "learning_rate": 3.777484143763214e-05, + "loss": 0.1229, + "step": 4632 + }, + { + "epoch": 4.9, + "learning_rate": 3.7769556025369976e-05, + "loss": 0.1535, + "step": 4634 + }, + { + "epoch": 4.9, + "learning_rate": 3.776427061310782e-05, + "loss": 0.2717, + "step": 4636 + }, + { + "epoch": 4.9, + "learning_rate": 3.775898520084567e-05, + "loss": 0.2224, + "step": 4638 + }, + { + "epoch": 4.9, + "learning_rate": 3.7753699788583515e-05, + "loss": 0.1491, + "step": 4640 + }, + { + "epoch": 4.91, + "learning_rate": 3.7748414376321354e-05, + "loss": 0.3249, + "step": 4642 + }, + { + "epoch": 4.91, + "learning_rate": 3.77431289640592e-05, + "loss": 0.1969, + "step": 4644 + }, + { + "epoch": 4.91, + "learning_rate": 3.773784355179704e-05, + "loss": 0.1595, + "step": 4646 + }, + { + "epoch": 4.91, + "learning_rate": 3.7732558139534885e-05, + "loss": 0.2788, + "step": 4648 + }, + { + "epoch": 4.92, + "learning_rate": 3.7727272727272725e-05, + "loss": 0.1207, + "step": 4650 + }, + { + "epoch": 4.92, + "learning_rate": 3.772198731501057e-05, + "loss": 0.1545, + "step": 4652 + }, + { + "epoch": 4.92, + "learning_rate": 3.771670190274842e-05, + "loss": 0.3822, + "step": 4654 + }, + { + "epoch": 4.92, + "learning_rate": 3.771141649048626e-05, + "loss": 0.2045, + "step": 4656 + }, + { + "epoch": 4.92, + "learning_rate": 3.77061310782241e-05, + "loss": 0.1368, + "step": 4658 + }, + { + "epoch": 4.93, + "learning_rate": 3.770084566596195e-05, + "loss": 0.1766, + "step": 4660 + }, + { + "epoch": 4.93, + "learning_rate": 3.7695560253699794e-05, + "loss": 0.6979, + "step": 4662 + }, + { + "epoch": 4.93, + "learning_rate": 3.7690274841437633e-05, + "loss": 0.1898, + "step": 4664 + }, + { + "epoch": 4.93, + "learning_rate": 3.768498942917548e-05, + "loss": 0.205, + "step": 4666 + }, + { + "epoch": 4.93, + "learning_rate": 3.767970401691332e-05, + "loss": 0.2457, + "step": 4668 + }, + { + "epoch": 4.94, + "learning_rate": 3.7674418604651165e-05, + "loss": 0.2407, + "step": 4670 + }, + { + "epoch": 4.94, + "learning_rate": 3.7669133192389004e-05, + "loss": 0.219, + "step": 4672 + }, + { + "epoch": 4.94, + "learning_rate": 3.766384778012686e-05, + "loss": 0.1399, + "step": 4674 + }, + { + "epoch": 4.94, + "learning_rate": 3.7658562367864696e-05, + "loss": 0.291, + "step": 4676 + }, + { + "epoch": 4.95, + "learning_rate": 3.765327695560254e-05, + "loss": 0.1945, + "step": 4678 + }, + { + "epoch": 4.95, + "learning_rate": 3.764799154334038e-05, + "loss": 0.2359, + "step": 4680 + }, + { + "epoch": 4.95, + "learning_rate": 3.764270613107823e-05, + "loss": 0.2426, + "step": 4682 + }, + { + "epoch": 4.95, + "learning_rate": 3.763742071881607e-05, + "loss": 0.2948, + "step": 4684 + }, + { + "epoch": 4.95, + "learning_rate": 3.763213530655391e-05, + "loss": 0.2388, + "step": 4686 + }, + { + "epoch": 4.96, + "learning_rate": 3.762684989429175e-05, + "loss": 0.0963, + "step": 4688 + }, + { + "epoch": 4.96, + "learning_rate": 3.76215644820296e-05, + "loss": 0.25, + "step": 4690 + }, + { + "epoch": 4.96, + "learning_rate": 3.7616279069767444e-05, + "loss": 0.1789, + "step": 4692 + }, + { + "epoch": 4.96, + "learning_rate": 3.761099365750529e-05, + "loss": 0.2834, + "step": 4694 + }, + { + "epoch": 4.96, + "learning_rate": 3.760570824524313e-05, + "loss": 0.1475, + "step": 4696 + }, + { + "epoch": 4.97, + "learning_rate": 3.7600422832980976e-05, + "loss": 0.1781, + "step": 4698 + }, + { + "epoch": 4.97, + "learning_rate": 3.7595137420718815e-05, + "loss": 0.2353, + "step": 4700 + }, + { + "epoch": 4.97, + "learning_rate": 3.758985200845666e-05, + "loss": 0.4259, + "step": 4702 + }, + { + "epoch": 4.97, + "learning_rate": 3.75845665961945e-05, + "loss": 0.2206, + "step": 4704 + }, + { + "epoch": 4.97, + "learning_rate": 3.7579281183932346e-05, + "loss": 0.2497, + "step": 4706 + }, + { + "epoch": 4.98, + "learning_rate": 3.757399577167019e-05, + "loss": 0.2344, + "step": 4708 + }, + { + "epoch": 4.98, + "learning_rate": 3.756871035940804e-05, + "loss": 0.3204, + "step": 4710 + }, + { + "epoch": 4.98, + "learning_rate": 3.756342494714588e-05, + "loss": 0.3352, + "step": 4712 + }, + { + "epoch": 4.98, + "learning_rate": 3.7558139534883724e-05, + "loss": 0.2001, + "step": 4714 + }, + { + "epoch": 4.99, + "learning_rate": 3.755285412262157e-05, + "loss": 0.1399, + "step": 4716 + }, + { + "epoch": 4.99, + "learning_rate": 3.754756871035941e-05, + "loss": 0.1997, + "step": 4718 + }, + { + "epoch": 4.99, + "learning_rate": 3.7542283298097255e-05, + "loss": 0.173, + "step": 4720 + }, + { + "epoch": 4.99, + "learning_rate": 3.7536997885835095e-05, + "loss": 0.3078, + "step": 4722 + }, + { + "epoch": 4.99, + "learning_rate": 3.753171247357294e-05, + "loss": 0.1478, + "step": 4724 + }, + { + "epoch": 5.0, + "learning_rate": 3.752642706131078e-05, + "loss": 0.218, + "step": 4726 + }, + { + "epoch": 5.0, + "learning_rate": 3.752114164904863e-05, + "loss": 0.2081, + "step": 4728 + }, + { + "epoch": 5.0, + "learning_rate": 3.751585623678647e-05, + "loss": 0.1173, + "step": 4730 + }, + { + "epoch": 5.0, + "learning_rate": 3.751057082452432e-05, + "loss": 0.2947, + "step": 4732 + }, + { + "epoch": 5.0, + "learning_rate": 3.750528541226216e-05, + "loss": 0.1826, + "step": 4734 + }, + { + "epoch": 5.01, + "learning_rate": 3.7500000000000003e-05, + "loss": 0.2061, + "step": 4736 + }, + { + "epoch": 5.01, + "learning_rate": 3.749471458773784e-05, + "loss": 0.2811, + "step": 4738 + }, + { + "epoch": 5.01, + "learning_rate": 3.748942917547569e-05, + "loss": 0.1638, + "step": 4740 + }, + { + "epoch": 5.01, + "learning_rate": 3.748414376321353e-05, + "loss": 0.3567, + "step": 4742 + }, + { + "epoch": 5.01, + "learning_rate": 3.7478858350951374e-05, + "loss": 0.2977, + "step": 4744 + }, + { + "epoch": 5.02, + "learning_rate": 3.747357293868922e-05, + "loss": 0.1962, + "step": 4746 + }, + { + "epoch": 5.02, + "learning_rate": 3.7468287526427066e-05, + "loss": 0.1857, + "step": 4748 + }, + { + "epoch": 5.02, + "learning_rate": 3.7463002114164906e-05, + "loss": 0.2079, + "step": 4750 + }, + { + "epoch": 5.02, + "learning_rate": 3.745771670190275e-05, + "loss": 0.1679, + "step": 4752 + }, + { + "epoch": 5.03, + "learning_rate": 3.745243128964059e-05, + "loss": 0.299, + "step": 4754 + }, + { + "epoch": 5.03, + "learning_rate": 3.744714587737844e-05, + "loss": 0.1429, + "step": 4756 + }, + { + "epoch": 5.03, + "learning_rate": 3.7441860465116276e-05, + "loss": 0.1921, + "step": 4758 + }, + { + "epoch": 5.03, + "learning_rate": 3.743657505285412e-05, + "loss": 0.2171, + "step": 4760 + }, + { + "epoch": 5.03, + "learning_rate": 3.743128964059197e-05, + "loss": 0.1292, + "step": 4762 + }, + { + "epoch": 5.04, + "learning_rate": 3.7426004228329814e-05, + "loss": 0.2783, + "step": 4764 + }, + { + "epoch": 5.04, + "learning_rate": 3.7420718816067654e-05, + "loss": 0.2958, + "step": 4766 + }, + { + "epoch": 5.04, + "learning_rate": 3.74154334038055e-05, + "loss": 0.146, + "step": 4768 + }, + { + "epoch": 5.04, + "learning_rate": 3.7410147991543346e-05, + "loss": 0.1545, + "step": 4770 + }, + { + "epoch": 5.04, + "learning_rate": 3.7404862579281185e-05, + "loss": 0.2194, + "step": 4772 + }, + { + "epoch": 5.05, + "learning_rate": 3.739957716701903e-05, + "loss": 0.1789, + "step": 4774 + }, + { + "epoch": 5.05, + "learning_rate": 3.739429175475687e-05, + "loss": 0.2369, + "step": 4776 + }, + { + "epoch": 5.05, + "learning_rate": 3.7389006342494717e-05, + "loss": 0.1711, + "step": 4778 + }, + { + "epoch": 5.05, + "learning_rate": 3.7383720930232556e-05, + "loss": 0.2223, + "step": 4780 + }, + { + "epoch": 5.05, + "learning_rate": 3.737843551797041e-05, + "loss": 0.2188, + "step": 4782 + }, + { + "epoch": 5.06, + "learning_rate": 3.737315010570825e-05, + "loss": 0.2343, + "step": 4784 + }, + { + "epoch": 5.06, + "learning_rate": 3.7367864693446094e-05, + "loss": 0.109, + "step": 4786 + }, + { + "epoch": 5.06, + "learning_rate": 3.736257928118393e-05, + "loss": 0.2162, + "step": 4788 + }, + { + "epoch": 5.06, + "learning_rate": 3.735729386892178e-05, + "loss": 0.3718, + "step": 4790 + }, + { + "epoch": 5.07, + "learning_rate": 3.735200845665962e-05, + "loss": 0.2146, + "step": 4792 + }, + { + "epoch": 5.07, + "learning_rate": 3.7346723044397465e-05, + "loss": 0.1319, + "step": 4794 + }, + { + "epoch": 5.07, + "learning_rate": 3.7341437632135304e-05, + "loss": 0.2113, + "step": 4796 + }, + { + "epoch": 5.07, + "learning_rate": 3.733615221987315e-05, + "loss": 0.1171, + "step": 4798 + }, + { + "epoch": 5.07, + "learning_rate": 3.7330866807610996e-05, + "loss": 0.2382, + "step": 4800 + }, + { + "epoch": 5.08, + "learning_rate": 3.732558139534884e-05, + "loss": 0.1432, + "step": 4802 + }, + { + "epoch": 5.08, + "learning_rate": 3.732029598308668e-05, + "loss": 0.2301, + "step": 4804 + }, + { + "epoch": 5.08, + "learning_rate": 3.731501057082453e-05, + "loss": 0.1705, + "step": 4806 + }, + { + "epoch": 5.08, + "learning_rate": 3.730972515856237e-05, + "loss": 0.0764, + "step": 4808 + }, + { + "epoch": 5.08, + "learning_rate": 3.730443974630021e-05, + "loss": 0.123, + "step": 4810 + }, + { + "epoch": 5.09, + "learning_rate": 3.729915433403806e-05, + "loss": 0.156, + "step": 4812 + }, + { + "epoch": 5.09, + "learning_rate": 3.72938689217759e-05, + "loss": 0.2732, + "step": 4814 + }, + { + "epoch": 5.09, + "learning_rate": 3.7288583509513744e-05, + "loss": 0.2014, + "step": 4816 + }, + { + "epoch": 5.09, + "learning_rate": 3.728329809725159e-05, + "loss": 0.2538, + "step": 4818 + }, + { + "epoch": 5.1, + "learning_rate": 3.727801268498943e-05, + "loss": 0.2327, + "step": 4820 + }, + { + "epoch": 5.1, + "learning_rate": 3.7272727272727276e-05, + "loss": 0.2191, + "step": 4822 + }, + { + "epoch": 5.1, + "learning_rate": 3.726744186046512e-05, + "loss": 0.2461, + "step": 4824 + }, + { + "epoch": 5.1, + "learning_rate": 3.726215644820296e-05, + "loss": 0.1499, + "step": 4826 + }, + { + "epoch": 5.1, + "learning_rate": 3.725687103594081e-05, + "loss": 0.1612, + "step": 4828 + }, + { + "epoch": 5.11, + "learning_rate": 3.7251585623678646e-05, + "loss": 0.2029, + "step": 4830 + }, + { + "epoch": 5.11, + "learning_rate": 3.724630021141649e-05, + "loss": 0.3797, + "step": 4832 + }, + { + "epoch": 5.11, + "learning_rate": 3.724101479915433e-05, + "loss": 0.2086, + "step": 4834 + }, + { + "epoch": 5.11, + "learning_rate": 3.7235729386892184e-05, + "loss": 0.1579, + "step": 4836 + }, + { + "epoch": 5.11, + "learning_rate": 3.7230443974630024e-05, + "loss": 0.2036, + "step": 4838 + }, + { + "epoch": 5.12, + "learning_rate": 3.722515856236787e-05, + "loss": 0.1914, + "step": 4840 + }, + { + "epoch": 5.12, + "learning_rate": 3.721987315010571e-05, + "loss": 0.1965, + "step": 4842 + }, + { + "epoch": 5.12, + "learning_rate": 3.7214587737843555e-05, + "loss": 0.109, + "step": 4844 + }, + { + "epoch": 5.12, + "learning_rate": 3.7209302325581394e-05, + "loss": 0.1581, + "step": 4846 + }, + { + "epoch": 5.12, + "learning_rate": 3.720401691331924e-05, + "loss": 0.3613, + "step": 4848 + }, + { + "epoch": 5.13, + "learning_rate": 3.719873150105708e-05, + "loss": 0.2111, + "step": 4850 + }, + { + "epoch": 5.13, + "learning_rate": 3.7193446088794926e-05, + "loss": 0.1572, + "step": 4852 + }, + { + "epoch": 5.13, + "learning_rate": 3.718816067653277e-05, + "loss": 0.2681, + "step": 4854 + }, + { + "epoch": 5.13, + "learning_rate": 3.718287526427062e-05, + "loss": 0.24, + "step": 4856 + }, + { + "epoch": 5.14, + "learning_rate": 3.717758985200846e-05, + "loss": 0.1684, + "step": 4858 + }, + { + "epoch": 5.14, + "learning_rate": 3.71723044397463e-05, + "loss": 0.1807, + "step": 4860 + }, + { + "epoch": 5.14, + "learning_rate": 3.716701902748414e-05, + "loss": 0.171, + "step": 4862 + }, + { + "epoch": 5.14, + "learning_rate": 3.716173361522199e-05, + "loss": 0.1212, + "step": 4864 + }, + { + "epoch": 5.14, + "learning_rate": 3.7156448202959835e-05, + "loss": 0.2092, + "step": 4866 + }, + { + "epoch": 5.15, + "learning_rate": 3.7151162790697674e-05, + "loss": 0.1592, + "step": 4868 + }, + { + "epoch": 5.15, + "learning_rate": 3.714587737843552e-05, + "loss": 0.268, + "step": 4870 + }, + { + "epoch": 5.15, + "learning_rate": 3.7140591966173366e-05, + "loss": 0.2267, + "step": 4872 + }, + { + "epoch": 5.15, + "learning_rate": 3.713530655391121e-05, + "loss": 0.1299, + "step": 4874 + }, + { + "epoch": 5.15, + "learning_rate": 3.713002114164905e-05, + "loss": 0.1087, + "step": 4876 + }, + { + "epoch": 5.16, + "learning_rate": 3.71247357293869e-05, + "loss": 0.2027, + "step": 4878 + }, + { + "epoch": 5.16, + "learning_rate": 3.711945031712474e-05, + "loss": 0.1678, + "step": 4880 + }, + { + "epoch": 5.16, + "learning_rate": 3.711416490486258e-05, + "loss": 0.2345, + "step": 4882 + }, + { + "epoch": 5.16, + "learning_rate": 3.710887949260042e-05, + "loss": 0.149, + "step": 4884 + }, + { + "epoch": 5.16, + "learning_rate": 3.710359408033827e-05, + "loss": 0.1786, + "step": 4886 + }, + { + "epoch": 5.17, + "learning_rate": 3.709830866807611e-05, + "loss": 0.2472, + "step": 4888 + }, + { + "epoch": 5.17, + "learning_rate": 3.709302325581396e-05, + "loss": 0.1835, + "step": 4890 + }, + { + "epoch": 5.17, + "learning_rate": 3.70877378435518e-05, + "loss": 0.0931, + "step": 4892 + }, + { + "epoch": 5.17, + "learning_rate": 3.7082452431289646e-05, + "loss": 0.1802, + "step": 4894 + }, + { + "epoch": 5.18, + "learning_rate": 3.7077167019027485e-05, + "loss": 0.2437, + "step": 4896 + }, + { + "epoch": 5.18, + "learning_rate": 3.707188160676533e-05, + "loss": 0.1213, + "step": 4898 + }, + { + "epoch": 5.18, + "learning_rate": 3.706659619450317e-05, + "loss": 0.1104, + "step": 4900 + }, + { + "epoch": 5.18, + "learning_rate": 3.7061310782241016e-05, + "loss": 0.2072, + "step": 4902 + }, + { + "epoch": 5.18, + "learning_rate": 3.7056025369978856e-05, + "loss": 0.1542, + "step": 4904 + }, + { + "epoch": 5.19, + "learning_rate": 3.70507399577167e-05, + "loss": 0.1379, + "step": 4906 + }, + { + "epoch": 5.19, + "learning_rate": 3.704545454545455e-05, + "loss": 0.1268, + "step": 4908 + }, + { + "epoch": 5.19, + "learning_rate": 3.7040169133192394e-05, + "loss": 0.1603, + "step": 4910 + }, + { + "epoch": 5.19, + "learning_rate": 3.703488372093023e-05, + "loss": 0.1306, + "step": 4912 + }, + { + "epoch": 5.19, + "learning_rate": 3.702959830866808e-05, + "loss": 0.0702, + "step": 4914 + }, + { + "epoch": 5.2, + "learning_rate": 3.702431289640592e-05, + "loss": 0.0872, + "step": 4916 + }, + { + "epoch": 5.2, + "learning_rate": 3.7019027484143764e-05, + "loss": 0.4123, + "step": 4918 + }, + { + "epoch": 5.2, + "learning_rate": 3.701374207188161e-05, + "loss": 0.2372, + "step": 4920 + }, + { + "epoch": 5.2, + "learning_rate": 3.700845665961945e-05, + "loss": 0.2711, + "step": 4922 + }, + { + "epoch": 5.21, + "learning_rate": 3.7003171247357296e-05, + "loss": 0.2139, + "step": 4924 + }, + { + "epoch": 5.21, + "learning_rate": 3.699788583509514e-05, + "loss": 0.227, + "step": 4926 + }, + { + "epoch": 5.21, + "learning_rate": 3.699260042283299e-05, + "loss": 0.0749, + "step": 4928 + }, + { + "epoch": 5.21, + "learning_rate": 3.698731501057083e-05, + "loss": 0.1619, + "step": 4930 + }, + { + "epoch": 5.21, + "learning_rate": 3.698202959830867e-05, + "loss": 0.3009, + "step": 4932 + }, + { + "epoch": 5.22, + "learning_rate": 3.697674418604651e-05, + "loss": 0.2073, + "step": 4934 + }, + { + "epoch": 5.22, + "learning_rate": 3.697145877378436e-05, + "loss": 0.326, + "step": 4936 + }, + { + "epoch": 5.22, + "learning_rate": 3.69661733615222e-05, + "loss": 0.2313, + "step": 4938 + }, + { + "epoch": 5.22, + "learning_rate": 3.6960887949260044e-05, + "loss": 0.1468, + "step": 4940 + }, + { + "epoch": 5.22, + "learning_rate": 3.695560253699788e-05, + "loss": 0.1478, + "step": 4942 + }, + { + "epoch": 5.23, + "learning_rate": 3.6950317124735736e-05, + "loss": 0.2012, + "step": 4944 + }, + { + "epoch": 5.23, + "learning_rate": 3.6945031712473575e-05, + "loss": 0.1614, + "step": 4946 + }, + { + "epoch": 5.23, + "learning_rate": 3.693974630021142e-05, + "loss": 0.1517, + "step": 4948 + }, + { + "epoch": 5.23, + "learning_rate": 3.693446088794926e-05, + "loss": 0.1746, + "step": 4950 + }, + { + "epoch": 5.23, + "learning_rate": 3.692917547568711e-05, + "loss": 0.1299, + "step": 4952 + }, + { + "epoch": 5.24, + "learning_rate": 3.6923890063424946e-05, + "loss": 0.1044, + "step": 4954 + }, + { + "epoch": 5.24, + "learning_rate": 3.691860465116279e-05, + "loss": 0.1396, + "step": 4956 + }, + { + "epoch": 5.24, + "learning_rate": 3.691331923890063e-05, + "loss": 0.1941, + "step": 4958 + }, + { + "epoch": 5.24, + "learning_rate": 3.690803382663848e-05, + "loss": 0.1695, + "step": 4960 + }, + { + "epoch": 5.25, + "learning_rate": 3.6902748414376324e-05, + "loss": 0.2018, + "step": 4962 + }, + { + "epoch": 5.25, + "learning_rate": 3.689746300211417e-05, + "loss": 0.1901, + "step": 4964 + }, + { + "epoch": 5.25, + "learning_rate": 3.689217758985201e-05, + "loss": 0.1719, + "step": 4966 + }, + { + "epoch": 5.25, + "learning_rate": 3.6886892177589855e-05, + "loss": 0.2726, + "step": 4968 + }, + { + "epoch": 5.25, + "learning_rate": 3.6881606765327694e-05, + "loss": 0.2805, + "step": 4970 + }, + { + "epoch": 5.26, + "learning_rate": 3.687632135306554e-05, + "loss": 0.2122, + "step": 4972 + }, + { + "epoch": 5.26, + "learning_rate": 3.6871035940803386e-05, + "loss": 0.1963, + "step": 4974 + }, + { + "epoch": 5.26, + "learning_rate": 3.6865750528541226e-05, + "loss": 0.2034, + "step": 4976 + }, + { + "epoch": 5.26, + "learning_rate": 3.686046511627907e-05, + "loss": 0.1429, + "step": 4978 + }, + { + "epoch": 5.26, + "learning_rate": 3.685517970401692e-05, + "loss": 0.2629, + "step": 4980 + }, + { + "epoch": 5.27, + "learning_rate": 3.6849894291754764e-05, + "loss": 0.2621, + "step": 4982 + }, + { + "epoch": 5.27, + "learning_rate": 3.68446088794926e-05, + "loss": 0.204, + "step": 4984 + }, + { + "epoch": 5.27, + "learning_rate": 3.683932346723045e-05, + "loss": 0.0373, + "step": 4986 + }, + { + "epoch": 5.27, + "learning_rate": 3.683403805496829e-05, + "loss": 0.2303, + "step": 4988 + }, + { + "epoch": 5.27, + "learning_rate": 3.6828752642706135e-05, + "loss": 0.1849, + "step": 4990 + }, + { + "epoch": 5.28, + "learning_rate": 3.6823467230443974e-05, + "loss": 0.2762, + "step": 4992 + }, + { + "epoch": 5.28, + "learning_rate": 3.681818181818182e-05, + "loss": 0.2258, + "step": 4994 + }, + { + "epoch": 5.28, + "learning_rate": 3.681289640591966e-05, + "loss": 0.1006, + "step": 4996 + }, + { + "epoch": 5.28, + "learning_rate": 3.680761099365751e-05, + "loss": 0.2063, + "step": 4998 + }, + { + "epoch": 5.29, + "learning_rate": 3.680232558139535e-05, + "loss": 0.1996, + "step": 5000 + }, + { + "epoch": 5.29, + "eval_cer": 0.04149330293530921, + "eval_loss": 0.4960121810436249, + "eval_runtime": 132.0838, + "eval_samples_per_second": 6.367, + "eval_steps_per_second": 0.803, + "step": 5000 + }, + { + "epoch": 5.29, + "learning_rate": 3.67970401691332e-05, + "loss": 0.2156, + "step": 5002 + }, + { + "epoch": 5.29, + "learning_rate": 3.679175475687104e-05, + "loss": 0.2322, + "step": 5004 + }, + { + "epoch": 5.29, + "learning_rate": 3.678646934460888e-05, + "loss": 0.2876, + "step": 5006 + }, + { + "epoch": 5.29, + "learning_rate": 3.678118393234672e-05, + "loss": 0.1763, + "step": 5008 + }, + { + "epoch": 5.3, + "learning_rate": 3.677589852008457e-05, + "loss": 0.3529, + "step": 5010 + }, + { + "epoch": 5.3, + "learning_rate": 3.677061310782241e-05, + "loss": 0.2029, + "step": 5012 + }, + { + "epoch": 5.3, + "learning_rate": 3.676532769556025e-05, + "loss": 0.1825, + "step": 5014 + }, + { + "epoch": 5.3, + "learning_rate": 3.67600422832981e-05, + "loss": 0.2479, + "step": 5016 + }, + { + "epoch": 5.3, + "learning_rate": 3.6754756871035945e-05, + "loss": 0.166, + "step": 5018 + }, + { + "epoch": 5.31, + "learning_rate": 3.6749471458773785e-05, + "loss": 0.1522, + "step": 5020 + }, + { + "epoch": 5.31, + "learning_rate": 3.674418604651163e-05, + "loss": 0.2038, + "step": 5022 + }, + { + "epoch": 5.31, + "learning_rate": 3.673890063424947e-05, + "loss": 0.1599, + "step": 5024 + }, + { + "epoch": 5.31, + "learning_rate": 3.6733615221987316e-05, + "loss": 0.1344, + "step": 5026 + }, + { + "epoch": 5.32, + "learning_rate": 3.672832980972516e-05, + "loss": 0.2085, + "step": 5028 + }, + { + "epoch": 5.32, + "learning_rate": 3.6723044397463e-05, + "loss": 0.2214, + "step": 5030 + }, + { + "epoch": 5.32, + "learning_rate": 3.671775898520085e-05, + "loss": 0.2809, + "step": 5032 + }, + { + "epoch": 5.32, + "learning_rate": 3.6712473572938694e-05, + "loss": 0.3275, + "step": 5034 + }, + { + "epoch": 5.32, + "learning_rate": 3.670718816067654e-05, + "loss": 0.1518, + "step": 5036 + }, + { + "epoch": 5.33, + "learning_rate": 3.670190274841438e-05, + "loss": 0.137, + "step": 5038 + }, + { + "epoch": 5.33, + "learning_rate": 3.6696617336152225e-05, + "loss": 0.1788, + "step": 5040 + }, + { + "epoch": 5.33, + "learning_rate": 3.6691331923890064e-05, + "loss": 0.1478, + "step": 5042 + }, + { + "epoch": 5.33, + "learning_rate": 3.668604651162791e-05, + "loss": 0.17, + "step": 5044 + }, + { + "epoch": 5.33, + "learning_rate": 3.668076109936575e-05, + "loss": 0.1549, + "step": 5046 + }, + { + "epoch": 5.34, + "learning_rate": 3.6675475687103596e-05, + "loss": 0.218, + "step": 5048 + }, + { + "epoch": 5.34, + "learning_rate": 3.6670190274841435e-05, + "loss": 0.2056, + "step": 5050 + }, + { + "epoch": 5.34, + "learning_rate": 3.666490486257929e-05, + "loss": 0.1911, + "step": 5052 + }, + { + "epoch": 5.34, + "learning_rate": 3.665961945031713e-05, + "loss": 0.1425, + "step": 5054 + }, + { + "epoch": 5.34, + "learning_rate": 3.665433403805497e-05, + "loss": 0.236, + "step": 5056 + }, + { + "epoch": 5.35, + "learning_rate": 3.664904862579281e-05, + "loss": 0.2433, + "step": 5058 + }, + { + "epoch": 5.35, + "learning_rate": 3.664376321353066e-05, + "loss": 0.1608, + "step": 5060 + }, + { + "epoch": 5.35, + "learning_rate": 3.66384778012685e-05, + "loss": 0.1666, + "step": 5062 + }, + { + "epoch": 5.35, + "learning_rate": 3.6633192389006344e-05, + "loss": 0.1748, + "step": 5064 + }, + { + "epoch": 5.36, + "learning_rate": 3.662790697674418e-05, + "loss": 0.188, + "step": 5066 + }, + { + "epoch": 5.36, + "learning_rate": 3.662262156448203e-05, + "loss": 0.1773, + "step": 5068 + }, + { + "epoch": 5.36, + "learning_rate": 3.6617336152219875e-05, + "loss": 0.228, + "step": 5070 + }, + { + "epoch": 5.36, + "learning_rate": 3.661205073995772e-05, + "loss": 0.1715, + "step": 5072 + }, + { + "epoch": 5.36, + "learning_rate": 3.660676532769556e-05, + "loss": 0.2415, + "step": 5074 + }, + { + "epoch": 5.37, + "learning_rate": 3.660147991543341e-05, + "loss": 0.1875, + "step": 5076 + }, + { + "epoch": 5.37, + "learning_rate": 3.659619450317125e-05, + "loss": 0.2753, + "step": 5078 + }, + { + "epoch": 5.37, + "learning_rate": 3.659090909090909e-05, + "loss": 0.2631, + "step": 5080 + }, + { + "epoch": 5.37, + "learning_rate": 3.658562367864694e-05, + "loss": 0.1206, + "step": 5082 + }, + { + "epoch": 5.37, + "learning_rate": 3.658033826638478e-05, + "loss": 0.2814, + "step": 5084 + }, + { + "epoch": 5.38, + "learning_rate": 3.6575052854122623e-05, + "loss": 0.2033, + "step": 5086 + }, + { + "epoch": 5.38, + "learning_rate": 3.656976744186046e-05, + "loss": 0.1643, + "step": 5088 + }, + { + "epoch": 5.38, + "learning_rate": 3.6564482029598316e-05, + "loss": 0.1404, + "step": 5090 + }, + { + "epoch": 5.38, + "learning_rate": 3.6559196617336155e-05, + "loss": 0.2004, + "step": 5092 + }, + { + "epoch": 5.38, + "learning_rate": 3.6553911205074e-05, + "loss": 0.1059, + "step": 5094 + }, + { + "epoch": 5.39, + "learning_rate": 3.654862579281184e-05, + "loss": 0.1295, + "step": 5096 + }, + { + "epoch": 5.39, + "learning_rate": 3.6543340380549686e-05, + "loss": 0.3503, + "step": 5098 + }, + { + "epoch": 5.39, + "learning_rate": 3.6538054968287526e-05, + "loss": 0.1633, + "step": 5100 + }, + { + "epoch": 5.39, + "learning_rate": 3.653276955602537e-05, + "loss": 0.1093, + "step": 5102 + }, + { + "epoch": 5.4, + "learning_rate": 3.652748414376321e-05, + "loss": 0.2215, + "step": 5104 + }, + { + "epoch": 5.4, + "learning_rate": 3.652219873150106e-05, + "loss": 0.1032, + "step": 5106 + }, + { + "epoch": 5.4, + "learning_rate": 3.65169133192389e-05, + "loss": 0.3177, + "step": 5108 + }, + { + "epoch": 5.4, + "learning_rate": 3.651162790697675e-05, + "loss": 0.209, + "step": 5110 + }, + { + "epoch": 5.4, + "learning_rate": 3.650634249471459e-05, + "loss": 0.1654, + "step": 5112 + }, + { + "epoch": 5.41, + "learning_rate": 3.6501057082452434e-05, + "loss": 0.2999, + "step": 5114 + }, + { + "epoch": 5.41, + "learning_rate": 3.6495771670190274e-05, + "loss": 0.1211, + "step": 5116 + }, + { + "epoch": 5.41, + "learning_rate": 3.649048625792812e-05, + "loss": 0.2942, + "step": 5118 + }, + { + "epoch": 5.41, + "learning_rate": 3.648520084566596e-05, + "loss": 0.2251, + "step": 5120 + }, + { + "epoch": 5.41, + "learning_rate": 3.6479915433403805e-05, + "loss": 0.2525, + "step": 5122 + }, + { + "epoch": 5.42, + "learning_rate": 3.647463002114165e-05, + "loss": 0.2324, + "step": 5124 + }, + { + "epoch": 5.42, + "learning_rate": 3.64693446088795e-05, + "loss": 0.2722, + "step": 5126 + }, + { + "epoch": 5.42, + "learning_rate": 3.6464059196617336e-05, + "loss": 0.1604, + "step": 5128 + }, + { + "epoch": 5.42, + "learning_rate": 3.645877378435518e-05, + "loss": 0.155, + "step": 5130 + }, + { + "epoch": 5.42, + "learning_rate": 3.645348837209303e-05, + "loss": 0.1968, + "step": 5132 + }, + { + "epoch": 5.43, + "learning_rate": 3.644820295983087e-05, + "loss": 0.214, + "step": 5134 + }, + { + "epoch": 5.43, + "learning_rate": 3.6442917547568714e-05, + "loss": 0.1837, + "step": 5136 + }, + { + "epoch": 5.43, + "learning_rate": 3.643763213530655e-05, + "loss": 0.0795, + "step": 5138 + }, + { + "epoch": 5.43, + "learning_rate": 3.64323467230444e-05, + "loss": 0.2023, + "step": 5140 + }, + { + "epoch": 5.44, + "learning_rate": 3.642706131078224e-05, + "loss": 0.151, + "step": 5142 + }, + { + "epoch": 5.44, + "learning_rate": 3.642177589852009e-05, + "loss": 0.1739, + "step": 5144 + }, + { + "epoch": 5.44, + "learning_rate": 3.641649048625793e-05, + "loss": 0.0803, + "step": 5146 + }, + { + "epoch": 5.44, + "learning_rate": 3.641120507399578e-05, + "loss": 0.2561, + "step": 5148 + }, + { + "epoch": 5.44, + "learning_rate": 3.6405919661733616e-05, + "loss": 0.1125, + "step": 5150 + }, + { + "epoch": 5.45, + "learning_rate": 3.640063424947146e-05, + "loss": 0.1583, + "step": 5152 + }, + { + "epoch": 5.45, + "learning_rate": 3.63953488372093e-05, + "loss": 0.2679, + "step": 5154 + }, + { + "epoch": 5.45, + "learning_rate": 3.639006342494715e-05, + "loss": 0.1561, + "step": 5156 + }, + { + "epoch": 5.45, + "learning_rate": 3.638477801268499e-05, + "loss": 0.1881, + "step": 5158 + }, + { + "epoch": 5.45, + "learning_rate": 3.637949260042283e-05, + "loss": 0.213, + "step": 5160 + }, + { + "epoch": 5.46, + "learning_rate": 3.637420718816068e-05, + "loss": 0.0868, + "step": 5162 + }, + { + "epoch": 5.46, + "learning_rate": 3.6368921775898525e-05, + "loss": 0.1728, + "step": 5164 + }, + { + "epoch": 5.46, + "learning_rate": 3.6363636363636364e-05, + "loss": 0.1656, + "step": 5166 + }, + { + "epoch": 5.46, + "learning_rate": 3.635835095137421e-05, + "loss": 0.4125, + "step": 5168 + }, + { + "epoch": 5.47, + "learning_rate": 3.635306553911205e-05, + "loss": 0.2769, + "step": 5170 + }, + { + "epoch": 5.47, + "learning_rate": 3.6347780126849896e-05, + "loss": 0.1373, + "step": 5172 + }, + { + "epoch": 5.47, + "learning_rate": 3.6342494714587735e-05, + "loss": 0.3027, + "step": 5174 + }, + { + "epoch": 5.47, + "learning_rate": 3.633720930232558e-05, + "loss": 0.2377, + "step": 5176 + }, + { + "epoch": 5.47, + "learning_rate": 3.633192389006343e-05, + "loss": 0.1191, + "step": 5178 + }, + { + "epoch": 5.48, + "learning_rate": 3.632663847780127e-05, + "loss": 0.1034, + "step": 5180 + }, + { + "epoch": 5.48, + "learning_rate": 3.632135306553911e-05, + "loss": 0.1489, + "step": 5182 + }, + { + "epoch": 5.48, + "learning_rate": 3.631606765327696e-05, + "loss": 0.1935, + "step": 5184 + }, + { + "epoch": 5.48, + "learning_rate": 3.6310782241014804e-05, + "loss": 0.117, + "step": 5186 + }, + { + "epoch": 5.48, + "learning_rate": 3.6305496828752644e-05, + "loss": 0.2481, + "step": 5188 + }, + { + "epoch": 5.49, + "learning_rate": 3.630021141649049e-05, + "loss": 0.1525, + "step": 5190 + }, + { + "epoch": 5.49, + "learning_rate": 3.629492600422833e-05, + "loss": 0.1148, + "step": 5192 + }, + { + "epoch": 5.49, + "learning_rate": 3.6289640591966175e-05, + "loss": 0.1546, + "step": 5194 + }, + { + "epoch": 5.49, + "learning_rate": 3.6284355179704014e-05, + "loss": 0.136, + "step": 5196 + }, + { + "epoch": 5.49, + "learning_rate": 3.627906976744187e-05, + "loss": 0.1691, + "step": 5198 + }, + { + "epoch": 5.5, + "learning_rate": 3.6273784355179706e-05, + "loss": 0.1914, + "step": 5200 + }, + { + "epoch": 5.5, + "learning_rate": 3.626849894291755e-05, + "loss": 0.1867, + "step": 5202 + }, + { + "epoch": 5.5, + "learning_rate": 3.626321353065539e-05, + "loss": 0.2652, + "step": 5204 + }, + { + "epoch": 5.5, + "learning_rate": 3.625792811839324e-05, + "loss": 0.2976, + "step": 5206 + }, + { + "epoch": 5.51, + "learning_rate": 3.625264270613108e-05, + "loss": 0.2308, + "step": 5208 + }, + { + "epoch": 5.51, + "learning_rate": 3.624735729386892e-05, + "loss": 0.1615, + "step": 5210 + }, + { + "epoch": 5.51, + "learning_rate": 3.624207188160676e-05, + "loss": 0.1069, + "step": 5212 + }, + { + "epoch": 5.51, + "learning_rate": 3.623678646934461e-05, + "loss": 0.2488, + "step": 5214 + }, + { + "epoch": 5.51, + "learning_rate": 3.6231501057082455e-05, + "loss": 0.2028, + "step": 5216 + }, + { + "epoch": 5.52, + "learning_rate": 3.62262156448203e-05, + "loss": 0.4859, + "step": 5218 + }, + { + "epoch": 5.52, + "learning_rate": 3.622093023255814e-05, + "loss": 0.1746, + "step": 5220 + }, + { + "epoch": 5.52, + "learning_rate": 3.6215644820295986e-05, + "loss": 0.1445, + "step": 5222 + }, + { + "epoch": 5.52, + "learning_rate": 3.6210359408033825e-05, + "loss": 0.1672, + "step": 5224 + }, + { + "epoch": 5.52, + "learning_rate": 3.620507399577167e-05, + "loss": 0.1847, + "step": 5226 + }, + { + "epoch": 5.53, + "learning_rate": 3.619978858350952e-05, + "loss": 0.3719, + "step": 5228 + }, + { + "epoch": 5.53, + "learning_rate": 3.619450317124736e-05, + "loss": 0.4154, + "step": 5230 + }, + { + "epoch": 5.53, + "learning_rate": 3.61892177589852e-05, + "loss": 0.2469, + "step": 5232 + }, + { + "epoch": 5.53, + "learning_rate": 3.618393234672305e-05, + "loss": 0.1911, + "step": 5234 + }, + { + "epoch": 5.53, + "learning_rate": 3.617864693446089e-05, + "loss": 0.177, + "step": 5236 + }, + { + "epoch": 5.54, + "learning_rate": 3.6173361522198734e-05, + "loss": 0.1316, + "step": 5238 + }, + { + "epoch": 5.54, + "learning_rate": 3.616807610993658e-05, + "loss": 0.3266, + "step": 5240 + }, + { + "epoch": 5.54, + "learning_rate": 3.616279069767442e-05, + "loss": 0.2839, + "step": 5242 + }, + { + "epoch": 5.54, + "learning_rate": 3.6157505285412266e-05, + "loss": 0.2419, + "step": 5244 + }, + { + "epoch": 5.55, + "learning_rate": 3.6152219873150105e-05, + "loss": 0.2582, + "step": 5246 + }, + { + "epoch": 5.55, + "learning_rate": 3.614693446088795e-05, + "loss": 0.2562, + "step": 5248 + }, + { + "epoch": 5.55, + "learning_rate": 3.614164904862579e-05, + "loss": 0.3777, + "step": 5250 + }, + { + "epoch": 5.55, + "learning_rate": 3.613636363636364e-05, + "loss": 0.3315, + "step": 5252 + }, + { + "epoch": 5.55, + "learning_rate": 3.613107822410148e-05, + "loss": 0.1576, + "step": 5254 + }, + { + "epoch": 5.56, + "learning_rate": 3.612579281183933e-05, + "loss": 0.1337, + "step": 5256 + }, + { + "epoch": 5.56, + "learning_rate": 3.612050739957717e-05, + "loss": 0.1628, + "step": 5258 + }, + { + "epoch": 5.56, + "learning_rate": 3.6115221987315014e-05, + "loss": 0.1957, + "step": 5260 + }, + { + "epoch": 5.56, + "learning_rate": 3.610993657505285e-05, + "loss": 0.2507, + "step": 5262 + }, + { + "epoch": 5.56, + "learning_rate": 3.61046511627907e-05, + "loss": 0.2434, + "step": 5264 + }, + { + "epoch": 5.57, + "learning_rate": 3.609936575052854e-05, + "loss": 0.2171, + "step": 5266 + }, + { + "epoch": 5.57, + "learning_rate": 3.6094080338266384e-05, + "loss": 0.3111, + "step": 5268 + }, + { + "epoch": 5.57, + "learning_rate": 3.608879492600423e-05, + "loss": 0.153, + "step": 5270 + }, + { + "epoch": 5.57, + "learning_rate": 3.6083509513742077e-05, + "loss": 0.1377, + "step": 5272 + }, + { + "epoch": 5.58, + "learning_rate": 3.6078224101479916e-05, + "loss": 0.2569, + "step": 5274 + }, + { + "epoch": 5.58, + "learning_rate": 3.607293868921776e-05, + "loss": 0.179, + "step": 5276 + }, + { + "epoch": 5.58, + "learning_rate": 3.60676532769556e-05, + "loss": 0.2822, + "step": 5278 + }, + { + "epoch": 5.58, + "learning_rate": 3.606236786469345e-05, + "loss": 0.1619, + "step": 5280 + }, + { + "epoch": 5.58, + "learning_rate": 3.605708245243129e-05, + "loss": 0.2394, + "step": 5282 + }, + { + "epoch": 5.59, + "learning_rate": 3.605179704016913e-05, + "loss": 0.2473, + "step": 5284 + }, + { + "epoch": 5.59, + "learning_rate": 3.604651162790698e-05, + "loss": 0.2508, + "step": 5286 + }, + { + "epoch": 5.59, + "learning_rate": 3.6041226215644825e-05, + "loss": 0.2127, + "step": 5288 + }, + { + "epoch": 5.59, + "learning_rate": 3.6035940803382664e-05, + "loss": 0.1293, + "step": 5290 + }, + { + "epoch": 5.59, + "learning_rate": 3.603065539112051e-05, + "loss": 0.4284, + "step": 5292 + }, + { + "epoch": 5.6, + "learning_rate": 3.6025369978858356e-05, + "loss": 0.2449, + "step": 5294 + }, + { + "epoch": 5.6, + "learning_rate": 3.6020084566596195e-05, + "loss": 0.2241, + "step": 5296 + }, + { + "epoch": 5.6, + "learning_rate": 3.601479915433404e-05, + "loss": 0.1785, + "step": 5298 + }, + { + "epoch": 5.6, + "learning_rate": 3.600951374207188e-05, + "loss": 0.1411, + "step": 5300 + }, + { + "epoch": 5.6, + "learning_rate": 3.600422832980973e-05, + "loss": 0.0909, + "step": 5302 + }, + { + "epoch": 5.61, + "learning_rate": 3.5998942917547566e-05, + "loss": 0.1268, + "step": 5304 + }, + { + "epoch": 5.61, + "learning_rate": 3.599365750528542e-05, + "loss": 0.1075, + "step": 5306 + }, + { + "epoch": 5.61, + "learning_rate": 3.598837209302326e-05, + "loss": 0.1462, + "step": 5308 + }, + { + "epoch": 5.61, + "learning_rate": 3.5983086680761104e-05, + "loss": 0.2667, + "step": 5310 + }, + { + "epoch": 5.62, + "learning_rate": 3.5977801268498944e-05, + "loss": 0.1675, + "step": 5312 + }, + { + "epoch": 5.62, + "learning_rate": 3.597251585623679e-05, + "loss": 0.2495, + "step": 5314 + }, + { + "epoch": 5.62, + "learning_rate": 3.596723044397463e-05, + "loss": 0.2061, + "step": 5316 + }, + { + "epoch": 5.62, + "learning_rate": 3.5961945031712475e-05, + "loss": 0.2483, + "step": 5318 + }, + { + "epoch": 5.62, + "learning_rate": 3.5956659619450314e-05, + "loss": 0.1289, + "step": 5320 + }, + { + "epoch": 5.63, + "learning_rate": 3.595137420718816e-05, + "loss": 0.1895, + "step": 5322 + }, + { + "epoch": 5.63, + "learning_rate": 3.5946088794926006e-05, + "loss": 0.2229, + "step": 5324 + }, + { + "epoch": 5.63, + "learning_rate": 3.594080338266385e-05, + "loss": 0.2349, + "step": 5326 + }, + { + "epoch": 5.63, + "learning_rate": 3.593551797040169e-05, + "loss": 0.0876, + "step": 5328 + }, + { + "epoch": 5.63, + "learning_rate": 3.593023255813954e-05, + "loss": 0.1031, + "step": 5330 + }, + { + "epoch": 5.64, + "learning_rate": 3.592494714587738e-05, + "loss": 0.1326, + "step": 5332 + }, + { + "epoch": 5.64, + "learning_rate": 3.591966173361522e-05, + "loss": 0.2142, + "step": 5334 + }, + { + "epoch": 5.64, + "learning_rate": 3.591437632135307e-05, + "loss": 0.2463, + "step": 5336 + }, + { + "epoch": 5.64, + "learning_rate": 3.590909090909091e-05, + "loss": 0.2288, + "step": 5338 + }, + { + "epoch": 5.64, + "learning_rate": 3.5903805496828754e-05, + "loss": 0.2073, + "step": 5340 + }, + { + "epoch": 5.65, + "learning_rate": 3.58985200845666e-05, + "loss": 0.0884, + "step": 5342 + }, + { + "epoch": 5.65, + "learning_rate": 3.5893234672304447e-05, + "loss": 0.2159, + "step": 5344 + }, + { + "epoch": 5.65, + "learning_rate": 3.5887949260042286e-05, + "loss": 0.1365, + "step": 5346 + }, + { + "epoch": 5.65, + "learning_rate": 3.588266384778013e-05, + "loss": 0.2142, + "step": 5348 + }, + { + "epoch": 5.66, + "learning_rate": 3.587737843551797e-05, + "loss": 0.2101, + "step": 5350 + }, + { + "epoch": 5.66, + "learning_rate": 3.587209302325582e-05, + "loss": 0.3025, + "step": 5352 + }, + { + "epoch": 5.66, + "learning_rate": 3.5866807610993657e-05, + "loss": 0.1892, + "step": 5354 + }, + { + "epoch": 5.66, + "learning_rate": 3.58615221987315e-05, + "loss": 0.1453, + "step": 5356 + }, + { + "epoch": 5.66, + "learning_rate": 3.585623678646934e-05, + "loss": 0.1022, + "step": 5358 + }, + { + "epoch": 5.67, + "learning_rate": 3.5850951374207195e-05, + "loss": 0.1156, + "step": 5360 + }, + { + "epoch": 5.67, + "learning_rate": 3.5845665961945034e-05, + "loss": 0.2799, + "step": 5362 + }, + { + "epoch": 5.67, + "learning_rate": 3.584038054968288e-05, + "loss": 0.2196, + "step": 5364 + }, + { + "epoch": 5.67, + "learning_rate": 3.583509513742072e-05, + "loss": 0.277, + "step": 5366 + }, + { + "epoch": 5.67, + "learning_rate": 3.5829809725158565e-05, + "loss": 0.1254, + "step": 5368 + }, + { + "epoch": 5.68, + "learning_rate": 3.5824524312896405e-05, + "loss": 0.1106, + "step": 5370 + }, + { + "epoch": 5.68, + "learning_rate": 3.581923890063425e-05, + "loss": 0.1766, + "step": 5372 + }, + { + "epoch": 5.68, + "learning_rate": 3.581395348837209e-05, + "loss": 0.3162, + "step": 5374 + }, + { + "epoch": 5.68, + "learning_rate": 3.5808668076109936e-05, + "loss": 0.2226, + "step": 5376 + }, + { + "epoch": 5.68, + "learning_rate": 3.580338266384778e-05, + "loss": 0.1871, + "step": 5378 + }, + { + "epoch": 5.69, + "learning_rate": 3.579809725158563e-05, + "loss": 0.3953, + "step": 5380 + }, + { + "epoch": 5.69, + "learning_rate": 3.579281183932347e-05, + "loss": 0.2468, + "step": 5382 + }, + { + "epoch": 5.69, + "learning_rate": 3.5787526427061314e-05, + "loss": 0.1599, + "step": 5384 + }, + { + "epoch": 5.69, + "learning_rate": 3.578224101479915e-05, + "loss": 0.1013, + "step": 5386 + }, + { + "epoch": 5.7, + "learning_rate": 3.5776955602537e-05, + "loss": 0.304, + "step": 5388 + }, + { + "epoch": 5.7, + "learning_rate": 3.5771670190274845e-05, + "loss": 0.228, + "step": 5390 + }, + { + "epoch": 5.7, + "learning_rate": 3.5766384778012684e-05, + "loss": 0.1226, + "step": 5392 + }, + { + "epoch": 5.7, + "learning_rate": 3.576109936575053e-05, + "loss": 0.1913, + "step": 5394 + }, + { + "epoch": 5.7, + "learning_rate": 3.5755813953488376e-05, + "loss": 0.1811, + "step": 5396 + }, + { + "epoch": 5.71, + "learning_rate": 3.575052854122622e-05, + "loss": 0.2636, + "step": 5398 + }, + { + "epoch": 5.71, + "learning_rate": 3.574524312896406e-05, + "loss": 0.1235, + "step": 5400 + }, + { + "epoch": 5.71, + "learning_rate": 3.573995771670191e-05, + "loss": 0.2232, + "step": 5402 + }, + { + "epoch": 5.71, + "learning_rate": 3.573467230443975e-05, + "loss": 0.1785, + "step": 5404 + }, + { + "epoch": 5.71, + "learning_rate": 3.572938689217759e-05, + "loss": 0.1615, + "step": 5406 + }, + { + "epoch": 5.72, + "learning_rate": 3.572410147991543e-05, + "loss": 0.101, + "step": 5408 + }, + { + "epoch": 5.72, + "learning_rate": 3.571881606765328e-05, + "loss": 0.1539, + "step": 5410 + }, + { + "epoch": 5.72, + "learning_rate": 3.571353065539112e-05, + "loss": 0.1954, + "step": 5412 + }, + { + "epoch": 5.72, + "learning_rate": 3.570824524312897e-05, + "loss": 0.2136, + "step": 5414 + }, + { + "epoch": 5.73, + "learning_rate": 3.570295983086681e-05, + "loss": 0.1079, + "step": 5416 + }, + { + "epoch": 5.73, + "learning_rate": 3.5697674418604656e-05, + "loss": 0.2143, + "step": 5418 + }, + { + "epoch": 5.73, + "learning_rate": 3.5692389006342495e-05, + "loss": 0.2804, + "step": 5420 + }, + { + "epoch": 5.73, + "learning_rate": 3.568710359408034e-05, + "loss": 0.0697, + "step": 5422 + }, + { + "epoch": 5.73, + "learning_rate": 3.568181818181818e-05, + "loss": 0.1546, + "step": 5424 + }, + { + "epoch": 5.74, + "learning_rate": 3.5676532769556027e-05, + "loss": 0.2973, + "step": 5426 + }, + { + "epoch": 5.74, + "learning_rate": 3.5671247357293866e-05, + "loss": 0.2149, + "step": 5428 + }, + { + "epoch": 5.74, + "learning_rate": 3.566596194503171e-05, + "loss": 0.3598, + "step": 5430 + }, + { + "epoch": 5.74, + "learning_rate": 3.566067653276956e-05, + "loss": 0.172, + "step": 5432 + }, + { + "epoch": 5.74, + "learning_rate": 3.5655391120507404e-05, + "loss": 0.166, + "step": 5434 + }, + { + "epoch": 5.75, + "learning_rate": 3.565010570824524e-05, + "loss": 0.1814, + "step": 5436 + }, + { + "epoch": 5.75, + "learning_rate": 3.564482029598309e-05, + "loss": 0.1576, + "step": 5438 + }, + { + "epoch": 5.75, + "learning_rate": 3.563953488372093e-05, + "loss": 0.1284, + "step": 5440 + }, + { + "epoch": 5.75, + "learning_rate": 3.5634249471458775e-05, + "loss": 0.1752, + "step": 5442 + }, + { + "epoch": 5.75, + "learning_rate": 3.562896405919662e-05, + "loss": 0.2017, + "step": 5444 + }, + { + "epoch": 5.76, + "learning_rate": 3.562367864693446e-05, + "loss": 0.1809, + "step": 5446 + }, + { + "epoch": 5.76, + "learning_rate": 3.5618393234672306e-05, + "loss": 0.1873, + "step": 5448 + }, + { + "epoch": 5.76, + "learning_rate": 3.561310782241015e-05, + "loss": 0.1214, + "step": 5450 + }, + { + "epoch": 5.76, + "learning_rate": 3.5607822410148e-05, + "loss": 0.0826, + "step": 5452 + }, + { + "epoch": 5.77, + "learning_rate": 3.560253699788584e-05, + "loss": 0.2159, + "step": 5454 + }, + { + "epoch": 5.77, + "learning_rate": 3.5597251585623684e-05, + "loss": 0.1289, + "step": 5456 + }, + { + "epoch": 5.77, + "learning_rate": 3.559196617336152e-05, + "loss": 0.1642, + "step": 5458 + }, + { + "epoch": 5.77, + "learning_rate": 3.558668076109937e-05, + "loss": 0.1882, + "step": 5460 + }, + { + "epoch": 5.77, + "learning_rate": 3.558139534883721e-05, + "loss": 0.1427, + "step": 5462 + }, + { + "epoch": 5.78, + "learning_rate": 3.5576109936575054e-05, + "loss": 0.104, + "step": 5464 + }, + { + "epoch": 5.78, + "learning_rate": 3.5570824524312894e-05, + "loss": 0.11, + "step": 5466 + }, + { + "epoch": 5.78, + "learning_rate": 3.5565539112050746e-05, + "loss": 0.097, + "step": 5468 + }, + { + "epoch": 5.78, + "learning_rate": 3.5560253699788586e-05, + "loss": 0.1465, + "step": 5470 + }, + { + "epoch": 5.78, + "learning_rate": 3.555496828752643e-05, + "loss": 0.2532, + "step": 5472 + }, + { + "epoch": 5.79, + "learning_rate": 3.554968287526427e-05, + "loss": 0.1785, + "step": 5474 + }, + { + "epoch": 5.79, + "learning_rate": 3.554439746300212e-05, + "loss": 0.2498, + "step": 5476 + }, + { + "epoch": 5.79, + "learning_rate": 3.5539112050739956e-05, + "loss": 0.3845, + "step": 5478 + }, + { + "epoch": 5.79, + "learning_rate": 3.55338266384778e-05, + "loss": 0.2038, + "step": 5480 + }, + { + "epoch": 5.79, + "learning_rate": 3.552854122621564e-05, + "loss": 0.1827, + "step": 5482 + }, + { + "epoch": 5.8, + "learning_rate": 3.552325581395349e-05, + "loss": 0.1429, + "step": 5484 + }, + { + "epoch": 5.8, + "learning_rate": 3.5517970401691334e-05, + "loss": 0.1874, + "step": 5486 + }, + { + "epoch": 5.8, + "learning_rate": 3.551268498942918e-05, + "loss": 0.1165, + "step": 5488 + }, + { + "epoch": 5.8, + "learning_rate": 3.550739957716702e-05, + "loss": 0.1995, + "step": 5490 + }, + { + "epoch": 5.81, + "learning_rate": 3.5502114164904865e-05, + "loss": 0.2121, + "step": 5492 + }, + { + "epoch": 5.81, + "learning_rate": 3.5496828752642705e-05, + "loss": 0.1209, + "step": 5494 + }, + { + "epoch": 5.81, + "learning_rate": 3.549154334038055e-05, + "loss": 0.1137, + "step": 5496 + }, + { + "epoch": 5.81, + "learning_rate": 3.54862579281184e-05, + "loss": 0.1164, + "step": 5498 + }, + { + "epoch": 5.81, + "learning_rate": 3.5480972515856236e-05, + "loss": 0.1558, + "step": 5500 + }, + { + "epoch": 5.81, + "eval_cer": 0.023254488458250212, + "eval_loss": 0.6493213176727295, + "eval_runtime": 128.6984, + "eval_samples_per_second": 6.535, + "eval_steps_per_second": 0.824, + "step": 5500 + }, + { + "epoch": 5.82, + "learning_rate": 3.547568710359408e-05, + "loss": 0.1527, + "step": 5502 + }, + { + "epoch": 5.82, + "learning_rate": 3.547040169133193e-05, + "loss": 0.1787, + "step": 5504 + }, + { + "epoch": 5.82, + "learning_rate": 3.5465116279069774e-05, + "loss": 0.0842, + "step": 5506 + }, + { + "epoch": 5.82, + "learning_rate": 3.5459830866807613e-05, + "loss": 0.1248, + "step": 5508 + }, + { + "epoch": 5.82, + "learning_rate": 3.545454545454546e-05, + "loss": 0.2904, + "step": 5510 + }, + { + "epoch": 5.83, + "learning_rate": 3.54492600422833e-05, + "loss": 0.1775, + "step": 5512 + }, + { + "epoch": 5.83, + "learning_rate": 3.5443974630021145e-05, + "loss": 0.1804, + "step": 5514 + }, + { + "epoch": 5.83, + "learning_rate": 3.5438689217758984e-05, + "loss": 0.1622, + "step": 5516 + }, + { + "epoch": 5.83, + "learning_rate": 3.543340380549683e-05, + "loss": 0.1061, + "step": 5518 + }, + { + "epoch": 5.84, + "learning_rate": 3.542811839323467e-05, + "loss": 0.2467, + "step": 5520 + }, + { + "epoch": 5.84, + "learning_rate": 3.542283298097252e-05, + "loss": 0.1456, + "step": 5522 + }, + { + "epoch": 5.84, + "learning_rate": 3.541754756871036e-05, + "loss": 0.0853, + "step": 5524 + }, + { + "epoch": 5.84, + "learning_rate": 3.541226215644821e-05, + "loss": 0.2468, + "step": 5526 + }, + { + "epoch": 5.84, + "learning_rate": 3.540697674418605e-05, + "loss": 0.1191, + "step": 5528 + }, + { + "epoch": 5.85, + "learning_rate": 3.540169133192389e-05, + "loss": 0.1519, + "step": 5530 + }, + { + "epoch": 5.85, + "learning_rate": 3.539640591966173e-05, + "loss": 0.1389, + "step": 5532 + }, + { + "epoch": 5.85, + "learning_rate": 3.539112050739958e-05, + "loss": 0.1521, + "step": 5534 + }, + { + "epoch": 5.85, + "learning_rate": 3.538583509513742e-05, + "loss": 0.1933, + "step": 5536 + }, + { + "epoch": 5.85, + "learning_rate": 3.5380549682875264e-05, + "loss": 0.2494, + "step": 5538 + }, + { + "epoch": 5.86, + "learning_rate": 3.537526427061311e-05, + "loss": 0.2218, + "step": 5540 + }, + { + "epoch": 5.86, + "learning_rate": 3.5369978858350956e-05, + "loss": 0.193, + "step": 5542 + }, + { + "epoch": 5.86, + "learning_rate": 3.5364693446088795e-05, + "loss": 0.149, + "step": 5544 + }, + { + "epoch": 5.86, + "learning_rate": 3.535940803382664e-05, + "loss": 0.211, + "step": 5546 + }, + { + "epoch": 5.86, + "learning_rate": 3.535412262156449e-05, + "loss": 0.2097, + "step": 5548 + }, + { + "epoch": 5.87, + "learning_rate": 3.5348837209302326e-05, + "loss": 0.2378, + "step": 5550 + }, + { + "epoch": 5.87, + "learning_rate": 3.534355179704017e-05, + "loss": 0.2757, + "step": 5552 + }, + { + "epoch": 5.87, + "learning_rate": 3.533826638477801e-05, + "loss": 0.1784, + "step": 5554 + }, + { + "epoch": 5.87, + "learning_rate": 3.533298097251586e-05, + "loss": 0.1451, + "step": 5556 + }, + { + "epoch": 5.88, + "learning_rate": 3.5327695560253704e-05, + "loss": 0.1482, + "step": 5558 + }, + { + "epoch": 5.88, + "learning_rate": 3.532241014799155e-05, + "loss": 0.1061, + "step": 5560 + }, + { + "epoch": 5.88, + "learning_rate": 3.531712473572939e-05, + "loss": 0.0902, + "step": 5562 + }, + { + "epoch": 5.88, + "learning_rate": 3.5311839323467235e-05, + "loss": 0.1869, + "step": 5564 + }, + { + "epoch": 5.88, + "learning_rate": 3.5306553911205075e-05, + "loss": 0.2725, + "step": 5566 + }, + { + "epoch": 5.89, + "learning_rate": 3.530126849894292e-05, + "loss": 0.2675, + "step": 5568 + }, + { + "epoch": 5.89, + "learning_rate": 3.529598308668076e-05, + "loss": 0.2006, + "step": 5570 + }, + { + "epoch": 5.89, + "learning_rate": 3.5290697674418606e-05, + "loss": 0.1599, + "step": 5572 + }, + { + "epoch": 5.89, + "learning_rate": 3.5285412262156445e-05, + "loss": 0.1574, + "step": 5574 + }, + { + "epoch": 5.89, + "learning_rate": 3.52801268498943e-05, + "loss": 0.103, + "step": 5576 + }, + { + "epoch": 5.9, + "learning_rate": 3.527484143763214e-05, + "loss": 0.2207, + "step": 5578 + }, + { + "epoch": 5.9, + "learning_rate": 3.5269556025369983e-05, + "loss": 0.1215, + "step": 5580 + }, + { + "epoch": 5.9, + "learning_rate": 3.526427061310782e-05, + "loss": 0.3131, + "step": 5582 + }, + { + "epoch": 5.9, + "learning_rate": 3.525898520084567e-05, + "loss": 0.2382, + "step": 5584 + }, + { + "epoch": 5.9, + "learning_rate": 3.525369978858351e-05, + "loss": 0.2115, + "step": 5586 + }, + { + "epoch": 5.91, + "learning_rate": 3.5248414376321354e-05, + "loss": 0.315, + "step": 5588 + }, + { + "epoch": 5.91, + "learning_rate": 3.5243128964059193e-05, + "loss": 0.3125, + "step": 5590 + }, + { + "epoch": 5.91, + "learning_rate": 3.523784355179704e-05, + "loss": 0.2226, + "step": 5592 + }, + { + "epoch": 5.91, + "learning_rate": 3.5232558139534886e-05, + "loss": 0.1697, + "step": 5594 + }, + { + "epoch": 5.92, + "learning_rate": 3.522727272727273e-05, + "loss": 0.199, + "step": 5596 + }, + { + "epoch": 5.92, + "learning_rate": 3.522198731501057e-05, + "loss": 0.116, + "step": 5598 + }, + { + "epoch": 5.92, + "learning_rate": 3.521670190274842e-05, + "loss": 0.3555, + "step": 5600 + }, + { + "epoch": 5.92, + "learning_rate": 3.521141649048626e-05, + "loss": 0.1411, + "step": 5602 + }, + { + "epoch": 5.92, + "learning_rate": 3.52061310782241e-05, + "loss": 0.1481, + "step": 5604 + }, + { + "epoch": 5.93, + "learning_rate": 3.520084566596195e-05, + "loss": 0.2624, + "step": 5606 + }, + { + "epoch": 5.93, + "learning_rate": 3.519556025369979e-05, + "loss": 0.2433, + "step": 5608 + }, + { + "epoch": 5.93, + "learning_rate": 3.5190274841437634e-05, + "loss": 0.1429, + "step": 5610 + }, + { + "epoch": 5.93, + "learning_rate": 3.518498942917548e-05, + "loss": 0.119, + "step": 5612 + }, + { + "epoch": 5.93, + "learning_rate": 3.5179704016913326e-05, + "loss": 0.0767, + "step": 5614 + }, + { + "epoch": 5.94, + "learning_rate": 3.5174418604651165e-05, + "loss": 0.2706, + "step": 5616 + }, + { + "epoch": 5.94, + "learning_rate": 3.516913319238901e-05, + "loss": 0.1723, + "step": 5618 + }, + { + "epoch": 5.94, + "learning_rate": 3.516384778012685e-05, + "loss": 0.1937, + "step": 5620 + }, + { + "epoch": 5.94, + "learning_rate": 3.5158562367864696e-05, + "loss": 0.1043, + "step": 5622 + }, + { + "epoch": 5.95, + "learning_rate": 3.5153276955602536e-05, + "loss": 0.2116, + "step": 5624 + }, + { + "epoch": 5.95, + "learning_rate": 3.514799154334038e-05, + "loss": 0.1653, + "step": 5626 + }, + { + "epoch": 5.95, + "learning_rate": 3.514270613107822e-05, + "loss": 0.1505, + "step": 5628 + }, + { + "epoch": 5.95, + "learning_rate": 3.5137420718816074e-05, + "loss": 0.2293, + "step": 5630 + }, + { + "epoch": 5.95, + "learning_rate": 3.513213530655391e-05, + "loss": 0.2016, + "step": 5632 + }, + { + "epoch": 5.96, + "learning_rate": 3.512684989429176e-05, + "loss": 0.1426, + "step": 5634 + }, + { + "epoch": 5.96, + "learning_rate": 3.51215644820296e-05, + "loss": 0.129, + "step": 5636 + }, + { + "epoch": 5.96, + "learning_rate": 3.5116279069767445e-05, + "loss": 0.1259, + "step": 5638 + }, + { + "epoch": 5.96, + "learning_rate": 3.5110993657505284e-05, + "loss": 0.1072, + "step": 5640 + }, + { + "epoch": 5.96, + "learning_rate": 3.510570824524313e-05, + "loss": 0.2352, + "step": 5642 + }, + { + "epoch": 5.97, + "learning_rate": 3.510042283298097e-05, + "loss": 0.3217, + "step": 5644 + }, + { + "epoch": 5.97, + "learning_rate": 3.5095137420718815e-05, + "loss": 0.0973, + "step": 5646 + }, + { + "epoch": 5.97, + "learning_rate": 3.508985200845666e-05, + "loss": 0.1055, + "step": 5648 + }, + { + "epoch": 5.97, + "learning_rate": 3.508456659619451e-05, + "loss": 0.1537, + "step": 5650 + }, + { + "epoch": 5.97, + "learning_rate": 3.507928118393235e-05, + "loss": 0.1017, + "step": 5652 + }, + { + "epoch": 5.98, + "learning_rate": 3.507399577167019e-05, + "loss": 0.2179, + "step": 5654 + }, + { + "epoch": 5.98, + "learning_rate": 3.506871035940804e-05, + "loss": 0.1378, + "step": 5656 + }, + { + "epoch": 5.98, + "learning_rate": 3.506342494714588e-05, + "loss": 0.251, + "step": 5658 + }, + { + "epoch": 5.98, + "learning_rate": 3.5058139534883724e-05, + "loss": 0.146, + "step": 5660 + }, + { + "epoch": 5.99, + "learning_rate": 3.5052854122621563e-05, + "loss": 0.1565, + "step": 5662 + }, + { + "epoch": 5.99, + "learning_rate": 3.504756871035941e-05, + "loss": 0.1334, + "step": 5664 + }, + { + "epoch": 5.99, + "learning_rate": 3.5042283298097256e-05, + "loss": 0.1833, + "step": 5666 + }, + { + "epoch": 5.99, + "learning_rate": 3.50369978858351e-05, + "loss": 0.1748, + "step": 5668 + }, + { + "epoch": 5.99, + "learning_rate": 3.503171247357294e-05, + "loss": 0.1905, + "step": 5670 + }, + { + "epoch": 6.0, + "learning_rate": 3.502642706131079e-05, + "loss": 0.1416, + "step": 5672 + }, + { + "epoch": 6.0, + "learning_rate": 3.5021141649048626e-05, + "loss": 0.189, + "step": 5674 + }, + { + "epoch": 6.0, + "learning_rate": 3.501585623678647e-05, + "loss": 0.1976, + "step": 5676 + }, + { + "epoch": 6.0, + "learning_rate": 3.501057082452431e-05, + "loss": 0.1923, + "step": 5678 + }, + { + "epoch": 6.0, + "learning_rate": 3.500528541226216e-05, + "loss": 0.2369, + "step": 5680 + }, + { + "epoch": 6.01, + "learning_rate": 3.5e-05, + "loss": 0.0773, + "step": 5682 + }, + { + "epoch": 6.01, + "learning_rate": 3.499471458773785e-05, + "loss": 0.1012, + "step": 5684 + }, + { + "epoch": 6.01, + "learning_rate": 3.498942917547569e-05, + "loss": 0.1372, + "step": 5686 + }, + { + "epoch": 6.01, + "learning_rate": 3.4984143763213535e-05, + "loss": 0.2137, + "step": 5688 + }, + { + "epoch": 6.01, + "learning_rate": 3.4978858350951374e-05, + "loss": 0.213, + "step": 5690 + }, + { + "epoch": 6.02, + "learning_rate": 3.497357293868922e-05, + "loss": 0.0937, + "step": 5692 + }, + { + "epoch": 6.02, + "learning_rate": 3.496828752642706e-05, + "loss": 0.2398, + "step": 5694 + }, + { + "epoch": 6.02, + "learning_rate": 3.4963002114164906e-05, + "loss": 0.1157, + "step": 5696 + }, + { + "epoch": 6.02, + "learning_rate": 3.495771670190275e-05, + "loss": 0.0669, + "step": 5698 + }, + { + "epoch": 6.03, + "learning_rate": 3.495243128964059e-05, + "loss": 0.2217, + "step": 5700 + }, + { + "epoch": 6.03, + "learning_rate": 3.494714587737844e-05, + "loss": 0.2695, + "step": 5702 + }, + { + "epoch": 6.03, + "learning_rate": 3.494186046511628e-05, + "loss": 0.2532, + "step": 5704 + }, + { + "epoch": 6.03, + "learning_rate": 3.493657505285412e-05, + "loss": 0.1932, + "step": 5706 + }, + { + "epoch": 6.03, + "learning_rate": 3.493128964059197e-05, + "loss": 0.2101, + "step": 5708 + }, + { + "epoch": 6.04, + "learning_rate": 3.4926004228329815e-05, + "loss": 0.2146, + "step": 5710 + }, + { + "epoch": 6.04, + "learning_rate": 3.4920718816067654e-05, + "loss": 0.1982, + "step": 5712 + }, + { + "epoch": 6.04, + "learning_rate": 3.49154334038055e-05, + "loss": 0.0912, + "step": 5714 + }, + { + "epoch": 6.04, + "learning_rate": 3.491014799154334e-05, + "loss": 0.1053, + "step": 5716 + }, + { + "epoch": 6.04, + "learning_rate": 3.4904862579281185e-05, + "loss": 0.2478, + "step": 5718 + }, + { + "epoch": 6.05, + "learning_rate": 3.489957716701903e-05, + "loss": 0.2633, + "step": 5720 + }, + { + "epoch": 6.05, + "learning_rate": 3.489429175475688e-05, + "loss": 0.1436, + "step": 5722 + }, + { + "epoch": 6.05, + "learning_rate": 3.488900634249472e-05, + "loss": 0.2109, + "step": 5724 + }, + { + "epoch": 6.05, + "learning_rate": 3.488372093023256e-05, + "loss": 0.1513, + "step": 5726 + }, + { + "epoch": 6.05, + "learning_rate": 3.48784355179704e-05, + "loss": 0.1171, + "step": 5728 + }, + { + "epoch": 6.06, + "learning_rate": 3.487315010570825e-05, + "loss": 0.0842, + "step": 5730 + }, + { + "epoch": 6.06, + "learning_rate": 3.486786469344609e-05, + "loss": 0.2574, + "step": 5732 + }, + { + "epoch": 6.06, + "learning_rate": 3.4862579281183933e-05, + "loss": 0.0996, + "step": 5734 + }, + { + "epoch": 6.06, + "learning_rate": 3.485729386892177e-05, + "loss": 0.0907, + "step": 5736 + }, + { + "epoch": 6.07, + "learning_rate": 3.4852008456659626e-05, + "loss": 0.1762, + "step": 5738 + }, + { + "epoch": 6.07, + "learning_rate": 3.4846723044397465e-05, + "loss": 0.116, + "step": 5740 + }, + { + "epoch": 6.07, + "learning_rate": 3.484143763213531e-05, + "loss": 0.3131, + "step": 5742 + }, + { + "epoch": 6.07, + "learning_rate": 3.483615221987315e-05, + "loss": 0.2087, + "step": 5744 + }, + { + "epoch": 6.07, + "learning_rate": 3.4830866807610996e-05, + "loss": 0.2606, + "step": 5746 + }, + { + "epoch": 6.08, + "learning_rate": 3.4825581395348836e-05, + "loss": 0.1987, + "step": 5748 + }, + { + "epoch": 6.08, + "learning_rate": 3.482029598308668e-05, + "loss": 0.3575, + "step": 5750 + }, + { + "epoch": 6.08, + "learning_rate": 3.481501057082453e-05, + "loss": 0.2289, + "step": 5752 + }, + { + "epoch": 6.08, + "learning_rate": 3.480972515856237e-05, + "loss": 0.179, + "step": 5754 + }, + { + "epoch": 6.08, + "learning_rate": 3.480443974630021e-05, + "loss": 0.1997, + "step": 5756 + }, + { + "epoch": 6.09, + "learning_rate": 3.479915433403806e-05, + "loss": 0.1259, + "step": 5758 + }, + { + "epoch": 6.09, + "learning_rate": 3.47938689217759e-05, + "loss": 0.1962, + "step": 5760 + }, + { + "epoch": 6.09, + "learning_rate": 3.4788583509513744e-05, + "loss": 0.2209, + "step": 5762 + }, + { + "epoch": 6.09, + "learning_rate": 3.478329809725159e-05, + "loss": 0.1538, + "step": 5764 + }, + { + "epoch": 6.1, + "learning_rate": 3.477801268498943e-05, + "loss": 0.2426, + "step": 5766 + }, + { + "epoch": 6.1, + "learning_rate": 3.4772727272727276e-05, + "loss": 0.3101, + "step": 5768 + }, + { + "epoch": 6.1, + "learning_rate": 3.4767441860465115e-05, + "loss": 0.3042, + "step": 5770 + }, + { + "epoch": 6.1, + "learning_rate": 3.476215644820296e-05, + "loss": 0.2477, + "step": 5772 + }, + { + "epoch": 6.1, + "learning_rate": 3.47568710359408e-05, + "loss": 0.1398, + "step": 5774 + }, + { + "epoch": 6.11, + "learning_rate": 3.475158562367865e-05, + "loss": 0.1257, + "step": 5776 + }, + { + "epoch": 6.11, + "learning_rate": 3.474630021141649e-05, + "loss": 0.1497, + "step": 5778 + }, + { + "epoch": 6.11, + "learning_rate": 3.474101479915434e-05, + "loss": 0.1602, + "step": 5780 + }, + { + "epoch": 6.11, + "learning_rate": 3.473572938689218e-05, + "loss": 0.1371, + "step": 5782 + }, + { + "epoch": 6.11, + "learning_rate": 3.4730443974630024e-05, + "loss": 0.1337, + "step": 5784 + }, + { + "epoch": 6.12, + "learning_rate": 3.472515856236786e-05, + "loss": 0.139, + "step": 5786 + }, + { + "epoch": 6.12, + "learning_rate": 3.471987315010571e-05, + "loss": 0.2151, + "step": 5788 + }, + { + "epoch": 6.12, + "learning_rate": 3.471458773784355e-05, + "loss": 0.1326, + "step": 5790 + }, + { + "epoch": 6.12, + "learning_rate": 3.4709302325581395e-05, + "loss": 0.1047, + "step": 5792 + }, + { + "epoch": 6.12, + "learning_rate": 3.470401691331924e-05, + "loss": 0.2743, + "step": 5794 + }, + { + "epoch": 6.13, + "learning_rate": 3.469873150105709e-05, + "loss": 0.1087, + "step": 5796 + }, + { + "epoch": 6.13, + "learning_rate": 3.4693446088794926e-05, + "loss": 0.1329, + "step": 5798 + }, + { + "epoch": 6.13, + "learning_rate": 3.468816067653277e-05, + "loss": 0.0915, + "step": 5800 + }, + { + "epoch": 6.13, + "learning_rate": 3.468287526427061e-05, + "loss": 0.0456, + "step": 5802 + }, + { + "epoch": 6.14, + "learning_rate": 3.467758985200846e-05, + "loss": 0.1955, + "step": 5804 + }, + { + "epoch": 6.14, + "learning_rate": 3.4672304439746304e-05, + "loss": 0.1902, + "step": 5806 + }, + { + "epoch": 6.14, + "learning_rate": 3.466701902748414e-05, + "loss": 0.1203, + "step": 5808 + }, + { + "epoch": 6.14, + "learning_rate": 3.466173361522199e-05, + "loss": 0.1, + "step": 5810 + }, + { + "epoch": 6.14, + "learning_rate": 3.4656448202959835e-05, + "loss": 0.112, + "step": 5812 + }, + { + "epoch": 6.15, + "learning_rate": 3.465116279069768e-05, + "loss": 0.1416, + "step": 5814 + }, + { + "epoch": 6.15, + "learning_rate": 3.464587737843552e-05, + "loss": 0.1604, + "step": 5816 + }, + { + "epoch": 6.15, + "learning_rate": 3.4640591966173366e-05, + "loss": 0.1217, + "step": 5818 + }, + { + "epoch": 6.15, + "learning_rate": 3.4635306553911206e-05, + "loss": 0.2795, + "step": 5820 + }, + { + "epoch": 6.15, + "learning_rate": 3.463002114164905e-05, + "loss": 0.221, + "step": 5822 + }, + { + "epoch": 6.16, + "learning_rate": 3.462473572938689e-05, + "loss": 0.1878, + "step": 5824 + }, + { + "epoch": 6.16, + "learning_rate": 3.461945031712474e-05, + "loss": 0.1025, + "step": 5826 + }, + { + "epoch": 6.16, + "learning_rate": 3.4614164904862576e-05, + "loss": 0.1729, + "step": 5828 + }, + { + "epoch": 6.16, + "learning_rate": 3.460887949260043e-05, + "loss": 0.096, + "step": 5830 + }, + { + "epoch": 6.16, + "learning_rate": 3.460359408033827e-05, + "loss": 0.158, + "step": 5832 + }, + { + "epoch": 6.17, + "learning_rate": 3.4598308668076114e-05, + "loss": 0.1745, + "step": 5834 + }, + { + "epoch": 6.17, + "learning_rate": 3.4593023255813954e-05, + "loss": 0.1705, + "step": 5836 + }, + { + "epoch": 6.17, + "learning_rate": 3.459038054968288e-05, + "loss": 0.3037, + "step": 5838 + }, + { + "epoch": 6.17, + "learning_rate": 3.458509513742072e-05, + "loss": 0.1047, + "step": 5840 + }, + { + "epoch": 6.18, + "learning_rate": 3.457980972515856e-05, + "loss": 0.0526, + "step": 5842 + }, + { + "epoch": 6.18, + "learning_rate": 3.457452431289641e-05, + "loss": 0.2037, + "step": 5844 + }, + { + "epoch": 6.18, + "learning_rate": 3.456923890063425e-05, + "loss": 0.087, + "step": 5846 + }, + { + "epoch": 6.18, + "learning_rate": 3.45639534883721e-05, + "loss": 0.1763, + "step": 5848 + }, + { + "epoch": 6.18, + "learning_rate": 3.455866807610994e-05, + "loss": 0.0715, + "step": 5850 + }, + { + "epoch": 6.19, + "learning_rate": 3.4553382663847786e-05, + "loss": 0.2581, + "step": 5852 + }, + { + "epoch": 6.19, + "learning_rate": 3.4548097251585625e-05, + "loss": 0.1202, + "step": 5854 + }, + { + "epoch": 6.19, + "learning_rate": 3.454281183932347e-05, + "loss": 0.0978, + "step": 5856 + }, + { + "epoch": 6.19, + "learning_rate": 3.453752642706131e-05, + "loss": 0.1894, + "step": 5858 + }, + { + "epoch": 6.19, + "learning_rate": 3.4532241014799156e-05, + "loss": 0.1545, + "step": 5860 + }, + { + "epoch": 6.2, + "learning_rate": 3.4526955602536996e-05, + "loss": 0.1361, + "step": 5862 + }, + { + "epoch": 6.2, + "learning_rate": 3.452167019027484e-05, + "loss": 0.2735, + "step": 5864 + }, + { + "epoch": 6.2, + "learning_rate": 3.451638477801269e-05, + "loss": 0.211, + "step": 5866 + }, + { + "epoch": 6.2, + "learning_rate": 3.4511099365750534e-05, + "loss": 0.107, + "step": 5868 + }, + { + "epoch": 6.21, + "learning_rate": 3.450581395348837e-05, + "loss": 0.1412, + "step": 5870 + }, + { + "epoch": 6.21, + "learning_rate": 3.450052854122622e-05, + "loss": 0.2465, + "step": 5872 + }, + { + "epoch": 6.21, + "learning_rate": 3.449524312896406e-05, + "loss": 0.1566, + "step": 5874 + }, + { + "epoch": 6.21, + "learning_rate": 3.4489957716701904e-05, + "loss": 0.1874, + "step": 5876 + }, + { + "epoch": 6.21, + "learning_rate": 3.4484672304439744e-05, + "loss": 0.2111, + "step": 5878 + }, + { + "epoch": 6.22, + "learning_rate": 3.447938689217759e-05, + "loss": 0.1737, + "step": 5880 + }, + { + "epoch": 6.22, + "learning_rate": 3.4474101479915436e-05, + "loss": 0.0879, + "step": 5882 + }, + { + "epoch": 6.22, + "learning_rate": 3.446881606765328e-05, + "loss": 0.158, + "step": 5884 + }, + { + "epoch": 6.22, + "learning_rate": 3.446353065539112e-05, + "loss": 0.2724, + "step": 5886 + }, + { + "epoch": 6.22, + "learning_rate": 3.445824524312897e-05, + "loss": 0.1235, + "step": 5888 + }, + { + "epoch": 6.23, + "learning_rate": 3.445295983086681e-05, + "loss": 0.2047, + "step": 5890 + }, + { + "epoch": 6.23, + "learning_rate": 3.444767441860465e-05, + "loss": 0.2076, + "step": 5892 + }, + { + "epoch": 6.23, + "learning_rate": 3.44423890063425e-05, + "loss": 0.1676, + "step": 5894 + }, + { + "epoch": 6.23, + "learning_rate": 3.443710359408034e-05, + "loss": 0.1896, + "step": 5896 + }, + { + "epoch": 6.23, + "learning_rate": 3.4431818181818184e-05, + "loss": 0.145, + "step": 5898 + }, + { + "epoch": 6.24, + "learning_rate": 3.442653276955602e-05, + "loss": 0.0989, + "step": 5900 + }, + { + "epoch": 6.24, + "learning_rate": 3.4421247357293876e-05, + "loss": 0.1219, + "step": 5902 + }, + { + "epoch": 6.24, + "learning_rate": 3.4415961945031715e-05, + "loss": 0.1201, + "step": 5904 + }, + { + "epoch": 6.24, + "learning_rate": 3.441067653276956e-05, + "loss": 0.2582, + "step": 5906 + }, + { + "epoch": 6.25, + "learning_rate": 3.44053911205074e-05, + "loss": 0.1738, + "step": 5908 + }, + { + "epoch": 6.25, + "learning_rate": 3.440010570824525e-05, + "loss": 0.2368, + "step": 5910 + }, + { + "epoch": 6.25, + "learning_rate": 3.4394820295983086e-05, + "loss": 0.1735, + "step": 5912 + }, + { + "epoch": 6.25, + "learning_rate": 3.438953488372093e-05, + "loss": 0.1939, + "step": 5914 + }, + { + "epoch": 6.25, + "learning_rate": 3.438424947145877e-05, + "loss": 0.1386, + "step": 5916 + }, + { + "epoch": 6.26, + "learning_rate": 3.437896405919662e-05, + "loss": 0.0951, + "step": 5918 + }, + { + "epoch": 6.26, + "learning_rate": 3.4373678646934464e-05, + "loss": 0.061, + "step": 5920 + }, + { + "epoch": 6.26, + "learning_rate": 3.436839323467231e-05, + "loss": 0.1492, + "step": 5922 + }, + { + "epoch": 6.26, + "learning_rate": 3.436310782241015e-05, + "loss": 0.1947, + "step": 5924 + }, + { + "epoch": 6.26, + "learning_rate": 3.4357822410147995e-05, + "loss": 0.1067, + "step": 5926 + }, + { + "epoch": 6.27, + "learning_rate": 3.4352536997885834e-05, + "loss": 0.1853, + "step": 5928 + }, + { + "epoch": 6.27, + "learning_rate": 3.434725158562368e-05, + "loss": 0.2154, + "step": 5930 + }, + { + "epoch": 6.27, + "learning_rate": 3.434196617336152e-05, + "loss": 0.2419, + "step": 5932 + }, + { + "epoch": 6.27, + "learning_rate": 3.4336680761099366e-05, + "loss": 0.184, + "step": 5934 + }, + { + "epoch": 6.27, + "learning_rate": 3.433139534883721e-05, + "loss": 0.1954, + "step": 5936 + }, + { + "epoch": 6.28, + "learning_rate": 3.432610993657506e-05, + "loss": 0.2227, + "step": 5938 + }, + { + "epoch": 6.28, + "learning_rate": 3.43208245243129e-05, + "loss": 0.2063, + "step": 5940 + }, + { + "epoch": 6.28, + "learning_rate": 3.431553911205074e-05, + "loss": 0.1288, + "step": 5942 + }, + { + "epoch": 6.28, + "learning_rate": 3.431025369978859e-05, + "loss": 0.1622, + "step": 5944 + }, + { + "epoch": 6.29, + "learning_rate": 3.430496828752643e-05, + "loss": 0.1974, + "step": 5946 + }, + { + "epoch": 6.29, + "learning_rate": 3.4299682875264275e-05, + "loss": 0.1642, + "step": 5948 + }, + { + "epoch": 6.29, + "learning_rate": 3.4294397463002114e-05, + "loss": 0.1333, + "step": 5950 + }, + { + "epoch": 6.29, + "learning_rate": 3.428911205073996e-05, + "loss": 0.1552, + "step": 5952 + }, + { + "epoch": 6.29, + "learning_rate": 3.42838266384778e-05, + "loss": 0.165, + "step": 5954 + }, + { + "epoch": 6.3, + "learning_rate": 3.427854122621565e-05, + "loss": 0.1734, + "step": 5956 + }, + { + "epoch": 6.3, + "learning_rate": 3.427325581395349e-05, + "loss": 0.1146, + "step": 5958 + }, + { + "epoch": 6.3, + "learning_rate": 3.426797040169134e-05, + "loss": 0.1115, + "step": 5960 + }, + { + "epoch": 6.3, + "learning_rate": 3.426268498942918e-05, + "loss": 0.2913, + "step": 5962 + }, + { + "epoch": 6.3, + "learning_rate": 3.425739957716702e-05, + "loss": 0.1059, + "step": 5964 + }, + { + "epoch": 6.31, + "learning_rate": 3.425211416490486e-05, + "loss": 0.1041, + "step": 5966 + }, + { + "epoch": 6.31, + "learning_rate": 3.424682875264271e-05, + "loss": 0.1611, + "step": 5968 + }, + { + "epoch": 6.31, + "learning_rate": 3.424154334038055e-05, + "loss": 0.0764, + "step": 5970 + }, + { + "epoch": 6.31, + "learning_rate": 3.423625792811839e-05, + "loss": 0.1817, + "step": 5972 + }, + { + "epoch": 6.32, + "learning_rate": 3.423097251585624e-05, + "loss": 0.1004, + "step": 5974 + }, + { + "epoch": 6.32, + "learning_rate": 3.4225687103594085e-05, + "loss": 0.0877, + "step": 5976 + }, + { + "epoch": 6.32, + "learning_rate": 3.4220401691331925e-05, + "loss": 0.1462, + "step": 5978 + }, + { + "epoch": 6.32, + "learning_rate": 3.421511627906977e-05, + "loss": 0.1329, + "step": 5980 + }, + { + "epoch": 6.32, + "learning_rate": 3.420983086680761e-05, + "loss": 0.1345, + "step": 5982 + }, + { + "epoch": 6.33, + "learning_rate": 3.4204545454545456e-05, + "loss": 0.1035, + "step": 5984 + }, + { + "epoch": 6.33, + "learning_rate": 3.4199260042283295e-05, + "loss": 0.2515, + "step": 5986 + }, + { + "epoch": 6.33, + "learning_rate": 3.419397463002114e-05, + "loss": 0.1297, + "step": 5988 + }, + { + "epoch": 6.33, + "learning_rate": 3.418868921775899e-05, + "loss": 0.2148, + "step": 5990 + }, + { + "epoch": 6.33, + "learning_rate": 3.4183403805496834e-05, + "loss": 0.1103, + "step": 5992 + }, + { + "epoch": 6.34, + "learning_rate": 3.417811839323467e-05, + "loss": 0.114, + "step": 5994 + }, + { + "epoch": 6.34, + "learning_rate": 3.417283298097252e-05, + "loss": 0.1915, + "step": 5996 + }, + { + "epoch": 6.34, + "learning_rate": 3.4167547568710365e-05, + "loss": 0.1484, + "step": 5998 + }, + { + "epoch": 6.34, + "learning_rate": 3.4162262156448204e-05, + "loss": 0.1164, + "step": 6000 + }, + { + "epoch": 6.34, + "eval_cer": 0.025876318039327443, + "eval_loss": 0.5018019676208496, + "eval_runtime": 128.3817, + "eval_samples_per_second": 6.551, + "eval_steps_per_second": 0.826, + "step": 6000 + }, + { + "epoch": 6.34, + "learning_rate": 3.415697674418605e-05, + "loss": 0.0918, + "step": 6002 + }, + { + "epoch": 6.35, + "learning_rate": 3.415169133192389e-05, + "loss": 0.1645, + "step": 6004 + }, + { + "epoch": 6.35, + "learning_rate": 3.4146405919661736e-05, + "loss": 0.0649, + "step": 6006 + }, + { + "epoch": 6.35, + "learning_rate": 3.4141120507399575e-05, + "loss": 0.158, + "step": 6008 + }, + { + "epoch": 6.35, + "learning_rate": 3.413583509513743e-05, + "loss": 0.1728, + "step": 6010 + }, + { + "epoch": 6.36, + "learning_rate": 3.413054968287527e-05, + "loss": 0.2382, + "step": 6012 + }, + { + "epoch": 6.36, + "learning_rate": 3.412526427061311e-05, + "loss": 0.1304, + "step": 6014 + }, + { + "epoch": 6.36, + "learning_rate": 3.411997885835095e-05, + "loss": 0.1578, + "step": 6016 + }, + { + "epoch": 6.36, + "learning_rate": 3.41146934460888e-05, + "loss": 0.1018, + "step": 6018 + }, + { + "epoch": 6.36, + "learning_rate": 3.410940803382664e-05, + "loss": 0.1749, + "step": 6020 + }, + { + "epoch": 6.37, + "learning_rate": 3.4104122621564484e-05, + "loss": 0.0877, + "step": 6022 + }, + { + "epoch": 6.37, + "learning_rate": 3.409883720930232e-05, + "loss": 0.1199, + "step": 6024 + }, + { + "epoch": 6.37, + "learning_rate": 3.409355179704017e-05, + "loss": 0.1858, + "step": 6026 + }, + { + "epoch": 6.37, + "learning_rate": 3.4088266384778015e-05, + "loss": 0.1073, + "step": 6028 + }, + { + "epoch": 6.37, + "learning_rate": 3.408298097251586e-05, + "loss": 0.0823, + "step": 6030 + }, + { + "epoch": 6.38, + "learning_rate": 3.40776955602537e-05, + "loss": 0.1692, + "step": 6032 + }, + { + "epoch": 6.38, + "learning_rate": 3.407241014799155e-05, + "loss": 0.1037, + "step": 6034 + }, + { + "epoch": 6.38, + "learning_rate": 3.4067124735729386e-05, + "loss": 0.0908, + "step": 6036 + }, + { + "epoch": 6.38, + "learning_rate": 3.406183932346723e-05, + "loss": 0.1123, + "step": 6038 + }, + { + "epoch": 6.38, + "learning_rate": 3.405655391120507e-05, + "loss": 0.1272, + "step": 6040 + }, + { + "epoch": 6.39, + "learning_rate": 3.405126849894292e-05, + "loss": 0.1125, + "step": 6042 + }, + { + "epoch": 6.39, + "learning_rate": 3.4045983086680763e-05, + "loss": 0.1315, + "step": 6044 + }, + { + "epoch": 6.39, + "learning_rate": 3.404069767441861e-05, + "loss": 0.2304, + "step": 6046 + }, + { + "epoch": 6.39, + "learning_rate": 3.403541226215645e-05, + "loss": 0.108, + "step": 6048 + }, + { + "epoch": 6.4, + "learning_rate": 3.4030126849894295e-05, + "loss": 0.2105, + "step": 6050 + }, + { + "epoch": 6.4, + "learning_rate": 3.402484143763214e-05, + "loss": 0.1575, + "step": 6052 + }, + { + "epoch": 6.4, + "learning_rate": 3.401955602536998e-05, + "loss": 0.1972, + "step": 6054 + }, + { + "epoch": 6.4, + "learning_rate": 3.4014270613107826e-05, + "loss": 0.1202, + "step": 6056 + }, + { + "epoch": 6.4, + "learning_rate": 3.4008985200845665e-05, + "loss": 0.1426, + "step": 6058 + }, + { + "epoch": 6.41, + "learning_rate": 3.400369978858351e-05, + "loss": 0.1163, + "step": 6060 + }, + { + "epoch": 6.41, + "learning_rate": 3.399841437632135e-05, + "loss": 0.2422, + "step": 6062 + }, + { + "epoch": 6.41, + "learning_rate": 3.3993128964059204e-05, + "loss": 0.1269, + "step": 6064 + }, + { + "epoch": 6.41, + "learning_rate": 3.398784355179704e-05, + "loss": 0.1424, + "step": 6066 + }, + { + "epoch": 6.41, + "learning_rate": 3.398255813953489e-05, + "loss": 0.1831, + "step": 6068 + }, + { + "epoch": 6.42, + "learning_rate": 3.397727272727273e-05, + "loss": 0.2557, + "step": 6070 + }, + { + "epoch": 6.42, + "learning_rate": 3.3971987315010574e-05, + "loss": 0.3715, + "step": 6072 + }, + { + "epoch": 6.42, + "learning_rate": 3.3966701902748414e-05, + "loss": 0.3253, + "step": 6074 + }, + { + "epoch": 6.42, + "learning_rate": 3.396141649048626e-05, + "loss": 0.0648, + "step": 6076 + }, + { + "epoch": 6.42, + "learning_rate": 3.39561310782241e-05, + "loss": 0.2528, + "step": 6078 + }, + { + "epoch": 6.43, + "learning_rate": 3.3950845665961945e-05, + "loss": 0.1489, + "step": 6080 + }, + { + "epoch": 6.43, + "learning_rate": 3.394556025369979e-05, + "loss": 0.1198, + "step": 6082 + }, + { + "epoch": 6.43, + "learning_rate": 3.394027484143764e-05, + "loss": 0.1566, + "step": 6084 + }, + { + "epoch": 6.43, + "learning_rate": 3.3934989429175476e-05, + "loss": 0.2284, + "step": 6086 + }, + { + "epoch": 6.44, + "learning_rate": 3.392970401691332e-05, + "loss": 0.1358, + "step": 6088 + }, + { + "epoch": 6.44, + "learning_rate": 3.392441860465116e-05, + "loss": 0.0754, + "step": 6090 + }, + { + "epoch": 6.44, + "learning_rate": 3.391913319238901e-05, + "loss": 0.0822, + "step": 6092 + }, + { + "epoch": 6.44, + "learning_rate": 3.3913847780126854e-05, + "loss": 0.1326, + "step": 6094 + }, + { + "epoch": 6.44, + "learning_rate": 3.390856236786469e-05, + "loss": 0.1727, + "step": 6096 + }, + { + "epoch": 6.45, + "learning_rate": 3.390327695560254e-05, + "loss": 0.1164, + "step": 6098 + }, + { + "epoch": 6.45, + "learning_rate": 3.389799154334038e-05, + "loss": 0.3431, + "step": 6100 + }, + { + "epoch": 6.45, + "learning_rate": 3.3892706131078225e-05, + "loss": 0.1192, + "step": 6102 + }, + { + "epoch": 6.45, + "learning_rate": 3.388742071881607e-05, + "loss": 0.0976, + "step": 6104 + }, + { + "epoch": 6.45, + "learning_rate": 3.388213530655392e-05, + "loss": 0.1534, + "step": 6106 + }, + { + "epoch": 6.46, + "learning_rate": 3.3876849894291756e-05, + "loss": 0.1809, + "step": 6108 + }, + { + "epoch": 6.46, + "learning_rate": 3.38715644820296e-05, + "loss": 0.1546, + "step": 6110 + }, + { + "epoch": 6.46, + "learning_rate": 3.386627906976744e-05, + "loss": 0.0665, + "step": 6112 + }, + { + "epoch": 6.46, + "learning_rate": 3.386099365750529e-05, + "loss": 0.1246, + "step": 6114 + }, + { + "epoch": 6.47, + "learning_rate": 3.385570824524313e-05, + "loss": 0.2174, + "step": 6116 + }, + { + "epoch": 6.47, + "learning_rate": 3.385042283298097e-05, + "loss": 0.1845, + "step": 6118 + }, + { + "epoch": 6.47, + "learning_rate": 3.384513742071882e-05, + "loss": 0.1134, + "step": 6120 + }, + { + "epoch": 6.47, + "learning_rate": 3.3839852008456665e-05, + "loss": 0.2435, + "step": 6122 + }, + { + "epoch": 6.47, + "learning_rate": 3.3834566596194504e-05, + "loss": 0.1558, + "step": 6124 + }, + { + "epoch": 6.48, + "learning_rate": 3.382928118393235e-05, + "loss": 0.0459, + "step": 6126 + }, + { + "epoch": 6.48, + "learning_rate": 3.382399577167019e-05, + "loss": 0.1331, + "step": 6128 + }, + { + "epoch": 6.48, + "learning_rate": 3.3818710359408036e-05, + "loss": 0.1732, + "step": 6130 + }, + { + "epoch": 6.48, + "learning_rate": 3.3813424947145875e-05, + "loss": 0.1099, + "step": 6132 + }, + { + "epoch": 6.48, + "learning_rate": 3.380813953488372e-05, + "loss": 0.2, + "step": 6134 + }, + { + "epoch": 6.49, + "learning_rate": 3.380285412262156e-05, + "loss": 0.1682, + "step": 6136 + }, + { + "epoch": 6.49, + "learning_rate": 3.379756871035941e-05, + "loss": 0.1874, + "step": 6138 + }, + { + "epoch": 6.49, + "learning_rate": 3.379228329809725e-05, + "loss": 0.1585, + "step": 6140 + }, + { + "epoch": 6.49, + "learning_rate": 3.37869978858351e-05, + "loss": 0.2005, + "step": 6142 + }, + { + "epoch": 6.49, + "learning_rate": 3.378171247357294e-05, + "loss": 0.1283, + "step": 6144 + }, + { + "epoch": 6.5, + "learning_rate": 3.3776427061310784e-05, + "loss": 0.0849, + "step": 6146 + }, + { + "epoch": 6.5, + "learning_rate": 3.377114164904863e-05, + "loss": 0.1139, + "step": 6148 + }, + { + "epoch": 6.5, + "learning_rate": 3.376585623678647e-05, + "loss": 0.2118, + "step": 6150 + }, + { + "epoch": 6.5, + "learning_rate": 3.3760570824524315e-05, + "loss": 0.2581, + "step": 6152 + }, + { + "epoch": 6.51, + "learning_rate": 3.3755285412262154e-05, + "loss": 0.1161, + "step": 6154 + }, + { + "epoch": 6.51, + "learning_rate": 3.375000000000001e-05, + "loss": 0.1572, + "step": 6156 + }, + { + "epoch": 6.51, + "learning_rate": 3.3744714587737846e-05, + "loss": 0.1451, + "step": 6158 + }, + { + "epoch": 6.51, + "learning_rate": 3.373942917547569e-05, + "loss": 0.1042, + "step": 6160 + }, + { + "epoch": 6.51, + "learning_rate": 3.373414376321353e-05, + "loss": 0.143, + "step": 6162 + }, + { + "epoch": 6.52, + "learning_rate": 3.372885835095138e-05, + "loss": 0.0863, + "step": 6164 + }, + { + "epoch": 6.52, + "learning_rate": 3.372357293868922e-05, + "loss": 0.1268, + "step": 6166 + }, + { + "epoch": 6.52, + "learning_rate": 3.371828752642706e-05, + "loss": 0.1119, + "step": 6168 + }, + { + "epoch": 6.52, + "learning_rate": 3.37130021141649e-05, + "loss": 0.1199, + "step": 6170 + }, + { + "epoch": 6.52, + "learning_rate": 3.370771670190275e-05, + "loss": 0.1628, + "step": 6172 + }, + { + "epoch": 6.53, + "learning_rate": 3.3702431289640595e-05, + "loss": 0.2543, + "step": 6174 + }, + { + "epoch": 6.53, + "learning_rate": 3.369714587737844e-05, + "loss": 0.1345, + "step": 6176 + }, + { + "epoch": 6.53, + "learning_rate": 3.369186046511628e-05, + "loss": 0.0945, + "step": 6178 + }, + { + "epoch": 6.53, + "learning_rate": 3.3686575052854126e-05, + "loss": 0.112, + "step": 6180 + }, + { + "epoch": 6.53, + "learning_rate": 3.3681289640591965e-05, + "loss": 0.1513, + "step": 6182 + }, + { + "epoch": 6.54, + "learning_rate": 3.367600422832981e-05, + "loss": 0.1097, + "step": 6184 + }, + { + "epoch": 6.54, + "learning_rate": 3.367071881606765e-05, + "loss": 0.0726, + "step": 6186 + }, + { + "epoch": 6.54, + "learning_rate": 3.36654334038055e-05, + "loss": 0.2753, + "step": 6188 + }, + { + "epoch": 6.54, + "learning_rate": 3.3660147991543336e-05, + "loss": 0.3024, + "step": 6190 + }, + { + "epoch": 6.55, + "learning_rate": 3.365486257928119e-05, + "loss": 0.4197, + "step": 6192 + }, + { + "epoch": 6.55, + "learning_rate": 3.364957716701903e-05, + "loss": 0.093, + "step": 6194 + }, + { + "epoch": 6.55, + "learning_rate": 3.3644291754756874e-05, + "loss": 0.1188, + "step": 6196 + }, + { + "epoch": 6.55, + "learning_rate": 3.3639006342494713e-05, + "loss": 0.2256, + "step": 6198 + }, + { + "epoch": 6.55, + "learning_rate": 3.363372093023256e-05, + "loss": 0.1508, + "step": 6200 + }, + { + "epoch": 6.56, + "learning_rate": 3.3628435517970406e-05, + "loss": 0.1873, + "step": 6202 + }, + { + "epoch": 6.56, + "learning_rate": 3.3623150105708245e-05, + "loss": 0.0964, + "step": 6204 + }, + { + "epoch": 6.56, + "learning_rate": 3.361786469344609e-05, + "loss": 0.2517, + "step": 6206 + }, + { + "epoch": 6.56, + "learning_rate": 3.361257928118393e-05, + "loss": 0.2191, + "step": 6208 + }, + { + "epoch": 6.56, + "learning_rate": 3.360729386892178e-05, + "loss": 0.1957, + "step": 6210 + }, + { + "epoch": 6.57, + "learning_rate": 3.360200845665962e-05, + "loss": 0.226, + "step": 6212 + }, + { + "epoch": 6.57, + "learning_rate": 3.359672304439747e-05, + "loss": 0.1739, + "step": 6214 + }, + { + "epoch": 6.57, + "learning_rate": 3.359143763213531e-05, + "loss": 0.1438, + "step": 6216 + }, + { + "epoch": 6.57, + "learning_rate": 3.3586152219873154e-05, + "loss": 0.0768, + "step": 6218 + }, + { + "epoch": 6.58, + "learning_rate": 3.358086680761099e-05, + "loss": 0.1309, + "step": 6220 + }, + { + "epoch": 6.58, + "learning_rate": 3.357558139534884e-05, + "loss": 0.1162, + "step": 6222 + }, + { + "epoch": 6.58, + "learning_rate": 3.357029598308668e-05, + "loss": 0.1296, + "step": 6224 + }, + { + "epoch": 6.58, + "learning_rate": 3.3565010570824524e-05, + "loss": 0.1339, + "step": 6226 + }, + { + "epoch": 6.58, + "learning_rate": 3.355972515856237e-05, + "loss": 0.1097, + "step": 6228 + }, + { + "epoch": 6.59, + "learning_rate": 3.3554439746300217e-05, + "loss": 0.1484, + "step": 6230 + }, + { + "epoch": 6.59, + "learning_rate": 3.3549154334038056e-05, + "loss": 0.1588, + "step": 6232 + }, + { + "epoch": 6.59, + "learning_rate": 3.35438689217759e-05, + "loss": 0.3276, + "step": 6234 + }, + { + "epoch": 6.59, + "learning_rate": 3.353858350951374e-05, + "loss": 0.0764, + "step": 6236 + }, + { + "epoch": 6.59, + "learning_rate": 3.353329809725159e-05, + "loss": 0.1882, + "step": 6238 + }, + { + "epoch": 6.6, + "learning_rate": 3.3528012684989427e-05, + "loss": 0.3437, + "step": 6240 + }, + { + "epoch": 6.6, + "learning_rate": 3.352272727272727e-05, + "loss": 0.2354, + "step": 6242 + }, + { + "epoch": 6.6, + "learning_rate": 3.351744186046512e-05, + "loss": 0.1915, + "step": 6244 + }, + { + "epoch": 6.6, + "learning_rate": 3.3512156448202965e-05, + "loss": 0.1843, + "step": 6246 + }, + { + "epoch": 6.6, + "learning_rate": 3.3506871035940804e-05, + "loss": 0.0724, + "step": 6248 + }, + { + "epoch": 6.61, + "learning_rate": 3.350158562367865e-05, + "loss": 0.141, + "step": 6250 + }, + { + "epoch": 6.61, + "learning_rate": 3.349630021141649e-05, + "loss": 0.1799, + "step": 6252 + }, + { + "epoch": 6.61, + "learning_rate": 3.3491014799154335e-05, + "loss": 0.1416, + "step": 6254 + }, + { + "epoch": 6.61, + "learning_rate": 3.348572938689218e-05, + "loss": 0.1412, + "step": 6256 + }, + { + "epoch": 6.62, + "learning_rate": 3.348044397463002e-05, + "loss": 0.1163, + "step": 6258 + }, + { + "epoch": 6.62, + "learning_rate": 3.347515856236787e-05, + "loss": 0.1239, + "step": 6260 + }, + { + "epoch": 6.62, + "learning_rate": 3.3469873150105706e-05, + "loss": 0.2748, + "step": 6262 + }, + { + "epoch": 6.62, + "learning_rate": 3.346458773784356e-05, + "loss": 0.13, + "step": 6264 + }, + { + "epoch": 6.62, + "learning_rate": 3.34593023255814e-05, + "loss": 0.1479, + "step": 6266 + }, + { + "epoch": 6.63, + "learning_rate": 3.3454016913319244e-05, + "loss": 0.162, + "step": 6268 + }, + { + "epoch": 6.63, + "learning_rate": 3.3448731501057084e-05, + "loss": 0.1834, + "step": 6270 + }, + { + "epoch": 6.63, + "learning_rate": 3.344344608879493e-05, + "loss": 0.1042, + "step": 6272 + }, + { + "epoch": 6.63, + "learning_rate": 3.343816067653277e-05, + "loss": 0.1347, + "step": 6274 + }, + { + "epoch": 6.63, + "learning_rate": 3.3432875264270615e-05, + "loss": 0.2021, + "step": 6276 + }, + { + "epoch": 6.64, + "learning_rate": 3.3427589852008454e-05, + "loss": 0.1335, + "step": 6278 + }, + { + "epoch": 6.64, + "learning_rate": 3.34223044397463e-05, + "loss": 0.1198, + "step": 6280 + }, + { + "epoch": 6.64, + "learning_rate": 3.3417019027484146e-05, + "loss": 0.1227, + "step": 6282 + }, + { + "epoch": 6.64, + "learning_rate": 3.341173361522199e-05, + "loss": 0.227, + "step": 6284 + }, + { + "epoch": 6.64, + "learning_rate": 3.340644820295983e-05, + "loss": 0.1764, + "step": 6286 + }, + { + "epoch": 6.65, + "learning_rate": 3.340116279069768e-05, + "loss": 0.2143, + "step": 6288 + }, + { + "epoch": 6.65, + "learning_rate": 3.339587737843552e-05, + "loss": 0.1631, + "step": 6290 + }, + { + "epoch": 6.65, + "learning_rate": 3.339059196617336e-05, + "loss": 0.3019, + "step": 6292 + }, + { + "epoch": 6.65, + "learning_rate": 3.33853065539112e-05, + "loss": 0.1745, + "step": 6294 + }, + { + "epoch": 6.66, + "learning_rate": 3.338002114164905e-05, + "loss": 0.189, + "step": 6296 + }, + { + "epoch": 6.66, + "learning_rate": 3.3374735729386894e-05, + "loss": 0.1097, + "step": 6298 + }, + { + "epoch": 6.66, + "learning_rate": 3.336945031712474e-05, + "loss": 0.1263, + "step": 6300 + }, + { + "epoch": 6.66, + "learning_rate": 3.336416490486258e-05, + "loss": 0.1255, + "step": 6302 + }, + { + "epoch": 6.66, + "learning_rate": 3.3358879492600426e-05, + "loss": 0.1996, + "step": 6304 + }, + { + "epoch": 6.67, + "learning_rate": 3.3353594080338265e-05, + "loss": 0.2422, + "step": 6306 + }, + { + "epoch": 6.67, + "learning_rate": 3.334830866807611e-05, + "loss": 0.111, + "step": 6308 + }, + { + "epoch": 6.67, + "learning_rate": 3.334302325581396e-05, + "loss": 0.1312, + "step": 6310 + }, + { + "epoch": 6.67, + "learning_rate": 3.3337737843551797e-05, + "loss": 0.2097, + "step": 6312 + }, + { + "epoch": 6.67, + "learning_rate": 3.333245243128964e-05, + "loss": 0.2236, + "step": 6314 + }, + { + "epoch": 6.68, + "learning_rate": 3.332716701902748e-05, + "loss": 0.2479, + "step": 6316 + }, + { + "epoch": 6.68, + "learning_rate": 3.3321881606765335e-05, + "loss": 0.1397, + "step": 6318 + }, + { + "epoch": 6.68, + "learning_rate": 3.3316596194503174e-05, + "loss": 0.1172, + "step": 6320 + }, + { + "epoch": 6.68, + "learning_rate": 3.331131078224102e-05, + "loss": 0.1166, + "step": 6322 + }, + { + "epoch": 6.68, + "learning_rate": 3.330602536997886e-05, + "loss": 0.1666, + "step": 6324 + }, + { + "epoch": 6.69, + "learning_rate": 3.3300739957716705e-05, + "loss": 0.1743, + "step": 6326 + }, + { + "epoch": 6.69, + "learning_rate": 3.3295454545454545e-05, + "loss": 0.1095, + "step": 6328 + }, + { + "epoch": 6.69, + "learning_rate": 3.329016913319239e-05, + "loss": 0.1689, + "step": 6330 + }, + { + "epoch": 6.69, + "learning_rate": 3.328488372093023e-05, + "loss": 0.1888, + "step": 6332 + }, + { + "epoch": 6.7, + "learning_rate": 3.3279598308668076e-05, + "loss": 0.2615, + "step": 6334 + }, + { + "epoch": 6.7, + "learning_rate": 3.327431289640592e-05, + "loss": 0.1308, + "step": 6336 + }, + { + "epoch": 6.7, + "learning_rate": 3.326902748414377e-05, + "loss": 0.142, + "step": 6338 + }, + { + "epoch": 6.7, + "learning_rate": 3.326374207188161e-05, + "loss": 0.3503, + "step": 6340 + }, + { + "epoch": 6.7, + "learning_rate": 3.3258456659619454e-05, + "loss": 0.2034, + "step": 6342 + }, + { + "epoch": 6.71, + "learning_rate": 3.325317124735729e-05, + "loss": 0.1894, + "step": 6344 + }, + { + "epoch": 6.71, + "learning_rate": 3.324788583509514e-05, + "loss": 0.141, + "step": 6346 + }, + { + "epoch": 6.71, + "learning_rate": 3.324260042283298e-05, + "loss": 0.1073, + "step": 6348 + }, + { + "epoch": 6.71, + "learning_rate": 3.3237315010570824e-05, + "loss": 0.1851, + "step": 6350 + }, + { + "epoch": 6.71, + "learning_rate": 3.323202959830867e-05, + "loss": 0.1882, + "step": 6352 + }, + { + "epoch": 6.72, + "learning_rate": 3.3226744186046516e-05, + "loss": 0.1869, + "step": 6354 + }, + { + "epoch": 6.72, + "learning_rate": 3.3221458773784356e-05, + "loss": 0.2836, + "step": 6356 + }, + { + "epoch": 6.72, + "learning_rate": 3.32161733615222e-05, + "loss": 0.1976, + "step": 6358 + }, + { + "epoch": 6.72, + "learning_rate": 3.321088794926005e-05, + "loss": 0.043, + "step": 6360 + }, + { + "epoch": 6.73, + "learning_rate": 3.320560253699789e-05, + "loss": 0.0924, + "step": 6362 + }, + { + "epoch": 6.73, + "learning_rate": 3.320031712473573e-05, + "loss": 0.2507, + "step": 6364 + }, + { + "epoch": 6.73, + "learning_rate": 3.319503171247357e-05, + "loss": 0.232, + "step": 6366 + }, + { + "epoch": 6.73, + "learning_rate": 3.318974630021142e-05, + "loss": 0.2588, + "step": 6368 + }, + { + "epoch": 6.73, + "learning_rate": 3.318446088794926e-05, + "loss": 0.2609, + "step": 6370 + }, + { + "epoch": 6.74, + "learning_rate": 3.317917547568711e-05, + "loss": 0.0929, + "step": 6372 + }, + { + "epoch": 6.74, + "learning_rate": 3.317389006342495e-05, + "loss": 0.3943, + "step": 6374 + }, + { + "epoch": 6.74, + "learning_rate": 3.3168604651162796e-05, + "loss": 0.1662, + "step": 6376 + }, + { + "epoch": 6.74, + "learning_rate": 3.3163319238900635e-05, + "loss": 0.1248, + "step": 6378 + }, + { + "epoch": 6.74, + "learning_rate": 3.315803382663848e-05, + "loss": 0.1521, + "step": 6380 + }, + { + "epoch": 6.75, + "learning_rate": 3.315274841437632e-05, + "loss": 0.1616, + "step": 6382 + }, + { + "epoch": 6.75, + "learning_rate": 3.3147463002114167e-05, + "loss": 0.1287, + "step": 6384 + }, + { + "epoch": 6.75, + "learning_rate": 3.3142177589852006e-05, + "loss": 0.1336, + "step": 6386 + }, + { + "epoch": 6.75, + "learning_rate": 3.313689217758985e-05, + "loss": 0.0605, + "step": 6388 + }, + { + "epoch": 6.75, + "learning_rate": 3.31316067653277e-05, + "loss": 0.1347, + "step": 6390 + }, + { + "epoch": 6.76, + "learning_rate": 3.3126321353065544e-05, + "loss": 0.2142, + "step": 6392 + }, + { + "epoch": 6.76, + "learning_rate": 3.312103594080338e-05, + "loss": 0.1862, + "step": 6394 + }, + { + "epoch": 6.76, + "learning_rate": 3.311575052854123e-05, + "loss": 0.1138, + "step": 6396 + }, + { + "epoch": 6.76, + "learning_rate": 3.311046511627907e-05, + "loss": 0.1733, + "step": 6398 + }, + { + "epoch": 6.77, + "learning_rate": 3.3105179704016915e-05, + "loss": 0.0542, + "step": 6400 + }, + { + "epoch": 6.77, + "learning_rate": 3.3099894291754754e-05, + "loss": 0.1072, + "step": 6402 + }, + { + "epoch": 6.77, + "learning_rate": 3.30946088794926e-05, + "loss": 0.2929, + "step": 6404 + }, + { + "epoch": 6.77, + "learning_rate": 3.3089323467230446e-05, + "loss": 0.3078, + "step": 6406 + }, + { + "epoch": 6.77, + "learning_rate": 3.308403805496829e-05, + "loss": 0.2085, + "step": 6408 + }, + { + "epoch": 6.78, + "learning_rate": 3.307875264270613e-05, + "loss": 0.1387, + "step": 6410 + }, + { + "epoch": 6.78, + "learning_rate": 3.307346723044398e-05, + "loss": 0.1443, + "step": 6412 + }, + { + "epoch": 6.78, + "learning_rate": 3.3068181818181824e-05, + "loss": 0.2217, + "step": 6414 + }, + { + "epoch": 6.78, + "learning_rate": 3.306289640591966e-05, + "loss": 0.1486, + "step": 6416 + }, + { + "epoch": 6.78, + "learning_rate": 3.305761099365751e-05, + "loss": 0.1025, + "step": 6418 + }, + { + "epoch": 6.79, + "learning_rate": 3.305232558139535e-05, + "loss": 0.1191, + "step": 6420 + }, + { + "epoch": 6.79, + "learning_rate": 3.3047040169133194e-05, + "loss": 0.1424, + "step": 6422 + }, + { + "epoch": 6.79, + "learning_rate": 3.3041754756871034e-05, + "loss": 0.2869, + "step": 6424 + }, + { + "epoch": 6.79, + "learning_rate": 3.3036469344608886e-05, + "loss": 0.1608, + "step": 6426 + }, + { + "epoch": 6.79, + "learning_rate": 3.3031183932346726e-05, + "loss": 0.1171, + "step": 6428 + }, + { + "epoch": 6.8, + "learning_rate": 3.302589852008457e-05, + "loss": 0.0738, + "step": 6430 + }, + { + "epoch": 6.8, + "learning_rate": 3.302061310782241e-05, + "loss": 0.2901, + "step": 6432 + }, + { + "epoch": 6.8, + "learning_rate": 3.301532769556026e-05, + "loss": 0.2777, + "step": 6434 + }, + { + "epoch": 6.8, + "learning_rate": 3.3010042283298096e-05, + "loss": 0.1199, + "step": 6436 + }, + { + "epoch": 6.81, + "learning_rate": 3.300475687103594e-05, + "loss": 0.112, + "step": 6438 + }, + { + "epoch": 6.81, + "learning_rate": 3.299947145877378e-05, + "loss": 0.1459, + "step": 6440 + }, + { + "epoch": 6.81, + "learning_rate": 3.299418604651163e-05, + "loss": 0.0753, + "step": 6442 + }, + { + "epoch": 6.81, + "learning_rate": 3.2988900634249474e-05, + "loss": 0.1264, + "step": 6444 + }, + { + "epoch": 6.81, + "learning_rate": 3.298361522198732e-05, + "loss": 0.188, + "step": 6446 + }, + { + "epoch": 6.82, + "learning_rate": 3.297832980972516e-05, + "loss": 0.1609, + "step": 6448 + }, + { + "epoch": 6.82, + "learning_rate": 3.2973044397463005e-05, + "loss": 0.0904, + "step": 6450 + }, + { + "epoch": 6.82, + "learning_rate": 3.2967758985200845e-05, + "loss": 0.1965, + "step": 6452 + }, + { + "epoch": 6.82, + "learning_rate": 3.296247357293869e-05, + "loss": 0.1269, + "step": 6454 + }, + { + "epoch": 6.82, + "learning_rate": 3.295718816067653e-05, + "loss": 0.1102, + "step": 6456 + }, + { + "epoch": 6.83, + "learning_rate": 3.2951902748414376e-05, + "loss": 0.0769, + "step": 6458 + }, + { + "epoch": 6.83, + "learning_rate": 3.294661733615222e-05, + "loss": 0.1696, + "step": 6460 + }, + { + "epoch": 6.83, + "learning_rate": 3.294133192389007e-05, + "loss": 0.1866, + "step": 6462 + }, + { + "epoch": 6.83, + "learning_rate": 3.293604651162791e-05, + "loss": 0.1734, + "step": 6464 + }, + { + "epoch": 6.84, + "learning_rate": 3.293076109936575e-05, + "loss": 0.2824, + "step": 6466 + }, + { + "epoch": 6.84, + "learning_rate": 3.29254756871036e-05, + "loss": 0.1817, + "step": 6468 + }, + { + "epoch": 6.84, + "learning_rate": 3.292019027484144e-05, + "loss": 0.1357, + "step": 6470 + }, + { + "epoch": 6.84, + "learning_rate": 3.2914904862579285e-05, + "loss": 0.2785, + "step": 6472 + }, + { + "epoch": 6.84, + "learning_rate": 3.2909619450317124e-05, + "loss": 0.15, + "step": 6474 + }, + { + "epoch": 6.85, + "learning_rate": 3.290433403805497e-05, + "loss": 0.1504, + "step": 6476 + }, + { + "epoch": 6.85, + "learning_rate": 3.289904862579281e-05, + "loss": 0.3111, + "step": 6478 + }, + { + "epoch": 6.85, + "learning_rate": 3.289376321353066e-05, + "loss": 0.1105, + "step": 6480 + }, + { + "epoch": 6.85, + "learning_rate": 3.28884778012685e-05, + "loss": 0.0804, + "step": 6482 + }, + { + "epoch": 6.85, + "learning_rate": 3.288319238900635e-05, + "loss": 0.23, + "step": 6484 + }, + { + "epoch": 6.86, + "learning_rate": 3.287790697674419e-05, + "loss": 0.1211, + "step": 6486 + }, + { + "epoch": 6.86, + "learning_rate": 3.287262156448203e-05, + "loss": 0.1741, + "step": 6488 + }, + { + "epoch": 6.86, + "learning_rate": 3.286733615221987e-05, + "loss": 0.1537, + "step": 6490 + }, + { + "epoch": 6.86, + "learning_rate": 3.286205073995772e-05, + "loss": 0.1826, + "step": 6492 + }, + { + "epoch": 6.86, + "learning_rate": 3.285676532769556e-05, + "loss": 0.231, + "step": 6494 + }, + { + "epoch": 6.87, + "learning_rate": 3.2851479915433404e-05, + "loss": 0.0593, + "step": 6496 + }, + { + "epoch": 6.87, + "learning_rate": 3.284619450317125e-05, + "loss": 0.1864, + "step": 6498 + }, + { + "epoch": 6.87, + "learning_rate": 3.2840909090909096e-05, + "loss": 0.1587, + "step": 6500 + }, + { + "epoch": 6.87, + "eval_cer": 0.018067825591336562, + "eval_loss": 0.6848969459533691, + "eval_runtime": 130.3001, + "eval_samples_per_second": 6.454, + "eval_steps_per_second": 0.814, + "step": 6500 + }, + { + "epoch": 6.87, + "learning_rate": 3.2835623678646935e-05, + "loss": 0.0839, + "step": 6502 + }, + { + "epoch": 6.88, + "learning_rate": 3.283033826638478e-05, + "loss": 0.2005, + "step": 6504 + }, + { + "epoch": 6.88, + "learning_rate": 3.282505285412262e-05, + "loss": 0.086, + "step": 6506 + }, + { + "epoch": 6.88, + "learning_rate": 3.2819767441860466e-05, + "loss": 0.0649, + "step": 6508 + }, + { + "epoch": 6.88, + "learning_rate": 3.2814482029598306e-05, + "loss": 0.2194, + "step": 6510 + }, + { + "epoch": 6.88, + "learning_rate": 3.280919661733615e-05, + "loss": 0.1811, + "step": 6512 + }, + { + "epoch": 6.89, + "learning_rate": 3.2803911205074e-05, + "loss": 0.1294, + "step": 6514 + }, + { + "epoch": 6.89, + "learning_rate": 3.2798625792811844e-05, + "loss": 0.1745, + "step": 6516 + }, + { + "epoch": 6.89, + "learning_rate": 3.279334038054968e-05, + "loss": 0.2444, + "step": 6518 + }, + { + "epoch": 6.89, + "learning_rate": 3.278805496828753e-05, + "loss": 0.1645, + "step": 6520 + }, + { + "epoch": 6.89, + "learning_rate": 3.2782769556025375e-05, + "loss": 0.3193, + "step": 6522 + }, + { + "epoch": 6.9, + "learning_rate": 3.2777484143763215e-05, + "loss": 0.262, + "step": 6524 + }, + { + "epoch": 6.9, + "learning_rate": 3.277219873150106e-05, + "loss": 0.1257, + "step": 6526 + }, + { + "epoch": 6.9, + "learning_rate": 3.27669133192389e-05, + "loss": 0.2004, + "step": 6528 + }, + { + "epoch": 6.9, + "learning_rate": 3.2761627906976746e-05, + "loss": 0.1237, + "step": 6530 + }, + { + "epoch": 6.9, + "learning_rate": 3.2756342494714585e-05, + "loss": 0.1606, + "step": 6532 + }, + { + "epoch": 6.91, + "learning_rate": 3.275105708245244e-05, + "loss": 0.134, + "step": 6534 + }, + { + "epoch": 6.91, + "learning_rate": 3.274577167019028e-05, + "loss": 0.2359, + "step": 6536 + }, + { + "epoch": 6.91, + "learning_rate": 3.2740486257928123e-05, + "loss": 0.1267, + "step": 6538 + }, + { + "epoch": 6.91, + "learning_rate": 3.273520084566596e-05, + "loss": 0.1402, + "step": 6540 + }, + { + "epoch": 6.92, + "learning_rate": 3.272991543340381e-05, + "loss": 0.2543, + "step": 6542 + }, + { + "epoch": 6.92, + "learning_rate": 3.272463002114165e-05, + "loss": 0.085, + "step": 6544 + }, + { + "epoch": 6.92, + "learning_rate": 3.2719344608879494e-05, + "loss": 0.305, + "step": 6546 + }, + { + "epoch": 6.92, + "learning_rate": 3.2714059196617333e-05, + "loss": 0.1374, + "step": 6548 + }, + { + "epoch": 6.92, + "learning_rate": 3.270877378435518e-05, + "loss": 0.1432, + "step": 6550 + }, + { + "epoch": 6.93, + "learning_rate": 3.2703488372093026e-05, + "loss": 0.1224, + "step": 6552 + }, + { + "epoch": 6.93, + "learning_rate": 3.269820295983087e-05, + "loss": 0.1348, + "step": 6554 + }, + { + "epoch": 6.93, + "learning_rate": 3.269291754756871e-05, + "loss": 0.1378, + "step": 6556 + }, + { + "epoch": 6.93, + "learning_rate": 3.268763213530656e-05, + "loss": 0.1377, + "step": 6558 + }, + { + "epoch": 6.93, + "learning_rate": 3.2682346723044396e-05, + "loss": 0.0546, + "step": 6560 + }, + { + "epoch": 6.94, + "learning_rate": 3.267706131078224e-05, + "loss": 0.1603, + "step": 6562 + }, + { + "epoch": 6.94, + "learning_rate": 3.267177589852009e-05, + "loss": 0.1801, + "step": 6564 + }, + { + "epoch": 6.94, + "learning_rate": 3.266649048625793e-05, + "loss": 0.0764, + "step": 6566 + }, + { + "epoch": 6.94, + "learning_rate": 3.2661205073995774e-05, + "loss": 0.1717, + "step": 6568 + }, + { + "epoch": 6.95, + "learning_rate": 3.265591966173362e-05, + "loss": 0.1367, + "step": 6570 + }, + { + "epoch": 6.95, + "learning_rate": 3.265063424947146e-05, + "loss": 0.2169, + "step": 6572 + }, + { + "epoch": 6.95, + "learning_rate": 3.2645348837209305e-05, + "loss": 0.1314, + "step": 6574 + }, + { + "epoch": 6.95, + "learning_rate": 3.264006342494715e-05, + "loss": 0.1091, + "step": 6576 + }, + { + "epoch": 6.95, + "learning_rate": 3.263477801268499e-05, + "loss": 0.129, + "step": 6578 + }, + { + "epoch": 6.96, + "learning_rate": 3.2629492600422836e-05, + "loss": 0.2033, + "step": 6580 + }, + { + "epoch": 6.96, + "learning_rate": 3.2624207188160676e-05, + "loss": 0.1272, + "step": 6582 + }, + { + "epoch": 6.96, + "learning_rate": 3.261892177589852e-05, + "loss": 0.211, + "step": 6584 + }, + { + "epoch": 6.96, + "learning_rate": 3.261363636363636e-05, + "loss": 0.2411, + "step": 6586 + }, + { + "epoch": 6.96, + "learning_rate": 3.2608350951374214e-05, + "loss": 0.1173, + "step": 6588 + }, + { + "epoch": 6.97, + "learning_rate": 3.260306553911205e-05, + "loss": 0.1694, + "step": 6590 + }, + { + "epoch": 6.97, + "learning_rate": 3.25977801268499e-05, + "loss": 0.15, + "step": 6592 + }, + { + "epoch": 6.97, + "learning_rate": 3.259249471458774e-05, + "loss": 0.1222, + "step": 6594 + }, + { + "epoch": 6.97, + "learning_rate": 3.2587209302325585e-05, + "loss": 0.0788, + "step": 6596 + }, + { + "epoch": 6.97, + "learning_rate": 3.2581923890063424e-05, + "loss": 0.1771, + "step": 6598 + }, + { + "epoch": 6.98, + "learning_rate": 3.257663847780127e-05, + "loss": 0.0798, + "step": 6600 + }, + { + "epoch": 6.98, + "learning_rate": 3.257135306553911e-05, + "loss": 0.2348, + "step": 6602 + }, + { + "epoch": 6.98, + "learning_rate": 3.2566067653276955e-05, + "loss": 0.1541, + "step": 6604 + }, + { + "epoch": 6.98, + "learning_rate": 3.25607822410148e-05, + "loss": 0.0848, + "step": 6606 + }, + { + "epoch": 6.99, + "learning_rate": 3.255549682875265e-05, + "loss": 0.0845, + "step": 6608 + }, + { + "epoch": 6.99, + "learning_rate": 3.255021141649049e-05, + "loss": 0.1386, + "step": 6610 + }, + { + "epoch": 6.99, + "learning_rate": 3.254492600422833e-05, + "loss": 0.07, + "step": 6612 + }, + { + "epoch": 6.99, + "learning_rate": 3.253964059196617e-05, + "loss": 0.0768, + "step": 6614 + }, + { + "epoch": 6.99, + "learning_rate": 3.253435517970402e-05, + "loss": 0.1933, + "step": 6616 + }, + { + "epoch": 7.0, + "learning_rate": 3.2529069767441864e-05, + "loss": 0.1392, + "step": 6618 + }, + { + "epoch": 7.0, + "learning_rate": 3.2523784355179703e-05, + "loss": 0.1438, + "step": 6620 + }, + { + "epoch": 7.0, + "learning_rate": 3.251849894291755e-05, + "loss": 0.1893, + "step": 6622 + }, + { + "epoch": 7.0, + "learning_rate": 3.2513213530655396e-05, + "loss": 0.1026, + "step": 6624 + }, + { + "epoch": 7.0, + "learning_rate": 3.250792811839324e-05, + "loss": 0.1147, + "step": 6626 + }, + { + "epoch": 7.01, + "learning_rate": 3.250264270613108e-05, + "loss": 0.0963, + "step": 6628 + }, + { + "epoch": 7.01, + "learning_rate": 3.249735729386893e-05, + "loss": 0.0652, + "step": 6630 + }, + { + "epoch": 7.01, + "learning_rate": 3.2492071881606766e-05, + "loss": 0.0809, + "step": 6632 + }, + { + "epoch": 7.01, + "learning_rate": 3.248678646934461e-05, + "loss": 0.1304, + "step": 6634 + }, + { + "epoch": 7.01, + "learning_rate": 3.248150105708245e-05, + "loss": 0.1327, + "step": 6636 + }, + { + "epoch": 7.02, + "learning_rate": 3.24762156448203e-05, + "loss": 0.1143, + "step": 6638 + }, + { + "epoch": 7.02, + "learning_rate": 3.247093023255814e-05, + "loss": 0.0977, + "step": 6640 + }, + { + "epoch": 7.02, + "learning_rate": 3.246564482029599e-05, + "loss": 0.149, + "step": 6642 + }, + { + "epoch": 7.02, + "learning_rate": 3.246035940803383e-05, + "loss": 0.1369, + "step": 6644 + }, + { + "epoch": 7.03, + "learning_rate": 3.2455073995771675e-05, + "loss": 0.1006, + "step": 6646 + }, + { + "epoch": 7.03, + "learning_rate": 3.2449788583509514e-05, + "loss": 0.1559, + "step": 6648 + }, + { + "epoch": 7.03, + "learning_rate": 3.244450317124736e-05, + "loss": 0.0658, + "step": 6650 + }, + { + "epoch": 7.03, + "learning_rate": 3.24392177589852e-05, + "loss": 0.2514, + "step": 6652 + }, + { + "epoch": 7.03, + "learning_rate": 3.2433932346723046e-05, + "loss": 0.1672, + "step": 6654 + }, + { + "epoch": 7.04, + "learning_rate": 3.2428646934460885e-05, + "loss": 0.1332, + "step": 6656 + }, + { + "epoch": 7.04, + "learning_rate": 3.242336152219873e-05, + "loss": 0.1938, + "step": 6658 + }, + { + "epoch": 7.04, + "learning_rate": 3.241807610993658e-05, + "loss": 0.1068, + "step": 6660 + }, + { + "epoch": 7.04, + "learning_rate": 3.241279069767442e-05, + "loss": 0.1107, + "step": 6662 + }, + { + "epoch": 7.04, + "learning_rate": 3.240750528541226e-05, + "loss": 0.0745, + "step": 6664 + }, + { + "epoch": 7.05, + "learning_rate": 3.240221987315011e-05, + "loss": 0.0882, + "step": 6666 + }, + { + "epoch": 7.05, + "learning_rate": 3.239693446088795e-05, + "loss": 0.0983, + "step": 6668 + }, + { + "epoch": 7.05, + "learning_rate": 3.2391649048625794e-05, + "loss": 0.0925, + "step": 6670 + }, + { + "epoch": 7.05, + "learning_rate": 3.238636363636364e-05, + "loss": 0.1857, + "step": 6672 + }, + { + "epoch": 7.05, + "learning_rate": 3.238107822410148e-05, + "loss": 0.1183, + "step": 6674 + }, + { + "epoch": 7.06, + "learning_rate": 3.2375792811839325e-05, + "loss": 0.067, + "step": 6676 + }, + { + "epoch": 7.06, + "learning_rate": 3.237050739957717e-05, + "loss": 0.1301, + "step": 6678 + }, + { + "epoch": 7.06, + "learning_rate": 3.236522198731502e-05, + "loss": 0.1827, + "step": 6680 + }, + { + "epoch": 7.06, + "learning_rate": 3.235993657505286e-05, + "loss": 0.0783, + "step": 6682 + }, + { + "epoch": 7.07, + "learning_rate": 3.23546511627907e-05, + "loss": 0.0923, + "step": 6684 + }, + { + "epoch": 7.07, + "learning_rate": 3.234936575052854e-05, + "loss": 0.1305, + "step": 6686 + }, + { + "epoch": 7.07, + "learning_rate": 3.234408033826639e-05, + "loss": 0.0894, + "step": 6688 + }, + { + "epoch": 7.07, + "learning_rate": 3.233879492600423e-05, + "loss": 0.1987, + "step": 6690 + }, + { + "epoch": 7.07, + "learning_rate": 3.2333509513742073e-05, + "loss": 0.1331, + "step": 6692 + }, + { + "epoch": 7.08, + "learning_rate": 3.232822410147991e-05, + "loss": 0.1275, + "step": 6694 + }, + { + "epoch": 7.08, + "learning_rate": 3.2322938689217766e-05, + "loss": 0.1744, + "step": 6696 + }, + { + "epoch": 7.08, + "learning_rate": 3.2317653276955605e-05, + "loss": 0.0823, + "step": 6698 + }, + { + "epoch": 7.08, + "learning_rate": 3.231236786469345e-05, + "loss": 0.1182, + "step": 6700 + }, + { + "epoch": 7.08, + "learning_rate": 3.230708245243129e-05, + "loss": 0.1176, + "step": 6702 + }, + { + "epoch": 7.09, + "learning_rate": 3.2301797040169136e-05, + "loss": 0.1704, + "step": 6704 + }, + { + "epoch": 7.09, + "learning_rate": 3.2296511627906976e-05, + "loss": 0.1159, + "step": 6706 + }, + { + "epoch": 7.09, + "learning_rate": 3.229122621564482e-05, + "loss": 0.1607, + "step": 6708 + }, + { + "epoch": 7.09, + "learning_rate": 3.228594080338266e-05, + "loss": 0.0877, + "step": 6710 + }, + { + "epoch": 7.1, + "learning_rate": 3.228065539112051e-05, + "loss": 0.1278, + "step": 6712 + }, + { + "epoch": 7.1, + "learning_rate": 3.227536997885835e-05, + "loss": 0.1083, + "step": 6714 + }, + { + "epoch": 7.1, + "learning_rate": 3.22700845665962e-05, + "loss": 0.1, + "step": 6716 + }, + { + "epoch": 7.1, + "learning_rate": 3.226479915433404e-05, + "loss": 0.1898, + "step": 6718 + }, + { + "epoch": 7.1, + "learning_rate": 3.2259513742071884e-05, + "loss": 0.0798, + "step": 6720 + }, + { + "epoch": 7.11, + "learning_rate": 3.2254228329809724e-05, + "loss": 0.0928, + "step": 6722 + }, + { + "epoch": 7.11, + "learning_rate": 3.224894291754757e-05, + "loss": 0.1268, + "step": 6724 + }, + { + "epoch": 7.11, + "learning_rate": 3.2243657505285416e-05, + "loss": 0.1476, + "step": 6726 + }, + { + "epoch": 7.11, + "learning_rate": 3.2238372093023255e-05, + "loss": 0.0909, + "step": 6728 + }, + { + "epoch": 7.11, + "learning_rate": 3.22330866807611e-05, + "loss": 0.1367, + "step": 6730 + }, + { + "epoch": 7.12, + "learning_rate": 3.222780126849895e-05, + "loss": 0.1678, + "step": 6732 + }, + { + "epoch": 7.12, + "learning_rate": 3.222251585623679e-05, + "loss": 0.0876, + "step": 6734 + }, + { + "epoch": 7.12, + "learning_rate": 3.221723044397463e-05, + "loss": 0.1025, + "step": 6736 + }, + { + "epoch": 7.12, + "learning_rate": 3.221194503171248e-05, + "loss": 0.1406, + "step": 6738 + }, + { + "epoch": 7.12, + "learning_rate": 3.220665961945032e-05, + "loss": 0.1127, + "step": 6740 + }, + { + "epoch": 7.13, + "learning_rate": 3.2201374207188164e-05, + "loss": 0.0851, + "step": 6742 + }, + { + "epoch": 7.13, + "learning_rate": 3.2196088794926e-05, + "loss": 0.1614, + "step": 6744 + }, + { + "epoch": 7.13, + "learning_rate": 3.219080338266385e-05, + "loss": 0.0852, + "step": 6746 + }, + { + "epoch": 7.13, + "learning_rate": 3.218551797040169e-05, + "loss": 0.0843, + "step": 6748 + }, + { + "epoch": 7.14, + "learning_rate": 3.218023255813954e-05, + "loss": 0.1957, + "step": 6750 + }, + { + "epoch": 7.14, + "learning_rate": 3.217494714587738e-05, + "loss": 0.2616, + "step": 6752 + }, + { + "epoch": 7.14, + "learning_rate": 3.216966173361523e-05, + "loss": 0.203, + "step": 6754 + }, + { + "epoch": 7.14, + "learning_rate": 3.2164376321353066e-05, + "loss": 0.115, + "step": 6756 + }, + { + "epoch": 7.14, + "learning_rate": 3.215909090909091e-05, + "loss": 0.1622, + "step": 6758 + }, + { + "epoch": 7.15, + "learning_rate": 3.215380549682875e-05, + "loss": 0.0653, + "step": 6760 + }, + { + "epoch": 7.15, + "learning_rate": 3.21485200845666e-05, + "loss": 0.1484, + "step": 6762 + }, + { + "epoch": 7.15, + "learning_rate": 3.214323467230444e-05, + "loss": 0.2066, + "step": 6764 + }, + { + "epoch": 7.15, + "learning_rate": 3.213794926004228e-05, + "loss": 0.2578, + "step": 6766 + }, + { + "epoch": 7.15, + "learning_rate": 3.213266384778013e-05, + "loss": 0.1562, + "step": 6768 + }, + { + "epoch": 7.16, + "learning_rate": 3.2127378435517975e-05, + "loss": 0.2174, + "step": 6770 + }, + { + "epoch": 7.16, + "learning_rate": 3.2122093023255814e-05, + "loss": 0.0996, + "step": 6772 + }, + { + "epoch": 7.16, + "learning_rate": 3.211680761099366e-05, + "loss": 0.144, + "step": 6774 + }, + { + "epoch": 7.16, + "learning_rate": 3.21115221987315e-05, + "loss": 0.2435, + "step": 6776 + }, + { + "epoch": 7.16, + "learning_rate": 3.2106236786469346e-05, + "loss": 0.1747, + "step": 6778 + }, + { + "epoch": 7.17, + "learning_rate": 3.210095137420719e-05, + "loss": 0.1178, + "step": 6780 + }, + { + "epoch": 7.17, + "learning_rate": 3.209566596194503e-05, + "loss": 0.0761, + "step": 6782 + }, + { + "epoch": 7.17, + "learning_rate": 3.209038054968288e-05, + "loss": 0.1601, + "step": 6784 + }, + { + "epoch": 7.17, + "learning_rate": 3.2085095137420716e-05, + "loss": 0.1881, + "step": 6786 + }, + { + "epoch": 7.18, + "learning_rate": 3.207980972515857e-05, + "loss": 0.0601, + "step": 6788 + }, + { + "epoch": 7.18, + "learning_rate": 3.207452431289641e-05, + "loss": 0.0526, + "step": 6790 + }, + { + "epoch": 7.18, + "learning_rate": 3.2069238900634254e-05, + "loss": 0.1336, + "step": 6792 + }, + { + "epoch": 7.18, + "learning_rate": 3.2063953488372094e-05, + "loss": 0.3576, + "step": 6794 + }, + { + "epoch": 7.18, + "learning_rate": 3.205866807610994e-05, + "loss": 0.1493, + "step": 6796 + }, + { + "epoch": 7.19, + "learning_rate": 3.205338266384778e-05, + "loss": 0.1175, + "step": 6798 + }, + { + "epoch": 7.19, + "learning_rate": 3.2048097251585625e-05, + "loss": 0.0731, + "step": 6800 + }, + { + "epoch": 7.19, + "learning_rate": 3.2042811839323464e-05, + "loss": 0.1651, + "step": 6802 + }, + { + "epoch": 7.19, + "learning_rate": 3.203752642706131e-05, + "loss": 0.1258, + "step": 6804 + }, + { + "epoch": 7.19, + "learning_rate": 3.2032241014799157e-05, + "loss": 0.0815, + "step": 6806 + }, + { + "epoch": 7.2, + "learning_rate": 3.2026955602537e-05, + "loss": 0.1047, + "step": 6808 + }, + { + "epoch": 7.2, + "learning_rate": 3.202167019027484e-05, + "loss": 0.0978, + "step": 6810 + }, + { + "epoch": 7.2, + "learning_rate": 3.201638477801269e-05, + "loss": 0.122, + "step": 6812 + }, + { + "epoch": 7.2, + "learning_rate": 3.201109936575053e-05, + "loss": 0.2287, + "step": 6814 + }, + { + "epoch": 7.21, + "learning_rate": 3.200581395348837e-05, + "loss": 0.1758, + "step": 6816 + }, + { + "epoch": 7.21, + "learning_rate": 3.200052854122621e-05, + "loss": 0.1481, + "step": 6818 + }, + { + "epoch": 7.21, + "learning_rate": 3.199524312896406e-05, + "loss": 0.1527, + "step": 6820 + }, + { + "epoch": 7.21, + "learning_rate": 3.1989957716701905e-05, + "loss": 0.0681, + "step": 6822 + }, + { + "epoch": 7.21, + "learning_rate": 3.198467230443975e-05, + "loss": 0.34, + "step": 6824 + }, + { + "epoch": 7.22, + "learning_rate": 3.197938689217759e-05, + "loss": 0.0775, + "step": 6826 + }, + { + "epoch": 7.22, + "learning_rate": 3.1974101479915436e-05, + "loss": 0.1494, + "step": 6828 + }, + { + "epoch": 7.22, + "learning_rate": 3.196881606765328e-05, + "loss": 0.1659, + "step": 6830 + }, + { + "epoch": 7.22, + "learning_rate": 3.196353065539112e-05, + "loss": 0.1885, + "step": 6832 + }, + { + "epoch": 7.22, + "learning_rate": 3.195824524312897e-05, + "loss": 0.1345, + "step": 6834 + }, + { + "epoch": 7.23, + "learning_rate": 3.195295983086681e-05, + "loss": 0.0972, + "step": 6836 + }, + { + "epoch": 7.23, + "learning_rate": 3.194767441860465e-05, + "loss": 0.096, + "step": 6838 + }, + { + "epoch": 7.23, + "learning_rate": 3.194238900634249e-05, + "loss": 0.2733, + "step": 6840 + }, + { + "epoch": 7.23, + "learning_rate": 3.1937103594080345e-05, + "loss": 0.1251, + "step": 6842 + }, + { + "epoch": 7.23, + "learning_rate": 3.1931818181818184e-05, + "loss": 0.1594, + "step": 6844 + }, + { + "epoch": 7.24, + "learning_rate": 3.192653276955603e-05, + "loss": 0.1894, + "step": 6846 + }, + { + "epoch": 7.24, + "learning_rate": 3.192124735729387e-05, + "loss": 0.125, + "step": 6848 + }, + { + "epoch": 7.24, + "learning_rate": 3.1915961945031716e-05, + "loss": 0.0435, + "step": 6850 + }, + { + "epoch": 7.24, + "learning_rate": 3.1910676532769555e-05, + "loss": 0.1681, + "step": 6852 + }, + { + "epoch": 7.25, + "learning_rate": 3.19053911205074e-05, + "loss": 0.0633, + "step": 6854 + }, + { + "epoch": 7.25, + "learning_rate": 3.190010570824524e-05, + "loss": 0.0713, + "step": 6856 + }, + { + "epoch": 7.25, + "learning_rate": 3.1894820295983086e-05, + "loss": 0.0999, + "step": 6858 + }, + { + "epoch": 7.25, + "learning_rate": 3.188953488372093e-05, + "loss": 0.1284, + "step": 6860 + }, + { + "epoch": 7.25, + "learning_rate": 3.188424947145878e-05, + "loss": 0.0723, + "step": 6862 + }, + { + "epoch": 7.26, + "learning_rate": 3.187896405919662e-05, + "loss": 0.0406, + "step": 6864 + }, + { + "epoch": 7.26, + "learning_rate": 3.1873678646934464e-05, + "loss": 0.0977, + "step": 6866 + }, + { + "epoch": 7.26, + "learning_rate": 3.18683932346723e-05, + "loss": 0.0727, + "step": 6868 + }, + { + "epoch": 7.26, + "learning_rate": 3.186310782241015e-05, + "loss": 0.2983, + "step": 6870 + }, + { + "epoch": 7.26, + "learning_rate": 3.185782241014799e-05, + "loss": 0.1568, + "step": 6872 + }, + { + "epoch": 7.27, + "learning_rate": 3.1852536997885834e-05, + "loss": 0.306, + "step": 6874 + }, + { + "epoch": 7.27, + "learning_rate": 3.184725158562368e-05, + "loss": 0.4258, + "step": 6876 + }, + { + "epoch": 7.27, + "learning_rate": 3.1841966173361527e-05, + "loss": 0.1543, + "step": 6878 + }, + { + "epoch": 7.27, + "learning_rate": 3.1836680761099366e-05, + "loss": 0.1296, + "step": 6880 + }, + { + "epoch": 7.27, + "learning_rate": 3.183139534883721e-05, + "loss": 0.1369, + "step": 6882 + }, + { + "epoch": 7.28, + "learning_rate": 3.182610993657506e-05, + "loss": 0.1389, + "step": 6884 + }, + { + "epoch": 7.28, + "learning_rate": 3.18208245243129e-05, + "loss": 0.1247, + "step": 6886 + }, + { + "epoch": 7.28, + "learning_rate": 3.181553911205074e-05, + "loss": 0.1223, + "step": 6888 + }, + { + "epoch": 7.28, + "learning_rate": 3.181025369978858e-05, + "loss": 0.1556, + "step": 6890 + }, + { + "epoch": 7.29, + "learning_rate": 3.180496828752643e-05, + "loss": 0.1862, + "step": 6892 + }, + { + "epoch": 7.29, + "learning_rate": 3.179968287526427e-05, + "loss": 0.1612, + "step": 6894 + }, + { + "epoch": 7.29, + "learning_rate": 3.179439746300212e-05, + "loss": 0.1985, + "step": 6896 + }, + { + "epoch": 7.29, + "learning_rate": 3.178911205073996e-05, + "loss": 0.12, + "step": 6898 + }, + { + "epoch": 7.29, + "learning_rate": 3.1783826638477806e-05, + "loss": 0.2225, + "step": 6900 + }, + { + "epoch": 7.3, + "learning_rate": 3.1778541226215645e-05, + "loss": 0.1292, + "step": 6902 + }, + { + "epoch": 7.3, + "learning_rate": 3.177325581395349e-05, + "loss": 0.0941, + "step": 6904 + }, + { + "epoch": 7.3, + "learning_rate": 3.176797040169133e-05, + "loss": 0.318, + "step": 6906 + }, + { + "epoch": 7.3, + "learning_rate": 3.176268498942918e-05, + "loss": 0.3259, + "step": 6908 + }, + { + "epoch": 7.3, + "learning_rate": 3.1757399577167016e-05, + "loss": 0.1176, + "step": 6910 + }, + { + "epoch": 7.31, + "learning_rate": 3.175211416490486e-05, + "loss": 0.1663, + "step": 6912 + }, + { + "epoch": 7.31, + "learning_rate": 3.174682875264271e-05, + "loss": 0.171, + "step": 6914 + }, + { + "epoch": 7.31, + "learning_rate": 3.1741543340380554e-05, + "loss": 0.2284, + "step": 6916 + }, + { + "epoch": 7.31, + "learning_rate": 3.1736257928118394e-05, + "loss": 0.1861, + "step": 6918 + }, + { + "epoch": 7.32, + "learning_rate": 3.173097251585624e-05, + "loss": 0.2835, + "step": 6920 + }, + { + "epoch": 7.32, + "learning_rate": 3.172568710359408e-05, + "loss": 0.149, + "step": 6922 + }, + { + "epoch": 7.32, + "learning_rate": 3.1720401691331925e-05, + "loss": 0.0834, + "step": 6924 + }, + { + "epoch": 7.32, + "learning_rate": 3.1715116279069764e-05, + "loss": 0.1018, + "step": 6926 + }, + { + "epoch": 7.32, + "learning_rate": 3.170983086680761e-05, + "loss": 0.057, + "step": 6928 + }, + { + "epoch": 7.33, + "learning_rate": 3.1704545454545456e-05, + "loss": 0.1553, + "step": 6930 + }, + { + "epoch": 7.33, + "learning_rate": 3.16992600422833e-05, + "loss": 0.0442, + "step": 6932 + }, + { + "epoch": 7.33, + "learning_rate": 3.169397463002114e-05, + "loss": 0.0943, + "step": 6934 + }, + { + "epoch": 7.33, + "learning_rate": 3.168868921775899e-05, + "loss": 0.1423, + "step": 6936 + }, + { + "epoch": 7.33, + "learning_rate": 3.1683403805496834e-05, + "loss": 0.101, + "step": 6938 + }, + { + "epoch": 7.34, + "learning_rate": 3.167811839323467e-05, + "loss": 0.0697, + "step": 6940 + }, + { + "epoch": 7.34, + "learning_rate": 3.167283298097252e-05, + "loss": 0.0719, + "step": 6942 + }, + { + "epoch": 7.34, + "learning_rate": 3.166754756871036e-05, + "loss": 0.2244, + "step": 6944 + }, + { + "epoch": 7.34, + "learning_rate": 3.1662262156448205e-05, + "loss": 0.0793, + "step": 6946 + }, + { + "epoch": 7.34, + "learning_rate": 3.1656976744186044e-05, + "loss": 0.0689, + "step": 6948 + }, + { + "epoch": 7.35, + "learning_rate": 3.16516913319239e-05, + "loss": 0.1631, + "step": 6950 + }, + { + "epoch": 7.35, + "learning_rate": 3.1646405919661736e-05, + "loss": 0.057, + "step": 6952 + }, + { + "epoch": 7.35, + "learning_rate": 3.164112050739958e-05, + "loss": 0.1282, + "step": 6954 + }, + { + "epoch": 7.35, + "learning_rate": 3.163583509513742e-05, + "loss": 0.183, + "step": 6956 + }, + { + "epoch": 7.36, + "learning_rate": 3.163054968287527e-05, + "loss": 0.114, + "step": 6958 + }, + { + "epoch": 7.36, + "learning_rate": 3.162526427061311e-05, + "loss": 0.1625, + "step": 6960 + }, + { + "epoch": 7.36, + "learning_rate": 3.161997885835095e-05, + "loss": 0.1943, + "step": 6962 + }, + { + "epoch": 7.36, + "learning_rate": 3.161469344608879e-05, + "loss": 0.1572, + "step": 6964 + }, + { + "epoch": 7.36, + "learning_rate": 3.160940803382664e-05, + "loss": 0.1261, + "step": 6966 + }, + { + "epoch": 7.37, + "learning_rate": 3.1604122621564484e-05, + "loss": 0.0798, + "step": 6968 + }, + { + "epoch": 7.37, + "learning_rate": 3.159883720930233e-05, + "loss": 0.1337, + "step": 6970 + }, + { + "epoch": 7.37, + "learning_rate": 3.159355179704017e-05, + "loss": 0.0814, + "step": 6972 + }, + { + "epoch": 7.37, + "learning_rate": 3.1588266384778015e-05, + "loss": 0.0963, + "step": 6974 + }, + { + "epoch": 7.37, + "learning_rate": 3.1582980972515855e-05, + "loss": 0.1317, + "step": 6976 + }, + { + "epoch": 7.38, + "learning_rate": 3.15776955602537e-05, + "loss": 0.1232, + "step": 6978 + }, + { + "epoch": 7.38, + "learning_rate": 3.157241014799155e-05, + "loss": 0.1589, + "step": 6980 + }, + { + "epoch": 7.38, + "learning_rate": 3.1567124735729386e-05, + "loss": 0.1632, + "step": 6982 + }, + { + "epoch": 7.38, + "learning_rate": 3.156183932346723e-05, + "loss": 0.0875, + "step": 6984 + }, + { + "epoch": 7.38, + "learning_rate": 3.155655391120508e-05, + "loss": 0.1064, + "step": 6986 + }, + { + "epoch": 7.39, + "learning_rate": 3.155126849894292e-05, + "loss": 0.134, + "step": 6988 + }, + { + "epoch": 7.39, + "learning_rate": 3.1545983086680764e-05, + "loss": 0.0632, + "step": 6990 + }, + { + "epoch": 7.39, + "learning_rate": 3.154069767441861e-05, + "loss": 0.1357, + "step": 6992 + }, + { + "epoch": 7.39, + "learning_rate": 3.153541226215645e-05, + "loss": 0.0639, + "step": 6994 + }, + { + "epoch": 7.4, + "learning_rate": 3.1530126849894295e-05, + "loss": 0.0779, + "step": 6996 + }, + { + "epoch": 7.4, + "learning_rate": 3.1524841437632134e-05, + "loss": 0.1556, + "step": 6998 + }, + { + "epoch": 7.4, + "learning_rate": 3.151955602536998e-05, + "loss": 0.2285, + "step": 7000 + }, + { + "epoch": 7.4, + "eval_cer": 0.016699914505557138, + "eval_loss": 0.7643967270851135, + "eval_runtime": 129.8037, + "eval_samples_per_second": 6.479, + "eval_steps_per_second": 0.817, + "step": 7000 + }, + { + "epoch": 7.4, + "learning_rate": 3.151427061310782e-05, + "loss": 0.0917, + "step": 7002 + }, + { + "epoch": 7.4, + "learning_rate": 3.150898520084567e-05, + "loss": 0.1798, + "step": 7004 + }, + { + "epoch": 7.41, + "learning_rate": 3.150369978858351e-05, + "loss": 0.122, + "step": 7006 + }, + { + "epoch": 7.41, + "learning_rate": 3.149841437632136e-05, + "loss": 0.0871, + "step": 7008 + }, + { + "epoch": 7.41, + "learning_rate": 3.14931289640592e-05, + "loss": 0.0828, + "step": 7010 + }, + { + "epoch": 7.41, + "learning_rate": 3.148784355179704e-05, + "loss": 0.1366, + "step": 7012 + }, + { + "epoch": 7.41, + "learning_rate": 3.148255813953488e-05, + "loss": 0.1001, + "step": 7014 + }, + { + "epoch": 7.42, + "learning_rate": 3.147727272727273e-05, + "loss": 0.1294, + "step": 7016 + }, + { + "epoch": 7.42, + "learning_rate": 3.147198731501057e-05, + "loss": 0.116, + "step": 7018 + }, + { + "epoch": 7.42, + "learning_rate": 3.1466701902748414e-05, + "loss": 0.0794, + "step": 7020 + }, + { + "epoch": 7.42, + "learning_rate": 3.146141649048626e-05, + "loss": 0.0282, + "step": 7022 + }, + { + "epoch": 7.42, + "learning_rate": 3.1456131078224106e-05, + "loss": 0.1102, + "step": 7024 + }, + { + "epoch": 7.43, + "learning_rate": 3.1450845665961945e-05, + "loss": 0.0907, + "step": 7026 + }, + { + "epoch": 7.43, + "learning_rate": 3.144556025369979e-05, + "loss": 0.097, + "step": 7028 + }, + { + "epoch": 7.43, + "learning_rate": 3.144027484143763e-05, + "loss": 0.1432, + "step": 7030 + }, + { + "epoch": 7.43, + "learning_rate": 3.143498942917548e-05, + "loss": 0.1493, + "step": 7032 + }, + { + "epoch": 7.44, + "learning_rate": 3.142970401691332e-05, + "loss": 0.2088, + "step": 7034 + }, + { + "epoch": 7.44, + "learning_rate": 3.142441860465116e-05, + "loss": 0.5473, + "step": 7036 + }, + { + "epoch": 7.44, + "learning_rate": 3.141913319238901e-05, + "loss": 0.1131, + "step": 7038 + }, + { + "epoch": 7.44, + "learning_rate": 3.1413847780126854e-05, + "loss": 0.0682, + "step": 7040 + }, + { + "epoch": 7.44, + "learning_rate": 3.1408562367864693e-05, + "loss": 0.1884, + "step": 7042 + }, + { + "epoch": 7.45, + "learning_rate": 3.140327695560254e-05, + "loss": 0.1398, + "step": 7044 + }, + { + "epoch": 7.45, + "learning_rate": 3.1397991543340386e-05, + "loss": 0.2104, + "step": 7046 + }, + { + "epoch": 7.45, + "learning_rate": 3.1392706131078225e-05, + "loss": 0.089, + "step": 7048 + }, + { + "epoch": 7.45, + "learning_rate": 3.138742071881607e-05, + "loss": 0.2262, + "step": 7050 + }, + { + "epoch": 7.45, + "learning_rate": 3.138213530655391e-05, + "loss": 0.3409, + "step": 7052 + }, + { + "epoch": 7.46, + "learning_rate": 3.1376849894291756e-05, + "loss": 0.1543, + "step": 7054 + }, + { + "epoch": 7.46, + "learning_rate": 3.1371564482029595e-05, + "loss": 0.079, + "step": 7056 + }, + { + "epoch": 7.46, + "learning_rate": 3.136627906976745e-05, + "loss": 0.172, + "step": 7058 + }, + { + "epoch": 7.46, + "learning_rate": 3.136099365750529e-05, + "loss": 0.1977, + "step": 7060 + }, + { + "epoch": 7.47, + "learning_rate": 3.1355708245243134e-05, + "loss": 0.2035, + "step": 7062 + }, + { + "epoch": 7.47, + "learning_rate": 3.135042283298097e-05, + "loss": 0.0555, + "step": 7064 + }, + { + "epoch": 7.47, + "learning_rate": 3.134513742071882e-05, + "loss": 0.0926, + "step": 7066 + }, + { + "epoch": 7.47, + "learning_rate": 3.133985200845666e-05, + "loss": 0.1551, + "step": 7068 + }, + { + "epoch": 7.47, + "learning_rate": 3.1334566596194504e-05, + "loss": 0.1136, + "step": 7070 + }, + { + "epoch": 7.48, + "learning_rate": 3.1329281183932344e-05, + "loss": 0.1401, + "step": 7072 + }, + { + "epoch": 7.48, + "learning_rate": 3.132399577167019e-05, + "loss": 0.1895, + "step": 7074 + }, + { + "epoch": 7.48, + "learning_rate": 3.1318710359408036e-05, + "loss": 0.1383, + "step": 7076 + }, + { + "epoch": 7.48, + "learning_rate": 3.131342494714588e-05, + "loss": 0.1553, + "step": 7078 + }, + { + "epoch": 7.48, + "learning_rate": 3.130813953488372e-05, + "loss": 0.0743, + "step": 7080 + }, + { + "epoch": 7.49, + "learning_rate": 3.130285412262157e-05, + "loss": 0.0585, + "step": 7082 + }, + { + "epoch": 7.49, + "learning_rate": 3.1297568710359406e-05, + "loss": 0.0266, + "step": 7084 + }, + { + "epoch": 7.49, + "learning_rate": 3.129228329809725e-05, + "loss": 0.0958, + "step": 7086 + }, + { + "epoch": 7.49, + "learning_rate": 3.12869978858351e-05, + "loss": 0.2182, + "step": 7088 + }, + { + "epoch": 7.49, + "learning_rate": 3.128171247357294e-05, + "loss": 0.1488, + "step": 7090 + }, + { + "epoch": 7.5, + "learning_rate": 3.1276427061310784e-05, + "loss": 0.2245, + "step": 7092 + }, + { + "epoch": 7.5, + "learning_rate": 3.127114164904863e-05, + "loss": 0.1023, + "step": 7094 + }, + { + "epoch": 7.5, + "learning_rate": 3.1265856236786476e-05, + "loss": 0.2489, + "step": 7096 + }, + { + "epoch": 7.5, + "learning_rate": 3.1260570824524315e-05, + "loss": 0.2633, + "step": 7098 + }, + { + "epoch": 7.51, + "learning_rate": 3.125528541226216e-05, + "loss": 0.1263, + "step": 7100 + }, + { + "epoch": 7.51, + "learning_rate": 3.125e-05, + "loss": 0.1476, + "step": 7102 + }, + { + "epoch": 7.51, + "learning_rate": 3.124471458773785e-05, + "loss": 0.1078, + "step": 7104 + }, + { + "epoch": 7.51, + "learning_rate": 3.1239429175475686e-05, + "loss": 0.1322, + "step": 7106 + }, + { + "epoch": 7.51, + "learning_rate": 3.123414376321353e-05, + "loss": 0.1396, + "step": 7108 + }, + { + "epoch": 7.52, + "learning_rate": 3.122885835095137e-05, + "loss": 0.1505, + "step": 7110 + }, + { + "epoch": 7.52, + "learning_rate": 3.1223572938689224e-05, + "loss": 0.2306, + "step": 7112 + }, + { + "epoch": 7.52, + "learning_rate": 3.1218287526427063e-05, + "loss": 0.115, + "step": 7114 + }, + { + "epoch": 7.52, + "learning_rate": 3.121300211416491e-05, + "loss": 0.0433, + "step": 7116 + }, + { + "epoch": 7.52, + "learning_rate": 3.120771670190275e-05, + "loss": 0.059, + "step": 7118 + }, + { + "epoch": 7.53, + "learning_rate": 3.1202431289640595e-05, + "loss": 0.0396, + "step": 7120 + }, + { + "epoch": 7.53, + "learning_rate": 3.1197145877378434e-05, + "loss": 0.1071, + "step": 7122 + }, + { + "epoch": 7.53, + "learning_rate": 3.119186046511628e-05, + "loss": 0.1492, + "step": 7124 + }, + { + "epoch": 7.53, + "learning_rate": 3.118657505285412e-05, + "loss": 0.1364, + "step": 7126 + }, + { + "epoch": 7.53, + "learning_rate": 3.1181289640591966e-05, + "loss": 0.365, + "step": 7128 + }, + { + "epoch": 7.54, + "learning_rate": 3.117600422832981e-05, + "loss": 0.1789, + "step": 7130 + }, + { + "epoch": 7.54, + "learning_rate": 3.117071881606766e-05, + "loss": 0.0941, + "step": 7132 + }, + { + "epoch": 7.54, + "learning_rate": 3.11654334038055e-05, + "loss": 0.1209, + "step": 7134 + }, + { + "epoch": 7.54, + "learning_rate": 3.116014799154334e-05, + "loss": 0.2109, + "step": 7136 + }, + { + "epoch": 7.55, + "learning_rate": 3.115486257928118e-05, + "loss": 0.0642, + "step": 7138 + }, + { + "epoch": 7.55, + "learning_rate": 3.114957716701903e-05, + "loss": 0.1333, + "step": 7140 + }, + { + "epoch": 7.55, + "learning_rate": 3.1144291754756874e-05, + "loss": 0.0888, + "step": 7142 + }, + { + "epoch": 7.55, + "learning_rate": 3.1139006342494714e-05, + "loss": 0.1243, + "step": 7144 + }, + { + "epoch": 7.55, + "learning_rate": 3.113372093023256e-05, + "loss": 0.0929, + "step": 7146 + }, + { + "epoch": 7.56, + "learning_rate": 3.1128435517970406e-05, + "loss": 0.1093, + "step": 7148 + }, + { + "epoch": 7.56, + "learning_rate": 3.112315010570825e-05, + "loss": 0.1267, + "step": 7150 + }, + { + "epoch": 7.56, + "learning_rate": 3.111786469344609e-05, + "loss": 0.0882, + "step": 7152 + }, + { + "epoch": 7.56, + "learning_rate": 3.111257928118394e-05, + "loss": 0.2262, + "step": 7154 + }, + { + "epoch": 7.56, + "learning_rate": 3.1107293868921776e-05, + "loss": 0.0967, + "step": 7156 + }, + { + "epoch": 7.57, + "learning_rate": 3.110200845665962e-05, + "loss": 0.2711, + "step": 7158 + }, + { + "epoch": 7.57, + "learning_rate": 3.109672304439746e-05, + "loss": 0.0687, + "step": 7160 + }, + { + "epoch": 7.57, + "learning_rate": 3.109143763213531e-05, + "loss": 0.2071, + "step": 7162 + }, + { + "epoch": 7.57, + "learning_rate": 3.108615221987315e-05, + "loss": 0.1116, + "step": 7164 + }, + { + "epoch": 7.58, + "learning_rate": 3.1080866807611e-05, + "loss": 0.1037, + "step": 7166 + }, + { + "epoch": 7.58, + "learning_rate": 3.107558139534884e-05, + "loss": 0.1094, + "step": 7168 + }, + { + "epoch": 7.58, + "learning_rate": 3.1070295983086685e-05, + "loss": 0.123, + "step": 7170 + }, + { + "epoch": 7.58, + "learning_rate": 3.1065010570824525e-05, + "loss": 0.1824, + "step": 7172 + }, + { + "epoch": 7.58, + "learning_rate": 3.105972515856237e-05, + "loss": 0.0796, + "step": 7174 + }, + { + "epoch": 7.59, + "learning_rate": 3.105443974630021e-05, + "loss": 0.0495, + "step": 7176 + }, + { + "epoch": 7.59, + "learning_rate": 3.1049154334038056e-05, + "loss": 0.1828, + "step": 7178 + }, + { + "epoch": 7.59, + "learning_rate": 3.1043868921775895e-05, + "loss": 0.0642, + "step": 7180 + }, + { + "epoch": 7.59, + "learning_rate": 3.103858350951374e-05, + "loss": 0.1328, + "step": 7182 + }, + { + "epoch": 7.59, + "learning_rate": 3.103329809725159e-05, + "loss": 0.1206, + "step": 7184 + }, + { + "epoch": 7.6, + "learning_rate": 3.1028012684989433e-05, + "loss": 0.1608, + "step": 7186 + }, + { + "epoch": 7.6, + "learning_rate": 3.102272727272727e-05, + "loss": 0.157, + "step": 7188 + }, + { + "epoch": 7.6, + "learning_rate": 3.101744186046512e-05, + "loss": 0.0986, + "step": 7190 + }, + { + "epoch": 7.6, + "learning_rate": 3.101215644820296e-05, + "loss": 0.1187, + "step": 7192 + }, + { + "epoch": 7.6, + "learning_rate": 3.1006871035940804e-05, + "loss": 0.0329, + "step": 7194 + }, + { + "epoch": 7.61, + "learning_rate": 3.100158562367865e-05, + "loss": 0.138, + "step": 7196 + }, + { + "epoch": 7.61, + "learning_rate": 3.099630021141649e-05, + "loss": 0.1953, + "step": 7198 + }, + { + "epoch": 7.61, + "learning_rate": 3.0991014799154336e-05, + "loss": 0.2044, + "step": 7200 + }, + { + "epoch": 7.61, + "learning_rate": 3.098572938689218e-05, + "loss": 0.0648, + "step": 7202 + }, + { + "epoch": 7.62, + "learning_rate": 3.098044397463003e-05, + "loss": 0.1554, + "step": 7204 + }, + { + "epoch": 7.62, + "learning_rate": 3.097515856236787e-05, + "loss": 0.1202, + "step": 7206 + }, + { + "epoch": 7.62, + "learning_rate": 3.096987315010571e-05, + "loss": 0.1143, + "step": 7208 + }, + { + "epoch": 7.62, + "learning_rate": 3.096458773784355e-05, + "loss": 0.0999, + "step": 7210 + }, + { + "epoch": 7.62, + "learning_rate": 3.09593023255814e-05, + "loss": 0.1562, + "step": 7212 + }, + { + "epoch": 7.63, + "learning_rate": 3.095401691331924e-05, + "loss": 0.1498, + "step": 7214 + }, + { + "epoch": 7.63, + "learning_rate": 3.0948731501057084e-05, + "loss": 0.1716, + "step": 7216 + }, + { + "epoch": 7.63, + "learning_rate": 3.094344608879492e-05, + "loss": 0.1226, + "step": 7218 + }, + { + "epoch": 7.63, + "learning_rate": 3.0938160676532776e-05, + "loss": 0.1101, + "step": 7220 + }, + { + "epoch": 7.63, + "learning_rate": 3.0932875264270615e-05, + "loss": 0.0776, + "step": 7222 + }, + { + "epoch": 7.64, + "learning_rate": 3.092758985200846e-05, + "loss": 0.1376, + "step": 7224 + }, + { + "epoch": 7.64, + "learning_rate": 3.09223044397463e-05, + "loss": 0.1301, + "step": 7226 + }, + { + "epoch": 7.64, + "learning_rate": 3.0917019027484147e-05, + "loss": 0.1084, + "step": 7228 + }, + { + "epoch": 7.64, + "learning_rate": 3.0911733615221986e-05, + "loss": 0.1025, + "step": 7230 + }, + { + "epoch": 7.64, + "learning_rate": 3.090644820295983e-05, + "loss": 0.0797, + "step": 7232 + }, + { + "epoch": 7.65, + "learning_rate": 3.090116279069767e-05, + "loss": 0.0559, + "step": 7234 + }, + { + "epoch": 7.65, + "learning_rate": 3.089587737843552e-05, + "loss": 0.1271, + "step": 7236 + }, + { + "epoch": 7.65, + "learning_rate": 3.089059196617336e-05, + "loss": 0.1511, + "step": 7238 + }, + { + "epoch": 7.65, + "learning_rate": 3.088530655391121e-05, + "loss": 0.0656, + "step": 7240 + }, + { + "epoch": 7.66, + "learning_rate": 3.088002114164905e-05, + "loss": 0.3565, + "step": 7242 + }, + { + "epoch": 7.66, + "learning_rate": 3.0874735729386895e-05, + "loss": 0.1438, + "step": 7244 + }, + { + "epoch": 7.66, + "learning_rate": 3.0869450317124734e-05, + "loss": 0.1133, + "step": 7246 + }, + { + "epoch": 7.66, + "learning_rate": 3.086416490486258e-05, + "loss": 0.1269, + "step": 7248 + }, + { + "epoch": 7.66, + "learning_rate": 3.0858879492600426e-05, + "loss": 0.0913, + "step": 7250 + }, + { + "epoch": 7.67, + "learning_rate": 3.0853594080338265e-05, + "loss": 0.0962, + "step": 7252 + }, + { + "epoch": 7.67, + "learning_rate": 3.084830866807611e-05, + "loss": 0.2436, + "step": 7254 + }, + { + "epoch": 7.67, + "learning_rate": 3.084302325581396e-05, + "loss": 0.1272, + "step": 7256 + }, + { + "epoch": 7.67, + "learning_rate": 3.0837737843551804e-05, + "loss": 0.0837, + "step": 7258 + }, + { + "epoch": 7.67, + "learning_rate": 3.083245243128964e-05, + "loss": 0.084, + "step": 7260 + }, + { + "epoch": 7.68, + "learning_rate": 3.082716701902749e-05, + "loss": 0.1018, + "step": 7262 + }, + { + "epoch": 7.68, + "learning_rate": 3.082188160676533e-05, + "loss": 0.0634, + "step": 7264 + }, + { + "epoch": 7.68, + "learning_rate": 3.0816596194503174e-05, + "loss": 0.0727, + "step": 7266 + }, + { + "epoch": 7.68, + "learning_rate": 3.0811310782241014e-05, + "loss": 0.0855, + "step": 7268 + }, + { + "epoch": 7.68, + "learning_rate": 3.080602536997886e-05, + "loss": 0.1704, + "step": 7270 + }, + { + "epoch": 7.69, + "learning_rate": 3.08007399577167e-05, + "loss": 0.1212, + "step": 7272 + }, + { + "epoch": 7.69, + "learning_rate": 3.079545454545455e-05, + "loss": 0.1861, + "step": 7274 + }, + { + "epoch": 7.69, + "learning_rate": 3.079016913319239e-05, + "loss": 0.1109, + "step": 7276 + }, + { + "epoch": 7.69, + "learning_rate": 3.078488372093024e-05, + "loss": 0.148, + "step": 7278 + }, + { + "epoch": 7.7, + "learning_rate": 3.0779598308668076e-05, + "loss": 0.107, + "step": 7280 + }, + { + "epoch": 7.7, + "learning_rate": 3.077431289640592e-05, + "loss": 0.1321, + "step": 7282 + }, + { + "epoch": 7.7, + "learning_rate": 3.076902748414376e-05, + "loss": 0.1371, + "step": 7284 + }, + { + "epoch": 7.7, + "learning_rate": 3.076374207188161e-05, + "loss": 0.0779, + "step": 7286 + }, + { + "epoch": 7.7, + "learning_rate": 3.075845665961945e-05, + "loss": 0.2148, + "step": 7288 + }, + { + "epoch": 7.71, + "learning_rate": 3.075317124735729e-05, + "loss": 0.083, + "step": 7290 + }, + { + "epoch": 7.71, + "learning_rate": 3.074788583509514e-05, + "loss": 0.1508, + "step": 7292 + }, + { + "epoch": 7.71, + "learning_rate": 3.0742600422832985e-05, + "loss": 0.1394, + "step": 7294 + }, + { + "epoch": 7.71, + "learning_rate": 3.0737315010570824e-05, + "loss": 0.221, + "step": 7296 + }, + { + "epoch": 7.71, + "learning_rate": 3.073202959830867e-05, + "loss": 0.1494, + "step": 7298 + }, + { + "epoch": 7.72, + "learning_rate": 3.0726744186046517e-05, + "loss": 0.121, + "step": 7300 + }, + { + "epoch": 7.72, + "learning_rate": 3.0721458773784356e-05, + "loss": 0.1156, + "step": 7302 + }, + { + "epoch": 7.72, + "learning_rate": 3.07161733615222e-05, + "loss": 0.0434, + "step": 7304 + }, + { + "epoch": 7.72, + "learning_rate": 3.071088794926004e-05, + "loss": 0.138, + "step": 7306 + }, + { + "epoch": 7.73, + "learning_rate": 3.070560253699789e-05, + "loss": 0.1409, + "step": 7308 + }, + { + "epoch": 7.73, + "learning_rate": 3.070031712473573e-05, + "loss": 0.1047, + "step": 7310 + }, + { + "epoch": 7.73, + "learning_rate": 3.069503171247358e-05, + "loss": 0.1325, + "step": 7312 + }, + { + "epoch": 7.73, + "learning_rate": 3.068974630021142e-05, + "loss": 0.1291, + "step": 7314 + }, + { + "epoch": 7.73, + "learning_rate": 3.0684460887949265e-05, + "loss": 0.0921, + "step": 7316 + }, + { + "epoch": 7.74, + "learning_rate": 3.0679175475687104e-05, + "loss": 0.0725, + "step": 7318 + }, + { + "epoch": 7.74, + "learning_rate": 3.067389006342495e-05, + "loss": 0.13, + "step": 7320 + }, + { + "epoch": 7.74, + "learning_rate": 3.066860465116279e-05, + "loss": 0.0861, + "step": 7322 + }, + { + "epoch": 7.74, + "learning_rate": 3.0663319238900635e-05, + "loss": 0.0812, + "step": 7324 + }, + { + "epoch": 7.74, + "learning_rate": 3.0658033826638475e-05, + "loss": 0.2347, + "step": 7326 + }, + { + "epoch": 7.75, + "learning_rate": 3.065274841437633e-05, + "loss": 0.0432, + "step": 7328 + }, + { + "epoch": 7.75, + "learning_rate": 3.064746300211417e-05, + "loss": 0.168, + "step": 7330 + }, + { + "epoch": 7.75, + "learning_rate": 3.064217758985201e-05, + "loss": 0.1131, + "step": 7332 + }, + { + "epoch": 7.75, + "learning_rate": 3.063689217758985e-05, + "loss": 0.155, + "step": 7334 + }, + { + "epoch": 7.75, + "learning_rate": 3.06316067653277e-05, + "loss": 0.1038, + "step": 7336 + }, + { + "epoch": 7.76, + "learning_rate": 3.062632135306554e-05, + "loss": 0.1024, + "step": 7338 + }, + { + "epoch": 7.76, + "learning_rate": 3.0621035940803384e-05, + "loss": 0.108, + "step": 7340 + }, + { + "epoch": 7.76, + "learning_rate": 3.061575052854122e-05, + "loss": 0.0993, + "step": 7342 + }, + { + "epoch": 7.76, + "learning_rate": 3.061046511627907e-05, + "loss": 0.1252, + "step": 7344 + }, + { + "epoch": 7.77, + "learning_rate": 3.0605179704016915e-05, + "loss": 0.1117, + "step": 7346 + }, + { + "epoch": 7.77, + "learning_rate": 3.059989429175476e-05, + "loss": 0.0714, + "step": 7348 + }, + { + "epoch": 7.77, + "learning_rate": 3.05946088794926e-05, + "loss": 0.152, + "step": 7350 + }, + { + "epoch": 7.77, + "learning_rate": 3.0589323467230446e-05, + "loss": 0.2915, + "step": 7352 + }, + { + "epoch": 7.77, + "learning_rate": 3.058403805496829e-05, + "loss": 0.1396, + "step": 7354 + }, + { + "epoch": 7.78, + "learning_rate": 3.057875264270613e-05, + "loss": 0.1844, + "step": 7356 + }, + { + "epoch": 7.78, + "learning_rate": 3.057346723044398e-05, + "loss": 0.1751, + "step": 7358 + }, + { + "epoch": 7.78, + "learning_rate": 3.056818181818182e-05, + "loss": 0.1955, + "step": 7360 + }, + { + "epoch": 7.78, + "learning_rate": 3.056289640591966e-05, + "loss": 0.1451, + "step": 7362 + }, + { + "epoch": 7.78, + "learning_rate": 3.055761099365751e-05, + "loss": 0.131, + "step": 7364 + }, + { + "epoch": 7.79, + "learning_rate": 3.0552325581395355e-05, + "loss": 0.094, + "step": 7366 + }, + { + "epoch": 7.79, + "learning_rate": 3.0547040169133195e-05, + "loss": 0.1367, + "step": 7368 + }, + { + "epoch": 7.79, + "learning_rate": 3.054175475687104e-05, + "loss": 0.1617, + "step": 7370 + }, + { + "epoch": 7.79, + "learning_rate": 3.053646934460888e-05, + "loss": 0.213, + "step": 7372 + }, + { + "epoch": 7.79, + "learning_rate": 3.0531183932346726e-05, + "loss": 0.0715, + "step": 7374 + }, + { + "epoch": 7.8, + "learning_rate": 3.0525898520084565e-05, + "loss": 0.1576, + "step": 7376 + }, + { + "epoch": 7.8, + "learning_rate": 3.052061310782241e-05, + "loss": 0.1469, + "step": 7378 + }, + { + "epoch": 7.8, + "learning_rate": 3.0515327695560254e-05, + "loss": 0.0214, + "step": 7380 + }, + { + "epoch": 7.8, + "learning_rate": 3.05100422832981e-05, + "loss": 0.1703, + "step": 7382 + }, + { + "epoch": 7.81, + "learning_rate": 3.0504756871035946e-05, + "loss": 0.0795, + "step": 7384 + }, + { + "epoch": 7.81, + "learning_rate": 3.049947145877379e-05, + "loss": 0.0787, + "step": 7386 + }, + { + "epoch": 7.81, + "learning_rate": 3.049418604651163e-05, + "loss": 0.1197, + "step": 7388 + }, + { + "epoch": 7.81, + "learning_rate": 3.0488900634249474e-05, + "loss": 0.0709, + "step": 7390 + }, + { + "epoch": 7.81, + "learning_rate": 3.0483615221987317e-05, + "loss": 0.0778, + "step": 7392 + }, + { + "epoch": 7.82, + "learning_rate": 3.047832980972516e-05, + "loss": 0.2426, + "step": 7394 + }, + { + "epoch": 7.82, + "learning_rate": 3.0473044397463002e-05, + "loss": 0.0691, + "step": 7396 + }, + { + "epoch": 7.82, + "learning_rate": 3.0467758985200845e-05, + "loss": 0.1296, + "step": 7398 + }, + { + "epoch": 7.82, + "learning_rate": 3.0462473572938694e-05, + "loss": 0.1927, + "step": 7400 + }, + { + "epoch": 7.82, + "learning_rate": 3.0457188160676537e-05, + "loss": 0.1771, + "step": 7402 + }, + { + "epoch": 7.83, + "learning_rate": 3.045190274841438e-05, + "loss": 0.1729, + "step": 7404 + }, + { + "epoch": 7.83, + "learning_rate": 3.0446617336152222e-05, + "loss": 0.1181, + "step": 7406 + }, + { + "epoch": 7.83, + "learning_rate": 3.0441331923890065e-05, + "loss": 0.1441, + "step": 7408 + }, + { + "epoch": 7.83, + "learning_rate": 3.0436046511627908e-05, + "loss": 0.1117, + "step": 7410 + }, + { + "epoch": 7.84, + "learning_rate": 3.043076109936575e-05, + "loss": 0.1002, + "step": 7412 + }, + { + "epoch": 7.84, + "learning_rate": 3.0425475687103593e-05, + "loss": 0.1109, + "step": 7414 + }, + { + "epoch": 7.84, + "learning_rate": 3.0420190274841436e-05, + "loss": 0.1843, + "step": 7416 + }, + { + "epoch": 7.84, + "learning_rate": 3.0414904862579285e-05, + "loss": 0.2138, + "step": 7418 + }, + { + "epoch": 7.84, + "learning_rate": 3.0409619450317128e-05, + "loss": 0.0987, + "step": 7420 + }, + { + "epoch": 7.85, + "learning_rate": 3.040433403805497e-05, + "loss": 0.2392, + "step": 7422 + }, + { + "epoch": 7.85, + "learning_rate": 3.0399048625792813e-05, + "loss": 0.2863, + "step": 7424 + }, + { + "epoch": 7.85, + "learning_rate": 3.0393763213530656e-05, + "loss": 0.1728, + "step": 7426 + }, + { + "epoch": 7.85, + "learning_rate": 3.0388477801268502e-05, + "loss": 0.21, + "step": 7428 + }, + { + "epoch": 7.85, + "learning_rate": 3.0383192389006344e-05, + "loss": 0.1339, + "step": 7430 + }, + { + "epoch": 7.86, + "learning_rate": 3.0377906976744187e-05, + "loss": 0.0803, + "step": 7432 + }, + { + "epoch": 7.86, + "learning_rate": 3.037262156448203e-05, + "loss": 0.303, + "step": 7434 + }, + { + "epoch": 7.86, + "learning_rate": 3.0367336152219876e-05, + "loss": 0.1453, + "step": 7436 + }, + { + "epoch": 7.86, + "learning_rate": 3.0362050739957722e-05, + "loss": 0.0911, + "step": 7438 + }, + { + "epoch": 7.86, + "learning_rate": 3.0356765327695565e-05, + "loss": 0.1804, + "step": 7440 + }, + { + "epoch": 7.87, + "learning_rate": 3.0351479915433407e-05, + "loss": 0.0878, + "step": 7442 + }, + { + "epoch": 7.87, + "learning_rate": 3.034619450317125e-05, + "loss": 0.1076, + "step": 7444 + }, + { + "epoch": 7.87, + "learning_rate": 3.0340909090909093e-05, + "loss": 0.0919, + "step": 7446 + }, + { + "epoch": 7.87, + "learning_rate": 3.0335623678646935e-05, + "loss": 0.1405, + "step": 7448 + }, + { + "epoch": 7.88, + "learning_rate": 3.0330338266384778e-05, + "loss": 0.2786, + "step": 7450 + }, + { + "epoch": 7.88, + "learning_rate": 3.032505285412262e-05, + "loss": 0.0523, + "step": 7452 + }, + { + "epoch": 7.88, + "learning_rate": 3.0319767441860463e-05, + "loss": 0.1314, + "step": 7454 + }, + { + "epoch": 7.88, + "learning_rate": 3.0314482029598313e-05, + "loss": 0.0958, + "step": 7456 + }, + { + "epoch": 7.88, + "learning_rate": 3.0309196617336155e-05, + "loss": 0.1255, + "step": 7458 + }, + { + "epoch": 7.89, + "learning_rate": 3.0303911205073998e-05, + "loss": 0.1244, + "step": 7460 + }, + { + "epoch": 7.89, + "learning_rate": 3.029862579281184e-05, + "loss": 0.1663, + "step": 7462 + }, + { + "epoch": 7.89, + "learning_rate": 3.0293340380549683e-05, + "loss": 0.092, + "step": 7464 + }, + { + "epoch": 7.89, + "learning_rate": 3.0288054968287526e-05, + "loss": 0.1858, + "step": 7466 + }, + { + "epoch": 7.89, + "learning_rate": 3.028276955602537e-05, + "loss": 0.1454, + "step": 7468 + }, + { + "epoch": 7.9, + "learning_rate": 3.027748414376321e-05, + "loss": 0.1785, + "step": 7470 + }, + { + "epoch": 7.9, + "learning_rate": 3.0272198731501057e-05, + "loss": 0.1131, + "step": 7472 + }, + { + "epoch": 7.9, + "learning_rate": 3.0266913319238904e-05, + "loss": 0.1604, + "step": 7474 + }, + { + "epoch": 7.9, + "learning_rate": 3.0261627906976746e-05, + "loss": 0.0588, + "step": 7476 + }, + { + "epoch": 7.9, + "learning_rate": 3.025634249471459e-05, + "loss": 0.1184, + "step": 7478 + }, + { + "epoch": 7.91, + "learning_rate": 3.025105708245243e-05, + "loss": 0.1357, + "step": 7480 + }, + { + "epoch": 7.91, + "learning_rate": 3.0245771670190278e-05, + "loss": 0.0666, + "step": 7482 + }, + { + "epoch": 7.91, + "learning_rate": 3.024048625792812e-05, + "loss": 0.0618, + "step": 7484 + }, + { + "epoch": 7.91, + "learning_rate": 3.0235200845665963e-05, + "loss": 0.1297, + "step": 7486 + }, + { + "epoch": 7.92, + "learning_rate": 3.0229915433403806e-05, + "loss": 0.0984, + "step": 7488 + }, + { + "epoch": 7.92, + "learning_rate": 3.0224630021141648e-05, + "loss": 0.2079, + "step": 7490 + }, + { + "epoch": 7.92, + "learning_rate": 3.0219344608879498e-05, + "loss": 0.0577, + "step": 7492 + }, + { + "epoch": 7.92, + "learning_rate": 3.021405919661734e-05, + "loss": 0.1102, + "step": 7494 + }, + { + "epoch": 7.92, + "learning_rate": 3.0208773784355183e-05, + "loss": 0.108, + "step": 7496 + }, + { + "epoch": 7.93, + "learning_rate": 3.0203488372093026e-05, + "loss": 0.2446, + "step": 7498 + }, + { + "epoch": 7.93, + "learning_rate": 3.019820295983087e-05, + "loss": 0.0924, + "step": 7500 + }, + { + "epoch": 7.93, + "eval_cer": 0.02718723282986606, + "eval_loss": 0.7875532507896423, + "eval_runtime": 127.8203, + "eval_samples_per_second": 6.58, + "eval_steps_per_second": 0.829, + "step": 7500 + }, + { + "epoch": 7.93, + "learning_rate": 3.019291754756871e-05, + "loss": 0.1296, + "step": 7502 + }, + { + "epoch": 7.93, + "learning_rate": 3.0187632135306554e-05, + "loss": 0.2015, + "step": 7504 + }, + { + "epoch": 7.93, + "learning_rate": 3.0182346723044396e-05, + "loss": 0.0772, + "step": 7506 + }, + { + "epoch": 7.94, + "learning_rate": 3.017706131078224e-05, + "loss": 0.1157, + "step": 7508 + }, + { + "epoch": 7.94, + "learning_rate": 3.017177589852009e-05, + "loss": 0.0763, + "step": 7510 + }, + { + "epoch": 7.94, + "learning_rate": 3.016649048625793e-05, + "loss": 0.275, + "step": 7512 + }, + { + "epoch": 7.94, + "learning_rate": 3.0161205073995774e-05, + "loss": 0.1857, + "step": 7514 + }, + { + "epoch": 7.95, + "learning_rate": 3.0155919661733617e-05, + "loss": 0.1416, + "step": 7516 + }, + { + "epoch": 7.95, + "learning_rate": 3.015063424947146e-05, + "loss": 0.048, + "step": 7518 + }, + { + "epoch": 7.95, + "learning_rate": 3.0145348837209302e-05, + "loss": 0.0743, + "step": 7520 + }, + { + "epoch": 7.95, + "learning_rate": 3.0140063424947145e-05, + "loss": 0.1346, + "step": 7522 + }, + { + "epoch": 7.95, + "learning_rate": 3.0134778012684987e-05, + "loss": 0.0731, + "step": 7524 + }, + { + "epoch": 7.96, + "learning_rate": 3.0129492600422833e-05, + "loss": 0.0553, + "step": 7526 + }, + { + "epoch": 7.96, + "learning_rate": 3.012420718816068e-05, + "loss": 0.0786, + "step": 7528 + }, + { + "epoch": 7.96, + "learning_rate": 3.0118921775898522e-05, + "loss": 0.0682, + "step": 7530 + }, + { + "epoch": 7.96, + "learning_rate": 3.0113636363636365e-05, + "loss": 0.0939, + "step": 7532 + }, + { + "epoch": 7.96, + "learning_rate": 3.0108350951374207e-05, + "loss": 0.2162, + "step": 7534 + }, + { + "epoch": 7.97, + "learning_rate": 3.0103065539112053e-05, + "loss": 0.2454, + "step": 7536 + }, + { + "epoch": 7.97, + "learning_rate": 3.0097780126849896e-05, + "loss": 0.1349, + "step": 7538 + }, + { + "epoch": 7.97, + "learning_rate": 3.009249471458774e-05, + "loss": 0.1636, + "step": 7540 + }, + { + "epoch": 7.97, + "learning_rate": 3.008720930232558e-05, + "loss": 0.0599, + "step": 7542 + }, + { + "epoch": 7.97, + "learning_rate": 3.0081923890063424e-05, + "loss": 0.1237, + "step": 7544 + }, + { + "epoch": 7.98, + "learning_rate": 3.0076638477801274e-05, + "loss": 0.1353, + "step": 7546 + }, + { + "epoch": 7.98, + "learning_rate": 3.0071353065539116e-05, + "loss": 0.162, + "step": 7548 + }, + { + "epoch": 7.98, + "learning_rate": 3.006606765327696e-05, + "loss": 0.2098, + "step": 7550 + }, + { + "epoch": 7.98, + "learning_rate": 3.00607822410148e-05, + "loss": 0.0433, + "step": 7552 + }, + { + "epoch": 7.99, + "learning_rate": 3.0055496828752644e-05, + "loss": 0.0537, + "step": 7554 + }, + { + "epoch": 7.99, + "learning_rate": 3.0050211416490487e-05, + "loss": 0.0926, + "step": 7556 + }, + { + "epoch": 7.99, + "learning_rate": 3.004492600422833e-05, + "loss": 0.1546, + "step": 7558 + }, + { + "epoch": 7.99, + "learning_rate": 3.0039640591966172e-05, + "loss": 0.1787, + "step": 7560 + }, + { + "epoch": 7.99, + "learning_rate": 3.0034355179704015e-05, + "loss": 0.1037, + "step": 7562 + }, + { + "epoch": 8.0, + "learning_rate": 3.0029069767441864e-05, + "loss": 0.1001, + "step": 7564 + }, + { + "epoch": 8.0, + "learning_rate": 3.0023784355179707e-05, + "loss": 0.1114, + "step": 7566 + }, + { + "epoch": 8.0, + "learning_rate": 3.001849894291755e-05, + "loss": 0.0804, + "step": 7568 + }, + { + "epoch": 8.0, + "learning_rate": 3.0013213530655392e-05, + "loss": 0.0532, + "step": 7570 + }, + { + "epoch": 8.0, + "learning_rate": 3.0007928118393235e-05, + "loss": 0.0644, + "step": 7572 + }, + { + "epoch": 8.01, + "learning_rate": 3.0002642706131078e-05, + "loss": 0.0707, + "step": 7574 + }, + { + "epoch": 8.01, + "learning_rate": 2.999735729386892e-05, + "loss": 0.1153, + "step": 7576 + }, + { + "epoch": 8.01, + "learning_rate": 2.9992071881606763e-05, + "loss": 0.0249, + "step": 7578 + }, + { + "epoch": 8.01, + "learning_rate": 2.998678646934461e-05, + "loss": 0.054, + "step": 7580 + }, + { + "epoch": 8.01, + "learning_rate": 2.9981501057082455e-05, + "loss": 0.1267, + "step": 7582 + }, + { + "epoch": 8.02, + "learning_rate": 2.9976215644820298e-05, + "loss": 0.0665, + "step": 7584 + }, + { + "epoch": 8.02, + "learning_rate": 2.997093023255814e-05, + "loss": 0.1242, + "step": 7586 + }, + { + "epoch": 8.02, + "learning_rate": 2.9965644820295987e-05, + "loss": 0.0634, + "step": 7588 + }, + { + "epoch": 8.02, + "learning_rate": 2.996035940803383e-05, + "loss": 0.132, + "step": 7590 + }, + { + "epoch": 8.03, + "learning_rate": 2.9955073995771672e-05, + "loss": 0.0788, + "step": 7592 + }, + { + "epoch": 8.03, + "learning_rate": 2.9949788583509515e-05, + "loss": 0.1367, + "step": 7594 + }, + { + "epoch": 8.03, + "learning_rate": 2.9944503171247357e-05, + "loss": 0.0911, + "step": 7596 + }, + { + "epoch": 8.03, + "learning_rate": 2.99392177589852e-05, + "loss": 0.2105, + "step": 7598 + }, + { + "epoch": 8.03, + "learning_rate": 2.993393234672305e-05, + "loss": 0.1738, + "step": 7600 + }, + { + "epoch": 8.04, + "learning_rate": 2.9928646934460892e-05, + "loss": 0.22, + "step": 7602 + }, + { + "epoch": 8.04, + "learning_rate": 2.9923361522198735e-05, + "loss": 0.1878, + "step": 7604 + }, + { + "epoch": 8.04, + "learning_rate": 2.9918076109936577e-05, + "loss": 0.0618, + "step": 7606 + }, + { + "epoch": 8.04, + "learning_rate": 2.991279069767442e-05, + "loss": 0.0667, + "step": 7608 + }, + { + "epoch": 8.04, + "learning_rate": 2.9907505285412263e-05, + "loss": 0.0914, + "step": 7610 + }, + { + "epoch": 8.05, + "learning_rate": 2.9902219873150105e-05, + "loss": 0.1477, + "step": 7612 + }, + { + "epoch": 8.05, + "learning_rate": 2.9896934460887948e-05, + "loss": 0.16, + "step": 7614 + }, + { + "epoch": 8.05, + "learning_rate": 2.989164904862579e-05, + "loss": 0.1531, + "step": 7616 + }, + { + "epoch": 8.05, + "learning_rate": 2.988636363636364e-05, + "loss": 0.1038, + "step": 7618 + }, + { + "epoch": 8.05, + "learning_rate": 2.9881078224101483e-05, + "loss": 0.0792, + "step": 7620 + }, + { + "epoch": 8.06, + "learning_rate": 2.9875792811839326e-05, + "loss": 0.1897, + "step": 7622 + }, + { + "epoch": 8.06, + "learning_rate": 2.9870507399577168e-05, + "loss": 0.1093, + "step": 7624 + }, + { + "epoch": 8.06, + "learning_rate": 2.986522198731501e-05, + "loss": 0.1908, + "step": 7626 + }, + { + "epoch": 8.06, + "learning_rate": 2.9859936575052854e-05, + "loss": 0.1275, + "step": 7628 + }, + { + "epoch": 8.07, + "learning_rate": 2.9854651162790696e-05, + "loss": 0.0609, + "step": 7630 + }, + { + "epoch": 8.07, + "learning_rate": 2.9849365750528542e-05, + "loss": 0.105, + "step": 7632 + }, + { + "epoch": 8.07, + "learning_rate": 2.9844080338266385e-05, + "loss": 0.0822, + "step": 7634 + }, + { + "epoch": 8.07, + "learning_rate": 2.983879492600423e-05, + "loss": 0.1121, + "step": 7636 + }, + { + "epoch": 8.07, + "learning_rate": 2.9833509513742074e-05, + "loss": 0.0555, + "step": 7638 + }, + { + "epoch": 8.08, + "learning_rate": 2.9828224101479916e-05, + "loss": 0.1721, + "step": 7640 + }, + { + "epoch": 8.08, + "learning_rate": 2.9822938689217762e-05, + "loss": 0.0414, + "step": 7642 + }, + { + "epoch": 8.08, + "learning_rate": 2.9817653276955605e-05, + "loss": 0.1267, + "step": 7644 + }, + { + "epoch": 8.08, + "learning_rate": 2.9812367864693448e-05, + "loss": 0.116, + "step": 7646 + }, + { + "epoch": 8.08, + "learning_rate": 2.980708245243129e-05, + "loss": 0.1334, + "step": 7648 + }, + { + "epoch": 8.09, + "learning_rate": 2.9801797040169133e-05, + "loss": 0.062, + "step": 7650 + }, + { + "epoch": 8.09, + "learning_rate": 2.9796511627906976e-05, + "loss": 0.2137, + "step": 7652 + }, + { + "epoch": 8.09, + "learning_rate": 2.9791226215644825e-05, + "loss": 0.2643, + "step": 7654 + }, + { + "epoch": 8.09, + "learning_rate": 2.9785940803382668e-05, + "loss": 0.159, + "step": 7656 + }, + { + "epoch": 8.1, + "learning_rate": 2.978065539112051e-05, + "loss": 0.0571, + "step": 7658 + }, + { + "epoch": 8.1, + "learning_rate": 2.9775369978858353e-05, + "loss": 0.1387, + "step": 7660 + }, + { + "epoch": 8.1, + "learning_rate": 2.9770084566596196e-05, + "loss": 0.0776, + "step": 7662 + }, + { + "epoch": 8.1, + "learning_rate": 2.976479915433404e-05, + "loss": 0.0535, + "step": 7664 + }, + { + "epoch": 8.1, + "learning_rate": 2.975951374207188e-05, + "loss": 0.06, + "step": 7666 + }, + { + "epoch": 8.11, + "learning_rate": 2.9754228329809724e-05, + "loss": 0.1233, + "step": 7668 + }, + { + "epoch": 8.11, + "learning_rate": 2.9748942917547567e-05, + "loss": 0.1132, + "step": 7670 + }, + { + "epoch": 8.11, + "learning_rate": 2.9743657505285416e-05, + "loss": 0.1112, + "step": 7672 + }, + { + "epoch": 8.11, + "learning_rate": 2.973837209302326e-05, + "loss": 0.2012, + "step": 7674 + }, + { + "epoch": 8.11, + "learning_rate": 2.97330866807611e-05, + "loss": 0.0808, + "step": 7676 + }, + { + "epoch": 8.12, + "learning_rate": 2.9727801268498944e-05, + "loss": 0.063, + "step": 7678 + }, + { + "epoch": 8.12, + "learning_rate": 2.9722515856236787e-05, + "loss": 0.0613, + "step": 7680 + }, + { + "epoch": 8.12, + "learning_rate": 2.971723044397463e-05, + "loss": 0.1812, + "step": 7682 + }, + { + "epoch": 8.12, + "learning_rate": 2.9711945031712472e-05, + "loss": 0.0844, + "step": 7684 + }, + { + "epoch": 8.12, + "learning_rate": 2.9706659619450318e-05, + "loss": 0.2003, + "step": 7686 + }, + { + "epoch": 8.13, + "learning_rate": 2.970137420718816e-05, + "loss": 0.1232, + "step": 7688 + }, + { + "epoch": 8.13, + "learning_rate": 2.9696088794926007e-05, + "loss": 0.1256, + "step": 7690 + }, + { + "epoch": 8.13, + "learning_rate": 2.969080338266385e-05, + "loss": 0.0865, + "step": 7692 + }, + { + "epoch": 8.13, + "learning_rate": 2.9685517970401696e-05, + "loss": 0.102, + "step": 7694 + }, + { + "epoch": 8.14, + "learning_rate": 2.9680232558139538e-05, + "loss": 0.1025, + "step": 7696 + }, + { + "epoch": 8.14, + "learning_rate": 2.967494714587738e-05, + "loss": 0.0941, + "step": 7698 + }, + { + "epoch": 8.14, + "learning_rate": 2.9669661733615224e-05, + "loss": 0.0641, + "step": 7700 + }, + { + "epoch": 8.14, + "learning_rate": 2.9664376321353066e-05, + "loss": 0.0927, + "step": 7702 + }, + { + "epoch": 8.14, + "learning_rate": 2.965909090909091e-05, + "loss": 0.1103, + "step": 7704 + }, + { + "epoch": 8.15, + "learning_rate": 2.965380549682875e-05, + "loss": 0.2576, + "step": 7706 + }, + { + "epoch": 8.15, + "learning_rate": 2.96485200845666e-05, + "loss": 0.0838, + "step": 7708 + }, + { + "epoch": 8.15, + "learning_rate": 2.9643234672304444e-05, + "loss": 0.1668, + "step": 7710 + }, + { + "epoch": 8.15, + "learning_rate": 2.9637949260042286e-05, + "loss": 0.1558, + "step": 7712 + }, + { + "epoch": 8.15, + "learning_rate": 2.963266384778013e-05, + "loss": 0.0759, + "step": 7714 + }, + { + "epoch": 8.16, + "learning_rate": 2.9627378435517972e-05, + "loss": 0.111, + "step": 7716 + }, + { + "epoch": 8.16, + "learning_rate": 2.9622093023255814e-05, + "loss": 0.1078, + "step": 7718 + }, + { + "epoch": 8.16, + "learning_rate": 2.9616807610993657e-05, + "loss": 0.1482, + "step": 7720 + }, + { + "epoch": 8.16, + "learning_rate": 2.96115221987315e-05, + "loss": 0.1622, + "step": 7722 + }, + { + "epoch": 8.16, + "learning_rate": 2.9606236786469342e-05, + "loss": 0.1325, + "step": 7724 + }, + { + "epoch": 8.17, + "learning_rate": 2.9600951374207192e-05, + "loss": 0.1058, + "step": 7726 + }, + { + "epoch": 8.17, + "learning_rate": 2.9595665961945035e-05, + "loss": 0.0843, + "step": 7728 + }, + { + "epoch": 8.17, + "learning_rate": 2.9590380549682877e-05, + "loss": 0.1694, + "step": 7730 + }, + { + "epoch": 8.17, + "learning_rate": 2.958509513742072e-05, + "loss": 0.0398, + "step": 7732 + }, + { + "epoch": 8.18, + "learning_rate": 2.9579809725158563e-05, + "loss": 0.1988, + "step": 7734 + }, + { + "epoch": 8.18, + "learning_rate": 2.9574524312896405e-05, + "loss": 0.1308, + "step": 7736 + }, + { + "epoch": 8.18, + "learning_rate": 2.956923890063425e-05, + "loss": 0.053, + "step": 7738 + }, + { + "epoch": 8.18, + "learning_rate": 2.9563953488372094e-05, + "loss": 0.1263, + "step": 7740 + }, + { + "epoch": 8.18, + "learning_rate": 2.9558668076109937e-05, + "loss": 0.1169, + "step": 7742 + }, + { + "epoch": 8.19, + "learning_rate": 2.9553382663847783e-05, + "loss": 0.02, + "step": 7744 + }, + { + "epoch": 8.19, + "learning_rate": 2.9548097251585625e-05, + "loss": 0.083, + "step": 7746 + }, + { + "epoch": 8.19, + "learning_rate": 2.954281183932347e-05, + "loss": 0.0907, + "step": 7748 + }, + { + "epoch": 8.19, + "learning_rate": 2.9537526427061314e-05, + "loss": 0.0935, + "step": 7750 + }, + { + "epoch": 8.19, + "learning_rate": 2.9532241014799157e-05, + "loss": 0.0604, + "step": 7752 + }, + { + "epoch": 8.2, + "learning_rate": 2.9526955602537e-05, + "loss": 0.092, + "step": 7754 + }, + { + "epoch": 8.2, + "learning_rate": 2.9521670190274842e-05, + "loss": 0.1189, + "step": 7756 + }, + { + "epoch": 8.2, + "learning_rate": 2.9516384778012685e-05, + "loss": 0.1149, + "step": 7758 + }, + { + "epoch": 8.2, + "learning_rate": 2.9511099365750527e-05, + "loss": 0.111, + "step": 7760 + }, + { + "epoch": 8.21, + "learning_rate": 2.9505813953488377e-05, + "loss": 0.146, + "step": 7762 + }, + { + "epoch": 8.21, + "learning_rate": 2.950052854122622e-05, + "loss": 0.0594, + "step": 7764 + }, + { + "epoch": 8.21, + "learning_rate": 2.9495243128964062e-05, + "loss": 0.1223, + "step": 7766 + }, + { + "epoch": 8.21, + "learning_rate": 2.9489957716701905e-05, + "loss": 0.1484, + "step": 7768 + }, + { + "epoch": 8.21, + "learning_rate": 2.9484672304439748e-05, + "loss": 0.139, + "step": 7770 + }, + { + "epoch": 8.22, + "learning_rate": 2.947938689217759e-05, + "loss": 0.2436, + "step": 7772 + }, + { + "epoch": 8.22, + "learning_rate": 2.9474101479915433e-05, + "loss": 0.1065, + "step": 7774 + }, + { + "epoch": 8.22, + "learning_rate": 2.9468816067653276e-05, + "loss": 0.1723, + "step": 7776 + }, + { + "epoch": 8.22, + "learning_rate": 2.9463530655391118e-05, + "loss": 0.0795, + "step": 7778 + }, + { + "epoch": 8.22, + "learning_rate": 2.9458245243128968e-05, + "loss": 0.0833, + "step": 7780 + }, + { + "epoch": 8.23, + "learning_rate": 2.945295983086681e-05, + "loss": 0.0397, + "step": 7782 + }, + { + "epoch": 8.23, + "learning_rate": 2.9447674418604653e-05, + "loss": 0.055, + "step": 7784 + }, + { + "epoch": 8.23, + "learning_rate": 2.9442389006342496e-05, + "loss": 0.0573, + "step": 7786 + }, + { + "epoch": 8.23, + "learning_rate": 2.943710359408034e-05, + "loss": 0.0732, + "step": 7788 + }, + { + "epoch": 8.23, + "learning_rate": 2.943181818181818e-05, + "loss": 0.1044, + "step": 7790 + }, + { + "epoch": 8.24, + "learning_rate": 2.9426532769556027e-05, + "loss": 0.0832, + "step": 7792 + }, + { + "epoch": 8.24, + "learning_rate": 2.942124735729387e-05, + "loss": 0.0394, + "step": 7794 + }, + { + "epoch": 8.24, + "learning_rate": 2.9415961945031712e-05, + "loss": 0.0308, + "step": 7796 + }, + { + "epoch": 8.24, + "learning_rate": 2.941067653276956e-05, + "loss": 0.1011, + "step": 7798 + }, + { + "epoch": 8.25, + "learning_rate": 2.94053911205074e-05, + "loss": 0.1278, + "step": 7800 + }, + { + "epoch": 8.25, + "learning_rate": 2.9400105708245247e-05, + "loss": 0.1258, + "step": 7802 + }, + { + "epoch": 8.25, + "learning_rate": 2.939482029598309e-05, + "loss": 0.1203, + "step": 7804 + }, + { + "epoch": 8.25, + "learning_rate": 2.9389534883720933e-05, + "loss": 0.0701, + "step": 7806 + }, + { + "epoch": 8.25, + "learning_rate": 2.9384249471458775e-05, + "loss": 0.1125, + "step": 7808 + }, + { + "epoch": 8.26, + "learning_rate": 2.9378964059196618e-05, + "loss": 0.158, + "step": 7810 + }, + { + "epoch": 8.26, + "learning_rate": 2.937367864693446e-05, + "loss": 0.1057, + "step": 7812 + }, + { + "epoch": 8.26, + "learning_rate": 2.9368393234672303e-05, + "loss": 0.0738, + "step": 7814 + }, + { + "epoch": 8.26, + "learning_rate": 2.9363107822410153e-05, + "loss": 0.1133, + "step": 7816 + }, + { + "epoch": 8.26, + "learning_rate": 2.9357822410147995e-05, + "loss": 0.108, + "step": 7818 + }, + { + "epoch": 8.27, + "learning_rate": 2.9352536997885838e-05, + "loss": 0.0945, + "step": 7820 + }, + { + "epoch": 8.27, + "learning_rate": 2.934725158562368e-05, + "loss": 0.0498, + "step": 7822 + }, + { + "epoch": 8.27, + "learning_rate": 2.9341966173361523e-05, + "loss": 0.0087, + "step": 7824 + }, + { + "epoch": 8.27, + "learning_rate": 2.9336680761099366e-05, + "loss": 0.027, + "step": 7826 + }, + { + "epoch": 8.27, + "learning_rate": 2.933139534883721e-05, + "loss": 0.0578, + "step": 7828 + }, + { + "epoch": 8.28, + "learning_rate": 2.932610993657505e-05, + "loss": 0.118, + "step": 7830 + }, + { + "epoch": 8.28, + "learning_rate": 2.9320824524312894e-05, + "loss": 0.1397, + "step": 7832 + }, + { + "epoch": 8.28, + "learning_rate": 2.9315539112050744e-05, + "loss": 0.1436, + "step": 7834 + }, + { + "epoch": 8.28, + "learning_rate": 2.9310253699788586e-05, + "loss": 0.0833, + "step": 7836 + }, + { + "epoch": 8.29, + "learning_rate": 2.930496828752643e-05, + "loss": 0.2005, + "step": 7838 + }, + { + "epoch": 8.29, + "learning_rate": 2.929968287526427e-05, + "loss": 0.0683, + "step": 7840 + }, + { + "epoch": 8.29, + "learning_rate": 2.9294397463002114e-05, + "loss": 0.1512, + "step": 7842 + }, + { + "epoch": 8.29, + "learning_rate": 2.9289112050739957e-05, + "loss": 0.1971, + "step": 7844 + }, + { + "epoch": 8.29, + "learning_rate": 2.9283826638477803e-05, + "loss": 0.1115, + "step": 7846 + }, + { + "epoch": 8.3, + "learning_rate": 2.9278541226215646e-05, + "loss": 0.1307, + "step": 7848 + }, + { + "epoch": 8.3, + "learning_rate": 2.927325581395349e-05, + "loss": 0.1031, + "step": 7850 + }, + { + "epoch": 8.3, + "learning_rate": 2.9267970401691334e-05, + "loss": 0.0409, + "step": 7852 + }, + { + "epoch": 8.3, + "learning_rate": 2.926268498942918e-05, + "loss": 0.0485, + "step": 7854 + }, + { + "epoch": 8.3, + "learning_rate": 2.9257399577167023e-05, + "loss": 0.0778, + "step": 7856 + }, + { + "epoch": 8.31, + "learning_rate": 2.9252114164904866e-05, + "loss": 0.2528, + "step": 7858 + }, + { + "epoch": 8.31, + "learning_rate": 2.924682875264271e-05, + "loss": 0.0568, + "step": 7860 + }, + { + "epoch": 8.31, + "learning_rate": 2.924154334038055e-05, + "loss": 0.0842, + "step": 7862 + }, + { + "epoch": 8.31, + "learning_rate": 2.9236257928118394e-05, + "loss": 0.1228, + "step": 7864 + }, + { + "epoch": 8.32, + "learning_rate": 2.9230972515856236e-05, + "loss": 0.1569, + "step": 7866 + }, + { + "epoch": 8.32, + "learning_rate": 2.922568710359408e-05, + "loss": 0.1621, + "step": 7868 + }, + { + "epoch": 8.32, + "learning_rate": 2.922040169133193e-05, + "loss": 0.1528, + "step": 7870 + }, + { + "epoch": 8.32, + "learning_rate": 2.921511627906977e-05, + "loss": 0.0645, + "step": 7872 + }, + { + "epoch": 8.32, + "learning_rate": 2.9209830866807614e-05, + "loss": 0.1805, + "step": 7874 + }, + { + "epoch": 8.33, + "learning_rate": 2.9204545454545457e-05, + "loss": 0.159, + "step": 7876 + }, + { + "epoch": 8.33, + "learning_rate": 2.91992600422833e-05, + "loss": 0.2725, + "step": 7878 + }, + { + "epoch": 8.33, + "learning_rate": 2.9193974630021142e-05, + "loss": 0.2843, + "step": 7880 + }, + { + "epoch": 8.33, + "learning_rate": 2.9188689217758985e-05, + "loss": 0.0793, + "step": 7882 + }, + { + "epoch": 8.33, + "learning_rate": 2.9183403805496827e-05, + "loss": 0.0768, + "step": 7884 + }, + { + "epoch": 8.34, + "learning_rate": 2.917811839323467e-05, + "loss": 0.0909, + "step": 7886 + }, + { + "epoch": 8.34, + "learning_rate": 2.917283298097252e-05, + "loss": 0.274, + "step": 7888 + }, + { + "epoch": 8.34, + "learning_rate": 2.9167547568710362e-05, + "loss": 0.0688, + "step": 7890 + }, + { + "epoch": 8.34, + "learning_rate": 2.9162262156448205e-05, + "loss": 0.1227, + "step": 7892 + }, + { + "epoch": 8.34, + "learning_rate": 2.9156976744186047e-05, + "loss": 0.1165, + "step": 7894 + }, + { + "epoch": 8.35, + "learning_rate": 2.915169133192389e-05, + "loss": 0.272, + "step": 7896 + }, + { + "epoch": 8.35, + "learning_rate": 2.9146405919661736e-05, + "loss": 0.11, + "step": 7898 + }, + { + "epoch": 8.35, + "learning_rate": 2.914112050739958e-05, + "loss": 0.1305, + "step": 7900 + }, + { + "epoch": 8.35, + "learning_rate": 2.913583509513742e-05, + "loss": 0.0879, + "step": 7902 + }, + { + "epoch": 8.36, + "learning_rate": 2.9130549682875264e-05, + "loss": 0.0813, + "step": 7904 + }, + { + "epoch": 8.36, + "learning_rate": 2.912526427061311e-05, + "loss": 0.0774, + "step": 7906 + }, + { + "epoch": 8.36, + "learning_rate": 2.9119978858350956e-05, + "loss": 0.0652, + "step": 7908 + }, + { + "epoch": 8.36, + "learning_rate": 2.91146934460888e-05, + "loss": 0.1315, + "step": 7910 + }, + { + "epoch": 8.36, + "learning_rate": 2.910940803382664e-05, + "loss": 0.1017, + "step": 7912 + }, + { + "epoch": 8.37, + "learning_rate": 2.9104122621564484e-05, + "loss": 0.0486, + "step": 7914 + }, + { + "epoch": 8.37, + "learning_rate": 2.9098837209302327e-05, + "loss": 0.0856, + "step": 7916 + }, + { + "epoch": 8.37, + "learning_rate": 2.909355179704017e-05, + "loss": 0.0431, + "step": 7918 + }, + { + "epoch": 8.37, + "learning_rate": 2.9088266384778012e-05, + "loss": 0.0312, + "step": 7920 + }, + { + "epoch": 8.37, + "learning_rate": 2.9082980972515855e-05, + "loss": 0.1018, + "step": 7922 + }, + { + "epoch": 8.38, + "learning_rate": 2.9077695560253704e-05, + "loss": 0.0721, + "step": 7924 + }, + { + "epoch": 8.38, + "learning_rate": 2.9072410147991547e-05, + "loss": 0.0959, + "step": 7926 + }, + { + "epoch": 8.38, + "learning_rate": 2.906712473572939e-05, + "loss": 0.0566, + "step": 7928 + }, + { + "epoch": 8.38, + "learning_rate": 2.9061839323467232e-05, + "loss": 0.0306, + "step": 7930 + }, + { + "epoch": 8.38, + "learning_rate": 2.9056553911205075e-05, + "loss": 0.2017, + "step": 7932 + }, + { + "epoch": 8.39, + "learning_rate": 2.9051268498942918e-05, + "loss": 0.0892, + "step": 7934 + }, + { + "epoch": 8.39, + "learning_rate": 2.904598308668076e-05, + "loss": 0.0562, + "step": 7936 + }, + { + "epoch": 8.39, + "learning_rate": 2.9040697674418603e-05, + "loss": 0.0532, + "step": 7938 + }, + { + "epoch": 8.39, + "learning_rate": 2.9035412262156446e-05, + "loss": 0.189, + "step": 7940 + }, + { + "epoch": 8.4, + "learning_rate": 2.9030126849894295e-05, + "loss": 0.1269, + "step": 7942 + }, + { + "epoch": 8.4, + "learning_rate": 2.9024841437632138e-05, + "loss": 0.0781, + "step": 7944 + }, + { + "epoch": 8.4, + "learning_rate": 2.901955602536998e-05, + "loss": 0.0659, + "step": 7946 + }, + { + "epoch": 8.4, + "learning_rate": 2.9014270613107823e-05, + "loss": 0.0893, + "step": 7948 + }, + { + "epoch": 8.4, + "learning_rate": 2.9008985200845666e-05, + "loss": 0.1148, + "step": 7950 + }, + { + "epoch": 8.41, + "learning_rate": 2.9003699788583512e-05, + "loss": 0.0789, + "step": 7952 + }, + { + "epoch": 8.41, + "learning_rate": 2.8998414376321355e-05, + "loss": 0.1647, + "step": 7954 + }, + { + "epoch": 8.41, + "learning_rate": 2.8993128964059197e-05, + "loss": 0.1093, + "step": 7956 + }, + { + "epoch": 8.41, + "learning_rate": 2.898784355179704e-05, + "loss": 0.272, + "step": 7958 + }, + { + "epoch": 8.41, + "learning_rate": 2.8982558139534886e-05, + "loss": 0.2171, + "step": 7960 + }, + { + "epoch": 8.42, + "learning_rate": 2.8977272727272732e-05, + "loss": 0.0894, + "step": 7962 + }, + { + "epoch": 8.42, + "learning_rate": 2.8971987315010575e-05, + "loss": 0.2088, + "step": 7964 + }, + { + "epoch": 8.42, + "learning_rate": 2.8966701902748417e-05, + "loss": 0.0528, + "step": 7966 + }, + { + "epoch": 8.42, + "learning_rate": 2.896141649048626e-05, + "loss": 0.1048, + "step": 7968 + }, + { + "epoch": 8.42, + "learning_rate": 2.8956131078224103e-05, + "loss": 0.101, + "step": 7970 + }, + { + "epoch": 8.43, + "learning_rate": 2.8950845665961945e-05, + "loss": 0.2322, + "step": 7972 + }, + { + "epoch": 8.43, + "learning_rate": 2.8945560253699788e-05, + "loss": 0.2275, + "step": 7974 + }, + { + "epoch": 8.43, + "learning_rate": 2.894027484143763e-05, + "loss": 0.1182, + "step": 7976 + }, + { + "epoch": 8.43, + "learning_rate": 2.893498942917548e-05, + "loss": 0.0821, + "step": 7978 + }, + { + "epoch": 8.44, + "learning_rate": 2.8929704016913323e-05, + "loss": 0.0642, + "step": 7980 + }, + { + "epoch": 8.44, + "learning_rate": 2.8924418604651166e-05, + "loss": 0.0747, + "step": 7982 + }, + { + "epoch": 8.44, + "learning_rate": 2.8919133192389008e-05, + "loss": 0.1306, + "step": 7984 + }, + { + "epoch": 8.44, + "learning_rate": 2.891384778012685e-05, + "loss": 0.1336, + "step": 7986 + }, + { + "epoch": 8.44, + "learning_rate": 2.8908562367864694e-05, + "loss": 0.074, + "step": 7988 + }, + { + "epoch": 8.45, + "learning_rate": 2.8903276955602536e-05, + "loss": 0.2258, + "step": 7990 + }, + { + "epoch": 8.45, + "learning_rate": 2.889799154334038e-05, + "loss": 0.0969, + "step": 7992 + }, + { + "epoch": 8.45, + "learning_rate": 2.889270613107822e-05, + "loss": 0.0191, + "step": 7994 + }, + { + "epoch": 8.45, + "learning_rate": 2.888742071881607e-05, + "loss": 0.0725, + "step": 7996 + }, + { + "epoch": 8.45, + "learning_rate": 2.8882135306553914e-05, + "loss": 0.0552, + "step": 7998 + }, + { + "epoch": 8.46, + "learning_rate": 2.8876849894291756e-05, + "loss": 0.0584, + "step": 8000 + }, + { + "epoch": 8.46, + "eval_cer": 0.023083499572527786, + "eval_loss": 0.834189772605896, + "eval_runtime": 129.0007, + "eval_samples_per_second": 6.519, + "eval_steps_per_second": 0.822, + "step": 8000 + }, + { + "epoch": 8.46, + "learning_rate": 2.88715644820296e-05, + "loss": 0.0839, + "step": 8002 + }, + { + "epoch": 8.46, + "learning_rate": 2.8866279069767442e-05, + "loss": 0.101, + "step": 8004 + }, + { + "epoch": 8.46, + "learning_rate": 2.8860993657505288e-05, + "loss": 0.1871, + "step": 8006 + }, + { + "epoch": 8.47, + "learning_rate": 2.885570824524313e-05, + "loss": 0.0541, + "step": 8008 + }, + { + "epoch": 8.47, + "learning_rate": 2.8850422832980973e-05, + "loss": 0.071, + "step": 8010 + }, + { + "epoch": 8.47, + "learning_rate": 2.8845137420718816e-05, + "loss": 0.0442, + "step": 8012 + }, + { + "epoch": 8.47, + "learning_rate": 2.8839852008456665e-05, + "loss": 0.1274, + "step": 8014 + }, + { + "epoch": 8.47, + "learning_rate": 2.8834566596194508e-05, + "loss": 0.1477, + "step": 8016 + }, + { + "epoch": 8.48, + "learning_rate": 2.882928118393235e-05, + "loss": 0.2032, + "step": 8018 + }, + { + "epoch": 8.48, + "learning_rate": 2.8823995771670193e-05, + "loss": 0.0629, + "step": 8020 + }, + { + "epoch": 8.48, + "learning_rate": 2.8818710359408036e-05, + "loss": 0.1614, + "step": 8022 + }, + { + "epoch": 8.48, + "learning_rate": 2.881342494714588e-05, + "loss": 0.2354, + "step": 8024 + }, + { + "epoch": 8.48, + "learning_rate": 2.880813953488372e-05, + "loss": 0.0932, + "step": 8026 + }, + { + "epoch": 8.49, + "learning_rate": 2.8802854122621564e-05, + "loss": 0.1353, + "step": 8028 + }, + { + "epoch": 8.49, + "learning_rate": 2.8797568710359407e-05, + "loss": 0.1506, + "step": 8030 + }, + { + "epoch": 8.49, + "learning_rate": 2.8792283298097256e-05, + "loss": 0.0269, + "step": 8032 + }, + { + "epoch": 8.49, + "learning_rate": 2.87869978858351e-05, + "loss": 0.0624, + "step": 8034 + }, + { + "epoch": 8.49, + "learning_rate": 2.878171247357294e-05, + "loss": 0.1213, + "step": 8036 + }, + { + "epoch": 8.5, + "learning_rate": 2.8776427061310784e-05, + "loss": 0.1265, + "step": 8038 + }, + { + "epoch": 8.5, + "learning_rate": 2.8771141649048627e-05, + "loss": 0.1232, + "step": 8040 + }, + { + "epoch": 8.5, + "learning_rate": 2.876585623678647e-05, + "loss": 0.0704, + "step": 8042 + }, + { + "epoch": 8.5, + "learning_rate": 2.8760570824524312e-05, + "loss": 0.0532, + "step": 8044 + }, + { + "epoch": 8.51, + "learning_rate": 2.8755285412262155e-05, + "loss": 0.06, + "step": 8046 + }, + { + "epoch": 8.51, + "learning_rate": 2.8749999999999997e-05, + "loss": 0.088, + "step": 8048 + }, + { + "epoch": 8.51, + "learning_rate": 2.8744714587737847e-05, + "loss": 0.063, + "step": 8050 + }, + { + "epoch": 8.51, + "learning_rate": 2.873942917547569e-05, + "loss": 0.0784, + "step": 8052 + }, + { + "epoch": 8.51, + "learning_rate": 2.8734143763213532e-05, + "loss": 0.0938, + "step": 8054 + }, + { + "epoch": 8.52, + "learning_rate": 2.8728858350951375e-05, + "loss": 0.1824, + "step": 8056 + }, + { + "epoch": 8.52, + "learning_rate": 2.872357293868922e-05, + "loss": 0.2113, + "step": 8058 + }, + { + "epoch": 8.52, + "learning_rate": 2.8718287526427064e-05, + "loss": 0.1183, + "step": 8060 + }, + { + "epoch": 8.52, + "learning_rate": 2.8713002114164906e-05, + "loss": 0.0816, + "step": 8062 + }, + { + "epoch": 8.52, + "learning_rate": 2.870771670190275e-05, + "loss": 0.1049, + "step": 8064 + }, + { + "epoch": 8.53, + "learning_rate": 2.870243128964059e-05, + "loss": 0.1227, + "step": 8066 + }, + { + "epoch": 8.53, + "learning_rate": 2.869714587737844e-05, + "loss": 0.1581, + "step": 8068 + }, + { + "epoch": 8.53, + "learning_rate": 2.8691860465116284e-05, + "loss": 0.0702, + "step": 8070 + }, + { + "epoch": 8.53, + "learning_rate": 2.8686575052854126e-05, + "loss": 0.2441, + "step": 8072 + }, + { + "epoch": 8.53, + "learning_rate": 2.868128964059197e-05, + "loss": 0.1847, + "step": 8074 + }, + { + "epoch": 8.54, + "learning_rate": 2.8676004228329812e-05, + "loss": 0.1233, + "step": 8076 + }, + { + "epoch": 8.54, + "learning_rate": 2.8670718816067654e-05, + "loss": 0.3098, + "step": 8078 + }, + { + "epoch": 8.54, + "learning_rate": 2.8665433403805497e-05, + "loss": 0.2001, + "step": 8080 + }, + { + "epoch": 8.54, + "learning_rate": 2.866014799154334e-05, + "loss": 0.2392, + "step": 8082 + }, + { + "epoch": 8.55, + "learning_rate": 2.8654862579281183e-05, + "loss": 0.1514, + "step": 8084 + }, + { + "epoch": 8.55, + "learning_rate": 2.8649577167019032e-05, + "loss": 0.1599, + "step": 8086 + }, + { + "epoch": 8.55, + "learning_rate": 2.8644291754756875e-05, + "loss": 0.1155, + "step": 8088 + }, + { + "epoch": 8.55, + "learning_rate": 2.8639006342494717e-05, + "loss": 0.041, + "step": 8090 + }, + { + "epoch": 8.55, + "learning_rate": 2.863372093023256e-05, + "loss": 0.1052, + "step": 8092 + }, + { + "epoch": 8.56, + "learning_rate": 2.8628435517970403e-05, + "loss": 0.1127, + "step": 8094 + }, + { + "epoch": 8.56, + "learning_rate": 2.8623150105708245e-05, + "loss": 0.1245, + "step": 8096 + }, + { + "epoch": 8.56, + "learning_rate": 2.8617864693446088e-05, + "loss": 0.084, + "step": 8098 + }, + { + "epoch": 8.56, + "learning_rate": 2.861257928118393e-05, + "loss": 0.1355, + "step": 8100 + }, + { + "epoch": 8.56, + "learning_rate": 2.8607293868921777e-05, + "loss": 0.0758, + "step": 8102 + }, + { + "epoch": 8.57, + "learning_rate": 2.8602008456659623e-05, + "loss": 0.1272, + "step": 8104 + }, + { + "epoch": 8.57, + "learning_rate": 2.8596723044397465e-05, + "loss": 0.085, + "step": 8106 + }, + { + "epoch": 8.57, + "learning_rate": 2.8591437632135308e-05, + "loss": 0.0379, + "step": 8108 + }, + { + "epoch": 8.57, + "learning_rate": 2.858615221987315e-05, + "loss": 0.0785, + "step": 8110 + }, + { + "epoch": 8.58, + "learning_rate": 2.8580866807610997e-05, + "loss": 0.072, + "step": 8112 + }, + { + "epoch": 8.58, + "learning_rate": 2.857558139534884e-05, + "loss": 0.121, + "step": 8114 + }, + { + "epoch": 8.58, + "learning_rate": 2.8570295983086682e-05, + "loss": 0.0658, + "step": 8116 + }, + { + "epoch": 8.58, + "learning_rate": 2.8565010570824525e-05, + "loss": 0.0765, + "step": 8118 + }, + { + "epoch": 8.58, + "learning_rate": 2.8559725158562368e-05, + "loss": 0.0394, + "step": 8120 + }, + { + "epoch": 8.59, + "learning_rate": 2.855443974630021e-05, + "loss": 0.2187, + "step": 8122 + }, + { + "epoch": 8.59, + "learning_rate": 2.854915433403806e-05, + "loss": 0.0988, + "step": 8124 + }, + { + "epoch": 8.59, + "learning_rate": 2.8543868921775902e-05, + "loss": 0.07, + "step": 8126 + }, + { + "epoch": 8.59, + "learning_rate": 2.8538583509513745e-05, + "loss": 0.0686, + "step": 8128 + }, + { + "epoch": 8.59, + "learning_rate": 2.8533298097251588e-05, + "loss": 0.261, + "step": 8130 + }, + { + "epoch": 8.6, + "learning_rate": 2.852801268498943e-05, + "loss": 0.1215, + "step": 8132 + }, + { + "epoch": 8.6, + "learning_rate": 2.8522727272727273e-05, + "loss": 0.155, + "step": 8134 + }, + { + "epoch": 8.6, + "learning_rate": 2.8517441860465116e-05, + "loss": 0.1924, + "step": 8136 + }, + { + "epoch": 8.6, + "learning_rate": 2.851215644820296e-05, + "loss": 0.0767, + "step": 8138 + }, + { + "epoch": 8.6, + "learning_rate": 2.85068710359408e-05, + "loss": 0.1421, + "step": 8140 + }, + { + "epoch": 8.61, + "learning_rate": 2.850158562367865e-05, + "loss": 0.0753, + "step": 8142 + }, + { + "epoch": 8.61, + "learning_rate": 2.8496300211416493e-05, + "loss": 0.1448, + "step": 8144 + }, + { + "epoch": 8.61, + "learning_rate": 2.8491014799154336e-05, + "loss": 0.1149, + "step": 8146 + }, + { + "epoch": 8.61, + "learning_rate": 2.848572938689218e-05, + "loss": 0.0598, + "step": 8148 + }, + { + "epoch": 8.62, + "learning_rate": 2.848044397463002e-05, + "loss": 0.1086, + "step": 8150 + }, + { + "epoch": 8.62, + "learning_rate": 2.8475158562367864e-05, + "loss": 0.0888, + "step": 8152 + }, + { + "epoch": 8.62, + "learning_rate": 2.8469873150105706e-05, + "loss": 0.0574, + "step": 8154 + }, + { + "epoch": 8.62, + "learning_rate": 2.8464587737843553e-05, + "loss": 0.0729, + "step": 8156 + }, + { + "epoch": 8.62, + "learning_rate": 2.8459302325581395e-05, + "loss": 0.1762, + "step": 8158 + }, + { + "epoch": 8.63, + "learning_rate": 2.845401691331924e-05, + "loss": 0.0921, + "step": 8160 + }, + { + "epoch": 8.63, + "learning_rate": 2.8448731501057084e-05, + "loss": 0.0903, + "step": 8162 + }, + { + "epoch": 8.63, + "learning_rate": 2.844344608879493e-05, + "loss": 0.1676, + "step": 8164 + }, + { + "epoch": 8.63, + "learning_rate": 2.8438160676532773e-05, + "loss": 0.1132, + "step": 8166 + }, + { + "epoch": 8.63, + "learning_rate": 2.8432875264270615e-05, + "loss": 0.1617, + "step": 8168 + }, + { + "epoch": 8.64, + "learning_rate": 2.8427589852008458e-05, + "loss": 0.0692, + "step": 8170 + }, + { + "epoch": 8.64, + "learning_rate": 2.84223044397463e-05, + "loss": 0.072, + "step": 8172 + }, + { + "epoch": 8.64, + "learning_rate": 2.8417019027484143e-05, + "loss": 0.0783, + "step": 8174 + }, + { + "epoch": 8.64, + "learning_rate": 2.8411733615221986e-05, + "loss": 0.0356, + "step": 8176 + }, + { + "epoch": 8.64, + "learning_rate": 2.8406448202959835e-05, + "loss": 0.0552, + "step": 8178 + }, + { + "epoch": 8.65, + "learning_rate": 2.8401162790697678e-05, + "loss": 0.1139, + "step": 8180 + }, + { + "epoch": 8.65, + "learning_rate": 2.839587737843552e-05, + "loss": 0.1773, + "step": 8182 + }, + { + "epoch": 8.65, + "learning_rate": 2.8390591966173363e-05, + "loss": 0.2647, + "step": 8184 + }, + { + "epoch": 8.65, + "learning_rate": 2.8385306553911206e-05, + "loss": 0.1721, + "step": 8186 + }, + { + "epoch": 8.66, + "learning_rate": 2.838002114164905e-05, + "loss": 0.0464, + "step": 8188 + }, + { + "epoch": 8.66, + "learning_rate": 2.837473572938689e-05, + "loss": 0.0901, + "step": 8190 + }, + { + "epoch": 8.66, + "learning_rate": 2.8369450317124734e-05, + "loss": 0.0405, + "step": 8192 + }, + { + "epoch": 8.66, + "learning_rate": 2.8364164904862577e-05, + "loss": 0.0503, + "step": 8194 + }, + { + "epoch": 8.66, + "learning_rate": 2.8358879492600426e-05, + "loss": 0.1983, + "step": 8196 + }, + { + "epoch": 8.67, + "learning_rate": 2.835359408033827e-05, + "loss": 0.0998, + "step": 8198 + }, + { + "epoch": 8.67, + "learning_rate": 2.834830866807611e-05, + "loss": 0.0986, + "step": 8200 + }, + { + "epoch": 8.67, + "learning_rate": 2.8343023255813954e-05, + "loss": 0.1944, + "step": 8202 + }, + { + "epoch": 8.67, + "learning_rate": 2.8337737843551797e-05, + "loss": 0.1027, + "step": 8204 + }, + { + "epoch": 8.67, + "learning_rate": 2.833245243128964e-05, + "loss": 0.1601, + "step": 8206 + }, + { + "epoch": 8.68, + "learning_rate": 2.8327167019027486e-05, + "loss": 0.0563, + "step": 8208 + }, + { + "epoch": 8.68, + "learning_rate": 2.832188160676533e-05, + "loss": 0.0967, + "step": 8210 + }, + { + "epoch": 8.68, + "learning_rate": 2.831659619450317e-05, + "loss": 0.0645, + "step": 8212 + }, + { + "epoch": 8.68, + "learning_rate": 2.8311310782241017e-05, + "loss": 0.1535, + "step": 8214 + }, + { + "epoch": 8.68, + "learning_rate": 2.830602536997886e-05, + "loss": 0.1287, + "step": 8216 + }, + { + "epoch": 8.69, + "learning_rate": 2.8300739957716706e-05, + "loss": 0.0618, + "step": 8218 + }, + { + "epoch": 8.69, + "learning_rate": 2.829545454545455e-05, + "loss": 0.1816, + "step": 8220 + }, + { + "epoch": 8.69, + "learning_rate": 2.829016913319239e-05, + "loss": 0.1294, + "step": 8222 + }, + { + "epoch": 8.69, + "learning_rate": 2.8284883720930234e-05, + "loss": 0.1351, + "step": 8224 + }, + { + "epoch": 8.7, + "learning_rate": 2.8279598308668077e-05, + "loss": 0.1923, + "step": 8226 + }, + { + "epoch": 8.7, + "learning_rate": 2.827431289640592e-05, + "loss": 0.0535, + "step": 8228 + }, + { + "epoch": 8.7, + "learning_rate": 2.8269027484143762e-05, + "loss": 0.1522, + "step": 8230 + }, + { + "epoch": 8.7, + "learning_rate": 2.826374207188161e-05, + "loss": 0.1228, + "step": 8232 + }, + { + "epoch": 8.7, + "learning_rate": 2.8258456659619454e-05, + "loss": 0.2214, + "step": 8234 + }, + { + "epoch": 8.71, + "learning_rate": 2.8253171247357297e-05, + "loss": 0.055, + "step": 8236 + }, + { + "epoch": 8.71, + "learning_rate": 2.824788583509514e-05, + "loss": 0.0697, + "step": 8238 + }, + { + "epoch": 8.71, + "learning_rate": 2.8242600422832982e-05, + "loss": 0.0846, + "step": 8240 + }, + { + "epoch": 8.71, + "learning_rate": 2.8237315010570825e-05, + "loss": 0.1981, + "step": 8242 + }, + { + "epoch": 8.71, + "learning_rate": 2.8232029598308667e-05, + "loss": 0.2626, + "step": 8244 + }, + { + "epoch": 8.72, + "learning_rate": 2.822674418604651e-05, + "loss": 0.1027, + "step": 8246 + }, + { + "epoch": 8.72, + "learning_rate": 2.8221458773784353e-05, + "loss": 0.072, + "step": 8248 + }, + { + "epoch": 8.72, + "learning_rate": 2.8216173361522202e-05, + "loss": 0.0512, + "step": 8250 + }, + { + "epoch": 8.72, + "learning_rate": 2.8210887949260045e-05, + "loss": 0.1911, + "step": 8252 + }, + { + "epoch": 8.73, + "learning_rate": 2.8205602536997887e-05, + "loss": 0.1951, + "step": 8254 + }, + { + "epoch": 8.73, + "learning_rate": 2.820031712473573e-05, + "loss": 0.1127, + "step": 8256 + }, + { + "epoch": 8.73, + "learning_rate": 2.8195031712473573e-05, + "loss": 0.0594, + "step": 8258 + }, + { + "epoch": 8.73, + "learning_rate": 2.8189746300211415e-05, + "loss": 0.0958, + "step": 8260 + }, + { + "epoch": 8.73, + "learning_rate": 2.818446088794926e-05, + "loss": 0.2258, + "step": 8262 + }, + { + "epoch": 8.74, + "learning_rate": 2.8179175475687104e-05, + "loss": 0.0807, + "step": 8264 + }, + { + "epoch": 8.74, + "learning_rate": 2.8173890063424947e-05, + "loss": 0.1206, + "step": 8266 + }, + { + "epoch": 8.74, + "learning_rate": 2.8168604651162793e-05, + "loss": 0.0782, + "step": 8268 + }, + { + "epoch": 8.74, + "learning_rate": 2.8163319238900636e-05, + "loss": 0.067, + "step": 8270 + }, + { + "epoch": 8.74, + "learning_rate": 2.815803382663848e-05, + "loss": 0.2475, + "step": 8272 + }, + { + "epoch": 8.75, + "learning_rate": 2.8152748414376324e-05, + "loss": 0.2549, + "step": 8274 + }, + { + "epoch": 8.75, + "learning_rate": 2.8147463002114167e-05, + "loss": 0.1755, + "step": 8276 + }, + { + "epoch": 8.75, + "learning_rate": 2.814217758985201e-05, + "loss": 0.1862, + "step": 8278 + }, + { + "epoch": 8.75, + "learning_rate": 2.8136892177589852e-05, + "loss": 0.1525, + "step": 8280 + }, + { + "epoch": 8.75, + "learning_rate": 2.8131606765327695e-05, + "loss": 0.1334, + "step": 8282 + }, + { + "epoch": 8.76, + "learning_rate": 2.8126321353065538e-05, + "loss": 0.0858, + "step": 8284 + }, + { + "epoch": 8.76, + "learning_rate": 2.8121035940803387e-05, + "loss": 0.0971, + "step": 8286 + }, + { + "epoch": 8.76, + "learning_rate": 2.811575052854123e-05, + "loss": 0.077, + "step": 8288 + }, + { + "epoch": 8.76, + "learning_rate": 2.8110465116279073e-05, + "loss": 0.1299, + "step": 8290 + }, + { + "epoch": 8.77, + "learning_rate": 2.8105179704016915e-05, + "loss": 0.1872, + "step": 8292 + }, + { + "epoch": 8.77, + "learning_rate": 2.8099894291754758e-05, + "loss": 0.0986, + "step": 8294 + }, + { + "epoch": 8.77, + "learning_rate": 2.80946088794926e-05, + "loss": 0.0461, + "step": 8296 + }, + { + "epoch": 8.77, + "learning_rate": 2.8089323467230443e-05, + "loss": 0.1037, + "step": 8298 + }, + { + "epoch": 8.77, + "learning_rate": 2.8084038054968286e-05, + "loss": 0.1127, + "step": 8300 + }, + { + "epoch": 8.78, + "learning_rate": 2.807875264270613e-05, + "loss": 0.1026, + "step": 8302 + }, + { + "epoch": 8.78, + "learning_rate": 2.8073467230443978e-05, + "loss": 0.0636, + "step": 8304 + }, + { + "epoch": 8.78, + "learning_rate": 2.806818181818182e-05, + "loss": 0.1557, + "step": 8306 + }, + { + "epoch": 8.78, + "learning_rate": 2.8062896405919663e-05, + "loss": 0.1203, + "step": 8308 + }, + { + "epoch": 8.78, + "learning_rate": 2.8057610993657506e-05, + "loss": 0.0859, + "step": 8310 + }, + { + "epoch": 8.79, + "learning_rate": 2.805232558139535e-05, + "loss": 0.1833, + "step": 8312 + }, + { + "epoch": 8.79, + "learning_rate": 2.804704016913319e-05, + "loss": 0.2377, + "step": 8314 + }, + { + "epoch": 8.79, + "learning_rate": 2.8041754756871037e-05, + "loss": 0.1107, + "step": 8316 + }, + { + "epoch": 8.79, + "learning_rate": 2.803646934460888e-05, + "loss": 0.1154, + "step": 8318 + }, + { + "epoch": 8.79, + "learning_rate": 2.8031183932346723e-05, + "loss": 0.1013, + "step": 8320 + }, + { + "epoch": 8.8, + "learning_rate": 2.802589852008457e-05, + "loss": 0.0989, + "step": 8322 + }, + { + "epoch": 8.8, + "learning_rate": 2.8020613107822415e-05, + "loss": 0.0776, + "step": 8324 + }, + { + "epoch": 8.8, + "learning_rate": 2.8015327695560258e-05, + "loss": 0.0925, + "step": 8326 + }, + { + "epoch": 8.8, + "learning_rate": 2.80100422832981e-05, + "loss": 0.1062, + "step": 8328 + }, + { + "epoch": 8.81, + "learning_rate": 2.8004756871035943e-05, + "loss": 0.0799, + "step": 8330 + }, + { + "epoch": 8.81, + "learning_rate": 2.7999471458773786e-05, + "loss": 0.1206, + "step": 8332 + }, + { + "epoch": 8.81, + "learning_rate": 2.7994186046511628e-05, + "loss": 0.1726, + "step": 8334 + }, + { + "epoch": 8.81, + "learning_rate": 2.798890063424947e-05, + "loss": 0.1599, + "step": 8336 + }, + { + "epoch": 8.81, + "learning_rate": 2.7983615221987314e-05, + "loss": 0.1454, + "step": 8338 + }, + { + "epoch": 8.82, + "learning_rate": 2.7978329809725163e-05, + "loss": 0.1246, + "step": 8340 + }, + { + "epoch": 8.82, + "learning_rate": 2.7973044397463006e-05, + "loss": 0.0782, + "step": 8342 + }, + { + "epoch": 8.82, + "learning_rate": 2.796775898520085e-05, + "loss": 0.1085, + "step": 8344 + }, + { + "epoch": 8.82, + "learning_rate": 2.796247357293869e-05, + "loss": 0.0896, + "step": 8346 + }, + { + "epoch": 8.82, + "learning_rate": 2.7957188160676534e-05, + "loss": 0.0475, + "step": 8348 + }, + { + "epoch": 8.83, + "learning_rate": 2.7951902748414376e-05, + "loss": 0.1508, + "step": 8350 + }, + { + "epoch": 8.83, + "learning_rate": 2.794661733615222e-05, + "loss": 0.0623, + "step": 8352 + }, + { + "epoch": 8.83, + "learning_rate": 2.7941331923890062e-05, + "loss": 0.1372, + "step": 8354 + }, + { + "epoch": 8.83, + "learning_rate": 2.7936046511627904e-05, + "loss": 0.0929, + "step": 8356 + }, + { + "epoch": 8.84, + "learning_rate": 2.7930761099365754e-05, + "loss": 0.0702, + "step": 8358 + }, + { + "epoch": 8.84, + "learning_rate": 2.7925475687103596e-05, + "loss": 0.107, + "step": 8360 + }, + { + "epoch": 8.84, + "learning_rate": 2.792019027484144e-05, + "loss": 0.0428, + "step": 8362 + }, + { + "epoch": 8.84, + "learning_rate": 2.7914904862579282e-05, + "loss": 0.1434, + "step": 8364 + }, + { + "epoch": 8.84, + "learning_rate": 2.7909619450317125e-05, + "loss": 0.0995, + "step": 8366 + }, + { + "epoch": 8.85, + "learning_rate": 2.790433403805497e-05, + "loss": 0.1524, + "step": 8368 + }, + { + "epoch": 8.85, + "learning_rate": 2.7899048625792813e-05, + "loss": 0.0819, + "step": 8370 + }, + { + "epoch": 8.85, + "learning_rate": 2.7893763213530656e-05, + "loss": 0.0574, + "step": 8372 + }, + { + "epoch": 8.85, + "learning_rate": 2.78884778012685e-05, + "loss": 0.0799, + "step": 8374 + }, + { + "epoch": 8.85, + "learning_rate": 2.7883192389006345e-05, + "loss": 0.1706, + "step": 8376 + }, + { + "epoch": 8.86, + "learning_rate": 2.787790697674419e-05, + "loss": 0.0798, + "step": 8378 + }, + { + "epoch": 8.86, + "learning_rate": 2.7872621564482033e-05, + "loss": 0.1162, + "step": 8380 + }, + { + "epoch": 8.86, + "learning_rate": 2.7867336152219876e-05, + "loss": 0.0853, + "step": 8382 + }, + { + "epoch": 8.86, + "learning_rate": 2.786205073995772e-05, + "loss": 0.0931, + "step": 8384 + }, + { + "epoch": 8.86, + "learning_rate": 2.785676532769556e-05, + "loss": 0.0971, + "step": 8386 + }, + { + "epoch": 8.87, + "learning_rate": 2.7851479915433404e-05, + "loss": 0.043, + "step": 8388 + }, + { + "epoch": 8.87, + "learning_rate": 2.7846194503171247e-05, + "loss": 0.0413, + "step": 8390 + }, + { + "epoch": 8.87, + "learning_rate": 2.784090909090909e-05, + "loss": 0.0894, + "step": 8392 + }, + { + "epoch": 8.87, + "learning_rate": 2.783562367864694e-05, + "loss": 0.029, + "step": 8394 + }, + { + "epoch": 8.88, + "learning_rate": 2.783033826638478e-05, + "loss": 0.0832, + "step": 8396 + }, + { + "epoch": 8.88, + "learning_rate": 2.7825052854122624e-05, + "loss": 0.0876, + "step": 8398 + }, + { + "epoch": 8.88, + "learning_rate": 2.7819767441860467e-05, + "loss": 0.1376, + "step": 8400 + }, + { + "epoch": 8.88, + "learning_rate": 2.781448202959831e-05, + "loss": 0.1387, + "step": 8402 + }, + { + "epoch": 8.88, + "learning_rate": 2.7809196617336152e-05, + "loss": 0.1245, + "step": 8404 + }, + { + "epoch": 8.89, + "learning_rate": 2.7803911205073995e-05, + "loss": 0.1056, + "step": 8406 + }, + { + "epoch": 8.89, + "learning_rate": 2.7798625792811838e-05, + "loss": 0.0913, + "step": 8408 + }, + { + "epoch": 8.89, + "learning_rate": 2.779334038054968e-05, + "loss": 0.0532, + "step": 8410 + }, + { + "epoch": 8.89, + "learning_rate": 2.778805496828753e-05, + "loss": 0.1707, + "step": 8412 + }, + { + "epoch": 8.89, + "learning_rate": 2.7782769556025372e-05, + "loss": 0.1036, + "step": 8414 + }, + { + "epoch": 8.9, + "learning_rate": 2.7777484143763215e-05, + "loss": 0.1387, + "step": 8416 + }, + { + "epoch": 8.9, + "learning_rate": 2.7772198731501058e-05, + "loss": 0.091, + "step": 8418 + }, + { + "epoch": 8.9, + "learning_rate": 2.77669133192389e-05, + "loss": 0.0763, + "step": 8420 + }, + { + "epoch": 8.9, + "learning_rate": 2.7761627906976746e-05, + "loss": 0.092, + "step": 8422 + }, + { + "epoch": 8.9, + "learning_rate": 2.775634249471459e-05, + "loss": 0.0948, + "step": 8424 + }, + { + "epoch": 8.91, + "learning_rate": 2.7751057082452432e-05, + "loss": 0.0832, + "step": 8426 + }, + { + "epoch": 8.91, + "learning_rate": 2.7745771670190274e-05, + "loss": 0.0764, + "step": 8428 + }, + { + "epoch": 8.91, + "learning_rate": 2.774048625792812e-05, + "loss": 0.1405, + "step": 8430 + }, + { + "epoch": 8.91, + "learning_rate": 2.7735200845665967e-05, + "loss": 0.0823, + "step": 8432 + }, + { + "epoch": 8.92, + "learning_rate": 2.772991543340381e-05, + "loss": 0.1438, + "step": 8434 + }, + { + "epoch": 8.92, + "learning_rate": 2.7724630021141652e-05, + "loss": 0.228, + "step": 8436 + }, + { + "epoch": 8.92, + "learning_rate": 2.7719344608879495e-05, + "loss": 0.0858, + "step": 8438 + }, + { + "epoch": 8.92, + "learning_rate": 2.7714059196617337e-05, + "loss": 0.0388, + "step": 8440 + }, + { + "epoch": 8.92, + "learning_rate": 2.770877378435518e-05, + "loss": 0.0782, + "step": 8442 + }, + { + "epoch": 8.93, + "learning_rate": 2.7703488372093023e-05, + "loss": 0.1014, + "step": 8444 + }, + { + "epoch": 8.93, + "learning_rate": 2.7698202959830865e-05, + "loss": 0.1315, + "step": 8446 + }, + { + "epoch": 8.93, + "learning_rate": 2.7692917547568715e-05, + "loss": 0.0287, + "step": 8448 + }, + { + "epoch": 8.93, + "learning_rate": 2.7687632135306557e-05, + "loss": 0.0993, + "step": 8450 + }, + { + "epoch": 8.93, + "learning_rate": 2.76823467230444e-05, + "loss": 0.1108, + "step": 8452 + }, + { + "epoch": 8.94, + "learning_rate": 2.7677061310782243e-05, + "loss": 0.1189, + "step": 8454 + }, + { + "epoch": 8.94, + "learning_rate": 2.7671775898520085e-05, + "loss": 0.0465, + "step": 8456 + }, + { + "epoch": 8.94, + "learning_rate": 2.7666490486257928e-05, + "loss": 0.2443, + "step": 8458 + }, + { + "epoch": 8.94, + "learning_rate": 2.766120507399577e-05, + "loss": 0.1708, + "step": 8460 + }, + { + "epoch": 8.95, + "learning_rate": 2.7655919661733613e-05, + "loss": 0.2846, + "step": 8462 + }, + { + "epoch": 8.95, + "learning_rate": 2.7650634249471456e-05, + "loss": 0.079, + "step": 8464 + }, + { + "epoch": 8.95, + "learning_rate": 2.7645348837209305e-05, + "loss": 0.3057, + "step": 8466 + }, + { + "epoch": 8.95, + "learning_rate": 2.7640063424947148e-05, + "loss": 0.0952, + "step": 8468 + }, + { + "epoch": 8.95, + "learning_rate": 2.763477801268499e-05, + "loss": 0.0733, + "step": 8470 + }, + { + "epoch": 8.96, + "learning_rate": 2.7629492600422834e-05, + "loss": 0.1229, + "step": 8472 + }, + { + "epoch": 8.96, + "learning_rate": 2.7624207188160676e-05, + "loss": 0.1157, + "step": 8474 + }, + { + "epoch": 8.96, + "learning_rate": 2.7618921775898522e-05, + "loss": 0.1169, + "step": 8476 + }, + { + "epoch": 8.96, + "learning_rate": 2.7613636363636365e-05, + "loss": 0.0718, + "step": 8478 + }, + { + "epoch": 8.96, + "learning_rate": 2.7608350951374208e-05, + "loss": 0.2532, + "step": 8480 + }, + { + "epoch": 8.97, + "learning_rate": 2.760306553911205e-05, + "loss": 0.052, + "step": 8482 + }, + { + "epoch": 8.97, + "learning_rate": 2.75977801268499e-05, + "loss": 0.1107, + "step": 8484 + }, + { + "epoch": 8.97, + "learning_rate": 2.7592494714587742e-05, + "loss": 0.1298, + "step": 8486 + }, + { + "epoch": 8.97, + "learning_rate": 2.7587209302325585e-05, + "loss": 0.0634, + "step": 8488 + }, + { + "epoch": 8.97, + "learning_rate": 2.7581923890063428e-05, + "loss": 0.0778, + "step": 8490 + }, + { + "epoch": 8.98, + "learning_rate": 2.757663847780127e-05, + "loss": 0.0882, + "step": 8492 + }, + { + "epoch": 8.98, + "learning_rate": 2.7571353065539113e-05, + "loss": 0.0727, + "step": 8494 + }, + { + "epoch": 8.98, + "learning_rate": 2.7566067653276956e-05, + "loss": 0.0334, + "step": 8496 + }, + { + "epoch": 8.98, + "learning_rate": 2.75607822410148e-05, + "loss": 0.0673, + "step": 8498 + }, + { + "epoch": 8.99, + "learning_rate": 2.755549682875264e-05, + "loss": 0.2054, + "step": 8500 + }, + { + "epoch": 8.99, + "eval_cer": 0.03425477343972642, + "eval_loss": 0.6548437476158142, + "eval_runtime": 125.5264, + "eval_samples_per_second": 6.7, + "eval_steps_per_second": 0.844, + "step": 8500 + }, + { + "epoch": 8.99, + "learning_rate": 2.755021141649049e-05, + "loss": 0.0321, + "step": 8502 + }, + { + "epoch": 8.99, + "learning_rate": 2.7544926004228333e-05, + "loss": 0.0655, + "step": 8504 + }, + { + "epoch": 8.99, + "learning_rate": 2.7539640591966176e-05, + "loss": 0.1483, + "step": 8506 + }, + { + "epoch": 8.99, + "learning_rate": 2.753435517970402e-05, + "loss": 0.0967, + "step": 8508 + }, + { + "epoch": 9.0, + "learning_rate": 2.752906976744186e-05, + "loss": 0.1002, + "step": 8510 + }, + { + "epoch": 9.0, + "learning_rate": 2.7523784355179704e-05, + "loss": 0.1041, + "step": 8512 + }, + { + "epoch": 9.0, + "learning_rate": 2.7518498942917547e-05, + "loss": 0.1675, + "step": 8514 + }, + { + "epoch": 9.0, + "learning_rate": 2.751321353065539e-05, + "loss": 0.0342, + "step": 8516 + }, + { + "epoch": 9.0, + "learning_rate": 2.7507928118393232e-05, + "loss": 0.0659, + "step": 8518 + }, + { + "epoch": 9.01, + "learning_rate": 2.750264270613108e-05, + "loss": 0.0966, + "step": 8520 + }, + { + "epoch": 9.01, + "learning_rate": 2.7497357293868924e-05, + "loss": 0.0474, + "step": 8522 + }, + { + "epoch": 9.01, + "learning_rate": 2.7492071881606767e-05, + "loss": 0.1123, + "step": 8524 + }, + { + "epoch": 9.01, + "learning_rate": 2.748678646934461e-05, + "loss": 0.1332, + "step": 8526 + }, + { + "epoch": 9.01, + "learning_rate": 2.7481501057082455e-05, + "loss": 0.2334, + "step": 8528 + }, + { + "epoch": 9.02, + "learning_rate": 2.7476215644820298e-05, + "loss": 0.1152, + "step": 8530 + }, + { + "epoch": 9.02, + "learning_rate": 2.747093023255814e-05, + "loss": 0.1294, + "step": 8532 + }, + { + "epoch": 9.02, + "learning_rate": 2.7465644820295983e-05, + "loss": 0.051, + "step": 8534 + }, + { + "epoch": 9.02, + "learning_rate": 2.7460359408033826e-05, + "loss": 0.0726, + "step": 8536 + }, + { + "epoch": 9.03, + "learning_rate": 2.7455073995771676e-05, + "loss": 0.0495, + "step": 8538 + }, + { + "epoch": 9.03, + "learning_rate": 2.7449788583509518e-05, + "loss": 0.134, + "step": 8540 + }, + { + "epoch": 9.03, + "learning_rate": 2.744450317124736e-05, + "loss": 0.0087, + "step": 8542 + }, + { + "epoch": 9.03, + "learning_rate": 2.7439217758985204e-05, + "loss": 0.1752, + "step": 8544 + }, + { + "epoch": 9.03, + "learning_rate": 2.7433932346723046e-05, + "loss": 0.1692, + "step": 8546 + }, + { + "epoch": 9.04, + "learning_rate": 2.742864693446089e-05, + "loss": 0.1097, + "step": 8548 + }, + { + "epoch": 9.04, + "learning_rate": 2.742336152219873e-05, + "loss": 0.045, + "step": 8550 + }, + { + "epoch": 9.04, + "learning_rate": 2.7418076109936574e-05, + "loss": 0.1581, + "step": 8552 + }, + { + "epoch": 9.04, + "learning_rate": 2.7412790697674417e-05, + "loss": 0.0721, + "step": 8554 + }, + { + "epoch": 9.04, + "learning_rate": 2.7407505285412266e-05, + "loss": 0.0617, + "step": 8556 + }, + { + "epoch": 9.05, + "learning_rate": 2.740221987315011e-05, + "loss": 0.0526, + "step": 8558 + }, + { + "epoch": 9.05, + "learning_rate": 2.7396934460887952e-05, + "loss": 0.0785, + "step": 8560 + }, + { + "epoch": 9.05, + "learning_rate": 2.7391649048625794e-05, + "loss": 0.0973, + "step": 8562 + }, + { + "epoch": 9.05, + "learning_rate": 2.7386363636363637e-05, + "loss": 0.0743, + "step": 8564 + }, + { + "epoch": 9.05, + "learning_rate": 2.738107822410148e-05, + "loss": 0.1105, + "step": 8566 + }, + { + "epoch": 9.06, + "learning_rate": 2.7375792811839322e-05, + "loss": 0.1266, + "step": 8568 + }, + { + "epoch": 9.06, + "learning_rate": 2.7370507399577165e-05, + "loss": 0.0903, + "step": 8570 + }, + { + "epoch": 9.06, + "learning_rate": 2.736522198731501e-05, + "loss": 0.2459, + "step": 8572 + }, + { + "epoch": 9.06, + "learning_rate": 2.7359936575052857e-05, + "loss": 0.1202, + "step": 8574 + }, + { + "epoch": 9.07, + "learning_rate": 2.73546511627907e-05, + "loss": 0.0555, + "step": 8576 + }, + { + "epoch": 9.07, + "learning_rate": 2.7349365750528543e-05, + "loss": 0.218, + "step": 8578 + }, + { + "epoch": 9.07, + "learning_rate": 2.7344080338266385e-05, + "loss": 0.0653, + "step": 8580 + }, + { + "epoch": 9.07, + "learning_rate": 2.733879492600423e-05, + "loss": 0.0576, + "step": 8582 + }, + { + "epoch": 9.07, + "learning_rate": 2.7333509513742074e-05, + "loss": 0.0617, + "step": 8584 + }, + { + "epoch": 9.08, + "learning_rate": 2.7328224101479917e-05, + "loss": 0.0679, + "step": 8586 + }, + { + "epoch": 9.08, + "learning_rate": 2.732293868921776e-05, + "loss": 0.0907, + "step": 8588 + }, + { + "epoch": 9.08, + "learning_rate": 2.7317653276955602e-05, + "loss": 0.1537, + "step": 8590 + }, + { + "epoch": 9.08, + "learning_rate": 2.731236786469345e-05, + "loss": 0.1437, + "step": 8592 + }, + { + "epoch": 9.08, + "learning_rate": 2.7307082452431294e-05, + "loss": 0.0707, + "step": 8594 + }, + { + "epoch": 9.09, + "learning_rate": 2.7301797040169137e-05, + "loss": 0.183, + "step": 8596 + }, + { + "epoch": 9.09, + "learning_rate": 2.729651162790698e-05, + "loss": 0.0725, + "step": 8598 + }, + { + "epoch": 9.09, + "learning_rate": 2.7291226215644822e-05, + "loss": 0.147, + "step": 8600 + }, + { + "epoch": 9.09, + "learning_rate": 2.7285940803382665e-05, + "loss": 0.1171, + "step": 8602 + }, + { + "epoch": 9.1, + "learning_rate": 2.7280655391120507e-05, + "loss": 0.0401, + "step": 8604 + }, + { + "epoch": 9.1, + "learning_rate": 2.727536997885835e-05, + "loss": 0.0992, + "step": 8606 + }, + { + "epoch": 9.1, + "learning_rate": 2.7270084566596193e-05, + "loss": 0.0551, + "step": 8608 + }, + { + "epoch": 9.1, + "learning_rate": 2.7264799154334042e-05, + "loss": 0.1109, + "step": 8610 + }, + { + "epoch": 9.1, + "learning_rate": 2.7259513742071885e-05, + "loss": 0.076, + "step": 8612 + }, + { + "epoch": 9.11, + "learning_rate": 2.7254228329809728e-05, + "loss": 0.0863, + "step": 8614 + }, + { + "epoch": 9.11, + "learning_rate": 2.724894291754757e-05, + "loss": 0.049, + "step": 8616 + }, + { + "epoch": 9.11, + "learning_rate": 2.7243657505285413e-05, + "loss": 0.1014, + "step": 8618 + }, + { + "epoch": 9.11, + "learning_rate": 2.7238372093023256e-05, + "loss": 0.0364, + "step": 8620 + }, + { + "epoch": 9.11, + "learning_rate": 2.7233086680761098e-05, + "loss": 0.1735, + "step": 8622 + }, + { + "epoch": 9.12, + "learning_rate": 2.722780126849894e-05, + "loss": 0.0667, + "step": 8624 + }, + { + "epoch": 9.12, + "learning_rate": 2.7222515856236787e-05, + "loss": 0.0699, + "step": 8626 + }, + { + "epoch": 9.12, + "learning_rate": 2.7217230443974633e-05, + "loss": 0.0562, + "step": 8628 + }, + { + "epoch": 9.12, + "learning_rate": 2.7211945031712476e-05, + "loss": 0.0501, + "step": 8630 + }, + { + "epoch": 9.12, + "learning_rate": 2.720665961945032e-05, + "loss": 0.0928, + "step": 8632 + }, + { + "epoch": 9.13, + "learning_rate": 2.7201374207188164e-05, + "loss": 0.1008, + "step": 8634 + }, + { + "epoch": 9.13, + "learning_rate": 2.7196088794926007e-05, + "loss": 0.0146, + "step": 8636 + }, + { + "epoch": 9.13, + "learning_rate": 2.719080338266385e-05, + "loss": 0.0512, + "step": 8638 + }, + { + "epoch": 9.13, + "learning_rate": 2.7185517970401692e-05, + "loss": 0.0517, + "step": 8640 + }, + { + "epoch": 9.14, + "learning_rate": 2.7180232558139535e-05, + "loss": 0.0155, + "step": 8642 + }, + { + "epoch": 9.14, + "learning_rate": 2.7174947145877378e-05, + "loss": 0.0762, + "step": 8644 + }, + { + "epoch": 9.14, + "learning_rate": 2.7169661733615227e-05, + "loss": 0.0822, + "step": 8646 + }, + { + "epoch": 9.14, + "learning_rate": 2.716437632135307e-05, + "loss": 0.0619, + "step": 8648 + }, + { + "epoch": 9.14, + "learning_rate": 2.7159090909090913e-05, + "loss": 0.1611, + "step": 8650 + }, + { + "epoch": 9.15, + "learning_rate": 2.7153805496828755e-05, + "loss": 0.0875, + "step": 8652 + }, + { + "epoch": 9.15, + "learning_rate": 2.7148520084566598e-05, + "loss": 0.0389, + "step": 8654 + }, + { + "epoch": 9.15, + "learning_rate": 2.714323467230444e-05, + "loss": 0.109, + "step": 8656 + }, + { + "epoch": 9.15, + "learning_rate": 2.7137949260042283e-05, + "loss": 0.0651, + "step": 8658 + }, + { + "epoch": 9.15, + "learning_rate": 2.7132663847780126e-05, + "loss": 0.1393, + "step": 8660 + }, + { + "epoch": 9.16, + "learning_rate": 2.712737843551797e-05, + "loss": 0.1152, + "step": 8662 + }, + { + "epoch": 9.16, + "learning_rate": 2.7122093023255818e-05, + "loss": 0.0792, + "step": 8664 + }, + { + "epoch": 9.16, + "learning_rate": 2.711680761099366e-05, + "loss": 0.0683, + "step": 8666 + }, + { + "epoch": 9.16, + "learning_rate": 2.7111522198731503e-05, + "loss": 0.1229, + "step": 8668 + }, + { + "epoch": 9.16, + "learning_rate": 2.7106236786469346e-05, + "loss": 0.1954, + "step": 8670 + }, + { + "epoch": 9.17, + "learning_rate": 2.710095137420719e-05, + "loss": 0.1128, + "step": 8672 + }, + { + "epoch": 9.17, + "learning_rate": 2.709566596194503e-05, + "loss": 0.0474, + "step": 8674 + }, + { + "epoch": 9.17, + "learning_rate": 2.7090380549682874e-05, + "loss": 0.152, + "step": 8676 + }, + { + "epoch": 9.17, + "learning_rate": 2.708509513742072e-05, + "loss": 0.0323, + "step": 8678 + }, + { + "epoch": 9.18, + "learning_rate": 2.7079809725158563e-05, + "loss": 0.0839, + "step": 8680 + }, + { + "epoch": 9.18, + "learning_rate": 2.707452431289641e-05, + "loss": 0.1011, + "step": 8682 + }, + { + "epoch": 9.18, + "learning_rate": 2.706923890063425e-05, + "loss": 0.0682, + "step": 8684 + }, + { + "epoch": 9.18, + "learning_rate": 2.7063953488372094e-05, + "loss": 0.1056, + "step": 8686 + }, + { + "epoch": 9.18, + "learning_rate": 2.705866807610994e-05, + "loss": 0.123, + "step": 8688 + }, + { + "epoch": 9.19, + "learning_rate": 2.7053382663847783e-05, + "loss": 0.0388, + "step": 8690 + }, + { + "epoch": 9.19, + "learning_rate": 2.7048097251585626e-05, + "loss": 0.0797, + "step": 8692 + }, + { + "epoch": 9.19, + "learning_rate": 2.7042811839323468e-05, + "loss": 0.0899, + "step": 8694 + }, + { + "epoch": 9.19, + "learning_rate": 2.703752642706131e-05, + "loss": 0.0955, + "step": 8696 + }, + { + "epoch": 9.19, + "learning_rate": 2.7032241014799154e-05, + "loss": 0.1046, + "step": 8698 + }, + { + "epoch": 9.2, + "learning_rate": 2.7026955602537003e-05, + "loss": 0.0889, + "step": 8700 + }, + { + "epoch": 9.2, + "learning_rate": 2.7021670190274846e-05, + "loss": 0.0868, + "step": 8702 + }, + { + "epoch": 9.2, + "learning_rate": 2.701638477801269e-05, + "loss": 0.1856, + "step": 8704 + }, + { + "epoch": 9.2, + "learning_rate": 2.701109936575053e-05, + "loss": 0.1437, + "step": 8706 + }, + { + "epoch": 9.21, + "learning_rate": 2.7005813953488374e-05, + "loss": 0.1376, + "step": 8708 + }, + { + "epoch": 9.21, + "learning_rate": 2.7000528541226216e-05, + "loss": 0.0924, + "step": 8710 + }, + { + "epoch": 9.21, + "learning_rate": 2.699524312896406e-05, + "loss": 0.0697, + "step": 8712 + }, + { + "epoch": 9.21, + "learning_rate": 2.6989957716701902e-05, + "loss": 0.2081, + "step": 8714 + }, + { + "epoch": 9.21, + "learning_rate": 2.6984672304439744e-05, + "loss": 0.0381, + "step": 8716 + }, + { + "epoch": 9.22, + "learning_rate": 2.6979386892177594e-05, + "loss": 0.2289, + "step": 8718 + }, + { + "epoch": 9.22, + "learning_rate": 2.6974101479915437e-05, + "loss": 0.0454, + "step": 8720 + }, + { + "epoch": 9.22, + "learning_rate": 2.696881606765328e-05, + "loss": 0.1052, + "step": 8722 + }, + { + "epoch": 9.22, + "learning_rate": 2.6963530655391122e-05, + "loss": 0.0191, + "step": 8724 + }, + { + "epoch": 9.22, + "learning_rate": 2.6958245243128965e-05, + "loss": 0.035, + "step": 8726 + }, + { + "epoch": 9.23, + "learning_rate": 2.6952959830866807e-05, + "loss": 0.1103, + "step": 8728 + }, + { + "epoch": 9.23, + "learning_rate": 2.694767441860465e-05, + "loss": 0.0613, + "step": 8730 + }, + { + "epoch": 9.23, + "learning_rate": 2.6942389006342496e-05, + "loss": 0.0965, + "step": 8732 + }, + { + "epoch": 9.23, + "learning_rate": 2.693710359408034e-05, + "loss": 0.094, + "step": 8734 + }, + { + "epoch": 9.23, + "learning_rate": 2.6931818181818185e-05, + "loss": 0.0707, + "step": 8736 + }, + { + "epoch": 9.24, + "learning_rate": 2.6926532769556027e-05, + "loss": 0.0763, + "step": 8738 + }, + { + "epoch": 9.24, + "learning_rate": 2.692124735729387e-05, + "loss": 0.2875, + "step": 8740 + }, + { + "epoch": 9.24, + "learning_rate": 2.6915961945031716e-05, + "loss": 0.046, + "step": 8742 + }, + { + "epoch": 9.24, + "learning_rate": 2.691067653276956e-05, + "loss": 0.0547, + "step": 8744 + }, + { + "epoch": 9.25, + "learning_rate": 2.69053911205074e-05, + "loss": 0.0899, + "step": 8746 + }, + { + "epoch": 9.25, + "learning_rate": 2.6900105708245244e-05, + "loss": 0.056, + "step": 8748 + }, + { + "epoch": 9.25, + "learning_rate": 2.6894820295983087e-05, + "loss": 0.0906, + "step": 8750 + }, + { + "epoch": 9.25, + "learning_rate": 2.688953488372093e-05, + "loss": 0.1307, + "step": 8752 + }, + { + "epoch": 9.25, + "learning_rate": 2.688424947145878e-05, + "loss": 0.1062, + "step": 8754 + }, + { + "epoch": 9.26, + "learning_rate": 2.687896405919662e-05, + "loss": 0.0706, + "step": 8756 + }, + { + "epoch": 9.26, + "learning_rate": 2.6873678646934464e-05, + "loss": 0.0468, + "step": 8758 + }, + { + "epoch": 9.26, + "learning_rate": 2.6868393234672307e-05, + "loss": 0.1303, + "step": 8760 + }, + { + "epoch": 9.26, + "learning_rate": 2.686310782241015e-05, + "loss": 0.0624, + "step": 8762 + }, + { + "epoch": 9.26, + "learning_rate": 2.6857822410147992e-05, + "loss": 0.0877, + "step": 8764 + }, + { + "epoch": 9.27, + "learning_rate": 2.6852536997885835e-05, + "loss": 0.053, + "step": 8766 + }, + { + "epoch": 9.27, + "learning_rate": 2.6847251585623678e-05, + "loss": 0.0406, + "step": 8768 + }, + { + "epoch": 9.27, + "learning_rate": 2.684196617336152e-05, + "loss": 0.0733, + "step": 8770 + }, + { + "epoch": 9.27, + "learning_rate": 2.683668076109937e-05, + "loss": 0.0395, + "step": 8772 + }, + { + "epoch": 9.27, + "learning_rate": 2.6831395348837212e-05, + "loss": 0.04, + "step": 8774 + }, + { + "epoch": 9.28, + "learning_rate": 2.6826109936575055e-05, + "loss": 0.0447, + "step": 8776 + }, + { + "epoch": 9.28, + "learning_rate": 2.6820824524312898e-05, + "loss": 0.1337, + "step": 8778 + }, + { + "epoch": 9.28, + "learning_rate": 2.681553911205074e-05, + "loss": 0.1441, + "step": 8780 + }, + { + "epoch": 9.28, + "learning_rate": 2.6810253699788583e-05, + "loss": 0.0266, + "step": 8782 + }, + { + "epoch": 9.29, + "learning_rate": 2.6804968287526426e-05, + "loss": 0.082, + "step": 8784 + }, + { + "epoch": 9.29, + "learning_rate": 2.6799682875264272e-05, + "loss": 0.0276, + "step": 8786 + }, + { + "epoch": 9.29, + "learning_rate": 2.6794397463002114e-05, + "loss": 0.0664, + "step": 8788 + }, + { + "epoch": 9.29, + "learning_rate": 2.678911205073996e-05, + "loss": 0.0992, + "step": 8790 + }, + { + "epoch": 9.29, + "learning_rate": 2.6783826638477803e-05, + "loss": 0.1348, + "step": 8792 + }, + { + "epoch": 9.3, + "learning_rate": 2.677854122621565e-05, + "loss": 0.0989, + "step": 8794 + }, + { + "epoch": 9.3, + "learning_rate": 2.6773255813953492e-05, + "loss": 0.0812, + "step": 8796 + }, + { + "epoch": 9.3, + "learning_rate": 2.6767970401691335e-05, + "loss": 0.1295, + "step": 8798 + }, + { + "epoch": 9.3, + "learning_rate": 2.6762684989429177e-05, + "loss": 0.1148, + "step": 8800 + }, + { + "epoch": 9.3, + "learning_rate": 2.675739957716702e-05, + "loss": 0.0984, + "step": 8802 + }, + { + "epoch": 9.31, + "learning_rate": 2.6752114164904863e-05, + "loss": 0.0716, + "step": 8804 + }, + { + "epoch": 9.31, + "learning_rate": 2.6746828752642705e-05, + "loss": 0.0396, + "step": 8806 + }, + { + "epoch": 9.31, + "learning_rate": 2.6741543340380548e-05, + "loss": 0.0483, + "step": 8808 + }, + { + "epoch": 9.31, + "learning_rate": 2.6736257928118397e-05, + "loss": 0.0657, + "step": 8810 + }, + { + "epoch": 9.32, + "learning_rate": 2.673097251585624e-05, + "loss": 0.2031, + "step": 8812 + }, + { + "epoch": 9.32, + "learning_rate": 2.6725687103594083e-05, + "loss": 0.1448, + "step": 8814 + }, + { + "epoch": 9.32, + "learning_rate": 2.6720401691331925e-05, + "loss": 0.0513, + "step": 8816 + }, + { + "epoch": 9.32, + "learning_rate": 2.6715116279069768e-05, + "loss": 0.0472, + "step": 8818 + }, + { + "epoch": 9.32, + "learning_rate": 2.670983086680761e-05, + "loss": 0.0745, + "step": 8820 + }, + { + "epoch": 9.33, + "learning_rate": 2.6704545454545453e-05, + "loss": 0.0551, + "step": 8822 + }, + { + "epoch": 9.33, + "learning_rate": 2.6699260042283296e-05, + "loss": 0.0293, + "step": 8824 + }, + { + "epoch": 9.33, + "learning_rate": 2.669397463002114e-05, + "loss": 0.0633, + "step": 8826 + }, + { + "epoch": 9.33, + "learning_rate": 2.6688689217758988e-05, + "loss": 0.0671, + "step": 8828 + }, + { + "epoch": 9.33, + "learning_rate": 2.668340380549683e-05, + "loss": 0.1661, + "step": 8830 + }, + { + "epoch": 9.34, + "learning_rate": 2.6678118393234674e-05, + "loss": 0.0222, + "step": 8832 + }, + { + "epoch": 9.34, + "learning_rate": 2.6672832980972516e-05, + "loss": 0.1829, + "step": 8834 + }, + { + "epoch": 9.34, + "learning_rate": 2.666754756871036e-05, + "loss": 0.2226, + "step": 8836 + }, + { + "epoch": 9.34, + "learning_rate": 2.6662262156448205e-05, + "loss": 0.2149, + "step": 8838 + }, + { + "epoch": 9.34, + "learning_rate": 2.6656976744186048e-05, + "loss": 0.1276, + "step": 8840 + }, + { + "epoch": 9.35, + "learning_rate": 2.665169133192389e-05, + "loss": 0.1313, + "step": 8842 + }, + { + "epoch": 9.35, + "learning_rate": 2.6646405919661733e-05, + "loss": 0.0533, + "step": 8844 + }, + { + "epoch": 9.35, + "learning_rate": 2.664112050739958e-05, + "loss": 0.1269, + "step": 8846 + }, + { + "epoch": 9.35, + "learning_rate": 2.6635835095137425e-05, + "loss": 0.153, + "step": 8848 + }, + { + "epoch": 9.36, + "learning_rate": 2.6630549682875268e-05, + "loss": 0.1629, + "step": 8850 + }, + { + "epoch": 9.36, + "learning_rate": 2.662526427061311e-05, + "loss": 0.1014, + "step": 8852 + }, + { + "epoch": 9.36, + "learning_rate": 2.6619978858350953e-05, + "loss": 0.105, + "step": 8854 + }, + { + "epoch": 9.36, + "learning_rate": 2.6614693446088796e-05, + "loss": 0.0713, + "step": 8856 + }, + { + "epoch": 9.36, + "learning_rate": 2.660940803382664e-05, + "loss": 0.1546, + "step": 8858 + }, + { + "epoch": 9.37, + "learning_rate": 2.660412262156448e-05, + "loss": 0.0783, + "step": 8860 + }, + { + "epoch": 9.37, + "learning_rate": 2.6598837209302324e-05, + "loss": 0.0218, + "step": 8862 + }, + { + "epoch": 9.37, + "learning_rate": 2.6593551797040173e-05, + "loss": 0.059, + "step": 8864 + }, + { + "epoch": 9.37, + "learning_rate": 2.6588266384778016e-05, + "loss": 0.08, + "step": 8866 + }, + { + "epoch": 9.37, + "learning_rate": 2.658298097251586e-05, + "loss": 0.0333, + "step": 8868 + }, + { + "epoch": 9.38, + "learning_rate": 2.65776955602537e-05, + "loss": 0.1142, + "step": 8870 + }, + { + "epoch": 9.38, + "learning_rate": 2.6572410147991544e-05, + "loss": 0.1063, + "step": 8872 + }, + { + "epoch": 9.38, + "learning_rate": 2.6567124735729387e-05, + "loss": 0.0753, + "step": 8874 + }, + { + "epoch": 9.38, + "learning_rate": 2.656183932346723e-05, + "loss": 0.0615, + "step": 8876 + }, + { + "epoch": 9.38, + "learning_rate": 2.6556553911205072e-05, + "loss": 0.0701, + "step": 8878 + }, + { + "epoch": 9.39, + "learning_rate": 2.6551268498942915e-05, + "loss": 0.0558, + "step": 8880 + }, + { + "epoch": 9.39, + "learning_rate": 2.6545983086680764e-05, + "loss": 0.0647, + "step": 8882 + }, + { + "epoch": 9.39, + "learning_rate": 2.6540697674418607e-05, + "loss": 0.1294, + "step": 8884 + }, + { + "epoch": 9.39, + "learning_rate": 2.653541226215645e-05, + "loss": 0.145, + "step": 8886 + }, + { + "epoch": 9.4, + "learning_rate": 2.6530126849894292e-05, + "loss": 0.1311, + "step": 8888 + }, + { + "epoch": 9.4, + "learning_rate": 2.6524841437632135e-05, + "loss": 0.1293, + "step": 8890 + }, + { + "epoch": 9.4, + "learning_rate": 2.651955602536998e-05, + "loss": 0.0762, + "step": 8892 + }, + { + "epoch": 9.4, + "learning_rate": 2.6514270613107823e-05, + "loss": 0.029, + "step": 8894 + }, + { + "epoch": 9.4, + "learning_rate": 2.6508985200845666e-05, + "loss": 0.086, + "step": 8896 + }, + { + "epoch": 9.41, + "learning_rate": 2.650369978858351e-05, + "loss": 0.0596, + "step": 8898 + }, + { + "epoch": 9.41, + "learning_rate": 2.6498414376321358e-05, + "loss": 0.0437, + "step": 8900 + }, + { + "epoch": 9.41, + "learning_rate": 2.64931289640592e-05, + "loss": 0.1034, + "step": 8902 + }, + { + "epoch": 9.41, + "learning_rate": 2.6487843551797044e-05, + "loss": 0.0791, + "step": 8904 + }, + { + "epoch": 9.41, + "learning_rate": 2.6482558139534886e-05, + "loss": 0.1433, + "step": 8906 + }, + { + "epoch": 9.42, + "learning_rate": 2.647727272727273e-05, + "loss": 0.0915, + "step": 8908 + }, + { + "epoch": 9.42, + "learning_rate": 2.647198731501057e-05, + "loss": 0.052, + "step": 8910 + }, + { + "epoch": 9.42, + "learning_rate": 2.6466701902748414e-05, + "loss": 0.1161, + "step": 8912 + }, + { + "epoch": 9.42, + "learning_rate": 2.6461416490486257e-05, + "loss": 0.0793, + "step": 8914 + }, + { + "epoch": 9.42, + "learning_rate": 2.64561310782241e-05, + "loss": 0.1089, + "step": 8916 + }, + { + "epoch": 9.43, + "learning_rate": 2.645084566596195e-05, + "loss": 0.1494, + "step": 8918 + }, + { + "epoch": 9.43, + "learning_rate": 2.6445560253699792e-05, + "loss": 0.128, + "step": 8920 + }, + { + "epoch": 9.43, + "learning_rate": 2.6440274841437634e-05, + "loss": 0.0654, + "step": 8922 + }, + { + "epoch": 9.43, + "learning_rate": 2.6434989429175477e-05, + "loss": 0.1304, + "step": 8924 + }, + { + "epoch": 9.44, + "learning_rate": 2.642970401691332e-05, + "loss": 0.0682, + "step": 8926 + }, + { + "epoch": 9.44, + "learning_rate": 2.6424418604651162e-05, + "loss": 0.0443, + "step": 8928 + }, + { + "epoch": 9.44, + "learning_rate": 2.6419133192389005e-05, + "loss": 0.1399, + "step": 8930 + }, + { + "epoch": 9.44, + "learning_rate": 2.6413847780126848e-05, + "loss": 0.0601, + "step": 8932 + }, + { + "epoch": 9.44, + "learning_rate": 2.640856236786469e-05, + "loss": 0.0917, + "step": 8934 + }, + { + "epoch": 9.45, + "learning_rate": 2.640327695560254e-05, + "loss": 0.0802, + "step": 8936 + }, + { + "epoch": 9.45, + "learning_rate": 2.6397991543340383e-05, + "loss": 0.0652, + "step": 8938 + }, + { + "epoch": 9.45, + "learning_rate": 2.6392706131078225e-05, + "loss": 0.0827, + "step": 8940 + }, + { + "epoch": 9.45, + "learning_rate": 2.6387420718816068e-05, + "loss": 0.0792, + "step": 8942 + }, + { + "epoch": 9.45, + "learning_rate": 2.6382135306553914e-05, + "loss": 0.0509, + "step": 8944 + }, + { + "epoch": 9.46, + "learning_rate": 2.6376849894291757e-05, + "loss": 0.0269, + "step": 8946 + }, + { + "epoch": 9.46, + "learning_rate": 2.63715644820296e-05, + "loss": 0.115, + "step": 8948 + }, + { + "epoch": 9.46, + "learning_rate": 2.6366279069767442e-05, + "loss": 0.068, + "step": 8950 + }, + { + "epoch": 9.46, + "learning_rate": 2.6360993657505285e-05, + "loss": 0.0246, + "step": 8952 + }, + { + "epoch": 9.47, + "learning_rate": 2.6355708245243134e-05, + "loss": 0.0227, + "step": 8954 + }, + { + "epoch": 9.47, + "learning_rate": 2.6350422832980977e-05, + "loss": 0.1095, + "step": 8956 + }, + { + "epoch": 9.47, + "learning_rate": 2.634513742071882e-05, + "loss": 0.0427, + "step": 8958 + }, + { + "epoch": 9.47, + "learning_rate": 2.6339852008456662e-05, + "loss": 0.1263, + "step": 8960 + }, + { + "epoch": 9.47, + "learning_rate": 2.6334566596194505e-05, + "loss": 0.0586, + "step": 8962 + }, + { + "epoch": 9.48, + "learning_rate": 2.6329281183932347e-05, + "loss": 0.2595, + "step": 8964 + }, + { + "epoch": 9.48, + "learning_rate": 2.632399577167019e-05, + "loss": 0.0935, + "step": 8966 + }, + { + "epoch": 9.48, + "learning_rate": 2.6318710359408033e-05, + "loss": 0.0694, + "step": 8968 + }, + { + "epoch": 9.48, + "learning_rate": 2.6313424947145875e-05, + "loss": 0.0243, + "step": 8970 + }, + { + "epoch": 9.48, + "learning_rate": 2.6308139534883725e-05, + "loss": 0.0722, + "step": 8972 + }, + { + "epoch": 9.49, + "learning_rate": 2.6302854122621568e-05, + "loss": 0.0713, + "step": 8974 + }, + { + "epoch": 9.49, + "learning_rate": 2.629756871035941e-05, + "loss": 0.0691, + "step": 8976 + }, + { + "epoch": 9.49, + "learning_rate": 2.6292283298097253e-05, + "loss": 0.1, + "step": 8978 + }, + { + "epoch": 9.49, + "learning_rate": 2.6286997885835096e-05, + "loss": 0.0964, + "step": 8980 + }, + { + "epoch": 9.49, + "learning_rate": 2.6281712473572938e-05, + "loss": 0.0803, + "step": 8982 + }, + { + "epoch": 9.5, + "learning_rate": 2.627642706131078e-05, + "loss": 0.0492, + "step": 8984 + }, + { + "epoch": 9.5, + "learning_rate": 2.6271141649048624e-05, + "loss": 0.0719, + "step": 8986 + }, + { + "epoch": 9.5, + "learning_rate": 2.626585623678647e-05, + "loss": 0.0741, + "step": 8988 + }, + { + "epoch": 9.5, + "learning_rate": 2.6260570824524316e-05, + "loss": 0.0719, + "step": 8990 + }, + { + "epoch": 9.51, + "learning_rate": 2.625528541226216e-05, + "loss": 0.0528, + "step": 8992 + }, + { + "epoch": 9.51, + "learning_rate": 2.625e-05, + "loss": 0.0613, + "step": 8994 + }, + { + "epoch": 9.51, + "learning_rate": 2.6244714587737844e-05, + "loss": 0.0499, + "step": 8996 + }, + { + "epoch": 9.51, + "learning_rate": 2.623942917547569e-05, + "loss": 0.0338, + "step": 8998 + }, + { + "epoch": 9.51, + "learning_rate": 2.6234143763213532e-05, + "loss": 0.1687, + "step": 9000 + }, + { + "epoch": 9.51, + "eval_cer": 0.03066400683955543, + "eval_loss": 0.7769936323165894, + "eval_runtime": 127.7044, + "eval_samples_per_second": 6.586, + "eval_steps_per_second": 0.83, + "step": 9000 + }, + { + "epoch": 9.52, + "learning_rate": 2.6228858350951375e-05, + "loss": 0.0774, + "step": 9002 + }, + { + "epoch": 9.52, + "learning_rate": 2.6223572938689218e-05, + "loss": 0.09, + "step": 9004 + }, + { + "epoch": 9.52, + "learning_rate": 2.621828752642706e-05, + "loss": 0.0983, + "step": 9006 + }, + { + "epoch": 9.52, + "learning_rate": 2.621300211416491e-05, + "loss": 0.0581, + "step": 9008 + }, + { + "epoch": 9.52, + "learning_rate": 2.6207716701902753e-05, + "loss": 0.0721, + "step": 9010 + }, + { + "epoch": 9.53, + "learning_rate": 2.6202431289640595e-05, + "loss": 0.0352, + "step": 9012 + }, + { + "epoch": 9.53, + "learning_rate": 2.6197145877378438e-05, + "loss": 0.1807, + "step": 9014 + }, + { + "epoch": 9.53, + "learning_rate": 2.619186046511628e-05, + "loss": 0.0639, + "step": 9016 + }, + { + "epoch": 9.53, + "learning_rate": 2.6186575052854123e-05, + "loss": 0.0835, + "step": 9018 + }, + { + "epoch": 9.53, + "learning_rate": 2.6181289640591966e-05, + "loss": 0.139, + "step": 9020 + }, + { + "epoch": 9.54, + "learning_rate": 2.617600422832981e-05, + "loss": 0.0826, + "step": 9022 + }, + { + "epoch": 9.54, + "learning_rate": 2.617071881606765e-05, + "loss": 0.1681, + "step": 9024 + }, + { + "epoch": 9.54, + "learning_rate": 2.61654334038055e-05, + "loss": 0.0913, + "step": 9026 + }, + { + "epoch": 9.54, + "learning_rate": 2.6160147991543343e-05, + "loss": 0.0909, + "step": 9028 + }, + { + "epoch": 9.55, + "learning_rate": 2.6154862579281186e-05, + "loss": 0.0458, + "step": 9030 + }, + { + "epoch": 9.55, + "learning_rate": 2.614957716701903e-05, + "loss": 0.0742, + "step": 9032 + }, + { + "epoch": 9.55, + "learning_rate": 2.614429175475687e-05, + "loss": 0.1146, + "step": 9034 + }, + { + "epoch": 9.55, + "learning_rate": 2.6139006342494714e-05, + "loss": 0.1539, + "step": 9036 + }, + { + "epoch": 9.55, + "learning_rate": 2.6133720930232557e-05, + "loss": 0.0891, + "step": 9038 + }, + { + "epoch": 9.56, + "learning_rate": 2.61284355179704e-05, + "loss": 0.2471, + "step": 9040 + }, + { + "epoch": 9.56, + "learning_rate": 2.6123150105708246e-05, + "loss": 0.1032, + "step": 9042 + }, + { + "epoch": 9.56, + "learning_rate": 2.611786469344609e-05, + "loss": 0.1223, + "step": 9044 + }, + { + "epoch": 9.56, + "learning_rate": 2.6112579281183934e-05, + "loss": 0.1236, + "step": 9046 + }, + { + "epoch": 9.56, + "learning_rate": 2.6107293868921777e-05, + "loss": 0.1142, + "step": 9048 + }, + { + "epoch": 9.57, + "learning_rate": 2.610200845665962e-05, + "loss": 0.1083, + "step": 9050 + }, + { + "epoch": 9.57, + "learning_rate": 2.6096723044397466e-05, + "loss": 0.0754, + "step": 9052 + }, + { + "epoch": 9.57, + "learning_rate": 2.609143763213531e-05, + "loss": 0.0589, + "step": 9054 + }, + { + "epoch": 9.57, + "learning_rate": 2.608615221987315e-05, + "loss": 0.0763, + "step": 9056 + }, + { + "epoch": 9.58, + "learning_rate": 2.6080866807610994e-05, + "loss": 0.0907, + "step": 9058 + }, + { + "epoch": 9.58, + "learning_rate": 2.6075581395348836e-05, + "loss": 0.0649, + "step": 9060 + }, + { + "epoch": 9.58, + "learning_rate": 2.6070295983086686e-05, + "loss": 0.0344, + "step": 9062 + }, + { + "epoch": 9.58, + "learning_rate": 2.606501057082453e-05, + "loss": 0.0958, + "step": 9064 + }, + { + "epoch": 9.58, + "learning_rate": 2.605972515856237e-05, + "loss": 0.0438, + "step": 9066 + }, + { + "epoch": 9.59, + "learning_rate": 2.6054439746300214e-05, + "loss": 0.0463, + "step": 9068 + }, + { + "epoch": 9.59, + "learning_rate": 2.6049154334038056e-05, + "loss": 0.032, + "step": 9070 + }, + { + "epoch": 9.59, + "learning_rate": 2.60438689217759e-05, + "loss": 0.0797, + "step": 9072 + }, + { + "epoch": 9.59, + "learning_rate": 2.6038583509513742e-05, + "loss": 0.1319, + "step": 9074 + }, + { + "epoch": 9.59, + "learning_rate": 2.6033298097251584e-05, + "loss": 0.0902, + "step": 9076 + }, + { + "epoch": 9.6, + "learning_rate": 2.6028012684989427e-05, + "loss": 0.0814, + "step": 9078 + }, + { + "epoch": 9.6, + "learning_rate": 2.6022727272727277e-05, + "loss": 0.1081, + "step": 9080 + }, + { + "epoch": 9.6, + "learning_rate": 2.601744186046512e-05, + "loss": 0.0604, + "step": 9082 + }, + { + "epoch": 9.6, + "learning_rate": 2.6012156448202962e-05, + "loss": 0.0702, + "step": 9084 + }, + { + "epoch": 9.6, + "learning_rate": 2.6006871035940805e-05, + "loss": 0.0974, + "step": 9086 + }, + { + "epoch": 9.61, + "learning_rate": 2.6001585623678647e-05, + "loss": 0.0582, + "step": 9088 + }, + { + "epoch": 9.61, + "learning_rate": 2.599630021141649e-05, + "loss": 0.1291, + "step": 9090 + }, + { + "epoch": 9.61, + "learning_rate": 2.5991014799154333e-05, + "loss": 0.0836, + "step": 9092 + }, + { + "epoch": 9.61, + "learning_rate": 2.5985729386892175e-05, + "loss": 0.0978, + "step": 9094 + }, + { + "epoch": 9.62, + "learning_rate": 2.598044397463002e-05, + "loss": 0.0746, + "step": 9096 + }, + { + "epoch": 9.62, + "learning_rate": 2.5975158562367867e-05, + "loss": 0.0816, + "step": 9098 + }, + { + "epoch": 9.62, + "learning_rate": 2.596987315010571e-05, + "loss": 0.0664, + "step": 9100 + }, + { + "epoch": 9.62, + "learning_rate": 2.5964587737843553e-05, + "loss": 0.0668, + "step": 9102 + }, + { + "epoch": 9.62, + "learning_rate": 2.59593023255814e-05, + "loss": 0.1026, + "step": 9104 + }, + { + "epoch": 9.63, + "learning_rate": 2.595401691331924e-05, + "loss": 0.0647, + "step": 9106 + }, + { + "epoch": 9.63, + "learning_rate": 2.5948731501057084e-05, + "loss": 0.0637, + "step": 9108 + }, + { + "epoch": 9.63, + "learning_rate": 2.5943446088794927e-05, + "loss": 0.093, + "step": 9110 + }, + { + "epoch": 9.63, + "learning_rate": 2.593816067653277e-05, + "loss": 0.0501, + "step": 9112 + }, + { + "epoch": 9.63, + "learning_rate": 2.5932875264270612e-05, + "loss": 0.1349, + "step": 9114 + }, + { + "epoch": 9.64, + "learning_rate": 2.592758985200846e-05, + "loss": 0.0521, + "step": 9116 + }, + { + "epoch": 9.64, + "learning_rate": 2.5922304439746304e-05, + "loss": 0.0251, + "step": 9118 + }, + { + "epoch": 9.64, + "learning_rate": 2.5917019027484147e-05, + "loss": 0.0449, + "step": 9120 + }, + { + "epoch": 9.64, + "learning_rate": 2.591173361522199e-05, + "loss": 0.0847, + "step": 9122 + }, + { + "epoch": 9.64, + "learning_rate": 2.5906448202959832e-05, + "loss": 0.1406, + "step": 9124 + }, + { + "epoch": 9.65, + "learning_rate": 2.5901162790697675e-05, + "loss": 0.0304, + "step": 9126 + }, + { + "epoch": 9.65, + "learning_rate": 2.5895877378435518e-05, + "loss": 0.0721, + "step": 9128 + }, + { + "epoch": 9.65, + "learning_rate": 2.589059196617336e-05, + "loss": 0.061, + "step": 9130 + }, + { + "epoch": 9.65, + "learning_rate": 2.5885306553911203e-05, + "loss": 0.1242, + "step": 9132 + }, + { + "epoch": 9.66, + "learning_rate": 2.5880021141649052e-05, + "loss": 0.0477, + "step": 9134 + }, + { + "epoch": 9.66, + "learning_rate": 2.5874735729386895e-05, + "loss": 0.0527, + "step": 9136 + }, + { + "epoch": 9.66, + "learning_rate": 2.5869450317124738e-05, + "loss": 0.3248, + "step": 9138 + }, + { + "epoch": 9.66, + "learning_rate": 2.586416490486258e-05, + "loss": 0.112, + "step": 9140 + }, + { + "epoch": 9.66, + "learning_rate": 2.5858879492600423e-05, + "loss": 0.1445, + "step": 9142 + }, + { + "epoch": 9.67, + "learning_rate": 2.5853594080338266e-05, + "loss": 0.131, + "step": 9144 + }, + { + "epoch": 9.67, + "learning_rate": 2.584830866807611e-05, + "loss": 0.069, + "step": 9146 + }, + { + "epoch": 9.67, + "learning_rate": 2.5843023255813955e-05, + "loss": 0.0555, + "step": 9148 + }, + { + "epoch": 9.67, + "learning_rate": 2.5837737843551797e-05, + "loss": 0.1066, + "step": 9150 + }, + { + "epoch": 9.67, + "learning_rate": 2.5832452431289643e-05, + "loss": 0.0949, + "step": 9152 + }, + { + "epoch": 9.68, + "learning_rate": 2.5827167019027486e-05, + "loss": 0.1722, + "step": 9154 + }, + { + "epoch": 9.68, + "learning_rate": 2.582188160676533e-05, + "loss": 0.0974, + "step": 9156 + }, + { + "epoch": 9.68, + "learning_rate": 2.5816596194503175e-05, + "loss": 0.146, + "step": 9158 + }, + { + "epoch": 9.68, + "learning_rate": 2.5811310782241017e-05, + "loss": 0.0641, + "step": 9160 + }, + { + "epoch": 9.68, + "learning_rate": 2.580602536997886e-05, + "loss": 0.0692, + "step": 9162 + }, + { + "epoch": 9.69, + "learning_rate": 2.5800739957716703e-05, + "loss": 0.0374, + "step": 9164 + }, + { + "epoch": 9.69, + "learning_rate": 2.5795454545454545e-05, + "loss": 0.0374, + "step": 9166 + }, + { + "epoch": 9.69, + "learning_rate": 2.5790169133192388e-05, + "loss": 0.0742, + "step": 9168 + }, + { + "epoch": 9.69, + "learning_rate": 2.5784883720930237e-05, + "loss": 0.0503, + "step": 9170 + }, + { + "epoch": 9.7, + "learning_rate": 2.577959830866808e-05, + "loss": 0.0705, + "step": 9172 + }, + { + "epoch": 9.7, + "learning_rate": 2.5774312896405923e-05, + "loss": 0.1188, + "step": 9174 + }, + { + "epoch": 9.7, + "learning_rate": 2.5769027484143765e-05, + "loss": 0.1263, + "step": 9176 + }, + { + "epoch": 9.7, + "learning_rate": 2.5763742071881608e-05, + "loss": 0.0268, + "step": 9178 + }, + { + "epoch": 9.7, + "learning_rate": 2.575845665961945e-05, + "loss": 0.0401, + "step": 9180 + }, + { + "epoch": 9.71, + "learning_rate": 2.5753171247357293e-05, + "loss": 0.2058, + "step": 9182 + }, + { + "epoch": 9.71, + "learning_rate": 2.5747885835095136e-05, + "loss": 0.0778, + "step": 9184 + }, + { + "epoch": 9.71, + "learning_rate": 2.574260042283298e-05, + "loss": 0.0561, + "step": 9186 + }, + { + "epoch": 9.71, + "learning_rate": 2.5737315010570828e-05, + "loss": 0.2252, + "step": 9188 + }, + { + "epoch": 9.71, + "learning_rate": 2.573202959830867e-05, + "loss": 0.1165, + "step": 9190 + }, + { + "epoch": 9.72, + "learning_rate": 2.5726744186046514e-05, + "loss": 0.1053, + "step": 9192 + }, + { + "epoch": 9.72, + "learning_rate": 2.5721458773784356e-05, + "loss": 0.0465, + "step": 9194 + }, + { + "epoch": 9.72, + "learning_rate": 2.57161733615222e-05, + "loss": 0.0784, + "step": 9196 + }, + { + "epoch": 9.72, + "learning_rate": 2.571088794926004e-05, + "loss": 0.0708, + "step": 9198 + }, + { + "epoch": 9.73, + "learning_rate": 2.5705602536997884e-05, + "loss": 0.2093, + "step": 9200 + }, + { + "epoch": 9.73, + "learning_rate": 2.570031712473573e-05, + "loss": 0.2111, + "step": 9202 + }, + { + "epoch": 9.73, + "learning_rate": 2.5695031712473573e-05, + "loss": 0.1183, + "step": 9204 + }, + { + "epoch": 9.73, + "learning_rate": 2.568974630021142e-05, + "loss": 0.05, + "step": 9206 + }, + { + "epoch": 9.73, + "learning_rate": 2.5684460887949262e-05, + "loss": 0.2306, + "step": 9208 + }, + { + "epoch": 9.74, + "learning_rate": 2.5679175475687104e-05, + "loss": 0.0342, + "step": 9210 + }, + { + "epoch": 9.74, + "learning_rate": 2.567389006342495e-05, + "loss": 0.1203, + "step": 9212 + }, + { + "epoch": 9.74, + "learning_rate": 2.5668604651162793e-05, + "loss": 0.1217, + "step": 9214 + }, + { + "epoch": 9.74, + "learning_rate": 2.5663319238900636e-05, + "loss": 0.0543, + "step": 9216 + }, + { + "epoch": 9.74, + "learning_rate": 2.565803382663848e-05, + "loss": 0.1482, + "step": 9218 + }, + { + "epoch": 9.75, + "learning_rate": 2.565274841437632e-05, + "loss": 0.1286, + "step": 9220 + }, + { + "epoch": 9.75, + "learning_rate": 2.5647463002114164e-05, + "loss": 0.1369, + "step": 9222 + }, + { + "epoch": 9.75, + "learning_rate": 2.5642177589852013e-05, + "loss": 0.0661, + "step": 9224 + }, + { + "epoch": 9.75, + "learning_rate": 2.5636892177589856e-05, + "loss": 0.1009, + "step": 9226 + }, + { + "epoch": 9.75, + "learning_rate": 2.56316067653277e-05, + "loss": 0.0759, + "step": 9228 + }, + { + "epoch": 9.76, + "learning_rate": 2.562632135306554e-05, + "loss": 0.099, + "step": 9230 + }, + { + "epoch": 9.76, + "learning_rate": 2.5621035940803384e-05, + "loss": 0.1197, + "step": 9232 + }, + { + "epoch": 9.76, + "learning_rate": 2.5615750528541227e-05, + "loss": 0.1576, + "step": 9234 + }, + { + "epoch": 9.76, + "learning_rate": 2.561046511627907e-05, + "loss": 0.1255, + "step": 9236 + }, + { + "epoch": 9.77, + "learning_rate": 2.5605179704016912e-05, + "loss": 0.1013, + "step": 9238 + }, + { + "epoch": 9.77, + "learning_rate": 2.5599894291754755e-05, + "loss": 0.0482, + "step": 9240 + }, + { + "epoch": 9.77, + "learning_rate": 2.5594608879492604e-05, + "loss": 0.0474, + "step": 9242 + }, + { + "epoch": 9.77, + "learning_rate": 2.5589323467230447e-05, + "loss": 0.0222, + "step": 9244 + }, + { + "epoch": 9.77, + "learning_rate": 2.558403805496829e-05, + "loss": 0.0592, + "step": 9246 + }, + { + "epoch": 9.78, + "learning_rate": 2.5578752642706132e-05, + "loss": 0.0322, + "step": 9248 + }, + { + "epoch": 9.78, + "learning_rate": 2.5573467230443975e-05, + "loss": 0.1504, + "step": 9250 + }, + { + "epoch": 9.78, + "learning_rate": 2.5568181818181817e-05, + "loss": 0.0378, + "step": 9252 + }, + { + "epoch": 9.78, + "learning_rate": 2.556289640591966e-05, + "loss": 0.1115, + "step": 9254 + }, + { + "epoch": 9.78, + "learning_rate": 2.5557610993657506e-05, + "loss": 0.1217, + "step": 9256 + }, + { + "epoch": 9.79, + "learning_rate": 2.555232558139535e-05, + "loss": 0.1118, + "step": 9258 + }, + { + "epoch": 9.79, + "learning_rate": 2.5547040169133195e-05, + "loss": 0.0188, + "step": 9260 + }, + { + "epoch": 9.79, + "learning_rate": 2.5541754756871038e-05, + "loss": 0.092, + "step": 9262 + }, + { + "epoch": 9.79, + "learning_rate": 2.5536469344608884e-05, + "loss": 0.203, + "step": 9264 + }, + { + "epoch": 9.79, + "learning_rate": 2.5531183932346726e-05, + "loss": 0.0928, + "step": 9266 + }, + { + "epoch": 9.8, + "learning_rate": 2.552589852008457e-05, + "loss": 0.0479, + "step": 9268 + }, + { + "epoch": 9.8, + "learning_rate": 2.552061310782241e-05, + "loss": 0.0719, + "step": 9270 + }, + { + "epoch": 9.8, + "learning_rate": 2.5515327695560254e-05, + "loss": 0.0685, + "step": 9272 + }, + { + "epoch": 9.8, + "learning_rate": 2.5510042283298097e-05, + "loss": 0.0747, + "step": 9274 + }, + { + "epoch": 9.81, + "learning_rate": 2.550475687103594e-05, + "loss": 0.1232, + "step": 9276 + }, + { + "epoch": 9.81, + "learning_rate": 2.549947145877379e-05, + "loss": 0.0749, + "step": 9278 + }, + { + "epoch": 9.81, + "learning_rate": 2.5494186046511632e-05, + "loss": 0.091, + "step": 9280 + }, + { + "epoch": 9.81, + "learning_rate": 2.5488900634249474e-05, + "loss": 0.0926, + "step": 9282 + }, + { + "epoch": 9.81, + "learning_rate": 2.5483615221987317e-05, + "loss": 0.0651, + "step": 9284 + }, + { + "epoch": 9.82, + "learning_rate": 2.547832980972516e-05, + "loss": 0.0229, + "step": 9286 + }, + { + "epoch": 9.82, + "learning_rate": 2.5473044397463003e-05, + "loss": 0.0882, + "step": 9288 + }, + { + "epoch": 9.82, + "learning_rate": 2.5467758985200845e-05, + "loss": 0.151, + "step": 9290 + }, + { + "epoch": 9.82, + "learning_rate": 2.5462473572938688e-05, + "loss": 0.0874, + "step": 9292 + }, + { + "epoch": 9.82, + "learning_rate": 2.545718816067653e-05, + "loss": 0.1641, + "step": 9294 + }, + { + "epoch": 9.83, + "learning_rate": 2.545190274841438e-05, + "loss": 0.1099, + "step": 9296 + }, + { + "epoch": 9.83, + "learning_rate": 2.5446617336152223e-05, + "loss": 0.0902, + "step": 9298 + }, + { + "epoch": 9.83, + "learning_rate": 2.5441331923890065e-05, + "loss": 0.0434, + "step": 9300 + }, + { + "epoch": 9.83, + "learning_rate": 2.5436046511627908e-05, + "loss": 0.1326, + "step": 9302 + }, + { + "epoch": 9.84, + "learning_rate": 2.543076109936575e-05, + "loss": 0.0813, + "step": 9304 + }, + { + "epoch": 9.84, + "learning_rate": 2.5425475687103593e-05, + "loss": 0.0893, + "step": 9306 + }, + { + "epoch": 9.84, + "learning_rate": 2.542019027484144e-05, + "loss": 0.0796, + "step": 9308 + }, + { + "epoch": 9.84, + "learning_rate": 2.5414904862579282e-05, + "loss": 0.0507, + "step": 9310 + }, + { + "epoch": 9.84, + "learning_rate": 2.5409619450317125e-05, + "loss": 0.05, + "step": 9312 + }, + { + "epoch": 9.85, + "learning_rate": 2.540433403805497e-05, + "loss": 0.1088, + "step": 9314 + }, + { + "epoch": 9.85, + "learning_rate": 2.5399048625792813e-05, + "loss": 0.108, + "step": 9316 + }, + { + "epoch": 9.85, + "learning_rate": 2.539376321353066e-05, + "loss": 0.0851, + "step": 9318 + }, + { + "epoch": 9.85, + "learning_rate": 2.5388477801268502e-05, + "loss": 0.0538, + "step": 9320 + }, + { + "epoch": 9.85, + "learning_rate": 2.5383192389006345e-05, + "loss": 0.0998, + "step": 9322 + }, + { + "epoch": 9.86, + "learning_rate": 2.5377906976744188e-05, + "loss": 0.0808, + "step": 9324 + }, + { + "epoch": 9.86, + "learning_rate": 2.537262156448203e-05, + "loss": 0.086, + "step": 9326 + }, + { + "epoch": 9.86, + "learning_rate": 2.5367336152219873e-05, + "loss": 0.0555, + "step": 9328 + }, + { + "epoch": 9.86, + "learning_rate": 2.5362050739957716e-05, + "loss": 0.0468, + "step": 9330 + }, + { + "epoch": 9.86, + "learning_rate": 2.5356765327695565e-05, + "loss": 0.0644, + "step": 9332 + }, + { + "epoch": 9.87, + "learning_rate": 2.5351479915433408e-05, + "loss": 0.2785, + "step": 9334 + }, + { + "epoch": 9.87, + "learning_rate": 2.534619450317125e-05, + "loss": 0.0736, + "step": 9336 + }, + { + "epoch": 9.87, + "learning_rate": 2.5340909090909093e-05, + "loss": 0.0374, + "step": 9338 + }, + { + "epoch": 9.87, + "learning_rate": 2.5335623678646936e-05, + "loss": 0.0782, + "step": 9340 + }, + { + "epoch": 9.88, + "learning_rate": 2.533033826638478e-05, + "loss": 0.1181, + "step": 9342 + }, + { + "epoch": 9.88, + "learning_rate": 2.532505285412262e-05, + "loss": 0.1358, + "step": 9344 + }, + { + "epoch": 9.88, + "learning_rate": 2.5319767441860464e-05, + "loss": 0.0958, + "step": 9346 + }, + { + "epoch": 9.88, + "learning_rate": 2.5314482029598306e-05, + "loss": 0.1822, + "step": 9348 + }, + { + "epoch": 9.88, + "learning_rate": 2.5309196617336156e-05, + "loss": 0.1081, + "step": 9350 + }, + { + "epoch": 9.89, + "learning_rate": 2.5303911205074e-05, + "loss": 0.0782, + "step": 9352 + }, + { + "epoch": 9.89, + "learning_rate": 2.529862579281184e-05, + "loss": 0.1172, + "step": 9354 + }, + { + "epoch": 9.89, + "learning_rate": 2.5293340380549684e-05, + "loss": 0.0593, + "step": 9356 + }, + { + "epoch": 9.89, + "learning_rate": 2.5288054968287526e-05, + "loss": 0.0192, + "step": 9358 + }, + { + "epoch": 9.89, + "learning_rate": 2.528276955602537e-05, + "loss": 0.073, + "step": 9360 + }, + { + "epoch": 9.9, + "learning_rate": 2.5277484143763215e-05, + "loss": 0.0692, + "step": 9362 + }, + { + "epoch": 9.9, + "learning_rate": 2.5272198731501058e-05, + "loss": 0.0479, + "step": 9364 + }, + { + "epoch": 9.9, + "learning_rate": 2.52669133192389e-05, + "loss": 0.0936, + "step": 9366 + }, + { + "epoch": 9.9, + "learning_rate": 2.5261627906976747e-05, + "loss": 0.06, + "step": 9368 + }, + { + "epoch": 9.9, + "learning_rate": 2.5256342494714593e-05, + "loss": 0.194, + "step": 9370 + }, + { + "epoch": 9.91, + "learning_rate": 2.5251057082452435e-05, + "loss": 0.1149, + "step": 9372 + }, + { + "epoch": 9.91, + "learning_rate": 2.5245771670190278e-05, + "loss": 0.0893, + "step": 9374 + }, + { + "epoch": 9.91, + "learning_rate": 2.524048625792812e-05, + "loss": 0.0446, + "step": 9376 + }, + { + "epoch": 9.91, + "learning_rate": 2.5235200845665963e-05, + "loss": 0.1115, + "step": 9378 + }, + { + "epoch": 9.92, + "learning_rate": 2.5229915433403806e-05, + "loss": 0.1539, + "step": 9380 + }, + { + "epoch": 9.92, + "learning_rate": 2.522463002114165e-05, + "loss": 0.0537, + "step": 9382 + }, + { + "epoch": 9.92, + "learning_rate": 2.521934460887949e-05, + "loss": 0.047, + "step": 9384 + }, + { + "epoch": 9.92, + "learning_rate": 2.521405919661734e-05, + "loss": 0.0716, + "step": 9386 + }, + { + "epoch": 9.92, + "learning_rate": 2.5208773784355183e-05, + "loss": 0.0348, + "step": 9388 + }, + { + "epoch": 9.93, + "learning_rate": 2.5203488372093026e-05, + "loss": 0.1238, + "step": 9390 + }, + { + "epoch": 9.93, + "learning_rate": 2.519820295983087e-05, + "loss": 0.016, + "step": 9392 + }, + { + "epoch": 9.93, + "learning_rate": 2.519291754756871e-05, + "loss": 0.099, + "step": 9394 + }, + { + "epoch": 9.93, + "learning_rate": 2.5187632135306554e-05, + "loss": 0.1096, + "step": 9396 + }, + { + "epoch": 9.93, + "learning_rate": 2.5182346723044397e-05, + "loss": 0.1113, + "step": 9398 + }, + { + "epoch": 9.94, + "learning_rate": 2.517706131078224e-05, + "loss": 0.038, + "step": 9400 + }, + { + "epoch": 9.94, + "learning_rate": 2.5171775898520082e-05, + "loss": 0.0852, + "step": 9402 + }, + { + "epoch": 9.94, + "learning_rate": 2.516649048625793e-05, + "loss": 0.1253, + "step": 9404 + }, + { + "epoch": 9.94, + "learning_rate": 2.5161205073995774e-05, + "loss": 0.0963, + "step": 9406 + }, + { + "epoch": 9.95, + "learning_rate": 2.5155919661733617e-05, + "loss": 0.1237, + "step": 9408 + }, + { + "epoch": 9.95, + "learning_rate": 2.515063424947146e-05, + "loss": 0.0919, + "step": 9410 + }, + { + "epoch": 9.95, + "learning_rate": 2.5145348837209302e-05, + "loss": 0.0494, + "step": 9412 + }, + { + "epoch": 9.95, + "learning_rate": 2.514006342494715e-05, + "loss": 0.0852, + "step": 9414 + }, + { + "epoch": 9.95, + "learning_rate": 2.513477801268499e-05, + "loss": 0.1492, + "step": 9416 + }, + { + "epoch": 9.96, + "learning_rate": 2.5129492600422834e-05, + "loss": 0.0884, + "step": 9418 + }, + { + "epoch": 9.96, + "learning_rate": 2.5124207188160676e-05, + "loss": 0.1411, + "step": 9420 + }, + { + "epoch": 9.96, + "learning_rate": 2.5118921775898522e-05, + "loss": 0.0512, + "step": 9422 + }, + { + "epoch": 9.96, + "learning_rate": 2.511363636363637e-05, + "loss": 0.1133, + "step": 9424 + }, + { + "epoch": 9.96, + "learning_rate": 2.510835095137421e-05, + "loss": 0.0455, + "step": 9426 + }, + { + "epoch": 9.97, + "learning_rate": 2.5103065539112054e-05, + "loss": 0.0743, + "step": 9428 + }, + { + "epoch": 9.97, + "learning_rate": 2.5097780126849897e-05, + "loss": 0.1326, + "step": 9430 + }, + { + "epoch": 9.97, + "learning_rate": 2.509249471458774e-05, + "loss": 0.0816, + "step": 9432 + }, + { + "epoch": 9.97, + "learning_rate": 2.5087209302325582e-05, + "loss": 0.0581, + "step": 9434 + }, + { + "epoch": 9.97, + "learning_rate": 2.5081923890063425e-05, + "loss": 0.1075, + "step": 9436 + }, + { + "epoch": 9.98, + "learning_rate": 2.5076638477801267e-05, + "loss": 0.0341, + "step": 9438 + }, + { + "epoch": 9.98, + "learning_rate": 2.5071353065539117e-05, + "loss": 0.0657, + "step": 9440 + }, + { + "epoch": 9.98, + "learning_rate": 2.506606765327696e-05, + "loss": 0.0317, + "step": 9442 + }, + { + "epoch": 9.98, + "learning_rate": 2.5060782241014802e-05, + "loss": 0.1027, + "step": 9444 + }, + { + "epoch": 9.99, + "learning_rate": 2.5055496828752645e-05, + "loss": 0.1306, + "step": 9446 + }, + { + "epoch": 9.99, + "learning_rate": 2.5050211416490487e-05, + "loss": 0.0308, + "step": 9448 + }, + { + "epoch": 9.99, + "learning_rate": 2.504492600422833e-05, + "loss": 0.0878, + "step": 9450 + }, + { + "epoch": 9.99, + "learning_rate": 2.5039640591966173e-05, + "loss": 0.0352, + "step": 9452 + }, + { + "epoch": 9.99, + "learning_rate": 2.5034355179704015e-05, + "loss": 0.0678, + "step": 9454 + }, + { + "epoch": 10.0, + "learning_rate": 2.5029069767441858e-05, + "loss": 0.1047, + "step": 9456 + }, + { + "epoch": 10.0, + "learning_rate": 2.5023784355179707e-05, + "loss": 0.0423, + "step": 9458 + }, + { + "epoch": 10.0, + "learning_rate": 2.501849894291755e-05, + "loss": 0.0521, + "step": 9460 + }, + { + "epoch": 10.0, + "learning_rate": 2.5013213530655393e-05, + "loss": 0.0502, + "step": 9462 + }, + { + "epoch": 10.0, + "learning_rate": 2.5007928118393235e-05, + "loss": 0.0516, + "step": 9464 + }, + { + "epoch": 10.01, + "learning_rate": 2.5002642706131078e-05, + "loss": 0.1356, + "step": 9466 + }, + { + "epoch": 10.01, + "learning_rate": 2.4997357293868924e-05, + "loss": 0.142, + "step": 9468 + }, + { + "epoch": 10.01, + "learning_rate": 2.4992071881606767e-05, + "loss": 0.0707, + "step": 9470 + }, + { + "epoch": 10.01, + "learning_rate": 2.4986786469344613e-05, + "loss": 0.0274, + "step": 9472 + }, + { + "epoch": 10.01, + "learning_rate": 2.4981501057082456e-05, + "loss": 0.1028, + "step": 9474 + }, + { + "epoch": 10.02, + "learning_rate": 2.4976215644820298e-05, + "loss": 0.0566, + "step": 9476 + }, + { + "epoch": 10.02, + "learning_rate": 2.497093023255814e-05, + "loss": 0.2284, + "step": 9478 + }, + { + "epoch": 10.02, + "learning_rate": 2.4965644820295984e-05, + "loss": 0.106, + "step": 9480 + }, + { + "epoch": 10.02, + "learning_rate": 2.496035940803383e-05, + "loss": 0.076, + "step": 9482 + }, + { + "epoch": 10.03, + "learning_rate": 2.4955073995771672e-05, + "loss": 0.1524, + "step": 9484 + }, + { + "epoch": 10.03, + "learning_rate": 2.4949788583509515e-05, + "loss": 0.024, + "step": 9486 + }, + { + "epoch": 10.03, + "learning_rate": 2.4944503171247358e-05, + "loss": 0.0783, + "step": 9488 + }, + { + "epoch": 10.03, + "learning_rate": 2.4939217758985204e-05, + "loss": 0.0752, + "step": 9490 + }, + { + "epoch": 10.03, + "learning_rate": 2.4933932346723046e-05, + "loss": 0.0556, + "step": 9492 + }, + { + "epoch": 10.04, + "learning_rate": 2.492864693446089e-05, + "loss": 0.0667, + "step": 9494 + }, + { + "epoch": 10.04, + "learning_rate": 2.4923361522198732e-05, + "loss": 0.0455, + "step": 9496 + }, + { + "epoch": 10.04, + "learning_rate": 2.4918076109936574e-05, + "loss": 0.0423, + "step": 9498 + }, + { + "epoch": 10.04, + "learning_rate": 2.491279069767442e-05, + "loss": 0.0616, + "step": 9500 + }, + { + "epoch": 10.04, + "eval_cer": 0.020860644058136223, + "eval_loss": 0.5472248196601868, + "eval_runtime": 127.8063, + "eval_samples_per_second": 6.58, + "eval_steps_per_second": 0.829, + "step": 9500 + }, + { + "epoch": 10.04, + "learning_rate": 2.4907505285412263e-05, + "loss": 0.1153, + "step": 9502 + }, + { + "epoch": 10.05, + "learning_rate": 2.4902219873150106e-05, + "loss": 0.0961, + "step": 9504 + }, + { + "epoch": 10.05, + "learning_rate": 2.489693446088795e-05, + "loss": 0.0327, + "step": 9506 + }, + { + "epoch": 10.05, + "learning_rate": 2.4891649048625795e-05, + "loss": 0.0733, + "step": 9508 + }, + { + "epoch": 10.05, + "learning_rate": 2.4886363636363637e-05, + "loss": 0.0801, + "step": 9510 + }, + { + "epoch": 10.05, + "learning_rate": 2.488107822410148e-05, + "loss": 0.0344, + "step": 9512 + }, + { + "epoch": 10.06, + "learning_rate": 2.4875792811839323e-05, + "loss": 0.0918, + "step": 9514 + }, + { + "epoch": 10.06, + "learning_rate": 2.487050739957717e-05, + "loss": 0.1247, + "step": 9516 + }, + { + "epoch": 10.06, + "learning_rate": 2.486522198731501e-05, + "loss": 0.0697, + "step": 9518 + }, + { + "epoch": 10.06, + "learning_rate": 2.4859936575052854e-05, + "loss": 0.0941, + "step": 9520 + }, + { + "epoch": 10.07, + "learning_rate": 2.48546511627907e-05, + "loss": 0.0839, + "step": 9522 + }, + { + "epoch": 10.07, + "learning_rate": 2.4849365750528543e-05, + "loss": 0.0163, + "step": 9524 + }, + { + "epoch": 10.07, + "learning_rate": 2.484408033826639e-05, + "loss": 0.0625, + "step": 9526 + }, + { + "epoch": 10.07, + "learning_rate": 2.483879492600423e-05, + "loss": 0.0795, + "step": 9528 + }, + { + "epoch": 10.07, + "learning_rate": 2.4833509513742074e-05, + "loss": 0.0719, + "step": 9530 + }, + { + "epoch": 10.08, + "learning_rate": 2.4828224101479917e-05, + "loss": 0.0796, + "step": 9532 + }, + { + "epoch": 10.08, + "learning_rate": 2.482293868921776e-05, + "loss": 0.0175, + "step": 9534 + }, + { + "epoch": 10.08, + "learning_rate": 2.4817653276955606e-05, + "loss": 0.0797, + "step": 9536 + }, + { + "epoch": 10.08, + "learning_rate": 2.4812367864693448e-05, + "loss": 0.0956, + "step": 9538 + }, + { + "epoch": 10.08, + "learning_rate": 2.480708245243129e-05, + "loss": 0.0526, + "step": 9540 + }, + { + "epoch": 10.09, + "learning_rate": 2.4801797040169134e-05, + "loss": 0.0719, + "step": 9542 + }, + { + "epoch": 10.09, + "learning_rate": 2.479651162790698e-05, + "loss": 0.0664, + "step": 9544 + }, + { + "epoch": 10.09, + "learning_rate": 2.4791226215644822e-05, + "loss": 0.0272, + "step": 9546 + }, + { + "epoch": 10.09, + "learning_rate": 2.4785940803382665e-05, + "loss": 0.043, + "step": 9548 + }, + { + "epoch": 10.1, + "learning_rate": 2.4780655391120508e-05, + "loss": 0.1261, + "step": 9550 + }, + { + "epoch": 10.1, + "learning_rate": 2.477536997885835e-05, + "loss": 0.2794, + "step": 9552 + }, + { + "epoch": 10.1, + "learning_rate": 2.4770084566596196e-05, + "loss": 0.0698, + "step": 9554 + }, + { + "epoch": 10.1, + "learning_rate": 2.476479915433404e-05, + "loss": 0.2251, + "step": 9556 + }, + { + "epoch": 10.1, + "learning_rate": 2.4759513742071882e-05, + "loss": 0.0471, + "step": 9558 + }, + { + "epoch": 10.11, + "learning_rate": 2.4754228329809724e-05, + "loss": 0.0455, + "step": 9560 + }, + { + "epoch": 10.11, + "learning_rate": 2.474894291754757e-05, + "loss": 0.3214, + "step": 9562 + }, + { + "epoch": 10.11, + "learning_rate": 2.4743657505285413e-05, + "loss": 0.2688, + "step": 9564 + }, + { + "epoch": 10.11, + "learning_rate": 2.4738372093023256e-05, + "loss": 0.1333, + "step": 9566 + }, + { + "epoch": 10.11, + "learning_rate": 2.47330866807611e-05, + "loss": 0.1161, + "step": 9568 + }, + { + "epoch": 10.12, + "learning_rate": 2.4727801268498945e-05, + "loss": 0.0928, + "step": 9570 + }, + { + "epoch": 10.12, + "learning_rate": 2.4722515856236787e-05, + "loss": 0.0991, + "step": 9572 + }, + { + "epoch": 10.12, + "learning_rate": 2.4717230443974633e-05, + "loss": 0.0438, + "step": 9574 + }, + { + "epoch": 10.12, + "learning_rate": 2.4711945031712476e-05, + "loss": 0.1875, + "step": 9576 + }, + { + "epoch": 10.12, + "learning_rate": 2.470665961945032e-05, + "loss": 0.1174, + "step": 9578 + }, + { + "epoch": 10.13, + "learning_rate": 2.4701374207188165e-05, + "loss": 0.0306, + "step": 9580 + }, + { + "epoch": 10.13, + "learning_rate": 2.4696088794926007e-05, + "loss": 0.1353, + "step": 9582 + }, + { + "epoch": 10.13, + "learning_rate": 2.469080338266385e-05, + "loss": 0.0791, + "step": 9584 + }, + { + "epoch": 10.13, + "learning_rate": 2.4685517970401693e-05, + "loss": 0.0907, + "step": 9586 + }, + { + "epoch": 10.14, + "learning_rate": 2.4680232558139535e-05, + "loss": 0.1941, + "step": 9588 + }, + { + "epoch": 10.14, + "learning_rate": 2.467494714587738e-05, + "loss": 0.0339, + "step": 9590 + }, + { + "epoch": 10.14, + "learning_rate": 2.4669661733615224e-05, + "loss": 0.0292, + "step": 9592 + }, + { + "epoch": 10.14, + "learning_rate": 2.4664376321353067e-05, + "loss": 0.0474, + "step": 9594 + }, + { + "epoch": 10.14, + "learning_rate": 2.465909090909091e-05, + "loss": 0.0832, + "step": 9596 + }, + { + "epoch": 10.15, + "learning_rate": 2.4653805496828755e-05, + "loss": 0.0618, + "step": 9598 + }, + { + "epoch": 10.15, + "learning_rate": 2.4648520084566598e-05, + "loss": 0.0498, + "step": 9600 + }, + { + "epoch": 10.15, + "learning_rate": 2.464323467230444e-05, + "loss": 0.0797, + "step": 9602 + }, + { + "epoch": 10.15, + "learning_rate": 2.4637949260042283e-05, + "loss": 0.0558, + "step": 9604 + }, + { + "epoch": 10.15, + "learning_rate": 2.4632663847780126e-05, + "loss": 0.0621, + "step": 9606 + }, + { + "epoch": 10.16, + "learning_rate": 2.4627378435517972e-05, + "loss": 0.0617, + "step": 9608 + }, + { + "epoch": 10.16, + "learning_rate": 2.4622093023255815e-05, + "loss": 0.1126, + "step": 9610 + }, + { + "epoch": 10.16, + "learning_rate": 2.4616807610993658e-05, + "loss": 0.0502, + "step": 9612 + }, + { + "epoch": 10.16, + "learning_rate": 2.46115221987315e-05, + "loss": 0.0547, + "step": 9614 + }, + { + "epoch": 10.16, + "learning_rate": 2.4606236786469346e-05, + "loss": 0.0615, + "step": 9616 + }, + { + "epoch": 10.17, + "learning_rate": 2.460095137420719e-05, + "loss": 0.0155, + "step": 9618 + }, + { + "epoch": 10.17, + "learning_rate": 2.459566596194503e-05, + "loss": 0.0557, + "step": 9620 + }, + { + "epoch": 10.17, + "learning_rate": 2.4590380549682874e-05, + "loss": 0.0594, + "step": 9622 + }, + { + "epoch": 10.17, + "learning_rate": 2.458509513742072e-05, + "loss": 0.0639, + "step": 9624 + }, + { + "epoch": 10.18, + "learning_rate": 2.4579809725158563e-05, + "loss": 0.0344, + "step": 9626 + }, + { + "epoch": 10.18, + "learning_rate": 2.457452431289641e-05, + "loss": 0.0442, + "step": 9628 + }, + { + "epoch": 10.18, + "learning_rate": 2.4569238900634252e-05, + "loss": 0.1143, + "step": 9630 + }, + { + "epoch": 10.18, + "learning_rate": 2.4563953488372094e-05, + "loss": 0.0763, + "step": 9632 + }, + { + "epoch": 10.18, + "learning_rate": 2.455866807610994e-05, + "loss": 0.0544, + "step": 9634 + }, + { + "epoch": 10.19, + "learning_rate": 2.4553382663847783e-05, + "loss": 0.0375, + "step": 9636 + }, + { + "epoch": 10.19, + "learning_rate": 2.4548097251585626e-05, + "loss": 0.0968, + "step": 9638 + }, + { + "epoch": 10.19, + "learning_rate": 2.454281183932347e-05, + "loss": 0.1139, + "step": 9640 + }, + { + "epoch": 10.19, + "learning_rate": 2.453752642706131e-05, + "loss": 0.0868, + "step": 9642 + }, + { + "epoch": 10.19, + "learning_rate": 2.4532241014799157e-05, + "loss": 0.0274, + "step": 9644 + }, + { + "epoch": 10.2, + "learning_rate": 2.4526955602537e-05, + "loss": 0.1256, + "step": 9646 + }, + { + "epoch": 10.2, + "learning_rate": 2.4521670190274843e-05, + "loss": 0.123, + "step": 9648 + }, + { + "epoch": 10.2, + "learning_rate": 2.4516384778012685e-05, + "loss": 0.0492, + "step": 9650 + }, + { + "epoch": 10.2, + "learning_rate": 2.4511099365750528e-05, + "loss": 0.0606, + "step": 9652 + }, + { + "epoch": 10.21, + "learning_rate": 2.4505813953488374e-05, + "loss": 0.0915, + "step": 9654 + }, + { + "epoch": 10.21, + "learning_rate": 2.4500528541226217e-05, + "loss": 0.0606, + "step": 9656 + }, + { + "epoch": 10.21, + "learning_rate": 2.449524312896406e-05, + "loss": 0.0424, + "step": 9658 + }, + { + "epoch": 10.21, + "learning_rate": 2.4489957716701902e-05, + "loss": 0.0585, + "step": 9660 + }, + { + "epoch": 10.21, + "learning_rate": 2.4484672304439748e-05, + "loss": 0.0735, + "step": 9662 + }, + { + "epoch": 10.22, + "learning_rate": 2.447938689217759e-05, + "loss": 0.0558, + "step": 9664 + }, + { + "epoch": 10.22, + "learning_rate": 2.4474101479915433e-05, + "loss": 0.0444, + "step": 9666 + }, + { + "epoch": 10.22, + "learning_rate": 2.4468816067653276e-05, + "loss": 0.0697, + "step": 9668 + }, + { + "epoch": 10.22, + "learning_rate": 2.446353065539112e-05, + "loss": 0.1268, + "step": 9670 + }, + { + "epoch": 10.22, + "learning_rate": 2.4458245243128965e-05, + "loss": 0.1274, + "step": 9672 + }, + { + "epoch": 10.23, + "learning_rate": 2.4452959830866807e-05, + "loss": 0.1141, + "step": 9674 + }, + { + "epoch": 10.23, + "learning_rate": 2.4447674418604654e-05, + "loss": 0.0624, + "step": 9676 + }, + { + "epoch": 10.23, + "learning_rate": 2.4442389006342496e-05, + "loss": 0.054, + "step": 9678 + }, + { + "epoch": 10.23, + "learning_rate": 2.4437103594080342e-05, + "loss": 0.0644, + "step": 9680 + }, + { + "epoch": 10.23, + "learning_rate": 2.4431818181818185e-05, + "loss": 0.0435, + "step": 9682 + }, + { + "epoch": 10.24, + "learning_rate": 2.4426532769556028e-05, + "loss": 0.0393, + "step": 9684 + }, + { + "epoch": 10.24, + "learning_rate": 2.442124735729387e-05, + "loss": 0.1323, + "step": 9686 + }, + { + "epoch": 10.24, + "learning_rate": 2.4415961945031713e-05, + "loss": 0.1019, + "step": 9688 + }, + { + "epoch": 10.24, + "learning_rate": 2.441067653276956e-05, + "loss": 0.0803, + "step": 9690 + }, + { + "epoch": 10.25, + "learning_rate": 2.44053911205074e-05, + "loss": 0.0582, + "step": 9692 + }, + { + "epoch": 10.25, + "learning_rate": 2.4400105708245244e-05, + "loss": 0.0659, + "step": 9694 + }, + { + "epoch": 10.25, + "learning_rate": 2.4394820295983087e-05, + "loss": 0.0528, + "step": 9696 + }, + { + "epoch": 10.25, + "learning_rate": 2.4389534883720933e-05, + "loss": 0.0614, + "step": 9698 + }, + { + "epoch": 10.25, + "learning_rate": 2.4384249471458776e-05, + "loss": 0.1287, + "step": 9700 + }, + { + "epoch": 10.26, + "learning_rate": 2.437896405919662e-05, + "loss": 0.211, + "step": 9702 + }, + { + "epoch": 10.26, + "learning_rate": 2.437367864693446e-05, + "loss": 0.0376, + "step": 9704 + }, + { + "epoch": 10.26, + "learning_rate": 2.4368393234672304e-05, + "loss": 0.0271, + "step": 9706 + }, + { + "epoch": 10.26, + "learning_rate": 2.436310782241015e-05, + "loss": 0.1002, + "step": 9708 + }, + { + "epoch": 10.26, + "learning_rate": 2.4357822410147992e-05, + "loss": 0.1325, + "step": 9710 + }, + { + "epoch": 10.27, + "learning_rate": 2.4352536997885835e-05, + "loss": 0.0062, + "step": 9712 + }, + { + "epoch": 10.27, + "learning_rate": 2.4347251585623678e-05, + "loss": 0.068, + "step": 9714 + }, + { + "epoch": 10.27, + "learning_rate": 2.4341966173361524e-05, + "loss": 0.0643, + "step": 9716 + }, + { + "epoch": 10.27, + "learning_rate": 2.4336680761099367e-05, + "loss": 0.1679, + "step": 9718 + }, + { + "epoch": 10.27, + "learning_rate": 2.433139534883721e-05, + "loss": 0.1077, + "step": 9720 + }, + { + "epoch": 10.28, + "learning_rate": 2.4326109936575052e-05, + "loss": 0.0535, + "step": 9722 + }, + { + "epoch": 10.28, + "learning_rate": 2.4320824524312898e-05, + "loss": 0.1192, + "step": 9724 + }, + { + "epoch": 10.28, + "learning_rate": 2.431553911205074e-05, + "loss": 0.057, + "step": 9726 + }, + { + "epoch": 10.28, + "learning_rate": 2.4310253699788583e-05, + "loss": 0.1002, + "step": 9728 + }, + { + "epoch": 10.29, + "learning_rate": 2.430496828752643e-05, + "loss": 0.2332, + "step": 9730 + }, + { + "epoch": 10.29, + "learning_rate": 2.4299682875264272e-05, + "loss": 0.0869, + "step": 9732 + }, + { + "epoch": 10.29, + "learning_rate": 2.4294397463002118e-05, + "loss": 0.2152, + "step": 9734 + }, + { + "epoch": 10.29, + "learning_rate": 2.428911205073996e-05, + "loss": 0.1526, + "step": 9736 + }, + { + "epoch": 10.29, + "learning_rate": 2.4283826638477803e-05, + "loss": 0.1586, + "step": 9738 + }, + { + "epoch": 10.3, + "learning_rate": 2.4278541226215646e-05, + "loss": 0.1074, + "step": 9740 + }, + { + "epoch": 10.3, + "learning_rate": 2.427325581395349e-05, + "loss": 0.0455, + "step": 9742 + }, + { + "epoch": 10.3, + "learning_rate": 2.4267970401691335e-05, + "loss": 0.0448, + "step": 9744 + }, + { + "epoch": 10.3, + "learning_rate": 2.4262684989429177e-05, + "loss": 0.0873, + "step": 9746 + }, + { + "epoch": 10.3, + "learning_rate": 2.425739957716702e-05, + "loss": 0.0349, + "step": 9748 + }, + { + "epoch": 10.31, + "learning_rate": 2.4252114164904863e-05, + "loss": 0.0399, + "step": 9750 + }, + { + "epoch": 10.31, + "learning_rate": 2.424682875264271e-05, + "loss": 0.0379, + "step": 9752 + }, + { + "epoch": 10.31, + "learning_rate": 2.424154334038055e-05, + "loss": 0.0655, + "step": 9754 + }, + { + "epoch": 10.31, + "learning_rate": 2.4236257928118394e-05, + "loss": 0.0885, + "step": 9756 + }, + { + "epoch": 10.32, + "learning_rate": 2.4230972515856237e-05, + "loss": 0.0374, + "step": 9758 + }, + { + "epoch": 10.32, + "learning_rate": 2.422568710359408e-05, + "loss": 0.0658, + "step": 9760 + }, + { + "epoch": 10.32, + "learning_rate": 2.4220401691331926e-05, + "loss": 0.042, + "step": 9762 + }, + { + "epoch": 10.32, + "learning_rate": 2.421511627906977e-05, + "loss": 0.0622, + "step": 9764 + }, + { + "epoch": 10.32, + "learning_rate": 2.420983086680761e-05, + "loss": 0.0568, + "step": 9766 + }, + { + "epoch": 10.33, + "learning_rate": 2.4204545454545454e-05, + "loss": 0.0538, + "step": 9768 + }, + { + "epoch": 10.33, + "learning_rate": 2.41992600422833e-05, + "loss": 0.0909, + "step": 9770 + }, + { + "epoch": 10.33, + "learning_rate": 2.4193974630021142e-05, + "loss": 0.1713, + "step": 9772 + }, + { + "epoch": 10.33, + "learning_rate": 2.4188689217758985e-05, + "loss": 0.0973, + "step": 9774 + }, + { + "epoch": 10.33, + "learning_rate": 2.4183403805496828e-05, + "loss": 0.0276, + "step": 9776 + }, + { + "epoch": 10.34, + "learning_rate": 2.4178118393234674e-05, + "loss": 0.063, + "step": 9778 + }, + { + "epoch": 10.34, + "learning_rate": 2.4172832980972516e-05, + "loss": 0.0193, + "step": 9780 + }, + { + "epoch": 10.34, + "learning_rate": 2.4167547568710363e-05, + "loss": 0.0401, + "step": 9782 + }, + { + "epoch": 10.34, + "learning_rate": 2.4162262156448205e-05, + "loss": 0.1097, + "step": 9784 + }, + { + "epoch": 10.34, + "learning_rate": 2.4156976744186048e-05, + "loss": 0.0611, + "step": 9786 + }, + { + "epoch": 10.35, + "learning_rate": 2.4151691331923894e-05, + "loss": 0.0231, + "step": 9788 + }, + { + "epoch": 10.35, + "learning_rate": 2.4146405919661737e-05, + "loss": 0.145, + "step": 9790 + }, + { + "epoch": 10.35, + "learning_rate": 2.414112050739958e-05, + "loss": 0.0839, + "step": 9792 + }, + { + "epoch": 10.35, + "learning_rate": 2.4135835095137422e-05, + "loss": 0.0581, + "step": 9794 + }, + { + "epoch": 10.36, + "learning_rate": 2.4130549682875265e-05, + "loss": 0.0452, + "step": 9796 + }, + { + "epoch": 10.36, + "learning_rate": 2.412526427061311e-05, + "loss": 0.0309, + "step": 9798 + }, + { + "epoch": 10.36, + "learning_rate": 2.4119978858350953e-05, + "loss": 0.1888, + "step": 9800 + }, + { + "epoch": 10.36, + "learning_rate": 2.4114693446088796e-05, + "loss": 0.1226, + "step": 9802 + }, + { + "epoch": 10.36, + "learning_rate": 2.410940803382664e-05, + "loss": 0.0596, + "step": 9804 + }, + { + "epoch": 10.37, + "learning_rate": 2.4104122621564485e-05, + "loss": 0.0507, + "step": 9806 + }, + { + "epoch": 10.37, + "learning_rate": 2.4098837209302327e-05, + "loss": 0.0564, + "step": 9808 + }, + { + "epoch": 10.37, + "learning_rate": 2.409355179704017e-05, + "loss": 0.0319, + "step": 9810 + }, + { + "epoch": 10.37, + "learning_rate": 2.4088266384778013e-05, + "loss": 0.1549, + "step": 9812 + }, + { + "epoch": 10.37, + "learning_rate": 2.4082980972515855e-05, + "loss": 0.1253, + "step": 9814 + }, + { + "epoch": 10.38, + "learning_rate": 2.40776955602537e-05, + "loss": 0.0819, + "step": 9816 + }, + { + "epoch": 10.38, + "learning_rate": 2.4072410147991544e-05, + "loss": 0.126, + "step": 9818 + }, + { + "epoch": 10.38, + "learning_rate": 2.4067124735729387e-05, + "loss": 0.0943, + "step": 9820 + }, + { + "epoch": 10.38, + "learning_rate": 2.406183932346723e-05, + "loss": 0.0304, + "step": 9822 + }, + { + "epoch": 10.38, + "learning_rate": 2.4056553911205076e-05, + "loss": 0.1265, + "step": 9824 + }, + { + "epoch": 10.39, + "learning_rate": 2.4051268498942918e-05, + "loss": 0.0787, + "step": 9826 + }, + { + "epoch": 10.39, + "learning_rate": 2.404598308668076e-05, + "loss": 0.104, + "step": 9828 + }, + { + "epoch": 10.39, + "learning_rate": 2.4040697674418604e-05, + "loss": 0.0846, + "step": 9830 + }, + { + "epoch": 10.39, + "learning_rate": 2.403541226215645e-05, + "loss": 0.0298, + "step": 9832 + }, + { + "epoch": 10.4, + "learning_rate": 2.4030126849894292e-05, + "loss": 0.1509, + "step": 9834 + }, + { + "epoch": 10.4, + "learning_rate": 2.402484143763214e-05, + "loss": 0.0735, + "step": 9836 + }, + { + "epoch": 10.4, + "learning_rate": 2.401955602536998e-05, + "loss": 0.0591, + "step": 9838 + }, + { + "epoch": 10.4, + "learning_rate": 2.4014270613107824e-05, + "loss": 0.0444, + "step": 9840 + }, + { + "epoch": 10.4, + "learning_rate": 2.400898520084567e-05, + "loss": 0.032, + "step": 9842 + }, + { + "epoch": 10.41, + "learning_rate": 2.4003699788583512e-05, + "loss": 0.103, + "step": 9844 + }, + { + "epoch": 10.41, + "learning_rate": 2.3998414376321355e-05, + "loss": 0.0892, + "step": 9846 + }, + { + "epoch": 10.41, + "learning_rate": 2.3993128964059198e-05, + "loss": 0.0488, + "step": 9848 + }, + { + "epoch": 10.41, + "learning_rate": 2.398784355179704e-05, + "loss": 0.0486, + "step": 9850 + }, + { + "epoch": 10.41, + "learning_rate": 2.3982558139534887e-05, + "loss": 0.0858, + "step": 9852 + }, + { + "epoch": 10.42, + "learning_rate": 2.397727272727273e-05, + "loss": 0.0312, + "step": 9854 + }, + { + "epoch": 10.42, + "learning_rate": 2.3971987315010572e-05, + "loss": 0.0556, + "step": 9856 + }, + { + "epoch": 10.42, + "learning_rate": 2.3966701902748415e-05, + "loss": 0.0405, + "step": 9858 + }, + { + "epoch": 10.42, + "learning_rate": 2.396141649048626e-05, + "loss": 0.0065, + "step": 9860 + }, + { + "epoch": 10.42, + "learning_rate": 2.3956131078224103e-05, + "loss": 0.0873, + "step": 9862 + }, + { + "epoch": 10.43, + "learning_rate": 2.3950845665961946e-05, + "loss": 0.116, + "step": 9864 + }, + { + "epoch": 10.43, + "learning_rate": 2.394556025369979e-05, + "loss": 0.0445, + "step": 9866 + }, + { + "epoch": 10.43, + "learning_rate": 2.394027484143763e-05, + "loss": 0.0315, + "step": 9868 + }, + { + "epoch": 10.43, + "learning_rate": 2.3934989429175477e-05, + "loss": 0.0742, + "step": 9870 + }, + { + "epoch": 10.44, + "learning_rate": 2.392970401691332e-05, + "loss": 0.0774, + "step": 9872 + }, + { + "epoch": 10.44, + "learning_rate": 2.3924418604651163e-05, + "loss": 0.1122, + "step": 9874 + }, + { + "epoch": 10.44, + "learning_rate": 2.3919133192389005e-05, + "loss": 0.0437, + "step": 9876 + }, + { + "epoch": 10.44, + "learning_rate": 2.391384778012685e-05, + "loss": 0.1272, + "step": 9878 + }, + { + "epoch": 10.44, + "learning_rate": 2.3908562367864694e-05, + "loss": 0.0312, + "step": 9880 + }, + { + "epoch": 10.45, + "learning_rate": 2.3903276955602537e-05, + "loss": 0.1325, + "step": 9882 + }, + { + "epoch": 10.45, + "learning_rate": 2.3897991543340383e-05, + "loss": 0.0612, + "step": 9884 + }, + { + "epoch": 10.45, + "learning_rate": 2.3892706131078225e-05, + "loss": 0.0457, + "step": 9886 + }, + { + "epoch": 10.45, + "learning_rate": 2.3887420718816068e-05, + "loss": 0.1169, + "step": 9888 + }, + { + "epoch": 10.45, + "learning_rate": 2.3882135306553914e-05, + "loss": 0.031, + "step": 9890 + }, + { + "epoch": 10.46, + "learning_rate": 2.3876849894291757e-05, + "loss": 0.0832, + "step": 9892 + }, + { + "epoch": 10.46, + "learning_rate": 2.38715644820296e-05, + "loss": 0.069, + "step": 9894 + }, + { + "epoch": 10.46, + "learning_rate": 2.3866279069767446e-05, + "loss": 0.0545, + "step": 9896 + }, + { + "epoch": 10.46, + "learning_rate": 2.3860993657505288e-05, + "loss": 0.0588, + "step": 9898 + }, + { + "epoch": 10.47, + "learning_rate": 2.385570824524313e-05, + "loss": 0.1478, + "step": 9900 + }, + { + "epoch": 10.47, + "learning_rate": 2.3850422832980974e-05, + "loss": 0.057, + "step": 9902 + }, + { + "epoch": 10.47, + "learning_rate": 2.3845137420718816e-05, + "loss": 0.1162, + "step": 9904 + }, + { + "epoch": 10.47, + "learning_rate": 2.3839852008456662e-05, + "loss": 0.0453, + "step": 9906 + }, + { + "epoch": 10.47, + "learning_rate": 2.3834566596194505e-05, + "loss": 0.0668, + "step": 9908 + }, + { + "epoch": 10.48, + "learning_rate": 2.3829281183932348e-05, + "loss": 0.0392, + "step": 9910 + }, + { + "epoch": 10.48, + "learning_rate": 2.382399577167019e-05, + "loss": 0.0875, + "step": 9912 + }, + { + "epoch": 10.48, + "learning_rate": 2.3818710359408036e-05, + "loss": 0.0435, + "step": 9914 + }, + { + "epoch": 10.48, + "learning_rate": 2.381342494714588e-05, + "loss": 0.1691, + "step": 9916 + }, + { + "epoch": 10.48, + "learning_rate": 2.3808139534883722e-05, + "loss": 0.0769, + "step": 9918 + }, + { + "epoch": 10.49, + "learning_rate": 2.3802854122621564e-05, + "loss": 0.0625, + "step": 9920 + }, + { + "epoch": 10.49, + "learning_rate": 2.3797568710359407e-05, + "loss": 0.0797, + "step": 9922 + }, + { + "epoch": 10.49, + "learning_rate": 2.3792283298097253e-05, + "loss": 0.0506, + "step": 9924 + }, + { + "epoch": 10.49, + "learning_rate": 2.3786997885835096e-05, + "loss": 0.0871, + "step": 9926 + }, + { + "epoch": 10.49, + "learning_rate": 2.378171247357294e-05, + "loss": 0.1237, + "step": 9928 + }, + { + "epoch": 10.5, + "learning_rate": 2.377642706131078e-05, + "loss": 0.1047, + "step": 9930 + }, + { + "epoch": 10.5, + "learning_rate": 2.3771141649048627e-05, + "loss": 0.07, + "step": 9932 + }, + { + "epoch": 10.5, + "learning_rate": 2.376585623678647e-05, + "loss": 0.0599, + "step": 9934 + }, + { + "epoch": 10.5, + "learning_rate": 2.3760570824524313e-05, + "loss": 0.0793, + "step": 9936 + }, + { + "epoch": 10.51, + "learning_rate": 2.375528541226216e-05, + "loss": 0.1223, + "step": 9938 + }, + { + "epoch": 10.51, + "learning_rate": 2.375e-05, + "loss": 0.0498, + "step": 9940 + }, + { + "epoch": 10.51, + "learning_rate": 2.3744714587737847e-05, + "loss": 0.1055, + "step": 9942 + }, + { + "epoch": 10.51, + "learning_rate": 2.373942917547569e-05, + "loss": 0.0825, + "step": 9944 + }, + { + "epoch": 10.51, + "learning_rate": 2.3734143763213533e-05, + "loss": 0.023, + "step": 9946 + }, + { + "epoch": 10.52, + "learning_rate": 2.3728858350951375e-05, + "loss": 0.1028, + "step": 9948 + }, + { + "epoch": 10.52, + "learning_rate": 2.372357293868922e-05, + "loss": 0.0584, + "step": 9950 + }, + { + "epoch": 10.52, + "learning_rate": 2.3718287526427064e-05, + "loss": 0.1151, + "step": 9952 + }, + { + "epoch": 10.52, + "learning_rate": 2.3713002114164907e-05, + "loss": 0.0949, + "step": 9954 + }, + { + "epoch": 10.52, + "learning_rate": 2.370771670190275e-05, + "loss": 0.1587, + "step": 9956 + }, + { + "epoch": 10.53, + "learning_rate": 2.3702431289640592e-05, + "loss": 0.0479, + "step": 9958 + }, + { + "epoch": 10.53, + "learning_rate": 2.3697145877378438e-05, + "loss": 0.0412, + "step": 9960 + }, + { + "epoch": 10.53, + "learning_rate": 2.369186046511628e-05, + "loss": 0.1578, + "step": 9962 + }, + { + "epoch": 10.53, + "learning_rate": 2.3686575052854124e-05, + "loss": 0.1135, + "step": 9964 + }, + { + "epoch": 10.53, + "learning_rate": 2.3681289640591966e-05, + "loss": 0.1214, + "step": 9966 + }, + { + "epoch": 10.54, + "learning_rate": 2.3676004228329812e-05, + "loss": 0.0285, + "step": 9968 + }, + { + "epoch": 10.54, + "learning_rate": 2.3670718816067655e-05, + "loss": 0.0551, + "step": 9970 + }, + { + "epoch": 10.54, + "learning_rate": 2.3665433403805498e-05, + "loss": 0.0443, + "step": 9972 + }, + { + "epoch": 10.54, + "learning_rate": 2.366014799154334e-05, + "loss": 0.0498, + "step": 9974 + }, + { + "epoch": 10.55, + "learning_rate": 2.3654862579281183e-05, + "loss": 0.283, + "step": 9976 + }, + { + "epoch": 10.55, + "learning_rate": 2.364957716701903e-05, + "loss": 0.0721, + "step": 9978 + }, + { + "epoch": 10.55, + "learning_rate": 2.364429175475687e-05, + "loss": 0.0452, + "step": 9980 + }, + { + "epoch": 10.55, + "learning_rate": 2.3639006342494714e-05, + "loss": 0.1145, + "step": 9982 + }, + { + "epoch": 10.55, + "learning_rate": 2.3633720930232557e-05, + "loss": 0.163, + "step": 9984 + }, + { + "epoch": 10.56, + "learning_rate": 2.3628435517970403e-05, + "loss": 0.0282, + "step": 9986 + }, + { + "epoch": 10.56, + "learning_rate": 2.3623150105708246e-05, + "loss": 0.0182, + "step": 9988 + }, + { + "epoch": 10.56, + "learning_rate": 2.361786469344609e-05, + "loss": 0.0602, + "step": 9990 + }, + { + "epoch": 10.56, + "learning_rate": 2.3612579281183934e-05, + "loss": 0.1005, + "step": 9992 + }, + { + "epoch": 10.56, + "learning_rate": 2.3607293868921777e-05, + "loss": 0.0487, + "step": 9994 + }, + { + "epoch": 10.57, + "learning_rate": 2.3602008456659623e-05, + "loss": 0.1124, + "step": 9996 + }, + { + "epoch": 10.57, + "learning_rate": 2.3596723044397466e-05, + "loss": 0.0693, + "step": 9998 + }, + { + "epoch": 10.57, + "learning_rate": 2.359143763213531e-05, + "loss": 0.05, + "step": 10000 + }, + { + "epoch": 10.57, + "eval_cer": 0.02638928469649473, + "eval_loss": 0.38202860951423645, + "eval_runtime": 128.6979, + "eval_samples_per_second": 6.535, + "eval_steps_per_second": 0.824, + "step": 10000 + }, + { + "epoch": 10.57, + "learning_rate": 2.358615221987315e-05, + "loss": 0.0817, + "step": 10002 + }, + { + "epoch": 10.58, + "learning_rate": 2.3580866807610994e-05, + "loss": 0.0701, + "step": 10004 + }, + { + "epoch": 10.58, + "learning_rate": 2.357558139534884e-05, + "loss": 0.0567, + "step": 10006 + }, + { + "epoch": 10.58, + "learning_rate": 2.3570295983086683e-05, + "loss": 0.0494, + "step": 10008 + }, + { + "epoch": 10.58, + "learning_rate": 2.3565010570824525e-05, + "loss": 0.0508, + "step": 10010 + }, + { + "epoch": 10.58, + "learning_rate": 2.3559725158562368e-05, + "loss": 0.1178, + "step": 10012 + }, + { + "epoch": 10.59, + "learning_rate": 2.3554439746300214e-05, + "loss": 0.0839, + "step": 10014 + }, + { + "epoch": 10.59, + "learning_rate": 2.3549154334038057e-05, + "loss": 0.1188, + "step": 10016 + }, + { + "epoch": 10.59, + "learning_rate": 2.35438689217759e-05, + "loss": 0.0868, + "step": 10018 + }, + { + "epoch": 10.59, + "learning_rate": 2.3538583509513742e-05, + "loss": 0.0215, + "step": 10020 + }, + { + "epoch": 10.59, + "learning_rate": 2.3533298097251585e-05, + "loss": 0.0431, + "step": 10022 + }, + { + "epoch": 10.6, + "learning_rate": 2.352801268498943e-05, + "loss": 0.0646, + "step": 10024 + }, + { + "epoch": 10.6, + "learning_rate": 2.3522727272727273e-05, + "loss": 0.0371, + "step": 10026 + }, + { + "epoch": 10.6, + "learning_rate": 2.3517441860465116e-05, + "loss": 0.0433, + "step": 10028 + }, + { + "epoch": 10.6, + "learning_rate": 2.351215644820296e-05, + "loss": 0.0562, + "step": 10030 + }, + { + "epoch": 10.6, + "learning_rate": 2.3506871035940805e-05, + "loss": 0.0157, + "step": 10032 + }, + { + "epoch": 10.61, + "learning_rate": 2.3501585623678648e-05, + "loss": 0.0435, + "step": 10034 + }, + { + "epoch": 10.61, + "learning_rate": 2.349630021141649e-05, + "loss": 0.0897, + "step": 10036 + }, + { + "epoch": 10.61, + "learning_rate": 2.3491014799154333e-05, + "loss": 0.184, + "step": 10038 + }, + { + "epoch": 10.61, + "learning_rate": 2.348572938689218e-05, + "loss": 0.2017, + "step": 10040 + }, + { + "epoch": 10.62, + "learning_rate": 2.348044397463002e-05, + "loss": 0.1572, + "step": 10042 + }, + { + "epoch": 10.62, + "learning_rate": 2.3475158562367868e-05, + "loss": 0.0323, + "step": 10044 + }, + { + "epoch": 10.62, + "learning_rate": 2.346987315010571e-05, + "loss": 0.1133, + "step": 10046 + }, + { + "epoch": 10.62, + "learning_rate": 2.3464587737843553e-05, + "loss": 0.1693, + "step": 10048 + }, + { + "epoch": 10.62, + "learning_rate": 2.34593023255814e-05, + "loss": 0.1112, + "step": 10050 + }, + { + "epoch": 10.63, + "learning_rate": 2.3454016913319242e-05, + "loss": 0.137, + "step": 10052 + }, + { + "epoch": 10.63, + "learning_rate": 2.3448731501057084e-05, + "loss": 0.0497, + "step": 10054 + }, + { + "epoch": 10.63, + "learning_rate": 2.3443446088794927e-05, + "loss": 0.1043, + "step": 10056 + }, + { + "epoch": 10.63, + "learning_rate": 2.343816067653277e-05, + "loss": 0.0784, + "step": 10058 + }, + { + "epoch": 10.63, + "learning_rate": 2.3432875264270616e-05, + "loss": 0.0274, + "step": 10060 + }, + { + "epoch": 10.64, + "learning_rate": 2.342758985200846e-05, + "loss": 0.0521, + "step": 10062 + }, + { + "epoch": 10.64, + "learning_rate": 2.34223044397463e-05, + "loss": 0.0672, + "step": 10064 + }, + { + "epoch": 10.64, + "learning_rate": 2.3417019027484144e-05, + "loss": 0.0268, + "step": 10066 + }, + { + "epoch": 10.64, + "learning_rate": 2.341173361522199e-05, + "loss": 0.0964, + "step": 10068 + }, + { + "epoch": 10.64, + "learning_rate": 2.3406448202959833e-05, + "loss": 0.0146, + "step": 10070 + }, + { + "epoch": 10.65, + "learning_rate": 2.3401162790697675e-05, + "loss": 0.052, + "step": 10072 + }, + { + "epoch": 10.65, + "learning_rate": 2.3395877378435518e-05, + "loss": 0.0886, + "step": 10074 + }, + { + "epoch": 10.65, + "learning_rate": 2.339059196617336e-05, + "loss": 0.0271, + "step": 10076 + }, + { + "epoch": 10.65, + "learning_rate": 2.3385306553911207e-05, + "loss": 0.0203, + "step": 10078 + }, + { + "epoch": 10.66, + "learning_rate": 2.338002114164905e-05, + "loss": 0.0978, + "step": 10080 + }, + { + "epoch": 10.66, + "learning_rate": 2.3374735729386892e-05, + "loss": 0.0224, + "step": 10082 + }, + { + "epoch": 10.66, + "learning_rate": 2.3369450317124735e-05, + "loss": 0.0761, + "step": 10084 + }, + { + "epoch": 10.66, + "learning_rate": 2.336416490486258e-05, + "loss": 0.0888, + "step": 10086 + }, + { + "epoch": 10.66, + "learning_rate": 2.3358879492600423e-05, + "loss": 0.0903, + "step": 10088 + }, + { + "epoch": 10.67, + "learning_rate": 2.3353594080338266e-05, + "loss": 0.1393, + "step": 10090 + }, + { + "epoch": 10.67, + "learning_rate": 2.3348308668076112e-05, + "loss": 0.0509, + "step": 10092 + }, + { + "epoch": 10.67, + "learning_rate": 2.3343023255813955e-05, + "loss": 0.0558, + "step": 10094 + }, + { + "epoch": 10.67, + "learning_rate": 2.3337737843551797e-05, + "loss": 0.0986, + "step": 10096 + }, + { + "epoch": 10.67, + "learning_rate": 2.3332452431289643e-05, + "loss": 0.0502, + "step": 10098 + }, + { + "epoch": 10.68, + "learning_rate": 2.3327167019027486e-05, + "loss": 0.0581, + "step": 10100 + }, + { + "epoch": 10.68, + "learning_rate": 2.332188160676533e-05, + "loss": 0.0655, + "step": 10102 + }, + { + "epoch": 10.68, + "learning_rate": 2.3316596194503175e-05, + "loss": 0.0805, + "step": 10104 + }, + { + "epoch": 10.68, + "learning_rate": 2.3311310782241018e-05, + "loss": 0.0529, + "step": 10106 + }, + { + "epoch": 10.68, + "learning_rate": 2.330602536997886e-05, + "loss": 0.1108, + "step": 10108 + }, + { + "epoch": 10.69, + "learning_rate": 2.3300739957716703e-05, + "loss": 0.0586, + "step": 10110 + }, + { + "epoch": 10.69, + "learning_rate": 2.3295454545454546e-05, + "loss": 0.0512, + "step": 10112 + }, + { + "epoch": 10.69, + "learning_rate": 2.329016913319239e-05, + "loss": 0.0463, + "step": 10114 + }, + { + "epoch": 10.69, + "learning_rate": 2.3284883720930234e-05, + "loss": 0.0569, + "step": 10116 + }, + { + "epoch": 10.7, + "learning_rate": 2.3279598308668077e-05, + "loss": 0.0277, + "step": 10118 + }, + { + "epoch": 10.7, + "learning_rate": 2.327431289640592e-05, + "loss": 0.0731, + "step": 10120 + }, + { + "epoch": 10.7, + "learning_rate": 2.3269027484143766e-05, + "loss": 0.0775, + "step": 10122 + }, + { + "epoch": 10.7, + "learning_rate": 2.326374207188161e-05, + "loss": 0.04, + "step": 10124 + }, + { + "epoch": 10.7, + "learning_rate": 2.325845665961945e-05, + "loss": 0.084, + "step": 10126 + }, + { + "epoch": 10.71, + "learning_rate": 2.3253171247357294e-05, + "loss": 0.056, + "step": 10128 + }, + { + "epoch": 10.71, + "learning_rate": 2.3247885835095136e-05, + "loss": 0.0541, + "step": 10130 + }, + { + "epoch": 10.71, + "learning_rate": 2.3242600422832982e-05, + "loss": 0.0266, + "step": 10132 + }, + { + "epoch": 10.71, + "learning_rate": 2.3237315010570825e-05, + "loss": 0.0888, + "step": 10134 + }, + { + "epoch": 10.71, + "learning_rate": 2.3232029598308668e-05, + "loss": 0.0318, + "step": 10136 + }, + { + "epoch": 10.72, + "learning_rate": 2.322674418604651e-05, + "loss": 0.0515, + "step": 10138 + }, + { + "epoch": 10.72, + "learning_rate": 2.3221458773784357e-05, + "loss": 0.0594, + "step": 10140 + }, + { + "epoch": 10.72, + "learning_rate": 2.32161733615222e-05, + "loss": 0.0935, + "step": 10142 + }, + { + "epoch": 10.72, + "learning_rate": 2.3210887949260042e-05, + "loss": 0.0298, + "step": 10144 + }, + { + "epoch": 10.73, + "learning_rate": 2.3205602536997888e-05, + "loss": 0.1176, + "step": 10146 + }, + { + "epoch": 10.73, + "learning_rate": 2.320031712473573e-05, + "loss": 0.0506, + "step": 10148 + }, + { + "epoch": 10.73, + "learning_rate": 2.3195031712473577e-05, + "loss": 0.0595, + "step": 10150 + }, + { + "epoch": 10.73, + "learning_rate": 2.318974630021142e-05, + "loss": 0.0413, + "step": 10152 + }, + { + "epoch": 10.73, + "learning_rate": 2.3184460887949262e-05, + "loss": 0.102, + "step": 10154 + }, + { + "epoch": 10.74, + "learning_rate": 2.3179175475687105e-05, + "loss": 0.0363, + "step": 10156 + }, + { + "epoch": 10.74, + "learning_rate": 2.317389006342495e-05, + "loss": 0.0616, + "step": 10158 + }, + { + "epoch": 10.74, + "learning_rate": 2.3168604651162793e-05, + "loss": 0.1399, + "step": 10160 + }, + { + "epoch": 10.74, + "learning_rate": 2.3163319238900636e-05, + "loss": 0.0462, + "step": 10162 + }, + { + "epoch": 10.74, + "learning_rate": 2.315803382663848e-05, + "loss": 0.0246, + "step": 10164 + }, + { + "epoch": 10.75, + "learning_rate": 2.315274841437632e-05, + "loss": 0.0721, + "step": 10166 + }, + { + "epoch": 10.75, + "learning_rate": 2.3147463002114167e-05, + "loss": 0.0664, + "step": 10168 + }, + { + "epoch": 10.75, + "learning_rate": 2.314217758985201e-05, + "loss": 0.0476, + "step": 10170 + }, + { + "epoch": 10.75, + "learning_rate": 2.3136892177589853e-05, + "loss": 0.0586, + "step": 10172 + }, + { + "epoch": 10.75, + "learning_rate": 2.3131606765327695e-05, + "loss": 0.0424, + "step": 10174 + }, + { + "epoch": 10.76, + "learning_rate": 2.312632135306554e-05, + "loss": 0.0312, + "step": 10176 + }, + { + "epoch": 10.76, + "learning_rate": 2.3121035940803384e-05, + "loss": 0.0746, + "step": 10178 + }, + { + "epoch": 10.76, + "learning_rate": 2.3115750528541227e-05, + "loss": 0.0858, + "step": 10180 + }, + { + "epoch": 10.76, + "learning_rate": 2.311046511627907e-05, + "loss": 0.0282, + "step": 10182 + }, + { + "epoch": 10.77, + "learning_rate": 2.3105179704016912e-05, + "loss": 0.0502, + "step": 10184 + }, + { + "epoch": 10.77, + "learning_rate": 2.3099894291754758e-05, + "loss": 0.0535, + "step": 10186 + }, + { + "epoch": 10.77, + "learning_rate": 2.30946088794926e-05, + "loss": 0.0495, + "step": 10188 + }, + { + "epoch": 10.77, + "learning_rate": 2.3089323467230444e-05, + "loss": 0.1278, + "step": 10190 + }, + { + "epoch": 10.77, + "learning_rate": 2.3084038054968286e-05, + "loss": 0.1004, + "step": 10192 + }, + { + "epoch": 10.78, + "learning_rate": 2.3078752642706132e-05, + "loss": 0.0488, + "step": 10194 + }, + { + "epoch": 10.78, + "learning_rate": 2.3073467230443975e-05, + "loss": 0.0498, + "step": 10196 + }, + { + "epoch": 10.78, + "learning_rate": 2.3068181818181818e-05, + "loss": 0.0595, + "step": 10198 + }, + { + "epoch": 10.78, + "learning_rate": 2.3062896405919664e-05, + "loss": 0.0699, + "step": 10200 + }, + { + "epoch": 10.78, + "learning_rate": 2.3057610993657506e-05, + "loss": 0.0941, + "step": 10202 + }, + { + "epoch": 10.79, + "learning_rate": 2.3052325581395352e-05, + "loss": 0.0409, + "step": 10204 + }, + { + "epoch": 10.79, + "learning_rate": 2.3047040169133195e-05, + "loss": 0.0915, + "step": 10206 + }, + { + "epoch": 10.79, + "learning_rate": 2.3041754756871038e-05, + "loss": 0.0957, + "step": 10208 + }, + { + "epoch": 10.79, + "learning_rate": 2.303646934460888e-05, + "loss": 0.0636, + "step": 10210 + }, + { + "epoch": 10.79, + "learning_rate": 2.3031183932346727e-05, + "loss": 0.0596, + "step": 10212 + }, + { + "epoch": 10.8, + "learning_rate": 2.302589852008457e-05, + "loss": 0.05, + "step": 10214 + }, + { + "epoch": 10.8, + "learning_rate": 2.3020613107822412e-05, + "loss": 0.0246, + "step": 10216 + }, + { + "epoch": 10.8, + "learning_rate": 2.3015327695560255e-05, + "loss": 0.142, + "step": 10218 + }, + { + "epoch": 10.8, + "learning_rate": 2.3010042283298097e-05, + "loss": 0.0424, + "step": 10220 + }, + { + "epoch": 10.81, + "learning_rate": 2.3004756871035943e-05, + "loss": 0.0676, + "step": 10222 + }, + { + "epoch": 10.81, + "learning_rate": 2.2999471458773786e-05, + "loss": 0.0551, + "step": 10224 + }, + { + "epoch": 10.81, + "learning_rate": 2.299418604651163e-05, + "loss": 0.0686, + "step": 10226 + }, + { + "epoch": 10.81, + "learning_rate": 2.298890063424947e-05, + "loss": 0.1185, + "step": 10228 + }, + { + "epoch": 10.81, + "learning_rate": 2.2983615221987317e-05, + "loss": 0.0675, + "step": 10230 + }, + { + "epoch": 10.82, + "learning_rate": 2.297832980972516e-05, + "loss": 0.0656, + "step": 10232 + }, + { + "epoch": 10.82, + "learning_rate": 2.2973044397463003e-05, + "loss": 0.0615, + "step": 10234 + }, + { + "epoch": 10.82, + "learning_rate": 2.2967758985200845e-05, + "loss": 0.0193, + "step": 10236 + }, + { + "epoch": 10.82, + "learning_rate": 2.2962473572938688e-05, + "loss": 0.1593, + "step": 10238 + }, + { + "epoch": 10.82, + "learning_rate": 2.2957188160676534e-05, + "loss": 0.1292, + "step": 10240 + }, + { + "epoch": 10.83, + "learning_rate": 2.2951902748414377e-05, + "loss": 0.0303, + "step": 10242 + }, + { + "epoch": 10.83, + "learning_rate": 2.294661733615222e-05, + "loss": 0.1374, + "step": 10244 + }, + { + "epoch": 10.83, + "learning_rate": 2.2941331923890062e-05, + "loss": 0.0827, + "step": 10246 + }, + { + "epoch": 10.83, + "learning_rate": 2.2936046511627908e-05, + "loss": 0.0772, + "step": 10248 + }, + { + "epoch": 10.84, + "learning_rate": 2.293076109936575e-05, + "loss": 0.0461, + "step": 10250 + }, + { + "epoch": 10.84, + "learning_rate": 2.2925475687103597e-05, + "loss": 0.0469, + "step": 10252 + }, + { + "epoch": 10.84, + "learning_rate": 2.292019027484144e-05, + "loss": 0.0446, + "step": 10254 + }, + { + "epoch": 10.84, + "learning_rate": 2.2914904862579282e-05, + "loss": 0.0505, + "step": 10256 + }, + { + "epoch": 10.84, + "learning_rate": 2.290961945031713e-05, + "loss": 0.017, + "step": 10258 + }, + { + "epoch": 10.85, + "learning_rate": 2.290433403805497e-05, + "loss": 0.0467, + "step": 10260 + }, + { + "epoch": 10.85, + "learning_rate": 2.2899048625792814e-05, + "loss": 0.136, + "step": 10262 + }, + { + "epoch": 10.85, + "learning_rate": 2.2893763213530656e-05, + "loss": 0.0482, + "step": 10264 + }, + { + "epoch": 10.85, + "learning_rate": 2.2888477801268502e-05, + "loss": 0.065, + "step": 10266 + }, + { + "epoch": 10.85, + "learning_rate": 2.2883192389006345e-05, + "loss": 0.0573, + "step": 10268 + }, + { + "epoch": 10.86, + "learning_rate": 2.2877906976744188e-05, + "loss": 0.0658, + "step": 10270 + }, + { + "epoch": 10.86, + "learning_rate": 2.287262156448203e-05, + "loss": 0.0434, + "step": 10272 + }, + { + "epoch": 10.86, + "learning_rate": 2.2867336152219873e-05, + "loss": 0.1148, + "step": 10274 + }, + { + "epoch": 10.86, + "learning_rate": 2.286205073995772e-05, + "loss": 0.0269, + "step": 10276 + }, + { + "epoch": 10.86, + "learning_rate": 2.2856765327695562e-05, + "loss": 0.06, + "step": 10278 + }, + { + "epoch": 10.87, + "learning_rate": 2.2851479915433404e-05, + "loss": 0.0524, + "step": 10280 + }, + { + "epoch": 10.87, + "learning_rate": 2.2846194503171247e-05, + "loss": 0.0526, + "step": 10282 + }, + { + "epoch": 10.87, + "learning_rate": 2.2840909090909093e-05, + "loss": 0.0903, + "step": 10284 + }, + { + "epoch": 10.87, + "learning_rate": 2.2835623678646936e-05, + "loss": 0.0244, + "step": 10286 + }, + { + "epoch": 10.88, + "learning_rate": 2.283033826638478e-05, + "loss": 0.0735, + "step": 10288 + }, + { + "epoch": 10.88, + "learning_rate": 2.282505285412262e-05, + "loss": 0.0204, + "step": 10290 + }, + { + "epoch": 10.88, + "learning_rate": 2.2819767441860464e-05, + "loss": 0.0459, + "step": 10292 + }, + { + "epoch": 10.88, + "learning_rate": 2.281448202959831e-05, + "loss": 0.0396, + "step": 10294 + }, + { + "epoch": 10.88, + "learning_rate": 2.2809196617336153e-05, + "loss": 0.0582, + "step": 10296 + }, + { + "epoch": 10.89, + "learning_rate": 2.2803911205073995e-05, + "loss": 0.0827, + "step": 10298 + }, + { + "epoch": 10.89, + "learning_rate": 2.2798625792811838e-05, + "loss": 0.2031, + "step": 10300 + }, + { + "epoch": 10.89, + "learning_rate": 2.2793340380549684e-05, + "loss": 0.0703, + "step": 10302 + }, + { + "epoch": 10.89, + "learning_rate": 2.2788054968287527e-05, + "loss": 0.0775, + "step": 10304 + }, + { + "epoch": 10.89, + "learning_rate": 2.2782769556025373e-05, + "loss": 0.0718, + "step": 10306 + }, + { + "epoch": 10.9, + "learning_rate": 2.2777484143763215e-05, + "loss": 0.1008, + "step": 10308 + }, + { + "epoch": 10.9, + "learning_rate": 2.2772198731501058e-05, + "loss": 0.0488, + "step": 10310 + }, + { + "epoch": 10.9, + "learning_rate": 2.2766913319238904e-05, + "loss": 0.091, + "step": 10312 + }, + { + "epoch": 10.9, + "learning_rate": 2.2761627906976747e-05, + "loss": 0.0164, + "step": 10314 + }, + { + "epoch": 10.9, + "learning_rate": 2.275634249471459e-05, + "loss": 0.0562, + "step": 10316 + }, + { + "epoch": 10.91, + "learning_rate": 2.2751057082452432e-05, + "loss": 0.1001, + "step": 10318 + }, + { + "epoch": 10.91, + "learning_rate": 2.2745771670190275e-05, + "loss": 0.0833, + "step": 10320 + }, + { + "epoch": 10.91, + "learning_rate": 2.274048625792812e-05, + "loss": 0.0365, + "step": 10322 + }, + { + "epoch": 10.91, + "learning_rate": 2.2735200845665964e-05, + "loss": 0.0233, + "step": 10324 + }, + { + "epoch": 10.92, + "learning_rate": 2.2729915433403806e-05, + "loss": 0.0548, + "step": 10326 + }, + { + "epoch": 10.92, + "learning_rate": 2.272463002114165e-05, + "loss": 0.082, + "step": 10328 + }, + { + "epoch": 10.92, + "learning_rate": 2.2719344608879495e-05, + "loss": 0.0264, + "step": 10330 + }, + { + "epoch": 10.92, + "learning_rate": 2.2714059196617338e-05, + "loss": 0.1164, + "step": 10332 + }, + { + "epoch": 10.92, + "learning_rate": 2.270877378435518e-05, + "loss": 0.1032, + "step": 10334 + }, + { + "epoch": 10.93, + "learning_rate": 2.2703488372093023e-05, + "loss": 0.0402, + "step": 10336 + }, + { + "epoch": 10.93, + "learning_rate": 2.2698202959830866e-05, + "loss": 0.0097, + "step": 10338 + }, + { + "epoch": 10.93, + "learning_rate": 2.2692917547568712e-05, + "loss": 0.0675, + "step": 10340 + }, + { + "epoch": 10.93, + "learning_rate": 2.2687632135306554e-05, + "loss": 0.1228, + "step": 10342 + }, + { + "epoch": 10.93, + "learning_rate": 2.2682346723044397e-05, + "loss": 0.1078, + "step": 10344 + }, + { + "epoch": 10.94, + "learning_rate": 2.267706131078224e-05, + "loss": 0.0542, + "step": 10346 + }, + { + "epoch": 10.94, + "learning_rate": 2.2671775898520086e-05, + "loss": 0.046, + "step": 10348 + }, + { + "epoch": 10.94, + "learning_rate": 2.266649048625793e-05, + "loss": 0.0806, + "step": 10350 + }, + { + "epoch": 10.94, + "learning_rate": 2.266120507399577e-05, + "loss": 0.0393, + "step": 10352 + }, + { + "epoch": 10.95, + "learning_rate": 2.2655919661733617e-05, + "loss": 0.0835, + "step": 10354 + }, + { + "epoch": 10.95, + "learning_rate": 2.265063424947146e-05, + "loss": 0.0646, + "step": 10356 + }, + { + "epoch": 10.95, + "learning_rate": 2.2645348837209303e-05, + "loss": 0.0517, + "step": 10358 + }, + { + "epoch": 10.95, + "learning_rate": 2.264006342494715e-05, + "loss": 0.0558, + "step": 10360 + }, + { + "epoch": 10.95, + "learning_rate": 2.263477801268499e-05, + "loss": 0.1552, + "step": 10362 + }, + { + "epoch": 10.96, + "learning_rate": 2.2629492600422834e-05, + "loss": 0.0535, + "step": 10364 + }, + { + "epoch": 10.96, + "learning_rate": 2.262420718816068e-05, + "loss": 0.0364, + "step": 10366 + }, + { + "epoch": 10.96, + "learning_rate": 2.2618921775898523e-05, + "loss": 0.0611, + "step": 10368 + }, + { + "epoch": 10.96, + "learning_rate": 2.2613636363636365e-05, + "loss": 0.0374, + "step": 10370 + }, + { + "epoch": 10.96, + "learning_rate": 2.2608350951374208e-05, + "loss": 0.0747, + "step": 10372 + }, + { + "epoch": 10.97, + "learning_rate": 2.260306553911205e-05, + "loss": 0.0561, + "step": 10374 + }, + { + "epoch": 10.97, + "learning_rate": 2.2597780126849897e-05, + "loss": 0.1045, + "step": 10376 + }, + { + "epoch": 10.97, + "learning_rate": 2.259249471458774e-05, + "loss": 0.0336, + "step": 10378 + }, + { + "epoch": 10.97, + "learning_rate": 2.2587209302325582e-05, + "loss": 0.1093, + "step": 10380 + }, + { + "epoch": 10.97, + "learning_rate": 2.2581923890063425e-05, + "loss": 0.1052, + "step": 10382 + }, + { + "epoch": 10.98, + "learning_rate": 2.257663847780127e-05, + "loss": 0.0394, + "step": 10384 + }, + { + "epoch": 10.98, + "learning_rate": 2.2571353065539113e-05, + "loss": 0.0291, + "step": 10386 + }, + { + "epoch": 10.98, + "learning_rate": 2.2566067653276956e-05, + "loss": 0.0769, + "step": 10388 + }, + { + "epoch": 10.98, + "learning_rate": 2.25607822410148e-05, + "loss": 0.1155, + "step": 10390 + }, + { + "epoch": 10.99, + "learning_rate": 2.255549682875264e-05, + "loss": 0.0829, + "step": 10392 + }, + { + "epoch": 10.99, + "learning_rate": 2.2550211416490488e-05, + "loss": 0.0843, + "step": 10394 + }, + { + "epoch": 10.99, + "learning_rate": 2.254492600422833e-05, + "loss": 0.0408, + "step": 10396 + }, + { + "epoch": 10.99, + "learning_rate": 2.2539640591966173e-05, + "loss": 0.1647, + "step": 10398 + }, + { + "epoch": 10.99, + "learning_rate": 2.2534355179704016e-05, + "loss": 0.0599, + "step": 10400 + }, + { + "epoch": 11.0, + "learning_rate": 2.252906976744186e-05, + "loss": 0.0424, + "step": 10402 + }, + { + "epoch": 11.0, + "learning_rate": 2.2523784355179704e-05, + "loss": 0.09, + "step": 10404 + }, + { + "epoch": 11.0, + "learning_rate": 2.2518498942917547e-05, + "loss": 0.0305, + "step": 10406 + }, + { + "epoch": 11.0, + "learning_rate": 2.2513213530655393e-05, + "loss": 0.0331, + "step": 10408 + }, + { + "epoch": 11.0, + "learning_rate": 2.2507928118393236e-05, + "loss": 0.0907, + "step": 10410 + }, + { + "epoch": 11.01, + "learning_rate": 2.2502642706131082e-05, + "loss": 0.0668, + "step": 10412 + }, + { + "epoch": 11.01, + "learning_rate": 2.2497357293868924e-05, + "loss": 0.1693, + "step": 10414 + }, + { + "epoch": 11.01, + "learning_rate": 2.2492071881606767e-05, + "loss": 0.0328, + "step": 10416 + }, + { + "epoch": 11.01, + "learning_rate": 2.248678646934461e-05, + "loss": 0.0536, + "step": 10418 + }, + { + "epoch": 11.01, + "learning_rate": 2.2481501057082456e-05, + "loss": 0.1321, + "step": 10420 + }, + { + "epoch": 11.02, + "learning_rate": 2.24762156448203e-05, + "loss": 0.0779, + "step": 10422 + }, + { + "epoch": 11.02, + "learning_rate": 2.247093023255814e-05, + "loss": 0.0142, + "step": 10424 + }, + { + "epoch": 11.02, + "learning_rate": 2.2465644820295984e-05, + "loss": 0.0312, + "step": 10426 + }, + { + "epoch": 11.02, + "learning_rate": 2.2460359408033827e-05, + "loss": 0.1316, + "step": 10428 + }, + { + "epoch": 11.03, + "learning_rate": 2.2455073995771673e-05, + "loss": 0.0545, + "step": 10430 + }, + { + "epoch": 11.03, + "learning_rate": 2.2449788583509515e-05, + "loss": 0.0712, + "step": 10432 + }, + { + "epoch": 11.03, + "learning_rate": 2.2444503171247358e-05, + "loss": 0.0942, + "step": 10434 + }, + { + "epoch": 11.03, + "learning_rate": 2.24392177589852e-05, + "loss": 0.0524, + "step": 10436 + }, + { + "epoch": 11.03, + "learning_rate": 2.2433932346723047e-05, + "loss": 0.0469, + "step": 10438 + }, + { + "epoch": 11.04, + "learning_rate": 2.242864693446089e-05, + "loss": 0.0925, + "step": 10440 + }, + { + "epoch": 11.04, + "learning_rate": 2.2423361522198732e-05, + "loss": 0.045, + "step": 10442 + }, + { + "epoch": 11.04, + "learning_rate": 2.2418076109936575e-05, + "loss": 0.0913, + "step": 10444 + }, + { + "epoch": 11.04, + "learning_rate": 2.2412790697674417e-05, + "loss": 0.0557, + "step": 10446 + }, + { + "epoch": 11.04, + "learning_rate": 2.2407505285412263e-05, + "loss": 0.036, + "step": 10448 + }, + { + "epoch": 11.05, + "learning_rate": 2.2402219873150106e-05, + "loss": 0.0623, + "step": 10450 + }, + { + "epoch": 11.05, + "learning_rate": 2.239693446088795e-05, + "loss": 0.0375, + "step": 10452 + }, + { + "epoch": 11.05, + "learning_rate": 2.239164904862579e-05, + "loss": 0.1366, + "step": 10454 + }, + { + "epoch": 11.05, + "learning_rate": 2.2386363636363637e-05, + "loss": 0.0478, + "step": 10456 + }, + { + "epoch": 11.05, + "learning_rate": 2.238107822410148e-05, + "loss": 0.0697, + "step": 10458 + }, + { + "epoch": 11.06, + "learning_rate": 2.2375792811839326e-05, + "loss": 0.0432, + "step": 10460 + }, + { + "epoch": 11.06, + "learning_rate": 2.237050739957717e-05, + "loss": 0.0327, + "step": 10462 + }, + { + "epoch": 11.06, + "learning_rate": 2.236522198731501e-05, + "loss": 0.0793, + "step": 10464 + }, + { + "epoch": 11.06, + "learning_rate": 2.2359936575052858e-05, + "loss": 0.0506, + "step": 10466 + }, + { + "epoch": 11.07, + "learning_rate": 2.23546511627907e-05, + "loss": 0.0744, + "step": 10468 + }, + { + "epoch": 11.07, + "learning_rate": 2.2349365750528543e-05, + "loss": 0.0531, + "step": 10470 + }, + { + "epoch": 11.07, + "learning_rate": 2.2344080338266386e-05, + "loss": 0.1243, + "step": 10472 + }, + { + "epoch": 11.07, + "learning_rate": 2.233879492600423e-05, + "loss": 0.0289, + "step": 10474 + }, + { + "epoch": 11.07, + "learning_rate": 2.2333509513742074e-05, + "loss": 0.0458, + "step": 10476 + }, + { + "epoch": 11.08, + "learning_rate": 2.2328224101479917e-05, + "loss": 0.0888, + "step": 10478 + }, + { + "epoch": 11.08, + "learning_rate": 2.232293868921776e-05, + "loss": 0.0729, + "step": 10480 + }, + { + "epoch": 11.08, + "learning_rate": 2.2317653276955602e-05, + "loss": 0.054, + "step": 10482 + }, + { + "epoch": 11.08, + "learning_rate": 2.231236786469345e-05, + "loss": 0.0521, + "step": 10484 + }, + { + "epoch": 11.08, + "learning_rate": 2.230708245243129e-05, + "loss": 0.0531, + "step": 10486 + }, + { + "epoch": 11.09, + "learning_rate": 2.2301797040169134e-05, + "loss": 0.0542, + "step": 10488 + }, + { + "epoch": 11.09, + "learning_rate": 2.2296511627906976e-05, + "loss": 0.0279, + "step": 10490 + }, + { + "epoch": 11.09, + "learning_rate": 2.2291226215644823e-05, + "loss": 0.0912, + "step": 10492 + }, + { + "epoch": 11.09, + "learning_rate": 2.2285940803382665e-05, + "loss": 0.0324, + "step": 10494 + }, + { + "epoch": 11.1, + "learning_rate": 2.2280655391120508e-05, + "loss": 0.0314, + "step": 10496 + }, + { + "epoch": 11.1, + "learning_rate": 2.227536997885835e-05, + "loss": 0.1059, + "step": 10498 + }, + { + "epoch": 11.1, + "learning_rate": 2.2270084566596193e-05, + "loss": 0.042, + "step": 10500 + }, + { + "epoch": 11.1, + "eval_cer": 0.04098033627814192, + "eval_loss": 0.8228550553321838, + "eval_runtime": 124.2745, + "eval_samples_per_second": 6.767, + "eval_steps_per_second": 0.853, + "step": 10500 + }, + { + "epoch": 11.1, + "learning_rate": 2.226479915433404e-05, + "loss": 0.0726, + "step": 10502 + }, + { + "epoch": 11.1, + "learning_rate": 2.2259513742071882e-05, + "loss": 0.0406, + "step": 10504 + }, + { + "epoch": 11.11, + "learning_rate": 2.2254228329809725e-05, + "loss": 0.0535, + "step": 10506 + }, + { + "epoch": 11.11, + "learning_rate": 2.2248942917547567e-05, + "loss": 0.0596, + "step": 10508 + }, + { + "epoch": 11.11, + "learning_rate": 2.2243657505285413e-05, + "loss": 0.0626, + "step": 10510 + }, + { + "epoch": 11.11, + "learning_rate": 2.2238372093023256e-05, + "loss": 0.0582, + "step": 10512 + }, + { + "epoch": 11.11, + "learning_rate": 2.2233086680761102e-05, + "loss": 0.1281, + "step": 10514 + }, + { + "epoch": 11.12, + "learning_rate": 2.2227801268498945e-05, + "loss": 0.0823, + "step": 10516 + }, + { + "epoch": 11.12, + "learning_rate": 2.2222515856236787e-05, + "loss": 0.0629, + "step": 10518 + }, + { + "epoch": 11.12, + "learning_rate": 2.2217230443974633e-05, + "loss": 0.0487, + "step": 10520 + }, + { + "epoch": 11.12, + "learning_rate": 2.2211945031712476e-05, + "loss": 0.0708, + "step": 10522 + }, + { + "epoch": 11.12, + "learning_rate": 2.220665961945032e-05, + "loss": 0.073, + "step": 10524 + }, + { + "epoch": 11.13, + "learning_rate": 2.220137420718816e-05, + "loss": 0.1439, + "step": 10526 + }, + { + "epoch": 11.13, + "learning_rate": 2.2196088794926008e-05, + "loss": 0.0255, + "step": 10528 + }, + { + "epoch": 11.13, + "learning_rate": 2.219080338266385e-05, + "loss": 0.0569, + "step": 10530 + }, + { + "epoch": 11.13, + "learning_rate": 2.2185517970401693e-05, + "loss": 0.0554, + "step": 10532 + }, + { + "epoch": 11.14, + "learning_rate": 2.2180232558139536e-05, + "loss": 0.0647, + "step": 10534 + }, + { + "epoch": 11.14, + "learning_rate": 2.2174947145877378e-05, + "loss": 0.0534, + "step": 10536 + }, + { + "epoch": 11.14, + "learning_rate": 2.2169661733615224e-05, + "loss": 0.0495, + "step": 10538 + }, + { + "epoch": 11.14, + "learning_rate": 2.2164376321353067e-05, + "loss": 0.128, + "step": 10540 + }, + { + "epoch": 11.14, + "learning_rate": 2.215909090909091e-05, + "loss": 0.1084, + "step": 10542 + }, + { + "epoch": 11.15, + "learning_rate": 2.2153805496828752e-05, + "loss": 0.0622, + "step": 10544 + }, + { + "epoch": 11.15, + "learning_rate": 2.21485200845666e-05, + "loss": 0.0289, + "step": 10546 + }, + { + "epoch": 11.15, + "learning_rate": 2.214323467230444e-05, + "loss": 0.0682, + "step": 10548 + }, + { + "epoch": 11.15, + "learning_rate": 2.2137949260042284e-05, + "loss": 0.0404, + "step": 10550 + }, + { + "epoch": 11.15, + "learning_rate": 2.2132663847780126e-05, + "loss": 0.0743, + "step": 10552 + }, + { + "epoch": 11.16, + "learning_rate": 2.212737843551797e-05, + "loss": 0.0205, + "step": 10554 + }, + { + "epoch": 11.16, + "learning_rate": 2.2122093023255815e-05, + "loss": 0.0295, + "step": 10556 + }, + { + "epoch": 11.16, + "learning_rate": 2.2116807610993658e-05, + "loss": 0.0505, + "step": 10558 + }, + { + "epoch": 11.16, + "learning_rate": 2.21115221987315e-05, + "loss": 0.0379, + "step": 10560 + }, + { + "epoch": 11.16, + "learning_rate": 2.2106236786469346e-05, + "loss": 0.0194, + "step": 10562 + }, + { + "epoch": 11.17, + "learning_rate": 2.210095137420719e-05, + "loss": 0.0343, + "step": 10564 + }, + { + "epoch": 11.17, + "learning_rate": 2.2095665961945032e-05, + "loss": 0.077, + "step": 10566 + }, + { + "epoch": 11.17, + "learning_rate": 2.2090380549682878e-05, + "loss": 0.0756, + "step": 10568 + }, + { + "epoch": 11.17, + "learning_rate": 2.208509513742072e-05, + "loss": 0.049, + "step": 10570 + }, + { + "epoch": 11.18, + "learning_rate": 2.2079809725158563e-05, + "loss": 0.0582, + "step": 10572 + }, + { + "epoch": 11.18, + "learning_rate": 2.207452431289641e-05, + "loss": 0.0347, + "step": 10574 + }, + { + "epoch": 11.18, + "learning_rate": 2.2069238900634252e-05, + "loss": 0.0778, + "step": 10576 + }, + { + "epoch": 11.18, + "learning_rate": 2.2063953488372095e-05, + "loss": 0.0305, + "step": 10578 + }, + { + "epoch": 11.18, + "learning_rate": 2.2058668076109937e-05, + "loss": 0.098, + "step": 10580 + }, + { + "epoch": 11.19, + "learning_rate": 2.2053382663847783e-05, + "loss": 0.0289, + "step": 10582 + }, + { + "epoch": 11.19, + "learning_rate": 2.2048097251585626e-05, + "loss": 0.0603, + "step": 10584 + }, + { + "epoch": 11.19, + "learning_rate": 2.204281183932347e-05, + "loss": 0.0724, + "step": 10586 + }, + { + "epoch": 11.19, + "learning_rate": 2.203752642706131e-05, + "loss": 0.0864, + "step": 10588 + }, + { + "epoch": 11.19, + "learning_rate": 2.2032241014799154e-05, + "loss": 0.1251, + "step": 10590 + }, + { + "epoch": 11.2, + "learning_rate": 2.2026955602537e-05, + "loss": 0.0161, + "step": 10592 + }, + { + "epoch": 11.2, + "learning_rate": 2.2021670190274843e-05, + "loss": 0.0633, + "step": 10594 + }, + { + "epoch": 11.2, + "learning_rate": 2.2016384778012685e-05, + "loss": 0.0709, + "step": 10596 + }, + { + "epoch": 11.2, + "learning_rate": 2.2011099365750528e-05, + "loss": 0.0971, + "step": 10598 + }, + { + "epoch": 11.21, + "learning_rate": 2.2005813953488374e-05, + "loss": 0.0582, + "step": 10600 + }, + { + "epoch": 11.21, + "learning_rate": 2.2000528541226217e-05, + "loss": 0.0354, + "step": 10602 + }, + { + "epoch": 11.21, + "learning_rate": 2.199524312896406e-05, + "loss": 0.0754, + "step": 10604 + }, + { + "epoch": 11.21, + "learning_rate": 2.1989957716701902e-05, + "loss": 0.0231, + "step": 10606 + }, + { + "epoch": 11.21, + "learning_rate": 2.1984672304439745e-05, + "loss": 0.0977, + "step": 10608 + }, + { + "epoch": 11.22, + "learning_rate": 2.197938689217759e-05, + "loss": 0.0853, + "step": 10610 + }, + { + "epoch": 11.22, + "learning_rate": 2.1974101479915434e-05, + "loss": 0.0304, + "step": 10612 + }, + { + "epoch": 11.22, + "learning_rate": 2.1968816067653276e-05, + "loss": 0.0818, + "step": 10614 + }, + { + "epoch": 11.22, + "learning_rate": 2.1963530655391122e-05, + "loss": 0.1034, + "step": 10616 + }, + { + "epoch": 11.22, + "learning_rate": 2.1958245243128965e-05, + "loss": 0.0743, + "step": 10618 + }, + { + "epoch": 11.23, + "learning_rate": 2.195295983086681e-05, + "loss": 0.0717, + "step": 10620 + }, + { + "epoch": 11.23, + "learning_rate": 2.1947674418604654e-05, + "loss": 0.0263, + "step": 10622 + }, + { + "epoch": 11.23, + "learning_rate": 2.1942389006342496e-05, + "loss": 0.065, + "step": 10624 + }, + { + "epoch": 11.23, + "learning_rate": 2.193710359408034e-05, + "loss": 0.1063, + "step": 10626 + }, + { + "epoch": 11.23, + "learning_rate": 2.1931818181818185e-05, + "loss": 0.0699, + "step": 10628 + }, + { + "epoch": 11.24, + "learning_rate": 2.1926532769556028e-05, + "loss": 0.0324, + "step": 10630 + }, + { + "epoch": 11.24, + "learning_rate": 2.192124735729387e-05, + "loss": 0.0283, + "step": 10632 + }, + { + "epoch": 11.24, + "learning_rate": 2.1915961945031713e-05, + "loss": 0.0296, + "step": 10634 + }, + { + "epoch": 11.24, + "learning_rate": 2.191067653276956e-05, + "loss": 0.0686, + "step": 10636 + }, + { + "epoch": 11.25, + "learning_rate": 2.1905391120507402e-05, + "loss": 0.0577, + "step": 10638 + }, + { + "epoch": 11.25, + "learning_rate": 2.1900105708245245e-05, + "loss": 0.0288, + "step": 10640 + }, + { + "epoch": 11.25, + "learning_rate": 2.1894820295983087e-05, + "loss": 0.1001, + "step": 10642 + }, + { + "epoch": 11.25, + "learning_rate": 2.188953488372093e-05, + "loss": 0.0518, + "step": 10644 + }, + { + "epoch": 11.25, + "learning_rate": 2.1884249471458776e-05, + "loss": 0.0222, + "step": 10646 + }, + { + "epoch": 11.26, + "learning_rate": 2.187896405919662e-05, + "loss": 0.0773, + "step": 10648 + }, + { + "epoch": 11.26, + "learning_rate": 2.187367864693446e-05, + "loss": 0.0045, + "step": 10650 + }, + { + "epoch": 11.26, + "learning_rate": 2.1868393234672304e-05, + "loss": 0.0173, + "step": 10652 + }, + { + "epoch": 11.26, + "learning_rate": 2.1863107822410147e-05, + "loss": 0.0316, + "step": 10654 + }, + { + "epoch": 11.26, + "learning_rate": 2.1857822410147993e-05, + "loss": 0.1172, + "step": 10656 + }, + { + "epoch": 11.27, + "learning_rate": 2.1852536997885835e-05, + "loss": 0.0507, + "step": 10658 + }, + { + "epoch": 11.27, + "learning_rate": 2.1847251585623678e-05, + "loss": 0.0951, + "step": 10660 + }, + { + "epoch": 11.27, + "learning_rate": 2.184196617336152e-05, + "loss": 0.0199, + "step": 10662 + }, + { + "epoch": 11.27, + "learning_rate": 2.1836680761099367e-05, + "loss": 0.0583, + "step": 10664 + }, + { + "epoch": 11.27, + "learning_rate": 2.183139534883721e-05, + "loss": 0.041, + "step": 10666 + }, + { + "epoch": 11.28, + "learning_rate": 2.1826109936575052e-05, + "loss": 0.0624, + "step": 10668 + }, + { + "epoch": 11.28, + "learning_rate": 2.1820824524312898e-05, + "loss": 0.0803, + "step": 10670 + }, + { + "epoch": 11.28, + "learning_rate": 2.181553911205074e-05, + "loss": 0.1347, + "step": 10672 + }, + { + "epoch": 11.28, + "learning_rate": 2.1810253699788587e-05, + "loss": 0.0725, + "step": 10674 + }, + { + "epoch": 11.29, + "learning_rate": 2.180496828752643e-05, + "loss": 0.0355, + "step": 10676 + }, + { + "epoch": 11.29, + "learning_rate": 2.1799682875264272e-05, + "loss": 0.0703, + "step": 10678 + }, + { + "epoch": 11.29, + "learning_rate": 2.1794397463002115e-05, + "loss": 0.0373, + "step": 10680 + }, + { + "epoch": 11.29, + "learning_rate": 2.178911205073996e-05, + "loss": 0.0535, + "step": 10682 + }, + { + "epoch": 11.29, + "learning_rate": 2.1783826638477804e-05, + "loss": 0.0635, + "step": 10684 + }, + { + "epoch": 11.3, + "learning_rate": 2.1778541226215646e-05, + "loss": 0.0548, + "step": 10686 + }, + { + "epoch": 11.3, + "learning_rate": 2.177325581395349e-05, + "loss": 0.0797, + "step": 10688 + }, + { + "epoch": 11.3, + "learning_rate": 2.176797040169133e-05, + "loss": 0.1646, + "step": 10690 + }, + { + "epoch": 11.3, + "learning_rate": 2.1762684989429178e-05, + "loss": 0.0078, + "step": 10692 + }, + { + "epoch": 11.3, + "learning_rate": 2.175739957716702e-05, + "loss": 0.039, + "step": 10694 + }, + { + "epoch": 11.31, + "learning_rate": 2.1752114164904863e-05, + "loss": 0.0683, + "step": 10696 + }, + { + "epoch": 11.31, + "learning_rate": 2.1746828752642706e-05, + "loss": 0.0309, + "step": 10698 + }, + { + "epoch": 11.31, + "learning_rate": 2.1741543340380552e-05, + "loss": 0.0709, + "step": 10700 + }, + { + "epoch": 11.31, + "learning_rate": 2.1736257928118394e-05, + "loss": 0.0945, + "step": 10702 + }, + { + "epoch": 11.32, + "learning_rate": 2.1730972515856237e-05, + "loss": 0.0391, + "step": 10704 + }, + { + "epoch": 11.32, + "learning_rate": 2.172568710359408e-05, + "loss": 0.0402, + "step": 10706 + }, + { + "epoch": 11.32, + "learning_rate": 2.1720401691331922e-05, + "loss": 0.1571, + "step": 10708 + }, + { + "epoch": 11.32, + "learning_rate": 2.171511627906977e-05, + "loss": 0.0544, + "step": 10710 + }, + { + "epoch": 11.32, + "learning_rate": 2.170983086680761e-05, + "loss": 0.0227, + "step": 10712 + }, + { + "epoch": 11.33, + "learning_rate": 2.1704545454545454e-05, + "loss": 0.0334, + "step": 10714 + }, + { + "epoch": 11.33, + "learning_rate": 2.1699260042283297e-05, + "loss": 0.041, + "step": 10716 + }, + { + "epoch": 11.33, + "learning_rate": 2.1693974630021143e-05, + "loss": 0.108, + "step": 10718 + }, + { + "epoch": 11.33, + "learning_rate": 2.1688689217758985e-05, + "loss": 0.0199, + "step": 10720 + }, + { + "epoch": 11.33, + "learning_rate": 2.168340380549683e-05, + "loss": 0.0907, + "step": 10722 + }, + { + "epoch": 11.34, + "learning_rate": 2.1678118393234674e-05, + "loss": 0.0606, + "step": 10724 + }, + { + "epoch": 11.34, + "learning_rate": 2.1672832980972517e-05, + "loss": 0.0314, + "step": 10726 + }, + { + "epoch": 11.34, + "learning_rate": 2.1667547568710363e-05, + "loss": 0.0559, + "step": 10728 + }, + { + "epoch": 11.34, + "learning_rate": 2.1662262156448205e-05, + "loss": 0.0155, + "step": 10730 + }, + { + "epoch": 11.34, + "learning_rate": 2.1656976744186048e-05, + "loss": 0.068, + "step": 10732 + }, + { + "epoch": 11.35, + "learning_rate": 2.165169133192389e-05, + "loss": 0.037, + "step": 10734 + }, + { + "epoch": 11.35, + "learning_rate": 2.1646405919661737e-05, + "loss": 0.0204, + "step": 10736 + }, + { + "epoch": 11.35, + "learning_rate": 2.164112050739958e-05, + "loss": 0.0349, + "step": 10738 + }, + { + "epoch": 11.35, + "learning_rate": 2.1635835095137422e-05, + "loss": 0.0516, + "step": 10740 + }, + { + "epoch": 11.36, + "learning_rate": 2.1630549682875265e-05, + "loss": 0.0482, + "step": 10742 + }, + { + "epoch": 11.36, + "learning_rate": 2.1625264270613107e-05, + "loss": 0.022, + "step": 10744 + }, + { + "epoch": 11.36, + "learning_rate": 2.1619978858350954e-05, + "loss": 0.0717, + "step": 10746 + }, + { + "epoch": 11.36, + "learning_rate": 2.1614693446088796e-05, + "loss": 0.0435, + "step": 10748 + }, + { + "epoch": 11.36, + "learning_rate": 2.160940803382664e-05, + "loss": 0.0449, + "step": 10750 + }, + { + "epoch": 11.37, + "learning_rate": 2.160412262156448e-05, + "loss": 0.0319, + "step": 10752 + }, + { + "epoch": 11.37, + "learning_rate": 2.1598837209302328e-05, + "loss": 0.0255, + "step": 10754 + }, + { + "epoch": 11.37, + "learning_rate": 2.159355179704017e-05, + "loss": 0.1095, + "step": 10756 + }, + { + "epoch": 11.37, + "learning_rate": 2.1588266384778013e-05, + "loss": 0.0939, + "step": 10758 + }, + { + "epoch": 11.37, + "learning_rate": 2.1582980972515856e-05, + "loss": 0.055, + "step": 10760 + }, + { + "epoch": 11.38, + "learning_rate": 2.15776955602537e-05, + "loss": 0.1075, + "step": 10762 + }, + { + "epoch": 11.38, + "learning_rate": 2.1572410147991544e-05, + "loss": 0.0374, + "step": 10764 + }, + { + "epoch": 11.38, + "learning_rate": 2.1567124735729387e-05, + "loss": 0.0753, + "step": 10766 + }, + { + "epoch": 11.38, + "learning_rate": 2.156183932346723e-05, + "loss": 0.1265, + "step": 10768 + }, + { + "epoch": 11.38, + "learning_rate": 2.1556553911205072e-05, + "loss": 0.0542, + "step": 10770 + }, + { + "epoch": 11.39, + "learning_rate": 2.155126849894292e-05, + "loss": 0.0173, + "step": 10772 + }, + { + "epoch": 11.39, + "learning_rate": 2.154598308668076e-05, + "loss": 0.0681, + "step": 10774 + }, + { + "epoch": 11.39, + "learning_rate": 2.1540697674418607e-05, + "loss": 0.0296, + "step": 10776 + }, + { + "epoch": 11.39, + "learning_rate": 2.153541226215645e-05, + "loss": 0.0658, + "step": 10778 + }, + { + "epoch": 11.4, + "learning_rate": 2.1530126849894293e-05, + "loss": 0.1054, + "step": 10780 + }, + { + "epoch": 11.4, + "learning_rate": 2.152484143763214e-05, + "loss": 0.1244, + "step": 10782 + }, + { + "epoch": 11.4, + "learning_rate": 2.151955602536998e-05, + "loss": 0.026, + "step": 10784 + }, + { + "epoch": 11.4, + "learning_rate": 2.1514270613107824e-05, + "loss": 0.0251, + "step": 10786 + }, + { + "epoch": 11.4, + "learning_rate": 2.1508985200845667e-05, + "loss": 0.0284, + "step": 10788 + }, + { + "epoch": 11.41, + "learning_rate": 2.1503699788583513e-05, + "loss": 0.0288, + "step": 10790 + }, + { + "epoch": 11.41, + "learning_rate": 2.1498414376321355e-05, + "loss": 0.0914, + "step": 10792 + }, + { + "epoch": 11.41, + "learning_rate": 2.1493128964059198e-05, + "loss": 0.0433, + "step": 10794 + }, + { + "epoch": 11.41, + "learning_rate": 2.148784355179704e-05, + "loss": 0.0501, + "step": 10796 + }, + { + "epoch": 11.41, + "learning_rate": 2.1482558139534883e-05, + "loss": 0.0046, + "step": 10798 + }, + { + "epoch": 11.42, + "learning_rate": 2.147727272727273e-05, + "loss": 0.0498, + "step": 10800 + }, + { + "epoch": 11.42, + "learning_rate": 2.1471987315010572e-05, + "loss": 0.0928, + "step": 10802 + }, + { + "epoch": 11.42, + "learning_rate": 2.1466701902748415e-05, + "loss": 0.0529, + "step": 10804 + }, + { + "epoch": 11.42, + "learning_rate": 2.1461416490486257e-05, + "loss": 0.0392, + "step": 10806 + }, + { + "epoch": 11.42, + "learning_rate": 2.1456131078224103e-05, + "loss": 0.0596, + "step": 10808 + }, + { + "epoch": 11.43, + "learning_rate": 2.1450845665961946e-05, + "loss": 0.0378, + "step": 10810 + }, + { + "epoch": 11.43, + "learning_rate": 2.144556025369979e-05, + "loss": 0.0409, + "step": 10812 + }, + { + "epoch": 11.43, + "learning_rate": 2.144027484143763e-05, + "loss": 0.0267, + "step": 10814 + }, + { + "epoch": 11.43, + "learning_rate": 2.1434989429175474e-05, + "loss": 0.0452, + "step": 10816 + }, + { + "epoch": 11.44, + "learning_rate": 2.142970401691332e-05, + "loss": 0.0531, + "step": 10818 + }, + { + "epoch": 11.44, + "learning_rate": 2.1424418604651163e-05, + "loss": 0.0386, + "step": 10820 + }, + { + "epoch": 11.44, + "learning_rate": 2.1419133192389006e-05, + "loss": 0.063, + "step": 10822 + }, + { + "epoch": 11.44, + "learning_rate": 2.141384778012685e-05, + "loss": 0.0325, + "step": 10824 + }, + { + "epoch": 11.44, + "learning_rate": 2.1408562367864694e-05, + "loss": 0.0868, + "step": 10826 + }, + { + "epoch": 11.45, + "learning_rate": 2.1403276955602537e-05, + "loss": 0.0804, + "step": 10828 + }, + { + "epoch": 11.45, + "learning_rate": 2.1397991543340383e-05, + "loss": 0.0758, + "step": 10830 + }, + { + "epoch": 11.45, + "learning_rate": 2.1392706131078226e-05, + "loss": 0.1376, + "step": 10832 + }, + { + "epoch": 11.45, + "learning_rate": 2.138742071881607e-05, + "loss": 0.0455, + "step": 10834 + }, + { + "epoch": 11.45, + "learning_rate": 2.1382135306553914e-05, + "loss": 0.0557, + "step": 10836 + }, + { + "epoch": 11.46, + "learning_rate": 2.1376849894291757e-05, + "loss": 0.004, + "step": 10838 + }, + { + "epoch": 11.46, + "learning_rate": 2.13715644820296e-05, + "loss": 0.0539, + "step": 10840 + }, + { + "epoch": 11.46, + "learning_rate": 2.1366279069767442e-05, + "loss": 0.1415, + "step": 10842 + }, + { + "epoch": 11.46, + "learning_rate": 2.136099365750529e-05, + "loss": 0.0817, + "step": 10844 + }, + { + "epoch": 11.47, + "learning_rate": 2.135570824524313e-05, + "loss": 0.0372, + "step": 10846 + }, + { + "epoch": 11.47, + "learning_rate": 2.1350422832980974e-05, + "loss": 0.0595, + "step": 10848 + }, + { + "epoch": 11.47, + "learning_rate": 2.1345137420718817e-05, + "loss": 0.04, + "step": 10850 + }, + { + "epoch": 11.47, + "learning_rate": 2.133985200845666e-05, + "loss": 0.0356, + "step": 10852 + }, + { + "epoch": 11.47, + "learning_rate": 2.1334566596194505e-05, + "loss": 0.0702, + "step": 10854 + }, + { + "epoch": 11.48, + "learning_rate": 2.1329281183932348e-05, + "loss": 0.0667, + "step": 10856 + }, + { + "epoch": 11.48, + "learning_rate": 2.132399577167019e-05, + "loss": 0.0514, + "step": 10858 + }, + { + "epoch": 11.48, + "learning_rate": 2.1318710359408033e-05, + "loss": 0.1231, + "step": 10860 + }, + { + "epoch": 11.48, + "learning_rate": 2.131342494714588e-05, + "loss": 0.0405, + "step": 10862 + }, + { + "epoch": 11.48, + "learning_rate": 2.1308139534883722e-05, + "loss": 0.0731, + "step": 10864 + }, + { + "epoch": 11.49, + "learning_rate": 2.1302854122621565e-05, + "loss": 0.0312, + "step": 10866 + }, + { + "epoch": 11.49, + "learning_rate": 2.1297568710359407e-05, + "loss": 0.0644, + "step": 10868 + }, + { + "epoch": 11.49, + "learning_rate": 2.129228329809725e-05, + "loss": 0.0827, + "step": 10870 + }, + { + "epoch": 11.49, + "learning_rate": 2.1286997885835096e-05, + "loss": 0.0253, + "step": 10872 + }, + { + "epoch": 11.49, + "learning_rate": 2.128171247357294e-05, + "loss": 0.0523, + "step": 10874 + }, + { + "epoch": 11.5, + "learning_rate": 2.127642706131078e-05, + "loss": 0.0856, + "step": 10876 + }, + { + "epoch": 11.5, + "learning_rate": 2.1271141649048627e-05, + "loss": 0.065, + "step": 10878 + }, + { + "epoch": 11.5, + "learning_rate": 2.126585623678647e-05, + "loss": 0.0138, + "step": 10880 + }, + { + "epoch": 11.5, + "learning_rate": 2.1260570824524316e-05, + "loss": 0.0285, + "step": 10882 + }, + { + "epoch": 11.51, + "learning_rate": 2.125528541226216e-05, + "loss": 0.0584, + "step": 10884 + }, + { + "epoch": 11.51, + "learning_rate": 2.125e-05, + "loss": 0.0262, + "step": 10886 + }, + { + "epoch": 11.51, + "learning_rate": 2.1244714587737844e-05, + "loss": 0.0238, + "step": 10888 + }, + { + "epoch": 11.51, + "learning_rate": 2.123942917547569e-05, + "loss": 0.0802, + "step": 10890 + }, + { + "epoch": 11.51, + "learning_rate": 2.1234143763213533e-05, + "loss": 0.0672, + "step": 10892 + }, + { + "epoch": 11.52, + "learning_rate": 2.1228858350951376e-05, + "loss": 0.0377, + "step": 10894 + }, + { + "epoch": 11.52, + "learning_rate": 2.1223572938689218e-05, + "loss": 0.028, + "step": 10896 + }, + { + "epoch": 11.52, + "learning_rate": 2.1218287526427064e-05, + "loss": 0.0638, + "step": 10898 + }, + { + "epoch": 11.52, + "learning_rate": 2.1213002114164907e-05, + "loss": 0.0875, + "step": 10900 + }, + { + "epoch": 11.52, + "learning_rate": 2.120771670190275e-05, + "loss": 0.0416, + "step": 10902 + }, + { + "epoch": 11.53, + "learning_rate": 2.1202431289640592e-05, + "loss": 0.006, + "step": 10904 + }, + { + "epoch": 11.53, + "learning_rate": 2.1197145877378435e-05, + "loss": 0.0266, + "step": 10906 + }, + { + "epoch": 11.53, + "learning_rate": 2.119186046511628e-05, + "loss": 0.0754, + "step": 10908 + }, + { + "epoch": 11.53, + "learning_rate": 2.1186575052854124e-05, + "loss": 0.0288, + "step": 10910 + }, + { + "epoch": 11.53, + "learning_rate": 2.1181289640591966e-05, + "loss": 0.0511, + "step": 10912 + }, + { + "epoch": 11.54, + "learning_rate": 2.117600422832981e-05, + "loss": 0.0647, + "step": 10914 + }, + { + "epoch": 11.54, + "learning_rate": 2.1170718816067655e-05, + "loss": 0.0274, + "step": 10916 + }, + { + "epoch": 11.54, + "learning_rate": 2.1165433403805498e-05, + "loss": 0.0344, + "step": 10918 + }, + { + "epoch": 11.54, + "learning_rate": 2.116014799154334e-05, + "loss": 0.0904, + "step": 10920 + }, + { + "epoch": 11.55, + "learning_rate": 2.1154862579281183e-05, + "loss": 0.0463, + "step": 10922 + }, + { + "epoch": 11.55, + "learning_rate": 2.1149577167019026e-05, + "loss": 0.139, + "step": 10924 + }, + { + "epoch": 11.55, + "learning_rate": 2.1144291754756872e-05, + "loss": 0.0338, + "step": 10926 + }, + { + "epoch": 11.55, + "learning_rate": 2.1139006342494715e-05, + "loss": 0.0309, + "step": 10928 + }, + { + "epoch": 11.55, + "learning_rate": 2.113372093023256e-05, + "loss": 0.0738, + "step": 10930 + }, + { + "epoch": 11.56, + "learning_rate": 2.1128435517970403e-05, + "loss": 0.0942, + "step": 10932 + }, + { + "epoch": 11.56, + "learning_rate": 2.1123150105708246e-05, + "loss": 0.0721, + "step": 10934 + }, + { + "epoch": 11.56, + "learning_rate": 2.1117864693446092e-05, + "loss": 0.1223, + "step": 10936 + }, + { + "epoch": 11.56, + "learning_rate": 2.1112579281183935e-05, + "loss": 0.0341, + "step": 10938 + }, + { + "epoch": 11.56, + "learning_rate": 2.1107293868921777e-05, + "loss": 0.0784, + "step": 10940 + }, + { + "epoch": 11.57, + "learning_rate": 2.110200845665962e-05, + "loss": 0.0329, + "step": 10942 + }, + { + "epoch": 11.57, + "learning_rate": 2.1096723044397466e-05, + "loss": 0.0208, + "step": 10944 + }, + { + "epoch": 11.57, + "learning_rate": 2.109143763213531e-05, + "loss": 0.0544, + "step": 10946 + }, + { + "epoch": 11.57, + "learning_rate": 2.108615221987315e-05, + "loss": 0.0241, + "step": 10948 + }, + { + "epoch": 11.58, + "learning_rate": 2.1080866807610994e-05, + "loss": 0.0443, + "step": 10950 + }, + { + "epoch": 11.58, + "learning_rate": 2.107558139534884e-05, + "loss": 0.0468, + "step": 10952 + }, + { + "epoch": 11.58, + "learning_rate": 2.1070295983086683e-05, + "loss": 0.0101, + "step": 10954 + }, + { + "epoch": 11.58, + "learning_rate": 2.1065010570824526e-05, + "loss": 0.132, + "step": 10956 + }, + { + "epoch": 11.58, + "learning_rate": 2.1059725158562368e-05, + "loss": 0.0627, + "step": 10958 + }, + { + "epoch": 11.59, + "learning_rate": 2.105443974630021e-05, + "loss": 0.033, + "step": 10960 + }, + { + "epoch": 11.59, + "learning_rate": 2.1049154334038057e-05, + "loss": 0.0594, + "step": 10962 + }, + { + "epoch": 11.59, + "learning_rate": 2.10438689217759e-05, + "loss": 0.0433, + "step": 10964 + }, + { + "epoch": 11.59, + "learning_rate": 2.1038583509513742e-05, + "loss": 0.0288, + "step": 10966 + }, + { + "epoch": 11.59, + "learning_rate": 2.1033298097251585e-05, + "loss": 0.0383, + "step": 10968 + }, + { + "epoch": 11.6, + "learning_rate": 2.102801268498943e-05, + "loss": 0.118, + "step": 10970 + }, + { + "epoch": 11.6, + "learning_rate": 2.1022727272727274e-05, + "loss": 0.0767, + "step": 10972 + }, + { + "epoch": 11.6, + "learning_rate": 2.1017441860465116e-05, + "loss": 0.1311, + "step": 10974 + }, + { + "epoch": 11.6, + "learning_rate": 2.101215644820296e-05, + "loss": 0.0383, + "step": 10976 + }, + { + "epoch": 11.6, + "learning_rate": 2.10068710359408e-05, + "loss": 0.0315, + "step": 10978 + }, + { + "epoch": 11.61, + "learning_rate": 2.1001585623678648e-05, + "loss": 0.1211, + "step": 10980 + }, + { + "epoch": 11.61, + "learning_rate": 2.099630021141649e-05, + "loss": 0.1321, + "step": 10982 + }, + { + "epoch": 11.61, + "learning_rate": 2.0991014799154336e-05, + "loss": 0.0749, + "step": 10984 + }, + { + "epoch": 11.61, + "learning_rate": 2.098572938689218e-05, + "loss": 0.0581, + "step": 10986 + }, + { + "epoch": 11.62, + "learning_rate": 2.0980443974630022e-05, + "loss": 0.0501, + "step": 10988 + }, + { + "epoch": 11.62, + "learning_rate": 2.0975158562367868e-05, + "loss": 0.1586, + "step": 10990 + }, + { + "epoch": 11.62, + "learning_rate": 2.096987315010571e-05, + "loss": 0.1585, + "step": 10992 + }, + { + "epoch": 11.62, + "learning_rate": 2.0964587737843553e-05, + "loss": 0.016, + "step": 10994 + }, + { + "epoch": 11.62, + "learning_rate": 2.0959302325581396e-05, + "loss": 0.0378, + "step": 10996 + }, + { + "epoch": 11.63, + "learning_rate": 2.0954016913319242e-05, + "loss": 0.0257, + "step": 10998 + }, + { + "epoch": 11.63, + "learning_rate": 2.0948731501057085e-05, + "loss": 0.0754, + "step": 11000 + }, + { + "epoch": 11.63, + "eval_cer": 0.027586206896551724, + "eval_loss": 0.871033787727356, + "eval_runtime": 128.8039, + "eval_samples_per_second": 6.529, + "eval_steps_per_second": 0.823, + "step": 11000 + }, + { + "epoch": 11.63, + "learning_rate": 2.0943446088794927e-05, + "loss": 0.0617, + "step": 11002 + }, + { + "epoch": 11.63, + "learning_rate": 2.093816067653277e-05, + "loss": 0.0261, + "step": 11004 + }, + { + "epoch": 11.63, + "learning_rate": 2.0932875264270613e-05, + "loss": 0.0115, + "step": 11006 + }, + { + "epoch": 11.64, + "learning_rate": 2.092758985200846e-05, + "loss": 0.0594, + "step": 11008 + }, + { + "epoch": 11.64, + "learning_rate": 2.09223044397463e-05, + "loss": 0.0841, + "step": 11010 + }, + { + "epoch": 11.64, + "learning_rate": 2.0917019027484144e-05, + "loss": 0.0745, + "step": 11012 + }, + { + "epoch": 11.64, + "learning_rate": 2.0911733615221987e-05, + "loss": 0.0111, + "step": 11014 + }, + { + "epoch": 11.64, + "learning_rate": 2.0906448202959833e-05, + "loss": 0.0268, + "step": 11016 + }, + { + "epoch": 11.65, + "learning_rate": 2.0901162790697675e-05, + "loss": 0.0558, + "step": 11018 + }, + { + "epoch": 11.65, + "learning_rate": 2.0895877378435518e-05, + "loss": 0.0958, + "step": 11020 + }, + { + "epoch": 11.65, + "learning_rate": 2.089059196617336e-05, + "loss": 0.0142, + "step": 11022 + }, + { + "epoch": 11.65, + "learning_rate": 2.0885306553911203e-05, + "loss": 0.013, + "step": 11024 + }, + { + "epoch": 11.66, + "learning_rate": 2.088002114164905e-05, + "loss": 0.0168, + "step": 11026 + }, + { + "epoch": 11.66, + "learning_rate": 2.0874735729386892e-05, + "loss": 0.0582, + "step": 11028 + }, + { + "epoch": 11.66, + "learning_rate": 2.0869450317124735e-05, + "loss": 0.1415, + "step": 11030 + }, + { + "epoch": 11.66, + "learning_rate": 2.086416490486258e-05, + "loss": 0.0226, + "step": 11032 + }, + { + "epoch": 11.66, + "learning_rate": 2.0858879492600424e-05, + "loss": 0.0594, + "step": 11034 + }, + { + "epoch": 11.67, + "learning_rate": 2.0853594080338266e-05, + "loss": 0.0904, + "step": 11036 + }, + { + "epoch": 11.67, + "learning_rate": 2.0848308668076112e-05, + "loss": 0.0244, + "step": 11038 + }, + { + "epoch": 11.67, + "learning_rate": 2.0843023255813955e-05, + "loss": 0.0395, + "step": 11040 + }, + { + "epoch": 11.67, + "learning_rate": 2.0837737843551798e-05, + "loss": 0.0772, + "step": 11042 + }, + { + "epoch": 11.67, + "learning_rate": 2.0832452431289644e-05, + "loss": 0.0739, + "step": 11044 + }, + { + "epoch": 11.68, + "learning_rate": 2.0827167019027486e-05, + "loss": 0.0474, + "step": 11046 + }, + { + "epoch": 11.68, + "learning_rate": 2.082188160676533e-05, + "loss": 0.0723, + "step": 11048 + }, + { + "epoch": 11.68, + "learning_rate": 2.0816596194503172e-05, + "loss": 0.0151, + "step": 11050 + }, + { + "epoch": 11.68, + "learning_rate": 2.0811310782241018e-05, + "loss": 0.024, + "step": 11052 + }, + { + "epoch": 11.68, + "learning_rate": 2.080602536997886e-05, + "loss": 0.0285, + "step": 11054 + }, + { + "epoch": 11.69, + "learning_rate": 2.0800739957716703e-05, + "loss": 0.146, + "step": 11056 + }, + { + "epoch": 11.69, + "learning_rate": 2.0795454545454546e-05, + "loss": 0.056, + "step": 11058 + }, + { + "epoch": 11.69, + "learning_rate": 2.079016913319239e-05, + "loss": 0.0106, + "step": 11060 + }, + { + "epoch": 11.69, + "learning_rate": 2.0784883720930235e-05, + "loss": 0.0663, + "step": 11062 + }, + { + "epoch": 11.7, + "learning_rate": 2.0779598308668077e-05, + "loss": 0.1001, + "step": 11064 + }, + { + "epoch": 11.7, + "learning_rate": 2.077431289640592e-05, + "loss": 0.0979, + "step": 11066 + }, + { + "epoch": 11.7, + "learning_rate": 2.0769027484143763e-05, + "loss": 0.0581, + "step": 11068 + }, + { + "epoch": 11.7, + "learning_rate": 2.076374207188161e-05, + "loss": 0.0471, + "step": 11070 + }, + { + "epoch": 11.7, + "learning_rate": 2.075845665961945e-05, + "loss": 0.0102, + "step": 11072 + }, + { + "epoch": 11.71, + "learning_rate": 2.0753171247357294e-05, + "loss": 0.0279, + "step": 11074 + }, + { + "epoch": 11.71, + "learning_rate": 2.0747885835095137e-05, + "loss": 0.0497, + "step": 11076 + }, + { + "epoch": 11.71, + "learning_rate": 2.074260042283298e-05, + "loss": 0.0217, + "step": 11078 + }, + { + "epoch": 11.71, + "learning_rate": 2.0737315010570825e-05, + "loss": 0.0711, + "step": 11080 + }, + { + "epoch": 11.71, + "learning_rate": 2.0732029598308668e-05, + "loss": 0.0176, + "step": 11082 + }, + { + "epoch": 11.72, + "learning_rate": 2.072674418604651e-05, + "loss": 0.116, + "step": 11084 + }, + { + "epoch": 11.72, + "learning_rate": 2.0721458773784357e-05, + "loss": 0.0255, + "step": 11086 + }, + { + "epoch": 11.72, + "learning_rate": 2.07161733615222e-05, + "loss": 0.0766, + "step": 11088 + }, + { + "epoch": 11.72, + "learning_rate": 2.0710887949260045e-05, + "loss": 0.0453, + "step": 11090 + }, + { + "epoch": 11.73, + "learning_rate": 2.0705602536997888e-05, + "loss": 0.0803, + "step": 11092 + }, + { + "epoch": 11.73, + "learning_rate": 2.070031712473573e-05, + "loss": 0.0871, + "step": 11094 + }, + { + "epoch": 11.73, + "learning_rate": 2.0695031712473573e-05, + "loss": 0.0988, + "step": 11096 + }, + { + "epoch": 11.73, + "learning_rate": 2.068974630021142e-05, + "loss": 0.0257, + "step": 11098 + }, + { + "epoch": 11.73, + "learning_rate": 2.0684460887949262e-05, + "loss": 0.1108, + "step": 11100 + }, + { + "epoch": 11.74, + "learning_rate": 2.0679175475687105e-05, + "loss": 0.0522, + "step": 11102 + }, + { + "epoch": 11.74, + "learning_rate": 2.0673890063424948e-05, + "loss": 0.0438, + "step": 11104 + }, + { + "epoch": 11.74, + "learning_rate": 2.0668604651162794e-05, + "loss": 0.0334, + "step": 11106 + }, + { + "epoch": 11.74, + "learning_rate": 2.0663319238900636e-05, + "loss": 0.0322, + "step": 11108 + }, + { + "epoch": 11.74, + "learning_rate": 2.065803382663848e-05, + "loss": 0.0284, + "step": 11110 + }, + { + "epoch": 11.75, + "learning_rate": 2.065274841437632e-05, + "loss": 0.0106, + "step": 11112 + }, + { + "epoch": 11.75, + "learning_rate": 2.0647463002114164e-05, + "loss": 0.0164, + "step": 11114 + }, + { + "epoch": 11.75, + "learning_rate": 2.064217758985201e-05, + "loss": 0.0614, + "step": 11116 + }, + { + "epoch": 11.75, + "learning_rate": 2.0636892177589853e-05, + "loss": 0.0688, + "step": 11118 + }, + { + "epoch": 11.75, + "learning_rate": 2.0631606765327696e-05, + "loss": 0.0633, + "step": 11120 + }, + { + "epoch": 11.76, + "learning_rate": 2.062632135306554e-05, + "loss": 0.1126, + "step": 11122 + }, + { + "epoch": 11.76, + "learning_rate": 2.0621035940803384e-05, + "loss": 0.004, + "step": 11124 + }, + { + "epoch": 11.76, + "learning_rate": 2.0615750528541227e-05, + "loss": 0.0379, + "step": 11126 + }, + { + "epoch": 11.76, + "learning_rate": 2.061046511627907e-05, + "loss": 0.085, + "step": 11128 + }, + { + "epoch": 11.77, + "learning_rate": 2.0605179704016912e-05, + "loss": 0.0781, + "step": 11130 + }, + { + "epoch": 11.77, + "learning_rate": 2.0599894291754755e-05, + "loss": 0.0755, + "step": 11132 + }, + { + "epoch": 11.77, + "learning_rate": 2.05946088794926e-05, + "loss": 0.0244, + "step": 11134 + }, + { + "epoch": 11.77, + "learning_rate": 2.0589323467230444e-05, + "loss": 0.0323, + "step": 11136 + }, + { + "epoch": 11.77, + "learning_rate": 2.0584038054968287e-05, + "loss": 0.0872, + "step": 11138 + }, + { + "epoch": 11.78, + "learning_rate": 2.0578752642706133e-05, + "loss": 0.0902, + "step": 11140 + }, + { + "epoch": 11.78, + "learning_rate": 2.0573467230443975e-05, + "loss": 0.0517, + "step": 11142 + }, + { + "epoch": 11.78, + "learning_rate": 2.056818181818182e-05, + "loss": 0.1477, + "step": 11144 + }, + { + "epoch": 11.78, + "learning_rate": 2.0562896405919664e-05, + "loss": 0.0487, + "step": 11146 + }, + { + "epoch": 11.78, + "learning_rate": 2.0557610993657507e-05, + "loss": 0.0588, + "step": 11148 + }, + { + "epoch": 11.79, + "learning_rate": 2.055232558139535e-05, + "loss": 0.0118, + "step": 11150 + }, + { + "epoch": 11.79, + "learning_rate": 2.0547040169133195e-05, + "loss": 0.0789, + "step": 11152 + }, + { + "epoch": 11.79, + "learning_rate": 2.0541754756871038e-05, + "loss": 0.0406, + "step": 11154 + }, + { + "epoch": 11.79, + "learning_rate": 2.053646934460888e-05, + "loss": 0.0636, + "step": 11156 + }, + { + "epoch": 11.79, + "learning_rate": 2.0531183932346723e-05, + "loss": 0.0327, + "step": 11158 + }, + { + "epoch": 11.8, + "learning_rate": 2.052589852008457e-05, + "loss": 0.0546, + "step": 11160 + }, + { + "epoch": 11.8, + "learning_rate": 2.0520613107822412e-05, + "loss": 0.0355, + "step": 11162 + }, + { + "epoch": 11.8, + "learning_rate": 2.0515327695560255e-05, + "loss": 0.053, + "step": 11164 + }, + { + "epoch": 11.8, + "learning_rate": 2.0510042283298097e-05, + "loss": 0.0439, + "step": 11166 + }, + { + "epoch": 11.81, + "learning_rate": 2.050475687103594e-05, + "loss": 0.0155, + "step": 11168 + }, + { + "epoch": 11.81, + "learning_rate": 2.0499471458773786e-05, + "loss": 0.0532, + "step": 11170 + }, + { + "epoch": 11.81, + "learning_rate": 2.049418604651163e-05, + "loss": 0.0108, + "step": 11172 + }, + { + "epoch": 11.81, + "learning_rate": 2.048890063424947e-05, + "loss": 0.0571, + "step": 11174 + }, + { + "epoch": 11.81, + "learning_rate": 2.0483615221987314e-05, + "loss": 0.0961, + "step": 11176 + }, + { + "epoch": 11.82, + "learning_rate": 2.047832980972516e-05, + "loss": 0.0407, + "step": 11178 + }, + { + "epoch": 11.82, + "learning_rate": 2.0473044397463003e-05, + "loss": 0.0304, + "step": 11180 + }, + { + "epoch": 11.82, + "learning_rate": 2.0467758985200846e-05, + "loss": 0.0345, + "step": 11182 + }, + { + "epoch": 11.82, + "learning_rate": 2.0462473572938688e-05, + "loss": 0.019, + "step": 11184 + }, + { + "epoch": 11.82, + "learning_rate": 2.045718816067653e-05, + "loss": 0.071, + "step": 11186 + }, + { + "epoch": 11.83, + "learning_rate": 2.0451902748414377e-05, + "loss": 0.2046, + "step": 11188 + }, + { + "epoch": 11.83, + "learning_rate": 2.044661733615222e-05, + "loss": 0.0732, + "step": 11190 + }, + { + "epoch": 11.83, + "learning_rate": 2.0441331923890066e-05, + "loss": 0.0416, + "step": 11192 + }, + { + "epoch": 11.83, + "learning_rate": 2.043604651162791e-05, + "loss": 0.031, + "step": 11194 + }, + { + "epoch": 11.84, + "learning_rate": 2.043076109936575e-05, + "loss": 0.0405, + "step": 11196 + }, + { + "epoch": 11.84, + "learning_rate": 2.0425475687103597e-05, + "loss": 0.0323, + "step": 11198 + }, + { + "epoch": 11.84, + "learning_rate": 2.042019027484144e-05, + "loss": 0.063, + "step": 11200 + }, + { + "epoch": 11.84, + "learning_rate": 2.0414904862579282e-05, + "loss": 0.1026, + "step": 11202 + }, + { + "epoch": 11.84, + "learning_rate": 2.0409619450317125e-05, + "loss": 0.0508, + "step": 11204 + }, + { + "epoch": 11.85, + "learning_rate": 2.040433403805497e-05, + "loss": 0.0286, + "step": 11206 + }, + { + "epoch": 11.85, + "learning_rate": 2.0399048625792814e-05, + "loss": 0.0337, + "step": 11208 + }, + { + "epoch": 11.85, + "learning_rate": 2.0393763213530657e-05, + "loss": 0.061, + "step": 11210 + }, + { + "epoch": 11.85, + "learning_rate": 2.03884778012685e-05, + "loss": 0.0663, + "step": 11212 + }, + { + "epoch": 11.85, + "learning_rate": 2.0383192389006345e-05, + "loss": 0.091, + "step": 11214 + }, + { + "epoch": 11.86, + "learning_rate": 2.0377906976744188e-05, + "loss": 0.0043, + "step": 11216 + }, + { + "epoch": 11.86, + "learning_rate": 2.037262156448203e-05, + "loss": 0.0518, + "step": 11218 + }, + { + "epoch": 11.86, + "learning_rate": 2.0367336152219873e-05, + "loss": 0.0485, + "step": 11220 + }, + { + "epoch": 11.86, + "learning_rate": 2.0362050739957716e-05, + "loss": 0.0537, + "step": 11222 + }, + { + "epoch": 11.86, + "learning_rate": 2.0356765327695562e-05, + "loss": 0.0262, + "step": 11224 + }, + { + "epoch": 11.87, + "learning_rate": 2.0351479915433405e-05, + "loss": 0.1172, + "step": 11226 + }, + { + "epoch": 11.87, + "learning_rate": 2.0346194503171247e-05, + "loss": 0.0423, + "step": 11228 + }, + { + "epoch": 11.87, + "learning_rate": 2.034355179704017e-05, + "loss": 0.0751, + "step": 11230 + }, + { + "epoch": 11.87, + "learning_rate": 2.0338266384778013e-05, + "loss": 0.0498, + "step": 11232 + }, + { + "epoch": 11.88, + "learning_rate": 2.033298097251586e-05, + "loss": 0.0332, + "step": 11234 + }, + { + "epoch": 11.88, + "learning_rate": 2.0327695560253702e-05, + "loss": 0.0439, + "step": 11236 + }, + { + "epoch": 11.88, + "learning_rate": 2.0322410147991544e-05, + "loss": 0.0399, + "step": 11238 + }, + { + "epoch": 11.88, + "learning_rate": 2.0317124735729387e-05, + "loss": 0.0337, + "step": 11240 + }, + { + "epoch": 11.88, + "learning_rate": 2.0311839323467233e-05, + "loss": 0.0582, + "step": 11242 + }, + { + "epoch": 11.89, + "learning_rate": 2.0306553911205076e-05, + "loss": 0.1019, + "step": 11244 + }, + { + "epoch": 11.89, + "learning_rate": 2.030126849894292e-05, + "loss": 0.0754, + "step": 11246 + }, + { + "epoch": 11.89, + "learning_rate": 2.029598308668076e-05, + "loss": 0.0471, + "step": 11248 + }, + { + "epoch": 11.89, + "learning_rate": 2.0290697674418604e-05, + "loss": 0.0311, + "step": 11250 + }, + { + "epoch": 11.89, + "learning_rate": 2.028541226215645e-05, + "loss": 0.0573, + "step": 11252 + }, + { + "epoch": 11.9, + "learning_rate": 2.0280126849894293e-05, + "loss": 0.0474, + "step": 11254 + }, + { + "epoch": 11.9, + "learning_rate": 2.0274841437632135e-05, + "loss": 0.0533, + "step": 11256 + }, + { + "epoch": 11.9, + "learning_rate": 2.0269556025369978e-05, + "loss": 0.1002, + "step": 11258 + }, + { + "epoch": 11.9, + "learning_rate": 2.0264270613107824e-05, + "loss": 0.045, + "step": 11260 + }, + { + "epoch": 11.9, + "learning_rate": 2.0258985200845667e-05, + "loss": 0.0355, + "step": 11262 + }, + { + "epoch": 11.91, + "learning_rate": 2.025369978858351e-05, + "loss": 0.0453, + "step": 11264 + }, + { + "epoch": 11.91, + "learning_rate": 2.0248414376321352e-05, + "loss": 0.0356, + "step": 11266 + }, + { + "epoch": 11.91, + "learning_rate": 2.0243128964059198e-05, + "loss": 0.0951, + "step": 11268 + }, + { + "epoch": 11.91, + "learning_rate": 2.023784355179704e-05, + "loss": 0.0593, + "step": 11270 + }, + { + "epoch": 11.92, + "learning_rate": 2.0232558139534883e-05, + "loss": 0.046, + "step": 11272 + }, + { + "epoch": 11.92, + "learning_rate": 2.022727272727273e-05, + "loss": 0.0185, + "step": 11274 + }, + { + "epoch": 11.92, + "learning_rate": 2.0221987315010572e-05, + "loss": 0.041, + "step": 11276 + }, + { + "epoch": 11.92, + "learning_rate": 2.0216701902748418e-05, + "loss": 0.0857, + "step": 11278 + }, + { + "epoch": 11.92, + "learning_rate": 2.021141649048626e-05, + "loss": 0.0111, + "step": 11280 + }, + { + "epoch": 11.93, + "learning_rate": 2.0206131078224104e-05, + "loss": 0.0913, + "step": 11282 + }, + { + "epoch": 11.93, + "learning_rate": 2.0200845665961946e-05, + "loss": 0.0448, + "step": 11284 + }, + { + "epoch": 11.93, + "learning_rate": 2.019556025369979e-05, + "loss": 0.0471, + "step": 11286 + }, + { + "epoch": 11.93, + "learning_rate": 2.0190274841437635e-05, + "loss": 0.0396, + "step": 11288 + }, + { + "epoch": 11.93, + "learning_rate": 2.0184989429175478e-05, + "loss": 0.0769, + "step": 11290 + }, + { + "epoch": 11.94, + "learning_rate": 2.017970401691332e-05, + "loss": 0.053, + "step": 11292 + }, + { + "epoch": 11.94, + "learning_rate": 2.0174418604651163e-05, + "loss": 0.1192, + "step": 11294 + }, + { + "epoch": 11.94, + "learning_rate": 2.016913319238901e-05, + "loss": 0.0102, + "step": 11296 + }, + { + "epoch": 11.94, + "learning_rate": 2.0163847780126852e-05, + "loss": 0.0832, + "step": 11298 + }, + { + "epoch": 11.95, + "learning_rate": 2.0158562367864694e-05, + "loss": 0.0162, + "step": 11300 + }, + { + "epoch": 11.95, + "learning_rate": 2.0153276955602537e-05, + "loss": 0.0205, + "step": 11302 + }, + { + "epoch": 11.95, + "learning_rate": 2.014799154334038e-05, + "loss": 0.016, + "step": 11304 + }, + { + "epoch": 11.95, + "learning_rate": 2.0142706131078226e-05, + "loss": 0.0455, + "step": 11306 + }, + { + "epoch": 11.95, + "learning_rate": 2.013742071881607e-05, + "loss": 0.0433, + "step": 11308 + }, + { + "epoch": 11.96, + "learning_rate": 2.013213530655391e-05, + "loss": 0.0466, + "step": 11310 + }, + { + "epoch": 11.96, + "learning_rate": 2.0126849894291754e-05, + "loss": 0.0151, + "step": 11312 + }, + { + "epoch": 11.96, + "learning_rate": 2.01215644820296e-05, + "loss": 0.0353, + "step": 11314 + }, + { + "epoch": 11.96, + "learning_rate": 2.0116279069767443e-05, + "loss": 0.1275, + "step": 11316 + }, + { + "epoch": 11.96, + "learning_rate": 2.0110993657505285e-05, + "loss": 0.1457, + "step": 11318 + }, + { + "epoch": 11.97, + "learning_rate": 2.0105708245243128e-05, + "loss": 0.0438, + "step": 11320 + }, + { + "epoch": 11.97, + "learning_rate": 2.0100422832980974e-05, + "loss": 0.0343, + "step": 11322 + }, + { + "epoch": 11.97, + "learning_rate": 2.0095137420718817e-05, + "loss": 0.1007, + "step": 11324 + }, + { + "epoch": 11.97, + "learning_rate": 2.0089852008456663e-05, + "loss": 0.0562, + "step": 11326 + }, + { + "epoch": 11.97, + "learning_rate": 2.0084566596194505e-05, + "loss": 0.0126, + "step": 11328 + }, + { + "epoch": 11.98, + "learning_rate": 2.0079281183932348e-05, + "loss": 0.0698, + "step": 11330 + }, + { + "epoch": 11.98, + "learning_rate": 2.007399577167019e-05, + "loss": 0.0589, + "step": 11332 + }, + { + "epoch": 11.98, + "learning_rate": 2.0068710359408037e-05, + "loss": 0.1048, + "step": 11334 + }, + { + "epoch": 11.98, + "learning_rate": 2.006342494714588e-05, + "loss": 0.0315, + "step": 11336 + }, + { + "epoch": 11.99, + "learning_rate": 2.0058139534883722e-05, + "loss": 0.0187, + "step": 11338 + }, + { + "epoch": 11.99, + "learning_rate": 2.0052854122621565e-05, + "loss": 0.0887, + "step": 11340 + }, + { + "epoch": 11.99, + "learning_rate": 2.004756871035941e-05, + "loss": 0.069, + "step": 11342 + }, + { + "epoch": 11.99, + "learning_rate": 2.0042283298097253e-05, + "loss": 0.0561, + "step": 11344 + }, + { + "epoch": 11.99, + "learning_rate": 2.0036997885835096e-05, + "loss": 0.0311, + "step": 11346 + }, + { + "epoch": 12.0, + "learning_rate": 2.003171247357294e-05, + "loss": 0.0373, + "step": 11348 + }, + { + "epoch": 12.0, + "learning_rate": 2.002642706131078e-05, + "loss": 0.0202, + "step": 11350 + }, + { + "epoch": 12.0, + "learning_rate": 2.0021141649048628e-05, + "loss": 0.0274, + "step": 11352 + }, + { + "epoch": 12.0, + "learning_rate": 2.001585623678647e-05, + "loss": 0.0598, + "step": 11354 + }, + { + "epoch": 12.0, + "learning_rate": 2.0010570824524313e-05, + "loss": 0.0263, + "step": 11356 + }, + { + "epoch": 12.01, + "learning_rate": 2.0005285412262156e-05, + "loss": 0.0716, + "step": 11358 + }, + { + "epoch": 12.01, + "learning_rate": 2e-05, + "loss": 0.0086, + "step": 11360 + }, + { + "epoch": 12.01, + "learning_rate": 1.9994714587737844e-05, + "loss": 0.0221, + "step": 11362 + }, + { + "epoch": 12.01, + "learning_rate": 1.9989429175475687e-05, + "loss": 0.037, + "step": 11364 + }, + { + "epoch": 12.01, + "learning_rate": 1.998414376321353e-05, + "loss": 0.0976, + "step": 11366 + }, + { + "epoch": 12.02, + "learning_rate": 1.9978858350951372e-05, + "loss": 0.0408, + "step": 11368 + }, + { + "epoch": 12.02, + "learning_rate": 1.997357293868922e-05, + "loss": 0.025, + "step": 11370 + }, + { + "epoch": 12.02, + "learning_rate": 1.996828752642706e-05, + "loss": 0.0182, + "step": 11372 + }, + { + "epoch": 12.02, + "learning_rate": 1.9963002114164904e-05, + "loss": 0.0515, + "step": 11374 + }, + { + "epoch": 12.03, + "learning_rate": 1.995771670190275e-05, + "loss": 0.0557, + "step": 11376 + }, + { + "epoch": 12.03, + "learning_rate": 1.9952431289640592e-05, + "loss": 0.0922, + "step": 11378 + }, + { + "epoch": 12.03, + "learning_rate": 1.994714587737844e-05, + "loss": 0.0551, + "step": 11380 + }, + { + "epoch": 12.03, + "learning_rate": 1.994186046511628e-05, + "loss": 0.1137, + "step": 11382 + }, + { + "epoch": 12.03, + "learning_rate": 1.9936575052854124e-05, + "loss": 0.0116, + "step": 11384 + }, + { + "epoch": 12.04, + "learning_rate": 1.9931289640591967e-05, + "loss": 0.0234, + "step": 11386 + }, + { + "epoch": 12.04, + "learning_rate": 1.9926004228329813e-05, + "loss": 0.0043, + "step": 11388 + }, + { + "epoch": 12.04, + "learning_rate": 1.9920718816067655e-05, + "loss": 0.0436, + "step": 11390 + }, + { + "epoch": 12.04, + "learning_rate": 1.9915433403805498e-05, + "loss": 0.0186, + "step": 11392 + }, + { + "epoch": 12.04, + "learning_rate": 1.991014799154334e-05, + "loss": 0.0104, + "step": 11394 + }, + { + "epoch": 12.05, + "learning_rate": 1.9904862579281187e-05, + "loss": 0.0784, + "step": 11396 + }, + { + "epoch": 12.05, + "learning_rate": 1.989957716701903e-05, + "loss": 0.0521, + "step": 11398 + }, + { + "epoch": 12.05, + "learning_rate": 1.9894291754756872e-05, + "loss": 0.0724, + "step": 11400 + }, + { + "epoch": 12.05, + "learning_rate": 1.9889006342494715e-05, + "loss": 0.0619, + "step": 11402 + }, + { + "epoch": 12.05, + "learning_rate": 1.9883720930232557e-05, + "loss": 0.0655, + "step": 11404 + }, + { + "epoch": 12.06, + "learning_rate": 1.9878435517970403e-05, + "loss": 0.0641, + "step": 11406 + }, + { + "epoch": 12.06, + "learning_rate": 1.9873150105708246e-05, + "loss": 0.0981, + "step": 11408 + }, + { + "epoch": 12.06, + "learning_rate": 1.986786469344609e-05, + "loss": 0.0054, + "step": 11410 + }, + { + "epoch": 12.06, + "learning_rate": 1.986257928118393e-05, + "loss": 0.0116, + "step": 11412 + }, + { + "epoch": 12.07, + "learning_rate": 1.9857293868921777e-05, + "loss": 0.0746, + "step": 11414 + }, + { + "epoch": 12.07, + "learning_rate": 1.985200845665962e-05, + "loss": 0.0258, + "step": 11416 + }, + { + "epoch": 12.07, + "learning_rate": 1.9846723044397463e-05, + "loss": 0.0716, + "step": 11418 + }, + { + "epoch": 12.07, + "learning_rate": 1.9841437632135305e-05, + "loss": 0.0096, + "step": 11420 + }, + { + "epoch": 12.07, + "learning_rate": 1.9836152219873148e-05, + "loss": 0.0508, + "step": 11422 + }, + { + "epoch": 12.08, + "learning_rate": 1.9830866807610994e-05, + "loss": 0.0379, + "step": 11424 + }, + { + "epoch": 12.08, + "learning_rate": 1.9825581395348837e-05, + "loss": 0.0356, + "step": 11426 + }, + { + "epoch": 12.08, + "learning_rate": 1.9820295983086683e-05, + "loss": 0.0137, + "step": 11428 + }, + { + "epoch": 12.08, + "learning_rate": 1.9815010570824526e-05, + "loss": 0.0478, + "step": 11430 + }, + { + "epoch": 12.08, + "learning_rate": 1.980972515856237e-05, + "loss": 0.0576, + "step": 11432 + }, + { + "epoch": 12.09, + "learning_rate": 1.9804439746300214e-05, + "loss": 0.0398, + "step": 11434 + }, + { + "epoch": 12.09, + "learning_rate": 1.9799154334038057e-05, + "loss": 0.0909, + "step": 11436 + }, + { + "epoch": 12.09, + "learning_rate": 1.97938689217759e-05, + "loss": 0.0252, + "step": 11438 + }, + { + "epoch": 12.09, + "learning_rate": 1.9788583509513742e-05, + "loss": 0.0412, + "step": 11440 + }, + { + "epoch": 12.1, + "learning_rate": 1.978329809725159e-05, + "loss": 0.0248, + "step": 11442 + }, + { + "epoch": 12.1, + "learning_rate": 1.977801268498943e-05, + "loss": 0.0725, + "step": 11444 + }, + { + "epoch": 12.1, + "learning_rate": 1.9772727272727274e-05, + "loss": 0.03, + "step": 11446 + }, + { + "epoch": 12.1, + "learning_rate": 1.9767441860465116e-05, + "loss": 0.0491, + "step": 11448 + }, + { + "epoch": 12.1, + "learning_rate": 1.9762156448202962e-05, + "loss": 0.0061, + "step": 11450 + }, + { + "epoch": 12.11, + "learning_rate": 1.9756871035940805e-05, + "loss": 0.0138, + "step": 11452 + }, + { + "epoch": 12.11, + "learning_rate": 1.9751585623678648e-05, + "loss": 0.0164, + "step": 11454 + }, + { + "epoch": 12.11, + "learning_rate": 1.974630021141649e-05, + "loss": 0.0317, + "step": 11456 + }, + { + "epoch": 12.11, + "learning_rate": 1.9741014799154333e-05, + "loss": 0.0404, + "step": 11458 + }, + { + "epoch": 12.11, + "learning_rate": 1.973572938689218e-05, + "loss": 0.0162, + "step": 11460 + }, + { + "epoch": 12.12, + "learning_rate": 1.9730443974630022e-05, + "loss": 0.0183, + "step": 11462 + }, + { + "epoch": 12.12, + "learning_rate": 1.9725158562367865e-05, + "loss": 0.0297, + "step": 11464 + }, + { + "epoch": 12.12, + "learning_rate": 1.9719873150105707e-05, + "loss": 0.038, + "step": 11466 + }, + { + "epoch": 12.12, + "learning_rate": 1.9714587737843553e-05, + "loss": 0.0497, + "step": 11468 + }, + { + "epoch": 12.12, + "learning_rate": 1.9709302325581396e-05, + "loss": 0.0859, + "step": 11470 + }, + { + "epoch": 12.13, + "learning_rate": 1.970401691331924e-05, + "loss": 0.0235, + "step": 11472 + }, + { + "epoch": 12.13, + "learning_rate": 1.969873150105708e-05, + "loss": 0.0481, + "step": 11474 + }, + { + "epoch": 12.13, + "learning_rate": 1.9693446088794927e-05, + "loss": 0.0667, + "step": 11476 + }, + { + "epoch": 12.13, + "learning_rate": 1.968816067653277e-05, + "loss": 0.0276, + "step": 11478 + }, + { + "epoch": 12.14, + "learning_rate": 1.9682875264270613e-05, + "loss": 0.0065, + "step": 11480 + }, + { + "epoch": 12.14, + "learning_rate": 1.967758985200846e-05, + "loss": 0.0434, + "step": 11482 + }, + { + "epoch": 12.14, + "learning_rate": 1.96723044397463e-05, + "loss": 0.0375, + "step": 11484 + }, + { + "epoch": 12.14, + "learning_rate": 1.9667019027484148e-05, + "loss": 0.0296, + "step": 11486 + }, + { + "epoch": 12.14, + "learning_rate": 1.966173361522199e-05, + "loss": 0.0439, + "step": 11488 + }, + { + "epoch": 12.15, + "learning_rate": 1.9656448202959833e-05, + "loss": 0.0253, + "step": 11490 + }, + { + "epoch": 12.15, + "learning_rate": 1.9651162790697676e-05, + "loss": 0.0521, + "step": 11492 + }, + { + "epoch": 12.15, + "learning_rate": 1.9645877378435518e-05, + "loss": 0.0333, + "step": 11494 + }, + { + "epoch": 12.15, + "learning_rate": 1.9640591966173364e-05, + "loss": 0.0429, + "step": 11496 + }, + { + "epoch": 12.15, + "learning_rate": 1.9635306553911207e-05, + "loss": 0.1049, + "step": 11498 + }, + { + "epoch": 12.16, + "learning_rate": 1.963002114164905e-05, + "loss": 0.129, + "step": 11500 + }, + { + "epoch": 12.16, + "eval_cer": 0.059048161869478484, + "eval_loss": 0.6092634797096252, + "eval_runtime": 125.9114, + "eval_samples_per_second": 6.679, + "eval_steps_per_second": 0.842, + "step": 11500 + }, + { + "epoch": 12.16, + "learning_rate": 1.9624735729386892e-05, + "loss": 0.0586, + "step": 11502 + }, + { + "epoch": 12.16, + "learning_rate": 1.961945031712474e-05, + "loss": 0.0584, + "step": 11504 + }, + { + "epoch": 12.16, + "learning_rate": 1.961416490486258e-05, + "loss": 0.0592, + "step": 11506 + }, + { + "epoch": 12.16, + "learning_rate": 1.9608879492600424e-05, + "loss": 0.0383, + "step": 11508 + }, + { + "epoch": 12.17, + "learning_rate": 1.9603594080338266e-05, + "loss": 0.0369, + "step": 11510 + }, + { + "epoch": 12.17, + "learning_rate": 1.959830866807611e-05, + "loss": 0.0529, + "step": 11512 + }, + { + "epoch": 12.17, + "learning_rate": 1.9593023255813955e-05, + "loss": 0.0497, + "step": 11514 + }, + { + "epoch": 12.17, + "learning_rate": 1.9587737843551798e-05, + "loss": 0.0366, + "step": 11516 + }, + { + "epoch": 12.18, + "learning_rate": 1.958245243128964e-05, + "loss": 0.044, + "step": 11518 + }, + { + "epoch": 12.18, + "learning_rate": 1.9577167019027483e-05, + "loss": 0.0366, + "step": 11520 + }, + { + "epoch": 12.18, + "learning_rate": 1.957188160676533e-05, + "loss": 0.0423, + "step": 11522 + }, + { + "epoch": 12.18, + "learning_rate": 1.9566596194503172e-05, + "loss": 0.0449, + "step": 11524 + }, + { + "epoch": 12.18, + "learning_rate": 1.9561310782241014e-05, + "loss": 0.0332, + "step": 11526 + }, + { + "epoch": 12.19, + "learning_rate": 1.9556025369978857e-05, + "loss": 0.0076, + "step": 11528 + }, + { + "epoch": 12.19, + "learning_rate": 1.9550739957716703e-05, + "loss": 0.035, + "step": 11530 + }, + { + "epoch": 12.19, + "learning_rate": 1.9545454545454546e-05, + "loss": 0.0676, + "step": 11532 + }, + { + "epoch": 12.19, + "learning_rate": 1.9540169133192392e-05, + "loss": 0.0041, + "step": 11534 + }, + { + "epoch": 12.19, + "learning_rate": 1.9534883720930235e-05, + "loss": 0.0308, + "step": 11536 + }, + { + "epoch": 12.2, + "learning_rate": 1.9529598308668077e-05, + "loss": 0.0438, + "step": 11538 + }, + { + "epoch": 12.2, + "learning_rate": 1.9524312896405923e-05, + "loss": 0.0225, + "step": 11540 + }, + { + "epoch": 12.2, + "learning_rate": 1.9519027484143766e-05, + "loss": 0.0332, + "step": 11542 + }, + { + "epoch": 12.2, + "learning_rate": 1.951374207188161e-05, + "loss": 0.0264, + "step": 11544 + }, + { + "epoch": 12.21, + "learning_rate": 1.950845665961945e-05, + "loss": 0.0327, + "step": 11546 + }, + { + "epoch": 12.21, + "learning_rate": 1.9503171247357294e-05, + "loss": 0.0606, + "step": 11548 + }, + { + "epoch": 12.21, + "learning_rate": 1.949788583509514e-05, + "loss": 0.0591, + "step": 11550 + }, + { + "epoch": 12.21, + "learning_rate": 1.9492600422832983e-05, + "loss": 0.0971, + "step": 11552 + }, + { + "epoch": 12.21, + "learning_rate": 1.9487315010570825e-05, + "loss": 0.0467, + "step": 11554 + }, + { + "epoch": 12.22, + "learning_rate": 1.9482029598308668e-05, + "loss": 0.0173, + "step": 11556 + }, + { + "epoch": 12.22, + "learning_rate": 1.9476744186046514e-05, + "loss": 0.0635, + "step": 11558 + }, + { + "epoch": 12.22, + "learning_rate": 1.9471458773784357e-05, + "loss": 0.0159, + "step": 11560 + }, + { + "epoch": 12.22, + "learning_rate": 1.94661733615222e-05, + "loss": 0.0232, + "step": 11562 + }, + { + "epoch": 12.22, + "learning_rate": 1.9460887949260042e-05, + "loss": 0.022, + "step": 11564 + }, + { + "epoch": 12.23, + "learning_rate": 1.9455602536997885e-05, + "loss": 0.0191, + "step": 11566 + }, + { + "epoch": 12.23, + "learning_rate": 1.945031712473573e-05, + "loss": 0.0209, + "step": 11568 + }, + { + "epoch": 12.23, + "learning_rate": 1.9445031712473574e-05, + "loss": 0.0111, + "step": 11570 + }, + { + "epoch": 12.23, + "learning_rate": 1.9439746300211416e-05, + "loss": 0.0271, + "step": 11572 + }, + { + "epoch": 12.23, + "learning_rate": 1.943446088794926e-05, + "loss": 0.0219, + "step": 11574 + }, + { + "epoch": 12.24, + "learning_rate": 1.9429175475687105e-05, + "loss": 0.0302, + "step": 11576 + }, + { + "epoch": 12.24, + "learning_rate": 1.9423890063424948e-05, + "loss": 0.059, + "step": 11578 + }, + { + "epoch": 12.24, + "learning_rate": 1.941860465116279e-05, + "loss": 0.0582, + "step": 11580 + }, + { + "epoch": 12.24, + "learning_rate": 1.9413319238900633e-05, + "loss": 0.0951, + "step": 11582 + }, + { + "epoch": 12.25, + "learning_rate": 1.940803382663848e-05, + "loss": 0.0089, + "step": 11584 + }, + { + "epoch": 12.25, + "learning_rate": 1.9402748414376322e-05, + "loss": 0.0225, + "step": 11586 + }, + { + "epoch": 12.25, + "learning_rate": 1.9397463002114168e-05, + "loss": 0.1043, + "step": 11588 + }, + { + "epoch": 12.25, + "learning_rate": 1.939217758985201e-05, + "loss": 0.0561, + "step": 11590 + }, + { + "epoch": 12.25, + "learning_rate": 1.9386892177589853e-05, + "loss": 0.0499, + "step": 11592 + }, + { + "epoch": 12.26, + "learning_rate": 1.93816067653277e-05, + "loss": 0.0464, + "step": 11594 + }, + { + "epoch": 12.26, + "learning_rate": 1.9376321353065542e-05, + "loss": 0.0498, + "step": 11596 + }, + { + "epoch": 12.26, + "learning_rate": 1.9371035940803385e-05, + "loss": 0.0296, + "step": 11598 + }, + { + "epoch": 12.26, + "learning_rate": 1.9365750528541227e-05, + "loss": 0.0946, + "step": 11600 + }, + { + "epoch": 12.26, + "learning_rate": 1.936046511627907e-05, + "loss": 0.094, + "step": 11602 + }, + { + "epoch": 12.27, + "learning_rate": 1.9355179704016916e-05, + "loss": 0.1682, + "step": 11604 + }, + { + "epoch": 12.27, + "learning_rate": 1.934989429175476e-05, + "loss": 0.0512, + "step": 11606 + }, + { + "epoch": 12.27, + "learning_rate": 1.93446088794926e-05, + "loss": 0.1028, + "step": 11608 + }, + { + "epoch": 12.27, + "learning_rate": 1.9339323467230444e-05, + "loss": 0.1174, + "step": 11610 + }, + { + "epoch": 12.27, + "learning_rate": 1.933403805496829e-05, + "loss": 0.0504, + "step": 11612 + }, + { + "epoch": 12.28, + "learning_rate": 1.9328752642706133e-05, + "loss": 0.0252, + "step": 11614 + }, + { + "epoch": 12.28, + "learning_rate": 1.9323467230443975e-05, + "loss": 0.0987, + "step": 11616 + }, + { + "epoch": 12.28, + "learning_rate": 1.9318181818181818e-05, + "loss": 0.0166, + "step": 11618 + }, + { + "epoch": 12.28, + "learning_rate": 1.931289640591966e-05, + "loss": 0.1345, + "step": 11620 + }, + { + "epoch": 12.29, + "learning_rate": 1.9307610993657507e-05, + "loss": 0.018, + "step": 11622 + }, + { + "epoch": 12.29, + "learning_rate": 1.930232558139535e-05, + "loss": 0.0405, + "step": 11624 + }, + { + "epoch": 12.29, + "learning_rate": 1.9297040169133192e-05, + "loss": 0.0127, + "step": 11626 + }, + { + "epoch": 12.29, + "learning_rate": 1.9291754756871035e-05, + "loss": 0.0211, + "step": 11628 + }, + { + "epoch": 12.29, + "learning_rate": 1.928646934460888e-05, + "loss": 0.2615, + "step": 11630 + }, + { + "epoch": 12.3, + "learning_rate": 1.9281183932346724e-05, + "loss": 0.0352, + "step": 11632 + }, + { + "epoch": 12.3, + "learning_rate": 1.9275898520084566e-05, + "loss": 0.062, + "step": 11634 + }, + { + "epoch": 12.3, + "learning_rate": 1.9270613107822412e-05, + "loss": 0.0076, + "step": 11636 + }, + { + "epoch": 12.3, + "learning_rate": 1.9265327695560255e-05, + "loss": 0.1104, + "step": 11638 + }, + { + "epoch": 12.3, + "learning_rate": 1.9260042283298098e-05, + "loss": 0.0247, + "step": 11640 + }, + { + "epoch": 12.31, + "learning_rate": 1.9254756871035944e-05, + "loss": 0.0621, + "step": 11642 + }, + { + "epoch": 12.31, + "learning_rate": 1.9249471458773786e-05, + "loss": 0.0341, + "step": 11644 + }, + { + "epoch": 12.31, + "learning_rate": 1.924418604651163e-05, + "loss": 0.0243, + "step": 11646 + }, + { + "epoch": 12.31, + "learning_rate": 1.9238900634249475e-05, + "loss": 0.0353, + "step": 11648 + }, + { + "epoch": 12.32, + "learning_rate": 1.9233615221987318e-05, + "loss": 0.0273, + "step": 11650 + }, + { + "epoch": 12.32, + "learning_rate": 1.922832980972516e-05, + "loss": 0.0557, + "step": 11652 + }, + { + "epoch": 12.32, + "learning_rate": 1.9223044397463003e-05, + "loss": 0.0112, + "step": 11654 + }, + { + "epoch": 12.32, + "learning_rate": 1.9217758985200846e-05, + "loss": 0.0363, + "step": 11656 + }, + { + "epoch": 12.32, + "learning_rate": 1.9212473572938692e-05, + "loss": 0.0698, + "step": 11658 + }, + { + "epoch": 12.33, + "learning_rate": 1.9207188160676534e-05, + "loss": 0.16, + "step": 11660 + }, + { + "epoch": 12.33, + "learning_rate": 1.9201902748414377e-05, + "loss": 0.089, + "step": 11662 + }, + { + "epoch": 12.33, + "learning_rate": 1.919661733615222e-05, + "loss": 0.0624, + "step": 11664 + }, + { + "epoch": 12.33, + "learning_rate": 1.9191331923890062e-05, + "loss": 0.0254, + "step": 11666 + }, + { + "epoch": 12.33, + "learning_rate": 1.918604651162791e-05, + "loss": 0.0436, + "step": 11668 + }, + { + "epoch": 12.34, + "learning_rate": 1.918076109936575e-05, + "loss": 0.0417, + "step": 11670 + }, + { + "epoch": 12.34, + "learning_rate": 1.9175475687103594e-05, + "loss": 0.032, + "step": 11672 + }, + { + "epoch": 12.34, + "learning_rate": 1.9170190274841437e-05, + "loss": 0.0656, + "step": 11674 + }, + { + "epoch": 12.34, + "learning_rate": 1.9164904862579283e-05, + "loss": 0.0597, + "step": 11676 + }, + { + "epoch": 12.34, + "learning_rate": 1.9159619450317125e-05, + "loss": 0.0349, + "step": 11678 + }, + { + "epoch": 12.35, + "learning_rate": 1.9154334038054968e-05, + "loss": 0.0432, + "step": 11680 + }, + { + "epoch": 12.35, + "learning_rate": 1.914904862579281e-05, + "loss": 0.0244, + "step": 11682 + }, + { + "epoch": 12.35, + "learning_rate": 1.9143763213530653e-05, + "loss": 0.0721, + "step": 11684 + }, + { + "epoch": 12.35, + "learning_rate": 1.91384778012685e-05, + "loss": 0.0318, + "step": 11686 + }, + { + "epoch": 12.36, + "learning_rate": 1.9133192389006342e-05, + "loss": 0.0717, + "step": 11688 + }, + { + "epoch": 12.36, + "learning_rate": 1.9127906976744188e-05, + "loss": 0.0474, + "step": 11690 + }, + { + "epoch": 12.36, + "learning_rate": 1.912262156448203e-05, + "loss": 0.0258, + "step": 11692 + }, + { + "epoch": 12.36, + "learning_rate": 1.9117336152219877e-05, + "loss": 0.0452, + "step": 11694 + }, + { + "epoch": 12.36, + "learning_rate": 1.911205073995772e-05, + "loss": 0.0167, + "step": 11696 + }, + { + "epoch": 12.37, + "learning_rate": 1.9106765327695562e-05, + "loss": 0.0303, + "step": 11698 + }, + { + "epoch": 12.37, + "learning_rate": 1.9101479915433405e-05, + "loss": 0.0828, + "step": 11700 + }, + { + "epoch": 12.37, + "learning_rate": 1.9096194503171247e-05, + "loss": 0.015, + "step": 11702 + }, + { + "epoch": 12.37, + "learning_rate": 1.9090909090909094e-05, + "loss": 0.036, + "step": 11704 + }, + { + "epoch": 12.37, + "learning_rate": 1.9085623678646936e-05, + "loss": 0.0045, + "step": 11706 + }, + { + "epoch": 12.38, + "learning_rate": 1.908033826638478e-05, + "loss": 0.0425, + "step": 11708 + }, + { + "epoch": 12.38, + "learning_rate": 1.907505285412262e-05, + "loss": 0.1253, + "step": 11710 + }, + { + "epoch": 12.38, + "learning_rate": 1.9069767441860468e-05, + "loss": 0.051, + "step": 11712 + }, + { + "epoch": 12.38, + "learning_rate": 1.906448202959831e-05, + "loss": 0.0164, + "step": 11714 + }, + { + "epoch": 12.38, + "learning_rate": 1.9059196617336153e-05, + "loss": 0.0591, + "step": 11716 + }, + { + "epoch": 12.39, + "learning_rate": 1.9053911205073996e-05, + "loss": 0.039, + "step": 11718 + }, + { + "epoch": 12.39, + "learning_rate": 1.904862579281184e-05, + "loss": 0.0519, + "step": 11720 + }, + { + "epoch": 12.39, + "learning_rate": 1.9043340380549684e-05, + "loss": 0.0243, + "step": 11722 + }, + { + "epoch": 12.39, + "learning_rate": 1.9038054968287527e-05, + "loss": 0.0222, + "step": 11724 + }, + { + "epoch": 12.4, + "learning_rate": 1.903276955602537e-05, + "loss": 0.0417, + "step": 11726 + }, + { + "epoch": 12.4, + "learning_rate": 1.9027484143763212e-05, + "loss": 0.0052, + "step": 11728 + }, + { + "epoch": 12.4, + "learning_rate": 1.902219873150106e-05, + "loss": 0.1375, + "step": 11730 + }, + { + "epoch": 12.4, + "learning_rate": 1.90169133192389e-05, + "loss": 0.0676, + "step": 11732 + }, + { + "epoch": 12.4, + "learning_rate": 1.9011627906976744e-05, + "loss": 0.0273, + "step": 11734 + }, + { + "epoch": 12.41, + "learning_rate": 1.9006342494714586e-05, + "loss": 0.0784, + "step": 11736 + }, + { + "epoch": 12.41, + "learning_rate": 1.9001057082452433e-05, + "loss": 0.0367, + "step": 11738 + }, + { + "epoch": 12.41, + "learning_rate": 1.8995771670190275e-05, + "loss": 0.0331, + "step": 11740 + }, + { + "epoch": 12.41, + "learning_rate": 1.8990486257928118e-05, + "loss": 0.0026, + "step": 11742 + }, + { + "epoch": 12.41, + "learning_rate": 1.8985200845665964e-05, + "loss": 0.0098, + "step": 11744 + }, + { + "epoch": 12.42, + "learning_rate": 1.8979915433403807e-05, + "loss": 0.0093, + "step": 11746 + }, + { + "epoch": 12.42, + "learning_rate": 1.8974630021141653e-05, + "loss": 0.0568, + "step": 11748 + }, + { + "epoch": 12.42, + "learning_rate": 1.8969344608879495e-05, + "loss": 0.0363, + "step": 11750 + }, + { + "epoch": 12.42, + "learning_rate": 1.8964059196617338e-05, + "loss": 0.0386, + "step": 11752 + }, + { + "epoch": 12.42, + "learning_rate": 1.895877378435518e-05, + "loss": 0.0281, + "step": 11754 + }, + { + "epoch": 12.43, + "learning_rate": 1.8953488372093023e-05, + "loss": 0.018, + "step": 11756 + }, + { + "epoch": 12.43, + "learning_rate": 1.894820295983087e-05, + "loss": 0.0129, + "step": 11758 + }, + { + "epoch": 12.43, + "learning_rate": 1.8942917547568712e-05, + "loss": 0.0137, + "step": 11760 + }, + { + "epoch": 12.43, + "learning_rate": 1.8937632135306555e-05, + "loss": 0.0773, + "step": 11762 + }, + { + "epoch": 12.44, + "learning_rate": 1.8932346723044397e-05, + "loss": 0.069, + "step": 11764 + }, + { + "epoch": 12.44, + "learning_rate": 1.8927061310782243e-05, + "loss": 0.0699, + "step": 11766 + }, + { + "epoch": 12.44, + "learning_rate": 1.8921775898520086e-05, + "loss": 0.0518, + "step": 11768 + }, + { + "epoch": 12.44, + "learning_rate": 1.891649048625793e-05, + "loss": 0.0252, + "step": 11770 + }, + { + "epoch": 12.44, + "learning_rate": 1.891120507399577e-05, + "loss": 0.0087, + "step": 11772 + }, + { + "epoch": 12.45, + "learning_rate": 1.8905919661733614e-05, + "loss": 0.0321, + "step": 11774 + }, + { + "epoch": 12.45, + "learning_rate": 1.890063424947146e-05, + "loss": 0.0232, + "step": 11776 + }, + { + "epoch": 12.45, + "learning_rate": 1.8895348837209303e-05, + "loss": 0.0225, + "step": 11778 + }, + { + "epoch": 12.45, + "learning_rate": 1.8890063424947146e-05, + "loss": 0.043, + "step": 11780 + }, + { + "epoch": 12.45, + "learning_rate": 1.8884778012684988e-05, + "loss": 0.0319, + "step": 11782 + }, + { + "epoch": 12.46, + "learning_rate": 1.8879492600422834e-05, + "loss": 0.0238, + "step": 11784 + }, + { + "epoch": 12.46, + "learning_rate": 1.8874207188160677e-05, + "loss": 0.044, + "step": 11786 + }, + { + "epoch": 12.46, + "learning_rate": 1.886892177589852e-05, + "loss": 0.0721, + "step": 11788 + }, + { + "epoch": 12.46, + "learning_rate": 1.8863636363636362e-05, + "loss": 0.1443, + "step": 11790 + }, + { + "epoch": 12.47, + "learning_rate": 1.885835095137421e-05, + "loss": 0.1707, + "step": 11792 + }, + { + "epoch": 12.47, + "learning_rate": 1.885306553911205e-05, + "loss": 0.0278, + "step": 11794 + }, + { + "epoch": 12.47, + "learning_rate": 1.8847780126849897e-05, + "loss": 0.0472, + "step": 11796 + }, + { + "epoch": 12.47, + "learning_rate": 1.884249471458774e-05, + "loss": 0.0468, + "step": 11798 + }, + { + "epoch": 12.47, + "learning_rate": 1.8837209302325582e-05, + "loss": 0.0368, + "step": 11800 + }, + { + "epoch": 12.48, + "learning_rate": 1.883192389006343e-05, + "loss": 0.0713, + "step": 11802 + }, + { + "epoch": 12.48, + "learning_rate": 1.882663847780127e-05, + "loss": 0.0581, + "step": 11804 + }, + { + "epoch": 12.48, + "learning_rate": 1.8821353065539114e-05, + "loss": 0.1298, + "step": 11806 + }, + { + "epoch": 12.48, + "learning_rate": 1.8816067653276956e-05, + "loss": 0.1243, + "step": 11808 + }, + { + "epoch": 12.48, + "learning_rate": 1.88107822410148e-05, + "loss": 0.078, + "step": 11810 + }, + { + "epoch": 12.49, + "learning_rate": 1.8805496828752645e-05, + "loss": 0.0132, + "step": 11812 + }, + { + "epoch": 12.49, + "learning_rate": 1.8800211416490488e-05, + "loss": 0.0336, + "step": 11814 + }, + { + "epoch": 12.49, + "learning_rate": 1.879492600422833e-05, + "loss": 0.0503, + "step": 11816 + }, + { + "epoch": 12.49, + "learning_rate": 1.8789640591966173e-05, + "loss": 0.0343, + "step": 11818 + }, + { + "epoch": 12.49, + "learning_rate": 1.878435517970402e-05, + "loss": 0.0146, + "step": 11820 + }, + { + "epoch": 12.5, + "learning_rate": 1.8779069767441862e-05, + "loss": 0.0561, + "step": 11822 + }, + { + "epoch": 12.5, + "learning_rate": 1.8773784355179705e-05, + "loss": 0.141, + "step": 11824 + }, + { + "epoch": 12.5, + "learning_rate": 1.8768498942917547e-05, + "loss": 0.0475, + "step": 11826 + }, + { + "epoch": 12.5, + "learning_rate": 1.876321353065539e-05, + "loss": 0.0365, + "step": 11828 + }, + { + "epoch": 12.51, + "learning_rate": 1.8757928118393236e-05, + "loss": 0.0063, + "step": 11830 + }, + { + "epoch": 12.51, + "learning_rate": 1.875264270613108e-05, + "loss": 0.0754, + "step": 11832 + }, + { + "epoch": 12.51, + "learning_rate": 1.874735729386892e-05, + "loss": 0.06, + "step": 11834 + }, + { + "epoch": 12.51, + "learning_rate": 1.8742071881606764e-05, + "loss": 0.1181, + "step": 11836 + }, + { + "epoch": 12.51, + "learning_rate": 1.873678646934461e-05, + "loss": 0.0563, + "step": 11838 + }, + { + "epoch": 12.52, + "learning_rate": 1.8731501057082453e-05, + "loss": 0.0358, + "step": 11840 + }, + { + "epoch": 12.52, + "learning_rate": 1.8726215644820295e-05, + "loss": 0.0087, + "step": 11842 + }, + { + "epoch": 12.52, + "learning_rate": 1.8720930232558138e-05, + "loss": 0.0112, + "step": 11844 + }, + { + "epoch": 12.52, + "learning_rate": 1.8715644820295984e-05, + "loss": 0.0286, + "step": 11846 + }, + { + "epoch": 12.52, + "learning_rate": 1.8710359408033827e-05, + "loss": 0.0281, + "step": 11848 + }, + { + "epoch": 12.53, + "learning_rate": 1.8705073995771673e-05, + "loss": 0.0756, + "step": 11850 + }, + { + "epoch": 12.53, + "learning_rate": 1.8699788583509516e-05, + "loss": 0.0139, + "step": 11852 + }, + { + "epoch": 12.53, + "learning_rate": 1.8694503171247358e-05, + "loss": 0.0402, + "step": 11854 + }, + { + "epoch": 12.53, + "learning_rate": 1.8689217758985204e-05, + "loss": 0.023, + "step": 11856 + }, + { + "epoch": 12.53, + "learning_rate": 1.8683932346723047e-05, + "loss": 0.0187, + "step": 11858 + }, + { + "epoch": 12.54, + "learning_rate": 1.867864693446089e-05, + "loss": 0.0562, + "step": 11860 + }, + { + "epoch": 12.54, + "learning_rate": 1.8673361522198732e-05, + "loss": 0.0147, + "step": 11862 + }, + { + "epoch": 12.54, + "learning_rate": 1.8668076109936575e-05, + "loss": 0.0217, + "step": 11864 + }, + { + "epoch": 12.54, + "learning_rate": 1.866279069767442e-05, + "loss": 0.0082, + "step": 11866 + }, + { + "epoch": 12.55, + "learning_rate": 1.8657505285412264e-05, + "loss": 0.039, + "step": 11868 + }, + { + "epoch": 12.55, + "learning_rate": 1.8652219873150106e-05, + "loss": 0.0157, + "step": 11870 + }, + { + "epoch": 12.55, + "learning_rate": 1.864693446088795e-05, + "loss": 0.0795, + "step": 11872 + }, + { + "epoch": 12.55, + "learning_rate": 1.8641649048625795e-05, + "loss": 0.0533, + "step": 11874 + }, + { + "epoch": 12.55, + "learning_rate": 1.8636363636363638e-05, + "loss": 0.0508, + "step": 11876 + }, + { + "epoch": 12.56, + "learning_rate": 1.863107822410148e-05, + "loss": 0.0334, + "step": 11878 + }, + { + "epoch": 12.56, + "learning_rate": 1.8625792811839323e-05, + "loss": 0.022, + "step": 11880 + }, + { + "epoch": 12.56, + "learning_rate": 1.8620507399577166e-05, + "loss": 0.0166, + "step": 11882 + }, + { + "epoch": 12.56, + "learning_rate": 1.8615221987315012e-05, + "loss": 0.0703, + "step": 11884 + }, + { + "epoch": 12.56, + "learning_rate": 1.8609936575052855e-05, + "loss": 0.0374, + "step": 11886 + }, + { + "epoch": 12.57, + "learning_rate": 1.8604651162790697e-05, + "loss": 0.0318, + "step": 11888 + }, + { + "epoch": 12.57, + "learning_rate": 1.859936575052854e-05, + "loss": 0.1932, + "step": 11890 + }, + { + "epoch": 12.57, + "learning_rate": 1.8594080338266386e-05, + "loss": 0.0277, + "step": 11892 + }, + { + "epoch": 12.57, + "learning_rate": 1.858879492600423e-05, + "loss": 0.0648, + "step": 11894 + }, + { + "epoch": 12.58, + "learning_rate": 1.858350951374207e-05, + "loss": 0.0769, + "step": 11896 + }, + { + "epoch": 12.58, + "learning_rate": 1.8578224101479917e-05, + "loss": 0.0774, + "step": 11898 + }, + { + "epoch": 12.58, + "learning_rate": 1.857293868921776e-05, + "loss": 0.0401, + "step": 11900 + }, + { + "epoch": 12.58, + "learning_rate": 1.8567653276955606e-05, + "loss": 0.0181, + "step": 11902 + }, + { + "epoch": 12.58, + "learning_rate": 1.856236786469345e-05, + "loss": 0.0538, + "step": 11904 + }, + { + "epoch": 12.59, + "learning_rate": 1.855708245243129e-05, + "loss": 0.0022, + "step": 11906 + }, + { + "epoch": 12.59, + "learning_rate": 1.8551797040169134e-05, + "loss": 0.0727, + "step": 11908 + }, + { + "epoch": 12.59, + "learning_rate": 1.854651162790698e-05, + "loss": 0.003, + "step": 11910 + }, + { + "epoch": 12.59, + "learning_rate": 1.8541226215644823e-05, + "loss": 0.0089, + "step": 11912 + }, + { + "epoch": 12.59, + "learning_rate": 1.8535940803382666e-05, + "loss": 0.0329, + "step": 11914 + }, + { + "epoch": 12.6, + "learning_rate": 1.8530655391120508e-05, + "loss": 0.0231, + "step": 11916 + }, + { + "epoch": 12.6, + "learning_rate": 1.852536997885835e-05, + "loss": 0.0387, + "step": 11918 + }, + { + "epoch": 12.6, + "learning_rate": 1.8520084566596197e-05, + "loss": 0.061, + "step": 11920 + }, + { + "epoch": 12.6, + "learning_rate": 1.851479915433404e-05, + "loss": 0.0585, + "step": 11922 + }, + { + "epoch": 12.6, + "learning_rate": 1.8509513742071882e-05, + "loss": 0.0702, + "step": 11924 + }, + { + "epoch": 12.61, + "learning_rate": 1.8504228329809725e-05, + "loss": 0.0598, + "step": 11926 + }, + { + "epoch": 12.61, + "learning_rate": 1.849894291754757e-05, + "loss": 0.024, + "step": 11928 + }, + { + "epoch": 12.61, + "learning_rate": 1.8493657505285414e-05, + "loss": 0.0682, + "step": 11930 + }, + { + "epoch": 12.61, + "learning_rate": 1.8488372093023256e-05, + "loss": 0.0347, + "step": 11932 + }, + { + "epoch": 12.62, + "learning_rate": 1.84830866807611e-05, + "loss": 0.0395, + "step": 11934 + }, + { + "epoch": 12.62, + "learning_rate": 1.847780126849894e-05, + "loss": 0.0333, + "step": 11936 + }, + { + "epoch": 12.62, + "learning_rate": 1.8472515856236788e-05, + "loss": 0.0432, + "step": 11938 + }, + { + "epoch": 12.62, + "learning_rate": 1.846723044397463e-05, + "loss": 0.0321, + "step": 11940 + }, + { + "epoch": 12.62, + "learning_rate": 1.8461945031712473e-05, + "loss": 0.0308, + "step": 11942 + }, + { + "epoch": 12.63, + "learning_rate": 1.8456659619450316e-05, + "loss": 0.0345, + "step": 11944 + }, + { + "epoch": 12.63, + "learning_rate": 1.8451374207188162e-05, + "loss": 0.0509, + "step": 11946 + }, + { + "epoch": 12.63, + "learning_rate": 1.8446088794926004e-05, + "loss": 0.0276, + "step": 11948 + }, + { + "epoch": 12.63, + "learning_rate": 1.8440803382663847e-05, + "loss": 0.0526, + "step": 11950 + }, + { + "epoch": 12.63, + "learning_rate": 1.8435517970401693e-05, + "loss": 0.0226, + "step": 11952 + }, + { + "epoch": 12.64, + "learning_rate": 1.8430232558139536e-05, + "loss": 0.0062, + "step": 11954 + }, + { + "epoch": 12.64, + "learning_rate": 1.8424947145877382e-05, + "loss": 0.0682, + "step": 11956 + }, + { + "epoch": 12.64, + "learning_rate": 1.8419661733615225e-05, + "loss": 0.0851, + "step": 11958 + }, + { + "epoch": 12.64, + "learning_rate": 1.8414376321353067e-05, + "loss": 0.0219, + "step": 11960 + }, + { + "epoch": 12.64, + "learning_rate": 1.840909090909091e-05, + "loss": 0.0094, + "step": 11962 + }, + { + "epoch": 12.65, + "learning_rate": 1.8403805496828756e-05, + "loss": 0.0361, + "step": 11964 + }, + { + "epoch": 12.65, + "learning_rate": 1.83985200845666e-05, + "loss": 0.1176, + "step": 11966 + }, + { + "epoch": 12.65, + "learning_rate": 1.839323467230444e-05, + "loss": 0.0238, + "step": 11968 + }, + { + "epoch": 12.65, + "learning_rate": 1.8387949260042284e-05, + "loss": 0.0309, + "step": 11970 + }, + { + "epoch": 12.66, + "learning_rate": 1.8382663847780127e-05, + "loss": 0.0262, + "step": 11972 + }, + { + "epoch": 12.66, + "learning_rate": 1.8377378435517973e-05, + "loss": 0.0233, + "step": 11974 + }, + { + "epoch": 12.66, + "learning_rate": 1.8372093023255815e-05, + "loss": 0.0249, + "step": 11976 + }, + { + "epoch": 12.66, + "learning_rate": 1.8366807610993658e-05, + "loss": 0.1753, + "step": 11978 + }, + { + "epoch": 12.66, + "learning_rate": 1.83615221987315e-05, + "loss": 0.0459, + "step": 11980 + }, + { + "epoch": 12.67, + "learning_rate": 1.8356236786469347e-05, + "loss": 0.0319, + "step": 11982 + }, + { + "epoch": 12.67, + "learning_rate": 1.835095137420719e-05, + "loss": 0.088, + "step": 11984 + }, + { + "epoch": 12.67, + "learning_rate": 1.8345665961945032e-05, + "loss": 0.0274, + "step": 11986 + }, + { + "epoch": 12.67, + "learning_rate": 1.8340380549682875e-05, + "loss": 0.0498, + "step": 11988 + }, + { + "epoch": 12.67, + "learning_rate": 1.8335095137420718e-05, + "loss": 0.0145, + "step": 11990 + }, + { + "epoch": 12.68, + "learning_rate": 1.8329809725158564e-05, + "loss": 0.0204, + "step": 11992 + }, + { + "epoch": 12.68, + "learning_rate": 1.8324524312896406e-05, + "loss": 0.0554, + "step": 11994 + }, + { + "epoch": 12.68, + "learning_rate": 1.831923890063425e-05, + "loss": 0.0453, + "step": 11996 + }, + { + "epoch": 12.68, + "learning_rate": 1.831395348837209e-05, + "loss": 0.0281, + "step": 11998 + }, + { + "epoch": 12.68, + "learning_rate": 1.8308668076109938e-05, + "loss": 0.0519, + "step": 12000 + }, + { + "epoch": 12.68, + "eval_cer": 0.03493872898261613, + "eval_loss": 0.7152830958366394, + "eval_runtime": 126.707, + "eval_samples_per_second": 6.637, + "eval_steps_per_second": 0.837, + "step": 12000 + }, + { + "epoch": 12.69, + "learning_rate": 1.830338266384778e-05, + "loss": 0.0397, + "step": 12002 + }, + { + "epoch": 12.69, + "learning_rate": 1.8298097251585626e-05, + "loss": 0.0566, + "step": 12004 + }, + { + "epoch": 12.69, + "learning_rate": 1.829281183932347e-05, + "loss": 0.0203, + "step": 12006 + }, + { + "epoch": 12.69, + "learning_rate": 1.8287526427061312e-05, + "loss": 0.0205, + "step": 12008 + }, + { + "epoch": 12.7, + "learning_rate": 1.8282241014799158e-05, + "loss": 0.0227, + "step": 12010 + }, + { + "epoch": 12.7, + "learning_rate": 1.8276955602537e-05, + "loss": 0.0056, + "step": 12012 + }, + { + "epoch": 12.7, + "learning_rate": 1.8271670190274843e-05, + "loss": 0.0421, + "step": 12014 + }, + { + "epoch": 12.7, + "learning_rate": 1.8266384778012686e-05, + "loss": 0.0131, + "step": 12016 + }, + { + "epoch": 12.7, + "learning_rate": 1.826109936575053e-05, + "loss": 0.0164, + "step": 12018 + }, + { + "epoch": 12.71, + "learning_rate": 1.8255813953488375e-05, + "loss": 0.0469, + "step": 12020 + }, + { + "epoch": 12.71, + "learning_rate": 1.8250528541226217e-05, + "loss": 0.0405, + "step": 12022 + }, + { + "epoch": 12.71, + "learning_rate": 1.824524312896406e-05, + "loss": 0.0053, + "step": 12024 + }, + { + "epoch": 12.71, + "learning_rate": 1.8239957716701903e-05, + "loss": 0.0114, + "step": 12026 + }, + { + "epoch": 12.71, + "learning_rate": 1.823467230443975e-05, + "loss": 0.0044, + "step": 12028 + }, + { + "epoch": 12.72, + "learning_rate": 1.822938689217759e-05, + "loss": 0.0255, + "step": 12030 + }, + { + "epoch": 12.72, + "learning_rate": 1.8224101479915434e-05, + "loss": 0.0303, + "step": 12032 + }, + { + "epoch": 12.72, + "learning_rate": 1.8218816067653277e-05, + "loss": 0.0222, + "step": 12034 + }, + { + "epoch": 12.72, + "learning_rate": 1.821353065539112e-05, + "loss": 0.0577, + "step": 12036 + }, + { + "epoch": 12.73, + "learning_rate": 1.8208245243128965e-05, + "loss": 0.0462, + "step": 12038 + }, + { + "epoch": 12.73, + "learning_rate": 1.8202959830866808e-05, + "loss": 0.0222, + "step": 12040 + }, + { + "epoch": 12.73, + "learning_rate": 1.819767441860465e-05, + "loss": 0.0696, + "step": 12042 + }, + { + "epoch": 12.73, + "learning_rate": 1.8192389006342493e-05, + "loss": 0.0139, + "step": 12044 + }, + { + "epoch": 12.73, + "learning_rate": 1.818710359408034e-05, + "loss": 0.0103, + "step": 12046 + }, + { + "epoch": 12.74, + "learning_rate": 1.8181818181818182e-05, + "loss": 0.0333, + "step": 12048 + }, + { + "epoch": 12.74, + "learning_rate": 1.8176532769556025e-05, + "loss": 0.0181, + "step": 12050 + }, + { + "epoch": 12.74, + "learning_rate": 1.8171247357293867e-05, + "loss": 0.0193, + "step": 12052 + }, + { + "epoch": 12.74, + "learning_rate": 1.8165961945031713e-05, + "loss": 0.051, + "step": 12054 + }, + { + "epoch": 12.74, + "learning_rate": 1.8160676532769556e-05, + "loss": 0.0071, + "step": 12056 + }, + { + "epoch": 12.75, + "learning_rate": 1.8155391120507402e-05, + "loss": 0.0019, + "step": 12058 + }, + { + "epoch": 12.75, + "learning_rate": 1.8150105708245245e-05, + "loss": 0.0435, + "step": 12060 + }, + { + "epoch": 12.75, + "learning_rate": 1.8144820295983088e-05, + "loss": 0.0204, + "step": 12062 + }, + { + "epoch": 12.75, + "learning_rate": 1.8139534883720934e-05, + "loss": 0.0172, + "step": 12064 + }, + { + "epoch": 12.75, + "learning_rate": 1.8134249471458776e-05, + "loss": 0.0118, + "step": 12066 + }, + { + "epoch": 12.76, + "learning_rate": 1.812896405919662e-05, + "loss": 0.0241, + "step": 12068 + }, + { + "epoch": 12.76, + "learning_rate": 1.812367864693446e-05, + "loss": 0.0647, + "step": 12070 + }, + { + "epoch": 12.76, + "learning_rate": 1.8118393234672304e-05, + "loss": 0.0233, + "step": 12072 + }, + { + "epoch": 12.76, + "learning_rate": 1.811310782241015e-05, + "loss": 0.0086, + "step": 12074 + }, + { + "epoch": 12.77, + "learning_rate": 1.8107822410147993e-05, + "loss": 0.0489, + "step": 12076 + }, + { + "epoch": 12.77, + "learning_rate": 1.8102536997885836e-05, + "loss": 0.0581, + "step": 12078 + }, + { + "epoch": 12.77, + "learning_rate": 1.809725158562368e-05, + "loss": 0.0231, + "step": 12080 + }, + { + "epoch": 12.77, + "learning_rate": 1.8091966173361524e-05, + "loss": 0.041, + "step": 12082 + }, + { + "epoch": 12.77, + "learning_rate": 1.8086680761099367e-05, + "loss": 0.034, + "step": 12084 + }, + { + "epoch": 12.78, + "learning_rate": 1.808139534883721e-05, + "loss": 0.0653, + "step": 12086 + }, + { + "epoch": 12.78, + "learning_rate": 1.8076109936575052e-05, + "loss": 0.0455, + "step": 12088 + }, + { + "epoch": 12.78, + "learning_rate": 1.8070824524312895e-05, + "loss": 0.0227, + "step": 12090 + }, + { + "epoch": 12.78, + "learning_rate": 1.806553911205074e-05, + "loss": 0.1526, + "step": 12092 + }, + { + "epoch": 12.78, + "learning_rate": 1.8060253699788584e-05, + "loss": 0.0399, + "step": 12094 + }, + { + "epoch": 12.79, + "learning_rate": 1.8054968287526427e-05, + "loss": 0.0096, + "step": 12096 + }, + { + "epoch": 12.79, + "learning_rate": 1.804968287526427e-05, + "loss": 0.081, + "step": 12098 + }, + { + "epoch": 12.79, + "learning_rate": 1.8044397463002115e-05, + "loss": 0.0674, + "step": 12100 + }, + { + "epoch": 12.79, + "learning_rate": 1.8039112050739958e-05, + "loss": 0.0201, + "step": 12102 + }, + { + "epoch": 12.79, + "learning_rate": 1.80338266384778e-05, + "loss": 0.0162, + "step": 12104 + }, + { + "epoch": 12.8, + "learning_rate": 1.8028541226215647e-05, + "loss": 0.1635, + "step": 12106 + }, + { + "epoch": 12.8, + "learning_rate": 1.802325581395349e-05, + "loss": 0.0519, + "step": 12108 + }, + { + "epoch": 12.8, + "learning_rate": 1.8017970401691332e-05, + "loss": 0.0668, + "step": 12110 + }, + { + "epoch": 12.8, + "learning_rate": 1.8012684989429178e-05, + "loss": 0.0052, + "step": 12112 + }, + { + "epoch": 12.81, + "learning_rate": 1.800739957716702e-05, + "loss": 0.0597, + "step": 12114 + }, + { + "epoch": 12.81, + "learning_rate": 1.8002114164904863e-05, + "loss": 0.0481, + "step": 12116 + }, + { + "epoch": 12.81, + "learning_rate": 1.799682875264271e-05, + "loss": 0.0407, + "step": 12118 + }, + { + "epoch": 12.81, + "learning_rate": 1.7991543340380552e-05, + "loss": 0.0751, + "step": 12120 + }, + { + "epoch": 12.81, + "learning_rate": 1.7986257928118395e-05, + "loss": 0.0375, + "step": 12122 + }, + { + "epoch": 12.82, + "learning_rate": 1.7980972515856237e-05, + "loss": 0.0605, + "step": 12124 + }, + { + "epoch": 12.82, + "learning_rate": 1.797568710359408e-05, + "loss": 0.0272, + "step": 12126 + }, + { + "epoch": 12.82, + "learning_rate": 1.7970401691331926e-05, + "loss": 0.0457, + "step": 12128 + }, + { + "epoch": 12.82, + "learning_rate": 1.796511627906977e-05, + "loss": 0.0282, + "step": 12130 + }, + { + "epoch": 12.82, + "learning_rate": 1.795983086680761e-05, + "loss": 0.0617, + "step": 12132 + }, + { + "epoch": 12.83, + "learning_rate": 1.7954545454545454e-05, + "loss": 0.0221, + "step": 12134 + }, + { + "epoch": 12.83, + "learning_rate": 1.79492600422833e-05, + "loss": 0.0818, + "step": 12136 + }, + { + "epoch": 12.83, + "learning_rate": 1.7943974630021143e-05, + "loss": 0.0797, + "step": 12138 + }, + { + "epoch": 12.83, + "learning_rate": 1.7938689217758986e-05, + "loss": 0.0649, + "step": 12140 + }, + { + "epoch": 12.84, + "learning_rate": 1.7933403805496828e-05, + "loss": 0.0281, + "step": 12142 + }, + { + "epoch": 12.84, + "learning_rate": 1.792811839323467e-05, + "loss": 0.0125, + "step": 12144 + }, + { + "epoch": 12.84, + "learning_rate": 1.7922832980972517e-05, + "loss": 0.0172, + "step": 12146 + }, + { + "epoch": 12.84, + "learning_rate": 1.791754756871036e-05, + "loss": 0.023, + "step": 12148 + }, + { + "epoch": 12.84, + "learning_rate": 1.7912262156448202e-05, + "loss": 0.0441, + "step": 12150 + }, + { + "epoch": 12.85, + "learning_rate": 1.7906976744186045e-05, + "loss": 0.0704, + "step": 12152 + }, + { + "epoch": 12.85, + "learning_rate": 1.790169133192389e-05, + "loss": 0.1245, + "step": 12154 + }, + { + "epoch": 12.85, + "learning_rate": 1.7896405919661734e-05, + "loss": 0.0597, + "step": 12156 + }, + { + "epoch": 12.85, + "learning_rate": 1.7891120507399576e-05, + "loss": 0.07, + "step": 12158 + }, + { + "epoch": 12.85, + "learning_rate": 1.7885835095137422e-05, + "loss": 0.069, + "step": 12160 + }, + { + "epoch": 12.86, + "learning_rate": 1.7880549682875265e-05, + "loss": 0.0785, + "step": 12162 + }, + { + "epoch": 12.86, + "learning_rate": 1.787526427061311e-05, + "loss": 0.0217, + "step": 12164 + }, + { + "epoch": 12.86, + "learning_rate": 1.7869978858350954e-05, + "loss": 0.0645, + "step": 12166 + }, + { + "epoch": 12.86, + "learning_rate": 1.7864693446088797e-05, + "loss": 0.0546, + "step": 12168 + }, + { + "epoch": 12.86, + "learning_rate": 1.785940803382664e-05, + "loss": 0.0539, + "step": 12170 + }, + { + "epoch": 12.87, + "learning_rate": 1.7854122621564485e-05, + "loss": 0.1025, + "step": 12172 + }, + { + "epoch": 12.87, + "learning_rate": 1.7848837209302328e-05, + "loss": 0.055, + "step": 12174 + }, + { + "epoch": 12.87, + "learning_rate": 1.784355179704017e-05, + "loss": 0.0331, + "step": 12176 + }, + { + "epoch": 12.87, + "learning_rate": 1.7838266384778013e-05, + "loss": 0.0556, + "step": 12178 + }, + { + "epoch": 12.88, + "learning_rate": 1.7832980972515856e-05, + "loss": 0.0582, + "step": 12180 + }, + { + "epoch": 12.88, + "learning_rate": 1.7827695560253702e-05, + "loss": 0.072, + "step": 12182 + }, + { + "epoch": 12.88, + "learning_rate": 1.7822410147991545e-05, + "loss": 0.0276, + "step": 12184 + }, + { + "epoch": 12.88, + "learning_rate": 1.7817124735729387e-05, + "loss": 0.0059, + "step": 12186 + }, + { + "epoch": 12.88, + "learning_rate": 1.781183932346723e-05, + "loss": 0.0242, + "step": 12188 + }, + { + "epoch": 12.89, + "learning_rate": 1.7806553911205076e-05, + "loss": 0.0488, + "step": 12190 + }, + { + "epoch": 12.89, + "learning_rate": 1.780126849894292e-05, + "loss": 0.0418, + "step": 12192 + }, + { + "epoch": 12.89, + "learning_rate": 1.779598308668076e-05, + "loss": 0.0164, + "step": 12194 + }, + { + "epoch": 12.89, + "learning_rate": 1.7790697674418604e-05, + "loss": 0.0877, + "step": 12196 + }, + { + "epoch": 12.89, + "learning_rate": 1.7785412262156447e-05, + "loss": 0.1477, + "step": 12198 + }, + { + "epoch": 12.9, + "learning_rate": 1.7780126849894293e-05, + "loss": 0.0141, + "step": 12200 + }, + { + "epoch": 12.9, + "learning_rate": 1.7774841437632136e-05, + "loss": 0.0179, + "step": 12202 + }, + { + "epoch": 12.9, + "learning_rate": 1.7769556025369978e-05, + "loss": 0.0325, + "step": 12204 + }, + { + "epoch": 12.9, + "learning_rate": 1.776427061310782e-05, + "loss": 0.0368, + "step": 12206 + }, + { + "epoch": 12.9, + "learning_rate": 1.7758985200845667e-05, + "loss": 0.0178, + "step": 12208 + }, + { + "epoch": 12.91, + "learning_rate": 1.775369978858351e-05, + "loss": 0.038, + "step": 12210 + }, + { + "epoch": 12.91, + "learning_rate": 1.7748414376321352e-05, + "loss": 0.011, + "step": 12212 + }, + { + "epoch": 12.91, + "learning_rate": 1.77431289640592e-05, + "loss": 0.0098, + "step": 12214 + }, + { + "epoch": 12.91, + "learning_rate": 1.773784355179704e-05, + "loss": 0.0482, + "step": 12216 + }, + { + "epoch": 12.92, + "learning_rate": 1.7732558139534887e-05, + "loss": 0.0697, + "step": 12218 + }, + { + "epoch": 12.92, + "learning_rate": 1.772727272727273e-05, + "loss": 0.0124, + "step": 12220 + }, + { + "epoch": 12.92, + "learning_rate": 1.7721987315010572e-05, + "loss": 0.1065, + "step": 12222 + }, + { + "epoch": 12.92, + "learning_rate": 1.7716701902748415e-05, + "loss": 0.1214, + "step": 12224 + }, + { + "epoch": 12.92, + "learning_rate": 1.771141649048626e-05, + "loss": 0.0112, + "step": 12226 + }, + { + "epoch": 12.93, + "learning_rate": 1.7706131078224104e-05, + "loss": 0.0301, + "step": 12228 + }, + { + "epoch": 12.93, + "learning_rate": 1.7700845665961946e-05, + "loss": 0.1167, + "step": 12230 + }, + { + "epoch": 12.93, + "learning_rate": 1.769556025369979e-05, + "loss": 0.0479, + "step": 12232 + }, + { + "epoch": 12.93, + "learning_rate": 1.7690274841437632e-05, + "loss": 0.0196, + "step": 12234 + }, + { + "epoch": 12.93, + "learning_rate": 1.7684989429175478e-05, + "loss": 0.0172, + "step": 12236 + }, + { + "epoch": 12.94, + "learning_rate": 1.767970401691332e-05, + "loss": 0.0264, + "step": 12238 + }, + { + "epoch": 12.94, + "learning_rate": 1.7674418604651163e-05, + "loss": 0.121, + "step": 12240 + }, + { + "epoch": 12.94, + "learning_rate": 1.7669133192389006e-05, + "loss": 0.0175, + "step": 12242 + }, + { + "epoch": 12.94, + "learning_rate": 1.7663847780126852e-05, + "loss": 0.0459, + "step": 12244 + }, + { + "epoch": 12.95, + "learning_rate": 1.7658562367864695e-05, + "loss": 0.0183, + "step": 12246 + }, + { + "epoch": 12.95, + "learning_rate": 1.7653276955602537e-05, + "loss": 0.0601, + "step": 12248 + }, + { + "epoch": 12.95, + "learning_rate": 1.764799154334038e-05, + "loss": 0.0104, + "step": 12250 + }, + { + "epoch": 12.95, + "learning_rate": 1.7642706131078223e-05, + "loss": 0.0101, + "step": 12252 + }, + { + "epoch": 12.95, + "learning_rate": 1.763742071881607e-05, + "loss": 0.0273, + "step": 12254 + }, + { + "epoch": 12.96, + "learning_rate": 1.763213530655391e-05, + "loss": 0.0343, + "step": 12256 + }, + { + "epoch": 12.96, + "learning_rate": 1.7626849894291754e-05, + "loss": 0.0275, + "step": 12258 + }, + { + "epoch": 12.96, + "learning_rate": 1.7621564482029597e-05, + "loss": 0.0227, + "step": 12260 + }, + { + "epoch": 12.96, + "learning_rate": 1.7616279069767443e-05, + "loss": 0.0143, + "step": 12262 + }, + { + "epoch": 12.96, + "learning_rate": 1.7610993657505285e-05, + "loss": 0.0261, + "step": 12264 + }, + { + "epoch": 12.97, + "learning_rate": 1.760570824524313e-05, + "loss": 0.0837, + "step": 12266 + }, + { + "epoch": 12.97, + "learning_rate": 1.7600422832980974e-05, + "loss": 0.052, + "step": 12268 + }, + { + "epoch": 12.97, + "learning_rate": 1.7595137420718817e-05, + "loss": 0.0346, + "step": 12270 + }, + { + "epoch": 12.97, + "learning_rate": 1.7589852008456663e-05, + "loss": 0.0276, + "step": 12272 + }, + { + "epoch": 12.97, + "learning_rate": 1.7584566596194506e-05, + "loss": 0.0328, + "step": 12274 + }, + { + "epoch": 12.98, + "learning_rate": 1.7579281183932348e-05, + "loss": 0.0346, + "step": 12276 + }, + { + "epoch": 12.98, + "learning_rate": 1.757399577167019e-05, + "loss": 0.0911, + "step": 12278 + }, + { + "epoch": 12.98, + "learning_rate": 1.7568710359408037e-05, + "loss": 0.0286, + "step": 12280 + }, + { + "epoch": 12.98, + "learning_rate": 1.756342494714588e-05, + "loss": 0.0165, + "step": 12282 + }, + { + "epoch": 12.99, + "learning_rate": 1.7558139534883722e-05, + "loss": 0.0112, + "step": 12284 + }, + { + "epoch": 12.99, + "learning_rate": 1.7552854122621565e-05, + "loss": 0.0442, + "step": 12286 + }, + { + "epoch": 12.99, + "learning_rate": 1.7547568710359408e-05, + "loss": 0.0113, + "step": 12288 + }, + { + "epoch": 12.99, + "learning_rate": 1.7542283298097254e-05, + "loss": 0.0397, + "step": 12290 + }, + { + "epoch": 12.99, + "learning_rate": 1.7536997885835096e-05, + "loss": 0.0113, + "step": 12292 + }, + { + "epoch": 13.0, + "learning_rate": 1.753171247357294e-05, + "loss": 0.0359, + "step": 12294 + }, + { + "epoch": 13.0, + "learning_rate": 1.7526427061310782e-05, + "loss": 0.0269, + "step": 12296 + }, + { + "epoch": 13.0, + "learning_rate": 1.7521141649048628e-05, + "loss": 0.0205, + "step": 12298 + }, + { + "epoch": 13.0, + "learning_rate": 1.751585623678647e-05, + "loss": 0.006, + "step": 12300 + }, + { + "epoch": 13.0, + "learning_rate": 1.7510570824524313e-05, + "loss": 0.0539, + "step": 12302 + }, + { + "epoch": 13.01, + "learning_rate": 1.7505285412262156e-05, + "loss": 0.0533, + "step": 12304 + }, + { + "epoch": 13.01, + "learning_rate": 1.75e-05, + "loss": 0.067, + "step": 12306 + }, + { + "epoch": 13.01, + "learning_rate": 1.7494714587737845e-05, + "loss": 0.0075, + "step": 12308 + }, + { + "epoch": 13.01, + "learning_rate": 1.7489429175475687e-05, + "loss": 0.0233, + "step": 12310 + }, + { + "epoch": 13.01, + "learning_rate": 1.748414376321353e-05, + "loss": 0.0528, + "step": 12312 + }, + { + "epoch": 13.02, + "learning_rate": 1.7478858350951376e-05, + "loss": 0.0479, + "step": 12314 + }, + { + "epoch": 13.02, + "learning_rate": 1.747357293868922e-05, + "loss": 0.1347, + "step": 12316 + }, + { + "epoch": 13.02, + "learning_rate": 1.746828752642706e-05, + "loss": 0.0294, + "step": 12318 + }, + { + "epoch": 13.02, + "learning_rate": 1.7463002114164907e-05, + "loss": 0.0273, + "step": 12320 + }, + { + "epoch": 13.03, + "learning_rate": 1.745771670190275e-05, + "loss": 0.0432, + "step": 12322 + }, + { + "epoch": 13.03, + "learning_rate": 1.7452431289640593e-05, + "loss": 0.106, + "step": 12324 + }, + { + "epoch": 13.03, + "learning_rate": 1.744714587737844e-05, + "loss": 0.0854, + "step": 12326 + }, + { + "epoch": 13.03, + "learning_rate": 1.744186046511628e-05, + "loss": 0.0735, + "step": 12328 + }, + { + "epoch": 13.03, + "learning_rate": 1.7436575052854124e-05, + "loss": 0.0072, + "step": 12330 + }, + { + "epoch": 13.04, + "learning_rate": 1.7431289640591967e-05, + "loss": 0.0377, + "step": 12332 + }, + { + "epoch": 13.04, + "learning_rate": 1.7426004228329813e-05, + "loss": 0.0226, + "step": 12334 + }, + { + "epoch": 13.04, + "learning_rate": 1.7420718816067655e-05, + "loss": 0.0568, + "step": 12336 + }, + { + "epoch": 13.04, + "learning_rate": 1.7415433403805498e-05, + "loss": 0.0474, + "step": 12338 + }, + { + "epoch": 13.04, + "learning_rate": 1.741014799154334e-05, + "loss": 0.0052, + "step": 12340 + }, + { + "epoch": 13.05, + "learning_rate": 1.7404862579281183e-05, + "loss": 0.0388, + "step": 12342 + }, + { + "epoch": 13.05, + "learning_rate": 1.739957716701903e-05, + "loss": 0.0411, + "step": 12344 + }, + { + "epoch": 13.05, + "learning_rate": 1.7394291754756872e-05, + "loss": 0.0087, + "step": 12346 + }, + { + "epoch": 13.05, + "learning_rate": 1.7389006342494715e-05, + "loss": 0.0273, + "step": 12348 + }, + { + "epoch": 13.05, + "learning_rate": 1.7383720930232558e-05, + "loss": 0.0399, + "step": 12350 + }, + { + "epoch": 13.06, + "learning_rate": 1.73784355179704e-05, + "loss": 0.003, + "step": 12352 + }, + { + "epoch": 13.06, + "learning_rate": 1.7373150105708246e-05, + "loss": 0.0108, + "step": 12354 + }, + { + "epoch": 13.06, + "learning_rate": 1.736786469344609e-05, + "loss": 0.0944, + "step": 12356 + }, + { + "epoch": 13.06, + "learning_rate": 1.736257928118393e-05, + "loss": 0.088, + "step": 12358 + }, + { + "epoch": 13.07, + "learning_rate": 1.7357293868921774e-05, + "loss": 0.0058, + "step": 12360 + }, + { + "epoch": 13.07, + "learning_rate": 1.735200845665962e-05, + "loss": 0.0372, + "step": 12362 + }, + { + "epoch": 13.07, + "learning_rate": 1.7346723044397463e-05, + "loss": 0.0398, + "step": 12364 + }, + { + "epoch": 13.07, + "learning_rate": 1.7341437632135306e-05, + "loss": 0.0178, + "step": 12366 + }, + { + "epoch": 13.07, + "learning_rate": 1.7336152219873152e-05, + "loss": 0.0135, + "step": 12368 + }, + { + "epoch": 13.08, + "learning_rate": 1.7330866807610994e-05, + "loss": 0.0077, + "step": 12370 + }, + { + "epoch": 13.08, + "learning_rate": 1.732558139534884e-05, + "loss": 0.016, + "step": 12372 + }, + { + "epoch": 13.08, + "learning_rate": 1.7320295983086683e-05, + "loss": 0.1078, + "step": 12374 + }, + { + "epoch": 13.08, + "learning_rate": 1.7315010570824526e-05, + "loss": 0.0113, + "step": 12376 + }, + { + "epoch": 13.08, + "learning_rate": 1.730972515856237e-05, + "loss": 0.032, + "step": 12378 + }, + { + "epoch": 13.09, + "learning_rate": 1.7304439746300215e-05, + "loss": 0.0365, + "step": 12380 + }, + { + "epoch": 13.09, + "learning_rate": 1.7299154334038057e-05, + "loss": 0.0738, + "step": 12382 + }, + { + "epoch": 13.09, + "learning_rate": 1.72938689217759e-05, + "loss": 0.0072, + "step": 12384 + }, + { + "epoch": 13.09, + "learning_rate": 1.7288583509513743e-05, + "loss": 0.0046, + "step": 12386 + }, + { + "epoch": 13.1, + "learning_rate": 1.7283298097251585e-05, + "loss": 0.0382, + "step": 12388 + }, + { + "epoch": 13.1, + "learning_rate": 1.727801268498943e-05, + "loss": 0.0071, + "step": 12390 + }, + { + "epoch": 13.1, + "learning_rate": 1.7272727272727274e-05, + "loss": 0.005, + "step": 12392 + }, + { + "epoch": 13.1, + "learning_rate": 1.7267441860465117e-05, + "loss": 0.1507, + "step": 12394 + }, + { + "epoch": 13.1, + "learning_rate": 1.726215644820296e-05, + "loss": 0.0189, + "step": 12396 + }, + { + "epoch": 13.11, + "learning_rate": 1.7256871035940805e-05, + "loss": 0.0148, + "step": 12398 + }, + { + "epoch": 13.11, + "learning_rate": 1.7251585623678648e-05, + "loss": 0.0264, + "step": 12400 + }, + { + "epoch": 13.11, + "learning_rate": 1.724630021141649e-05, + "loss": 0.0133, + "step": 12402 + }, + { + "epoch": 13.11, + "learning_rate": 1.7241014799154333e-05, + "loss": 0.0567, + "step": 12404 + }, + { + "epoch": 13.11, + "learning_rate": 1.7235729386892176e-05, + "loss": 0.0368, + "step": 12406 + }, + { + "epoch": 13.12, + "learning_rate": 1.7230443974630022e-05, + "loss": 0.0566, + "step": 12408 + }, + { + "epoch": 13.12, + "learning_rate": 1.7225158562367865e-05, + "loss": 0.0335, + "step": 12410 + }, + { + "epoch": 13.12, + "learning_rate": 1.7219873150105707e-05, + "loss": 0.0466, + "step": 12412 + }, + { + "epoch": 13.12, + "learning_rate": 1.721458773784355e-05, + "loss": 0.0176, + "step": 12414 + }, + { + "epoch": 13.12, + "learning_rate": 1.7209302325581396e-05, + "loss": 0.0085, + "step": 12416 + }, + { + "epoch": 13.13, + "learning_rate": 1.720401691331924e-05, + "loss": 0.0475, + "step": 12418 + }, + { + "epoch": 13.13, + "learning_rate": 1.719873150105708e-05, + "loss": 0.0202, + "step": 12420 + }, + { + "epoch": 13.13, + "learning_rate": 1.7193446088794928e-05, + "loss": 0.0277, + "step": 12422 + }, + { + "epoch": 13.13, + "learning_rate": 1.718816067653277e-05, + "loss": 0.0702, + "step": 12424 + }, + { + "epoch": 13.14, + "learning_rate": 1.7182875264270616e-05, + "loss": 0.0043, + "step": 12426 + }, + { + "epoch": 13.14, + "learning_rate": 1.717758985200846e-05, + "loss": 0.0044, + "step": 12428 + }, + { + "epoch": 13.14, + "learning_rate": 1.71723044397463e-05, + "loss": 0.0139, + "step": 12430 + }, + { + "epoch": 13.14, + "learning_rate": 1.7167019027484144e-05, + "loss": 0.0196, + "step": 12432 + }, + { + "epoch": 13.14, + "learning_rate": 1.716173361522199e-05, + "loss": 0.0219, + "step": 12434 + }, + { + "epoch": 13.15, + "learning_rate": 1.7156448202959833e-05, + "loss": 0.007, + "step": 12436 + }, + { + "epoch": 13.15, + "learning_rate": 1.7151162790697676e-05, + "loss": 0.063, + "step": 12438 + }, + { + "epoch": 13.15, + "learning_rate": 1.714587737843552e-05, + "loss": 0.0267, + "step": 12440 + }, + { + "epoch": 13.15, + "learning_rate": 1.714059196617336e-05, + "loss": 0.0312, + "step": 12442 + }, + { + "epoch": 13.15, + "learning_rate": 1.7135306553911207e-05, + "loss": 0.0269, + "step": 12444 + }, + { + "epoch": 13.16, + "learning_rate": 1.713002114164905e-05, + "loss": 0.0248, + "step": 12446 + }, + { + "epoch": 13.16, + "learning_rate": 1.7124735729386892e-05, + "loss": 0.0452, + "step": 12448 + }, + { + "epoch": 13.16, + "learning_rate": 1.7119450317124735e-05, + "loss": 0.0167, + "step": 12450 + }, + { + "epoch": 13.16, + "learning_rate": 1.711416490486258e-05, + "loss": 0.0416, + "step": 12452 + }, + { + "epoch": 13.16, + "learning_rate": 1.7108879492600424e-05, + "loss": 0.017, + "step": 12454 + }, + { + "epoch": 13.17, + "learning_rate": 1.7103594080338267e-05, + "loss": 0.0101, + "step": 12456 + }, + { + "epoch": 13.17, + "learning_rate": 1.709830866807611e-05, + "loss": 0.0318, + "step": 12458 + }, + { + "epoch": 13.17, + "learning_rate": 1.7093023255813952e-05, + "loss": 0.0352, + "step": 12460 + }, + { + "epoch": 13.17, + "learning_rate": 1.7087737843551798e-05, + "loss": 0.0109, + "step": 12462 + }, + { + "epoch": 13.18, + "learning_rate": 1.708245243128964e-05, + "loss": 0.0253, + "step": 12464 + }, + { + "epoch": 13.18, + "learning_rate": 1.7077167019027483e-05, + "loss": 0.0144, + "step": 12466 + }, + { + "epoch": 13.18, + "learning_rate": 1.7071881606765326e-05, + "loss": 0.0045, + "step": 12468 + }, + { + "epoch": 13.18, + "learning_rate": 1.7066596194503172e-05, + "loss": 0.0683, + "step": 12470 + }, + { + "epoch": 13.18, + "learning_rate": 1.7061310782241015e-05, + "loss": 0.0128, + "step": 12472 + }, + { + "epoch": 13.19, + "learning_rate": 1.705602536997886e-05, + "loss": 0.0172, + "step": 12474 + }, + { + "epoch": 13.19, + "learning_rate": 1.7050739957716703e-05, + "loss": 0.0041, + "step": 12476 + }, + { + "epoch": 13.19, + "learning_rate": 1.7045454545454546e-05, + "loss": 0.044, + "step": 12478 + }, + { + "epoch": 13.19, + "learning_rate": 1.7040169133192392e-05, + "loss": 0.0309, + "step": 12480 + }, + { + "epoch": 13.19, + "learning_rate": 1.7034883720930235e-05, + "loss": 0.0039, + "step": 12482 + }, + { + "epoch": 13.2, + "learning_rate": 1.7029598308668078e-05, + "loss": 0.0094, + "step": 12484 + }, + { + "epoch": 13.2, + "learning_rate": 1.702431289640592e-05, + "loss": 0.0042, + "step": 12486 + }, + { + "epoch": 13.2, + "learning_rate": 1.7019027484143766e-05, + "loss": 0.0219, + "step": 12488 + }, + { + "epoch": 13.2, + "learning_rate": 1.701374207188161e-05, + "loss": 0.0145, + "step": 12490 + }, + { + "epoch": 13.21, + "learning_rate": 1.700845665961945e-05, + "loss": 0.0336, + "step": 12492 + }, + { + "epoch": 13.21, + "learning_rate": 1.7003171247357294e-05, + "loss": 0.0401, + "step": 12494 + }, + { + "epoch": 13.21, + "learning_rate": 1.6997885835095137e-05, + "loss": 0.0391, + "step": 12496 + }, + { + "epoch": 13.21, + "learning_rate": 1.6992600422832983e-05, + "loss": 0.038, + "step": 12498 + }, + { + "epoch": 13.21, + "learning_rate": 1.6987315010570826e-05, + "loss": 0.0196, + "step": 12500 + }, + { + "epoch": 13.21, + "eval_cer": 0.06223995440296381, + "eval_loss": 0.755074143409729, + "eval_runtime": 124.4091, + "eval_samples_per_second": 6.76, + "eval_steps_per_second": 0.852, + "step": 12500 + }, + { + "epoch": 13.22, + "learning_rate": 1.698202959830867e-05, + "loss": 0.007, + "step": 12502 + }, + { + "epoch": 13.22, + "learning_rate": 1.697674418604651e-05, + "loss": 0.025, + "step": 12504 + }, + { + "epoch": 13.22, + "learning_rate": 1.6971458773784357e-05, + "loss": 0.029, + "step": 12506 + }, + { + "epoch": 13.22, + "learning_rate": 1.69661733615222e-05, + "loss": 0.0127, + "step": 12508 + }, + { + "epoch": 13.22, + "learning_rate": 1.6960887949260042e-05, + "loss": 0.0133, + "step": 12510 + }, + { + "epoch": 13.23, + "learning_rate": 1.6955602536997885e-05, + "loss": 0.0413, + "step": 12512 + }, + { + "epoch": 13.23, + "learning_rate": 1.6950317124735728e-05, + "loss": 0.0103, + "step": 12514 + }, + { + "epoch": 13.23, + "learning_rate": 1.6945031712473574e-05, + "loss": 0.0451, + "step": 12516 + }, + { + "epoch": 13.23, + "learning_rate": 1.6939746300211416e-05, + "loss": 0.0842, + "step": 12518 + }, + { + "epoch": 13.23, + "learning_rate": 1.693446088794926e-05, + "loss": 0.0592, + "step": 12520 + }, + { + "epoch": 13.24, + "learning_rate": 1.6929175475687102e-05, + "loss": 0.0405, + "step": 12522 + }, + { + "epoch": 13.24, + "learning_rate": 1.6923890063424948e-05, + "loss": 0.0995, + "step": 12524 + }, + { + "epoch": 13.24, + "learning_rate": 1.691860465116279e-05, + "loss": 0.0079, + "step": 12526 + }, + { + "epoch": 13.24, + "learning_rate": 1.6913319238900637e-05, + "loss": 0.0506, + "step": 12528 + }, + { + "epoch": 13.25, + "learning_rate": 1.690803382663848e-05, + "loss": 0.0021, + "step": 12530 + }, + { + "epoch": 13.25, + "learning_rate": 1.6902748414376322e-05, + "loss": 0.0799, + "step": 12532 + }, + { + "epoch": 13.25, + "learning_rate": 1.6897463002114168e-05, + "loss": 0.0053, + "step": 12534 + }, + { + "epoch": 13.25, + "learning_rate": 1.689217758985201e-05, + "loss": 0.0199, + "step": 12536 + }, + { + "epoch": 13.25, + "learning_rate": 1.6886892177589853e-05, + "loss": 0.0066, + "step": 12538 + }, + { + "epoch": 13.26, + "learning_rate": 1.6881606765327696e-05, + "loss": 0.015, + "step": 12540 + }, + { + "epoch": 13.26, + "learning_rate": 1.6876321353065542e-05, + "loss": 0.0059, + "step": 12542 + }, + { + "epoch": 13.26, + "learning_rate": 1.6871035940803385e-05, + "loss": 0.0768, + "step": 12544 + }, + { + "epoch": 13.26, + "learning_rate": 1.6865750528541227e-05, + "loss": 0.0812, + "step": 12546 + }, + { + "epoch": 13.26, + "learning_rate": 1.686046511627907e-05, + "loss": 0.0314, + "step": 12548 + }, + { + "epoch": 13.27, + "learning_rate": 1.6855179704016913e-05, + "loss": 0.0307, + "step": 12550 + }, + { + "epoch": 13.27, + "learning_rate": 1.684989429175476e-05, + "loss": 0.0369, + "step": 12552 + }, + { + "epoch": 13.27, + "learning_rate": 1.68446088794926e-05, + "loss": 0.0515, + "step": 12554 + }, + { + "epoch": 13.27, + "learning_rate": 1.6839323467230444e-05, + "loss": 0.0093, + "step": 12556 + }, + { + "epoch": 13.27, + "learning_rate": 1.6834038054968287e-05, + "loss": 0.0179, + "step": 12558 + }, + { + "epoch": 13.28, + "learning_rate": 1.6828752642706133e-05, + "loss": 0.0736, + "step": 12560 + }, + { + "epoch": 13.28, + "learning_rate": 1.6823467230443976e-05, + "loss": 0.0317, + "step": 12562 + }, + { + "epoch": 13.28, + "learning_rate": 1.6818181818181818e-05, + "loss": 0.0386, + "step": 12564 + }, + { + "epoch": 13.28, + "learning_rate": 1.681289640591966e-05, + "loss": 0.0064, + "step": 12566 + }, + { + "epoch": 13.29, + "learning_rate": 1.6807610993657504e-05, + "loss": 0.0181, + "step": 12568 + }, + { + "epoch": 13.29, + "learning_rate": 1.680232558139535e-05, + "loss": 0.0671, + "step": 12570 + }, + { + "epoch": 13.29, + "learning_rate": 1.6797040169133192e-05, + "loss": 0.0633, + "step": 12572 + }, + { + "epoch": 13.29, + "learning_rate": 1.6791754756871035e-05, + "loss": 0.0319, + "step": 12574 + }, + { + "epoch": 13.29, + "learning_rate": 1.678646934460888e-05, + "loss": 0.0428, + "step": 12576 + }, + { + "epoch": 13.3, + "learning_rate": 1.6781183932346724e-05, + "loss": 0.0128, + "step": 12578 + }, + { + "epoch": 13.3, + "learning_rate": 1.6775898520084566e-05, + "loss": 0.0144, + "step": 12580 + }, + { + "epoch": 13.3, + "learning_rate": 1.6770613107822412e-05, + "loss": 0.0642, + "step": 12582 + }, + { + "epoch": 13.3, + "learning_rate": 1.6765327695560255e-05, + "loss": 0.0288, + "step": 12584 + }, + { + "epoch": 13.3, + "learning_rate": 1.6760042283298098e-05, + "loss": 0.0676, + "step": 12586 + }, + { + "epoch": 13.31, + "learning_rate": 1.6754756871035944e-05, + "loss": 0.0565, + "step": 12588 + }, + { + "epoch": 13.31, + "learning_rate": 1.6749471458773787e-05, + "loss": 0.0209, + "step": 12590 + }, + { + "epoch": 13.31, + "learning_rate": 1.674418604651163e-05, + "loss": 0.0521, + "step": 12592 + }, + { + "epoch": 13.31, + "learning_rate": 1.6738900634249472e-05, + "loss": 0.0486, + "step": 12594 + }, + { + "epoch": 13.32, + "learning_rate": 1.6733615221987318e-05, + "loss": 0.0414, + "step": 12596 + }, + { + "epoch": 13.32, + "learning_rate": 1.672832980972516e-05, + "loss": 0.1086, + "step": 12598 + }, + { + "epoch": 13.32, + "learning_rate": 1.6723044397463003e-05, + "loss": 0.0127, + "step": 12600 + }, + { + "epoch": 13.32, + "learning_rate": 1.6717758985200846e-05, + "loss": 0.0137, + "step": 12602 + }, + { + "epoch": 13.32, + "learning_rate": 1.671247357293869e-05, + "loss": 0.0251, + "step": 12604 + }, + { + "epoch": 13.33, + "learning_rate": 1.6707188160676535e-05, + "loss": 0.0726, + "step": 12606 + }, + { + "epoch": 13.33, + "learning_rate": 1.6701902748414377e-05, + "loss": 0.0432, + "step": 12608 + }, + { + "epoch": 13.33, + "learning_rate": 1.669661733615222e-05, + "loss": 0.0551, + "step": 12610 + }, + { + "epoch": 13.33, + "learning_rate": 1.6691331923890063e-05, + "loss": 0.022, + "step": 12612 + }, + { + "epoch": 13.33, + "learning_rate": 1.668604651162791e-05, + "loss": 0.0516, + "step": 12614 + }, + { + "epoch": 13.34, + "learning_rate": 1.668076109936575e-05, + "loss": 0.0044, + "step": 12616 + }, + { + "epoch": 13.34, + "learning_rate": 1.6675475687103594e-05, + "loss": 0.0202, + "step": 12618 + }, + { + "epoch": 13.34, + "learning_rate": 1.6670190274841437e-05, + "loss": 0.0538, + "step": 12620 + }, + { + "epoch": 13.34, + "learning_rate": 1.666490486257928e-05, + "loss": 0.0165, + "step": 12622 + }, + { + "epoch": 13.34, + "learning_rate": 1.6659619450317125e-05, + "loss": 0.0322, + "step": 12624 + }, + { + "epoch": 13.35, + "learning_rate": 1.6654334038054968e-05, + "loss": 0.043, + "step": 12626 + }, + { + "epoch": 13.35, + "learning_rate": 1.664904862579281e-05, + "loss": 0.0144, + "step": 12628 + }, + { + "epoch": 13.35, + "learning_rate": 1.6643763213530657e-05, + "loss": 0.0054, + "step": 12630 + }, + { + "epoch": 13.35, + "learning_rate": 1.66384778012685e-05, + "loss": 0.0574, + "step": 12632 + }, + { + "epoch": 13.36, + "learning_rate": 1.6633192389006346e-05, + "loss": 0.0266, + "step": 12634 + }, + { + "epoch": 13.36, + "learning_rate": 1.6627906976744188e-05, + "loss": 0.0316, + "step": 12636 + }, + { + "epoch": 13.36, + "learning_rate": 1.662262156448203e-05, + "loss": 0.0876, + "step": 12638 + }, + { + "epoch": 13.36, + "learning_rate": 1.6617336152219874e-05, + "loss": 0.0855, + "step": 12640 + }, + { + "epoch": 13.36, + "learning_rate": 1.661205073995772e-05, + "loss": 0.1275, + "step": 12642 + }, + { + "epoch": 13.37, + "learning_rate": 1.6606765327695562e-05, + "loss": 0.0203, + "step": 12644 + }, + { + "epoch": 13.37, + "learning_rate": 1.6601479915433405e-05, + "loss": 0.0323, + "step": 12646 + }, + { + "epoch": 13.37, + "learning_rate": 1.6596194503171248e-05, + "loss": 0.0605, + "step": 12648 + }, + { + "epoch": 13.37, + "learning_rate": 1.6590909090909094e-05, + "loss": 0.0931, + "step": 12650 + }, + { + "epoch": 13.37, + "learning_rate": 1.6585623678646936e-05, + "loss": 0.0625, + "step": 12652 + }, + { + "epoch": 13.38, + "learning_rate": 1.658033826638478e-05, + "loss": 0.1087, + "step": 12654 + }, + { + "epoch": 13.38, + "learning_rate": 1.6575052854122622e-05, + "loss": 0.0316, + "step": 12656 + }, + { + "epoch": 13.38, + "learning_rate": 1.6569767441860464e-05, + "loss": 0.0726, + "step": 12658 + }, + { + "epoch": 13.38, + "learning_rate": 1.656448202959831e-05, + "loss": 0.0508, + "step": 12660 + }, + { + "epoch": 13.38, + "learning_rate": 1.6559196617336153e-05, + "loss": 0.0491, + "step": 12662 + }, + { + "epoch": 13.39, + "learning_rate": 1.6553911205073996e-05, + "loss": 0.0286, + "step": 12664 + }, + { + "epoch": 13.39, + "learning_rate": 1.654862579281184e-05, + "loss": 0.0067, + "step": 12666 + }, + { + "epoch": 13.39, + "learning_rate": 1.6543340380549685e-05, + "loss": 0.0397, + "step": 12668 + }, + { + "epoch": 13.39, + "learning_rate": 1.6538054968287527e-05, + "loss": 0.0147, + "step": 12670 + }, + { + "epoch": 13.4, + "learning_rate": 1.653276955602537e-05, + "loss": 0.0627, + "step": 12672 + }, + { + "epoch": 13.4, + "learning_rate": 1.6527484143763213e-05, + "loss": 0.0232, + "step": 12674 + }, + { + "epoch": 13.4, + "learning_rate": 1.6522198731501055e-05, + "loss": 0.0828, + "step": 12676 + }, + { + "epoch": 13.4, + "learning_rate": 1.65169133192389e-05, + "loss": 0.0694, + "step": 12678 + }, + { + "epoch": 13.4, + "learning_rate": 1.6511627906976744e-05, + "loss": 0.0384, + "step": 12680 + }, + { + "epoch": 13.41, + "learning_rate": 1.650634249471459e-05, + "loss": 0.0524, + "step": 12682 + }, + { + "epoch": 13.41, + "learning_rate": 1.6501057082452433e-05, + "loss": 0.0372, + "step": 12684 + }, + { + "epoch": 13.41, + "learning_rate": 1.6495771670190275e-05, + "loss": 0.0427, + "step": 12686 + }, + { + "epoch": 13.41, + "learning_rate": 1.649048625792812e-05, + "loss": 0.0364, + "step": 12688 + }, + { + "epoch": 13.41, + "learning_rate": 1.6485200845665964e-05, + "loss": 0.0206, + "step": 12690 + }, + { + "epoch": 13.42, + "learning_rate": 1.6479915433403807e-05, + "loss": 0.0788, + "step": 12692 + }, + { + "epoch": 13.42, + "learning_rate": 1.647463002114165e-05, + "loss": 0.0328, + "step": 12694 + }, + { + "epoch": 13.42, + "learning_rate": 1.6469344608879496e-05, + "loss": 0.0387, + "step": 12696 + }, + { + "epoch": 13.42, + "learning_rate": 1.6464059196617338e-05, + "loss": 0.0256, + "step": 12698 + }, + { + "epoch": 13.42, + "learning_rate": 1.645877378435518e-05, + "loss": 0.0057, + "step": 12700 + }, + { + "epoch": 13.43, + "learning_rate": 1.6453488372093024e-05, + "loss": 0.0207, + "step": 12702 + }, + { + "epoch": 13.43, + "learning_rate": 1.6448202959830866e-05, + "loss": 0.026, + "step": 12704 + }, + { + "epoch": 13.43, + "learning_rate": 1.6442917547568712e-05, + "loss": 0.0282, + "step": 12706 + }, + { + "epoch": 13.43, + "learning_rate": 1.6437632135306555e-05, + "loss": 0.0158, + "step": 12708 + }, + { + "epoch": 13.44, + "learning_rate": 1.6432346723044398e-05, + "loss": 0.0053, + "step": 12710 + }, + { + "epoch": 13.44, + "learning_rate": 1.642706131078224e-05, + "loss": 0.0359, + "step": 12712 + }, + { + "epoch": 13.44, + "learning_rate": 1.6421775898520086e-05, + "loss": 0.0409, + "step": 12714 + }, + { + "epoch": 13.44, + "learning_rate": 1.641649048625793e-05, + "loss": 0.0491, + "step": 12716 + }, + { + "epoch": 13.44, + "learning_rate": 1.641120507399577e-05, + "loss": 0.0802, + "step": 12718 + }, + { + "epoch": 13.45, + "learning_rate": 1.6405919661733614e-05, + "loss": 0.014, + "step": 12720 + }, + { + "epoch": 13.45, + "learning_rate": 1.6400634249471457e-05, + "loss": 0.0092, + "step": 12722 + }, + { + "epoch": 13.45, + "learning_rate": 1.6395348837209303e-05, + "loss": 0.017, + "step": 12724 + }, + { + "epoch": 13.45, + "learning_rate": 1.6390063424947146e-05, + "loss": 0.0035, + "step": 12726 + }, + { + "epoch": 13.45, + "learning_rate": 1.638477801268499e-05, + "loss": 0.0399, + "step": 12728 + }, + { + "epoch": 13.46, + "learning_rate": 1.637949260042283e-05, + "loss": 0.047, + "step": 12730 + }, + { + "epoch": 13.46, + "learning_rate": 1.6374207188160677e-05, + "loss": 0.0686, + "step": 12732 + }, + { + "epoch": 13.46, + "learning_rate": 1.636892177589852e-05, + "loss": 0.0192, + "step": 12734 + }, + { + "epoch": 13.46, + "learning_rate": 1.6363636363636366e-05, + "loss": 0.0117, + "step": 12736 + }, + { + "epoch": 13.47, + "learning_rate": 1.635835095137421e-05, + "loss": 0.0389, + "step": 12738 + }, + { + "epoch": 13.47, + "learning_rate": 1.635306553911205e-05, + "loss": 0.013, + "step": 12740 + }, + { + "epoch": 13.47, + "learning_rate": 1.6347780126849897e-05, + "loss": 0.0425, + "step": 12742 + }, + { + "epoch": 13.47, + "learning_rate": 1.634249471458774e-05, + "loss": 0.0402, + "step": 12744 + }, + { + "epoch": 13.47, + "learning_rate": 1.6337209302325583e-05, + "loss": 0.0108, + "step": 12746 + }, + { + "epoch": 13.48, + "learning_rate": 1.6331923890063425e-05, + "loss": 0.0307, + "step": 12748 + }, + { + "epoch": 13.48, + "learning_rate": 1.632663847780127e-05, + "loss": 0.0325, + "step": 12750 + }, + { + "epoch": 13.48, + "learning_rate": 1.6321353065539114e-05, + "loss": 0.0307, + "step": 12752 + }, + { + "epoch": 13.48, + "learning_rate": 1.6316067653276957e-05, + "loss": 0.0683, + "step": 12754 + }, + { + "epoch": 13.48, + "learning_rate": 1.63107822410148e-05, + "loss": 0.1128, + "step": 12756 + }, + { + "epoch": 13.49, + "learning_rate": 1.6305496828752642e-05, + "loss": 0.0099, + "step": 12758 + }, + { + "epoch": 13.49, + "learning_rate": 1.6300211416490488e-05, + "loss": 0.0266, + "step": 12760 + }, + { + "epoch": 13.49, + "learning_rate": 1.629492600422833e-05, + "loss": 0.0508, + "step": 12762 + }, + { + "epoch": 13.49, + "learning_rate": 1.6289640591966173e-05, + "loss": 0.0086, + "step": 12764 + }, + { + "epoch": 13.49, + "learning_rate": 1.6284355179704016e-05, + "loss": 0.0065, + "step": 12766 + }, + { + "epoch": 13.5, + "learning_rate": 1.6279069767441862e-05, + "loss": 0.0293, + "step": 12768 + }, + { + "epoch": 13.5, + "learning_rate": 1.6273784355179705e-05, + "loss": 0.0284, + "step": 12770 + }, + { + "epoch": 13.5, + "learning_rate": 1.6268498942917548e-05, + "loss": 0.049, + "step": 12772 + }, + { + "epoch": 13.5, + "learning_rate": 1.626321353065539e-05, + "loss": 0.0055, + "step": 12774 + }, + { + "epoch": 13.51, + "learning_rate": 1.6257928118393233e-05, + "loss": 0.0229, + "step": 12776 + }, + { + "epoch": 13.51, + "learning_rate": 1.625264270613108e-05, + "loss": 0.025, + "step": 12778 + }, + { + "epoch": 13.51, + "learning_rate": 1.624735729386892e-05, + "loss": 0.0119, + "step": 12780 + }, + { + "epoch": 13.51, + "learning_rate": 1.6242071881606764e-05, + "loss": 0.0036, + "step": 12782 + }, + { + "epoch": 13.51, + "learning_rate": 1.623678646934461e-05, + "loss": 0.0205, + "step": 12784 + }, + { + "epoch": 13.52, + "learning_rate": 1.6231501057082453e-05, + "loss": 0.0826, + "step": 12786 + }, + { + "epoch": 13.52, + "learning_rate": 1.6226215644820296e-05, + "loss": 0.0318, + "step": 12788 + }, + { + "epoch": 13.52, + "learning_rate": 1.6220930232558142e-05, + "loss": 0.06, + "step": 12790 + }, + { + "epoch": 13.52, + "learning_rate": 1.6215644820295984e-05, + "loss": 0.0567, + "step": 12792 + }, + { + "epoch": 13.52, + "learning_rate": 1.6210359408033827e-05, + "loss": 0.0042, + "step": 12794 + }, + { + "epoch": 13.53, + "learning_rate": 1.6205073995771673e-05, + "loss": 0.0224, + "step": 12796 + }, + { + "epoch": 13.53, + "learning_rate": 1.6199788583509516e-05, + "loss": 0.0903, + "step": 12798 + }, + { + "epoch": 13.53, + "learning_rate": 1.619450317124736e-05, + "loss": 0.0358, + "step": 12800 + }, + { + "epoch": 13.53, + "learning_rate": 1.61892177589852e-05, + "loss": 0.0387, + "step": 12802 + }, + { + "epoch": 13.53, + "learning_rate": 1.6183932346723047e-05, + "loss": 0.012, + "step": 12804 + }, + { + "epoch": 13.54, + "learning_rate": 1.617864693446089e-05, + "loss": 0.0068, + "step": 12806 + }, + { + "epoch": 13.54, + "learning_rate": 1.6173361522198733e-05, + "loss": 0.0562, + "step": 12808 + }, + { + "epoch": 13.54, + "learning_rate": 1.6168076109936575e-05, + "loss": 0.0161, + "step": 12810 + }, + { + "epoch": 13.54, + "learning_rate": 1.6162790697674418e-05, + "loss": 0.1477, + "step": 12812 + }, + { + "epoch": 13.55, + "learning_rate": 1.6157505285412264e-05, + "loss": 0.0068, + "step": 12814 + }, + { + "epoch": 13.55, + "learning_rate": 1.6152219873150107e-05, + "loss": 0.0012, + "step": 12816 + }, + { + "epoch": 13.55, + "learning_rate": 1.614693446088795e-05, + "loss": 0.0062, + "step": 12818 + }, + { + "epoch": 13.55, + "learning_rate": 1.6141649048625792e-05, + "loss": 0.0027, + "step": 12820 + }, + { + "epoch": 13.55, + "learning_rate": 1.6136363636363638e-05, + "loss": 0.0334, + "step": 12822 + }, + { + "epoch": 13.56, + "learning_rate": 1.613107822410148e-05, + "loss": 0.0741, + "step": 12824 + }, + { + "epoch": 13.56, + "learning_rate": 1.6125792811839323e-05, + "loss": 0.0322, + "step": 12826 + }, + { + "epoch": 13.56, + "learning_rate": 1.6120507399577166e-05, + "loss": 0.0601, + "step": 12828 + }, + { + "epoch": 13.56, + "learning_rate": 1.611522198731501e-05, + "loss": 0.0885, + "step": 12830 + }, + { + "epoch": 13.56, + "learning_rate": 1.6109936575052855e-05, + "loss": 0.0439, + "step": 12832 + }, + { + "epoch": 13.57, + "learning_rate": 1.6104651162790697e-05, + "loss": 0.0516, + "step": 12834 + }, + { + "epoch": 13.57, + "learning_rate": 1.609936575052854e-05, + "loss": 0.0019, + "step": 12836 + }, + { + "epoch": 13.57, + "learning_rate": 1.6094080338266386e-05, + "loss": 0.0077, + "step": 12838 + }, + { + "epoch": 13.57, + "learning_rate": 1.608879492600423e-05, + "loss": 0.0339, + "step": 12840 + }, + { + "epoch": 13.58, + "learning_rate": 1.6083509513742075e-05, + "loss": 0.095, + "step": 12842 + }, + { + "epoch": 13.58, + "learning_rate": 1.6078224101479918e-05, + "loss": 0.0199, + "step": 12844 + }, + { + "epoch": 13.58, + "learning_rate": 1.607293868921776e-05, + "loss": 0.0221, + "step": 12846 + }, + { + "epoch": 13.58, + "learning_rate": 1.6067653276955603e-05, + "loss": 0.0254, + "step": 12848 + }, + { + "epoch": 13.58, + "learning_rate": 1.606236786469345e-05, + "loss": 0.0099, + "step": 12850 + }, + { + "epoch": 13.59, + "learning_rate": 1.605708245243129e-05, + "loss": 0.0319, + "step": 12852 + }, + { + "epoch": 13.59, + "learning_rate": 1.6051797040169134e-05, + "loss": 0.0054, + "step": 12854 + }, + { + "epoch": 13.59, + "learning_rate": 1.6046511627906977e-05, + "loss": 0.0582, + "step": 12856 + }, + { + "epoch": 13.59, + "learning_rate": 1.6041226215644823e-05, + "loss": 0.0241, + "step": 12858 + }, + { + "epoch": 13.59, + "learning_rate": 1.6035940803382666e-05, + "loss": 0.0605, + "step": 12860 + }, + { + "epoch": 13.6, + "learning_rate": 1.603065539112051e-05, + "loss": 0.0041, + "step": 12862 + }, + { + "epoch": 13.6, + "learning_rate": 1.602536997885835e-05, + "loss": 0.0449, + "step": 12864 + }, + { + "epoch": 13.6, + "learning_rate": 1.6020084566596194e-05, + "loss": 0.037, + "step": 12866 + }, + { + "epoch": 13.6, + "learning_rate": 1.601479915433404e-05, + "loss": 0.0217, + "step": 12868 + }, + { + "epoch": 13.6, + "learning_rate": 1.6009513742071882e-05, + "loss": 0.0166, + "step": 12870 + }, + { + "epoch": 13.61, + "learning_rate": 1.6004228329809725e-05, + "loss": 0.0344, + "step": 12872 + }, + { + "epoch": 13.61, + "learning_rate": 1.5998942917547568e-05, + "loss": 0.0757, + "step": 12874 + }, + { + "epoch": 13.61, + "learning_rate": 1.5993657505285414e-05, + "loss": 0.0141, + "step": 12876 + }, + { + "epoch": 13.61, + "learning_rate": 1.5988372093023257e-05, + "loss": 0.0195, + "step": 12878 + }, + { + "epoch": 13.62, + "learning_rate": 1.59830866807611e-05, + "loss": 0.0059, + "step": 12880 + }, + { + "epoch": 13.62, + "learning_rate": 1.5977801268498942e-05, + "loss": 0.0134, + "step": 12882 + }, + { + "epoch": 13.62, + "learning_rate": 1.5972515856236785e-05, + "loss": 0.0369, + "step": 12884 + }, + { + "epoch": 13.62, + "learning_rate": 1.596723044397463e-05, + "loss": 0.0202, + "step": 12886 + }, + { + "epoch": 13.62, + "learning_rate": 1.5961945031712473e-05, + "loss": 0.0057, + "step": 12888 + }, + { + "epoch": 13.63, + "learning_rate": 1.5956659619450316e-05, + "loss": 0.0126, + "step": 12890 + }, + { + "epoch": 13.63, + "learning_rate": 1.5951374207188162e-05, + "loss": 0.0295, + "step": 12892 + }, + { + "epoch": 13.63, + "learning_rate": 1.5946088794926005e-05, + "loss": 0.0557, + "step": 12894 + }, + { + "epoch": 13.63, + "learning_rate": 1.594080338266385e-05, + "loss": 0.0056, + "step": 12896 + }, + { + "epoch": 13.63, + "learning_rate": 1.5935517970401693e-05, + "loss": 0.0762, + "step": 12898 + }, + { + "epoch": 13.64, + "learning_rate": 1.5930232558139536e-05, + "loss": 0.1435, + "step": 12900 + }, + { + "epoch": 13.64, + "learning_rate": 1.592494714587738e-05, + "loss": 0.0182, + "step": 12902 + }, + { + "epoch": 13.64, + "learning_rate": 1.5919661733615225e-05, + "loss": 0.0355, + "step": 12904 + }, + { + "epoch": 13.64, + "learning_rate": 1.5914376321353067e-05, + "loss": 0.0059, + "step": 12906 + }, + { + "epoch": 13.64, + "learning_rate": 1.590909090909091e-05, + "loss": 0.0151, + "step": 12908 + }, + { + "epoch": 13.65, + "learning_rate": 1.5903805496828753e-05, + "loss": 0.0204, + "step": 12910 + }, + { + "epoch": 13.65, + "learning_rate": 1.58985200845666e-05, + "loss": 0.006, + "step": 12912 + }, + { + "epoch": 13.65, + "learning_rate": 1.589323467230444e-05, + "loss": 0.0701, + "step": 12914 + }, + { + "epoch": 13.65, + "learning_rate": 1.5887949260042284e-05, + "loss": 0.0503, + "step": 12916 + }, + { + "epoch": 13.66, + "learning_rate": 1.5882663847780127e-05, + "loss": 0.0082, + "step": 12918 + }, + { + "epoch": 13.66, + "learning_rate": 1.587737843551797e-05, + "loss": 0.0953, + "step": 12920 + }, + { + "epoch": 13.66, + "learning_rate": 1.5872093023255816e-05, + "loss": 0.0477, + "step": 12922 + }, + { + "epoch": 13.66, + "learning_rate": 1.586680761099366e-05, + "loss": 0.0165, + "step": 12924 + }, + { + "epoch": 13.66, + "learning_rate": 1.58615221987315e-05, + "loss": 0.0203, + "step": 12926 + }, + { + "epoch": 13.67, + "learning_rate": 1.5856236786469344e-05, + "loss": 0.0265, + "step": 12928 + }, + { + "epoch": 13.67, + "learning_rate": 1.585095137420719e-05, + "loss": 0.0096, + "step": 12930 + }, + { + "epoch": 13.67, + "learning_rate": 1.5845665961945032e-05, + "loss": 0.001, + "step": 12932 + }, + { + "epoch": 13.67, + "learning_rate": 1.5840380549682875e-05, + "loss": 0.0594, + "step": 12934 + }, + { + "epoch": 13.67, + "learning_rate": 1.5835095137420718e-05, + "loss": 0.0072, + "step": 12936 + }, + { + "epoch": 13.68, + "learning_rate": 1.582980972515856e-05, + "loss": 0.0511, + "step": 12938 + }, + { + "epoch": 13.68, + "learning_rate": 1.5824524312896406e-05, + "loss": 0.0159, + "step": 12940 + }, + { + "epoch": 13.68, + "learning_rate": 1.581923890063425e-05, + "loss": 0.0259, + "step": 12942 + }, + { + "epoch": 13.68, + "learning_rate": 1.5813953488372095e-05, + "loss": 0.1291, + "step": 12944 + }, + { + "epoch": 13.68, + "learning_rate": 1.5808668076109938e-05, + "loss": 0.043, + "step": 12946 + }, + { + "epoch": 13.69, + "learning_rate": 1.580338266384778e-05, + "loss": 0.0402, + "step": 12948 + }, + { + "epoch": 13.69, + "learning_rate": 1.5798097251585627e-05, + "loss": 0.0065, + "step": 12950 + }, + { + "epoch": 13.69, + "learning_rate": 1.579281183932347e-05, + "loss": 0.0221, + "step": 12952 + }, + { + "epoch": 13.69, + "learning_rate": 1.5787526427061312e-05, + "loss": 0.0411, + "step": 12954 + }, + { + "epoch": 13.7, + "learning_rate": 1.5782241014799155e-05, + "loss": 0.0816, + "step": 12956 + }, + { + "epoch": 13.7, + "learning_rate": 1.5776955602537e-05, + "loss": 0.0357, + "step": 12958 + }, + { + "epoch": 13.7, + "learning_rate": 1.5771670190274843e-05, + "loss": 0.0548, + "step": 12960 + }, + { + "epoch": 13.7, + "learning_rate": 1.5766384778012686e-05, + "loss": 0.0553, + "step": 12962 + }, + { + "epoch": 13.7, + "learning_rate": 1.576109936575053e-05, + "loss": 0.0188, + "step": 12964 + }, + { + "epoch": 13.71, + "learning_rate": 1.5755813953488375e-05, + "loss": 0.066, + "step": 12966 + }, + { + "epoch": 13.71, + "learning_rate": 1.5750528541226217e-05, + "loss": 0.0554, + "step": 12968 + }, + { + "epoch": 13.71, + "learning_rate": 1.574524312896406e-05, + "loss": 0.0076, + "step": 12970 + }, + { + "epoch": 13.71, + "learning_rate": 1.5739957716701903e-05, + "loss": 0.0499, + "step": 12972 + }, + { + "epoch": 13.71, + "learning_rate": 1.5734672304439745e-05, + "loss": 0.0239, + "step": 12974 + }, + { + "epoch": 13.72, + "learning_rate": 1.572938689217759e-05, + "loss": 0.009, + "step": 12976 + }, + { + "epoch": 13.72, + "learning_rate": 1.5724101479915434e-05, + "loss": 0.0518, + "step": 12978 + }, + { + "epoch": 13.72, + "learning_rate": 1.5718816067653277e-05, + "loss": 0.0334, + "step": 12980 + }, + { + "epoch": 13.72, + "learning_rate": 1.571353065539112e-05, + "loss": 0.0415, + "step": 12982 + }, + { + "epoch": 13.73, + "learning_rate": 1.5708245243128966e-05, + "loss": 0.0461, + "step": 12984 + }, + { + "epoch": 13.73, + "learning_rate": 1.5702959830866808e-05, + "loss": 0.0394, + "step": 12986 + }, + { + "epoch": 13.73, + "learning_rate": 1.569767441860465e-05, + "loss": 0.0133, + "step": 12988 + }, + { + "epoch": 13.73, + "learning_rate": 1.5692389006342494e-05, + "loss": 0.0237, + "step": 12990 + }, + { + "epoch": 13.73, + "learning_rate": 1.5687103594080336e-05, + "loss": 0.0313, + "step": 12992 + }, + { + "epoch": 13.74, + "learning_rate": 1.5681818181818182e-05, + "loss": 0.0376, + "step": 12994 + }, + { + "epoch": 13.74, + "learning_rate": 1.5676532769556025e-05, + "loss": 0.0207, + "step": 12996 + }, + { + "epoch": 13.74, + "learning_rate": 1.567124735729387e-05, + "loss": 0.0201, + "step": 12998 + }, + { + "epoch": 13.74, + "learning_rate": 1.5665961945031714e-05, + "loss": 0.0495, + "step": 13000 + }, + { + "epoch": 13.74, + "eval_cer": 0.04098033627814192, + "eval_loss": 0.49177786707878113, + "eval_runtime": 126.4592, + "eval_samples_per_second": 6.65, + "eval_steps_per_second": 0.838, + "step": 13000 + }, + { + "epoch": 13.74, + "learning_rate": 1.566067653276956e-05, + "loss": 0.0182, + "step": 13002 + }, + { + "epoch": 13.75, + "learning_rate": 1.5655391120507402e-05, + "loss": 0.0455, + "step": 13004 + }, + { + "epoch": 13.75, + "learning_rate": 1.5650105708245245e-05, + "loss": 0.0239, + "step": 13006 + }, + { + "epoch": 13.75, + "learning_rate": 1.5644820295983088e-05, + "loss": 0.0461, + "step": 13008 + }, + { + "epoch": 13.75, + "learning_rate": 1.563953488372093e-05, + "loss": 0.0214, + "step": 13010 + }, + { + "epoch": 13.75, + "learning_rate": 1.5634249471458776e-05, + "loss": 0.0747, + "step": 13012 + }, + { + "epoch": 13.76, + "learning_rate": 1.562896405919662e-05, + "loss": 0.0303, + "step": 13014 + }, + { + "epoch": 13.76, + "learning_rate": 1.5623678646934462e-05, + "loss": 0.0234, + "step": 13016 + }, + { + "epoch": 13.76, + "learning_rate": 1.5618393234672305e-05, + "loss": 0.0324, + "step": 13018 + }, + { + "epoch": 13.76, + "learning_rate": 1.5613107822410147e-05, + "loss": 0.0018, + "step": 13020 + }, + { + "epoch": 13.77, + "learning_rate": 1.5607822410147993e-05, + "loss": 0.0218, + "step": 13022 + }, + { + "epoch": 13.77, + "learning_rate": 1.5602536997885836e-05, + "loss": 0.0367, + "step": 13024 + }, + { + "epoch": 13.77, + "learning_rate": 1.559725158562368e-05, + "loss": 0.0582, + "step": 13026 + }, + { + "epoch": 13.77, + "learning_rate": 1.559196617336152e-05, + "loss": 0.0355, + "step": 13028 + }, + { + "epoch": 13.77, + "learning_rate": 1.5586680761099367e-05, + "loss": 0.0429, + "step": 13030 + }, + { + "epoch": 13.78, + "learning_rate": 1.558139534883721e-05, + "loss": 0.0299, + "step": 13032 + }, + { + "epoch": 13.78, + "learning_rate": 1.5576109936575053e-05, + "loss": 0.0404, + "step": 13034 + }, + { + "epoch": 13.78, + "learning_rate": 1.5570824524312895e-05, + "loss": 0.0406, + "step": 13036 + }, + { + "epoch": 13.78, + "learning_rate": 1.5565539112050738e-05, + "loss": 0.0185, + "step": 13038 + }, + { + "epoch": 13.78, + "learning_rate": 1.5560253699788584e-05, + "loss": 0.0204, + "step": 13040 + }, + { + "epoch": 13.79, + "learning_rate": 1.5554968287526427e-05, + "loss": 0.0058, + "step": 13042 + }, + { + "epoch": 13.79, + "learning_rate": 1.554968287526427e-05, + "loss": 0.0612, + "step": 13044 + }, + { + "epoch": 13.79, + "learning_rate": 1.5544397463002115e-05, + "loss": 0.0198, + "step": 13046 + }, + { + "epoch": 13.79, + "learning_rate": 1.5539112050739958e-05, + "loss": 0.0196, + "step": 13048 + }, + { + "epoch": 13.79, + "learning_rate": 1.5533826638477804e-05, + "loss": 0.0012, + "step": 13050 + }, + { + "epoch": 13.8, + "learning_rate": 1.5528541226215647e-05, + "loss": 0.029, + "step": 13052 + }, + { + "epoch": 13.8, + "learning_rate": 1.552325581395349e-05, + "loss": 0.0228, + "step": 13054 + }, + { + "epoch": 13.8, + "learning_rate": 1.5517970401691332e-05, + "loss": 0.0367, + "step": 13056 + }, + { + "epoch": 13.8, + "learning_rate": 1.5512684989429178e-05, + "loss": 0.0608, + "step": 13058 + }, + { + "epoch": 13.81, + "learning_rate": 1.550739957716702e-05, + "loss": 0.0569, + "step": 13060 + }, + { + "epoch": 13.81, + "learning_rate": 1.5502114164904864e-05, + "loss": 0.0054, + "step": 13062 + }, + { + "epoch": 13.81, + "learning_rate": 1.5496828752642706e-05, + "loss": 0.0158, + "step": 13064 + }, + { + "epoch": 13.81, + "learning_rate": 1.5491543340380552e-05, + "loss": 0.0341, + "step": 13066 + }, + { + "epoch": 13.81, + "learning_rate": 1.5486257928118395e-05, + "loss": 0.0202, + "step": 13068 + }, + { + "epoch": 13.82, + "learning_rate": 1.5480972515856238e-05, + "loss": 0.0623, + "step": 13070 + }, + { + "epoch": 13.82, + "learning_rate": 1.547568710359408e-05, + "loss": 0.0062, + "step": 13072 + }, + { + "epoch": 13.82, + "learning_rate": 1.5470401691331923e-05, + "loss": 0.0139, + "step": 13074 + }, + { + "epoch": 13.82, + "learning_rate": 1.546511627906977e-05, + "loss": 0.0107, + "step": 13076 + }, + { + "epoch": 13.82, + "learning_rate": 1.5459830866807612e-05, + "loss": 0.0138, + "step": 13078 + }, + { + "epoch": 13.83, + "learning_rate": 1.5454545454545454e-05, + "loss": 0.0398, + "step": 13080 + }, + { + "epoch": 13.83, + "learning_rate": 1.5449260042283297e-05, + "loss": 0.0256, + "step": 13082 + }, + { + "epoch": 13.83, + "learning_rate": 1.5443974630021143e-05, + "loss": 0.0062, + "step": 13084 + }, + { + "epoch": 13.83, + "learning_rate": 1.5438689217758986e-05, + "loss": 0.0163, + "step": 13086 + }, + { + "epoch": 13.84, + "learning_rate": 1.543340380549683e-05, + "loss": 0.0525, + "step": 13088 + }, + { + "epoch": 13.84, + "learning_rate": 1.542811839323467e-05, + "loss": 0.1588, + "step": 13090 + }, + { + "epoch": 13.84, + "learning_rate": 1.5422832980972514e-05, + "loss": 0.0386, + "step": 13092 + }, + { + "epoch": 13.84, + "learning_rate": 1.541754756871036e-05, + "loss": 0.0213, + "step": 13094 + }, + { + "epoch": 13.84, + "learning_rate": 1.5412262156448203e-05, + "loss": 0.0187, + "step": 13096 + }, + { + "epoch": 13.85, + "learning_rate": 1.5406976744186045e-05, + "loss": 0.0402, + "step": 13098 + }, + { + "epoch": 13.85, + "learning_rate": 1.540169133192389e-05, + "loss": 0.0738, + "step": 13100 + }, + { + "epoch": 13.85, + "learning_rate": 1.5396405919661734e-05, + "loss": 0.0397, + "step": 13102 + }, + { + "epoch": 13.85, + "learning_rate": 1.539112050739958e-05, + "loss": 0.0111, + "step": 13104 + }, + { + "epoch": 13.85, + "learning_rate": 1.5385835095137423e-05, + "loss": 0.0102, + "step": 13106 + }, + { + "epoch": 13.86, + "learning_rate": 1.5380549682875265e-05, + "loss": 0.089, + "step": 13108 + }, + { + "epoch": 13.86, + "learning_rate": 1.5375264270613108e-05, + "loss": 0.0461, + "step": 13110 + }, + { + "epoch": 13.86, + "learning_rate": 1.5369978858350954e-05, + "loss": 0.0456, + "step": 13112 + }, + { + "epoch": 13.86, + "learning_rate": 1.5364693446088797e-05, + "loss": 0.0304, + "step": 13114 + }, + { + "epoch": 13.86, + "learning_rate": 1.535940803382664e-05, + "loss": 0.0566, + "step": 13116 + }, + { + "epoch": 13.87, + "learning_rate": 1.5354122621564482e-05, + "loss": 0.0024, + "step": 13118 + }, + { + "epoch": 13.87, + "learning_rate": 1.5348837209302328e-05, + "loss": 0.0571, + "step": 13120 + }, + { + "epoch": 13.87, + "learning_rate": 1.534355179704017e-05, + "loss": 0.0012, + "step": 13122 + }, + { + "epoch": 13.87, + "learning_rate": 1.5338266384778014e-05, + "loss": 0.0618, + "step": 13124 + }, + { + "epoch": 13.88, + "learning_rate": 1.5332980972515856e-05, + "loss": 0.035, + "step": 13126 + }, + { + "epoch": 13.88, + "learning_rate": 1.53276955602537e-05, + "loss": 0.0343, + "step": 13128 + }, + { + "epoch": 13.88, + "learning_rate": 1.5322410147991545e-05, + "loss": 0.0398, + "step": 13130 + }, + { + "epoch": 13.88, + "learning_rate": 1.5317124735729388e-05, + "loss": 0.1157, + "step": 13132 + }, + { + "epoch": 13.88, + "learning_rate": 1.531183932346723e-05, + "loss": 0.0126, + "step": 13134 + }, + { + "epoch": 13.89, + "learning_rate": 1.5306553911205073e-05, + "loss": 0.0197, + "step": 13136 + }, + { + "epoch": 13.89, + "learning_rate": 1.530126849894292e-05, + "loss": 0.0207, + "step": 13138 + }, + { + "epoch": 13.89, + "learning_rate": 1.529598308668076e-05, + "loss": 0.0249, + "step": 13140 + }, + { + "epoch": 13.89, + "learning_rate": 1.5290697674418604e-05, + "loss": 0.0116, + "step": 13142 + }, + { + "epoch": 13.89, + "learning_rate": 1.5285412262156447e-05, + "loss": 0.0212, + "step": 13144 + }, + { + "epoch": 13.9, + "learning_rate": 1.528012684989429e-05, + "loss": 0.0302, + "step": 13146 + }, + { + "epoch": 13.9, + "learning_rate": 1.5274841437632136e-05, + "loss": 0.0309, + "step": 13148 + }, + { + "epoch": 13.9, + "learning_rate": 1.526955602536998e-05, + "loss": 0.0287, + "step": 13150 + }, + { + "epoch": 13.9, + "learning_rate": 1.5264270613107824e-05, + "loss": 0.0315, + "step": 13152 + }, + { + "epoch": 13.9, + "learning_rate": 1.5258985200845667e-05, + "loss": 0.0123, + "step": 13154 + }, + { + "epoch": 13.91, + "learning_rate": 1.5253699788583512e-05, + "loss": 0.0107, + "step": 13156 + }, + { + "epoch": 13.91, + "learning_rate": 1.5248414376321354e-05, + "loss": 0.0162, + "step": 13158 + }, + { + "epoch": 13.91, + "learning_rate": 1.5243128964059197e-05, + "loss": 0.0362, + "step": 13160 + }, + { + "epoch": 13.91, + "learning_rate": 1.523784355179704e-05, + "loss": 0.0339, + "step": 13162 + }, + { + "epoch": 13.92, + "learning_rate": 1.5232558139534884e-05, + "loss": 0.0635, + "step": 13164 + }, + { + "epoch": 13.92, + "learning_rate": 1.5227272727272728e-05, + "loss": 0.024, + "step": 13166 + }, + { + "epoch": 13.92, + "learning_rate": 1.5221987315010573e-05, + "loss": 0.023, + "step": 13168 + }, + { + "epoch": 13.92, + "learning_rate": 1.5216701902748415e-05, + "loss": 0.0562, + "step": 13170 + }, + { + "epoch": 13.92, + "learning_rate": 1.5211416490486258e-05, + "loss": 0.0143, + "step": 13172 + }, + { + "epoch": 13.93, + "learning_rate": 1.5206131078224104e-05, + "loss": 0.0143, + "step": 13174 + }, + { + "epoch": 13.93, + "learning_rate": 1.5200845665961947e-05, + "loss": 0.0185, + "step": 13176 + }, + { + "epoch": 13.93, + "learning_rate": 1.519556025369979e-05, + "loss": 0.0204, + "step": 13178 + }, + { + "epoch": 13.93, + "learning_rate": 1.5190274841437632e-05, + "loss": 0.0322, + "step": 13180 + }, + { + "epoch": 13.93, + "learning_rate": 1.5184989429175475e-05, + "loss": 0.047, + "step": 13182 + }, + { + "epoch": 13.94, + "learning_rate": 1.517970401691332e-05, + "loss": 0.0296, + "step": 13184 + }, + { + "epoch": 13.94, + "learning_rate": 1.5174418604651163e-05, + "loss": 0.0115, + "step": 13186 + }, + { + "epoch": 13.94, + "learning_rate": 1.5169133192389006e-05, + "loss": 0.0086, + "step": 13188 + }, + { + "epoch": 13.94, + "learning_rate": 1.516384778012685e-05, + "loss": 0.0098, + "step": 13190 + }, + { + "epoch": 13.95, + "learning_rate": 1.5158562367864695e-05, + "loss": 0.0352, + "step": 13192 + }, + { + "epoch": 13.95, + "learning_rate": 1.5153276955602538e-05, + "loss": 0.003, + "step": 13194 + }, + { + "epoch": 13.95, + "learning_rate": 1.5147991543340382e-05, + "loss": 0.0179, + "step": 13196 + }, + { + "epoch": 13.95, + "learning_rate": 1.5142706131078225e-05, + "loss": 0.0103, + "step": 13198 + }, + { + "epoch": 13.95, + "learning_rate": 1.5137420718816067e-05, + "loss": 0.0212, + "step": 13200 + }, + { + "epoch": 13.96, + "learning_rate": 1.5132135306553913e-05, + "loss": 0.0327, + "step": 13202 + }, + { + "epoch": 13.96, + "learning_rate": 1.5126849894291756e-05, + "loss": 0.0174, + "step": 13204 + }, + { + "epoch": 13.96, + "learning_rate": 1.5121564482029599e-05, + "loss": 0.0536, + "step": 13206 + }, + { + "epoch": 13.96, + "learning_rate": 1.5116279069767441e-05, + "loss": 0.0491, + "step": 13208 + }, + { + "epoch": 13.96, + "learning_rate": 1.5110993657505287e-05, + "loss": 0.0246, + "step": 13210 + }, + { + "epoch": 13.97, + "learning_rate": 1.510570824524313e-05, + "loss": 0.0733, + "step": 13212 + }, + { + "epoch": 13.97, + "learning_rate": 1.5100422832980973e-05, + "loss": 0.005, + "step": 13214 + }, + { + "epoch": 13.97, + "learning_rate": 1.5095137420718815e-05, + "loss": 0.0153, + "step": 13216 + }, + { + "epoch": 13.97, + "learning_rate": 1.508985200845666e-05, + "loss": 0.0374, + "step": 13218 + }, + { + "epoch": 13.97, + "learning_rate": 1.5084566596194504e-05, + "loss": 0.035, + "step": 13220 + }, + { + "epoch": 13.98, + "learning_rate": 1.5079281183932348e-05, + "loss": 0.0276, + "step": 13222 + }, + { + "epoch": 13.98, + "learning_rate": 1.5073995771670191e-05, + "loss": 0.0337, + "step": 13224 + }, + { + "epoch": 13.98, + "learning_rate": 1.5068710359408034e-05, + "loss": 0.0762, + "step": 13226 + }, + { + "epoch": 13.98, + "learning_rate": 1.506342494714588e-05, + "loss": 0.0072, + "step": 13228 + }, + { + "epoch": 13.99, + "learning_rate": 1.5058139534883723e-05, + "loss": 0.0254, + "step": 13230 + }, + { + "epoch": 13.99, + "learning_rate": 1.5052854122621565e-05, + "loss": 0.0068, + "step": 13232 + }, + { + "epoch": 13.99, + "learning_rate": 1.5047568710359408e-05, + "loss": 0.018, + "step": 13234 + }, + { + "epoch": 13.99, + "learning_rate": 1.504228329809725e-05, + "loss": 0.0269, + "step": 13236 + }, + { + "epoch": 13.99, + "learning_rate": 1.5036997885835097e-05, + "loss": 0.054, + "step": 13238 + }, + { + "epoch": 14.0, + "learning_rate": 1.503171247357294e-05, + "loss": 0.0689, + "step": 13240 + }, + { + "epoch": 14.0, + "learning_rate": 1.5026427061310782e-05, + "loss": 0.0259, + "step": 13242 + }, + { + "epoch": 14.0, + "learning_rate": 1.5021141649048626e-05, + "loss": 0.0249, + "step": 13244 + }, + { + "epoch": 14.0, + "learning_rate": 1.501585623678647e-05, + "loss": 0.0192, + "step": 13246 + }, + { + "epoch": 14.0, + "learning_rate": 1.5010570824524315e-05, + "loss": 0.0105, + "step": 13248 + }, + { + "epoch": 14.01, + "learning_rate": 1.5005285412262158e-05, + "loss": 0.0108, + "step": 13250 + }, + { + "epoch": 14.01, + "learning_rate": 1.5e-05, + "loss": 0.0063, + "step": 13252 + }, + { + "epoch": 14.01, + "learning_rate": 1.4994714587737843e-05, + "loss": 0.0109, + "step": 13254 + }, + { + "epoch": 14.01, + "learning_rate": 1.4989429175475689e-05, + "loss": 0.0026, + "step": 13256 + }, + { + "epoch": 14.01, + "learning_rate": 1.4984143763213532e-05, + "loss": 0.0211, + "step": 13258 + }, + { + "epoch": 14.02, + "learning_rate": 1.4978858350951374e-05, + "loss": 0.0473, + "step": 13260 + }, + { + "epoch": 14.02, + "learning_rate": 1.4973572938689217e-05, + "loss": 0.0033, + "step": 13262 + }, + { + "epoch": 14.02, + "learning_rate": 1.4968287526427063e-05, + "loss": 0.0232, + "step": 13264 + }, + { + "epoch": 14.02, + "learning_rate": 1.4963002114164906e-05, + "loss": 0.0317, + "step": 13266 + }, + { + "epoch": 14.03, + "learning_rate": 1.4957716701902749e-05, + "loss": 0.0207, + "step": 13268 + }, + { + "epoch": 14.03, + "learning_rate": 1.4952431289640593e-05, + "loss": 0.0295, + "step": 13270 + }, + { + "epoch": 14.03, + "learning_rate": 1.4947145877378436e-05, + "loss": 0.0027, + "step": 13272 + }, + { + "epoch": 14.03, + "learning_rate": 1.4941860465116282e-05, + "loss": 0.0149, + "step": 13274 + }, + { + "epoch": 14.03, + "learning_rate": 1.4936575052854124e-05, + "loss": 0.0253, + "step": 13276 + }, + { + "epoch": 14.04, + "learning_rate": 1.4931289640591967e-05, + "loss": 0.0139, + "step": 13278 + }, + { + "epoch": 14.04, + "learning_rate": 1.492600422832981e-05, + "loss": 0.0864, + "step": 13280 + }, + { + "epoch": 14.04, + "learning_rate": 1.4920718816067656e-05, + "loss": 0.0691, + "step": 13282 + }, + { + "epoch": 14.04, + "learning_rate": 1.4915433403805498e-05, + "loss": 0.015, + "step": 13284 + }, + { + "epoch": 14.04, + "learning_rate": 1.4910147991543341e-05, + "loss": 0.0429, + "step": 13286 + }, + { + "epoch": 14.05, + "learning_rate": 1.4904862579281184e-05, + "loss": 0.0207, + "step": 13288 + }, + { + "epoch": 14.05, + "learning_rate": 1.4899577167019026e-05, + "loss": 0.0256, + "step": 13290 + }, + { + "epoch": 14.05, + "learning_rate": 1.4894291754756872e-05, + "loss": 0.0107, + "step": 13292 + }, + { + "epoch": 14.05, + "learning_rate": 1.4889006342494715e-05, + "loss": 0.0192, + "step": 13294 + }, + { + "epoch": 14.05, + "learning_rate": 1.488372093023256e-05, + "loss": 0.0007, + "step": 13296 + }, + { + "epoch": 14.06, + "learning_rate": 1.4878435517970402e-05, + "loss": 0.087, + "step": 13298 + }, + { + "epoch": 14.06, + "learning_rate": 1.4873150105708247e-05, + "loss": 0.003, + "step": 13300 + }, + { + "epoch": 14.06, + "learning_rate": 1.4867864693446091e-05, + "loss": 0.0655, + "step": 13302 + }, + { + "epoch": 14.06, + "learning_rate": 1.4862579281183934e-05, + "loss": 0.0009, + "step": 13304 + }, + { + "epoch": 14.07, + "learning_rate": 1.4857293868921776e-05, + "loss": 0.0076, + "step": 13306 + }, + { + "epoch": 14.07, + "learning_rate": 1.4852008456659619e-05, + "loss": 0.0231, + "step": 13308 + }, + { + "epoch": 14.07, + "learning_rate": 1.4846723044397465e-05, + "loss": 0.0266, + "step": 13310 + }, + { + "epoch": 14.07, + "learning_rate": 1.4841437632135308e-05, + "loss": 0.0252, + "step": 13312 + }, + { + "epoch": 14.07, + "learning_rate": 1.483615221987315e-05, + "loss": 0.0915, + "step": 13314 + }, + { + "epoch": 14.08, + "learning_rate": 1.4830866807610993e-05, + "loss": 0.0135, + "step": 13316 + }, + { + "epoch": 14.08, + "learning_rate": 1.4825581395348839e-05, + "loss": 0.019, + "step": 13318 + }, + { + "epoch": 14.08, + "learning_rate": 1.4820295983086682e-05, + "loss": 0.0622, + "step": 13320 + }, + { + "epoch": 14.08, + "learning_rate": 1.4815010570824524e-05, + "loss": 0.0025, + "step": 13322 + }, + { + "epoch": 14.08, + "learning_rate": 1.4809725158562369e-05, + "loss": 0.0052, + "step": 13324 + }, + { + "epoch": 14.09, + "learning_rate": 1.4804439746300211e-05, + "loss": 0.0065, + "step": 13326 + }, + { + "epoch": 14.09, + "learning_rate": 1.4799154334038057e-05, + "loss": 0.063, + "step": 13328 + }, + { + "epoch": 14.09, + "learning_rate": 1.47938689217759e-05, + "loss": 0.0188, + "step": 13330 + }, + { + "epoch": 14.09, + "learning_rate": 1.4788583509513743e-05, + "loss": 0.0182, + "step": 13332 + }, + { + "epoch": 14.1, + "learning_rate": 1.4783298097251585e-05, + "loss": 0.021, + "step": 13334 + }, + { + "epoch": 14.1, + "learning_rate": 1.4778012684989432e-05, + "loss": 0.0099, + "step": 13336 + }, + { + "epoch": 14.1, + "learning_rate": 1.4772727272727274e-05, + "loss": 0.0057, + "step": 13338 + }, + { + "epoch": 14.1, + "learning_rate": 1.4767441860465117e-05, + "loss": 0.0193, + "step": 13340 + }, + { + "epoch": 14.1, + "learning_rate": 1.476215644820296e-05, + "loss": 0.0487, + "step": 13342 + }, + { + "epoch": 14.11, + "learning_rate": 1.4756871035940802e-05, + "loss": 0.0194, + "step": 13344 + }, + { + "epoch": 14.11, + "learning_rate": 1.4751585623678648e-05, + "loss": 0.0289, + "step": 13346 + }, + { + "epoch": 14.11, + "learning_rate": 1.4746300211416491e-05, + "loss": 0.0224, + "step": 13348 + }, + { + "epoch": 14.11, + "learning_rate": 1.4741014799154335e-05, + "loss": 0.0137, + "step": 13350 + }, + { + "epoch": 14.11, + "learning_rate": 1.4735729386892178e-05, + "loss": 0.0165, + "step": 13352 + }, + { + "epoch": 14.12, + "learning_rate": 1.473044397463002e-05, + "loss": 0.0291, + "step": 13354 + }, + { + "epoch": 14.12, + "learning_rate": 1.4725158562367867e-05, + "loss": 0.0642, + "step": 13356 + }, + { + "epoch": 14.12, + "learning_rate": 1.471987315010571e-05, + "loss": 0.0494, + "step": 13358 + }, + { + "epoch": 14.12, + "learning_rate": 1.4714587737843552e-05, + "loss": 0.0077, + "step": 13360 + }, + { + "epoch": 14.12, + "learning_rate": 1.4709302325581395e-05, + "loss": 0.0222, + "step": 13362 + }, + { + "epoch": 14.13, + "learning_rate": 1.470401691331924e-05, + "loss": 0.0185, + "step": 13364 + }, + { + "epoch": 14.13, + "learning_rate": 1.4698731501057083e-05, + "loss": 0.0556, + "step": 13366 + }, + { + "epoch": 14.13, + "learning_rate": 1.4693446088794926e-05, + "loss": 0.0324, + "step": 13368 + }, + { + "epoch": 14.13, + "learning_rate": 1.4688160676532769e-05, + "loss": 0.0105, + "step": 13370 + }, + { + "epoch": 14.14, + "learning_rate": 1.4682875264270613e-05, + "loss": 0.0087, + "step": 13372 + }, + { + "epoch": 14.14, + "learning_rate": 1.4677589852008458e-05, + "loss": 0.0375, + "step": 13374 + }, + { + "epoch": 14.14, + "learning_rate": 1.4672304439746302e-05, + "loss": 0.0014, + "step": 13376 + }, + { + "epoch": 14.14, + "learning_rate": 1.4667019027484145e-05, + "loss": 0.019, + "step": 13378 + }, + { + "epoch": 14.14, + "learning_rate": 1.4661733615221987e-05, + "loss": 0.0439, + "step": 13380 + }, + { + "epoch": 14.15, + "learning_rate": 1.4656448202959833e-05, + "loss": 0.017, + "step": 13382 + }, + { + "epoch": 14.15, + "learning_rate": 1.4651162790697676e-05, + "loss": 0.0047, + "step": 13384 + }, + { + "epoch": 14.15, + "learning_rate": 1.4645877378435519e-05, + "loss": 0.0519, + "step": 13386 + }, + { + "epoch": 14.15, + "learning_rate": 1.4640591966173361e-05, + "loss": 0.0107, + "step": 13388 + }, + { + "epoch": 14.15, + "learning_rate": 1.4635306553911204e-05, + "loss": 0.0064, + "step": 13390 + }, + { + "epoch": 14.16, + "learning_rate": 1.463002114164905e-05, + "loss": 0.0832, + "step": 13392 + }, + { + "epoch": 14.16, + "learning_rate": 1.4624735729386893e-05, + "loss": 0.0079, + "step": 13394 + }, + { + "epoch": 14.16, + "learning_rate": 1.4619450317124735e-05, + "loss": 0.0031, + "step": 13396 + }, + { + "epoch": 14.16, + "learning_rate": 1.461416490486258e-05, + "loss": 0.0436, + "step": 13398 + }, + { + "epoch": 14.16, + "learning_rate": 1.4608879492600424e-05, + "loss": 0.0132, + "step": 13400 + }, + { + "epoch": 14.17, + "learning_rate": 1.4603594080338267e-05, + "loss": 0.0417, + "step": 13402 + }, + { + "epoch": 14.17, + "learning_rate": 1.4598308668076111e-05, + "loss": 0.0505, + "step": 13404 + }, + { + "epoch": 14.17, + "learning_rate": 1.4593023255813954e-05, + "loss": 0.0369, + "step": 13406 + }, + { + "epoch": 14.17, + "learning_rate": 1.4587737843551796e-05, + "loss": 0.072, + "step": 13408 + }, + { + "epoch": 14.18, + "learning_rate": 1.4582452431289643e-05, + "loss": 0.0264, + "step": 13410 + }, + { + "epoch": 14.18, + "learning_rate": 1.4577167019027485e-05, + "loss": 0.0184, + "step": 13412 + }, + { + "epoch": 14.18, + "learning_rate": 1.4571881606765328e-05, + "loss": 0.0311, + "step": 13414 + }, + { + "epoch": 14.18, + "learning_rate": 1.456659619450317e-05, + "loss": 0.0128, + "step": 13416 + }, + { + "epoch": 14.18, + "learning_rate": 1.4561310782241017e-05, + "loss": 0.0044, + "step": 13418 + }, + { + "epoch": 14.19, + "learning_rate": 1.455602536997886e-05, + "loss": 0.007, + "step": 13420 + }, + { + "epoch": 14.19, + "learning_rate": 1.4550739957716702e-05, + "loss": 0.0169, + "step": 13422 + }, + { + "epoch": 14.19, + "learning_rate": 1.4545454545454545e-05, + "loss": 0.0117, + "step": 13424 + }, + { + "epoch": 14.19, + "learning_rate": 1.4540169133192389e-05, + "loss": 0.0298, + "step": 13426 + }, + { + "epoch": 14.19, + "learning_rate": 1.4534883720930233e-05, + "loss": 0.0403, + "step": 13428 + }, + { + "epoch": 14.2, + "learning_rate": 1.4529598308668078e-05, + "loss": 0.0188, + "step": 13430 + }, + { + "epoch": 14.2, + "learning_rate": 1.452431289640592e-05, + "loss": 0.0029, + "step": 13432 + }, + { + "epoch": 14.2, + "learning_rate": 1.4519027484143763e-05, + "loss": 0.0201, + "step": 13434 + }, + { + "epoch": 14.2, + "learning_rate": 1.4513742071881609e-05, + "loss": 0.0215, + "step": 13436 + }, + { + "epoch": 14.21, + "learning_rate": 1.4508456659619452e-05, + "loss": 0.0227, + "step": 13438 + }, + { + "epoch": 14.21, + "learning_rate": 1.4503171247357294e-05, + "loss": 0.0074, + "step": 13440 + }, + { + "epoch": 14.21, + "learning_rate": 1.4497885835095137e-05, + "loss": 0.0111, + "step": 13442 + }, + { + "epoch": 14.21, + "learning_rate": 1.449260042283298e-05, + "loss": 0.0031, + "step": 13444 + }, + { + "epoch": 14.21, + "learning_rate": 1.4487315010570826e-05, + "loss": 0.0568, + "step": 13446 + }, + { + "epoch": 14.22, + "learning_rate": 1.4482029598308669e-05, + "loss": 0.0475, + "step": 13448 + }, + { + "epoch": 14.22, + "learning_rate": 1.4476744186046511e-05, + "loss": 0.0066, + "step": 13450 + }, + { + "epoch": 14.22, + "learning_rate": 1.4471458773784356e-05, + "loss": 0.0605, + "step": 13452 + }, + { + "epoch": 14.22, + "learning_rate": 1.44661733615222e-05, + "loss": 0.0407, + "step": 13454 + }, + { + "epoch": 14.22, + "learning_rate": 1.4460887949260044e-05, + "loss": 0.0279, + "step": 13456 + }, + { + "epoch": 14.23, + "learning_rate": 1.4455602536997887e-05, + "loss": 0.009, + "step": 13458 + }, + { + "epoch": 14.23, + "learning_rate": 1.445031712473573e-05, + "loss": 0.0206, + "step": 13460 + }, + { + "epoch": 14.23, + "learning_rate": 1.4445031712473572e-05, + "loss": 0.1183, + "step": 13462 + }, + { + "epoch": 14.23, + "learning_rate": 1.4439746300211418e-05, + "loss": 0.039, + "step": 13464 + }, + { + "epoch": 14.23, + "learning_rate": 1.4434460887949261e-05, + "loss": 0.0591, + "step": 13466 + }, + { + "epoch": 14.24, + "learning_rate": 1.4429175475687104e-05, + "loss": 0.0715, + "step": 13468 + }, + { + "epoch": 14.24, + "learning_rate": 1.4423890063424946e-05, + "loss": 0.024, + "step": 13470 + }, + { + "epoch": 14.24, + "learning_rate": 1.4418604651162792e-05, + "loss": 0.0236, + "step": 13472 + }, + { + "epoch": 14.24, + "learning_rate": 1.4413319238900635e-05, + "loss": 0.0232, + "step": 13474 + }, + { + "epoch": 14.25, + "learning_rate": 1.4408033826638478e-05, + "loss": 0.0394, + "step": 13476 + }, + { + "epoch": 14.25, + "learning_rate": 1.4402748414376322e-05, + "loss": 0.0025, + "step": 13478 + }, + { + "epoch": 14.25, + "learning_rate": 1.4397463002114165e-05, + "loss": 0.0709, + "step": 13480 + }, + { + "epoch": 14.25, + "learning_rate": 1.439217758985201e-05, + "loss": 0.0878, + "step": 13482 + }, + { + "epoch": 14.25, + "learning_rate": 1.4386892177589854e-05, + "loss": 0.008, + "step": 13484 + }, + { + "epoch": 14.26, + "learning_rate": 1.4381606765327696e-05, + "loss": 0.02, + "step": 13486 + }, + { + "epoch": 14.26, + "learning_rate": 1.4376321353065539e-05, + "loss": 0.0175, + "step": 13488 + }, + { + "epoch": 14.26, + "learning_rate": 1.4371035940803385e-05, + "loss": 0.018, + "step": 13490 + }, + { + "epoch": 14.26, + "learning_rate": 1.4365750528541228e-05, + "loss": 0.0914, + "step": 13492 + }, + { + "epoch": 14.26, + "learning_rate": 1.436046511627907e-05, + "loss": 0.0175, + "step": 13494 + }, + { + "epoch": 14.27, + "learning_rate": 1.4355179704016913e-05, + "loss": 0.0415, + "step": 13496 + }, + { + "epoch": 14.27, + "learning_rate": 1.4349894291754756e-05, + "loss": 0.0276, + "step": 13498 + }, + { + "epoch": 14.27, + "learning_rate": 1.4344608879492602e-05, + "loss": 0.0012, + "step": 13500 + }, + { + "epoch": 14.27, + "eval_cer": 0.09563978341407808, + "eval_loss": 1.003995656967163, + "eval_runtime": 120.9517, + "eval_samples_per_second": 6.953, + "eval_steps_per_second": 0.876, + "step": 13500 + }, + { + "epoch": 14.27, + "learning_rate": 1.4339323467230444e-05, + "loss": 0.1183, + "step": 13502 + }, + { + "epoch": 14.27, + "learning_rate": 1.4334038054968287e-05, + "loss": 0.062, + "step": 13504 + }, + { + "epoch": 14.28, + "learning_rate": 1.4328752642706131e-05, + "loss": 0.0552, + "step": 13506 + }, + { + "epoch": 14.28, + "learning_rate": 1.4323467230443976e-05, + "loss": 0.0046, + "step": 13508 + }, + { + "epoch": 14.28, + "learning_rate": 1.431818181818182e-05, + "loss": 0.0243, + "step": 13510 + }, + { + "epoch": 14.28, + "learning_rate": 1.4312896405919663e-05, + "loss": 0.1092, + "step": 13512 + }, + { + "epoch": 14.29, + "learning_rate": 1.4307610993657506e-05, + "loss": 0.1191, + "step": 13514 + }, + { + "epoch": 14.29, + "learning_rate": 1.4302325581395348e-05, + "loss": 0.0201, + "step": 13516 + }, + { + "epoch": 14.29, + "learning_rate": 1.4297040169133194e-05, + "loss": 0.0286, + "step": 13518 + }, + { + "epoch": 14.29, + "learning_rate": 1.4291754756871037e-05, + "loss": 0.0275, + "step": 13520 + }, + { + "epoch": 14.29, + "learning_rate": 1.428646934460888e-05, + "loss": 0.0763, + "step": 13522 + }, + { + "epoch": 14.3, + "learning_rate": 1.4281183932346722e-05, + "loss": 0.1111, + "step": 13524 + }, + { + "epoch": 14.3, + "learning_rate": 1.4275898520084568e-05, + "loss": 0.0027, + "step": 13526 + }, + { + "epoch": 14.3, + "learning_rate": 1.4270613107822411e-05, + "loss": 0.0311, + "step": 13528 + }, + { + "epoch": 14.3, + "learning_rate": 1.4265327695560254e-05, + "loss": 0.0032, + "step": 13530 + }, + { + "epoch": 14.3, + "learning_rate": 1.4260042283298098e-05, + "loss": 0.0262, + "step": 13532 + }, + { + "epoch": 14.31, + "learning_rate": 1.425475687103594e-05, + "loss": 0.0174, + "step": 13534 + }, + { + "epoch": 14.31, + "learning_rate": 1.4249471458773787e-05, + "loss": 0.0109, + "step": 13536 + }, + { + "epoch": 14.31, + "learning_rate": 1.424418604651163e-05, + "loss": 0.0691, + "step": 13538 + }, + { + "epoch": 14.31, + "learning_rate": 1.4238900634249472e-05, + "loss": 0.1051, + "step": 13540 + }, + { + "epoch": 14.32, + "learning_rate": 1.4233615221987315e-05, + "loss": 0.1075, + "step": 13542 + }, + { + "epoch": 14.32, + "learning_rate": 1.422832980972516e-05, + "loss": 0.0403, + "step": 13544 + }, + { + "epoch": 14.32, + "learning_rate": 1.4223044397463003e-05, + "loss": 0.0249, + "step": 13546 + }, + { + "epoch": 14.32, + "learning_rate": 1.4217758985200846e-05, + "loss": 0.0125, + "step": 13548 + }, + { + "epoch": 14.32, + "learning_rate": 1.4212473572938689e-05, + "loss": 0.0637, + "step": 13550 + }, + { + "epoch": 14.33, + "learning_rate": 1.4207188160676532e-05, + "loss": 0.0158, + "step": 13552 + }, + { + "epoch": 14.33, + "learning_rate": 1.4201902748414378e-05, + "loss": 0.0537, + "step": 13554 + }, + { + "epoch": 14.33, + "learning_rate": 1.419661733615222e-05, + "loss": 0.027, + "step": 13556 + }, + { + "epoch": 14.33, + "learning_rate": 1.4191331923890065e-05, + "loss": 0.0184, + "step": 13558 + }, + { + "epoch": 14.33, + "learning_rate": 1.4186046511627907e-05, + "loss": 0.0254, + "step": 13560 + }, + { + "epoch": 14.34, + "learning_rate": 1.4180761099365752e-05, + "loss": 0.0231, + "step": 13562 + }, + { + "epoch": 14.34, + "learning_rate": 1.4175475687103596e-05, + "loss": 0.0292, + "step": 13564 + }, + { + "epoch": 14.34, + "learning_rate": 1.4170190274841439e-05, + "loss": 0.0091, + "step": 13566 + }, + { + "epoch": 14.34, + "learning_rate": 1.4164904862579281e-05, + "loss": 0.0139, + "step": 13568 + }, + { + "epoch": 14.34, + "learning_rate": 1.4159619450317124e-05, + "loss": 0.0491, + "step": 13570 + }, + { + "epoch": 14.35, + "learning_rate": 1.415433403805497e-05, + "loss": 0.0261, + "step": 13572 + }, + { + "epoch": 14.35, + "learning_rate": 1.4149048625792813e-05, + "loss": 0.0122, + "step": 13574 + }, + { + "epoch": 14.35, + "learning_rate": 1.4143763213530655e-05, + "loss": 0.0122, + "step": 13576 + }, + { + "epoch": 14.35, + "learning_rate": 1.4138477801268498e-05, + "loss": 0.049, + "step": 13578 + }, + { + "epoch": 14.36, + "learning_rate": 1.4133192389006344e-05, + "loss": 0.0109, + "step": 13580 + }, + { + "epoch": 14.36, + "learning_rate": 1.4127906976744187e-05, + "loss": 0.1036, + "step": 13582 + }, + { + "epoch": 14.36, + "learning_rate": 1.412262156448203e-05, + "loss": 0.0188, + "step": 13584 + }, + { + "epoch": 14.36, + "learning_rate": 1.4117336152219874e-05, + "loss": 0.0124, + "step": 13586 + }, + { + "epoch": 14.36, + "learning_rate": 1.4112050739957717e-05, + "loss": 0.0425, + "step": 13588 + }, + { + "epoch": 14.37, + "learning_rate": 1.4106765327695563e-05, + "loss": 0.0123, + "step": 13590 + }, + { + "epoch": 14.37, + "learning_rate": 1.4101479915433405e-05, + "loss": 0.0958, + "step": 13592 + }, + { + "epoch": 14.37, + "learning_rate": 1.4096194503171248e-05, + "loss": 0.0522, + "step": 13594 + }, + { + "epoch": 14.37, + "learning_rate": 1.409090909090909e-05, + "loss": 0.0211, + "step": 13596 + }, + { + "epoch": 14.37, + "learning_rate": 1.4085623678646937e-05, + "loss": 0.0055, + "step": 13598 + }, + { + "epoch": 14.38, + "learning_rate": 1.408033826638478e-05, + "loss": 0.0014, + "step": 13600 + }, + { + "epoch": 14.38, + "learning_rate": 1.4075052854122622e-05, + "loss": 0.0414, + "step": 13602 + }, + { + "epoch": 14.38, + "learning_rate": 1.4069767441860465e-05, + "loss": 0.0068, + "step": 13604 + }, + { + "epoch": 14.38, + "learning_rate": 1.4064482029598307e-05, + "loss": 0.0163, + "step": 13606 + }, + { + "epoch": 14.38, + "learning_rate": 1.4059196617336153e-05, + "loss": 0.0078, + "step": 13608 + }, + { + "epoch": 14.39, + "learning_rate": 1.4053911205073996e-05, + "loss": 0.0214, + "step": 13610 + }, + { + "epoch": 14.39, + "learning_rate": 1.404862579281184e-05, + "loss": 0.0221, + "step": 13612 + }, + { + "epoch": 14.39, + "learning_rate": 1.4043340380549683e-05, + "loss": 0.0048, + "step": 13614 + }, + { + "epoch": 14.39, + "learning_rate": 1.403805496828753e-05, + "loss": 0.0113, + "step": 13616 + }, + { + "epoch": 14.4, + "learning_rate": 1.4032769556025372e-05, + "loss": 0.0915, + "step": 13618 + }, + { + "epoch": 14.4, + "learning_rate": 1.4027484143763215e-05, + "loss": 0.0354, + "step": 13620 + }, + { + "epoch": 14.4, + "learning_rate": 1.4022198731501057e-05, + "loss": 0.0057, + "step": 13622 + }, + { + "epoch": 14.4, + "learning_rate": 1.40169133192389e-05, + "loss": 0.003, + "step": 13624 + }, + { + "epoch": 14.4, + "learning_rate": 1.4011627906976746e-05, + "loss": 0.0131, + "step": 13626 + }, + { + "epoch": 14.41, + "learning_rate": 1.4006342494714589e-05, + "loss": 0.0242, + "step": 13628 + }, + { + "epoch": 14.41, + "learning_rate": 1.4001057082452431e-05, + "loss": 0.018, + "step": 13630 + }, + { + "epoch": 14.41, + "learning_rate": 1.3995771670190274e-05, + "loss": 0.0172, + "step": 13632 + }, + { + "epoch": 14.41, + "learning_rate": 1.399048625792812e-05, + "loss": 0.0313, + "step": 13634 + }, + { + "epoch": 14.41, + "learning_rate": 1.3985200845665963e-05, + "loss": 0.0219, + "step": 13636 + }, + { + "epoch": 14.42, + "learning_rate": 1.3979915433403807e-05, + "loss": 0.0044, + "step": 13638 + }, + { + "epoch": 14.42, + "learning_rate": 1.397463002114165e-05, + "loss": 0.0299, + "step": 13640 + }, + { + "epoch": 14.42, + "learning_rate": 1.3969344608879492e-05, + "loss": 0.0146, + "step": 13642 + }, + { + "epoch": 14.42, + "learning_rate": 1.3964059196617338e-05, + "loss": 0.0114, + "step": 13644 + }, + { + "epoch": 14.42, + "learning_rate": 1.3958773784355181e-05, + "loss": 0.1177, + "step": 13646 + }, + { + "epoch": 14.43, + "learning_rate": 1.3953488372093024e-05, + "loss": 0.0769, + "step": 13648 + }, + { + "epoch": 14.43, + "learning_rate": 1.3948202959830866e-05, + "loss": 0.0479, + "step": 13650 + }, + { + "epoch": 14.43, + "learning_rate": 1.3942917547568712e-05, + "loss": 0.0186, + "step": 13652 + }, + { + "epoch": 14.43, + "learning_rate": 1.3937632135306555e-05, + "loss": 0.0156, + "step": 13654 + }, + { + "epoch": 14.44, + "learning_rate": 1.3932346723044398e-05, + "loss": 0.0296, + "step": 13656 + }, + { + "epoch": 14.44, + "learning_rate": 1.392706131078224e-05, + "loss": 0.0376, + "step": 13658 + }, + { + "epoch": 14.44, + "learning_rate": 1.3921775898520085e-05, + "loss": 0.0162, + "step": 13660 + }, + { + "epoch": 14.44, + "learning_rate": 1.391649048625793e-05, + "loss": 0.02, + "step": 13662 + }, + { + "epoch": 14.44, + "learning_rate": 1.3911205073995774e-05, + "loss": 0.0107, + "step": 13664 + }, + { + "epoch": 14.45, + "learning_rate": 1.3905919661733616e-05, + "loss": 0.0131, + "step": 13666 + }, + { + "epoch": 14.45, + "learning_rate": 1.3900634249471459e-05, + "loss": 0.0253, + "step": 13668 + }, + { + "epoch": 14.45, + "learning_rate": 1.3895348837209305e-05, + "loss": 0.0241, + "step": 13670 + }, + { + "epoch": 14.45, + "learning_rate": 1.3890063424947148e-05, + "loss": 0.0411, + "step": 13672 + }, + { + "epoch": 14.45, + "learning_rate": 1.388477801268499e-05, + "loss": 0.0222, + "step": 13674 + }, + { + "epoch": 14.46, + "learning_rate": 1.3879492600422833e-05, + "loss": 0.0701, + "step": 13676 + }, + { + "epoch": 14.46, + "learning_rate": 1.3874207188160676e-05, + "loss": 0.0616, + "step": 13678 + }, + { + "epoch": 14.46, + "learning_rate": 1.3868921775898522e-05, + "loss": 0.0431, + "step": 13680 + }, + { + "epoch": 14.46, + "learning_rate": 1.3863636363636364e-05, + "loss": 0.0509, + "step": 13682 + }, + { + "epoch": 14.47, + "learning_rate": 1.3858350951374207e-05, + "loss": 0.0369, + "step": 13684 + }, + { + "epoch": 14.47, + "learning_rate": 1.3853065539112051e-05, + "loss": 0.0113, + "step": 13686 + }, + { + "epoch": 14.47, + "learning_rate": 1.3847780126849896e-05, + "loss": 0.051, + "step": 13688 + }, + { + "epoch": 14.47, + "learning_rate": 1.3842494714587738e-05, + "loss": 0.0501, + "step": 13690 + }, + { + "epoch": 14.47, + "learning_rate": 1.3837209302325583e-05, + "loss": 0.0243, + "step": 13692 + }, + { + "epoch": 14.48, + "learning_rate": 1.3831923890063426e-05, + "loss": 0.0398, + "step": 13694 + }, + { + "epoch": 14.48, + "learning_rate": 1.3826638477801268e-05, + "loss": 0.0111, + "step": 13696 + }, + { + "epoch": 14.48, + "learning_rate": 1.3821353065539114e-05, + "loss": 0.0178, + "step": 13698 + }, + { + "epoch": 14.48, + "learning_rate": 1.3816067653276957e-05, + "loss": 0.0069, + "step": 13700 + }, + { + "epoch": 14.48, + "learning_rate": 1.38107822410148e-05, + "loss": 0.0212, + "step": 13702 + }, + { + "epoch": 14.49, + "learning_rate": 1.3805496828752642e-05, + "loss": 0.0212, + "step": 13704 + }, + { + "epoch": 14.49, + "learning_rate": 1.3800211416490485e-05, + "loss": 0.0104, + "step": 13706 + }, + { + "epoch": 14.49, + "learning_rate": 1.3794926004228331e-05, + "loss": 0.0028, + "step": 13708 + }, + { + "epoch": 14.49, + "learning_rate": 1.3789640591966174e-05, + "loss": 0.0084, + "step": 13710 + }, + { + "epoch": 14.49, + "learning_rate": 1.3784355179704016e-05, + "loss": 0.0075, + "step": 13712 + }, + { + "epoch": 14.5, + "learning_rate": 1.377906976744186e-05, + "loss": 0.0149, + "step": 13714 + }, + { + "epoch": 14.5, + "learning_rate": 1.3773784355179705e-05, + "loss": 0.0051, + "step": 13716 + }, + { + "epoch": 14.5, + "learning_rate": 1.376849894291755e-05, + "loss": 0.0321, + "step": 13718 + }, + { + "epoch": 14.5, + "learning_rate": 1.3763213530655392e-05, + "loss": 0.0161, + "step": 13720 + }, + { + "epoch": 14.51, + "learning_rate": 1.3757928118393235e-05, + "loss": 0.0121, + "step": 13722 + }, + { + "epoch": 14.51, + "learning_rate": 1.3752642706131077e-05, + "loss": 0.0102, + "step": 13724 + }, + { + "epoch": 14.51, + "learning_rate": 1.3747357293868924e-05, + "loss": 0.005, + "step": 13726 + }, + { + "epoch": 14.51, + "learning_rate": 1.3742071881606766e-05, + "loss": 0.0073, + "step": 13728 + }, + { + "epoch": 14.51, + "learning_rate": 1.3736786469344609e-05, + "loss": 0.0379, + "step": 13730 + }, + { + "epoch": 14.52, + "learning_rate": 1.3731501057082452e-05, + "loss": 0.0563, + "step": 13732 + }, + { + "epoch": 14.52, + "learning_rate": 1.3726215644820298e-05, + "loss": 0.0341, + "step": 13734 + }, + { + "epoch": 14.52, + "learning_rate": 1.372093023255814e-05, + "loss": 0.0172, + "step": 13736 + }, + { + "epoch": 14.52, + "learning_rate": 1.3715644820295983e-05, + "loss": 0.0016, + "step": 13738 + }, + { + "epoch": 14.52, + "learning_rate": 1.3710359408033827e-05, + "loss": 0.0094, + "step": 13740 + }, + { + "epoch": 14.53, + "learning_rate": 1.370507399577167e-05, + "loss": 0.0734, + "step": 13742 + }, + { + "epoch": 14.53, + "learning_rate": 1.3699788583509516e-05, + "loss": 0.0563, + "step": 13744 + }, + { + "epoch": 14.53, + "learning_rate": 1.3694503171247359e-05, + "loss": 0.0231, + "step": 13746 + }, + { + "epoch": 14.53, + "learning_rate": 1.3689217758985201e-05, + "loss": 0.0068, + "step": 13748 + }, + { + "epoch": 14.53, + "learning_rate": 1.3683932346723044e-05, + "loss": 0.0094, + "step": 13750 + }, + { + "epoch": 14.54, + "learning_rate": 1.367864693446089e-05, + "loss": 0.0178, + "step": 13752 + }, + { + "epoch": 14.54, + "learning_rate": 1.3673361522198733e-05, + "loss": 0.0592, + "step": 13754 + }, + { + "epoch": 14.54, + "learning_rate": 1.3668076109936575e-05, + "loss": 0.0183, + "step": 13756 + }, + { + "epoch": 14.54, + "learning_rate": 1.3662790697674418e-05, + "loss": 0.0065, + "step": 13758 + }, + { + "epoch": 14.55, + "learning_rate": 1.365750528541226e-05, + "loss": 0.0021, + "step": 13760 + }, + { + "epoch": 14.55, + "learning_rate": 1.3652219873150107e-05, + "loss": 0.0198, + "step": 13762 + }, + { + "epoch": 14.55, + "learning_rate": 1.364693446088795e-05, + "loss": 0.0197, + "step": 13764 + }, + { + "epoch": 14.55, + "learning_rate": 1.3641649048625794e-05, + "loss": 0.0105, + "step": 13766 + }, + { + "epoch": 14.55, + "learning_rate": 1.3636363636363637e-05, + "loss": 0.0388, + "step": 13768 + }, + { + "epoch": 14.56, + "learning_rate": 1.3631078224101481e-05, + "loss": 0.0011, + "step": 13770 + }, + { + "epoch": 14.56, + "learning_rate": 1.3625792811839325e-05, + "loss": 0.0006, + "step": 13772 + }, + { + "epoch": 14.56, + "learning_rate": 1.3620507399577168e-05, + "loss": 0.0233, + "step": 13774 + }, + { + "epoch": 14.56, + "learning_rate": 1.361522198731501e-05, + "loss": 0.0077, + "step": 13776 + }, + { + "epoch": 14.56, + "learning_rate": 1.3609936575052853e-05, + "loss": 0.003, + "step": 13778 + }, + { + "epoch": 14.57, + "learning_rate": 1.36046511627907e-05, + "loss": 0.0128, + "step": 13780 + }, + { + "epoch": 14.57, + "learning_rate": 1.3599365750528542e-05, + "loss": 0.0146, + "step": 13782 + }, + { + "epoch": 14.57, + "learning_rate": 1.3594080338266385e-05, + "loss": 0.0699, + "step": 13784 + }, + { + "epoch": 14.57, + "learning_rate": 1.3588794926004227e-05, + "loss": 0.0145, + "step": 13786 + }, + { + "epoch": 14.58, + "learning_rate": 1.3583509513742073e-05, + "loss": 0.0151, + "step": 13788 + }, + { + "epoch": 14.58, + "learning_rate": 1.3578224101479916e-05, + "loss": 0.039, + "step": 13790 + }, + { + "epoch": 14.58, + "learning_rate": 1.3572938689217759e-05, + "loss": 0.0277, + "step": 13792 + }, + { + "epoch": 14.58, + "learning_rate": 1.3567653276955603e-05, + "loss": 0.0039, + "step": 13794 + }, + { + "epoch": 14.58, + "learning_rate": 1.3562367864693446e-05, + "loss": 0.0024, + "step": 13796 + }, + { + "epoch": 14.59, + "learning_rate": 1.3557082452431292e-05, + "loss": 0.0033, + "step": 13798 + }, + { + "epoch": 14.59, + "learning_rate": 1.3551797040169135e-05, + "loss": 0.0173, + "step": 13800 + }, + { + "epoch": 14.59, + "learning_rate": 1.3546511627906977e-05, + "loss": 0.0648, + "step": 13802 + }, + { + "epoch": 14.59, + "learning_rate": 1.354122621564482e-05, + "loss": 0.0049, + "step": 13804 + }, + { + "epoch": 14.59, + "learning_rate": 1.3535940803382666e-05, + "loss": 0.0268, + "step": 13806 + }, + { + "epoch": 14.6, + "learning_rate": 1.3530655391120509e-05, + "loss": 0.0133, + "step": 13808 + }, + { + "epoch": 14.6, + "learning_rate": 1.3525369978858351e-05, + "loss": 0.0039, + "step": 13810 + }, + { + "epoch": 14.6, + "learning_rate": 1.3520084566596194e-05, + "loss": 0.0697, + "step": 13812 + }, + { + "epoch": 14.6, + "learning_rate": 1.3514799154334037e-05, + "loss": 0.0069, + "step": 13814 + }, + { + "epoch": 14.6, + "learning_rate": 1.3509513742071883e-05, + "loss": 0.0679, + "step": 13816 + }, + { + "epoch": 14.61, + "learning_rate": 1.3504228329809725e-05, + "loss": 0.0769, + "step": 13818 + }, + { + "epoch": 14.61, + "learning_rate": 1.349894291754757e-05, + "loss": 0.0052, + "step": 13820 + }, + { + "epoch": 14.61, + "learning_rate": 1.3493657505285412e-05, + "loss": 0.0194, + "step": 13822 + }, + { + "epoch": 14.61, + "learning_rate": 1.3488372093023258e-05, + "loss": 0.004, + "step": 13824 + }, + { + "epoch": 14.62, + "learning_rate": 1.3483086680761101e-05, + "loss": 0.0115, + "step": 13826 + }, + { + "epoch": 14.62, + "learning_rate": 1.3477801268498944e-05, + "loss": 0.0701, + "step": 13828 + }, + { + "epoch": 14.62, + "learning_rate": 1.3472515856236786e-05, + "loss": 0.0325, + "step": 13830 + }, + { + "epoch": 14.62, + "learning_rate": 1.3467230443974629e-05, + "loss": 0.0148, + "step": 13832 + }, + { + "epoch": 14.62, + "learning_rate": 1.3461945031712475e-05, + "loss": 0.0282, + "step": 13834 + }, + { + "epoch": 14.63, + "learning_rate": 1.3456659619450318e-05, + "loss": 0.0206, + "step": 13836 + }, + { + "epoch": 14.63, + "learning_rate": 1.345137420718816e-05, + "loss": 0.0577, + "step": 13838 + }, + { + "epoch": 14.63, + "learning_rate": 1.3446088794926003e-05, + "loss": 0.0582, + "step": 13840 + }, + { + "epoch": 14.63, + "learning_rate": 1.344080338266385e-05, + "loss": 0.0143, + "step": 13842 + }, + { + "epoch": 14.63, + "learning_rate": 1.3435517970401692e-05, + "loss": 0.0237, + "step": 13844 + }, + { + "epoch": 14.64, + "learning_rate": 1.3430232558139536e-05, + "loss": 0.0128, + "step": 13846 + }, + { + "epoch": 14.64, + "learning_rate": 1.3424947145877379e-05, + "loss": 0.0338, + "step": 13848 + }, + { + "epoch": 14.64, + "learning_rate": 1.3419661733615222e-05, + "loss": 0.0459, + "step": 13850 + }, + { + "epoch": 14.64, + "learning_rate": 1.3414376321353068e-05, + "loss": 0.0092, + "step": 13852 + }, + { + "epoch": 14.64, + "learning_rate": 1.340909090909091e-05, + "loss": 0.069, + "step": 13854 + }, + { + "epoch": 14.65, + "learning_rate": 1.3403805496828753e-05, + "loss": 0.0052, + "step": 13856 + }, + { + "epoch": 14.65, + "learning_rate": 1.3398520084566596e-05, + "loss": 0.0128, + "step": 13858 + }, + { + "epoch": 14.65, + "learning_rate": 1.3393234672304442e-05, + "loss": 0.0054, + "step": 13860 + }, + { + "epoch": 14.65, + "learning_rate": 1.3387949260042284e-05, + "loss": 0.0318, + "step": 13862 + }, + { + "epoch": 14.66, + "learning_rate": 1.3382663847780127e-05, + "loss": 0.0147, + "step": 13864 + }, + { + "epoch": 14.66, + "learning_rate": 1.337737843551797e-05, + "loss": 0.0183, + "step": 13866 + }, + { + "epoch": 14.66, + "learning_rate": 1.3372093023255814e-05, + "loss": 0.0115, + "step": 13868 + }, + { + "epoch": 14.66, + "learning_rate": 1.3366807610993659e-05, + "loss": 0.0046, + "step": 13870 + }, + { + "epoch": 14.66, + "learning_rate": 1.3361522198731501e-05, + "loss": 0.0153, + "step": 13872 + }, + { + "epoch": 14.67, + "learning_rate": 1.3356236786469346e-05, + "loss": 0.0257, + "step": 13874 + }, + { + "epoch": 14.67, + "learning_rate": 1.3350951374207188e-05, + "loss": 0.0091, + "step": 13876 + }, + { + "epoch": 14.67, + "learning_rate": 1.3345665961945034e-05, + "loss": 0.0428, + "step": 13878 + }, + { + "epoch": 14.67, + "learning_rate": 1.3340380549682877e-05, + "loss": 0.006, + "step": 13880 + }, + { + "epoch": 14.67, + "learning_rate": 1.333509513742072e-05, + "loss": 0.0107, + "step": 13882 + }, + { + "epoch": 14.68, + "learning_rate": 1.3329809725158562e-05, + "loss": 0.0074, + "step": 13884 + }, + { + "epoch": 14.68, + "learning_rate": 1.3324524312896405e-05, + "loss": 0.0027, + "step": 13886 + }, + { + "epoch": 14.68, + "learning_rate": 1.3319238900634251e-05, + "loss": 0.0179, + "step": 13888 + }, + { + "epoch": 14.68, + "learning_rate": 1.3313953488372094e-05, + "loss": 0.0392, + "step": 13890 + }, + { + "epoch": 14.68, + "learning_rate": 1.3308668076109936e-05, + "loss": 0.0578, + "step": 13892 + }, + { + "epoch": 14.69, + "learning_rate": 1.3303382663847779e-05, + "loss": 0.0031, + "step": 13894 + }, + { + "epoch": 14.69, + "learning_rate": 1.3298097251585625e-05, + "loss": 0.0141, + "step": 13896 + }, + { + "epoch": 14.69, + "learning_rate": 1.3292811839323468e-05, + "loss": 0.023, + "step": 13898 + }, + { + "epoch": 14.69, + "learning_rate": 1.3287526427061312e-05, + "loss": 0.0334, + "step": 13900 + }, + { + "epoch": 14.7, + "learning_rate": 1.3282241014799155e-05, + "loss": 0.0172, + "step": 13902 + }, + { + "epoch": 14.7, + "learning_rate": 1.3276955602536997e-05, + "loss": 0.0193, + "step": 13904 + }, + { + "epoch": 14.7, + "learning_rate": 1.3271670190274844e-05, + "loss": 0.012, + "step": 13906 + }, + { + "epoch": 14.7, + "learning_rate": 1.3266384778012686e-05, + "loss": 0.0029, + "step": 13908 + }, + { + "epoch": 14.7, + "learning_rate": 1.3261099365750529e-05, + "loss": 0.0846, + "step": 13910 + }, + { + "epoch": 14.71, + "learning_rate": 1.3255813953488372e-05, + "loss": 0.0243, + "step": 13912 + }, + { + "epoch": 14.71, + "learning_rate": 1.3250528541226218e-05, + "loss": 0.0498, + "step": 13914 + }, + { + "epoch": 14.71, + "learning_rate": 1.324524312896406e-05, + "loss": 0.0711, + "step": 13916 + }, + { + "epoch": 14.71, + "learning_rate": 1.3239957716701903e-05, + "loss": 0.0096, + "step": 13918 + }, + { + "epoch": 14.71, + "learning_rate": 1.3234672304439746e-05, + "loss": 0.01, + "step": 13920 + }, + { + "epoch": 14.72, + "learning_rate": 1.322938689217759e-05, + "loss": 0.0635, + "step": 13922 + }, + { + "epoch": 14.72, + "learning_rate": 1.3224101479915434e-05, + "loss": 0.004, + "step": 13924 + }, + { + "epoch": 14.72, + "learning_rate": 1.3218816067653279e-05, + "loss": 0.0302, + "step": 13926 + }, + { + "epoch": 14.72, + "learning_rate": 1.3213530655391121e-05, + "loss": 0.0196, + "step": 13928 + }, + { + "epoch": 14.73, + "learning_rate": 1.3208245243128964e-05, + "loss": 0.0089, + "step": 13930 + }, + { + "epoch": 14.73, + "learning_rate": 1.320295983086681e-05, + "loss": 0.0592, + "step": 13932 + }, + { + "epoch": 14.73, + "learning_rate": 1.3197674418604653e-05, + "loss": 0.0242, + "step": 13934 + }, + { + "epoch": 14.73, + "learning_rate": 1.3192389006342495e-05, + "loss": 0.0562, + "step": 13936 + }, + { + "epoch": 14.73, + "learning_rate": 1.3187103594080338e-05, + "loss": 0.1009, + "step": 13938 + }, + { + "epoch": 14.74, + "learning_rate": 1.318181818181818e-05, + "loss": 0.0292, + "step": 13940 + }, + { + "epoch": 14.74, + "learning_rate": 1.3176532769556027e-05, + "loss": 0.0123, + "step": 13942 + }, + { + "epoch": 14.74, + "learning_rate": 1.317124735729387e-05, + "loss": 0.002, + "step": 13944 + }, + { + "epoch": 14.74, + "learning_rate": 1.3165961945031712e-05, + "loss": 0.0556, + "step": 13946 + }, + { + "epoch": 14.74, + "learning_rate": 1.3160676532769557e-05, + "loss": 0.0065, + "step": 13948 + }, + { + "epoch": 14.75, + "learning_rate": 1.3155391120507401e-05, + "loss": 0.0098, + "step": 13950 + }, + { + "epoch": 14.75, + "learning_rate": 1.3150105708245244e-05, + "loss": 0.0191, + "step": 13952 + }, + { + "epoch": 14.75, + "learning_rate": 1.3144820295983088e-05, + "loss": 0.0013, + "step": 13954 + }, + { + "epoch": 14.75, + "learning_rate": 1.313953488372093e-05, + "loss": 0.0402, + "step": 13956 + }, + { + "epoch": 14.75, + "learning_rate": 1.3134249471458773e-05, + "loss": 0.0177, + "step": 13958 + }, + { + "epoch": 14.76, + "learning_rate": 1.312896405919662e-05, + "loss": 0.0181, + "step": 13960 + }, + { + "epoch": 14.76, + "learning_rate": 1.3123678646934462e-05, + "loss": 0.0154, + "step": 13962 + }, + { + "epoch": 14.76, + "learning_rate": 1.3118393234672305e-05, + "loss": 0.0194, + "step": 13964 + }, + { + "epoch": 14.76, + "learning_rate": 1.3113107822410147e-05, + "loss": 0.0338, + "step": 13966 + }, + { + "epoch": 14.77, + "learning_rate": 1.3107822410147993e-05, + "loss": 0.0014, + "step": 13968 + }, + { + "epoch": 14.77, + "learning_rate": 1.3102536997885836e-05, + "loss": 0.0198, + "step": 13970 + }, + { + "epoch": 14.77, + "learning_rate": 1.3097251585623679e-05, + "loss": 0.0695, + "step": 13972 + }, + { + "epoch": 14.77, + "learning_rate": 1.3091966173361521e-05, + "loss": 0.0272, + "step": 13974 + }, + { + "epoch": 14.77, + "learning_rate": 1.3086680761099366e-05, + "loss": 0.0626, + "step": 13976 + }, + { + "epoch": 14.78, + "learning_rate": 1.308139534883721e-05, + "loss": 0.011, + "step": 13978 + }, + { + "epoch": 14.78, + "learning_rate": 1.3076109936575055e-05, + "loss": 0.017, + "step": 13980 + }, + { + "epoch": 14.78, + "learning_rate": 1.3070824524312897e-05, + "loss": 0.0119, + "step": 13982 + }, + { + "epoch": 14.78, + "learning_rate": 1.306553911205074e-05, + "loss": 0.02, + "step": 13984 + }, + { + "epoch": 14.78, + "learning_rate": 1.3060253699788586e-05, + "loss": 0.0353, + "step": 13986 + }, + { + "epoch": 14.79, + "learning_rate": 1.3054968287526429e-05, + "loss": 0.039, + "step": 13988 + }, + { + "epoch": 14.79, + "learning_rate": 1.3049682875264271e-05, + "loss": 0.0174, + "step": 13990 + }, + { + "epoch": 14.79, + "learning_rate": 1.3044397463002114e-05, + "loss": 0.0433, + "step": 13992 + }, + { + "epoch": 14.79, + "learning_rate": 1.3039112050739957e-05, + "loss": 0.0249, + "step": 13994 + }, + { + "epoch": 14.79, + "learning_rate": 1.3033826638477803e-05, + "loss": 0.0049, + "step": 13996 + }, + { + "epoch": 14.8, + "learning_rate": 1.3028541226215645e-05, + "loss": 0.0084, + "step": 13998 + }, + { + "epoch": 14.8, + "learning_rate": 1.3023255813953488e-05, + "loss": 0.043, + "step": 14000 + }, + { + "epoch": 14.8, + "eval_cer": 0.11011684240524366, + "eval_loss": 0.7147426009178162, + "eval_runtime": 118.439, + "eval_samples_per_second": 7.101, + "eval_steps_per_second": 0.895, + "step": 14000 + }, + { + "epoch": 14.8, + "learning_rate": 1.3017970401691332e-05, + "loss": 0.0128, + "step": 14002 + }, + { + "epoch": 14.8, + "learning_rate": 1.3012684989429177e-05, + "loss": 0.0234, + "step": 14004 + }, + { + "epoch": 14.81, + "learning_rate": 1.3007399577167021e-05, + "loss": 0.0215, + "step": 14006 + }, + { + "epoch": 14.81, + "learning_rate": 1.3002114164904864e-05, + "loss": 0.0191, + "step": 14008 + }, + { + "epoch": 14.81, + "learning_rate": 1.2996828752642706e-05, + "loss": 0.0338, + "step": 14010 + }, + { + "epoch": 14.81, + "learning_rate": 1.299154334038055e-05, + "loss": 0.0133, + "step": 14012 + }, + { + "epoch": 14.81, + "learning_rate": 1.2986257928118395e-05, + "loss": 0.0235, + "step": 14014 + }, + { + "epoch": 14.82, + "learning_rate": 1.2980972515856238e-05, + "loss": 0.0398, + "step": 14016 + }, + { + "epoch": 14.82, + "learning_rate": 1.297568710359408e-05, + "loss": 0.0036, + "step": 14018 + }, + { + "epoch": 14.82, + "learning_rate": 1.2970401691331923e-05, + "loss": 0.0039, + "step": 14020 + }, + { + "epoch": 14.82, + "learning_rate": 1.296511627906977e-05, + "loss": 0.031, + "step": 14022 + }, + { + "epoch": 14.82, + "learning_rate": 1.2959830866807612e-05, + "loss": 0.0258, + "step": 14024 + }, + { + "epoch": 14.83, + "learning_rate": 1.2954545454545455e-05, + "loss": 0.0334, + "step": 14026 + }, + { + "epoch": 14.83, + "learning_rate": 1.2949260042283299e-05, + "loss": 0.025, + "step": 14028 + }, + { + "epoch": 14.83, + "learning_rate": 1.2943974630021142e-05, + "loss": 0.0311, + "step": 14030 + }, + { + "epoch": 14.83, + "learning_rate": 1.2938689217758988e-05, + "loss": 0.008, + "step": 14032 + }, + { + "epoch": 14.84, + "learning_rate": 1.293340380549683e-05, + "loss": 0.0442, + "step": 14034 + }, + { + "epoch": 14.84, + "learning_rate": 1.2928118393234673e-05, + "loss": 0.0072, + "step": 14036 + }, + { + "epoch": 14.84, + "learning_rate": 1.2922832980972516e-05, + "loss": 0.0121, + "step": 14038 + }, + { + "epoch": 14.84, + "learning_rate": 1.2917547568710358e-05, + "loss": 0.0383, + "step": 14040 + }, + { + "epoch": 14.84, + "learning_rate": 1.2912262156448204e-05, + "loss": 0.0156, + "step": 14042 + }, + { + "epoch": 14.85, + "learning_rate": 1.2906976744186047e-05, + "loss": 0.0085, + "step": 14044 + }, + { + "epoch": 14.85, + "learning_rate": 1.290169133192389e-05, + "loss": 0.0195, + "step": 14046 + }, + { + "epoch": 14.85, + "learning_rate": 1.2896405919661732e-05, + "loss": 0.027, + "step": 14048 + }, + { + "epoch": 14.85, + "learning_rate": 1.2891120507399579e-05, + "loss": 0.0062, + "step": 14050 + }, + { + "epoch": 14.85, + "learning_rate": 1.2885835095137421e-05, + "loss": 0.0284, + "step": 14052 + }, + { + "epoch": 14.86, + "learning_rate": 1.2880549682875266e-05, + "loss": 0.0132, + "step": 14054 + }, + { + "epoch": 14.86, + "learning_rate": 1.2875264270613108e-05, + "loss": 0.0049, + "step": 14056 + }, + { + "epoch": 14.86, + "learning_rate": 1.2869978858350951e-05, + "loss": 0.1056, + "step": 14058 + }, + { + "epoch": 14.86, + "learning_rate": 1.2864693446088797e-05, + "loss": 0.0352, + "step": 14060 + }, + { + "epoch": 14.86, + "learning_rate": 1.285940803382664e-05, + "loss": 0.0039, + "step": 14062 + }, + { + "epoch": 14.87, + "learning_rate": 1.2854122621564482e-05, + "loss": 0.0626, + "step": 14064 + }, + { + "epoch": 14.87, + "learning_rate": 1.2848837209302325e-05, + "loss": 0.0113, + "step": 14066 + }, + { + "epoch": 14.87, + "learning_rate": 1.2843551797040171e-05, + "loss": 0.0759, + "step": 14068 + }, + { + "epoch": 14.87, + "learning_rate": 1.2838266384778014e-05, + "loss": 0.045, + "step": 14070 + }, + { + "epoch": 14.88, + "learning_rate": 1.2832980972515856e-05, + "loss": 0.0482, + "step": 14072 + }, + { + "epoch": 14.88, + "learning_rate": 1.2827695560253699e-05, + "loss": 0.0117, + "step": 14074 + }, + { + "epoch": 14.88, + "learning_rate": 1.2822410147991543e-05, + "loss": 0.0192, + "step": 14076 + }, + { + "epoch": 14.88, + "learning_rate": 1.2817124735729388e-05, + "loss": 0.0201, + "step": 14078 + }, + { + "epoch": 14.88, + "learning_rate": 1.281183932346723e-05, + "loss": 0.0157, + "step": 14080 + }, + { + "epoch": 14.89, + "learning_rate": 1.2806553911205075e-05, + "loss": 0.0105, + "step": 14082 + }, + { + "epoch": 14.89, + "learning_rate": 1.2801268498942918e-05, + "loss": 0.0715, + "step": 14084 + }, + { + "epoch": 14.89, + "learning_rate": 1.2795983086680764e-05, + "loss": 0.0033, + "step": 14086 + }, + { + "epoch": 14.89, + "learning_rate": 1.2790697674418606e-05, + "loss": 0.0399, + "step": 14088 + }, + { + "epoch": 14.89, + "learning_rate": 1.2785412262156449e-05, + "loss": 0.0123, + "step": 14090 + }, + { + "epoch": 14.9, + "learning_rate": 1.2780126849894292e-05, + "loss": 0.0195, + "step": 14092 + }, + { + "epoch": 14.9, + "learning_rate": 1.2774841437632134e-05, + "loss": 0.0414, + "step": 14094 + }, + { + "epoch": 14.9, + "learning_rate": 1.276955602536998e-05, + "loss": 0.0498, + "step": 14096 + }, + { + "epoch": 14.9, + "learning_rate": 1.2764270613107823e-05, + "loss": 0.0189, + "step": 14098 + }, + { + "epoch": 14.9, + "learning_rate": 1.2758985200845666e-05, + "loss": 0.0561, + "step": 14100 + }, + { + "epoch": 14.91, + "learning_rate": 1.2753699788583508e-05, + "loss": 0.0084, + "step": 14102 + }, + { + "epoch": 14.91, + "learning_rate": 1.2748414376321354e-05, + "loss": 0.0024, + "step": 14104 + }, + { + "epoch": 14.91, + "learning_rate": 1.2743128964059197e-05, + "loss": 0.0021, + "step": 14106 + }, + { + "epoch": 14.91, + "learning_rate": 1.2737843551797041e-05, + "loss": 0.0055, + "step": 14108 + }, + { + "epoch": 14.92, + "learning_rate": 1.2732558139534884e-05, + "loss": 0.0232, + "step": 14110 + }, + { + "epoch": 14.92, + "learning_rate": 1.2727272727272727e-05, + "loss": 0.0115, + "step": 14112 + }, + { + "epoch": 14.92, + "learning_rate": 1.2721987315010573e-05, + "loss": 0.0262, + "step": 14114 + }, + { + "epoch": 14.92, + "learning_rate": 1.2716701902748416e-05, + "loss": 0.0173, + "step": 14116 + }, + { + "epoch": 14.92, + "learning_rate": 1.2711416490486258e-05, + "loss": 0.0069, + "step": 14118 + }, + { + "epoch": 14.93, + "learning_rate": 1.27061310782241e-05, + "loss": 0.0197, + "step": 14120 + }, + { + "epoch": 14.93, + "learning_rate": 1.2700845665961947e-05, + "loss": 0.0512, + "step": 14122 + }, + { + "epoch": 14.93, + "learning_rate": 1.269556025369979e-05, + "loss": 0.0183, + "step": 14124 + }, + { + "epoch": 14.93, + "learning_rate": 1.2690274841437632e-05, + "loss": 0.0081, + "step": 14126 + }, + { + "epoch": 14.93, + "learning_rate": 1.2684989429175475e-05, + "loss": 0.0204, + "step": 14128 + }, + { + "epoch": 14.94, + "learning_rate": 1.267970401691332e-05, + "loss": 0.0542, + "step": 14130 + }, + { + "epoch": 14.94, + "learning_rate": 1.2674418604651164e-05, + "loss": 0.0014, + "step": 14132 + }, + { + "epoch": 14.94, + "learning_rate": 1.2669133192389008e-05, + "loss": 0.0283, + "step": 14134 + }, + { + "epoch": 14.94, + "learning_rate": 1.266384778012685e-05, + "loss": 0.0025, + "step": 14136 + }, + { + "epoch": 14.95, + "learning_rate": 1.2658562367864693e-05, + "loss": 0.0045, + "step": 14138 + }, + { + "epoch": 14.95, + "learning_rate": 1.265327695560254e-05, + "loss": 0.0321, + "step": 14140 + }, + { + "epoch": 14.95, + "learning_rate": 1.2647991543340382e-05, + "loss": 0.0172, + "step": 14142 + }, + { + "epoch": 14.95, + "learning_rate": 1.2642706131078225e-05, + "loss": 0.0381, + "step": 14144 + }, + { + "epoch": 14.95, + "learning_rate": 1.2637420718816067e-05, + "loss": 0.0101, + "step": 14146 + }, + { + "epoch": 14.96, + "learning_rate": 1.263213530655391e-05, + "loss": 0.0131, + "step": 14148 + }, + { + "epoch": 14.96, + "learning_rate": 1.2626849894291756e-05, + "loss": 0.0297, + "step": 14150 + }, + { + "epoch": 14.96, + "learning_rate": 1.2621564482029599e-05, + "loss": 0.0712, + "step": 14152 + }, + { + "epoch": 14.96, + "learning_rate": 1.2616279069767442e-05, + "loss": 0.0195, + "step": 14154 + }, + { + "epoch": 14.96, + "learning_rate": 1.2610993657505286e-05, + "loss": 0.0028, + "step": 14156 + }, + { + "epoch": 14.97, + "learning_rate": 1.260570824524313e-05, + "loss": 0.0212, + "step": 14158 + }, + { + "epoch": 14.97, + "learning_rate": 1.2600422832980973e-05, + "loss": 0.0071, + "step": 14160 + }, + { + "epoch": 14.97, + "learning_rate": 1.2595137420718817e-05, + "loss": 0.0148, + "step": 14162 + }, + { + "epoch": 14.97, + "learning_rate": 1.258985200845666e-05, + "loss": 0.0342, + "step": 14164 + }, + { + "epoch": 14.97, + "learning_rate": 1.2584566596194503e-05, + "loss": 0.0767, + "step": 14166 + }, + { + "epoch": 14.98, + "learning_rate": 1.2579281183932349e-05, + "loss": 0.021, + "step": 14168 + }, + { + "epoch": 14.98, + "learning_rate": 1.2573995771670191e-05, + "loss": 0.0338, + "step": 14170 + }, + { + "epoch": 14.98, + "learning_rate": 1.2568710359408034e-05, + "loss": 0.0312, + "step": 14172 + }, + { + "epoch": 14.98, + "learning_rate": 1.2563424947145877e-05, + "loss": 0.0186, + "step": 14174 + }, + { + "epoch": 14.99, + "learning_rate": 1.2558139534883723e-05, + "loss": 0.0552, + "step": 14176 + }, + { + "epoch": 14.99, + "learning_rate": 1.2552854122621565e-05, + "loss": 0.0098, + "step": 14178 + }, + { + "epoch": 14.99, + "learning_rate": 1.2547568710359408e-05, + "loss": 0.0243, + "step": 14180 + }, + { + "epoch": 14.99, + "learning_rate": 1.254228329809725e-05, + "loss": 0.0223, + "step": 14182 + }, + { + "epoch": 14.99, + "learning_rate": 1.2536997885835095e-05, + "loss": 0.0292, + "step": 14184 + }, + { + "epoch": 15.0, + "learning_rate": 1.253171247357294e-05, + "loss": 0.0551, + "step": 14186 + }, + { + "epoch": 15.0, + "learning_rate": 1.2526427061310784e-05, + "loss": 0.0148, + "step": 14188 + }, + { + "epoch": 15.0, + "learning_rate": 1.2521141649048627e-05, + "loss": 0.0025, + "step": 14190 + }, + { + "epoch": 15.0, + "learning_rate": 1.251585623678647e-05, + "loss": 0.0089, + "step": 14192 + }, + { + "epoch": 15.0, + "learning_rate": 1.2510570824524315e-05, + "loss": 0.0835, + "step": 14194 + }, + { + "epoch": 15.01, + "learning_rate": 1.2505285412262158e-05, + "loss": 0.0132, + "step": 14196 + }, + { + "epoch": 15.01, + "learning_rate": 1.25e-05, + "loss": 0.0119, + "step": 14198 + }, + { + "epoch": 15.01, + "learning_rate": 1.2494714587737843e-05, + "loss": 0.0015, + "step": 14200 + }, + { + "epoch": 15.01, + "learning_rate": 1.2489429175475688e-05, + "loss": 0.0044, + "step": 14202 + }, + { + "epoch": 15.01, + "learning_rate": 1.248414376321353e-05, + "loss": 0.0366, + "step": 14204 + }, + { + "epoch": 15.02, + "learning_rate": 1.2478858350951375e-05, + "loss": 0.1134, + "step": 14206 + }, + { + "epoch": 15.02, + "learning_rate": 1.2473572938689217e-05, + "loss": 0.0088, + "step": 14208 + }, + { + "epoch": 15.02, + "learning_rate": 1.2468287526427062e-05, + "loss": 0.0351, + "step": 14210 + }, + { + "epoch": 15.02, + "learning_rate": 1.2463002114164906e-05, + "loss": 0.0116, + "step": 14212 + }, + { + "epoch": 15.03, + "learning_rate": 1.245771670190275e-05, + "loss": 0.0377, + "step": 14214 + }, + { + "epoch": 15.03, + "learning_rate": 1.2452431289640593e-05, + "loss": 0.0145, + "step": 14216 + }, + { + "epoch": 15.03, + "learning_rate": 1.2447145877378436e-05, + "loss": 0.0409, + "step": 14218 + }, + { + "epoch": 15.03, + "learning_rate": 1.244186046511628e-05, + "loss": 0.0149, + "step": 14220 + }, + { + "epoch": 15.03, + "learning_rate": 1.2436575052854123e-05, + "loss": 0.0156, + "step": 14222 + }, + { + "epoch": 15.04, + "learning_rate": 1.2431289640591967e-05, + "loss": 0.011, + "step": 14224 + }, + { + "epoch": 15.04, + "learning_rate": 1.242600422832981e-05, + "loss": 0.0027, + "step": 14226 + }, + { + "epoch": 15.04, + "learning_rate": 1.2420718816067654e-05, + "loss": 0.0122, + "step": 14228 + }, + { + "epoch": 15.04, + "learning_rate": 1.2415433403805497e-05, + "loss": 0.0271, + "step": 14230 + }, + { + "epoch": 15.04, + "learning_rate": 1.2410147991543341e-05, + "loss": 0.0151, + "step": 14232 + }, + { + "epoch": 15.05, + "learning_rate": 1.2404862579281184e-05, + "loss": 0.0016, + "step": 14234 + }, + { + "epoch": 15.05, + "learning_rate": 1.2399577167019028e-05, + "loss": 0.0097, + "step": 14236 + }, + { + "epoch": 15.05, + "learning_rate": 1.2394291754756873e-05, + "loss": 0.0016, + "step": 14238 + }, + { + "epoch": 15.05, + "learning_rate": 1.2389006342494715e-05, + "loss": 0.0188, + "step": 14240 + }, + { + "epoch": 15.05, + "learning_rate": 1.238372093023256e-05, + "loss": 0.0048, + "step": 14242 + }, + { + "epoch": 15.06, + "learning_rate": 1.2378435517970402e-05, + "loss": 0.0289, + "step": 14244 + }, + { + "epoch": 15.06, + "learning_rate": 1.2373150105708247e-05, + "loss": 0.0386, + "step": 14246 + }, + { + "epoch": 15.06, + "learning_rate": 1.236786469344609e-05, + "loss": 0.0394, + "step": 14248 + }, + { + "epoch": 15.06, + "learning_rate": 1.2362579281183934e-05, + "loss": 0.0077, + "step": 14250 + }, + { + "epoch": 15.07, + "learning_rate": 1.2357293868921776e-05, + "loss": 0.007, + "step": 14252 + }, + { + "epoch": 15.07, + "learning_rate": 1.2352008456659619e-05, + "loss": 0.0025, + "step": 14254 + }, + { + "epoch": 15.07, + "learning_rate": 1.2346723044397463e-05, + "loss": 0.018, + "step": 14256 + }, + { + "epoch": 15.07, + "learning_rate": 1.2341437632135306e-05, + "loss": 0.0049, + "step": 14258 + }, + { + "epoch": 15.07, + "learning_rate": 1.233615221987315e-05, + "loss": 0.0545, + "step": 14260 + }, + { + "epoch": 15.08, + "learning_rate": 1.2330866807610993e-05, + "loss": 0.0179, + "step": 14262 + }, + { + "epoch": 15.08, + "learning_rate": 1.2325581395348838e-05, + "loss": 0.0084, + "step": 14264 + }, + { + "epoch": 15.08, + "learning_rate": 1.2320295983086682e-05, + "loss": 0.0726, + "step": 14266 + }, + { + "epoch": 15.08, + "learning_rate": 1.2315010570824526e-05, + "loss": 0.0107, + "step": 14268 + }, + { + "epoch": 15.08, + "learning_rate": 1.2309725158562369e-05, + "loss": 0.0246, + "step": 14270 + }, + { + "epoch": 15.09, + "learning_rate": 1.2304439746300212e-05, + "loss": 0.0071, + "step": 14272 + }, + { + "epoch": 15.09, + "learning_rate": 1.2299154334038056e-05, + "loss": 0.0165, + "step": 14274 + }, + { + "epoch": 15.09, + "learning_rate": 1.2293868921775899e-05, + "loss": 0.0012, + "step": 14276 + }, + { + "epoch": 15.09, + "learning_rate": 1.2288583509513743e-05, + "loss": 0.0039, + "step": 14278 + }, + { + "epoch": 15.1, + "learning_rate": 1.2283298097251586e-05, + "loss": 0.0154, + "step": 14280 + }, + { + "epoch": 15.1, + "learning_rate": 1.227801268498943e-05, + "loss": 0.009, + "step": 14282 + }, + { + "epoch": 15.1, + "learning_rate": 1.2272727272727273e-05, + "loss": 0.0496, + "step": 14284 + }, + { + "epoch": 15.1, + "learning_rate": 1.2267441860465115e-05, + "loss": 0.0129, + "step": 14286 + }, + { + "epoch": 15.1, + "learning_rate": 1.226215644820296e-05, + "loss": 0.0179, + "step": 14288 + }, + { + "epoch": 15.11, + "learning_rate": 1.2256871035940804e-05, + "loss": 0.0161, + "step": 14290 + }, + { + "epoch": 15.11, + "learning_rate": 1.2251585623678648e-05, + "loss": 0.0031, + "step": 14292 + }, + { + "epoch": 15.11, + "learning_rate": 1.2246300211416491e-05, + "loss": 0.0035, + "step": 14294 + }, + { + "epoch": 15.11, + "learning_rate": 1.2241014799154336e-05, + "loss": 0.0109, + "step": 14296 + }, + { + "epoch": 15.11, + "learning_rate": 1.2235729386892178e-05, + "loss": 0.0152, + "step": 14298 + }, + { + "epoch": 15.12, + "learning_rate": 1.2230443974630023e-05, + "loss": 0.0054, + "step": 14300 + }, + { + "epoch": 15.12, + "learning_rate": 1.2225158562367865e-05, + "loss": 0.0019, + "step": 14302 + }, + { + "epoch": 15.12, + "learning_rate": 1.2219873150105708e-05, + "loss": 0.0168, + "step": 14304 + }, + { + "epoch": 15.12, + "learning_rate": 1.2214587737843552e-05, + "loss": 0.01, + "step": 14306 + }, + { + "epoch": 15.12, + "learning_rate": 1.2209302325581395e-05, + "loss": 0.0634, + "step": 14308 + }, + { + "epoch": 15.13, + "learning_rate": 1.220401691331924e-05, + "loss": 0.0297, + "step": 14310 + }, + { + "epoch": 15.13, + "learning_rate": 1.2198731501057082e-05, + "loss": 0.0023, + "step": 14312 + }, + { + "epoch": 15.13, + "learning_rate": 1.2193446088794926e-05, + "loss": 0.0024, + "step": 14314 + }, + { + "epoch": 15.13, + "learning_rate": 1.218816067653277e-05, + "loss": 0.0011, + "step": 14316 + }, + { + "epoch": 15.14, + "learning_rate": 1.2182875264270615e-05, + "loss": 0.019, + "step": 14318 + }, + { + "epoch": 15.14, + "learning_rate": 1.2177589852008458e-05, + "loss": 0.014, + "step": 14320 + }, + { + "epoch": 15.14, + "learning_rate": 1.21723044397463e-05, + "loss": 0.0491, + "step": 14322 + }, + { + "epoch": 15.14, + "learning_rate": 1.2167019027484145e-05, + "loss": 0.0111, + "step": 14324 + }, + { + "epoch": 15.14, + "learning_rate": 1.2161733615221987e-05, + "loss": 0.1101, + "step": 14326 + }, + { + "epoch": 15.15, + "learning_rate": 1.2156448202959832e-05, + "loss": 0.0013, + "step": 14328 + }, + { + "epoch": 15.15, + "learning_rate": 1.2151162790697674e-05, + "loss": 0.0017, + "step": 14330 + }, + { + "epoch": 15.15, + "learning_rate": 1.2145877378435519e-05, + "loss": 0.0016, + "step": 14332 + }, + { + "epoch": 15.15, + "learning_rate": 1.2140591966173362e-05, + "loss": 0.0131, + "step": 14334 + }, + { + "epoch": 15.15, + "learning_rate": 1.2135306553911206e-05, + "loss": 0.0167, + "step": 14336 + }, + { + "epoch": 15.16, + "learning_rate": 1.2130021141649049e-05, + "loss": 0.0574, + "step": 14338 + }, + { + "epoch": 15.16, + "learning_rate": 1.2124735729386893e-05, + "loss": 0.0031, + "step": 14340 + }, + { + "epoch": 15.16, + "learning_rate": 1.2119450317124736e-05, + "loss": 0.0017, + "step": 14342 + }, + { + "epoch": 15.16, + "learning_rate": 1.211416490486258e-05, + "loss": 0.0011, + "step": 14344 + }, + { + "epoch": 15.16, + "learning_rate": 1.2108879492600424e-05, + "loss": 0.0199, + "step": 14346 + }, + { + "epoch": 15.17, + "learning_rate": 1.2103594080338267e-05, + "loss": 0.0643, + "step": 14348 + }, + { + "epoch": 15.17, + "learning_rate": 1.2098308668076111e-05, + "loss": 0.0007, + "step": 14350 + }, + { + "epoch": 15.17, + "learning_rate": 1.2093023255813954e-05, + "loss": 0.0077, + "step": 14352 + }, + { + "epoch": 15.17, + "learning_rate": 1.2087737843551798e-05, + "loss": 0.0107, + "step": 14354 + }, + { + "epoch": 15.18, + "learning_rate": 1.2082452431289641e-05, + "loss": 0.0029, + "step": 14356 + }, + { + "epoch": 15.18, + "learning_rate": 1.2077167019027484e-05, + "loss": 0.0179, + "step": 14358 + }, + { + "epoch": 15.18, + "learning_rate": 1.2071881606765328e-05, + "loss": 0.0099, + "step": 14360 + }, + { + "epoch": 15.18, + "learning_rate": 1.206659619450317e-05, + "loss": 0.0019, + "step": 14362 + }, + { + "epoch": 15.18, + "learning_rate": 1.2061310782241015e-05, + "loss": 0.0512, + "step": 14364 + }, + { + "epoch": 15.19, + "learning_rate": 1.2056025369978858e-05, + "loss": 0.0281, + "step": 14366 + }, + { + "epoch": 15.19, + "learning_rate": 1.2050739957716702e-05, + "loss": 0.0039, + "step": 14368 + }, + { + "epoch": 15.19, + "learning_rate": 1.2045454545454547e-05, + "loss": 0.0081, + "step": 14370 + }, + { + "epoch": 15.19, + "learning_rate": 1.2040169133192391e-05, + "loss": 0.038, + "step": 14372 + }, + { + "epoch": 15.19, + "learning_rate": 1.2034883720930234e-05, + "loss": 0.0053, + "step": 14374 + }, + { + "epoch": 15.2, + "learning_rate": 1.2029598308668076e-05, + "loss": 0.0346, + "step": 14376 + }, + { + "epoch": 15.2, + "learning_rate": 1.202431289640592e-05, + "loss": 0.0205, + "step": 14378 + }, + { + "epoch": 15.2, + "learning_rate": 1.2019027484143763e-05, + "loss": 0.0112, + "step": 14380 + }, + { + "epoch": 15.2, + "learning_rate": 1.2013742071881608e-05, + "loss": 0.0561, + "step": 14382 + }, + { + "epoch": 15.21, + "learning_rate": 1.200845665961945e-05, + "loss": 0.0052, + "step": 14384 + }, + { + "epoch": 15.21, + "learning_rate": 1.2003171247357295e-05, + "loss": 0.0107, + "step": 14386 + }, + { + "epoch": 15.21, + "learning_rate": 1.1997885835095137e-05, + "loss": 0.0043, + "step": 14388 + }, + { + "epoch": 15.21, + "learning_rate": 1.1992600422832982e-05, + "loss": 0.0613, + "step": 14390 + }, + { + "epoch": 15.21, + "learning_rate": 1.1987315010570824e-05, + "loss": 0.0243, + "step": 14392 + }, + { + "epoch": 15.22, + "learning_rate": 1.1982029598308669e-05, + "loss": 0.0275, + "step": 14394 + }, + { + "epoch": 15.22, + "learning_rate": 1.1976744186046513e-05, + "loss": 0.0478, + "step": 14396 + }, + { + "epoch": 15.22, + "learning_rate": 1.1971458773784356e-05, + "loss": 0.0442, + "step": 14398 + }, + { + "epoch": 15.22, + "learning_rate": 1.19661733615222e-05, + "loss": 0.0173, + "step": 14400 + }, + { + "epoch": 15.22, + "learning_rate": 1.1960887949260043e-05, + "loss": 0.082, + "step": 14402 + }, + { + "epoch": 15.23, + "learning_rate": 1.1955602536997887e-05, + "loss": 0.0412, + "step": 14404 + }, + { + "epoch": 15.23, + "learning_rate": 1.195031712473573e-05, + "loss": 0.0079, + "step": 14406 + }, + { + "epoch": 15.23, + "learning_rate": 1.1945031712473574e-05, + "loss": 0.0347, + "step": 14408 + }, + { + "epoch": 15.23, + "learning_rate": 1.1939746300211417e-05, + "loss": 0.0082, + "step": 14410 + }, + { + "epoch": 15.23, + "learning_rate": 1.193446088794926e-05, + "loss": 0.0088, + "step": 14412 + }, + { + "epoch": 15.24, + "learning_rate": 1.1929175475687104e-05, + "loss": 0.0102, + "step": 14414 + }, + { + "epoch": 15.24, + "learning_rate": 1.1923890063424947e-05, + "loss": 0.0421, + "step": 14416 + }, + { + "epoch": 15.24, + "learning_rate": 1.1918604651162791e-05, + "loss": 0.0322, + "step": 14418 + }, + { + "epoch": 15.24, + "learning_rate": 1.1913319238900635e-05, + "loss": 0.0186, + "step": 14420 + }, + { + "epoch": 15.25, + "learning_rate": 1.190803382663848e-05, + "loss": 0.0491, + "step": 14422 + }, + { + "epoch": 15.25, + "learning_rate": 1.1902748414376322e-05, + "loss": 0.0272, + "step": 14424 + }, + { + "epoch": 15.25, + "learning_rate": 1.1897463002114167e-05, + "loss": 0.0482, + "step": 14426 + }, + { + "epoch": 15.25, + "learning_rate": 1.189217758985201e-05, + "loss": 0.0494, + "step": 14428 + }, + { + "epoch": 15.25, + "learning_rate": 1.1886892177589852e-05, + "loss": 0.0158, + "step": 14430 + }, + { + "epoch": 15.26, + "learning_rate": 1.1881606765327696e-05, + "loss": 0.0274, + "step": 14432 + }, + { + "epoch": 15.26, + "learning_rate": 1.1876321353065539e-05, + "loss": 0.0167, + "step": 14434 + }, + { + "epoch": 15.26, + "learning_rate": 1.1871035940803384e-05, + "loss": 0.1149, + "step": 14436 + }, + { + "epoch": 15.26, + "learning_rate": 1.1865750528541226e-05, + "loss": 0.0095, + "step": 14438 + }, + { + "epoch": 15.26, + "learning_rate": 1.186046511627907e-05, + "loss": 0.0087, + "step": 14440 + }, + { + "epoch": 15.27, + "learning_rate": 1.1855179704016913e-05, + "loss": 0.0084, + "step": 14442 + }, + { + "epoch": 15.27, + "learning_rate": 1.1849894291754758e-05, + "loss": 0.0193, + "step": 14444 + }, + { + "epoch": 15.27, + "learning_rate": 1.18446088794926e-05, + "loss": 0.0106, + "step": 14446 + }, + { + "epoch": 15.27, + "learning_rate": 1.1839323467230445e-05, + "loss": 0.0039, + "step": 14448 + }, + { + "epoch": 15.27, + "learning_rate": 1.1834038054968289e-05, + "loss": 0.0293, + "step": 14450 + }, + { + "epoch": 15.28, + "learning_rate": 1.1828752642706132e-05, + "loss": 0.0379, + "step": 14452 + }, + { + "epoch": 15.28, + "learning_rate": 1.1823467230443976e-05, + "loss": 0.0193, + "step": 14454 + }, + { + "epoch": 15.28, + "learning_rate": 1.1818181818181819e-05, + "loss": 0.0078, + "step": 14456 + }, + { + "epoch": 15.28, + "learning_rate": 1.1812896405919663e-05, + "loss": 0.0086, + "step": 14458 + }, + { + "epoch": 15.29, + "learning_rate": 1.1807610993657506e-05, + "loss": 0.0157, + "step": 14460 + }, + { + "epoch": 15.29, + "learning_rate": 1.1802325581395348e-05, + "loss": 0.0359, + "step": 14462 + }, + { + "epoch": 15.29, + "learning_rate": 1.1797040169133193e-05, + "loss": 0.0405, + "step": 14464 + }, + { + "epoch": 15.29, + "learning_rate": 1.1791754756871035e-05, + "loss": 0.0195, + "step": 14466 + }, + { + "epoch": 15.29, + "learning_rate": 1.178646934460888e-05, + "loss": 0.0071, + "step": 14468 + }, + { + "epoch": 15.3, + "learning_rate": 1.1781183932346722e-05, + "loss": 0.0268, + "step": 14470 + }, + { + "epoch": 15.3, + "learning_rate": 1.1775898520084567e-05, + "loss": 0.0203, + "step": 14472 + }, + { + "epoch": 15.3, + "learning_rate": 1.1770613107822411e-05, + "loss": 0.0033, + "step": 14474 + }, + { + "epoch": 15.3, + "learning_rate": 1.1765327695560256e-05, + "loss": 0.0172, + "step": 14476 + }, + { + "epoch": 15.3, + "learning_rate": 1.1760042283298098e-05, + "loss": 0.007, + "step": 14478 + }, + { + "epoch": 15.31, + "learning_rate": 1.1754756871035941e-05, + "loss": 0.0289, + "step": 14480 + }, + { + "epoch": 15.31, + "learning_rate": 1.1749471458773785e-05, + "loss": 0.0525, + "step": 14482 + }, + { + "epoch": 15.31, + "learning_rate": 1.1744186046511628e-05, + "loss": 0.0071, + "step": 14484 + }, + { + "epoch": 15.31, + "learning_rate": 1.1738900634249472e-05, + "loss": 0.0152, + "step": 14486 + }, + { + "epoch": 15.32, + "learning_rate": 1.1733615221987315e-05, + "loss": 0.0126, + "step": 14488 + }, + { + "epoch": 15.32, + "learning_rate": 1.172832980972516e-05, + "loss": 0.0154, + "step": 14490 + }, + { + "epoch": 15.32, + "learning_rate": 1.1723044397463002e-05, + "loss": 0.0023, + "step": 14492 + }, + { + "epoch": 15.32, + "learning_rate": 1.1717758985200846e-05, + "loss": 0.0349, + "step": 14494 + }, + { + "epoch": 15.32, + "learning_rate": 1.1712473572938689e-05, + "loss": 0.0262, + "step": 14496 + }, + { + "epoch": 15.33, + "learning_rate": 1.1707188160676533e-05, + "loss": 0.031, + "step": 14498 + }, + { + "epoch": 15.33, + "learning_rate": 1.1701902748414378e-05, + "loss": 0.0121, + "step": 14500 + }, + { + "epoch": 15.33, + "eval_cer": 0.04895981761185523, + "eval_loss": 0.6679000854492188, + "eval_runtime": 125.3329, + "eval_samples_per_second": 6.71, + "eval_steps_per_second": 0.846, + "step": 14500 + }, + { + "epoch": 15.33, + "learning_rate": 1.169661733615222e-05, + "loss": 0.0094, + "step": 14502 + }, + { + "epoch": 15.33, + "learning_rate": 1.1691331923890065e-05, + "loss": 0.0073, + "step": 14504 + }, + { + "epoch": 15.33, + "learning_rate": 1.1686046511627907e-05, + "loss": 0.0042, + "step": 14506 + }, + { + "epoch": 15.34, + "learning_rate": 1.1680761099365752e-05, + "loss": 0.0144, + "step": 14508 + }, + { + "epoch": 15.34, + "learning_rate": 1.1675475687103595e-05, + "loss": 0.0017, + "step": 14510 + }, + { + "epoch": 15.34, + "learning_rate": 1.1670190274841439e-05, + "loss": 0.0413, + "step": 14512 + }, + { + "epoch": 15.34, + "learning_rate": 1.1664904862579282e-05, + "loss": 0.0259, + "step": 14514 + }, + { + "epoch": 15.34, + "learning_rate": 1.1659619450317124e-05, + "loss": 0.0061, + "step": 14516 + }, + { + "epoch": 15.35, + "learning_rate": 1.1654334038054969e-05, + "loss": 0.0018, + "step": 14518 + }, + { + "epoch": 15.35, + "learning_rate": 1.1649048625792811e-05, + "loss": 0.0007, + "step": 14520 + }, + { + "epoch": 15.35, + "learning_rate": 1.1643763213530656e-05, + "loss": 0.0474, + "step": 14522 + }, + { + "epoch": 15.35, + "learning_rate": 1.16384778012685e-05, + "loss": 0.0409, + "step": 14524 + }, + { + "epoch": 15.36, + "learning_rate": 1.1633192389006343e-05, + "loss": 0.0263, + "step": 14526 + }, + { + "epoch": 15.36, + "learning_rate": 1.1627906976744187e-05, + "loss": 0.0323, + "step": 14528 + }, + { + "epoch": 15.36, + "learning_rate": 1.1622621564482031e-05, + "loss": 0.0008, + "step": 14530 + }, + { + "epoch": 15.36, + "learning_rate": 1.1617336152219874e-05, + "loss": 0.0061, + "step": 14532 + }, + { + "epoch": 15.36, + "learning_rate": 1.1612050739957717e-05, + "loss": 0.0196, + "step": 14534 + }, + { + "epoch": 15.37, + "learning_rate": 1.1606765327695561e-05, + "loss": 0.0093, + "step": 14536 + }, + { + "epoch": 15.37, + "learning_rate": 1.1601479915433404e-05, + "loss": 0.0166, + "step": 14538 + }, + { + "epoch": 15.37, + "learning_rate": 1.1596194503171248e-05, + "loss": 0.0296, + "step": 14540 + }, + { + "epoch": 15.37, + "learning_rate": 1.159090909090909e-05, + "loss": 0.0298, + "step": 14542 + }, + { + "epoch": 15.37, + "learning_rate": 1.1585623678646935e-05, + "loss": 0.0354, + "step": 14544 + }, + { + "epoch": 15.38, + "learning_rate": 1.1580338266384778e-05, + "loss": 0.0048, + "step": 14546 + }, + { + "epoch": 15.38, + "learning_rate": 1.1575052854122622e-05, + "loss": 0.0014, + "step": 14548 + }, + { + "epoch": 15.38, + "learning_rate": 1.1569767441860465e-05, + "loss": 0.0582, + "step": 14550 + }, + { + "epoch": 15.38, + "learning_rate": 1.156448202959831e-05, + "loss": 0.0271, + "step": 14552 + }, + { + "epoch": 15.38, + "learning_rate": 1.1559196617336154e-05, + "loss": 0.0263, + "step": 14554 + }, + { + "epoch": 15.39, + "learning_rate": 1.1553911205073996e-05, + "loss": 0.029, + "step": 14556 + }, + { + "epoch": 15.39, + "learning_rate": 1.154862579281184e-05, + "loss": 0.0028, + "step": 14558 + }, + { + "epoch": 15.39, + "learning_rate": 1.1543340380549683e-05, + "loss": 0.0368, + "step": 14560 + }, + { + "epoch": 15.39, + "learning_rate": 1.1538054968287528e-05, + "loss": 0.0295, + "step": 14562 + }, + { + "epoch": 15.4, + "learning_rate": 1.153276955602537e-05, + "loss": 0.0139, + "step": 14564 + }, + { + "epoch": 15.4, + "learning_rate": 1.1527484143763215e-05, + "loss": 0.0055, + "step": 14566 + }, + { + "epoch": 15.4, + "learning_rate": 1.1522198731501057e-05, + "loss": 0.0277, + "step": 14568 + }, + { + "epoch": 15.4, + "learning_rate": 1.15169133192389e-05, + "loss": 0.0919, + "step": 14570 + }, + { + "epoch": 15.4, + "learning_rate": 1.1511627906976744e-05, + "loss": 0.033, + "step": 14572 + }, + { + "epoch": 15.41, + "learning_rate": 1.1506342494714587e-05, + "loss": 0.0078, + "step": 14574 + }, + { + "epoch": 15.41, + "learning_rate": 1.1501057082452431e-05, + "loss": 0.0171, + "step": 14576 + }, + { + "epoch": 15.41, + "learning_rate": 1.1495771670190276e-05, + "loss": 0.0377, + "step": 14578 + }, + { + "epoch": 15.41, + "learning_rate": 1.149048625792812e-05, + "loss": 0.0205, + "step": 14580 + }, + { + "epoch": 15.41, + "learning_rate": 1.1485200845665963e-05, + "loss": 0.0259, + "step": 14582 + }, + { + "epoch": 15.42, + "learning_rate": 1.1479915433403807e-05, + "loss": 0.0213, + "step": 14584 + }, + { + "epoch": 15.42, + "learning_rate": 1.147463002114165e-05, + "loss": 0.0033, + "step": 14586 + }, + { + "epoch": 15.42, + "learning_rate": 1.1469344608879493e-05, + "loss": 0.0379, + "step": 14588 + }, + { + "epoch": 15.42, + "learning_rate": 1.1464059196617337e-05, + "loss": 0.0388, + "step": 14590 + }, + { + "epoch": 15.42, + "learning_rate": 1.145877378435518e-05, + "loss": 0.0007, + "step": 14592 + }, + { + "epoch": 15.43, + "learning_rate": 1.1453488372093024e-05, + "loss": 0.0107, + "step": 14594 + }, + { + "epoch": 15.43, + "learning_rate": 1.1448202959830867e-05, + "loss": 0.018, + "step": 14596 + }, + { + "epoch": 15.43, + "learning_rate": 1.1442917547568711e-05, + "loss": 0.0355, + "step": 14598 + }, + { + "epoch": 15.43, + "learning_rate": 1.1437632135306554e-05, + "loss": 0.0101, + "step": 14600 + }, + { + "epoch": 15.44, + "learning_rate": 1.1432346723044398e-05, + "loss": 0.0064, + "step": 14602 + }, + { + "epoch": 15.44, + "learning_rate": 1.1427061310782242e-05, + "loss": 0.0077, + "step": 14604 + }, + { + "epoch": 15.44, + "learning_rate": 1.1421775898520085e-05, + "loss": 0.003, + "step": 14606 + }, + { + "epoch": 15.44, + "learning_rate": 1.141649048625793e-05, + "loss": 0.0049, + "step": 14608 + }, + { + "epoch": 15.44, + "learning_rate": 1.1411205073995772e-05, + "loss": 0.0184, + "step": 14610 + }, + { + "epoch": 15.45, + "learning_rate": 1.1405919661733616e-05, + "loss": 0.0231, + "step": 14612 + }, + { + "epoch": 15.45, + "learning_rate": 1.140063424947146e-05, + "loss": 0.0215, + "step": 14614 + }, + { + "epoch": 15.45, + "learning_rate": 1.1395348837209304e-05, + "loss": 0.0232, + "step": 14616 + }, + { + "epoch": 15.45, + "learning_rate": 1.1390063424947146e-05, + "loss": 0.0191, + "step": 14618 + }, + { + "epoch": 15.45, + "learning_rate": 1.138477801268499e-05, + "loss": 0.0127, + "step": 14620 + }, + { + "epoch": 15.46, + "learning_rate": 1.1379492600422833e-05, + "loss": 0.031, + "step": 14622 + }, + { + "epoch": 15.46, + "learning_rate": 1.1374207188160676e-05, + "loss": 0.0854, + "step": 14624 + }, + { + "epoch": 15.46, + "learning_rate": 1.136892177589852e-05, + "loss": 0.0194, + "step": 14626 + }, + { + "epoch": 15.46, + "learning_rate": 1.1363636363636365e-05, + "loss": 0.0172, + "step": 14628 + }, + { + "epoch": 15.47, + "learning_rate": 1.1358350951374207e-05, + "loss": 0.0028, + "step": 14630 + }, + { + "epoch": 15.47, + "learning_rate": 1.1353065539112052e-05, + "loss": 0.0134, + "step": 14632 + }, + { + "epoch": 15.47, + "learning_rate": 1.1347780126849896e-05, + "loss": 0.0056, + "step": 14634 + }, + { + "epoch": 15.47, + "learning_rate": 1.1342494714587739e-05, + "loss": 0.0076, + "step": 14636 + }, + { + "epoch": 15.47, + "learning_rate": 1.1337209302325581e-05, + "loss": 0.0103, + "step": 14638 + }, + { + "epoch": 15.48, + "learning_rate": 1.1331923890063426e-05, + "loss": 0.0054, + "step": 14640 + }, + { + "epoch": 15.48, + "learning_rate": 1.1326638477801268e-05, + "loss": 0.0255, + "step": 14642 + }, + { + "epoch": 15.48, + "learning_rate": 1.1321353065539113e-05, + "loss": 0.0609, + "step": 14644 + }, + { + "epoch": 15.48, + "learning_rate": 1.1316067653276955e-05, + "loss": 0.0092, + "step": 14646 + }, + { + "epoch": 15.48, + "learning_rate": 1.13107822410148e-05, + "loss": 0.013, + "step": 14648 + }, + { + "epoch": 15.49, + "learning_rate": 1.1305496828752642e-05, + "loss": 0.024, + "step": 14650 + }, + { + "epoch": 15.49, + "learning_rate": 1.1300211416490487e-05, + "loss": 0.0104, + "step": 14652 + }, + { + "epoch": 15.49, + "learning_rate": 1.129492600422833e-05, + "loss": 0.0106, + "step": 14654 + }, + { + "epoch": 15.49, + "learning_rate": 1.1289640591966174e-05, + "loss": 0.0158, + "step": 14656 + }, + { + "epoch": 15.49, + "learning_rate": 1.1284355179704018e-05, + "loss": 0.0783, + "step": 14658 + }, + { + "epoch": 15.5, + "learning_rate": 1.1279069767441861e-05, + "loss": 0.0025, + "step": 14660 + }, + { + "epoch": 15.5, + "learning_rate": 1.1273784355179705e-05, + "loss": 0.055, + "step": 14662 + }, + { + "epoch": 15.5, + "learning_rate": 1.1268498942917548e-05, + "loss": 0.0038, + "step": 14664 + }, + { + "epoch": 15.5, + "learning_rate": 1.1263213530655392e-05, + "loss": 0.0024, + "step": 14666 + }, + { + "epoch": 15.51, + "learning_rate": 1.1257928118393235e-05, + "loss": 0.0337, + "step": 14668 + }, + { + "epoch": 15.51, + "learning_rate": 1.125264270613108e-05, + "loss": 0.0129, + "step": 14670 + }, + { + "epoch": 15.51, + "learning_rate": 1.1247357293868922e-05, + "loss": 0.0065, + "step": 14672 + }, + { + "epoch": 15.51, + "learning_rate": 1.1242071881606765e-05, + "loss": 0.0291, + "step": 14674 + }, + { + "epoch": 15.51, + "learning_rate": 1.1236786469344609e-05, + "loss": 0.0127, + "step": 14676 + }, + { + "epoch": 15.52, + "learning_rate": 1.1231501057082452e-05, + "loss": 0.0149, + "step": 14678 + }, + { + "epoch": 15.52, + "learning_rate": 1.1226215644820296e-05, + "loss": 0.0389, + "step": 14680 + }, + { + "epoch": 15.52, + "learning_rate": 1.122093023255814e-05, + "loss": 0.0657, + "step": 14682 + }, + { + "epoch": 15.52, + "learning_rate": 1.1215644820295985e-05, + "loss": 0.0156, + "step": 14684 + }, + { + "epoch": 15.52, + "learning_rate": 1.1210359408033828e-05, + "loss": 0.0137, + "step": 14686 + }, + { + "epoch": 15.53, + "learning_rate": 1.1205073995771672e-05, + "loss": 0.0564, + "step": 14688 + }, + { + "epoch": 15.53, + "learning_rate": 1.1199788583509515e-05, + "loss": 0.0165, + "step": 14690 + }, + { + "epoch": 15.53, + "learning_rate": 1.1194503171247357e-05, + "loss": 0.0152, + "step": 14692 + }, + { + "epoch": 15.53, + "learning_rate": 1.1189217758985202e-05, + "loss": 0.0335, + "step": 14694 + }, + { + "epoch": 15.53, + "learning_rate": 1.1183932346723044e-05, + "loss": 0.0033, + "step": 14696 + }, + { + "epoch": 15.54, + "learning_rate": 1.1178646934460889e-05, + "loss": 0.0171, + "step": 14698 + }, + { + "epoch": 15.54, + "learning_rate": 1.1173361522198731e-05, + "loss": 0.0164, + "step": 14700 + }, + { + "epoch": 15.54, + "learning_rate": 1.1168076109936576e-05, + "loss": 0.0208, + "step": 14702 + }, + { + "epoch": 15.54, + "learning_rate": 1.1162790697674418e-05, + "loss": 0.0062, + "step": 14704 + }, + { + "epoch": 15.55, + "learning_rate": 1.1157505285412263e-05, + "loss": 0.0031, + "step": 14706 + }, + { + "epoch": 15.55, + "learning_rate": 1.1152219873150107e-05, + "loss": 0.0039, + "step": 14708 + }, + { + "epoch": 15.55, + "learning_rate": 1.114693446088795e-05, + "loss": 0.04, + "step": 14710 + }, + { + "epoch": 15.55, + "learning_rate": 1.1141649048625794e-05, + "loss": 0.0146, + "step": 14712 + }, + { + "epoch": 15.55, + "learning_rate": 1.1136363636363637e-05, + "loss": 0.0118, + "step": 14714 + }, + { + "epoch": 15.56, + "learning_rate": 1.1131078224101481e-05, + "loss": 0.001, + "step": 14716 + }, + { + "epoch": 15.56, + "learning_rate": 1.1125792811839324e-05, + "loss": 0.0444, + "step": 14718 + }, + { + "epoch": 15.56, + "learning_rate": 1.1120507399577168e-05, + "loss": 0.0169, + "step": 14720 + }, + { + "epoch": 15.56, + "learning_rate": 1.1115221987315011e-05, + "loss": 0.0395, + "step": 14722 + }, + { + "epoch": 15.56, + "learning_rate": 1.1109936575052855e-05, + "loss": 0.0146, + "step": 14724 + }, + { + "epoch": 15.57, + "learning_rate": 1.1104651162790698e-05, + "loss": 0.0048, + "step": 14726 + }, + { + "epoch": 15.57, + "learning_rate": 1.109936575052854e-05, + "loss": 0.0195, + "step": 14728 + }, + { + "epoch": 15.57, + "learning_rate": 1.1094080338266385e-05, + "loss": 0.0036, + "step": 14730 + }, + { + "epoch": 15.57, + "learning_rate": 1.1088794926004228e-05, + "loss": 0.0068, + "step": 14732 + }, + { + "epoch": 15.58, + "learning_rate": 1.1083509513742072e-05, + "loss": 0.0082, + "step": 14734 + }, + { + "epoch": 15.58, + "learning_rate": 1.1078224101479916e-05, + "loss": 0.0098, + "step": 14736 + }, + { + "epoch": 15.58, + "learning_rate": 1.107293868921776e-05, + "loss": 0.004, + "step": 14738 + }, + { + "epoch": 15.58, + "learning_rate": 1.1067653276955603e-05, + "loss": 0.0357, + "step": 14740 + }, + { + "epoch": 15.58, + "learning_rate": 1.1062367864693448e-05, + "loss": 0.0158, + "step": 14742 + }, + { + "epoch": 15.59, + "learning_rate": 1.105708245243129e-05, + "loss": 0.0059, + "step": 14744 + }, + { + "epoch": 15.59, + "learning_rate": 1.1051797040169133e-05, + "loss": 0.0125, + "step": 14746 + }, + { + "epoch": 15.59, + "learning_rate": 1.1046511627906977e-05, + "loss": 0.0443, + "step": 14748 + }, + { + "epoch": 15.59, + "learning_rate": 1.104122621564482e-05, + "loss": 0.0017, + "step": 14750 + }, + { + "epoch": 15.59, + "learning_rate": 1.1035940803382664e-05, + "loss": 0.062, + "step": 14752 + }, + { + "epoch": 15.6, + "learning_rate": 1.1030655391120507e-05, + "loss": 0.006, + "step": 14754 + }, + { + "epoch": 15.6, + "learning_rate": 1.1025369978858352e-05, + "loss": 0.0143, + "step": 14756 + }, + { + "epoch": 15.6, + "learning_rate": 1.1020084566596194e-05, + "loss": 0.0055, + "step": 14758 + }, + { + "epoch": 15.6, + "learning_rate": 1.1014799154334039e-05, + "loss": 0.0177, + "step": 14760 + }, + { + "epoch": 15.6, + "learning_rate": 1.1009513742071883e-05, + "loss": 0.0077, + "step": 14762 + }, + { + "epoch": 15.61, + "learning_rate": 1.1004228329809726e-05, + "loss": 0.0037, + "step": 14764 + }, + { + "epoch": 15.61, + "learning_rate": 1.099894291754757e-05, + "loss": 0.0334, + "step": 14766 + }, + { + "epoch": 15.61, + "learning_rate": 1.0993657505285413e-05, + "loss": 0.0067, + "step": 14768 + }, + { + "epoch": 15.61, + "learning_rate": 1.0988372093023257e-05, + "loss": 0.0158, + "step": 14770 + }, + { + "epoch": 15.62, + "learning_rate": 1.09830866807611e-05, + "loss": 0.0067, + "step": 14772 + }, + { + "epoch": 15.62, + "learning_rate": 1.0977801268498944e-05, + "loss": 0.0298, + "step": 14774 + }, + { + "epoch": 15.62, + "learning_rate": 1.0972515856236787e-05, + "loss": 0.0008, + "step": 14776 + }, + { + "epoch": 15.62, + "learning_rate": 1.0967230443974631e-05, + "loss": 0.0064, + "step": 14778 + }, + { + "epoch": 15.62, + "learning_rate": 1.0961945031712474e-05, + "loss": 0.0055, + "step": 14780 + }, + { + "epoch": 15.63, + "learning_rate": 1.0956659619450316e-05, + "loss": 0.0701, + "step": 14782 + }, + { + "epoch": 15.63, + "learning_rate": 1.095137420718816e-05, + "loss": 0.0278, + "step": 14784 + }, + { + "epoch": 15.63, + "learning_rate": 1.0946088794926005e-05, + "loss": 0.0189, + "step": 14786 + }, + { + "epoch": 15.63, + "learning_rate": 1.094080338266385e-05, + "loss": 0.0136, + "step": 14788 + }, + { + "epoch": 15.63, + "learning_rate": 1.0935517970401692e-05, + "loss": 0.0049, + "step": 14790 + }, + { + "epoch": 15.64, + "learning_rate": 1.0930232558139537e-05, + "loss": 0.0078, + "step": 14792 + }, + { + "epoch": 15.64, + "learning_rate": 1.092494714587738e-05, + "loss": 0.0471, + "step": 14794 + }, + { + "epoch": 15.64, + "learning_rate": 1.0919661733615222e-05, + "loss": 0.0093, + "step": 14796 + }, + { + "epoch": 15.64, + "learning_rate": 1.0914376321353066e-05, + "loss": 0.0332, + "step": 14798 + }, + { + "epoch": 15.64, + "learning_rate": 1.0909090909090909e-05, + "loss": 0.046, + "step": 14800 + }, + { + "epoch": 15.65, + "learning_rate": 1.0903805496828753e-05, + "loss": 0.0186, + "step": 14802 + }, + { + "epoch": 15.65, + "learning_rate": 1.0898520084566596e-05, + "loss": 0.0015, + "step": 14804 + }, + { + "epoch": 15.65, + "learning_rate": 1.089323467230444e-05, + "loss": 0.0335, + "step": 14806 + }, + { + "epoch": 15.65, + "learning_rate": 1.0887949260042283e-05, + "loss": 0.0217, + "step": 14808 + }, + { + "epoch": 15.66, + "learning_rate": 1.0882663847780127e-05, + "loss": 0.0014, + "step": 14810 + }, + { + "epoch": 15.66, + "learning_rate": 1.0877378435517972e-05, + "loss": 0.0465, + "step": 14812 + }, + { + "epoch": 15.66, + "learning_rate": 1.0872093023255814e-05, + "loss": 0.0107, + "step": 14814 + }, + { + "epoch": 15.66, + "learning_rate": 1.0866807610993659e-05, + "loss": 0.0903, + "step": 14816 + }, + { + "epoch": 15.66, + "learning_rate": 1.0861522198731501e-05, + "loss": 0.0038, + "step": 14818 + }, + { + "epoch": 15.67, + "learning_rate": 1.0856236786469346e-05, + "loss": 0.0018, + "step": 14820 + }, + { + "epoch": 15.67, + "learning_rate": 1.0850951374207188e-05, + "loss": 0.0343, + "step": 14822 + }, + { + "epoch": 15.67, + "learning_rate": 1.0845665961945033e-05, + "loss": 0.0074, + "step": 14824 + }, + { + "epoch": 15.67, + "learning_rate": 1.0840380549682875e-05, + "loss": 0.0257, + "step": 14826 + }, + { + "epoch": 15.67, + "learning_rate": 1.083509513742072e-05, + "loss": 0.0017, + "step": 14828 + }, + { + "epoch": 15.68, + "learning_rate": 1.0829809725158563e-05, + "loss": 0.0189, + "step": 14830 + }, + { + "epoch": 15.68, + "learning_rate": 1.0824524312896405e-05, + "loss": 0.0674, + "step": 14832 + }, + { + "epoch": 15.68, + "learning_rate": 1.081923890063425e-05, + "loss": 0.012, + "step": 14834 + }, + { + "epoch": 15.68, + "learning_rate": 1.0813953488372092e-05, + "loss": 0.034, + "step": 14836 + }, + { + "epoch": 15.68, + "learning_rate": 1.0808668076109937e-05, + "loss": 0.0246, + "step": 14838 + }, + { + "epoch": 15.69, + "learning_rate": 1.0803382663847781e-05, + "loss": 0.0021, + "step": 14840 + }, + { + "epoch": 15.69, + "learning_rate": 1.0798097251585625e-05, + "loss": 0.0245, + "step": 14842 + }, + { + "epoch": 15.69, + "learning_rate": 1.0792811839323468e-05, + "loss": 0.0014, + "step": 14844 + }, + { + "epoch": 15.69, + "learning_rate": 1.0787526427061312e-05, + "loss": 0.0155, + "step": 14846 + }, + { + "epoch": 15.7, + "learning_rate": 1.0782241014799155e-05, + "loss": 0.0208, + "step": 14848 + }, + { + "epoch": 15.7, + "learning_rate": 1.0776955602536998e-05, + "loss": 0.0056, + "step": 14850 + }, + { + "epoch": 15.7, + "learning_rate": 1.0771670190274842e-05, + "loss": 0.0019, + "step": 14852 + }, + { + "epoch": 15.7, + "learning_rate": 1.0766384778012685e-05, + "loss": 0.031, + "step": 14854 + }, + { + "epoch": 15.7, + "learning_rate": 1.0761099365750529e-05, + "loss": 0.0575, + "step": 14856 + }, + { + "epoch": 15.71, + "learning_rate": 1.0755813953488372e-05, + "loss": 0.0129, + "step": 14858 + }, + { + "epoch": 15.71, + "learning_rate": 1.0750528541226216e-05, + "loss": 0.0061, + "step": 14860 + }, + { + "epoch": 15.71, + "learning_rate": 1.0745243128964059e-05, + "loss": 0.0038, + "step": 14862 + }, + { + "epoch": 15.71, + "learning_rate": 1.0739957716701903e-05, + "loss": 0.0207, + "step": 14864 + }, + { + "epoch": 15.71, + "learning_rate": 1.0734672304439748e-05, + "loss": 0.0242, + "step": 14866 + }, + { + "epoch": 15.72, + "learning_rate": 1.072938689217759e-05, + "loss": 0.0162, + "step": 14868 + }, + { + "epoch": 15.72, + "learning_rate": 1.0724101479915435e-05, + "loss": 0.0286, + "step": 14870 + }, + { + "epoch": 15.72, + "learning_rate": 1.0718816067653277e-05, + "loss": 0.0316, + "step": 14872 + }, + { + "epoch": 15.72, + "learning_rate": 1.0713530655391122e-05, + "loss": 0.0073, + "step": 14874 + }, + { + "epoch": 15.73, + "learning_rate": 1.0708245243128964e-05, + "loss": 0.0109, + "step": 14876 + }, + { + "epoch": 15.73, + "learning_rate": 1.0702959830866809e-05, + "loss": 0.0047, + "step": 14878 + }, + { + "epoch": 15.73, + "learning_rate": 1.0697674418604651e-05, + "loss": 0.0354, + "step": 14880 + }, + { + "epoch": 15.73, + "learning_rate": 1.0692389006342496e-05, + "loss": 0.0226, + "step": 14882 + }, + { + "epoch": 15.73, + "learning_rate": 1.0687103594080338e-05, + "loss": 0.017, + "step": 14884 + }, + { + "epoch": 15.74, + "learning_rate": 1.0681818181818181e-05, + "loss": 0.003, + "step": 14886 + }, + { + "epoch": 15.74, + "learning_rate": 1.0676532769556025e-05, + "loss": 0.0335, + "step": 14888 + }, + { + "epoch": 15.74, + "learning_rate": 1.067124735729387e-05, + "loss": 0.0027, + "step": 14890 + }, + { + "epoch": 15.74, + "learning_rate": 1.0665961945031714e-05, + "loss": 0.0122, + "step": 14892 + }, + { + "epoch": 15.74, + "learning_rate": 1.0660676532769557e-05, + "loss": 0.0095, + "step": 14894 + }, + { + "epoch": 15.75, + "learning_rate": 1.0655391120507401e-05, + "loss": 0.0042, + "step": 14896 + }, + { + "epoch": 15.75, + "learning_rate": 1.0652748414376323e-05, + "loss": 0.1013, + "step": 14898 + }, + { + "epoch": 15.75, + "learning_rate": 1.0647463002114165e-05, + "loss": 0.0157, + "step": 14900 + }, + { + "epoch": 15.75, + "learning_rate": 1.064217758985201e-05, + "loss": 0.0084, + "step": 14902 + }, + { + "epoch": 15.75, + "learning_rate": 1.0636892177589852e-05, + "loss": 0.0251, + "step": 14904 + }, + { + "epoch": 15.76, + "learning_rate": 1.0631606765327697e-05, + "loss": 0.0067, + "step": 14906 + }, + { + "epoch": 15.76, + "learning_rate": 1.062632135306554e-05, + "loss": 0.0134, + "step": 14908 + }, + { + "epoch": 15.76, + "learning_rate": 1.0621035940803384e-05, + "loss": 0.0021, + "step": 14910 + }, + { + "epoch": 15.76, + "learning_rate": 1.0615750528541226e-05, + "loss": 0.0048, + "step": 14912 + }, + { + "epoch": 15.77, + "learning_rate": 1.0610465116279069e-05, + "loss": 0.0014, + "step": 14914 + }, + { + "epoch": 15.77, + "learning_rate": 1.0605179704016913e-05, + "loss": 0.0111, + "step": 14916 + }, + { + "epoch": 15.77, + "learning_rate": 1.0599894291754758e-05, + "loss": 0.0058, + "step": 14918 + }, + { + "epoch": 15.77, + "learning_rate": 1.0594608879492602e-05, + "loss": 0.0129, + "step": 14920 + }, + { + "epoch": 15.77, + "learning_rate": 1.0589323467230445e-05, + "loss": 0.0626, + "step": 14922 + }, + { + "epoch": 15.78, + "learning_rate": 1.0584038054968289e-05, + "loss": 0.0111, + "step": 14924 + }, + { + "epoch": 15.78, + "learning_rate": 1.0578752642706132e-05, + "loss": 0.0184, + "step": 14926 + }, + { + "epoch": 15.78, + "learning_rate": 1.0573467230443976e-05, + "loss": 0.0208, + "step": 14928 + }, + { + "epoch": 15.78, + "learning_rate": 1.0568181818181819e-05, + "loss": 0.0076, + "step": 14930 + }, + { + "epoch": 15.78, + "learning_rate": 1.0562896405919661e-05, + "loss": 0.0395, + "step": 14932 + }, + { + "epoch": 15.79, + "learning_rate": 1.0557610993657506e-05, + "loss": 0.0079, + "step": 14934 + }, + { + "epoch": 15.79, + "learning_rate": 1.0552325581395349e-05, + "loss": 0.0393, + "step": 14936 + }, + { + "epoch": 15.79, + "learning_rate": 1.0547040169133193e-05, + "loss": 0.0174, + "step": 14938 + }, + { + "epoch": 15.79, + "learning_rate": 1.0541754756871036e-05, + "loss": 0.0146, + "step": 14940 + }, + { + "epoch": 15.79, + "learning_rate": 1.053646934460888e-05, + "loss": 0.0084, + "step": 14942 + }, + { + "epoch": 15.8, + "learning_rate": 1.0531183932346724e-05, + "loss": 0.0514, + "step": 14944 + }, + { + "epoch": 15.8, + "learning_rate": 1.0525898520084567e-05, + "loss": 0.0202, + "step": 14946 + }, + { + "epoch": 15.8, + "learning_rate": 1.0520613107822411e-05, + "loss": 0.011, + "step": 14948 + }, + { + "epoch": 15.8, + "learning_rate": 1.0515327695560254e-05, + "loss": 0.0199, + "step": 14950 + }, + { + "epoch": 15.81, + "learning_rate": 1.0510042283298098e-05, + "loss": 0.0292, + "step": 14952 + }, + { + "epoch": 15.81, + "learning_rate": 1.0504756871035941e-05, + "loss": 0.0477, + "step": 14954 + }, + { + "epoch": 15.81, + "learning_rate": 1.0499471458773785e-05, + "loss": 0.0244, + "step": 14956 + }, + { + "epoch": 15.81, + "learning_rate": 1.0494186046511628e-05, + "loss": 0.0155, + "step": 14958 + }, + { + "epoch": 15.81, + "learning_rate": 1.0488900634249472e-05, + "loss": 0.029, + "step": 14960 + }, + { + "epoch": 15.82, + "learning_rate": 1.0483615221987315e-05, + "loss": 0.0276, + "step": 14962 + }, + { + "epoch": 15.82, + "learning_rate": 1.0478329809725158e-05, + "loss": 0.0275, + "step": 14964 + }, + { + "epoch": 15.82, + "learning_rate": 1.0473044397463002e-05, + "loss": 0.0165, + "step": 14966 + }, + { + "epoch": 15.82, + "learning_rate": 1.0467758985200845e-05, + "loss": 0.0428, + "step": 14968 + }, + { + "epoch": 15.82, + "learning_rate": 1.046247357293869e-05, + "loss": 0.0454, + "step": 14970 + }, + { + "epoch": 15.83, + "learning_rate": 1.0457188160676534e-05, + "loss": 0.1425, + "step": 14972 + }, + { + "epoch": 15.83, + "learning_rate": 1.0451902748414378e-05, + "loss": 0.0476, + "step": 14974 + }, + { + "epoch": 15.83, + "learning_rate": 1.044661733615222e-05, + "loss": 0.0332, + "step": 14976 + }, + { + "epoch": 15.83, + "learning_rate": 1.0441331923890065e-05, + "loss": 0.0403, + "step": 14978 + }, + { + "epoch": 15.84, + "learning_rate": 1.0436046511627908e-05, + "loss": 0.0198, + "step": 14980 + }, + { + "epoch": 15.84, + "learning_rate": 1.043076109936575e-05, + "loss": 0.0125, + "step": 14982 + }, + { + "epoch": 15.84, + "learning_rate": 1.0425475687103595e-05, + "loss": 0.0084, + "step": 14984 + }, + { + "epoch": 15.84, + "learning_rate": 1.0420190274841437e-05, + "loss": 0.0013, + "step": 14986 + }, + { + "epoch": 15.84, + "learning_rate": 1.0414904862579282e-05, + "loss": 0.0103, + "step": 14988 + }, + { + "epoch": 15.85, + "learning_rate": 1.0409619450317124e-05, + "loss": 0.0311, + "step": 14990 + }, + { + "epoch": 15.85, + "learning_rate": 1.0404334038054969e-05, + "loss": 0.0204, + "step": 14992 + }, + { + "epoch": 15.85, + "learning_rate": 1.0399048625792811e-05, + "loss": 0.0037, + "step": 14994 + }, + { + "epoch": 15.85, + "learning_rate": 1.0393763213530656e-05, + "loss": 0.0013, + "step": 14996 + }, + { + "epoch": 15.85, + "learning_rate": 1.03884778012685e-05, + "loss": 0.0351, + "step": 14998 + }, + { + "epoch": 15.86, + "learning_rate": 1.0383192389006343e-05, + "loss": 0.0054, + "step": 15000 + }, + { + "epoch": 15.86, + "eval_cer": 0.0335138216015959, + "eval_loss": 0.8025035262107849, + "eval_runtime": 125.5761, + "eval_samples_per_second": 6.697, + "eval_steps_per_second": 0.844, + "step": 15000 + }, + { + "epoch": 15.86, + "learning_rate": 1.0377906976744187e-05, + "loss": 0.0153, + "step": 15002 + }, + { + "epoch": 15.86, + "learning_rate": 1.037262156448203e-05, + "loss": 0.0195, + "step": 15004 + }, + { + "epoch": 15.86, + "learning_rate": 1.0367336152219874e-05, + "loss": 0.0055, + "step": 15006 + }, + { + "epoch": 15.86, + "learning_rate": 1.0362050739957717e-05, + "loss": 0.0015, + "step": 15008 + }, + { + "epoch": 15.87, + "learning_rate": 1.0356765327695561e-05, + "loss": 0.0036, + "step": 15010 + }, + { + "epoch": 15.87, + "learning_rate": 1.0351479915433404e-05, + "loss": 0.0304, + "step": 15012 + }, + { + "epoch": 15.87, + "learning_rate": 1.0346194503171248e-05, + "loss": 0.021, + "step": 15014 + }, + { + "epoch": 15.87, + "learning_rate": 1.0340909090909091e-05, + "loss": 0.0065, + "step": 15016 + }, + { + "epoch": 15.88, + "learning_rate": 1.0335623678646934e-05, + "loss": 0.006, + "step": 15018 + }, + { + "epoch": 15.88, + "learning_rate": 1.0330338266384778e-05, + "loss": 0.0214, + "step": 15020 + }, + { + "epoch": 15.88, + "learning_rate": 1.0325052854122622e-05, + "loss": 0.0925, + "step": 15022 + }, + { + "epoch": 15.88, + "learning_rate": 1.0319767441860467e-05, + "loss": 0.0072, + "step": 15024 + }, + { + "epoch": 15.88, + "learning_rate": 1.031448202959831e-05, + "loss": 0.0101, + "step": 15026 + }, + { + "epoch": 15.89, + "learning_rate": 1.0309196617336154e-05, + "loss": 0.0601, + "step": 15028 + }, + { + "epoch": 15.89, + "learning_rate": 1.0303911205073996e-05, + "loss": 0.0107, + "step": 15030 + }, + { + "epoch": 15.89, + "learning_rate": 1.029862579281184e-05, + "loss": 0.0073, + "step": 15032 + }, + { + "epoch": 15.89, + "learning_rate": 1.0293340380549683e-05, + "loss": 0.0511, + "step": 15034 + }, + { + "epoch": 15.89, + "learning_rate": 1.0288054968287526e-05, + "loss": 0.0102, + "step": 15036 + }, + { + "epoch": 15.9, + "learning_rate": 1.028276955602537e-05, + "loss": 0.0081, + "step": 15038 + }, + { + "epoch": 15.9, + "learning_rate": 1.0277484143763213e-05, + "loss": 0.0111, + "step": 15040 + }, + { + "epoch": 15.9, + "learning_rate": 1.0272198731501058e-05, + "loss": 0.0022, + "step": 15042 + }, + { + "epoch": 15.9, + "learning_rate": 1.02669133192389e-05, + "loss": 0.0098, + "step": 15044 + }, + { + "epoch": 15.9, + "learning_rate": 1.0261627906976745e-05, + "loss": 0.014, + "step": 15046 + }, + { + "epoch": 15.91, + "learning_rate": 1.0256342494714589e-05, + "loss": 0.0183, + "step": 15048 + }, + { + "epoch": 15.91, + "learning_rate": 1.0251057082452432e-05, + "loss": 0.004, + "step": 15050 + }, + { + "epoch": 15.91, + "learning_rate": 1.0245771670190276e-05, + "loss": 0.0045, + "step": 15052 + }, + { + "epoch": 15.91, + "learning_rate": 1.0240486257928119e-05, + "loss": 0.0175, + "step": 15054 + }, + { + "epoch": 15.92, + "learning_rate": 1.0235200845665963e-05, + "loss": 0.0855, + "step": 15056 + }, + { + "epoch": 15.92, + "learning_rate": 1.0229915433403806e-05, + "loss": 0.0145, + "step": 15058 + }, + { + "epoch": 15.92, + "learning_rate": 1.022463002114165e-05, + "loss": 0.0268, + "step": 15060 + }, + { + "epoch": 15.92, + "learning_rate": 1.0219344608879493e-05, + "loss": 0.0131, + "step": 15062 + }, + { + "epoch": 15.92, + "learning_rate": 1.0214059196617337e-05, + "loss": 0.0231, + "step": 15064 + }, + { + "epoch": 15.93, + "learning_rate": 1.020877378435518e-05, + "loss": 0.0278, + "step": 15066 + }, + { + "epoch": 15.93, + "learning_rate": 1.0203488372093024e-05, + "loss": 0.0021, + "step": 15068 + }, + { + "epoch": 15.93, + "learning_rate": 1.0198202959830867e-05, + "loss": 0.007, + "step": 15070 + }, + { + "epoch": 15.93, + "learning_rate": 1.019291754756871e-05, + "loss": 0.0337, + "step": 15072 + }, + { + "epoch": 15.93, + "learning_rate": 1.0187632135306554e-05, + "loss": 0.0458, + "step": 15074 + }, + { + "epoch": 15.94, + "learning_rate": 1.0182346723044398e-05, + "loss": 0.0022, + "step": 15076 + }, + { + "epoch": 15.94, + "learning_rate": 1.0177061310782243e-05, + "loss": 0.0135, + "step": 15078 + }, + { + "epoch": 15.94, + "learning_rate": 1.0171775898520085e-05, + "loss": 0.0165, + "step": 15080 + }, + { + "epoch": 15.94, + "learning_rate": 1.016649048625793e-05, + "loss": 0.0712, + "step": 15082 + }, + { + "epoch": 15.95, + "learning_rate": 1.0161205073995772e-05, + "loss": 0.024, + "step": 15084 + }, + { + "epoch": 15.95, + "learning_rate": 1.0155919661733617e-05, + "loss": 0.0021, + "step": 15086 + }, + { + "epoch": 15.95, + "learning_rate": 1.015063424947146e-05, + "loss": 0.0026, + "step": 15088 + }, + { + "epoch": 15.95, + "learning_rate": 1.0145348837209302e-05, + "loss": 0.0148, + "step": 15090 + }, + { + "epoch": 15.95, + "learning_rate": 1.0140063424947146e-05, + "loss": 0.0023, + "step": 15092 + }, + { + "epoch": 15.96, + "learning_rate": 1.0134778012684989e-05, + "loss": 0.0029, + "step": 15094 + }, + { + "epoch": 15.96, + "learning_rate": 1.0129492600422833e-05, + "loss": 0.0083, + "step": 15096 + }, + { + "epoch": 15.96, + "learning_rate": 1.0124207188160676e-05, + "loss": 0.0047, + "step": 15098 + }, + { + "epoch": 15.96, + "learning_rate": 1.011892177589852e-05, + "loss": 0.0348, + "step": 15100 + }, + { + "epoch": 15.96, + "learning_rate": 1.0113636363636365e-05, + "loss": 0.0033, + "step": 15102 + }, + { + "epoch": 15.97, + "learning_rate": 1.0108350951374209e-05, + "loss": 0.0005, + "step": 15104 + }, + { + "epoch": 15.97, + "learning_rate": 1.0103065539112052e-05, + "loss": 0.0072, + "step": 15106 + }, + { + "epoch": 15.97, + "learning_rate": 1.0097780126849894e-05, + "loss": 0.0082, + "step": 15108 + }, + { + "epoch": 15.97, + "learning_rate": 1.0092494714587739e-05, + "loss": 0.0054, + "step": 15110 + }, + { + "epoch": 15.97, + "learning_rate": 1.0087209302325581e-05, + "loss": 0.0637, + "step": 15112 + }, + { + "epoch": 15.98, + "learning_rate": 1.0081923890063426e-05, + "loss": 0.0627, + "step": 15114 + }, + { + "epoch": 15.98, + "learning_rate": 1.0076638477801269e-05, + "loss": 0.0063, + "step": 15116 + }, + { + "epoch": 15.98, + "learning_rate": 1.0071353065539113e-05, + "loss": 0.0118, + "step": 15118 + }, + { + "epoch": 15.98, + "learning_rate": 1.0066067653276956e-05, + "loss": 0.0127, + "step": 15120 + }, + { + "epoch": 15.99, + "learning_rate": 1.00607822410148e-05, + "loss": 0.0117, + "step": 15122 + }, + { + "epoch": 15.99, + "learning_rate": 1.0055496828752643e-05, + "loss": 0.0017, + "step": 15124 + }, + { + "epoch": 15.99, + "learning_rate": 1.0050211416490487e-05, + "loss": 0.0018, + "step": 15126 + }, + { + "epoch": 15.99, + "learning_rate": 1.0044926004228331e-05, + "loss": 0.0018, + "step": 15128 + }, + { + "epoch": 15.99, + "learning_rate": 1.0039640591966174e-05, + "loss": 0.0301, + "step": 15130 + }, + { + "epoch": 16.0, + "learning_rate": 1.0034355179704018e-05, + "loss": 0.074, + "step": 15132 + }, + { + "epoch": 16.0, + "learning_rate": 1.0029069767441861e-05, + "loss": 0.0381, + "step": 15134 + }, + { + "epoch": 16.0, + "learning_rate": 1.0023784355179705e-05, + "loss": 0.0129, + "step": 15136 + }, + { + "epoch": 16.0, + "learning_rate": 1.0018498942917548e-05, + "loss": 0.001, + "step": 15138 + }, + { + "epoch": 16.0, + "learning_rate": 1.001321353065539e-05, + "loss": 0.0034, + "step": 15140 + }, + { + "epoch": 16.01, + "learning_rate": 1.0007928118393235e-05, + "loss": 0.014, + "step": 15142 + }, + { + "epoch": 16.01, + "learning_rate": 1.0002642706131078e-05, + "loss": 0.0118, + "step": 15144 + }, + { + "epoch": 16.01, + "learning_rate": 9.997357293868922e-06, + "loss": 0.0318, + "step": 15146 + }, + { + "epoch": 16.01, + "learning_rate": 9.992071881606765e-06, + "loss": 0.0006, + "step": 15148 + }, + { + "epoch": 16.01, + "learning_rate": 9.98678646934461e-06, + "loss": 0.0669, + "step": 15150 + }, + { + "epoch": 16.02, + "learning_rate": 9.981501057082452e-06, + "loss": 0.0426, + "step": 15152 + }, + { + "epoch": 16.02, + "learning_rate": 9.976215644820296e-06, + "loss": 0.0088, + "step": 15154 + }, + { + "epoch": 16.02, + "learning_rate": 9.97093023255814e-06, + "loss": 0.0141, + "step": 15156 + }, + { + "epoch": 16.02, + "learning_rate": 9.965644820295983e-06, + "loss": 0.0079, + "step": 15158 + }, + { + "epoch": 16.03, + "learning_rate": 9.960359408033828e-06, + "loss": 0.0047, + "step": 15160 + }, + { + "epoch": 16.03, + "learning_rate": 9.95507399577167e-06, + "loss": 0.0025, + "step": 15162 + }, + { + "epoch": 16.03, + "learning_rate": 9.949788583509515e-06, + "loss": 0.0007, + "step": 15164 + }, + { + "epoch": 16.03, + "learning_rate": 9.944503171247357e-06, + "loss": 0.0009, + "step": 15166 + }, + { + "epoch": 16.03, + "learning_rate": 9.939217758985202e-06, + "loss": 0.0158, + "step": 15168 + }, + { + "epoch": 16.04, + "learning_rate": 9.933932346723044e-06, + "loss": 0.023, + "step": 15170 + }, + { + "epoch": 16.04, + "learning_rate": 9.928646934460889e-06, + "loss": 0.0184, + "step": 15172 + }, + { + "epoch": 16.04, + "learning_rate": 9.923361522198731e-06, + "loss": 0.0449, + "step": 15174 + }, + { + "epoch": 16.04, + "learning_rate": 9.918076109936574e-06, + "loss": 0.0215, + "step": 15176 + }, + { + "epoch": 16.04, + "learning_rate": 9.912790697674418e-06, + "loss": 0.0148, + "step": 15178 + }, + { + "epoch": 16.05, + "learning_rate": 9.907505285412263e-06, + "loss": 0.0143, + "step": 15180 + }, + { + "epoch": 16.05, + "learning_rate": 9.902219873150107e-06, + "loss": 0.0121, + "step": 15182 + }, + { + "epoch": 16.05, + "learning_rate": 9.89693446088795e-06, + "loss": 0.0063, + "step": 15184 + }, + { + "epoch": 16.05, + "learning_rate": 9.891649048625794e-06, + "loss": 0.0178, + "step": 15186 + }, + { + "epoch": 16.05, + "learning_rate": 9.886363636363637e-06, + "loss": 0.0131, + "step": 15188 + }, + { + "epoch": 16.06, + "learning_rate": 9.881078224101481e-06, + "loss": 0.0121, + "step": 15190 + }, + { + "epoch": 16.06, + "learning_rate": 9.875792811839324e-06, + "loss": 0.0007, + "step": 15192 + }, + { + "epoch": 16.06, + "learning_rate": 9.870507399577167e-06, + "loss": 0.0022, + "step": 15194 + }, + { + "epoch": 16.06, + "learning_rate": 9.865221987315011e-06, + "loss": 0.0076, + "step": 15196 + }, + { + "epoch": 16.07, + "learning_rate": 9.859936575052854e-06, + "loss": 0.0417, + "step": 15198 + }, + { + "epoch": 16.07, + "learning_rate": 9.854651162790698e-06, + "loss": 0.0325, + "step": 15200 + }, + { + "epoch": 16.07, + "learning_rate": 9.84936575052854e-06, + "loss": 0.022, + "step": 15202 + }, + { + "epoch": 16.07, + "learning_rate": 9.844080338266385e-06, + "loss": 0.0188, + "step": 15204 + }, + { + "epoch": 16.07, + "learning_rate": 9.83879492600423e-06, + "loss": 0.0085, + "step": 15206 + }, + { + "epoch": 16.08, + "learning_rate": 9.833509513742074e-06, + "loss": 0.003, + "step": 15208 + }, + { + "epoch": 16.08, + "learning_rate": 9.828224101479916e-06, + "loss": 0.0306, + "step": 15210 + }, + { + "epoch": 16.08, + "learning_rate": 9.822938689217759e-06, + "loss": 0.0018, + "step": 15212 + }, + { + "epoch": 16.08, + "learning_rate": 9.817653276955603e-06, + "loss": 0.0404, + "step": 15214 + }, + { + "epoch": 16.08, + "learning_rate": 9.812367864693446e-06, + "loss": 0.0178, + "step": 15216 + }, + { + "epoch": 16.09, + "learning_rate": 9.80708245243129e-06, + "loss": 0.0063, + "step": 15218 + }, + { + "epoch": 16.09, + "learning_rate": 9.801797040169133e-06, + "loss": 0.0222, + "step": 15220 + }, + { + "epoch": 16.09, + "learning_rate": 9.796511627906978e-06, + "loss": 0.0006, + "step": 15222 + }, + { + "epoch": 16.09, + "learning_rate": 9.79122621564482e-06, + "loss": 0.0088, + "step": 15224 + }, + { + "epoch": 16.1, + "learning_rate": 9.785940803382665e-06, + "loss": 0.014, + "step": 15226 + }, + { + "epoch": 16.1, + "learning_rate": 9.780655391120507e-06, + "loss": 0.001, + "step": 15228 + }, + { + "epoch": 16.1, + "learning_rate": 9.775369978858352e-06, + "loss": 0.0105, + "step": 15230 + }, + { + "epoch": 16.1, + "learning_rate": 9.770084566596196e-06, + "loss": 0.0037, + "step": 15232 + }, + { + "epoch": 16.1, + "learning_rate": 9.764799154334039e-06, + "loss": 0.0118, + "step": 15234 + }, + { + "epoch": 16.11, + "learning_rate": 9.759513742071883e-06, + "loss": 0.0172, + "step": 15236 + }, + { + "epoch": 16.11, + "learning_rate": 9.754228329809726e-06, + "loss": 0.0048, + "step": 15238 + }, + { + "epoch": 16.11, + "learning_rate": 9.74894291754757e-06, + "loss": 0.0003, + "step": 15240 + }, + { + "epoch": 16.11, + "learning_rate": 9.743657505285413e-06, + "loss": 0.0022, + "step": 15242 + }, + { + "epoch": 16.11, + "learning_rate": 9.738372093023257e-06, + "loss": 0.0045, + "step": 15244 + }, + { + "epoch": 16.12, + "learning_rate": 9.7330866807611e-06, + "loss": 0.0046, + "step": 15246 + }, + { + "epoch": 16.12, + "learning_rate": 9.727801268498942e-06, + "loss": 0.005, + "step": 15248 + }, + { + "epoch": 16.12, + "learning_rate": 9.722515856236787e-06, + "loss": 0.0402, + "step": 15250 + }, + { + "epoch": 16.12, + "learning_rate": 9.71723044397463e-06, + "loss": 0.0045, + "step": 15252 + }, + { + "epoch": 16.12, + "learning_rate": 9.711945031712474e-06, + "loss": 0.0552, + "step": 15254 + }, + { + "epoch": 16.13, + "learning_rate": 9.706659619450317e-06, + "loss": 0.0035, + "step": 15256 + }, + { + "epoch": 16.13, + "learning_rate": 9.701374207188161e-06, + "loss": 0.0225, + "step": 15258 + }, + { + "epoch": 16.13, + "learning_rate": 9.696088794926005e-06, + "loss": 0.069, + "step": 15260 + }, + { + "epoch": 16.13, + "learning_rate": 9.69080338266385e-06, + "loss": 0.0067, + "step": 15262 + }, + { + "epoch": 16.14, + "learning_rate": 9.685517970401692e-06, + "loss": 0.0032, + "step": 15264 + }, + { + "epoch": 16.14, + "learning_rate": 9.680232558139535e-06, + "loss": 0.0054, + "step": 15266 + }, + { + "epoch": 16.14, + "learning_rate": 9.67494714587738e-06, + "loss": 0.0207, + "step": 15268 + }, + { + "epoch": 16.14, + "learning_rate": 9.669661733615222e-06, + "loss": 0.0059, + "step": 15270 + }, + { + "epoch": 16.14, + "learning_rate": 9.664376321353066e-06, + "loss": 0.0024, + "step": 15272 + }, + { + "epoch": 16.15, + "learning_rate": 9.659090909090909e-06, + "loss": 0.0056, + "step": 15274 + }, + { + "epoch": 16.15, + "learning_rate": 9.653805496828753e-06, + "loss": 0.0047, + "step": 15276 + }, + { + "epoch": 16.15, + "learning_rate": 9.648520084566596e-06, + "loss": 0.0111, + "step": 15278 + }, + { + "epoch": 16.15, + "learning_rate": 9.64323467230444e-06, + "loss": 0.0028, + "step": 15280 + }, + { + "epoch": 16.15, + "learning_rate": 9.637949260042283e-06, + "loss": 0.001, + "step": 15282 + }, + { + "epoch": 16.16, + "learning_rate": 9.632663847780127e-06, + "loss": 0.0099, + "step": 15284 + }, + { + "epoch": 16.16, + "learning_rate": 9.627378435517972e-06, + "loss": 0.0105, + "step": 15286 + }, + { + "epoch": 16.16, + "learning_rate": 9.622093023255814e-06, + "loss": 0.0016, + "step": 15288 + }, + { + "epoch": 16.16, + "learning_rate": 9.616807610993659e-06, + "loss": 0.0077, + "step": 15290 + }, + { + "epoch": 16.16, + "learning_rate": 9.611522198731502e-06, + "loss": 0.0269, + "step": 15292 + }, + { + "epoch": 16.17, + "learning_rate": 9.606236786469346e-06, + "loss": 0.0276, + "step": 15294 + }, + { + "epoch": 16.17, + "learning_rate": 9.600951374207189e-06, + "loss": 0.0022, + "step": 15296 + }, + { + "epoch": 16.17, + "learning_rate": 9.595665961945031e-06, + "loss": 0.0045, + "step": 15298 + }, + { + "epoch": 16.17, + "learning_rate": 9.590380549682876e-06, + "loss": 0.0041, + "step": 15300 + }, + { + "epoch": 16.18, + "learning_rate": 9.585095137420718e-06, + "loss": 0.0145, + "step": 15302 + }, + { + "epoch": 16.18, + "learning_rate": 9.579809725158563e-06, + "loss": 0.0014, + "step": 15304 + }, + { + "epoch": 16.18, + "learning_rate": 9.574524312896405e-06, + "loss": 0.0012, + "step": 15306 + }, + { + "epoch": 16.18, + "learning_rate": 9.56923890063425e-06, + "loss": 0.0182, + "step": 15308 + }, + { + "epoch": 16.18, + "learning_rate": 9.563953488372094e-06, + "loss": 0.0387, + "step": 15310 + }, + { + "epoch": 16.19, + "learning_rate": 9.558668076109938e-06, + "loss": 0.0252, + "step": 15312 + }, + { + "epoch": 16.19, + "learning_rate": 9.553382663847781e-06, + "loss": 0.0138, + "step": 15314 + }, + { + "epoch": 16.19, + "learning_rate": 9.548097251585624e-06, + "loss": 0.0383, + "step": 15316 + }, + { + "epoch": 16.19, + "learning_rate": 9.542811839323468e-06, + "loss": 0.0089, + "step": 15318 + }, + { + "epoch": 16.19, + "learning_rate": 9.53752642706131e-06, + "loss": 0.0074, + "step": 15320 + }, + { + "epoch": 16.2, + "learning_rate": 9.532241014799155e-06, + "loss": 0.0173, + "step": 15322 + }, + { + "epoch": 16.2, + "learning_rate": 9.526955602536998e-06, + "loss": 0.0107, + "step": 15324 + }, + { + "epoch": 16.2, + "learning_rate": 9.521670190274842e-06, + "loss": 0.0162, + "step": 15326 + }, + { + "epoch": 16.2, + "learning_rate": 9.516384778012685e-06, + "loss": 0.0182, + "step": 15328 + }, + { + "epoch": 16.21, + "learning_rate": 9.51109936575053e-06, + "loss": 0.0373, + "step": 15330 + }, + { + "epoch": 16.21, + "learning_rate": 9.505813953488372e-06, + "loss": 0.0092, + "step": 15332 + }, + { + "epoch": 16.21, + "learning_rate": 9.500528541226216e-06, + "loss": 0.0283, + "step": 15334 + }, + { + "epoch": 16.21, + "learning_rate": 9.495243128964059e-06, + "loss": 0.0025, + "step": 15336 + }, + { + "epoch": 16.21, + "learning_rate": 9.489957716701903e-06, + "loss": 0.0232, + "step": 15338 + }, + { + "epoch": 16.22, + "learning_rate": 9.484672304439748e-06, + "loss": 0.0578, + "step": 15340 + }, + { + "epoch": 16.22, + "learning_rate": 9.47938689217759e-06, + "loss": 0.0006, + "step": 15342 + }, + { + "epoch": 16.22, + "learning_rate": 9.474101479915435e-06, + "loss": 0.0544, + "step": 15344 + }, + { + "epoch": 16.22, + "learning_rate": 9.468816067653277e-06, + "loss": 0.0355, + "step": 15346 + }, + { + "epoch": 16.22, + "learning_rate": 9.463530655391122e-06, + "loss": 0.0202, + "step": 15348 + }, + { + "epoch": 16.23, + "learning_rate": 9.458245243128964e-06, + "loss": 0.0019, + "step": 15350 + }, + { + "epoch": 16.23, + "learning_rate": 9.452959830866807e-06, + "loss": 0.0169, + "step": 15352 + }, + { + "epoch": 16.23, + "learning_rate": 9.447674418604651e-06, + "loss": 0.0172, + "step": 15354 + }, + { + "epoch": 16.23, + "learning_rate": 9.442389006342494e-06, + "loss": 0.0081, + "step": 15356 + }, + { + "epoch": 16.23, + "learning_rate": 9.437103594080338e-06, + "loss": 0.0047, + "step": 15358 + }, + { + "epoch": 16.24, + "learning_rate": 9.431818181818181e-06, + "loss": 0.0017, + "step": 15360 + }, + { + "epoch": 16.24, + "learning_rate": 9.426532769556026e-06, + "loss": 0.0409, + "step": 15362 + }, + { + "epoch": 16.24, + "learning_rate": 9.42124735729387e-06, + "loss": 0.004, + "step": 15364 + }, + { + "epoch": 16.24, + "learning_rate": 9.415961945031714e-06, + "loss": 0.0238, + "step": 15366 + }, + { + "epoch": 16.25, + "learning_rate": 9.410676532769557e-06, + "loss": 0.0093, + "step": 15368 + }, + { + "epoch": 16.25, + "learning_rate": 9.4053911205074e-06, + "loss": 0.0115, + "step": 15370 + }, + { + "epoch": 16.25, + "learning_rate": 9.400105708245244e-06, + "loss": 0.0043, + "step": 15372 + }, + { + "epoch": 16.25, + "learning_rate": 9.394820295983087e-06, + "loss": 0.0007, + "step": 15374 + }, + { + "epoch": 16.25, + "learning_rate": 9.389534883720931e-06, + "loss": 0.0024, + "step": 15376 + }, + { + "epoch": 16.26, + "learning_rate": 9.384249471458774e-06, + "loss": 0.0156, + "step": 15378 + }, + { + "epoch": 16.26, + "learning_rate": 9.378964059196618e-06, + "loss": 0.019, + "step": 15380 + }, + { + "epoch": 16.26, + "learning_rate": 9.37367864693446e-06, + "loss": 0.0122, + "step": 15382 + }, + { + "epoch": 16.26, + "learning_rate": 9.368393234672305e-06, + "loss": 0.014, + "step": 15384 + }, + { + "epoch": 16.26, + "learning_rate": 9.363107822410148e-06, + "loss": 0.0191, + "step": 15386 + }, + { + "epoch": 16.27, + "learning_rate": 9.357822410147992e-06, + "loss": 0.0772, + "step": 15388 + }, + { + "epoch": 16.27, + "learning_rate": 9.352536997885836e-06, + "loss": 0.0306, + "step": 15390 + }, + { + "epoch": 16.27, + "learning_rate": 9.347251585623679e-06, + "loss": 0.0118, + "step": 15392 + }, + { + "epoch": 16.27, + "learning_rate": 9.341966173361523e-06, + "loss": 0.0154, + "step": 15394 + }, + { + "epoch": 16.27, + "learning_rate": 9.336680761099366e-06, + "loss": 0.0113, + "step": 15396 + }, + { + "epoch": 16.28, + "learning_rate": 9.33139534883721e-06, + "loss": 0.0242, + "step": 15398 + }, + { + "epoch": 16.28, + "learning_rate": 9.326109936575053e-06, + "loss": 0.0293, + "step": 15400 + }, + { + "epoch": 16.28, + "learning_rate": 9.320824524312898e-06, + "loss": 0.0092, + "step": 15402 + }, + { + "epoch": 16.28, + "learning_rate": 9.31553911205074e-06, + "loss": 0.0074, + "step": 15404 + }, + { + "epoch": 16.29, + "learning_rate": 9.310253699788583e-06, + "loss": 0.0031, + "step": 15406 + }, + { + "epoch": 16.29, + "learning_rate": 9.304968287526427e-06, + "loss": 0.0091, + "step": 15408 + }, + { + "epoch": 16.29, + "learning_rate": 9.29968287526427e-06, + "loss": 0.0013, + "step": 15410 + }, + { + "epoch": 16.29, + "learning_rate": 9.294397463002114e-06, + "loss": 0.0035, + "step": 15412 + }, + { + "epoch": 16.29, + "learning_rate": 9.289112050739959e-06, + "loss": 0.0462, + "step": 15414 + }, + { + "epoch": 16.3, + "learning_rate": 9.283826638477803e-06, + "loss": 0.0069, + "step": 15416 + }, + { + "epoch": 16.3, + "learning_rate": 9.278541226215646e-06, + "loss": 0.0014, + "step": 15418 + }, + { + "epoch": 16.3, + "learning_rate": 9.27325581395349e-06, + "loss": 0.0758, + "step": 15420 + }, + { + "epoch": 16.3, + "learning_rate": 9.267970401691333e-06, + "loss": 0.0031, + "step": 15422 + }, + { + "epoch": 16.3, + "learning_rate": 9.262684989429175e-06, + "loss": 0.0355, + "step": 15424 + }, + { + "epoch": 16.31, + "learning_rate": 9.25739957716702e-06, + "loss": 0.0242, + "step": 15426 + }, + { + "epoch": 16.31, + "learning_rate": 9.252114164904862e-06, + "loss": 0.002, + "step": 15428 + }, + { + "epoch": 16.31, + "learning_rate": 9.246828752642707e-06, + "loss": 0.0156, + "step": 15430 + }, + { + "epoch": 16.31, + "learning_rate": 9.24154334038055e-06, + "loss": 0.0538, + "step": 15432 + }, + { + "epoch": 16.32, + "learning_rate": 9.236257928118394e-06, + "loss": 0.0066, + "step": 15434 + }, + { + "epoch": 16.32, + "learning_rate": 9.230972515856237e-06, + "loss": 0.0657, + "step": 15436 + }, + { + "epoch": 16.32, + "learning_rate": 9.225687103594081e-06, + "loss": 0.0134, + "step": 15438 + }, + { + "epoch": 16.32, + "learning_rate": 9.220401691331924e-06, + "loss": 0.0129, + "step": 15440 + }, + { + "epoch": 16.32, + "learning_rate": 9.215116279069768e-06, + "loss": 0.0293, + "step": 15442 + }, + { + "epoch": 16.33, + "learning_rate": 9.209830866807612e-06, + "loss": 0.0413, + "step": 15444 + }, + { + "epoch": 16.33, + "learning_rate": 9.204545454545455e-06, + "loss": 0.0044, + "step": 15446 + }, + { + "epoch": 16.33, + "learning_rate": 9.1992600422833e-06, + "loss": 0.0584, + "step": 15448 + }, + { + "epoch": 16.33, + "learning_rate": 9.193974630021142e-06, + "loss": 0.1936, + "step": 15450 + }, + { + "epoch": 16.33, + "learning_rate": 9.188689217758986e-06, + "loss": 0.0127, + "step": 15452 + }, + { + "epoch": 16.34, + "learning_rate": 9.183403805496829e-06, + "loss": 0.0157, + "step": 15454 + }, + { + "epoch": 16.34, + "learning_rate": 9.178118393234673e-06, + "loss": 0.0288, + "step": 15456 + }, + { + "epoch": 16.34, + "learning_rate": 9.172832980972516e-06, + "loss": 0.0008, + "step": 15458 + }, + { + "epoch": 16.34, + "learning_rate": 9.167547568710359e-06, + "loss": 0.0394, + "step": 15460 + }, + { + "epoch": 16.34, + "learning_rate": 9.162262156448203e-06, + "loss": 0.0032, + "step": 15462 + }, + { + "epoch": 16.35, + "learning_rate": 9.156976744186046e-06, + "loss": 0.035, + "step": 15464 + }, + { + "epoch": 16.35, + "learning_rate": 9.15169133192389e-06, + "loss": 0.0011, + "step": 15466 + }, + { + "epoch": 16.35, + "learning_rate": 9.146405919661735e-06, + "loss": 0.0099, + "step": 15468 + }, + { + "epoch": 16.35, + "learning_rate": 9.141120507399579e-06, + "loss": 0.0036, + "step": 15470 + }, + { + "epoch": 16.36, + "learning_rate": 9.135835095137422e-06, + "loss": 0.0008, + "step": 15472 + }, + { + "epoch": 16.36, + "learning_rate": 9.130549682875264e-06, + "loss": 0.0411, + "step": 15474 + }, + { + "epoch": 16.36, + "learning_rate": 9.125264270613109e-06, + "loss": 0.0199, + "step": 15476 + }, + { + "epoch": 16.36, + "learning_rate": 9.119978858350951e-06, + "loss": 0.0192, + "step": 15478 + }, + { + "epoch": 16.36, + "learning_rate": 9.114693446088796e-06, + "loss": 0.0034, + "step": 15480 + }, + { + "epoch": 16.37, + "learning_rate": 9.109408033826638e-06, + "loss": 0.064, + "step": 15482 + }, + { + "epoch": 16.37, + "learning_rate": 9.104122621564483e-06, + "loss": 0.0287, + "step": 15484 + }, + { + "epoch": 16.37, + "learning_rate": 9.098837209302325e-06, + "loss": 0.0124, + "step": 15486 + }, + { + "epoch": 16.37, + "learning_rate": 9.09355179704017e-06, + "loss": 0.035, + "step": 15488 + }, + { + "epoch": 16.37, + "learning_rate": 9.088266384778012e-06, + "loss": 0.0143, + "step": 15490 + }, + { + "epoch": 16.38, + "learning_rate": 9.082980972515857e-06, + "loss": 0.0335, + "step": 15492 + }, + { + "epoch": 16.38, + "learning_rate": 9.077695560253701e-06, + "loss": 0.0384, + "step": 15494 + }, + { + "epoch": 16.38, + "learning_rate": 9.072410147991544e-06, + "loss": 0.039, + "step": 15496 + }, + { + "epoch": 16.38, + "learning_rate": 9.067124735729388e-06, + "loss": 0.0183, + "step": 15498 + }, + { + "epoch": 16.38, + "learning_rate": 9.06183932346723e-06, + "loss": 0.0215, + "step": 15500 + }, + { + "epoch": 16.38, + "eval_cer": 0.04058136221145626, + "eval_loss": 0.7027328014373779, + "eval_runtime": 129.2071, + "eval_samples_per_second": 6.509, + "eval_steps_per_second": 0.82, + "step": 15500 + }, + { + "epoch": 16.39, + "learning_rate": 9.056553911205075e-06, + "loss": 0.0107, + "step": 15502 + }, + { + "epoch": 16.39, + "learning_rate": 9.051268498942918e-06, + "loss": 0.0201, + "step": 15504 + }, + { + "epoch": 16.39, + "learning_rate": 9.045983086680762e-06, + "loss": 0.0449, + "step": 15506 + }, + { + "epoch": 16.39, + "learning_rate": 9.040697674418605e-06, + "loss": 0.0063, + "step": 15508 + }, + { + "epoch": 16.4, + "learning_rate": 9.035412262156448e-06, + "loss": 0.0025, + "step": 15510 + }, + { + "epoch": 16.4, + "learning_rate": 9.030126849894292e-06, + "loss": 0.0022, + "step": 15512 + }, + { + "epoch": 16.4, + "learning_rate": 9.024841437632135e-06, + "loss": 0.0365, + "step": 15514 + }, + { + "epoch": 16.4, + "learning_rate": 9.019556025369979e-06, + "loss": 0.0839, + "step": 15516 + }, + { + "epoch": 16.4, + "learning_rate": 9.014270613107823e-06, + "loss": 0.0562, + "step": 15518 + }, + { + "epoch": 16.41, + "learning_rate": 9.008985200845666e-06, + "loss": 0.0425, + "step": 15520 + }, + { + "epoch": 16.41, + "learning_rate": 9.00369978858351e-06, + "loss": 0.0228, + "step": 15522 + }, + { + "epoch": 16.41, + "learning_rate": 8.998414376321355e-06, + "loss": 0.0055, + "step": 15524 + }, + { + "epoch": 16.41, + "learning_rate": 8.993128964059197e-06, + "loss": 0.01, + "step": 15526 + }, + { + "epoch": 16.41, + "learning_rate": 8.98784355179704e-06, + "loss": 0.0198, + "step": 15528 + }, + { + "epoch": 16.42, + "learning_rate": 8.982558139534884e-06, + "loss": 0.0038, + "step": 15530 + }, + { + "epoch": 16.42, + "learning_rate": 8.977272727272727e-06, + "loss": 0.0086, + "step": 15532 + }, + { + "epoch": 16.42, + "learning_rate": 8.971987315010571e-06, + "loss": 0.0288, + "step": 15534 + }, + { + "epoch": 16.42, + "learning_rate": 8.966701902748414e-06, + "loss": 0.0007, + "step": 15536 + }, + { + "epoch": 16.42, + "learning_rate": 8.961416490486259e-06, + "loss": 0.0206, + "step": 15538 + }, + { + "epoch": 16.43, + "learning_rate": 8.956131078224101e-06, + "loss": 0.0066, + "step": 15540 + }, + { + "epoch": 16.43, + "learning_rate": 8.950845665961946e-06, + "loss": 0.0089, + "step": 15542 + }, + { + "epoch": 16.43, + "learning_rate": 8.945560253699788e-06, + "loss": 0.0103, + "step": 15544 + }, + { + "epoch": 16.43, + "learning_rate": 8.940274841437633e-06, + "loss": 0.0196, + "step": 15546 + }, + { + "epoch": 16.44, + "learning_rate": 8.934989429175477e-06, + "loss": 0.02, + "step": 15548 + }, + { + "epoch": 16.44, + "learning_rate": 8.92970401691332e-06, + "loss": 0.0585, + "step": 15550 + }, + { + "epoch": 16.44, + "learning_rate": 8.924418604651164e-06, + "loss": 0.0204, + "step": 15552 + }, + { + "epoch": 16.44, + "learning_rate": 8.919133192389007e-06, + "loss": 0.0195, + "step": 15554 + }, + { + "epoch": 16.44, + "learning_rate": 8.913847780126851e-06, + "loss": 0.0055, + "step": 15556 + }, + { + "epoch": 16.45, + "learning_rate": 8.908562367864694e-06, + "loss": 0.037, + "step": 15558 + }, + { + "epoch": 16.45, + "learning_rate": 8.903276955602538e-06, + "loss": 0.005, + "step": 15560 + }, + { + "epoch": 16.45, + "learning_rate": 8.89799154334038e-06, + "loss": 0.0041, + "step": 15562 + }, + { + "epoch": 16.45, + "learning_rate": 8.892706131078223e-06, + "loss": 0.0289, + "step": 15564 + }, + { + "epoch": 16.45, + "learning_rate": 8.887420718816068e-06, + "loss": 0.0018, + "step": 15566 + }, + { + "epoch": 16.46, + "learning_rate": 8.88213530655391e-06, + "loss": 0.0025, + "step": 15568 + }, + { + "epoch": 16.46, + "learning_rate": 8.876849894291755e-06, + "loss": 0.0317, + "step": 15570 + }, + { + "epoch": 16.46, + "learning_rate": 8.8715644820296e-06, + "loss": 0.0246, + "step": 15572 + }, + { + "epoch": 16.46, + "learning_rate": 8.866279069767444e-06, + "loss": 0.0013, + "step": 15574 + }, + { + "epoch": 16.47, + "learning_rate": 8.860993657505286e-06, + "loss": 0.0173, + "step": 15576 + }, + { + "epoch": 16.47, + "learning_rate": 8.85570824524313e-06, + "loss": 0.018, + "step": 15578 + }, + { + "epoch": 16.47, + "learning_rate": 8.850422832980973e-06, + "loss": 0.0055, + "step": 15580 + }, + { + "epoch": 16.47, + "learning_rate": 8.845137420718816e-06, + "loss": 0.0109, + "step": 15582 + }, + { + "epoch": 16.47, + "learning_rate": 8.83985200845666e-06, + "loss": 0.0013, + "step": 15584 + }, + { + "epoch": 16.48, + "learning_rate": 8.834566596194503e-06, + "loss": 0.0017, + "step": 15586 + }, + { + "epoch": 16.48, + "learning_rate": 8.829281183932347e-06, + "loss": 0.0039, + "step": 15588 + }, + { + "epoch": 16.48, + "learning_rate": 8.82399577167019e-06, + "loss": 0.002, + "step": 15590 + }, + { + "epoch": 16.48, + "learning_rate": 8.818710359408034e-06, + "loss": 0.0175, + "step": 15592 + }, + { + "epoch": 16.48, + "learning_rate": 8.813424947145877e-06, + "loss": 0.0164, + "step": 15594 + }, + { + "epoch": 16.49, + "learning_rate": 8.808139534883721e-06, + "loss": 0.0724, + "step": 15596 + }, + { + "epoch": 16.49, + "learning_rate": 8.802854122621566e-06, + "loss": 0.0191, + "step": 15598 + }, + { + "epoch": 16.49, + "learning_rate": 8.797568710359408e-06, + "loss": 0.053, + "step": 15600 + }, + { + "epoch": 16.49, + "learning_rate": 8.792283298097253e-06, + "loss": 0.0191, + "step": 15602 + }, + { + "epoch": 16.49, + "learning_rate": 8.786997885835095e-06, + "loss": 0.0064, + "step": 15604 + }, + { + "epoch": 16.5, + "learning_rate": 8.78171247357294e-06, + "loss": 0.0036, + "step": 15606 + }, + { + "epoch": 16.5, + "learning_rate": 8.776427061310782e-06, + "loss": 0.0846, + "step": 15608 + }, + { + "epoch": 16.5, + "learning_rate": 8.771141649048627e-06, + "loss": 0.0507, + "step": 15610 + }, + { + "epoch": 16.5, + "learning_rate": 8.76585623678647e-06, + "loss": 0.0054, + "step": 15612 + }, + { + "epoch": 16.51, + "learning_rate": 8.760570824524314e-06, + "loss": 0.0069, + "step": 15614 + }, + { + "epoch": 16.51, + "learning_rate": 8.755285412262157e-06, + "loss": 0.0042, + "step": 15616 + }, + { + "epoch": 16.51, + "learning_rate": 8.75e-06, + "loss": 0.0057, + "step": 15618 + }, + { + "epoch": 16.51, + "learning_rate": 8.744714587737844e-06, + "loss": 0.0052, + "step": 15620 + }, + { + "epoch": 16.51, + "learning_rate": 8.739429175475688e-06, + "loss": 0.0015, + "step": 15622 + }, + { + "epoch": 16.52, + "learning_rate": 8.73414376321353e-06, + "loss": 0.0027, + "step": 15624 + }, + { + "epoch": 16.52, + "learning_rate": 8.728858350951375e-06, + "loss": 0.0023, + "step": 15626 + }, + { + "epoch": 16.52, + "learning_rate": 8.72357293868922e-06, + "loss": 0.0057, + "step": 15628 + }, + { + "epoch": 16.52, + "learning_rate": 8.718287526427062e-06, + "loss": 0.0022, + "step": 15630 + }, + { + "epoch": 16.52, + "learning_rate": 8.713002114164906e-06, + "loss": 0.0113, + "step": 15632 + }, + { + "epoch": 16.53, + "learning_rate": 8.707716701902749e-06, + "loss": 0.0017, + "step": 15634 + }, + { + "epoch": 16.53, + "learning_rate": 8.702431289640592e-06, + "loss": 0.0328, + "step": 15636 + }, + { + "epoch": 16.53, + "learning_rate": 8.697145877378436e-06, + "loss": 0.0094, + "step": 15638 + }, + { + "epoch": 16.53, + "learning_rate": 8.691860465116279e-06, + "loss": 0.0201, + "step": 15640 + }, + { + "epoch": 16.53, + "learning_rate": 8.686575052854123e-06, + "loss": 0.0059, + "step": 15642 + }, + { + "epoch": 16.54, + "learning_rate": 8.681289640591966e-06, + "loss": 0.0061, + "step": 15644 + }, + { + "epoch": 16.54, + "learning_rate": 8.67600422832981e-06, + "loss": 0.0123, + "step": 15646 + }, + { + "epoch": 16.54, + "learning_rate": 8.670718816067653e-06, + "loss": 0.038, + "step": 15648 + }, + { + "epoch": 16.54, + "learning_rate": 8.665433403805497e-06, + "loss": 0.0219, + "step": 15650 + }, + { + "epoch": 16.55, + "learning_rate": 8.660147991543342e-06, + "loss": 0.0099, + "step": 15652 + }, + { + "epoch": 16.55, + "learning_rate": 8.654862579281184e-06, + "loss": 0.019, + "step": 15654 + }, + { + "epoch": 16.55, + "learning_rate": 8.649577167019029e-06, + "loss": 0.001, + "step": 15656 + }, + { + "epoch": 16.55, + "learning_rate": 8.644291754756871e-06, + "loss": 0.0327, + "step": 15658 + }, + { + "epoch": 16.55, + "learning_rate": 8.639006342494716e-06, + "loss": 0.0166, + "step": 15660 + }, + { + "epoch": 16.56, + "learning_rate": 8.633720930232558e-06, + "loss": 0.0084, + "step": 15662 + }, + { + "epoch": 16.56, + "learning_rate": 8.628435517970403e-06, + "loss": 0.0051, + "step": 15664 + }, + { + "epoch": 16.56, + "learning_rate": 8.623150105708245e-06, + "loss": 0.0303, + "step": 15666 + }, + { + "epoch": 16.56, + "learning_rate": 8.617864693446088e-06, + "loss": 0.0372, + "step": 15668 + }, + { + "epoch": 16.56, + "learning_rate": 8.612579281183932e-06, + "loss": 0.0384, + "step": 15670 + }, + { + "epoch": 16.57, + "learning_rate": 8.607293868921775e-06, + "loss": 0.0705, + "step": 15672 + }, + { + "epoch": 16.57, + "learning_rate": 8.60200845665962e-06, + "loss": 0.012, + "step": 15674 + }, + { + "epoch": 16.57, + "learning_rate": 8.596723044397464e-06, + "loss": 0.0019, + "step": 15676 + }, + { + "epoch": 16.57, + "learning_rate": 8.591437632135308e-06, + "loss": 0.0009, + "step": 15678 + }, + { + "epoch": 16.58, + "learning_rate": 8.58615221987315e-06, + "loss": 0.0035, + "step": 15680 + }, + { + "epoch": 16.58, + "learning_rate": 8.580866807610995e-06, + "loss": 0.0016, + "step": 15682 + }, + { + "epoch": 16.58, + "learning_rate": 8.575581395348838e-06, + "loss": 0.0076, + "step": 15684 + }, + { + "epoch": 16.58, + "learning_rate": 8.57029598308668e-06, + "loss": 0.0049, + "step": 15686 + }, + { + "epoch": 16.58, + "learning_rate": 8.565010570824525e-06, + "loss": 0.0144, + "step": 15688 + }, + { + "epoch": 16.59, + "learning_rate": 8.559725158562368e-06, + "loss": 0.0011, + "step": 15690 + }, + { + "epoch": 16.59, + "learning_rate": 8.554439746300212e-06, + "loss": 0.0017, + "step": 15692 + }, + { + "epoch": 16.59, + "learning_rate": 8.549154334038055e-06, + "loss": 0.0122, + "step": 15694 + }, + { + "epoch": 16.59, + "learning_rate": 8.543868921775899e-06, + "loss": 0.0038, + "step": 15696 + }, + { + "epoch": 16.59, + "learning_rate": 8.538583509513742e-06, + "loss": 0.0005, + "step": 15698 + }, + { + "epoch": 16.6, + "learning_rate": 8.533298097251586e-06, + "loss": 0.0341, + "step": 15700 + }, + { + "epoch": 16.6, + "learning_rate": 8.52801268498943e-06, + "loss": 0.0007, + "step": 15702 + }, + { + "epoch": 16.6, + "learning_rate": 8.522727272727273e-06, + "loss": 0.0677, + "step": 15704 + }, + { + "epoch": 16.6, + "learning_rate": 8.517441860465117e-06, + "loss": 0.0053, + "step": 15706 + }, + { + "epoch": 16.6, + "learning_rate": 8.51215644820296e-06, + "loss": 0.0009, + "step": 15708 + }, + { + "epoch": 16.61, + "learning_rate": 8.506871035940804e-06, + "loss": 0.0008, + "step": 15710 + }, + { + "epoch": 16.61, + "learning_rate": 8.501585623678647e-06, + "loss": 0.0008, + "step": 15712 + }, + { + "epoch": 16.61, + "learning_rate": 8.496300211416491e-06, + "loss": 0.0045, + "step": 15714 + }, + { + "epoch": 16.61, + "learning_rate": 8.491014799154334e-06, + "loss": 0.018, + "step": 15716 + }, + { + "epoch": 16.62, + "learning_rate": 8.485729386892179e-06, + "loss": 0.0387, + "step": 15718 + }, + { + "epoch": 16.62, + "learning_rate": 8.480443974630021e-06, + "loss": 0.0098, + "step": 15720 + }, + { + "epoch": 16.62, + "learning_rate": 8.475158562367864e-06, + "loss": 0.0036, + "step": 15722 + }, + { + "epoch": 16.62, + "learning_rate": 8.469873150105708e-06, + "loss": 0.0045, + "step": 15724 + }, + { + "epoch": 16.62, + "learning_rate": 8.464587737843551e-06, + "loss": 0.023, + "step": 15726 + }, + { + "epoch": 16.63, + "learning_rate": 8.459302325581395e-06, + "loss": 0.0013, + "step": 15728 + }, + { + "epoch": 16.63, + "learning_rate": 8.45401691331924e-06, + "loss": 0.0501, + "step": 15730 + }, + { + "epoch": 16.63, + "learning_rate": 8.448731501057084e-06, + "loss": 0.0244, + "step": 15732 + }, + { + "epoch": 16.63, + "learning_rate": 8.443446088794927e-06, + "loss": 0.003, + "step": 15734 + }, + { + "epoch": 16.63, + "learning_rate": 8.438160676532771e-06, + "loss": 0.0093, + "step": 15736 + }, + { + "epoch": 16.64, + "learning_rate": 8.432875264270614e-06, + "loss": 0.0012, + "step": 15738 + }, + { + "epoch": 16.64, + "learning_rate": 8.427589852008456e-06, + "loss": 0.0012, + "step": 15740 + }, + { + "epoch": 16.64, + "learning_rate": 8.4223044397463e-06, + "loss": 0.0007, + "step": 15742 + }, + { + "epoch": 16.64, + "learning_rate": 8.417019027484143e-06, + "loss": 0.003, + "step": 15744 + }, + { + "epoch": 16.64, + "learning_rate": 8.411733615221988e-06, + "loss": 0.0197, + "step": 15746 + }, + { + "epoch": 16.65, + "learning_rate": 8.40644820295983e-06, + "loss": 0.0114, + "step": 15748 + }, + { + "epoch": 16.65, + "learning_rate": 8.401162790697675e-06, + "loss": 0.0009, + "step": 15750 + }, + { + "epoch": 16.65, + "learning_rate": 8.395877378435517e-06, + "loss": 0.0244, + "step": 15752 + }, + { + "epoch": 16.65, + "learning_rate": 8.390591966173362e-06, + "loss": 0.0402, + "step": 15754 + }, + { + "epoch": 16.66, + "learning_rate": 8.385306553911206e-06, + "loss": 0.0028, + "step": 15756 + }, + { + "epoch": 16.66, + "learning_rate": 8.380021141649049e-06, + "loss": 0.0157, + "step": 15758 + }, + { + "epoch": 16.66, + "learning_rate": 8.374735729386893e-06, + "loss": 0.0052, + "step": 15760 + }, + { + "epoch": 16.66, + "learning_rate": 8.369450317124736e-06, + "loss": 0.0008, + "step": 15762 + }, + { + "epoch": 16.66, + "learning_rate": 8.36416490486258e-06, + "loss": 0.0569, + "step": 15764 + }, + { + "epoch": 16.67, + "learning_rate": 8.358879492600423e-06, + "loss": 0.0022, + "step": 15766 + }, + { + "epoch": 16.67, + "learning_rate": 8.353594080338267e-06, + "loss": 0.0005, + "step": 15768 + }, + { + "epoch": 16.67, + "learning_rate": 8.34830866807611e-06, + "loss": 0.0443, + "step": 15770 + }, + { + "epoch": 16.67, + "learning_rate": 8.343023255813954e-06, + "loss": 0.0037, + "step": 15772 + }, + { + "epoch": 16.67, + "learning_rate": 8.337737843551797e-06, + "loss": 0.0637, + "step": 15774 + }, + { + "epoch": 16.68, + "learning_rate": 8.33245243128964e-06, + "loss": 0.0059, + "step": 15776 + }, + { + "epoch": 16.68, + "learning_rate": 8.327167019027484e-06, + "loss": 0.0065, + "step": 15778 + }, + { + "epoch": 16.68, + "learning_rate": 8.321881606765328e-06, + "loss": 0.0043, + "step": 15780 + }, + { + "epoch": 16.68, + "learning_rate": 8.316596194503173e-06, + "loss": 0.0005, + "step": 15782 + }, + { + "epoch": 16.68, + "learning_rate": 8.311310782241015e-06, + "loss": 0.0015, + "step": 15784 + }, + { + "epoch": 16.69, + "learning_rate": 8.30602536997886e-06, + "loss": 0.0041, + "step": 15786 + }, + { + "epoch": 16.69, + "learning_rate": 8.300739957716703e-06, + "loss": 0.0269, + "step": 15788 + }, + { + "epoch": 16.69, + "learning_rate": 8.295454545454547e-06, + "loss": 0.0046, + "step": 15790 + }, + { + "epoch": 16.69, + "learning_rate": 8.29016913319239e-06, + "loss": 0.029, + "step": 15792 + }, + { + "epoch": 16.7, + "learning_rate": 8.284883720930232e-06, + "loss": 0.0044, + "step": 15794 + }, + { + "epoch": 16.7, + "learning_rate": 8.279598308668077e-06, + "loss": 0.0115, + "step": 15796 + }, + { + "epoch": 16.7, + "learning_rate": 8.27431289640592e-06, + "loss": 0.0472, + "step": 15798 + }, + { + "epoch": 16.7, + "learning_rate": 8.269027484143764e-06, + "loss": 0.0076, + "step": 15800 + }, + { + "epoch": 16.7, + "learning_rate": 8.263742071881606e-06, + "loss": 0.0065, + "step": 15802 + }, + { + "epoch": 16.71, + "learning_rate": 8.25845665961945e-06, + "loss": 0.0278, + "step": 15804 + }, + { + "epoch": 16.71, + "learning_rate": 8.253171247357295e-06, + "loss": 0.0006, + "step": 15806 + }, + { + "epoch": 16.71, + "learning_rate": 8.247885835095138e-06, + "loss": 0.0354, + "step": 15808 + }, + { + "epoch": 16.71, + "learning_rate": 8.242600422832982e-06, + "loss": 0.0012, + "step": 15810 + }, + { + "epoch": 16.71, + "learning_rate": 8.237315010570825e-06, + "loss": 0.0095, + "step": 15812 + }, + { + "epoch": 16.72, + "learning_rate": 8.232029598308669e-06, + "loss": 0.0546, + "step": 15814 + }, + { + "epoch": 16.72, + "learning_rate": 8.226744186046512e-06, + "loss": 0.0153, + "step": 15816 + }, + { + "epoch": 16.72, + "learning_rate": 8.221458773784356e-06, + "loss": 0.0081, + "step": 15818 + }, + { + "epoch": 16.72, + "learning_rate": 8.216173361522199e-06, + "loss": 0.0276, + "step": 15820 + }, + { + "epoch": 16.73, + "learning_rate": 8.210887949260043e-06, + "loss": 0.0224, + "step": 15822 + }, + { + "epoch": 16.73, + "learning_rate": 8.205602536997886e-06, + "loss": 0.0207, + "step": 15824 + }, + { + "epoch": 16.73, + "learning_rate": 8.200317124735729e-06, + "loss": 0.0153, + "step": 15826 + }, + { + "epoch": 16.73, + "learning_rate": 8.195031712473573e-06, + "loss": 0.0632, + "step": 15828 + }, + { + "epoch": 16.73, + "learning_rate": 8.189746300211416e-06, + "loss": 0.0229, + "step": 15830 + }, + { + "epoch": 16.74, + "learning_rate": 8.18446088794926e-06, + "loss": 0.004, + "step": 15832 + }, + { + "epoch": 16.74, + "learning_rate": 8.179175475687104e-06, + "loss": 0.0379, + "step": 15834 + }, + { + "epoch": 16.74, + "learning_rate": 8.173890063424949e-06, + "loss": 0.0025, + "step": 15836 + }, + { + "epoch": 16.74, + "learning_rate": 8.168604651162791e-06, + "loss": 0.0197, + "step": 15838 + }, + { + "epoch": 16.74, + "learning_rate": 8.163319238900636e-06, + "loss": 0.0026, + "step": 15840 + }, + { + "epoch": 16.75, + "learning_rate": 8.158033826638478e-06, + "loss": 0.0005, + "step": 15842 + }, + { + "epoch": 16.75, + "learning_rate": 8.152748414376321e-06, + "loss": 0.0158, + "step": 15844 + }, + { + "epoch": 16.75, + "learning_rate": 8.147463002114165e-06, + "loss": 0.0148, + "step": 15846 + }, + { + "epoch": 16.75, + "learning_rate": 8.142177589852008e-06, + "loss": 0.025, + "step": 15848 + }, + { + "epoch": 16.75, + "learning_rate": 8.136892177589852e-06, + "loss": 0.0016, + "step": 15850 + }, + { + "epoch": 16.76, + "learning_rate": 8.131606765327695e-06, + "loss": 0.0008, + "step": 15852 + }, + { + "epoch": 16.76, + "learning_rate": 8.12632135306554e-06, + "loss": 0.0041, + "step": 15854 + }, + { + "epoch": 16.76, + "learning_rate": 8.121035940803382e-06, + "loss": 0.0118, + "step": 15856 + }, + { + "epoch": 16.76, + "learning_rate": 8.115750528541227e-06, + "loss": 0.0253, + "step": 15858 + }, + { + "epoch": 16.77, + "learning_rate": 8.110465116279071e-06, + "loss": 0.0133, + "step": 15860 + }, + { + "epoch": 16.77, + "learning_rate": 8.105179704016914e-06, + "loss": 0.0063, + "step": 15862 + }, + { + "epoch": 16.77, + "learning_rate": 8.099894291754758e-06, + "loss": 0.0136, + "step": 15864 + }, + { + "epoch": 16.77, + "learning_rate": 8.0946088794926e-06, + "loss": 0.003, + "step": 15866 + }, + { + "epoch": 16.77, + "learning_rate": 8.089323467230445e-06, + "loss": 0.0086, + "step": 15868 + }, + { + "epoch": 16.78, + "learning_rate": 8.084038054968288e-06, + "loss": 0.003, + "step": 15870 + }, + { + "epoch": 16.78, + "learning_rate": 8.078752642706132e-06, + "loss": 0.0041, + "step": 15872 + }, + { + "epoch": 16.78, + "learning_rate": 8.073467230443975e-06, + "loss": 0.0026, + "step": 15874 + }, + { + "epoch": 16.78, + "learning_rate": 8.068181818181819e-06, + "loss": 0.0174, + "step": 15876 + }, + { + "epoch": 16.78, + "learning_rate": 8.062896405919662e-06, + "loss": 0.0183, + "step": 15878 + }, + { + "epoch": 16.79, + "learning_rate": 8.057610993657504e-06, + "loss": 0.005, + "step": 15880 + }, + { + "epoch": 16.79, + "learning_rate": 8.052325581395349e-06, + "loss": 0.0087, + "step": 15882 + }, + { + "epoch": 16.79, + "learning_rate": 8.047040169133193e-06, + "loss": 0.0174, + "step": 15884 + }, + { + "epoch": 16.79, + "learning_rate": 8.041754756871037e-06, + "loss": 0.0063, + "step": 15886 + }, + { + "epoch": 16.79, + "learning_rate": 8.03646934460888e-06, + "loss": 0.0044, + "step": 15888 + }, + { + "epoch": 16.8, + "learning_rate": 8.031183932346724e-06, + "loss": 0.0145, + "step": 15890 + }, + { + "epoch": 16.8, + "learning_rate": 8.025898520084567e-06, + "loss": 0.0314, + "step": 15892 + }, + { + "epoch": 16.8, + "learning_rate": 8.020613107822412e-06, + "loss": 0.0004, + "step": 15894 + }, + { + "epoch": 16.8, + "learning_rate": 8.015327695560254e-06, + "loss": 0.0327, + "step": 15896 + }, + { + "epoch": 16.81, + "learning_rate": 8.010042283298097e-06, + "loss": 0.0079, + "step": 15898 + }, + { + "epoch": 16.81, + "learning_rate": 8.004756871035941e-06, + "loss": 0.0295, + "step": 15900 + }, + { + "epoch": 16.81, + "learning_rate": 7.999471458773784e-06, + "loss": 0.0153, + "step": 15902 + }, + { + "epoch": 16.81, + "learning_rate": 7.994186046511628e-06, + "loss": 0.0069, + "step": 15904 + }, + { + "epoch": 16.81, + "learning_rate": 7.988900634249471e-06, + "loss": 0.001, + "step": 15906 + }, + { + "epoch": 16.82, + "learning_rate": 7.983615221987315e-06, + "loss": 0.0023, + "step": 15908 + }, + { + "epoch": 16.82, + "learning_rate": 7.978329809725158e-06, + "loss": 0.0239, + "step": 15910 + }, + { + "epoch": 16.82, + "learning_rate": 7.973044397463002e-06, + "loss": 0.0301, + "step": 15912 + }, + { + "epoch": 16.82, + "learning_rate": 7.967758985200847e-06, + "loss": 0.0061, + "step": 15914 + }, + { + "epoch": 16.82, + "learning_rate": 7.96247357293869e-06, + "loss": 0.0106, + "step": 15916 + }, + { + "epoch": 16.83, + "learning_rate": 7.957188160676534e-06, + "loss": 0.0008, + "step": 15918 + }, + { + "epoch": 16.83, + "learning_rate": 7.951902748414376e-06, + "loss": 0.0302, + "step": 15920 + }, + { + "epoch": 16.83, + "learning_rate": 7.94661733615222e-06, + "loss": 0.014, + "step": 15922 + }, + { + "epoch": 16.83, + "learning_rate": 7.941331923890063e-06, + "loss": 0.007, + "step": 15924 + }, + { + "epoch": 16.84, + "learning_rate": 7.936046511627908e-06, + "loss": 0.0229, + "step": 15926 + }, + { + "epoch": 16.84, + "learning_rate": 7.93076109936575e-06, + "loss": 0.0071, + "step": 15928 + }, + { + "epoch": 16.84, + "learning_rate": 7.925475687103595e-06, + "loss": 0.0022, + "step": 15930 + }, + { + "epoch": 16.84, + "learning_rate": 7.920190274841438e-06, + "loss": 0.0043, + "step": 15932 + }, + { + "epoch": 16.84, + "learning_rate": 7.91490486257928e-06, + "loss": 0.023, + "step": 15934 + }, + { + "epoch": 16.85, + "learning_rate": 7.909619450317125e-06, + "loss": 0.0016, + "step": 15936 + }, + { + "epoch": 16.85, + "learning_rate": 7.904334038054969e-06, + "loss": 0.0009, + "step": 15938 + }, + { + "epoch": 16.85, + "learning_rate": 7.899048625792813e-06, + "loss": 0.0116, + "step": 15940 + }, + { + "epoch": 16.85, + "learning_rate": 7.893763213530656e-06, + "loss": 0.0317, + "step": 15942 + }, + { + "epoch": 16.85, + "learning_rate": 7.8884778012685e-06, + "loss": 0.0041, + "step": 15944 + }, + { + "epoch": 16.86, + "learning_rate": 7.883192389006343e-06, + "loss": 0.0323, + "step": 15946 + }, + { + "epoch": 16.86, + "learning_rate": 7.877906976744187e-06, + "loss": 0.0025, + "step": 15948 + }, + { + "epoch": 16.86, + "learning_rate": 7.87262156448203e-06, + "loss": 0.0025, + "step": 15950 + }, + { + "epoch": 16.86, + "learning_rate": 7.867336152219873e-06, + "loss": 0.0026, + "step": 15952 + }, + { + "epoch": 16.86, + "learning_rate": 7.862050739957717e-06, + "loss": 0.0046, + "step": 15954 + }, + { + "epoch": 16.87, + "learning_rate": 7.85676532769556e-06, + "loss": 0.0028, + "step": 15956 + }, + { + "epoch": 16.87, + "learning_rate": 7.851479915433404e-06, + "loss": 0.0106, + "step": 15958 + }, + { + "epoch": 16.87, + "learning_rate": 7.846194503171247e-06, + "loss": 0.0023, + "step": 15960 + }, + { + "epoch": 16.87, + "learning_rate": 7.840909090909091e-06, + "loss": 0.0007, + "step": 15962 + }, + { + "epoch": 16.88, + "learning_rate": 7.835623678646936e-06, + "loss": 0.0119, + "step": 15964 + }, + { + "epoch": 16.88, + "learning_rate": 7.83033826638478e-06, + "loss": 0.0067, + "step": 15966 + }, + { + "epoch": 16.88, + "learning_rate": 7.825052854122623e-06, + "loss": 0.0019, + "step": 15968 + }, + { + "epoch": 16.88, + "learning_rate": 7.819767441860465e-06, + "loss": 0.0025, + "step": 15970 + }, + { + "epoch": 16.88, + "learning_rate": 7.81448202959831e-06, + "loss": 0.007, + "step": 15972 + }, + { + "epoch": 16.89, + "learning_rate": 7.809196617336152e-06, + "loss": 0.0025, + "step": 15974 + }, + { + "epoch": 16.89, + "learning_rate": 7.803911205073997e-06, + "loss": 0.0012, + "step": 15976 + }, + { + "epoch": 16.89, + "learning_rate": 7.79862579281184e-06, + "loss": 0.0012, + "step": 15978 + }, + { + "epoch": 16.89, + "learning_rate": 7.793340380549684e-06, + "loss": 0.005, + "step": 15980 + }, + { + "epoch": 16.89, + "learning_rate": 7.788054968287526e-06, + "loss": 0.0006, + "step": 15982 + }, + { + "epoch": 16.9, + "learning_rate": 7.782769556025369e-06, + "loss": 0.0004, + "step": 15984 + }, + { + "epoch": 16.9, + "learning_rate": 7.777484143763213e-06, + "loss": 0.002, + "step": 15986 + }, + { + "epoch": 16.9, + "learning_rate": 7.772198731501058e-06, + "loss": 0.0265, + "step": 15988 + }, + { + "epoch": 16.9, + "learning_rate": 7.766913319238902e-06, + "loss": 0.0074, + "step": 15990 + }, + { + "epoch": 16.9, + "learning_rate": 7.761627906976745e-06, + "loss": 0.0025, + "step": 15992 + }, + { + "epoch": 16.91, + "learning_rate": 7.756342494714589e-06, + "loss": 0.0256, + "step": 15994 + }, + { + "epoch": 16.91, + "learning_rate": 7.751057082452432e-06, + "loss": 0.0007, + "step": 15996 + }, + { + "epoch": 16.91, + "learning_rate": 7.745771670190276e-06, + "loss": 0.0028, + "step": 15998 + }, + { + "epoch": 16.91, + "learning_rate": 7.740486257928119e-06, + "loss": 0.0029, + "step": 16000 + }, + { + "epoch": 16.91, + "eval_cer": 0.06184098033627814, + "eval_loss": 0.898295521736145, + "eval_runtime": 124.9218, + "eval_samples_per_second": 6.732, + "eval_steps_per_second": 0.849, + "step": 16000 + }, + { + "epoch": 16.92, + "learning_rate": 7.735200845665962e-06, + "loss": 0.0036, + "step": 16002 + }, + { + "epoch": 16.92, + "learning_rate": 7.729915433403806e-06, + "loss": 0.0021, + "step": 16004 + }, + { + "epoch": 16.92, + "learning_rate": 7.724630021141649e-06, + "loss": 0.0006, + "step": 16006 + }, + { + "epoch": 16.92, + "learning_rate": 7.719344608879493e-06, + "loss": 0.0126, + "step": 16008 + }, + { + "epoch": 16.92, + "learning_rate": 7.714059196617336e-06, + "loss": 0.0391, + "step": 16010 + }, + { + "epoch": 16.93, + "learning_rate": 7.70877378435518e-06, + "loss": 0.0153, + "step": 16012 + }, + { + "epoch": 16.93, + "learning_rate": 7.703488372093023e-06, + "loss": 0.0036, + "step": 16014 + }, + { + "epoch": 16.93, + "learning_rate": 7.698202959830867e-06, + "loss": 0.0109, + "step": 16016 + }, + { + "epoch": 16.93, + "learning_rate": 7.692917547568711e-06, + "loss": 0.0096, + "step": 16018 + }, + { + "epoch": 16.93, + "learning_rate": 7.687632135306554e-06, + "loss": 0.0146, + "step": 16020 + }, + { + "epoch": 16.94, + "learning_rate": 7.682346723044398e-06, + "loss": 0.0411, + "step": 16022 + }, + { + "epoch": 16.94, + "learning_rate": 7.677061310782241e-06, + "loss": 0.0046, + "step": 16024 + }, + { + "epoch": 16.94, + "learning_rate": 7.671775898520085e-06, + "loss": 0.0061, + "step": 16026 + }, + { + "epoch": 16.94, + "learning_rate": 7.666490486257928e-06, + "loss": 0.0031, + "step": 16028 + }, + { + "epoch": 16.95, + "learning_rate": 7.661205073995772e-06, + "loss": 0.0096, + "step": 16030 + }, + { + "epoch": 16.95, + "learning_rate": 7.655919661733615e-06, + "loss": 0.0065, + "step": 16032 + }, + { + "epoch": 16.95, + "learning_rate": 7.65063424947146e-06, + "loss": 0.0007, + "step": 16034 + }, + { + "epoch": 16.95, + "learning_rate": 7.645348837209302e-06, + "loss": 0.0067, + "step": 16036 + }, + { + "epoch": 16.95, + "learning_rate": 7.640063424947145e-06, + "loss": 0.0016, + "step": 16038 + }, + { + "epoch": 16.96, + "learning_rate": 7.63477801268499e-06, + "loss": 0.0039, + "step": 16040 + }, + { + "epoch": 16.96, + "learning_rate": 7.629492600422834e-06, + "loss": 0.017, + "step": 16042 + }, + { + "epoch": 16.96, + "learning_rate": 7.624207188160677e-06, + "loss": 0.004, + "step": 16044 + }, + { + "epoch": 16.96, + "learning_rate": 7.61892177589852e-06, + "loss": 0.0162, + "step": 16046 + }, + { + "epoch": 16.96, + "learning_rate": 7.613636363636364e-06, + "loss": 0.0003, + "step": 16048 + }, + { + "epoch": 16.97, + "learning_rate": 7.608350951374208e-06, + "loss": 0.0022, + "step": 16050 + }, + { + "epoch": 16.97, + "learning_rate": 7.603065539112052e-06, + "loss": 0.0284, + "step": 16052 + }, + { + "epoch": 16.97, + "learning_rate": 7.597780126849895e-06, + "loss": 0.0081, + "step": 16054 + }, + { + "epoch": 16.97, + "learning_rate": 7.592494714587737e-06, + "loss": 0.0081, + "step": 16056 + }, + { + "epoch": 16.97, + "learning_rate": 7.587209302325582e-06, + "loss": 0.0119, + "step": 16058 + }, + { + "epoch": 16.98, + "learning_rate": 7.581923890063425e-06, + "loss": 0.0114, + "step": 16060 + }, + { + "epoch": 16.98, + "learning_rate": 7.576638477801269e-06, + "loss": 0.0038, + "step": 16062 + }, + { + "epoch": 16.98, + "learning_rate": 7.571353065539112e-06, + "loss": 0.0042, + "step": 16064 + }, + { + "epoch": 16.98, + "learning_rate": 7.566067653276957e-06, + "loss": 0.0329, + "step": 16066 + }, + { + "epoch": 16.99, + "learning_rate": 7.560782241014799e-06, + "loss": 0.0011, + "step": 16068 + }, + { + "epoch": 16.99, + "learning_rate": 7.555496828752644e-06, + "loss": 0.0017, + "step": 16070 + }, + { + "epoch": 16.99, + "learning_rate": 7.550211416490486e-06, + "loss": 0.0037, + "step": 16072 + }, + { + "epoch": 16.99, + "learning_rate": 7.54492600422833e-06, + "loss": 0.0032, + "step": 16074 + }, + { + "epoch": 16.99, + "learning_rate": 7.539640591966174e-06, + "loss": 0.0028, + "step": 16076 + }, + { + "epoch": 17.0, + "learning_rate": 7.534355179704017e-06, + "loss": 0.0038, + "step": 16078 + }, + { + "epoch": 17.0, + "learning_rate": 7.529069767441861e-06, + "loss": 0.026, + "step": 16080 + }, + { + "epoch": 17.0, + "learning_rate": 7.523784355179704e-06, + "loss": 0.0007, + "step": 16082 + }, + { + "epoch": 17.0, + "learning_rate": 7.518498942917548e-06, + "loss": 0.001, + "step": 16084 + }, + { + "epoch": 17.0, + "learning_rate": 7.513213530655391e-06, + "loss": 0.021, + "step": 16086 + }, + { + "epoch": 17.01, + "learning_rate": 7.507928118393235e-06, + "loss": 0.0175, + "step": 16088 + }, + { + "epoch": 17.01, + "learning_rate": 7.502642706131079e-06, + "loss": 0.0011, + "step": 16090 + }, + { + "epoch": 17.01, + "learning_rate": 7.4973572938689215e-06, + "loss": 0.0009, + "step": 16092 + }, + { + "epoch": 17.01, + "learning_rate": 7.492071881606766e-06, + "loss": 0.0044, + "step": 16094 + }, + { + "epoch": 17.01, + "learning_rate": 7.4867864693446086e-06, + "loss": 0.0152, + "step": 16096 + }, + { + "epoch": 17.02, + "learning_rate": 7.481501057082453e-06, + "loss": 0.0012, + "step": 16098 + }, + { + "epoch": 17.02, + "learning_rate": 7.4762156448202964e-06, + "loss": 0.0022, + "step": 16100 + }, + { + "epoch": 17.02, + "learning_rate": 7.470930232558141e-06, + "loss": 0.0017, + "step": 16102 + }, + { + "epoch": 17.02, + "learning_rate": 7.4656448202959835e-06, + "loss": 0.0019, + "step": 16104 + }, + { + "epoch": 17.03, + "learning_rate": 7.460359408033828e-06, + "loss": 0.0055, + "step": 16106 + }, + { + "epoch": 17.03, + "learning_rate": 7.4550739957716705e-06, + "loss": 0.0068, + "step": 16108 + }, + { + "epoch": 17.03, + "learning_rate": 7.449788583509513e-06, + "loss": 0.0033, + "step": 16110 + }, + { + "epoch": 17.03, + "learning_rate": 7.4445031712473576e-06, + "loss": 0.0175, + "step": 16112 + }, + { + "epoch": 17.03, + "learning_rate": 7.439217758985201e-06, + "loss": 0.0005, + "step": 16114 + }, + { + "epoch": 17.04, + "learning_rate": 7.4339323467230454e-06, + "loss": 0.0096, + "step": 16116 + }, + { + "epoch": 17.04, + "learning_rate": 7.428646934460888e-06, + "loss": 0.004, + "step": 16118 + }, + { + "epoch": 17.04, + "learning_rate": 7.4233615221987325e-06, + "loss": 0.0256, + "step": 16120 + }, + { + "epoch": 17.04, + "learning_rate": 7.418076109936575e-06, + "loss": 0.013, + "step": 16122 + }, + { + "epoch": 17.04, + "learning_rate": 7.4127906976744195e-06, + "loss": 0.0216, + "step": 16124 + }, + { + "epoch": 17.05, + "learning_rate": 7.407505285412262e-06, + "loss": 0.0032, + "step": 16126 + }, + { + "epoch": 17.05, + "learning_rate": 7.402219873150106e-06, + "loss": 0.0082, + "step": 16128 + }, + { + "epoch": 17.05, + "learning_rate": 7.39693446088795e-06, + "loss": 0.0007, + "step": 16130 + }, + { + "epoch": 17.05, + "learning_rate": 7.391649048625793e-06, + "loss": 0.0019, + "step": 16132 + }, + { + "epoch": 17.05, + "learning_rate": 7.386363636363637e-06, + "loss": 0.0007, + "step": 16134 + }, + { + "epoch": 17.06, + "learning_rate": 7.38107822410148e-06, + "loss": 0.0008, + "step": 16136 + }, + { + "epoch": 17.06, + "learning_rate": 7.375792811839324e-06, + "loss": 0.0093, + "step": 16138 + }, + { + "epoch": 17.06, + "learning_rate": 7.370507399577168e-06, + "loss": 0.0072, + "step": 16140 + }, + { + "epoch": 17.06, + "learning_rate": 7.36522198731501e-06, + "loss": 0.0097, + "step": 16142 + }, + { + "epoch": 17.07, + "learning_rate": 7.359936575052855e-06, + "loss": 0.0028, + "step": 16144 + }, + { + "epoch": 17.07, + "learning_rate": 7.354651162790697e-06, + "loss": 0.0212, + "step": 16146 + }, + { + "epoch": 17.07, + "learning_rate": 7.349365750528542e-06, + "loss": 0.0076, + "step": 16148 + }, + { + "epoch": 17.07, + "learning_rate": 7.344080338266384e-06, + "loss": 0.0006, + "step": 16150 + }, + { + "epoch": 17.07, + "learning_rate": 7.338794926004229e-06, + "loss": 0.0008, + "step": 16152 + }, + { + "epoch": 17.08, + "learning_rate": 7.333509513742072e-06, + "loss": 0.0017, + "step": 16154 + }, + { + "epoch": 17.08, + "learning_rate": 7.328224101479917e-06, + "loss": 0.0008, + "step": 16156 + }, + { + "epoch": 17.08, + "learning_rate": 7.322938689217759e-06, + "loss": 0.005, + "step": 16158 + }, + { + "epoch": 17.08, + "learning_rate": 7.317653276955602e-06, + "loss": 0.0022, + "step": 16160 + }, + { + "epoch": 17.08, + "learning_rate": 7.312367864693446e-06, + "loss": 0.001, + "step": 16162 + }, + { + "epoch": 17.09, + "learning_rate": 7.30708245243129e-06, + "loss": 0.0022, + "step": 16164 + }, + { + "epoch": 17.09, + "learning_rate": 7.301797040169133e-06, + "loss": 0.0132, + "step": 16166 + }, + { + "epoch": 17.09, + "learning_rate": 7.296511627906977e-06, + "loss": 0.0175, + "step": 16168 + }, + { + "epoch": 17.09, + "learning_rate": 7.291226215644821e-06, + "loss": 0.0066, + "step": 16170 + }, + { + "epoch": 17.1, + "learning_rate": 7.285940803382664e-06, + "loss": 0.0012, + "step": 16172 + }, + { + "epoch": 17.1, + "learning_rate": 7.280655391120508e-06, + "loss": 0.0157, + "step": 16174 + }, + { + "epoch": 17.1, + "learning_rate": 7.275369978858351e-06, + "loss": 0.0004, + "step": 16176 + }, + { + "epoch": 17.1, + "learning_rate": 7.2700845665961945e-06, + "loss": 0.0009, + "step": 16178 + }, + { + "epoch": 17.1, + "learning_rate": 7.264799154334039e-06, + "loss": 0.0012, + "step": 16180 + }, + { + "epoch": 17.11, + "learning_rate": 7.2595137420718815e-06, + "loss": 0.0054, + "step": 16182 + }, + { + "epoch": 17.11, + "learning_rate": 7.254228329809726e-06, + "loss": 0.0018, + "step": 16184 + }, + { + "epoch": 17.11, + "learning_rate": 7.248942917547569e-06, + "loss": 0.0034, + "step": 16186 + }, + { + "epoch": 17.11, + "learning_rate": 7.243657505285413e-06, + "loss": 0.0015, + "step": 16188 + }, + { + "epoch": 17.11, + "learning_rate": 7.238372093023256e-06, + "loss": 0.0046, + "step": 16190 + }, + { + "epoch": 17.12, + "learning_rate": 7.2330866807611e-06, + "loss": 0.0049, + "step": 16192 + }, + { + "epoch": 17.12, + "learning_rate": 7.2278012684989435e-06, + "loss": 0.0222, + "step": 16194 + }, + { + "epoch": 17.12, + "learning_rate": 7.222515856236786e-06, + "loss": 0.0074, + "step": 16196 + }, + { + "epoch": 17.12, + "learning_rate": 7.2172304439746305e-06, + "loss": 0.0017, + "step": 16198 + }, + { + "epoch": 17.12, + "learning_rate": 7.211945031712473e-06, + "loss": 0.0007, + "step": 16200 + }, + { + "epoch": 17.13, + "learning_rate": 7.2066596194503176e-06, + "loss": 0.0201, + "step": 16202 + }, + { + "epoch": 17.13, + "learning_rate": 7.201374207188161e-06, + "loss": 0.0021, + "step": 16204 + }, + { + "epoch": 17.13, + "learning_rate": 7.196088794926005e-06, + "loss": 0.0318, + "step": 16206 + }, + { + "epoch": 17.13, + "learning_rate": 7.190803382663848e-06, + "loss": 0.0256, + "step": 16208 + }, + { + "epoch": 17.14, + "learning_rate": 7.1855179704016925e-06, + "loss": 0.0006, + "step": 16210 + }, + { + "epoch": 17.14, + "learning_rate": 7.180232558139535e-06, + "loss": 0.0008, + "step": 16212 + }, + { + "epoch": 17.14, + "learning_rate": 7.174947145877378e-06, + "loss": 0.0055, + "step": 16214 + }, + { + "epoch": 17.14, + "learning_rate": 7.169661733615222e-06, + "loss": 0.0084, + "step": 16216 + }, + { + "epoch": 17.14, + "learning_rate": 7.164376321353066e-06, + "loss": 0.0011, + "step": 16218 + }, + { + "epoch": 17.15, + "learning_rate": 7.15909090909091e-06, + "loss": 0.0011, + "step": 16220 + }, + { + "epoch": 17.15, + "learning_rate": 7.153805496828753e-06, + "loss": 0.0394, + "step": 16222 + }, + { + "epoch": 17.15, + "learning_rate": 7.148520084566597e-06, + "loss": 0.0011, + "step": 16224 + }, + { + "epoch": 17.15, + "learning_rate": 7.14323467230444e-06, + "loss": 0.0027, + "step": 16226 + }, + { + "epoch": 17.15, + "learning_rate": 7.137949260042284e-06, + "loss": 0.0044, + "step": 16228 + }, + { + "epoch": 17.16, + "learning_rate": 7.132663847780127e-06, + "loss": 0.0014, + "step": 16230 + }, + { + "epoch": 17.16, + "learning_rate": 7.12737843551797e-06, + "loss": 0.0003, + "step": 16232 + }, + { + "epoch": 17.16, + "learning_rate": 7.122093023255815e-06, + "loss": 0.0099, + "step": 16234 + }, + { + "epoch": 17.16, + "learning_rate": 7.116807610993657e-06, + "loss": 0.0033, + "step": 16236 + }, + { + "epoch": 17.16, + "learning_rate": 7.111522198731502e-06, + "loss": 0.0008, + "step": 16238 + }, + { + "epoch": 17.17, + "learning_rate": 7.106236786469344e-06, + "loss": 0.0005, + "step": 16240 + }, + { + "epoch": 17.17, + "learning_rate": 7.100951374207189e-06, + "loss": 0.0012, + "step": 16242 + }, + { + "epoch": 17.17, + "learning_rate": 7.095665961945032e-06, + "loss": 0.0004, + "step": 16244 + }, + { + "epoch": 17.17, + "learning_rate": 7.090380549682876e-06, + "loss": 0.0134, + "step": 16246 + }, + { + "epoch": 17.18, + "learning_rate": 7.085095137420719e-06, + "loss": 0.0119, + "step": 16248 + }, + { + "epoch": 17.18, + "learning_rate": 7.079809725158562e-06, + "loss": 0.0072, + "step": 16250 + }, + { + "epoch": 17.18, + "learning_rate": 7.074524312896406e-06, + "loss": 0.0175, + "step": 16252 + }, + { + "epoch": 17.18, + "learning_rate": 7.069238900634249e-06, + "loss": 0.0004, + "step": 16254 + }, + { + "epoch": 17.18, + "learning_rate": 7.063953488372093e-06, + "loss": 0.0064, + "step": 16256 + }, + { + "epoch": 17.19, + "learning_rate": 7.058668076109937e-06, + "loss": 0.0226, + "step": 16258 + }, + { + "epoch": 17.19, + "learning_rate": 7.053382663847781e-06, + "loss": 0.0171, + "step": 16260 + }, + { + "epoch": 17.19, + "learning_rate": 7.048097251585624e-06, + "loss": 0.0052, + "step": 16262 + }, + { + "epoch": 17.19, + "learning_rate": 7.042811839323468e-06, + "loss": 0.0022, + "step": 16264 + }, + { + "epoch": 17.19, + "learning_rate": 7.037526427061311e-06, + "loss": 0.0127, + "step": 16266 + }, + { + "epoch": 17.2, + "learning_rate": 7.032241014799154e-06, + "loss": 0.0005, + "step": 16268 + }, + { + "epoch": 17.2, + "learning_rate": 7.026955602536998e-06, + "loss": 0.0222, + "step": 16270 + }, + { + "epoch": 17.2, + "learning_rate": 7.0216701902748416e-06, + "loss": 0.0011, + "step": 16272 + }, + { + "epoch": 17.2, + "learning_rate": 7.016384778012686e-06, + "loss": 0.0004, + "step": 16274 + }, + { + "epoch": 17.21, + "learning_rate": 7.011099365750529e-06, + "loss": 0.0028, + "step": 16276 + }, + { + "epoch": 17.21, + "learning_rate": 7.005813953488373e-06, + "loss": 0.0113, + "step": 16278 + }, + { + "epoch": 17.21, + "learning_rate": 7.000528541226216e-06, + "loss": 0.0286, + "step": 16280 + }, + { + "epoch": 17.21, + "learning_rate": 6.99524312896406e-06, + "loss": 0.0013, + "step": 16282 + }, + { + "epoch": 17.21, + "learning_rate": 6.9899577167019035e-06, + "loss": 0.0129, + "step": 16284 + }, + { + "epoch": 17.22, + "learning_rate": 6.984672304439746e-06, + "loss": 0.0817, + "step": 16286 + }, + { + "epoch": 17.22, + "learning_rate": 6.9793868921775905e-06, + "loss": 0.0009, + "step": 16288 + }, + { + "epoch": 17.22, + "learning_rate": 6.974101479915433e-06, + "loss": 0.0052, + "step": 16290 + }, + { + "epoch": 17.22, + "learning_rate": 6.968816067653278e-06, + "loss": 0.0012, + "step": 16292 + }, + { + "epoch": 17.22, + "learning_rate": 6.96353065539112e-06, + "loss": 0.014, + "step": 16294 + }, + { + "epoch": 17.23, + "learning_rate": 6.958245243128965e-06, + "loss": 0.0027, + "step": 16296 + }, + { + "epoch": 17.23, + "learning_rate": 6.952959830866808e-06, + "loss": 0.0091, + "step": 16298 + }, + { + "epoch": 17.23, + "learning_rate": 6.9476744186046525e-06, + "loss": 0.0008, + "step": 16300 + }, + { + "epoch": 17.23, + "learning_rate": 6.942389006342495e-06, + "loss": 0.0025, + "step": 16302 + }, + { + "epoch": 17.23, + "learning_rate": 6.937103594080338e-06, + "loss": 0.0054, + "step": 16304 + }, + { + "epoch": 17.24, + "learning_rate": 6.931818181818182e-06, + "loss": 0.0019, + "step": 16306 + }, + { + "epoch": 17.24, + "learning_rate": 6.926532769556026e-06, + "loss": 0.0156, + "step": 16308 + }, + { + "epoch": 17.24, + "learning_rate": 6.921247357293869e-06, + "loss": 0.0007, + "step": 16310 + }, + { + "epoch": 17.24, + "learning_rate": 6.915961945031713e-06, + "loss": 0.0075, + "step": 16312 + }, + { + "epoch": 17.25, + "learning_rate": 6.910676532769557e-06, + "loss": 0.0142, + "step": 16314 + }, + { + "epoch": 17.25, + "learning_rate": 6.9053911205074e-06, + "loss": 0.0168, + "step": 16316 + }, + { + "epoch": 17.25, + "learning_rate": 6.9001057082452425e-06, + "loss": 0.0031, + "step": 16318 + }, + { + "epoch": 17.25, + "learning_rate": 6.894820295983087e-06, + "loss": 0.002, + "step": 16320 + }, + { + "epoch": 17.25, + "learning_rate": 6.88953488372093e-06, + "loss": 0.0541, + "step": 16322 + }, + { + "epoch": 17.26, + "learning_rate": 6.884249471458775e-06, + "loss": 0.0205, + "step": 16324 + }, + { + "epoch": 17.26, + "learning_rate": 6.878964059196617e-06, + "loss": 0.0156, + "step": 16326 + }, + { + "epoch": 17.26, + "learning_rate": 6.873678646934462e-06, + "loss": 0.011, + "step": 16328 + }, + { + "epoch": 17.26, + "learning_rate": 6.8683932346723044e-06, + "loss": 0.0022, + "step": 16330 + }, + { + "epoch": 17.26, + "learning_rate": 6.863107822410149e-06, + "loss": 0.0043, + "step": 16332 + }, + { + "epoch": 17.27, + "learning_rate": 6.8578224101479915e-06, + "loss": 0.0053, + "step": 16334 + }, + { + "epoch": 17.27, + "learning_rate": 6.852536997885835e-06, + "loss": 0.02, + "step": 16336 + }, + { + "epoch": 17.27, + "learning_rate": 6.847251585623679e-06, + "loss": 0.0137, + "step": 16338 + }, + { + "epoch": 17.27, + "learning_rate": 6.841966173361522e-06, + "loss": 0.0012, + "step": 16340 + }, + { + "epoch": 17.27, + "learning_rate": 6.836680761099366e-06, + "loss": 0.0009, + "step": 16342 + }, + { + "epoch": 17.28, + "learning_rate": 6.831395348837209e-06, + "loss": 0.0044, + "step": 16344 + }, + { + "epoch": 17.28, + "learning_rate": 6.8261099365750534e-06, + "loss": 0.0085, + "step": 16346 + }, + { + "epoch": 17.28, + "learning_rate": 6.820824524312897e-06, + "loss": 0.0034, + "step": 16348 + }, + { + "epoch": 17.28, + "learning_rate": 6.8155391120507405e-06, + "loss": 0.0038, + "step": 16350 + }, + { + "epoch": 17.29, + "learning_rate": 6.810253699788584e-06, + "loss": 0.0003, + "step": 16352 + }, + { + "epoch": 17.29, + "learning_rate": 6.804968287526427e-06, + "loss": 0.007, + "step": 16354 + }, + { + "epoch": 17.29, + "learning_rate": 6.799682875264271e-06, + "loss": 0.0029, + "step": 16356 + }, + { + "epoch": 17.29, + "learning_rate": 6.794397463002114e-06, + "loss": 0.0007, + "step": 16358 + }, + { + "epoch": 17.29, + "learning_rate": 6.789112050739958e-06, + "loss": 0.0047, + "step": 16360 + }, + { + "epoch": 17.3, + "learning_rate": 6.7838266384778016e-06, + "loss": 0.015, + "step": 16362 + }, + { + "epoch": 17.3, + "learning_rate": 6.778541226215646e-06, + "loss": 0.0027, + "step": 16364 + }, + { + "epoch": 17.3, + "learning_rate": 6.773255813953489e-06, + "loss": 0.0054, + "step": 16366 + }, + { + "epoch": 17.3, + "learning_rate": 6.767970401691333e-06, + "loss": 0.0051, + "step": 16368 + }, + { + "epoch": 17.3, + "learning_rate": 6.762684989429176e-06, + "loss": 0.0084, + "step": 16370 + }, + { + "epoch": 17.31, + "learning_rate": 6.757399577167018e-06, + "loss": 0.0005, + "step": 16372 + }, + { + "epoch": 17.31, + "learning_rate": 6.752114164904863e-06, + "loss": 0.016, + "step": 16374 + }, + { + "epoch": 17.31, + "learning_rate": 6.746828752642706e-06, + "loss": 0.0017, + "step": 16376 + }, + { + "epoch": 17.31, + "learning_rate": 6.7415433403805506e-06, + "loss": 0.0436, + "step": 16378 + }, + { + "epoch": 17.32, + "learning_rate": 6.736257928118393e-06, + "loss": 0.0104, + "step": 16380 + }, + { + "epoch": 17.32, + "learning_rate": 6.730972515856238e-06, + "loss": 0.001, + "step": 16382 + }, + { + "epoch": 17.32, + "learning_rate": 6.72568710359408e-06, + "loss": 0.0574, + "step": 16384 + }, + { + "epoch": 17.32, + "learning_rate": 6.720401691331925e-06, + "loss": 0.0078, + "step": 16386 + }, + { + "epoch": 17.32, + "learning_rate": 6.715116279069768e-06, + "loss": 0.0009, + "step": 16388 + }, + { + "epoch": 17.33, + "learning_rate": 6.709830866807611e-06, + "loss": 0.0058, + "step": 16390 + }, + { + "epoch": 17.33, + "learning_rate": 6.704545454545455e-06, + "loss": 0.0005, + "step": 16392 + }, + { + "epoch": 17.33, + "learning_rate": 6.699260042283298e-06, + "loss": 0.0012, + "step": 16394 + }, + { + "epoch": 17.33, + "learning_rate": 6.693974630021142e-06, + "loss": 0.0379, + "step": 16396 + }, + { + "epoch": 17.33, + "learning_rate": 6.688689217758985e-06, + "loss": 0.0008, + "step": 16398 + }, + { + "epoch": 17.34, + "learning_rate": 6.683403805496829e-06, + "loss": 0.0317, + "step": 16400 + }, + { + "epoch": 17.34, + "learning_rate": 6.678118393234673e-06, + "loss": 0.0498, + "step": 16402 + }, + { + "epoch": 17.34, + "learning_rate": 6.672832980972517e-06, + "loss": 0.0172, + "step": 16404 + }, + { + "epoch": 17.34, + "learning_rate": 6.66754756871036e-06, + "loss": 0.0015, + "step": 16406 + }, + { + "epoch": 17.34, + "learning_rate": 6.6622621564482025e-06, + "loss": 0.0068, + "step": 16408 + }, + { + "epoch": 17.35, + "learning_rate": 6.656976744186047e-06, + "loss": 0.0037, + "step": 16410 + }, + { + "epoch": 17.35, + "learning_rate": 6.6516913319238895e-06, + "loss": 0.0012, + "step": 16412 + }, + { + "epoch": 17.35, + "learning_rate": 6.646405919661734e-06, + "loss": 0.0078, + "step": 16414 + }, + { + "epoch": 17.35, + "learning_rate": 6.641120507399577e-06, + "loss": 0.0044, + "step": 16416 + }, + { + "epoch": 17.36, + "learning_rate": 6.635835095137422e-06, + "loss": 0.0033, + "step": 16418 + }, + { + "epoch": 17.36, + "learning_rate": 6.6305496828752644e-06, + "loss": 0.0222, + "step": 16420 + }, + { + "epoch": 17.36, + "learning_rate": 6.625264270613109e-06, + "loss": 0.0202, + "step": 16422 + }, + { + "epoch": 17.36, + "learning_rate": 6.6199788583509515e-06, + "loss": 0.0009, + "step": 16424 + }, + { + "epoch": 17.36, + "learning_rate": 6.614693446088795e-06, + "loss": 0.0109, + "step": 16426 + }, + { + "epoch": 17.37, + "learning_rate": 6.609408033826639e-06, + "loss": 0.0003, + "step": 16428 + }, + { + "epoch": 17.37, + "learning_rate": 6.604122621564482e-06, + "loss": 0.0081, + "step": 16430 + }, + { + "epoch": 17.37, + "learning_rate": 6.598837209302326e-06, + "loss": 0.0067, + "step": 16432 + }, + { + "epoch": 17.37, + "learning_rate": 6.593551797040169e-06, + "loss": 0.0026, + "step": 16434 + }, + { + "epoch": 17.37, + "learning_rate": 6.5882663847780134e-06, + "loss": 0.0021, + "step": 16436 + }, + { + "epoch": 17.38, + "learning_rate": 6.582980972515856e-06, + "loss": 0.0019, + "step": 16438 + }, + { + "epoch": 17.38, + "learning_rate": 6.5776955602537005e-06, + "loss": 0.0143, + "step": 16440 + }, + { + "epoch": 17.38, + "learning_rate": 6.572410147991544e-06, + "loss": 0.0131, + "step": 16442 + }, + { + "epoch": 17.38, + "learning_rate": 6.567124735729387e-06, + "loss": 0.0088, + "step": 16444 + }, + { + "epoch": 17.38, + "learning_rate": 6.561839323467231e-06, + "loss": 0.0022, + "step": 16446 + }, + { + "epoch": 17.39, + "learning_rate": 6.556553911205074e-06, + "loss": 0.0029, + "step": 16448 + }, + { + "epoch": 17.39, + "learning_rate": 6.551268498942918e-06, + "loss": 0.0072, + "step": 16450 + }, + { + "epoch": 17.39, + "learning_rate": 6.545983086680761e-06, + "loss": 0.0004, + "step": 16452 + }, + { + "epoch": 17.39, + "learning_rate": 6.540697674418605e-06, + "loss": 0.0018, + "step": 16454 + }, + { + "epoch": 17.4, + "learning_rate": 6.535412262156449e-06, + "loss": 0.0021, + "step": 16456 + }, + { + "epoch": 17.4, + "learning_rate": 6.530126849894293e-06, + "loss": 0.0125, + "step": 16458 + }, + { + "epoch": 17.4, + "learning_rate": 6.524841437632136e-06, + "loss": 0.0024, + "step": 16460 + }, + { + "epoch": 17.4, + "learning_rate": 6.519556025369978e-06, + "loss": 0.0167, + "step": 16462 + }, + { + "epoch": 17.4, + "learning_rate": 6.514270613107823e-06, + "loss": 0.001, + "step": 16464 + }, + { + "epoch": 17.41, + "learning_rate": 6.508985200845666e-06, + "loss": 0.0194, + "step": 16466 + }, + { + "epoch": 17.41, + "learning_rate": 6.503699788583511e-06, + "loss": 0.0352, + "step": 16468 + }, + { + "epoch": 17.41, + "learning_rate": 6.498414376321353e-06, + "loss": 0.0035, + "step": 16470 + }, + { + "epoch": 17.41, + "learning_rate": 6.493128964059198e-06, + "loss": 0.007, + "step": 16472 + }, + { + "epoch": 17.41, + "learning_rate": 6.48784355179704e-06, + "loss": 0.0011, + "step": 16474 + }, + { + "epoch": 17.42, + "learning_rate": 6.482558139534885e-06, + "loss": 0.0042, + "step": 16476 + }, + { + "epoch": 17.42, + "learning_rate": 6.477272727272727e-06, + "loss": 0.004, + "step": 16478 + }, + { + "epoch": 17.42, + "learning_rate": 6.471987315010571e-06, + "loss": 0.0087, + "step": 16480 + }, + { + "epoch": 17.42, + "learning_rate": 6.466701902748415e-06, + "loss": 0.0063, + "step": 16482 + }, + { + "epoch": 17.42, + "learning_rate": 6.461416490486258e-06, + "loss": 0.0038, + "step": 16484 + }, + { + "epoch": 17.43, + "learning_rate": 6.456131078224102e-06, + "loss": 0.0028, + "step": 16486 + }, + { + "epoch": 17.43, + "learning_rate": 6.450845665961945e-06, + "loss": 0.0137, + "step": 16488 + }, + { + "epoch": 17.43, + "learning_rate": 6.445560253699789e-06, + "loss": 0.0012, + "step": 16490 + }, + { + "epoch": 17.43, + "learning_rate": 6.440274841437633e-06, + "loss": 0.0024, + "step": 16492 + }, + { + "epoch": 17.44, + "learning_rate": 6.4349894291754755e-06, + "loss": 0.028, + "step": 16494 + }, + { + "epoch": 17.44, + "learning_rate": 6.42970401691332e-06, + "loss": 0.0153, + "step": 16496 + }, + { + "epoch": 17.44, + "learning_rate": 6.4244186046511625e-06, + "loss": 0.0074, + "step": 16498 + }, + { + "epoch": 17.44, + "learning_rate": 6.419133192389007e-06, + "loss": 0.0004, + "step": 16500 + }, + { + "epoch": 17.44, + "eval_cer": 0.04713593616414933, + "eval_loss": 0.8180399537086487, + "eval_runtime": 126.9565, + "eval_samples_per_second": 6.624, + "eval_steps_per_second": 0.835, + "step": 16500 + }, + { + "epoch": 17.44, + "learning_rate": 6.4138477801268495e-06, + "loss": 0.0022, + "step": 16502 + }, + { + "epoch": 17.45, + "learning_rate": 6.408562367864694e-06, + "loss": 0.011, + "step": 16504 + }, + { + "epoch": 17.45, + "learning_rate": 6.4032769556025374e-06, + "loss": 0.0024, + "step": 16506 + }, + { + "epoch": 17.45, + "learning_rate": 6.397991543340382e-06, + "loss": 0.0023, + "step": 16508 + }, + { + "epoch": 17.45, + "learning_rate": 6.3927061310782245e-06, + "loss": 0.0145, + "step": 16510 + }, + { + "epoch": 17.45, + "learning_rate": 6.387420718816067e-06, + "loss": 0.0111, + "step": 16512 + }, + { + "epoch": 17.46, + "learning_rate": 6.3821353065539115e-06, + "loss": 0.0011, + "step": 16514 + }, + { + "epoch": 17.46, + "learning_rate": 6.376849894291754e-06, + "loss": 0.0246, + "step": 16516 + }, + { + "epoch": 17.46, + "learning_rate": 6.3715644820295985e-06, + "loss": 0.0247, + "step": 16518 + }, + { + "epoch": 17.46, + "learning_rate": 6.366279069767442e-06, + "loss": 0.0227, + "step": 16520 + }, + { + "epoch": 17.47, + "learning_rate": 6.360993657505286e-06, + "loss": 0.0443, + "step": 16522 + }, + { + "epoch": 17.47, + "learning_rate": 6.355708245243129e-06, + "loss": 0.0018, + "step": 16524 + }, + { + "epoch": 17.47, + "learning_rate": 6.3504228329809735e-06, + "loss": 0.0099, + "step": 16526 + }, + { + "epoch": 17.47, + "learning_rate": 6.345137420718816e-06, + "loss": 0.0018, + "step": 16528 + }, + { + "epoch": 17.47, + "learning_rate": 6.33985200845666e-06, + "loss": 0.1091, + "step": 16530 + }, + { + "epoch": 17.48, + "learning_rate": 6.334566596194504e-06, + "loss": 0.0205, + "step": 16532 + }, + { + "epoch": 17.48, + "learning_rate": 6.329281183932347e-06, + "loss": 0.0005, + "step": 16534 + }, + { + "epoch": 17.48, + "learning_rate": 6.323995771670191e-06, + "loss": 0.0016, + "step": 16536 + }, + { + "epoch": 17.48, + "learning_rate": 6.318710359408034e-06, + "loss": 0.0003, + "step": 16538 + }, + { + "epoch": 17.48, + "learning_rate": 6.313424947145878e-06, + "loss": 0.0024, + "step": 16540 + }, + { + "epoch": 17.49, + "learning_rate": 6.308139534883721e-06, + "loss": 0.0043, + "step": 16542 + }, + { + "epoch": 17.49, + "learning_rate": 6.302854122621565e-06, + "loss": 0.0262, + "step": 16544 + }, + { + "epoch": 17.49, + "learning_rate": 6.297568710359409e-06, + "loss": 0.0064, + "step": 16546 + }, + { + "epoch": 17.49, + "learning_rate": 6.292283298097251e-06, + "loss": 0.0018, + "step": 16548 + }, + { + "epoch": 17.49, + "learning_rate": 6.286997885835096e-06, + "loss": 0.0133, + "step": 16550 + }, + { + "epoch": 17.5, + "learning_rate": 6.281712473572938e-06, + "loss": 0.0052, + "step": 16552 + }, + { + "epoch": 17.5, + "learning_rate": 6.276427061310783e-06, + "loss": 0.0006, + "step": 16554 + }, + { + "epoch": 17.5, + "learning_rate": 6.271141649048625e-06, + "loss": 0.0827, + "step": 16556 + }, + { + "epoch": 17.5, + "learning_rate": 6.26585623678647e-06, + "loss": 0.0039, + "step": 16558 + }, + { + "epoch": 17.51, + "learning_rate": 6.260570824524313e-06, + "loss": 0.0007, + "step": 16560 + }, + { + "epoch": 17.51, + "learning_rate": 6.255285412262158e-06, + "loss": 0.0035, + "step": 16562 + }, + { + "epoch": 17.51, + "learning_rate": 6.25e-06, + "loss": 0.0046, + "step": 16564 + }, + { + "epoch": 17.51, + "learning_rate": 6.244714587737844e-06, + "loss": 0.008, + "step": 16566 + }, + { + "epoch": 17.51, + "learning_rate": 6.239429175475687e-06, + "loss": 0.0105, + "step": 16568 + }, + { + "epoch": 17.52, + "learning_rate": 6.234143763213531e-06, + "loss": 0.0166, + "step": 16570 + }, + { + "epoch": 17.52, + "learning_rate": 6.228858350951375e-06, + "loss": 0.0054, + "step": 16572 + }, + { + "epoch": 17.52, + "learning_rate": 6.223572938689218e-06, + "loss": 0.0008, + "step": 16574 + }, + { + "epoch": 17.52, + "learning_rate": 6.218287526427061e-06, + "loss": 0.0012, + "step": 16576 + }, + { + "epoch": 17.52, + "learning_rate": 6.213002114164905e-06, + "loss": 0.0022, + "step": 16578 + }, + { + "epoch": 17.53, + "learning_rate": 6.2077167019027484e-06, + "loss": 0.0016, + "step": 16580 + }, + { + "epoch": 17.53, + "learning_rate": 6.202431289640592e-06, + "loss": 0.0014, + "step": 16582 + }, + { + "epoch": 17.53, + "learning_rate": 6.197145877378436e-06, + "loss": 0.0407, + "step": 16584 + }, + { + "epoch": 17.53, + "learning_rate": 6.19186046511628e-06, + "loss": 0.0259, + "step": 16586 + }, + { + "epoch": 17.53, + "learning_rate": 6.186575052854123e-06, + "loss": 0.0132, + "step": 16588 + }, + { + "epoch": 17.54, + "learning_rate": 6.181289640591967e-06, + "loss": 0.0078, + "step": 16590 + }, + { + "epoch": 17.54, + "learning_rate": 6.1760042283298096e-06, + "loss": 0.0276, + "step": 16592 + }, + { + "epoch": 17.54, + "learning_rate": 6.170718816067653e-06, + "loss": 0.0355, + "step": 16594 + }, + { + "epoch": 17.54, + "learning_rate": 6.165433403805497e-06, + "loss": 0.0024, + "step": 16596 + }, + { + "epoch": 17.55, + "learning_rate": 6.160147991543341e-06, + "loss": 0.0605, + "step": 16598 + }, + { + "epoch": 17.55, + "learning_rate": 6.1548625792811845e-06, + "loss": 0.0178, + "step": 16600 + }, + { + "epoch": 17.55, + "learning_rate": 6.149577167019028e-06, + "loss": 0.0032, + "step": 16602 + }, + { + "epoch": 17.55, + "learning_rate": 6.1442917547568715e-06, + "loss": 0.009, + "step": 16604 + }, + { + "epoch": 17.55, + "learning_rate": 6.139006342494715e-06, + "loss": 0.0005, + "step": 16606 + }, + { + "epoch": 17.56, + "learning_rate": 6.133720930232558e-06, + "loss": 0.0131, + "step": 16608 + }, + { + "epoch": 17.56, + "learning_rate": 6.128435517970402e-06, + "loss": 0.0034, + "step": 16610 + }, + { + "epoch": 17.56, + "learning_rate": 6.123150105708246e-06, + "loss": 0.0038, + "step": 16612 + }, + { + "epoch": 17.56, + "learning_rate": 6.117864693446089e-06, + "loss": 0.0088, + "step": 16614 + }, + { + "epoch": 17.56, + "learning_rate": 6.112579281183933e-06, + "loss": 0.001, + "step": 16616 + }, + { + "epoch": 17.57, + "learning_rate": 6.107293868921776e-06, + "loss": 0.0009, + "step": 16618 + }, + { + "epoch": 17.57, + "learning_rate": 6.10200845665962e-06, + "loss": 0.0175, + "step": 16620 + }, + { + "epoch": 17.57, + "learning_rate": 6.096723044397463e-06, + "loss": 0.0262, + "step": 16622 + }, + { + "epoch": 17.57, + "learning_rate": 6.0914376321353075e-06, + "loss": 0.0154, + "step": 16624 + }, + { + "epoch": 17.58, + "learning_rate": 6.08615221987315e-06, + "loss": 0.018, + "step": 16626 + }, + { + "epoch": 17.58, + "learning_rate": 6.080866807610994e-06, + "loss": 0.0041, + "step": 16628 + }, + { + "epoch": 17.58, + "learning_rate": 6.075581395348837e-06, + "loss": 0.0035, + "step": 16630 + }, + { + "epoch": 17.58, + "learning_rate": 6.070295983086681e-06, + "loss": 0.0379, + "step": 16632 + }, + { + "epoch": 17.58, + "learning_rate": 6.065010570824524e-06, + "loss": 0.0023, + "step": 16634 + }, + { + "epoch": 17.59, + "learning_rate": 6.059725158562368e-06, + "loss": 0.0011, + "step": 16636 + }, + { + "epoch": 17.59, + "learning_rate": 6.054439746300212e-06, + "loss": 0.0198, + "step": 16638 + }, + { + "epoch": 17.59, + "learning_rate": 6.049154334038056e-06, + "loss": 0.0122, + "step": 16640 + }, + { + "epoch": 17.59, + "learning_rate": 6.043868921775899e-06, + "loss": 0.0144, + "step": 16642 + }, + { + "epoch": 17.59, + "learning_rate": 6.038583509513742e-06, + "loss": 0.0008, + "step": 16644 + }, + { + "epoch": 17.6, + "learning_rate": 6.033298097251585e-06, + "loss": 0.0019, + "step": 16646 + }, + { + "epoch": 17.6, + "learning_rate": 6.028012684989429e-06, + "loss": 0.0022, + "step": 16648 + }, + { + "epoch": 17.6, + "learning_rate": 6.022727272727273e-06, + "loss": 0.0007, + "step": 16650 + }, + { + "epoch": 17.6, + "learning_rate": 6.017441860465117e-06, + "loss": 0.0312, + "step": 16652 + }, + { + "epoch": 17.6, + "learning_rate": 6.01215644820296e-06, + "loss": 0.0004, + "step": 16654 + }, + { + "epoch": 17.61, + "learning_rate": 6.006871035940804e-06, + "loss": 0.0004, + "step": 16656 + }, + { + "epoch": 17.61, + "learning_rate": 6.001585623678647e-06, + "loss": 0.0015, + "step": 16658 + }, + { + "epoch": 17.61, + "learning_rate": 5.996300211416491e-06, + "loss": 0.0029, + "step": 16660 + }, + { + "epoch": 17.61, + "learning_rate": 5.991014799154334e-06, + "loss": 0.0024, + "step": 16662 + }, + { + "epoch": 17.62, + "learning_rate": 5.985729386892178e-06, + "loss": 0.022, + "step": 16664 + }, + { + "epoch": 17.62, + "learning_rate": 5.9804439746300214e-06, + "loss": 0.0022, + "step": 16666 + }, + { + "epoch": 17.62, + "learning_rate": 5.975158562367865e-06, + "loss": 0.0008, + "step": 16668 + }, + { + "epoch": 17.62, + "learning_rate": 5.9698731501057085e-06, + "loss": 0.0008, + "step": 16670 + }, + { + "epoch": 17.62, + "learning_rate": 5.964587737843552e-06, + "loss": 0.0016, + "step": 16672 + }, + { + "epoch": 17.63, + "learning_rate": 5.9593023255813955e-06, + "loss": 0.0059, + "step": 16674 + }, + { + "epoch": 17.63, + "learning_rate": 5.95401691331924e-06, + "loss": 0.0033, + "step": 16676 + }, + { + "epoch": 17.63, + "learning_rate": 5.948731501057083e-06, + "loss": 0.0013, + "step": 16678 + }, + { + "epoch": 17.63, + "learning_rate": 5.943446088794926e-06, + "loss": 0.0049, + "step": 16680 + }, + { + "epoch": 17.63, + "learning_rate": 5.9381606765327696e-06, + "loss": 0.0126, + "step": 16682 + }, + { + "epoch": 17.64, + "learning_rate": 5.932875264270613e-06, + "loss": 0.0058, + "step": 16684 + }, + { + "epoch": 17.64, + "learning_rate": 5.927589852008457e-06, + "loss": 0.0008, + "step": 16686 + }, + { + "epoch": 17.64, + "learning_rate": 5.9223044397463e-06, + "loss": 0.0017, + "step": 16688 + }, + { + "epoch": 17.64, + "learning_rate": 5.9170190274841445e-06, + "loss": 0.003, + "step": 16690 + }, + { + "epoch": 17.64, + "learning_rate": 5.911733615221988e-06, + "loss": 0.0069, + "step": 16692 + }, + { + "epoch": 17.65, + "learning_rate": 5.9064482029598315e-06, + "loss": 0.0078, + "step": 16694 + }, + { + "epoch": 17.65, + "learning_rate": 5.901162790697674e-06, + "loss": 0.0326, + "step": 16696 + }, + { + "epoch": 17.65, + "learning_rate": 5.895877378435518e-06, + "loss": 0.0114, + "step": 16698 + }, + { + "epoch": 17.65, + "learning_rate": 5.890591966173361e-06, + "loss": 0.0008, + "step": 16700 + }, + { + "epoch": 17.66, + "learning_rate": 5.885306553911206e-06, + "loss": 0.0004, + "step": 16702 + }, + { + "epoch": 17.66, + "learning_rate": 5.880021141649049e-06, + "loss": 0.0994, + "step": 16704 + }, + { + "epoch": 17.66, + "learning_rate": 5.874735729386893e-06, + "loss": 0.0003, + "step": 16706 + }, + { + "epoch": 17.66, + "learning_rate": 5.869450317124736e-06, + "loss": 0.016, + "step": 16708 + }, + { + "epoch": 17.66, + "learning_rate": 5.86416490486258e-06, + "loss": 0.0019, + "step": 16710 + }, + { + "epoch": 17.67, + "learning_rate": 5.858879492600423e-06, + "loss": 0.0014, + "step": 16712 + }, + { + "epoch": 17.67, + "learning_rate": 5.853594080338267e-06, + "loss": 0.0013, + "step": 16714 + }, + { + "epoch": 17.67, + "learning_rate": 5.84830866807611e-06, + "loss": 0.0011, + "step": 16716 + }, + { + "epoch": 17.67, + "learning_rate": 5.843023255813954e-06, + "loss": 0.0334, + "step": 16718 + }, + { + "epoch": 17.67, + "learning_rate": 5.837737843551797e-06, + "loss": 0.0304, + "step": 16720 + }, + { + "epoch": 17.68, + "learning_rate": 5.832452431289641e-06, + "loss": 0.0167, + "step": 16722 + }, + { + "epoch": 17.68, + "learning_rate": 5.827167019027484e-06, + "loss": 0.0008, + "step": 16724 + }, + { + "epoch": 17.68, + "learning_rate": 5.821881606765328e-06, + "loss": 0.0021, + "step": 16726 + }, + { + "epoch": 17.68, + "learning_rate": 5.816596194503171e-06, + "loss": 0.0056, + "step": 16728 + }, + { + "epoch": 17.68, + "learning_rate": 5.811310782241016e-06, + "loss": 0.0006, + "step": 16730 + }, + { + "epoch": 17.69, + "learning_rate": 5.806025369978858e-06, + "loss": 0.0135, + "step": 16732 + }, + { + "epoch": 17.69, + "learning_rate": 5.800739957716702e-06, + "loss": 0.0047, + "step": 16734 + }, + { + "epoch": 17.69, + "learning_rate": 5.795454545454545e-06, + "loss": 0.0223, + "step": 16736 + }, + { + "epoch": 17.69, + "learning_rate": 5.790169133192389e-06, + "loss": 0.0008, + "step": 16738 + }, + { + "epoch": 17.7, + "learning_rate": 5.7848837209302324e-06, + "loss": 0.0048, + "step": 16740 + }, + { + "epoch": 17.7, + "learning_rate": 5.779598308668077e-06, + "loss": 0.0023, + "step": 16742 + }, + { + "epoch": 17.7, + "learning_rate": 5.77431289640592e-06, + "loss": 0.0018, + "step": 16744 + }, + { + "epoch": 17.7, + "learning_rate": 5.769027484143764e-06, + "loss": 0.0167, + "step": 16746 + }, + { + "epoch": 17.7, + "learning_rate": 5.763742071881607e-06, + "loss": 0.0047, + "step": 16748 + }, + { + "epoch": 17.71, + "learning_rate": 5.75845665961945e-06, + "loss": 0.0176, + "step": 16750 + }, + { + "epoch": 17.71, + "learning_rate": 5.7531712473572936e-06, + "loss": 0.0423, + "step": 16752 + }, + { + "epoch": 17.71, + "learning_rate": 5.747885835095138e-06, + "loss": 0.0035, + "step": 16754 + }, + { + "epoch": 17.71, + "learning_rate": 5.7426004228329814e-06, + "loss": 0.0093, + "step": 16756 + }, + { + "epoch": 17.71, + "learning_rate": 5.737315010570825e-06, + "loss": 0.0033, + "step": 16758 + }, + { + "epoch": 17.72, + "learning_rate": 5.7320295983086685e-06, + "loss": 0.0033, + "step": 16760 + }, + { + "epoch": 17.72, + "learning_rate": 5.726744186046512e-06, + "loss": 0.0085, + "step": 16762 + }, + { + "epoch": 17.72, + "learning_rate": 5.7214587737843555e-06, + "loss": 0.0017, + "step": 16764 + }, + { + "epoch": 17.72, + "learning_rate": 5.716173361522199e-06, + "loss": 0.0031, + "step": 16766 + }, + { + "epoch": 17.73, + "learning_rate": 5.7108879492600425e-06, + "loss": 0.0007, + "step": 16768 + }, + { + "epoch": 17.73, + "learning_rate": 5.705602536997886e-06, + "loss": 0.0165, + "step": 16770 + }, + { + "epoch": 17.73, + "learning_rate": 5.70031712473573e-06, + "loss": 0.0498, + "step": 16772 + }, + { + "epoch": 17.73, + "learning_rate": 5.695031712473573e-06, + "loss": 0.0039, + "step": 16774 + }, + { + "epoch": 17.73, + "learning_rate": 5.689746300211417e-06, + "loss": 0.0053, + "step": 16776 + }, + { + "epoch": 17.74, + "learning_rate": 5.68446088794926e-06, + "loss": 0.0161, + "step": 16778 + }, + { + "epoch": 17.74, + "learning_rate": 5.679175475687104e-06, + "loss": 0.0134, + "step": 16780 + }, + { + "epoch": 17.74, + "learning_rate": 5.673890063424948e-06, + "loss": 0.0224, + "step": 16782 + }, + { + "epoch": 17.74, + "learning_rate": 5.668604651162791e-06, + "loss": 0.0039, + "step": 16784 + }, + { + "epoch": 17.74, + "learning_rate": 5.663319238900634e-06, + "loss": 0.0132, + "step": 16786 + }, + { + "epoch": 17.75, + "learning_rate": 5.658033826638478e-06, + "loss": 0.0004, + "step": 16788 + }, + { + "epoch": 17.75, + "learning_rate": 5.652748414376321e-06, + "loss": 0.0033, + "step": 16790 + }, + { + "epoch": 17.75, + "learning_rate": 5.647463002114165e-06, + "loss": 0.0037, + "step": 16792 + }, + { + "epoch": 17.75, + "learning_rate": 5.642177589852009e-06, + "loss": 0.0128, + "step": 16794 + }, + { + "epoch": 17.75, + "learning_rate": 5.636892177589853e-06, + "loss": 0.009, + "step": 16796 + }, + { + "epoch": 17.76, + "learning_rate": 5.631606765327696e-06, + "loss": 0.005, + "step": 16798 + }, + { + "epoch": 17.76, + "learning_rate": 5.62632135306554e-06, + "loss": 0.0005, + "step": 16800 + }, + { + "epoch": 17.76, + "learning_rate": 5.621035940803382e-06, + "loss": 0.0268, + "step": 16802 + }, + { + "epoch": 17.76, + "learning_rate": 5.615750528541226e-06, + "loss": 0.0753, + "step": 16804 + }, + { + "epoch": 17.77, + "learning_rate": 5.61046511627907e-06, + "loss": 0.0016, + "step": 16806 + }, + { + "epoch": 17.77, + "learning_rate": 5.605179704016914e-06, + "loss": 0.009, + "step": 16808 + }, + { + "epoch": 17.77, + "learning_rate": 5.599894291754757e-06, + "loss": 0.0004, + "step": 16810 + }, + { + "epoch": 17.77, + "learning_rate": 5.594608879492601e-06, + "loss": 0.0033, + "step": 16812 + }, + { + "epoch": 17.77, + "learning_rate": 5.589323467230444e-06, + "loss": 0.0091, + "step": 16814 + }, + { + "epoch": 17.78, + "learning_rate": 5.584038054968288e-06, + "loss": 0.0008, + "step": 16816 + }, + { + "epoch": 17.78, + "learning_rate": 5.578752642706131e-06, + "loss": 0.0018, + "step": 16818 + }, + { + "epoch": 17.78, + "learning_rate": 5.573467230443975e-06, + "loss": 0.0183, + "step": 16820 + }, + { + "epoch": 17.78, + "learning_rate": 5.568181818181818e-06, + "loss": 0.0005, + "step": 16822 + }, + { + "epoch": 17.78, + "learning_rate": 5.562896405919662e-06, + "loss": 0.0205, + "step": 16824 + }, + { + "epoch": 17.79, + "learning_rate": 5.5576109936575054e-06, + "loss": 0.0003, + "step": 16826 + }, + { + "epoch": 17.79, + "learning_rate": 5.552325581395349e-06, + "loss": 0.0037, + "step": 16828 + }, + { + "epoch": 17.79, + "learning_rate": 5.5470401691331925e-06, + "loss": 0.0097, + "step": 16830 + }, + { + "epoch": 17.79, + "learning_rate": 5.541754756871036e-06, + "loss": 0.0008, + "step": 16832 + }, + { + "epoch": 17.79, + "learning_rate": 5.53646934460888e-06, + "loss": 0.0003, + "step": 16834 + }, + { + "epoch": 17.8, + "learning_rate": 5.531183932346724e-06, + "loss": 0.009, + "step": 16836 + }, + { + "epoch": 17.8, + "learning_rate": 5.5258985200845665e-06, + "loss": 0.0004, + "step": 16838 + }, + { + "epoch": 17.8, + "learning_rate": 5.52061310782241e-06, + "loss": 0.0005, + "step": 16840 + }, + { + "epoch": 17.8, + "learning_rate": 5.5153276955602536e-06, + "loss": 0.0223, + "step": 16842 + }, + { + "epoch": 17.81, + "learning_rate": 5.510042283298097e-06, + "loss": 0.0009, + "step": 16844 + }, + { + "epoch": 17.81, + "learning_rate": 5.5047568710359415e-06, + "loss": 0.003, + "step": 16846 + }, + { + "epoch": 17.81, + "learning_rate": 5.499471458773785e-06, + "loss": 0.0013, + "step": 16848 + }, + { + "epoch": 17.81, + "learning_rate": 5.4941860465116285e-06, + "loss": 0.0194, + "step": 16850 + }, + { + "epoch": 17.81, + "learning_rate": 5.488900634249472e-06, + "loss": 0.0211, + "step": 16852 + }, + { + "epoch": 17.82, + "learning_rate": 5.4836152219873155e-06, + "loss": 0.0007, + "step": 16854 + }, + { + "epoch": 17.82, + "learning_rate": 5.478329809725158e-06, + "loss": 0.0041, + "step": 16856 + }, + { + "epoch": 17.82, + "learning_rate": 5.4730443974630026e-06, + "loss": 0.0142, + "step": 16858 + }, + { + "epoch": 17.82, + "learning_rate": 5.467758985200846e-06, + "loss": 0.0151, + "step": 16860 + }, + { + "epoch": 17.82, + "learning_rate": 5.46247357293869e-06, + "loss": 0.0087, + "step": 16862 + }, + { + "epoch": 17.83, + "learning_rate": 5.457188160676533e-06, + "loss": 0.0009, + "step": 16864 + }, + { + "epoch": 17.83, + "learning_rate": 5.451902748414377e-06, + "loss": 0.0349, + "step": 16866 + }, + { + "epoch": 17.83, + "learning_rate": 5.44661733615222e-06, + "loss": 0.0041, + "step": 16868 + }, + { + "epoch": 17.83, + "learning_rate": 5.441331923890064e-06, + "loss": 0.0016, + "step": 16870 + }, + { + "epoch": 17.84, + "learning_rate": 5.436046511627907e-06, + "loss": 0.0013, + "step": 16872 + }, + { + "epoch": 17.84, + "learning_rate": 5.430761099365751e-06, + "loss": 0.0016, + "step": 16874 + }, + { + "epoch": 17.84, + "learning_rate": 5.425475687103594e-06, + "loss": 0.0088, + "step": 16876 + }, + { + "epoch": 17.84, + "learning_rate": 5.420190274841438e-06, + "loss": 0.0036, + "step": 16878 + }, + { + "epoch": 17.84, + "learning_rate": 5.414904862579281e-06, + "loss": 0.003, + "step": 16880 + }, + { + "epoch": 17.85, + "learning_rate": 5.409619450317125e-06, + "loss": 0.0217, + "step": 16882 + }, + { + "epoch": 17.85, + "learning_rate": 5.404334038054968e-06, + "loss": 0.0257, + "step": 16884 + }, + { + "epoch": 17.85, + "learning_rate": 5.399048625792813e-06, + "loss": 0.0067, + "step": 16886 + }, + { + "epoch": 17.85, + "learning_rate": 5.393763213530656e-06, + "loss": 0.0016, + "step": 16888 + }, + { + "epoch": 17.85, + "learning_rate": 5.388477801268499e-06, + "loss": 0.0016, + "step": 16890 + }, + { + "epoch": 17.86, + "learning_rate": 5.383192389006342e-06, + "loss": 0.001, + "step": 16892 + }, + { + "epoch": 17.86, + "learning_rate": 5.377906976744186e-06, + "loss": 0.0179, + "step": 16894 + }, + { + "epoch": 17.86, + "learning_rate": 5.372621564482029e-06, + "loss": 0.0012, + "step": 16896 + }, + { + "epoch": 17.86, + "learning_rate": 5.367336152219874e-06, + "loss": 0.0012, + "step": 16898 + }, + { + "epoch": 17.86, + "learning_rate": 5.362050739957717e-06, + "loss": 0.0142, + "step": 16900 + }, + { + "epoch": 17.87, + "learning_rate": 5.356765327695561e-06, + "loss": 0.0005, + "step": 16902 + }, + { + "epoch": 17.87, + "learning_rate": 5.351479915433404e-06, + "loss": 0.0021, + "step": 16904 + }, + { + "epoch": 17.87, + "learning_rate": 5.346194503171248e-06, + "loss": 0.0076, + "step": 16906 + }, + { + "epoch": 17.87, + "learning_rate": 5.3409090909090905e-06, + "loss": 0.0066, + "step": 16908 + }, + { + "epoch": 17.88, + "learning_rate": 5.335623678646935e-06, + "loss": 0.0152, + "step": 16910 + }, + { + "epoch": 17.88, + "learning_rate": 5.330338266384778e-06, + "loss": 0.0005, + "step": 16912 + }, + { + "epoch": 17.88, + "learning_rate": 5.325052854122622e-06, + "loss": 0.017, + "step": 16914 + }, + { + "epoch": 17.88, + "learning_rate": 5.3197674418604654e-06, + "loss": 0.0011, + "step": 16916 + }, + { + "epoch": 17.88, + "learning_rate": 5.314482029598309e-06, + "loss": 0.0013, + "step": 16918 + }, + { + "epoch": 17.89, + "learning_rate": 5.3091966173361525e-06, + "loss": 0.0104, + "step": 16920 + }, + { + "epoch": 17.89, + "learning_rate": 5.303911205073996e-06, + "loss": 0.0287, + "step": 16922 + }, + { + "epoch": 17.89, + "learning_rate": 5.2986257928118395e-06, + "loss": 0.0144, + "step": 16924 + }, + { + "epoch": 17.89, + "learning_rate": 5.293340380549683e-06, + "loss": 0.0144, + "step": 16926 + }, + { + "epoch": 17.89, + "learning_rate": 5.2880549682875265e-06, + "loss": 0.0034, + "step": 16928 + }, + { + "epoch": 17.9, + "learning_rate": 5.28276955602537e-06, + "loss": 0.0273, + "step": 16930 + }, + { + "epoch": 17.9, + "learning_rate": 5.277484143763214e-06, + "loss": 0.0043, + "step": 16932 + }, + { + "epoch": 17.9, + "learning_rate": 5.272198731501057e-06, + "loss": 0.0009, + "step": 16934 + }, + { + "epoch": 17.9, + "learning_rate": 5.266913319238901e-06, + "loss": 0.0027, + "step": 16936 + }, + { + "epoch": 17.9, + "learning_rate": 5.261627906976745e-06, + "loss": 0.0109, + "step": 16938 + }, + { + "epoch": 17.91, + "learning_rate": 5.2563424947145885e-06, + "loss": 0.0119, + "step": 16940 + }, + { + "epoch": 17.91, + "learning_rate": 5.251057082452432e-06, + "loss": 0.0007, + "step": 16942 + }, + { + "epoch": 17.91, + "learning_rate": 5.245771670190275e-06, + "loss": 0.0151, + "step": 16944 + }, + { + "epoch": 17.91, + "learning_rate": 5.240486257928118e-06, + "loss": 0.0134, + "step": 16946 + }, + { + "epoch": 17.92, + "learning_rate": 5.235200845665962e-06, + "loss": 0.0042, + "step": 16948 + }, + { + "epoch": 17.92, + "learning_rate": 5.229915433403806e-06, + "loss": 0.0037, + "step": 16950 + }, + { + "epoch": 17.92, + "learning_rate": 5.22463002114165e-06, + "loss": 0.002, + "step": 16952 + }, + { + "epoch": 17.92, + "learning_rate": 5.219344608879493e-06, + "loss": 0.0198, + "step": 16954 + }, + { + "epoch": 17.92, + "learning_rate": 5.214059196617337e-06, + "loss": 0.0402, + "step": 16956 + }, + { + "epoch": 17.93, + "learning_rate": 5.20877378435518e-06, + "loss": 0.0003, + "step": 16958 + }, + { + "epoch": 17.93, + "learning_rate": 5.203488372093023e-06, + "loss": 0.0013, + "step": 16960 + }, + { + "epoch": 17.93, + "learning_rate": 5.198202959830867e-06, + "loss": 0.0019, + "step": 16962 + }, + { + "epoch": 17.93, + "learning_rate": 5.192917547568711e-06, + "loss": 0.0023, + "step": 16964 + }, + { + "epoch": 17.93, + "learning_rate": 5.187632135306554e-06, + "loss": 0.0011, + "step": 16966 + }, + { + "epoch": 17.94, + "learning_rate": 5.182346723044398e-06, + "loss": 0.0008, + "step": 16968 + }, + { + "epoch": 17.94, + "learning_rate": 5.177061310782241e-06, + "loss": 0.0268, + "step": 16970 + }, + { + "epoch": 17.94, + "learning_rate": 5.171775898520085e-06, + "loss": 0.0004, + "step": 16972 + }, + { + "epoch": 17.94, + "learning_rate": 5.166490486257928e-06, + "loss": 0.0063, + "step": 16974 + }, + { + "epoch": 17.95, + "learning_rate": 5.161205073995772e-06, + "loss": 0.0004, + "step": 16976 + }, + { + "epoch": 17.95, + "learning_rate": 5.155919661733615e-06, + "loss": 0.0024, + "step": 16978 + }, + { + "epoch": 17.95, + "learning_rate": 5.150634249471459e-06, + "loss": 0.0006, + "step": 16980 + }, + { + "epoch": 17.95, + "learning_rate": 5.145348837209302e-06, + "loss": 0.0141, + "step": 16982 + }, + { + "epoch": 17.95, + "learning_rate": 5.140063424947146e-06, + "loss": 0.0014, + "step": 16984 + }, + { + "epoch": 17.96, + "learning_rate": 5.134778012684989e-06, + "loss": 0.0016, + "step": 16986 + }, + { + "epoch": 17.96, + "learning_rate": 5.129492600422833e-06, + "loss": 0.0181, + "step": 16988 + }, + { + "epoch": 17.96, + "learning_rate": 5.124207188160677e-06, + "loss": 0.0015, + "step": 16990 + }, + { + "epoch": 17.96, + "learning_rate": 5.118921775898521e-06, + "loss": 0.0016, + "step": 16992 + }, + { + "epoch": 17.96, + "learning_rate": 5.113636363636364e-06, + "loss": 0.0005, + "step": 16994 + }, + { + "epoch": 17.97, + "learning_rate": 5.108350951374207e-06, + "loss": 0.0269, + "step": 16996 + }, + { + "epoch": 17.97, + "learning_rate": 5.1030655391120505e-06, + "loss": 0.0113, + "step": 16998 + }, + { + "epoch": 17.97, + "learning_rate": 5.097780126849894e-06, + "loss": 0.0008, + "step": 17000 + }, + { + "epoch": 17.97, + "eval_cer": 0.057395269307495014, + "eval_loss": 0.671505331993103, + "eval_runtime": 126.1941, + "eval_samples_per_second": 6.664, + "eval_steps_per_second": 0.84, + "step": 17000 + }, + { + "epoch": 17.97, + "learning_rate": 5.092494714587738e-06, + "loss": 0.0013, + "step": 17002 + }, + { + "epoch": 17.97, + "learning_rate": 5.087209302325582e-06, + "loss": 0.0057, + "step": 17004 + }, + { + "epoch": 17.98, + "learning_rate": 5.0819238900634255e-06, + "loss": 0.0048, + "step": 17006 + }, + { + "epoch": 17.98, + "learning_rate": 5.076638477801269e-06, + "loss": 0.0005, + "step": 17008 + }, + { + "epoch": 17.98, + "learning_rate": 5.0713530655391125e-06, + "loss": 0.0004, + "step": 17010 + }, + { + "epoch": 17.98, + "learning_rate": 5.066067653276956e-06, + "loss": 0.0017, + "step": 17012 + }, + { + "epoch": 17.99, + "learning_rate": 5.0607822410147995e-06, + "loss": 0.0067, + "step": 17014 + }, + { + "epoch": 17.99, + "learning_rate": 5.055496828752643e-06, + "loss": 0.0015, + "step": 17016 + }, + { + "epoch": 17.99, + "learning_rate": 5.0502114164904866e-06, + "loss": 0.0008, + "step": 17018 + }, + { + "epoch": 17.99, + "learning_rate": 5.04492600422833e-06, + "loss": 0.0005, + "step": 17020 + }, + { + "epoch": 17.99, + "learning_rate": 5.039640591966174e-06, + "loss": 0.0115, + "step": 17022 + }, + { + "epoch": 18.0, + "learning_rate": 5.034355179704017e-06, + "loss": 0.0005, + "step": 17024 + }, + { + "epoch": 18.0, + "learning_rate": 5.029069767441861e-06, + "loss": 0.0053, + "step": 17026 + }, + { + "epoch": 18.0, + "learning_rate": 5.023784355179704e-06, + "loss": 0.011, + "step": 17028 + }, + { + "epoch": 18.0, + "learning_rate": 5.018498942917548e-06, + "loss": 0.0015, + "step": 17030 + }, + { + "epoch": 18.0, + "learning_rate": 5.013213530655391e-06, + "loss": 0.0004, + "step": 17032 + }, + { + "epoch": 18.01, + "learning_rate": 5.007928118393235e-06, + "loss": 0.0022, + "step": 17034 + }, + { + "epoch": 18.01, + "learning_rate": 5.002642706131078e-06, + "loss": 0.0011, + "step": 17036 + }, + { + "epoch": 18.01, + "learning_rate": 4.997357293868922e-06, + "loss": 0.001, + "step": 17038 + }, + { + "epoch": 18.01, + "learning_rate": 4.992071881606765e-06, + "loss": 0.025, + "step": 17040 + }, + { + "epoch": 18.01, + "learning_rate": 4.98678646934461e-06, + "loss": 0.011, + "step": 17042 + }, + { + "epoch": 18.02, + "learning_rate": 4.981501057082453e-06, + "loss": 0.0004, + "step": 17044 + }, + { + "epoch": 18.02, + "learning_rate": 4.976215644820297e-06, + "loss": 0.0018, + "step": 17046 + }, + { + "epoch": 18.02, + "learning_rate": 4.970930232558139e-06, + "loss": 0.0041, + "step": 17048 + }, + { + "epoch": 18.02, + "learning_rate": 4.965644820295983e-06, + "loss": 0.0008, + "step": 17050 + }, + { + "epoch": 18.03, + "learning_rate": 4.960359408033826e-06, + "loss": 0.0003, + "step": 17052 + }, + { + "epoch": 18.03, + "learning_rate": 4.955073995771671e-06, + "loss": 0.0204, + "step": 17054 + }, + { + "epoch": 18.03, + "learning_rate": 4.949788583509514e-06, + "loss": 0.0025, + "step": 17056 + }, + { + "epoch": 18.03, + "learning_rate": 4.944503171247358e-06, + "loss": 0.025, + "step": 17058 + }, + { + "epoch": 18.03, + "learning_rate": 4.939217758985201e-06, + "loss": 0.0006, + "step": 17060 + }, + { + "epoch": 18.04, + "learning_rate": 4.933932346723045e-06, + "loss": 0.0175, + "step": 17062 + }, + { + "epoch": 18.04, + "learning_rate": 4.928646934460888e-06, + "loss": 0.0113, + "step": 17064 + }, + { + "epoch": 18.04, + "learning_rate": 4.923361522198732e-06, + "loss": 0.0055, + "step": 17066 + }, + { + "epoch": 18.04, + "learning_rate": 4.918076109936575e-06, + "loss": 0.0003, + "step": 17068 + }, + { + "epoch": 18.04, + "learning_rate": 4.912790697674419e-06, + "loss": 0.0046, + "step": 17070 + }, + { + "epoch": 18.05, + "learning_rate": 4.907505285412262e-06, + "loss": 0.0007, + "step": 17072 + }, + { + "epoch": 18.05, + "learning_rate": 4.902219873150106e-06, + "loss": 0.0011, + "step": 17074 + }, + { + "epoch": 18.05, + "learning_rate": 4.8969344608879494e-06, + "loss": 0.0246, + "step": 17076 + }, + { + "epoch": 18.05, + "learning_rate": 4.891649048625793e-06, + "loss": 0.0042, + "step": 17078 + }, + { + "epoch": 18.05, + "learning_rate": 4.8863636363636365e-06, + "loss": 0.0019, + "step": 17080 + }, + { + "epoch": 18.06, + "learning_rate": 4.881078224101481e-06, + "loss": 0.0004, + "step": 17082 + }, + { + "epoch": 18.06, + "learning_rate": 4.8757928118393235e-06, + "loss": 0.028, + "step": 17084 + }, + { + "epoch": 18.06, + "learning_rate": 4.870507399577167e-06, + "loss": 0.0627, + "step": 17086 + }, + { + "epoch": 18.06, + "learning_rate": 4.8652219873150105e-06, + "loss": 0.0159, + "step": 17088 + }, + { + "epoch": 18.07, + "learning_rate": 4.859936575052854e-06, + "loss": 0.0006, + "step": 17090 + }, + { + "epoch": 18.07, + "learning_rate": 4.854651162790698e-06, + "loss": 0.0037, + "step": 17092 + }, + { + "epoch": 18.07, + "learning_rate": 4.849365750528542e-06, + "loss": 0.0007, + "step": 17094 + }, + { + "epoch": 18.07, + "learning_rate": 4.8440803382663855e-06, + "loss": 0.0038, + "step": 17096 + }, + { + "epoch": 18.07, + "learning_rate": 4.838794926004229e-06, + "loss": 0.0006, + "step": 17098 + }, + { + "epoch": 18.08, + "learning_rate": 4.8335095137420725e-06, + "loss": 0.0147, + "step": 17100 + }, + { + "epoch": 18.08, + "learning_rate": 4.828224101479915e-06, + "loss": 0.0053, + "step": 17102 + }, + { + "epoch": 18.08, + "learning_rate": 4.822938689217759e-06, + "loss": 0.0038, + "step": 17104 + }, + { + "epoch": 18.08, + "learning_rate": 4.817653276955603e-06, + "loss": 0.0136, + "step": 17106 + }, + { + "epoch": 18.08, + "learning_rate": 4.812367864693447e-06, + "loss": 0.0513, + "step": 17108 + }, + { + "epoch": 18.09, + "learning_rate": 4.80708245243129e-06, + "loss": 0.0018, + "step": 17110 + }, + { + "epoch": 18.09, + "learning_rate": 4.801797040169134e-06, + "loss": 0.0075, + "step": 17112 + }, + { + "epoch": 18.09, + "learning_rate": 4.796511627906977e-06, + "loss": 0.0059, + "step": 17114 + }, + { + "epoch": 18.09, + "learning_rate": 4.791226215644821e-06, + "loss": 0.0047, + "step": 17116 + }, + { + "epoch": 18.1, + "learning_rate": 4.785940803382663e-06, + "loss": 0.0012, + "step": 17118 + }, + { + "epoch": 18.1, + "learning_rate": 4.780655391120508e-06, + "loss": 0.0162, + "step": 17120 + }, + { + "epoch": 18.1, + "learning_rate": 4.775369978858351e-06, + "loss": 0.0011, + "step": 17122 + }, + { + "epoch": 18.1, + "learning_rate": 4.770084566596195e-06, + "loss": 0.0051, + "step": 17124 + }, + { + "epoch": 18.1, + "learning_rate": 4.764799154334038e-06, + "loss": 0.0012, + "step": 17126 + }, + { + "epoch": 18.11, + "learning_rate": 4.759513742071882e-06, + "loss": 0.0008, + "step": 17128 + }, + { + "epoch": 18.11, + "learning_rate": 4.754228329809725e-06, + "loss": 0.0004, + "step": 17130 + }, + { + "epoch": 18.11, + "learning_rate": 4.748942917547569e-06, + "loss": 0.0211, + "step": 17132 + }, + { + "epoch": 18.11, + "learning_rate": 4.743657505285413e-06, + "loss": 0.0265, + "step": 17134 + }, + { + "epoch": 18.11, + "learning_rate": 4.738372093023256e-06, + "loss": 0.0032, + "step": 17136 + }, + { + "epoch": 18.12, + "learning_rate": 4.733086680761099e-06, + "loss": 0.0003, + "step": 17138 + }, + { + "epoch": 18.12, + "learning_rate": 4.727801268498943e-06, + "loss": 0.01, + "step": 17140 + }, + { + "epoch": 18.12, + "learning_rate": 4.722515856236786e-06, + "loss": 0.0005, + "step": 17142 + }, + { + "epoch": 18.12, + "learning_rate": 4.71723044397463e-06, + "loss": 0.0007, + "step": 17144 + }, + { + "epoch": 18.12, + "learning_rate": 4.711945031712474e-06, + "loss": 0.0078, + "step": 17146 + }, + { + "epoch": 18.13, + "learning_rate": 4.706659619450318e-06, + "loss": 0.0009, + "step": 17148 + }, + { + "epoch": 18.13, + "learning_rate": 4.701374207188161e-06, + "loss": 0.0004, + "step": 17150 + }, + { + "epoch": 18.13, + "learning_rate": 4.696088794926005e-06, + "loss": 0.0005, + "step": 17152 + }, + { + "epoch": 18.13, + "learning_rate": 4.6908033826638475e-06, + "loss": 0.0021, + "step": 17154 + }, + { + "epoch": 18.14, + "learning_rate": 4.685517970401691e-06, + "loss": 0.002, + "step": 17156 + }, + { + "epoch": 18.14, + "learning_rate": 4.6802325581395345e-06, + "loss": 0.0031, + "step": 17158 + }, + { + "epoch": 18.14, + "learning_rate": 4.674947145877379e-06, + "loss": 0.0044, + "step": 17160 + }, + { + "epoch": 18.14, + "learning_rate": 4.669661733615222e-06, + "loss": 0.0011, + "step": 17162 + }, + { + "epoch": 18.14, + "learning_rate": 4.664376321353066e-06, + "loss": 0.004, + "step": 17164 + }, + { + "epoch": 18.15, + "learning_rate": 4.6590909090909095e-06, + "loss": 0.0062, + "step": 17166 + }, + { + "epoch": 18.15, + "learning_rate": 4.653805496828753e-06, + "loss": 0.0312, + "step": 17168 + }, + { + "epoch": 18.15, + "learning_rate": 4.6485200845665965e-06, + "loss": 0.0007, + "step": 17170 + }, + { + "epoch": 18.15, + "learning_rate": 4.64323467230444e-06, + "loss": 0.0013, + "step": 17172 + }, + { + "epoch": 18.15, + "learning_rate": 4.6379492600422835e-06, + "loss": 0.0014, + "step": 17174 + }, + { + "epoch": 18.16, + "learning_rate": 4.632663847780127e-06, + "loss": 0.0465, + "step": 17176 + }, + { + "epoch": 18.16, + "learning_rate": 4.6273784355179706e-06, + "loss": 0.0061, + "step": 17178 + }, + { + "epoch": 18.16, + "learning_rate": 4.622093023255814e-06, + "loss": 0.0014, + "step": 17180 + }, + { + "epoch": 18.16, + "learning_rate": 4.616807610993658e-06, + "loss": 0.0049, + "step": 17182 + }, + { + "epoch": 18.16, + "learning_rate": 4.611522198731501e-06, + "loss": 0.0227, + "step": 17184 + }, + { + "epoch": 18.17, + "learning_rate": 4.6062367864693455e-06, + "loss": 0.0047, + "step": 17186 + }, + { + "epoch": 18.17, + "learning_rate": 4.600951374207189e-06, + "loss": 0.0009, + "step": 17188 + }, + { + "epoch": 18.17, + "learning_rate": 4.595665961945032e-06, + "loss": 0.0007, + "step": 17190 + }, + { + "epoch": 18.17, + "learning_rate": 4.590380549682875e-06, + "loss": 0.0038, + "step": 17192 + }, + { + "epoch": 18.18, + "learning_rate": 4.585095137420719e-06, + "loss": 0.0075, + "step": 17194 + }, + { + "epoch": 18.18, + "learning_rate": 4.579809725158562e-06, + "loss": 0.001, + "step": 17196 + }, + { + "epoch": 18.18, + "learning_rate": 4.574524312896407e-06, + "loss": 0.0007, + "step": 17198 + }, + { + "epoch": 18.18, + "learning_rate": 4.56923890063425e-06, + "loss": 0.0105, + "step": 17200 + }, + { + "epoch": 18.18, + "learning_rate": 4.563953488372094e-06, + "loss": 0.0005, + "step": 17202 + }, + { + "epoch": 18.19, + "learning_rate": 4.558668076109937e-06, + "loss": 0.0021, + "step": 17204 + }, + { + "epoch": 18.19, + "learning_rate": 4.55338266384778e-06, + "loss": 0.001, + "step": 17206 + }, + { + "epoch": 18.19, + "learning_rate": 4.548097251585623e-06, + "loss": 0.0008, + "step": 17208 + }, + { + "epoch": 18.19, + "learning_rate": 4.542811839323467e-06, + "loss": 0.0007, + "step": 17210 + }, + { + "epoch": 18.19, + "learning_rate": 4.537526427061311e-06, + "loss": 0.0013, + "step": 17212 + }, + { + "epoch": 18.2, + "learning_rate": 4.532241014799155e-06, + "loss": 0.0004, + "step": 17214 + }, + { + "epoch": 18.2, + "learning_rate": 4.526955602536998e-06, + "loss": 0.004, + "step": 17216 + }, + { + "epoch": 18.2, + "learning_rate": 4.521670190274842e-06, + "loss": 0.0193, + "step": 17218 + }, + { + "epoch": 18.2, + "learning_rate": 4.516384778012685e-06, + "loss": 0.002, + "step": 17220 + }, + { + "epoch": 18.21, + "learning_rate": 4.511099365750529e-06, + "loss": 0.0038, + "step": 17222 + }, + { + "epoch": 18.21, + "learning_rate": 4.505813953488372e-06, + "loss": 0.0089, + "step": 17224 + }, + { + "epoch": 18.21, + "learning_rate": 4.500528541226216e-06, + "loss": 0.0007, + "step": 17226 + }, + { + "epoch": 18.21, + "learning_rate": 4.495243128964059e-06, + "loss": 0.0006, + "step": 17228 + }, + { + "epoch": 18.21, + "learning_rate": 4.489957716701903e-06, + "loss": 0.0071, + "step": 17230 + }, + { + "epoch": 18.22, + "learning_rate": 4.484672304439746e-06, + "loss": 0.0081, + "step": 17232 + }, + { + "epoch": 18.22, + "learning_rate": 4.47938689217759e-06, + "loss": 0.0114, + "step": 17234 + }, + { + "epoch": 18.22, + "learning_rate": 4.4741014799154334e-06, + "loss": 0.0005, + "step": 17236 + }, + { + "epoch": 18.22, + "learning_rate": 4.468816067653278e-06, + "loss": 0.0018, + "step": 17238 + }, + { + "epoch": 18.22, + "learning_rate": 4.463530655391121e-06, + "loss": 0.0116, + "step": 17240 + }, + { + "epoch": 18.23, + "learning_rate": 4.458245243128964e-06, + "loss": 0.001, + "step": 17242 + }, + { + "epoch": 18.23, + "learning_rate": 4.4529598308668075e-06, + "loss": 0.0005, + "step": 17244 + }, + { + "epoch": 18.23, + "learning_rate": 4.447674418604651e-06, + "loss": 0.0117, + "step": 17246 + }, + { + "epoch": 18.23, + "learning_rate": 4.4423890063424945e-06, + "loss": 0.0073, + "step": 17248 + }, + { + "epoch": 18.23, + "learning_rate": 4.437103594080338e-06, + "loss": 0.0003, + "step": 17250 + }, + { + "epoch": 18.24, + "learning_rate": 4.4318181818181824e-06, + "loss": 0.0015, + "step": 17252 + }, + { + "epoch": 18.24, + "learning_rate": 4.426532769556026e-06, + "loss": 0.0032, + "step": 17254 + }, + { + "epoch": 18.24, + "learning_rate": 4.4212473572938695e-06, + "loss": 0.0025, + "step": 17256 + }, + { + "epoch": 18.24, + "learning_rate": 4.415961945031713e-06, + "loss": 0.0003, + "step": 17258 + }, + { + "epoch": 18.25, + "learning_rate": 4.410676532769556e-06, + "loss": 0.0003, + "step": 17260 + }, + { + "epoch": 18.25, + "learning_rate": 4.405391120507399e-06, + "loss": 0.0066, + "step": 17262 + }, + { + "epoch": 18.25, + "learning_rate": 4.4001057082452435e-06, + "loss": 0.0071, + "step": 17264 + }, + { + "epoch": 18.25, + "learning_rate": 4.394820295983087e-06, + "loss": 0.0018, + "step": 17266 + }, + { + "epoch": 18.25, + "learning_rate": 4.389534883720931e-06, + "loss": 0.0005, + "step": 17268 + }, + { + "epoch": 18.26, + "learning_rate": 4.384249471458774e-06, + "loss": 0.0029, + "step": 17270 + }, + { + "epoch": 18.26, + "learning_rate": 4.378964059196618e-06, + "loss": 0.0448, + "step": 17272 + }, + { + "epoch": 18.26, + "learning_rate": 4.373678646934461e-06, + "loss": 0.0132, + "step": 17274 + }, + { + "epoch": 18.26, + "learning_rate": 4.368393234672305e-06, + "loss": 0.0203, + "step": 17276 + }, + { + "epoch": 18.26, + "learning_rate": 4.363107822410148e-06, + "loss": 0.0503, + "step": 17278 + }, + { + "epoch": 18.27, + "learning_rate": 4.357822410147992e-06, + "loss": 0.0411, + "step": 17280 + }, + { + "epoch": 18.27, + "learning_rate": 4.352536997885835e-06, + "loss": 0.0203, + "step": 17282 + }, + { + "epoch": 18.27, + "learning_rate": 4.347251585623679e-06, + "loss": 0.0053, + "step": 17284 + }, + { + "epoch": 18.27, + "learning_rate": 4.341966173361522e-06, + "loss": 0.0148, + "step": 17286 + }, + { + "epoch": 18.27, + "learning_rate": 4.336680761099366e-06, + "loss": 0.016, + "step": 17288 + }, + { + "epoch": 18.28, + "learning_rate": 4.33139534883721e-06, + "loss": 0.0002, + "step": 17290 + }, + { + "epoch": 18.28, + "learning_rate": 4.326109936575054e-06, + "loss": 0.0004, + "step": 17292 + }, + { + "epoch": 18.28, + "learning_rate": 4.320824524312896e-06, + "loss": 0.0252, + "step": 17294 + }, + { + "epoch": 18.28, + "learning_rate": 4.31553911205074e-06, + "loss": 0.0075, + "step": 17296 + }, + { + "epoch": 18.29, + "learning_rate": 4.310253699788583e-06, + "loss": 0.0197, + "step": 17298 + }, + { + "epoch": 18.29, + "learning_rate": 4.304968287526427e-06, + "loss": 0.0009, + "step": 17300 + }, + { + "epoch": 18.29, + "learning_rate": 4.29968287526427e-06, + "loss": 0.0005, + "step": 17302 + }, + { + "epoch": 18.29, + "learning_rate": 4.294397463002115e-06, + "loss": 0.001, + "step": 17304 + }, + { + "epoch": 18.29, + "learning_rate": 4.289112050739958e-06, + "loss": 0.0006, + "step": 17306 + }, + { + "epoch": 18.3, + "learning_rate": 4.283826638477802e-06, + "loss": 0.0085, + "step": 17308 + }, + { + "epoch": 18.3, + "learning_rate": 4.278541226215645e-06, + "loss": 0.0671, + "step": 17310 + }, + { + "epoch": 18.3, + "learning_rate": 4.273255813953488e-06, + "loss": 0.0034, + "step": 17312 + }, + { + "epoch": 18.3, + "learning_rate": 4.2679704016913315e-06, + "loss": 0.0012, + "step": 17314 + }, + { + "epoch": 18.3, + "learning_rate": 4.262684989429176e-06, + "loss": 0.0012, + "step": 17316 + }, + { + "epoch": 18.31, + "learning_rate": 4.257399577167019e-06, + "loss": 0.0005, + "step": 17318 + }, + { + "epoch": 18.31, + "learning_rate": 4.252114164904863e-06, + "loss": 0.0085, + "step": 17320 + }, + { + "epoch": 18.31, + "learning_rate": 4.246828752642706e-06, + "loss": 0.0024, + "step": 17322 + }, + { + "epoch": 18.31, + "learning_rate": 4.24154334038055e-06, + "loss": 0.0004, + "step": 17324 + }, + { + "epoch": 18.32, + "learning_rate": 4.2362579281183935e-06, + "loss": 0.0435, + "step": 17326 + }, + { + "epoch": 18.32, + "learning_rate": 4.230972515856237e-06, + "loss": 0.0032, + "step": 17328 + }, + { + "epoch": 18.32, + "learning_rate": 4.2256871035940805e-06, + "loss": 0.0005, + "step": 17330 + }, + { + "epoch": 18.32, + "learning_rate": 4.220401691331924e-06, + "loss": 0.0004, + "step": 17332 + }, + { + "epoch": 18.32, + "learning_rate": 4.2151162790697675e-06, + "loss": 0.0007, + "step": 17334 + }, + { + "epoch": 18.33, + "learning_rate": 4.209830866807611e-06, + "loss": 0.0007, + "step": 17336 + }, + { + "epoch": 18.33, + "learning_rate": 4.2045454545454546e-06, + "loss": 0.0017, + "step": 17338 + }, + { + "epoch": 18.33, + "learning_rate": 4.199260042283298e-06, + "loss": 0.0008, + "step": 17340 + }, + { + "epoch": 18.33, + "learning_rate": 4.193974630021142e-06, + "loss": 0.0004, + "step": 17342 + }, + { + "epoch": 18.33, + "learning_rate": 4.188689217758986e-06, + "loss": 0.0133, + "step": 17344 + }, + { + "epoch": 18.34, + "learning_rate": 4.1834038054968295e-06, + "loss": 0.0081, + "step": 17346 + }, + { + "epoch": 18.34, + "learning_rate": 4.178118393234672e-06, + "loss": 0.0019, + "step": 17348 + }, + { + "epoch": 18.34, + "learning_rate": 4.172832980972516e-06, + "loss": 0.0026, + "step": 17350 + }, + { + "epoch": 18.34, + "learning_rate": 4.167547568710359e-06, + "loss": 0.0139, + "step": 17352 + }, + { + "epoch": 18.34, + "learning_rate": 4.162262156448203e-06, + "loss": 0.0442, + "step": 17354 + }, + { + "epoch": 18.35, + "learning_rate": 4.156976744186047e-06, + "loss": 0.0009, + "step": 17356 + }, + { + "epoch": 18.35, + "learning_rate": 4.151691331923891e-06, + "loss": 0.0009, + "step": 17358 + }, + { + "epoch": 18.35, + "learning_rate": 4.146405919661734e-06, + "loss": 0.0005, + "step": 17360 + }, + { + "epoch": 18.35, + "learning_rate": 4.141120507399578e-06, + "loss": 0.0018, + "step": 17362 + }, + { + "epoch": 18.36, + "learning_rate": 4.135835095137421e-06, + "loss": 0.0144, + "step": 17364 + }, + { + "epoch": 18.36, + "learning_rate": 4.130549682875264e-06, + "loss": 0.0023, + "step": 17366 + }, + { + "epoch": 18.36, + "learning_rate": 4.125264270613108e-06, + "loss": 0.0035, + "step": 17368 + }, + { + "epoch": 18.36, + "learning_rate": 4.119978858350952e-06, + "loss": 0.0004, + "step": 17370 + }, + { + "epoch": 18.36, + "learning_rate": 4.114693446088795e-06, + "loss": 0.0549, + "step": 17372 + }, + { + "epoch": 18.37, + "learning_rate": 4.109408033826639e-06, + "loss": 0.0007, + "step": 17374 + }, + { + "epoch": 18.37, + "learning_rate": 4.104122621564482e-06, + "loss": 0.006, + "step": 17376 + }, + { + "epoch": 18.37, + "learning_rate": 4.098837209302326e-06, + "loss": 0.0004, + "step": 17378 + }, + { + "epoch": 18.37, + "learning_rate": 4.093551797040169e-06, + "loss": 0.003, + "step": 17380 + }, + { + "epoch": 18.37, + "learning_rate": 4.088266384778013e-06, + "loss": 0.0023, + "step": 17382 + }, + { + "epoch": 18.38, + "learning_rate": 4.082980972515856e-06, + "loss": 0.0006, + "step": 17384 + }, + { + "epoch": 18.38, + "learning_rate": 4.0776955602537e-06, + "loss": 0.0006, + "step": 17386 + }, + { + "epoch": 18.38, + "learning_rate": 4.072410147991543e-06, + "loss": 0.0074, + "step": 17388 + }, + { + "epoch": 18.38, + "learning_rate": 4.067124735729387e-06, + "loss": 0.0004, + "step": 17390 + }, + { + "epoch": 18.38, + "learning_rate": 4.06183932346723e-06, + "loss": 0.0004, + "step": 17392 + }, + { + "epoch": 18.39, + "learning_rate": 4.056553911205074e-06, + "loss": 0.0005, + "step": 17394 + }, + { + "epoch": 18.39, + "learning_rate": 4.051268498942918e-06, + "loss": 0.0007, + "step": 17396 + }, + { + "epoch": 18.39, + "learning_rate": 4.045983086680762e-06, + "loss": 0.0033, + "step": 17398 + }, + { + "epoch": 18.39, + "learning_rate": 4.0406976744186045e-06, + "loss": 0.0014, + "step": 17400 + }, + { + "epoch": 18.4, + "learning_rate": 4.035412262156448e-06, + "loss": 0.0006, + "step": 17402 + }, + { + "epoch": 18.4, + "learning_rate": 4.0301268498942915e-06, + "loss": 0.0051, + "step": 17404 + }, + { + "epoch": 18.4, + "learning_rate": 4.024841437632135e-06, + "loss": 0.0025, + "step": 17406 + }, + { + "epoch": 18.4, + "learning_rate": 4.019556025369979e-06, + "loss": 0.0002, + "step": 17408 + }, + { + "epoch": 18.4, + "learning_rate": 4.014270613107823e-06, + "loss": 0.0013, + "step": 17410 + }, + { + "epoch": 18.41, + "learning_rate": 4.0089852008456664e-06, + "loss": 0.0169, + "step": 17412 + }, + { + "epoch": 18.41, + "learning_rate": 4.00369978858351e-06, + "loss": 0.0014, + "step": 17414 + }, + { + "epoch": 18.41, + "learning_rate": 3.9984143763213535e-06, + "loss": 0.0007, + "step": 17416 + }, + { + "epoch": 18.41, + "learning_rate": 3.993128964059196e-06, + "loss": 0.0175, + "step": 17418 + }, + { + "epoch": 18.41, + "learning_rate": 3.9878435517970405e-06, + "loss": 0.0028, + "step": 17420 + }, + { + "epoch": 18.42, + "learning_rate": 3.982558139534884e-06, + "loss": 0.0155, + "step": 17422 + }, + { + "epoch": 18.42, + "learning_rate": 3.9772727272727275e-06, + "loss": 0.0009, + "step": 17424 + }, + { + "epoch": 18.42, + "learning_rate": 3.971987315010571e-06, + "loss": 0.0016, + "step": 17426 + }, + { + "epoch": 18.42, + "learning_rate": 3.966701902748415e-06, + "loss": 0.0008, + "step": 17428 + }, + { + "epoch": 18.42, + "learning_rate": 3.961416490486258e-06, + "loss": 0.0006, + "step": 17430 + }, + { + "epoch": 18.43, + "learning_rate": 3.956131078224102e-06, + "loss": 0.0017, + "step": 17432 + }, + { + "epoch": 18.43, + "learning_rate": 3.950845665961945e-06, + "loss": 0.0009, + "step": 17434 + }, + { + "epoch": 18.43, + "learning_rate": 3.945560253699789e-06, + "loss": 0.0063, + "step": 17436 + }, + { + "epoch": 18.43, + "learning_rate": 3.940274841437632e-06, + "loss": 0.0134, + "step": 17438 + }, + { + "epoch": 18.44, + "learning_rate": 3.934989429175476e-06, + "loss": 0.0009, + "step": 17440 + }, + { + "epoch": 18.44, + "learning_rate": 3.929704016913319e-06, + "loss": 0.0045, + "step": 17442 + }, + { + "epoch": 18.44, + "learning_rate": 3.924418604651163e-06, + "loss": 0.0083, + "step": 17444 + }, + { + "epoch": 18.44, + "learning_rate": 3.919133192389006e-06, + "loss": 0.0052, + "step": 17446 + }, + { + "epoch": 18.44, + "learning_rate": 3.913847780126851e-06, + "loss": 0.0009, + "step": 17448 + }, + { + "epoch": 18.45, + "learning_rate": 3.908562367864694e-06, + "loss": 0.0041, + "step": 17450 + }, + { + "epoch": 18.45, + "learning_rate": 3.903276955602537e-06, + "loss": 0.0017, + "step": 17452 + }, + { + "epoch": 18.45, + "learning_rate": 3.89799154334038e-06, + "loss": 0.0007, + "step": 17454 + }, + { + "epoch": 18.45, + "learning_rate": 3.892706131078224e-06, + "loss": 0.0005, + "step": 17456 + }, + { + "epoch": 18.45, + "learning_rate": 3.887420718816067e-06, + "loss": 0.0188, + "step": 17458 + }, + { + "epoch": 18.46, + "learning_rate": 3.882135306553912e-06, + "loss": 0.0023, + "step": 17460 + }, + { + "epoch": 18.46, + "learning_rate": 3.876849894291755e-06, + "loss": 0.0008, + "step": 17462 + }, + { + "epoch": 18.46, + "learning_rate": 3.871564482029599e-06, + "loss": 0.0056, + "step": 17464 + }, + { + "epoch": 18.46, + "learning_rate": 3.866279069767442e-06, + "loss": 0.0029, + "step": 17466 + }, + { + "epoch": 18.47, + "learning_rate": 3.860993657505286e-06, + "loss": 0.0027, + "step": 17468 + }, + { + "epoch": 18.47, + "learning_rate": 3.8557082452431285e-06, + "loss": 0.0302, + "step": 17470 + }, + { + "epoch": 18.47, + "learning_rate": 3.850422832980973e-06, + "loss": 0.0005, + "step": 17472 + }, + { + "epoch": 18.47, + "learning_rate": 3.845137420718816e-06, + "loss": 0.0003, + "step": 17474 + }, + { + "epoch": 18.47, + "learning_rate": 3.83985200845666e-06, + "loss": 0.0004, + "step": 17476 + }, + { + "epoch": 18.48, + "learning_rate": 3.834566596194503e-06, + "loss": 0.0151, + "step": 17478 + }, + { + "epoch": 18.48, + "learning_rate": 3.829281183932347e-06, + "loss": 0.0004, + "step": 17480 + }, + { + "epoch": 18.48, + "learning_rate": 3.82399577167019e-06, + "loss": 0.0031, + "step": 17482 + }, + { + "epoch": 18.48, + "learning_rate": 3.818710359408034e-06, + "loss": 0.0012, + "step": 17484 + }, + { + "epoch": 18.48, + "learning_rate": 3.813424947145878e-06, + "loss": 0.0023, + "step": 17486 + }, + { + "epoch": 18.49, + "learning_rate": 3.808139534883721e-06, + "loss": 0.0004, + "step": 17488 + }, + { + "epoch": 18.49, + "learning_rate": 3.8028541226215645e-06, + "loss": 0.0143, + "step": 17490 + }, + { + "epoch": 18.49, + "learning_rate": 3.797568710359408e-06, + "loss": 0.0003, + "step": 17492 + }, + { + "epoch": 18.49, + "learning_rate": 3.7922832980972515e-06, + "loss": 0.0016, + "step": 17494 + }, + { + "epoch": 18.49, + "learning_rate": 3.7869978858350955e-06, + "loss": 0.0011, + "step": 17496 + }, + { + "epoch": 18.5, + "learning_rate": 3.781712473572939e-06, + "loss": 0.074, + "step": 17498 + }, + { + "epoch": 18.5, + "learning_rate": 3.7764270613107825e-06, + "loss": 0.0004, + "step": 17500 + }, + { + "epoch": 18.5, + "eval_cer": 0.06377885437446566, + "eval_loss": 0.9183065891265869, + "eval_runtime": 124.5586, + "eval_samples_per_second": 6.752, + "eval_steps_per_second": 0.851, + "step": 17500 + }, + { + "epoch": 18.5, + "learning_rate": 3.771141649048626e-06, + "loss": 0.0004, + "step": 17502 + }, + { + "epoch": 18.5, + "learning_rate": 3.76585623678647e-06, + "loss": 0.001, + "step": 17504 + }, + { + "epoch": 18.51, + "learning_rate": 3.7605708245243126e-06, + "loss": 0.0004, + "step": 17506 + }, + { + "epoch": 18.51, + "learning_rate": 3.7552854122621566e-06, + "loss": 0.0011, + "step": 17508 + }, + { + "epoch": 18.51, + "learning_rate": 3.75e-06, + "loss": 0.0004, + "step": 17510 + }, + { + "epoch": 18.51, + "learning_rate": 3.7447145877378436e-06, + "loss": 0.0005, + "step": 17512 + }, + { + "epoch": 18.51, + "learning_rate": 3.739429175475687e-06, + "loss": 0.0184, + "step": 17514 + }, + { + "epoch": 18.52, + "learning_rate": 3.734143763213531e-06, + "loss": 0.0004, + "step": 17516 + }, + { + "epoch": 18.52, + "learning_rate": 3.7288583509513746e-06, + "loss": 0.0003, + "step": 17518 + }, + { + "epoch": 18.52, + "learning_rate": 3.723572938689218e-06, + "loss": 0.0005, + "step": 17520 + }, + { + "epoch": 18.52, + "learning_rate": 3.7182875264270616e-06, + "loss": 0.0016, + "step": 17522 + }, + { + "epoch": 18.52, + "learning_rate": 3.7130021141649047e-06, + "loss": 0.0025, + "step": 17524 + }, + { + "epoch": 18.53, + "learning_rate": 3.7077167019027482e-06, + "loss": 0.0025, + "step": 17526 + }, + { + "epoch": 18.53, + "learning_rate": 3.702431289640592e-06, + "loss": 0.0006, + "step": 17528 + }, + { + "epoch": 18.53, + "learning_rate": 3.6971458773784357e-06, + "loss": 0.0008, + "step": 17530 + }, + { + "epoch": 18.53, + "learning_rate": 3.6918604651162792e-06, + "loss": 0.0327, + "step": 17532 + }, + { + "epoch": 18.53, + "learning_rate": 3.6865750528541227e-06, + "loss": 0.0004, + "step": 17534 + }, + { + "epoch": 18.54, + "learning_rate": 3.6812896405919667e-06, + "loss": 0.0006, + "step": 17536 + }, + { + "epoch": 18.54, + "learning_rate": 3.67600422832981e-06, + "loss": 0.0035, + "step": 17538 + }, + { + "epoch": 18.54, + "learning_rate": 3.6707188160676533e-06, + "loss": 0.001, + "step": 17540 + }, + { + "epoch": 18.54, + "learning_rate": 3.665433403805497e-06, + "loss": 0.0108, + "step": 17542 + }, + { + "epoch": 18.55, + "learning_rate": 3.6601479915433403e-06, + "loss": 0.0286, + "step": 17544 + }, + { + "epoch": 18.55, + "learning_rate": 3.654862579281184e-06, + "loss": 0.0024, + "step": 17546 + }, + { + "epoch": 18.55, + "learning_rate": 3.649577167019028e-06, + "loss": 0.0091, + "step": 17548 + }, + { + "epoch": 18.55, + "learning_rate": 3.6442917547568713e-06, + "loss": 0.0008, + "step": 17550 + }, + { + "epoch": 18.55, + "learning_rate": 3.639006342494715e-06, + "loss": 0.0015, + "step": 17552 + }, + { + "epoch": 18.56, + "learning_rate": 3.6337209302325583e-06, + "loss": 0.0009, + "step": 17554 + }, + { + "epoch": 18.56, + "learning_rate": 3.6284355179704023e-06, + "loss": 0.001, + "step": 17556 + }, + { + "epoch": 18.56, + "learning_rate": 3.623150105708245e-06, + "loss": 0.0136, + "step": 17558 + }, + { + "epoch": 18.56, + "learning_rate": 3.617864693446089e-06, + "loss": 0.0007, + "step": 17560 + }, + { + "epoch": 18.56, + "learning_rate": 3.6125792811839324e-06, + "loss": 0.0004, + "step": 17562 + }, + { + "epoch": 18.57, + "learning_rate": 3.607293868921776e-06, + "loss": 0.0346, + "step": 17564 + }, + { + "epoch": 18.57, + "learning_rate": 3.6020084566596195e-06, + "loss": 0.0172, + "step": 17566 + }, + { + "epoch": 18.57, + "learning_rate": 3.5967230443974634e-06, + "loss": 0.0003, + "step": 17568 + }, + { + "epoch": 18.57, + "learning_rate": 3.591437632135307e-06, + "loss": 0.0003, + "step": 17570 + }, + { + "epoch": 18.58, + "learning_rate": 3.5861522198731504e-06, + "loss": 0.0004, + "step": 17572 + }, + { + "epoch": 18.58, + "learning_rate": 3.580866807610994e-06, + "loss": 0.0006, + "step": 17574 + }, + { + "epoch": 18.58, + "learning_rate": 3.575581395348837e-06, + "loss": 0.0066, + "step": 17576 + }, + { + "epoch": 18.58, + "learning_rate": 3.5702959830866806e-06, + "loss": 0.001, + "step": 17578 + }, + { + "epoch": 18.58, + "learning_rate": 3.5650105708245245e-06, + "loss": 0.0041, + "step": 17580 + }, + { + "epoch": 18.59, + "learning_rate": 3.559725158562368e-06, + "loss": 0.0005, + "step": 17582 + }, + { + "epoch": 18.59, + "learning_rate": 3.5544397463002115e-06, + "loss": 0.0005, + "step": 17584 + }, + { + "epoch": 18.59, + "learning_rate": 3.549154334038055e-06, + "loss": 0.0006, + "step": 17586 + }, + { + "epoch": 18.59, + "learning_rate": 3.543868921775899e-06, + "loss": 0.0005, + "step": 17588 + }, + { + "epoch": 18.59, + "learning_rate": 3.5385835095137425e-06, + "loss": 0.0015, + "step": 17590 + }, + { + "epoch": 18.6, + "learning_rate": 3.533298097251586e-06, + "loss": 0.0029, + "step": 17592 + }, + { + "epoch": 18.6, + "learning_rate": 3.528012684989429e-06, + "loss": 0.0004, + "step": 17594 + }, + { + "epoch": 18.6, + "learning_rate": 3.5227272727272726e-06, + "loss": 0.002, + "step": 17596 + }, + { + "epoch": 18.6, + "learning_rate": 3.517441860465116e-06, + "loss": 0.0069, + "step": 17598 + }, + { + "epoch": 18.6, + "learning_rate": 3.51215644820296e-06, + "loss": 0.0047, + "step": 17600 + }, + { + "epoch": 18.61, + "learning_rate": 3.5068710359408036e-06, + "loss": 0.0003, + "step": 17602 + }, + { + "epoch": 18.61, + "learning_rate": 3.501585623678647e-06, + "loss": 0.0008, + "step": 17604 + }, + { + "epoch": 18.61, + "learning_rate": 3.4963002114164907e-06, + "loss": 0.0004, + "step": 17606 + }, + { + "epoch": 18.61, + "learning_rate": 3.4910147991543346e-06, + "loss": 0.0092, + "step": 17608 + }, + { + "epoch": 18.62, + "learning_rate": 3.485729386892178e-06, + "loss": 0.0017, + "step": 17610 + }, + { + "epoch": 18.62, + "learning_rate": 3.4804439746300212e-06, + "loss": 0.0029, + "step": 17612 + }, + { + "epoch": 18.62, + "learning_rate": 3.4751585623678647e-06, + "loss": 0.001, + "step": 17614 + }, + { + "epoch": 18.62, + "learning_rate": 3.4698731501057083e-06, + "loss": 0.0018, + "step": 17616 + }, + { + "epoch": 18.62, + "learning_rate": 3.4645877378435518e-06, + "loss": 0.0067, + "step": 17618 + }, + { + "epoch": 18.63, + "learning_rate": 3.4593023255813957e-06, + "loss": 0.0005, + "step": 17620 + }, + { + "epoch": 18.63, + "learning_rate": 3.4540169133192392e-06, + "loss": 0.0138, + "step": 17622 + }, + { + "epoch": 18.63, + "learning_rate": 3.4487315010570828e-06, + "loss": 0.0013, + "step": 17624 + }, + { + "epoch": 18.63, + "learning_rate": 3.4434460887949263e-06, + "loss": 0.0004, + "step": 17626 + }, + { + "epoch": 18.63, + "learning_rate": 3.4381606765327694e-06, + "loss": 0.0006, + "step": 17628 + }, + { + "epoch": 18.64, + "learning_rate": 3.432875264270613e-06, + "loss": 0.0005, + "step": 17630 + }, + { + "epoch": 18.64, + "learning_rate": 3.427589852008457e-06, + "loss": 0.0012, + "step": 17632 + }, + { + "epoch": 18.64, + "learning_rate": 3.4223044397463003e-06, + "loss": 0.0254, + "step": 17634 + }, + { + "epoch": 18.64, + "learning_rate": 3.417019027484144e-06, + "loss": 0.0005, + "step": 17636 + }, + { + "epoch": 18.64, + "learning_rate": 3.4117336152219874e-06, + "loss": 0.0011, + "step": 17638 + }, + { + "epoch": 18.65, + "learning_rate": 3.4064482029598313e-06, + "loss": 0.0041, + "step": 17640 + }, + { + "epoch": 18.65, + "learning_rate": 3.401162790697675e-06, + "loss": 0.0004, + "step": 17642 + }, + { + "epoch": 18.65, + "learning_rate": 3.3958773784355184e-06, + "loss": 0.0005, + "step": 17644 + }, + { + "epoch": 18.65, + "learning_rate": 3.3905919661733615e-06, + "loss": 0.0051, + "step": 17646 + }, + { + "epoch": 18.66, + "learning_rate": 3.385306553911205e-06, + "loss": 0.0032, + "step": 17648 + }, + { + "epoch": 18.66, + "learning_rate": 3.3800211416490485e-06, + "loss": 0.0003, + "step": 17650 + }, + { + "epoch": 18.66, + "learning_rate": 3.3747357293868924e-06, + "loss": 0.004, + "step": 17652 + }, + { + "epoch": 18.66, + "learning_rate": 3.369450317124736e-06, + "loss": 0.007, + "step": 17654 + }, + { + "epoch": 18.66, + "learning_rate": 3.3641649048625795e-06, + "loss": 0.0007, + "step": 17656 + }, + { + "epoch": 18.67, + "learning_rate": 3.358879492600423e-06, + "loss": 0.0014, + "step": 17658 + }, + { + "epoch": 18.67, + "learning_rate": 3.353594080338267e-06, + "loss": 0.0004, + "step": 17660 + }, + { + "epoch": 18.67, + "learning_rate": 3.3483086680761104e-06, + "loss": 0.002, + "step": 17662 + }, + { + "epoch": 18.67, + "learning_rate": 3.3430232558139535e-06, + "loss": 0.0007, + "step": 17664 + }, + { + "epoch": 18.67, + "learning_rate": 3.337737843551797e-06, + "loss": 0.0006, + "step": 17666 + }, + { + "epoch": 18.68, + "learning_rate": 3.3324524312896406e-06, + "loss": 0.0192, + "step": 17668 + }, + { + "epoch": 18.68, + "learning_rate": 3.327167019027484e-06, + "loss": 0.0065, + "step": 17670 + }, + { + "epoch": 18.68, + "learning_rate": 3.321881606765328e-06, + "loss": 0.0002, + "step": 17672 + }, + { + "epoch": 18.68, + "learning_rate": 3.3165961945031716e-06, + "loss": 0.036, + "step": 17674 + }, + { + "epoch": 18.68, + "learning_rate": 3.311310782241015e-06, + "loss": 0.0006, + "step": 17676 + }, + { + "epoch": 18.69, + "learning_rate": 3.3060253699788586e-06, + "loss": 0.0006, + "step": 17678 + }, + { + "epoch": 18.69, + "learning_rate": 3.3007399577167025e-06, + "loss": 0.0006, + "step": 17680 + }, + { + "epoch": 18.69, + "learning_rate": 3.295454545454545e-06, + "loss": 0.0004, + "step": 17682 + }, + { + "epoch": 18.69, + "learning_rate": 3.290169133192389e-06, + "loss": 0.0024, + "step": 17684 + }, + { + "epoch": 18.7, + "learning_rate": 3.2848837209302327e-06, + "loss": 0.0004, + "step": 17686 + }, + { + "epoch": 18.7, + "learning_rate": 3.279598308668076e-06, + "loss": 0.0011, + "step": 17688 + }, + { + "epoch": 18.7, + "learning_rate": 3.2743128964059197e-06, + "loss": 0.0004, + "step": 17690 + }, + { + "epoch": 18.7, + "learning_rate": 3.2690274841437636e-06, + "loss": 0.0007, + "step": 17692 + }, + { + "epoch": 18.7, + "learning_rate": 3.263742071881607e-06, + "loss": 0.0026, + "step": 17694 + }, + { + "epoch": 18.71, + "learning_rate": 3.2584566596194507e-06, + "loss": 0.0361, + "step": 17696 + }, + { + "epoch": 18.71, + "learning_rate": 3.253171247357294e-06, + "loss": 0.0045, + "step": 17698 + }, + { + "epoch": 18.71, + "learning_rate": 3.2478858350951373e-06, + "loss": 0.0287, + "step": 17700 + }, + { + "epoch": 18.71, + "learning_rate": 3.242600422832981e-06, + "loss": 0.0095, + "step": 17702 + }, + { + "epoch": 18.71, + "learning_rate": 3.2373150105708248e-06, + "loss": 0.0004, + "step": 17704 + }, + { + "epoch": 18.72, + "learning_rate": 3.2320295983086683e-06, + "loss": 0.0014, + "step": 17706 + }, + { + "epoch": 18.72, + "learning_rate": 3.226744186046512e-06, + "loss": 0.0005, + "step": 17708 + }, + { + "epoch": 18.72, + "learning_rate": 3.2214587737843553e-06, + "loss": 0.002, + "step": 17710 + }, + { + "epoch": 18.72, + "learning_rate": 3.2161733615221992e-06, + "loss": 0.0339, + "step": 17712 + }, + { + "epoch": 18.73, + "learning_rate": 3.2108879492600428e-06, + "loss": 0.0011, + "step": 17714 + }, + { + "epoch": 18.73, + "learning_rate": 3.205602536997886e-06, + "loss": 0.0011, + "step": 17716 + }, + { + "epoch": 18.73, + "learning_rate": 3.2003171247357294e-06, + "loss": 0.0005, + "step": 17718 + }, + { + "epoch": 18.73, + "learning_rate": 3.195031712473573e-06, + "loss": 0.0005, + "step": 17720 + }, + { + "epoch": 18.73, + "learning_rate": 3.1897463002114164e-06, + "loss": 0.0006, + "step": 17722 + }, + { + "epoch": 18.74, + "learning_rate": 3.1844608879492604e-06, + "loss": 0.0003, + "step": 17724 + }, + { + "epoch": 18.74, + "learning_rate": 3.179175475687104e-06, + "loss": 0.0003, + "step": 17726 + }, + { + "epoch": 18.74, + "learning_rate": 3.1738900634249474e-06, + "loss": 0.0022, + "step": 17728 + }, + { + "epoch": 18.74, + "learning_rate": 3.168604651162791e-06, + "loss": 0.0022, + "step": 17730 + }, + { + "epoch": 18.74, + "learning_rate": 3.163319238900635e-06, + "loss": 0.0323, + "step": 17732 + }, + { + "epoch": 18.75, + "learning_rate": 3.1580338266384775e-06, + "loss": 0.0049, + "step": 17734 + }, + { + "epoch": 18.75, + "learning_rate": 3.1527484143763215e-06, + "loss": 0.0007, + "step": 17736 + }, + { + "epoch": 18.75, + "learning_rate": 3.147463002114165e-06, + "loss": 0.0311, + "step": 17738 + }, + { + "epoch": 18.75, + "learning_rate": 3.1421775898520085e-06, + "loss": 0.0039, + "step": 17740 + }, + { + "epoch": 18.75, + "learning_rate": 3.136892177589852e-06, + "loss": 0.0193, + "step": 17742 + }, + { + "epoch": 18.76, + "learning_rate": 3.131606765327696e-06, + "loss": 0.0006, + "step": 17744 + }, + { + "epoch": 18.76, + "learning_rate": 3.1263213530655395e-06, + "loss": 0.0074, + "step": 17746 + }, + { + "epoch": 18.76, + "learning_rate": 3.1210359408033826e-06, + "loss": 0.0008, + "step": 17748 + }, + { + "epoch": 18.76, + "learning_rate": 3.1157505285412265e-06, + "loss": 0.0019, + "step": 17750 + }, + { + "epoch": 18.77, + "learning_rate": 3.11046511627907e-06, + "loss": 0.0037, + "step": 17752 + }, + { + "epoch": 18.77, + "learning_rate": 3.1051797040169136e-06, + "loss": 0.0207, + "step": 17754 + }, + { + "epoch": 18.77, + "learning_rate": 3.099894291754757e-06, + "loss": 0.0131, + "step": 17756 + }, + { + "epoch": 18.77, + "learning_rate": 3.0946088794926006e-06, + "loss": 0.0034, + "step": 17758 + }, + { + "epoch": 18.77, + "learning_rate": 3.089323467230444e-06, + "loss": 0.0005, + "step": 17760 + }, + { + "epoch": 18.78, + "learning_rate": 3.0840380549682876e-06, + "loss": 0.0027, + "step": 17762 + }, + { + "epoch": 18.78, + "learning_rate": 3.0787526427061316e-06, + "loss": 0.0404, + "step": 17764 + }, + { + "epoch": 18.78, + "learning_rate": 3.0734672304439747e-06, + "loss": 0.0173, + "step": 17766 + }, + { + "epoch": 18.78, + "learning_rate": 3.068181818181818e-06, + "loss": 0.0083, + "step": 17768 + }, + { + "epoch": 18.78, + "learning_rate": 3.062896405919662e-06, + "loss": 0.0021, + "step": 17770 + }, + { + "epoch": 18.79, + "learning_rate": 3.0576109936575056e-06, + "loss": 0.0009, + "step": 17772 + }, + { + "epoch": 18.79, + "learning_rate": 3.0523255813953487e-06, + "loss": 0.0009, + "step": 17774 + }, + { + "epoch": 18.79, + "learning_rate": 3.0470401691331927e-06, + "loss": 0.0037, + "step": 17776 + }, + { + "epoch": 18.79, + "learning_rate": 3.041754756871036e-06, + "loss": 0.001, + "step": 17778 + }, + { + "epoch": 18.79, + "learning_rate": 3.0364693446088797e-06, + "loss": 0.0078, + "step": 17780 + }, + { + "epoch": 18.8, + "learning_rate": 3.0311839323467232e-06, + "loss": 0.0026, + "step": 17782 + }, + { + "epoch": 18.8, + "learning_rate": 3.0258985200845668e-06, + "loss": 0.0069, + "step": 17784 + }, + { + "epoch": 18.8, + "learning_rate": 3.0206131078224103e-06, + "loss": 0.0006, + "step": 17786 + }, + { + "epoch": 18.8, + "learning_rate": 3.015327695560254e-06, + "loss": 0.0007, + "step": 17788 + }, + { + "epoch": 18.81, + "learning_rate": 3.0100422832980977e-06, + "loss": 0.0286, + "step": 17790 + }, + { + "epoch": 18.81, + "learning_rate": 3.004756871035941e-06, + "loss": 0.0008, + "step": 17792 + }, + { + "epoch": 18.81, + "learning_rate": 2.9994714587737843e-06, + "loss": 0.0005, + "step": 17794 + }, + { + "epoch": 18.81, + "learning_rate": 2.9941860465116283e-06, + "loss": 0.0003, + "step": 17796 + }, + { + "epoch": 18.81, + "learning_rate": 2.988900634249472e-06, + "loss": 0.0008, + "step": 17798 + }, + { + "epoch": 18.82, + "learning_rate": 2.983615221987315e-06, + "loss": 0.0005, + "step": 17800 + }, + { + "epoch": 18.82, + "learning_rate": 2.978329809725159e-06, + "loss": 0.0007, + "step": 17802 + }, + { + "epoch": 18.82, + "learning_rate": 2.9730443974630024e-06, + "loss": 0.0058, + "step": 17804 + }, + { + "epoch": 18.82, + "learning_rate": 2.967758985200846e-06, + "loss": 0.0025, + "step": 17806 + }, + { + "epoch": 18.82, + "learning_rate": 2.9624735729386894e-06, + "loss": 0.0277, + "step": 17808 + }, + { + "epoch": 18.83, + "learning_rate": 2.957188160676533e-06, + "loss": 0.002, + "step": 17810 + }, + { + "epoch": 18.83, + "learning_rate": 2.9519027484143764e-06, + "loss": 0.0102, + "step": 17812 + }, + { + "epoch": 18.83, + "learning_rate": 2.94661733615222e-06, + "loss": 0.0009, + "step": 17814 + }, + { + "epoch": 18.83, + "learning_rate": 2.941331923890064e-06, + "loss": 0.0006, + "step": 17816 + }, + { + "epoch": 18.84, + "learning_rate": 2.936046511627907e-06, + "loss": 0.0009, + "step": 17818 + }, + { + "epoch": 18.84, + "learning_rate": 2.9307610993657505e-06, + "loss": 0.0026, + "step": 17820 + }, + { + "epoch": 18.84, + "learning_rate": 2.9254756871035944e-06, + "loss": 0.0004, + "step": 17822 + }, + { + "epoch": 18.84, + "learning_rate": 2.920190274841438e-06, + "loss": 0.001, + "step": 17824 + }, + { + "epoch": 18.84, + "learning_rate": 2.914904862579281e-06, + "loss": 0.0005, + "step": 17826 + }, + { + "epoch": 18.85, + "learning_rate": 2.909619450317125e-06, + "loss": 0.0021, + "step": 17828 + }, + { + "epoch": 18.85, + "learning_rate": 2.9043340380549685e-06, + "loss": 0.0007, + "step": 17830 + }, + { + "epoch": 18.85, + "learning_rate": 2.899048625792812e-06, + "loss": 0.0008, + "step": 17832 + }, + { + "epoch": 18.85, + "learning_rate": 2.8937632135306556e-06, + "loss": 0.0005, + "step": 17834 + }, + { + "epoch": 18.85, + "learning_rate": 2.888477801268499e-06, + "loss": 0.0023, + "step": 17836 + }, + { + "epoch": 18.86, + "learning_rate": 2.8831923890063426e-06, + "loss": 0.0118, + "step": 17838 + }, + { + "epoch": 18.86, + "learning_rate": 2.877906976744186e-06, + "loss": 0.0006, + "step": 17840 + }, + { + "epoch": 18.86, + "learning_rate": 2.87262156448203e-06, + "loss": 0.0119, + "step": 17842 + }, + { + "epoch": 18.86, + "learning_rate": 2.867336152219873e-06, + "loss": 0.0009, + "step": 17844 + }, + { + "epoch": 18.86, + "learning_rate": 2.8620507399577167e-06, + "loss": 0.0013, + "step": 17846 + }, + { + "epoch": 18.87, + "learning_rate": 2.8567653276955606e-06, + "loss": 0.0023, + "step": 17848 + }, + { + "epoch": 18.87, + "learning_rate": 2.851479915433404e-06, + "loss": 0.0005, + "step": 17850 + }, + { + "epoch": 18.87, + "learning_rate": 2.8461945031712476e-06, + "loss": 0.0621, + "step": 17852 + }, + { + "epoch": 18.87, + "learning_rate": 2.840909090909091e-06, + "loss": 0.0032, + "step": 17854 + }, + { + "epoch": 18.88, + "learning_rate": 2.8356236786469347e-06, + "loss": 0.005, + "step": 17856 + }, + { + "epoch": 18.88, + "learning_rate": 2.830338266384778e-06, + "loss": 0.0021, + "step": 17858 + }, + { + "epoch": 18.88, + "learning_rate": 2.8250528541226217e-06, + "loss": 0.0103, + "step": 17860 + }, + { + "epoch": 18.88, + "learning_rate": 2.8197674418604652e-06, + "loss": 0.0012, + "step": 17862 + }, + { + "epoch": 18.88, + "learning_rate": 2.8144820295983088e-06, + "loss": 0.0004, + "step": 17864 + }, + { + "epoch": 18.89, + "learning_rate": 2.8091966173361523e-06, + "loss": 0.0003, + "step": 17866 + }, + { + "epoch": 18.89, + "learning_rate": 2.8039112050739962e-06, + "loss": 0.0003, + "step": 17868 + }, + { + "epoch": 18.89, + "learning_rate": 2.7986257928118393e-06, + "loss": 0.0013, + "step": 17870 + }, + { + "epoch": 18.89, + "learning_rate": 2.793340380549683e-06, + "loss": 0.0006, + "step": 17872 + }, + { + "epoch": 18.89, + "learning_rate": 2.7880549682875268e-06, + "loss": 0.0406, + "step": 17874 + }, + { + "epoch": 18.9, + "learning_rate": 2.7827695560253703e-06, + "loss": 0.0008, + "step": 17876 + }, + { + "epoch": 18.9, + "learning_rate": 2.777484143763214e-06, + "loss": 0.0009, + "step": 17878 + }, + { + "epoch": 18.9, + "learning_rate": 2.772198731501057e-06, + "loss": 0.0007, + "step": 17880 + }, + { + "epoch": 18.9, + "learning_rate": 2.766913319238901e-06, + "loss": 0.0006, + "step": 17882 + }, + { + "epoch": 18.9, + "learning_rate": 2.7616279069767444e-06, + "loss": 0.0003, + "step": 17884 + }, + { + "epoch": 18.91, + "learning_rate": 2.756342494714588e-06, + "loss": 0.002, + "step": 17886 + }, + { + "epoch": 18.91, + "learning_rate": 2.7510570824524314e-06, + "loss": 0.007, + "step": 17888 + }, + { + "epoch": 18.91, + "learning_rate": 2.745771670190275e-06, + "loss": 0.006, + "step": 17890 + }, + { + "epoch": 18.91, + "learning_rate": 2.7404862579281184e-06, + "loss": 0.0042, + "step": 17892 + }, + { + "epoch": 18.92, + "learning_rate": 2.7352008456659624e-06, + "loss": 0.0003, + "step": 17894 + }, + { + "epoch": 18.92, + "learning_rate": 2.7299154334038055e-06, + "loss": 0.0058, + "step": 17896 + }, + { + "epoch": 18.92, + "learning_rate": 2.724630021141649e-06, + "loss": 0.0012, + "step": 17898 + }, + { + "epoch": 18.92, + "learning_rate": 2.719344608879493e-06, + "loss": 0.0025, + "step": 17900 + }, + { + "epoch": 18.92, + "learning_rate": 2.7140591966173364e-06, + "loss": 0.0011, + "step": 17902 + }, + { + "epoch": 18.93, + "learning_rate": 2.70877378435518e-06, + "loss": 0.0003, + "step": 17904 + }, + { + "epoch": 18.93, + "learning_rate": 2.703488372093023e-06, + "loss": 0.0016, + "step": 17906 + }, + { + "epoch": 18.93, + "learning_rate": 2.698202959830867e-06, + "loss": 0.0005, + "step": 17908 + }, + { + "epoch": 18.93, + "learning_rate": 2.6929175475687105e-06, + "loss": 0.0006, + "step": 17910 + }, + { + "epoch": 18.93, + "learning_rate": 2.687632135306554e-06, + "loss": 0.0004, + "step": 17912 + }, + { + "epoch": 18.94, + "learning_rate": 2.6823467230443976e-06, + "loss": 0.0141, + "step": 17914 + }, + { + "epoch": 18.94, + "learning_rate": 2.677061310782241e-06, + "loss": 0.0009, + "step": 17916 + }, + { + "epoch": 18.94, + "learning_rate": 2.6717758985200846e-06, + "loss": 0.0004, + "step": 17918 + }, + { + "epoch": 18.94, + "learning_rate": 2.6664904862579285e-06, + "loss": 0.0007, + "step": 17920 + }, + { + "epoch": 18.95, + "learning_rate": 2.661205073995772e-06, + "loss": 0.0022, + "step": 17922 + }, + { + "epoch": 18.95, + "learning_rate": 2.655919661733615e-06, + "loss": 0.0007, + "step": 17924 + }, + { + "epoch": 18.95, + "learning_rate": 2.6506342494714587e-06, + "loss": 0.0018, + "step": 17926 + }, + { + "epoch": 18.95, + "learning_rate": 2.6453488372093026e-06, + "loss": 0.0003, + "step": 17928 + }, + { + "epoch": 18.95, + "learning_rate": 2.640063424947146e-06, + "loss": 0.0008, + "step": 17930 + }, + { + "epoch": 18.96, + "learning_rate": 2.6347780126849892e-06, + "loss": 0.0012, + "step": 17932 + }, + { + "epoch": 18.96, + "learning_rate": 2.629492600422833e-06, + "loss": 0.0013, + "step": 17934 + }, + { + "epoch": 18.96, + "learning_rate": 2.6242071881606767e-06, + "loss": 0.0003, + "step": 17936 + }, + { + "epoch": 18.96, + "learning_rate": 2.61892177589852e-06, + "loss": 0.0012, + "step": 17938 + }, + { + "epoch": 18.96, + "learning_rate": 2.6136363636363637e-06, + "loss": 0.0004, + "step": 17940 + }, + { + "epoch": 18.97, + "learning_rate": 2.6083509513742072e-06, + "loss": 0.0146, + "step": 17942 + }, + { + "epoch": 18.97, + "learning_rate": 2.6030655391120508e-06, + "loss": 0.0003, + "step": 17944 + }, + { + "epoch": 18.97, + "learning_rate": 2.5977801268498947e-06, + "loss": 0.0022, + "step": 17946 + }, + { + "epoch": 18.97, + "learning_rate": 2.5924947145877382e-06, + "loss": 0.0004, + "step": 17948 + }, + { + "epoch": 18.97, + "learning_rate": 2.5872093023255813e-06, + "loss": 0.0008, + "step": 17950 + }, + { + "epoch": 18.98, + "learning_rate": 2.581923890063425e-06, + "loss": 0.0023, + "step": 17952 + }, + { + "epoch": 18.98, + "learning_rate": 2.5766384778012688e-06, + "loss": 0.0045, + "step": 17954 + }, + { + "epoch": 18.98, + "learning_rate": 2.5713530655391123e-06, + "loss": 0.0004, + "step": 17956 + }, + { + "epoch": 18.98, + "learning_rate": 2.5660676532769554e-06, + "loss": 0.0008, + "step": 17958 + }, + { + "epoch": 18.99, + "learning_rate": 2.5607822410147993e-06, + "loss": 0.0017, + "step": 17960 + }, + { + "epoch": 18.99, + "learning_rate": 2.555496828752643e-06, + "loss": 0.0004, + "step": 17962 + }, + { + "epoch": 18.99, + "learning_rate": 2.5502114164904864e-06, + "loss": 0.0005, + "step": 17964 + }, + { + "epoch": 18.99, + "learning_rate": 2.5449260042283303e-06, + "loss": 0.001, + "step": 17966 + }, + { + "epoch": 18.99, + "learning_rate": 2.5396405919661734e-06, + "loss": 0.001, + "step": 17968 + }, + { + "epoch": 19.0, + "learning_rate": 2.534355179704017e-06, + "loss": 0.0009, + "step": 17970 + }, + { + "epoch": 19.0, + "learning_rate": 2.5290697674418604e-06, + "loss": 0.0006, + "step": 17972 + }, + { + "epoch": 19.0, + "learning_rate": 2.5237843551797044e-06, + "loss": 0.0012, + "step": 17974 + }, + { + "epoch": 19.0, + "learning_rate": 2.5184989429175475e-06, + "loss": 0.0093, + "step": 17976 + }, + { + "epoch": 19.0, + "learning_rate": 2.513213530655391e-06, + "loss": 0.001, + "step": 17978 + }, + { + "epoch": 19.01, + "learning_rate": 2.507928118393235e-06, + "loss": 0.0007, + "step": 17980 + }, + { + "epoch": 19.01, + "learning_rate": 2.5026427061310784e-06, + "loss": 0.0007, + "step": 17982 + }, + { + "epoch": 19.01, + "learning_rate": 2.4973572938689215e-06, + "loss": 0.0007, + "step": 17984 + }, + { + "epoch": 19.01, + "learning_rate": 2.4920718816067655e-06, + "loss": 0.0109, + "step": 17986 + }, + { + "epoch": 19.01, + "learning_rate": 2.486786469344609e-06, + "loss": 0.0338, + "step": 17988 + }, + { + "epoch": 19.02, + "learning_rate": 2.4815010570824525e-06, + "loss": 0.0005, + "step": 17990 + }, + { + "epoch": 19.02, + "learning_rate": 2.4762156448202965e-06, + "loss": 0.0145, + "step": 17992 + }, + { + "epoch": 19.02, + "learning_rate": 2.4709302325581396e-06, + "loss": 0.0005, + "step": 17994 + }, + { + "epoch": 19.02, + "learning_rate": 2.465644820295983e-06, + "loss": 0.011, + "step": 17996 + }, + { + "epoch": 19.03, + "learning_rate": 2.4603594080338266e-06, + "loss": 0.003, + "step": 17998 + }, + { + "epoch": 19.03, + "learning_rate": 2.4550739957716705e-06, + "loss": 0.0014, + "step": 18000 + }, + { + "epoch": 19.03, + "eval_cer": 0.05528640638358507, + "eval_loss": 0.782919704914093, + "eval_runtime": 125.7137, + "eval_samples_per_second": 6.69, + "eval_steps_per_second": 0.843, + "step": 18000 + } + ], + "max_steps": 18920, + "num_train_epochs": 20, + "total_flos": 2.130711393070083e+20, + "trial_name": null, + "trial_params": null +}