{ "best_metric": null, "best_model_checkpoint": null, "epoch": 19.027484143763214, "global_step": 18000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 5e-05, "loss": 8.7413, "step": 2 }, { "epoch": 0.0, "learning_rate": 5e-05, "loss": 7.938, "step": 4 }, { "epoch": 0.01, "learning_rate": 5e-05, "loss": 8.3007, "step": 6 }, { "epoch": 0.01, "learning_rate": 4.999471458773785e-05, "loss": 7.1444, "step": 8 }, { "epoch": 0.01, "learning_rate": 4.998942917547569e-05, "loss": 9.6766, "step": 10 }, { "epoch": 0.01, "learning_rate": 4.9984143763213534e-05, "loss": 5.474, "step": 12 }, { "epoch": 0.01, "learning_rate": 4.997885835095137e-05, "loss": 4.2579, "step": 14 }, { "epoch": 0.02, "learning_rate": 4.9973572938689226e-05, "loss": 3.893, "step": 16 }, { "epoch": 0.02, "learning_rate": 4.9968287526427065e-05, "loss": 4.3634, "step": 18 }, { "epoch": 0.02, "learning_rate": 4.996300211416491e-05, "loss": 4.1838, "step": 20 }, { "epoch": 0.02, "learning_rate": 4.995771670190275e-05, "loss": 3.5253, "step": 22 }, { "epoch": 0.03, "learning_rate": 4.9952431289640597e-05, "loss": 3.3387, "step": 24 }, { "epoch": 0.03, "learning_rate": 4.9947145877378436e-05, "loss": 2.9198, "step": 26 }, { "epoch": 0.03, "learning_rate": 4.994186046511628e-05, "loss": 2.8243, "step": 28 }, { "epoch": 0.03, "learning_rate": 4.993657505285412e-05, "loss": 2.5279, "step": 30 }, { "epoch": 0.03, "learning_rate": 4.993128964059197e-05, "loss": 2.3727, "step": 32 }, { "epoch": 0.04, "learning_rate": 4.992600422832981e-05, "loss": 2.4728, "step": 34 }, { "epoch": 0.04, "learning_rate": 4.992071881606766e-05, "loss": 2.4637, "step": 36 }, { "epoch": 0.04, "learning_rate": 4.99154334038055e-05, "loss": 2.4645, "step": 38 }, { "epoch": 0.04, "learning_rate": 4.9910147991543345e-05, "loss": 2.4394, "step": 40 }, { "epoch": 0.04, "learning_rate": 4.9904862579281184e-05, "loss": 2.5194, "step": 42 }, { "epoch": 0.05, "learning_rate": 4.989957716701903e-05, "loss": 2.9867, "step": 44 }, { "epoch": 0.05, "learning_rate": 4.989429175475687e-05, "loss": 2.2137, "step": 46 }, { "epoch": 0.05, "learning_rate": 4.9889006342494715e-05, "loss": 2.6259, "step": 48 }, { "epoch": 0.05, "learning_rate": 4.9883720930232555e-05, "loss": 2.768, "step": 50 }, { "epoch": 0.05, "learning_rate": 4.987843551797041e-05, "loss": 1.9139, "step": 52 }, { "epoch": 0.06, "learning_rate": 4.987315010570825e-05, "loss": 1.8541, "step": 54 }, { "epoch": 0.06, "learning_rate": 4.986786469344609e-05, "loss": 1.9566, "step": 56 }, { "epoch": 0.06, "learning_rate": 4.986257928118393e-05, "loss": 1.7323, "step": 58 }, { "epoch": 0.06, "learning_rate": 4.985729386892178e-05, "loss": 1.8061, "step": 60 }, { "epoch": 0.07, "learning_rate": 4.9852008456659624e-05, "loss": 1.8183, "step": 62 }, { "epoch": 0.07, "learning_rate": 4.9846723044397464e-05, "loss": 1.5695, "step": 64 }, { "epoch": 0.07, "learning_rate": 4.984143763213531e-05, "loss": 1.5084, "step": 66 }, { "epoch": 0.07, "learning_rate": 4.983615221987315e-05, "loss": 1.5239, "step": 68 }, { "epoch": 0.07, "learning_rate": 4.9830866807611e-05, "loss": 1.1437, "step": 70 }, { "epoch": 0.08, "learning_rate": 4.982558139534884e-05, "loss": 1.7891, "step": 72 }, { "epoch": 0.08, "learning_rate": 4.982029598308669e-05, "loss": 1.8291, "step": 74 }, { "epoch": 0.08, "learning_rate": 4.9815010570824526e-05, "loss": 1.6036, "step": 76 }, { "epoch": 0.08, "learning_rate": 4.980972515856237e-05, "loss": 1.2715, "step": 78 }, { "epoch": 0.08, "learning_rate": 4.980443974630021e-05, "loss": 1.1591, "step": 80 }, { "epoch": 0.09, "learning_rate": 4.979915433403806e-05, "loss": 1.6209, "step": 82 }, { "epoch": 0.09, "learning_rate": 4.97938689217759e-05, "loss": 1.478, "step": 84 }, { "epoch": 0.09, "learning_rate": 4.978858350951374e-05, "loss": 0.986, "step": 86 }, { "epoch": 0.09, "learning_rate": 4.978329809725159e-05, "loss": 1.5824, "step": 88 }, { "epoch": 0.1, "learning_rate": 4.9778012684989435e-05, "loss": 1.3657, "step": 90 }, { "epoch": 0.1, "learning_rate": 4.9772727272727275e-05, "loss": 1.3425, "step": 92 }, { "epoch": 0.1, "learning_rate": 4.976744186046512e-05, "loss": 1.1693, "step": 94 }, { "epoch": 0.1, "learning_rate": 4.976215644820296e-05, "loss": 1.3152, "step": 96 }, { "epoch": 0.1, "learning_rate": 4.9756871035940806e-05, "loss": 0.9633, "step": 98 }, { "epoch": 0.11, "learning_rate": 4.9751585623678645e-05, "loss": 1.0875, "step": 100 }, { "epoch": 0.11, "learning_rate": 4.974630021141649e-05, "loss": 0.9981, "step": 102 }, { "epoch": 0.11, "learning_rate": 4.974101479915434e-05, "loss": 1.003, "step": 104 }, { "epoch": 0.11, "learning_rate": 4.9735729386892183e-05, "loss": 1.1275, "step": 106 }, { "epoch": 0.11, "learning_rate": 4.973044397463002e-05, "loss": 1.2824, "step": 108 }, { "epoch": 0.12, "learning_rate": 4.972515856236787e-05, "loss": 1.2133, "step": 110 }, { "epoch": 0.12, "learning_rate": 4.971987315010571e-05, "loss": 1.1642, "step": 112 }, { "epoch": 0.12, "learning_rate": 4.9714587737843554e-05, "loss": 1.0921, "step": 114 }, { "epoch": 0.12, "learning_rate": 4.97093023255814e-05, "loss": 1.3712, "step": 116 }, { "epoch": 0.12, "learning_rate": 4.970401691331924e-05, "loss": 1.1114, "step": 118 }, { "epoch": 0.13, "learning_rate": 4.9698731501057085e-05, "loss": 1.1718, "step": 120 }, { "epoch": 0.13, "learning_rate": 4.9693446088794925e-05, "loss": 1.0484, "step": 122 }, { "epoch": 0.13, "learning_rate": 4.968816067653278e-05, "loss": 0.8344, "step": 124 }, { "epoch": 0.13, "learning_rate": 4.968287526427062e-05, "loss": 1.4522, "step": 126 }, { "epoch": 0.14, "learning_rate": 4.967758985200846e-05, "loss": 2.1993, "step": 128 }, { "epoch": 0.14, "learning_rate": 4.96723044397463e-05, "loss": 1.0746, "step": 130 }, { "epoch": 0.14, "learning_rate": 4.966701902748415e-05, "loss": 0.9988, "step": 132 }, { "epoch": 0.14, "learning_rate": 4.966173361522199e-05, "loss": 1.04, "step": 134 }, { "epoch": 0.14, "learning_rate": 4.9656448202959834e-05, "loss": 1.434, "step": 136 }, { "epoch": 0.15, "learning_rate": 4.965116279069767e-05, "loss": 1.0161, "step": 138 }, { "epoch": 0.15, "learning_rate": 4.964587737843552e-05, "loss": 1.3142, "step": 140 }, { "epoch": 0.15, "learning_rate": 4.9640591966173365e-05, "loss": 1.1679, "step": 142 }, { "epoch": 0.15, "learning_rate": 4.963530655391121e-05, "loss": 1.3562, "step": 144 }, { "epoch": 0.15, "learning_rate": 4.963002114164905e-05, "loss": 1.2236, "step": 146 }, { "epoch": 0.16, "learning_rate": 4.9624735729386896e-05, "loss": 1.2391, "step": 148 }, { "epoch": 0.16, "learning_rate": 4.9619450317124736e-05, "loss": 1.1668, "step": 150 }, { "epoch": 0.16, "learning_rate": 4.961416490486258e-05, "loss": 1.3492, "step": 152 }, { "epoch": 0.16, "learning_rate": 4.960887949260042e-05, "loss": 0.8301, "step": 154 }, { "epoch": 0.16, "learning_rate": 4.960359408033827e-05, "loss": 1.2563, "step": 156 }, { "epoch": 0.17, "learning_rate": 4.959830866807611e-05, "loss": 0.9544, "step": 158 }, { "epoch": 0.17, "learning_rate": 4.959302325581396e-05, "loss": 1.0004, "step": 160 }, { "epoch": 0.17, "learning_rate": 4.95877378435518e-05, "loss": 1.2484, "step": 162 }, { "epoch": 0.17, "learning_rate": 4.9582452431289645e-05, "loss": 1.2366, "step": 164 }, { "epoch": 0.18, "learning_rate": 4.957716701902749e-05, "loss": 0.9897, "step": 166 }, { "epoch": 0.18, "learning_rate": 4.957188160676533e-05, "loss": 1.0405, "step": 168 }, { "epoch": 0.18, "learning_rate": 4.9566596194503176e-05, "loss": 0.9499, "step": 170 }, { "epoch": 0.18, "learning_rate": 4.9561310782241015e-05, "loss": 1.1503, "step": 172 }, { "epoch": 0.18, "learning_rate": 4.955602536997886e-05, "loss": 0.9555, "step": 174 }, { "epoch": 0.19, "learning_rate": 4.95507399577167e-05, "loss": 0.9089, "step": 176 }, { "epoch": 0.19, "learning_rate": 4.9545454545454553e-05, "loss": 1.3379, "step": 178 }, { "epoch": 0.19, "learning_rate": 4.954016913319239e-05, "loss": 0.8154, "step": 180 }, { "epoch": 0.19, "learning_rate": 4.953488372093024e-05, "loss": 0.669, "step": 182 }, { "epoch": 0.19, "learning_rate": 4.952959830866808e-05, "loss": 0.9411, "step": 184 }, { "epoch": 0.2, "learning_rate": 4.9524312896405924e-05, "loss": 1.1204, "step": 186 }, { "epoch": 0.2, "learning_rate": 4.9519027484143763e-05, "loss": 1.1171, "step": 188 }, { "epoch": 0.2, "learning_rate": 4.951374207188161e-05, "loss": 1.167, "step": 190 }, { "epoch": 0.2, "learning_rate": 4.950845665961945e-05, "loss": 0.85, "step": 192 }, { "epoch": 0.21, "learning_rate": 4.9503171247357295e-05, "loss": 0.9862, "step": 194 }, { "epoch": 0.21, "learning_rate": 4.949788583509514e-05, "loss": 1.0706, "step": 196 }, { "epoch": 0.21, "learning_rate": 4.949260042283299e-05, "loss": 0.9666, "step": 198 }, { "epoch": 0.21, "learning_rate": 4.9487315010570826e-05, "loss": 0.9068, "step": 200 }, { "epoch": 0.21, "learning_rate": 4.948202959830867e-05, "loss": 0.7309, "step": 202 }, { "epoch": 0.22, "learning_rate": 4.947674418604651e-05, "loss": 1.3316, "step": 204 }, { "epoch": 0.22, "learning_rate": 4.947145877378436e-05, "loss": 1.0254, "step": 206 }, { "epoch": 0.22, "learning_rate": 4.94661733615222e-05, "loss": 0.8078, "step": 208 }, { "epoch": 0.22, "learning_rate": 4.946088794926004e-05, "loss": 0.9885, "step": 210 }, { "epoch": 0.22, "learning_rate": 4.945560253699789e-05, "loss": 1.3213, "step": 212 }, { "epoch": 0.23, "learning_rate": 4.9450317124735735e-05, "loss": 1.0121, "step": 214 }, { "epoch": 0.23, "learning_rate": 4.9445031712473574e-05, "loss": 1.2225, "step": 216 }, { "epoch": 0.23, "learning_rate": 4.943974630021142e-05, "loss": 1.207, "step": 218 }, { "epoch": 0.23, "learning_rate": 4.9434460887949266e-05, "loss": 0.6599, "step": 220 }, { "epoch": 0.23, "learning_rate": 4.9429175475687106e-05, "loss": 0.9893, "step": 222 }, { "epoch": 0.24, "learning_rate": 4.942389006342495e-05, "loss": 0.8, "step": 224 }, { "epoch": 0.24, "learning_rate": 4.941860465116279e-05, "loss": 1.1942, "step": 226 }, { "epoch": 0.24, "learning_rate": 4.941331923890064e-05, "loss": 1.3698, "step": 228 }, { "epoch": 0.24, "learning_rate": 4.9408033826638476e-05, "loss": 1.0476, "step": 230 }, { "epoch": 0.25, "learning_rate": 4.940274841437633e-05, "loss": 1.1916, "step": 232 }, { "epoch": 0.25, "learning_rate": 4.939746300211417e-05, "loss": 1.2326, "step": 234 }, { "epoch": 0.25, "learning_rate": 4.9392177589852015e-05, "loss": 0.9399, "step": 236 }, { "epoch": 0.25, "learning_rate": 4.9386892177589854e-05, "loss": 1.2254, "step": 238 }, { "epoch": 0.25, "learning_rate": 4.93816067653277e-05, "loss": 0.9529, "step": 240 }, { "epoch": 0.26, "learning_rate": 4.937632135306554e-05, "loss": 1.0299, "step": 242 }, { "epoch": 0.26, "learning_rate": 4.9371035940803385e-05, "loss": 0.7292, "step": 244 }, { "epoch": 0.26, "learning_rate": 4.9365750528541225e-05, "loss": 0.9186, "step": 246 }, { "epoch": 0.26, "learning_rate": 4.936046511627907e-05, "loss": 0.7604, "step": 248 }, { "epoch": 0.26, "learning_rate": 4.935517970401692e-05, "loss": 0.8169, "step": 250 }, { "epoch": 0.27, "learning_rate": 4.934989429175476e-05, "loss": 0.973, "step": 252 }, { "epoch": 0.27, "learning_rate": 4.93446088794926e-05, "loss": 0.8682, "step": 254 }, { "epoch": 0.27, "learning_rate": 4.933932346723045e-05, "loss": 0.9603, "step": 256 }, { "epoch": 0.27, "learning_rate": 4.933403805496829e-05, "loss": 1.1217, "step": 258 }, { "epoch": 0.27, "learning_rate": 4.9328752642706133e-05, "loss": 1.0628, "step": 260 }, { "epoch": 0.28, "learning_rate": 4.932346723044397e-05, "loss": 1.0257, "step": 262 }, { "epoch": 0.28, "learning_rate": 4.931818181818182e-05, "loss": 0.8398, "step": 264 }, { "epoch": 0.28, "learning_rate": 4.9312896405919665e-05, "loss": 0.9168, "step": 266 }, { "epoch": 0.28, "learning_rate": 4.930761099365751e-05, "loss": 0.8925, "step": 268 }, { "epoch": 0.29, "learning_rate": 4.930232558139535e-05, "loss": 0.7816, "step": 270 }, { "epoch": 0.29, "learning_rate": 4.9297040169133196e-05, "loss": 1.0841, "step": 272 }, { "epoch": 0.29, "learning_rate": 4.929175475687104e-05, "loss": 0.9992, "step": 274 }, { "epoch": 0.29, "learning_rate": 4.928646934460888e-05, "loss": 1.2984, "step": 276 }, { "epoch": 0.29, "learning_rate": 4.928118393234673e-05, "loss": 1.2014, "step": 278 }, { "epoch": 0.3, "learning_rate": 4.927589852008457e-05, "loss": 0.8883, "step": 280 }, { "epoch": 0.3, "learning_rate": 4.927061310782241e-05, "loss": 1.1356, "step": 282 }, { "epoch": 0.3, "learning_rate": 4.926532769556025e-05, "loss": 0.9082, "step": 284 }, { "epoch": 0.3, "learning_rate": 4.9260042283298105e-05, "loss": 0.7397, "step": 286 }, { "epoch": 0.3, "learning_rate": 4.9254756871035944e-05, "loss": 1.0028, "step": 288 }, { "epoch": 0.31, "learning_rate": 4.924947145877379e-05, "loss": 0.8124, "step": 290 }, { "epoch": 0.31, "learning_rate": 4.924418604651163e-05, "loss": 0.9205, "step": 292 }, { "epoch": 0.31, "learning_rate": 4.9238900634249476e-05, "loss": 1.2545, "step": 294 }, { "epoch": 0.31, "learning_rate": 4.9233615221987315e-05, "loss": 0.9391, "step": 296 }, { "epoch": 0.32, "learning_rate": 4.922832980972516e-05, "loss": 0.9877, "step": 298 }, { "epoch": 0.32, "learning_rate": 4.9223044397463e-05, "loss": 0.7637, "step": 300 }, { "epoch": 0.32, "learning_rate": 4.9217758985200846e-05, "loss": 1.0561, "step": 302 }, { "epoch": 0.32, "learning_rate": 4.921247357293869e-05, "loss": 0.6914, "step": 304 }, { "epoch": 0.32, "learning_rate": 4.920718816067654e-05, "loss": 0.933, "step": 306 }, { "epoch": 0.33, "learning_rate": 4.920190274841438e-05, "loss": 0.916, "step": 308 }, { "epoch": 0.33, "learning_rate": 4.9196617336152224e-05, "loss": 0.9402, "step": 310 }, { "epoch": 0.33, "learning_rate": 4.919133192389006e-05, "loss": 0.8271, "step": 312 }, { "epoch": 0.33, "learning_rate": 4.918604651162791e-05, "loss": 0.7607, "step": 314 }, { "epoch": 0.33, "learning_rate": 4.918076109936575e-05, "loss": 0.7434, "step": 316 }, { "epoch": 0.34, "learning_rate": 4.9175475687103595e-05, "loss": 0.8802, "step": 318 }, { "epoch": 0.34, "learning_rate": 4.917019027484144e-05, "loss": 1.3608, "step": 320 }, { "epoch": 0.34, "learning_rate": 4.916490486257929e-05, "loss": 1.0817, "step": 322 }, { "epoch": 0.34, "learning_rate": 4.9159619450317126e-05, "loss": 0.9358, "step": 324 }, { "epoch": 0.34, "learning_rate": 4.915433403805497e-05, "loss": 0.6262, "step": 326 }, { "epoch": 0.35, "learning_rate": 4.914904862579282e-05, "loss": 1.1558, "step": 328 }, { "epoch": 0.35, "learning_rate": 4.914376321353066e-05, "loss": 0.9856, "step": 330 }, { "epoch": 0.35, "learning_rate": 4.9138477801268503e-05, "loss": 0.9328, "step": 332 }, { "epoch": 0.35, "learning_rate": 4.913319238900634e-05, "loss": 0.9733, "step": 334 }, { "epoch": 0.36, "learning_rate": 4.912790697674419e-05, "loss": 0.9509, "step": 336 }, { "epoch": 0.36, "learning_rate": 4.912262156448203e-05, "loss": 1.201, "step": 338 }, { "epoch": 0.36, "learning_rate": 4.911733615221988e-05, "loss": 1.1361, "step": 340 }, { "epoch": 0.36, "learning_rate": 4.911205073995772e-05, "loss": 0.8874, "step": 342 }, { "epoch": 0.36, "learning_rate": 4.9106765327695566e-05, "loss": 0.7402, "step": 344 }, { "epoch": 0.37, "learning_rate": 4.9101479915433406e-05, "loss": 0.957, "step": 346 }, { "epoch": 0.37, "learning_rate": 4.909619450317125e-05, "loss": 0.7774, "step": 348 }, { "epoch": 0.37, "learning_rate": 4.909090909090909e-05, "loss": 0.8865, "step": 350 }, { "epoch": 0.37, "learning_rate": 4.908562367864694e-05, "loss": 0.8735, "step": 352 }, { "epoch": 0.37, "learning_rate": 4.9080338266384776e-05, "loss": 0.7073, "step": 354 }, { "epoch": 0.38, "learning_rate": 4.907505285412262e-05, "loss": 0.6819, "step": 356 }, { "epoch": 0.38, "learning_rate": 4.906976744186046e-05, "loss": 1.3813, "step": 358 }, { "epoch": 0.38, "learning_rate": 4.9064482029598314e-05, "loss": 0.8954, "step": 360 }, { "epoch": 0.38, "learning_rate": 4.9059196617336154e-05, "loss": 0.758, "step": 362 }, { "epoch": 0.38, "learning_rate": 4.9053911205074e-05, "loss": 1.0526, "step": 364 }, { "epoch": 0.39, "learning_rate": 4.904862579281184e-05, "loss": 0.7349, "step": 366 }, { "epoch": 0.39, "learning_rate": 4.9043340380549685e-05, "loss": 0.8793, "step": 368 }, { "epoch": 0.39, "learning_rate": 4.903805496828753e-05, "loss": 0.5134, "step": 370 }, { "epoch": 0.39, "learning_rate": 4.903276955602537e-05, "loss": 0.6381, "step": 372 }, { "epoch": 0.4, "learning_rate": 4.9027484143763217e-05, "loss": 0.949, "step": 374 }, { "epoch": 0.4, "learning_rate": 4.9022198731501056e-05, "loss": 0.8732, "step": 376 }, { "epoch": 0.4, "learning_rate": 4.90169133192389e-05, "loss": 0.6792, "step": 378 }, { "epoch": 0.4, "learning_rate": 4.901162790697675e-05, "loss": 0.8423, "step": 380 }, { "epoch": 0.4, "learning_rate": 4.9006342494714594e-05, "loss": 0.9052, "step": 382 }, { "epoch": 0.41, "learning_rate": 4.900105708245243e-05, "loss": 0.8757, "step": 384 }, { "epoch": 0.41, "learning_rate": 4.899577167019028e-05, "loss": 1.0981, "step": 386 }, { "epoch": 0.41, "learning_rate": 4.899048625792812e-05, "loss": 1.1139, "step": 388 }, { "epoch": 0.41, "learning_rate": 4.8985200845665965e-05, "loss": 0.8369, "step": 390 }, { "epoch": 0.41, "learning_rate": 4.8979915433403804e-05, "loss": 0.8961, "step": 392 }, { "epoch": 0.42, "learning_rate": 4.897463002114165e-05, "loss": 1.1247, "step": 394 }, { "epoch": 0.42, "learning_rate": 4.8969344608879496e-05, "loss": 0.7586, "step": 396 }, { "epoch": 0.42, "learning_rate": 4.896405919661734e-05, "loss": 0.8276, "step": 398 }, { "epoch": 0.42, "learning_rate": 4.895877378435518e-05, "loss": 0.7506, "step": 400 }, { "epoch": 0.42, "learning_rate": 4.895348837209303e-05, "loss": 0.6963, "step": 402 }, { "epoch": 0.43, "learning_rate": 4.894820295983087e-05, "loss": 0.7005, "step": 404 }, { "epoch": 0.43, "learning_rate": 4.894291754756871e-05, "loss": 0.9046, "step": 406 }, { "epoch": 0.43, "learning_rate": 4.893763213530655e-05, "loss": 0.8376, "step": 408 }, { "epoch": 0.43, "learning_rate": 4.89323467230444e-05, "loss": 0.7533, "step": 410 }, { "epoch": 0.44, "learning_rate": 4.892706131078224e-05, "loss": 0.7952, "step": 412 }, { "epoch": 0.44, "learning_rate": 4.892177589852009e-05, "loss": 0.8707, "step": 414 }, { "epoch": 0.44, "learning_rate": 4.891649048625793e-05, "loss": 0.9241, "step": 416 }, { "epoch": 0.44, "learning_rate": 4.8911205073995776e-05, "loss": 0.8749, "step": 418 }, { "epoch": 0.44, "learning_rate": 4.8905919661733615e-05, "loss": 0.8629, "step": 420 }, { "epoch": 0.45, "learning_rate": 4.890063424947146e-05, "loss": 1.1447, "step": 422 }, { "epoch": 0.45, "learning_rate": 4.889534883720931e-05, "loss": 0.7138, "step": 424 }, { "epoch": 0.45, "learning_rate": 4.8890063424947146e-05, "loss": 0.6883, "step": 426 }, { "epoch": 0.45, "learning_rate": 4.888477801268499e-05, "loss": 0.7576, "step": 428 }, { "epoch": 0.45, "learning_rate": 4.887949260042283e-05, "loss": 0.6091, "step": 430 }, { "epoch": 0.46, "learning_rate": 4.8874207188160684e-05, "loss": 0.7623, "step": 432 }, { "epoch": 0.46, "learning_rate": 4.8868921775898524e-05, "loss": 0.7938, "step": 434 }, { "epoch": 0.46, "learning_rate": 4.886363636363637e-05, "loss": 0.9892, "step": 436 }, { "epoch": 0.46, "learning_rate": 4.885835095137421e-05, "loss": 0.9199, "step": 438 }, { "epoch": 0.47, "learning_rate": 4.8853065539112055e-05, "loss": 1.0223, "step": 440 }, { "epoch": 0.47, "learning_rate": 4.8847780126849894e-05, "loss": 0.8448, "step": 442 }, { "epoch": 0.47, "learning_rate": 4.884249471458774e-05, "loss": 0.7734, "step": 444 }, { "epoch": 0.47, "learning_rate": 4.883720930232558e-05, "loss": 1.2691, "step": 446 }, { "epoch": 0.47, "learning_rate": 4.8831923890063426e-05, "loss": 0.9567, "step": 448 }, { "epoch": 0.48, "learning_rate": 4.882663847780127e-05, "loss": 0.9065, "step": 450 }, { "epoch": 0.48, "learning_rate": 4.882135306553912e-05, "loss": 0.706, "step": 452 }, { "epoch": 0.48, "learning_rate": 4.881606765327696e-05, "loss": 0.8382, "step": 454 }, { "epoch": 0.48, "learning_rate": 4.88107822410148e-05, "loss": 0.8763, "step": 456 }, { "epoch": 0.48, "learning_rate": 4.880549682875264e-05, "loss": 0.6008, "step": 458 }, { "epoch": 0.49, "learning_rate": 4.880021141649049e-05, "loss": 0.9091, "step": 460 }, { "epoch": 0.49, "learning_rate": 4.879492600422833e-05, "loss": 1.0063, "step": 462 }, { "epoch": 0.49, "learning_rate": 4.8789640591966174e-05, "loss": 0.6134, "step": 464 }, { "epoch": 0.49, "learning_rate": 4.878435517970401e-05, "loss": 0.9105, "step": 466 }, { "epoch": 0.49, "learning_rate": 4.8779069767441866e-05, "loss": 0.8555, "step": 468 }, { "epoch": 0.5, "learning_rate": 4.8773784355179705e-05, "loss": 1.0469, "step": 470 }, { "epoch": 0.5, "learning_rate": 4.876849894291755e-05, "loss": 1.0043, "step": 472 }, { "epoch": 0.5, "learning_rate": 4.876321353065539e-05, "loss": 0.8886, "step": 474 }, { "epoch": 0.5, "learning_rate": 4.875792811839324e-05, "loss": 0.8378, "step": 476 }, { "epoch": 0.51, "learning_rate": 4.875264270613108e-05, "loss": 0.8491, "step": 478 }, { "epoch": 0.51, "learning_rate": 4.874735729386892e-05, "loss": 0.8847, "step": 480 }, { "epoch": 0.51, "learning_rate": 4.874207188160677e-05, "loss": 0.8345, "step": 482 }, { "epoch": 0.51, "learning_rate": 4.873678646934461e-05, "loss": 0.882, "step": 484 }, { "epoch": 0.51, "learning_rate": 4.873150105708246e-05, "loss": 1.0436, "step": 486 }, { "epoch": 0.52, "learning_rate": 4.87262156448203e-05, "loss": 0.9272, "step": 488 }, { "epoch": 0.52, "learning_rate": 4.8720930232558146e-05, "loss": 0.8267, "step": 490 }, { "epoch": 0.52, "learning_rate": 4.8715644820295985e-05, "loss": 0.8139, "step": 492 }, { "epoch": 0.52, "learning_rate": 4.871035940803383e-05, "loss": 0.9702, "step": 494 }, { "epoch": 0.52, "learning_rate": 4.870507399577167e-05, "loss": 0.9904, "step": 496 }, { "epoch": 0.53, "learning_rate": 4.8699788583509516e-05, "loss": 0.8216, "step": 498 }, { "epoch": 0.53, "learning_rate": 4.8694503171247356e-05, "loss": 0.8989, "step": 500 }, { "epoch": 0.53, "eval_cer": 0.08070675406098604, "eval_loss": 0.7687897086143494, "eval_runtime": 128.8472, "eval_samples_per_second": 6.527, "eval_steps_per_second": 0.823, "step": 500 }, { "epoch": 0.53, "learning_rate": 4.86892177589852e-05, "loss": 1.0833, "step": 502 }, { "epoch": 0.53, "learning_rate": 4.868393234672305e-05, "loss": 0.8538, "step": 504 }, { "epoch": 0.53, "learning_rate": 4.8678646934460894e-05, "loss": 0.6459, "step": 506 }, { "epoch": 0.54, "learning_rate": 4.867336152219873e-05, "loss": 0.6752, "step": 508 }, { "epoch": 0.54, "learning_rate": 4.866807610993658e-05, "loss": 0.7075, "step": 510 }, { "epoch": 0.54, "learning_rate": 4.866279069767442e-05, "loss": 0.7817, "step": 512 }, { "epoch": 0.54, "learning_rate": 4.8657505285412264e-05, "loss": 0.8261, "step": 514 }, { "epoch": 0.55, "learning_rate": 4.8652219873150104e-05, "loss": 0.8365, "step": 516 }, { "epoch": 0.55, "learning_rate": 4.864693446088795e-05, "loss": 0.7488, "step": 518 }, { "epoch": 0.55, "learning_rate": 4.8641649048625796e-05, "loss": 1.0128, "step": 520 }, { "epoch": 0.55, "learning_rate": 4.863636363636364e-05, "loss": 1.1424, "step": 522 }, { "epoch": 0.55, "learning_rate": 4.863107822410148e-05, "loss": 0.8287, "step": 524 }, { "epoch": 0.56, "learning_rate": 4.862579281183933e-05, "loss": 0.718, "step": 526 }, { "epoch": 0.56, "learning_rate": 4.8620507399577167e-05, "loss": 0.5674, "step": 528 }, { "epoch": 0.56, "learning_rate": 4.861522198731501e-05, "loss": 0.8469, "step": 530 }, { "epoch": 0.56, "learning_rate": 4.860993657505286e-05, "loss": 1.1801, "step": 532 }, { "epoch": 0.56, "learning_rate": 4.86046511627907e-05, "loss": 0.7818, "step": 534 }, { "epoch": 0.57, "learning_rate": 4.8599365750528544e-05, "loss": 0.5959, "step": 536 }, { "epoch": 0.57, "learning_rate": 4.859408033826638e-05, "loss": 0.8464, "step": 538 }, { "epoch": 0.57, "learning_rate": 4.8588794926004236e-05, "loss": 0.711, "step": 540 }, { "epoch": 0.57, "learning_rate": 4.8583509513742075e-05, "loss": 0.5881, "step": 542 }, { "epoch": 0.58, "learning_rate": 4.857822410147992e-05, "loss": 0.7966, "step": 544 }, { "epoch": 0.58, "learning_rate": 4.857293868921776e-05, "loss": 0.542, "step": 546 }, { "epoch": 0.58, "learning_rate": 4.856765327695561e-05, "loss": 0.8169, "step": 548 }, { "epoch": 0.58, "learning_rate": 4.8562367864693446e-05, "loss": 0.9783, "step": 550 }, { "epoch": 0.58, "learning_rate": 4.855708245243129e-05, "loss": 0.7638, "step": 552 }, { "epoch": 0.59, "learning_rate": 4.855179704016913e-05, "loss": 0.8195, "step": 554 }, { "epoch": 0.59, "learning_rate": 4.854651162790698e-05, "loss": 1.0137, "step": 556 }, { "epoch": 0.59, "learning_rate": 4.8541226215644824e-05, "loss": 1.0664, "step": 558 }, { "epoch": 0.59, "learning_rate": 4.853594080338267e-05, "loss": 0.7189, "step": 560 }, { "epoch": 0.59, "learning_rate": 4.853065539112051e-05, "loss": 0.7912, "step": 562 }, { "epoch": 0.6, "learning_rate": 4.8525369978858355e-05, "loss": 0.7657, "step": 564 }, { "epoch": 0.6, "learning_rate": 4.8520084566596194e-05, "loss": 0.7469, "step": 566 }, { "epoch": 0.6, "learning_rate": 4.851479915433404e-05, "loss": 0.6355, "step": 568 }, { "epoch": 0.6, "learning_rate": 4.850951374207188e-05, "loss": 0.7281, "step": 570 }, { "epoch": 0.6, "learning_rate": 4.8504228329809726e-05, "loss": 0.8989, "step": 572 }, { "epoch": 0.61, "learning_rate": 4.849894291754757e-05, "loss": 0.837, "step": 574 }, { "epoch": 0.61, "learning_rate": 4.849365750528542e-05, "loss": 1.067, "step": 576 }, { "epoch": 0.61, "learning_rate": 4.848837209302326e-05, "loss": 0.7942, "step": 578 }, { "epoch": 0.61, "learning_rate": 4.84830866807611e-05, "loss": 0.8782, "step": 580 }, { "epoch": 0.62, "learning_rate": 4.847780126849894e-05, "loss": 0.6836, "step": 582 }, { "epoch": 0.62, "learning_rate": 4.847251585623679e-05, "loss": 0.8591, "step": 584 }, { "epoch": 0.62, "learning_rate": 4.8467230443974635e-05, "loss": 1.1122, "step": 586 }, { "epoch": 0.62, "learning_rate": 4.8461945031712474e-05, "loss": 1.2, "step": 588 }, { "epoch": 0.62, "learning_rate": 4.845665961945032e-05, "loss": 0.6635, "step": 590 }, { "epoch": 0.63, "learning_rate": 4.845137420718816e-05, "loss": 0.7363, "step": 592 }, { "epoch": 0.63, "learning_rate": 4.844608879492601e-05, "loss": 0.6953, "step": 594 }, { "epoch": 0.63, "learning_rate": 4.844080338266385e-05, "loss": 0.8619, "step": 596 }, { "epoch": 0.63, "learning_rate": 4.84355179704017e-05, "loss": 0.8771, "step": 598 }, { "epoch": 0.63, "learning_rate": 4.843023255813954e-05, "loss": 0.83, "step": 600 }, { "epoch": 0.64, "learning_rate": 4.842494714587738e-05, "loss": 0.6184, "step": 602 }, { "epoch": 0.64, "learning_rate": 4.841966173361522e-05, "loss": 0.6461, "step": 604 }, { "epoch": 0.64, "learning_rate": 4.841437632135307e-05, "loss": 0.9024, "step": 606 }, { "epoch": 0.64, "learning_rate": 4.840909090909091e-05, "loss": 0.8576, "step": 608 }, { "epoch": 0.64, "learning_rate": 4.840380549682875e-05, "loss": 0.4654, "step": 610 }, { "epoch": 0.65, "learning_rate": 4.83985200845666e-05, "loss": 0.6721, "step": 612 }, { "epoch": 0.65, "learning_rate": 4.8393234672304445e-05, "loss": 0.7993, "step": 614 }, { "epoch": 0.65, "learning_rate": 4.8387949260042285e-05, "loss": 0.6995, "step": 616 }, { "epoch": 0.65, "learning_rate": 4.838266384778013e-05, "loss": 0.767, "step": 618 }, { "epoch": 0.66, "learning_rate": 4.837737843551797e-05, "loss": 0.6822, "step": 620 }, { "epoch": 0.66, "learning_rate": 4.8372093023255816e-05, "loss": 0.7364, "step": 622 }, { "epoch": 0.66, "learning_rate": 4.8366807610993655e-05, "loss": 0.8019, "step": 624 }, { "epoch": 0.66, "learning_rate": 4.83615221987315e-05, "loss": 0.9001, "step": 626 }, { "epoch": 0.66, "learning_rate": 4.835623678646935e-05, "loss": 1.0015, "step": 628 }, { "epoch": 0.67, "learning_rate": 4.8350951374207194e-05, "loss": 1.0959, "step": 630 }, { "epoch": 0.67, "learning_rate": 4.834566596194503e-05, "loss": 0.7717, "step": 632 }, { "epoch": 0.67, "learning_rate": 4.834038054968288e-05, "loss": 0.5083, "step": 634 }, { "epoch": 0.67, "learning_rate": 4.8335095137420725e-05, "loss": 0.8368, "step": 636 }, { "epoch": 0.67, "learning_rate": 4.8329809725158564e-05, "loss": 0.5508, "step": 638 }, { "epoch": 0.68, "learning_rate": 4.832452431289641e-05, "loss": 0.7068, "step": 640 }, { "epoch": 0.68, "learning_rate": 4.831923890063425e-05, "loss": 0.6711, "step": 642 }, { "epoch": 0.68, "learning_rate": 4.8313953488372096e-05, "loss": 0.9935, "step": 644 }, { "epoch": 0.68, "learning_rate": 4.8308668076109935e-05, "loss": 0.609, "step": 646 }, { "epoch": 0.68, "learning_rate": 4.830338266384779e-05, "loss": 0.7019, "step": 648 }, { "epoch": 0.69, "learning_rate": 4.829809725158563e-05, "loss": 1.0631, "step": 650 }, { "epoch": 0.69, "learning_rate": 4.829281183932347e-05, "loss": 1.0473, "step": 652 }, { "epoch": 0.69, "learning_rate": 4.828752642706131e-05, "loss": 1.1643, "step": 654 }, { "epoch": 0.69, "learning_rate": 4.828224101479916e-05, "loss": 0.7424, "step": 656 }, { "epoch": 0.7, "learning_rate": 4.8276955602537e-05, "loss": 0.8219, "step": 658 }, { "epoch": 0.7, "learning_rate": 4.8271670190274844e-05, "loss": 0.8203, "step": 660 }, { "epoch": 0.7, "learning_rate": 4.826638477801268e-05, "loss": 0.8933, "step": 662 }, { "epoch": 0.7, "learning_rate": 4.826109936575053e-05, "loss": 0.5901, "step": 664 }, { "epoch": 0.7, "learning_rate": 4.8255813953488375e-05, "loss": 0.5379, "step": 666 }, { "epoch": 0.71, "learning_rate": 4.825052854122622e-05, "loss": 0.7992, "step": 668 }, { "epoch": 0.71, "learning_rate": 4.824524312896406e-05, "loss": 0.8626, "step": 670 }, { "epoch": 0.71, "learning_rate": 4.823995771670191e-05, "loss": 0.7633, "step": 672 }, { "epoch": 0.71, "learning_rate": 4.8234672304439746e-05, "loss": 1.0405, "step": 674 }, { "epoch": 0.71, "learning_rate": 4.822938689217759e-05, "loss": 0.6841, "step": 676 }, { "epoch": 0.72, "learning_rate": 4.822410147991543e-05, "loss": 0.7965, "step": 678 }, { "epoch": 0.72, "learning_rate": 4.821881606765328e-05, "loss": 0.7564, "step": 680 }, { "epoch": 0.72, "learning_rate": 4.8213530655391123e-05, "loss": 0.6262, "step": 682 }, { "epoch": 0.72, "learning_rate": 4.820824524312897e-05, "loss": 0.6553, "step": 684 }, { "epoch": 0.73, "learning_rate": 4.820295983086681e-05, "loss": 0.6584, "step": 686 }, { "epoch": 0.73, "learning_rate": 4.8197674418604655e-05, "loss": 0.9452, "step": 688 }, { "epoch": 0.73, "learning_rate": 4.81923890063425e-05, "loss": 0.574, "step": 690 }, { "epoch": 0.73, "learning_rate": 4.818710359408034e-05, "loss": 0.7583, "step": 692 }, { "epoch": 0.73, "learning_rate": 4.8181818181818186e-05, "loss": 0.7767, "step": 694 }, { "epoch": 0.74, "learning_rate": 4.8176532769556026e-05, "loss": 0.8234, "step": 696 }, { "epoch": 0.74, "learning_rate": 4.817124735729387e-05, "loss": 1.0249, "step": 698 }, { "epoch": 0.74, "learning_rate": 4.816596194503171e-05, "loss": 1.0602, "step": 700 }, { "epoch": 0.74, "learning_rate": 4.8160676532769564e-05, "loss": 0.981, "step": 702 }, { "epoch": 0.74, "learning_rate": 4.81553911205074e-05, "loss": 0.8407, "step": 704 }, { "epoch": 0.75, "learning_rate": 4.815010570824525e-05, "loss": 0.7522, "step": 706 }, { "epoch": 0.75, "learning_rate": 4.814482029598309e-05, "loss": 0.7369, "step": 708 }, { "epoch": 0.75, "learning_rate": 4.8139534883720934e-05, "loss": 0.6982, "step": 710 }, { "epoch": 0.75, "learning_rate": 4.8134249471458774e-05, "loss": 0.5641, "step": 712 }, { "epoch": 0.75, "learning_rate": 4.812896405919662e-05, "loss": 0.9017, "step": 714 }, { "epoch": 0.76, "learning_rate": 4.812367864693446e-05, "loss": 0.9177, "step": 716 }, { "epoch": 0.76, "learning_rate": 4.8118393234672305e-05, "loss": 0.9034, "step": 718 }, { "epoch": 0.76, "learning_rate": 4.811310782241015e-05, "loss": 0.7752, "step": 720 }, { "epoch": 0.76, "learning_rate": 4.8107822410148e-05, "loss": 0.7535, "step": 722 }, { "epoch": 0.77, "learning_rate": 4.8102536997885836e-05, "loss": 0.7685, "step": 724 }, { "epoch": 0.77, "learning_rate": 4.809725158562368e-05, "loss": 0.9903, "step": 726 }, { "epoch": 0.77, "learning_rate": 4.809196617336152e-05, "loss": 0.9556, "step": 728 }, { "epoch": 0.77, "learning_rate": 4.808668076109937e-05, "loss": 0.7408, "step": 730 }, { "epoch": 0.77, "learning_rate": 4.808139534883721e-05, "loss": 0.9776, "step": 732 }, { "epoch": 0.78, "learning_rate": 4.807610993657505e-05, "loss": 0.6268, "step": 734 }, { "epoch": 0.78, "learning_rate": 4.80708245243129e-05, "loss": 0.7483, "step": 736 }, { "epoch": 0.78, "learning_rate": 4.8065539112050745e-05, "loss": 0.5685, "step": 738 }, { "epoch": 0.78, "learning_rate": 4.8060253699788585e-05, "loss": 0.6607, "step": 740 }, { "epoch": 0.78, "learning_rate": 4.805496828752643e-05, "loss": 0.6576, "step": 742 }, { "epoch": 0.79, "learning_rate": 4.804968287526428e-05, "loss": 0.7757, "step": 744 }, { "epoch": 0.79, "learning_rate": 4.8044397463002116e-05, "loss": 0.4771, "step": 746 }, { "epoch": 0.79, "learning_rate": 4.803911205073996e-05, "loss": 0.7673, "step": 748 }, { "epoch": 0.79, "learning_rate": 4.80338266384778e-05, "loss": 0.5479, "step": 750 }, { "epoch": 0.79, "learning_rate": 4.802854122621565e-05, "loss": 0.8368, "step": 752 }, { "epoch": 0.8, "learning_rate": 4.802325581395349e-05, "loss": 0.7432, "step": 754 }, { "epoch": 0.8, "learning_rate": 4.801797040169134e-05, "loss": 0.9759, "step": 756 }, { "epoch": 0.8, "learning_rate": 4.801268498942918e-05, "loss": 0.5834, "step": 758 }, { "epoch": 0.8, "learning_rate": 4.8007399577167025e-05, "loss": 0.6704, "step": 760 }, { "epoch": 0.81, "learning_rate": 4.8002114164904864e-05, "loss": 0.6211, "step": 762 }, { "epoch": 0.81, "learning_rate": 4.799682875264271e-05, "loss": 0.7186, "step": 764 }, { "epoch": 0.81, "learning_rate": 4.799154334038055e-05, "loss": 0.6583, "step": 766 }, { "epoch": 0.81, "learning_rate": 4.7986257928118396e-05, "loss": 0.5375, "step": 768 }, { "epoch": 0.81, "learning_rate": 4.7980972515856235e-05, "loss": 0.7341, "step": 770 }, { "epoch": 0.82, "learning_rate": 4.797568710359408e-05, "loss": 0.5711, "step": 772 }, { "epoch": 0.82, "learning_rate": 4.797040169133193e-05, "loss": 0.5584, "step": 774 }, { "epoch": 0.82, "learning_rate": 4.796511627906977e-05, "loss": 0.5109, "step": 776 }, { "epoch": 0.82, "learning_rate": 4.795983086680761e-05, "loss": 0.5721, "step": 778 }, { "epoch": 0.82, "learning_rate": 4.795454545454546e-05, "loss": 0.6839, "step": 780 }, { "epoch": 0.83, "learning_rate": 4.79492600422833e-05, "loss": 0.5016, "step": 782 }, { "epoch": 0.83, "learning_rate": 4.7943974630021144e-05, "loss": 1.2874, "step": 784 }, { "epoch": 0.83, "learning_rate": 4.793868921775898e-05, "loss": 0.6109, "step": 786 }, { "epoch": 0.83, "learning_rate": 4.793340380549683e-05, "loss": 0.7601, "step": 788 }, { "epoch": 0.84, "learning_rate": 4.7928118393234675e-05, "loss": 0.9023, "step": 790 }, { "epoch": 0.84, "learning_rate": 4.792283298097252e-05, "loss": 0.6537, "step": 792 }, { "epoch": 0.84, "learning_rate": 4.791754756871036e-05, "loss": 0.6189, "step": 794 }, { "epoch": 0.84, "learning_rate": 4.7912262156448206e-05, "loss": 0.9409, "step": 796 }, { "epoch": 0.84, "learning_rate": 4.790697674418605e-05, "loss": 0.7473, "step": 798 }, { "epoch": 0.85, "learning_rate": 4.790169133192389e-05, "loss": 0.6643, "step": 800 }, { "epoch": 0.85, "learning_rate": 4.789640591966174e-05, "loss": 0.6447, "step": 802 }, { "epoch": 0.85, "learning_rate": 4.789112050739958e-05, "loss": 0.5678, "step": 804 }, { "epoch": 0.85, "learning_rate": 4.788583509513742e-05, "loss": 0.5642, "step": 806 }, { "epoch": 0.85, "learning_rate": 4.788054968287526e-05, "loss": 0.7571, "step": 808 }, { "epoch": 0.86, "learning_rate": 4.7875264270613115e-05, "loss": 0.7114, "step": 810 }, { "epoch": 0.86, "learning_rate": 4.7869978858350955e-05, "loss": 0.7462, "step": 812 }, { "epoch": 0.86, "learning_rate": 4.78646934460888e-05, "loss": 0.8739, "step": 814 }, { "epoch": 0.86, "learning_rate": 4.785940803382664e-05, "loss": 0.9708, "step": 816 }, { "epoch": 0.86, "learning_rate": 4.7854122621564486e-05, "loss": 0.757, "step": 818 }, { "epoch": 0.87, "learning_rate": 4.7848837209302325e-05, "loss": 0.6415, "step": 820 }, { "epoch": 0.87, "learning_rate": 4.784355179704017e-05, "loss": 0.5284, "step": 822 }, { "epoch": 0.87, "learning_rate": 4.783826638477801e-05, "loss": 0.9145, "step": 824 }, { "epoch": 0.87, "learning_rate": 4.783298097251586e-05, "loss": 0.6378, "step": 826 }, { "epoch": 0.88, "learning_rate": 4.78276955602537e-05, "loss": 0.7644, "step": 828 }, { "epoch": 0.88, "learning_rate": 4.782241014799155e-05, "loss": 0.7083, "step": 830 }, { "epoch": 0.88, "learning_rate": 4.781712473572939e-05, "loss": 0.5737, "step": 832 }, { "epoch": 0.88, "learning_rate": 4.7811839323467234e-05, "loss": 0.9134, "step": 834 }, { "epoch": 0.88, "learning_rate": 4.7806553911205073e-05, "loss": 0.7197, "step": 836 }, { "epoch": 0.89, "learning_rate": 4.780126849894292e-05, "loss": 0.7081, "step": 838 }, { "epoch": 0.89, "learning_rate": 4.7795983086680766e-05, "loss": 0.6583, "step": 840 }, { "epoch": 0.89, "learning_rate": 4.7790697674418605e-05, "loss": 0.5343, "step": 842 }, { "epoch": 0.89, "learning_rate": 4.778541226215645e-05, "loss": 0.4942, "step": 844 }, { "epoch": 0.89, "learning_rate": 4.77801268498943e-05, "loss": 0.6151, "step": 846 }, { "epoch": 0.9, "learning_rate": 4.7774841437632136e-05, "loss": 0.6031, "step": 848 }, { "epoch": 0.9, "learning_rate": 4.776955602536998e-05, "loss": 0.8094, "step": 850 }, { "epoch": 0.9, "learning_rate": 4.776427061310783e-05, "loss": 0.6032, "step": 852 }, { "epoch": 0.9, "learning_rate": 4.775898520084567e-05, "loss": 0.9748, "step": 854 }, { "epoch": 0.9, "learning_rate": 4.7753699788583514e-05, "loss": 0.8298, "step": 856 }, { "epoch": 0.91, "learning_rate": 4.774841437632135e-05, "loss": 0.8524, "step": 858 }, { "epoch": 0.91, "learning_rate": 4.77431289640592e-05, "loss": 0.671, "step": 860 }, { "epoch": 0.91, "learning_rate": 4.773784355179704e-05, "loss": 0.6802, "step": 862 }, { "epoch": 0.91, "learning_rate": 4.773255813953489e-05, "loss": 0.8394, "step": 864 }, { "epoch": 0.92, "learning_rate": 4.772727272727273e-05, "loss": 0.6247, "step": 866 }, { "epoch": 0.92, "learning_rate": 4.7721987315010577e-05, "loss": 0.6984, "step": 868 }, { "epoch": 0.92, "learning_rate": 4.7716701902748416e-05, "loss": 0.732, "step": 870 }, { "epoch": 0.92, "learning_rate": 4.771141649048626e-05, "loss": 0.6115, "step": 872 }, { "epoch": 0.92, "learning_rate": 4.77061310782241e-05, "loss": 0.7653, "step": 874 }, { "epoch": 0.93, "learning_rate": 4.770084566596195e-05, "loss": 0.6636, "step": 876 }, { "epoch": 0.93, "learning_rate": 4.7695560253699787e-05, "loss": 0.9399, "step": 878 }, { "epoch": 0.93, "learning_rate": 4.769027484143763e-05, "loss": 0.4535, "step": 880 }, { "epoch": 0.93, "learning_rate": 4.768498942917548e-05, "loss": 0.5472, "step": 882 }, { "epoch": 0.93, "learning_rate": 4.7679704016913325e-05, "loss": 0.8799, "step": 884 }, { "epoch": 0.94, "learning_rate": 4.7674418604651164e-05, "loss": 0.694, "step": 886 }, { "epoch": 0.94, "learning_rate": 4.766913319238901e-05, "loss": 0.9077, "step": 888 }, { "epoch": 0.94, "learning_rate": 4.766384778012685e-05, "loss": 0.6647, "step": 890 }, { "epoch": 0.94, "learning_rate": 4.7658562367864695e-05, "loss": 0.7828, "step": 892 }, { "epoch": 0.95, "learning_rate": 4.765327695560254e-05, "loss": 0.7554, "step": 894 }, { "epoch": 0.95, "learning_rate": 4.764799154334038e-05, "loss": 0.683, "step": 896 }, { "epoch": 0.95, "learning_rate": 4.764270613107823e-05, "loss": 0.6435, "step": 898 }, { "epoch": 0.95, "learning_rate": 4.763742071881607e-05, "loss": 0.8451, "step": 900 }, { "epoch": 0.95, "learning_rate": 4.763213530655392e-05, "loss": 0.9021, "step": 902 }, { "epoch": 0.96, "learning_rate": 4.762684989429176e-05, "loss": 0.5666, "step": 904 }, { "epoch": 0.96, "learning_rate": 4.7621564482029604e-05, "loss": 0.7458, "step": 906 }, { "epoch": 0.96, "learning_rate": 4.7616279069767444e-05, "loss": 0.8112, "step": 908 }, { "epoch": 0.96, "learning_rate": 4.761099365750529e-05, "loss": 0.7296, "step": 910 }, { "epoch": 0.96, "learning_rate": 4.760570824524313e-05, "loss": 0.6413, "step": 912 }, { "epoch": 0.97, "learning_rate": 4.7600422832980975e-05, "loss": 0.8478, "step": 914 }, { "epoch": 0.97, "learning_rate": 4.7595137420718814e-05, "loss": 0.7438, "step": 916 }, { "epoch": 0.97, "learning_rate": 4.758985200845667e-05, "loss": 0.9292, "step": 918 }, { "epoch": 0.97, "learning_rate": 4.7584566596194506e-05, "loss": 0.6936, "step": 920 }, { "epoch": 0.97, "learning_rate": 4.757928118393235e-05, "loss": 0.6346, "step": 922 }, { "epoch": 0.98, "learning_rate": 4.757399577167019e-05, "loss": 0.6186, "step": 924 }, { "epoch": 0.98, "learning_rate": 4.756871035940804e-05, "loss": 0.6978, "step": 926 }, { "epoch": 0.98, "learning_rate": 4.756342494714588e-05, "loss": 0.7146, "step": 928 }, { "epoch": 0.98, "learning_rate": 4.755813953488372e-05, "loss": 0.655, "step": 930 }, { "epoch": 0.99, "learning_rate": 4.755285412262156e-05, "loss": 0.7053, "step": 932 }, { "epoch": 0.99, "learning_rate": 4.754756871035941e-05, "loss": 0.5712, "step": 934 }, { "epoch": 0.99, "learning_rate": 4.7542283298097254e-05, "loss": 0.6625, "step": 936 }, { "epoch": 0.99, "learning_rate": 4.75369978858351e-05, "loss": 0.7332, "step": 938 }, { "epoch": 0.99, "learning_rate": 4.753171247357294e-05, "loss": 0.6703, "step": 940 }, { "epoch": 1.0, "learning_rate": 4.7526427061310786e-05, "loss": 0.6486, "step": 942 }, { "epoch": 1.0, "learning_rate": 4.7521141649048625e-05, "loss": 0.729, "step": 944 }, { "epoch": 1.0, "learning_rate": 4.751585623678647e-05, "loss": 0.7503, "step": 946 }, { "epoch": 1.0, "learning_rate": 4.751057082452432e-05, "loss": 0.6571, "step": 948 }, { "epoch": 1.0, "learning_rate": 4.7505285412262157e-05, "loss": 0.5435, "step": 950 }, { "epoch": 1.01, "learning_rate": 4.75e-05, "loss": 0.4819, "step": 952 }, { "epoch": 1.01, "learning_rate": 4.749471458773785e-05, "loss": 0.6185, "step": 954 }, { "epoch": 1.01, "learning_rate": 4.7489429175475695e-05, "loss": 0.8085, "step": 956 }, { "epoch": 1.01, "learning_rate": 4.7484143763213534e-05, "loss": 0.681, "step": 958 }, { "epoch": 1.01, "learning_rate": 4.747885835095138e-05, "loss": 1.0466, "step": 960 }, { "epoch": 1.02, "learning_rate": 4.747357293868922e-05, "loss": 0.6033, "step": 962 }, { "epoch": 1.02, "learning_rate": 4.7468287526427065e-05, "loss": 0.7136, "step": 964 }, { "epoch": 1.02, "learning_rate": 4.7463002114164905e-05, "loss": 0.5318, "step": 966 }, { "epoch": 1.02, "learning_rate": 4.745771670190275e-05, "loss": 0.4115, "step": 968 }, { "epoch": 1.03, "learning_rate": 4.745243128964059e-05, "loss": 0.643, "step": 970 }, { "epoch": 1.03, "learning_rate": 4.744714587737844e-05, "loss": 0.5568, "step": 972 }, { "epoch": 1.03, "learning_rate": 4.744186046511628e-05, "loss": 0.8738, "step": 974 }, { "epoch": 1.03, "learning_rate": 4.743657505285413e-05, "loss": 0.9258, "step": 976 }, { "epoch": 1.03, "learning_rate": 4.743128964059197e-05, "loss": 0.583, "step": 978 }, { "epoch": 1.04, "learning_rate": 4.7426004228329814e-05, "loss": 0.5308, "step": 980 }, { "epoch": 1.04, "learning_rate": 4.742071881606765e-05, "loss": 0.9331, "step": 982 }, { "epoch": 1.04, "learning_rate": 4.74154334038055e-05, "loss": 0.6287, "step": 984 }, { "epoch": 1.04, "learning_rate": 4.741014799154334e-05, "loss": 0.4476, "step": 986 }, { "epoch": 1.04, "learning_rate": 4.7404862579281184e-05, "loss": 0.5554, "step": 988 }, { "epoch": 1.05, "learning_rate": 4.739957716701903e-05, "loss": 0.4947, "step": 990 }, { "epoch": 1.05, "learning_rate": 4.7394291754756876e-05, "loss": 0.4979, "step": 992 }, { "epoch": 1.05, "learning_rate": 4.7389006342494716e-05, "loss": 0.6792, "step": 994 }, { "epoch": 1.05, "learning_rate": 4.738372093023256e-05, "loss": 0.8503, "step": 996 }, { "epoch": 1.05, "learning_rate": 4.73784355179704e-05, "loss": 0.7678, "step": 998 }, { "epoch": 1.06, "learning_rate": 4.737315010570825e-05, "loss": 0.6055, "step": 1000 }, { "epoch": 1.06, "eval_cer": 0.08395554288971217, "eval_loss": 0.5935352444648743, "eval_runtime": 130.5287, "eval_samples_per_second": 6.443, "eval_steps_per_second": 0.812, "step": 1000 }, { "epoch": 1.06, "learning_rate": 4.736786469344609e-05, "loss": 0.6435, "step": 1002 }, { "epoch": 1.06, "learning_rate": 4.736257928118393e-05, "loss": 0.4943, "step": 1004 }, { "epoch": 1.06, "learning_rate": 4.735729386892178e-05, "loss": 0.6907, "step": 1006 }, { "epoch": 1.07, "learning_rate": 4.7352008456659625e-05, "loss": 0.8161, "step": 1008 }, { "epoch": 1.07, "learning_rate": 4.734672304439747e-05, "loss": 0.7885, "step": 1010 }, { "epoch": 1.07, "learning_rate": 4.734143763213531e-05, "loss": 0.6831, "step": 1012 }, { "epoch": 1.07, "learning_rate": 4.7336152219873156e-05, "loss": 0.5823, "step": 1014 }, { "epoch": 1.07, "learning_rate": 4.7330866807610995e-05, "loss": 0.6312, "step": 1016 }, { "epoch": 1.08, "learning_rate": 4.732558139534884e-05, "loss": 0.5614, "step": 1018 }, { "epoch": 1.08, "learning_rate": 4.732029598308668e-05, "loss": 0.6951, "step": 1020 }, { "epoch": 1.08, "learning_rate": 4.7315010570824527e-05, "loss": 0.7978, "step": 1022 }, { "epoch": 1.08, "learning_rate": 4.7309725158562366e-05, "loss": 0.4999, "step": 1024 }, { "epoch": 1.08, "learning_rate": 4.730443974630021e-05, "loss": 0.5096, "step": 1026 }, { "epoch": 1.09, "learning_rate": 4.729915433403806e-05, "loss": 0.4208, "step": 1028 }, { "epoch": 1.09, "learning_rate": 4.7293868921775904e-05, "loss": 0.6277, "step": 1030 }, { "epoch": 1.09, "learning_rate": 4.728858350951374e-05, "loss": 0.7658, "step": 1032 }, { "epoch": 1.09, "learning_rate": 4.728329809725159e-05, "loss": 0.7402, "step": 1034 }, { "epoch": 1.1, "learning_rate": 4.727801268498943e-05, "loss": 0.5968, "step": 1036 }, { "epoch": 1.1, "learning_rate": 4.7272727272727275e-05, "loss": 0.5667, "step": 1038 }, { "epoch": 1.1, "learning_rate": 4.7267441860465114e-05, "loss": 0.5122, "step": 1040 }, { "epoch": 1.1, "learning_rate": 4.726215644820296e-05, "loss": 0.5836, "step": 1042 }, { "epoch": 1.1, "learning_rate": 4.7256871035940806e-05, "loss": 0.5438, "step": 1044 }, { "epoch": 1.11, "learning_rate": 4.725158562367865e-05, "loss": 0.5334, "step": 1046 }, { "epoch": 1.11, "learning_rate": 4.724630021141649e-05, "loss": 0.5546, "step": 1048 }, { "epoch": 1.11, "learning_rate": 4.724101479915434e-05, "loss": 0.63, "step": 1050 }, { "epoch": 1.11, "learning_rate": 4.723572938689218e-05, "loss": 0.4592, "step": 1052 }, { "epoch": 1.11, "learning_rate": 4.723044397463002e-05, "loss": 0.3419, "step": 1054 }, { "epoch": 1.12, "learning_rate": 4.722515856236787e-05, "loss": 0.5825, "step": 1056 }, { "epoch": 1.12, "learning_rate": 4.721987315010571e-05, "loss": 0.5554, "step": 1058 }, { "epoch": 1.12, "learning_rate": 4.7214587737843554e-05, "loss": 0.5521, "step": 1060 }, { "epoch": 1.12, "learning_rate": 4.7209302325581394e-05, "loss": 0.6327, "step": 1062 }, { "epoch": 1.12, "learning_rate": 4.7204016913319246e-05, "loss": 0.7493, "step": 1064 }, { "epoch": 1.13, "learning_rate": 4.7198731501057086e-05, "loss": 0.6406, "step": 1066 }, { "epoch": 1.13, "learning_rate": 4.719344608879493e-05, "loss": 0.6063, "step": 1068 }, { "epoch": 1.13, "learning_rate": 4.718816067653277e-05, "loss": 0.7856, "step": 1070 }, { "epoch": 1.13, "learning_rate": 4.718287526427062e-05, "loss": 0.9146, "step": 1072 }, { "epoch": 1.14, "learning_rate": 4.7177589852008456e-05, "loss": 0.9914, "step": 1074 }, { "epoch": 1.14, "learning_rate": 4.71723044397463e-05, "loss": 0.6502, "step": 1076 }, { "epoch": 1.14, "learning_rate": 4.716701902748414e-05, "loss": 0.5753, "step": 1078 }, { "epoch": 1.14, "learning_rate": 4.716173361522199e-05, "loss": 0.6573, "step": 1080 }, { "epoch": 1.14, "learning_rate": 4.7156448202959834e-05, "loss": 0.6994, "step": 1082 }, { "epoch": 1.15, "learning_rate": 4.715116279069768e-05, "loss": 0.4771, "step": 1084 }, { "epoch": 1.15, "learning_rate": 4.714587737843552e-05, "loss": 0.4544, "step": 1086 }, { "epoch": 1.15, "learning_rate": 4.7140591966173365e-05, "loss": 0.5647, "step": 1088 }, { "epoch": 1.15, "learning_rate": 4.7135306553911205e-05, "loss": 0.6566, "step": 1090 }, { "epoch": 1.15, "learning_rate": 4.713002114164905e-05, "loss": 0.5029, "step": 1092 }, { "epoch": 1.16, "learning_rate": 4.712473572938689e-05, "loss": 0.6242, "step": 1094 }, { "epoch": 1.16, "learning_rate": 4.7119450317124736e-05, "loss": 0.5858, "step": 1096 }, { "epoch": 1.16, "learning_rate": 4.711416490486258e-05, "loss": 0.4173, "step": 1098 }, { "epoch": 1.16, "learning_rate": 4.710887949260043e-05, "loss": 0.4745, "step": 1100 }, { "epoch": 1.16, "learning_rate": 4.710359408033827e-05, "loss": 0.6373, "step": 1102 }, { "epoch": 1.17, "learning_rate": 4.7098308668076113e-05, "loss": 0.6338, "step": 1104 }, { "epoch": 1.17, "learning_rate": 4.709302325581396e-05, "loss": 0.502, "step": 1106 }, { "epoch": 1.17, "learning_rate": 4.70877378435518e-05, "loss": 0.5438, "step": 1108 }, { "epoch": 1.17, "learning_rate": 4.7082452431289645e-05, "loss": 0.4672, "step": 1110 }, { "epoch": 1.18, "learning_rate": 4.7077167019027484e-05, "loss": 0.3756, "step": 1112 }, { "epoch": 1.18, "learning_rate": 4.707188160676533e-05, "loss": 0.5381, "step": 1114 }, { "epoch": 1.18, "learning_rate": 4.706659619450317e-05, "loss": 0.4322, "step": 1116 }, { "epoch": 1.18, "learning_rate": 4.706131078224102e-05, "loss": 0.5286, "step": 1118 }, { "epoch": 1.18, "learning_rate": 4.705602536997886e-05, "loss": 0.3846, "step": 1120 }, { "epoch": 1.19, "learning_rate": 4.705073995771671e-05, "loss": 0.6808, "step": 1122 }, { "epoch": 1.19, "learning_rate": 4.704545454545455e-05, "loss": 0.501, "step": 1124 }, { "epoch": 1.19, "learning_rate": 4.704016913319239e-05, "loss": 0.514, "step": 1126 }, { "epoch": 1.19, "learning_rate": 4.703488372093023e-05, "loss": 0.6151, "step": 1128 }, { "epoch": 1.19, "learning_rate": 4.702959830866808e-05, "loss": 0.6041, "step": 1130 }, { "epoch": 1.2, "learning_rate": 4.702431289640592e-05, "loss": 0.6237, "step": 1132 }, { "epoch": 1.2, "learning_rate": 4.7019027484143764e-05, "loss": 0.4988, "step": 1134 }, { "epoch": 1.2, "learning_rate": 4.701374207188161e-05, "loss": 0.5912, "step": 1136 }, { "epoch": 1.2, "learning_rate": 4.7008456659619456e-05, "loss": 0.6301, "step": 1138 }, { "epoch": 1.21, "learning_rate": 4.7003171247357295e-05, "loss": 0.5094, "step": 1140 }, { "epoch": 1.21, "learning_rate": 4.699788583509514e-05, "loss": 0.6045, "step": 1142 }, { "epoch": 1.21, "learning_rate": 4.699260042283298e-05, "loss": 0.5534, "step": 1144 }, { "epoch": 1.21, "learning_rate": 4.6987315010570826e-05, "loss": 0.649, "step": 1146 }, { "epoch": 1.21, "learning_rate": 4.6982029598308666e-05, "loss": 0.4673, "step": 1148 }, { "epoch": 1.22, "learning_rate": 4.697674418604651e-05, "loss": 0.6251, "step": 1150 }, { "epoch": 1.22, "learning_rate": 4.697145877378436e-05, "loss": 0.6932, "step": 1152 }, { "epoch": 1.22, "learning_rate": 4.6966173361522204e-05, "loss": 0.7142, "step": 1154 }, { "epoch": 1.22, "learning_rate": 4.696088794926004e-05, "loss": 0.5804, "step": 1156 }, { "epoch": 1.22, "learning_rate": 4.695560253699789e-05, "loss": 0.8723, "step": 1158 }, { "epoch": 1.23, "learning_rate": 4.6950317124735735e-05, "loss": 0.4948, "step": 1160 }, { "epoch": 1.23, "learning_rate": 4.6945031712473575e-05, "loss": 0.5645, "step": 1162 }, { "epoch": 1.23, "learning_rate": 4.693974630021142e-05, "loss": 0.4606, "step": 1164 }, { "epoch": 1.23, "learning_rate": 4.693446088794926e-05, "loss": 0.5359, "step": 1166 }, { "epoch": 1.23, "learning_rate": 4.6929175475687106e-05, "loss": 0.4132, "step": 1168 }, { "epoch": 1.24, "learning_rate": 4.6923890063424945e-05, "loss": 0.6268, "step": 1170 }, { "epoch": 1.24, "learning_rate": 4.69186046511628e-05, "loss": 0.5611, "step": 1172 }, { "epoch": 1.24, "learning_rate": 4.691331923890064e-05, "loss": 0.6392, "step": 1174 }, { "epoch": 1.24, "learning_rate": 4.6908033826638483e-05, "loss": 0.8275, "step": 1176 }, { "epoch": 1.25, "learning_rate": 4.690274841437632e-05, "loss": 0.7045, "step": 1178 }, { "epoch": 1.25, "learning_rate": 4.689746300211417e-05, "loss": 0.7826, "step": 1180 }, { "epoch": 1.25, "learning_rate": 4.689217758985201e-05, "loss": 0.7238, "step": 1182 }, { "epoch": 1.25, "learning_rate": 4.6886892177589854e-05, "loss": 0.4435, "step": 1184 }, { "epoch": 1.25, "learning_rate": 4.6881606765327693e-05, "loss": 0.5589, "step": 1186 }, { "epoch": 1.26, "learning_rate": 4.687632135306554e-05, "loss": 0.5764, "step": 1188 }, { "epoch": 1.26, "learning_rate": 4.6871035940803386e-05, "loss": 0.4692, "step": 1190 }, { "epoch": 1.26, "learning_rate": 4.686575052854123e-05, "loss": 0.4907, "step": 1192 }, { "epoch": 1.26, "learning_rate": 4.686046511627907e-05, "loss": 0.5897, "step": 1194 }, { "epoch": 1.26, "learning_rate": 4.685517970401692e-05, "loss": 0.4332, "step": 1196 }, { "epoch": 1.27, "learning_rate": 4.6849894291754756e-05, "loss": 0.3469, "step": 1198 }, { "epoch": 1.27, "learning_rate": 4.68446088794926e-05, "loss": 0.592, "step": 1200 }, { "epoch": 1.27, "learning_rate": 4.683932346723044e-05, "loss": 0.6763, "step": 1202 }, { "epoch": 1.27, "learning_rate": 4.683403805496829e-05, "loss": 0.9583, "step": 1204 }, { "epoch": 1.27, "learning_rate": 4.6828752642706134e-05, "loss": 0.5109, "step": 1206 }, { "epoch": 1.28, "learning_rate": 4.682346723044398e-05, "loss": 0.631, "step": 1208 }, { "epoch": 1.28, "learning_rate": 4.681818181818182e-05, "loss": 0.7334, "step": 1210 }, { "epoch": 1.28, "learning_rate": 4.6812896405919665e-05, "loss": 0.6342, "step": 1212 }, { "epoch": 1.28, "learning_rate": 4.680761099365751e-05, "loss": 0.6196, "step": 1214 }, { "epoch": 1.29, "learning_rate": 4.680232558139535e-05, "loss": 0.5315, "step": 1216 }, { "epoch": 1.29, "learning_rate": 4.6797040169133196e-05, "loss": 0.5387, "step": 1218 }, { "epoch": 1.29, "learning_rate": 4.6791754756871036e-05, "loss": 0.8501, "step": 1220 }, { "epoch": 1.29, "learning_rate": 4.678646934460888e-05, "loss": 0.8125, "step": 1222 }, { "epoch": 1.29, "learning_rate": 4.678118393234672e-05, "loss": 0.6331, "step": 1224 }, { "epoch": 1.3, "learning_rate": 4.6775898520084574e-05, "loss": 0.5622, "step": 1226 }, { "epoch": 1.3, "learning_rate": 4.677061310782241e-05, "loss": 0.5663, "step": 1228 }, { "epoch": 1.3, "learning_rate": 4.676532769556026e-05, "loss": 0.5199, "step": 1230 }, { "epoch": 1.3, "learning_rate": 4.67600422832981e-05, "loss": 0.8869, "step": 1232 }, { "epoch": 1.3, "learning_rate": 4.6754756871035945e-05, "loss": 0.6617, "step": 1234 }, { "epoch": 1.31, "learning_rate": 4.6749471458773784e-05, "loss": 0.4335, "step": 1236 }, { "epoch": 1.31, "learning_rate": 4.674418604651163e-05, "loss": 0.8475, "step": 1238 }, { "epoch": 1.31, "learning_rate": 4.673890063424947e-05, "loss": 0.5508, "step": 1240 }, { "epoch": 1.31, "learning_rate": 4.6733615221987315e-05, "loss": 0.6348, "step": 1242 }, { "epoch": 1.32, "learning_rate": 4.672832980972516e-05, "loss": 0.6159, "step": 1244 }, { "epoch": 1.32, "learning_rate": 4.672304439746301e-05, "loss": 0.477, "step": 1246 }, { "epoch": 1.32, "learning_rate": 4.671775898520085e-05, "loss": 0.5092, "step": 1248 }, { "epoch": 1.32, "learning_rate": 4.671247357293869e-05, "loss": 0.5635, "step": 1250 }, { "epoch": 1.32, "learning_rate": 4.670718816067653e-05, "loss": 0.8466, "step": 1252 }, { "epoch": 1.33, "learning_rate": 4.670190274841438e-05, "loss": 0.5669, "step": 1254 }, { "epoch": 1.33, "learning_rate": 4.6696617336152224e-05, "loss": 0.6561, "step": 1256 }, { "epoch": 1.33, "learning_rate": 4.6691331923890063e-05, "loss": 0.7537, "step": 1258 }, { "epoch": 1.33, "learning_rate": 4.668604651162791e-05, "loss": 0.5566, "step": 1260 }, { "epoch": 1.33, "learning_rate": 4.6680761099365756e-05, "loss": 1.1829, "step": 1262 }, { "epoch": 1.34, "learning_rate": 4.6675475687103595e-05, "loss": 0.8556, "step": 1264 }, { "epoch": 1.34, "learning_rate": 4.667019027484144e-05, "loss": 0.6861, "step": 1266 }, { "epoch": 1.34, "learning_rate": 4.666490486257929e-05, "loss": 0.634, "step": 1268 }, { "epoch": 1.34, "learning_rate": 4.6659619450317126e-05, "loss": 0.6609, "step": 1270 }, { "epoch": 1.34, "learning_rate": 4.665433403805497e-05, "loss": 0.6384, "step": 1272 }, { "epoch": 1.35, "learning_rate": 4.664904862579281e-05, "loss": 0.4699, "step": 1274 }, { "epoch": 1.35, "learning_rate": 4.664376321353066e-05, "loss": 0.6589, "step": 1276 }, { "epoch": 1.35, "learning_rate": 4.66384778012685e-05, "loss": 0.6271, "step": 1278 }, { "epoch": 1.35, "learning_rate": 4.663319238900635e-05, "loss": 0.7925, "step": 1280 }, { "epoch": 1.36, "learning_rate": 4.662790697674419e-05, "loss": 0.7247, "step": 1282 }, { "epoch": 1.36, "learning_rate": 4.6622621564482035e-05, "loss": 0.5185, "step": 1284 }, { "epoch": 1.36, "learning_rate": 4.6617336152219874e-05, "loss": 0.5875, "step": 1286 }, { "epoch": 1.36, "learning_rate": 4.661205073995772e-05, "loss": 0.6124, "step": 1288 }, { "epoch": 1.36, "learning_rate": 4.660676532769556e-05, "loss": 0.7029, "step": 1290 }, { "epoch": 1.37, "learning_rate": 4.6601479915433406e-05, "loss": 0.5547, "step": 1292 }, { "epoch": 1.37, "learning_rate": 4.6596194503171245e-05, "loss": 0.8943, "step": 1294 }, { "epoch": 1.37, "learning_rate": 4.659090909090909e-05, "loss": 0.5128, "step": 1296 }, { "epoch": 1.37, "learning_rate": 4.658562367864694e-05, "loss": 0.5178, "step": 1298 }, { "epoch": 1.37, "learning_rate": 4.658033826638478e-05, "loss": 0.4883, "step": 1300 }, { "epoch": 1.38, "learning_rate": 4.657505285412262e-05, "loss": 0.7211, "step": 1302 }, { "epoch": 1.38, "learning_rate": 4.656976744186047e-05, "loss": 0.4854, "step": 1304 }, { "epoch": 1.38, "learning_rate": 4.656448202959831e-05, "loss": 0.5831, "step": 1306 }, { "epoch": 1.38, "learning_rate": 4.6559196617336154e-05, "loss": 0.4782, "step": 1308 }, { "epoch": 1.38, "learning_rate": 4.6553911205074e-05, "loss": 0.6382, "step": 1310 }, { "epoch": 1.39, "learning_rate": 4.654862579281184e-05, "loss": 0.3921, "step": 1312 }, { "epoch": 1.39, "learning_rate": 4.6543340380549685e-05, "loss": 0.4697, "step": 1314 }, { "epoch": 1.39, "learning_rate": 4.653805496828753e-05, "loss": 0.8628, "step": 1316 }, { "epoch": 1.39, "learning_rate": 4.653276955602537e-05, "loss": 0.6685, "step": 1318 }, { "epoch": 1.4, "learning_rate": 4.652748414376322e-05, "loss": 0.7783, "step": 1320 }, { "epoch": 1.4, "learning_rate": 4.652219873150106e-05, "loss": 0.4967, "step": 1322 }, { "epoch": 1.4, "learning_rate": 4.65169133192389e-05, "loss": 0.7527, "step": 1324 }, { "epoch": 1.4, "learning_rate": 4.651162790697675e-05, "loss": 0.9232, "step": 1326 }, { "epoch": 1.4, "learning_rate": 4.650634249471459e-05, "loss": 0.7172, "step": 1328 }, { "epoch": 1.41, "learning_rate": 4.6501057082452433e-05, "loss": 0.7406, "step": 1330 }, { "epoch": 1.41, "learning_rate": 4.649577167019027e-05, "loss": 0.5102, "step": 1332 }, { "epoch": 1.41, "learning_rate": 4.6490486257928126e-05, "loss": 0.7775, "step": 1334 }, { "epoch": 1.41, "learning_rate": 4.6485200845665965e-05, "loss": 0.6671, "step": 1336 }, { "epoch": 1.41, "learning_rate": 4.647991543340381e-05, "loss": 0.726, "step": 1338 }, { "epoch": 1.42, "learning_rate": 4.647463002114165e-05, "loss": 0.7379, "step": 1340 }, { "epoch": 1.42, "learning_rate": 4.6469344608879496e-05, "loss": 0.5184, "step": 1342 }, { "epoch": 1.42, "learning_rate": 4.6464059196617336e-05, "loss": 0.774, "step": 1344 }, { "epoch": 1.42, "learning_rate": 4.645877378435518e-05, "loss": 0.7082, "step": 1346 }, { "epoch": 1.42, "learning_rate": 4.645348837209302e-05, "loss": 0.655, "step": 1348 }, { "epoch": 1.43, "learning_rate": 4.644820295983087e-05, "loss": 0.5688, "step": 1350 }, { "epoch": 1.43, "learning_rate": 4.644291754756871e-05, "loss": 0.5803, "step": 1352 }, { "epoch": 1.43, "learning_rate": 4.643763213530656e-05, "loss": 0.5136, "step": 1354 }, { "epoch": 1.43, "learning_rate": 4.64323467230444e-05, "loss": 0.5327, "step": 1356 }, { "epoch": 1.44, "learning_rate": 4.6427061310782244e-05, "loss": 0.3571, "step": 1358 }, { "epoch": 1.44, "learning_rate": 4.6421775898520084e-05, "loss": 0.61, "step": 1360 }, { "epoch": 1.44, "learning_rate": 4.641649048625793e-05, "loss": 0.6218, "step": 1362 }, { "epoch": 1.44, "learning_rate": 4.6411205073995776e-05, "loss": 0.5032, "step": 1364 }, { "epoch": 1.44, "learning_rate": 4.6405919661733615e-05, "loss": 0.6186, "step": 1366 }, { "epoch": 1.45, "learning_rate": 4.640063424947146e-05, "loss": 0.5374, "step": 1368 }, { "epoch": 1.45, "learning_rate": 4.639534883720931e-05, "loss": 0.5657, "step": 1370 }, { "epoch": 1.45, "learning_rate": 4.639006342494715e-05, "loss": 0.6641, "step": 1372 }, { "epoch": 1.45, "learning_rate": 4.638477801268499e-05, "loss": 0.6886, "step": 1374 }, { "epoch": 1.45, "learning_rate": 4.637949260042284e-05, "loss": 0.7777, "step": 1376 }, { "epoch": 1.46, "learning_rate": 4.637420718816068e-05, "loss": 0.5609, "step": 1378 }, { "epoch": 1.46, "learning_rate": 4.6368921775898524e-05, "loss": 0.6644, "step": 1380 }, { "epoch": 1.46, "learning_rate": 4.636363636363636e-05, "loss": 0.6055, "step": 1382 }, { "epoch": 1.46, "learning_rate": 4.635835095137421e-05, "loss": 0.579, "step": 1384 }, { "epoch": 1.47, "learning_rate": 4.635306553911205e-05, "loss": 0.6299, "step": 1386 }, { "epoch": 1.47, "learning_rate": 4.63477801268499e-05, "loss": 0.3263, "step": 1388 }, { "epoch": 1.47, "learning_rate": 4.634249471458774e-05, "loss": 0.4689, "step": 1390 }, { "epoch": 1.47, "learning_rate": 4.633720930232559e-05, "loss": 0.596, "step": 1392 }, { "epoch": 1.47, "learning_rate": 4.6331923890063426e-05, "loss": 0.6729, "step": 1394 }, { "epoch": 1.48, "learning_rate": 4.632663847780127e-05, "loss": 0.6163, "step": 1396 }, { "epoch": 1.48, "learning_rate": 4.632135306553911e-05, "loss": 0.5728, "step": 1398 }, { "epoch": 1.48, "learning_rate": 4.631606765327696e-05, "loss": 0.5581, "step": 1400 }, { "epoch": 1.48, "learning_rate": 4.63107822410148e-05, "loss": 0.5471, "step": 1402 }, { "epoch": 1.48, "learning_rate": 4.630549682875264e-05, "loss": 0.6324, "step": 1404 }, { "epoch": 1.49, "learning_rate": 4.630021141649049e-05, "loss": 0.4537, "step": 1406 }, { "epoch": 1.49, "learning_rate": 4.6294926004228335e-05, "loss": 0.5605, "step": 1408 }, { "epoch": 1.49, "learning_rate": 4.6289640591966174e-05, "loss": 0.3762, "step": 1410 }, { "epoch": 1.49, "learning_rate": 4.628435517970402e-05, "loss": 0.4534, "step": 1412 }, { "epoch": 1.49, "learning_rate": 4.627906976744186e-05, "loss": 0.4939, "step": 1414 }, { "epoch": 1.5, "learning_rate": 4.6273784355179706e-05, "loss": 0.5225, "step": 1416 }, { "epoch": 1.5, "learning_rate": 4.626849894291755e-05, "loss": 0.3952, "step": 1418 }, { "epoch": 1.5, "learning_rate": 4.626321353065539e-05, "loss": 0.4838, "step": 1420 }, { "epoch": 1.5, "learning_rate": 4.625792811839324e-05, "loss": 0.5493, "step": 1422 }, { "epoch": 1.51, "learning_rate": 4.625264270613108e-05, "loss": 0.7322, "step": 1424 }, { "epoch": 1.51, "learning_rate": 4.624735729386893e-05, "loss": 0.6912, "step": 1426 }, { "epoch": 1.51, "learning_rate": 4.624207188160677e-05, "loss": 0.5979, "step": 1428 }, { "epoch": 1.51, "learning_rate": 4.6236786469344614e-05, "loss": 0.5229, "step": 1430 }, { "epoch": 1.51, "learning_rate": 4.6231501057082454e-05, "loss": 0.5489, "step": 1432 }, { "epoch": 1.52, "learning_rate": 4.62262156448203e-05, "loss": 0.6757, "step": 1434 }, { "epoch": 1.52, "learning_rate": 4.622093023255814e-05, "loss": 0.8744, "step": 1436 }, { "epoch": 1.52, "learning_rate": 4.6215644820295985e-05, "loss": 0.6652, "step": 1438 }, { "epoch": 1.52, "learning_rate": 4.6210359408033824e-05, "loss": 0.7747, "step": 1440 }, { "epoch": 1.52, "learning_rate": 4.620507399577168e-05, "loss": 0.8047, "step": 1442 }, { "epoch": 1.53, "learning_rate": 4.6199788583509517e-05, "loss": 0.6374, "step": 1444 }, { "epoch": 1.53, "learning_rate": 4.619450317124736e-05, "loss": 0.8522, "step": 1446 }, { "epoch": 1.53, "learning_rate": 4.61892177589852e-05, "loss": 1.1201, "step": 1448 }, { "epoch": 1.53, "learning_rate": 4.618393234672305e-05, "loss": 0.7091, "step": 1450 }, { "epoch": 1.53, "learning_rate": 4.617864693446089e-05, "loss": 0.6903, "step": 1452 }, { "epoch": 1.54, "learning_rate": 4.617336152219873e-05, "loss": 0.7191, "step": 1454 }, { "epoch": 1.54, "learning_rate": 4.616807610993657e-05, "loss": 0.6447, "step": 1456 }, { "epoch": 1.54, "learning_rate": 4.616279069767442e-05, "loss": 0.5796, "step": 1458 }, { "epoch": 1.54, "learning_rate": 4.6157505285412265e-05, "loss": 0.4474, "step": 1460 }, { "epoch": 1.55, "learning_rate": 4.615221987315011e-05, "loss": 0.6297, "step": 1462 }, { "epoch": 1.55, "learning_rate": 4.614693446088795e-05, "loss": 0.7815, "step": 1464 }, { "epoch": 1.55, "learning_rate": 4.6141649048625796e-05, "loss": 0.4299, "step": 1466 }, { "epoch": 1.55, "learning_rate": 4.6136363636363635e-05, "loss": 0.6843, "step": 1468 }, { "epoch": 1.55, "learning_rate": 4.613107822410148e-05, "loss": 0.3767, "step": 1470 }, { "epoch": 1.56, "learning_rate": 4.612579281183933e-05, "loss": 0.6761, "step": 1472 }, { "epoch": 1.56, "learning_rate": 4.612050739957717e-05, "loss": 0.9297, "step": 1474 }, { "epoch": 1.56, "learning_rate": 4.611522198731501e-05, "loss": 0.8386, "step": 1476 }, { "epoch": 1.56, "learning_rate": 4.610993657505286e-05, "loss": 0.8722, "step": 1478 }, { "epoch": 1.56, "learning_rate": 4.6104651162790705e-05, "loss": 0.8477, "step": 1480 }, { "epoch": 1.57, "learning_rate": 4.6099365750528544e-05, "loss": 0.5865, "step": 1482 }, { "epoch": 1.57, "learning_rate": 4.609408033826639e-05, "loss": 0.7178, "step": 1484 }, { "epoch": 1.57, "learning_rate": 4.608879492600423e-05, "loss": 0.5507, "step": 1486 }, { "epoch": 1.57, "learning_rate": 4.6083509513742076e-05, "loss": 0.5724, "step": 1488 }, { "epoch": 1.58, "learning_rate": 4.6078224101479915e-05, "loss": 0.5513, "step": 1490 }, { "epoch": 1.58, "learning_rate": 4.607293868921776e-05, "loss": 0.7183, "step": 1492 }, { "epoch": 1.58, "learning_rate": 4.60676532769556e-05, "loss": 0.4628, "step": 1494 }, { "epoch": 1.58, "learning_rate": 4.606236786469345e-05, "loss": 0.4151, "step": 1496 }, { "epoch": 1.58, "learning_rate": 4.605708245243129e-05, "loss": 0.665, "step": 1498 }, { "epoch": 1.59, "learning_rate": 4.605179704016914e-05, "loss": 0.4722, "step": 1500 }, { "epoch": 1.59, "eval_cer": 0.06184098033627814, "eval_loss": 0.5636065006256104, "eval_runtime": 127.4465, "eval_samples_per_second": 6.599, "eval_steps_per_second": 0.832, "step": 1500 }, { "epoch": 1.59, "learning_rate": 4.604651162790698e-05, "loss": 0.4211, "step": 1502 }, { "epoch": 1.59, "learning_rate": 4.6041226215644824e-05, "loss": 0.7062, "step": 1504 }, { "epoch": 1.59, "learning_rate": 4.603594080338266e-05, "loss": 0.5417, "step": 1506 }, { "epoch": 1.59, "learning_rate": 4.603065539112051e-05, "loss": 0.6403, "step": 1508 }, { "epoch": 1.6, "learning_rate": 4.602536997885835e-05, "loss": 0.6087, "step": 1510 }, { "epoch": 1.6, "learning_rate": 4.6020084566596194e-05, "loss": 0.5924, "step": 1512 }, { "epoch": 1.6, "learning_rate": 4.601479915433404e-05, "loss": 0.457, "step": 1514 }, { "epoch": 1.6, "learning_rate": 4.600951374207189e-05, "loss": 0.5073, "step": 1516 }, { "epoch": 1.6, "learning_rate": 4.6004228329809726e-05, "loss": 0.4704, "step": 1518 }, { "epoch": 1.61, "learning_rate": 4.599894291754757e-05, "loss": 0.6511, "step": 1520 }, { "epoch": 1.61, "learning_rate": 4.599365750528541e-05, "loss": 0.5483, "step": 1522 }, { "epoch": 1.61, "learning_rate": 4.598837209302326e-05, "loss": 0.5997, "step": 1524 }, { "epoch": 1.61, "learning_rate": 4.59830866807611e-05, "loss": 0.4576, "step": 1526 }, { "epoch": 1.62, "learning_rate": 4.597780126849894e-05, "loss": 0.6991, "step": 1528 }, { "epoch": 1.62, "learning_rate": 4.597251585623679e-05, "loss": 0.7717, "step": 1530 }, { "epoch": 1.62, "learning_rate": 4.5967230443974635e-05, "loss": 0.6123, "step": 1532 }, { "epoch": 1.62, "learning_rate": 4.596194503171248e-05, "loss": 0.5343, "step": 1534 }, { "epoch": 1.62, "learning_rate": 4.595665961945032e-05, "loss": 0.4572, "step": 1536 }, { "epoch": 1.63, "learning_rate": 4.5951374207188166e-05, "loss": 0.8137, "step": 1538 }, { "epoch": 1.63, "learning_rate": 4.5946088794926005e-05, "loss": 0.4088, "step": 1540 }, { "epoch": 1.63, "learning_rate": 4.594080338266385e-05, "loss": 0.5861, "step": 1542 }, { "epoch": 1.63, "learning_rate": 4.593551797040169e-05, "loss": 0.5733, "step": 1544 }, { "epoch": 1.63, "learning_rate": 4.593023255813954e-05, "loss": 0.6435, "step": 1546 }, { "epoch": 1.64, "learning_rate": 4.5924947145877376e-05, "loss": 0.4275, "step": 1548 }, { "epoch": 1.64, "learning_rate": 4.591966173361523e-05, "loss": 0.4566, "step": 1550 }, { "epoch": 1.64, "learning_rate": 4.591437632135307e-05, "loss": 0.5268, "step": 1552 }, { "epoch": 1.64, "learning_rate": 4.5909090909090914e-05, "loss": 0.4623, "step": 1554 }, { "epoch": 1.64, "learning_rate": 4.5903805496828754e-05, "loss": 0.5294, "step": 1556 }, { "epoch": 1.65, "learning_rate": 4.58985200845666e-05, "loss": 0.3979, "step": 1558 }, { "epoch": 1.65, "learning_rate": 4.589323467230444e-05, "loss": 0.5877, "step": 1560 }, { "epoch": 1.65, "learning_rate": 4.5887949260042285e-05, "loss": 0.6169, "step": 1562 }, { "epoch": 1.65, "learning_rate": 4.5882663847780124e-05, "loss": 0.6217, "step": 1564 }, { "epoch": 1.66, "learning_rate": 4.587737843551797e-05, "loss": 0.5586, "step": 1566 }, { "epoch": 1.66, "learning_rate": 4.5872093023255816e-05, "loss": 0.8468, "step": 1568 }, { "epoch": 1.66, "learning_rate": 4.586680761099366e-05, "loss": 0.673, "step": 1570 }, { "epoch": 1.66, "learning_rate": 4.58615221987315e-05, "loss": 0.6664, "step": 1572 }, { "epoch": 1.66, "learning_rate": 4.585623678646935e-05, "loss": 0.644, "step": 1574 }, { "epoch": 1.67, "learning_rate": 4.5850951374207194e-05, "loss": 0.7479, "step": 1576 }, { "epoch": 1.67, "learning_rate": 4.584566596194503e-05, "loss": 0.6967, "step": 1578 }, { "epoch": 1.67, "learning_rate": 4.584038054968288e-05, "loss": 0.4362, "step": 1580 }, { "epoch": 1.67, "learning_rate": 4.583509513742072e-05, "loss": 0.6123, "step": 1582 }, { "epoch": 1.67, "learning_rate": 4.5829809725158565e-05, "loss": 0.6258, "step": 1584 }, { "epoch": 1.68, "learning_rate": 4.582452431289641e-05, "loss": 0.6673, "step": 1586 }, { "epoch": 1.68, "learning_rate": 4.581923890063426e-05, "loss": 0.7197, "step": 1588 }, { "epoch": 1.68, "learning_rate": 4.5813953488372096e-05, "loss": 0.6184, "step": 1590 }, { "epoch": 1.68, "learning_rate": 4.580866807610994e-05, "loss": 0.4795, "step": 1592 }, { "epoch": 1.68, "learning_rate": 4.580338266384778e-05, "loss": 0.5485, "step": 1594 }, { "epoch": 1.69, "learning_rate": 4.579809725158563e-05, "loss": 0.6806, "step": 1596 }, { "epoch": 1.69, "learning_rate": 4.579281183932347e-05, "loss": 0.735, "step": 1598 }, { "epoch": 1.69, "learning_rate": 4.578752642706131e-05, "loss": 0.6628, "step": 1600 }, { "epoch": 1.69, "learning_rate": 4.578224101479915e-05, "loss": 0.7737, "step": 1602 }, { "epoch": 1.7, "learning_rate": 4.5776955602537005e-05, "loss": 0.7288, "step": 1604 }, { "epoch": 1.7, "learning_rate": 4.5771670190274844e-05, "loss": 0.4968, "step": 1606 }, { "epoch": 1.7, "learning_rate": 4.576638477801269e-05, "loss": 0.4477, "step": 1608 }, { "epoch": 1.7, "learning_rate": 4.576109936575053e-05, "loss": 0.5333, "step": 1610 }, { "epoch": 1.7, "learning_rate": 4.5755813953488375e-05, "loss": 0.4593, "step": 1612 }, { "epoch": 1.71, "learning_rate": 4.5750528541226215e-05, "loss": 0.516, "step": 1614 }, { "epoch": 1.71, "learning_rate": 4.574524312896406e-05, "loss": 0.5752, "step": 1616 }, { "epoch": 1.71, "learning_rate": 4.57399577167019e-05, "loss": 0.3994, "step": 1618 }, { "epoch": 1.71, "learning_rate": 4.5734672304439746e-05, "loss": 0.4733, "step": 1620 }, { "epoch": 1.71, "learning_rate": 4.572938689217759e-05, "loss": 0.5311, "step": 1622 }, { "epoch": 1.72, "learning_rate": 4.572410147991544e-05, "loss": 0.7522, "step": 1624 }, { "epoch": 1.72, "learning_rate": 4.571881606765328e-05, "loss": 0.5047, "step": 1626 }, { "epoch": 1.72, "learning_rate": 4.5713530655391124e-05, "loss": 0.6271, "step": 1628 }, { "epoch": 1.72, "learning_rate": 4.570824524312897e-05, "loss": 0.5289, "step": 1630 }, { "epoch": 1.73, "learning_rate": 4.570295983086681e-05, "loss": 0.5299, "step": 1632 }, { "epoch": 1.73, "learning_rate": 4.5697674418604655e-05, "loss": 0.6072, "step": 1634 }, { "epoch": 1.73, "learning_rate": 4.5692389006342494e-05, "loss": 0.5663, "step": 1636 }, { "epoch": 1.73, "learning_rate": 4.568710359408034e-05, "loss": 0.366, "step": 1638 }, { "epoch": 1.73, "learning_rate": 4.5681818181818186e-05, "loss": 0.5508, "step": 1640 }, { "epoch": 1.74, "learning_rate": 4.567653276955603e-05, "loss": 0.3564, "step": 1642 }, { "epoch": 1.74, "learning_rate": 4.567124735729387e-05, "loss": 0.8547, "step": 1644 }, { "epoch": 1.74, "learning_rate": 4.566596194503172e-05, "loss": 0.7649, "step": 1646 }, { "epoch": 1.74, "learning_rate": 4.566067653276956e-05, "loss": 0.866, "step": 1648 }, { "epoch": 1.74, "learning_rate": 4.56553911205074e-05, "loss": 0.3577, "step": 1650 }, { "epoch": 1.75, "learning_rate": 4.565010570824524e-05, "loss": 0.4708, "step": 1652 }, { "epoch": 1.75, "learning_rate": 4.564482029598309e-05, "loss": 0.5484, "step": 1654 }, { "epoch": 1.75, "learning_rate": 4.563953488372093e-05, "loss": 0.5227, "step": 1656 }, { "epoch": 1.75, "learning_rate": 4.563424947145878e-05, "loss": 0.4581, "step": 1658 }, { "epoch": 1.75, "learning_rate": 4.562896405919662e-05, "loss": 0.4865, "step": 1660 }, { "epoch": 1.76, "learning_rate": 4.5623678646934466e-05, "loss": 0.4035, "step": 1662 }, { "epoch": 1.76, "learning_rate": 4.5618393234672305e-05, "loss": 0.445, "step": 1664 }, { "epoch": 1.76, "learning_rate": 4.561310782241015e-05, "loss": 0.3442, "step": 1666 }, { "epoch": 1.76, "learning_rate": 4.560782241014799e-05, "loss": 0.5735, "step": 1668 }, { "epoch": 1.77, "learning_rate": 4.560253699788584e-05, "loss": 0.4627, "step": 1670 }, { "epoch": 1.77, "learning_rate": 4.5597251585623676e-05, "loss": 0.7131, "step": 1672 }, { "epoch": 1.77, "learning_rate": 4.559196617336152e-05, "loss": 0.4328, "step": 1674 }, { "epoch": 1.77, "learning_rate": 4.558668076109937e-05, "loss": 0.5691, "step": 1676 }, { "epoch": 1.77, "learning_rate": 4.5581395348837214e-05, "loss": 0.6523, "step": 1678 }, { "epoch": 1.78, "learning_rate": 4.5576109936575053e-05, "loss": 0.6022, "step": 1680 }, { "epoch": 1.78, "learning_rate": 4.55708245243129e-05, "loss": 0.7375, "step": 1682 }, { "epoch": 1.78, "learning_rate": 4.5565539112050746e-05, "loss": 0.6585, "step": 1684 }, { "epoch": 1.78, "learning_rate": 4.5560253699788585e-05, "loss": 0.8858, "step": 1686 }, { "epoch": 1.78, "learning_rate": 4.555496828752643e-05, "loss": 0.67, "step": 1688 }, { "epoch": 1.79, "learning_rate": 4.554968287526427e-05, "loss": 0.7282, "step": 1690 }, { "epoch": 1.79, "learning_rate": 4.5544397463002116e-05, "loss": 0.6527, "step": 1692 }, { "epoch": 1.79, "learning_rate": 4.553911205073996e-05, "loss": 0.4792, "step": 1694 }, { "epoch": 1.79, "learning_rate": 4.553382663847781e-05, "loss": 0.745, "step": 1696 }, { "epoch": 1.79, "learning_rate": 4.552854122621565e-05, "loss": 0.6185, "step": 1698 }, { "epoch": 1.8, "learning_rate": 4.5523255813953494e-05, "loss": 0.446, "step": 1700 }, { "epoch": 1.8, "learning_rate": 4.551797040169133e-05, "loss": 0.5001, "step": 1702 }, { "epoch": 1.8, "learning_rate": 4.551268498942918e-05, "loss": 0.375, "step": 1704 }, { "epoch": 1.8, "learning_rate": 4.550739957716702e-05, "loss": 0.3736, "step": 1706 }, { "epoch": 1.81, "learning_rate": 4.5502114164904864e-05, "loss": 0.5857, "step": 1708 }, { "epoch": 1.81, "learning_rate": 4.5496828752642704e-05, "loss": 0.4895, "step": 1710 }, { "epoch": 1.81, "learning_rate": 4.549154334038055e-05, "loss": 0.6071, "step": 1712 }, { "epoch": 1.81, "learning_rate": 4.5486257928118396e-05, "loss": 0.5467, "step": 1714 }, { "epoch": 1.81, "learning_rate": 4.548097251585624e-05, "loss": 0.511, "step": 1716 }, { "epoch": 1.82, "learning_rate": 4.547568710359408e-05, "loss": 0.8558, "step": 1718 }, { "epoch": 1.82, "learning_rate": 4.547040169133193e-05, "loss": 0.8073, "step": 1720 }, { "epoch": 1.82, "learning_rate": 4.5465116279069766e-05, "loss": 0.6995, "step": 1722 }, { "epoch": 1.82, "learning_rate": 4.545983086680761e-05, "loss": 0.798, "step": 1724 }, { "epoch": 1.82, "learning_rate": 4.545454545454546e-05, "loss": 0.4806, "step": 1726 }, { "epoch": 1.83, "learning_rate": 4.54492600422833e-05, "loss": 0.4374, "step": 1728 }, { "epoch": 1.83, "learning_rate": 4.5443974630021144e-05, "loss": 0.7523, "step": 1730 }, { "epoch": 1.83, "learning_rate": 4.543868921775899e-05, "loss": 0.5914, "step": 1732 }, { "epoch": 1.83, "learning_rate": 4.543340380549683e-05, "loss": 0.4302, "step": 1734 }, { "epoch": 1.84, "learning_rate": 4.5428118393234675e-05, "loss": 0.7145, "step": 1736 }, { "epoch": 1.84, "learning_rate": 4.542283298097252e-05, "loss": 0.3885, "step": 1738 }, { "epoch": 1.84, "learning_rate": 4.541754756871036e-05, "loss": 0.5144, "step": 1740 }, { "epoch": 1.84, "learning_rate": 4.541226215644821e-05, "loss": 0.6347, "step": 1742 }, { "epoch": 1.84, "learning_rate": 4.5406976744186046e-05, "loss": 0.6682, "step": 1744 }, { "epoch": 1.85, "learning_rate": 4.540169133192389e-05, "loss": 0.422, "step": 1746 }, { "epoch": 1.85, "learning_rate": 4.539640591966173e-05, "loss": 0.417, "step": 1748 }, { "epoch": 1.85, "learning_rate": 4.5391120507399584e-05, "loss": 0.4943, "step": 1750 }, { "epoch": 1.85, "learning_rate": 4.5385835095137423e-05, "loss": 0.6295, "step": 1752 }, { "epoch": 1.85, "learning_rate": 4.538054968287527e-05, "loss": 0.6961, "step": 1754 }, { "epoch": 1.86, "learning_rate": 4.537526427061311e-05, "loss": 0.6064, "step": 1756 }, { "epoch": 1.86, "learning_rate": 4.5369978858350955e-05, "loss": 0.5825, "step": 1758 }, { "epoch": 1.86, "learning_rate": 4.5364693446088794e-05, "loss": 0.5187, "step": 1760 }, { "epoch": 1.86, "learning_rate": 4.535940803382664e-05, "loss": 0.7391, "step": 1762 }, { "epoch": 1.86, "learning_rate": 4.535412262156448e-05, "loss": 0.7237, "step": 1764 }, { "epoch": 1.87, "learning_rate": 4.5348837209302326e-05, "loss": 0.6358, "step": 1766 }, { "epoch": 1.87, "learning_rate": 4.534355179704017e-05, "loss": 0.4581, "step": 1768 }, { "epoch": 1.87, "learning_rate": 4.533826638477802e-05, "loss": 0.3613, "step": 1770 }, { "epoch": 1.87, "learning_rate": 4.533298097251586e-05, "loss": 0.7856, "step": 1772 }, { "epoch": 1.88, "learning_rate": 4.53276955602537e-05, "loss": 0.4416, "step": 1774 }, { "epoch": 1.88, "learning_rate": 4.532241014799154e-05, "loss": 0.5197, "step": 1776 }, { "epoch": 1.88, "learning_rate": 4.531712473572939e-05, "loss": 0.536, "step": 1778 }, { "epoch": 1.88, "learning_rate": 4.5311839323467234e-05, "loss": 0.5705, "step": 1780 }, { "epoch": 1.88, "learning_rate": 4.5306553911205074e-05, "loss": 0.4337, "step": 1782 }, { "epoch": 1.89, "learning_rate": 4.530126849894292e-05, "loss": 0.4389, "step": 1784 }, { "epoch": 1.89, "learning_rate": 4.5295983086680766e-05, "loss": 0.5438, "step": 1786 }, { "epoch": 1.89, "learning_rate": 4.5290697674418605e-05, "loss": 0.4809, "step": 1788 }, { "epoch": 1.89, "learning_rate": 4.528541226215645e-05, "loss": 0.4446, "step": 1790 }, { "epoch": 1.89, "learning_rate": 4.52801268498943e-05, "loss": 0.7674, "step": 1792 }, { "epoch": 1.9, "learning_rate": 4.5274841437632136e-05, "loss": 0.4956, "step": 1794 }, { "epoch": 1.9, "learning_rate": 4.526955602536998e-05, "loss": 0.5586, "step": 1796 }, { "epoch": 1.9, "learning_rate": 4.526427061310782e-05, "loss": 0.5171, "step": 1798 }, { "epoch": 1.9, "learning_rate": 4.525898520084567e-05, "loss": 0.6569, "step": 1800 }, { "epoch": 1.9, "learning_rate": 4.525369978858351e-05, "loss": 0.4957, "step": 1802 }, { "epoch": 1.91, "learning_rate": 4.524841437632136e-05, "loss": 0.4242, "step": 1804 }, { "epoch": 1.91, "learning_rate": 4.52431289640592e-05, "loss": 0.422, "step": 1806 }, { "epoch": 1.91, "learning_rate": 4.5237843551797045e-05, "loss": 0.4738, "step": 1808 }, { "epoch": 1.91, "learning_rate": 4.5232558139534885e-05, "loss": 0.4787, "step": 1810 }, { "epoch": 1.92, "learning_rate": 4.522727272727273e-05, "loss": 0.4131, "step": 1812 }, { "epoch": 1.92, "learning_rate": 4.522198731501057e-05, "loss": 0.5565, "step": 1814 }, { "epoch": 1.92, "learning_rate": 4.5216701902748416e-05, "loss": 0.401, "step": 1816 }, { "epoch": 1.92, "learning_rate": 4.5211416490486255e-05, "loss": 0.4694, "step": 1818 }, { "epoch": 1.92, "learning_rate": 4.52061310782241e-05, "loss": 0.4531, "step": 1820 }, { "epoch": 1.93, "learning_rate": 4.520084566596195e-05, "loss": 0.5779, "step": 1822 }, { "epoch": 1.93, "learning_rate": 4.5195560253699794e-05, "loss": 0.5072, "step": 1824 }, { "epoch": 1.93, "learning_rate": 4.519027484143763e-05, "loss": 0.7621, "step": 1826 }, { "epoch": 1.93, "learning_rate": 4.518498942917548e-05, "loss": 0.45, "step": 1828 }, { "epoch": 1.93, "learning_rate": 4.517970401691332e-05, "loss": 0.663, "step": 1830 }, { "epoch": 1.94, "learning_rate": 4.5174418604651164e-05, "loss": 0.4742, "step": 1832 }, { "epoch": 1.94, "learning_rate": 4.516913319238901e-05, "loss": 0.7337, "step": 1834 }, { "epoch": 1.94, "learning_rate": 4.516384778012685e-05, "loss": 0.3277, "step": 1836 }, { "epoch": 1.94, "learning_rate": 4.5158562367864696e-05, "loss": 0.4131, "step": 1838 }, { "epoch": 1.95, "learning_rate": 4.515327695560254e-05, "loss": 0.5652, "step": 1840 }, { "epoch": 1.95, "learning_rate": 4.514799154334039e-05, "loss": 0.6251, "step": 1842 }, { "epoch": 1.95, "learning_rate": 4.514270613107823e-05, "loss": 0.428, "step": 1844 }, { "epoch": 1.95, "learning_rate": 4.513742071881607e-05, "loss": 0.3634, "step": 1846 }, { "epoch": 1.95, "learning_rate": 4.513213530655391e-05, "loss": 0.4997, "step": 1848 }, { "epoch": 1.96, "learning_rate": 4.512684989429176e-05, "loss": 0.4841, "step": 1850 }, { "epoch": 1.96, "learning_rate": 4.51215644820296e-05, "loss": 0.3301, "step": 1852 }, { "epoch": 1.96, "learning_rate": 4.5116279069767444e-05, "loss": 0.4506, "step": 1854 }, { "epoch": 1.96, "learning_rate": 4.511099365750528e-05, "loss": 0.4708, "step": 1856 }, { "epoch": 1.96, "learning_rate": 4.5105708245243136e-05, "loss": 0.613, "step": 1858 }, { "epoch": 1.97, "learning_rate": 4.5100422832980975e-05, "loss": 0.534, "step": 1860 }, { "epoch": 1.97, "learning_rate": 4.509513742071882e-05, "loss": 0.6985, "step": 1862 }, { "epoch": 1.97, "learning_rate": 4.508985200845666e-05, "loss": 0.6257, "step": 1864 }, { "epoch": 1.97, "learning_rate": 4.5084566596194507e-05, "loss": 0.61, "step": 1866 }, { "epoch": 1.97, "learning_rate": 4.5079281183932346e-05, "loss": 0.4835, "step": 1868 }, { "epoch": 1.98, "learning_rate": 4.507399577167019e-05, "loss": 0.6178, "step": 1870 }, { "epoch": 1.98, "learning_rate": 4.506871035940803e-05, "loss": 0.4642, "step": 1872 }, { "epoch": 1.98, "learning_rate": 4.506342494714588e-05, "loss": 0.616, "step": 1874 }, { "epoch": 1.98, "learning_rate": 4.505813953488372e-05, "loss": 0.6076, "step": 1876 }, { "epoch": 1.99, "learning_rate": 4.505285412262157e-05, "loss": 0.5103, "step": 1878 }, { "epoch": 1.99, "learning_rate": 4.504756871035941e-05, "loss": 0.5121, "step": 1880 }, { "epoch": 1.99, "learning_rate": 4.5042283298097255e-05, "loss": 0.3769, "step": 1882 }, { "epoch": 1.99, "learning_rate": 4.5036997885835094e-05, "loss": 0.3962, "step": 1884 }, { "epoch": 1.99, "learning_rate": 4.503171247357294e-05, "loss": 0.3739, "step": 1886 }, { "epoch": 2.0, "learning_rate": 4.5026427061310786e-05, "loss": 0.3313, "step": 1888 }, { "epoch": 2.0, "learning_rate": 4.5021141649048625e-05, "loss": 0.463, "step": 1890 }, { "epoch": 2.0, "learning_rate": 4.501585623678647e-05, "loss": 0.5002, "step": 1892 }, { "epoch": 2.0, "learning_rate": 4.501057082452432e-05, "loss": 0.3315, "step": 1894 }, { "epoch": 2.0, "learning_rate": 4.5005285412262164e-05, "loss": 0.339, "step": 1896 }, { "epoch": 2.01, "learning_rate": 4.5e-05, "loss": 0.468, "step": 1898 }, { "epoch": 2.01, "learning_rate": 4.499471458773785e-05, "loss": 0.4289, "step": 1900 }, { "epoch": 2.01, "learning_rate": 4.498942917547569e-05, "loss": 0.4292, "step": 1902 }, { "epoch": 2.01, "learning_rate": 4.4984143763213534e-05, "loss": 0.609, "step": 1904 }, { "epoch": 2.01, "learning_rate": 4.4978858350951374e-05, "loss": 0.4683, "step": 1906 }, { "epoch": 2.02, "learning_rate": 4.497357293868922e-05, "loss": 0.4534, "step": 1908 }, { "epoch": 2.02, "learning_rate": 4.496828752642706e-05, "loss": 0.5481, "step": 1910 }, { "epoch": 2.02, "learning_rate": 4.496300211416491e-05, "loss": 0.4727, "step": 1912 }, { "epoch": 2.02, "learning_rate": 4.495771670190275e-05, "loss": 0.4213, "step": 1914 }, { "epoch": 2.03, "learning_rate": 4.49524312896406e-05, "loss": 0.4907, "step": 1916 }, { "epoch": 2.03, "learning_rate": 4.4947145877378436e-05, "loss": 0.5339, "step": 1918 }, { "epoch": 2.03, "learning_rate": 4.494186046511628e-05, "loss": 0.4394, "step": 1920 }, { "epoch": 2.03, "learning_rate": 4.493657505285412e-05, "loss": 0.5539, "step": 1922 }, { "epoch": 2.03, "learning_rate": 4.493128964059197e-05, "loss": 0.4241, "step": 1924 }, { "epoch": 2.04, "learning_rate": 4.492600422832981e-05, "loss": 0.4557, "step": 1926 }, { "epoch": 2.04, "learning_rate": 4.492071881606765e-05, "loss": 0.4038, "step": 1928 }, { "epoch": 2.04, "learning_rate": 4.49154334038055e-05, "loss": 0.4505, "step": 1930 }, { "epoch": 2.04, "learning_rate": 4.4910147991543345e-05, "loss": 0.4646, "step": 1932 }, { "epoch": 2.04, "learning_rate": 4.4904862579281184e-05, "loss": 0.401, "step": 1934 }, { "epoch": 2.05, "learning_rate": 4.489957716701903e-05, "loss": 0.3632, "step": 1936 }, { "epoch": 2.05, "learning_rate": 4.489429175475687e-05, "loss": 0.3835, "step": 1938 }, { "epoch": 2.05, "learning_rate": 4.4889006342494716e-05, "loss": 0.5795, "step": 1940 }, { "epoch": 2.05, "learning_rate": 4.488372093023256e-05, "loss": 0.4528, "step": 1942 }, { "epoch": 2.05, "learning_rate": 4.48784355179704e-05, "loss": 0.4788, "step": 1944 }, { "epoch": 2.06, "learning_rate": 4.487315010570825e-05, "loss": 0.3118, "step": 1946 }, { "epoch": 2.06, "learning_rate": 4.486786469344609e-05, "loss": 0.548, "step": 1948 }, { "epoch": 2.06, "learning_rate": 4.486257928118394e-05, "loss": 0.5891, "step": 1950 }, { "epoch": 2.06, "learning_rate": 4.485729386892178e-05, "loss": 0.5405, "step": 1952 }, { "epoch": 2.07, "learning_rate": 4.4852008456659625e-05, "loss": 0.3774, "step": 1954 }, { "epoch": 2.07, "learning_rate": 4.4846723044397464e-05, "loss": 0.6053, "step": 1956 }, { "epoch": 2.07, "learning_rate": 4.484143763213531e-05, "loss": 0.557, "step": 1958 }, { "epoch": 2.07, "learning_rate": 4.483615221987315e-05, "loss": 0.3202, "step": 1960 }, { "epoch": 2.07, "learning_rate": 4.4830866807610995e-05, "loss": 0.4831, "step": 1962 }, { "epoch": 2.08, "learning_rate": 4.4825581395348835e-05, "loss": 0.3305, "step": 1964 }, { "epoch": 2.08, "learning_rate": 4.482029598308669e-05, "loss": 0.2836, "step": 1966 }, { "epoch": 2.08, "learning_rate": 4.481501057082453e-05, "loss": 0.3986, "step": 1968 }, { "epoch": 2.08, "learning_rate": 4.480972515856237e-05, "loss": 0.3521, "step": 1970 }, { "epoch": 2.08, "learning_rate": 4.480443974630021e-05, "loss": 0.5026, "step": 1972 }, { "epoch": 2.09, "learning_rate": 4.479915433403806e-05, "loss": 0.287, "step": 1974 }, { "epoch": 2.09, "learning_rate": 4.47938689217759e-05, "loss": 0.3817, "step": 1976 }, { "epoch": 2.09, "learning_rate": 4.4788583509513744e-05, "loss": 0.4855, "step": 1978 }, { "epoch": 2.09, "learning_rate": 4.478329809725158e-05, "loss": 0.4728, "step": 1980 }, { "epoch": 2.1, "learning_rate": 4.477801268498943e-05, "loss": 0.3979, "step": 1982 }, { "epoch": 2.1, "learning_rate": 4.4772727272727275e-05, "loss": 0.3166, "step": 1984 }, { "epoch": 2.1, "learning_rate": 4.476744186046512e-05, "loss": 0.5586, "step": 1986 }, { "epoch": 2.1, "learning_rate": 4.476215644820296e-05, "loss": 0.5657, "step": 1988 }, { "epoch": 2.1, "learning_rate": 4.4756871035940806e-05, "loss": 0.4058, "step": 1990 }, { "epoch": 2.11, "learning_rate": 4.475158562367865e-05, "loss": 0.33, "step": 1992 }, { "epoch": 2.11, "learning_rate": 4.474630021141649e-05, "loss": 0.3553, "step": 1994 }, { "epoch": 2.11, "learning_rate": 4.474101479915434e-05, "loss": 0.3877, "step": 1996 }, { "epoch": 2.11, "learning_rate": 4.473572938689218e-05, "loss": 0.481, "step": 1998 }, { "epoch": 2.11, "learning_rate": 4.473044397463002e-05, "loss": 0.4098, "step": 2000 }, { "epoch": 2.11, "eval_cer": 0.054830436021658595, "eval_loss": 0.4985881447792053, "eval_runtime": 132.0888, "eval_samples_per_second": 6.367, "eval_steps_per_second": 0.802, "step": 2000 }, { "epoch": 2.12, "learning_rate": 4.472515856236787e-05, "loss": 0.482, "step": 2002 }, { "epoch": 2.12, "learning_rate": 4.4719873150105715e-05, "loss": 0.3529, "step": 2004 }, { "epoch": 2.12, "learning_rate": 4.4714587737843555e-05, "loss": 0.4144, "step": 2006 }, { "epoch": 2.12, "learning_rate": 4.47093023255814e-05, "loss": 0.2902, "step": 2008 }, { "epoch": 2.12, "learning_rate": 4.470401691331924e-05, "loss": 0.4057, "step": 2010 }, { "epoch": 2.13, "learning_rate": 4.4698731501057086e-05, "loss": 0.4743, "step": 2012 }, { "epoch": 2.13, "learning_rate": 4.4693446088794925e-05, "loss": 0.3409, "step": 2014 }, { "epoch": 2.13, "learning_rate": 4.468816067653277e-05, "loss": 0.4337, "step": 2016 }, { "epoch": 2.13, "learning_rate": 4.468287526427061e-05, "loss": 0.5333, "step": 2018 }, { "epoch": 2.14, "learning_rate": 4.467758985200846e-05, "loss": 0.2719, "step": 2020 }, { "epoch": 2.14, "learning_rate": 4.46723044397463e-05, "loss": 0.4625, "step": 2022 }, { "epoch": 2.14, "learning_rate": 4.466701902748415e-05, "loss": 0.2435, "step": 2024 }, { "epoch": 2.14, "learning_rate": 4.466173361522199e-05, "loss": 0.4023, "step": 2026 }, { "epoch": 2.14, "learning_rate": 4.4656448202959834e-05, "loss": 0.4343, "step": 2028 }, { "epoch": 2.15, "learning_rate": 4.465116279069767e-05, "loss": 0.3526, "step": 2030 }, { "epoch": 2.15, "learning_rate": 4.464587737843552e-05, "loss": 0.4709, "step": 2032 }, { "epoch": 2.15, "learning_rate": 4.464059196617336e-05, "loss": 0.5203, "step": 2034 }, { "epoch": 2.15, "learning_rate": 4.4635306553911205e-05, "loss": 0.7966, "step": 2036 }, { "epoch": 2.15, "learning_rate": 4.463002114164905e-05, "loss": 0.5275, "step": 2038 }, { "epoch": 2.16, "learning_rate": 4.46247357293869e-05, "loss": 0.4834, "step": 2040 }, { "epoch": 2.16, "learning_rate": 4.4619450317124736e-05, "loss": 0.6763, "step": 2042 }, { "epoch": 2.16, "learning_rate": 4.461416490486258e-05, "loss": 0.5105, "step": 2044 }, { "epoch": 2.16, "learning_rate": 4.460887949260043e-05, "loss": 0.3415, "step": 2046 }, { "epoch": 2.16, "learning_rate": 4.460359408033827e-05, "loss": 0.5119, "step": 2048 }, { "epoch": 2.17, "learning_rate": 4.4598308668076114e-05, "loss": 0.3535, "step": 2050 }, { "epoch": 2.17, "learning_rate": 4.459302325581395e-05, "loss": 0.3661, "step": 2052 }, { "epoch": 2.17, "learning_rate": 4.45877378435518e-05, "loss": 0.4584, "step": 2054 }, { "epoch": 2.17, "learning_rate": 4.4582452431289645e-05, "loss": 0.5095, "step": 2056 }, { "epoch": 2.18, "learning_rate": 4.457716701902749e-05, "loss": 0.7405, "step": 2058 }, { "epoch": 2.18, "learning_rate": 4.457188160676533e-05, "loss": 0.412, "step": 2060 }, { "epoch": 2.18, "learning_rate": 4.4566596194503176e-05, "loss": 0.3918, "step": 2062 }, { "epoch": 2.18, "learning_rate": 4.4561310782241016e-05, "loss": 0.4374, "step": 2064 }, { "epoch": 2.18, "learning_rate": 4.455602536997886e-05, "loss": 0.2783, "step": 2066 }, { "epoch": 2.19, "learning_rate": 4.45507399577167e-05, "loss": 0.5863, "step": 2068 }, { "epoch": 2.19, "learning_rate": 4.454545454545455e-05, "loss": 0.3459, "step": 2070 }, { "epoch": 2.19, "learning_rate": 4.4540169133192386e-05, "loss": 0.3781, "step": 2072 }, { "epoch": 2.19, "learning_rate": 4.453488372093024e-05, "loss": 0.2417, "step": 2074 }, { "epoch": 2.19, "learning_rate": 4.452959830866808e-05, "loss": 0.3893, "step": 2076 }, { "epoch": 2.2, "learning_rate": 4.4524312896405925e-05, "loss": 0.4426, "step": 2078 }, { "epoch": 2.2, "learning_rate": 4.4519027484143764e-05, "loss": 0.372, "step": 2080 }, { "epoch": 2.2, "learning_rate": 4.451374207188161e-05, "loss": 0.5169, "step": 2082 }, { "epoch": 2.2, "learning_rate": 4.450845665961945e-05, "loss": 0.484, "step": 2084 }, { "epoch": 2.21, "learning_rate": 4.4503171247357295e-05, "loss": 0.3746, "step": 2086 }, { "epoch": 2.21, "learning_rate": 4.4497885835095135e-05, "loss": 0.3725, "step": 2088 }, { "epoch": 2.21, "learning_rate": 4.449260042283298e-05, "loss": 0.4112, "step": 2090 }, { "epoch": 2.21, "learning_rate": 4.448731501057083e-05, "loss": 0.3573, "step": 2092 }, { "epoch": 2.21, "learning_rate": 4.448202959830867e-05, "loss": 0.6657, "step": 2094 }, { "epoch": 2.22, "learning_rate": 4.447674418604651e-05, "loss": 0.4883, "step": 2096 }, { "epoch": 2.22, "learning_rate": 4.447145877378436e-05, "loss": 0.4407, "step": 2098 }, { "epoch": 2.22, "learning_rate": 4.4466173361522204e-05, "loss": 0.4828, "step": 2100 }, { "epoch": 2.22, "learning_rate": 4.4460887949260043e-05, "loss": 0.4325, "step": 2102 }, { "epoch": 2.22, "learning_rate": 4.445560253699789e-05, "loss": 0.4517, "step": 2104 }, { "epoch": 2.23, "learning_rate": 4.445031712473573e-05, "loss": 0.3728, "step": 2106 }, { "epoch": 2.23, "learning_rate": 4.4445031712473575e-05, "loss": 0.4571, "step": 2108 }, { "epoch": 2.23, "learning_rate": 4.443974630021142e-05, "loss": 0.5384, "step": 2110 }, { "epoch": 2.23, "learning_rate": 4.443446088794927e-05, "loss": 0.7312, "step": 2112 }, { "epoch": 2.23, "learning_rate": 4.4429175475687106e-05, "loss": 0.4921, "step": 2114 }, { "epoch": 2.24, "learning_rate": 4.442389006342495e-05, "loss": 0.3399, "step": 2116 }, { "epoch": 2.24, "learning_rate": 4.441860465116279e-05, "loss": 0.4376, "step": 2118 }, { "epoch": 2.24, "learning_rate": 4.441331923890064e-05, "loss": 0.3879, "step": 2120 }, { "epoch": 2.24, "learning_rate": 4.440803382663848e-05, "loss": 0.272, "step": 2122 }, { "epoch": 2.25, "learning_rate": 4.440274841437632e-05, "loss": 0.5572, "step": 2124 }, { "epoch": 2.25, "learning_rate": 4.439746300211416e-05, "loss": 0.4251, "step": 2126 }, { "epoch": 2.25, "learning_rate": 4.4392177589852015e-05, "loss": 0.4728, "step": 2128 }, { "epoch": 2.25, "learning_rate": 4.4386892177589854e-05, "loss": 0.6148, "step": 2130 }, { "epoch": 2.25, "learning_rate": 4.43816067653277e-05, "loss": 0.4623, "step": 2132 }, { "epoch": 2.26, "learning_rate": 4.437632135306554e-05, "loss": 0.4297, "step": 2134 }, { "epoch": 2.26, "learning_rate": 4.4371035940803386e-05, "loss": 0.2851, "step": 2136 }, { "epoch": 2.26, "learning_rate": 4.4365750528541225e-05, "loss": 0.4021, "step": 2138 }, { "epoch": 2.26, "learning_rate": 4.436046511627907e-05, "loss": 0.3805, "step": 2140 }, { "epoch": 2.26, "learning_rate": 4.435517970401691e-05, "loss": 0.3594, "step": 2142 }, { "epoch": 2.27, "learning_rate": 4.4349894291754756e-05, "loss": 0.2067, "step": 2144 }, { "epoch": 2.27, "learning_rate": 4.43446088794926e-05, "loss": 0.4969, "step": 2146 }, { "epoch": 2.27, "learning_rate": 4.433932346723045e-05, "loss": 0.3155, "step": 2148 }, { "epoch": 2.27, "learning_rate": 4.433403805496829e-05, "loss": 0.4758, "step": 2150 }, { "epoch": 2.27, "learning_rate": 4.4328752642706134e-05, "loss": 0.2722, "step": 2152 }, { "epoch": 2.28, "learning_rate": 4.432346723044398e-05, "loss": 0.5713, "step": 2154 }, { "epoch": 2.28, "learning_rate": 4.431818181818182e-05, "loss": 0.4699, "step": 2156 }, { "epoch": 2.28, "learning_rate": 4.4312896405919665e-05, "loss": 0.4833, "step": 2158 }, { "epoch": 2.28, "learning_rate": 4.4307610993657505e-05, "loss": 0.2738, "step": 2160 }, { "epoch": 2.29, "learning_rate": 4.430232558139535e-05, "loss": 0.5175, "step": 2162 }, { "epoch": 2.29, "learning_rate": 4.42970401691332e-05, "loss": 0.3853, "step": 2164 }, { "epoch": 2.29, "learning_rate": 4.429175475687104e-05, "loss": 0.519, "step": 2166 }, { "epoch": 2.29, "learning_rate": 4.428646934460888e-05, "loss": 0.4999, "step": 2168 }, { "epoch": 2.29, "learning_rate": 4.428118393234673e-05, "loss": 0.2495, "step": 2170 }, { "epoch": 2.3, "learning_rate": 4.427589852008457e-05, "loss": 0.4331, "step": 2172 }, { "epoch": 2.3, "learning_rate": 4.4270613107822413e-05, "loss": 0.4553, "step": 2174 }, { "epoch": 2.3, "learning_rate": 4.426532769556025e-05, "loss": 0.4201, "step": 2176 }, { "epoch": 2.3, "learning_rate": 4.42600422832981e-05, "loss": 0.567, "step": 2178 }, { "epoch": 2.3, "learning_rate": 4.425475687103594e-05, "loss": 0.2782, "step": 2180 }, { "epoch": 2.31, "learning_rate": 4.424947145877379e-05, "loss": 0.4802, "step": 2182 }, { "epoch": 2.31, "learning_rate": 4.424418604651163e-05, "loss": 0.3771, "step": 2184 }, { "epoch": 2.31, "learning_rate": 4.4238900634249476e-05, "loss": 0.2445, "step": 2186 }, { "epoch": 2.31, "learning_rate": 4.4233615221987316e-05, "loss": 0.4321, "step": 2188 }, { "epoch": 2.32, "learning_rate": 4.422832980972516e-05, "loss": 0.3165, "step": 2190 }, { "epoch": 2.32, "learning_rate": 4.4223044397463e-05, "loss": 0.4181, "step": 2192 }, { "epoch": 2.32, "learning_rate": 4.421775898520085e-05, "loss": 0.3771, "step": 2194 }, { "epoch": 2.32, "learning_rate": 4.421247357293869e-05, "loss": 0.4594, "step": 2196 }, { "epoch": 2.32, "learning_rate": 4.420718816067653e-05, "loss": 0.357, "step": 2198 }, { "epoch": 2.33, "learning_rate": 4.420190274841438e-05, "loss": 0.3837, "step": 2200 }, { "epoch": 2.33, "learning_rate": 4.4196617336152224e-05, "loss": 0.3, "step": 2202 }, { "epoch": 2.33, "learning_rate": 4.4191331923890064e-05, "loss": 0.5577, "step": 2204 }, { "epoch": 2.33, "learning_rate": 4.418604651162791e-05, "loss": 0.7798, "step": 2206 }, { "epoch": 2.33, "learning_rate": 4.4180761099365756e-05, "loss": 0.3581, "step": 2208 }, { "epoch": 2.34, "learning_rate": 4.4175475687103595e-05, "loss": 0.5661, "step": 2210 }, { "epoch": 2.34, "learning_rate": 4.417019027484144e-05, "loss": 0.4161, "step": 2212 }, { "epoch": 2.34, "learning_rate": 4.416490486257928e-05, "loss": 0.3675, "step": 2214 }, { "epoch": 2.34, "learning_rate": 4.4159619450317126e-05, "loss": 0.5372, "step": 2216 }, { "epoch": 2.34, "learning_rate": 4.415433403805497e-05, "loss": 0.5398, "step": 2218 }, { "epoch": 2.35, "learning_rate": 4.414904862579282e-05, "loss": 0.3739, "step": 2220 }, { "epoch": 2.35, "learning_rate": 4.414376321353066e-05, "loss": 0.5355, "step": 2222 }, { "epoch": 2.35, "learning_rate": 4.4138477801268504e-05, "loss": 0.5392, "step": 2224 }, { "epoch": 2.35, "learning_rate": 4.413319238900634e-05, "loss": 0.4832, "step": 2226 }, { "epoch": 2.36, "learning_rate": 4.412790697674419e-05, "loss": 0.4194, "step": 2228 }, { "epoch": 2.36, "learning_rate": 4.412262156448203e-05, "loss": 0.5088, "step": 2230 }, { "epoch": 2.36, "learning_rate": 4.4117336152219875e-05, "loss": 0.401, "step": 2232 }, { "epoch": 2.36, "learning_rate": 4.4112050739957714e-05, "loss": 0.5165, "step": 2234 }, { "epoch": 2.36, "learning_rate": 4.410676532769557e-05, "loss": 0.4715, "step": 2236 }, { "epoch": 2.37, "learning_rate": 4.4101479915433406e-05, "loss": 0.3201, "step": 2238 }, { "epoch": 2.37, "learning_rate": 4.409619450317125e-05, "loss": 0.4222, "step": 2240 }, { "epoch": 2.37, "learning_rate": 4.409090909090909e-05, "loss": 0.3206, "step": 2242 }, { "epoch": 2.37, "learning_rate": 4.408562367864694e-05, "loss": 0.3141, "step": 2244 }, { "epoch": 2.37, "learning_rate": 4.408033826638478e-05, "loss": 0.5056, "step": 2246 }, { "epoch": 2.38, "learning_rate": 4.407505285412262e-05, "loss": 0.4578, "step": 2248 }, { "epoch": 2.38, "learning_rate": 4.406976744186047e-05, "loss": 0.3295, "step": 2250 }, { "epoch": 2.38, "learning_rate": 4.406448202959831e-05, "loss": 0.3376, "step": 2252 }, { "epoch": 2.38, "learning_rate": 4.4059196617336154e-05, "loss": 0.4404, "step": 2254 }, { "epoch": 2.38, "learning_rate": 4.4053911205074e-05, "loss": 0.3081, "step": 2256 }, { "epoch": 2.39, "learning_rate": 4.404862579281184e-05, "loss": 0.2706, "step": 2258 }, { "epoch": 2.39, "learning_rate": 4.4043340380549686e-05, "loss": 0.5501, "step": 2260 }, { "epoch": 2.39, "learning_rate": 4.403805496828753e-05, "loss": 0.3977, "step": 2262 }, { "epoch": 2.39, "learning_rate": 4.403276955602537e-05, "loss": 0.3854, "step": 2264 }, { "epoch": 2.4, "learning_rate": 4.402748414376322e-05, "loss": 0.5107, "step": 2266 }, { "epoch": 2.4, "learning_rate": 4.4022198731501056e-05, "loss": 0.3551, "step": 2268 }, { "epoch": 2.4, "learning_rate": 4.40169133192389e-05, "loss": 0.376, "step": 2270 }, { "epoch": 2.4, "learning_rate": 4.401162790697675e-05, "loss": 0.3767, "step": 2272 }, { "epoch": 2.4, "learning_rate": 4.4006342494714594e-05, "loss": 0.3593, "step": 2274 }, { "epoch": 2.41, "learning_rate": 4.4001057082452434e-05, "loss": 0.5396, "step": 2276 }, { "epoch": 2.41, "learning_rate": 4.399577167019028e-05, "loss": 0.4324, "step": 2278 }, { "epoch": 2.41, "learning_rate": 4.399048625792812e-05, "loss": 0.0967, "step": 2280 }, { "epoch": 2.41, "learning_rate": 4.3985200845665965e-05, "loss": 0.4564, "step": 2282 }, { "epoch": 2.41, "learning_rate": 4.3979915433403804e-05, "loss": 0.3536, "step": 2284 }, { "epoch": 2.42, "learning_rate": 4.397463002114165e-05, "loss": 0.5021, "step": 2286 }, { "epoch": 2.42, "learning_rate": 4.396934460887949e-05, "loss": 0.555, "step": 2288 }, { "epoch": 2.42, "learning_rate": 4.396405919661734e-05, "loss": 0.5308, "step": 2290 }, { "epoch": 2.42, "learning_rate": 4.395877378435518e-05, "loss": 0.4026, "step": 2292 }, { "epoch": 2.42, "learning_rate": 4.395348837209303e-05, "loss": 0.5569, "step": 2294 }, { "epoch": 2.43, "learning_rate": 4.394820295983087e-05, "loss": 0.5554, "step": 2296 }, { "epoch": 2.43, "learning_rate": 4.394291754756871e-05, "loss": 0.5833, "step": 2298 }, { "epoch": 2.43, "learning_rate": 4.393763213530655e-05, "loss": 0.5731, "step": 2300 }, { "epoch": 2.43, "learning_rate": 4.39323467230444e-05, "loss": 0.2924, "step": 2302 }, { "epoch": 2.44, "learning_rate": 4.3927061310782245e-05, "loss": 0.3258, "step": 2304 }, { "epoch": 2.44, "learning_rate": 4.3921775898520084e-05, "loss": 0.4652, "step": 2306 }, { "epoch": 2.44, "learning_rate": 4.391649048625793e-05, "loss": 0.4098, "step": 2308 }, { "epoch": 2.44, "learning_rate": 4.3911205073995776e-05, "loss": 0.4637, "step": 2310 }, { "epoch": 2.44, "learning_rate": 4.390591966173362e-05, "loss": 0.5208, "step": 2312 }, { "epoch": 2.45, "learning_rate": 4.390063424947146e-05, "loss": 0.3701, "step": 2314 }, { "epoch": 2.45, "learning_rate": 4.389534883720931e-05, "loss": 0.3613, "step": 2316 }, { "epoch": 2.45, "learning_rate": 4.389006342494715e-05, "loss": 0.5958, "step": 2318 }, { "epoch": 2.45, "learning_rate": 4.388477801268499e-05, "loss": 0.3932, "step": 2320 }, { "epoch": 2.45, "learning_rate": 4.387949260042283e-05, "loss": 0.5768, "step": 2322 }, { "epoch": 2.46, "learning_rate": 4.387420718816068e-05, "loss": 0.6017, "step": 2324 }, { "epoch": 2.46, "learning_rate": 4.3868921775898524e-05, "loss": 0.4225, "step": 2326 }, { "epoch": 2.46, "learning_rate": 4.386363636363637e-05, "loss": 0.5372, "step": 2328 }, { "epoch": 2.46, "learning_rate": 4.385835095137421e-05, "loss": 0.4131, "step": 2330 }, { "epoch": 2.47, "learning_rate": 4.3853065539112056e-05, "loss": 0.5359, "step": 2332 }, { "epoch": 2.47, "learning_rate": 4.3847780126849895e-05, "loss": 0.2422, "step": 2334 }, { "epoch": 2.47, "learning_rate": 4.384249471458774e-05, "loss": 0.3424, "step": 2336 }, { "epoch": 2.47, "learning_rate": 4.383720930232558e-05, "loss": 0.4612, "step": 2338 }, { "epoch": 2.47, "learning_rate": 4.3831923890063426e-05, "loss": 0.3877, "step": 2340 }, { "epoch": 2.48, "learning_rate": 4.3826638477801266e-05, "loss": 0.7339, "step": 2342 }, { "epoch": 2.48, "learning_rate": 4.382135306553912e-05, "loss": 0.4984, "step": 2344 }, { "epoch": 2.48, "learning_rate": 4.381606765327696e-05, "loss": 0.6876, "step": 2346 }, { "epoch": 2.48, "learning_rate": 4.3810782241014804e-05, "loss": 0.4758, "step": 2348 }, { "epoch": 2.48, "learning_rate": 4.380549682875264e-05, "loss": 0.324, "step": 2350 }, { "epoch": 2.49, "learning_rate": 4.380021141649049e-05, "loss": 0.4365, "step": 2352 }, { "epoch": 2.49, "learning_rate": 4.379492600422833e-05, "loss": 0.3447, "step": 2354 }, { "epoch": 2.49, "learning_rate": 4.3789640591966174e-05, "loss": 0.3975, "step": 2356 }, { "epoch": 2.49, "learning_rate": 4.378435517970402e-05, "loss": 0.3833, "step": 2358 }, { "epoch": 2.49, "learning_rate": 4.377906976744186e-05, "loss": 0.386, "step": 2360 }, { "epoch": 2.5, "learning_rate": 4.3773784355179706e-05, "loss": 0.5437, "step": 2362 }, { "epoch": 2.5, "learning_rate": 4.376849894291755e-05, "loss": 0.4142, "step": 2364 }, { "epoch": 2.5, "learning_rate": 4.37632135306554e-05, "loss": 0.4588, "step": 2366 }, { "epoch": 2.5, "learning_rate": 4.375792811839324e-05, "loss": 0.3383, "step": 2368 }, { "epoch": 2.51, "learning_rate": 4.375264270613108e-05, "loss": 0.3192, "step": 2370 }, { "epoch": 2.51, "learning_rate": 4.374735729386892e-05, "loss": 0.5349, "step": 2372 }, { "epoch": 2.51, "learning_rate": 4.374207188160677e-05, "loss": 0.2711, "step": 2374 }, { "epoch": 2.51, "learning_rate": 4.373678646934461e-05, "loss": 0.3692, "step": 2376 }, { "epoch": 2.51, "learning_rate": 4.3731501057082454e-05, "loss": 0.2674, "step": 2378 }, { "epoch": 2.52, "learning_rate": 4.372621564482029e-05, "loss": 0.3274, "step": 2380 }, { "epoch": 2.52, "learning_rate": 4.3720930232558146e-05, "loss": 0.4498, "step": 2382 }, { "epoch": 2.52, "learning_rate": 4.3715644820295985e-05, "loss": 0.4294, "step": 2384 }, { "epoch": 2.52, "learning_rate": 4.371035940803383e-05, "loss": 0.4257, "step": 2386 }, { "epoch": 2.52, "learning_rate": 4.370507399577167e-05, "loss": 0.4678, "step": 2388 }, { "epoch": 2.53, "learning_rate": 4.369978858350952e-05, "loss": 0.3635, "step": 2390 }, { "epoch": 2.53, "learning_rate": 4.3694503171247356e-05, "loss": 0.5947, "step": 2392 }, { "epoch": 2.53, "learning_rate": 4.36892177589852e-05, "loss": 0.4308, "step": 2394 }, { "epoch": 2.53, "learning_rate": 4.368393234672304e-05, "loss": 0.3585, "step": 2396 }, { "epoch": 2.53, "learning_rate": 4.367864693446089e-05, "loss": 0.844, "step": 2398 }, { "epoch": 2.54, "learning_rate": 4.3673361522198734e-05, "loss": 0.4748, "step": 2400 }, { "epoch": 2.54, "learning_rate": 4.366807610993658e-05, "loss": 0.3876, "step": 2402 }, { "epoch": 2.54, "learning_rate": 4.366279069767442e-05, "loss": 0.4596, "step": 2404 }, { "epoch": 2.54, "learning_rate": 4.3657505285412265e-05, "loss": 0.2919, "step": 2406 }, { "epoch": 2.55, "learning_rate": 4.3652219873150104e-05, "loss": 0.4279, "step": 2408 }, { "epoch": 2.55, "learning_rate": 4.364693446088795e-05, "loss": 0.3172, "step": 2410 }, { "epoch": 2.55, "learning_rate": 4.3641649048625796e-05, "loss": 0.3116, "step": 2412 }, { "epoch": 2.55, "learning_rate": 4.3636363636363636e-05, "loss": 0.4824, "step": 2414 }, { "epoch": 2.55, "learning_rate": 4.363107822410148e-05, "loss": 0.3576, "step": 2416 }, { "epoch": 2.56, "learning_rate": 4.362579281183933e-05, "loss": 0.28, "step": 2418 }, { "epoch": 2.56, "learning_rate": 4.3620507399577174e-05, "loss": 0.3368, "step": 2420 }, { "epoch": 2.56, "learning_rate": 4.361522198731501e-05, "loss": 0.399, "step": 2422 }, { "epoch": 2.56, "learning_rate": 4.360993657505286e-05, "loss": 0.3882, "step": 2424 }, { "epoch": 2.56, "learning_rate": 4.36046511627907e-05, "loss": 0.3533, "step": 2426 }, { "epoch": 2.57, "learning_rate": 4.3599365750528544e-05, "loss": 0.4115, "step": 2428 }, { "epoch": 2.57, "learning_rate": 4.3594080338266384e-05, "loss": 0.4398, "step": 2430 }, { "epoch": 2.57, "learning_rate": 4.358879492600423e-05, "loss": 0.3063, "step": 2432 }, { "epoch": 2.57, "learning_rate": 4.358350951374207e-05, "loss": 0.3138, "step": 2434 }, { "epoch": 2.58, "learning_rate": 4.357822410147992e-05, "loss": 0.3835, "step": 2436 }, { "epoch": 2.58, "learning_rate": 4.357293868921776e-05, "loss": 0.493, "step": 2438 }, { "epoch": 2.58, "learning_rate": 4.356765327695561e-05, "loss": 0.4659, "step": 2440 }, { "epoch": 2.58, "learning_rate": 4.3562367864693447e-05, "loss": 0.4226, "step": 2442 }, { "epoch": 2.58, "learning_rate": 4.355708245243129e-05, "loss": 0.3218, "step": 2444 }, { "epoch": 2.59, "learning_rate": 4.355179704016913e-05, "loss": 0.271, "step": 2446 }, { "epoch": 2.59, "learning_rate": 4.354651162790698e-05, "loss": 0.4356, "step": 2448 }, { "epoch": 2.59, "learning_rate": 4.354122621564482e-05, "loss": 0.4688, "step": 2450 }, { "epoch": 2.59, "learning_rate": 4.353594080338266e-05, "loss": 0.4016, "step": 2452 }, { "epoch": 2.59, "learning_rate": 4.353065539112051e-05, "loss": 0.4853, "step": 2454 }, { "epoch": 2.6, "learning_rate": 4.3525369978858355e-05, "loss": 0.4232, "step": 2456 }, { "epoch": 2.6, "learning_rate": 4.3520084566596195e-05, "loss": 0.2393, "step": 2458 }, { "epoch": 2.6, "learning_rate": 4.351479915433404e-05, "loss": 0.3287, "step": 2460 }, { "epoch": 2.6, "learning_rate": 4.350951374207189e-05, "loss": 0.3494, "step": 2462 }, { "epoch": 2.6, "learning_rate": 4.3504228329809726e-05, "loss": 0.5148, "step": 2464 }, { "epoch": 2.61, "learning_rate": 4.349894291754757e-05, "loss": 0.4271, "step": 2466 }, { "epoch": 2.61, "learning_rate": 4.349365750528541e-05, "loss": 0.2264, "step": 2468 }, { "epoch": 2.61, "learning_rate": 4.348837209302326e-05, "loss": 0.3973, "step": 2470 }, { "epoch": 2.61, "learning_rate": 4.3483086680761104e-05, "loss": 0.3798, "step": 2472 }, { "epoch": 2.62, "learning_rate": 4.347780126849895e-05, "loss": 0.4355, "step": 2474 }, { "epoch": 2.62, "learning_rate": 4.347251585623679e-05, "loss": 0.5765, "step": 2476 }, { "epoch": 2.62, "learning_rate": 4.3467230443974635e-05, "loss": 0.4032, "step": 2478 }, { "epoch": 2.62, "learning_rate": 4.3461945031712474e-05, "loss": 0.348, "step": 2480 }, { "epoch": 2.62, "learning_rate": 4.345665961945032e-05, "loss": 0.2763, "step": 2482 }, { "epoch": 2.63, "learning_rate": 4.345137420718816e-05, "loss": 0.4957, "step": 2484 }, { "epoch": 2.63, "learning_rate": 4.3446088794926006e-05, "loss": 0.3058, "step": 2486 }, { "epoch": 2.63, "learning_rate": 4.3440803382663845e-05, "loss": 0.4072, "step": 2488 }, { "epoch": 2.63, "learning_rate": 4.34355179704017e-05, "loss": 0.3913, "step": 2490 }, { "epoch": 2.63, "learning_rate": 4.343023255813954e-05, "loss": 0.2427, "step": 2492 }, { "epoch": 2.64, "learning_rate": 4.342494714587738e-05, "loss": 0.3805, "step": 2494 }, { "epoch": 2.64, "learning_rate": 4.341966173361522e-05, "loss": 0.3677, "step": 2496 }, { "epoch": 2.64, "learning_rate": 4.341437632135307e-05, "loss": 0.3233, "step": 2498 }, { "epoch": 2.64, "learning_rate": 4.340909090909091e-05, "loss": 0.3522, "step": 2500 }, { "epoch": 2.64, "eval_cer": 0.0402393844400114, "eval_loss": 0.47415897250175476, "eval_runtime": 131.0076, "eval_samples_per_second": 6.419, "eval_steps_per_second": 0.809, "step": 2500 }, { "epoch": 2.64, "learning_rate": 4.3403805496828754e-05, "loss": 0.258, "step": 2502 }, { "epoch": 2.65, "learning_rate": 4.339852008456659e-05, "loss": 0.3387, "step": 2504 }, { "epoch": 2.65, "learning_rate": 4.339323467230444e-05, "loss": 0.3951, "step": 2506 }, { "epoch": 2.65, "learning_rate": 4.3387949260042285e-05, "loss": 0.5066, "step": 2508 }, { "epoch": 2.65, "learning_rate": 4.338266384778013e-05, "loss": 0.3595, "step": 2510 }, { "epoch": 2.66, "learning_rate": 4.337737843551797e-05, "loss": 0.3599, "step": 2512 }, { "epoch": 2.66, "learning_rate": 4.337209302325582e-05, "loss": 0.3667, "step": 2514 }, { "epoch": 2.66, "learning_rate": 4.336680761099366e-05, "loss": 0.4277, "step": 2516 }, { "epoch": 2.66, "learning_rate": 4.33615221987315e-05, "loss": 0.5447, "step": 2518 }, { "epoch": 2.66, "learning_rate": 4.335623678646935e-05, "loss": 0.339, "step": 2520 }, { "epoch": 2.67, "learning_rate": 4.335095137420719e-05, "loss": 0.4673, "step": 2522 }, { "epoch": 2.67, "learning_rate": 4.334566596194503e-05, "loss": 0.408, "step": 2524 }, { "epoch": 2.67, "learning_rate": 4.334038054968288e-05, "loss": 0.3587, "step": 2526 }, { "epoch": 2.67, "learning_rate": 4.3335095137420725e-05, "loss": 0.464, "step": 2528 }, { "epoch": 2.67, "learning_rate": 4.3329809725158565e-05, "loss": 0.3021, "step": 2530 }, { "epoch": 2.68, "learning_rate": 4.332452431289641e-05, "loss": 0.4235, "step": 2532 }, { "epoch": 2.68, "learning_rate": 4.331923890063425e-05, "loss": 0.413, "step": 2534 }, { "epoch": 2.68, "learning_rate": 4.3313953488372096e-05, "loss": 0.3187, "step": 2536 }, { "epoch": 2.68, "learning_rate": 4.3308668076109935e-05, "loss": 0.4356, "step": 2538 }, { "epoch": 2.68, "learning_rate": 4.330338266384778e-05, "loss": 0.4303, "step": 2540 }, { "epoch": 2.69, "learning_rate": 4.329809725158562e-05, "loss": 0.4085, "step": 2542 }, { "epoch": 2.69, "learning_rate": 4.3292811839323474e-05, "loss": 0.4981, "step": 2544 }, { "epoch": 2.69, "learning_rate": 4.328752642706131e-05, "loss": 0.6279, "step": 2546 }, { "epoch": 2.69, "learning_rate": 4.328224101479916e-05, "loss": 0.5337, "step": 2548 }, { "epoch": 2.7, "learning_rate": 4.3276955602537e-05, "loss": 0.3601, "step": 2550 }, { "epoch": 2.7, "learning_rate": 4.3271670190274844e-05, "loss": 0.4367, "step": 2552 }, { "epoch": 2.7, "learning_rate": 4.3266384778012684e-05, "loss": 0.276, "step": 2554 }, { "epoch": 2.7, "learning_rate": 4.326109936575053e-05, "loss": 0.3276, "step": 2556 }, { "epoch": 2.7, "learning_rate": 4.325581395348837e-05, "loss": 0.4869, "step": 2558 }, { "epoch": 2.71, "learning_rate": 4.3250528541226215e-05, "loss": 0.468, "step": 2560 }, { "epoch": 2.71, "learning_rate": 4.324524312896406e-05, "loss": 0.3476, "step": 2562 }, { "epoch": 2.71, "learning_rate": 4.323995771670191e-05, "loss": 0.4213, "step": 2564 }, { "epoch": 2.71, "learning_rate": 4.3234672304439746e-05, "loss": 0.2384, "step": 2566 }, { "epoch": 2.71, "learning_rate": 4.322938689217759e-05, "loss": 0.4982, "step": 2568 }, { "epoch": 2.72, "learning_rate": 4.322410147991544e-05, "loss": 0.372, "step": 2570 }, { "epoch": 2.72, "learning_rate": 4.321881606765328e-05, "loss": 0.3857, "step": 2572 }, { "epoch": 2.72, "learning_rate": 4.3213530655391124e-05, "loss": 0.3346, "step": 2574 }, { "epoch": 2.72, "learning_rate": 4.320824524312896e-05, "loss": 0.4945, "step": 2576 }, { "epoch": 2.73, "learning_rate": 4.320295983086681e-05, "loss": 0.4013, "step": 2578 }, { "epoch": 2.73, "learning_rate": 4.3197674418604655e-05, "loss": 0.2965, "step": 2580 }, { "epoch": 2.73, "learning_rate": 4.31923890063425e-05, "loss": 0.3815, "step": 2582 }, { "epoch": 2.73, "learning_rate": 4.318710359408034e-05, "loss": 0.2204, "step": 2584 }, { "epoch": 2.73, "learning_rate": 4.318181818181819e-05, "loss": 0.3671, "step": 2586 }, { "epoch": 2.74, "learning_rate": 4.3176532769556026e-05, "loss": 0.5415, "step": 2588 }, { "epoch": 2.74, "learning_rate": 4.317124735729387e-05, "loss": 0.3768, "step": 2590 }, { "epoch": 2.74, "learning_rate": 4.316596194503171e-05, "loss": 0.377, "step": 2592 }, { "epoch": 2.74, "learning_rate": 4.316067653276956e-05, "loss": 0.5208, "step": 2594 }, { "epoch": 2.74, "learning_rate": 4.31553911205074e-05, "loss": 0.3114, "step": 2596 }, { "epoch": 2.75, "learning_rate": 4.315010570824525e-05, "loss": 0.2648, "step": 2598 }, { "epoch": 2.75, "learning_rate": 4.314482029598309e-05, "loss": 0.2037, "step": 2600 }, { "epoch": 2.75, "learning_rate": 4.3139534883720935e-05, "loss": 0.2728, "step": 2602 }, { "epoch": 2.75, "learning_rate": 4.3134249471458774e-05, "loss": 0.5251, "step": 2604 }, { "epoch": 2.75, "learning_rate": 4.312896405919662e-05, "loss": 0.4707, "step": 2606 }, { "epoch": 2.76, "learning_rate": 4.312367864693446e-05, "loss": 0.2391, "step": 2608 }, { "epoch": 2.76, "learning_rate": 4.3118393234672305e-05, "loss": 0.3286, "step": 2610 }, { "epoch": 2.76, "learning_rate": 4.3113107822410145e-05, "loss": 0.2599, "step": 2612 }, { "epoch": 2.76, "learning_rate": 4.310782241014799e-05, "loss": 0.5016, "step": 2614 }, { "epoch": 2.77, "learning_rate": 4.310253699788584e-05, "loss": 0.4022, "step": 2616 }, { "epoch": 2.77, "learning_rate": 4.309725158562368e-05, "loss": 0.4263, "step": 2618 }, { "epoch": 2.77, "learning_rate": 4.309196617336152e-05, "loss": 0.4461, "step": 2620 }, { "epoch": 2.77, "learning_rate": 4.308668076109937e-05, "loss": 0.3174, "step": 2622 }, { "epoch": 2.77, "learning_rate": 4.3081395348837214e-05, "loss": 0.4512, "step": 2624 }, { "epoch": 2.78, "learning_rate": 4.3076109936575054e-05, "loss": 0.3774, "step": 2626 }, { "epoch": 2.78, "learning_rate": 4.30708245243129e-05, "loss": 0.3699, "step": 2628 }, { "epoch": 2.78, "learning_rate": 4.306553911205074e-05, "loss": 0.4952, "step": 2630 }, { "epoch": 2.78, "learning_rate": 4.3060253699788585e-05, "loss": 0.3127, "step": 2632 }, { "epoch": 2.78, "learning_rate": 4.305496828752643e-05, "loss": 0.3005, "step": 2634 }, { "epoch": 2.79, "learning_rate": 4.304968287526428e-05, "loss": 0.5216, "step": 2636 }, { "epoch": 2.79, "learning_rate": 4.3044397463002116e-05, "loss": 0.1992, "step": 2638 }, { "epoch": 2.79, "learning_rate": 4.303911205073996e-05, "loss": 0.3832, "step": 2640 }, { "epoch": 2.79, "learning_rate": 4.30338266384778e-05, "loss": 0.3892, "step": 2642 }, { "epoch": 2.79, "learning_rate": 4.302854122621565e-05, "loss": 0.4152, "step": 2644 }, { "epoch": 2.8, "learning_rate": 4.302325581395349e-05, "loss": 0.3142, "step": 2646 }, { "epoch": 2.8, "learning_rate": 4.301797040169133e-05, "loss": 0.381, "step": 2648 }, { "epoch": 2.8, "learning_rate": 4.301268498942917e-05, "loss": 0.4858, "step": 2650 }, { "epoch": 2.8, "learning_rate": 4.3007399577167025e-05, "loss": 0.4228, "step": 2652 }, { "epoch": 2.81, "learning_rate": 4.3002114164904865e-05, "loss": 0.3344, "step": 2654 }, { "epoch": 2.81, "learning_rate": 4.299682875264271e-05, "loss": 0.3139, "step": 2656 }, { "epoch": 2.81, "learning_rate": 4.299154334038055e-05, "loss": 0.4021, "step": 2658 }, { "epoch": 2.81, "learning_rate": 4.2986257928118396e-05, "loss": 0.2641, "step": 2660 }, { "epoch": 2.81, "learning_rate": 4.2980972515856235e-05, "loss": 0.2865, "step": 2662 }, { "epoch": 2.82, "learning_rate": 4.297568710359408e-05, "loss": 0.4632, "step": 2664 }, { "epoch": 2.82, "learning_rate": 4.297040169133193e-05, "loss": 0.4016, "step": 2666 }, { "epoch": 2.82, "learning_rate": 4.296511627906977e-05, "loss": 0.3713, "step": 2668 }, { "epoch": 2.82, "learning_rate": 4.295983086680761e-05, "loss": 0.5221, "step": 2670 }, { "epoch": 2.82, "learning_rate": 4.295454545454546e-05, "loss": 0.398, "step": 2672 }, { "epoch": 2.83, "learning_rate": 4.29492600422833e-05, "loss": 0.4723, "step": 2674 }, { "epoch": 2.83, "learning_rate": 4.2943974630021144e-05, "loss": 0.441, "step": 2676 }, { "epoch": 2.83, "learning_rate": 4.293868921775899e-05, "loss": 0.3097, "step": 2678 }, { "epoch": 2.83, "learning_rate": 4.293340380549683e-05, "loss": 0.4571, "step": 2680 }, { "epoch": 2.84, "learning_rate": 4.2928118393234676e-05, "loss": 0.2796, "step": 2682 }, { "epoch": 2.84, "learning_rate": 4.2922832980972515e-05, "loss": 0.2998, "step": 2684 }, { "epoch": 2.84, "learning_rate": 4.291754756871036e-05, "loss": 0.3923, "step": 2686 }, { "epoch": 2.84, "learning_rate": 4.291226215644821e-05, "loss": 0.2452, "step": 2688 }, { "epoch": 2.84, "learning_rate": 4.290697674418605e-05, "loss": 0.4391, "step": 2690 }, { "epoch": 2.85, "learning_rate": 4.290169133192389e-05, "loss": 0.4308, "step": 2692 }, { "epoch": 2.85, "learning_rate": 4.289640591966174e-05, "loss": 0.2987, "step": 2694 }, { "epoch": 2.85, "learning_rate": 4.289112050739958e-05, "loss": 0.6786, "step": 2696 }, { "epoch": 2.85, "learning_rate": 4.2885835095137424e-05, "loss": 0.645, "step": 2698 }, { "epoch": 2.85, "learning_rate": 4.288054968287526e-05, "loss": 0.6325, "step": 2700 }, { "epoch": 2.86, "learning_rate": 4.287526427061311e-05, "loss": 0.3546, "step": 2702 }, { "epoch": 2.86, "learning_rate": 4.286997885835095e-05, "loss": 0.3041, "step": 2704 }, { "epoch": 2.86, "learning_rate": 4.28646934460888e-05, "loss": 0.3767, "step": 2706 }, { "epoch": 2.86, "learning_rate": 4.285940803382664e-05, "loss": 0.3425, "step": 2708 }, { "epoch": 2.86, "learning_rate": 4.2854122621564486e-05, "loss": 0.3014, "step": 2710 }, { "epoch": 2.87, "learning_rate": 4.2848837209302326e-05, "loss": 0.3421, "step": 2712 }, { "epoch": 2.87, "learning_rate": 4.284355179704017e-05, "loss": 0.3349, "step": 2714 }, { "epoch": 2.87, "learning_rate": 4.283826638477801e-05, "loss": 0.2842, "step": 2716 }, { "epoch": 2.87, "learning_rate": 4.283298097251586e-05, "loss": 0.4648, "step": 2718 }, { "epoch": 2.88, "learning_rate": 4.28276955602537e-05, "loss": 0.2628, "step": 2720 }, { "epoch": 2.88, "learning_rate": 4.282241014799154e-05, "loss": 0.3664, "step": 2722 }, { "epoch": 2.88, "learning_rate": 4.281712473572939e-05, "loss": 0.4732, "step": 2724 }, { "epoch": 2.88, "learning_rate": 4.2811839323467235e-05, "loss": 0.3712, "step": 2726 }, { "epoch": 2.88, "learning_rate": 4.2806553911205074e-05, "loss": 0.4412, "step": 2728 }, { "epoch": 2.89, "learning_rate": 4.280126849894292e-05, "loss": 0.2067, "step": 2730 }, { "epoch": 2.89, "learning_rate": 4.2795983086680766e-05, "loss": 0.3256, "step": 2732 }, { "epoch": 2.89, "learning_rate": 4.2790697674418605e-05, "loss": 0.2648, "step": 2734 }, { "epoch": 2.89, "learning_rate": 4.278541226215645e-05, "loss": 0.2814, "step": 2736 }, { "epoch": 2.89, "learning_rate": 4.278012684989429e-05, "loss": 0.4367, "step": 2738 }, { "epoch": 2.9, "learning_rate": 4.277484143763214e-05, "loss": 0.3559, "step": 2740 }, { "epoch": 2.9, "learning_rate": 4.276955602536998e-05, "loss": 0.6629, "step": 2742 }, { "epoch": 2.9, "learning_rate": 4.276427061310783e-05, "loss": 0.5756, "step": 2744 }, { "epoch": 2.9, "learning_rate": 4.275898520084567e-05, "loss": 0.337, "step": 2746 }, { "epoch": 2.9, "learning_rate": 4.2753699788583514e-05, "loss": 0.577, "step": 2748 }, { "epoch": 2.91, "learning_rate": 4.2748414376321353e-05, "loss": 0.4437, "step": 2750 }, { "epoch": 2.91, "learning_rate": 4.27431289640592e-05, "loss": 0.2032, "step": 2752 }, { "epoch": 2.91, "learning_rate": 4.273784355179704e-05, "loss": 0.4406, "step": 2754 }, { "epoch": 2.91, "learning_rate": 4.2732558139534885e-05, "loss": 0.3424, "step": 2756 }, { "epoch": 2.92, "learning_rate": 4.2727272727272724e-05, "loss": 0.4671, "step": 2758 }, { "epoch": 2.92, "learning_rate": 4.272198731501058e-05, "loss": 0.3583, "step": 2760 }, { "epoch": 2.92, "learning_rate": 4.2716701902748416e-05, "loss": 0.5143, "step": 2762 }, { "epoch": 2.92, "learning_rate": 4.271141649048626e-05, "loss": 0.4685, "step": 2764 }, { "epoch": 2.92, "learning_rate": 4.27061310782241e-05, "loss": 0.7, "step": 2766 }, { "epoch": 2.93, "learning_rate": 4.270084566596195e-05, "loss": 0.5151, "step": 2768 }, { "epoch": 2.93, "learning_rate": 4.269556025369979e-05, "loss": 0.4323, "step": 2770 }, { "epoch": 2.93, "learning_rate": 4.269027484143763e-05, "loss": 0.4289, "step": 2772 }, { "epoch": 2.93, "learning_rate": 4.268498942917548e-05, "loss": 0.2696, "step": 2774 }, { "epoch": 2.93, "learning_rate": 4.267970401691332e-05, "loss": 0.649, "step": 2776 }, { "epoch": 2.94, "learning_rate": 4.2674418604651164e-05, "loss": 0.3215, "step": 2778 }, { "epoch": 2.94, "learning_rate": 4.266913319238901e-05, "loss": 0.3971, "step": 2780 }, { "epoch": 2.94, "learning_rate": 4.2663847780126857e-05, "loss": 0.4493, "step": 2782 }, { "epoch": 2.94, "learning_rate": 4.2658562367864696e-05, "loss": 0.523, "step": 2784 }, { "epoch": 2.95, "learning_rate": 4.265327695560254e-05, "loss": 0.3435, "step": 2786 }, { "epoch": 2.95, "learning_rate": 4.264799154334038e-05, "loss": 0.2757, "step": 2788 }, { "epoch": 2.95, "learning_rate": 4.264270613107823e-05, "loss": 0.4671, "step": 2790 }, { "epoch": 2.95, "learning_rate": 4.2637420718816066e-05, "loss": 0.3897, "step": 2792 }, { "epoch": 2.95, "learning_rate": 4.263213530655391e-05, "loss": 0.2445, "step": 2794 }, { "epoch": 2.96, "learning_rate": 4.262684989429176e-05, "loss": 0.3071, "step": 2796 }, { "epoch": 2.96, "learning_rate": 4.2621564482029605e-05, "loss": 0.3959, "step": 2798 }, { "epoch": 2.96, "learning_rate": 4.2616279069767444e-05, "loss": 0.3327, "step": 2800 }, { "epoch": 2.96, "learning_rate": 4.261099365750529e-05, "loss": 0.3544, "step": 2802 }, { "epoch": 2.96, "learning_rate": 4.260570824524313e-05, "loss": 0.4166, "step": 2804 }, { "epoch": 2.97, "learning_rate": 4.2600422832980975e-05, "loss": 0.3214, "step": 2806 }, { "epoch": 2.97, "learning_rate": 4.2595137420718815e-05, "loss": 0.5656, "step": 2808 }, { "epoch": 2.97, "learning_rate": 4.258985200845666e-05, "loss": 0.3529, "step": 2810 }, { "epoch": 2.97, "learning_rate": 4.25845665961945e-05, "loss": 0.5777, "step": 2812 }, { "epoch": 2.97, "learning_rate": 4.257928118393235e-05, "loss": 0.3313, "step": 2814 }, { "epoch": 2.98, "learning_rate": 4.257399577167019e-05, "loss": 0.3668, "step": 2816 }, { "epoch": 2.98, "learning_rate": 4.256871035940804e-05, "loss": 0.3936, "step": 2818 }, { "epoch": 2.98, "learning_rate": 4.256342494714588e-05, "loss": 0.3968, "step": 2820 }, { "epoch": 2.98, "learning_rate": 4.2558139534883724e-05, "loss": 0.6001, "step": 2822 }, { "epoch": 2.99, "learning_rate": 4.255285412262156e-05, "loss": 0.4649, "step": 2824 }, { "epoch": 2.99, "learning_rate": 4.254756871035941e-05, "loss": 0.3453, "step": 2826 }, { "epoch": 2.99, "learning_rate": 4.2542283298097255e-05, "loss": 0.4322, "step": 2828 }, { "epoch": 2.99, "learning_rate": 4.2536997885835094e-05, "loss": 0.4257, "step": 2830 }, { "epoch": 2.99, "learning_rate": 4.253171247357294e-05, "loss": 0.2827, "step": 2832 }, { "epoch": 3.0, "learning_rate": 4.2526427061310786e-05, "loss": 0.4138, "step": 2834 }, { "epoch": 3.0, "learning_rate": 4.252114164904863e-05, "loss": 0.3991, "step": 2836 }, { "epoch": 3.0, "learning_rate": 4.251585623678647e-05, "loss": 0.3762, "step": 2838 }, { "epoch": 3.0, "learning_rate": 4.251057082452432e-05, "loss": 0.3813, "step": 2840 }, { "epoch": 3.0, "learning_rate": 4.250528541226216e-05, "loss": 0.4144, "step": 2842 }, { "epoch": 3.01, "learning_rate": 4.25e-05, "loss": 0.3277, "step": 2844 }, { "epoch": 3.01, "learning_rate": 4.249471458773784e-05, "loss": 0.4051, "step": 2846 }, { "epoch": 3.01, "learning_rate": 4.248942917547569e-05, "loss": 0.4003, "step": 2848 }, { "epoch": 3.01, "learning_rate": 4.2484143763213534e-05, "loss": 0.3624, "step": 2850 }, { "epoch": 3.01, "learning_rate": 4.247885835095138e-05, "loss": 0.4119, "step": 2852 }, { "epoch": 3.02, "learning_rate": 4.247357293868922e-05, "loss": 0.6358, "step": 2854 }, { "epoch": 3.02, "learning_rate": 4.2468287526427066e-05, "loss": 0.5912, "step": 2856 }, { "epoch": 3.02, "learning_rate": 4.2463002114164905e-05, "loss": 0.295, "step": 2858 }, { "epoch": 3.02, "learning_rate": 4.245771670190275e-05, "loss": 0.3205, "step": 2860 }, { "epoch": 3.03, "learning_rate": 4.245243128964059e-05, "loss": 0.3077, "step": 2862 }, { "epoch": 3.03, "learning_rate": 4.2447145877378437e-05, "loss": 0.259, "step": 2864 }, { "epoch": 3.03, "learning_rate": 4.2441860465116276e-05, "loss": 0.4562, "step": 2866 }, { "epoch": 3.03, "learning_rate": 4.243657505285413e-05, "loss": 0.2321, "step": 2868 }, { "epoch": 3.03, "learning_rate": 4.243128964059197e-05, "loss": 0.539, "step": 2870 }, { "epoch": 3.04, "learning_rate": 4.2426004228329814e-05, "loss": 0.3658, "step": 2872 }, { "epoch": 3.04, "learning_rate": 4.242071881606765e-05, "loss": 0.2908, "step": 2874 }, { "epoch": 3.04, "learning_rate": 4.24154334038055e-05, "loss": 0.262, "step": 2876 }, { "epoch": 3.04, "learning_rate": 4.241014799154334e-05, "loss": 0.3137, "step": 2878 }, { "epoch": 3.04, "learning_rate": 4.2404862579281185e-05, "loss": 0.2519, "step": 2880 }, { "epoch": 3.05, "learning_rate": 4.239957716701903e-05, "loss": 0.3046, "step": 2882 }, { "epoch": 3.05, "learning_rate": 4.239429175475687e-05, "loss": 0.5082, "step": 2884 }, { "epoch": 3.05, "learning_rate": 4.2389006342494716e-05, "loss": 0.2263, "step": 2886 }, { "epoch": 3.05, "learning_rate": 4.238372093023256e-05, "loss": 0.4031, "step": 2888 }, { "epoch": 3.05, "learning_rate": 4.237843551797041e-05, "loss": 0.3983, "step": 2890 }, { "epoch": 3.06, "learning_rate": 4.237315010570825e-05, "loss": 0.2879, "step": 2892 }, { "epoch": 3.06, "learning_rate": 4.2367864693446094e-05, "loss": 0.2856, "step": 2894 }, { "epoch": 3.06, "learning_rate": 4.236257928118393e-05, "loss": 0.2928, "step": 2896 }, { "epoch": 3.06, "learning_rate": 4.235729386892178e-05, "loss": 0.2686, "step": 2898 }, { "epoch": 3.07, "learning_rate": 4.235200845665962e-05, "loss": 0.4004, "step": 2900 }, { "epoch": 3.07, "learning_rate": 4.2346723044397464e-05, "loss": 0.455, "step": 2902 }, { "epoch": 3.07, "learning_rate": 4.234143763213531e-05, "loss": 0.5315, "step": 2904 }, { "epoch": 3.07, "learning_rate": 4.2336152219873156e-05, "loss": 0.3951, "step": 2906 }, { "epoch": 3.07, "learning_rate": 4.2330866807610996e-05, "loss": 0.3007, "step": 2908 }, { "epoch": 3.08, "learning_rate": 4.232558139534884e-05, "loss": 0.3168, "step": 2910 }, { "epoch": 3.08, "learning_rate": 4.232029598308668e-05, "loss": 0.3028, "step": 2912 }, { "epoch": 3.08, "learning_rate": 4.231501057082453e-05, "loss": 0.1548, "step": 2914 }, { "epoch": 3.08, "learning_rate": 4.2309725158562366e-05, "loss": 0.2496, "step": 2916 }, { "epoch": 3.08, "learning_rate": 4.230443974630021e-05, "loss": 0.4231, "step": 2918 }, { "epoch": 3.09, "learning_rate": 4.229915433403805e-05, "loss": 0.2385, "step": 2920 }, { "epoch": 3.09, "learning_rate": 4.2293868921775904e-05, "loss": 0.342, "step": 2922 }, { "epoch": 3.09, "learning_rate": 4.2288583509513744e-05, "loss": 0.2755, "step": 2924 }, { "epoch": 3.09, "learning_rate": 4.228329809725159e-05, "loss": 0.2918, "step": 2926 }, { "epoch": 3.1, "learning_rate": 4.227801268498943e-05, "loss": 0.3105, "step": 2928 }, { "epoch": 3.1, "learning_rate": 4.2272727272727275e-05, "loss": 0.1969, "step": 2930 }, { "epoch": 3.1, "learning_rate": 4.226744186046512e-05, "loss": 0.3114, "step": 2932 }, { "epoch": 3.1, "learning_rate": 4.226215644820296e-05, "loss": 0.2758, "step": 2934 }, { "epoch": 3.1, "learning_rate": 4.2256871035940807e-05, "loss": 0.2677, "step": 2936 }, { "epoch": 3.11, "learning_rate": 4.2251585623678646e-05, "loss": 0.4188, "step": 2938 }, { "epoch": 3.11, "learning_rate": 4.224630021141649e-05, "loss": 0.2955, "step": 2940 }, { "epoch": 3.11, "learning_rate": 4.224101479915434e-05, "loss": 0.3511, "step": 2942 }, { "epoch": 3.11, "learning_rate": 4.2235729386892184e-05, "loss": 0.2193, "step": 2944 }, { "epoch": 3.11, "learning_rate": 4.223044397463002e-05, "loss": 0.2371, "step": 2946 }, { "epoch": 3.12, "learning_rate": 4.222515856236787e-05, "loss": 0.3929, "step": 2948 }, { "epoch": 3.12, "learning_rate": 4.221987315010571e-05, "loss": 0.4396, "step": 2950 }, { "epoch": 3.12, "learning_rate": 4.2214587737843555e-05, "loss": 0.5568, "step": 2952 }, { "epoch": 3.12, "learning_rate": 4.2209302325581394e-05, "loss": 0.4471, "step": 2954 }, { "epoch": 3.12, "learning_rate": 4.220401691331924e-05, "loss": 0.4105, "step": 2956 }, { "epoch": 3.13, "learning_rate": 4.2198731501057086e-05, "loss": 0.5462, "step": 2958 }, { "epoch": 3.13, "learning_rate": 4.219344608879493e-05, "loss": 0.5864, "step": 2960 }, { "epoch": 3.13, "learning_rate": 4.218816067653277e-05, "loss": 0.5403, "step": 2962 }, { "epoch": 3.13, "learning_rate": 4.218287526427062e-05, "loss": 0.611, "step": 2964 }, { "epoch": 3.14, "learning_rate": 4.217758985200846e-05, "loss": 0.4517, "step": 2966 }, { "epoch": 3.14, "learning_rate": 4.21723044397463e-05, "loss": 0.3851, "step": 2968 }, { "epoch": 3.14, "learning_rate": 4.216701902748414e-05, "loss": 0.3653, "step": 2970 }, { "epoch": 3.14, "learning_rate": 4.216173361522199e-05, "loss": 0.4052, "step": 2972 }, { "epoch": 3.14, "learning_rate": 4.215644820295983e-05, "loss": 0.3847, "step": 2974 }, { "epoch": 3.15, "learning_rate": 4.215116279069768e-05, "loss": 0.2637, "step": 2976 }, { "epoch": 3.15, "learning_rate": 4.214587737843552e-05, "loss": 0.3769, "step": 2978 }, { "epoch": 3.15, "learning_rate": 4.2140591966173366e-05, "loss": 0.3022, "step": 2980 }, { "epoch": 3.15, "learning_rate": 4.2135306553911205e-05, "loss": 0.3418, "step": 2982 }, { "epoch": 3.15, "learning_rate": 4.213002114164905e-05, "loss": 0.2043, "step": 2984 }, { "epoch": 3.16, "learning_rate": 4.21247357293869e-05, "loss": 0.263, "step": 2986 }, { "epoch": 3.16, "learning_rate": 4.2119450317124736e-05, "loss": 0.2247, "step": 2988 }, { "epoch": 3.16, "learning_rate": 4.211416490486258e-05, "loss": 0.2779, "step": 2990 }, { "epoch": 3.16, "learning_rate": 4.210887949260042e-05, "loss": 0.274, "step": 2992 }, { "epoch": 3.16, "learning_rate": 4.210359408033827e-05, "loss": 0.2576, "step": 2994 }, { "epoch": 3.17, "learning_rate": 4.2098308668076114e-05, "loss": 0.4279, "step": 2996 }, { "epoch": 3.17, "learning_rate": 4.209302325581396e-05, "loss": 0.2541, "step": 2998 }, { "epoch": 3.17, "learning_rate": 4.20877378435518e-05, "loss": 0.3729, "step": 3000 }, { "epoch": 3.17, "eval_cer": 0.048674836135651184, "eval_loss": 0.49270230531692505, "eval_runtime": 130.1947, "eval_samples_per_second": 6.46, "eval_steps_per_second": 0.814, "step": 3000 }, { "epoch": 3.17, "learning_rate": 4.2082452431289645e-05, "loss": 0.3319, "step": 3002 }, { "epoch": 3.18, "learning_rate": 4.2077167019027485e-05, "loss": 0.3086, "step": 3004 }, { "epoch": 3.18, "learning_rate": 4.207188160676533e-05, "loss": 0.3868, "step": 3006 }, { "epoch": 3.18, "learning_rate": 4.206659619450317e-05, "loss": 0.3581, "step": 3008 }, { "epoch": 3.18, "learning_rate": 4.2061310782241016e-05, "loss": 0.2487, "step": 3010 }, { "epoch": 3.18, "learning_rate": 4.205602536997886e-05, "loss": 0.38, "step": 3012 }, { "epoch": 3.19, "learning_rate": 4.205073995771671e-05, "loss": 0.4104, "step": 3014 }, { "epoch": 3.19, "learning_rate": 4.204545454545455e-05, "loss": 0.251, "step": 3016 }, { "epoch": 3.19, "learning_rate": 4.204016913319239e-05, "loss": 0.2777, "step": 3018 }, { "epoch": 3.19, "learning_rate": 4.203488372093023e-05, "loss": 0.2662, "step": 3020 }, { "epoch": 3.19, "learning_rate": 4.202959830866808e-05, "loss": 0.2533, "step": 3022 }, { "epoch": 3.2, "learning_rate": 4.202431289640592e-05, "loss": 0.3681, "step": 3024 }, { "epoch": 3.2, "learning_rate": 4.2019027484143764e-05, "loss": 0.4595, "step": 3026 }, { "epoch": 3.2, "learning_rate": 4.20137420718816e-05, "loss": 0.358, "step": 3028 }, { "epoch": 3.2, "learning_rate": 4.2008456659619456e-05, "loss": 0.2452, "step": 3030 }, { "epoch": 3.21, "learning_rate": 4.2003171247357295e-05, "loss": 0.5741, "step": 3032 }, { "epoch": 3.21, "learning_rate": 4.199788583509514e-05, "loss": 0.652, "step": 3034 }, { "epoch": 3.21, "learning_rate": 4.199260042283298e-05, "loss": 0.2136, "step": 3036 }, { "epoch": 3.21, "learning_rate": 4.198731501057083e-05, "loss": 0.3405, "step": 3038 }, { "epoch": 3.21, "learning_rate": 4.198202959830867e-05, "loss": 0.5211, "step": 3040 }, { "epoch": 3.22, "learning_rate": 4.197674418604651e-05, "loss": 0.2128, "step": 3042 }, { "epoch": 3.22, "learning_rate": 4.197145877378436e-05, "loss": 0.3436, "step": 3044 }, { "epoch": 3.22, "learning_rate": 4.19661733615222e-05, "loss": 0.3668, "step": 3046 }, { "epoch": 3.22, "learning_rate": 4.1960887949260044e-05, "loss": 0.2641, "step": 3048 }, { "epoch": 3.22, "learning_rate": 4.195560253699789e-05, "loss": 0.2484, "step": 3050 }, { "epoch": 3.23, "learning_rate": 4.1950317124735736e-05, "loss": 0.3322, "step": 3052 }, { "epoch": 3.23, "learning_rate": 4.1945031712473575e-05, "loss": 0.3741, "step": 3054 }, { "epoch": 3.23, "learning_rate": 4.193974630021142e-05, "loss": 0.5316, "step": 3056 }, { "epoch": 3.23, "learning_rate": 4.193446088794926e-05, "loss": 0.2144, "step": 3058 }, { "epoch": 3.23, "learning_rate": 4.1929175475687106e-05, "loss": 0.4243, "step": 3060 }, { "epoch": 3.24, "learning_rate": 4.1923890063424946e-05, "loss": 0.3349, "step": 3062 }, { "epoch": 3.24, "learning_rate": 4.191860465116279e-05, "loss": 0.3093, "step": 3064 }, { "epoch": 3.24, "learning_rate": 4.191331923890063e-05, "loss": 0.2343, "step": 3066 }, { "epoch": 3.24, "learning_rate": 4.1908033826638484e-05, "loss": 0.2864, "step": 3068 }, { "epoch": 3.25, "learning_rate": 4.190274841437632e-05, "loss": 0.2337, "step": 3070 }, { "epoch": 3.25, "learning_rate": 4.189746300211417e-05, "loss": 0.333, "step": 3072 }, { "epoch": 3.25, "learning_rate": 4.189217758985201e-05, "loss": 0.2783, "step": 3074 }, { "epoch": 3.25, "learning_rate": 4.1886892177589855e-05, "loss": 0.3432, "step": 3076 }, { "epoch": 3.25, "learning_rate": 4.1881606765327694e-05, "loss": 0.3868, "step": 3078 }, { "epoch": 3.26, "learning_rate": 4.187632135306554e-05, "loss": 0.3249, "step": 3080 }, { "epoch": 3.26, "learning_rate": 4.187103594080338e-05, "loss": 0.3919, "step": 3082 }, { "epoch": 3.26, "learning_rate": 4.1865750528541225e-05, "loss": 0.2175, "step": 3084 }, { "epoch": 3.26, "learning_rate": 4.186046511627907e-05, "loss": 0.3022, "step": 3086 }, { "epoch": 3.26, "learning_rate": 4.185517970401692e-05, "loss": 0.2926, "step": 3088 }, { "epoch": 3.27, "learning_rate": 4.184989429175476e-05, "loss": 0.2434, "step": 3090 }, { "epoch": 3.27, "learning_rate": 4.18446088794926e-05, "loss": 0.2615, "step": 3092 }, { "epoch": 3.27, "learning_rate": 4.183932346723045e-05, "loss": 0.3064, "step": 3094 }, { "epoch": 3.27, "learning_rate": 4.183403805496829e-05, "loss": 0.281, "step": 3096 }, { "epoch": 3.27, "learning_rate": 4.1828752642706134e-05, "loss": 0.2042, "step": 3098 }, { "epoch": 3.28, "learning_rate": 4.182346723044397e-05, "loss": 0.4044, "step": 3100 }, { "epoch": 3.28, "learning_rate": 4.181818181818182e-05, "loss": 0.266, "step": 3102 }, { "epoch": 3.28, "learning_rate": 4.1812896405919666e-05, "loss": 0.2419, "step": 3104 }, { "epoch": 3.28, "learning_rate": 4.180761099365751e-05, "loss": 0.2929, "step": 3106 }, { "epoch": 3.29, "learning_rate": 4.180232558139535e-05, "loss": 0.4756, "step": 3108 }, { "epoch": 3.29, "learning_rate": 4.17970401691332e-05, "loss": 0.3084, "step": 3110 }, { "epoch": 3.29, "learning_rate": 4.1791754756871036e-05, "loss": 0.3543, "step": 3112 }, { "epoch": 3.29, "learning_rate": 4.178646934460888e-05, "loss": 0.393, "step": 3114 }, { "epoch": 3.29, "learning_rate": 4.178118393234672e-05, "loss": 0.3009, "step": 3116 }, { "epoch": 3.3, "learning_rate": 4.177589852008457e-05, "loss": 0.185, "step": 3118 }, { "epoch": 3.3, "learning_rate": 4.177061310782241e-05, "loss": 0.2683, "step": 3120 }, { "epoch": 3.3, "learning_rate": 4.176532769556026e-05, "loss": 0.5429, "step": 3122 }, { "epoch": 3.3, "learning_rate": 4.17600422832981e-05, "loss": 0.3401, "step": 3124 }, { "epoch": 3.3, "learning_rate": 4.1754756871035945e-05, "loss": 0.3918, "step": 3126 }, { "epoch": 3.31, "learning_rate": 4.1749471458773784e-05, "loss": 0.3553, "step": 3128 }, { "epoch": 3.31, "learning_rate": 4.174418604651163e-05, "loss": 0.2473, "step": 3130 }, { "epoch": 3.31, "learning_rate": 4.173890063424947e-05, "loss": 0.4602, "step": 3132 }, { "epoch": 3.31, "learning_rate": 4.1733615221987316e-05, "loss": 0.4051, "step": 3134 }, { "epoch": 3.32, "learning_rate": 4.172832980972516e-05, "loss": 0.2363, "step": 3136 }, { "epoch": 3.32, "learning_rate": 4.1723044397463e-05, "loss": 0.2591, "step": 3138 }, { "epoch": 3.32, "learning_rate": 4.171775898520085e-05, "loss": 0.3065, "step": 3140 }, { "epoch": 3.32, "learning_rate": 4.171247357293869e-05, "loss": 0.2056, "step": 3142 }, { "epoch": 3.32, "learning_rate": 4.170718816067653e-05, "loss": 0.302, "step": 3144 }, { "epoch": 3.33, "learning_rate": 4.170190274841438e-05, "loss": 0.3368, "step": 3146 }, { "epoch": 3.33, "learning_rate": 4.1696617336152225e-05, "loss": 0.3408, "step": 3148 }, { "epoch": 3.33, "learning_rate": 4.1691331923890064e-05, "loss": 0.3338, "step": 3150 }, { "epoch": 3.33, "learning_rate": 4.168604651162791e-05, "loss": 0.4434, "step": 3152 }, { "epoch": 3.33, "learning_rate": 4.168076109936575e-05, "loss": 0.4222, "step": 3154 }, { "epoch": 3.34, "learning_rate": 4.1675475687103595e-05, "loss": 0.3405, "step": 3156 }, { "epoch": 3.34, "learning_rate": 4.167019027484144e-05, "loss": 0.3749, "step": 3158 }, { "epoch": 3.34, "learning_rate": 4.166490486257929e-05, "loss": 0.2047, "step": 3160 }, { "epoch": 3.34, "learning_rate": 4.165961945031713e-05, "loss": 0.3287, "step": 3162 }, { "epoch": 3.34, "learning_rate": 4.165433403805497e-05, "loss": 0.3589, "step": 3164 }, { "epoch": 3.35, "learning_rate": 4.164904862579281e-05, "loss": 0.6135, "step": 3166 }, { "epoch": 3.35, "learning_rate": 4.164376321353066e-05, "loss": 0.8384, "step": 3168 }, { "epoch": 3.35, "learning_rate": 4.16384778012685e-05, "loss": 0.3048, "step": 3170 }, { "epoch": 3.35, "learning_rate": 4.1633192389006343e-05, "loss": 0.4265, "step": 3172 }, { "epoch": 3.36, "learning_rate": 4.162790697674418e-05, "loss": 0.4554, "step": 3174 }, { "epoch": 3.36, "learning_rate": 4.1622621564482036e-05, "loss": 0.3637, "step": 3176 }, { "epoch": 3.36, "learning_rate": 4.1617336152219875e-05, "loss": 0.5761, "step": 3178 }, { "epoch": 3.36, "learning_rate": 4.161205073995772e-05, "loss": 0.2125, "step": 3180 }, { "epoch": 3.36, "learning_rate": 4.160676532769556e-05, "loss": 0.2687, "step": 3182 }, { "epoch": 3.37, "learning_rate": 4.1601479915433406e-05, "loss": 0.4152, "step": 3184 }, { "epoch": 3.37, "learning_rate": 4.1596194503171246e-05, "loss": 0.3966, "step": 3186 }, { "epoch": 3.37, "learning_rate": 4.159090909090909e-05, "loss": 0.3472, "step": 3188 }, { "epoch": 3.37, "learning_rate": 4.158562367864694e-05, "loss": 0.2772, "step": 3190 }, { "epoch": 3.37, "learning_rate": 4.158033826638478e-05, "loss": 0.2744, "step": 3192 }, { "epoch": 3.38, "learning_rate": 4.157505285412262e-05, "loss": 0.3092, "step": 3194 }, { "epoch": 3.38, "learning_rate": 4.156976744186047e-05, "loss": 0.4398, "step": 3196 }, { "epoch": 3.38, "learning_rate": 4.1564482029598315e-05, "loss": 0.1722, "step": 3198 }, { "epoch": 3.38, "learning_rate": 4.1559196617336154e-05, "loss": 0.4008, "step": 3200 }, { "epoch": 3.38, "learning_rate": 4.1553911205074e-05, "loss": 0.3137, "step": 3202 }, { "epoch": 3.39, "learning_rate": 4.154862579281184e-05, "loss": 0.4953, "step": 3204 }, { "epoch": 3.39, "learning_rate": 4.1543340380549686e-05, "loss": 0.2774, "step": 3206 }, { "epoch": 3.39, "learning_rate": 4.1538054968287525e-05, "loss": 0.3924, "step": 3208 }, { "epoch": 3.39, "learning_rate": 4.153276955602537e-05, "loss": 0.3354, "step": 3210 }, { "epoch": 3.4, "learning_rate": 4.152748414376322e-05, "loss": 0.2924, "step": 3212 }, { "epoch": 3.4, "learning_rate": 4.152219873150106e-05, "loss": 0.4382, "step": 3214 }, { "epoch": 3.4, "learning_rate": 4.15169133192389e-05, "loss": 0.2454, "step": 3216 }, { "epoch": 3.4, "learning_rate": 4.151162790697675e-05, "loss": 0.3126, "step": 3218 }, { "epoch": 3.4, "learning_rate": 4.150634249471459e-05, "loss": 0.5072, "step": 3220 }, { "epoch": 3.41, "learning_rate": 4.1501057082452434e-05, "loss": 0.1802, "step": 3222 }, { "epoch": 3.41, "learning_rate": 4.149577167019027e-05, "loss": 0.2585, "step": 3224 }, { "epoch": 3.41, "learning_rate": 4.149048625792812e-05, "loss": 0.3149, "step": 3226 }, { "epoch": 3.41, "learning_rate": 4.148520084566596e-05, "loss": 0.2631, "step": 3228 }, { "epoch": 3.41, "learning_rate": 4.147991543340381e-05, "loss": 0.3179, "step": 3230 }, { "epoch": 3.42, "learning_rate": 4.147463002114165e-05, "loss": 0.2586, "step": 3232 }, { "epoch": 3.42, "learning_rate": 4.14693446088795e-05, "loss": 0.3983, "step": 3234 }, { "epoch": 3.42, "learning_rate": 4.1464059196617336e-05, "loss": 0.2693, "step": 3236 }, { "epoch": 3.42, "learning_rate": 4.145877378435518e-05, "loss": 0.1979, "step": 3238 }, { "epoch": 3.42, "learning_rate": 4.145348837209302e-05, "loss": 0.2388, "step": 3240 }, { "epoch": 3.43, "learning_rate": 4.144820295983087e-05, "loss": 0.361, "step": 3242 }, { "epoch": 3.43, "learning_rate": 4.1442917547568713e-05, "loss": 0.2067, "step": 3244 }, { "epoch": 3.43, "learning_rate": 4.143763213530655e-05, "loss": 0.2476, "step": 3246 }, { "epoch": 3.43, "learning_rate": 4.14323467230444e-05, "loss": 0.4077, "step": 3248 }, { "epoch": 3.44, "learning_rate": 4.1427061310782245e-05, "loss": 0.255, "step": 3250 }, { "epoch": 3.44, "learning_rate": 4.142177589852009e-05, "loss": 0.3061, "step": 3252 }, { "epoch": 3.44, "learning_rate": 4.141649048625793e-05, "loss": 0.6814, "step": 3254 }, { "epoch": 3.44, "learning_rate": 4.1411205073995776e-05, "loss": 0.3628, "step": 3256 }, { "epoch": 3.44, "learning_rate": 4.1405919661733616e-05, "loss": 0.2991, "step": 3258 }, { "epoch": 3.45, "learning_rate": 4.140063424947146e-05, "loss": 0.3783, "step": 3260 }, { "epoch": 3.45, "learning_rate": 4.13953488372093e-05, "loss": 0.2028, "step": 3262 }, { "epoch": 3.45, "learning_rate": 4.139006342494715e-05, "loss": 0.2912, "step": 3264 }, { "epoch": 3.45, "learning_rate": 4.138477801268499e-05, "loss": 0.4558, "step": 3266 }, { "epoch": 3.45, "learning_rate": 4.137949260042284e-05, "loss": 0.4464, "step": 3268 }, { "epoch": 3.46, "learning_rate": 4.137420718816068e-05, "loss": 0.5572, "step": 3270 }, { "epoch": 3.46, "learning_rate": 4.1368921775898524e-05, "loss": 0.3313, "step": 3272 }, { "epoch": 3.46, "learning_rate": 4.1363636363636364e-05, "loss": 0.27, "step": 3274 }, { "epoch": 3.46, "learning_rate": 4.135835095137421e-05, "loss": 0.5401, "step": 3276 }, { "epoch": 3.47, "learning_rate": 4.135306553911205e-05, "loss": 0.2565, "step": 3278 }, { "epoch": 3.47, "learning_rate": 4.1347780126849895e-05, "loss": 0.3559, "step": 3280 }, { "epoch": 3.47, "learning_rate": 4.1342494714587734e-05, "loss": 0.3827, "step": 3282 }, { "epoch": 3.47, "learning_rate": 4.133720930232559e-05, "loss": 0.2983, "step": 3284 }, { "epoch": 3.47, "learning_rate": 4.1331923890063427e-05, "loss": 0.4047, "step": 3286 }, { "epoch": 3.48, "learning_rate": 4.132663847780127e-05, "loss": 0.4661, "step": 3288 }, { "epoch": 3.48, "learning_rate": 4.132135306553911e-05, "loss": 0.623, "step": 3290 }, { "epoch": 3.48, "learning_rate": 4.131606765327696e-05, "loss": 0.3762, "step": 3292 }, { "epoch": 3.48, "learning_rate": 4.13107822410148e-05, "loss": 0.3287, "step": 3294 }, { "epoch": 3.48, "learning_rate": 4.130549682875264e-05, "loss": 0.5006, "step": 3296 }, { "epoch": 3.49, "learning_rate": 4.130021141649049e-05, "loss": 0.3024, "step": 3298 }, { "epoch": 3.49, "learning_rate": 4.129492600422833e-05, "loss": 0.2122, "step": 3300 }, { "epoch": 3.49, "learning_rate": 4.1289640591966175e-05, "loss": 0.7112, "step": 3302 }, { "epoch": 3.49, "learning_rate": 4.128435517970402e-05, "loss": 0.1781, "step": 3304 }, { "epoch": 3.49, "learning_rate": 4.127906976744187e-05, "loss": 0.3929, "step": 3306 }, { "epoch": 3.5, "learning_rate": 4.1273784355179706e-05, "loss": 0.3355, "step": 3308 }, { "epoch": 3.5, "learning_rate": 4.126849894291755e-05, "loss": 0.3383, "step": 3310 }, { "epoch": 3.5, "learning_rate": 4.126321353065539e-05, "loss": 0.3993, "step": 3312 }, { "epoch": 3.5, "learning_rate": 4.125792811839324e-05, "loss": 0.2688, "step": 3314 }, { "epoch": 3.51, "learning_rate": 4.125264270613108e-05, "loss": 0.2217, "step": 3316 }, { "epoch": 3.51, "learning_rate": 4.124735729386892e-05, "loss": 0.262, "step": 3318 }, { "epoch": 3.51, "learning_rate": 4.124207188160677e-05, "loss": 0.3199, "step": 3320 }, { "epoch": 3.51, "learning_rate": 4.1236786469344615e-05, "loss": 0.2936, "step": 3322 }, { "epoch": 3.51, "learning_rate": 4.1231501057082454e-05, "loss": 0.2725, "step": 3324 }, { "epoch": 3.52, "learning_rate": 4.12262156448203e-05, "loss": 0.2933, "step": 3326 }, { "epoch": 3.52, "learning_rate": 4.122093023255814e-05, "loss": 0.392, "step": 3328 }, { "epoch": 3.52, "learning_rate": 4.1215644820295986e-05, "loss": 0.2794, "step": 3330 }, { "epoch": 3.52, "learning_rate": 4.1210359408033825e-05, "loss": 0.2913, "step": 3332 }, { "epoch": 3.52, "learning_rate": 4.120507399577167e-05, "loss": 0.3144, "step": 3334 }, { "epoch": 3.53, "learning_rate": 4.119978858350951e-05, "loss": 0.2582, "step": 3336 }, { "epoch": 3.53, "learning_rate": 4.119450317124736e-05, "loss": 0.2666, "step": 3338 }, { "epoch": 3.53, "learning_rate": 4.11892177589852e-05, "loss": 0.3842, "step": 3340 }, { "epoch": 3.53, "learning_rate": 4.118393234672305e-05, "loss": 0.5303, "step": 3342 }, { "epoch": 3.53, "learning_rate": 4.117864693446089e-05, "loss": 0.3226, "step": 3344 }, { "epoch": 3.54, "learning_rate": 4.1173361522198734e-05, "loss": 0.2795, "step": 3346 }, { "epoch": 3.54, "learning_rate": 4.116807610993657e-05, "loss": 0.3647, "step": 3348 }, { "epoch": 3.54, "learning_rate": 4.116279069767442e-05, "loss": 0.3534, "step": 3350 }, { "epoch": 3.54, "learning_rate": 4.1157505285412265e-05, "loss": 0.4055, "step": 3352 }, { "epoch": 3.55, "learning_rate": 4.1152219873150104e-05, "loss": 0.5529, "step": 3354 }, { "epoch": 3.55, "learning_rate": 4.114693446088795e-05, "loss": 0.5072, "step": 3356 }, { "epoch": 3.55, "learning_rate": 4.1141649048625797e-05, "loss": 0.2841, "step": 3358 }, { "epoch": 3.55, "learning_rate": 4.113636363636364e-05, "loss": 0.3891, "step": 3360 }, { "epoch": 3.55, "learning_rate": 4.113107822410148e-05, "loss": 0.3782, "step": 3362 }, { "epoch": 3.56, "learning_rate": 4.112579281183933e-05, "loss": 0.27, "step": 3364 }, { "epoch": 3.56, "learning_rate": 4.112050739957717e-05, "loss": 0.2478, "step": 3366 }, { "epoch": 3.56, "learning_rate": 4.111522198731501e-05, "loss": 0.3285, "step": 3368 }, { "epoch": 3.56, "learning_rate": 4.110993657505285e-05, "loss": 0.3204, "step": 3370 }, { "epoch": 3.56, "learning_rate": 4.11046511627907e-05, "loss": 0.3976, "step": 3372 }, { "epoch": 3.57, "learning_rate": 4.1099365750528545e-05, "loss": 0.3353, "step": 3374 }, { "epoch": 3.57, "learning_rate": 4.109408033826639e-05, "loss": 0.5545, "step": 3376 }, { "epoch": 3.57, "learning_rate": 4.108879492600423e-05, "loss": 0.3212, "step": 3378 }, { "epoch": 3.57, "learning_rate": 4.1083509513742076e-05, "loss": 0.4366, "step": 3380 }, { "epoch": 3.58, "learning_rate": 4.1078224101479915e-05, "loss": 0.2358, "step": 3382 }, { "epoch": 3.58, "learning_rate": 4.107293868921776e-05, "loss": 0.285, "step": 3384 }, { "epoch": 3.58, "learning_rate": 4.10676532769556e-05, "loss": 0.2965, "step": 3386 }, { "epoch": 3.58, "learning_rate": 4.106236786469345e-05, "loss": 0.2111, "step": 3388 }, { "epoch": 3.58, "learning_rate": 4.1057082452431286e-05, "loss": 0.2055, "step": 3390 }, { "epoch": 3.59, "learning_rate": 4.105179704016914e-05, "loss": 0.3141, "step": 3392 }, { "epoch": 3.59, "learning_rate": 4.104651162790698e-05, "loss": 0.2548, "step": 3394 }, { "epoch": 3.59, "learning_rate": 4.1041226215644824e-05, "loss": 0.1756, "step": 3396 }, { "epoch": 3.59, "learning_rate": 4.1035940803382664e-05, "loss": 0.1975, "step": 3398 }, { "epoch": 3.59, "learning_rate": 4.103065539112051e-05, "loss": 0.4897, "step": 3400 }, { "epoch": 3.6, "learning_rate": 4.1025369978858356e-05, "loss": 0.243, "step": 3402 }, { "epoch": 3.6, "learning_rate": 4.1020084566596195e-05, "loss": 0.2601, "step": 3404 }, { "epoch": 3.6, "learning_rate": 4.101479915433404e-05, "loss": 0.3227, "step": 3406 }, { "epoch": 3.6, "learning_rate": 4.100951374207188e-05, "loss": 0.3445, "step": 3408 }, { "epoch": 3.6, "learning_rate": 4.1004228329809726e-05, "loss": 0.2215, "step": 3410 }, { "epoch": 3.61, "learning_rate": 4.099894291754757e-05, "loss": 0.2449, "step": 3412 }, { "epoch": 3.61, "learning_rate": 4.099365750528542e-05, "loss": 0.3551, "step": 3414 }, { "epoch": 3.61, "learning_rate": 4.098837209302326e-05, "loss": 0.1306, "step": 3416 }, { "epoch": 3.61, "learning_rate": 4.0983086680761104e-05, "loss": 0.2045, "step": 3418 }, { "epoch": 3.62, "learning_rate": 4.097780126849894e-05, "loss": 0.1964, "step": 3420 }, { "epoch": 3.62, "learning_rate": 4.097251585623679e-05, "loss": 0.3248, "step": 3422 }, { "epoch": 3.62, "learning_rate": 4.096723044397463e-05, "loss": 0.4956, "step": 3424 }, { "epoch": 3.62, "learning_rate": 4.0961945031712474e-05, "loss": 0.2128, "step": 3426 }, { "epoch": 3.62, "learning_rate": 4.095665961945032e-05, "loss": 0.354, "step": 3428 }, { "epoch": 3.63, "learning_rate": 4.0951374207188167e-05, "loss": 0.2535, "step": 3430 }, { "epoch": 3.63, "learning_rate": 4.0946088794926006e-05, "loss": 0.3954, "step": 3432 }, { "epoch": 3.63, "learning_rate": 4.094080338266385e-05, "loss": 0.4083, "step": 3434 }, { "epoch": 3.63, "learning_rate": 4.093551797040169e-05, "loss": 0.2527, "step": 3436 }, { "epoch": 3.63, "learning_rate": 4.093023255813954e-05, "loss": 0.2535, "step": 3438 }, { "epoch": 3.64, "learning_rate": 4.0924947145877377e-05, "loss": 0.261, "step": 3440 }, { "epoch": 3.64, "learning_rate": 4.091966173361522e-05, "loss": 0.4259, "step": 3442 }, { "epoch": 3.64, "learning_rate": 4.091437632135306e-05, "loss": 0.4179, "step": 3444 }, { "epoch": 3.64, "learning_rate": 4.0909090909090915e-05, "loss": 0.2675, "step": 3446 }, { "epoch": 3.64, "learning_rate": 4.0903805496828754e-05, "loss": 0.4809, "step": 3448 }, { "epoch": 3.65, "learning_rate": 4.08985200845666e-05, "loss": 0.2123, "step": 3450 }, { "epoch": 3.65, "learning_rate": 4.089323467230444e-05, "loss": 0.1502, "step": 3452 }, { "epoch": 3.65, "learning_rate": 4.0887949260042285e-05, "loss": 0.2021, "step": 3454 }, { "epoch": 3.65, "learning_rate": 4.088266384778013e-05, "loss": 0.296, "step": 3456 }, { "epoch": 3.66, "learning_rate": 4.087737843551797e-05, "loss": 0.3827, "step": 3458 }, { "epoch": 3.66, "learning_rate": 4.087209302325582e-05, "loss": 0.3757, "step": 3460 }, { "epoch": 3.66, "learning_rate": 4.0866807610993656e-05, "loss": 0.2955, "step": 3462 }, { "epoch": 3.66, "learning_rate": 4.08615221987315e-05, "loss": 0.2052, "step": 3464 }, { "epoch": 3.66, "learning_rate": 4.085623678646935e-05, "loss": 0.211, "step": 3466 }, { "epoch": 3.67, "learning_rate": 4.0850951374207194e-05, "loss": 0.3555, "step": 3468 }, { "epoch": 3.67, "learning_rate": 4.0845665961945034e-05, "loss": 0.3841, "step": 3470 }, { "epoch": 3.67, "learning_rate": 4.084038054968288e-05, "loss": 0.3611, "step": 3472 }, { "epoch": 3.67, "learning_rate": 4.083509513742072e-05, "loss": 0.4419, "step": 3474 }, { "epoch": 3.67, "learning_rate": 4.0829809725158565e-05, "loss": 0.5077, "step": 3476 }, { "epoch": 3.68, "learning_rate": 4.0824524312896404e-05, "loss": 0.2802, "step": 3478 }, { "epoch": 3.68, "learning_rate": 4.081923890063425e-05, "loss": 0.5795, "step": 3480 }, { "epoch": 3.68, "learning_rate": 4.0813953488372096e-05, "loss": 0.2565, "step": 3482 }, { "epoch": 3.68, "learning_rate": 4.080866807610994e-05, "loss": 0.2815, "step": 3484 }, { "epoch": 3.68, "learning_rate": 4.080338266384778e-05, "loss": 0.5439, "step": 3486 }, { "epoch": 3.69, "learning_rate": 4.079809725158563e-05, "loss": 0.2885, "step": 3488 }, { "epoch": 3.69, "learning_rate": 4.079281183932347e-05, "loss": 0.295, "step": 3490 }, { "epoch": 3.69, "learning_rate": 4.078752642706131e-05, "loss": 0.2716, "step": 3492 }, { "epoch": 3.69, "learning_rate": 4.078224101479915e-05, "loss": 0.423, "step": 3494 }, { "epoch": 3.7, "learning_rate": 4.0776955602537e-05, "loss": 0.229, "step": 3496 }, { "epoch": 3.7, "learning_rate": 4.077167019027484e-05, "loss": 0.3066, "step": 3498 }, { "epoch": 3.7, "learning_rate": 4.076638477801269e-05, "loss": 0.2746, "step": 3500 }, { "epoch": 3.7, "eval_cer": 0.031917925334853235, "eval_loss": 0.4219053387641907, "eval_runtime": 130.4096, "eval_samples_per_second": 6.449, "eval_steps_per_second": 0.813, "step": 3500 }, { "epoch": 3.7, "learning_rate": 4.076109936575053e-05, "loss": 0.4058, "step": 3502 }, { "epoch": 3.7, "learning_rate": 4.0755813953488376e-05, "loss": 0.1374, "step": 3504 }, { "epoch": 3.71, "learning_rate": 4.0750528541226215e-05, "loss": 0.1596, "step": 3506 }, { "epoch": 3.71, "learning_rate": 4.074524312896406e-05, "loss": 0.3841, "step": 3508 }, { "epoch": 3.71, "learning_rate": 4.073995771670191e-05, "loss": 0.326, "step": 3510 }, { "epoch": 3.71, "learning_rate": 4.073467230443975e-05, "loss": 0.2823, "step": 3512 }, { "epoch": 3.71, "learning_rate": 4.072938689217759e-05, "loss": 0.3303, "step": 3514 }, { "epoch": 3.72, "learning_rate": 4.072410147991543e-05, "loss": 0.3175, "step": 3516 }, { "epoch": 3.72, "learning_rate": 4.0718816067653285e-05, "loss": 0.2827, "step": 3518 }, { "epoch": 3.72, "learning_rate": 4.0713530655391124e-05, "loss": 0.3925, "step": 3520 }, { "epoch": 3.72, "learning_rate": 4.070824524312897e-05, "loss": 0.315, "step": 3522 }, { "epoch": 3.73, "learning_rate": 4.070295983086681e-05, "loss": 0.318, "step": 3524 }, { "epoch": 3.73, "learning_rate": 4.0697674418604655e-05, "loss": 0.3343, "step": 3526 }, { "epoch": 3.73, "learning_rate": 4.0692389006342495e-05, "loss": 0.2611, "step": 3528 }, { "epoch": 3.73, "learning_rate": 4.068710359408034e-05, "loss": 0.1885, "step": 3530 }, { "epoch": 3.73, "learning_rate": 4.068181818181818e-05, "loss": 0.4439, "step": 3532 }, { "epoch": 3.74, "learning_rate": 4.0676532769556026e-05, "loss": 0.1693, "step": 3534 }, { "epoch": 3.74, "learning_rate": 4.067124735729387e-05, "loss": 0.2422, "step": 3536 }, { "epoch": 3.74, "learning_rate": 4.066596194503172e-05, "loss": 0.2957, "step": 3538 }, { "epoch": 3.74, "learning_rate": 4.066067653276956e-05, "loss": 0.4195, "step": 3540 }, { "epoch": 3.74, "learning_rate": 4.0655391120507404e-05, "loss": 0.2689, "step": 3542 }, { "epoch": 3.75, "learning_rate": 4.065010570824524e-05, "loss": 0.2917, "step": 3544 }, { "epoch": 3.75, "learning_rate": 4.064482029598309e-05, "loss": 0.1926, "step": 3546 }, { "epoch": 3.75, "learning_rate": 4.063953488372093e-05, "loss": 0.2167, "step": 3548 }, { "epoch": 3.75, "learning_rate": 4.0634249471458774e-05, "loss": 0.2325, "step": 3550 }, { "epoch": 3.75, "learning_rate": 4.0628964059196614e-05, "loss": 0.2004, "step": 3552 }, { "epoch": 3.76, "learning_rate": 4.0623678646934466e-05, "loss": 0.2498, "step": 3554 }, { "epoch": 3.76, "learning_rate": 4.0618393234672306e-05, "loss": 0.2831, "step": 3556 }, { "epoch": 3.76, "learning_rate": 4.061310782241015e-05, "loss": 0.2611, "step": 3558 }, { "epoch": 3.76, "learning_rate": 4.060782241014799e-05, "loss": 0.2673, "step": 3560 }, { "epoch": 3.77, "learning_rate": 4.060253699788584e-05, "loss": 0.1925, "step": 3562 }, { "epoch": 3.77, "learning_rate": 4.059725158562368e-05, "loss": 0.2026, "step": 3564 }, { "epoch": 3.77, "learning_rate": 4.059196617336152e-05, "loss": 0.2491, "step": 3566 }, { "epoch": 3.77, "learning_rate": 4.058668076109937e-05, "loss": 0.1843, "step": 3568 }, { "epoch": 3.77, "learning_rate": 4.058139534883721e-05, "loss": 0.2792, "step": 3570 }, { "epoch": 3.78, "learning_rate": 4.057610993657506e-05, "loss": 0.3438, "step": 3572 }, { "epoch": 3.78, "learning_rate": 4.05708245243129e-05, "loss": 0.2346, "step": 3574 }, { "epoch": 3.78, "learning_rate": 4.0565539112050746e-05, "loss": 0.2819, "step": 3576 }, { "epoch": 3.78, "learning_rate": 4.0560253699788585e-05, "loss": 0.2978, "step": 3578 }, { "epoch": 3.78, "learning_rate": 4.055496828752643e-05, "loss": 0.4547, "step": 3580 }, { "epoch": 3.79, "learning_rate": 4.054968287526427e-05, "loss": 0.3207, "step": 3582 }, { "epoch": 3.79, "learning_rate": 4.054439746300212e-05, "loss": 0.2616, "step": 3584 }, { "epoch": 3.79, "learning_rate": 4.0539112050739956e-05, "loss": 0.3679, "step": 3586 }, { "epoch": 3.79, "learning_rate": 4.05338266384778e-05, "loss": 0.2208, "step": 3588 }, { "epoch": 3.79, "learning_rate": 4.052854122621565e-05, "loss": 0.2534, "step": 3590 }, { "epoch": 3.8, "learning_rate": 4.0523255813953494e-05, "loss": 0.2538, "step": 3592 }, { "epoch": 3.8, "learning_rate": 4.0517970401691333e-05, "loss": 0.3186, "step": 3594 }, { "epoch": 3.8, "learning_rate": 4.051268498942918e-05, "loss": 0.216, "step": 3596 }, { "epoch": 3.8, "learning_rate": 4.050739957716702e-05, "loss": 0.252, "step": 3598 }, { "epoch": 3.81, "learning_rate": 4.0502114164904865e-05, "loss": 0.3972, "step": 3600 }, { "epoch": 3.81, "learning_rate": 4.0496828752642704e-05, "loss": 0.3036, "step": 3602 }, { "epoch": 3.81, "learning_rate": 4.049154334038055e-05, "loss": 0.3883, "step": 3604 }, { "epoch": 3.81, "learning_rate": 4.0486257928118396e-05, "loss": 0.2229, "step": 3606 }, { "epoch": 3.81, "learning_rate": 4.048097251585624e-05, "loss": 0.359, "step": 3608 }, { "epoch": 3.82, "learning_rate": 4.047568710359408e-05, "loss": 0.2468, "step": 3610 }, { "epoch": 3.82, "learning_rate": 4.047040169133193e-05, "loss": 0.2458, "step": 3612 }, { "epoch": 3.82, "learning_rate": 4.046511627906977e-05, "loss": 0.2324, "step": 3614 }, { "epoch": 3.82, "learning_rate": 4.045983086680761e-05, "loss": 0.2602, "step": 3616 }, { "epoch": 3.82, "learning_rate": 4.045454545454546e-05, "loss": 0.2921, "step": 3618 }, { "epoch": 3.83, "learning_rate": 4.04492600422833e-05, "loss": 0.3894, "step": 3620 }, { "epoch": 3.83, "learning_rate": 4.0443974630021144e-05, "loss": 0.3837, "step": 3622 }, { "epoch": 3.83, "learning_rate": 4.0438689217758984e-05, "loss": 0.1566, "step": 3624 }, { "epoch": 3.83, "learning_rate": 4.0433403805496836e-05, "loss": 0.2718, "step": 3626 }, { "epoch": 3.84, "learning_rate": 4.0428118393234676e-05, "loss": 0.2338, "step": 3628 }, { "epoch": 3.84, "learning_rate": 4.042283298097252e-05, "loss": 0.2583, "step": 3630 }, { "epoch": 3.84, "learning_rate": 4.041754756871036e-05, "loss": 0.4098, "step": 3632 }, { "epoch": 3.84, "learning_rate": 4.041226215644821e-05, "loss": 0.3291, "step": 3634 }, { "epoch": 3.84, "learning_rate": 4.0406976744186046e-05, "loss": 0.3875, "step": 3636 }, { "epoch": 3.85, "learning_rate": 4.040169133192389e-05, "loss": 0.3359, "step": 3638 }, { "epoch": 3.85, "learning_rate": 4.039640591966173e-05, "loss": 0.545, "step": 3640 }, { "epoch": 3.85, "learning_rate": 4.039112050739958e-05, "loss": 0.2262, "step": 3642 }, { "epoch": 3.85, "learning_rate": 4.0385835095137424e-05, "loss": 0.2983, "step": 3644 }, { "epoch": 3.85, "learning_rate": 4.038054968287527e-05, "loss": 0.3009, "step": 3646 }, { "epoch": 3.86, "learning_rate": 4.037526427061311e-05, "loss": 0.3872, "step": 3648 }, { "epoch": 3.86, "learning_rate": 4.0369978858350955e-05, "loss": 0.5378, "step": 3650 }, { "epoch": 3.86, "learning_rate": 4.0364693446088795e-05, "loss": 0.3241, "step": 3652 }, { "epoch": 3.86, "learning_rate": 4.035940803382664e-05, "loss": 0.2517, "step": 3654 }, { "epoch": 3.86, "learning_rate": 4.035412262156448e-05, "loss": 0.3658, "step": 3656 }, { "epoch": 3.87, "learning_rate": 4.0348837209302326e-05, "loss": 0.2965, "step": 3658 }, { "epoch": 3.87, "learning_rate": 4.034355179704017e-05, "loss": 0.2556, "step": 3660 }, { "epoch": 3.87, "learning_rate": 4.033826638477802e-05, "loss": 0.2858, "step": 3662 }, { "epoch": 3.87, "learning_rate": 4.033298097251586e-05, "loss": 0.2957, "step": 3664 }, { "epoch": 3.88, "learning_rate": 4.0327695560253703e-05, "loss": 0.2999, "step": 3666 }, { "epoch": 3.88, "learning_rate": 4.032241014799155e-05, "loss": 0.3098, "step": 3668 }, { "epoch": 3.88, "learning_rate": 4.031712473572939e-05, "loss": 0.2422, "step": 3670 }, { "epoch": 3.88, "learning_rate": 4.0311839323467235e-05, "loss": 0.2735, "step": 3672 }, { "epoch": 3.88, "learning_rate": 4.0306553911205074e-05, "loss": 0.3498, "step": 3674 }, { "epoch": 3.89, "learning_rate": 4.030126849894292e-05, "loss": 0.2948, "step": 3676 }, { "epoch": 3.89, "learning_rate": 4.029598308668076e-05, "loss": 0.2371, "step": 3678 }, { "epoch": 3.89, "learning_rate": 4.029069767441861e-05, "loss": 0.3495, "step": 3680 }, { "epoch": 3.89, "learning_rate": 4.028541226215645e-05, "loss": 0.2426, "step": 3682 }, { "epoch": 3.89, "learning_rate": 4.02801268498943e-05, "loss": 0.4343, "step": 3684 }, { "epoch": 3.9, "learning_rate": 4.027484143763214e-05, "loss": 0.3005, "step": 3686 }, { "epoch": 3.9, "learning_rate": 4.026955602536998e-05, "loss": 0.233, "step": 3688 }, { "epoch": 3.9, "learning_rate": 4.026427061310782e-05, "loss": 0.3673, "step": 3690 }, { "epoch": 3.9, "learning_rate": 4.025898520084567e-05, "loss": 0.1899, "step": 3692 }, { "epoch": 3.9, "learning_rate": 4.025369978858351e-05, "loss": 0.2641, "step": 3694 }, { "epoch": 3.91, "learning_rate": 4.0248414376321354e-05, "loss": 0.2289, "step": 3696 }, { "epoch": 3.91, "learning_rate": 4.02431289640592e-05, "loss": 0.1489, "step": 3698 }, { "epoch": 3.91, "learning_rate": 4.0237843551797046e-05, "loss": 0.3439, "step": 3700 }, { "epoch": 3.91, "learning_rate": 4.0232558139534885e-05, "loss": 0.2439, "step": 3702 }, { "epoch": 3.92, "learning_rate": 4.022727272727273e-05, "loss": 0.3292, "step": 3704 }, { "epoch": 3.92, "learning_rate": 4.022198731501057e-05, "loss": 0.1504, "step": 3706 }, { "epoch": 3.92, "learning_rate": 4.0216701902748416e-05, "loss": 0.3573, "step": 3708 }, { "epoch": 3.92, "learning_rate": 4.0211416490486256e-05, "loss": 0.2399, "step": 3710 }, { "epoch": 3.92, "learning_rate": 4.02061310782241e-05, "loss": 0.2406, "step": 3712 }, { "epoch": 3.93, "learning_rate": 4.020084566596195e-05, "loss": 0.2586, "step": 3714 }, { "epoch": 3.93, "learning_rate": 4.0195560253699794e-05, "loss": 0.4283, "step": 3716 }, { "epoch": 3.93, "learning_rate": 4.019027484143763e-05, "loss": 0.2388, "step": 3718 }, { "epoch": 3.93, "learning_rate": 4.018498942917548e-05, "loss": 0.4452, "step": 3720 }, { "epoch": 3.93, "learning_rate": 4.0179704016913325e-05, "loss": 0.2854, "step": 3722 }, { "epoch": 3.94, "learning_rate": 4.0174418604651165e-05, "loss": 0.2051, "step": 3724 }, { "epoch": 3.94, "learning_rate": 4.016913319238901e-05, "loss": 0.339, "step": 3726 }, { "epoch": 3.94, "learning_rate": 4.016384778012685e-05, "loss": 0.1278, "step": 3728 }, { "epoch": 3.94, "learning_rate": 4.0158562367864696e-05, "loss": 0.2469, "step": 3730 }, { "epoch": 3.95, "learning_rate": 4.0153276955602535e-05, "loss": 0.2045, "step": 3732 }, { "epoch": 3.95, "learning_rate": 4.014799154334038e-05, "loss": 0.2847, "step": 3734 }, { "epoch": 3.95, "learning_rate": 4.014270613107823e-05, "loss": 0.1441, "step": 3736 }, { "epoch": 3.95, "learning_rate": 4.0137420718816073e-05, "loss": 0.3645, "step": 3738 }, { "epoch": 3.95, "learning_rate": 4.013213530655391e-05, "loss": 0.2791, "step": 3740 }, { "epoch": 3.96, "learning_rate": 4.012684989429176e-05, "loss": 0.3352, "step": 3742 }, { "epoch": 3.96, "learning_rate": 4.01215644820296e-05, "loss": 0.4004, "step": 3744 }, { "epoch": 3.96, "learning_rate": 4.0116279069767444e-05, "loss": 0.2803, "step": 3746 }, { "epoch": 3.96, "learning_rate": 4.0110993657505283e-05, "loss": 0.3235, "step": 3748 }, { "epoch": 3.96, "learning_rate": 4.010570824524313e-05, "loss": 0.2786, "step": 3750 }, { "epoch": 3.97, "learning_rate": 4.010042283298097e-05, "loss": 0.277, "step": 3752 }, { "epoch": 3.97, "learning_rate": 4.009513742071882e-05, "loss": 0.3252, "step": 3754 }, { "epoch": 3.97, "learning_rate": 4.008985200845666e-05, "loss": 0.361, "step": 3756 }, { "epoch": 3.97, "learning_rate": 4.008456659619451e-05, "loss": 0.3615, "step": 3758 }, { "epoch": 3.97, "learning_rate": 4.0079281183932346e-05, "loss": 0.3047, "step": 3760 }, { "epoch": 3.98, "learning_rate": 4.007399577167019e-05, "loss": 0.3798, "step": 3762 }, { "epoch": 3.98, "learning_rate": 4.006871035940803e-05, "loss": 0.3093, "step": 3764 }, { "epoch": 3.98, "learning_rate": 4.006342494714588e-05, "loss": 0.2643, "step": 3766 }, { "epoch": 3.98, "learning_rate": 4.0058139534883724e-05, "loss": 0.4188, "step": 3768 }, { "epoch": 3.99, "learning_rate": 4.005285412262156e-05, "loss": 0.3424, "step": 3770 }, { "epoch": 3.99, "learning_rate": 4.004756871035941e-05, "loss": 0.2737, "step": 3772 }, { "epoch": 3.99, "learning_rate": 4.0042283298097255e-05, "loss": 0.2672, "step": 3774 }, { "epoch": 3.99, "learning_rate": 4.00369978858351e-05, "loss": 0.2018, "step": 3776 }, { "epoch": 3.99, "learning_rate": 4.003171247357294e-05, "loss": 0.1879, "step": 3778 }, { "epoch": 4.0, "learning_rate": 4.0026427061310787e-05, "loss": 0.2752, "step": 3780 }, { "epoch": 4.0, "learning_rate": 4.0021141649048626e-05, "loss": 0.304, "step": 3782 }, { "epoch": 4.0, "learning_rate": 4.001585623678647e-05, "loss": 0.1757, "step": 3784 }, { "epoch": 4.0, "learning_rate": 4.001057082452431e-05, "loss": 0.2514, "step": 3786 }, { "epoch": 4.0, "learning_rate": 4.000528541226216e-05, "loss": 0.2022, "step": 3788 }, { "epoch": 4.01, "learning_rate": 4e-05, "loss": 0.2014, "step": 3790 }, { "epoch": 4.01, "learning_rate": 3.999471458773785e-05, "loss": 0.1431, "step": 3792 }, { "epoch": 4.01, "learning_rate": 3.998942917547569e-05, "loss": 0.2986, "step": 3794 }, { "epoch": 4.01, "learning_rate": 3.9984143763213535e-05, "loss": 0.2922, "step": 3796 }, { "epoch": 4.01, "learning_rate": 3.9978858350951374e-05, "loss": 0.323, "step": 3798 }, { "epoch": 4.02, "learning_rate": 3.997357293868922e-05, "loss": 0.1974, "step": 3800 }, { "epoch": 4.02, "learning_rate": 3.996828752642706e-05, "loss": 0.2474, "step": 3802 }, { "epoch": 4.02, "learning_rate": 3.9963002114164905e-05, "loss": 0.3273, "step": 3804 }, { "epoch": 4.02, "learning_rate": 3.9957716701902745e-05, "loss": 0.2557, "step": 3806 }, { "epoch": 4.03, "learning_rate": 3.99524312896406e-05, "loss": 0.1653, "step": 3808 }, { "epoch": 4.03, "learning_rate": 3.994714587737844e-05, "loss": 0.2223, "step": 3810 }, { "epoch": 4.03, "learning_rate": 3.994186046511628e-05, "loss": 0.186, "step": 3812 }, { "epoch": 4.03, "learning_rate": 3.993657505285412e-05, "loss": 0.1906, "step": 3814 }, { "epoch": 4.03, "learning_rate": 3.993128964059197e-05, "loss": 0.2265, "step": 3816 }, { "epoch": 4.04, "learning_rate": 3.992600422832981e-05, "loss": 0.2011, "step": 3818 }, { "epoch": 4.04, "learning_rate": 3.9920718816067654e-05, "loss": 0.2358, "step": 3820 }, { "epoch": 4.04, "learning_rate": 3.99154334038055e-05, "loss": 0.1286, "step": 3822 }, { "epoch": 4.04, "learning_rate": 3.991014799154334e-05, "loss": 0.2227, "step": 3824 }, { "epoch": 4.04, "learning_rate": 3.9904862579281185e-05, "loss": 0.1696, "step": 3826 }, { "epoch": 4.05, "learning_rate": 3.989957716701903e-05, "loss": 0.2811, "step": 3828 }, { "epoch": 4.05, "learning_rate": 3.989429175475688e-05, "loss": 0.2175, "step": 3830 }, { "epoch": 4.05, "learning_rate": 3.9889006342494716e-05, "loss": 0.5743, "step": 3832 }, { "epoch": 4.05, "learning_rate": 3.988372093023256e-05, "loss": 0.6482, "step": 3834 }, { "epoch": 4.05, "learning_rate": 3.98784355179704e-05, "loss": 0.4804, "step": 3836 }, { "epoch": 4.06, "learning_rate": 3.987315010570825e-05, "loss": 0.505, "step": 3838 }, { "epoch": 4.06, "learning_rate": 3.986786469344609e-05, "loss": 0.6236, "step": 3840 }, { "epoch": 4.06, "learning_rate": 3.986257928118393e-05, "loss": 0.3889, "step": 3842 }, { "epoch": 4.06, "learning_rate": 3.985729386892178e-05, "loss": 0.1109, "step": 3844 }, { "epoch": 4.07, "learning_rate": 3.9852008456659625e-05, "loss": 0.4023, "step": 3846 }, { "epoch": 4.07, "learning_rate": 3.9846723044397464e-05, "loss": 0.4235, "step": 3848 }, { "epoch": 4.07, "learning_rate": 3.984143763213531e-05, "loss": 0.3534, "step": 3850 }, { "epoch": 4.07, "learning_rate": 3.983615221987315e-05, "loss": 0.3166, "step": 3852 }, { "epoch": 4.07, "learning_rate": 3.9830866807610996e-05, "loss": 0.2571, "step": 3854 }, { "epoch": 4.08, "learning_rate": 3.9825581395348835e-05, "loss": 0.4292, "step": 3856 }, { "epoch": 4.08, "learning_rate": 3.982029598308668e-05, "loss": 0.3876, "step": 3858 }, { "epoch": 4.08, "learning_rate": 3.981501057082452e-05, "loss": 0.4129, "step": 3860 }, { "epoch": 4.08, "learning_rate": 3.980972515856237e-05, "loss": 0.1996, "step": 3862 }, { "epoch": 4.08, "learning_rate": 3.980443974630021e-05, "loss": 0.2858, "step": 3864 }, { "epoch": 4.09, "learning_rate": 3.979915433403806e-05, "loss": 0.33, "step": 3866 }, { "epoch": 4.09, "learning_rate": 3.97938689217759e-05, "loss": 0.1376, "step": 3868 }, { "epoch": 4.09, "learning_rate": 3.9788583509513744e-05, "loss": 0.1673, "step": 3870 }, { "epoch": 4.09, "learning_rate": 3.978329809725159e-05, "loss": 0.172, "step": 3872 }, { "epoch": 4.1, "learning_rate": 3.977801268498943e-05, "loss": 0.2426, "step": 3874 }, { "epoch": 4.1, "learning_rate": 3.9772727272727275e-05, "loss": 0.3028, "step": 3876 }, { "epoch": 4.1, "learning_rate": 3.9767441860465115e-05, "loss": 0.2281, "step": 3878 }, { "epoch": 4.1, "learning_rate": 3.976215644820296e-05, "loss": 0.1447, "step": 3880 }, { "epoch": 4.1, "learning_rate": 3.975687103594081e-05, "loss": 0.2487, "step": 3882 }, { "epoch": 4.11, "learning_rate": 3.975158562367865e-05, "loss": 0.1818, "step": 3884 }, { "epoch": 4.11, "learning_rate": 3.974630021141649e-05, "loss": 0.1965, "step": 3886 }, { "epoch": 4.11, "learning_rate": 3.974101479915434e-05, "loss": 0.2904, "step": 3888 }, { "epoch": 4.11, "learning_rate": 3.973572938689218e-05, "loss": 0.3242, "step": 3890 }, { "epoch": 4.11, "learning_rate": 3.9730443974630024e-05, "loss": 0.263, "step": 3892 }, { "epoch": 4.12, "learning_rate": 3.972515856236786e-05, "loss": 0.1676, "step": 3894 }, { "epoch": 4.12, "learning_rate": 3.971987315010571e-05, "loss": 0.1705, "step": 3896 }, { "epoch": 4.12, "learning_rate": 3.9714587737843555e-05, "loss": 0.2202, "step": 3898 }, { "epoch": 4.12, "learning_rate": 3.97093023255814e-05, "loss": 0.2569, "step": 3900 }, { "epoch": 4.12, "learning_rate": 3.970401691331924e-05, "loss": 0.1988, "step": 3902 }, { "epoch": 4.13, "learning_rate": 3.9698731501057086e-05, "loss": 0.3049, "step": 3904 }, { "epoch": 4.13, "learning_rate": 3.9693446088794926e-05, "loss": 0.1223, "step": 3906 }, { "epoch": 4.13, "learning_rate": 3.968816067653277e-05, "loss": 0.2061, "step": 3908 }, { "epoch": 4.13, "learning_rate": 3.968287526427061e-05, "loss": 0.1779, "step": 3910 }, { "epoch": 4.14, "learning_rate": 3.967758985200846e-05, "loss": 0.2722, "step": 3912 }, { "epoch": 4.14, "learning_rate": 3.9672304439746296e-05, "loss": 0.2599, "step": 3914 }, { "epoch": 4.14, "learning_rate": 3.966701902748415e-05, "loss": 0.1343, "step": 3916 }, { "epoch": 4.14, "learning_rate": 3.966173361522199e-05, "loss": 0.2221, "step": 3918 }, { "epoch": 4.14, "learning_rate": 3.9656448202959834e-05, "loss": 0.2081, "step": 3920 }, { "epoch": 4.15, "learning_rate": 3.9651162790697674e-05, "loss": 0.2064, "step": 3922 }, { "epoch": 4.15, "learning_rate": 3.964587737843552e-05, "loss": 0.2185, "step": 3924 }, { "epoch": 4.15, "learning_rate": 3.9640591966173366e-05, "loss": 0.2235, "step": 3926 }, { "epoch": 4.15, "learning_rate": 3.9635306553911205e-05, "loss": 0.205, "step": 3928 }, { "epoch": 4.15, "learning_rate": 3.963002114164905e-05, "loss": 0.1873, "step": 3930 }, { "epoch": 4.16, "learning_rate": 3.962473572938689e-05, "loss": 0.1276, "step": 3932 }, { "epoch": 4.16, "learning_rate": 3.961945031712474e-05, "loss": 0.2165, "step": 3934 }, { "epoch": 4.16, "learning_rate": 3.961416490486258e-05, "loss": 0.1211, "step": 3936 }, { "epoch": 4.16, "learning_rate": 3.960887949260043e-05, "loss": 0.1964, "step": 3938 }, { "epoch": 4.16, "learning_rate": 3.960359408033827e-05, "loss": 0.3566, "step": 3940 }, { "epoch": 4.17, "learning_rate": 3.9598308668076114e-05, "loss": 0.2372, "step": 3942 }, { "epoch": 4.17, "learning_rate": 3.959302325581395e-05, "loss": 0.2184, "step": 3944 }, { "epoch": 4.17, "learning_rate": 3.95877378435518e-05, "loss": 0.1324, "step": 3946 }, { "epoch": 4.17, "learning_rate": 3.958245243128964e-05, "loss": 0.1911, "step": 3948 }, { "epoch": 4.18, "learning_rate": 3.9577167019027485e-05, "loss": 0.3218, "step": 3950 }, { "epoch": 4.18, "learning_rate": 3.957188160676533e-05, "loss": 0.1894, "step": 3952 }, { "epoch": 4.18, "learning_rate": 3.956659619450318e-05, "loss": 0.323, "step": 3954 }, { "epoch": 4.18, "learning_rate": 3.9561310782241016e-05, "loss": 0.3028, "step": 3956 }, { "epoch": 4.18, "learning_rate": 3.955602536997886e-05, "loss": 0.2063, "step": 3958 }, { "epoch": 4.19, "learning_rate": 3.95507399577167e-05, "loss": 0.3204, "step": 3960 }, { "epoch": 4.19, "learning_rate": 3.954545454545455e-05, "loss": 0.1358, "step": 3962 }, { "epoch": 4.19, "learning_rate": 3.954016913319239e-05, "loss": 0.3048, "step": 3964 }, { "epoch": 4.19, "learning_rate": 3.953488372093023e-05, "loss": 0.2386, "step": 3966 }, { "epoch": 4.19, "learning_rate": 3.952959830866807e-05, "loss": 0.2359, "step": 3968 }, { "epoch": 4.2, "learning_rate": 3.9524312896405925e-05, "loss": 0.1386, "step": 3970 }, { "epoch": 4.2, "learning_rate": 3.9519027484143764e-05, "loss": 0.1509, "step": 3972 }, { "epoch": 4.2, "learning_rate": 3.951374207188161e-05, "loss": 0.1746, "step": 3974 }, { "epoch": 4.2, "learning_rate": 3.950845665961945e-05, "loss": 0.2711, "step": 3976 }, { "epoch": 4.21, "learning_rate": 3.9503171247357296e-05, "loss": 0.2591, "step": 3978 }, { "epoch": 4.21, "learning_rate": 3.949788583509514e-05, "loss": 0.3, "step": 3980 }, { "epoch": 4.21, "learning_rate": 3.949260042283298e-05, "loss": 0.2035, "step": 3982 }, { "epoch": 4.21, "learning_rate": 3.948731501057083e-05, "loss": 0.2351, "step": 3984 }, { "epoch": 4.21, "learning_rate": 3.9482029598308666e-05, "loss": 0.1863, "step": 3986 }, { "epoch": 4.22, "learning_rate": 3.947674418604652e-05, "loss": 0.1715, "step": 3988 }, { "epoch": 4.22, "learning_rate": 3.947145877378436e-05, "loss": 0.1884, "step": 3990 }, { "epoch": 4.22, "learning_rate": 3.9466173361522205e-05, "loss": 0.4106, "step": 3992 }, { "epoch": 4.22, "learning_rate": 3.9460887949260044e-05, "loss": 0.3041, "step": 3994 }, { "epoch": 4.22, "learning_rate": 3.945560253699789e-05, "loss": 0.22, "step": 3996 }, { "epoch": 4.23, "learning_rate": 3.945031712473573e-05, "loss": 0.2508, "step": 3998 }, { "epoch": 4.23, "learning_rate": 3.9445031712473575e-05, "loss": 0.1456, "step": 4000 }, { "epoch": 4.23, "eval_cer": 0.039897406668566546, "eval_loss": 0.5804613828659058, "eval_runtime": 129.4688, "eval_samples_per_second": 6.496, "eval_steps_per_second": 0.819, "step": 4000 }, { "epoch": 4.23, "learning_rate": 3.9439746300211415e-05, "loss": 0.2934, "step": 4002 }, { "epoch": 4.23, "learning_rate": 3.943446088794926e-05, "loss": 0.1868, "step": 4004 }, { "epoch": 4.23, "learning_rate": 3.942917547568711e-05, "loss": 0.2808, "step": 4006 }, { "epoch": 4.24, "learning_rate": 3.942389006342495e-05, "loss": 0.1501, "step": 4008 }, { "epoch": 4.24, "learning_rate": 3.941860465116279e-05, "loss": 0.241, "step": 4010 }, { "epoch": 4.24, "learning_rate": 3.941331923890064e-05, "loss": 0.1349, "step": 4012 }, { "epoch": 4.24, "learning_rate": 3.940803382663848e-05, "loss": 0.3111, "step": 4014 }, { "epoch": 4.25, "learning_rate": 3.940274841437632e-05, "loss": 0.2869, "step": 4016 }, { "epoch": 4.25, "learning_rate": 3.939746300211416e-05, "loss": 0.2255, "step": 4018 }, { "epoch": 4.25, "learning_rate": 3.939217758985201e-05, "loss": 0.2188, "step": 4020 }, { "epoch": 4.25, "learning_rate": 3.9386892177589855e-05, "loss": 0.1902, "step": 4022 }, { "epoch": 4.25, "learning_rate": 3.93816067653277e-05, "loss": 0.1701, "step": 4024 }, { "epoch": 4.26, "learning_rate": 3.937632135306554e-05, "loss": 0.1584, "step": 4026 }, { "epoch": 4.26, "learning_rate": 3.9371035940803386e-05, "loss": 0.2237, "step": 4028 }, { "epoch": 4.26, "learning_rate": 3.9365750528541225e-05, "loss": 0.398, "step": 4030 }, { "epoch": 4.26, "learning_rate": 3.936046511627907e-05, "loss": 0.1038, "step": 4032 }, { "epoch": 4.26, "learning_rate": 3.935517970401692e-05, "loss": 0.1975, "step": 4034 }, { "epoch": 4.27, "learning_rate": 3.934989429175476e-05, "loss": 0.2825, "step": 4036 }, { "epoch": 4.27, "learning_rate": 3.93446088794926e-05, "loss": 0.1508, "step": 4038 }, { "epoch": 4.27, "learning_rate": 3.933932346723044e-05, "loss": 0.1505, "step": 4040 }, { "epoch": 4.27, "learning_rate": 3.9334038054968295e-05, "loss": 0.2344, "step": 4042 }, { "epoch": 4.27, "learning_rate": 3.9328752642706134e-05, "loss": 0.1584, "step": 4044 }, { "epoch": 4.28, "learning_rate": 3.932346723044398e-05, "loss": 0.2375, "step": 4046 }, { "epoch": 4.28, "learning_rate": 3.931818181818182e-05, "loss": 0.3126, "step": 4048 }, { "epoch": 4.28, "learning_rate": 3.9312896405919666e-05, "loss": 0.2596, "step": 4050 }, { "epoch": 4.28, "learning_rate": 3.9307610993657505e-05, "loss": 0.1258, "step": 4052 }, { "epoch": 4.29, "learning_rate": 3.930232558139535e-05, "loss": 0.2411, "step": 4054 }, { "epoch": 4.29, "learning_rate": 3.929704016913319e-05, "loss": 0.2466, "step": 4056 }, { "epoch": 4.29, "learning_rate": 3.9291754756871036e-05, "loss": 0.125, "step": 4058 }, { "epoch": 4.29, "learning_rate": 3.928646934460888e-05, "loss": 0.1596, "step": 4060 }, { "epoch": 4.29, "learning_rate": 3.928118393234673e-05, "loss": 0.2621, "step": 4062 }, { "epoch": 4.3, "learning_rate": 3.927589852008457e-05, "loss": 0.208, "step": 4064 }, { "epoch": 4.3, "learning_rate": 3.9270613107822414e-05, "loss": 0.2743, "step": 4066 }, { "epoch": 4.3, "learning_rate": 3.926532769556025e-05, "loss": 0.3313, "step": 4068 }, { "epoch": 4.3, "learning_rate": 3.92600422832981e-05, "loss": 0.1685, "step": 4070 }, { "epoch": 4.3, "learning_rate": 3.925475687103594e-05, "loss": 0.2687, "step": 4072 }, { "epoch": 4.31, "learning_rate": 3.9249471458773785e-05, "loss": 0.3284, "step": 4074 }, { "epoch": 4.31, "learning_rate": 3.924418604651163e-05, "loss": 0.2718, "step": 4076 }, { "epoch": 4.31, "learning_rate": 3.923890063424948e-05, "loss": 0.2013, "step": 4078 }, { "epoch": 4.31, "learning_rate": 3.9233615221987316e-05, "loss": 0.2228, "step": 4080 }, { "epoch": 4.32, "learning_rate": 3.922832980972516e-05, "loss": 0.5322, "step": 4082 }, { "epoch": 4.32, "learning_rate": 3.9223044397463e-05, "loss": 0.36, "step": 4084 }, { "epoch": 4.32, "learning_rate": 3.921775898520085e-05, "loss": 0.1814, "step": 4086 }, { "epoch": 4.32, "learning_rate": 3.9212473572938693e-05, "loss": 0.2867, "step": 4088 }, { "epoch": 4.32, "learning_rate": 3.920718816067653e-05, "loss": 0.2947, "step": 4090 }, { "epoch": 4.33, "learning_rate": 3.920190274841438e-05, "loss": 0.3523, "step": 4092 }, { "epoch": 4.33, "learning_rate": 3.919661733615222e-05, "loss": 0.1549, "step": 4094 }, { "epoch": 4.33, "learning_rate": 3.919133192389007e-05, "loss": 0.3549, "step": 4096 }, { "epoch": 4.33, "learning_rate": 3.918604651162791e-05, "loss": 0.3409, "step": 4098 }, { "epoch": 4.33, "learning_rate": 3.9180761099365756e-05, "loss": 0.2373, "step": 4100 }, { "epoch": 4.34, "learning_rate": 3.9175475687103596e-05, "loss": 0.2428, "step": 4102 }, { "epoch": 4.34, "learning_rate": 3.917019027484144e-05, "loss": 0.2634, "step": 4104 }, { "epoch": 4.34, "learning_rate": 3.916490486257928e-05, "loss": 0.1302, "step": 4106 }, { "epoch": 4.34, "learning_rate": 3.915961945031713e-05, "loss": 0.26, "step": 4108 }, { "epoch": 4.34, "learning_rate": 3.9154334038054966e-05, "loss": 0.217, "step": 4110 }, { "epoch": 4.35, "learning_rate": 3.914904862579281e-05, "loss": 0.1351, "step": 4112 }, { "epoch": 4.35, "learning_rate": 3.914376321353066e-05, "loss": 0.2058, "step": 4114 }, { "epoch": 4.35, "learning_rate": 3.9138477801268504e-05, "loss": 0.1843, "step": 4116 }, { "epoch": 4.35, "learning_rate": 3.9133192389006344e-05, "loss": 0.4963, "step": 4118 }, { "epoch": 4.36, "learning_rate": 3.912790697674419e-05, "loss": 0.2972, "step": 4120 }, { "epoch": 4.36, "learning_rate": 3.912262156448203e-05, "loss": 0.2613, "step": 4122 }, { "epoch": 4.36, "learning_rate": 3.9117336152219875e-05, "loss": 0.1996, "step": 4124 }, { "epoch": 4.36, "learning_rate": 3.9112050739957714e-05, "loss": 0.3973, "step": 4126 }, { "epoch": 4.36, "learning_rate": 3.910676532769556e-05, "loss": 0.149, "step": 4128 }, { "epoch": 4.37, "learning_rate": 3.9101479915433406e-05, "loss": 0.1527, "step": 4130 }, { "epoch": 4.37, "learning_rate": 3.909619450317125e-05, "loss": 0.2652, "step": 4132 }, { "epoch": 4.37, "learning_rate": 3.909090909090909e-05, "loss": 0.2499, "step": 4134 }, { "epoch": 4.37, "learning_rate": 3.908562367864694e-05, "loss": 0.2321, "step": 4136 }, { "epoch": 4.37, "learning_rate": 3.9080338266384784e-05, "loss": 0.2166, "step": 4138 }, { "epoch": 4.38, "learning_rate": 3.907505285412262e-05, "loss": 0.2361, "step": 4140 }, { "epoch": 4.38, "learning_rate": 3.906976744186047e-05, "loss": 0.3066, "step": 4142 }, { "epoch": 4.38, "learning_rate": 3.906448202959831e-05, "loss": 0.1623, "step": 4144 }, { "epoch": 4.38, "learning_rate": 3.9059196617336155e-05, "loss": 0.1691, "step": 4146 }, { "epoch": 4.38, "learning_rate": 3.9053911205073994e-05, "loss": 0.2238, "step": 4148 }, { "epoch": 4.39, "learning_rate": 3.904862579281185e-05, "loss": 0.2006, "step": 4150 }, { "epoch": 4.39, "learning_rate": 3.9043340380549686e-05, "loss": 0.2972, "step": 4152 }, { "epoch": 4.39, "learning_rate": 3.903805496828753e-05, "loss": 0.3729, "step": 4154 }, { "epoch": 4.39, "learning_rate": 3.903276955602537e-05, "loss": 0.3234, "step": 4156 }, { "epoch": 4.4, "learning_rate": 3.902748414376322e-05, "loss": 0.1333, "step": 4158 }, { "epoch": 4.4, "learning_rate": 3.902219873150106e-05, "loss": 0.2893, "step": 4160 }, { "epoch": 4.4, "learning_rate": 3.90169133192389e-05, "loss": 0.1391, "step": 4162 }, { "epoch": 4.4, "learning_rate": 3.901162790697674e-05, "loss": 0.1938, "step": 4164 }, { "epoch": 4.4, "learning_rate": 3.900634249471459e-05, "loss": 0.1412, "step": 4166 }, { "epoch": 4.41, "learning_rate": 3.9001057082452434e-05, "loss": 0.3682, "step": 4168 }, { "epoch": 4.41, "learning_rate": 3.899577167019028e-05, "loss": 0.1981, "step": 4170 }, { "epoch": 4.41, "learning_rate": 3.899048625792812e-05, "loss": 0.1542, "step": 4172 }, { "epoch": 4.41, "learning_rate": 3.8985200845665966e-05, "loss": 0.1952, "step": 4174 }, { "epoch": 4.41, "learning_rate": 3.8979915433403805e-05, "loss": 0.234, "step": 4176 }, { "epoch": 4.42, "learning_rate": 3.897463002114165e-05, "loss": 0.159, "step": 4178 }, { "epoch": 4.42, "learning_rate": 3.896934460887949e-05, "loss": 0.2722, "step": 4180 }, { "epoch": 4.42, "learning_rate": 3.8964059196617336e-05, "loss": 0.2511, "step": 4182 }, { "epoch": 4.42, "learning_rate": 3.895877378435518e-05, "loss": 0.2582, "step": 4184 }, { "epoch": 4.42, "learning_rate": 3.895348837209303e-05, "loss": 0.2339, "step": 4186 }, { "epoch": 4.43, "learning_rate": 3.894820295983087e-05, "loss": 0.1998, "step": 4188 }, { "epoch": 4.43, "learning_rate": 3.8942917547568714e-05, "loss": 0.1863, "step": 4190 }, { "epoch": 4.43, "learning_rate": 3.893763213530656e-05, "loss": 0.3524, "step": 4192 }, { "epoch": 4.43, "learning_rate": 3.89323467230444e-05, "loss": 0.3165, "step": 4194 }, { "epoch": 4.44, "learning_rate": 3.8927061310782245e-05, "loss": 0.2742, "step": 4196 }, { "epoch": 4.44, "learning_rate": 3.8921775898520084e-05, "loss": 0.1488, "step": 4198 }, { "epoch": 4.44, "learning_rate": 3.891649048625793e-05, "loss": 0.3678, "step": 4200 }, { "epoch": 4.44, "learning_rate": 3.891120507399577e-05, "loss": 0.2203, "step": 4202 }, { "epoch": 4.44, "learning_rate": 3.890591966173362e-05, "loss": 0.2065, "step": 4204 }, { "epoch": 4.45, "learning_rate": 3.890063424947146e-05, "loss": 0.184, "step": 4206 }, { "epoch": 4.45, "learning_rate": 3.889534883720931e-05, "loss": 0.1449, "step": 4208 }, { "epoch": 4.45, "learning_rate": 3.889006342494715e-05, "loss": 0.2047, "step": 4210 }, { "epoch": 4.45, "learning_rate": 3.888477801268499e-05, "loss": 0.3436, "step": 4212 }, { "epoch": 4.45, "learning_rate": 3.887949260042283e-05, "loss": 0.2107, "step": 4214 }, { "epoch": 4.46, "learning_rate": 3.887420718816068e-05, "loss": 0.1966, "step": 4216 }, { "epoch": 4.46, "learning_rate": 3.886892177589852e-05, "loss": 0.2369, "step": 4218 }, { "epoch": 4.46, "learning_rate": 3.8863636363636364e-05, "loss": 0.2596, "step": 4220 }, { "epoch": 4.46, "learning_rate": 3.885835095137421e-05, "loss": 0.2708, "step": 4222 }, { "epoch": 4.47, "learning_rate": 3.8853065539112056e-05, "loss": 0.3761, "step": 4224 }, { "epoch": 4.47, "learning_rate": 3.8847780126849895e-05, "loss": 0.3693, "step": 4226 }, { "epoch": 4.47, "learning_rate": 3.884249471458774e-05, "loss": 0.3575, "step": 4228 }, { "epoch": 4.47, "learning_rate": 3.883720930232558e-05, "loss": 0.4008, "step": 4230 }, { "epoch": 4.47, "learning_rate": 3.883192389006343e-05, "loss": 0.1546, "step": 4232 }, { "epoch": 4.48, "learning_rate": 3.8826638477801266e-05, "loss": 0.2317, "step": 4234 }, { "epoch": 4.48, "learning_rate": 3.882135306553911e-05, "loss": 0.1259, "step": 4236 }, { "epoch": 4.48, "learning_rate": 3.881606765327696e-05, "loss": 0.2638, "step": 4238 }, { "epoch": 4.48, "learning_rate": 3.8810782241014804e-05, "loss": 0.3114, "step": 4240 }, { "epoch": 4.48, "learning_rate": 3.8805496828752643e-05, "loss": 0.4307, "step": 4242 }, { "epoch": 4.49, "learning_rate": 3.880021141649049e-05, "loss": 0.2683, "step": 4244 }, { "epoch": 4.49, "learning_rate": 3.8794926004228336e-05, "loss": 0.2929, "step": 4246 }, { "epoch": 4.49, "learning_rate": 3.8789640591966175e-05, "loss": 0.2354, "step": 4248 }, { "epoch": 4.49, "learning_rate": 3.878435517970402e-05, "loss": 0.1789, "step": 4250 }, { "epoch": 4.49, "learning_rate": 3.877906976744186e-05, "loss": 0.2649, "step": 4252 }, { "epoch": 4.5, "learning_rate": 3.8773784355179706e-05, "loss": 0.234, "step": 4254 }, { "epoch": 4.5, "learning_rate": 3.8768498942917546e-05, "loss": 0.2865, "step": 4256 }, { "epoch": 4.5, "learning_rate": 3.87632135306554e-05, "loss": 0.2417, "step": 4258 }, { "epoch": 4.5, "learning_rate": 3.875792811839324e-05, "loss": 0.1494, "step": 4260 }, { "epoch": 4.51, "learning_rate": 3.8752642706131084e-05, "loss": 0.3175, "step": 4262 }, { "epoch": 4.51, "learning_rate": 3.874735729386892e-05, "loss": 0.3808, "step": 4264 }, { "epoch": 4.51, "learning_rate": 3.874207188160677e-05, "loss": 0.2946, "step": 4266 }, { "epoch": 4.51, "learning_rate": 3.873678646934461e-05, "loss": 0.3407, "step": 4268 }, { "epoch": 4.51, "learning_rate": 3.8731501057082454e-05, "loss": 0.2248, "step": 4270 }, { "epoch": 4.52, "learning_rate": 3.8726215644820294e-05, "loss": 0.2135, "step": 4272 }, { "epoch": 4.52, "learning_rate": 3.872093023255814e-05, "loss": 0.2039, "step": 4274 }, { "epoch": 4.52, "learning_rate": 3.8715644820295986e-05, "loss": 0.2309, "step": 4276 }, { "epoch": 4.52, "learning_rate": 3.871035940803383e-05, "loss": 0.3951, "step": 4278 }, { "epoch": 4.52, "learning_rate": 3.870507399577167e-05, "loss": 0.296, "step": 4280 }, { "epoch": 4.53, "learning_rate": 3.869978858350952e-05, "loss": 0.2853, "step": 4282 }, { "epoch": 4.53, "learning_rate": 3.8694503171247357e-05, "loss": 0.1644, "step": 4284 }, { "epoch": 4.53, "learning_rate": 3.86892177589852e-05, "loss": 0.2342, "step": 4286 }, { "epoch": 4.53, "learning_rate": 3.868393234672304e-05, "loss": 0.1685, "step": 4288 }, { "epoch": 4.53, "learning_rate": 3.867864693446089e-05, "loss": 0.1452, "step": 4290 }, { "epoch": 4.54, "learning_rate": 3.8673361522198734e-05, "loss": 0.2645, "step": 4292 }, { "epoch": 4.54, "learning_rate": 3.866807610993658e-05, "loss": 0.2565, "step": 4294 }, { "epoch": 4.54, "learning_rate": 3.866279069767442e-05, "loss": 0.0879, "step": 4296 }, { "epoch": 4.54, "learning_rate": 3.8657505285412265e-05, "loss": 0.2067, "step": 4298 }, { "epoch": 4.55, "learning_rate": 3.865221987315011e-05, "loss": 0.163, "step": 4300 }, { "epoch": 4.55, "learning_rate": 3.864693446088795e-05, "loss": 0.2396, "step": 4302 }, { "epoch": 4.55, "learning_rate": 3.86416490486258e-05, "loss": 0.1791, "step": 4304 }, { "epoch": 4.55, "learning_rate": 3.8636363636363636e-05, "loss": 0.263, "step": 4306 }, { "epoch": 4.55, "learning_rate": 3.863107822410148e-05, "loss": 0.2548, "step": 4308 }, { "epoch": 4.56, "learning_rate": 3.862579281183932e-05, "loss": 0.3144, "step": 4310 }, { "epoch": 4.56, "learning_rate": 3.8620507399577174e-05, "loss": 0.1509, "step": 4312 }, { "epoch": 4.56, "learning_rate": 3.8615221987315014e-05, "loss": 0.254, "step": 4314 }, { "epoch": 4.56, "learning_rate": 3.860993657505286e-05, "loss": 0.2525, "step": 4316 }, { "epoch": 4.56, "learning_rate": 3.86046511627907e-05, "loss": 0.3747, "step": 4318 }, { "epoch": 4.57, "learning_rate": 3.8599365750528545e-05, "loss": 0.3409, "step": 4320 }, { "epoch": 4.57, "learning_rate": 3.8594080338266384e-05, "loss": 0.1191, "step": 4322 }, { "epoch": 4.57, "learning_rate": 3.858879492600423e-05, "loss": 0.2891, "step": 4324 }, { "epoch": 4.57, "learning_rate": 3.858350951374207e-05, "loss": 0.1214, "step": 4326 }, { "epoch": 4.58, "learning_rate": 3.8578224101479916e-05, "loss": 0.2835, "step": 4328 }, { "epoch": 4.58, "learning_rate": 3.857293868921776e-05, "loss": 0.2172, "step": 4330 }, { "epoch": 4.58, "learning_rate": 3.856765327695561e-05, "loss": 0.1076, "step": 4332 }, { "epoch": 4.58, "learning_rate": 3.856236786469345e-05, "loss": 0.2779, "step": 4334 }, { "epoch": 4.58, "learning_rate": 3.855708245243129e-05, "loss": 0.2579, "step": 4336 }, { "epoch": 4.59, "learning_rate": 3.855179704016913e-05, "loss": 0.2079, "step": 4338 }, { "epoch": 4.59, "learning_rate": 3.854651162790698e-05, "loss": 0.4571, "step": 4340 }, { "epoch": 4.59, "learning_rate": 3.8541226215644824e-05, "loss": 0.304, "step": 4342 }, { "epoch": 4.59, "learning_rate": 3.8535940803382664e-05, "loss": 0.2778, "step": 4344 }, { "epoch": 4.59, "learning_rate": 3.853065539112051e-05, "loss": 0.1885, "step": 4346 }, { "epoch": 4.6, "learning_rate": 3.8525369978858356e-05, "loss": 0.2225, "step": 4348 }, { "epoch": 4.6, "learning_rate": 3.8520084566596195e-05, "loss": 0.2183, "step": 4350 }, { "epoch": 4.6, "learning_rate": 3.851479915433404e-05, "loss": 0.2757, "step": 4352 }, { "epoch": 4.6, "learning_rate": 3.850951374207189e-05, "loss": 0.4415, "step": 4354 }, { "epoch": 4.6, "learning_rate": 3.8504228329809727e-05, "loss": 0.2638, "step": 4356 }, { "epoch": 4.61, "learning_rate": 3.849894291754757e-05, "loss": 0.2034, "step": 4358 }, { "epoch": 4.61, "learning_rate": 3.849365750528541e-05, "loss": 0.261, "step": 4360 }, { "epoch": 4.61, "learning_rate": 3.848837209302326e-05, "loss": 0.2194, "step": 4362 }, { "epoch": 4.61, "learning_rate": 3.84830866807611e-05, "loss": 0.2475, "step": 4364 }, { "epoch": 4.62, "learning_rate": 3.847780126849895e-05, "loss": 0.289, "step": 4366 }, { "epoch": 4.62, "learning_rate": 3.847251585623679e-05, "loss": 0.1766, "step": 4368 }, { "epoch": 4.62, "learning_rate": 3.8467230443974635e-05, "loss": 0.2849, "step": 4370 }, { "epoch": 4.62, "learning_rate": 3.8461945031712475e-05, "loss": 0.2087, "step": 4372 }, { "epoch": 4.62, "learning_rate": 3.845665961945032e-05, "loss": 0.1763, "step": 4374 }, { "epoch": 4.63, "learning_rate": 3.845137420718816e-05, "loss": 0.2629, "step": 4376 }, { "epoch": 4.63, "learning_rate": 3.8446088794926006e-05, "loss": 0.2354, "step": 4378 }, { "epoch": 4.63, "learning_rate": 3.8440803382663845e-05, "loss": 0.2312, "step": 4380 }, { "epoch": 4.63, "learning_rate": 3.843551797040169e-05, "loss": 0.2316, "step": 4382 }, { "epoch": 4.63, "learning_rate": 3.843023255813954e-05, "loss": 0.3674, "step": 4384 }, { "epoch": 4.64, "learning_rate": 3.8424947145877384e-05, "loss": 0.1878, "step": 4386 }, { "epoch": 4.64, "learning_rate": 3.841966173361522e-05, "loss": 0.1896, "step": 4388 }, { "epoch": 4.64, "learning_rate": 3.841437632135307e-05, "loss": 0.3298, "step": 4390 }, { "epoch": 4.64, "learning_rate": 3.840909090909091e-05, "loss": 0.1981, "step": 4392 }, { "epoch": 4.64, "learning_rate": 3.8403805496828754e-05, "loss": 0.2604, "step": 4394 }, { "epoch": 4.65, "learning_rate": 3.83985200845666e-05, "loss": 0.1946, "step": 4396 }, { "epoch": 4.65, "learning_rate": 3.839323467230444e-05, "loss": 0.2386, "step": 4398 }, { "epoch": 4.65, "learning_rate": 3.8387949260042286e-05, "loss": 0.1483, "step": 4400 }, { "epoch": 4.65, "learning_rate": 3.8382663847780125e-05, "loss": 0.2855, "step": 4402 }, { "epoch": 4.66, "learning_rate": 3.837737843551798e-05, "loss": 0.186, "step": 4404 }, { "epoch": 4.66, "learning_rate": 3.837209302325582e-05, "loss": 0.215, "step": 4406 }, { "epoch": 4.66, "learning_rate": 3.836680761099366e-05, "loss": 0.1128, "step": 4408 }, { "epoch": 4.66, "learning_rate": 3.83615221987315e-05, "loss": 0.1597, "step": 4410 }, { "epoch": 4.66, "learning_rate": 3.835623678646935e-05, "loss": 0.2013, "step": 4412 }, { "epoch": 4.67, "learning_rate": 3.835095137420719e-05, "loss": 0.2183, "step": 4414 }, { "epoch": 4.67, "learning_rate": 3.8345665961945034e-05, "loss": 0.3207, "step": 4416 }, { "epoch": 4.67, "learning_rate": 3.834038054968287e-05, "loss": 0.2389, "step": 4418 }, { "epoch": 4.67, "learning_rate": 3.833509513742072e-05, "loss": 0.3231, "step": 4420 }, { "epoch": 4.67, "learning_rate": 3.8329809725158565e-05, "loss": 0.2078, "step": 4422 }, { "epoch": 4.68, "learning_rate": 3.832452431289641e-05, "loss": 0.3902, "step": 4424 }, { "epoch": 4.68, "learning_rate": 3.831923890063425e-05, "loss": 0.1898, "step": 4426 }, { "epoch": 4.68, "learning_rate": 3.8313953488372097e-05, "loss": 0.2032, "step": 4428 }, { "epoch": 4.68, "learning_rate": 3.8308668076109936e-05, "loss": 0.065, "step": 4430 }, { "epoch": 4.68, "learning_rate": 3.830338266384778e-05, "loss": 0.197, "step": 4432 }, { "epoch": 4.69, "learning_rate": 3.829809725158562e-05, "loss": 0.244, "step": 4434 }, { "epoch": 4.69, "learning_rate": 3.829281183932347e-05, "loss": 0.3045, "step": 4436 }, { "epoch": 4.69, "learning_rate": 3.8287526427061307e-05, "loss": 0.2582, "step": 4438 }, { "epoch": 4.69, "learning_rate": 3.828224101479916e-05, "loss": 0.2491, "step": 4440 }, { "epoch": 4.7, "learning_rate": 3.8276955602537e-05, "loss": 0.2893, "step": 4442 }, { "epoch": 4.7, "learning_rate": 3.8271670190274845e-05, "loss": 0.2808, "step": 4444 }, { "epoch": 4.7, "learning_rate": 3.8266384778012684e-05, "loss": 0.2402, "step": 4446 }, { "epoch": 4.7, "learning_rate": 3.826109936575053e-05, "loss": 0.328, "step": 4448 }, { "epoch": 4.7, "learning_rate": 3.8255813953488376e-05, "loss": 0.1825, "step": 4450 }, { "epoch": 4.71, "learning_rate": 3.8250528541226215e-05, "loss": 0.2451, "step": 4452 }, { "epoch": 4.71, "learning_rate": 3.824524312896406e-05, "loss": 0.2322, "step": 4454 }, { "epoch": 4.71, "learning_rate": 3.82399577167019e-05, "loss": 0.239, "step": 4456 }, { "epoch": 4.71, "learning_rate": 3.8234672304439754e-05, "loss": 0.4527, "step": 4458 }, { "epoch": 4.71, "learning_rate": 3.822938689217759e-05, "loss": 0.3426, "step": 4460 }, { "epoch": 4.72, "learning_rate": 3.822410147991544e-05, "loss": 0.1954, "step": 4462 }, { "epoch": 4.72, "learning_rate": 3.821881606765328e-05, "loss": 0.2278, "step": 4464 }, { "epoch": 4.72, "learning_rate": 3.8213530655391124e-05, "loss": 0.1666, "step": 4466 }, { "epoch": 4.72, "learning_rate": 3.8208245243128964e-05, "loss": 0.137, "step": 4468 }, { "epoch": 4.73, "learning_rate": 3.820295983086681e-05, "loss": 0.292, "step": 4470 }, { "epoch": 4.73, "learning_rate": 3.819767441860465e-05, "loss": 0.2981, "step": 4472 }, { "epoch": 4.73, "learning_rate": 3.8192389006342495e-05, "loss": 0.3711, "step": 4474 }, { "epoch": 4.73, "learning_rate": 3.818710359408034e-05, "loss": 0.2092, "step": 4476 }, { "epoch": 4.73, "learning_rate": 3.818181818181819e-05, "loss": 0.2499, "step": 4478 }, { "epoch": 4.74, "learning_rate": 3.8176532769556026e-05, "loss": 0.2701, "step": 4480 }, { "epoch": 4.74, "learning_rate": 3.817124735729387e-05, "loss": 0.1771, "step": 4482 }, { "epoch": 4.74, "learning_rate": 3.816596194503171e-05, "loss": 0.2258, "step": 4484 }, { "epoch": 4.74, "learning_rate": 3.816067653276956e-05, "loss": 0.4113, "step": 4486 }, { "epoch": 4.74, "learning_rate": 3.81553911205074e-05, "loss": 0.1089, "step": 4488 }, { "epoch": 4.75, "learning_rate": 3.815010570824524e-05, "loss": 0.1204, "step": 4490 }, { "epoch": 4.75, "learning_rate": 3.814482029598309e-05, "loss": 0.1931, "step": 4492 }, { "epoch": 4.75, "learning_rate": 3.8139534883720935e-05, "loss": 0.1632, "step": 4494 }, { "epoch": 4.75, "learning_rate": 3.8134249471458775e-05, "loss": 0.2525, "step": 4496 }, { "epoch": 4.75, "learning_rate": 3.812896405919662e-05, "loss": 0.1257, "step": 4498 }, { "epoch": 4.76, "learning_rate": 3.812367864693446e-05, "loss": 0.3264, "step": 4500 }, { "epoch": 4.76, "eval_cer": 0.030265032772869762, "eval_loss": 0.3518345057964325, "eval_runtime": 127.9997, "eval_samples_per_second": 6.57, "eval_steps_per_second": 0.828, "step": 4500 }, { "epoch": 4.76, "learning_rate": 3.8118393234672306e-05, "loss": 0.1576, "step": 4502 }, { "epoch": 4.76, "learning_rate": 3.811310782241015e-05, "loss": 0.1078, "step": 4504 }, { "epoch": 4.76, "learning_rate": 3.810782241014799e-05, "loss": 0.2641, "step": 4506 }, { "epoch": 4.77, "learning_rate": 3.810253699788584e-05, "loss": 0.2368, "step": 4508 }, { "epoch": 4.77, "learning_rate": 3.809725158562368e-05, "loss": 0.218, "step": 4510 }, { "epoch": 4.77, "learning_rate": 3.809196617336153e-05, "loss": 0.3123, "step": 4512 }, { "epoch": 4.77, "learning_rate": 3.808668076109937e-05, "loss": 0.1478, "step": 4514 }, { "epoch": 4.77, "learning_rate": 3.8081395348837215e-05, "loss": 0.2465, "step": 4516 }, { "epoch": 4.78, "learning_rate": 3.8076109936575054e-05, "loss": 0.1555, "step": 4518 }, { "epoch": 4.78, "learning_rate": 3.80708245243129e-05, "loss": 0.139, "step": 4520 }, { "epoch": 4.78, "learning_rate": 3.806553911205074e-05, "loss": 0.2661, "step": 4522 }, { "epoch": 4.78, "learning_rate": 3.8060253699788585e-05, "loss": 0.1964, "step": 4524 }, { "epoch": 4.78, "learning_rate": 3.8054968287526425e-05, "loss": 0.1876, "step": 4526 }, { "epoch": 4.79, "learning_rate": 3.804968287526427e-05, "loss": 0.2145, "step": 4528 }, { "epoch": 4.79, "learning_rate": 3.804439746300212e-05, "loss": 0.2774, "step": 4530 }, { "epoch": 4.79, "learning_rate": 3.803911205073996e-05, "loss": 0.1954, "step": 4532 }, { "epoch": 4.79, "learning_rate": 3.80338266384778e-05, "loss": 0.2758, "step": 4534 }, { "epoch": 4.79, "learning_rate": 3.802854122621565e-05, "loss": 0.2895, "step": 4536 }, { "epoch": 4.8, "learning_rate": 3.802325581395349e-05, "loss": 0.2015, "step": 4538 }, { "epoch": 4.8, "learning_rate": 3.8017970401691334e-05, "loss": 0.1902, "step": 4540 }, { "epoch": 4.8, "learning_rate": 3.801268498942917e-05, "loss": 0.2945, "step": 4542 }, { "epoch": 4.8, "learning_rate": 3.800739957716702e-05, "loss": 0.3151, "step": 4544 }, { "epoch": 4.81, "learning_rate": 3.8002114164904865e-05, "loss": 0.2132, "step": 4546 }, { "epoch": 4.81, "learning_rate": 3.799682875264271e-05, "loss": 0.1585, "step": 4548 }, { "epoch": 4.81, "learning_rate": 3.799154334038055e-05, "loss": 0.3348, "step": 4550 }, { "epoch": 4.81, "learning_rate": 3.7986257928118396e-05, "loss": 0.1911, "step": 4552 }, { "epoch": 4.81, "learning_rate": 3.7980972515856236e-05, "loss": 0.2412, "step": 4554 }, { "epoch": 4.82, "learning_rate": 3.797568710359408e-05, "loss": 0.2531, "step": 4556 }, { "epoch": 4.82, "learning_rate": 3.797040169133193e-05, "loss": 0.2128, "step": 4558 }, { "epoch": 4.82, "learning_rate": 3.796511627906977e-05, "loss": 0.288, "step": 4560 }, { "epoch": 4.82, "learning_rate": 3.795983086680761e-05, "loss": 0.2211, "step": 4562 }, { "epoch": 4.82, "learning_rate": 3.795454545454545e-05, "loss": 0.1853, "step": 4564 }, { "epoch": 4.83, "learning_rate": 3.7949260042283305e-05, "loss": 0.2396, "step": 4566 }, { "epoch": 4.83, "learning_rate": 3.7943974630021145e-05, "loss": 0.163, "step": 4568 }, { "epoch": 4.83, "learning_rate": 3.793868921775899e-05, "loss": 0.2536, "step": 4570 }, { "epoch": 4.83, "learning_rate": 3.793340380549683e-05, "loss": 0.2265, "step": 4572 }, { "epoch": 4.84, "learning_rate": 3.7928118393234676e-05, "loss": 0.1727, "step": 4574 }, { "epoch": 4.84, "learning_rate": 3.7922832980972515e-05, "loss": 0.1687, "step": 4576 }, { "epoch": 4.84, "learning_rate": 3.791754756871036e-05, "loss": 0.3511, "step": 4578 }, { "epoch": 4.84, "learning_rate": 3.79122621564482e-05, "loss": 0.1654, "step": 4580 }, { "epoch": 4.84, "learning_rate": 3.790697674418605e-05, "loss": 0.2532, "step": 4582 }, { "epoch": 4.85, "learning_rate": 3.790169133192389e-05, "loss": 0.2507, "step": 4584 }, { "epoch": 4.85, "learning_rate": 3.789640591966174e-05, "loss": 0.1839, "step": 4586 }, { "epoch": 4.85, "learning_rate": 3.789112050739958e-05, "loss": 0.2172, "step": 4588 }, { "epoch": 4.85, "learning_rate": 3.7885835095137424e-05, "loss": 0.1444, "step": 4590 }, { "epoch": 4.85, "learning_rate": 3.7880549682875263e-05, "loss": 0.1246, "step": 4592 }, { "epoch": 4.86, "learning_rate": 3.787526427061311e-05, "loss": 0.1763, "step": 4594 }, { "epoch": 4.86, "learning_rate": 3.786997885835095e-05, "loss": 0.2812, "step": 4596 }, { "epoch": 4.86, "learning_rate": 3.7864693446088795e-05, "loss": 0.2248, "step": 4598 }, { "epoch": 4.86, "learning_rate": 3.785940803382664e-05, "loss": 0.137, "step": 4600 }, { "epoch": 4.86, "learning_rate": 3.785412262156449e-05, "loss": 0.3015, "step": 4602 }, { "epoch": 4.87, "learning_rate": 3.7848837209302326e-05, "loss": 0.1787, "step": 4604 }, { "epoch": 4.87, "learning_rate": 3.784355179704017e-05, "loss": 0.2139, "step": 4606 }, { "epoch": 4.87, "learning_rate": 3.783826638477802e-05, "loss": 0.1668, "step": 4608 }, { "epoch": 4.87, "learning_rate": 3.783298097251586e-05, "loss": 0.215, "step": 4610 }, { "epoch": 4.88, "learning_rate": 3.7827695560253704e-05, "loss": 0.1996, "step": 4612 }, { "epoch": 4.88, "learning_rate": 3.782241014799154e-05, "loss": 0.208, "step": 4614 }, { "epoch": 4.88, "learning_rate": 3.781712473572939e-05, "loss": 0.3617, "step": 4616 }, { "epoch": 4.88, "learning_rate": 3.781183932346723e-05, "loss": 0.2004, "step": 4618 }, { "epoch": 4.88, "learning_rate": 3.780655391120508e-05, "loss": 0.2921, "step": 4620 }, { "epoch": 4.89, "learning_rate": 3.780126849894292e-05, "loss": 0.1529, "step": 4622 }, { "epoch": 4.89, "learning_rate": 3.7795983086680766e-05, "loss": 0.1936, "step": 4624 }, { "epoch": 4.89, "learning_rate": 3.7790697674418606e-05, "loss": 0.1113, "step": 4626 }, { "epoch": 4.89, "learning_rate": 3.778541226215645e-05, "loss": 0.3297, "step": 4628 }, { "epoch": 4.89, "learning_rate": 3.778012684989429e-05, "loss": 0.2202, "step": 4630 }, { "epoch": 4.9, "learning_rate": 3.777484143763214e-05, "loss": 0.1229, "step": 4632 }, { "epoch": 4.9, "learning_rate": 3.7769556025369976e-05, "loss": 0.1535, "step": 4634 }, { "epoch": 4.9, "learning_rate": 3.776427061310782e-05, "loss": 0.2717, "step": 4636 }, { "epoch": 4.9, "learning_rate": 3.775898520084567e-05, "loss": 0.2224, "step": 4638 }, { "epoch": 4.9, "learning_rate": 3.7753699788583515e-05, "loss": 0.1491, "step": 4640 }, { "epoch": 4.91, "learning_rate": 3.7748414376321354e-05, "loss": 0.3249, "step": 4642 }, { "epoch": 4.91, "learning_rate": 3.77431289640592e-05, "loss": 0.1969, "step": 4644 }, { "epoch": 4.91, "learning_rate": 3.773784355179704e-05, "loss": 0.1595, "step": 4646 }, { "epoch": 4.91, "learning_rate": 3.7732558139534885e-05, "loss": 0.2788, "step": 4648 }, { "epoch": 4.92, "learning_rate": 3.7727272727272725e-05, "loss": 0.1207, "step": 4650 }, { "epoch": 4.92, "learning_rate": 3.772198731501057e-05, "loss": 0.1545, "step": 4652 }, { "epoch": 4.92, "learning_rate": 3.771670190274842e-05, "loss": 0.3822, "step": 4654 }, { "epoch": 4.92, "learning_rate": 3.771141649048626e-05, "loss": 0.2045, "step": 4656 }, { "epoch": 4.92, "learning_rate": 3.77061310782241e-05, "loss": 0.1368, "step": 4658 }, { "epoch": 4.93, "learning_rate": 3.770084566596195e-05, "loss": 0.1766, "step": 4660 }, { "epoch": 4.93, "learning_rate": 3.7695560253699794e-05, "loss": 0.6979, "step": 4662 }, { "epoch": 4.93, "learning_rate": 3.7690274841437633e-05, "loss": 0.1898, "step": 4664 }, { "epoch": 4.93, "learning_rate": 3.768498942917548e-05, "loss": 0.205, "step": 4666 }, { "epoch": 4.93, "learning_rate": 3.767970401691332e-05, "loss": 0.2457, "step": 4668 }, { "epoch": 4.94, "learning_rate": 3.7674418604651165e-05, "loss": 0.2407, "step": 4670 }, { "epoch": 4.94, "learning_rate": 3.7669133192389004e-05, "loss": 0.219, "step": 4672 }, { "epoch": 4.94, "learning_rate": 3.766384778012686e-05, "loss": 0.1399, "step": 4674 }, { "epoch": 4.94, "learning_rate": 3.7658562367864696e-05, "loss": 0.291, "step": 4676 }, { "epoch": 4.95, "learning_rate": 3.765327695560254e-05, "loss": 0.1945, "step": 4678 }, { "epoch": 4.95, "learning_rate": 3.764799154334038e-05, "loss": 0.2359, "step": 4680 }, { "epoch": 4.95, "learning_rate": 3.764270613107823e-05, "loss": 0.2426, "step": 4682 }, { "epoch": 4.95, "learning_rate": 3.763742071881607e-05, "loss": 0.2948, "step": 4684 }, { "epoch": 4.95, "learning_rate": 3.763213530655391e-05, "loss": 0.2388, "step": 4686 }, { "epoch": 4.96, "learning_rate": 3.762684989429175e-05, "loss": 0.0963, "step": 4688 }, { "epoch": 4.96, "learning_rate": 3.76215644820296e-05, "loss": 0.25, "step": 4690 }, { "epoch": 4.96, "learning_rate": 3.7616279069767444e-05, "loss": 0.1789, "step": 4692 }, { "epoch": 4.96, "learning_rate": 3.761099365750529e-05, "loss": 0.2834, "step": 4694 }, { "epoch": 4.96, "learning_rate": 3.760570824524313e-05, "loss": 0.1475, "step": 4696 }, { "epoch": 4.97, "learning_rate": 3.7600422832980976e-05, "loss": 0.1781, "step": 4698 }, { "epoch": 4.97, "learning_rate": 3.7595137420718815e-05, "loss": 0.2353, "step": 4700 }, { "epoch": 4.97, "learning_rate": 3.758985200845666e-05, "loss": 0.4259, "step": 4702 }, { "epoch": 4.97, "learning_rate": 3.75845665961945e-05, "loss": 0.2206, "step": 4704 }, { "epoch": 4.97, "learning_rate": 3.7579281183932346e-05, "loss": 0.2497, "step": 4706 }, { "epoch": 4.98, "learning_rate": 3.757399577167019e-05, "loss": 0.2344, "step": 4708 }, { "epoch": 4.98, "learning_rate": 3.756871035940804e-05, "loss": 0.3204, "step": 4710 }, { "epoch": 4.98, "learning_rate": 3.756342494714588e-05, "loss": 0.3352, "step": 4712 }, { "epoch": 4.98, "learning_rate": 3.7558139534883724e-05, "loss": 0.2001, "step": 4714 }, { "epoch": 4.99, "learning_rate": 3.755285412262157e-05, "loss": 0.1399, "step": 4716 }, { "epoch": 4.99, "learning_rate": 3.754756871035941e-05, "loss": 0.1997, "step": 4718 }, { "epoch": 4.99, "learning_rate": 3.7542283298097255e-05, "loss": 0.173, "step": 4720 }, { "epoch": 4.99, "learning_rate": 3.7536997885835095e-05, "loss": 0.3078, "step": 4722 }, { "epoch": 4.99, "learning_rate": 3.753171247357294e-05, "loss": 0.1478, "step": 4724 }, { "epoch": 5.0, "learning_rate": 3.752642706131078e-05, "loss": 0.218, "step": 4726 }, { "epoch": 5.0, "learning_rate": 3.752114164904863e-05, "loss": 0.2081, "step": 4728 }, { "epoch": 5.0, "learning_rate": 3.751585623678647e-05, "loss": 0.1173, "step": 4730 }, { "epoch": 5.0, "learning_rate": 3.751057082452432e-05, "loss": 0.2947, "step": 4732 }, { "epoch": 5.0, "learning_rate": 3.750528541226216e-05, "loss": 0.1826, "step": 4734 }, { "epoch": 5.01, "learning_rate": 3.7500000000000003e-05, "loss": 0.2061, "step": 4736 }, { "epoch": 5.01, "learning_rate": 3.749471458773784e-05, "loss": 0.2811, "step": 4738 }, { "epoch": 5.01, "learning_rate": 3.748942917547569e-05, "loss": 0.1638, "step": 4740 }, { "epoch": 5.01, "learning_rate": 3.748414376321353e-05, "loss": 0.3567, "step": 4742 }, { "epoch": 5.01, "learning_rate": 3.7478858350951374e-05, "loss": 0.2977, "step": 4744 }, { "epoch": 5.02, "learning_rate": 3.747357293868922e-05, "loss": 0.1962, "step": 4746 }, { "epoch": 5.02, "learning_rate": 3.7468287526427066e-05, "loss": 0.1857, "step": 4748 }, { "epoch": 5.02, "learning_rate": 3.7463002114164906e-05, "loss": 0.2079, "step": 4750 }, { "epoch": 5.02, "learning_rate": 3.745771670190275e-05, "loss": 0.1679, "step": 4752 }, { "epoch": 5.03, "learning_rate": 3.745243128964059e-05, "loss": 0.299, "step": 4754 }, { "epoch": 5.03, "learning_rate": 3.744714587737844e-05, "loss": 0.1429, "step": 4756 }, { "epoch": 5.03, "learning_rate": 3.7441860465116276e-05, "loss": 0.1921, "step": 4758 }, { "epoch": 5.03, "learning_rate": 3.743657505285412e-05, "loss": 0.2171, "step": 4760 }, { "epoch": 5.03, "learning_rate": 3.743128964059197e-05, "loss": 0.1292, "step": 4762 }, { "epoch": 5.04, "learning_rate": 3.7426004228329814e-05, "loss": 0.2783, "step": 4764 }, { "epoch": 5.04, "learning_rate": 3.7420718816067654e-05, "loss": 0.2958, "step": 4766 }, { "epoch": 5.04, "learning_rate": 3.74154334038055e-05, "loss": 0.146, "step": 4768 }, { "epoch": 5.04, "learning_rate": 3.7410147991543346e-05, "loss": 0.1545, "step": 4770 }, { "epoch": 5.04, "learning_rate": 3.7404862579281185e-05, "loss": 0.2194, "step": 4772 }, { "epoch": 5.05, "learning_rate": 3.739957716701903e-05, "loss": 0.1789, "step": 4774 }, { "epoch": 5.05, "learning_rate": 3.739429175475687e-05, "loss": 0.2369, "step": 4776 }, { "epoch": 5.05, "learning_rate": 3.7389006342494717e-05, "loss": 0.1711, "step": 4778 }, { "epoch": 5.05, "learning_rate": 3.7383720930232556e-05, "loss": 0.2223, "step": 4780 }, { "epoch": 5.05, "learning_rate": 3.737843551797041e-05, "loss": 0.2188, "step": 4782 }, { "epoch": 5.06, "learning_rate": 3.737315010570825e-05, "loss": 0.2343, "step": 4784 }, { "epoch": 5.06, "learning_rate": 3.7367864693446094e-05, "loss": 0.109, "step": 4786 }, { "epoch": 5.06, "learning_rate": 3.736257928118393e-05, "loss": 0.2162, "step": 4788 }, { "epoch": 5.06, "learning_rate": 3.735729386892178e-05, "loss": 0.3718, "step": 4790 }, { "epoch": 5.07, "learning_rate": 3.735200845665962e-05, "loss": 0.2146, "step": 4792 }, { "epoch": 5.07, "learning_rate": 3.7346723044397465e-05, "loss": 0.1319, "step": 4794 }, { "epoch": 5.07, "learning_rate": 3.7341437632135304e-05, "loss": 0.2113, "step": 4796 }, { "epoch": 5.07, "learning_rate": 3.733615221987315e-05, "loss": 0.1171, "step": 4798 }, { "epoch": 5.07, "learning_rate": 3.7330866807610996e-05, "loss": 0.2382, "step": 4800 }, { "epoch": 5.08, "learning_rate": 3.732558139534884e-05, "loss": 0.1432, "step": 4802 }, { "epoch": 5.08, "learning_rate": 3.732029598308668e-05, "loss": 0.2301, "step": 4804 }, { "epoch": 5.08, "learning_rate": 3.731501057082453e-05, "loss": 0.1705, "step": 4806 }, { "epoch": 5.08, "learning_rate": 3.730972515856237e-05, "loss": 0.0764, "step": 4808 }, { "epoch": 5.08, "learning_rate": 3.730443974630021e-05, "loss": 0.123, "step": 4810 }, { "epoch": 5.09, "learning_rate": 3.729915433403806e-05, "loss": 0.156, "step": 4812 }, { "epoch": 5.09, "learning_rate": 3.72938689217759e-05, "loss": 0.2732, "step": 4814 }, { "epoch": 5.09, "learning_rate": 3.7288583509513744e-05, "loss": 0.2014, "step": 4816 }, { "epoch": 5.09, "learning_rate": 3.728329809725159e-05, "loss": 0.2538, "step": 4818 }, { "epoch": 5.1, "learning_rate": 3.727801268498943e-05, "loss": 0.2327, "step": 4820 }, { "epoch": 5.1, "learning_rate": 3.7272727272727276e-05, "loss": 0.2191, "step": 4822 }, { "epoch": 5.1, "learning_rate": 3.726744186046512e-05, "loss": 0.2461, "step": 4824 }, { "epoch": 5.1, "learning_rate": 3.726215644820296e-05, "loss": 0.1499, "step": 4826 }, { "epoch": 5.1, "learning_rate": 3.725687103594081e-05, "loss": 0.1612, "step": 4828 }, { "epoch": 5.11, "learning_rate": 3.7251585623678646e-05, "loss": 0.2029, "step": 4830 }, { "epoch": 5.11, "learning_rate": 3.724630021141649e-05, "loss": 0.3797, "step": 4832 }, { "epoch": 5.11, "learning_rate": 3.724101479915433e-05, "loss": 0.2086, "step": 4834 }, { "epoch": 5.11, "learning_rate": 3.7235729386892184e-05, "loss": 0.1579, "step": 4836 }, { "epoch": 5.11, "learning_rate": 3.7230443974630024e-05, "loss": 0.2036, "step": 4838 }, { "epoch": 5.12, "learning_rate": 3.722515856236787e-05, "loss": 0.1914, "step": 4840 }, { "epoch": 5.12, "learning_rate": 3.721987315010571e-05, "loss": 0.1965, "step": 4842 }, { "epoch": 5.12, "learning_rate": 3.7214587737843555e-05, "loss": 0.109, "step": 4844 }, { "epoch": 5.12, "learning_rate": 3.7209302325581394e-05, "loss": 0.1581, "step": 4846 }, { "epoch": 5.12, "learning_rate": 3.720401691331924e-05, "loss": 0.3613, "step": 4848 }, { "epoch": 5.13, "learning_rate": 3.719873150105708e-05, "loss": 0.2111, "step": 4850 }, { "epoch": 5.13, "learning_rate": 3.7193446088794926e-05, "loss": 0.1572, "step": 4852 }, { "epoch": 5.13, "learning_rate": 3.718816067653277e-05, "loss": 0.2681, "step": 4854 }, { "epoch": 5.13, "learning_rate": 3.718287526427062e-05, "loss": 0.24, "step": 4856 }, { "epoch": 5.14, "learning_rate": 3.717758985200846e-05, "loss": 0.1684, "step": 4858 }, { "epoch": 5.14, "learning_rate": 3.71723044397463e-05, "loss": 0.1807, "step": 4860 }, { "epoch": 5.14, "learning_rate": 3.716701902748414e-05, "loss": 0.171, "step": 4862 }, { "epoch": 5.14, "learning_rate": 3.716173361522199e-05, "loss": 0.1212, "step": 4864 }, { "epoch": 5.14, "learning_rate": 3.7156448202959835e-05, "loss": 0.2092, "step": 4866 }, { "epoch": 5.15, "learning_rate": 3.7151162790697674e-05, "loss": 0.1592, "step": 4868 }, { "epoch": 5.15, "learning_rate": 3.714587737843552e-05, "loss": 0.268, "step": 4870 }, { "epoch": 5.15, "learning_rate": 3.7140591966173366e-05, "loss": 0.2267, "step": 4872 }, { "epoch": 5.15, "learning_rate": 3.713530655391121e-05, "loss": 0.1299, "step": 4874 }, { "epoch": 5.15, "learning_rate": 3.713002114164905e-05, "loss": 0.1087, "step": 4876 }, { "epoch": 5.16, "learning_rate": 3.71247357293869e-05, "loss": 0.2027, "step": 4878 }, { "epoch": 5.16, "learning_rate": 3.711945031712474e-05, "loss": 0.1678, "step": 4880 }, { "epoch": 5.16, "learning_rate": 3.711416490486258e-05, "loss": 0.2345, "step": 4882 }, { "epoch": 5.16, "learning_rate": 3.710887949260042e-05, "loss": 0.149, "step": 4884 }, { "epoch": 5.16, "learning_rate": 3.710359408033827e-05, "loss": 0.1786, "step": 4886 }, { "epoch": 5.17, "learning_rate": 3.709830866807611e-05, "loss": 0.2472, "step": 4888 }, { "epoch": 5.17, "learning_rate": 3.709302325581396e-05, "loss": 0.1835, "step": 4890 }, { "epoch": 5.17, "learning_rate": 3.70877378435518e-05, "loss": 0.0931, "step": 4892 }, { "epoch": 5.17, "learning_rate": 3.7082452431289646e-05, "loss": 0.1802, "step": 4894 }, { "epoch": 5.18, "learning_rate": 3.7077167019027485e-05, "loss": 0.2437, "step": 4896 }, { "epoch": 5.18, "learning_rate": 3.707188160676533e-05, "loss": 0.1213, "step": 4898 }, { "epoch": 5.18, "learning_rate": 3.706659619450317e-05, "loss": 0.1104, "step": 4900 }, { "epoch": 5.18, "learning_rate": 3.7061310782241016e-05, "loss": 0.2072, "step": 4902 }, { "epoch": 5.18, "learning_rate": 3.7056025369978856e-05, "loss": 0.1542, "step": 4904 }, { "epoch": 5.19, "learning_rate": 3.70507399577167e-05, "loss": 0.1379, "step": 4906 }, { "epoch": 5.19, "learning_rate": 3.704545454545455e-05, "loss": 0.1268, "step": 4908 }, { "epoch": 5.19, "learning_rate": 3.7040169133192394e-05, "loss": 0.1603, "step": 4910 }, { "epoch": 5.19, "learning_rate": 3.703488372093023e-05, "loss": 0.1306, "step": 4912 }, { "epoch": 5.19, "learning_rate": 3.702959830866808e-05, "loss": 0.0702, "step": 4914 }, { "epoch": 5.2, "learning_rate": 3.702431289640592e-05, "loss": 0.0872, "step": 4916 }, { "epoch": 5.2, "learning_rate": 3.7019027484143764e-05, "loss": 0.4123, "step": 4918 }, { "epoch": 5.2, "learning_rate": 3.701374207188161e-05, "loss": 0.2372, "step": 4920 }, { "epoch": 5.2, "learning_rate": 3.700845665961945e-05, "loss": 0.2711, "step": 4922 }, { "epoch": 5.21, "learning_rate": 3.7003171247357296e-05, "loss": 0.2139, "step": 4924 }, { "epoch": 5.21, "learning_rate": 3.699788583509514e-05, "loss": 0.227, "step": 4926 }, { "epoch": 5.21, "learning_rate": 3.699260042283299e-05, "loss": 0.0749, "step": 4928 }, { "epoch": 5.21, "learning_rate": 3.698731501057083e-05, "loss": 0.1619, "step": 4930 }, { "epoch": 5.21, "learning_rate": 3.698202959830867e-05, "loss": 0.3009, "step": 4932 }, { "epoch": 5.22, "learning_rate": 3.697674418604651e-05, "loss": 0.2073, "step": 4934 }, { "epoch": 5.22, "learning_rate": 3.697145877378436e-05, "loss": 0.326, "step": 4936 }, { "epoch": 5.22, "learning_rate": 3.69661733615222e-05, "loss": 0.2313, "step": 4938 }, { "epoch": 5.22, "learning_rate": 3.6960887949260044e-05, "loss": 0.1468, "step": 4940 }, { "epoch": 5.22, "learning_rate": 3.695560253699788e-05, "loss": 0.1478, "step": 4942 }, { "epoch": 5.23, "learning_rate": 3.6950317124735736e-05, "loss": 0.2012, "step": 4944 }, { "epoch": 5.23, "learning_rate": 3.6945031712473575e-05, "loss": 0.1614, "step": 4946 }, { "epoch": 5.23, "learning_rate": 3.693974630021142e-05, "loss": 0.1517, "step": 4948 }, { "epoch": 5.23, "learning_rate": 3.693446088794926e-05, "loss": 0.1746, "step": 4950 }, { "epoch": 5.23, "learning_rate": 3.692917547568711e-05, "loss": 0.1299, "step": 4952 }, { "epoch": 5.24, "learning_rate": 3.6923890063424946e-05, "loss": 0.1044, "step": 4954 }, { "epoch": 5.24, "learning_rate": 3.691860465116279e-05, "loss": 0.1396, "step": 4956 }, { "epoch": 5.24, "learning_rate": 3.691331923890063e-05, "loss": 0.1941, "step": 4958 }, { "epoch": 5.24, "learning_rate": 3.690803382663848e-05, "loss": 0.1695, "step": 4960 }, { "epoch": 5.25, "learning_rate": 3.6902748414376324e-05, "loss": 0.2018, "step": 4962 }, { "epoch": 5.25, "learning_rate": 3.689746300211417e-05, "loss": 0.1901, "step": 4964 }, { "epoch": 5.25, "learning_rate": 3.689217758985201e-05, "loss": 0.1719, "step": 4966 }, { "epoch": 5.25, "learning_rate": 3.6886892177589855e-05, "loss": 0.2726, "step": 4968 }, { "epoch": 5.25, "learning_rate": 3.6881606765327694e-05, "loss": 0.2805, "step": 4970 }, { "epoch": 5.26, "learning_rate": 3.687632135306554e-05, "loss": 0.2122, "step": 4972 }, { "epoch": 5.26, "learning_rate": 3.6871035940803386e-05, "loss": 0.1963, "step": 4974 }, { "epoch": 5.26, "learning_rate": 3.6865750528541226e-05, "loss": 0.2034, "step": 4976 }, { "epoch": 5.26, "learning_rate": 3.686046511627907e-05, "loss": 0.1429, "step": 4978 }, { "epoch": 5.26, "learning_rate": 3.685517970401692e-05, "loss": 0.2629, "step": 4980 }, { "epoch": 5.27, "learning_rate": 3.6849894291754764e-05, "loss": 0.2621, "step": 4982 }, { "epoch": 5.27, "learning_rate": 3.68446088794926e-05, "loss": 0.204, "step": 4984 }, { "epoch": 5.27, "learning_rate": 3.683932346723045e-05, "loss": 0.0373, "step": 4986 }, { "epoch": 5.27, "learning_rate": 3.683403805496829e-05, "loss": 0.2303, "step": 4988 }, { "epoch": 5.27, "learning_rate": 3.6828752642706135e-05, "loss": 0.1849, "step": 4990 }, { "epoch": 5.28, "learning_rate": 3.6823467230443974e-05, "loss": 0.2762, "step": 4992 }, { "epoch": 5.28, "learning_rate": 3.681818181818182e-05, "loss": 0.2258, "step": 4994 }, { "epoch": 5.28, "learning_rate": 3.681289640591966e-05, "loss": 0.1006, "step": 4996 }, { "epoch": 5.28, "learning_rate": 3.680761099365751e-05, "loss": 0.2063, "step": 4998 }, { "epoch": 5.29, "learning_rate": 3.680232558139535e-05, "loss": 0.1996, "step": 5000 }, { "epoch": 5.29, "eval_cer": 0.04149330293530921, "eval_loss": 0.4960121810436249, "eval_runtime": 132.0838, "eval_samples_per_second": 6.367, "eval_steps_per_second": 0.803, "step": 5000 }, { "epoch": 5.29, "learning_rate": 3.67970401691332e-05, "loss": 0.2156, "step": 5002 }, { "epoch": 5.29, "learning_rate": 3.679175475687104e-05, "loss": 0.2322, "step": 5004 }, { "epoch": 5.29, "learning_rate": 3.678646934460888e-05, "loss": 0.2876, "step": 5006 }, { "epoch": 5.29, "learning_rate": 3.678118393234672e-05, "loss": 0.1763, "step": 5008 }, { "epoch": 5.3, "learning_rate": 3.677589852008457e-05, "loss": 0.3529, "step": 5010 }, { "epoch": 5.3, "learning_rate": 3.677061310782241e-05, "loss": 0.2029, "step": 5012 }, { "epoch": 5.3, "learning_rate": 3.676532769556025e-05, "loss": 0.1825, "step": 5014 }, { "epoch": 5.3, "learning_rate": 3.67600422832981e-05, "loss": 0.2479, "step": 5016 }, { "epoch": 5.3, "learning_rate": 3.6754756871035945e-05, "loss": 0.166, "step": 5018 }, { "epoch": 5.31, "learning_rate": 3.6749471458773785e-05, "loss": 0.1522, "step": 5020 }, { "epoch": 5.31, "learning_rate": 3.674418604651163e-05, "loss": 0.2038, "step": 5022 }, { "epoch": 5.31, "learning_rate": 3.673890063424947e-05, "loss": 0.1599, "step": 5024 }, { "epoch": 5.31, "learning_rate": 3.6733615221987316e-05, "loss": 0.1344, "step": 5026 }, { "epoch": 5.32, "learning_rate": 3.672832980972516e-05, "loss": 0.2085, "step": 5028 }, { "epoch": 5.32, "learning_rate": 3.6723044397463e-05, "loss": 0.2214, "step": 5030 }, { "epoch": 5.32, "learning_rate": 3.671775898520085e-05, "loss": 0.2809, "step": 5032 }, { "epoch": 5.32, "learning_rate": 3.6712473572938694e-05, "loss": 0.3275, "step": 5034 }, { "epoch": 5.32, "learning_rate": 3.670718816067654e-05, "loss": 0.1518, "step": 5036 }, { "epoch": 5.33, "learning_rate": 3.670190274841438e-05, "loss": 0.137, "step": 5038 }, { "epoch": 5.33, "learning_rate": 3.6696617336152225e-05, "loss": 0.1788, "step": 5040 }, { "epoch": 5.33, "learning_rate": 3.6691331923890064e-05, "loss": 0.1478, "step": 5042 }, { "epoch": 5.33, "learning_rate": 3.668604651162791e-05, "loss": 0.17, "step": 5044 }, { "epoch": 5.33, "learning_rate": 3.668076109936575e-05, "loss": 0.1549, "step": 5046 }, { "epoch": 5.34, "learning_rate": 3.6675475687103596e-05, "loss": 0.218, "step": 5048 }, { "epoch": 5.34, "learning_rate": 3.6670190274841435e-05, "loss": 0.2056, "step": 5050 }, { "epoch": 5.34, "learning_rate": 3.666490486257929e-05, "loss": 0.1911, "step": 5052 }, { "epoch": 5.34, "learning_rate": 3.665961945031713e-05, "loss": 0.1425, "step": 5054 }, { "epoch": 5.34, "learning_rate": 3.665433403805497e-05, "loss": 0.236, "step": 5056 }, { "epoch": 5.35, "learning_rate": 3.664904862579281e-05, "loss": 0.2433, "step": 5058 }, { "epoch": 5.35, "learning_rate": 3.664376321353066e-05, "loss": 0.1608, "step": 5060 }, { "epoch": 5.35, "learning_rate": 3.66384778012685e-05, "loss": 0.1666, "step": 5062 }, { "epoch": 5.35, "learning_rate": 3.6633192389006344e-05, "loss": 0.1748, "step": 5064 }, { "epoch": 5.36, "learning_rate": 3.662790697674418e-05, "loss": 0.188, "step": 5066 }, { "epoch": 5.36, "learning_rate": 3.662262156448203e-05, "loss": 0.1773, "step": 5068 }, { "epoch": 5.36, "learning_rate": 3.6617336152219875e-05, "loss": 0.228, "step": 5070 }, { "epoch": 5.36, "learning_rate": 3.661205073995772e-05, "loss": 0.1715, "step": 5072 }, { "epoch": 5.36, "learning_rate": 3.660676532769556e-05, "loss": 0.2415, "step": 5074 }, { "epoch": 5.37, "learning_rate": 3.660147991543341e-05, "loss": 0.1875, "step": 5076 }, { "epoch": 5.37, "learning_rate": 3.659619450317125e-05, "loss": 0.2753, "step": 5078 }, { "epoch": 5.37, "learning_rate": 3.659090909090909e-05, "loss": 0.2631, "step": 5080 }, { "epoch": 5.37, "learning_rate": 3.658562367864694e-05, "loss": 0.1206, "step": 5082 }, { "epoch": 5.37, "learning_rate": 3.658033826638478e-05, "loss": 0.2814, "step": 5084 }, { "epoch": 5.38, "learning_rate": 3.6575052854122623e-05, "loss": 0.2033, "step": 5086 }, { "epoch": 5.38, "learning_rate": 3.656976744186046e-05, "loss": 0.1643, "step": 5088 }, { "epoch": 5.38, "learning_rate": 3.6564482029598316e-05, "loss": 0.1404, "step": 5090 }, { "epoch": 5.38, "learning_rate": 3.6559196617336155e-05, "loss": 0.2004, "step": 5092 }, { "epoch": 5.38, "learning_rate": 3.6553911205074e-05, "loss": 0.1059, "step": 5094 }, { "epoch": 5.39, "learning_rate": 3.654862579281184e-05, "loss": 0.1295, "step": 5096 }, { "epoch": 5.39, "learning_rate": 3.6543340380549686e-05, "loss": 0.3503, "step": 5098 }, { "epoch": 5.39, "learning_rate": 3.6538054968287526e-05, "loss": 0.1633, "step": 5100 }, { "epoch": 5.39, "learning_rate": 3.653276955602537e-05, "loss": 0.1093, "step": 5102 }, { "epoch": 5.4, "learning_rate": 3.652748414376321e-05, "loss": 0.2215, "step": 5104 }, { "epoch": 5.4, "learning_rate": 3.652219873150106e-05, "loss": 0.1032, "step": 5106 }, { "epoch": 5.4, "learning_rate": 3.65169133192389e-05, "loss": 0.3177, "step": 5108 }, { "epoch": 5.4, "learning_rate": 3.651162790697675e-05, "loss": 0.209, "step": 5110 }, { "epoch": 5.4, "learning_rate": 3.650634249471459e-05, "loss": 0.1654, "step": 5112 }, { "epoch": 5.41, "learning_rate": 3.6501057082452434e-05, "loss": 0.2999, "step": 5114 }, { "epoch": 5.41, "learning_rate": 3.6495771670190274e-05, "loss": 0.1211, "step": 5116 }, { "epoch": 5.41, "learning_rate": 3.649048625792812e-05, "loss": 0.2942, "step": 5118 }, { "epoch": 5.41, "learning_rate": 3.648520084566596e-05, "loss": 0.2251, "step": 5120 }, { "epoch": 5.41, "learning_rate": 3.6479915433403805e-05, "loss": 0.2525, "step": 5122 }, { "epoch": 5.42, "learning_rate": 3.647463002114165e-05, "loss": 0.2324, "step": 5124 }, { "epoch": 5.42, "learning_rate": 3.64693446088795e-05, "loss": 0.2722, "step": 5126 }, { "epoch": 5.42, "learning_rate": 3.6464059196617336e-05, "loss": 0.1604, "step": 5128 }, { "epoch": 5.42, "learning_rate": 3.645877378435518e-05, "loss": 0.155, "step": 5130 }, { "epoch": 5.42, "learning_rate": 3.645348837209303e-05, "loss": 0.1968, "step": 5132 }, { "epoch": 5.43, "learning_rate": 3.644820295983087e-05, "loss": 0.214, "step": 5134 }, { "epoch": 5.43, "learning_rate": 3.6442917547568714e-05, "loss": 0.1837, "step": 5136 }, { "epoch": 5.43, "learning_rate": 3.643763213530655e-05, "loss": 0.0795, "step": 5138 }, { "epoch": 5.43, "learning_rate": 3.64323467230444e-05, "loss": 0.2023, "step": 5140 }, { "epoch": 5.44, "learning_rate": 3.642706131078224e-05, "loss": 0.151, "step": 5142 }, { "epoch": 5.44, "learning_rate": 3.642177589852009e-05, "loss": 0.1739, "step": 5144 }, { "epoch": 5.44, "learning_rate": 3.641649048625793e-05, "loss": 0.0803, "step": 5146 }, { "epoch": 5.44, "learning_rate": 3.641120507399578e-05, "loss": 0.2561, "step": 5148 }, { "epoch": 5.44, "learning_rate": 3.6405919661733616e-05, "loss": 0.1125, "step": 5150 }, { "epoch": 5.45, "learning_rate": 3.640063424947146e-05, "loss": 0.1583, "step": 5152 }, { "epoch": 5.45, "learning_rate": 3.63953488372093e-05, "loss": 0.2679, "step": 5154 }, { "epoch": 5.45, "learning_rate": 3.639006342494715e-05, "loss": 0.1561, "step": 5156 }, { "epoch": 5.45, "learning_rate": 3.638477801268499e-05, "loss": 0.1881, "step": 5158 }, { "epoch": 5.45, "learning_rate": 3.637949260042283e-05, "loss": 0.213, "step": 5160 }, { "epoch": 5.46, "learning_rate": 3.637420718816068e-05, "loss": 0.0868, "step": 5162 }, { "epoch": 5.46, "learning_rate": 3.6368921775898525e-05, "loss": 0.1728, "step": 5164 }, { "epoch": 5.46, "learning_rate": 3.6363636363636364e-05, "loss": 0.1656, "step": 5166 }, { "epoch": 5.46, "learning_rate": 3.635835095137421e-05, "loss": 0.4125, "step": 5168 }, { "epoch": 5.47, "learning_rate": 3.635306553911205e-05, "loss": 0.2769, "step": 5170 }, { "epoch": 5.47, "learning_rate": 3.6347780126849896e-05, "loss": 0.1373, "step": 5172 }, { "epoch": 5.47, "learning_rate": 3.6342494714587735e-05, "loss": 0.3027, "step": 5174 }, { "epoch": 5.47, "learning_rate": 3.633720930232558e-05, "loss": 0.2377, "step": 5176 }, { "epoch": 5.47, "learning_rate": 3.633192389006343e-05, "loss": 0.1191, "step": 5178 }, { "epoch": 5.48, "learning_rate": 3.632663847780127e-05, "loss": 0.1034, "step": 5180 }, { "epoch": 5.48, "learning_rate": 3.632135306553911e-05, "loss": 0.1489, "step": 5182 }, { "epoch": 5.48, "learning_rate": 3.631606765327696e-05, "loss": 0.1935, "step": 5184 }, { "epoch": 5.48, "learning_rate": 3.6310782241014804e-05, "loss": 0.117, "step": 5186 }, { "epoch": 5.48, "learning_rate": 3.6305496828752644e-05, "loss": 0.2481, "step": 5188 }, { "epoch": 5.49, "learning_rate": 3.630021141649049e-05, "loss": 0.1525, "step": 5190 }, { "epoch": 5.49, "learning_rate": 3.629492600422833e-05, "loss": 0.1148, "step": 5192 }, { "epoch": 5.49, "learning_rate": 3.6289640591966175e-05, "loss": 0.1546, "step": 5194 }, { "epoch": 5.49, "learning_rate": 3.6284355179704014e-05, "loss": 0.136, "step": 5196 }, { "epoch": 5.49, "learning_rate": 3.627906976744187e-05, "loss": 0.1691, "step": 5198 }, { "epoch": 5.5, "learning_rate": 3.6273784355179706e-05, "loss": 0.1914, "step": 5200 }, { "epoch": 5.5, "learning_rate": 3.626849894291755e-05, "loss": 0.1867, "step": 5202 }, { "epoch": 5.5, "learning_rate": 3.626321353065539e-05, "loss": 0.2652, "step": 5204 }, { "epoch": 5.5, "learning_rate": 3.625792811839324e-05, "loss": 0.2976, "step": 5206 }, { "epoch": 5.51, "learning_rate": 3.625264270613108e-05, "loss": 0.2308, "step": 5208 }, { "epoch": 5.51, "learning_rate": 3.624735729386892e-05, "loss": 0.1615, "step": 5210 }, { "epoch": 5.51, "learning_rate": 3.624207188160676e-05, "loss": 0.1069, "step": 5212 }, { "epoch": 5.51, "learning_rate": 3.623678646934461e-05, "loss": 0.2488, "step": 5214 }, { "epoch": 5.51, "learning_rate": 3.6231501057082455e-05, "loss": 0.2028, "step": 5216 }, { "epoch": 5.52, "learning_rate": 3.62262156448203e-05, "loss": 0.4859, "step": 5218 }, { "epoch": 5.52, "learning_rate": 3.622093023255814e-05, "loss": 0.1746, "step": 5220 }, { "epoch": 5.52, "learning_rate": 3.6215644820295986e-05, "loss": 0.1445, "step": 5222 }, { "epoch": 5.52, "learning_rate": 3.6210359408033825e-05, "loss": 0.1672, "step": 5224 }, { "epoch": 5.52, "learning_rate": 3.620507399577167e-05, "loss": 0.1847, "step": 5226 }, { "epoch": 5.53, "learning_rate": 3.619978858350952e-05, "loss": 0.3719, "step": 5228 }, { "epoch": 5.53, "learning_rate": 3.619450317124736e-05, "loss": 0.4154, "step": 5230 }, { "epoch": 5.53, "learning_rate": 3.61892177589852e-05, "loss": 0.2469, "step": 5232 }, { "epoch": 5.53, "learning_rate": 3.618393234672305e-05, "loss": 0.1911, "step": 5234 }, { "epoch": 5.53, "learning_rate": 3.617864693446089e-05, "loss": 0.177, "step": 5236 }, { "epoch": 5.54, "learning_rate": 3.6173361522198734e-05, "loss": 0.1316, "step": 5238 }, { "epoch": 5.54, "learning_rate": 3.616807610993658e-05, "loss": 0.3266, "step": 5240 }, { "epoch": 5.54, "learning_rate": 3.616279069767442e-05, "loss": 0.2839, "step": 5242 }, { "epoch": 5.54, "learning_rate": 3.6157505285412266e-05, "loss": 0.2419, "step": 5244 }, { "epoch": 5.55, "learning_rate": 3.6152219873150105e-05, "loss": 0.2582, "step": 5246 }, { "epoch": 5.55, "learning_rate": 3.614693446088795e-05, "loss": 0.2562, "step": 5248 }, { "epoch": 5.55, "learning_rate": 3.614164904862579e-05, "loss": 0.3777, "step": 5250 }, { "epoch": 5.55, "learning_rate": 3.613636363636364e-05, "loss": 0.3315, "step": 5252 }, { "epoch": 5.55, "learning_rate": 3.613107822410148e-05, "loss": 0.1576, "step": 5254 }, { "epoch": 5.56, "learning_rate": 3.612579281183933e-05, "loss": 0.1337, "step": 5256 }, { "epoch": 5.56, "learning_rate": 3.612050739957717e-05, "loss": 0.1628, "step": 5258 }, { "epoch": 5.56, "learning_rate": 3.6115221987315014e-05, "loss": 0.1957, "step": 5260 }, { "epoch": 5.56, "learning_rate": 3.610993657505285e-05, "loss": 0.2507, "step": 5262 }, { "epoch": 5.56, "learning_rate": 3.61046511627907e-05, "loss": 0.2434, "step": 5264 }, { "epoch": 5.57, "learning_rate": 3.609936575052854e-05, "loss": 0.2171, "step": 5266 }, { "epoch": 5.57, "learning_rate": 3.6094080338266384e-05, "loss": 0.3111, "step": 5268 }, { "epoch": 5.57, "learning_rate": 3.608879492600423e-05, "loss": 0.153, "step": 5270 }, { "epoch": 5.57, "learning_rate": 3.6083509513742077e-05, "loss": 0.1377, "step": 5272 }, { "epoch": 5.58, "learning_rate": 3.6078224101479916e-05, "loss": 0.2569, "step": 5274 }, { "epoch": 5.58, "learning_rate": 3.607293868921776e-05, "loss": 0.179, "step": 5276 }, { "epoch": 5.58, "learning_rate": 3.60676532769556e-05, "loss": 0.2822, "step": 5278 }, { "epoch": 5.58, "learning_rate": 3.606236786469345e-05, "loss": 0.1619, "step": 5280 }, { "epoch": 5.58, "learning_rate": 3.605708245243129e-05, "loss": 0.2394, "step": 5282 }, { "epoch": 5.59, "learning_rate": 3.605179704016913e-05, "loss": 0.2473, "step": 5284 }, { "epoch": 5.59, "learning_rate": 3.604651162790698e-05, "loss": 0.2508, "step": 5286 }, { "epoch": 5.59, "learning_rate": 3.6041226215644825e-05, "loss": 0.2127, "step": 5288 }, { "epoch": 5.59, "learning_rate": 3.6035940803382664e-05, "loss": 0.1293, "step": 5290 }, { "epoch": 5.59, "learning_rate": 3.603065539112051e-05, "loss": 0.4284, "step": 5292 }, { "epoch": 5.6, "learning_rate": 3.6025369978858356e-05, "loss": 0.2449, "step": 5294 }, { "epoch": 5.6, "learning_rate": 3.6020084566596195e-05, "loss": 0.2241, "step": 5296 }, { "epoch": 5.6, "learning_rate": 3.601479915433404e-05, "loss": 0.1785, "step": 5298 }, { "epoch": 5.6, "learning_rate": 3.600951374207188e-05, "loss": 0.1411, "step": 5300 }, { "epoch": 5.6, "learning_rate": 3.600422832980973e-05, "loss": 0.0909, "step": 5302 }, { "epoch": 5.61, "learning_rate": 3.5998942917547566e-05, "loss": 0.1268, "step": 5304 }, { "epoch": 5.61, "learning_rate": 3.599365750528542e-05, "loss": 0.1075, "step": 5306 }, { "epoch": 5.61, "learning_rate": 3.598837209302326e-05, "loss": 0.1462, "step": 5308 }, { "epoch": 5.61, "learning_rate": 3.5983086680761104e-05, "loss": 0.2667, "step": 5310 }, { "epoch": 5.62, "learning_rate": 3.5977801268498944e-05, "loss": 0.1675, "step": 5312 }, { "epoch": 5.62, "learning_rate": 3.597251585623679e-05, "loss": 0.2495, "step": 5314 }, { "epoch": 5.62, "learning_rate": 3.596723044397463e-05, "loss": 0.2061, "step": 5316 }, { "epoch": 5.62, "learning_rate": 3.5961945031712475e-05, "loss": 0.2483, "step": 5318 }, { "epoch": 5.62, "learning_rate": 3.5956659619450314e-05, "loss": 0.1289, "step": 5320 }, { "epoch": 5.63, "learning_rate": 3.595137420718816e-05, "loss": 0.1895, "step": 5322 }, { "epoch": 5.63, "learning_rate": 3.5946088794926006e-05, "loss": 0.2229, "step": 5324 }, { "epoch": 5.63, "learning_rate": 3.594080338266385e-05, "loss": 0.2349, "step": 5326 }, { "epoch": 5.63, "learning_rate": 3.593551797040169e-05, "loss": 0.0876, "step": 5328 }, { "epoch": 5.63, "learning_rate": 3.593023255813954e-05, "loss": 0.1031, "step": 5330 }, { "epoch": 5.64, "learning_rate": 3.592494714587738e-05, "loss": 0.1326, "step": 5332 }, { "epoch": 5.64, "learning_rate": 3.591966173361522e-05, "loss": 0.2142, "step": 5334 }, { "epoch": 5.64, "learning_rate": 3.591437632135307e-05, "loss": 0.2463, "step": 5336 }, { "epoch": 5.64, "learning_rate": 3.590909090909091e-05, "loss": 0.2288, "step": 5338 }, { "epoch": 5.64, "learning_rate": 3.5903805496828754e-05, "loss": 0.2073, "step": 5340 }, { "epoch": 5.65, "learning_rate": 3.58985200845666e-05, "loss": 0.0884, "step": 5342 }, { "epoch": 5.65, "learning_rate": 3.5893234672304447e-05, "loss": 0.2159, "step": 5344 }, { "epoch": 5.65, "learning_rate": 3.5887949260042286e-05, "loss": 0.1365, "step": 5346 }, { "epoch": 5.65, "learning_rate": 3.588266384778013e-05, "loss": 0.2142, "step": 5348 }, { "epoch": 5.66, "learning_rate": 3.587737843551797e-05, "loss": 0.2101, "step": 5350 }, { "epoch": 5.66, "learning_rate": 3.587209302325582e-05, "loss": 0.3025, "step": 5352 }, { "epoch": 5.66, "learning_rate": 3.5866807610993657e-05, "loss": 0.1892, "step": 5354 }, { "epoch": 5.66, "learning_rate": 3.58615221987315e-05, "loss": 0.1453, "step": 5356 }, { "epoch": 5.66, "learning_rate": 3.585623678646934e-05, "loss": 0.1022, "step": 5358 }, { "epoch": 5.67, "learning_rate": 3.5850951374207195e-05, "loss": 0.1156, "step": 5360 }, { "epoch": 5.67, "learning_rate": 3.5845665961945034e-05, "loss": 0.2799, "step": 5362 }, { "epoch": 5.67, "learning_rate": 3.584038054968288e-05, "loss": 0.2196, "step": 5364 }, { "epoch": 5.67, "learning_rate": 3.583509513742072e-05, "loss": 0.277, "step": 5366 }, { "epoch": 5.67, "learning_rate": 3.5829809725158565e-05, "loss": 0.1254, "step": 5368 }, { "epoch": 5.68, "learning_rate": 3.5824524312896405e-05, "loss": 0.1106, "step": 5370 }, { "epoch": 5.68, "learning_rate": 3.581923890063425e-05, "loss": 0.1766, "step": 5372 }, { "epoch": 5.68, "learning_rate": 3.581395348837209e-05, "loss": 0.3162, "step": 5374 }, { "epoch": 5.68, "learning_rate": 3.5808668076109936e-05, "loss": 0.2226, "step": 5376 }, { "epoch": 5.68, "learning_rate": 3.580338266384778e-05, "loss": 0.1871, "step": 5378 }, { "epoch": 5.69, "learning_rate": 3.579809725158563e-05, "loss": 0.3953, "step": 5380 }, { "epoch": 5.69, "learning_rate": 3.579281183932347e-05, "loss": 0.2468, "step": 5382 }, { "epoch": 5.69, "learning_rate": 3.5787526427061314e-05, "loss": 0.1599, "step": 5384 }, { "epoch": 5.69, "learning_rate": 3.578224101479915e-05, "loss": 0.1013, "step": 5386 }, { "epoch": 5.7, "learning_rate": 3.5776955602537e-05, "loss": 0.304, "step": 5388 }, { "epoch": 5.7, "learning_rate": 3.5771670190274845e-05, "loss": 0.228, "step": 5390 }, { "epoch": 5.7, "learning_rate": 3.5766384778012684e-05, "loss": 0.1226, "step": 5392 }, { "epoch": 5.7, "learning_rate": 3.576109936575053e-05, "loss": 0.1913, "step": 5394 }, { "epoch": 5.7, "learning_rate": 3.5755813953488376e-05, "loss": 0.1811, "step": 5396 }, { "epoch": 5.71, "learning_rate": 3.575052854122622e-05, "loss": 0.2636, "step": 5398 }, { "epoch": 5.71, "learning_rate": 3.574524312896406e-05, "loss": 0.1235, "step": 5400 }, { "epoch": 5.71, "learning_rate": 3.573995771670191e-05, "loss": 0.2232, "step": 5402 }, { "epoch": 5.71, "learning_rate": 3.573467230443975e-05, "loss": 0.1785, "step": 5404 }, { "epoch": 5.71, "learning_rate": 3.572938689217759e-05, "loss": 0.1615, "step": 5406 }, { "epoch": 5.72, "learning_rate": 3.572410147991543e-05, "loss": 0.101, "step": 5408 }, { "epoch": 5.72, "learning_rate": 3.571881606765328e-05, "loss": 0.1539, "step": 5410 }, { "epoch": 5.72, "learning_rate": 3.571353065539112e-05, "loss": 0.1954, "step": 5412 }, { "epoch": 5.72, "learning_rate": 3.570824524312897e-05, "loss": 0.2136, "step": 5414 }, { "epoch": 5.73, "learning_rate": 3.570295983086681e-05, "loss": 0.1079, "step": 5416 }, { "epoch": 5.73, "learning_rate": 3.5697674418604656e-05, "loss": 0.2143, "step": 5418 }, { "epoch": 5.73, "learning_rate": 3.5692389006342495e-05, "loss": 0.2804, "step": 5420 }, { "epoch": 5.73, "learning_rate": 3.568710359408034e-05, "loss": 0.0697, "step": 5422 }, { "epoch": 5.73, "learning_rate": 3.568181818181818e-05, "loss": 0.1546, "step": 5424 }, { "epoch": 5.74, "learning_rate": 3.5676532769556027e-05, "loss": 0.2973, "step": 5426 }, { "epoch": 5.74, "learning_rate": 3.5671247357293866e-05, "loss": 0.2149, "step": 5428 }, { "epoch": 5.74, "learning_rate": 3.566596194503171e-05, "loss": 0.3598, "step": 5430 }, { "epoch": 5.74, "learning_rate": 3.566067653276956e-05, "loss": 0.172, "step": 5432 }, { "epoch": 5.74, "learning_rate": 3.5655391120507404e-05, "loss": 0.166, "step": 5434 }, { "epoch": 5.75, "learning_rate": 3.565010570824524e-05, "loss": 0.1814, "step": 5436 }, { "epoch": 5.75, "learning_rate": 3.564482029598309e-05, "loss": 0.1576, "step": 5438 }, { "epoch": 5.75, "learning_rate": 3.563953488372093e-05, "loss": 0.1284, "step": 5440 }, { "epoch": 5.75, "learning_rate": 3.5634249471458775e-05, "loss": 0.1752, "step": 5442 }, { "epoch": 5.75, "learning_rate": 3.562896405919662e-05, "loss": 0.2017, "step": 5444 }, { "epoch": 5.76, "learning_rate": 3.562367864693446e-05, "loss": 0.1809, "step": 5446 }, { "epoch": 5.76, "learning_rate": 3.5618393234672306e-05, "loss": 0.1873, "step": 5448 }, { "epoch": 5.76, "learning_rate": 3.561310782241015e-05, "loss": 0.1214, "step": 5450 }, { "epoch": 5.76, "learning_rate": 3.5607822410148e-05, "loss": 0.0826, "step": 5452 }, { "epoch": 5.77, "learning_rate": 3.560253699788584e-05, "loss": 0.2159, "step": 5454 }, { "epoch": 5.77, "learning_rate": 3.5597251585623684e-05, "loss": 0.1289, "step": 5456 }, { "epoch": 5.77, "learning_rate": 3.559196617336152e-05, "loss": 0.1642, "step": 5458 }, { "epoch": 5.77, "learning_rate": 3.558668076109937e-05, "loss": 0.1882, "step": 5460 }, { "epoch": 5.77, "learning_rate": 3.558139534883721e-05, "loss": 0.1427, "step": 5462 }, { "epoch": 5.78, "learning_rate": 3.5576109936575054e-05, "loss": 0.104, "step": 5464 }, { "epoch": 5.78, "learning_rate": 3.5570824524312894e-05, "loss": 0.11, "step": 5466 }, { "epoch": 5.78, "learning_rate": 3.5565539112050746e-05, "loss": 0.097, "step": 5468 }, { "epoch": 5.78, "learning_rate": 3.5560253699788586e-05, "loss": 0.1465, "step": 5470 }, { "epoch": 5.78, "learning_rate": 3.555496828752643e-05, "loss": 0.2532, "step": 5472 }, { "epoch": 5.79, "learning_rate": 3.554968287526427e-05, "loss": 0.1785, "step": 5474 }, { "epoch": 5.79, "learning_rate": 3.554439746300212e-05, "loss": 0.2498, "step": 5476 }, { "epoch": 5.79, "learning_rate": 3.5539112050739956e-05, "loss": 0.3845, "step": 5478 }, { "epoch": 5.79, "learning_rate": 3.55338266384778e-05, "loss": 0.2038, "step": 5480 }, { "epoch": 5.79, "learning_rate": 3.552854122621564e-05, "loss": 0.1827, "step": 5482 }, { "epoch": 5.8, "learning_rate": 3.552325581395349e-05, "loss": 0.1429, "step": 5484 }, { "epoch": 5.8, "learning_rate": 3.5517970401691334e-05, "loss": 0.1874, "step": 5486 }, { "epoch": 5.8, "learning_rate": 3.551268498942918e-05, "loss": 0.1165, "step": 5488 }, { "epoch": 5.8, "learning_rate": 3.550739957716702e-05, "loss": 0.1995, "step": 5490 }, { "epoch": 5.81, "learning_rate": 3.5502114164904865e-05, "loss": 0.2121, "step": 5492 }, { "epoch": 5.81, "learning_rate": 3.5496828752642705e-05, "loss": 0.1209, "step": 5494 }, { "epoch": 5.81, "learning_rate": 3.549154334038055e-05, "loss": 0.1137, "step": 5496 }, { "epoch": 5.81, "learning_rate": 3.54862579281184e-05, "loss": 0.1164, "step": 5498 }, { "epoch": 5.81, "learning_rate": 3.5480972515856236e-05, "loss": 0.1558, "step": 5500 }, { "epoch": 5.81, "eval_cer": 0.023254488458250212, "eval_loss": 0.6493213176727295, "eval_runtime": 128.6984, "eval_samples_per_second": 6.535, "eval_steps_per_second": 0.824, "step": 5500 }, { "epoch": 5.82, "learning_rate": 3.547568710359408e-05, "loss": 0.1527, "step": 5502 }, { "epoch": 5.82, "learning_rate": 3.547040169133193e-05, "loss": 0.1787, "step": 5504 }, { "epoch": 5.82, "learning_rate": 3.5465116279069774e-05, "loss": 0.0842, "step": 5506 }, { "epoch": 5.82, "learning_rate": 3.5459830866807613e-05, "loss": 0.1248, "step": 5508 }, { "epoch": 5.82, "learning_rate": 3.545454545454546e-05, "loss": 0.2904, "step": 5510 }, { "epoch": 5.83, "learning_rate": 3.54492600422833e-05, "loss": 0.1775, "step": 5512 }, { "epoch": 5.83, "learning_rate": 3.5443974630021145e-05, "loss": 0.1804, "step": 5514 }, { "epoch": 5.83, "learning_rate": 3.5438689217758984e-05, "loss": 0.1622, "step": 5516 }, { "epoch": 5.83, "learning_rate": 3.543340380549683e-05, "loss": 0.1061, "step": 5518 }, { "epoch": 5.84, "learning_rate": 3.542811839323467e-05, "loss": 0.2467, "step": 5520 }, { "epoch": 5.84, "learning_rate": 3.542283298097252e-05, "loss": 0.1456, "step": 5522 }, { "epoch": 5.84, "learning_rate": 3.541754756871036e-05, "loss": 0.0853, "step": 5524 }, { "epoch": 5.84, "learning_rate": 3.541226215644821e-05, "loss": 0.2468, "step": 5526 }, { "epoch": 5.84, "learning_rate": 3.540697674418605e-05, "loss": 0.1191, "step": 5528 }, { "epoch": 5.85, "learning_rate": 3.540169133192389e-05, "loss": 0.1519, "step": 5530 }, { "epoch": 5.85, "learning_rate": 3.539640591966173e-05, "loss": 0.1389, "step": 5532 }, { "epoch": 5.85, "learning_rate": 3.539112050739958e-05, "loss": 0.1521, "step": 5534 }, { "epoch": 5.85, "learning_rate": 3.538583509513742e-05, "loss": 0.1933, "step": 5536 }, { "epoch": 5.85, "learning_rate": 3.5380549682875264e-05, "loss": 0.2494, "step": 5538 }, { "epoch": 5.86, "learning_rate": 3.537526427061311e-05, "loss": 0.2218, "step": 5540 }, { "epoch": 5.86, "learning_rate": 3.5369978858350956e-05, "loss": 0.193, "step": 5542 }, { "epoch": 5.86, "learning_rate": 3.5364693446088795e-05, "loss": 0.149, "step": 5544 }, { "epoch": 5.86, "learning_rate": 3.535940803382664e-05, "loss": 0.211, "step": 5546 }, { "epoch": 5.86, "learning_rate": 3.535412262156449e-05, "loss": 0.2097, "step": 5548 }, { "epoch": 5.87, "learning_rate": 3.5348837209302326e-05, "loss": 0.2378, "step": 5550 }, { "epoch": 5.87, "learning_rate": 3.534355179704017e-05, "loss": 0.2757, "step": 5552 }, { "epoch": 5.87, "learning_rate": 3.533826638477801e-05, "loss": 0.1784, "step": 5554 }, { "epoch": 5.87, "learning_rate": 3.533298097251586e-05, "loss": 0.1451, "step": 5556 }, { "epoch": 5.88, "learning_rate": 3.5327695560253704e-05, "loss": 0.1482, "step": 5558 }, { "epoch": 5.88, "learning_rate": 3.532241014799155e-05, "loss": 0.1061, "step": 5560 }, { "epoch": 5.88, "learning_rate": 3.531712473572939e-05, "loss": 0.0902, "step": 5562 }, { "epoch": 5.88, "learning_rate": 3.5311839323467235e-05, "loss": 0.1869, "step": 5564 }, { "epoch": 5.88, "learning_rate": 3.5306553911205075e-05, "loss": 0.2725, "step": 5566 }, { "epoch": 5.89, "learning_rate": 3.530126849894292e-05, "loss": 0.2675, "step": 5568 }, { "epoch": 5.89, "learning_rate": 3.529598308668076e-05, "loss": 0.2006, "step": 5570 }, { "epoch": 5.89, "learning_rate": 3.5290697674418606e-05, "loss": 0.1599, "step": 5572 }, { "epoch": 5.89, "learning_rate": 3.5285412262156445e-05, "loss": 0.1574, "step": 5574 }, { "epoch": 5.89, "learning_rate": 3.52801268498943e-05, "loss": 0.103, "step": 5576 }, { "epoch": 5.9, "learning_rate": 3.527484143763214e-05, "loss": 0.2207, "step": 5578 }, { "epoch": 5.9, "learning_rate": 3.5269556025369983e-05, "loss": 0.1215, "step": 5580 }, { "epoch": 5.9, "learning_rate": 3.526427061310782e-05, "loss": 0.3131, "step": 5582 }, { "epoch": 5.9, "learning_rate": 3.525898520084567e-05, "loss": 0.2382, "step": 5584 }, { "epoch": 5.9, "learning_rate": 3.525369978858351e-05, "loss": 0.2115, "step": 5586 }, { "epoch": 5.91, "learning_rate": 3.5248414376321354e-05, "loss": 0.315, "step": 5588 }, { "epoch": 5.91, "learning_rate": 3.5243128964059193e-05, "loss": 0.3125, "step": 5590 }, { "epoch": 5.91, "learning_rate": 3.523784355179704e-05, "loss": 0.2226, "step": 5592 }, { "epoch": 5.91, "learning_rate": 3.5232558139534886e-05, "loss": 0.1697, "step": 5594 }, { "epoch": 5.92, "learning_rate": 3.522727272727273e-05, "loss": 0.199, "step": 5596 }, { "epoch": 5.92, "learning_rate": 3.522198731501057e-05, "loss": 0.116, "step": 5598 }, { "epoch": 5.92, "learning_rate": 3.521670190274842e-05, "loss": 0.3555, "step": 5600 }, { "epoch": 5.92, "learning_rate": 3.521141649048626e-05, "loss": 0.1411, "step": 5602 }, { "epoch": 5.92, "learning_rate": 3.52061310782241e-05, "loss": 0.1481, "step": 5604 }, { "epoch": 5.93, "learning_rate": 3.520084566596195e-05, "loss": 0.2624, "step": 5606 }, { "epoch": 5.93, "learning_rate": 3.519556025369979e-05, "loss": 0.2433, "step": 5608 }, { "epoch": 5.93, "learning_rate": 3.5190274841437634e-05, "loss": 0.1429, "step": 5610 }, { "epoch": 5.93, "learning_rate": 3.518498942917548e-05, "loss": 0.119, "step": 5612 }, { "epoch": 5.93, "learning_rate": 3.5179704016913326e-05, "loss": 0.0767, "step": 5614 }, { "epoch": 5.94, "learning_rate": 3.5174418604651165e-05, "loss": 0.2706, "step": 5616 }, { "epoch": 5.94, "learning_rate": 3.516913319238901e-05, "loss": 0.1723, "step": 5618 }, { "epoch": 5.94, "learning_rate": 3.516384778012685e-05, "loss": 0.1937, "step": 5620 }, { "epoch": 5.94, "learning_rate": 3.5158562367864696e-05, "loss": 0.1043, "step": 5622 }, { "epoch": 5.95, "learning_rate": 3.5153276955602536e-05, "loss": 0.2116, "step": 5624 }, { "epoch": 5.95, "learning_rate": 3.514799154334038e-05, "loss": 0.1653, "step": 5626 }, { "epoch": 5.95, "learning_rate": 3.514270613107822e-05, "loss": 0.1505, "step": 5628 }, { "epoch": 5.95, "learning_rate": 3.5137420718816074e-05, "loss": 0.2293, "step": 5630 }, { "epoch": 5.95, "learning_rate": 3.513213530655391e-05, "loss": 0.2016, "step": 5632 }, { "epoch": 5.96, "learning_rate": 3.512684989429176e-05, "loss": 0.1426, "step": 5634 }, { "epoch": 5.96, "learning_rate": 3.51215644820296e-05, "loss": 0.129, "step": 5636 }, { "epoch": 5.96, "learning_rate": 3.5116279069767445e-05, "loss": 0.1259, "step": 5638 }, { "epoch": 5.96, "learning_rate": 3.5110993657505284e-05, "loss": 0.1072, "step": 5640 }, { "epoch": 5.96, "learning_rate": 3.510570824524313e-05, "loss": 0.2352, "step": 5642 }, { "epoch": 5.97, "learning_rate": 3.510042283298097e-05, "loss": 0.3217, "step": 5644 }, { "epoch": 5.97, "learning_rate": 3.5095137420718815e-05, "loss": 0.0973, "step": 5646 }, { "epoch": 5.97, "learning_rate": 3.508985200845666e-05, "loss": 0.1055, "step": 5648 }, { "epoch": 5.97, "learning_rate": 3.508456659619451e-05, "loss": 0.1537, "step": 5650 }, { "epoch": 5.97, "learning_rate": 3.507928118393235e-05, "loss": 0.1017, "step": 5652 }, { "epoch": 5.98, "learning_rate": 3.507399577167019e-05, "loss": 0.2179, "step": 5654 }, { "epoch": 5.98, "learning_rate": 3.506871035940804e-05, "loss": 0.1378, "step": 5656 }, { "epoch": 5.98, "learning_rate": 3.506342494714588e-05, "loss": 0.251, "step": 5658 }, { "epoch": 5.98, "learning_rate": 3.5058139534883724e-05, "loss": 0.146, "step": 5660 }, { "epoch": 5.99, "learning_rate": 3.5052854122621563e-05, "loss": 0.1565, "step": 5662 }, { "epoch": 5.99, "learning_rate": 3.504756871035941e-05, "loss": 0.1334, "step": 5664 }, { "epoch": 5.99, "learning_rate": 3.5042283298097256e-05, "loss": 0.1833, "step": 5666 }, { "epoch": 5.99, "learning_rate": 3.50369978858351e-05, "loss": 0.1748, "step": 5668 }, { "epoch": 5.99, "learning_rate": 3.503171247357294e-05, "loss": 0.1905, "step": 5670 }, { "epoch": 6.0, "learning_rate": 3.502642706131079e-05, "loss": 0.1416, "step": 5672 }, { "epoch": 6.0, "learning_rate": 3.5021141649048626e-05, "loss": 0.189, "step": 5674 }, { "epoch": 6.0, "learning_rate": 3.501585623678647e-05, "loss": 0.1976, "step": 5676 }, { "epoch": 6.0, "learning_rate": 3.501057082452431e-05, "loss": 0.1923, "step": 5678 }, { "epoch": 6.0, "learning_rate": 3.500528541226216e-05, "loss": 0.2369, "step": 5680 }, { "epoch": 6.01, "learning_rate": 3.5e-05, "loss": 0.0773, "step": 5682 }, { "epoch": 6.01, "learning_rate": 3.499471458773785e-05, "loss": 0.1012, "step": 5684 }, { "epoch": 6.01, "learning_rate": 3.498942917547569e-05, "loss": 0.1372, "step": 5686 }, { "epoch": 6.01, "learning_rate": 3.4984143763213535e-05, "loss": 0.2137, "step": 5688 }, { "epoch": 6.01, "learning_rate": 3.4978858350951374e-05, "loss": 0.213, "step": 5690 }, { "epoch": 6.02, "learning_rate": 3.497357293868922e-05, "loss": 0.0937, "step": 5692 }, { "epoch": 6.02, "learning_rate": 3.496828752642706e-05, "loss": 0.2398, "step": 5694 }, { "epoch": 6.02, "learning_rate": 3.4963002114164906e-05, "loss": 0.1157, "step": 5696 }, { "epoch": 6.02, "learning_rate": 3.495771670190275e-05, "loss": 0.0669, "step": 5698 }, { "epoch": 6.03, "learning_rate": 3.495243128964059e-05, "loss": 0.2217, "step": 5700 }, { "epoch": 6.03, "learning_rate": 3.494714587737844e-05, "loss": 0.2695, "step": 5702 }, { "epoch": 6.03, "learning_rate": 3.494186046511628e-05, "loss": 0.2532, "step": 5704 }, { "epoch": 6.03, "learning_rate": 3.493657505285412e-05, "loss": 0.1932, "step": 5706 }, { "epoch": 6.03, "learning_rate": 3.493128964059197e-05, "loss": 0.2101, "step": 5708 }, { "epoch": 6.04, "learning_rate": 3.4926004228329815e-05, "loss": 0.2146, "step": 5710 }, { "epoch": 6.04, "learning_rate": 3.4920718816067654e-05, "loss": 0.1982, "step": 5712 }, { "epoch": 6.04, "learning_rate": 3.49154334038055e-05, "loss": 0.0912, "step": 5714 }, { "epoch": 6.04, "learning_rate": 3.491014799154334e-05, "loss": 0.1053, "step": 5716 }, { "epoch": 6.04, "learning_rate": 3.4904862579281185e-05, "loss": 0.2478, "step": 5718 }, { "epoch": 6.05, "learning_rate": 3.489957716701903e-05, "loss": 0.2633, "step": 5720 }, { "epoch": 6.05, "learning_rate": 3.489429175475688e-05, "loss": 0.1436, "step": 5722 }, { "epoch": 6.05, "learning_rate": 3.488900634249472e-05, "loss": 0.2109, "step": 5724 }, { "epoch": 6.05, "learning_rate": 3.488372093023256e-05, "loss": 0.1513, "step": 5726 }, { "epoch": 6.05, "learning_rate": 3.48784355179704e-05, "loss": 0.1171, "step": 5728 }, { "epoch": 6.06, "learning_rate": 3.487315010570825e-05, "loss": 0.0842, "step": 5730 }, { "epoch": 6.06, "learning_rate": 3.486786469344609e-05, "loss": 0.2574, "step": 5732 }, { "epoch": 6.06, "learning_rate": 3.4862579281183933e-05, "loss": 0.0996, "step": 5734 }, { "epoch": 6.06, "learning_rate": 3.485729386892177e-05, "loss": 0.0907, "step": 5736 }, { "epoch": 6.07, "learning_rate": 3.4852008456659626e-05, "loss": 0.1762, "step": 5738 }, { "epoch": 6.07, "learning_rate": 3.4846723044397465e-05, "loss": 0.116, "step": 5740 }, { "epoch": 6.07, "learning_rate": 3.484143763213531e-05, "loss": 0.3131, "step": 5742 }, { "epoch": 6.07, "learning_rate": 3.483615221987315e-05, "loss": 0.2087, "step": 5744 }, { "epoch": 6.07, "learning_rate": 3.4830866807610996e-05, "loss": 0.2606, "step": 5746 }, { "epoch": 6.08, "learning_rate": 3.4825581395348836e-05, "loss": 0.1987, "step": 5748 }, { "epoch": 6.08, "learning_rate": 3.482029598308668e-05, "loss": 0.3575, "step": 5750 }, { "epoch": 6.08, "learning_rate": 3.481501057082453e-05, "loss": 0.2289, "step": 5752 }, { "epoch": 6.08, "learning_rate": 3.480972515856237e-05, "loss": 0.179, "step": 5754 }, { "epoch": 6.08, "learning_rate": 3.480443974630021e-05, "loss": 0.1997, "step": 5756 }, { "epoch": 6.09, "learning_rate": 3.479915433403806e-05, "loss": 0.1259, "step": 5758 }, { "epoch": 6.09, "learning_rate": 3.47938689217759e-05, "loss": 0.1962, "step": 5760 }, { "epoch": 6.09, "learning_rate": 3.4788583509513744e-05, "loss": 0.2209, "step": 5762 }, { "epoch": 6.09, "learning_rate": 3.478329809725159e-05, "loss": 0.1538, "step": 5764 }, { "epoch": 6.1, "learning_rate": 3.477801268498943e-05, "loss": 0.2426, "step": 5766 }, { "epoch": 6.1, "learning_rate": 3.4772727272727276e-05, "loss": 0.3101, "step": 5768 }, { "epoch": 6.1, "learning_rate": 3.4767441860465115e-05, "loss": 0.3042, "step": 5770 }, { "epoch": 6.1, "learning_rate": 3.476215644820296e-05, "loss": 0.2477, "step": 5772 }, { "epoch": 6.1, "learning_rate": 3.47568710359408e-05, "loss": 0.1398, "step": 5774 }, { "epoch": 6.11, "learning_rate": 3.475158562367865e-05, "loss": 0.1257, "step": 5776 }, { "epoch": 6.11, "learning_rate": 3.474630021141649e-05, "loss": 0.1497, "step": 5778 }, { "epoch": 6.11, "learning_rate": 3.474101479915434e-05, "loss": 0.1602, "step": 5780 }, { "epoch": 6.11, "learning_rate": 3.473572938689218e-05, "loss": 0.1371, "step": 5782 }, { "epoch": 6.11, "learning_rate": 3.4730443974630024e-05, "loss": 0.1337, "step": 5784 }, { "epoch": 6.12, "learning_rate": 3.472515856236786e-05, "loss": 0.139, "step": 5786 }, { "epoch": 6.12, "learning_rate": 3.471987315010571e-05, "loss": 0.2151, "step": 5788 }, { "epoch": 6.12, "learning_rate": 3.471458773784355e-05, "loss": 0.1326, "step": 5790 }, { "epoch": 6.12, "learning_rate": 3.4709302325581395e-05, "loss": 0.1047, "step": 5792 }, { "epoch": 6.12, "learning_rate": 3.470401691331924e-05, "loss": 0.2743, "step": 5794 }, { "epoch": 6.13, "learning_rate": 3.469873150105709e-05, "loss": 0.1087, "step": 5796 }, { "epoch": 6.13, "learning_rate": 3.4693446088794926e-05, "loss": 0.1329, "step": 5798 }, { "epoch": 6.13, "learning_rate": 3.468816067653277e-05, "loss": 0.0915, "step": 5800 }, { "epoch": 6.13, "learning_rate": 3.468287526427061e-05, "loss": 0.0456, "step": 5802 }, { "epoch": 6.14, "learning_rate": 3.467758985200846e-05, "loss": 0.1955, "step": 5804 }, { "epoch": 6.14, "learning_rate": 3.4672304439746304e-05, "loss": 0.1902, "step": 5806 }, { "epoch": 6.14, "learning_rate": 3.466701902748414e-05, "loss": 0.1203, "step": 5808 }, { "epoch": 6.14, "learning_rate": 3.466173361522199e-05, "loss": 0.1, "step": 5810 }, { "epoch": 6.14, "learning_rate": 3.4656448202959835e-05, "loss": 0.112, "step": 5812 }, { "epoch": 6.15, "learning_rate": 3.465116279069768e-05, "loss": 0.1416, "step": 5814 }, { "epoch": 6.15, "learning_rate": 3.464587737843552e-05, "loss": 0.1604, "step": 5816 }, { "epoch": 6.15, "learning_rate": 3.4640591966173366e-05, "loss": 0.1217, "step": 5818 }, { "epoch": 6.15, "learning_rate": 3.4635306553911206e-05, "loss": 0.2795, "step": 5820 }, { "epoch": 6.15, "learning_rate": 3.463002114164905e-05, "loss": 0.221, "step": 5822 }, { "epoch": 6.16, "learning_rate": 3.462473572938689e-05, "loss": 0.1878, "step": 5824 }, { "epoch": 6.16, "learning_rate": 3.461945031712474e-05, "loss": 0.1025, "step": 5826 }, { "epoch": 6.16, "learning_rate": 3.4614164904862576e-05, "loss": 0.1729, "step": 5828 }, { "epoch": 6.16, "learning_rate": 3.460887949260043e-05, "loss": 0.096, "step": 5830 }, { "epoch": 6.16, "learning_rate": 3.460359408033827e-05, "loss": 0.158, "step": 5832 }, { "epoch": 6.17, "learning_rate": 3.4598308668076114e-05, "loss": 0.1745, "step": 5834 }, { "epoch": 6.17, "learning_rate": 3.4593023255813954e-05, "loss": 0.1705, "step": 5836 }, { "epoch": 6.17, "learning_rate": 3.459038054968288e-05, "loss": 0.3037, "step": 5838 }, { "epoch": 6.17, "learning_rate": 3.458509513742072e-05, "loss": 0.1047, "step": 5840 }, { "epoch": 6.18, "learning_rate": 3.457980972515856e-05, "loss": 0.0526, "step": 5842 }, { "epoch": 6.18, "learning_rate": 3.457452431289641e-05, "loss": 0.2037, "step": 5844 }, { "epoch": 6.18, "learning_rate": 3.456923890063425e-05, "loss": 0.087, "step": 5846 }, { "epoch": 6.18, "learning_rate": 3.45639534883721e-05, "loss": 0.1763, "step": 5848 }, { "epoch": 6.18, "learning_rate": 3.455866807610994e-05, "loss": 0.0715, "step": 5850 }, { "epoch": 6.19, "learning_rate": 3.4553382663847786e-05, "loss": 0.2581, "step": 5852 }, { "epoch": 6.19, "learning_rate": 3.4548097251585625e-05, "loss": 0.1202, "step": 5854 }, { "epoch": 6.19, "learning_rate": 3.454281183932347e-05, "loss": 0.0978, "step": 5856 }, { "epoch": 6.19, "learning_rate": 3.453752642706131e-05, "loss": 0.1894, "step": 5858 }, { "epoch": 6.19, "learning_rate": 3.4532241014799156e-05, "loss": 0.1545, "step": 5860 }, { "epoch": 6.2, "learning_rate": 3.4526955602536996e-05, "loss": 0.1361, "step": 5862 }, { "epoch": 6.2, "learning_rate": 3.452167019027484e-05, "loss": 0.2735, "step": 5864 }, { "epoch": 6.2, "learning_rate": 3.451638477801269e-05, "loss": 0.211, "step": 5866 }, { "epoch": 6.2, "learning_rate": 3.4511099365750534e-05, "loss": 0.107, "step": 5868 }, { "epoch": 6.21, "learning_rate": 3.450581395348837e-05, "loss": 0.1412, "step": 5870 }, { "epoch": 6.21, "learning_rate": 3.450052854122622e-05, "loss": 0.2465, "step": 5872 }, { "epoch": 6.21, "learning_rate": 3.449524312896406e-05, "loss": 0.1566, "step": 5874 }, { "epoch": 6.21, "learning_rate": 3.4489957716701904e-05, "loss": 0.1874, "step": 5876 }, { "epoch": 6.21, "learning_rate": 3.4484672304439744e-05, "loss": 0.2111, "step": 5878 }, { "epoch": 6.22, "learning_rate": 3.447938689217759e-05, "loss": 0.1737, "step": 5880 }, { "epoch": 6.22, "learning_rate": 3.4474101479915436e-05, "loss": 0.0879, "step": 5882 }, { "epoch": 6.22, "learning_rate": 3.446881606765328e-05, "loss": 0.158, "step": 5884 }, { "epoch": 6.22, "learning_rate": 3.446353065539112e-05, "loss": 0.2724, "step": 5886 }, { "epoch": 6.22, "learning_rate": 3.445824524312897e-05, "loss": 0.1235, "step": 5888 }, { "epoch": 6.23, "learning_rate": 3.445295983086681e-05, "loss": 0.2047, "step": 5890 }, { "epoch": 6.23, "learning_rate": 3.444767441860465e-05, "loss": 0.2076, "step": 5892 }, { "epoch": 6.23, "learning_rate": 3.44423890063425e-05, "loss": 0.1676, "step": 5894 }, { "epoch": 6.23, "learning_rate": 3.443710359408034e-05, "loss": 0.1896, "step": 5896 }, { "epoch": 6.23, "learning_rate": 3.4431818181818184e-05, "loss": 0.145, "step": 5898 }, { "epoch": 6.24, "learning_rate": 3.442653276955602e-05, "loss": 0.0989, "step": 5900 }, { "epoch": 6.24, "learning_rate": 3.4421247357293876e-05, "loss": 0.1219, "step": 5902 }, { "epoch": 6.24, "learning_rate": 3.4415961945031715e-05, "loss": 0.1201, "step": 5904 }, { "epoch": 6.24, "learning_rate": 3.441067653276956e-05, "loss": 0.2582, "step": 5906 }, { "epoch": 6.25, "learning_rate": 3.44053911205074e-05, "loss": 0.1738, "step": 5908 }, { "epoch": 6.25, "learning_rate": 3.440010570824525e-05, "loss": 0.2368, "step": 5910 }, { "epoch": 6.25, "learning_rate": 3.4394820295983086e-05, "loss": 0.1735, "step": 5912 }, { "epoch": 6.25, "learning_rate": 3.438953488372093e-05, "loss": 0.1939, "step": 5914 }, { "epoch": 6.25, "learning_rate": 3.438424947145877e-05, "loss": 0.1386, "step": 5916 }, { "epoch": 6.26, "learning_rate": 3.437896405919662e-05, "loss": 0.0951, "step": 5918 }, { "epoch": 6.26, "learning_rate": 3.4373678646934464e-05, "loss": 0.061, "step": 5920 }, { "epoch": 6.26, "learning_rate": 3.436839323467231e-05, "loss": 0.1492, "step": 5922 }, { "epoch": 6.26, "learning_rate": 3.436310782241015e-05, "loss": 0.1947, "step": 5924 }, { "epoch": 6.26, "learning_rate": 3.4357822410147995e-05, "loss": 0.1067, "step": 5926 }, { "epoch": 6.27, "learning_rate": 3.4352536997885834e-05, "loss": 0.1853, "step": 5928 }, { "epoch": 6.27, "learning_rate": 3.434725158562368e-05, "loss": 0.2154, "step": 5930 }, { "epoch": 6.27, "learning_rate": 3.434196617336152e-05, "loss": 0.2419, "step": 5932 }, { "epoch": 6.27, "learning_rate": 3.4336680761099366e-05, "loss": 0.184, "step": 5934 }, { "epoch": 6.27, "learning_rate": 3.433139534883721e-05, "loss": 0.1954, "step": 5936 }, { "epoch": 6.28, "learning_rate": 3.432610993657506e-05, "loss": 0.2227, "step": 5938 }, { "epoch": 6.28, "learning_rate": 3.43208245243129e-05, "loss": 0.2063, "step": 5940 }, { "epoch": 6.28, "learning_rate": 3.431553911205074e-05, "loss": 0.1288, "step": 5942 }, { "epoch": 6.28, "learning_rate": 3.431025369978859e-05, "loss": 0.1622, "step": 5944 }, { "epoch": 6.29, "learning_rate": 3.430496828752643e-05, "loss": 0.1974, "step": 5946 }, { "epoch": 6.29, "learning_rate": 3.4299682875264275e-05, "loss": 0.1642, "step": 5948 }, { "epoch": 6.29, "learning_rate": 3.4294397463002114e-05, "loss": 0.1333, "step": 5950 }, { "epoch": 6.29, "learning_rate": 3.428911205073996e-05, "loss": 0.1552, "step": 5952 }, { "epoch": 6.29, "learning_rate": 3.42838266384778e-05, "loss": 0.165, "step": 5954 }, { "epoch": 6.3, "learning_rate": 3.427854122621565e-05, "loss": 0.1734, "step": 5956 }, { "epoch": 6.3, "learning_rate": 3.427325581395349e-05, "loss": 0.1146, "step": 5958 }, { "epoch": 6.3, "learning_rate": 3.426797040169134e-05, "loss": 0.1115, "step": 5960 }, { "epoch": 6.3, "learning_rate": 3.426268498942918e-05, "loss": 0.2913, "step": 5962 }, { "epoch": 6.3, "learning_rate": 3.425739957716702e-05, "loss": 0.1059, "step": 5964 }, { "epoch": 6.31, "learning_rate": 3.425211416490486e-05, "loss": 0.1041, "step": 5966 }, { "epoch": 6.31, "learning_rate": 3.424682875264271e-05, "loss": 0.1611, "step": 5968 }, { "epoch": 6.31, "learning_rate": 3.424154334038055e-05, "loss": 0.0764, "step": 5970 }, { "epoch": 6.31, "learning_rate": 3.423625792811839e-05, "loss": 0.1817, "step": 5972 }, { "epoch": 6.32, "learning_rate": 3.423097251585624e-05, "loss": 0.1004, "step": 5974 }, { "epoch": 6.32, "learning_rate": 3.4225687103594085e-05, "loss": 0.0877, "step": 5976 }, { "epoch": 6.32, "learning_rate": 3.4220401691331925e-05, "loss": 0.1462, "step": 5978 }, { "epoch": 6.32, "learning_rate": 3.421511627906977e-05, "loss": 0.1329, "step": 5980 }, { "epoch": 6.32, "learning_rate": 3.420983086680761e-05, "loss": 0.1345, "step": 5982 }, { "epoch": 6.33, "learning_rate": 3.4204545454545456e-05, "loss": 0.1035, "step": 5984 }, { "epoch": 6.33, "learning_rate": 3.4199260042283295e-05, "loss": 0.2515, "step": 5986 }, { "epoch": 6.33, "learning_rate": 3.419397463002114e-05, "loss": 0.1297, "step": 5988 }, { "epoch": 6.33, "learning_rate": 3.418868921775899e-05, "loss": 0.2148, "step": 5990 }, { "epoch": 6.33, "learning_rate": 3.4183403805496834e-05, "loss": 0.1103, "step": 5992 }, { "epoch": 6.34, "learning_rate": 3.417811839323467e-05, "loss": 0.114, "step": 5994 }, { "epoch": 6.34, "learning_rate": 3.417283298097252e-05, "loss": 0.1915, "step": 5996 }, { "epoch": 6.34, "learning_rate": 3.4167547568710365e-05, "loss": 0.1484, "step": 5998 }, { "epoch": 6.34, "learning_rate": 3.4162262156448204e-05, "loss": 0.1164, "step": 6000 }, { "epoch": 6.34, "eval_cer": 0.025876318039327443, "eval_loss": 0.5018019676208496, "eval_runtime": 128.3817, "eval_samples_per_second": 6.551, "eval_steps_per_second": 0.826, "step": 6000 }, { "epoch": 6.34, "learning_rate": 3.415697674418605e-05, "loss": 0.0918, "step": 6002 }, { "epoch": 6.35, "learning_rate": 3.415169133192389e-05, "loss": 0.1645, "step": 6004 }, { "epoch": 6.35, "learning_rate": 3.4146405919661736e-05, "loss": 0.0649, "step": 6006 }, { "epoch": 6.35, "learning_rate": 3.4141120507399575e-05, "loss": 0.158, "step": 6008 }, { "epoch": 6.35, "learning_rate": 3.413583509513743e-05, "loss": 0.1728, "step": 6010 }, { "epoch": 6.36, "learning_rate": 3.413054968287527e-05, "loss": 0.2382, "step": 6012 }, { "epoch": 6.36, "learning_rate": 3.412526427061311e-05, "loss": 0.1304, "step": 6014 }, { "epoch": 6.36, "learning_rate": 3.411997885835095e-05, "loss": 0.1578, "step": 6016 }, { "epoch": 6.36, "learning_rate": 3.41146934460888e-05, "loss": 0.1018, "step": 6018 }, { "epoch": 6.36, "learning_rate": 3.410940803382664e-05, "loss": 0.1749, "step": 6020 }, { "epoch": 6.37, "learning_rate": 3.4104122621564484e-05, "loss": 0.0877, "step": 6022 }, { "epoch": 6.37, "learning_rate": 3.409883720930232e-05, "loss": 0.1199, "step": 6024 }, { "epoch": 6.37, "learning_rate": 3.409355179704017e-05, "loss": 0.1858, "step": 6026 }, { "epoch": 6.37, "learning_rate": 3.4088266384778015e-05, "loss": 0.1073, "step": 6028 }, { "epoch": 6.37, "learning_rate": 3.408298097251586e-05, "loss": 0.0823, "step": 6030 }, { "epoch": 6.38, "learning_rate": 3.40776955602537e-05, "loss": 0.1692, "step": 6032 }, { "epoch": 6.38, "learning_rate": 3.407241014799155e-05, "loss": 0.1037, "step": 6034 }, { "epoch": 6.38, "learning_rate": 3.4067124735729386e-05, "loss": 0.0908, "step": 6036 }, { "epoch": 6.38, "learning_rate": 3.406183932346723e-05, "loss": 0.1123, "step": 6038 }, { "epoch": 6.38, "learning_rate": 3.405655391120507e-05, "loss": 0.1272, "step": 6040 }, { "epoch": 6.39, "learning_rate": 3.405126849894292e-05, "loss": 0.1125, "step": 6042 }, { "epoch": 6.39, "learning_rate": 3.4045983086680763e-05, "loss": 0.1315, "step": 6044 }, { "epoch": 6.39, "learning_rate": 3.404069767441861e-05, "loss": 0.2304, "step": 6046 }, { "epoch": 6.39, "learning_rate": 3.403541226215645e-05, "loss": 0.108, "step": 6048 }, { "epoch": 6.4, "learning_rate": 3.4030126849894295e-05, "loss": 0.2105, "step": 6050 }, { "epoch": 6.4, "learning_rate": 3.402484143763214e-05, "loss": 0.1575, "step": 6052 }, { "epoch": 6.4, "learning_rate": 3.401955602536998e-05, "loss": 0.1972, "step": 6054 }, { "epoch": 6.4, "learning_rate": 3.4014270613107826e-05, "loss": 0.1202, "step": 6056 }, { "epoch": 6.4, "learning_rate": 3.4008985200845665e-05, "loss": 0.1426, "step": 6058 }, { "epoch": 6.41, "learning_rate": 3.400369978858351e-05, "loss": 0.1163, "step": 6060 }, { "epoch": 6.41, "learning_rate": 3.399841437632135e-05, "loss": 0.2422, "step": 6062 }, { "epoch": 6.41, "learning_rate": 3.3993128964059204e-05, "loss": 0.1269, "step": 6064 }, { "epoch": 6.41, "learning_rate": 3.398784355179704e-05, "loss": 0.1424, "step": 6066 }, { "epoch": 6.41, "learning_rate": 3.398255813953489e-05, "loss": 0.1831, "step": 6068 }, { "epoch": 6.42, "learning_rate": 3.397727272727273e-05, "loss": 0.2557, "step": 6070 }, { "epoch": 6.42, "learning_rate": 3.3971987315010574e-05, "loss": 0.3715, "step": 6072 }, { "epoch": 6.42, "learning_rate": 3.3966701902748414e-05, "loss": 0.3253, "step": 6074 }, { "epoch": 6.42, "learning_rate": 3.396141649048626e-05, "loss": 0.0648, "step": 6076 }, { "epoch": 6.42, "learning_rate": 3.39561310782241e-05, "loss": 0.2528, "step": 6078 }, { "epoch": 6.43, "learning_rate": 3.3950845665961945e-05, "loss": 0.1489, "step": 6080 }, { "epoch": 6.43, "learning_rate": 3.394556025369979e-05, "loss": 0.1198, "step": 6082 }, { "epoch": 6.43, "learning_rate": 3.394027484143764e-05, "loss": 0.1566, "step": 6084 }, { "epoch": 6.43, "learning_rate": 3.3934989429175476e-05, "loss": 0.2284, "step": 6086 }, { "epoch": 6.44, "learning_rate": 3.392970401691332e-05, "loss": 0.1358, "step": 6088 }, { "epoch": 6.44, "learning_rate": 3.392441860465116e-05, "loss": 0.0754, "step": 6090 }, { "epoch": 6.44, "learning_rate": 3.391913319238901e-05, "loss": 0.0822, "step": 6092 }, { "epoch": 6.44, "learning_rate": 3.3913847780126854e-05, "loss": 0.1326, "step": 6094 }, { "epoch": 6.44, "learning_rate": 3.390856236786469e-05, "loss": 0.1727, "step": 6096 }, { "epoch": 6.45, "learning_rate": 3.390327695560254e-05, "loss": 0.1164, "step": 6098 }, { "epoch": 6.45, "learning_rate": 3.389799154334038e-05, "loss": 0.3431, "step": 6100 }, { "epoch": 6.45, "learning_rate": 3.3892706131078225e-05, "loss": 0.1192, "step": 6102 }, { "epoch": 6.45, "learning_rate": 3.388742071881607e-05, "loss": 0.0976, "step": 6104 }, { "epoch": 6.45, "learning_rate": 3.388213530655392e-05, "loss": 0.1534, "step": 6106 }, { "epoch": 6.46, "learning_rate": 3.3876849894291756e-05, "loss": 0.1809, "step": 6108 }, { "epoch": 6.46, "learning_rate": 3.38715644820296e-05, "loss": 0.1546, "step": 6110 }, { "epoch": 6.46, "learning_rate": 3.386627906976744e-05, "loss": 0.0665, "step": 6112 }, { "epoch": 6.46, "learning_rate": 3.386099365750529e-05, "loss": 0.1246, "step": 6114 }, { "epoch": 6.47, "learning_rate": 3.385570824524313e-05, "loss": 0.2174, "step": 6116 }, { "epoch": 6.47, "learning_rate": 3.385042283298097e-05, "loss": 0.1845, "step": 6118 }, { "epoch": 6.47, "learning_rate": 3.384513742071882e-05, "loss": 0.1134, "step": 6120 }, { "epoch": 6.47, "learning_rate": 3.3839852008456665e-05, "loss": 0.2435, "step": 6122 }, { "epoch": 6.47, "learning_rate": 3.3834566596194504e-05, "loss": 0.1558, "step": 6124 }, { "epoch": 6.48, "learning_rate": 3.382928118393235e-05, "loss": 0.0459, "step": 6126 }, { "epoch": 6.48, "learning_rate": 3.382399577167019e-05, "loss": 0.1331, "step": 6128 }, { "epoch": 6.48, "learning_rate": 3.3818710359408036e-05, "loss": 0.1732, "step": 6130 }, { "epoch": 6.48, "learning_rate": 3.3813424947145875e-05, "loss": 0.1099, "step": 6132 }, { "epoch": 6.48, "learning_rate": 3.380813953488372e-05, "loss": 0.2, "step": 6134 }, { "epoch": 6.49, "learning_rate": 3.380285412262156e-05, "loss": 0.1682, "step": 6136 }, { "epoch": 6.49, "learning_rate": 3.379756871035941e-05, "loss": 0.1874, "step": 6138 }, { "epoch": 6.49, "learning_rate": 3.379228329809725e-05, "loss": 0.1585, "step": 6140 }, { "epoch": 6.49, "learning_rate": 3.37869978858351e-05, "loss": 0.2005, "step": 6142 }, { "epoch": 6.49, "learning_rate": 3.378171247357294e-05, "loss": 0.1283, "step": 6144 }, { "epoch": 6.5, "learning_rate": 3.3776427061310784e-05, "loss": 0.0849, "step": 6146 }, { "epoch": 6.5, "learning_rate": 3.377114164904863e-05, "loss": 0.1139, "step": 6148 }, { "epoch": 6.5, "learning_rate": 3.376585623678647e-05, "loss": 0.2118, "step": 6150 }, { "epoch": 6.5, "learning_rate": 3.3760570824524315e-05, "loss": 0.2581, "step": 6152 }, { "epoch": 6.51, "learning_rate": 3.3755285412262154e-05, "loss": 0.1161, "step": 6154 }, { "epoch": 6.51, "learning_rate": 3.375000000000001e-05, "loss": 0.1572, "step": 6156 }, { "epoch": 6.51, "learning_rate": 3.3744714587737846e-05, "loss": 0.1451, "step": 6158 }, { "epoch": 6.51, "learning_rate": 3.373942917547569e-05, "loss": 0.1042, "step": 6160 }, { "epoch": 6.51, "learning_rate": 3.373414376321353e-05, "loss": 0.143, "step": 6162 }, { "epoch": 6.52, "learning_rate": 3.372885835095138e-05, "loss": 0.0863, "step": 6164 }, { "epoch": 6.52, "learning_rate": 3.372357293868922e-05, "loss": 0.1268, "step": 6166 }, { "epoch": 6.52, "learning_rate": 3.371828752642706e-05, "loss": 0.1119, "step": 6168 }, { "epoch": 6.52, "learning_rate": 3.37130021141649e-05, "loss": 0.1199, "step": 6170 }, { "epoch": 6.52, "learning_rate": 3.370771670190275e-05, "loss": 0.1628, "step": 6172 }, { "epoch": 6.53, "learning_rate": 3.3702431289640595e-05, "loss": 0.2543, "step": 6174 }, { "epoch": 6.53, "learning_rate": 3.369714587737844e-05, "loss": 0.1345, "step": 6176 }, { "epoch": 6.53, "learning_rate": 3.369186046511628e-05, "loss": 0.0945, "step": 6178 }, { "epoch": 6.53, "learning_rate": 3.3686575052854126e-05, "loss": 0.112, "step": 6180 }, { "epoch": 6.53, "learning_rate": 3.3681289640591965e-05, "loss": 0.1513, "step": 6182 }, { "epoch": 6.54, "learning_rate": 3.367600422832981e-05, "loss": 0.1097, "step": 6184 }, { "epoch": 6.54, "learning_rate": 3.367071881606765e-05, "loss": 0.0726, "step": 6186 }, { "epoch": 6.54, "learning_rate": 3.36654334038055e-05, "loss": 0.2753, "step": 6188 }, { "epoch": 6.54, "learning_rate": 3.3660147991543336e-05, "loss": 0.3024, "step": 6190 }, { "epoch": 6.55, "learning_rate": 3.365486257928119e-05, "loss": 0.4197, "step": 6192 }, { "epoch": 6.55, "learning_rate": 3.364957716701903e-05, "loss": 0.093, "step": 6194 }, { "epoch": 6.55, "learning_rate": 3.3644291754756874e-05, "loss": 0.1188, "step": 6196 }, { "epoch": 6.55, "learning_rate": 3.3639006342494713e-05, "loss": 0.2256, "step": 6198 }, { "epoch": 6.55, "learning_rate": 3.363372093023256e-05, "loss": 0.1508, "step": 6200 }, { "epoch": 6.56, "learning_rate": 3.3628435517970406e-05, "loss": 0.1873, "step": 6202 }, { "epoch": 6.56, "learning_rate": 3.3623150105708245e-05, "loss": 0.0964, "step": 6204 }, { "epoch": 6.56, "learning_rate": 3.361786469344609e-05, "loss": 0.2517, "step": 6206 }, { "epoch": 6.56, "learning_rate": 3.361257928118393e-05, "loss": 0.2191, "step": 6208 }, { "epoch": 6.56, "learning_rate": 3.360729386892178e-05, "loss": 0.1957, "step": 6210 }, { "epoch": 6.57, "learning_rate": 3.360200845665962e-05, "loss": 0.226, "step": 6212 }, { "epoch": 6.57, "learning_rate": 3.359672304439747e-05, "loss": 0.1739, "step": 6214 }, { "epoch": 6.57, "learning_rate": 3.359143763213531e-05, "loss": 0.1438, "step": 6216 }, { "epoch": 6.57, "learning_rate": 3.3586152219873154e-05, "loss": 0.0768, "step": 6218 }, { "epoch": 6.58, "learning_rate": 3.358086680761099e-05, "loss": 0.1309, "step": 6220 }, { "epoch": 6.58, "learning_rate": 3.357558139534884e-05, "loss": 0.1162, "step": 6222 }, { "epoch": 6.58, "learning_rate": 3.357029598308668e-05, "loss": 0.1296, "step": 6224 }, { "epoch": 6.58, "learning_rate": 3.3565010570824524e-05, "loss": 0.1339, "step": 6226 }, { "epoch": 6.58, "learning_rate": 3.355972515856237e-05, "loss": 0.1097, "step": 6228 }, { "epoch": 6.59, "learning_rate": 3.3554439746300217e-05, "loss": 0.1484, "step": 6230 }, { "epoch": 6.59, "learning_rate": 3.3549154334038056e-05, "loss": 0.1588, "step": 6232 }, { "epoch": 6.59, "learning_rate": 3.35438689217759e-05, "loss": 0.3276, "step": 6234 }, { "epoch": 6.59, "learning_rate": 3.353858350951374e-05, "loss": 0.0764, "step": 6236 }, { "epoch": 6.59, "learning_rate": 3.353329809725159e-05, "loss": 0.1882, "step": 6238 }, { "epoch": 6.6, "learning_rate": 3.3528012684989427e-05, "loss": 0.3437, "step": 6240 }, { "epoch": 6.6, "learning_rate": 3.352272727272727e-05, "loss": 0.2354, "step": 6242 }, { "epoch": 6.6, "learning_rate": 3.351744186046512e-05, "loss": 0.1915, "step": 6244 }, { "epoch": 6.6, "learning_rate": 3.3512156448202965e-05, "loss": 0.1843, "step": 6246 }, { "epoch": 6.6, "learning_rate": 3.3506871035940804e-05, "loss": 0.0724, "step": 6248 }, { "epoch": 6.61, "learning_rate": 3.350158562367865e-05, "loss": 0.141, "step": 6250 }, { "epoch": 6.61, "learning_rate": 3.349630021141649e-05, "loss": 0.1799, "step": 6252 }, { "epoch": 6.61, "learning_rate": 3.3491014799154335e-05, "loss": 0.1416, "step": 6254 }, { "epoch": 6.61, "learning_rate": 3.348572938689218e-05, "loss": 0.1412, "step": 6256 }, { "epoch": 6.62, "learning_rate": 3.348044397463002e-05, "loss": 0.1163, "step": 6258 }, { "epoch": 6.62, "learning_rate": 3.347515856236787e-05, "loss": 0.1239, "step": 6260 }, { "epoch": 6.62, "learning_rate": 3.3469873150105706e-05, "loss": 0.2748, "step": 6262 }, { "epoch": 6.62, "learning_rate": 3.346458773784356e-05, "loss": 0.13, "step": 6264 }, { "epoch": 6.62, "learning_rate": 3.34593023255814e-05, "loss": 0.1479, "step": 6266 }, { "epoch": 6.63, "learning_rate": 3.3454016913319244e-05, "loss": 0.162, "step": 6268 }, { "epoch": 6.63, "learning_rate": 3.3448731501057084e-05, "loss": 0.1834, "step": 6270 }, { "epoch": 6.63, "learning_rate": 3.344344608879493e-05, "loss": 0.1042, "step": 6272 }, { "epoch": 6.63, "learning_rate": 3.343816067653277e-05, "loss": 0.1347, "step": 6274 }, { "epoch": 6.63, "learning_rate": 3.3432875264270615e-05, "loss": 0.2021, "step": 6276 }, { "epoch": 6.64, "learning_rate": 3.3427589852008454e-05, "loss": 0.1335, "step": 6278 }, { "epoch": 6.64, "learning_rate": 3.34223044397463e-05, "loss": 0.1198, "step": 6280 }, { "epoch": 6.64, "learning_rate": 3.3417019027484146e-05, "loss": 0.1227, "step": 6282 }, { "epoch": 6.64, "learning_rate": 3.341173361522199e-05, "loss": 0.227, "step": 6284 }, { "epoch": 6.64, "learning_rate": 3.340644820295983e-05, "loss": 0.1764, "step": 6286 }, { "epoch": 6.65, "learning_rate": 3.340116279069768e-05, "loss": 0.2143, "step": 6288 }, { "epoch": 6.65, "learning_rate": 3.339587737843552e-05, "loss": 0.1631, "step": 6290 }, { "epoch": 6.65, "learning_rate": 3.339059196617336e-05, "loss": 0.3019, "step": 6292 }, { "epoch": 6.65, "learning_rate": 3.33853065539112e-05, "loss": 0.1745, "step": 6294 }, { "epoch": 6.66, "learning_rate": 3.338002114164905e-05, "loss": 0.189, "step": 6296 }, { "epoch": 6.66, "learning_rate": 3.3374735729386894e-05, "loss": 0.1097, "step": 6298 }, { "epoch": 6.66, "learning_rate": 3.336945031712474e-05, "loss": 0.1263, "step": 6300 }, { "epoch": 6.66, "learning_rate": 3.336416490486258e-05, "loss": 0.1255, "step": 6302 }, { "epoch": 6.66, "learning_rate": 3.3358879492600426e-05, "loss": 0.1996, "step": 6304 }, { "epoch": 6.67, "learning_rate": 3.3353594080338265e-05, "loss": 0.2422, "step": 6306 }, { "epoch": 6.67, "learning_rate": 3.334830866807611e-05, "loss": 0.111, "step": 6308 }, { "epoch": 6.67, "learning_rate": 3.334302325581396e-05, "loss": 0.1312, "step": 6310 }, { "epoch": 6.67, "learning_rate": 3.3337737843551797e-05, "loss": 0.2097, "step": 6312 }, { "epoch": 6.67, "learning_rate": 3.333245243128964e-05, "loss": 0.2236, "step": 6314 }, { "epoch": 6.68, "learning_rate": 3.332716701902748e-05, "loss": 0.2479, "step": 6316 }, { "epoch": 6.68, "learning_rate": 3.3321881606765335e-05, "loss": 0.1397, "step": 6318 }, { "epoch": 6.68, "learning_rate": 3.3316596194503174e-05, "loss": 0.1172, "step": 6320 }, { "epoch": 6.68, "learning_rate": 3.331131078224102e-05, "loss": 0.1166, "step": 6322 }, { "epoch": 6.68, "learning_rate": 3.330602536997886e-05, "loss": 0.1666, "step": 6324 }, { "epoch": 6.69, "learning_rate": 3.3300739957716705e-05, "loss": 0.1743, "step": 6326 }, { "epoch": 6.69, "learning_rate": 3.3295454545454545e-05, "loss": 0.1095, "step": 6328 }, { "epoch": 6.69, "learning_rate": 3.329016913319239e-05, "loss": 0.1689, "step": 6330 }, { "epoch": 6.69, "learning_rate": 3.328488372093023e-05, "loss": 0.1888, "step": 6332 }, { "epoch": 6.7, "learning_rate": 3.3279598308668076e-05, "loss": 0.2615, "step": 6334 }, { "epoch": 6.7, "learning_rate": 3.327431289640592e-05, "loss": 0.1308, "step": 6336 }, { "epoch": 6.7, "learning_rate": 3.326902748414377e-05, "loss": 0.142, "step": 6338 }, { "epoch": 6.7, "learning_rate": 3.326374207188161e-05, "loss": 0.3503, "step": 6340 }, { "epoch": 6.7, "learning_rate": 3.3258456659619454e-05, "loss": 0.2034, "step": 6342 }, { "epoch": 6.71, "learning_rate": 3.325317124735729e-05, "loss": 0.1894, "step": 6344 }, { "epoch": 6.71, "learning_rate": 3.324788583509514e-05, "loss": 0.141, "step": 6346 }, { "epoch": 6.71, "learning_rate": 3.324260042283298e-05, "loss": 0.1073, "step": 6348 }, { "epoch": 6.71, "learning_rate": 3.3237315010570824e-05, "loss": 0.1851, "step": 6350 }, { "epoch": 6.71, "learning_rate": 3.323202959830867e-05, "loss": 0.1882, "step": 6352 }, { "epoch": 6.72, "learning_rate": 3.3226744186046516e-05, "loss": 0.1869, "step": 6354 }, { "epoch": 6.72, "learning_rate": 3.3221458773784356e-05, "loss": 0.2836, "step": 6356 }, { "epoch": 6.72, "learning_rate": 3.32161733615222e-05, "loss": 0.1976, "step": 6358 }, { "epoch": 6.72, "learning_rate": 3.321088794926005e-05, "loss": 0.043, "step": 6360 }, { "epoch": 6.73, "learning_rate": 3.320560253699789e-05, "loss": 0.0924, "step": 6362 }, { "epoch": 6.73, "learning_rate": 3.320031712473573e-05, "loss": 0.2507, "step": 6364 }, { "epoch": 6.73, "learning_rate": 3.319503171247357e-05, "loss": 0.232, "step": 6366 }, { "epoch": 6.73, "learning_rate": 3.318974630021142e-05, "loss": 0.2588, "step": 6368 }, { "epoch": 6.73, "learning_rate": 3.318446088794926e-05, "loss": 0.2609, "step": 6370 }, { "epoch": 6.74, "learning_rate": 3.317917547568711e-05, "loss": 0.0929, "step": 6372 }, { "epoch": 6.74, "learning_rate": 3.317389006342495e-05, "loss": 0.3943, "step": 6374 }, { "epoch": 6.74, "learning_rate": 3.3168604651162796e-05, "loss": 0.1662, "step": 6376 }, { "epoch": 6.74, "learning_rate": 3.3163319238900635e-05, "loss": 0.1248, "step": 6378 }, { "epoch": 6.74, "learning_rate": 3.315803382663848e-05, "loss": 0.1521, "step": 6380 }, { "epoch": 6.75, "learning_rate": 3.315274841437632e-05, "loss": 0.1616, "step": 6382 }, { "epoch": 6.75, "learning_rate": 3.3147463002114167e-05, "loss": 0.1287, "step": 6384 }, { "epoch": 6.75, "learning_rate": 3.3142177589852006e-05, "loss": 0.1336, "step": 6386 }, { "epoch": 6.75, "learning_rate": 3.313689217758985e-05, "loss": 0.0605, "step": 6388 }, { "epoch": 6.75, "learning_rate": 3.31316067653277e-05, "loss": 0.1347, "step": 6390 }, { "epoch": 6.76, "learning_rate": 3.3126321353065544e-05, "loss": 0.2142, "step": 6392 }, { "epoch": 6.76, "learning_rate": 3.312103594080338e-05, "loss": 0.1862, "step": 6394 }, { "epoch": 6.76, "learning_rate": 3.311575052854123e-05, "loss": 0.1138, "step": 6396 }, { "epoch": 6.76, "learning_rate": 3.311046511627907e-05, "loss": 0.1733, "step": 6398 }, { "epoch": 6.77, "learning_rate": 3.3105179704016915e-05, "loss": 0.0542, "step": 6400 }, { "epoch": 6.77, "learning_rate": 3.3099894291754754e-05, "loss": 0.1072, "step": 6402 }, { "epoch": 6.77, "learning_rate": 3.30946088794926e-05, "loss": 0.2929, "step": 6404 }, { "epoch": 6.77, "learning_rate": 3.3089323467230446e-05, "loss": 0.3078, "step": 6406 }, { "epoch": 6.77, "learning_rate": 3.308403805496829e-05, "loss": 0.2085, "step": 6408 }, { "epoch": 6.78, "learning_rate": 3.307875264270613e-05, "loss": 0.1387, "step": 6410 }, { "epoch": 6.78, "learning_rate": 3.307346723044398e-05, "loss": 0.1443, "step": 6412 }, { "epoch": 6.78, "learning_rate": 3.3068181818181824e-05, "loss": 0.2217, "step": 6414 }, { "epoch": 6.78, "learning_rate": 3.306289640591966e-05, "loss": 0.1486, "step": 6416 }, { "epoch": 6.78, "learning_rate": 3.305761099365751e-05, "loss": 0.1025, "step": 6418 }, { "epoch": 6.79, "learning_rate": 3.305232558139535e-05, "loss": 0.1191, "step": 6420 }, { "epoch": 6.79, "learning_rate": 3.3047040169133194e-05, "loss": 0.1424, "step": 6422 }, { "epoch": 6.79, "learning_rate": 3.3041754756871034e-05, "loss": 0.2869, "step": 6424 }, { "epoch": 6.79, "learning_rate": 3.3036469344608886e-05, "loss": 0.1608, "step": 6426 }, { "epoch": 6.79, "learning_rate": 3.3031183932346726e-05, "loss": 0.1171, "step": 6428 }, { "epoch": 6.8, "learning_rate": 3.302589852008457e-05, "loss": 0.0738, "step": 6430 }, { "epoch": 6.8, "learning_rate": 3.302061310782241e-05, "loss": 0.2901, "step": 6432 }, { "epoch": 6.8, "learning_rate": 3.301532769556026e-05, "loss": 0.2777, "step": 6434 }, { "epoch": 6.8, "learning_rate": 3.3010042283298096e-05, "loss": 0.1199, "step": 6436 }, { "epoch": 6.81, "learning_rate": 3.300475687103594e-05, "loss": 0.112, "step": 6438 }, { "epoch": 6.81, "learning_rate": 3.299947145877378e-05, "loss": 0.1459, "step": 6440 }, { "epoch": 6.81, "learning_rate": 3.299418604651163e-05, "loss": 0.0753, "step": 6442 }, { "epoch": 6.81, "learning_rate": 3.2988900634249474e-05, "loss": 0.1264, "step": 6444 }, { "epoch": 6.81, "learning_rate": 3.298361522198732e-05, "loss": 0.188, "step": 6446 }, { "epoch": 6.82, "learning_rate": 3.297832980972516e-05, "loss": 0.1609, "step": 6448 }, { "epoch": 6.82, "learning_rate": 3.2973044397463005e-05, "loss": 0.0904, "step": 6450 }, { "epoch": 6.82, "learning_rate": 3.2967758985200845e-05, "loss": 0.1965, "step": 6452 }, { "epoch": 6.82, "learning_rate": 3.296247357293869e-05, "loss": 0.1269, "step": 6454 }, { "epoch": 6.82, "learning_rate": 3.295718816067653e-05, "loss": 0.1102, "step": 6456 }, { "epoch": 6.83, "learning_rate": 3.2951902748414376e-05, "loss": 0.0769, "step": 6458 }, { "epoch": 6.83, "learning_rate": 3.294661733615222e-05, "loss": 0.1696, "step": 6460 }, { "epoch": 6.83, "learning_rate": 3.294133192389007e-05, "loss": 0.1866, "step": 6462 }, { "epoch": 6.83, "learning_rate": 3.293604651162791e-05, "loss": 0.1734, "step": 6464 }, { "epoch": 6.84, "learning_rate": 3.293076109936575e-05, "loss": 0.2824, "step": 6466 }, { "epoch": 6.84, "learning_rate": 3.29254756871036e-05, "loss": 0.1817, "step": 6468 }, { "epoch": 6.84, "learning_rate": 3.292019027484144e-05, "loss": 0.1357, "step": 6470 }, { "epoch": 6.84, "learning_rate": 3.2914904862579285e-05, "loss": 0.2785, "step": 6472 }, { "epoch": 6.84, "learning_rate": 3.2909619450317124e-05, "loss": 0.15, "step": 6474 }, { "epoch": 6.85, "learning_rate": 3.290433403805497e-05, "loss": 0.1504, "step": 6476 }, { "epoch": 6.85, "learning_rate": 3.289904862579281e-05, "loss": 0.3111, "step": 6478 }, { "epoch": 6.85, "learning_rate": 3.289376321353066e-05, "loss": 0.1105, "step": 6480 }, { "epoch": 6.85, "learning_rate": 3.28884778012685e-05, "loss": 0.0804, "step": 6482 }, { "epoch": 6.85, "learning_rate": 3.288319238900635e-05, "loss": 0.23, "step": 6484 }, { "epoch": 6.86, "learning_rate": 3.287790697674419e-05, "loss": 0.1211, "step": 6486 }, { "epoch": 6.86, "learning_rate": 3.287262156448203e-05, "loss": 0.1741, "step": 6488 }, { "epoch": 6.86, "learning_rate": 3.286733615221987e-05, "loss": 0.1537, "step": 6490 }, { "epoch": 6.86, "learning_rate": 3.286205073995772e-05, "loss": 0.1826, "step": 6492 }, { "epoch": 6.86, "learning_rate": 3.285676532769556e-05, "loss": 0.231, "step": 6494 }, { "epoch": 6.87, "learning_rate": 3.2851479915433404e-05, "loss": 0.0593, "step": 6496 }, { "epoch": 6.87, "learning_rate": 3.284619450317125e-05, "loss": 0.1864, "step": 6498 }, { "epoch": 6.87, "learning_rate": 3.2840909090909096e-05, "loss": 0.1587, "step": 6500 }, { "epoch": 6.87, "eval_cer": 0.018067825591336562, "eval_loss": 0.6848969459533691, "eval_runtime": 130.3001, "eval_samples_per_second": 6.454, "eval_steps_per_second": 0.814, "step": 6500 }, { "epoch": 6.87, "learning_rate": 3.2835623678646935e-05, "loss": 0.0839, "step": 6502 }, { "epoch": 6.88, "learning_rate": 3.283033826638478e-05, "loss": 0.2005, "step": 6504 }, { "epoch": 6.88, "learning_rate": 3.282505285412262e-05, "loss": 0.086, "step": 6506 }, { "epoch": 6.88, "learning_rate": 3.2819767441860466e-05, "loss": 0.0649, "step": 6508 }, { "epoch": 6.88, "learning_rate": 3.2814482029598306e-05, "loss": 0.2194, "step": 6510 }, { "epoch": 6.88, "learning_rate": 3.280919661733615e-05, "loss": 0.1811, "step": 6512 }, { "epoch": 6.89, "learning_rate": 3.2803911205074e-05, "loss": 0.1294, "step": 6514 }, { "epoch": 6.89, "learning_rate": 3.2798625792811844e-05, "loss": 0.1745, "step": 6516 }, { "epoch": 6.89, "learning_rate": 3.279334038054968e-05, "loss": 0.2444, "step": 6518 }, { "epoch": 6.89, "learning_rate": 3.278805496828753e-05, "loss": 0.1645, "step": 6520 }, { "epoch": 6.89, "learning_rate": 3.2782769556025375e-05, "loss": 0.3193, "step": 6522 }, { "epoch": 6.9, "learning_rate": 3.2777484143763215e-05, "loss": 0.262, "step": 6524 }, { "epoch": 6.9, "learning_rate": 3.277219873150106e-05, "loss": 0.1257, "step": 6526 }, { "epoch": 6.9, "learning_rate": 3.27669133192389e-05, "loss": 0.2004, "step": 6528 }, { "epoch": 6.9, "learning_rate": 3.2761627906976746e-05, "loss": 0.1237, "step": 6530 }, { "epoch": 6.9, "learning_rate": 3.2756342494714585e-05, "loss": 0.1606, "step": 6532 }, { "epoch": 6.91, "learning_rate": 3.275105708245244e-05, "loss": 0.134, "step": 6534 }, { "epoch": 6.91, "learning_rate": 3.274577167019028e-05, "loss": 0.2359, "step": 6536 }, { "epoch": 6.91, "learning_rate": 3.2740486257928123e-05, "loss": 0.1267, "step": 6538 }, { "epoch": 6.91, "learning_rate": 3.273520084566596e-05, "loss": 0.1402, "step": 6540 }, { "epoch": 6.92, "learning_rate": 3.272991543340381e-05, "loss": 0.2543, "step": 6542 }, { "epoch": 6.92, "learning_rate": 3.272463002114165e-05, "loss": 0.085, "step": 6544 }, { "epoch": 6.92, "learning_rate": 3.2719344608879494e-05, "loss": 0.305, "step": 6546 }, { "epoch": 6.92, "learning_rate": 3.2714059196617333e-05, "loss": 0.1374, "step": 6548 }, { "epoch": 6.92, "learning_rate": 3.270877378435518e-05, "loss": 0.1432, "step": 6550 }, { "epoch": 6.93, "learning_rate": 3.2703488372093026e-05, "loss": 0.1224, "step": 6552 }, { "epoch": 6.93, "learning_rate": 3.269820295983087e-05, "loss": 0.1348, "step": 6554 }, { "epoch": 6.93, "learning_rate": 3.269291754756871e-05, "loss": 0.1378, "step": 6556 }, { "epoch": 6.93, "learning_rate": 3.268763213530656e-05, "loss": 0.1377, "step": 6558 }, { "epoch": 6.93, "learning_rate": 3.2682346723044396e-05, "loss": 0.0546, "step": 6560 }, { "epoch": 6.94, "learning_rate": 3.267706131078224e-05, "loss": 0.1603, "step": 6562 }, { "epoch": 6.94, "learning_rate": 3.267177589852009e-05, "loss": 0.1801, "step": 6564 }, { "epoch": 6.94, "learning_rate": 3.266649048625793e-05, "loss": 0.0764, "step": 6566 }, { "epoch": 6.94, "learning_rate": 3.2661205073995774e-05, "loss": 0.1717, "step": 6568 }, { "epoch": 6.95, "learning_rate": 3.265591966173362e-05, "loss": 0.1367, "step": 6570 }, { "epoch": 6.95, "learning_rate": 3.265063424947146e-05, "loss": 0.2169, "step": 6572 }, { "epoch": 6.95, "learning_rate": 3.2645348837209305e-05, "loss": 0.1314, "step": 6574 }, { "epoch": 6.95, "learning_rate": 3.264006342494715e-05, "loss": 0.1091, "step": 6576 }, { "epoch": 6.95, "learning_rate": 3.263477801268499e-05, "loss": 0.129, "step": 6578 }, { "epoch": 6.96, "learning_rate": 3.2629492600422836e-05, "loss": 0.2033, "step": 6580 }, { "epoch": 6.96, "learning_rate": 3.2624207188160676e-05, "loss": 0.1272, "step": 6582 }, { "epoch": 6.96, "learning_rate": 3.261892177589852e-05, "loss": 0.211, "step": 6584 }, { "epoch": 6.96, "learning_rate": 3.261363636363636e-05, "loss": 0.2411, "step": 6586 }, { "epoch": 6.96, "learning_rate": 3.2608350951374214e-05, "loss": 0.1173, "step": 6588 }, { "epoch": 6.97, "learning_rate": 3.260306553911205e-05, "loss": 0.1694, "step": 6590 }, { "epoch": 6.97, "learning_rate": 3.25977801268499e-05, "loss": 0.15, "step": 6592 }, { "epoch": 6.97, "learning_rate": 3.259249471458774e-05, "loss": 0.1222, "step": 6594 }, { "epoch": 6.97, "learning_rate": 3.2587209302325585e-05, "loss": 0.0788, "step": 6596 }, { "epoch": 6.97, "learning_rate": 3.2581923890063424e-05, "loss": 0.1771, "step": 6598 }, { "epoch": 6.98, "learning_rate": 3.257663847780127e-05, "loss": 0.0798, "step": 6600 }, { "epoch": 6.98, "learning_rate": 3.257135306553911e-05, "loss": 0.2348, "step": 6602 }, { "epoch": 6.98, "learning_rate": 3.2566067653276955e-05, "loss": 0.1541, "step": 6604 }, { "epoch": 6.98, "learning_rate": 3.25607822410148e-05, "loss": 0.0848, "step": 6606 }, { "epoch": 6.99, "learning_rate": 3.255549682875265e-05, "loss": 0.0845, "step": 6608 }, { "epoch": 6.99, "learning_rate": 3.255021141649049e-05, "loss": 0.1386, "step": 6610 }, { "epoch": 6.99, "learning_rate": 3.254492600422833e-05, "loss": 0.07, "step": 6612 }, { "epoch": 6.99, "learning_rate": 3.253964059196617e-05, "loss": 0.0768, "step": 6614 }, { "epoch": 6.99, "learning_rate": 3.253435517970402e-05, "loss": 0.1933, "step": 6616 }, { "epoch": 7.0, "learning_rate": 3.2529069767441864e-05, "loss": 0.1392, "step": 6618 }, { "epoch": 7.0, "learning_rate": 3.2523784355179703e-05, "loss": 0.1438, "step": 6620 }, { "epoch": 7.0, "learning_rate": 3.251849894291755e-05, "loss": 0.1893, "step": 6622 }, { "epoch": 7.0, "learning_rate": 3.2513213530655396e-05, "loss": 0.1026, "step": 6624 }, { "epoch": 7.0, "learning_rate": 3.250792811839324e-05, "loss": 0.1147, "step": 6626 }, { "epoch": 7.01, "learning_rate": 3.250264270613108e-05, "loss": 0.0963, "step": 6628 }, { "epoch": 7.01, "learning_rate": 3.249735729386893e-05, "loss": 0.0652, "step": 6630 }, { "epoch": 7.01, "learning_rate": 3.2492071881606766e-05, "loss": 0.0809, "step": 6632 }, { "epoch": 7.01, "learning_rate": 3.248678646934461e-05, "loss": 0.1304, "step": 6634 }, { "epoch": 7.01, "learning_rate": 3.248150105708245e-05, "loss": 0.1327, "step": 6636 }, { "epoch": 7.02, "learning_rate": 3.24762156448203e-05, "loss": 0.1143, "step": 6638 }, { "epoch": 7.02, "learning_rate": 3.247093023255814e-05, "loss": 0.0977, "step": 6640 }, { "epoch": 7.02, "learning_rate": 3.246564482029599e-05, "loss": 0.149, "step": 6642 }, { "epoch": 7.02, "learning_rate": 3.246035940803383e-05, "loss": 0.1369, "step": 6644 }, { "epoch": 7.03, "learning_rate": 3.2455073995771675e-05, "loss": 0.1006, "step": 6646 }, { "epoch": 7.03, "learning_rate": 3.2449788583509514e-05, "loss": 0.1559, "step": 6648 }, { "epoch": 7.03, "learning_rate": 3.244450317124736e-05, "loss": 0.0658, "step": 6650 }, { "epoch": 7.03, "learning_rate": 3.24392177589852e-05, "loss": 0.2514, "step": 6652 }, { "epoch": 7.03, "learning_rate": 3.2433932346723046e-05, "loss": 0.1672, "step": 6654 }, { "epoch": 7.04, "learning_rate": 3.2428646934460885e-05, "loss": 0.1332, "step": 6656 }, { "epoch": 7.04, "learning_rate": 3.242336152219873e-05, "loss": 0.1938, "step": 6658 }, { "epoch": 7.04, "learning_rate": 3.241807610993658e-05, "loss": 0.1068, "step": 6660 }, { "epoch": 7.04, "learning_rate": 3.241279069767442e-05, "loss": 0.1107, "step": 6662 }, { "epoch": 7.04, "learning_rate": 3.240750528541226e-05, "loss": 0.0745, "step": 6664 }, { "epoch": 7.05, "learning_rate": 3.240221987315011e-05, "loss": 0.0882, "step": 6666 }, { "epoch": 7.05, "learning_rate": 3.239693446088795e-05, "loss": 0.0983, "step": 6668 }, { "epoch": 7.05, "learning_rate": 3.2391649048625794e-05, "loss": 0.0925, "step": 6670 }, { "epoch": 7.05, "learning_rate": 3.238636363636364e-05, "loss": 0.1857, "step": 6672 }, { "epoch": 7.05, "learning_rate": 3.238107822410148e-05, "loss": 0.1183, "step": 6674 }, { "epoch": 7.06, "learning_rate": 3.2375792811839325e-05, "loss": 0.067, "step": 6676 }, { "epoch": 7.06, "learning_rate": 3.237050739957717e-05, "loss": 0.1301, "step": 6678 }, { "epoch": 7.06, "learning_rate": 3.236522198731502e-05, "loss": 0.1827, "step": 6680 }, { "epoch": 7.06, "learning_rate": 3.235993657505286e-05, "loss": 0.0783, "step": 6682 }, { "epoch": 7.07, "learning_rate": 3.23546511627907e-05, "loss": 0.0923, "step": 6684 }, { "epoch": 7.07, "learning_rate": 3.234936575052854e-05, "loss": 0.1305, "step": 6686 }, { "epoch": 7.07, "learning_rate": 3.234408033826639e-05, "loss": 0.0894, "step": 6688 }, { "epoch": 7.07, "learning_rate": 3.233879492600423e-05, "loss": 0.1987, "step": 6690 }, { "epoch": 7.07, "learning_rate": 3.2333509513742073e-05, "loss": 0.1331, "step": 6692 }, { "epoch": 7.08, "learning_rate": 3.232822410147991e-05, "loss": 0.1275, "step": 6694 }, { "epoch": 7.08, "learning_rate": 3.2322938689217766e-05, "loss": 0.1744, "step": 6696 }, { "epoch": 7.08, "learning_rate": 3.2317653276955605e-05, "loss": 0.0823, "step": 6698 }, { "epoch": 7.08, "learning_rate": 3.231236786469345e-05, "loss": 0.1182, "step": 6700 }, { "epoch": 7.08, "learning_rate": 3.230708245243129e-05, "loss": 0.1176, "step": 6702 }, { "epoch": 7.09, "learning_rate": 3.2301797040169136e-05, "loss": 0.1704, "step": 6704 }, { "epoch": 7.09, "learning_rate": 3.2296511627906976e-05, "loss": 0.1159, "step": 6706 }, { "epoch": 7.09, "learning_rate": 3.229122621564482e-05, "loss": 0.1607, "step": 6708 }, { "epoch": 7.09, "learning_rate": 3.228594080338266e-05, "loss": 0.0877, "step": 6710 }, { "epoch": 7.1, "learning_rate": 3.228065539112051e-05, "loss": 0.1278, "step": 6712 }, { "epoch": 7.1, "learning_rate": 3.227536997885835e-05, "loss": 0.1083, "step": 6714 }, { "epoch": 7.1, "learning_rate": 3.22700845665962e-05, "loss": 0.1, "step": 6716 }, { "epoch": 7.1, "learning_rate": 3.226479915433404e-05, "loss": 0.1898, "step": 6718 }, { "epoch": 7.1, "learning_rate": 3.2259513742071884e-05, "loss": 0.0798, "step": 6720 }, { "epoch": 7.11, "learning_rate": 3.2254228329809724e-05, "loss": 0.0928, "step": 6722 }, { "epoch": 7.11, "learning_rate": 3.224894291754757e-05, "loss": 0.1268, "step": 6724 }, { "epoch": 7.11, "learning_rate": 3.2243657505285416e-05, "loss": 0.1476, "step": 6726 }, { "epoch": 7.11, "learning_rate": 3.2238372093023255e-05, "loss": 0.0909, "step": 6728 }, { "epoch": 7.11, "learning_rate": 3.22330866807611e-05, "loss": 0.1367, "step": 6730 }, { "epoch": 7.12, "learning_rate": 3.222780126849895e-05, "loss": 0.1678, "step": 6732 }, { "epoch": 7.12, "learning_rate": 3.222251585623679e-05, "loss": 0.0876, "step": 6734 }, { "epoch": 7.12, "learning_rate": 3.221723044397463e-05, "loss": 0.1025, "step": 6736 }, { "epoch": 7.12, "learning_rate": 3.221194503171248e-05, "loss": 0.1406, "step": 6738 }, { "epoch": 7.12, "learning_rate": 3.220665961945032e-05, "loss": 0.1127, "step": 6740 }, { "epoch": 7.13, "learning_rate": 3.2201374207188164e-05, "loss": 0.0851, "step": 6742 }, { "epoch": 7.13, "learning_rate": 3.2196088794926e-05, "loss": 0.1614, "step": 6744 }, { "epoch": 7.13, "learning_rate": 3.219080338266385e-05, "loss": 0.0852, "step": 6746 }, { "epoch": 7.13, "learning_rate": 3.218551797040169e-05, "loss": 0.0843, "step": 6748 }, { "epoch": 7.14, "learning_rate": 3.218023255813954e-05, "loss": 0.1957, "step": 6750 }, { "epoch": 7.14, "learning_rate": 3.217494714587738e-05, "loss": 0.2616, "step": 6752 }, { "epoch": 7.14, "learning_rate": 3.216966173361523e-05, "loss": 0.203, "step": 6754 }, { "epoch": 7.14, "learning_rate": 3.2164376321353066e-05, "loss": 0.115, "step": 6756 }, { "epoch": 7.14, "learning_rate": 3.215909090909091e-05, "loss": 0.1622, "step": 6758 }, { "epoch": 7.15, "learning_rate": 3.215380549682875e-05, "loss": 0.0653, "step": 6760 }, { "epoch": 7.15, "learning_rate": 3.21485200845666e-05, "loss": 0.1484, "step": 6762 }, { "epoch": 7.15, "learning_rate": 3.214323467230444e-05, "loss": 0.2066, "step": 6764 }, { "epoch": 7.15, "learning_rate": 3.213794926004228e-05, "loss": 0.2578, "step": 6766 }, { "epoch": 7.15, "learning_rate": 3.213266384778013e-05, "loss": 0.1562, "step": 6768 }, { "epoch": 7.16, "learning_rate": 3.2127378435517975e-05, "loss": 0.2174, "step": 6770 }, { "epoch": 7.16, "learning_rate": 3.2122093023255814e-05, "loss": 0.0996, "step": 6772 }, { "epoch": 7.16, "learning_rate": 3.211680761099366e-05, "loss": 0.144, "step": 6774 }, { "epoch": 7.16, "learning_rate": 3.21115221987315e-05, "loss": 0.2435, "step": 6776 }, { "epoch": 7.16, "learning_rate": 3.2106236786469346e-05, "loss": 0.1747, "step": 6778 }, { "epoch": 7.17, "learning_rate": 3.210095137420719e-05, "loss": 0.1178, "step": 6780 }, { "epoch": 7.17, "learning_rate": 3.209566596194503e-05, "loss": 0.0761, "step": 6782 }, { "epoch": 7.17, "learning_rate": 3.209038054968288e-05, "loss": 0.1601, "step": 6784 }, { "epoch": 7.17, "learning_rate": 3.2085095137420716e-05, "loss": 0.1881, "step": 6786 }, { "epoch": 7.18, "learning_rate": 3.207980972515857e-05, "loss": 0.0601, "step": 6788 }, { "epoch": 7.18, "learning_rate": 3.207452431289641e-05, "loss": 0.0526, "step": 6790 }, { "epoch": 7.18, "learning_rate": 3.2069238900634254e-05, "loss": 0.1336, "step": 6792 }, { "epoch": 7.18, "learning_rate": 3.2063953488372094e-05, "loss": 0.3576, "step": 6794 }, { "epoch": 7.18, "learning_rate": 3.205866807610994e-05, "loss": 0.1493, "step": 6796 }, { "epoch": 7.19, "learning_rate": 3.205338266384778e-05, "loss": 0.1175, "step": 6798 }, { "epoch": 7.19, "learning_rate": 3.2048097251585625e-05, "loss": 0.0731, "step": 6800 }, { "epoch": 7.19, "learning_rate": 3.2042811839323464e-05, "loss": 0.1651, "step": 6802 }, { "epoch": 7.19, "learning_rate": 3.203752642706131e-05, "loss": 0.1258, "step": 6804 }, { "epoch": 7.19, "learning_rate": 3.2032241014799157e-05, "loss": 0.0815, "step": 6806 }, { "epoch": 7.2, "learning_rate": 3.2026955602537e-05, "loss": 0.1047, "step": 6808 }, { "epoch": 7.2, "learning_rate": 3.202167019027484e-05, "loss": 0.0978, "step": 6810 }, { "epoch": 7.2, "learning_rate": 3.201638477801269e-05, "loss": 0.122, "step": 6812 }, { "epoch": 7.2, "learning_rate": 3.201109936575053e-05, "loss": 0.2287, "step": 6814 }, { "epoch": 7.21, "learning_rate": 3.200581395348837e-05, "loss": 0.1758, "step": 6816 }, { "epoch": 7.21, "learning_rate": 3.200052854122621e-05, "loss": 0.1481, "step": 6818 }, { "epoch": 7.21, "learning_rate": 3.199524312896406e-05, "loss": 0.1527, "step": 6820 }, { "epoch": 7.21, "learning_rate": 3.1989957716701905e-05, "loss": 0.0681, "step": 6822 }, { "epoch": 7.21, "learning_rate": 3.198467230443975e-05, "loss": 0.34, "step": 6824 }, { "epoch": 7.22, "learning_rate": 3.197938689217759e-05, "loss": 0.0775, "step": 6826 }, { "epoch": 7.22, "learning_rate": 3.1974101479915436e-05, "loss": 0.1494, "step": 6828 }, { "epoch": 7.22, "learning_rate": 3.196881606765328e-05, "loss": 0.1659, "step": 6830 }, { "epoch": 7.22, "learning_rate": 3.196353065539112e-05, "loss": 0.1885, "step": 6832 }, { "epoch": 7.22, "learning_rate": 3.195824524312897e-05, "loss": 0.1345, "step": 6834 }, { "epoch": 7.23, "learning_rate": 3.195295983086681e-05, "loss": 0.0972, "step": 6836 }, { "epoch": 7.23, "learning_rate": 3.194767441860465e-05, "loss": 0.096, "step": 6838 }, { "epoch": 7.23, "learning_rate": 3.194238900634249e-05, "loss": 0.2733, "step": 6840 }, { "epoch": 7.23, "learning_rate": 3.1937103594080345e-05, "loss": 0.1251, "step": 6842 }, { "epoch": 7.23, "learning_rate": 3.1931818181818184e-05, "loss": 0.1594, "step": 6844 }, { "epoch": 7.24, "learning_rate": 3.192653276955603e-05, "loss": 0.1894, "step": 6846 }, { "epoch": 7.24, "learning_rate": 3.192124735729387e-05, "loss": 0.125, "step": 6848 }, { "epoch": 7.24, "learning_rate": 3.1915961945031716e-05, "loss": 0.0435, "step": 6850 }, { "epoch": 7.24, "learning_rate": 3.1910676532769555e-05, "loss": 0.1681, "step": 6852 }, { "epoch": 7.25, "learning_rate": 3.19053911205074e-05, "loss": 0.0633, "step": 6854 }, { "epoch": 7.25, "learning_rate": 3.190010570824524e-05, "loss": 0.0713, "step": 6856 }, { "epoch": 7.25, "learning_rate": 3.1894820295983086e-05, "loss": 0.0999, "step": 6858 }, { "epoch": 7.25, "learning_rate": 3.188953488372093e-05, "loss": 0.1284, "step": 6860 }, { "epoch": 7.25, "learning_rate": 3.188424947145878e-05, "loss": 0.0723, "step": 6862 }, { "epoch": 7.26, "learning_rate": 3.187896405919662e-05, "loss": 0.0406, "step": 6864 }, { "epoch": 7.26, "learning_rate": 3.1873678646934464e-05, "loss": 0.0977, "step": 6866 }, { "epoch": 7.26, "learning_rate": 3.18683932346723e-05, "loss": 0.0727, "step": 6868 }, { "epoch": 7.26, "learning_rate": 3.186310782241015e-05, "loss": 0.2983, "step": 6870 }, { "epoch": 7.26, "learning_rate": 3.185782241014799e-05, "loss": 0.1568, "step": 6872 }, { "epoch": 7.27, "learning_rate": 3.1852536997885834e-05, "loss": 0.306, "step": 6874 }, { "epoch": 7.27, "learning_rate": 3.184725158562368e-05, "loss": 0.4258, "step": 6876 }, { "epoch": 7.27, "learning_rate": 3.1841966173361527e-05, "loss": 0.1543, "step": 6878 }, { "epoch": 7.27, "learning_rate": 3.1836680761099366e-05, "loss": 0.1296, "step": 6880 }, { "epoch": 7.27, "learning_rate": 3.183139534883721e-05, "loss": 0.1369, "step": 6882 }, { "epoch": 7.28, "learning_rate": 3.182610993657506e-05, "loss": 0.1389, "step": 6884 }, { "epoch": 7.28, "learning_rate": 3.18208245243129e-05, "loss": 0.1247, "step": 6886 }, { "epoch": 7.28, "learning_rate": 3.181553911205074e-05, "loss": 0.1223, "step": 6888 }, { "epoch": 7.28, "learning_rate": 3.181025369978858e-05, "loss": 0.1556, "step": 6890 }, { "epoch": 7.29, "learning_rate": 3.180496828752643e-05, "loss": 0.1862, "step": 6892 }, { "epoch": 7.29, "learning_rate": 3.179968287526427e-05, "loss": 0.1612, "step": 6894 }, { "epoch": 7.29, "learning_rate": 3.179439746300212e-05, "loss": 0.1985, "step": 6896 }, { "epoch": 7.29, "learning_rate": 3.178911205073996e-05, "loss": 0.12, "step": 6898 }, { "epoch": 7.29, "learning_rate": 3.1783826638477806e-05, "loss": 0.2225, "step": 6900 }, { "epoch": 7.3, "learning_rate": 3.1778541226215645e-05, "loss": 0.1292, "step": 6902 }, { "epoch": 7.3, "learning_rate": 3.177325581395349e-05, "loss": 0.0941, "step": 6904 }, { "epoch": 7.3, "learning_rate": 3.176797040169133e-05, "loss": 0.318, "step": 6906 }, { "epoch": 7.3, "learning_rate": 3.176268498942918e-05, "loss": 0.3259, "step": 6908 }, { "epoch": 7.3, "learning_rate": 3.1757399577167016e-05, "loss": 0.1176, "step": 6910 }, { "epoch": 7.31, "learning_rate": 3.175211416490486e-05, "loss": 0.1663, "step": 6912 }, { "epoch": 7.31, "learning_rate": 3.174682875264271e-05, "loss": 0.171, "step": 6914 }, { "epoch": 7.31, "learning_rate": 3.1741543340380554e-05, "loss": 0.2284, "step": 6916 }, { "epoch": 7.31, "learning_rate": 3.1736257928118394e-05, "loss": 0.1861, "step": 6918 }, { "epoch": 7.32, "learning_rate": 3.173097251585624e-05, "loss": 0.2835, "step": 6920 }, { "epoch": 7.32, "learning_rate": 3.172568710359408e-05, "loss": 0.149, "step": 6922 }, { "epoch": 7.32, "learning_rate": 3.1720401691331925e-05, "loss": 0.0834, "step": 6924 }, { "epoch": 7.32, "learning_rate": 3.1715116279069764e-05, "loss": 0.1018, "step": 6926 }, { "epoch": 7.32, "learning_rate": 3.170983086680761e-05, "loss": 0.057, "step": 6928 }, { "epoch": 7.33, "learning_rate": 3.1704545454545456e-05, "loss": 0.1553, "step": 6930 }, { "epoch": 7.33, "learning_rate": 3.16992600422833e-05, "loss": 0.0442, "step": 6932 }, { "epoch": 7.33, "learning_rate": 3.169397463002114e-05, "loss": 0.0943, "step": 6934 }, { "epoch": 7.33, "learning_rate": 3.168868921775899e-05, "loss": 0.1423, "step": 6936 }, { "epoch": 7.33, "learning_rate": 3.1683403805496834e-05, "loss": 0.101, "step": 6938 }, { "epoch": 7.34, "learning_rate": 3.167811839323467e-05, "loss": 0.0697, "step": 6940 }, { "epoch": 7.34, "learning_rate": 3.167283298097252e-05, "loss": 0.0719, "step": 6942 }, { "epoch": 7.34, "learning_rate": 3.166754756871036e-05, "loss": 0.2244, "step": 6944 }, { "epoch": 7.34, "learning_rate": 3.1662262156448205e-05, "loss": 0.0793, "step": 6946 }, { "epoch": 7.34, "learning_rate": 3.1656976744186044e-05, "loss": 0.0689, "step": 6948 }, { "epoch": 7.35, "learning_rate": 3.16516913319239e-05, "loss": 0.1631, "step": 6950 }, { "epoch": 7.35, "learning_rate": 3.1646405919661736e-05, "loss": 0.057, "step": 6952 }, { "epoch": 7.35, "learning_rate": 3.164112050739958e-05, "loss": 0.1282, "step": 6954 }, { "epoch": 7.35, "learning_rate": 3.163583509513742e-05, "loss": 0.183, "step": 6956 }, { "epoch": 7.36, "learning_rate": 3.163054968287527e-05, "loss": 0.114, "step": 6958 }, { "epoch": 7.36, "learning_rate": 3.162526427061311e-05, "loss": 0.1625, "step": 6960 }, { "epoch": 7.36, "learning_rate": 3.161997885835095e-05, "loss": 0.1943, "step": 6962 }, { "epoch": 7.36, "learning_rate": 3.161469344608879e-05, "loss": 0.1572, "step": 6964 }, { "epoch": 7.36, "learning_rate": 3.160940803382664e-05, "loss": 0.1261, "step": 6966 }, { "epoch": 7.37, "learning_rate": 3.1604122621564484e-05, "loss": 0.0798, "step": 6968 }, { "epoch": 7.37, "learning_rate": 3.159883720930233e-05, "loss": 0.1337, "step": 6970 }, { "epoch": 7.37, "learning_rate": 3.159355179704017e-05, "loss": 0.0814, "step": 6972 }, { "epoch": 7.37, "learning_rate": 3.1588266384778015e-05, "loss": 0.0963, "step": 6974 }, { "epoch": 7.37, "learning_rate": 3.1582980972515855e-05, "loss": 0.1317, "step": 6976 }, { "epoch": 7.38, "learning_rate": 3.15776955602537e-05, "loss": 0.1232, "step": 6978 }, { "epoch": 7.38, "learning_rate": 3.157241014799155e-05, "loss": 0.1589, "step": 6980 }, { "epoch": 7.38, "learning_rate": 3.1567124735729386e-05, "loss": 0.1632, "step": 6982 }, { "epoch": 7.38, "learning_rate": 3.156183932346723e-05, "loss": 0.0875, "step": 6984 }, { "epoch": 7.38, "learning_rate": 3.155655391120508e-05, "loss": 0.1064, "step": 6986 }, { "epoch": 7.39, "learning_rate": 3.155126849894292e-05, "loss": 0.134, "step": 6988 }, { "epoch": 7.39, "learning_rate": 3.1545983086680764e-05, "loss": 0.0632, "step": 6990 }, { "epoch": 7.39, "learning_rate": 3.154069767441861e-05, "loss": 0.1357, "step": 6992 }, { "epoch": 7.39, "learning_rate": 3.153541226215645e-05, "loss": 0.0639, "step": 6994 }, { "epoch": 7.4, "learning_rate": 3.1530126849894295e-05, "loss": 0.0779, "step": 6996 }, { "epoch": 7.4, "learning_rate": 3.1524841437632134e-05, "loss": 0.1556, "step": 6998 }, { "epoch": 7.4, "learning_rate": 3.151955602536998e-05, "loss": 0.2285, "step": 7000 }, { "epoch": 7.4, "eval_cer": 0.016699914505557138, "eval_loss": 0.7643967270851135, "eval_runtime": 129.8037, "eval_samples_per_second": 6.479, "eval_steps_per_second": 0.817, "step": 7000 }, { "epoch": 7.4, "learning_rate": 3.151427061310782e-05, "loss": 0.0917, "step": 7002 }, { "epoch": 7.4, "learning_rate": 3.150898520084567e-05, "loss": 0.1798, "step": 7004 }, { "epoch": 7.41, "learning_rate": 3.150369978858351e-05, "loss": 0.122, "step": 7006 }, { "epoch": 7.41, "learning_rate": 3.149841437632136e-05, "loss": 0.0871, "step": 7008 }, { "epoch": 7.41, "learning_rate": 3.14931289640592e-05, "loss": 0.0828, "step": 7010 }, { "epoch": 7.41, "learning_rate": 3.148784355179704e-05, "loss": 0.1366, "step": 7012 }, { "epoch": 7.41, "learning_rate": 3.148255813953488e-05, "loss": 0.1001, "step": 7014 }, { "epoch": 7.42, "learning_rate": 3.147727272727273e-05, "loss": 0.1294, "step": 7016 }, { "epoch": 7.42, "learning_rate": 3.147198731501057e-05, "loss": 0.116, "step": 7018 }, { "epoch": 7.42, "learning_rate": 3.1466701902748414e-05, "loss": 0.0794, "step": 7020 }, { "epoch": 7.42, "learning_rate": 3.146141649048626e-05, "loss": 0.0282, "step": 7022 }, { "epoch": 7.42, "learning_rate": 3.1456131078224106e-05, "loss": 0.1102, "step": 7024 }, { "epoch": 7.43, "learning_rate": 3.1450845665961945e-05, "loss": 0.0907, "step": 7026 }, { "epoch": 7.43, "learning_rate": 3.144556025369979e-05, "loss": 0.097, "step": 7028 }, { "epoch": 7.43, "learning_rate": 3.144027484143763e-05, "loss": 0.1432, "step": 7030 }, { "epoch": 7.43, "learning_rate": 3.143498942917548e-05, "loss": 0.1493, "step": 7032 }, { "epoch": 7.44, "learning_rate": 3.142970401691332e-05, "loss": 0.2088, "step": 7034 }, { "epoch": 7.44, "learning_rate": 3.142441860465116e-05, "loss": 0.5473, "step": 7036 }, { "epoch": 7.44, "learning_rate": 3.141913319238901e-05, "loss": 0.1131, "step": 7038 }, { "epoch": 7.44, "learning_rate": 3.1413847780126854e-05, "loss": 0.0682, "step": 7040 }, { "epoch": 7.44, "learning_rate": 3.1408562367864693e-05, "loss": 0.1884, "step": 7042 }, { "epoch": 7.45, "learning_rate": 3.140327695560254e-05, "loss": 0.1398, "step": 7044 }, { "epoch": 7.45, "learning_rate": 3.1397991543340386e-05, "loss": 0.2104, "step": 7046 }, { "epoch": 7.45, "learning_rate": 3.1392706131078225e-05, "loss": 0.089, "step": 7048 }, { "epoch": 7.45, "learning_rate": 3.138742071881607e-05, "loss": 0.2262, "step": 7050 }, { "epoch": 7.45, "learning_rate": 3.138213530655391e-05, "loss": 0.3409, "step": 7052 }, { "epoch": 7.46, "learning_rate": 3.1376849894291756e-05, "loss": 0.1543, "step": 7054 }, { "epoch": 7.46, "learning_rate": 3.1371564482029595e-05, "loss": 0.079, "step": 7056 }, { "epoch": 7.46, "learning_rate": 3.136627906976745e-05, "loss": 0.172, "step": 7058 }, { "epoch": 7.46, "learning_rate": 3.136099365750529e-05, "loss": 0.1977, "step": 7060 }, { "epoch": 7.47, "learning_rate": 3.1355708245243134e-05, "loss": 0.2035, "step": 7062 }, { "epoch": 7.47, "learning_rate": 3.135042283298097e-05, "loss": 0.0555, "step": 7064 }, { "epoch": 7.47, "learning_rate": 3.134513742071882e-05, "loss": 0.0926, "step": 7066 }, { "epoch": 7.47, "learning_rate": 3.133985200845666e-05, "loss": 0.1551, "step": 7068 }, { "epoch": 7.47, "learning_rate": 3.1334566596194504e-05, "loss": 0.1136, "step": 7070 }, { "epoch": 7.48, "learning_rate": 3.1329281183932344e-05, "loss": 0.1401, "step": 7072 }, { "epoch": 7.48, "learning_rate": 3.132399577167019e-05, "loss": 0.1895, "step": 7074 }, { "epoch": 7.48, "learning_rate": 3.1318710359408036e-05, "loss": 0.1383, "step": 7076 }, { "epoch": 7.48, "learning_rate": 3.131342494714588e-05, "loss": 0.1553, "step": 7078 }, { "epoch": 7.48, "learning_rate": 3.130813953488372e-05, "loss": 0.0743, "step": 7080 }, { "epoch": 7.49, "learning_rate": 3.130285412262157e-05, "loss": 0.0585, "step": 7082 }, { "epoch": 7.49, "learning_rate": 3.1297568710359406e-05, "loss": 0.0266, "step": 7084 }, { "epoch": 7.49, "learning_rate": 3.129228329809725e-05, "loss": 0.0958, "step": 7086 }, { "epoch": 7.49, "learning_rate": 3.12869978858351e-05, "loss": 0.2182, "step": 7088 }, { "epoch": 7.49, "learning_rate": 3.128171247357294e-05, "loss": 0.1488, "step": 7090 }, { "epoch": 7.5, "learning_rate": 3.1276427061310784e-05, "loss": 0.2245, "step": 7092 }, { "epoch": 7.5, "learning_rate": 3.127114164904863e-05, "loss": 0.1023, "step": 7094 }, { "epoch": 7.5, "learning_rate": 3.1265856236786476e-05, "loss": 0.2489, "step": 7096 }, { "epoch": 7.5, "learning_rate": 3.1260570824524315e-05, "loss": 0.2633, "step": 7098 }, { "epoch": 7.51, "learning_rate": 3.125528541226216e-05, "loss": 0.1263, "step": 7100 }, { "epoch": 7.51, "learning_rate": 3.125e-05, "loss": 0.1476, "step": 7102 }, { "epoch": 7.51, "learning_rate": 3.124471458773785e-05, "loss": 0.1078, "step": 7104 }, { "epoch": 7.51, "learning_rate": 3.1239429175475686e-05, "loss": 0.1322, "step": 7106 }, { "epoch": 7.51, "learning_rate": 3.123414376321353e-05, "loss": 0.1396, "step": 7108 }, { "epoch": 7.52, "learning_rate": 3.122885835095137e-05, "loss": 0.1505, "step": 7110 }, { "epoch": 7.52, "learning_rate": 3.1223572938689224e-05, "loss": 0.2306, "step": 7112 }, { "epoch": 7.52, "learning_rate": 3.1218287526427063e-05, "loss": 0.115, "step": 7114 }, { "epoch": 7.52, "learning_rate": 3.121300211416491e-05, "loss": 0.0433, "step": 7116 }, { "epoch": 7.52, "learning_rate": 3.120771670190275e-05, "loss": 0.059, "step": 7118 }, { "epoch": 7.53, "learning_rate": 3.1202431289640595e-05, "loss": 0.0396, "step": 7120 }, { "epoch": 7.53, "learning_rate": 3.1197145877378434e-05, "loss": 0.1071, "step": 7122 }, { "epoch": 7.53, "learning_rate": 3.119186046511628e-05, "loss": 0.1492, "step": 7124 }, { "epoch": 7.53, "learning_rate": 3.118657505285412e-05, "loss": 0.1364, "step": 7126 }, { "epoch": 7.53, "learning_rate": 3.1181289640591966e-05, "loss": 0.365, "step": 7128 }, { "epoch": 7.54, "learning_rate": 3.117600422832981e-05, "loss": 0.1789, "step": 7130 }, { "epoch": 7.54, "learning_rate": 3.117071881606766e-05, "loss": 0.0941, "step": 7132 }, { "epoch": 7.54, "learning_rate": 3.11654334038055e-05, "loss": 0.1209, "step": 7134 }, { "epoch": 7.54, "learning_rate": 3.116014799154334e-05, "loss": 0.2109, "step": 7136 }, { "epoch": 7.55, "learning_rate": 3.115486257928118e-05, "loss": 0.0642, "step": 7138 }, { "epoch": 7.55, "learning_rate": 3.114957716701903e-05, "loss": 0.1333, "step": 7140 }, { "epoch": 7.55, "learning_rate": 3.1144291754756874e-05, "loss": 0.0888, "step": 7142 }, { "epoch": 7.55, "learning_rate": 3.1139006342494714e-05, "loss": 0.1243, "step": 7144 }, { "epoch": 7.55, "learning_rate": 3.113372093023256e-05, "loss": 0.0929, "step": 7146 }, { "epoch": 7.56, "learning_rate": 3.1128435517970406e-05, "loss": 0.1093, "step": 7148 }, { "epoch": 7.56, "learning_rate": 3.112315010570825e-05, "loss": 0.1267, "step": 7150 }, { "epoch": 7.56, "learning_rate": 3.111786469344609e-05, "loss": 0.0882, "step": 7152 }, { "epoch": 7.56, "learning_rate": 3.111257928118394e-05, "loss": 0.2262, "step": 7154 }, { "epoch": 7.56, "learning_rate": 3.1107293868921776e-05, "loss": 0.0967, "step": 7156 }, { "epoch": 7.57, "learning_rate": 3.110200845665962e-05, "loss": 0.2711, "step": 7158 }, { "epoch": 7.57, "learning_rate": 3.109672304439746e-05, "loss": 0.0687, "step": 7160 }, { "epoch": 7.57, "learning_rate": 3.109143763213531e-05, "loss": 0.2071, "step": 7162 }, { "epoch": 7.57, "learning_rate": 3.108615221987315e-05, "loss": 0.1116, "step": 7164 }, { "epoch": 7.58, "learning_rate": 3.1080866807611e-05, "loss": 0.1037, "step": 7166 }, { "epoch": 7.58, "learning_rate": 3.107558139534884e-05, "loss": 0.1094, "step": 7168 }, { "epoch": 7.58, "learning_rate": 3.1070295983086685e-05, "loss": 0.123, "step": 7170 }, { "epoch": 7.58, "learning_rate": 3.1065010570824525e-05, "loss": 0.1824, "step": 7172 }, { "epoch": 7.58, "learning_rate": 3.105972515856237e-05, "loss": 0.0796, "step": 7174 }, { "epoch": 7.59, "learning_rate": 3.105443974630021e-05, "loss": 0.0495, "step": 7176 }, { "epoch": 7.59, "learning_rate": 3.1049154334038056e-05, "loss": 0.1828, "step": 7178 }, { "epoch": 7.59, "learning_rate": 3.1043868921775895e-05, "loss": 0.0642, "step": 7180 }, { "epoch": 7.59, "learning_rate": 3.103858350951374e-05, "loss": 0.1328, "step": 7182 }, { "epoch": 7.59, "learning_rate": 3.103329809725159e-05, "loss": 0.1206, "step": 7184 }, { "epoch": 7.6, "learning_rate": 3.1028012684989433e-05, "loss": 0.1608, "step": 7186 }, { "epoch": 7.6, "learning_rate": 3.102272727272727e-05, "loss": 0.157, "step": 7188 }, { "epoch": 7.6, "learning_rate": 3.101744186046512e-05, "loss": 0.0986, "step": 7190 }, { "epoch": 7.6, "learning_rate": 3.101215644820296e-05, "loss": 0.1187, "step": 7192 }, { "epoch": 7.6, "learning_rate": 3.1006871035940804e-05, "loss": 0.0329, "step": 7194 }, { "epoch": 7.61, "learning_rate": 3.100158562367865e-05, "loss": 0.138, "step": 7196 }, { "epoch": 7.61, "learning_rate": 3.099630021141649e-05, "loss": 0.1953, "step": 7198 }, { "epoch": 7.61, "learning_rate": 3.0991014799154336e-05, "loss": 0.2044, "step": 7200 }, { "epoch": 7.61, "learning_rate": 3.098572938689218e-05, "loss": 0.0648, "step": 7202 }, { "epoch": 7.62, "learning_rate": 3.098044397463003e-05, "loss": 0.1554, "step": 7204 }, { "epoch": 7.62, "learning_rate": 3.097515856236787e-05, "loss": 0.1202, "step": 7206 }, { "epoch": 7.62, "learning_rate": 3.096987315010571e-05, "loss": 0.1143, "step": 7208 }, { "epoch": 7.62, "learning_rate": 3.096458773784355e-05, "loss": 0.0999, "step": 7210 }, { "epoch": 7.62, "learning_rate": 3.09593023255814e-05, "loss": 0.1562, "step": 7212 }, { "epoch": 7.63, "learning_rate": 3.095401691331924e-05, "loss": 0.1498, "step": 7214 }, { "epoch": 7.63, "learning_rate": 3.0948731501057084e-05, "loss": 0.1716, "step": 7216 }, { "epoch": 7.63, "learning_rate": 3.094344608879492e-05, "loss": 0.1226, "step": 7218 }, { "epoch": 7.63, "learning_rate": 3.0938160676532776e-05, "loss": 0.1101, "step": 7220 }, { "epoch": 7.63, "learning_rate": 3.0932875264270615e-05, "loss": 0.0776, "step": 7222 }, { "epoch": 7.64, "learning_rate": 3.092758985200846e-05, "loss": 0.1376, "step": 7224 }, { "epoch": 7.64, "learning_rate": 3.09223044397463e-05, "loss": 0.1301, "step": 7226 }, { "epoch": 7.64, "learning_rate": 3.0917019027484147e-05, "loss": 0.1084, "step": 7228 }, { "epoch": 7.64, "learning_rate": 3.0911733615221986e-05, "loss": 0.1025, "step": 7230 }, { "epoch": 7.64, "learning_rate": 3.090644820295983e-05, "loss": 0.0797, "step": 7232 }, { "epoch": 7.65, "learning_rate": 3.090116279069767e-05, "loss": 0.0559, "step": 7234 }, { "epoch": 7.65, "learning_rate": 3.089587737843552e-05, "loss": 0.1271, "step": 7236 }, { "epoch": 7.65, "learning_rate": 3.089059196617336e-05, "loss": 0.1511, "step": 7238 }, { "epoch": 7.65, "learning_rate": 3.088530655391121e-05, "loss": 0.0656, "step": 7240 }, { "epoch": 7.66, "learning_rate": 3.088002114164905e-05, "loss": 0.3565, "step": 7242 }, { "epoch": 7.66, "learning_rate": 3.0874735729386895e-05, "loss": 0.1438, "step": 7244 }, { "epoch": 7.66, "learning_rate": 3.0869450317124734e-05, "loss": 0.1133, "step": 7246 }, { "epoch": 7.66, "learning_rate": 3.086416490486258e-05, "loss": 0.1269, "step": 7248 }, { "epoch": 7.66, "learning_rate": 3.0858879492600426e-05, "loss": 0.0913, "step": 7250 }, { "epoch": 7.67, "learning_rate": 3.0853594080338265e-05, "loss": 0.0962, "step": 7252 }, { "epoch": 7.67, "learning_rate": 3.084830866807611e-05, "loss": 0.2436, "step": 7254 }, { "epoch": 7.67, "learning_rate": 3.084302325581396e-05, "loss": 0.1272, "step": 7256 }, { "epoch": 7.67, "learning_rate": 3.0837737843551804e-05, "loss": 0.0837, "step": 7258 }, { "epoch": 7.67, "learning_rate": 3.083245243128964e-05, "loss": 0.084, "step": 7260 }, { "epoch": 7.68, "learning_rate": 3.082716701902749e-05, "loss": 0.1018, "step": 7262 }, { "epoch": 7.68, "learning_rate": 3.082188160676533e-05, "loss": 0.0634, "step": 7264 }, { "epoch": 7.68, "learning_rate": 3.0816596194503174e-05, "loss": 0.0727, "step": 7266 }, { "epoch": 7.68, "learning_rate": 3.0811310782241014e-05, "loss": 0.0855, "step": 7268 }, { "epoch": 7.68, "learning_rate": 3.080602536997886e-05, "loss": 0.1704, "step": 7270 }, { "epoch": 7.69, "learning_rate": 3.08007399577167e-05, "loss": 0.1212, "step": 7272 }, { "epoch": 7.69, "learning_rate": 3.079545454545455e-05, "loss": 0.1861, "step": 7274 }, { "epoch": 7.69, "learning_rate": 3.079016913319239e-05, "loss": 0.1109, "step": 7276 }, { "epoch": 7.69, "learning_rate": 3.078488372093024e-05, "loss": 0.148, "step": 7278 }, { "epoch": 7.7, "learning_rate": 3.0779598308668076e-05, "loss": 0.107, "step": 7280 }, { "epoch": 7.7, "learning_rate": 3.077431289640592e-05, "loss": 0.1321, "step": 7282 }, { "epoch": 7.7, "learning_rate": 3.076902748414376e-05, "loss": 0.1371, "step": 7284 }, { "epoch": 7.7, "learning_rate": 3.076374207188161e-05, "loss": 0.0779, "step": 7286 }, { "epoch": 7.7, "learning_rate": 3.075845665961945e-05, "loss": 0.2148, "step": 7288 }, { "epoch": 7.71, "learning_rate": 3.075317124735729e-05, "loss": 0.083, "step": 7290 }, { "epoch": 7.71, "learning_rate": 3.074788583509514e-05, "loss": 0.1508, "step": 7292 }, { "epoch": 7.71, "learning_rate": 3.0742600422832985e-05, "loss": 0.1394, "step": 7294 }, { "epoch": 7.71, "learning_rate": 3.0737315010570824e-05, "loss": 0.221, "step": 7296 }, { "epoch": 7.71, "learning_rate": 3.073202959830867e-05, "loss": 0.1494, "step": 7298 }, { "epoch": 7.72, "learning_rate": 3.0726744186046517e-05, "loss": 0.121, "step": 7300 }, { "epoch": 7.72, "learning_rate": 3.0721458773784356e-05, "loss": 0.1156, "step": 7302 }, { "epoch": 7.72, "learning_rate": 3.07161733615222e-05, "loss": 0.0434, "step": 7304 }, { "epoch": 7.72, "learning_rate": 3.071088794926004e-05, "loss": 0.138, "step": 7306 }, { "epoch": 7.73, "learning_rate": 3.070560253699789e-05, "loss": 0.1409, "step": 7308 }, { "epoch": 7.73, "learning_rate": 3.070031712473573e-05, "loss": 0.1047, "step": 7310 }, { "epoch": 7.73, "learning_rate": 3.069503171247358e-05, "loss": 0.1325, "step": 7312 }, { "epoch": 7.73, "learning_rate": 3.068974630021142e-05, "loss": 0.1291, "step": 7314 }, { "epoch": 7.73, "learning_rate": 3.0684460887949265e-05, "loss": 0.0921, "step": 7316 }, { "epoch": 7.74, "learning_rate": 3.0679175475687104e-05, "loss": 0.0725, "step": 7318 }, { "epoch": 7.74, "learning_rate": 3.067389006342495e-05, "loss": 0.13, "step": 7320 }, { "epoch": 7.74, "learning_rate": 3.066860465116279e-05, "loss": 0.0861, "step": 7322 }, { "epoch": 7.74, "learning_rate": 3.0663319238900635e-05, "loss": 0.0812, "step": 7324 }, { "epoch": 7.74, "learning_rate": 3.0658033826638475e-05, "loss": 0.2347, "step": 7326 }, { "epoch": 7.75, "learning_rate": 3.065274841437633e-05, "loss": 0.0432, "step": 7328 }, { "epoch": 7.75, "learning_rate": 3.064746300211417e-05, "loss": 0.168, "step": 7330 }, { "epoch": 7.75, "learning_rate": 3.064217758985201e-05, "loss": 0.1131, "step": 7332 }, { "epoch": 7.75, "learning_rate": 3.063689217758985e-05, "loss": 0.155, "step": 7334 }, { "epoch": 7.75, "learning_rate": 3.06316067653277e-05, "loss": 0.1038, "step": 7336 }, { "epoch": 7.76, "learning_rate": 3.062632135306554e-05, "loss": 0.1024, "step": 7338 }, { "epoch": 7.76, "learning_rate": 3.0621035940803384e-05, "loss": 0.108, "step": 7340 }, { "epoch": 7.76, "learning_rate": 3.061575052854122e-05, "loss": 0.0993, "step": 7342 }, { "epoch": 7.76, "learning_rate": 3.061046511627907e-05, "loss": 0.1252, "step": 7344 }, { "epoch": 7.77, "learning_rate": 3.0605179704016915e-05, "loss": 0.1117, "step": 7346 }, { "epoch": 7.77, "learning_rate": 3.059989429175476e-05, "loss": 0.0714, "step": 7348 }, { "epoch": 7.77, "learning_rate": 3.05946088794926e-05, "loss": 0.152, "step": 7350 }, { "epoch": 7.77, "learning_rate": 3.0589323467230446e-05, "loss": 0.2915, "step": 7352 }, { "epoch": 7.77, "learning_rate": 3.058403805496829e-05, "loss": 0.1396, "step": 7354 }, { "epoch": 7.78, "learning_rate": 3.057875264270613e-05, "loss": 0.1844, "step": 7356 }, { "epoch": 7.78, "learning_rate": 3.057346723044398e-05, "loss": 0.1751, "step": 7358 }, { "epoch": 7.78, "learning_rate": 3.056818181818182e-05, "loss": 0.1955, "step": 7360 }, { "epoch": 7.78, "learning_rate": 3.056289640591966e-05, "loss": 0.1451, "step": 7362 }, { "epoch": 7.78, "learning_rate": 3.055761099365751e-05, "loss": 0.131, "step": 7364 }, { "epoch": 7.79, "learning_rate": 3.0552325581395355e-05, "loss": 0.094, "step": 7366 }, { "epoch": 7.79, "learning_rate": 3.0547040169133195e-05, "loss": 0.1367, "step": 7368 }, { "epoch": 7.79, "learning_rate": 3.054175475687104e-05, "loss": 0.1617, "step": 7370 }, { "epoch": 7.79, "learning_rate": 3.053646934460888e-05, "loss": 0.213, "step": 7372 }, { "epoch": 7.79, "learning_rate": 3.0531183932346726e-05, "loss": 0.0715, "step": 7374 }, { "epoch": 7.8, "learning_rate": 3.0525898520084565e-05, "loss": 0.1576, "step": 7376 }, { "epoch": 7.8, "learning_rate": 3.052061310782241e-05, "loss": 0.1469, "step": 7378 }, { "epoch": 7.8, "learning_rate": 3.0515327695560254e-05, "loss": 0.0214, "step": 7380 }, { "epoch": 7.8, "learning_rate": 3.05100422832981e-05, "loss": 0.1703, "step": 7382 }, { "epoch": 7.81, "learning_rate": 3.0504756871035946e-05, "loss": 0.0795, "step": 7384 }, { "epoch": 7.81, "learning_rate": 3.049947145877379e-05, "loss": 0.0787, "step": 7386 }, { "epoch": 7.81, "learning_rate": 3.049418604651163e-05, "loss": 0.1197, "step": 7388 }, { "epoch": 7.81, "learning_rate": 3.0488900634249474e-05, "loss": 0.0709, "step": 7390 }, { "epoch": 7.81, "learning_rate": 3.0483615221987317e-05, "loss": 0.0778, "step": 7392 }, { "epoch": 7.82, "learning_rate": 3.047832980972516e-05, "loss": 0.2426, "step": 7394 }, { "epoch": 7.82, "learning_rate": 3.0473044397463002e-05, "loss": 0.0691, "step": 7396 }, { "epoch": 7.82, "learning_rate": 3.0467758985200845e-05, "loss": 0.1296, "step": 7398 }, { "epoch": 7.82, "learning_rate": 3.0462473572938694e-05, "loss": 0.1927, "step": 7400 }, { "epoch": 7.82, "learning_rate": 3.0457188160676537e-05, "loss": 0.1771, "step": 7402 }, { "epoch": 7.83, "learning_rate": 3.045190274841438e-05, "loss": 0.1729, "step": 7404 }, { "epoch": 7.83, "learning_rate": 3.0446617336152222e-05, "loss": 0.1181, "step": 7406 }, { "epoch": 7.83, "learning_rate": 3.0441331923890065e-05, "loss": 0.1441, "step": 7408 }, { "epoch": 7.83, "learning_rate": 3.0436046511627908e-05, "loss": 0.1117, "step": 7410 }, { "epoch": 7.84, "learning_rate": 3.043076109936575e-05, "loss": 0.1002, "step": 7412 }, { "epoch": 7.84, "learning_rate": 3.0425475687103593e-05, "loss": 0.1109, "step": 7414 }, { "epoch": 7.84, "learning_rate": 3.0420190274841436e-05, "loss": 0.1843, "step": 7416 }, { "epoch": 7.84, "learning_rate": 3.0414904862579285e-05, "loss": 0.2138, "step": 7418 }, { "epoch": 7.84, "learning_rate": 3.0409619450317128e-05, "loss": 0.0987, "step": 7420 }, { "epoch": 7.85, "learning_rate": 3.040433403805497e-05, "loss": 0.2392, "step": 7422 }, { "epoch": 7.85, "learning_rate": 3.0399048625792813e-05, "loss": 0.2863, "step": 7424 }, { "epoch": 7.85, "learning_rate": 3.0393763213530656e-05, "loss": 0.1728, "step": 7426 }, { "epoch": 7.85, "learning_rate": 3.0388477801268502e-05, "loss": 0.21, "step": 7428 }, { "epoch": 7.85, "learning_rate": 3.0383192389006344e-05, "loss": 0.1339, "step": 7430 }, { "epoch": 7.86, "learning_rate": 3.0377906976744187e-05, "loss": 0.0803, "step": 7432 }, { "epoch": 7.86, "learning_rate": 3.037262156448203e-05, "loss": 0.303, "step": 7434 }, { "epoch": 7.86, "learning_rate": 3.0367336152219876e-05, "loss": 0.1453, "step": 7436 }, { "epoch": 7.86, "learning_rate": 3.0362050739957722e-05, "loss": 0.0911, "step": 7438 }, { "epoch": 7.86, "learning_rate": 3.0356765327695565e-05, "loss": 0.1804, "step": 7440 }, { "epoch": 7.87, "learning_rate": 3.0351479915433407e-05, "loss": 0.0878, "step": 7442 }, { "epoch": 7.87, "learning_rate": 3.034619450317125e-05, "loss": 0.1076, "step": 7444 }, { "epoch": 7.87, "learning_rate": 3.0340909090909093e-05, "loss": 0.0919, "step": 7446 }, { "epoch": 7.87, "learning_rate": 3.0335623678646935e-05, "loss": 0.1405, "step": 7448 }, { "epoch": 7.88, "learning_rate": 3.0330338266384778e-05, "loss": 0.2786, "step": 7450 }, { "epoch": 7.88, "learning_rate": 3.032505285412262e-05, "loss": 0.0523, "step": 7452 }, { "epoch": 7.88, "learning_rate": 3.0319767441860463e-05, "loss": 0.1314, "step": 7454 }, { "epoch": 7.88, "learning_rate": 3.0314482029598313e-05, "loss": 0.0958, "step": 7456 }, { "epoch": 7.88, "learning_rate": 3.0309196617336155e-05, "loss": 0.1255, "step": 7458 }, { "epoch": 7.89, "learning_rate": 3.0303911205073998e-05, "loss": 0.1244, "step": 7460 }, { "epoch": 7.89, "learning_rate": 3.029862579281184e-05, "loss": 0.1663, "step": 7462 }, { "epoch": 7.89, "learning_rate": 3.0293340380549683e-05, "loss": 0.092, "step": 7464 }, { "epoch": 7.89, "learning_rate": 3.0288054968287526e-05, "loss": 0.1858, "step": 7466 }, { "epoch": 7.89, "learning_rate": 3.028276955602537e-05, "loss": 0.1454, "step": 7468 }, { "epoch": 7.9, "learning_rate": 3.027748414376321e-05, "loss": 0.1785, "step": 7470 }, { "epoch": 7.9, "learning_rate": 3.0272198731501057e-05, "loss": 0.1131, "step": 7472 }, { "epoch": 7.9, "learning_rate": 3.0266913319238904e-05, "loss": 0.1604, "step": 7474 }, { "epoch": 7.9, "learning_rate": 3.0261627906976746e-05, "loss": 0.0588, "step": 7476 }, { "epoch": 7.9, "learning_rate": 3.025634249471459e-05, "loss": 0.1184, "step": 7478 }, { "epoch": 7.91, "learning_rate": 3.025105708245243e-05, "loss": 0.1357, "step": 7480 }, { "epoch": 7.91, "learning_rate": 3.0245771670190278e-05, "loss": 0.0666, "step": 7482 }, { "epoch": 7.91, "learning_rate": 3.024048625792812e-05, "loss": 0.0618, "step": 7484 }, { "epoch": 7.91, "learning_rate": 3.0235200845665963e-05, "loss": 0.1297, "step": 7486 }, { "epoch": 7.92, "learning_rate": 3.0229915433403806e-05, "loss": 0.0984, "step": 7488 }, { "epoch": 7.92, "learning_rate": 3.0224630021141648e-05, "loss": 0.2079, "step": 7490 }, { "epoch": 7.92, "learning_rate": 3.0219344608879498e-05, "loss": 0.0577, "step": 7492 }, { "epoch": 7.92, "learning_rate": 3.021405919661734e-05, "loss": 0.1102, "step": 7494 }, { "epoch": 7.92, "learning_rate": 3.0208773784355183e-05, "loss": 0.108, "step": 7496 }, { "epoch": 7.93, "learning_rate": 3.0203488372093026e-05, "loss": 0.2446, "step": 7498 }, { "epoch": 7.93, "learning_rate": 3.019820295983087e-05, "loss": 0.0924, "step": 7500 }, { "epoch": 7.93, "eval_cer": 0.02718723282986606, "eval_loss": 0.7875532507896423, "eval_runtime": 127.8203, "eval_samples_per_second": 6.58, "eval_steps_per_second": 0.829, "step": 7500 }, { "epoch": 7.93, "learning_rate": 3.019291754756871e-05, "loss": 0.1296, "step": 7502 }, { "epoch": 7.93, "learning_rate": 3.0187632135306554e-05, "loss": 0.2015, "step": 7504 }, { "epoch": 7.93, "learning_rate": 3.0182346723044396e-05, "loss": 0.0772, "step": 7506 }, { "epoch": 7.94, "learning_rate": 3.017706131078224e-05, "loss": 0.1157, "step": 7508 }, { "epoch": 7.94, "learning_rate": 3.017177589852009e-05, "loss": 0.0763, "step": 7510 }, { "epoch": 7.94, "learning_rate": 3.016649048625793e-05, "loss": 0.275, "step": 7512 }, { "epoch": 7.94, "learning_rate": 3.0161205073995774e-05, "loss": 0.1857, "step": 7514 }, { "epoch": 7.95, "learning_rate": 3.0155919661733617e-05, "loss": 0.1416, "step": 7516 }, { "epoch": 7.95, "learning_rate": 3.015063424947146e-05, "loss": 0.048, "step": 7518 }, { "epoch": 7.95, "learning_rate": 3.0145348837209302e-05, "loss": 0.0743, "step": 7520 }, { "epoch": 7.95, "learning_rate": 3.0140063424947145e-05, "loss": 0.1346, "step": 7522 }, { "epoch": 7.95, "learning_rate": 3.0134778012684987e-05, "loss": 0.0731, "step": 7524 }, { "epoch": 7.96, "learning_rate": 3.0129492600422833e-05, "loss": 0.0553, "step": 7526 }, { "epoch": 7.96, "learning_rate": 3.012420718816068e-05, "loss": 0.0786, "step": 7528 }, { "epoch": 7.96, "learning_rate": 3.0118921775898522e-05, "loss": 0.0682, "step": 7530 }, { "epoch": 7.96, "learning_rate": 3.0113636363636365e-05, "loss": 0.0939, "step": 7532 }, { "epoch": 7.96, "learning_rate": 3.0108350951374207e-05, "loss": 0.2162, "step": 7534 }, { "epoch": 7.97, "learning_rate": 3.0103065539112053e-05, "loss": 0.2454, "step": 7536 }, { "epoch": 7.97, "learning_rate": 3.0097780126849896e-05, "loss": 0.1349, "step": 7538 }, { "epoch": 7.97, "learning_rate": 3.009249471458774e-05, "loss": 0.1636, "step": 7540 }, { "epoch": 7.97, "learning_rate": 3.008720930232558e-05, "loss": 0.0599, "step": 7542 }, { "epoch": 7.97, "learning_rate": 3.0081923890063424e-05, "loss": 0.1237, "step": 7544 }, { "epoch": 7.98, "learning_rate": 3.0076638477801274e-05, "loss": 0.1353, "step": 7546 }, { "epoch": 7.98, "learning_rate": 3.0071353065539116e-05, "loss": 0.162, "step": 7548 }, { "epoch": 7.98, "learning_rate": 3.006606765327696e-05, "loss": 0.2098, "step": 7550 }, { "epoch": 7.98, "learning_rate": 3.00607822410148e-05, "loss": 0.0433, "step": 7552 }, { "epoch": 7.99, "learning_rate": 3.0055496828752644e-05, "loss": 0.0537, "step": 7554 }, { "epoch": 7.99, "learning_rate": 3.0050211416490487e-05, "loss": 0.0926, "step": 7556 }, { "epoch": 7.99, "learning_rate": 3.004492600422833e-05, "loss": 0.1546, "step": 7558 }, { "epoch": 7.99, "learning_rate": 3.0039640591966172e-05, "loss": 0.1787, "step": 7560 }, { "epoch": 7.99, "learning_rate": 3.0034355179704015e-05, "loss": 0.1037, "step": 7562 }, { "epoch": 8.0, "learning_rate": 3.0029069767441864e-05, "loss": 0.1001, "step": 7564 }, { "epoch": 8.0, "learning_rate": 3.0023784355179707e-05, "loss": 0.1114, "step": 7566 }, { "epoch": 8.0, "learning_rate": 3.001849894291755e-05, "loss": 0.0804, "step": 7568 }, { "epoch": 8.0, "learning_rate": 3.0013213530655392e-05, "loss": 0.0532, "step": 7570 }, { "epoch": 8.0, "learning_rate": 3.0007928118393235e-05, "loss": 0.0644, "step": 7572 }, { "epoch": 8.01, "learning_rate": 3.0002642706131078e-05, "loss": 0.0707, "step": 7574 }, { "epoch": 8.01, "learning_rate": 2.999735729386892e-05, "loss": 0.1153, "step": 7576 }, { "epoch": 8.01, "learning_rate": 2.9992071881606763e-05, "loss": 0.0249, "step": 7578 }, { "epoch": 8.01, "learning_rate": 2.998678646934461e-05, "loss": 0.054, "step": 7580 }, { "epoch": 8.01, "learning_rate": 2.9981501057082455e-05, "loss": 0.1267, "step": 7582 }, { "epoch": 8.02, "learning_rate": 2.9976215644820298e-05, "loss": 0.0665, "step": 7584 }, { "epoch": 8.02, "learning_rate": 2.997093023255814e-05, "loss": 0.1242, "step": 7586 }, { "epoch": 8.02, "learning_rate": 2.9965644820295987e-05, "loss": 0.0634, "step": 7588 }, { "epoch": 8.02, "learning_rate": 2.996035940803383e-05, "loss": 0.132, "step": 7590 }, { "epoch": 8.03, "learning_rate": 2.9955073995771672e-05, "loss": 0.0788, "step": 7592 }, { "epoch": 8.03, "learning_rate": 2.9949788583509515e-05, "loss": 0.1367, "step": 7594 }, { "epoch": 8.03, "learning_rate": 2.9944503171247357e-05, "loss": 0.0911, "step": 7596 }, { "epoch": 8.03, "learning_rate": 2.99392177589852e-05, "loss": 0.2105, "step": 7598 }, { "epoch": 8.03, "learning_rate": 2.993393234672305e-05, "loss": 0.1738, "step": 7600 }, { "epoch": 8.04, "learning_rate": 2.9928646934460892e-05, "loss": 0.22, "step": 7602 }, { "epoch": 8.04, "learning_rate": 2.9923361522198735e-05, "loss": 0.1878, "step": 7604 }, { "epoch": 8.04, "learning_rate": 2.9918076109936577e-05, "loss": 0.0618, "step": 7606 }, { "epoch": 8.04, "learning_rate": 2.991279069767442e-05, "loss": 0.0667, "step": 7608 }, { "epoch": 8.04, "learning_rate": 2.9907505285412263e-05, "loss": 0.0914, "step": 7610 }, { "epoch": 8.05, "learning_rate": 2.9902219873150105e-05, "loss": 0.1477, "step": 7612 }, { "epoch": 8.05, "learning_rate": 2.9896934460887948e-05, "loss": 0.16, "step": 7614 }, { "epoch": 8.05, "learning_rate": 2.989164904862579e-05, "loss": 0.1531, "step": 7616 }, { "epoch": 8.05, "learning_rate": 2.988636363636364e-05, "loss": 0.1038, "step": 7618 }, { "epoch": 8.05, "learning_rate": 2.9881078224101483e-05, "loss": 0.0792, "step": 7620 }, { "epoch": 8.06, "learning_rate": 2.9875792811839326e-05, "loss": 0.1897, "step": 7622 }, { "epoch": 8.06, "learning_rate": 2.9870507399577168e-05, "loss": 0.1093, "step": 7624 }, { "epoch": 8.06, "learning_rate": 2.986522198731501e-05, "loss": 0.1908, "step": 7626 }, { "epoch": 8.06, "learning_rate": 2.9859936575052854e-05, "loss": 0.1275, "step": 7628 }, { "epoch": 8.07, "learning_rate": 2.9854651162790696e-05, "loss": 0.0609, "step": 7630 }, { "epoch": 8.07, "learning_rate": 2.9849365750528542e-05, "loss": 0.105, "step": 7632 }, { "epoch": 8.07, "learning_rate": 2.9844080338266385e-05, "loss": 0.0822, "step": 7634 }, { "epoch": 8.07, "learning_rate": 2.983879492600423e-05, "loss": 0.1121, "step": 7636 }, { "epoch": 8.07, "learning_rate": 2.9833509513742074e-05, "loss": 0.0555, "step": 7638 }, { "epoch": 8.08, "learning_rate": 2.9828224101479916e-05, "loss": 0.1721, "step": 7640 }, { "epoch": 8.08, "learning_rate": 2.9822938689217762e-05, "loss": 0.0414, "step": 7642 }, { "epoch": 8.08, "learning_rate": 2.9817653276955605e-05, "loss": 0.1267, "step": 7644 }, { "epoch": 8.08, "learning_rate": 2.9812367864693448e-05, "loss": 0.116, "step": 7646 }, { "epoch": 8.08, "learning_rate": 2.980708245243129e-05, "loss": 0.1334, "step": 7648 }, { "epoch": 8.09, "learning_rate": 2.9801797040169133e-05, "loss": 0.062, "step": 7650 }, { "epoch": 8.09, "learning_rate": 2.9796511627906976e-05, "loss": 0.2137, "step": 7652 }, { "epoch": 8.09, "learning_rate": 2.9791226215644825e-05, "loss": 0.2643, "step": 7654 }, { "epoch": 8.09, "learning_rate": 2.9785940803382668e-05, "loss": 0.159, "step": 7656 }, { "epoch": 8.1, "learning_rate": 2.978065539112051e-05, "loss": 0.0571, "step": 7658 }, { "epoch": 8.1, "learning_rate": 2.9775369978858353e-05, "loss": 0.1387, "step": 7660 }, { "epoch": 8.1, "learning_rate": 2.9770084566596196e-05, "loss": 0.0776, "step": 7662 }, { "epoch": 8.1, "learning_rate": 2.976479915433404e-05, "loss": 0.0535, "step": 7664 }, { "epoch": 8.1, "learning_rate": 2.975951374207188e-05, "loss": 0.06, "step": 7666 }, { "epoch": 8.11, "learning_rate": 2.9754228329809724e-05, "loss": 0.1233, "step": 7668 }, { "epoch": 8.11, "learning_rate": 2.9748942917547567e-05, "loss": 0.1132, "step": 7670 }, { "epoch": 8.11, "learning_rate": 2.9743657505285416e-05, "loss": 0.1112, "step": 7672 }, { "epoch": 8.11, "learning_rate": 2.973837209302326e-05, "loss": 0.2012, "step": 7674 }, { "epoch": 8.11, "learning_rate": 2.97330866807611e-05, "loss": 0.0808, "step": 7676 }, { "epoch": 8.12, "learning_rate": 2.9727801268498944e-05, "loss": 0.063, "step": 7678 }, { "epoch": 8.12, "learning_rate": 2.9722515856236787e-05, "loss": 0.0613, "step": 7680 }, { "epoch": 8.12, "learning_rate": 2.971723044397463e-05, "loss": 0.1812, "step": 7682 }, { "epoch": 8.12, "learning_rate": 2.9711945031712472e-05, "loss": 0.0844, "step": 7684 }, { "epoch": 8.12, "learning_rate": 2.9706659619450318e-05, "loss": 0.2003, "step": 7686 }, { "epoch": 8.13, "learning_rate": 2.970137420718816e-05, "loss": 0.1232, "step": 7688 }, { "epoch": 8.13, "learning_rate": 2.9696088794926007e-05, "loss": 0.1256, "step": 7690 }, { "epoch": 8.13, "learning_rate": 2.969080338266385e-05, "loss": 0.0865, "step": 7692 }, { "epoch": 8.13, "learning_rate": 2.9685517970401696e-05, "loss": 0.102, "step": 7694 }, { "epoch": 8.14, "learning_rate": 2.9680232558139538e-05, "loss": 0.1025, "step": 7696 }, { "epoch": 8.14, "learning_rate": 2.967494714587738e-05, "loss": 0.0941, "step": 7698 }, { "epoch": 8.14, "learning_rate": 2.9669661733615224e-05, "loss": 0.0641, "step": 7700 }, { "epoch": 8.14, "learning_rate": 2.9664376321353066e-05, "loss": 0.0927, "step": 7702 }, { "epoch": 8.14, "learning_rate": 2.965909090909091e-05, "loss": 0.1103, "step": 7704 }, { "epoch": 8.15, "learning_rate": 2.965380549682875e-05, "loss": 0.2576, "step": 7706 }, { "epoch": 8.15, "learning_rate": 2.96485200845666e-05, "loss": 0.0838, "step": 7708 }, { "epoch": 8.15, "learning_rate": 2.9643234672304444e-05, "loss": 0.1668, "step": 7710 }, { "epoch": 8.15, "learning_rate": 2.9637949260042286e-05, "loss": 0.1558, "step": 7712 }, { "epoch": 8.15, "learning_rate": 2.963266384778013e-05, "loss": 0.0759, "step": 7714 }, { "epoch": 8.16, "learning_rate": 2.9627378435517972e-05, "loss": 0.111, "step": 7716 }, { "epoch": 8.16, "learning_rate": 2.9622093023255814e-05, "loss": 0.1078, "step": 7718 }, { "epoch": 8.16, "learning_rate": 2.9616807610993657e-05, "loss": 0.1482, "step": 7720 }, { "epoch": 8.16, "learning_rate": 2.96115221987315e-05, "loss": 0.1622, "step": 7722 }, { "epoch": 8.16, "learning_rate": 2.9606236786469342e-05, "loss": 0.1325, "step": 7724 }, { "epoch": 8.17, "learning_rate": 2.9600951374207192e-05, "loss": 0.1058, "step": 7726 }, { "epoch": 8.17, "learning_rate": 2.9595665961945035e-05, "loss": 0.0843, "step": 7728 }, { "epoch": 8.17, "learning_rate": 2.9590380549682877e-05, "loss": 0.1694, "step": 7730 }, { "epoch": 8.17, "learning_rate": 2.958509513742072e-05, "loss": 0.0398, "step": 7732 }, { "epoch": 8.18, "learning_rate": 2.9579809725158563e-05, "loss": 0.1988, "step": 7734 }, { "epoch": 8.18, "learning_rate": 2.9574524312896405e-05, "loss": 0.1308, "step": 7736 }, { "epoch": 8.18, "learning_rate": 2.956923890063425e-05, "loss": 0.053, "step": 7738 }, { "epoch": 8.18, "learning_rate": 2.9563953488372094e-05, "loss": 0.1263, "step": 7740 }, { "epoch": 8.18, "learning_rate": 2.9558668076109937e-05, "loss": 0.1169, "step": 7742 }, { "epoch": 8.19, "learning_rate": 2.9553382663847783e-05, "loss": 0.02, "step": 7744 }, { "epoch": 8.19, "learning_rate": 2.9548097251585625e-05, "loss": 0.083, "step": 7746 }, { "epoch": 8.19, "learning_rate": 2.954281183932347e-05, "loss": 0.0907, "step": 7748 }, { "epoch": 8.19, "learning_rate": 2.9537526427061314e-05, "loss": 0.0935, "step": 7750 }, { "epoch": 8.19, "learning_rate": 2.9532241014799157e-05, "loss": 0.0604, "step": 7752 }, { "epoch": 8.2, "learning_rate": 2.9526955602537e-05, "loss": 0.092, "step": 7754 }, { "epoch": 8.2, "learning_rate": 2.9521670190274842e-05, "loss": 0.1189, "step": 7756 }, { "epoch": 8.2, "learning_rate": 2.9516384778012685e-05, "loss": 0.1149, "step": 7758 }, { "epoch": 8.2, "learning_rate": 2.9511099365750527e-05, "loss": 0.111, "step": 7760 }, { "epoch": 8.21, "learning_rate": 2.9505813953488377e-05, "loss": 0.146, "step": 7762 }, { "epoch": 8.21, "learning_rate": 2.950052854122622e-05, "loss": 0.0594, "step": 7764 }, { "epoch": 8.21, "learning_rate": 2.9495243128964062e-05, "loss": 0.1223, "step": 7766 }, { "epoch": 8.21, "learning_rate": 2.9489957716701905e-05, "loss": 0.1484, "step": 7768 }, { "epoch": 8.21, "learning_rate": 2.9484672304439748e-05, "loss": 0.139, "step": 7770 }, { "epoch": 8.22, "learning_rate": 2.947938689217759e-05, "loss": 0.2436, "step": 7772 }, { "epoch": 8.22, "learning_rate": 2.9474101479915433e-05, "loss": 0.1065, "step": 7774 }, { "epoch": 8.22, "learning_rate": 2.9468816067653276e-05, "loss": 0.1723, "step": 7776 }, { "epoch": 8.22, "learning_rate": 2.9463530655391118e-05, "loss": 0.0795, "step": 7778 }, { "epoch": 8.22, "learning_rate": 2.9458245243128968e-05, "loss": 0.0833, "step": 7780 }, { "epoch": 8.23, "learning_rate": 2.945295983086681e-05, "loss": 0.0397, "step": 7782 }, { "epoch": 8.23, "learning_rate": 2.9447674418604653e-05, "loss": 0.055, "step": 7784 }, { "epoch": 8.23, "learning_rate": 2.9442389006342496e-05, "loss": 0.0573, "step": 7786 }, { "epoch": 8.23, "learning_rate": 2.943710359408034e-05, "loss": 0.0732, "step": 7788 }, { "epoch": 8.23, "learning_rate": 2.943181818181818e-05, "loss": 0.1044, "step": 7790 }, { "epoch": 8.24, "learning_rate": 2.9426532769556027e-05, "loss": 0.0832, "step": 7792 }, { "epoch": 8.24, "learning_rate": 2.942124735729387e-05, "loss": 0.0394, "step": 7794 }, { "epoch": 8.24, "learning_rate": 2.9415961945031712e-05, "loss": 0.0308, "step": 7796 }, { "epoch": 8.24, "learning_rate": 2.941067653276956e-05, "loss": 0.1011, "step": 7798 }, { "epoch": 8.25, "learning_rate": 2.94053911205074e-05, "loss": 0.1278, "step": 7800 }, { "epoch": 8.25, "learning_rate": 2.9400105708245247e-05, "loss": 0.1258, "step": 7802 }, { "epoch": 8.25, "learning_rate": 2.939482029598309e-05, "loss": 0.1203, "step": 7804 }, { "epoch": 8.25, "learning_rate": 2.9389534883720933e-05, "loss": 0.0701, "step": 7806 }, { "epoch": 8.25, "learning_rate": 2.9384249471458775e-05, "loss": 0.1125, "step": 7808 }, { "epoch": 8.26, "learning_rate": 2.9378964059196618e-05, "loss": 0.158, "step": 7810 }, { "epoch": 8.26, "learning_rate": 2.937367864693446e-05, "loss": 0.1057, "step": 7812 }, { "epoch": 8.26, "learning_rate": 2.9368393234672303e-05, "loss": 0.0738, "step": 7814 }, { "epoch": 8.26, "learning_rate": 2.9363107822410153e-05, "loss": 0.1133, "step": 7816 }, { "epoch": 8.26, "learning_rate": 2.9357822410147995e-05, "loss": 0.108, "step": 7818 }, { "epoch": 8.27, "learning_rate": 2.9352536997885838e-05, "loss": 0.0945, "step": 7820 }, { "epoch": 8.27, "learning_rate": 2.934725158562368e-05, "loss": 0.0498, "step": 7822 }, { "epoch": 8.27, "learning_rate": 2.9341966173361523e-05, "loss": 0.0087, "step": 7824 }, { "epoch": 8.27, "learning_rate": 2.9336680761099366e-05, "loss": 0.027, "step": 7826 }, { "epoch": 8.27, "learning_rate": 2.933139534883721e-05, "loss": 0.0578, "step": 7828 }, { "epoch": 8.28, "learning_rate": 2.932610993657505e-05, "loss": 0.118, "step": 7830 }, { "epoch": 8.28, "learning_rate": 2.9320824524312894e-05, "loss": 0.1397, "step": 7832 }, { "epoch": 8.28, "learning_rate": 2.9315539112050744e-05, "loss": 0.1436, "step": 7834 }, { "epoch": 8.28, "learning_rate": 2.9310253699788586e-05, "loss": 0.0833, "step": 7836 }, { "epoch": 8.29, "learning_rate": 2.930496828752643e-05, "loss": 0.2005, "step": 7838 }, { "epoch": 8.29, "learning_rate": 2.929968287526427e-05, "loss": 0.0683, "step": 7840 }, { "epoch": 8.29, "learning_rate": 2.9294397463002114e-05, "loss": 0.1512, "step": 7842 }, { "epoch": 8.29, "learning_rate": 2.9289112050739957e-05, "loss": 0.1971, "step": 7844 }, { "epoch": 8.29, "learning_rate": 2.9283826638477803e-05, "loss": 0.1115, "step": 7846 }, { "epoch": 8.3, "learning_rate": 2.9278541226215646e-05, "loss": 0.1307, "step": 7848 }, { "epoch": 8.3, "learning_rate": 2.927325581395349e-05, "loss": 0.1031, "step": 7850 }, { "epoch": 8.3, "learning_rate": 2.9267970401691334e-05, "loss": 0.0409, "step": 7852 }, { "epoch": 8.3, "learning_rate": 2.926268498942918e-05, "loss": 0.0485, "step": 7854 }, { "epoch": 8.3, "learning_rate": 2.9257399577167023e-05, "loss": 0.0778, "step": 7856 }, { "epoch": 8.31, "learning_rate": 2.9252114164904866e-05, "loss": 0.2528, "step": 7858 }, { "epoch": 8.31, "learning_rate": 2.924682875264271e-05, "loss": 0.0568, "step": 7860 }, { "epoch": 8.31, "learning_rate": 2.924154334038055e-05, "loss": 0.0842, "step": 7862 }, { "epoch": 8.31, "learning_rate": 2.9236257928118394e-05, "loss": 0.1228, "step": 7864 }, { "epoch": 8.32, "learning_rate": 2.9230972515856236e-05, "loss": 0.1569, "step": 7866 }, { "epoch": 8.32, "learning_rate": 2.922568710359408e-05, "loss": 0.1621, "step": 7868 }, { "epoch": 8.32, "learning_rate": 2.922040169133193e-05, "loss": 0.1528, "step": 7870 }, { "epoch": 8.32, "learning_rate": 2.921511627906977e-05, "loss": 0.0645, "step": 7872 }, { "epoch": 8.32, "learning_rate": 2.9209830866807614e-05, "loss": 0.1805, "step": 7874 }, { "epoch": 8.33, "learning_rate": 2.9204545454545457e-05, "loss": 0.159, "step": 7876 }, { "epoch": 8.33, "learning_rate": 2.91992600422833e-05, "loss": 0.2725, "step": 7878 }, { "epoch": 8.33, "learning_rate": 2.9193974630021142e-05, "loss": 0.2843, "step": 7880 }, { "epoch": 8.33, "learning_rate": 2.9188689217758985e-05, "loss": 0.0793, "step": 7882 }, { "epoch": 8.33, "learning_rate": 2.9183403805496827e-05, "loss": 0.0768, "step": 7884 }, { "epoch": 8.34, "learning_rate": 2.917811839323467e-05, "loss": 0.0909, "step": 7886 }, { "epoch": 8.34, "learning_rate": 2.917283298097252e-05, "loss": 0.274, "step": 7888 }, { "epoch": 8.34, "learning_rate": 2.9167547568710362e-05, "loss": 0.0688, "step": 7890 }, { "epoch": 8.34, "learning_rate": 2.9162262156448205e-05, "loss": 0.1227, "step": 7892 }, { "epoch": 8.34, "learning_rate": 2.9156976744186047e-05, "loss": 0.1165, "step": 7894 }, { "epoch": 8.35, "learning_rate": 2.915169133192389e-05, "loss": 0.272, "step": 7896 }, { "epoch": 8.35, "learning_rate": 2.9146405919661736e-05, "loss": 0.11, "step": 7898 }, { "epoch": 8.35, "learning_rate": 2.914112050739958e-05, "loss": 0.1305, "step": 7900 }, { "epoch": 8.35, "learning_rate": 2.913583509513742e-05, "loss": 0.0879, "step": 7902 }, { "epoch": 8.36, "learning_rate": 2.9130549682875264e-05, "loss": 0.0813, "step": 7904 }, { "epoch": 8.36, "learning_rate": 2.912526427061311e-05, "loss": 0.0774, "step": 7906 }, { "epoch": 8.36, "learning_rate": 2.9119978858350956e-05, "loss": 0.0652, "step": 7908 }, { "epoch": 8.36, "learning_rate": 2.91146934460888e-05, "loss": 0.1315, "step": 7910 }, { "epoch": 8.36, "learning_rate": 2.910940803382664e-05, "loss": 0.1017, "step": 7912 }, { "epoch": 8.37, "learning_rate": 2.9104122621564484e-05, "loss": 0.0486, "step": 7914 }, { "epoch": 8.37, "learning_rate": 2.9098837209302327e-05, "loss": 0.0856, "step": 7916 }, { "epoch": 8.37, "learning_rate": 2.909355179704017e-05, "loss": 0.0431, "step": 7918 }, { "epoch": 8.37, "learning_rate": 2.9088266384778012e-05, "loss": 0.0312, "step": 7920 }, { "epoch": 8.37, "learning_rate": 2.9082980972515855e-05, "loss": 0.1018, "step": 7922 }, { "epoch": 8.38, "learning_rate": 2.9077695560253704e-05, "loss": 0.0721, "step": 7924 }, { "epoch": 8.38, "learning_rate": 2.9072410147991547e-05, "loss": 0.0959, "step": 7926 }, { "epoch": 8.38, "learning_rate": 2.906712473572939e-05, "loss": 0.0566, "step": 7928 }, { "epoch": 8.38, "learning_rate": 2.9061839323467232e-05, "loss": 0.0306, "step": 7930 }, { "epoch": 8.38, "learning_rate": 2.9056553911205075e-05, "loss": 0.2017, "step": 7932 }, { "epoch": 8.39, "learning_rate": 2.9051268498942918e-05, "loss": 0.0892, "step": 7934 }, { "epoch": 8.39, "learning_rate": 2.904598308668076e-05, "loss": 0.0562, "step": 7936 }, { "epoch": 8.39, "learning_rate": 2.9040697674418603e-05, "loss": 0.0532, "step": 7938 }, { "epoch": 8.39, "learning_rate": 2.9035412262156446e-05, "loss": 0.189, "step": 7940 }, { "epoch": 8.4, "learning_rate": 2.9030126849894295e-05, "loss": 0.1269, "step": 7942 }, { "epoch": 8.4, "learning_rate": 2.9024841437632138e-05, "loss": 0.0781, "step": 7944 }, { "epoch": 8.4, "learning_rate": 2.901955602536998e-05, "loss": 0.0659, "step": 7946 }, { "epoch": 8.4, "learning_rate": 2.9014270613107823e-05, "loss": 0.0893, "step": 7948 }, { "epoch": 8.4, "learning_rate": 2.9008985200845666e-05, "loss": 0.1148, "step": 7950 }, { "epoch": 8.41, "learning_rate": 2.9003699788583512e-05, "loss": 0.0789, "step": 7952 }, { "epoch": 8.41, "learning_rate": 2.8998414376321355e-05, "loss": 0.1647, "step": 7954 }, { "epoch": 8.41, "learning_rate": 2.8993128964059197e-05, "loss": 0.1093, "step": 7956 }, { "epoch": 8.41, "learning_rate": 2.898784355179704e-05, "loss": 0.272, "step": 7958 }, { "epoch": 8.41, "learning_rate": 2.8982558139534886e-05, "loss": 0.2171, "step": 7960 }, { "epoch": 8.42, "learning_rate": 2.8977272727272732e-05, "loss": 0.0894, "step": 7962 }, { "epoch": 8.42, "learning_rate": 2.8971987315010575e-05, "loss": 0.2088, "step": 7964 }, { "epoch": 8.42, "learning_rate": 2.8966701902748417e-05, "loss": 0.0528, "step": 7966 }, { "epoch": 8.42, "learning_rate": 2.896141649048626e-05, "loss": 0.1048, "step": 7968 }, { "epoch": 8.42, "learning_rate": 2.8956131078224103e-05, "loss": 0.101, "step": 7970 }, { "epoch": 8.43, "learning_rate": 2.8950845665961945e-05, "loss": 0.2322, "step": 7972 }, { "epoch": 8.43, "learning_rate": 2.8945560253699788e-05, "loss": 0.2275, "step": 7974 }, { "epoch": 8.43, "learning_rate": 2.894027484143763e-05, "loss": 0.1182, "step": 7976 }, { "epoch": 8.43, "learning_rate": 2.893498942917548e-05, "loss": 0.0821, "step": 7978 }, { "epoch": 8.44, "learning_rate": 2.8929704016913323e-05, "loss": 0.0642, "step": 7980 }, { "epoch": 8.44, "learning_rate": 2.8924418604651166e-05, "loss": 0.0747, "step": 7982 }, { "epoch": 8.44, "learning_rate": 2.8919133192389008e-05, "loss": 0.1306, "step": 7984 }, { "epoch": 8.44, "learning_rate": 2.891384778012685e-05, "loss": 0.1336, "step": 7986 }, { "epoch": 8.44, "learning_rate": 2.8908562367864694e-05, "loss": 0.074, "step": 7988 }, { "epoch": 8.45, "learning_rate": 2.8903276955602536e-05, "loss": 0.2258, "step": 7990 }, { "epoch": 8.45, "learning_rate": 2.889799154334038e-05, "loss": 0.0969, "step": 7992 }, { "epoch": 8.45, "learning_rate": 2.889270613107822e-05, "loss": 0.0191, "step": 7994 }, { "epoch": 8.45, "learning_rate": 2.888742071881607e-05, "loss": 0.0725, "step": 7996 }, { "epoch": 8.45, "learning_rate": 2.8882135306553914e-05, "loss": 0.0552, "step": 7998 }, { "epoch": 8.46, "learning_rate": 2.8876849894291756e-05, "loss": 0.0584, "step": 8000 }, { "epoch": 8.46, "eval_cer": 0.023083499572527786, "eval_loss": 0.834189772605896, "eval_runtime": 129.0007, "eval_samples_per_second": 6.519, "eval_steps_per_second": 0.822, "step": 8000 }, { "epoch": 8.46, "learning_rate": 2.88715644820296e-05, "loss": 0.0839, "step": 8002 }, { "epoch": 8.46, "learning_rate": 2.8866279069767442e-05, "loss": 0.101, "step": 8004 }, { "epoch": 8.46, "learning_rate": 2.8860993657505288e-05, "loss": 0.1871, "step": 8006 }, { "epoch": 8.47, "learning_rate": 2.885570824524313e-05, "loss": 0.0541, "step": 8008 }, { "epoch": 8.47, "learning_rate": 2.8850422832980973e-05, "loss": 0.071, "step": 8010 }, { "epoch": 8.47, "learning_rate": 2.8845137420718816e-05, "loss": 0.0442, "step": 8012 }, { "epoch": 8.47, "learning_rate": 2.8839852008456665e-05, "loss": 0.1274, "step": 8014 }, { "epoch": 8.47, "learning_rate": 2.8834566596194508e-05, "loss": 0.1477, "step": 8016 }, { "epoch": 8.48, "learning_rate": 2.882928118393235e-05, "loss": 0.2032, "step": 8018 }, { "epoch": 8.48, "learning_rate": 2.8823995771670193e-05, "loss": 0.0629, "step": 8020 }, { "epoch": 8.48, "learning_rate": 2.8818710359408036e-05, "loss": 0.1614, "step": 8022 }, { "epoch": 8.48, "learning_rate": 2.881342494714588e-05, "loss": 0.2354, "step": 8024 }, { "epoch": 8.48, "learning_rate": 2.880813953488372e-05, "loss": 0.0932, "step": 8026 }, { "epoch": 8.49, "learning_rate": 2.8802854122621564e-05, "loss": 0.1353, "step": 8028 }, { "epoch": 8.49, "learning_rate": 2.8797568710359407e-05, "loss": 0.1506, "step": 8030 }, { "epoch": 8.49, "learning_rate": 2.8792283298097256e-05, "loss": 0.0269, "step": 8032 }, { "epoch": 8.49, "learning_rate": 2.87869978858351e-05, "loss": 0.0624, "step": 8034 }, { "epoch": 8.49, "learning_rate": 2.878171247357294e-05, "loss": 0.1213, "step": 8036 }, { "epoch": 8.5, "learning_rate": 2.8776427061310784e-05, "loss": 0.1265, "step": 8038 }, { "epoch": 8.5, "learning_rate": 2.8771141649048627e-05, "loss": 0.1232, "step": 8040 }, { "epoch": 8.5, "learning_rate": 2.876585623678647e-05, "loss": 0.0704, "step": 8042 }, { "epoch": 8.5, "learning_rate": 2.8760570824524312e-05, "loss": 0.0532, "step": 8044 }, { "epoch": 8.51, "learning_rate": 2.8755285412262155e-05, "loss": 0.06, "step": 8046 }, { "epoch": 8.51, "learning_rate": 2.8749999999999997e-05, "loss": 0.088, "step": 8048 }, { "epoch": 8.51, "learning_rate": 2.8744714587737847e-05, "loss": 0.063, "step": 8050 }, { "epoch": 8.51, "learning_rate": 2.873942917547569e-05, "loss": 0.0784, "step": 8052 }, { "epoch": 8.51, "learning_rate": 2.8734143763213532e-05, "loss": 0.0938, "step": 8054 }, { "epoch": 8.52, "learning_rate": 2.8728858350951375e-05, "loss": 0.1824, "step": 8056 }, { "epoch": 8.52, "learning_rate": 2.872357293868922e-05, "loss": 0.2113, "step": 8058 }, { "epoch": 8.52, "learning_rate": 2.8718287526427064e-05, "loss": 0.1183, "step": 8060 }, { "epoch": 8.52, "learning_rate": 2.8713002114164906e-05, "loss": 0.0816, "step": 8062 }, { "epoch": 8.52, "learning_rate": 2.870771670190275e-05, "loss": 0.1049, "step": 8064 }, { "epoch": 8.53, "learning_rate": 2.870243128964059e-05, "loss": 0.1227, "step": 8066 }, { "epoch": 8.53, "learning_rate": 2.869714587737844e-05, "loss": 0.1581, "step": 8068 }, { "epoch": 8.53, "learning_rate": 2.8691860465116284e-05, "loss": 0.0702, "step": 8070 }, { "epoch": 8.53, "learning_rate": 2.8686575052854126e-05, "loss": 0.2441, "step": 8072 }, { "epoch": 8.53, "learning_rate": 2.868128964059197e-05, "loss": 0.1847, "step": 8074 }, { "epoch": 8.54, "learning_rate": 2.8676004228329812e-05, "loss": 0.1233, "step": 8076 }, { "epoch": 8.54, "learning_rate": 2.8670718816067654e-05, "loss": 0.3098, "step": 8078 }, { "epoch": 8.54, "learning_rate": 2.8665433403805497e-05, "loss": 0.2001, "step": 8080 }, { "epoch": 8.54, "learning_rate": 2.866014799154334e-05, "loss": 0.2392, "step": 8082 }, { "epoch": 8.55, "learning_rate": 2.8654862579281183e-05, "loss": 0.1514, "step": 8084 }, { "epoch": 8.55, "learning_rate": 2.8649577167019032e-05, "loss": 0.1599, "step": 8086 }, { "epoch": 8.55, "learning_rate": 2.8644291754756875e-05, "loss": 0.1155, "step": 8088 }, { "epoch": 8.55, "learning_rate": 2.8639006342494717e-05, "loss": 0.041, "step": 8090 }, { "epoch": 8.55, "learning_rate": 2.863372093023256e-05, "loss": 0.1052, "step": 8092 }, { "epoch": 8.56, "learning_rate": 2.8628435517970403e-05, "loss": 0.1127, "step": 8094 }, { "epoch": 8.56, "learning_rate": 2.8623150105708245e-05, "loss": 0.1245, "step": 8096 }, { "epoch": 8.56, "learning_rate": 2.8617864693446088e-05, "loss": 0.084, "step": 8098 }, { "epoch": 8.56, "learning_rate": 2.861257928118393e-05, "loss": 0.1355, "step": 8100 }, { "epoch": 8.56, "learning_rate": 2.8607293868921777e-05, "loss": 0.0758, "step": 8102 }, { "epoch": 8.57, "learning_rate": 2.8602008456659623e-05, "loss": 0.1272, "step": 8104 }, { "epoch": 8.57, "learning_rate": 2.8596723044397465e-05, "loss": 0.085, "step": 8106 }, { "epoch": 8.57, "learning_rate": 2.8591437632135308e-05, "loss": 0.0379, "step": 8108 }, { "epoch": 8.57, "learning_rate": 2.858615221987315e-05, "loss": 0.0785, "step": 8110 }, { "epoch": 8.58, "learning_rate": 2.8580866807610997e-05, "loss": 0.072, "step": 8112 }, { "epoch": 8.58, "learning_rate": 2.857558139534884e-05, "loss": 0.121, "step": 8114 }, { "epoch": 8.58, "learning_rate": 2.8570295983086682e-05, "loss": 0.0658, "step": 8116 }, { "epoch": 8.58, "learning_rate": 2.8565010570824525e-05, "loss": 0.0765, "step": 8118 }, { "epoch": 8.58, "learning_rate": 2.8559725158562368e-05, "loss": 0.0394, "step": 8120 }, { "epoch": 8.59, "learning_rate": 2.855443974630021e-05, "loss": 0.2187, "step": 8122 }, { "epoch": 8.59, "learning_rate": 2.854915433403806e-05, "loss": 0.0988, "step": 8124 }, { "epoch": 8.59, "learning_rate": 2.8543868921775902e-05, "loss": 0.07, "step": 8126 }, { "epoch": 8.59, "learning_rate": 2.8538583509513745e-05, "loss": 0.0686, "step": 8128 }, { "epoch": 8.59, "learning_rate": 2.8533298097251588e-05, "loss": 0.261, "step": 8130 }, { "epoch": 8.6, "learning_rate": 2.852801268498943e-05, "loss": 0.1215, "step": 8132 }, { "epoch": 8.6, "learning_rate": 2.8522727272727273e-05, "loss": 0.155, "step": 8134 }, { "epoch": 8.6, "learning_rate": 2.8517441860465116e-05, "loss": 0.1924, "step": 8136 }, { "epoch": 8.6, "learning_rate": 2.851215644820296e-05, "loss": 0.0767, "step": 8138 }, { "epoch": 8.6, "learning_rate": 2.85068710359408e-05, "loss": 0.1421, "step": 8140 }, { "epoch": 8.61, "learning_rate": 2.850158562367865e-05, "loss": 0.0753, "step": 8142 }, { "epoch": 8.61, "learning_rate": 2.8496300211416493e-05, "loss": 0.1448, "step": 8144 }, { "epoch": 8.61, "learning_rate": 2.8491014799154336e-05, "loss": 0.1149, "step": 8146 }, { "epoch": 8.61, "learning_rate": 2.848572938689218e-05, "loss": 0.0598, "step": 8148 }, { "epoch": 8.62, "learning_rate": 2.848044397463002e-05, "loss": 0.1086, "step": 8150 }, { "epoch": 8.62, "learning_rate": 2.8475158562367864e-05, "loss": 0.0888, "step": 8152 }, { "epoch": 8.62, "learning_rate": 2.8469873150105706e-05, "loss": 0.0574, "step": 8154 }, { "epoch": 8.62, "learning_rate": 2.8464587737843553e-05, "loss": 0.0729, "step": 8156 }, { "epoch": 8.62, "learning_rate": 2.8459302325581395e-05, "loss": 0.1762, "step": 8158 }, { "epoch": 8.63, "learning_rate": 2.845401691331924e-05, "loss": 0.0921, "step": 8160 }, { "epoch": 8.63, "learning_rate": 2.8448731501057084e-05, "loss": 0.0903, "step": 8162 }, { "epoch": 8.63, "learning_rate": 2.844344608879493e-05, "loss": 0.1676, "step": 8164 }, { "epoch": 8.63, "learning_rate": 2.8438160676532773e-05, "loss": 0.1132, "step": 8166 }, { "epoch": 8.63, "learning_rate": 2.8432875264270615e-05, "loss": 0.1617, "step": 8168 }, { "epoch": 8.64, "learning_rate": 2.8427589852008458e-05, "loss": 0.0692, "step": 8170 }, { "epoch": 8.64, "learning_rate": 2.84223044397463e-05, "loss": 0.072, "step": 8172 }, { "epoch": 8.64, "learning_rate": 2.8417019027484143e-05, "loss": 0.0783, "step": 8174 }, { "epoch": 8.64, "learning_rate": 2.8411733615221986e-05, "loss": 0.0356, "step": 8176 }, { "epoch": 8.64, "learning_rate": 2.8406448202959835e-05, "loss": 0.0552, "step": 8178 }, { "epoch": 8.65, "learning_rate": 2.8401162790697678e-05, "loss": 0.1139, "step": 8180 }, { "epoch": 8.65, "learning_rate": 2.839587737843552e-05, "loss": 0.1773, "step": 8182 }, { "epoch": 8.65, "learning_rate": 2.8390591966173363e-05, "loss": 0.2647, "step": 8184 }, { "epoch": 8.65, "learning_rate": 2.8385306553911206e-05, "loss": 0.1721, "step": 8186 }, { "epoch": 8.66, "learning_rate": 2.838002114164905e-05, "loss": 0.0464, "step": 8188 }, { "epoch": 8.66, "learning_rate": 2.837473572938689e-05, "loss": 0.0901, "step": 8190 }, { "epoch": 8.66, "learning_rate": 2.8369450317124734e-05, "loss": 0.0405, "step": 8192 }, { "epoch": 8.66, "learning_rate": 2.8364164904862577e-05, "loss": 0.0503, "step": 8194 }, { "epoch": 8.66, "learning_rate": 2.8358879492600426e-05, "loss": 0.1983, "step": 8196 }, { "epoch": 8.67, "learning_rate": 2.835359408033827e-05, "loss": 0.0998, "step": 8198 }, { "epoch": 8.67, "learning_rate": 2.834830866807611e-05, "loss": 0.0986, "step": 8200 }, { "epoch": 8.67, "learning_rate": 2.8343023255813954e-05, "loss": 0.1944, "step": 8202 }, { "epoch": 8.67, "learning_rate": 2.8337737843551797e-05, "loss": 0.1027, "step": 8204 }, { "epoch": 8.67, "learning_rate": 2.833245243128964e-05, "loss": 0.1601, "step": 8206 }, { "epoch": 8.68, "learning_rate": 2.8327167019027486e-05, "loss": 0.0563, "step": 8208 }, { "epoch": 8.68, "learning_rate": 2.832188160676533e-05, "loss": 0.0967, "step": 8210 }, { "epoch": 8.68, "learning_rate": 2.831659619450317e-05, "loss": 0.0645, "step": 8212 }, { "epoch": 8.68, "learning_rate": 2.8311310782241017e-05, "loss": 0.1535, "step": 8214 }, { "epoch": 8.68, "learning_rate": 2.830602536997886e-05, "loss": 0.1287, "step": 8216 }, { "epoch": 8.69, "learning_rate": 2.8300739957716706e-05, "loss": 0.0618, "step": 8218 }, { "epoch": 8.69, "learning_rate": 2.829545454545455e-05, "loss": 0.1816, "step": 8220 }, { "epoch": 8.69, "learning_rate": 2.829016913319239e-05, "loss": 0.1294, "step": 8222 }, { "epoch": 8.69, "learning_rate": 2.8284883720930234e-05, "loss": 0.1351, "step": 8224 }, { "epoch": 8.7, "learning_rate": 2.8279598308668077e-05, "loss": 0.1923, "step": 8226 }, { "epoch": 8.7, "learning_rate": 2.827431289640592e-05, "loss": 0.0535, "step": 8228 }, { "epoch": 8.7, "learning_rate": 2.8269027484143762e-05, "loss": 0.1522, "step": 8230 }, { "epoch": 8.7, "learning_rate": 2.826374207188161e-05, "loss": 0.1228, "step": 8232 }, { "epoch": 8.7, "learning_rate": 2.8258456659619454e-05, "loss": 0.2214, "step": 8234 }, { "epoch": 8.71, "learning_rate": 2.8253171247357297e-05, "loss": 0.055, "step": 8236 }, { "epoch": 8.71, "learning_rate": 2.824788583509514e-05, "loss": 0.0697, "step": 8238 }, { "epoch": 8.71, "learning_rate": 2.8242600422832982e-05, "loss": 0.0846, "step": 8240 }, { "epoch": 8.71, "learning_rate": 2.8237315010570825e-05, "loss": 0.1981, "step": 8242 }, { "epoch": 8.71, "learning_rate": 2.8232029598308667e-05, "loss": 0.2626, "step": 8244 }, { "epoch": 8.72, "learning_rate": 2.822674418604651e-05, "loss": 0.1027, "step": 8246 }, { "epoch": 8.72, "learning_rate": 2.8221458773784353e-05, "loss": 0.072, "step": 8248 }, { "epoch": 8.72, "learning_rate": 2.8216173361522202e-05, "loss": 0.0512, "step": 8250 }, { "epoch": 8.72, "learning_rate": 2.8210887949260045e-05, "loss": 0.1911, "step": 8252 }, { "epoch": 8.73, "learning_rate": 2.8205602536997887e-05, "loss": 0.1951, "step": 8254 }, { "epoch": 8.73, "learning_rate": 2.820031712473573e-05, "loss": 0.1127, "step": 8256 }, { "epoch": 8.73, "learning_rate": 2.8195031712473573e-05, "loss": 0.0594, "step": 8258 }, { "epoch": 8.73, "learning_rate": 2.8189746300211415e-05, "loss": 0.0958, "step": 8260 }, { "epoch": 8.73, "learning_rate": 2.818446088794926e-05, "loss": 0.2258, "step": 8262 }, { "epoch": 8.74, "learning_rate": 2.8179175475687104e-05, "loss": 0.0807, "step": 8264 }, { "epoch": 8.74, "learning_rate": 2.8173890063424947e-05, "loss": 0.1206, "step": 8266 }, { "epoch": 8.74, "learning_rate": 2.8168604651162793e-05, "loss": 0.0782, "step": 8268 }, { "epoch": 8.74, "learning_rate": 2.8163319238900636e-05, "loss": 0.067, "step": 8270 }, { "epoch": 8.74, "learning_rate": 2.815803382663848e-05, "loss": 0.2475, "step": 8272 }, { "epoch": 8.75, "learning_rate": 2.8152748414376324e-05, "loss": 0.2549, "step": 8274 }, { "epoch": 8.75, "learning_rate": 2.8147463002114167e-05, "loss": 0.1755, "step": 8276 }, { "epoch": 8.75, "learning_rate": 2.814217758985201e-05, "loss": 0.1862, "step": 8278 }, { "epoch": 8.75, "learning_rate": 2.8136892177589852e-05, "loss": 0.1525, "step": 8280 }, { "epoch": 8.75, "learning_rate": 2.8131606765327695e-05, "loss": 0.1334, "step": 8282 }, { "epoch": 8.76, "learning_rate": 2.8126321353065538e-05, "loss": 0.0858, "step": 8284 }, { "epoch": 8.76, "learning_rate": 2.8121035940803387e-05, "loss": 0.0971, "step": 8286 }, { "epoch": 8.76, "learning_rate": 2.811575052854123e-05, "loss": 0.077, "step": 8288 }, { "epoch": 8.76, "learning_rate": 2.8110465116279073e-05, "loss": 0.1299, "step": 8290 }, { "epoch": 8.77, "learning_rate": 2.8105179704016915e-05, "loss": 0.1872, "step": 8292 }, { "epoch": 8.77, "learning_rate": 2.8099894291754758e-05, "loss": 0.0986, "step": 8294 }, { "epoch": 8.77, "learning_rate": 2.80946088794926e-05, "loss": 0.0461, "step": 8296 }, { "epoch": 8.77, "learning_rate": 2.8089323467230443e-05, "loss": 0.1037, "step": 8298 }, { "epoch": 8.77, "learning_rate": 2.8084038054968286e-05, "loss": 0.1127, "step": 8300 }, { "epoch": 8.78, "learning_rate": 2.807875264270613e-05, "loss": 0.1026, "step": 8302 }, { "epoch": 8.78, "learning_rate": 2.8073467230443978e-05, "loss": 0.0636, "step": 8304 }, { "epoch": 8.78, "learning_rate": 2.806818181818182e-05, "loss": 0.1557, "step": 8306 }, { "epoch": 8.78, "learning_rate": 2.8062896405919663e-05, "loss": 0.1203, "step": 8308 }, { "epoch": 8.78, "learning_rate": 2.8057610993657506e-05, "loss": 0.0859, "step": 8310 }, { "epoch": 8.79, "learning_rate": 2.805232558139535e-05, "loss": 0.1833, "step": 8312 }, { "epoch": 8.79, "learning_rate": 2.804704016913319e-05, "loss": 0.2377, "step": 8314 }, { "epoch": 8.79, "learning_rate": 2.8041754756871037e-05, "loss": 0.1107, "step": 8316 }, { "epoch": 8.79, "learning_rate": 2.803646934460888e-05, "loss": 0.1154, "step": 8318 }, { "epoch": 8.79, "learning_rate": 2.8031183932346723e-05, "loss": 0.1013, "step": 8320 }, { "epoch": 8.8, "learning_rate": 2.802589852008457e-05, "loss": 0.0989, "step": 8322 }, { "epoch": 8.8, "learning_rate": 2.8020613107822415e-05, "loss": 0.0776, "step": 8324 }, { "epoch": 8.8, "learning_rate": 2.8015327695560258e-05, "loss": 0.0925, "step": 8326 }, { "epoch": 8.8, "learning_rate": 2.80100422832981e-05, "loss": 0.1062, "step": 8328 }, { "epoch": 8.81, "learning_rate": 2.8004756871035943e-05, "loss": 0.0799, "step": 8330 }, { "epoch": 8.81, "learning_rate": 2.7999471458773786e-05, "loss": 0.1206, "step": 8332 }, { "epoch": 8.81, "learning_rate": 2.7994186046511628e-05, "loss": 0.1726, "step": 8334 }, { "epoch": 8.81, "learning_rate": 2.798890063424947e-05, "loss": 0.1599, "step": 8336 }, { "epoch": 8.81, "learning_rate": 2.7983615221987314e-05, "loss": 0.1454, "step": 8338 }, { "epoch": 8.82, "learning_rate": 2.7978329809725163e-05, "loss": 0.1246, "step": 8340 }, { "epoch": 8.82, "learning_rate": 2.7973044397463006e-05, "loss": 0.0782, "step": 8342 }, { "epoch": 8.82, "learning_rate": 2.796775898520085e-05, "loss": 0.1085, "step": 8344 }, { "epoch": 8.82, "learning_rate": 2.796247357293869e-05, "loss": 0.0896, "step": 8346 }, { "epoch": 8.82, "learning_rate": 2.7957188160676534e-05, "loss": 0.0475, "step": 8348 }, { "epoch": 8.83, "learning_rate": 2.7951902748414376e-05, "loss": 0.1508, "step": 8350 }, { "epoch": 8.83, "learning_rate": 2.794661733615222e-05, "loss": 0.0623, "step": 8352 }, { "epoch": 8.83, "learning_rate": 2.7941331923890062e-05, "loss": 0.1372, "step": 8354 }, { "epoch": 8.83, "learning_rate": 2.7936046511627904e-05, "loss": 0.0929, "step": 8356 }, { "epoch": 8.84, "learning_rate": 2.7930761099365754e-05, "loss": 0.0702, "step": 8358 }, { "epoch": 8.84, "learning_rate": 2.7925475687103596e-05, "loss": 0.107, "step": 8360 }, { "epoch": 8.84, "learning_rate": 2.792019027484144e-05, "loss": 0.0428, "step": 8362 }, { "epoch": 8.84, "learning_rate": 2.7914904862579282e-05, "loss": 0.1434, "step": 8364 }, { "epoch": 8.84, "learning_rate": 2.7909619450317125e-05, "loss": 0.0995, "step": 8366 }, { "epoch": 8.85, "learning_rate": 2.790433403805497e-05, "loss": 0.1524, "step": 8368 }, { "epoch": 8.85, "learning_rate": 2.7899048625792813e-05, "loss": 0.0819, "step": 8370 }, { "epoch": 8.85, "learning_rate": 2.7893763213530656e-05, "loss": 0.0574, "step": 8372 }, { "epoch": 8.85, "learning_rate": 2.78884778012685e-05, "loss": 0.0799, "step": 8374 }, { "epoch": 8.85, "learning_rate": 2.7883192389006345e-05, "loss": 0.1706, "step": 8376 }, { "epoch": 8.86, "learning_rate": 2.787790697674419e-05, "loss": 0.0798, "step": 8378 }, { "epoch": 8.86, "learning_rate": 2.7872621564482033e-05, "loss": 0.1162, "step": 8380 }, { "epoch": 8.86, "learning_rate": 2.7867336152219876e-05, "loss": 0.0853, "step": 8382 }, { "epoch": 8.86, "learning_rate": 2.786205073995772e-05, "loss": 0.0931, "step": 8384 }, { "epoch": 8.86, "learning_rate": 2.785676532769556e-05, "loss": 0.0971, "step": 8386 }, { "epoch": 8.87, "learning_rate": 2.7851479915433404e-05, "loss": 0.043, "step": 8388 }, { "epoch": 8.87, "learning_rate": 2.7846194503171247e-05, "loss": 0.0413, "step": 8390 }, { "epoch": 8.87, "learning_rate": 2.784090909090909e-05, "loss": 0.0894, "step": 8392 }, { "epoch": 8.87, "learning_rate": 2.783562367864694e-05, "loss": 0.029, "step": 8394 }, { "epoch": 8.88, "learning_rate": 2.783033826638478e-05, "loss": 0.0832, "step": 8396 }, { "epoch": 8.88, "learning_rate": 2.7825052854122624e-05, "loss": 0.0876, "step": 8398 }, { "epoch": 8.88, "learning_rate": 2.7819767441860467e-05, "loss": 0.1376, "step": 8400 }, { "epoch": 8.88, "learning_rate": 2.781448202959831e-05, "loss": 0.1387, "step": 8402 }, { "epoch": 8.88, "learning_rate": 2.7809196617336152e-05, "loss": 0.1245, "step": 8404 }, { "epoch": 8.89, "learning_rate": 2.7803911205073995e-05, "loss": 0.1056, "step": 8406 }, { "epoch": 8.89, "learning_rate": 2.7798625792811838e-05, "loss": 0.0913, "step": 8408 }, { "epoch": 8.89, "learning_rate": 2.779334038054968e-05, "loss": 0.0532, "step": 8410 }, { "epoch": 8.89, "learning_rate": 2.778805496828753e-05, "loss": 0.1707, "step": 8412 }, { "epoch": 8.89, "learning_rate": 2.7782769556025372e-05, "loss": 0.1036, "step": 8414 }, { "epoch": 8.9, "learning_rate": 2.7777484143763215e-05, "loss": 0.1387, "step": 8416 }, { "epoch": 8.9, "learning_rate": 2.7772198731501058e-05, "loss": 0.091, "step": 8418 }, { "epoch": 8.9, "learning_rate": 2.77669133192389e-05, "loss": 0.0763, "step": 8420 }, { "epoch": 8.9, "learning_rate": 2.7761627906976746e-05, "loss": 0.092, "step": 8422 }, { "epoch": 8.9, "learning_rate": 2.775634249471459e-05, "loss": 0.0948, "step": 8424 }, { "epoch": 8.91, "learning_rate": 2.7751057082452432e-05, "loss": 0.0832, "step": 8426 }, { "epoch": 8.91, "learning_rate": 2.7745771670190274e-05, "loss": 0.0764, "step": 8428 }, { "epoch": 8.91, "learning_rate": 2.774048625792812e-05, "loss": 0.1405, "step": 8430 }, { "epoch": 8.91, "learning_rate": 2.7735200845665967e-05, "loss": 0.0823, "step": 8432 }, { "epoch": 8.92, "learning_rate": 2.772991543340381e-05, "loss": 0.1438, "step": 8434 }, { "epoch": 8.92, "learning_rate": 2.7724630021141652e-05, "loss": 0.228, "step": 8436 }, { "epoch": 8.92, "learning_rate": 2.7719344608879495e-05, "loss": 0.0858, "step": 8438 }, { "epoch": 8.92, "learning_rate": 2.7714059196617337e-05, "loss": 0.0388, "step": 8440 }, { "epoch": 8.92, "learning_rate": 2.770877378435518e-05, "loss": 0.0782, "step": 8442 }, { "epoch": 8.93, "learning_rate": 2.7703488372093023e-05, "loss": 0.1014, "step": 8444 }, { "epoch": 8.93, "learning_rate": 2.7698202959830865e-05, "loss": 0.1315, "step": 8446 }, { "epoch": 8.93, "learning_rate": 2.7692917547568715e-05, "loss": 0.0287, "step": 8448 }, { "epoch": 8.93, "learning_rate": 2.7687632135306557e-05, "loss": 0.0993, "step": 8450 }, { "epoch": 8.93, "learning_rate": 2.76823467230444e-05, "loss": 0.1108, "step": 8452 }, { "epoch": 8.94, "learning_rate": 2.7677061310782243e-05, "loss": 0.1189, "step": 8454 }, { "epoch": 8.94, "learning_rate": 2.7671775898520085e-05, "loss": 0.0465, "step": 8456 }, { "epoch": 8.94, "learning_rate": 2.7666490486257928e-05, "loss": 0.2443, "step": 8458 }, { "epoch": 8.94, "learning_rate": 2.766120507399577e-05, "loss": 0.1708, "step": 8460 }, { "epoch": 8.95, "learning_rate": 2.7655919661733613e-05, "loss": 0.2846, "step": 8462 }, { "epoch": 8.95, "learning_rate": 2.7650634249471456e-05, "loss": 0.079, "step": 8464 }, { "epoch": 8.95, "learning_rate": 2.7645348837209305e-05, "loss": 0.3057, "step": 8466 }, { "epoch": 8.95, "learning_rate": 2.7640063424947148e-05, "loss": 0.0952, "step": 8468 }, { "epoch": 8.95, "learning_rate": 2.763477801268499e-05, "loss": 0.0733, "step": 8470 }, { "epoch": 8.96, "learning_rate": 2.7629492600422834e-05, "loss": 0.1229, "step": 8472 }, { "epoch": 8.96, "learning_rate": 2.7624207188160676e-05, "loss": 0.1157, "step": 8474 }, { "epoch": 8.96, "learning_rate": 2.7618921775898522e-05, "loss": 0.1169, "step": 8476 }, { "epoch": 8.96, "learning_rate": 2.7613636363636365e-05, "loss": 0.0718, "step": 8478 }, { "epoch": 8.96, "learning_rate": 2.7608350951374208e-05, "loss": 0.2532, "step": 8480 }, { "epoch": 8.97, "learning_rate": 2.760306553911205e-05, "loss": 0.052, "step": 8482 }, { "epoch": 8.97, "learning_rate": 2.75977801268499e-05, "loss": 0.1107, "step": 8484 }, { "epoch": 8.97, "learning_rate": 2.7592494714587742e-05, "loss": 0.1298, "step": 8486 }, { "epoch": 8.97, "learning_rate": 2.7587209302325585e-05, "loss": 0.0634, "step": 8488 }, { "epoch": 8.97, "learning_rate": 2.7581923890063428e-05, "loss": 0.0778, "step": 8490 }, { "epoch": 8.98, "learning_rate": 2.757663847780127e-05, "loss": 0.0882, "step": 8492 }, { "epoch": 8.98, "learning_rate": 2.7571353065539113e-05, "loss": 0.0727, "step": 8494 }, { "epoch": 8.98, "learning_rate": 2.7566067653276956e-05, "loss": 0.0334, "step": 8496 }, { "epoch": 8.98, "learning_rate": 2.75607822410148e-05, "loss": 0.0673, "step": 8498 }, { "epoch": 8.99, "learning_rate": 2.755549682875264e-05, "loss": 0.2054, "step": 8500 }, { "epoch": 8.99, "eval_cer": 0.03425477343972642, "eval_loss": 0.6548437476158142, "eval_runtime": 125.5264, "eval_samples_per_second": 6.7, "eval_steps_per_second": 0.844, "step": 8500 }, { "epoch": 8.99, "learning_rate": 2.755021141649049e-05, "loss": 0.0321, "step": 8502 }, { "epoch": 8.99, "learning_rate": 2.7544926004228333e-05, "loss": 0.0655, "step": 8504 }, { "epoch": 8.99, "learning_rate": 2.7539640591966176e-05, "loss": 0.1483, "step": 8506 }, { "epoch": 8.99, "learning_rate": 2.753435517970402e-05, "loss": 0.0967, "step": 8508 }, { "epoch": 9.0, "learning_rate": 2.752906976744186e-05, "loss": 0.1002, "step": 8510 }, { "epoch": 9.0, "learning_rate": 2.7523784355179704e-05, "loss": 0.1041, "step": 8512 }, { "epoch": 9.0, "learning_rate": 2.7518498942917547e-05, "loss": 0.1675, "step": 8514 }, { "epoch": 9.0, "learning_rate": 2.751321353065539e-05, "loss": 0.0342, "step": 8516 }, { "epoch": 9.0, "learning_rate": 2.7507928118393232e-05, "loss": 0.0659, "step": 8518 }, { "epoch": 9.01, "learning_rate": 2.750264270613108e-05, "loss": 0.0966, "step": 8520 }, { "epoch": 9.01, "learning_rate": 2.7497357293868924e-05, "loss": 0.0474, "step": 8522 }, { "epoch": 9.01, "learning_rate": 2.7492071881606767e-05, "loss": 0.1123, "step": 8524 }, { "epoch": 9.01, "learning_rate": 2.748678646934461e-05, "loss": 0.1332, "step": 8526 }, { "epoch": 9.01, "learning_rate": 2.7481501057082455e-05, "loss": 0.2334, "step": 8528 }, { "epoch": 9.02, "learning_rate": 2.7476215644820298e-05, "loss": 0.1152, "step": 8530 }, { "epoch": 9.02, "learning_rate": 2.747093023255814e-05, "loss": 0.1294, "step": 8532 }, { "epoch": 9.02, "learning_rate": 2.7465644820295983e-05, "loss": 0.051, "step": 8534 }, { "epoch": 9.02, "learning_rate": 2.7460359408033826e-05, "loss": 0.0726, "step": 8536 }, { "epoch": 9.03, "learning_rate": 2.7455073995771676e-05, "loss": 0.0495, "step": 8538 }, { "epoch": 9.03, "learning_rate": 2.7449788583509518e-05, "loss": 0.134, "step": 8540 }, { "epoch": 9.03, "learning_rate": 2.744450317124736e-05, "loss": 0.0087, "step": 8542 }, { "epoch": 9.03, "learning_rate": 2.7439217758985204e-05, "loss": 0.1752, "step": 8544 }, { "epoch": 9.03, "learning_rate": 2.7433932346723046e-05, "loss": 0.1692, "step": 8546 }, { "epoch": 9.04, "learning_rate": 2.742864693446089e-05, "loss": 0.1097, "step": 8548 }, { "epoch": 9.04, "learning_rate": 2.742336152219873e-05, "loss": 0.045, "step": 8550 }, { "epoch": 9.04, "learning_rate": 2.7418076109936574e-05, "loss": 0.1581, "step": 8552 }, { "epoch": 9.04, "learning_rate": 2.7412790697674417e-05, "loss": 0.0721, "step": 8554 }, { "epoch": 9.04, "learning_rate": 2.7407505285412266e-05, "loss": 0.0617, "step": 8556 }, { "epoch": 9.05, "learning_rate": 2.740221987315011e-05, "loss": 0.0526, "step": 8558 }, { "epoch": 9.05, "learning_rate": 2.7396934460887952e-05, "loss": 0.0785, "step": 8560 }, { "epoch": 9.05, "learning_rate": 2.7391649048625794e-05, "loss": 0.0973, "step": 8562 }, { "epoch": 9.05, "learning_rate": 2.7386363636363637e-05, "loss": 0.0743, "step": 8564 }, { "epoch": 9.05, "learning_rate": 2.738107822410148e-05, "loss": 0.1105, "step": 8566 }, { "epoch": 9.06, "learning_rate": 2.7375792811839322e-05, "loss": 0.1266, "step": 8568 }, { "epoch": 9.06, "learning_rate": 2.7370507399577165e-05, "loss": 0.0903, "step": 8570 }, { "epoch": 9.06, "learning_rate": 2.736522198731501e-05, "loss": 0.2459, "step": 8572 }, { "epoch": 9.06, "learning_rate": 2.7359936575052857e-05, "loss": 0.1202, "step": 8574 }, { "epoch": 9.07, "learning_rate": 2.73546511627907e-05, "loss": 0.0555, "step": 8576 }, { "epoch": 9.07, "learning_rate": 2.7349365750528543e-05, "loss": 0.218, "step": 8578 }, { "epoch": 9.07, "learning_rate": 2.7344080338266385e-05, "loss": 0.0653, "step": 8580 }, { "epoch": 9.07, "learning_rate": 2.733879492600423e-05, "loss": 0.0576, "step": 8582 }, { "epoch": 9.07, "learning_rate": 2.7333509513742074e-05, "loss": 0.0617, "step": 8584 }, { "epoch": 9.08, "learning_rate": 2.7328224101479917e-05, "loss": 0.0679, "step": 8586 }, { "epoch": 9.08, "learning_rate": 2.732293868921776e-05, "loss": 0.0907, "step": 8588 }, { "epoch": 9.08, "learning_rate": 2.7317653276955602e-05, "loss": 0.1537, "step": 8590 }, { "epoch": 9.08, "learning_rate": 2.731236786469345e-05, "loss": 0.1437, "step": 8592 }, { "epoch": 9.08, "learning_rate": 2.7307082452431294e-05, "loss": 0.0707, "step": 8594 }, { "epoch": 9.09, "learning_rate": 2.7301797040169137e-05, "loss": 0.183, "step": 8596 }, { "epoch": 9.09, "learning_rate": 2.729651162790698e-05, "loss": 0.0725, "step": 8598 }, { "epoch": 9.09, "learning_rate": 2.7291226215644822e-05, "loss": 0.147, "step": 8600 }, { "epoch": 9.09, "learning_rate": 2.7285940803382665e-05, "loss": 0.1171, "step": 8602 }, { "epoch": 9.1, "learning_rate": 2.7280655391120507e-05, "loss": 0.0401, "step": 8604 }, { "epoch": 9.1, "learning_rate": 2.727536997885835e-05, "loss": 0.0992, "step": 8606 }, { "epoch": 9.1, "learning_rate": 2.7270084566596193e-05, "loss": 0.0551, "step": 8608 }, { "epoch": 9.1, "learning_rate": 2.7264799154334042e-05, "loss": 0.1109, "step": 8610 }, { "epoch": 9.1, "learning_rate": 2.7259513742071885e-05, "loss": 0.076, "step": 8612 }, { "epoch": 9.11, "learning_rate": 2.7254228329809728e-05, "loss": 0.0863, "step": 8614 }, { "epoch": 9.11, "learning_rate": 2.724894291754757e-05, "loss": 0.049, "step": 8616 }, { "epoch": 9.11, "learning_rate": 2.7243657505285413e-05, "loss": 0.1014, "step": 8618 }, { "epoch": 9.11, "learning_rate": 2.7238372093023256e-05, "loss": 0.0364, "step": 8620 }, { "epoch": 9.11, "learning_rate": 2.7233086680761098e-05, "loss": 0.1735, "step": 8622 }, { "epoch": 9.12, "learning_rate": 2.722780126849894e-05, "loss": 0.0667, "step": 8624 }, { "epoch": 9.12, "learning_rate": 2.7222515856236787e-05, "loss": 0.0699, "step": 8626 }, { "epoch": 9.12, "learning_rate": 2.7217230443974633e-05, "loss": 0.0562, "step": 8628 }, { "epoch": 9.12, "learning_rate": 2.7211945031712476e-05, "loss": 0.0501, "step": 8630 }, { "epoch": 9.12, "learning_rate": 2.720665961945032e-05, "loss": 0.0928, "step": 8632 }, { "epoch": 9.13, "learning_rate": 2.7201374207188164e-05, "loss": 0.1008, "step": 8634 }, { "epoch": 9.13, "learning_rate": 2.7196088794926007e-05, "loss": 0.0146, "step": 8636 }, { "epoch": 9.13, "learning_rate": 2.719080338266385e-05, "loss": 0.0512, "step": 8638 }, { "epoch": 9.13, "learning_rate": 2.7185517970401692e-05, "loss": 0.0517, "step": 8640 }, { "epoch": 9.14, "learning_rate": 2.7180232558139535e-05, "loss": 0.0155, "step": 8642 }, { "epoch": 9.14, "learning_rate": 2.7174947145877378e-05, "loss": 0.0762, "step": 8644 }, { "epoch": 9.14, "learning_rate": 2.7169661733615227e-05, "loss": 0.0822, "step": 8646 }, { "epoch": 9.14, "learning_rate": 2.716437632135307e-05, "loss": 0.0619, "step": 8648 }, { "epoch": 9.14, "learning_rate": 2.7159090909090913e-05, "loss": 0.1611, "step": 8650 }, { "epoch": 9.15, "learning_rate": 2.7153805496828755e-05, "loss": 0.0875, "step": 8652 }, { "epoch": 9.15, "learning_rate": 2.7148520084566598e-05, "loss": 0.0389, "step": 8654 }, { "epoch": 9.15, "learning_rate": 2.714323467230444e-05, "loss": 0.109, "step": 8656 }, { "epoch": 9.15, "learning_rate": 2.7137949260042283e-05, "loss": 0.0651, "step": 8658 }, { "epoch": 9.15, "learning_rate": 2.7132663847780126e-05, "loss": 0.1393, "step": 8660 }, { "epoch": 9.16, "learning_rate": 2.712737843551797e-05, "loss": 0.1152, "step": 8662 }, { "epoch": 9.16, "learning_rate": 2.7122093023255818e-05, "loss": 0.0792, "step": 8664 }, { "epoch": 9.16, "learning_rate": 2.711680761099366e-05, "loss": 0.0683, "step": 8666 }, { "epoch": 9.16, "learning_rate": 2.7111522198731503e-05, "loss": 0.1229, "step": 8668 }, { "epoch": 9.16, "learning_rate": 2.7106236786469346e-05, "loss": 0.1954, "step": 8670 }, { "epoch": 9.17, "learning_rate": 2.710095137420719e-05, "loss": 0.1128, "step": 8672 }, { "epoch": 9.17, "learning_rate": 2.709566596194503e-05, "loss": 0.0474, "step": 8674 }, { "epoch": 9.17, "learning_rate": 2.7090380549682874e-05, "loss": 0.152, "step": 8676 }, { "epoch": 9.17, "learning_rate": 2.708509513742072e-05, "loss": 0.0323, "step": 8678 }, { "epoch": 9.18, "learning_rate": 2.7079809725158563e-05, "loss": 0.0839, "step": 8680 }, { "epoch": 9.18, "learning_rate": 2.707452431289641e-05, "loss": 0.1011, "step": 8682 }, { "epoch": 9.18, "learning_rate": 2.706923890063425e-05, "loss": 0.0682, "step": 8684 }, { "epoch": 9.18, "learning_rate": 2.7063953488372094e-05, "loss": 0.1056, "step": 8686 }, { "epoch": 9.18, "learning_rate": 2.705866807610994e-05, "loss": 0.123, "step": 8688 }, { "epoch": 9.19, "learning_rate": 2.7053382663847783e-05, "loss": 0.0388, "step": 8690 }, { "epoch": 9.19, "learning_rate": 2.7048097251585626e-05, "loss": 0.0797, "step": 8692 }, { "epoch": 9.19, "learning_rate": 2.7042811839323468e-05, "loss": 0.0899, "step": 8694 }, { "epoch": 9.19, "learning_rate": 2.703752642706131e-05, "loss": 0.0955, "step": 8696 }, { "epoch": 9.19, "learning_rate": 2.7032241014799154e-05, "loss": 0.1046, "step": 8698 }, { "epoch": 9.2, "learning_rate": 2.7026955602537003e-05, "loss": 0.0889, "step": 8700 }, { "epoch": 9.2, "learning_rate": 2.7021670190274846e-05, "loss": 0.0868, "step": 8702 }, { "epoch": 9.2, "learning_rate": 2.701638477801269e-05, "loss": 0.1856, "step": 8704 }, { "epoch": 9.2, "learning_rate": 2.701109936575053e-05, "loss": 0.1437, "step": 8706 }, { "epoch": 9.21, "learning_rate": 2.7005813953488374e-05, "loss": 0.1376, "step": 8708 }, { "epoch": 9.21, "learning_rate": 2.7000528541226216e-05, "loss": 0.0924, "step": 8710 }, { "epoch": 9.21, "learning_rate": 2.699524312896406e-05, "loss": 0.0697, "step": 8712 }, { "epoch": 9.21, "learning_rate": 2.6989957716701902e-05, "loss": 0.2081, "step": 8714 }, { "epoch": 9.21, "learning_rate": 2.6984672304439744e-05, "loss": 0.0381, "step": 8716 }, { "epoch": 9.22, "learning_rate": 2.6979386892177594e-05, "loss": 0.2289, "step": 8718 }, { "epoch": 9.22, "learning_rate": 2.6974101479915437e-05, "loss": 0.0454, "step": 8720 }, { "epoch": 9.22, "learning_rate": 2.696881606765328e-05, "loss": 0.1052, "step": 8722 }, { "epoch": 9.22, "learning_rate": 2.6963530655391122e-05, "loss": 0.0191, "step": 8724 }, { "epoch": 9.22, "learning_rate": 2.6958245243128965e-05, "loss": 0.035, "step": 8726 }, { "epoch": 9.23, "learning_rate": 2.6952959830866807e-05, "loss": 0.1103, "step": 8728 }, { "epoch": 9.23, "learning_rate": 2.694767441860465e-05, "loss": 0.0613, "step": 8730 }, { "epoch": 9.23, "learning_rate": 2.6942389006342496e-05, "loss": 0.0965, "step": 8732 }, { "epoch": 9.23, "learning_rate": 2.693710359408034e-05, "loss": 0.094, "step": 8734 }, { "epoch": 9.23, "learning_rate": 2.6931818181818185e-05, "loss": 0.0707, "step": 8736 }, { "epoch": 9.24, "learning_rate": 2.6926532769556027e-05, "loss": 0.0763, "step": 8738 }, { "epoch": 9.24, "learning_rate": 2.692124735729387e-05, "loss": 0.2875, "step": 8740 }, { "epoch": 9.24, "learning_rate": 2.6915961945031716e-05, "loss": 0.046, "step": 8742 }, { "epoch": 9.24, "learning_rate": 2.691067653276956e-05, "loss": 0.0547, "step": 8744 }, { "epoch": 9.25, "learning_rate": 2.69053911205074e-05, "loss": 0.0899, "step": 8746 }, { "epoch": 9.25, "learning_rate": 2.6900105708245244e-05, "loss": 0.056, "step": 8748 }, { "epoch": 9.25, "learning_rate": 2.6894820295983087e-05, "loss": 0.0906, "step": 8750 }, { "epoch": 9.25, "learning_rate": 2.688953488372093e-05, "loss": 0.1307, "step": 8752 }, { "epoch": 9.25, "learning_rate": 2.688424947145878e-05, "loss": 0.1062, "step": 8754 }, { "epoch": 9.26, "learning_rate": 2.687896405919662e-05, "loss": 0.0706, "step": 8756 }, { "epoch": 9.26, "learning_rate": 2.6873678646934464e-05, "loss": 0.0468, "step": 8758 }, { "epoch": 9.26, "learning_rate": 2.6868393234672307e-05, "loss": 0.1303, "step": 8760 }, { "epoch": 9.26, "learning_rate": 2.686310782241015e-05, "loss": 0.0624, "step": 8762 }, { "epoch": 9.26, "learning_rate": 2.6857822410147992e-05, "loss": 0.0877, "step": 8764 }, { "epoch": 9.27, "learning_rate": 2.6852536997885835e-05, "loss": 0.053, "step": 8766 }, { "epoch": 9.27, "learning_rate": 2.6847251585623678e-05, "loss": 0.0406, "step": 8768 }, { "epoch": 9.27, "learning_rate": 2.684196617336152e-05, "loss": 0.0733, "step": 8770 }, { "epoch": 9.27, "learning_rate": 2.683668076109937e-05, "loss": 0.0395, "step": 8772 }, { "epoch": 9.27, "learning_rate": 2.6831395348837212e-05, "loss": 0.04, "step": 8774 }, { "epoch": 9.28, "learning_rate": 2.6826109936575055e-05, "loss": 0.0447, "step": 8776 }, { "epoch": 9.28, "learning_rate": 2.6820824524312898e-05, "loss": 0.1337, "step": 8778 }, { "epoch": 9.28, "learning_rate": 2.681553911205074e-05, "loss": 0.1441, "step": 8780 }, { "epoch": 9.28, "learning_rate": 2.6810253699788583e-05, "loss": 0.0266, "step": 8782 }, { "epoch": 9.29, "learning_rate": 2.6804968287526426e-05, "loss": 0.082, "step": 8784 }, { "epoch": 9.29, "learning_rate": 2.6799682875264272e-05, "loss": 0.0276, "step": 8786 }, { "epoch": 9.29, "learning_rate": 2.6794397463002114e-05, "loss": 0.0664, "step": 8788 }, { "epoch": 9.29, "learning_rate": 2.678911205073996e-05, "loss": 0.0992, "step": 8790 }, { "epoch": 9.29, "learning_rate": 2.6783826638477803e-05, "loss": 0.1348, "step": 8792 }, { "epoch": 9.3, "learning_rate": 2.677854122621565e-05, "loss": 0.0989, "step": 8794 }, { "epoch": 9.3, "learning_rate": 2.6773255813953492e-05, "loss": 0.0812, "step": 8796 }, { "epoch": 9.3, "learning_rate": 2.6767970401691335e-05, "loss": 0.1295, "step": 8798 }, { "epoch": 9.3, "learning_rate": 2.6762684989429177e-05, "loss": 0.1148, "step": 8800 }, { "epoch": 9.3, "learning_rate": 2.675739957716702e-05, "loss": 0.0984, "step": 8802 }, { "epoch": 9.31, "learning_rate": 2.6752114164904863e-05, "loss": 0.0716, "step": 8804 }, { "epoch": 9.31, "learning_rate": 2.6746828752642705e-05, "loss": 0.0396, "step": 8806 }, { "epoch": 9.31, "learning_rate": 2.6741543340380548e-05, "loss": 0.0483, "step": 8808 }, { "epoch": 9.31, "learning_rate": 2.6736257928118397e-05, "loss": 0.0657, "step": 8810 }, { "epoch": 9.32, "learning_rate": 2.673097251585624e-05, "loss": 0.2031, "step": 8812 }, { "epoch": 9.32, "learning_rate": 2.6725687103594083e-05, "loss": 0.1448, "step": 8814 }, { "epoch": 9.32, "learning_rate": 2.6720401691331925e-05, "loss": 0.0513, "step": 8816 }, { "epoch": 9.32, "learning_rate": 2.6715116279069768e-05, "loss": 0.0472, "step": 8818 }, { "epoch": 9.32, "learning_rate": 2.670983086680761e-05, "loss": 0.0745, "step": 8820 }, { "epoch": 9.33, "learning_rate": 2.6704545454545453e-05, "loss": 0.0551, "step": 8822 }, { "epoch": 9.33, "learning_rate": 2.6699260042283296e-05, "loss": 0.0293, "step": 8824 }, { "epoch": 9.33, "learning_rate": 2.669397463002114e-05, "loss": 0.0633, "step": 8826 }, { "epoch": 9.33, "learning_rate": 2.6688689217758988e-05, "loss": 0.0671, "step": 8828 }, { "epoch": 9.33, "learning_rate": 2.668340380549683e-05, "loss": 0.1661, "step": 8830 }, { "epoch": 9.34, "learning_rate": 2.6678118393234674e-05, "loss": 0.0222, "step": 8832 }, { "epoch": 9.34, "learning_rate": 2.6672832980972516e-05, "loss": 0.1829, "step": 8834 }, { "epoch": 9.34, "learning_rate": 2.666754756871036e-05, "loss": 0.2226, "step": 8836 }, { "epoch": 9.34, "learning_rate": 2.6662262156448205e-05, "loss": 0.2149, "step": 8838 }, { "epoch": 9.34, "learning_rate": 2.6656976744186048e-05, "loss": 0.1276, "step": 8840 }, { "epoch": 9.35, "learning_rate": 2.665169133192389e-05, "loss": 0.1313, "step": 8842 }, { "epoch": 9.35, "learning_rate": 2.6646405919661733e-05, "loss": 0.0533, "step": 8844 }, { "epoch": 9.35, "learning_rate": 2.664112050739958e-05, "loss": 0.1269, "step": 8846 }, { "epoch": 9.35, "learning_rate": 2.6635835095137425e-05, "loss": 0.153, "step": 8848 }, { "epoch": 9.36, "learning_rate": 2.6630549682875268e-05, "loss": 0.1629, "step": 8850 }, { "epoch": 9.36, "learning_rate": 2.662526427061311e-05, "loss": 0.1014, "step": 8852 }, { "epoch": 9.36, "learning_rate": 2.6619978858350953e-05, "loss": 0.105, "step": 8854 }, { "epoch": 9.36, "learning_rate": 2.6614693446088796e-05, "loss": 0.0713, "step": 8856 }, { "epoch": 9.36, "learning_rate": 2.660940803382664e-05, "loss": 0.1546, "step": 8858 }, { "epoch": 9.37, "learning_rate": 2.660412262156448e-05, "loss": 0.0783, "step": 8860 }, { "epoch": 9.37, "learning_rate": 2.6598837209302324e-05, "loss": 0.0218, "step": 8862 }, { "epoch": 9.37, "learning_rate": 2.6593551797040173e-05, "loss": 0.059, "step": 8864 }, { "epoch": 9.37, "learning_rate": 2.6588266384778016e-05, "loss": 0.08, "step": 8866 }, { "epoch": 9.37, "learning_rate": 2.658298097251586e-05, "loss": 0.0333, "step": 8868 }, { "epoch": 9.38, "learning_rate": 2.65776955602537e-05, "loss": 0.1142, "step": 8870 }, { "epoch": 9.38, "learning_rate": 2.6572410147991544e-05, "loss": 0.1063, "step": 8872 }, { "epoch": 9.38, "learning_rate": 2.6567124735729387e-05, "loss": 0.0753, "step": 8874 }, { "epoch": 9.38, "learning_rate": 2.656183932346723e-05, "loss": 0.0615, "step": 8876 }, { "epoch": 9.38, "learning_rate": 2.6556553911205072e-05, "loss": 0.0701, "step": 8878 }, { "epoch": 9.39, "learning_rate": 2.6551268498942915e-05, "loss": 0.0558, "step": 8880 }, { "epoch": 9.39, "learning_rate": 2.6545983086680764e-05, "loss": 0.0647, "step": 8882 }, { "epoch": 9.39, "learning_rate": 2.6540697674418607e-05, "loss": 0.1294, "step": 8884 }, { "epoch": 9.39, "learning_rate": 2.653541226215645e-05, "loss": 0.145, "step": 8886 }, { "epoch": 9.4, "learning_rate": 2.6530126849894292e-05, "loss": 0.1311, "step": 8888 }, { "epoch": 9.4, "learning_rate": 2.6524841437632135e-05, "loss": 0.1293, "step": 8890 }, { "epoch": 9.4, "learning_rate": 2.651955602536998e-05, "loss": 0.0762, "step": 8892 }, { "epoch": 9.4, "learning_rate": 2.6514270613107823e-05, "loss": 0.029, "step": 8894 }, { "epoch": 9.4, "learning_rate": 2.6508985200845666e-05, "loss": 0.086, "step": 8896 }, { "epoch": 9.41, "learning_rate": 2.650369978858351e-05, "loss": 0.0596, "step": 8898 }, { "epoch": 9.41, "learning_rate": 2.6498414376321358e-05, "loss": 0.0437, "step": 8900 }, { "epoch": 9.41, "learning_rate": 2.64931289640592e-05, "loss": 0.1034, "step": 8902 }, { "epoch": 9.41, "learning_rate": 2.6487843551797044e-05, "loss": 0.0791, "step": 8904 }, { "epoch": 9.41, "learning_rate": 2.6482558139534886e-05, "loss": 0.1433, "step": 8906 }, { "epoch": 9.42, "learning_rate": 2.647727272727273e-05, "loss": 0.0915, "step": 8908 }, { "epoch": 9.42, "learning_rate": 2.647198731501057e-05, "loss": 0.052, "step": 8910 }, { "epoch": 9.42, "learning_rate": 2.6466701902748414e-05, "loss": 0.1161, "step": 8912 }, { "epoch": 9.42, "learning_rate": 2.6461416490486257e-05, "loss": 0.0793, "step": 8914 }, { "epoch": 9.42, "learning_rate": 2.64561310782241e-05, "loss": 0.1089, "step": 8916 }, { "epoch": 9.43, "learning_rate": 2.645084566596195e-05, "loss": 0.1494, "step": 8918 }, { "epoch": 9.43, "learning_rate": 2.6445560253699792e-05, "loss": 0.128, "step": 8920 }, { "epoch": 9.43, "learning_rate": 2.6440274841437634e-05, "loss": 0.0654, "step": 8922 }, { "epoch": 9.43, "learning_rate": 2.6434989429175477e-05, "loss": 0.1304, "step": 8924 }, { "epoch": 9.44, "learning_rate": 2.642970401691332e-05, "loss": 0.0682, "step": 8926 }, { "epoch": 9.44, "learning_rate": 2.6424418604651162e-05, "loss": 0.0443, "step": 8928 }, { "epoch": 9.44, "learning_rate": 2.6419133192389005e-05, "loss": 0.1399, "step": 8930 }, { "epoch": 9.44, "learning_rate": 2.6413847780126848e-05, "loss": 0.0601, "step": 8932 }, { "epoch": 9.44, "learning_rate": 2.640856236786469e-05, "loss": 0.0917, "step": 8934 }, { "epoch": 9.45, "learning_rate": 2.640327695560254e-05, "loss": 0.0802, "step": 8936 }, { "epoch": 9.45, "learning_rate": 2.6397991543340383e-05, "loss": 0.0652, "step": 8938 }, { "epoch": 9.45, "learning_rate": 2.6392706131078225e-05, "loss": 0.0827, "step": 8940 }, { "epoch": 9.45, "learning_rate": 2.6387420718816068e-05, "loss": 0.0792, "step": 8942 }, { "epoch": 9.45, "learning_rate": 2.6382135306553914e-05, "loss": 0.0509, "step": 8944 }, { "epoch": 9.46, "learning_rate": 2.6376849894291757e-05, "loss": 0.0269, "step": 8946 }, { "epoch": 9.46, "learning_rate": 2.63715644820296e-05, "loss": 0.115, "step": 8948 }, { "epoch": 9.46, "learning_rate": 2.6366279069767442e-05, "loss": 0.068, "step": 8950 }, { "epoch": 9.46, "learning_rate": 2.6360993657505285e-05, "loss": 0.0246, "step": 8952 }, { "epoch": 9.47, "learning_rate": 2.6355708245243134e-05, "loss": 0.0227, "step": 8954 }, { "epoch": 9.47, "learning_rate": 2.6350422832980977e-05, "loss": 0.1095, "step": 8956 }, { "epoch": 9.47, "learning_rate": 2.634513742071882e-05, "loss": 0.0427, "step": 8958 }, { "epoch": 9.47, "learning_rate": 2.6339852008456662e-05, "loss": 0.1263, "step": 8960 }, { "epoch": 9.47, "learning_rate": 2.6334566596194505e-05, "loss": 0.0586, "step": 8962 }, { "epoch": 9.48, "learning_rate": 2.6329281183932347e-05, "loss": 0.2595, "step": 8964 }, { "epoch": 9.48, "learning_rate": 2.632399577167019e-05, "loss": 0.0935, "step": 8966 }, { "epoch": 9.48, "learning_rate": 2.6318710359408033e-05, "loss": 0.0694, "step": 8968 }, { "epoch": 9.48, "learning_rate": 2.6313424947145875e-05, "loss": 0.0243, "step": 8970 }, { "epoch": 9.48, "learning_rate": 2.6308139534883725e-05, "loss": 0.0722, "step": 8972 }, { "epoch": 9.49, "learning_rate": 2.6302854122621568e-05, "loss": 0.0713, "step": 8974 }, { "epoch": 9.49, "learning_rate": 2.629756871035941e-05, "loss": 0.0691, "step": 8976 }, { "epoch": 9.49, "learning_rate": 2.6292283298097253e-05, "loss": 0.1, "step": 8978 }, { "epoch": 9.49, "learning_rate": 2.6286997885835096e-05, "loss": 0.0964, "step": 8980 }, { "epoch": 9.49, "learning_rate": 2.6281712473572938e-05, "loss": 0.0803, "step": 8982 }, { "epoch": 9.5, "learning_rate": 2.627642706131078e-05, "loss": 0.0492, "step": 8984 }, { "epoch": 9.5, "learning_rate": 2.6271141649048624e-05, "loss": 0.0719, "step": 8986 }, { "epoch": 9.5, "learning_rate": 2.626585623678647e-05, "loss": 0.0741, "step": 8988 }, { "epoch": 9.5, "learning_rate": 2.6260570824524316e-05, "loss": 0.0719, "step": 8990 }, { "epoch": 9.51, "learning_rate": 2.625528541226216e-05, "loss": 0.0528, "step": 8992 }, { "epoch": 9.51, "learning_rate": 2.625e-05, "loss": 0.0613, "step": 8994 }, { "epoch": 9.51, "learning_rate": 2.6244714587737844e-05, "loss": 0.0499, "step": 8996 }, { "epoch": 9.51, "learning_rate": 2.623942917547569e-05, "loss": 0.0338, "step": 8998 }, { "epoch": 9.51, "learning_rate": 2.6234143763213532e-05, "loss": 0.1687, "step": 9000 }, { "epoch": 9.51, "eval_cer": 0.03066400683955543, "eval_loss": 0.7769936323165894, "eval_runtime": 127.7044, "eval_samples_per_second": 6.586, "eval_steps_per_second": 0.83, "step": 9000 }, { "epoch": 9.52, "learning_rate": 2.6228858350951375e-05, "loss": 0.0774, "step": 9002 }, { "epoch": 9.52, "learning_rate": 2.6223572938689218e-05, "loss": 0.09, "step": 9004 }, { "epoch": 9.52, "learning_rate": 2.621828752642706e-05, "loss": 0.0983, "step": 9006 }, { "epoch": 9.52, "learning_rate": 2.621300211416491e-05, "loss": 0.0581, "step": 9008 }, { "epoch": 9.52, "learning_rate": 2.6207716701902753e-05, "loss": 0.0721, "step": 9010 }, { "epoch": 9.53, "learning_rate": 2.6202431289640595e-05, "loss": 0.0352, "step": 9012 }, { "epoch": 9.53, "learning_rate": 2.6197145877378438e-05, "loss": 0.1807, "step": 9014 }, { "epoch": 9.53, "learning_rate": 2.619186046511628e-05, "loss": 0.0639, "step": 9016 }, { "epoch": 9.53, "learning_rate": 2.6186575052854123e-05, "loss": 0.0835, "step": 9018 }, { "epoch": 9.53, "learning_rate": 2.6181289640591966e-05, "loss": 0.139, "step": 9020 }, { "epoch": 9.54, "learning_rate": 2.617600422832981e-05, "loss": 0.0826, "step": 9022 }, { "epoch": 9.54, "learning_rate": 2.617071881606765e-05, "loss": 0.1681, "step": 9024 }, { "epoch": 9.54, "learning_rate": 2.61654334038055e-05, "loss": 0.0913, "step": 9026 }, { "epoch": 9.54, "learning_rate": 2.6160147991543343e-05, "loss": 0.0909, "step": 9028 }, { "epoch": 9.55, "learning_rate": 2.6154862579281186e-05, "loss": 0.0458, "step": 9030 }, { "epoch": 9.55, "learning_rate": 2.614957716701903e-05, "loss": 0.0742, "step": 9032 }, { "epoch": 9.55, "learning_rate": 2.614429175475687e-05, "loss": 0.1146, "step": 9034 }, { "epoch": 9.55, "learning_rate": 2.6139006342494714e-05, "loss": 0.1539, "step": 9036 }, { "epoch": 9.55, "learning_rate": 2.6133720930232557e-05, "loss": 0.0891, "step": 9038 }, { "epoch": 9.56, "learning_rate": 2.61284355179704e-05, "loss": 0.2471, "step": 9040 }, { "epoch": 9.56, "learning_rate": 2.6123150105708246e-05, "loss": 0.1032, "step": 9042 }, { "epoch": 9.56, "learning_rate": 2.611786469344609e-05, "loss": 0.1223, "step": 9044 }, { "epoch": 9.56, "learning_rate": 2.6112579281183934e-05, "loss": 0.1236, "step": 9046 }, { "epoch": 9.56, "learning_rate": 2.6107293868921777e-05, "loss": 0.1142, "step": 9048 }, { "epoch": 9.57, "learning_rate": 2.610200845665962e-05, "loss": 0.1083, "step": 9050 }, { "epoch": 9.57, "learning_rate": 2.6096723044397466e-05, "loss": 0.0754, "step": 9052 }, { "epoch": 9.57, "learning_rate": 2.609143763213531e-05, "loss": 0.0589, "step": 9054 }, { "epoch": 9.57, "learning_rate": 2.608615221987315e-05, "loss": 0.0763, "step": 9056 }, { "epoch": 9.58, "learning_rate": 2.6080866807610994e-05, "loss": 0.0907, "step": 9058 }, { "epoch": 9.58, "learning_rate": 2.6075581395348836e-05, "loss": 0.0649, "step": 9060 }, { "epoch": 9.58, "learning_rate": 2.6070295983086686e-05, "loss": 0.0344, "step": 9062 }, { "epoch": 9.58, "learning_rate": 2.606501057082453e-05, "loss": 0.0958, "step": 9064 }, { "epoch": 9.58, "learning_rate": 2.605972515856237e-05, "loss": 0.0438, "step": 9066 }, { "epoch": 9.59, "learning_rate": 2.6054439746300214e-05, "loss": 0.0463, "step": 9068 }, { "epoch": 9.59, "learning_rate": 2.6049154334038056e-05, "loss": 0.032, "step": 9070 }, { "epoch": 9.59, "learning_rate": 2.60438689217759e-05, "loss": 0.0797, "step": 9072 }, { "epoch": 9.59, "learning_rate": 2.6038583509513742e-05, "loss": 0.1319, "step": 9074 }, { "epoch": 9.59, "learning_rate": 2.6033298097251584e-05, "loss": 0.0902, "step": 9076 }, { "epoch": 9.6, "learning_rate": 2.6028012684989427e-05, "loss": 0.0814, "step": 9078 }, { "epoch": 9.6, "learning_rate": 2.6022727272727277e-05, "loss": 0.1081, "step": 9080 }, { "epoch": 9.6, "learning_rate": 2.601744186046512e-05, "loss": 0.0604, "step": 9082 }, { "epoch": 9.6, "learning_rate": 2.6012156448202962e-05, "loss": 0.0702, "step": 9084 }, { "epoch": 9.6, "learning_rate": 2.6006871035940805e-05, "loss": 0.0974, "step": 9086 }, { "epoch": 9.61, "learning_rate": 2.6001585623678647e-05, "loss": 0.0582, "step": 9088 }, { "epoch": 9.61, "learning_rate": 2.599630021141649e-05, "loss": 0.1291, "step": 9090 }, { "epoch": 9.61, "learning_rate": 2.5991014799154333e-05, "loss": 0.0836, "step": 9092 }, { "epoch": 9.61, "learning_rate": 2.5985729386892175e-05, "loss": 0.0978, "step": 9094 }, { "epoch": 9.62, "learning_rate": 2.598044397463002e-05, "loss": 0.0746, "step": 9096 }, { "epoch": 9.62, "learning_rate": 2.5975158562367867e-05, "loss": 0.0816, "step": 9098 }, { "epoch": 9.62, "learning_rate": 2.596987315010571e-05, "loss": 0.0664, "step": 9100 }, { "epoch": 9.62, "learning_rate": 2.5964587737843553e-05, "loss": 0.0668, "step": 9102 }, { "epoch": 9.62, "learning_rate": 2.59593023255814e-05, "loss": 0.1026, "step": 9104 }, { "epoch": 9.63, "learning_rate": 2.595401691331924e-05, "loss": 0.0647, "step": 9106 }, { "epoch": 9.63, "learning_rate": 2.5948731501057084e-05, "loss": 0.0637, "step": 9108 }, { "epoch": 9.63, "learning_rate": 2.5943446088794927e-05, "loss": 0.093, "step": 9110 }, { "epoch": 9.63, "learning_rate": 2.593816067653277e-05, "loss": 0.0501, "step": 9112 }, { "epoch": 9.63, "learning_rate": 2.5932875264270612e-05, "loss": 0.1349, "step": 9114 }, { "epoch": 9.64, "learning_rate": 2.592758985200846e-05, "loss": 0.0521, "step": 9116 }, { "epoch": 9.64, "learning_rate": 2.5922304439746304e-05, "loss": 0.0251, "step": 9118 }, { "epoch": 9.64, "learning_rate": 2.5917019027484147e-05, "loss": 0.0449, "step": 9120 }, { "epoch": 9.64, "learning_rate": 2.591173361522199e-05, "loss": 0.0847, "step": 9122 }, { "epoch": 9.64, "learning_rate": 2.5906448202959832e-05, "loss": 0.1406, "step": 9124 }, { "epoch": 9.65, "learning_rate": 2.5901162790697675e-05, "loss": 0.0304, "step": 9126 }, { "epoch": 9.65, "learning_rate": 2.5895877378435518e-05, "loss": 0.0721, "step": 9128 }, { "epoch": 9.65, "learning_rate": 2.589059196617336e-05, "loss": 0.061, "step": 9130 }, { "epoch": 9.65, "learning_rate": 2.5885306553911203e-05, "loss": 0.1242, "step": 9132 }, { "epoch": 9.66, "learning_rate": 2.5880021141649052e-05, "loss": 0.0477, "step": 9134 }, { "epoch": 9.66, "learning_rate": 2.5874735729386895e-05, "loss": 0.0527, "step": 9136 }, { "epoch": 9.66, "learning_rate": 2.5869450317124738e-05, "loss": 0.3248, "step": 9138 }, { "epoch": 9.66, "learning_rate": 2.586416490486258e-05, "loss": 0.112, "step": 9140 }, { "epoch": 9.66, "learning_rate": 2.5858879492600423e-05, "loss": 0.1445, "step": 9142 }, { "epoch": 9.67, "learning_rate": 2.5853594080338266e-05, "loss": 0.131, "step": 9144 }, { "epoch": 9.67, "learning_rate": 2.584830866807611e-05, "loss": 0.069, "step": 9146 }, { "epoch": 9.67, "learning_rate": 2.5843023255813955e-05, "loss": 0.0555, "step": 9148 }, { "epoch": 9.67, "learning_rate": 2.5837737843551797e-05, "loss": 0.1066, "step": 9150 }, { "epoch": 9.67, "learning_rate": 2.5832452431289643e-05, "loss": 0.0949, "step": 9152 }, { "epoch": 9.68, "learning_rate": 2.5827167019027486e-05, "loss": 0.1722, "step": 9154 }, { "epoch": 9.68, "learning_rate": 2.582188160676533e-05, "loss": 0.0974, "step": 9156 }, { "epoch": 9.68, "learning_rate": 2.5816596194503175e-05, "loss": 0.146, "step": 9158 }, { "epoch": 9.68, "learning_rate": 2.5811310782241017e-05, "loss": 0.0641, "step": 9160 }, { "epoch": 9.68, "learning_rate": 2.580602536997886e-05, "loss": 0.0692, "step": 9162 }, { "epoch": 9.69, "learning_rate": 2.5800739957716703e-05, "loss": 0.0374, "step": 9164 }, { "epoch": 9.69, "learning_rate": 2.5795454545454545e-05, "loss": 0.0374, "step": 9166 }, { "epoch": 9.69, "learning_rate": 2.5790169133192388e-05, "loss": 0.0742, "step": 9168 }, { "epoch": 9.69, "learning_rate": 2.5784883720930237e-05, "loss": 0.0503, "step": 9170 }, { "epoch": 9.7, "learning_rate": 2.577959830866808e-05, "loss": 0.0705, "step": 9172 }, { "epoch": 9.7, "learning_rate": 2.5774312896405923e-05, "loss": 0.1188, "step": 9174 }, { "epoch": 9.7, "learning_rate": 2.5769027484143765e-05, "loss": 0.1263, "step": 9176 }, { "epoch": 9.7, "learning_rate": 2.5763742071881608e-05, "loss": 0.0268, "step": 9178 }, { "epoch": 9.7, "learning_rate": 2.575845665961945e-05, "loss": 0.0401, "step": 9180 }, { "epoch": 9.71, "learning_rate": 2.5753171247357293e-05, "loss": 0.2058, "step": 9182 }, { "epoch": 9.71, "learning_rate": 2.5747885835095136e-05, "loss": 0.0778, "step": 9184 }, { "epoch": 9.71, "learning_rate": 2.574260042283298e-05, "loss": 0.0561, "step": 9186 }, { "epoch": 9.71, "learning_rate": 2.5737315010570828e-05, "loss": 0.2252, "step": 9188 }, { "epoch": 9.71, "learning_rate": 2.573202959830867e-05, "loss": 0.1165, "step": 9190 }, { "epoch": 9.72, "learning_rate": 2.5726744186046514e-05, "loss": 0.1053, "step": 9192 }, { "epoch": 9.72, "learning_rate": 2.5721458773784356e-05, "loss": 0.0465, "step": 9194 }, { "epoch": 9.72, "learning_rate": 2.57161733615222e-05, "loss": 0.0784, "step": 9196 }, { "epoch": 9.72, "learning_rate": 2.571088794926004e-05, "loss": 0.0708, "step": 9198 }, { "epoch": 9.73, "learning_rate": 2.5705602536997884e-05, "loss": 0.2093, "step": 9200 }, { "epoch": 9.73, "learning_rate": 2.570031712473573e-05, "loss": 0.2111, "step": 9202 }, { "epoch": 9.73, "learning_rate": 2.5695031712473573e-05, "loss": 0.1183, "step": 9204 }, { "epoch": 9.73, "learning_rate": 2.568974630021142e-05, "loss": 0.05, "step": 9206 }, { "epoch": 9.73, "learning_rate": 2.5684460887949262e-05, "loss": 0.2306, "step": 9208 }, { "epoch": 9.74, "learning_rate": 2.5679175475687104e-05, "loss": 0.0342, "step": 9210 }, { "epoch": 9.74, "learning_rate": 2.567389006342495e-05, "loss": 0.1203, "step": 9212 }, { "epoch": 9.74, "learning_rate": 2.5668604651162793e-05, "loss": 0.1217, "step": 9214 }, { "epoch": 9.74, "learning_rate": 2.5663319238900636e-05, "loss": 0.0543, "step": 9216 }, { "epoch": 9.74, "learning_rate": 2.565803382663848e-05, "loss": 0.1482, "step": 9218 }, { "epoch": 9.75, "learning_rate": 2.565274841437632e-05, "loss": 0.1286, "step": 9220 }, { "epoch": 9.75, "learning_rate": 2.5647463002114164e-05, "loss": 0.1369, "step": 9222 }, { "epoch": 9.75, "learning_rate": 2.5642177589852013e-05, "loss": 0.0661, "step": 9224 }, { "epoch": 9.75, "learning_rate": 2.5636892177589856e-05, "loss": 0.1009, "step": 9226 }, { "epoch": 9.75, "learning_rate": 2.56316067653277e-05, "loss": 0.0759, "step": 9228 }, { "epoch": 9.76, "learning_rate": 2.562632135306554e-05, "loss": 0.099, "step": 9230 }, { "epoch": 9.76, "learning_rate": 2.5621035940803384e-05, "loss": 0.1197, "step": 9232 }, { "epoch": 9.76, "learning_rate": 2.5615750528541227e-05, "loss": 0.1576, "step": 9234 }, { "epoch": 9.76, "learning_rate": 2.561046511627907e-05, "loss": 0.1255, "step": 9236 }, { "epoch": 9.77, "learning_rate": 2.5605179704016912e-05, "loss": 0.1013, "step": 9238 }, { "epoch": 9.77, "learning_rate": 2.5599894291754755e-05, "loss": 0.0482, "step": 9240 }, { "epoch": 9.77, "learning_rate": 2.5594608879492604e-05, "loss": 0.0474, "step": 9242 }, { "epoch": 9.77, "learning_rate": 2.5589323467230447e-05, "loss": 0.0222, "step": 9244 }, { "epoch": 9.77, "learning_rate": 2.558403805496829e-05, "loss": 0.0592, "step": 9246 }, { "epoch": 9.78, "learning_rate": 2.5578752642706132e-05, "loss": 0.0322, "step": 9248 }, { "epoch": 9.78, "learning_rate": 2.5573467230443975e-05, "loss": 0.1504, "step": 9250 }, { "epoch": 9.78, "learning_rate": 2.5568181818181817e-05, "loss": 0.0378, "step": 9252 }, { "epoch": 9.78, "learning_rate": 2.556289640591966e-05, "loss": 0.1115, "step": 9254 }, { "epoch": 9.78, "learning_rate": 2.5557610993657506e-05, "loss": 0.1217, "step": 9256 }, { "epoch": 9.79, "learning_rate": 2.555232558139535e-05, "loss": 0.1118, "step": 9258 }, { "epoch": 9.79, "learning_rate": 2.5547040169133195e-05, "loss": 0.0188, "step": 9260 }, { "epoch": 9.79, "learning_rate": 2.5541754756871038e-05, "loss": 0.092, "step": 9262 }, { "epoch": 9.79, "learning_rate": 2.5536469344608884e-05, "loss": 0.203, "step": 9264 }, { "epoch": 9.79, "learning_rate": 2.5531183932346726e-05, "loss": 0.0928, "step": 9266 }, { "epoch": 9.8, "learning_rate": 2.552589852008457e-05, "loss": 0.0479, "step": 9268 }, { "epoch": 9.8, "learning_rate": 2.552061310782241e-05, "loss": 0.0719, "step": 9270 }, { "epoch": 9.8, "learning_rate": 2.5515327695560254e-05, "loss": 0.0685, "step": 9272 }, { "epoch": 9.8, "learning_rate": 2.5510042283298097e-05, "loss": 0.0747, "step": 9274 }, { "epoch": 9.81, "learning_rate": 2.550475687103594e-05, "loss": 0.1232, "step": 9276 }, { "epoch": 9.81, "learning_rate": 2.549947145877379e-05, "loss": 0.0749, "step": 9278 }, { "epoch": 9.81, "learning_rate": 2.5494186046511632e-05, "loss": 0.091, "step": 9280 }, { "epoch": 9.81, "learning_rate": 2.5488900634249474e-05, "loss": 0.0926, "step": 9282 }, { "epoch": 9.81, "learning_rate": 2.5483615221987317e-05, "loss": 0.0651, "step": 9284 }, { "epoch": 9.82, "learning_rate": 2.547832980972516e-05, "loss": 0.0229, "step": 9286 }, { "epoch": 9.82, "learning_rate": 2.5473044397463003e-05, "loss": 0.0882, "step": 9288 }, { "epoch": 9.82, "learning_rate": 2.5467758985200845e-05, "loss": 0.151, "step": 9290 }, { "epoch": 9.82, "learning_rate": 2.5462473572938688e-05, "loss": 0.0874, "step": 9292 }, { "epoch": 9.82, "learning_rate": 2.545718816067653e-05, "loss": 0.1641, "step": 9294 }, { "epoch": 9.83, "learning_rate": 2.545190274841438e-05, "loss": 0.1099, "step": 9296 }, { "epoch": 9.83, "learning_rate": 2.5446617336152223e-05, "loss": 0.0902, "step": 9298 }, { "epoch": 9.83, "learning_rate": 2.5441331923890065e-05, "loss": 0.0434, "step": 9300 }, { "epoch": 9.83, "learning_rate": 2.5436046511627908e-05, "loss": 0.1326, "step": 9302 }, { "epoch": 9.84, "learning_rate": 2.543076109936575e-05, "loss": 0.0813, "step": 9304 }, { "epoch": 9.84, "learning_rate": 2.5425475687103593e-05, "loss": 0.0893, "step": 9306 }, { "epoch": 9.84, "learning_rate": 2.542019027484144e-05, "loss": 0.0796, "step": 9308 }, { "epoch": 9.84, "learning_rate": 2.5414904862579282e-05, "loss": 0.0507, "step": 9310 }, { "epoch": 9.84, "learning_rate": 2.5409619450317125e-05, "loss": 0.05, "step": 9312 }, { "epoch": 9.85, "learning_rate": 2.540433403805497e-05, "loss": 0.1088, "step": 9314 }, { "epoch": 9.85, "learning_rate": 2.5399048625792813e-05, "loss": 0.108, "step": 9316 }, { "epoch": 9.85, "learning_rate": 2.539376321353066e-05, "loss": 0.0851, "step": 9318 }, { "epoch": 9.85, "learning_rate": 2.5388477801268502e-05, "loss": 0.0538, "step": 9320 }, { "epoch": 9.85, "learning_rate": 2.5383192389006345e-05, "loss": 0.0998, "step": 9322 }, { "epoch": 9.86, "learning_rate": 2.5377906976744188e-05, "loss": 0.0808, "step": 9324 }, { "epoch": 9.86, "learning_rate": 2.537262156448203e-05, "loss": 0.086, "step": 9326 }, { "epoch": 9.86, "learning_rate": 2.5367336152219873e-05, "loss": 0.0555, "step": 9328 }, { "epoch": 9.86, "learning_rate": 2.5362050739957716e-05, "loss": 0.0468, "step": 9330 }, { "epoch": 9.86, "learning_rate": 2.5356765327695565e-05, "loss": 0.0644, "step": 9332 }, { "epoch": 9.87, "learning_rate": 2.5351479915433408e-05, "loss": 0.2785, "step": 9334 }, { "epoch": 9.87, "learning_rate": 2.534619450317125e-05, "loss": 0.0736, "step": 9336 }, { "epoch": 9.87, "learning_rate": 2.5340909090909093e-05, "loss": 0.0374, "step": 9338 }, { "epoch": 9.87, "learning_rate": 2.5335623678646936e-05, "loss": 0.0782, "step": 9340 }, { "epoch": 9.88, "learning_rate": 2.533033826638478e-05, "loss": 0.1181, "step": 9342 }, { "epoch": 9.88, "learning_rate": 2.532505285412262e-05, "loss": 0.1358, "step": 9344 }, { "epoch": 9.88, "learning_rate": 2.5319767441860464e-05, "loss": 0.0958, "step": 9346 }, { "epoch": 9.88, "learning_rate": 2.5314482029598306e-05, "loss": 0.1822, "step": 9348 }, { "epoch": 9.88, "learning_rate": 2.5309196617336156e-05, "loss": 0.1081, "step": 9350 }, { "epoch": 9.89, "learning_rate": 2.5303911205074e-05, "loss": 0.0782, "step": 9352 }, { "epoch": 9.89, "learning_rate": 2.529862579281184e-05, "loss": 0.1172, "step": 9354 }, { "epoch": 9.89, "learning_rate": 2.5293340380549684e-05, "loss": 0.0593, "step": 9356 }, { "epoch": 9.89, "learning_rate": 2.5288054968287526e-05, "loss": 0.0192, "step": 9358 }, { "epoch": 9.89, "learning_rate": 2.528276955602537e-05, "loss": 0.073, "step": 9360 }, { "epoch": 9.9, "learning_rate": 2.5277484143763215e-05, "loss": 0.0692, "step": 9362 }, { "epoch": 9.9, "learning_rate": 2.5272198731501058e-05, "loss": 0.0479, "step": 9364 }, { "epoch": 9.9, "learning_rate": 2.52669133192389e-05, "loss": 0.0936, "step": 9366 }, { "epoch": 9.9, "learning_rate": 2.5261627906976747e-05, "loss": 0.06, "step": 9368 }, { "epoch": 9.9, "learning_rate": 2.5256342494714593e-05, "loss": 0.194, "step": 9370 }, { "epoch": 9.91, "learning_rate": 2.5251057082452435e-05, "loss": 0.1149, "step": 9372 }, { "epoch": 9.91, "learning_rate": 2.5245771670190278e-05, "loss": 0.0893, "step": 9374 }, { "epoch": 9.91, "learning_rate": 2.524048625792812e-05, "loss": 0.0446, "step": 9376 }, { "epoch": 9.91, "learning_rate": 2.5235200845665963e-05, "loss": 0.1115, "step": 9378 }, { "epoch": 9.92, "learning_rate": 2.5229915433403806e-05, "loss": 0.1539, "step": 9380 }, { "epoch": 9.92, "learning_rate": 2.522463002114165e-05, "loss": 0.0537, "step": 9382 }, { "epoch": 9.92, "learning_rate": 2.521934460887949e-05, "loss": 0.047, "step": 9384 }, { "epoch": 9.92, "learning_rate": 2.521405919661734e-05, "loss": 0.0716, "step": 9386 }, { "epoch": 9.92, "learning_rate": 2.5208773784355183e-05, "loss": 0.0348, "step": 9388 }, { "epoch": 9.93, "learning_rate": 2.5203488372093026e-05, "loss": 0.1238, "step": 9390 }, { "epoch": 9.93, "learning_rate": 2.519820295983087e-05, "loss": 0.016, "step": 9392 }, { "epoch": 9.93, "learning_rate": 2.519291754756871e-05, "loss": 0.099, "step": 9394 }, { "epoch": 9.93, "learning_rate": 2.5187632135306554e-05, "loss": 0.1096, "step": 9396 }, { "epoch": 9.93, "learning_rate": 2.5182346723044397e-05, "loss": 0.1113, "step": 9398 }, { "epoch": 9.94, "learning_rate": 2.517706131078224e-05, "loss": 0.038, "step": 9400 }, { "epoch": 9.94, "learning_rate": 2.5171775898520082e-05, "loss": 0.0852, "step": 9402 }, { "epoch": 9.94, "learning_rate": 2.516649048625793e-05, "loss": 0.1253, "step": 9404 }, { "epoch": 9.94, "learning_rate": 2.5161205073995774e-05, "loss": 0.0963, "step": 9406 }, { "epoch": 9.95, "learning_rate": 2.5155919661733617e-05, "loss": 0.1237, "step": 9408 }, { "epoch": 9.95, "learning_rate": 2.515063424947146e-05, "loss": 0.0919, "step": 9410 }, { "epoch": 9.95, "learning_rate": 2.5145348837209302e-05, "loss": 0.0494, "step": 9412 }, { "epoch": 9.95, "learning_rate": 2.514006342494715e-05, "loss": 0.0852, "step": 9414 }, { "epoch": 9.95, "learning_rate": 2.513477801268499e-05, "loss": 0.1492, "step": 9416 }, { "epoch": 9.96, "learning_rate": 2.5129492600422834e-05, "loss": 0.0884, "step": 9418 }, { "epoch": 9.96, "learning_rate": 2.5124207188160676e-05, "loss": 0.1411, "step": 9420 }, { "epoch": 9.96, "learning_rate": 2.5118921775898522e-05, "loss": 0.0512, "step": 9422 }, { "epoch": 9.96, "learning_rate": 2.511363636363637e-05, "loss": 0.1133, "step": 9424 }, { "epoch": 9.96, "learning_rate": 2.510835095137421e-05, "loss": 0.0455, "step": 9426 }, { "epoch": 9.97, "learning_rate": 2.5103065539112054e-05, "loss": 0.0743, "step": 9428 }, { "epoch": 9.97, "learning_rate": 2.5097780126849897e-05, "loss": 0.1326, "step": 9430 }, { "epoch": 9.97, "learning_rate": 2.509249471458774e-05, "loss": 0.0816, "step": 9432 }, { "epoch": 9.97, "learning_rate": 2.5087209302325582e-05, "loss": 0.0581, "step": 9434 }, { "epoch": 9.97, "learning_rate": 2.5081923890063425e-05, "loss": 0.1075, "step": 9436 }, { "epoch": 9.98, "learning_rate": 2.5076638477801267e-05, "loss": 0.0341, "step": 9438 }, { "epoch": 9.98, "learning_rate": 2.5071353065539117e-05, "loss": 0.0657, "step": 9440 }, { "epoch": 9.98, "learning_rate": 2.506606765327696e-05, "loss": 0.0317, "step": 9442 }, { "epoch": 9.98, "learning_rate": 2.5060782241014802e-05, "loss": 0.1027, "step": 9444 }, { "epoch": 9.99, "learning_rate": 2.5055496828752645e-05, "loss": 0.1306, "step": 9446 }, { "epoch": 9.99, "learning_rate": 2.5050211416490487e-05, "loss": 0.0308, "step": 9448 }, { "epoch": 9.99, "learning_rate": 2.504492600422833e-05, "loss": 0.0878, "step": 9450 }, { "epoch": 9.99, "learning_rate": 2.5039640591966173e-05, "loss": 0.0352, "step": 9452 }, { "epoch": 9.99, "learning_rate": 2.5034355179704015e-05, "loss": 0.0678, "step": 9454 }, { "epoch": 10.0, "learning_rate": 2.5029069767441858e-05, "loss": 0.1047, "step": 9456 }, { "epoch": 10.0, "learning_rate": 2.5023784355179707e-05, "loss": 0.0423, "step": 9458 }, { "epoch": 10.0, "learning_rate": 2.501849894291755e-05, "loss": 0.0521, "step": 9460 }, { "epoch": 10.0, "learning_rate": 2.5013213530655393e-05, "loss": 0.0502, "step": 9462 }, { "epoch": 10.0, "learning_rate": 2.5007928118393235e-05, "loss": 0.0516, "step": 9464 }, { "epoch": 10.01, "learning_rate": 2.5002642706131078e-05, "loss": 0.1356, "step": 9466 }, { "epoch": 10.01, "learning_rate": 2.4997357293868924e-05, "loss": 0.142, "step": 9468 }, { "epoch": 10.01, "learning_rate": 2.4992071881606767e-05, "loss": 0.0707, "step": 9470 }, { "epoch": 10.01, "learning_rate": 2.4986786469344613e-05, "loss": 0.0274, "step": 9472 }, { "epoch": 10.01, "learning_rate": 2.4981501057082456e-05, "loss": 0.1028, "step": 9474 }, { "epoch": 10.02, "learning_rate": 2.4976215644820298e-05, "loss": 0.0566, "step": 9476 }, { "epoch": 10.02, "learning_rate": 2.497093023255814e-05, "loss": 0.2284, "step": 9478 }, { "epoch": 10.02, "learning_rate": 2.4965644820295984e-05, "loss": 0.106, "step": 9480 }, { "epoch": 10.02, "learning_rate": 2.496035940803383e-05, "loss": 0.076, "step": 9482 }, { "epoch": 10.03, "learning_rate": 2.4955073995771672e-05, "loss": 0.1524, "step": 9484 }, { "epoch": 10.03, "learning_rate": 2.4949788583509515e-05, "loss": 0.024, "step": 9486 }, { "epoch": 10.03, "learning_rate": 2.4944503171247358e-05, "loss": 0.0783, "step": 9488 }, { "epoch": 10.03, "learning_rate": 2.4939217758985204e-05, "loss": 0.0752, "step": 9490 }, { "epoch": 10.03, "learning_rate": 2.4933932346723046e-05, "loss": 0.0556, "step": 9492 }, { "epoch": 10.04, "learning_rate": 2.492864693446089e-05, "loss": 0.0667, "step": 9494 }, { "epoch": 10.04, "learning_rate": 2.4923361522198732e-05, "loss": 0.0455, "step": 9496 }, { "epoch": 10.04, "learning_rate": 2.4918076109936574e-05, "loss": 0.0423, "step": 9498 }, { "epoch": 10.04, "learning_rate": 2.491279069767442e-05, "loss": 0.0616, "step": 9500 }, { "epoch": 10.04, "eval_cer": 0.020860644058136223, "eval_loss": 0.5472248196601868, "eval_runtime": 127.8063, "eval_samples_per_second": 6.58, "eval_steps_per_second": 0.829, "step": 9500 }, { "epoch": 10.04, "learning_rate": 2.4907505285412263e-05, "loss": 0.1153, "step": 9502 }, { "epoch": 10.05, "learning_rate": 2.4902219873150106e-05, "loss": 0.0961, "step": 9504 }, { "epoch": 10.05, "learning_rate": 2.489693446088795e-05, "loss": 0.0327, "step": 9506 }, { "epoch": 10.05, "learning_rate": 2.4891649048625795e-05, "loss": 0.0733, "step": 9508 }, { "epoch": 10.05, "learning_rate": 2.4886363636363637e-05, "loss": 0.0801, "step": 9510 }, { "epoch": 10.05, "learning_rate": 2.488107822410148e-05, "loss": 0.0344, "step": 9512 }, { "epoch": 10.06, "learning_rate": 2.4875792811839323e-05, "loss": 0.0918, "step": 9514 }, { "epoch": 10.06, "learning_rate": 2.487050739957717e-05, "loss": 0.1247, "step": 9516 }, { "epoch": 10.06, "learning_rate": 2.486522198731501e-05, "loss": 0.0697, "step": 9518 }, { "epoch": 10.06, "learning_rate": 2.4859936575052854e-05, "loss": 0.0941, "step": 9520 }, { "epoch": 10.07, "learning_rate": 2.48546511627907e-05, "loss": 0.0839, "step": 9522 }, { "epoch": 10.07, "learning_rate": 2.4849365750528543e-05, "loss": 0.0163, "step": 9524 }, { "epoch": 10.07, "learning_rate": 2.484408033826639e-05, "loss": 0.0625, "step": 9526 }, { "epoch": 10.07, "learning_rate": 2.483879492600423e-05, "loss": 0.0795, "step": 9528 }, { "epoch": 10.07, "learning_rate": 2.4833509513742074e-05, "loss": 0.0719, "step": 9530 }, { "epoch": 10.08, "learning_rate": 2.4828224101479917e-05, "loss": 0.0796, "step": 9532 }, { "epoch": 10.08, "learning_rate": 2.482293868921776e-05, "loss": 0.0175, "step": 9534 }, { "epoch": 10.08, "learning_rate": 2.4817653276955606e-05, "loss": 0.0797, "step": 9536 }, { "epoch": 10.08, "learning_rate": 2.4812367864693448e-05, "loss": 0.0956, "step": 9538 }, { "epoch": 10.08, "learning_rate": 2.480708245243129e-05, "loss": 0.0526, "step": 9540 }, { "epoch": 10.09, "learning_rate": 2.4801797040169134e-05, "loss": 0.0719, "step": 9542 }, { "epoch": 10.09, "learning_rate": 2.479651162790698e-05, "loss": 0.0664, "step": 9544 }, { "epoch": 10.09, "learning_rate": 2.4791226215644822e-05, "loss": 0.0272, "step": 9546 }, { "epoch": 10.09, "learning_rate": 2.4785940803382665e-05, "loss": 0.043, "step": 9548 }, { "epoch": 10.1, "learning_rate": 2.4780655391120508e-05, "loss": 0.1261, "step": 9550 }, { "epoch": 10.1, "learning_rate": 2.477536997885835e-05, "loss": 0.2794, "step": 9552 }, { "epoch": 10.1, "learning_rate": 2.4770084566596196e-05, "loss": 0.0698, "step": 9554 }, { "epoch": 10.1, "learning_rate": 2.476479915433404e-05, "loss": 0.2251, "step": 9556 }, { "epoch": 10.1, "learning_rate": 2.4759513742071882e-05, "loss": 0.0471, "step": 9558 }, { "epoch": 10.11, "learning_rate": 2.4754228329809724e-05, "loss": 0.0455, "step": 9560 }, { "epoch": 10.11, "learning_rate": 2.474894291754757e-05, "loss": 0.3214, "step": 9562 }, { "epoch": 10.11, "learning_rate": 2.4743657505285413e-05, "loss": 0.2688, "step": 9564 }, { "epoch": 10.11, "learning_rate": 2.4738372093023256e-05, "loss": 0.1333, "step": 9566 }, { "epoch": 10.11, "learning_rate": 2.47330866807611e-05, "loss": 0.1161, "step": 9568 }, { "epoch": 10.12, "learning_rate": 2.4727801268498945e-05, "loss": 0.0928, "step": 9570 }, { "epoch": 10.12, "learning_rate": 2.4722515856236787e-05, "loss": 0.0991, "step": 9572 }, { "epoch": 10.12, "learning_rate": 2.4717230443974633e-05, "loss": 0.0438, "step": 9574 }, { "epoch": 10.12, "learning_rate": 2.4711945031712476e-05, "loss": 0.1875, "step": 9576 }, { "epoch": 10.12, "learning_rate": 2.470665961945032e-05, "loss": 0.1174, "step": 9578 }, { "epoch": 10.13, "learning_rate": 2.4701374207188165e-05, "loss": 0.0306, "step": 9580 }, { "epoch": 10.13, "learning_rate": 2.4696088794926007e-05, "loss": 0.1353, "step": 9582 }, { "epoch": 10.13, "learning_rate": 2.469080338266385e-05, "loss": 0.0791, "step": 9584 }, { "epoch": 10.13, "learning_rate": 2.4685517970401693e-05, "loss": 0.0907, "step": 9586 }, { "epoch": 10.14, "learning_rate": 2.4680232558139535e-05, "loss": 0.1941, "step": 9588 }, { "epoch": 10.14, "learning_rate": 2.467494714587738e-05, "loss": 0.0339, "step": 9590 }, { "epoch": 10.14, "learning_rate": 2.4669661733615224e-05, "loss": 0.0292, "step": 9592 }, { "epoch": 10.14, "learning_rate": 2.4664376321353067e-05, "loss": 0.0474, "step": 9594 }, { "epoch": 10.14, "learning_rate": 2.465909090909091e-05, "loss": 0.0832, "step": 9596 }, { "epoch": 10.15, "learning_rate": 2.4653805496828755e-05, "loss": 0.0618, "step": 9598 }, { "epoch": 10.15, "learning_rate": 2.4648520084566598e-05, "loss": 0.0498, "step": 9600 }, { "epoch": 10.15, "learning_rate": 2.464323467230444e-05, "loss": 0.0797, "step": 9602 }, { "epoch": 10.15, "learning_rate": 2.4637949260042283e-05, "loss": 0.0558, "step": 9604 }, { "epoch": 10.15, "learning_rate": 2.4632663847780126e-05, "loss": 0.0621, "step": 9606 }, { "epoch": 10.16, "learning_rate": 2.4627378435517972e-05, "loss": 0.0617, "step": 9608 }, { "epoch": 10.16, "learning_rate": 2.4622093023255815e-05, "loss": 0.1126, "step": 9610 }, { "epoch": 10.16, "learning_rate": 2.4616807610993658e-05, "loss": 0.0502, "step": 9612 }, { "epoch": 10.16, "learning_rate": 2.46115221987315e-05, "loss": 0.0547, "step": 9614 }, { "epoch": 10.16, "learning_rate": 2.4606236786469346e-05, "loss": 0.0615, "step": 9616 }, { "epoch": 10.17, "learning_rate": 2.460095137420719e-05, "loss": 0.0155, "step": 9618 }, { "epoch": 10.17, "learning_rate": 2.459566596194503e-05, "loss": 0.0557, "step": 9620 }, { "epoch": 10.17, "learning_rate": 2.4590380549682874e-05, "loss": 0.0594, "step": 9622 }, { "epoch": 10.17, "learning_rate": 2.458509513742072e-05, "loss": 0.0639, "step": 9624 }, { "epoch": 10.18, "learning_rate": 2.4579809725158563e-05, "loss": 0.0344, "step": 9626 }, { "epoch": 10.18, "learning_rate": 2.457452431289641e-05, "loss": 0.0442, "step": 9628 }, { "epoch": 10.18, "learning_rate": 2.4569238900634252e-05, "loss": 0.1143, "step": 9630 }, { "epoch": 10.18, "learning_rate": 2.4563953488372094e-05, "loss": 0.0763, "step": 9632 }, { "epoch": 10.18, "learning_rate": 2.455866807610994e-05, "loss": 0.0544, "step": 9634 }, { "epoch": 10.19, "learning_rate": 2.4553382663847783e-05, "loss": 0.0375, "step": 9636 }, { "epoch": 10.19, "learning_rate": 2.4548097251585626e-05, "loss": 0.0968, "step": 9638 }, { "epoch": 10.19, "learning_rate": 2.454281183932347e-05, "loss": 0.1139, "step": 9640 }, { "epoch": 10.19, "learning_rate": 2.453752642706131e-05, "loss": 0.0868, "step": 9642 }, { "epoch": 10.19, "learning_rate": 2.4532241014799157e-05, "loss": 0.0274, "step": 9644 }, { "epoch": 10.2, "learning_rate": 2.4526955602537e-05, "loss": 0.1256, "step": 9646 }, { "epoch": 10.2, "learning_rate": 2.4521670190274843e-05, "loss": 0.123, "step": 9648 }, { "epoch": 10.2, "learning_rate": 2.4516384778012685e-05, "loss": 0.0492, "step": 9650 }, { "epoch": 10.2, "learning_rate": 2.4511099365750528e-05, "loss": 0.0606, "step": 9652 }, { "epoch": 10.21, "learning_rate": 2.4505813953488374e-05, "loss": 0.0915, "step": 9654 }, { "epoch": 10.21, "learning_rate": 2.4500528541226217e-05, "loss": 0.0606, "step": 9656 }, { "epoch": 10.21, "learning_rate": 2.449524312896406e-05, "loss": 0.0424, "step": 9658 }, { "epoch": 10.21, "learning_rate": 2.4489957716701902e-05, "loss": 0.0585, "step": 9660 }, { "epoch": 10.21, "learning_rate": 2.4484672304439748e-05, "loss": 0.0735, "step": 9662 }, { "epoch": 10.22, "learning_rate": 2.447938689217759e-05, "loss": 0.0558, "step": 9664 }, { "epoch": 10.22, "learning_rate": 2.4474101479915433e-05, "loss": 0.0444, "step": 9666 }, { "epoch": 10.22, "learning_rate": 2.4468816067653276e-05, "loss": 0.0697, "step": 9668 }, { "epoch": 10.22, "learning_rate": 2.446353065539112e-05, "loss": 0.1268, "step": 9670 }, { "epoch": 10.22, "learning_rate": 2.4458245243128965e-05, "loss": 0.1274, "step": 9672 }, { "epoch": 10.23, "learning_rate": 2.4452959830866807e-05, "loss": 0.1141, "step": 9674 }, { "epoch": 10.23, "learning_rate": 2.4447674418604654e-05, "loss": 0.0624, "step": 9676 }, { "epoch": 10.23, "learning_rate": 2.4442389006342496e-05, "loss": 0.054, "step": 9678 }, { "epoch": 10.23, "learning_rate": 2.4437103594080342e-05, "loss": 0.0644, "step": 9680 }, { "epoch": 10.23, "learning_rate": 2.4431818181818185e-05, "loss": 0.0435, "step": 9682 }, { "epoch": 10.24, "learning_rate": 2.4426532769556028e-05, "loss": 0.0393, "step": 9684 }, { "epoch": 10.24, "learning_rate": 2.442124735729387e-05, "loss": 0.1323, "step": 9686 }, { "epoch": 10.24, "learning_rate": 2.4415961945031713e-05, "loss": 0.1019, "step": 9688 }, { "epoch": 10.24, "learning_rate": 2.441067653276956e-05, "loss": 0.0803, "step": 9690 }, { "epoch": 10.25, "learning_rate": 2.44053911205074e-05, "loss": 0.0582, "step": 9692 }, { "epoch": 10.25, "learning_rate": 2.4400105708245244e-05, "loss": 0.0659, "step": 9694 }, { "epoch": 10.25, "learning_rate": 2.4394820295983087e-05, "loss": 0.0528, "step": 9696 }, { "epoch": 10.25, "learning_rate": 2.4389534883720933e-05, "loss": 0.0614, "step": 9698 }, { "epoch": 10.25, "learning_rate": 2.4384249471458776e-05, "loss": 0.1287, "step": 9700 }, { "epoch": 10.26, "learning_rate": 2.437896405919662e-05, "loss": 0.211, "step": 9702 }, { "epoch": 10.26, "learning_rate": 2.437367864693446e-05, "loss": 0.0376, "step": 9704 }, { "epoch": 10.26, "learning_rate": 2.4368393234672304e-05, "loss": 0.0271, "step": 9706 }, { "epoch": 10.26, "learning_rate": 2.436310782241015e-05, "loss": 0.1002, "step": 9708 }, { "epoch": 10.26, "learning_rate": 2.4357822410147992e-05, "loss": 0.1325, "step": 9710 }, { "epoch": 10.27, "learning_rate": 2.4352536997885835e-05, "loss": 0.0062, "step": 9712 }, { "epoch": 10.27, "learning_rate": 2.4347251585623678e-05, "loss": 0.068, "step": 9714 }, { "epoch": 10.27, "learning_rate": 2.4341966173361524e-05, "loss": 0.0643, "step": 9716 }, { "epoch": 10.27, "learning_rate": 2.4336680761099367e-05, "loss": 0.1679, "step": 9718 }, { "epoch": 10.27, "learning_rate": 2.433139534883721e-05, "loss": 0.1077, "step": 9720 }, { "epoch": 10.28, "learning_rate": 2.4326109936575052e-05, "loss": 0.0535, "step": 9722 }, { "epoch": 10.28, "learning_rate": 2.4320824524312898e-05, "loss": 0.1192, "step": 9724 }, { "epoch": 10.28, "learning_rate": 2.431553911205074e-05, "loss": 0.057, "step": 9726 }, { "epoch": 10.28, "learning_rate": 2.4310253699788583e-05, "loss": 0.1002, "step": 9728 }, { "epoch": 10.29, "learning_rate": 2.430496828752643e-05, "loss": 0.2332, "step": 9730 }, { "epoch": 10.29, "learning_rate": 2.4299682875264272e-05, "loss": 0.0869, "step": 9732 }, { "epoch": 10.29, "learning_rate": 2.4294397463002118e-05, "loss": 0.2152, "step": 9734 }, { "epoch": 10.29, "learning_rate": 2.428911205073996e-05, "loss": 0.1526, "step": 9736 }, { "epoch": 10.29, "learning_rate": 2.4283826638477803e-05, "loss": 0.1586, "step": 9738 }, { "epoch": 10.3, "learning_rate": 2.4278541226215646e-05, "loss": 0.1074, "step": 9740 }, { "epoch": 10.3, "learning_rate": 2.427325581395349e-05, "loss": 0.0455, "step": 9742 }, { "epoch": 10.3, "learning_rate": 2.4267970401691335e-05, "loss": 0.0448, "step": 9744 }, { "epoch": 10.3, "learning_rate": 2.4262684989429177e-05, "loss": 0.0873, "step": 9746 }, { "epoch": 10.3, "learning_rate": 2.425739957716702e-05, "loss": 0.0349, "step": 9748 }, { "epoch": 10.31, "learning_rate": 2.4252114164904863e-05, "loss": 0.0399, "step": 9750 }, { "epoch": 10.31, "learning_rate": 2.424682875264271e-05, "loss": 0.0379, "step": 9752 }, { "epoch": 10.31, "learning_rate": 2.424154334038055e-05, "loss": 0.0655, "step": 9754 }, { "epoch": 10.31, "learning_rate": 2.4236257928118394e-05, "loss": 0.0885, "step": 9756 }, { "epoch": 10.32, "learning_rate": 2.4230972515856237e-05, "loss": 0.0374, "step": 9758 }, { "epoch": 10.32, "learning_rate": 2.422568710359408e-05, "loss": 0.0658, "step": 9760 }, { "epoch": 10.32, "learning_rate": 2.4220401691331926e-05, "loss": 0.042, "step": 9762 }, { "epoch": 10.32, "learning_rate": 2.421511627906977e-05, "loss": 0.0622, "step": 9764 }, { "epoch": 10.32, "learning_rate": 2.420983086680761e-05, "loss": 0.0568, "step": 9766 }, { "epoch": 10.33, "learning_rate": 2.4204545454545454e-05, "loss": 0.0538, "step": 9768 }, { "epoch": 10.33, "learning_rate": 2.41992600422833e-05, "loss": 0.0909, "step": 9770 }, { "epoch": 10.33, "learning_rate": 2.4193974630021142e-05, "loss": 0.1713, "step": 9772 }, { "epoch": 10.33, "learning_rate": 2.4188689217758985e-05, "loss": 0.0973, "step": 9774 }, { "epoch": 10.33, "learning_rate": 2.4183403805496828e-05, "loss": 0.0276, "step": 9776 }, { "epoch": 10.34, "learning_rate": 2.4178118393234674e-05, "loss": 0.063, "step": 9778 }, { "epoch": 10.34, "learning_rate": 2.4172832980972516e-05, "loss": 0.0193, "step": 9780 }, { "epoch": 10.34, "learning_rate": 2.4167547568710363e-05, "loss": 0.0401, "step": 9782 }, { "epoch": 10.34, "learning_rate": 2.4162262156448205e-05, "loss": 0.1097, "step": 9784 }, { "epoch": 10.34, "learning_rate": 2.4156976744186048e-05, "loss": 0.0611, "step": 9786 }, { "epoch": 10.35, "learning_rate": 2.4151691331923894e-05, "loss": 0.0231, "step": 9788 }, { "epoch": 10.35, "learning_rate": 2.4146405919661737e-05, "loss": 0.145, "step": 9790 }, { "epoch": 10.35, "learning_rate": 2.414112050739958e-05, "loss": 0.0839, "step": 9792 }, { "epoch": 10.35, "learning_rate": 2.4135835095137422e-05, "loss": 0.0581, "step": 9794 }, { "epoch": 10.36, "learning_rate": 2.4130549682875265e-05, "loss": 0.0452, "step": 9796 }, { "epoch": 10.36, "learning_rate": 2.412526427061311e-05, "loss": 0.0309, "step": 9798 }, { "epoch": 10.36, "learning_rate": 2.4119978858350953e-05, "loss": 0.1888, "step": 9800 }, { "epoch": 10.36, "learning_rate": 2.4114693446088796e-05, "loss": 0.1226, "step": 9802 }, { "epoch": 10.36, "learning_rate": 2.410940803382664e-05, "loss": 0.0596, "step": 9804 }, { "epoch": 10.37, "learning_rate": 2.4104122621564485e-05, "loss": 0.0507, "step": 9806 }, { "epoch": 10.37, "learning_rate": 2.4098837209302327e-05, "loss": 0.0564, "step": 9808 }, { "epoch": 10.37, "learning_rate": 2.409355179704017e-05, "loss": 0.0319, "step": 9810 }, { "epoch": 10.37, "learning_rate": 2.4088266384778013e-05, "loss": 0.1549, "step": 9812 }, { "epoch": 10.37, "learning_rate": 2.4082980972515855e-05, "loss": 0.1253, "step": 9814 }, { "epoch": 10.38, "learning_rate": 2.40776955602537e-05, "loss": 0.0819, "step": 9816 }, { "epoch": 10.38, "learning_rate": 2.4072410147991544e-05, "loss": 0.126, "step": 9818 }, { "epoch": 10.38, "learning_rate": 2.4067124735729387e-05, "loss": 0.0943, "step": 9820 }, { "epoch": 10.38, "learning_rate": 2.406183932346723e-05, "loss": 0.0304, "step": 9822 }, { "epoch": 10.38, "learning_rate": 2.4056553911205076e-05, "loss": 0.1265, "step": 9824 }, { "epoch": 10.39, "learning_rate": 2.4051268498942918e-05, "loss": 0.0787, "step": 9826 }, { "epoch": 10.39, "learning_rate": 2.404598308668076e-05, "loss": 0.104, "step": 9828 }, { "epoch": 10.39, "learning_rate": 2.4040697674418604e-05, "loss": 0.0846, "step": 9830 }, { "epoch": 10.39, "learning_rate": 2.403541226215645e-05, "loss": 0.0298, "step": 9832 }, { "epoch": 10.4, "learning_rate": 2.4030126849894292e-05, "loss": 0.1509, "step": 9834 }, { "epoch": 10.4, "learning_rate": 2.402484143763214e-05, "loss": 0.0735, "step": 9836 }, { "epoch": 10.4, "learning_rate": 2.401955602536998e-05, "loss": 0.0591, "step": 9838 }, { "epoch": 10.4, "learning_rate": 2.4014270613107824e-05, "loss": 0.0444, "step": 9840 }, { "epoch": 10.4, "learning_rate": 2.400898520084567e-05, "loss": 0.032, "step": 9842 }, { "epoch": 10.41, "learning_rate": 2.4003699788583512e-05, "loss": 0.103, "step": 9844 }, { "epoch": 10.41, "learning_rate": 2.3998414376321355e-05, "loss": 0.0892, "step": 9846 }, { "epoch": 10.41, "learning_rate": 2.3993128964059198e-05, "loss": 0.0488, "step": 9848 }, { "epoch": 10.41, "learning_rate": 2.398784355179704e-05, "loss": 0.0486, "step": 9850 }, { "epoch": 10.41, "learning_rate": 2.3982558139534887e-05, "loss": 0.0858, "step": 9852 }, { "epoch": 10.42, "learning_rate": 2.397727272727273e-05, "loss": 0.0312, "step": 9854 }, { "epoch": 10.42, "learning_rate": 2.3971987315010572e-05, "loss": 0.0556, "step": 9856 }, { "epoch": 10.42, "learning_rate": 2.3966701902748415e-05, "loss": 0.0405, "step": 9858 }, { "epoch": 10.42, "learning_rate": 2.396141649048626e-05, "loss": 0.0065, "step": 9860 }, { "epoch": 10.42, "learning_rate": 2.3956131078224103e-05, "loss": 0.0873, "step": 9862 }, { "epoch": 10.43, "learning_rate": 2.3950845665961946e-05, "loss": 0.116, "step": 9864 }, { "epoch": 10.43, "learning_rate": 2.394556025369979e-05, "loss": 0.0445, "step": 9866 }, { "epoch": 10.43, "learning_rate": 2.394027484143763e-05, "loss": 0.0315, "step": 9868 }, { "epoch": 10.43, "learning_rate": 2.3934989429175477e-05, "loss": 0.0742, "step": 9870 }, { "epoch": 10.44, "learning_rate": 2.392970401691332e-05, "loss": 0.0774, "step": 9872 }, { "epoch": 10.44, "learning_rate": 2.3924418604651163e-05, "loss": 0.1122, "step": 9874 }, { "epoch": 10.44, "learning_rate": 2.3919133192389005e-05, "loss": 0.0437, "step": 9876 }, { "epoch": 10.44, "learning_rate": 2.391384778012685e-05, "loss": 0.1272, "step": 9878 }, { "epoch": 10.44, "learning_rate": 2.3908562367864694e-05, "loss": 0.0312, "step": 9880 }, { "epoch": 10.45, "learning_rate": 2.3903276955602537e-05, "loss": 0.1325, "step": 9882 }, { "epoch": 10.45, "learning_rate": 2.3897991543340383e-05, "loss": 0.0612, "step": 9884 }, { "epoch": 10.45, "learning_rate": 2.3892706131078225e-05, "loss": 0.0457, "step": 9886 }, { "epoch": 10.45, "learning_rate": 2.3887420718816068e-05, "loss": 0.1169, "step": 9888 }, { "epoch": 10.45, "learning_rate": 2.3882135306553914e-05, "loss": 0.031, "step": 9890 }, { "epoch": 10.46, "learning_rate": 2.3876849894291757e-05, "loss": 0.0832, "step": 9892 }, { "epoch": 10.46, "learning_rate": 2.38715644820296e-05, "loss": 0.069, "step": 9894 }, { "epoch": 10.46, "learning_rate": 2.3866279069767446e-05, "loss": 0.0545, "step": 9896 }, { "epoch": 10.46, "learning_rate": 2.3860993657505288e-05, "loss": 0.0588, "step": 9898 }, { "epoch": 10.47, "learning_rate": 2.385570824524313e-05, "loss": 0.1478, "step": 9900 }, { "epoch": 10.47, "learning_rate": 2.3850422832980974e-05, "loss": 0.057, "step": 9902 }, { "epoch": 10.47, "learning_rate": 2.3845137420718816e-05, "loss": 0.1162, "step": 9904 }, { "epoch": 10.47, "learning_rate": 2.3839852008456662e-05, "loss": 0.0453, "step": 9906 }, { "epoch": 10.47, "learning_rate": 2.3834566596194505e-05, "loss": 0.0668, "step": 9908 }, { "epoch": 10.48, "learning_rate": 2.3829281183932348e-05, "loss": 0.0392, "step": 9910 }, { "epoch": 10.48, "learning_rate": 2.382399577167019e-05, "loss": 0.0875, "step": 9912 }, { "epoch": 10.48, "learning_rate": 2.3818710359408036e-05, "loss": 0.0435, "step": 9914 }, { "epoch": 10.48, "learning_rate": 2.381342494714588e-05, "loss": 0.1691, "step": 9916 }, { "epoch": 10.48, "learning_rate": 2.3808139534883722e-05, "loss": 0.0769, "step": 9918 }, { "epoch": 10.49, "learning_rate": 2.3802854122621564e-05, "loss": 0.0625, "step": 9920 }, { "epoch": 10.49, "learning_rate": 2.3797568710359407e-05, "loss": 0.0797, "step": 9922 }, { "epoch": 10.49, "learning_rate": 2.3792283298097253e-05, "loss": 0.0506, "step": 9924 }, { "epoch": 10.49, "learning_rate": 2.3786997885835096e-05, "loss": 0.0871, "step": 9926 }, { "epoch": 10.49, "learning_rate": 2.378171247357294e-05, "loss": 0.1237, "step": 9928 }, { "epoch": 10.5, "learning_rate": 2.377642706131078e-05, "loss": 0.1047, "step": 9930 }, { "epoch": 10.5, "learning_rate": 2.3771141649048627e-05, "loss": 0.07, "step": 9932 }, { "epoch": 10.5, "learning_rate": 2.376585623678647e-05, "loss": 0.0599, "step": 9934 }, { "epoch": 10.5, "learning_rate": 2.3760570824524313e-05, "loss": 0.0793, "step": 9936 }, { "epoch": 10.51, "learning_rate": 2.375528541226216e-05, "loss": 0.1223, "step": 9938 }, { "epoch": 10.51, "learning_rate": 2.375e-05, "loss": 0.0498, "step": 9940 }, { "epoch": 10.51, "learning_rate": 2.3744714587737847e-05, "loss": 0.1055, "step": 9942 }, { "epoch": 10.51, "learning_rate": 2.373942917547569e-05, "loss": 0.0825, "step": 9944 }, { "epoch": 10.51, "learning_rate": 2.3734143763213533e-05, "loss": 0.023, "step": 9946 }, { "epoch": 10.52, "learning_rate": 2.3728858350951375e-05, "loss": 0.1028, "step": 9948 }, { "epoch": 10.52, "learning_rate": 2.372357293868922e-05, "loss": 0.0584, "step": 9950 }, { "epoch": 10.52, "learning_rate": 2.3718287526427064e-05, "loss": 0.1151, "step": 9952 }, { "epoch": 10.52, "learning_rate": 2.3713002114164907e-05, "loss": 0.0949, "step": 9954 }, { "epoch": 10.52, "learning_rate": 2.370771670190275e-05, "loss": 0.1587, "step": 9956 }, { "epoch": 10.53, "learning_rate": 2.3702431289640592e-05, "loss": 0.0479, "step": 9958 }, { "epoch": 10.53, "learning_rate": 2.3697145877378438e-05, "loss": 0.0412, "step": 9960 }, { "epoch": 10.53, "learning_rate": 2.369186046511628e-05, "loss": 0.1578, "step": 9962 }, { "epoch": 10.53, "learning_rate": 2.3686575052854124e-05, "loss": 0.1135, "step": 9964 }, { "epoch": 10.53, "learning_rate": 2.3681289640591966e-05, "loss": 0.1214, "step": 9966 }, { "epoch": 10.54, "learning_rate": 2.3676004228329812e-05, "loss": 0.0285, "step": 9968 }, { "epoch": 10.54, "learning_rate": 2.3670718816067655e-05, "loss": 0.0551, "step": 9970 }, { "epoch": 10.54, "learning_rate": 2.3665433403805498e-05, "loss": 0.0443, "step": 9972 }, { "epoch": 10.54, "learning_rate": 2.366014799154334e-05, "loss": 0.0498, "step": 9974 }, { "epoch": 10.55, "learning_rate": 2.3654862579281183e-05, "loss": 0.283, "step": 9976 }, { "epoch": 10.55, "learning_rate": 2.364957716701903e-05, "loss": 0.0721, "step": 9978 }, { "epoch": 10.55, "learning_rate": 2.364429175475687e-05, "loss": 0.0452, "step": 9980 }, { "epoch": 10.55, "learning_rate": 2.3639006342494714e-05, "loss": 0.1145, "step": 9982 }, { "epoch": 10.55, "learning_rate": 2.3633720930232557e-05, "loss": 0.163, "step": 9984 }, { "epoch": 10.56, "learning_rate": 2.3628435517970403e-05, "loss": 0.0282, "step": 9986 }, { "epoch": 10.56, "learning_rate": 2.3623150105708246e-05, "loss": 0.0182, "step": 9988 }, { "epoch": 10.56, "learning_rate": 2.361786469344609e-05, "loss": 0.0602, "step": 9990 }, { "epoch": 10.56, "learning_rate": 2.3612579281183934e-05, "loss": 0.1005, "step": 9992 }, { "epoch": 10.56, "learning_rate": 2.3607293868921777e-05, "loss": 0.0487, "step": 9994 }, { "epoch": 10.57, "learning_rate": 2.3602008456659623e-05, "loss": 0.1124, "step": 9996 }, { "epoch": 10.57, "learning_rate": 2.3596723044397466e-05, "loss": 0.0693, "step": 9998 }, { "epoch": 10.57, "learning_rate": 2.359143763213531e-05, "loss": 0.05, "step": 10000 }, { "epoch": 10.57, "eval_cer": 0.02638928469649473, "eval_loss": 0.38202860951423645, "eval_runtime": 128.6979, "eval_samples_per_second": 6.535, "eval_steps_per_second": 0.824, "step": 10000 }, { "epoch": 10.57, "learning_rate": 2.358615221987315e-05, "loss": 0.0817, "step": 10002 }, { "epoch": 10.58, "learning_rate": 2.3580866807610994e-05, "loss": 0.0701, "step": 10004 }, { "epoch": 10.58, "learning_rate": 2.357558139534884e-05, "loss": 0.0567, "step": 10006 }, { "epoch": 10.58, "learning_rate": 2.3570295983086683e-05, "loss": 0.0494, "step": 10008 }, { "epoch": 10.58, "learning_rate": 2.3565010570824525e-05, "loss": 0.0508, "step": 10010 }, { "epoch": 10.58, "learning_rate": 2.3559725158562368e-05, "loss": 0.1178, "step": 10012 }, { "epoch": 10.59, "learning_rate": 2.3554439746300214e-05, "loss": 0.0839, "step": 10014 }, { "epoch": 10.59, "learning_rate": 2.3549154334038057e-05, "loss": 0.1188, "step": 10016 }, { "epoch": 10.59, "learning_rate": 2.35438689217759e-05, "loss": 0.0868, "step": 10018 }, { "epoch": 10.59, "learning_rate": 2.3538583509513742e-05, "loss": 0.0215, "step": 10020 }, { "epoch": 10.59, "learning_rate": 2.3533298097251585e-05, "loss": 0.0431, "step": 10022 }, { "epoch": 10.6, "learning_rate": 2.352801268498943e-05, "loss": 0.0646, "step": 10024 }, { "epoch": 10.6, "learning_rate": 2.3522727272727273e-05, "loss": 0.0371, "step": 10026 }, { "epoch": 10.6, "learning_rate": 2.3517441860465116e-05, "loss": 0.0433, "step": 10028 }, { "epoch": 10.6, "learning_rate": 2.351215644820296e-05, "loss": 0.0562, "step": 10030 }, { "epoch": 10.6, "learning_rate": 2.3506871035940805e-05, "loss": 0.0157, "step": 10032 }, { "epoch": 10.61, "learning_rate": 2.3501585623678648e-05, "loss": 0.0435, "step": 10034 }, { "epoch": 10.61, "learning_rate": 2.349630021141649e-05, "loss": 0.0897, "step": 10036 }, { "epoch": 10.61, "learning_rate": 2.3491014799154333e-05, "loss": 0.184, "step": 10038 }, { "epoch": 10.61, "learning_rate": 2.348572938689218e-05, "loss": 0.2017, "step": 10040 }, { "epoch": 10.62, "learning_rate": 2.348044397463002e-05, "loss": 0.1572, "step": 10042 }, { "epoch": 10.62, "learning_rate": 2.3475158562367868e-05, "loss": 0.0323, "step": 10044 }, { "epoch": 10.62, "learning_rate": 2.346987315010571e-05, "loss": 0.1133, "step": 10046 }, { "epoch": 10.62, "learning_rate": 2.3464587737843553e-05, "loss": 0.1693, "step": 10048 }, { "epoch": 10.62, "learning_rate": 2.34593023255814e-05, "loss": 0.1112, "step": 10050 }, { "epoch": 10.63, "learning_rate": 2.3454016913319242e-05, "loss": 0.137, "step": 10052 }, { "epoch": 10.63, "learning_rate": 2.3448731501057084e-05, "loss": 0.0497, "step": 10054 }, { "epoch": 10.63, "learning_rate": 2.3443446088794927e-05, "loss": 0.1043, "step": 10056 }, { "epoch": 10.63, "learning_rate": 2.343816067653277e-05, "loss": 0.0784, "step": 10058 }, { "epoch": 10.63, "learning_rate": 2.3432875264270616e-05, "loss": 0.0274, "step": 10060 }, { "epoch": 10.64, "learning_rate": 2.342758985200846e-05, "loss": 0.0521, "step": 10062 }, { "epoch": 10.64, "learning_rate": 2.34223044397463e-05, "loss": 0.0672, "step": 10064 }, { "epoch": 10.64, "learning_rate": 2.3417019027484144e-05, "loss": 0.0268, "step": 10066 }, { "epoch": 10.64, "learning_rate": 2.341173361522199e-05, "loss": 0.0964, "step": 10068 }, { "epoch": 10.64, "learning_rate": 2.3406448202959833e-05, "loss": 0.0146, "step": 10070 }, { "epoch": 10.65, "learning_rate": 2.3401162790697675e-05, "loss": 0.052, "step": 10072 }, { "epoch": 10.65, "learning_rate": 2.3395877378435518e-05, "loss": 0.0886, "step": 10074 }, { "epoch": 10.65, "learning_rate": 2.339059196617336e-05, "loss": 0.0271, "step": 10076 }, { "epoch": 10.65, "learning_rate": 2.3385306553911207e-05, "loss": 0.0203, "step": 10078 }, { "epoch": 10.66, "learning_rate": 2.338002114164905e-05, "loss": 0.0978, "step": 10080 }, { "epoch": 10.66, "learning_rate": 2.3374735729386892e-05, "loss": 0.0224, "step": 10082 }, { "epoch": 10.66, "learning_rate": 2.3369450317124735e-05, "loss": 0.0761, "step": 10084 }, { "epoch": 10.66, "learning_rate": 2.336416490486258e-05, "loss": 0.0888, "step": 10086 }, { "epoch": 10.66, "learning_rate": 2.3358879492600423e-05, "loss": 0.0903, "step": 10088 }, { "epoch": 10.67, "learning_rate": 2.3353594080338266e-05, "loss": 0.1393, "step": 10090 }, { "epoch": 10.67, "learning_rate": 2.3348308668076112e-05, "loss": 0.0509, "step": 10092 }, { "epoch": 10.67, "learning_rate": 2.3343023255813955e-05, "loss": 0.0558, "step": 10094 }, { "epoch": 10.67, "learning_rate": 2.3337737843551797e-05, "loss": 0.0986, "step": 10096 }, { "epoch": 10.67, "learning_rate": 2.3332452431289643e-05, "loss": 0.0502, "step": 10098 }, { "epoch": 10.68, "learning_rate": 2.3327167019027486e-05, "loss": 0.0581, "step": 10100 }, { "epoch": 10.68, "learning_rate": 2.332188160676533e-05, "loss": 0.0655, "step": 10102 }, { "epoch": 10.68, "learning_rate": 2.3316596194503175e-05, "loss": 0.0805, "step": 10104 }, { "epoch": 10.68, "learning_rate": 2.3311310782241018e-05, "loss": 0.0529, "step": 10106 }, { "epoch": 10.68, "learning_rate": 2.330602536997886e-05, "loss": 0.1108, "step": 10108 }, { "epoch": 10.69, "learning_rate": 2.3300739957716703e-05, "loss": 0.0586, "step": 10110 }, { "epoch": 10.69, "learning_rate": 2.3295454545454546e-05, "loss": 0.0512, "step": 10112 }, { "epoch": 10.69, "learning_rate": 2.329016913319239e-05, "loss": 0.0463, "step": 10114 }, { "epoch": 10.69, "learning_rate": 2.3284883720930234e-05, "loss": 0.0569, "step": 10116 }, { "epoch": 10.7, "learning_rate": 2.3279598308668077e-05, "loss": 0.0277, "step": 10118 }, { "epoch": 10.7, "learning_rate": 2.327431289640592e-05, "loss": 0.0731, "step": 10120 }, { "epoch": 10.7, "learning_rate": 2.3269027484143766e-05, "loss": 0.0775, "step": 10122 }, { "epoch": 10.7, "learning_rate": 2.326374207188161e-05, "loss": 0.04, "step": 10124 }, { "epoch": 10.7, "learning_rate": 2.325845665961945e-05, "loss": 0.084, "step": 10126 }, { "epoch": 10.71, "learning_rate": 2.3253171247357294e-05, "loss": 0.056, "step": 10128 }, { "epoch": 10.71, "learning_rate": 2.3247885835095136e-05, "loss": 0.0541, "step": 10130 }, { "epoch": 10.71, "learning_rate": 2.3242600422832982e-05, "loss": 0.0266, "step": 10132 }, { "epoch": 10.71, "learning_rate": 2.3237315010570825e-05, "loss": 0.0888, "step": 10134 }, { "epoch": 10.71, "learning_rate": 2.3232029598308668e-05, "loss": 0.0318, "step": 10136 }, { "epoch": 10.72, "learning_rate": 2.322674418604651e-05, "loss": 0.0515, "step": 10138 }, { "epoch": 10.72, "learning_rate": 2.3221458773784357e-05, "loss": 0.0594, "step": 10140 }, { "epoch": 10.72, "learning_rate": 2.32161733615222e-05, "loss": 0.0935, "step": 10142 }, { "epoch": 10.72, "learning_rate": 2.3210887949260042e-05, "loss": 0.0298, "step": 10144 }, { "epoch": 10.73, "learning_rate": 2.3205602536997888e-05, "loss": 0.1176, "step": 10146 }, { "epoch": 10.73, "learning_rate": 2.320031712473573e-05, "loss": 0.0506, "step": 10148 }, { "epoch": 10.73, "learning_rate": 2.3195031712473577e-05, "loss": 0.0595, "step": 10150 }, { "epoch": 10.73, "learning_rate": 2.318974630021142e-05, "loss": 0.0413, "step": 10152 }, { "epoch": 10.73, "learning_rate": 2.3184460887949262e-05, "loss": 0.102, "step": 10154 }, { "epoch": 10.74, "learning_rate": 2.3179175475687105e-05, "loss": 0.0363, "step": 10156 }, { "epoch": 10.74, "learning_rate": 2.317389006342495e-05, "loss": 0.0616, "step": 10158 }, { "epoch": 10.74, "learning_rate": 2.3168604651162793e-05, "loss": 0.1399, "step": 10160 }, { "epoch": 10.74, "learning_rate": 2.3163319238900636e-05, "loss": 0.0462, "step": 10162 }, { "epoch": 10.74, "learning_rate": 2.315803382663848e-05, "loss": 0.0246, "step": 10164 }, { "epoch": 10.75, "learning_rate": 2.315274841437632e-05, "loss": 0.0721, "step": 10166 }, { "epoch": 10.75, "learning_rate": 2.3147463002114167e-05, "loss": 0.0664, "step": 10168 }, { "epoch": 10.75, "learning_rate": 2.314217758985201e-05, "loss": 0.0476, "step": 10170 }, { "epoch": 10.75, "learning_rate": 2.3136892177589853e-05, "loss": 0.0586, "step": 10172 }, { "epoch": 10.75, "learning_rate": 2.3131606765327695e-05, "loss": 0.0424, "step": 10174 }, { "epoch": 10.76, "learning_rate": 2.312632135306554e-05, "loss": 0.0312, "step": 10176 }, { "epoch": 10.76, "learning_rate": 2.3121035940803384e-05, "loss": 0.0746, "step": 10178 }, { "epoch": 10.76, "learning_rate": 2.3115750528541227e-05, "loss": 0.0858, "step": 10180 }, { "epoch": 10.76, "learning_rate": 2.311046511627907e-05, "loss": 0.0282, "step": 10182 }, { "epoch": 10.77, "learning_rate": 2.3105179704016912e-05, "loss": 0.0502, "step": 10184 }, { "epoch": 10.77, "learning_rate": 2.3099894291754758e-05, "loss": 0.0535, "step": 10186 }, { "epoch": 10.77, "learning_rate": 2.30946088794926e-05, "loss": 0.0495, "step": 10188 }, { "epoch": 10.77, "learning_rate": 2.3089323467230444e-05, "loss": 0.1278, "step": 10190 }, { "epoch": 10.77, "learning_rate": 2.3084038054968286e-05, "loss": 0.1004, "step": 10192 }, { "epoch": 10.78, "learning_rate": 2.3078752642706132e-05, "loss": 0.0488, "step": 10194 }, { "epoch": 10.78, "learning_rate": 2.3073467230443975e-05, "loss": 0.0498, "step": 10196 }, { "epoch": 10.78, "learning_rate": 2.3068181818181818e-05, "loss": 0.0595, "step": 10198 }, { "epoch": 10.78, "learning_rate": 2.3062896405919664e-05, "loss": 0.0699, "step": 10200 }, { "epoch": 10.78, "learning_rate": 2.3057610993657506e-05, "loss": 0.0941, "step": 10202 }, { "epoch": 10.79, "learning_rate": 2.3052325581395352e-05, "loss": 0.0409, "step": 10204 }, { "epoch": 10.79, "learning_rate": 2.3047040169133195e-05, "loss": 0.0915, "step": 10206 }, { "epoch": 10.79, "learning_rate": 2.3041754756871038e-05, "loss": 0.0957, "step": 10208 }, { "epoch": 10.79, "learning_rate": 2.303646934460888e-05, "loss": 0.0636, "step": 10210 }, { "epoch": 10.79, "learning_rate": 2.3031183932346727e-05, "loss": 0.0596, "step": 10212 }, { "epoch": 10.8, "learning_rate": 2.302589852008457e-05, "loss": 0.05, "step": 10214 }, { "epoch": 10.8, "learning_rate": 2.3020613107822412e-05, "loss": 0.0246, "step": 10216 }, { "epoch": 10.8, "learning_rate": 2.3015327695560255e-05, "loss": 0.142, "step": 10218 }, { "epoch": 10.8, "learning_rate": 2.3010042283298097e-05, "loss": 0.0424, "step": 10220 }, { "epoch": 10.81, "learning_rate": 2.3004756871035943e-05, "loss": 0.0676, "step": 10222 }, { "epoch": 10.81, "learning_rate": 2.2999471458773786e-05, "loss": 0.0551, "step": 10224 }, { "epoch": 10.81, "learning_rate": 2.299418604651163e-05, "loss": 0.0686, "step": 10226 }, { "epoch": 10.81, "learning_rate": 2.298890063424947e-05, "loss": 0.1185, "step": 10228 }, { "epoch": 10.81, "learning_rate": 2.2983615221987317e-05, "loss": 0.0675, "step": 10230 }, { "epoch": 10.82, "learning_rate": 2.297832980972516e-05, "loss": 0.0656, "step": 10232 }, { "epoch": 10.82, "learning_rate": 2.2973044397463003e-05, "loss": 0.0615, "step": 10234 }, { "epoch": 10.82, "learning_rate": 2.2967758985200845e-05, "loss": 0.0193, "step": 10236 }, { "epoch": 10.82, "learning_rate": 2.2962473572938688e-05, "loss": 0.1593, "step": 10238 }, { "epoch": 10.82, "learning_rate": 2.2957188160676534e-05, "loss": 0.1292, "step": 10240 }, { "epoch": 10.83, "learning_rate": 2.2951902748414377e-05, "loss": 0.0303, "step": 10242 }, { "epoch": 10.83, "learning_rate": 2.294661733615222e-05, "loss": 0.1374, "step": 10244 }, { "epoch": 10.83, "learning_rate": 2.2941331923890062e-05, "loss": 0.0827, "step": 10246 }, { "epoch": 10.83, "learning_rate": 2.2936046511627908e-05, "loss": 0.0772, "step": 10248 }, { "epoch": 10.84, "learning_rate": 2.293076109936575e-05, "loss": 0.0461, "step": 10250 }, { "epoch": 10.84, "learning_rate": 2.2925475687103597e-05, "loss": 0.0469, "step": 10252 }, { "epoch": 10.84, "learning_rate": 2.292019027484144e-05, "loss": 0.0446, "step": 10254 }, { "epoch": 10.84, "learning_rate": 2.2914904862579282e-05, "loss": 0.0505, "step": 10256 }, { "epoch": 10.84, "learning_rate": 2.290961945031713e-05, "loss": 0.017, "step": 10258 }, { "epoch": 10.85, "learning_rate": 2.290433403805497e-05, "loss": 0.0467, "step": 10260 }, { "epoch": 10.85, "learning_rate": 2.2899048625792814e-05, "loss": 0.136, "step": 10262 }, { "epoch": 10.85, "learning_rate": 2.2893763213530656e-05, "loss": 0.0482, "step": 10264 }, { "epoch": 10.85, "learning_rate": 2.2888477801268502e-05, "loss": 0.065, "step": 10266 }, { "epoch": 10.85, "learning_rate": 2.2883192389006345e-05, "loss": 0.0573, "step": 10268 }, { "epoch": 10.86, "learning_rate": 2.2877906976744188e-05, "loss": 0.0658, "step": 10270 }, { "epoch": 10.86, "learning_rate": 2.287262156448203e-05, "loss": 0.0434, "step": 10272 }, { "epoch": 10.86, "learning_rate": 2.2867336152219873e-05, "loss": 0.1148, "step": 10274 }, { "epoch": 10.86, "learning_rate": 2.286205073995772e-05, "loss": 0.0269, "step": 10276 }, { "epoch": 10.86, "learning_rate": 2.2856765327695562e-05, "loss": 0.06, "step": 10278 }, { "epoch": 10.87, "learning_rate": 2.2851479915433404e-05, "loss": 0.0524, "step": 10280 }, { "epoch": 10.87, "learning_rate": 2.2846194503171247e-05, "loss": 0.0526, "step": 10282 }, { "epoch": 10.87, "learning_rate": 2.2840909090909093e-05, "loss": 0.0903, "step": 10284 }, { "epoch": 10.87, "learning_rate": 2.2835623678646936e-05, "loss": 0.0244, "step": 10286 }, { "epoch": 10.88, "learning_rate": 2.283033826638478e-05, "loss": 0.0735, "step": 10288 }, { "epoch": 10.88, "learning_rate": 2.282505285412262e-05, "loss": 0.0204, "step": 10290 }, { "epoch": 10.88, "learning_rate": 2.2819767441860464e-05, "loss": 0.0459, "step": 10292 }, { "epoch": 10.88, "learning_rate": 2.281448202959831e-05, "loss": 0.0396, "step": 10294 }, { "epoch": 10.88, "learning_rate": 2.2809196617336153e-05, "loss": 0.0582, "step": 10296 }, { "epoch": 10.89, "learning_rate": 2.2803911205073995e-05, "loss": 0.0827, "step": 10298 }, { "epoch": 10.89, "learning_rate": 2.2798625792811838e-05, "loss": 0.2031, "step": 10300 }, { "epoch": 10.89, "learning_rate": 2.2793340380549684e-05, "loss": 0.0703, "step": 10302 }, { "epoch": 10.89, "learning_rate": 2.2788054968287527e-05, "loss": 0.0775, "step": 10304 }, { "epoch": 10.89, "learning_rate": 2.2782769556025373e-05, "loss": 0.0718, "step": 10306 }, { "epoch": 10.9, "learning_rate": 2.2777484143763215e-05, "loss": 0.1008, "step": 10308 }, { "epoch": 10.9, "learning_rate": 2.2772198731501058e-05, "loss": 0.0488, "step": 10310 }, { "epoch": 10.9, "learning_rate": 2.2766913319238904e-05, "loss": 0.091, "step": 10312 }, { "epoch": 10.9, "learning_rate": 2.2761627906976747e-05, "loss": 0.0164, "step": 10314 }, { "epoch": 10.9, "learning_rate": 2.275634249471459e-05, "loss": 0.0562, "step": 10316 }, { "epoch": 10.91, "learning_rate": 2.2751057082452432e-05, "loss": 0.1001, "step": 10318 }, { "epoch": 10.91, "learning_rate": 2.2745771670190275e-05, "loss": 0.0833, "step": 10320 }, { "epoch": 10.91, "learning_rate": 2.274048625792812e-05, "loss": 0.0365, "step": 10322 }, { "epoch": 10.91, "learning_rate": 2.2735200845665964e-05, "loss": 0.0233, "step": 10324 }, { "epoch": 10.92, "learning_rate": 2.2729915433403806e-05, "loss": 0.0548, "step": 10326 }, { "epoch": 10.92, "learning_rate": 2.272463002114165e-05, "loss": 0.082, "step": 10328 }, { "epoch": 10.92, "learning_rate": 2.2719344608879495e-05, "loss": 0.0264, "step": 10330 }, { "epoch": 10.92, "learning_rate": 2.2714059196617338e-05, "loss": 0.1164, "step": 10332 }, { "epoch": 10.92, "learning_rate": 2.270877378435518e-05, "loss": 0.1032, "step": 10334 }, { "epoch": 10.93, "learning_rate": 2.2703488372093023e-05, "loss": 0.0402, "step": 10336 }, { "epoch": 10.93, "learning_rate": 2.2698202959830866e-05, "loss": 0.0097, "step": 10338 }, { "epoch": 10.93, "learning_rate": 2.2692917547568712e-05, "loss": 0.0675, "step": 10340 }, { "epoch": 10.93, "learning_rate": 2.2687632135306554e-05, "loss": 0.1228, "step": 10342 }, { "epoch": 10.93, "learning_rate": 2.2682346723044397e-05, "loss": 0.1078, "step": 10344 }, { "epoch": 10.94, "learning_rate": 2.267706131078224e-05, "loss": 0.0542, "step": 10346 }, { "epoch": 10.94, "learning_rate": 2.2671775898520086e-05, "loss": 0.046, "step": 10348 }, { "epoch": 10.94, "learning_rate": 2.266649048625793e-05, "loss": 0.0806, "step": 10350 }, { "epoch": 10.94, "learning_rate": 2.266120507399577e-05, "loss": 0.0393, "step": 10352 }, { "epoch": 10.95, "learning_rate": 2.2655919661733617e-05, "loss": 0.0835, "step": 10354 }, { "epoch": 10.95, "learning_rate": 2.265063424947146e-05, "loss": 0.0646, "step": 10356 }, { "epoch": 10.95, "learning_rate": 2.2645348837209303e-05, "loss": 0.0517, "step": 10358 }, { "epoch": 10.95, "learning_rate": 2.264006342494715e-05, "loss": 0.0558, "step": 10360 }, { "epoch": 10.95, "learning_rate": 2.263477801268499e-05, "loss": 0.1552, "step": 10362 }, { "epoch": 10.96, "learning_rate": 2.2629492600422834e-05, "loss": 0.0535, "step": 10364 }, { "epoch": 10.96, "learning_rate": 2.262420718816068e-05, "loss": 0.0364, "step": 10366 }, { "epoch": 10.96, "learning_rate": 2.2618921775898523e-05, "loss": 0.0611, "step": 10368 }, { "epoch": 10.96, "learning_rate": 2.2613636363636365e-05, "loss": 0.0374, "step": 10370 }, { "epoch": 10.96, "learning_rate": 2.2608350951374208e-05, "loss": 0.0747, "step": 10372 }, { "epoch": 10.97, "learning_rate": 2.260306553911205e-05, "loss": 0.0561, "step": 10374 }, { "epoch": 10.97, "learning_rate": 2.2597780126849897e-05, "loss": 0.1045, "step": 10376 }, { "epoch": 10.97, "learning_rate": 2.259249471458774e-05, "loss": 0.0336, "step": 10378 }, { "epoch": 10.97, "learning_rate": 2.2587209302325582e-05, "loss": 0.1093, "step": 10380 }, { "epoch": 10.97, "learning_rate": 2.2581923890063425e-05, "loss": 0.1052, "step": 10382 }, { "epoch": 10.98, "learning_rate": 2.257663847780127e-05, "loss": 0.0394, "step": 10384 }, { "epoch": 10.98, "learning_rate": 2.2571353065539113e-05, "loss": 0.0291, "step": 10386 }, { "epoch": 10.98, "learning_rate": 2.2566067653276956e-05, "loss": 0.0769, "step": 10388 }, { "epoch": 10.98, "learning_rate": 2.25607822410148e-05, "loss": 0.1155, "step": 10390 }, { "epoch": 10.99, "learning_rate": 2.255549682875264e-05, "loss": 0.0829, "step": 10392 }, { "epoch": 10.99, "learning_rate": 2.2550211416490488e-05, "loss": 0.0843, "step": 10394 }, { "epoch": 10.99, "learning_rate": 2.254492600422833e-05, "loss": 0.0408, "step": 10396 }, { "epoch": 10.99, "learning_rate": 2.2539640591966173e-05, "loss": 0.1647, "step": 10398 }, { "epoch": 10.99, "learning_rate": 2.2534355179704016e-05, "loss": 0.0599, "step": 10400 }, { "epoch": 11.0, "learning_rate": 2.252906976744186e-05, "loss": 0.0424, "step": 10402 }, { "epoch": 11.0, "learning_rate": 2.2523784355179704e-05, "loss": 0.09, "step": 10404 }, { "epoch": 11.0, "learning_rate": 2.2518498942917547e-05, "loss": 0.0305, "step": 10406 }, { "epoch": 11.0, "learning_rate": 2.2513213530655393e-05, "loss": 0.0331, "step": 10408 }, { "epoch": 11.0, "learning_rate": 2.2507928118393236e-05, "loss": 0.0907, "step": 10410 }, { "epoch": 11.01, "learning_rate": 2.2502642706131082e-05, "loss": 0.0668, "step": 10412 }, { "epoch": 11.01, "learning_rate": 2.2497357293868924e-05, "loss": 0.1693, "step": 10414 }, { "epoch": 11.01, "learning_rate": 2.2492071881606767e-05, "loss": 0.0328, "step": 10416 }, { "epoch": 11.01, "learning_rate": 2.248678646934461e-05, "loss": 0.0536, "step": 10418 }, { "epoch": 11.01, "learning_rate": 2.2481501057082456e-05, "loss": 0.1321, "step": 10420 }, { "epoch": 11.02, "learning_rate": 2.24762156448203e-05, "loss": 0.0779, "step": 10422 }, { "epoch": 11.02, "learning_rate": 2.247093023255814e-05, "loss": 0.0142, "step": 10424 }, { "epoch": 11.02, "learning_rate": 2.2465644820295984e-05, "loss": 0.0312, "step": 10426 }, { "epoch": 11.02, "learning_rate": 2.2460359408033827e-05, "loss": 0.1316, "step": 10428 }, { "epoch": 11.03, "learning_rate": 2.2455073995771673e-05, "loss": 0.0545, "step": 10430 }, { "epoch": 11.03, "learning_rate": 2.2449788583509515e-05, "loss": 0.0712, "step": 10432 }, { "epoch": 11.03, "learning_rate": 2.2444503171247358e-05, "loss": 0.0942, "step": 10434 }, { "epoch": 11.03, "learning_rate": 2.24392177589852e-05, "loss": 0.0524, "step": 10436 }, { "epoch": 11.03, "learning_rate": 2.2433932346723047e-05, "loss": 0.0469, "step": 10438 }, { "epoch": 11.04, "learning_rate": 2.242864693446089e-05, "loss": 0.0925, "step": 10440 }, { "epoch": 11.04, "learning_rate": 2.2423361522198732e-05, "loss": 0.045, "step": 10442 }, { "epoch": 11.04, "learning_rate": 2.2418076109936575e-05, "loss": 0.0913, "step": 10444 }, { "epoch": 11.04, "learning_rate": 2.2412790697674417e-05, "loss": 0.0557, "step": 10446 }, { "epoch": 11.04, "learning_rate": 2.2407505285412263e-05, "loss": 0.036, "step": 10448 }, { "epoch": 11.05, "learning_rate": 2.2402219873150106e-05, "loss": 0.0623, "step": 10450 }, { "epoch": 11.05, "learning_rate": 2.239693446088795e-05, "loss": 0.0375, "step": 10452 }, { "epoch": 11.05, "learning_rate": 2.239164904862579e-05, "loss": 0.1366, "step": 10454 }, { "epoch": 11.05, "learning_rate": 2.2386363636363637e-05, "loss": 0.0478, "step": 10456 }, { "epoch": 11.05, "learning_rate": 2.238107822410148e-05, "loss": 0.0697, "step": 10458 }, { "epoch": 11.06, "learning_rate": 2.2375792811839326e-05, "loss": 0.0432, "step": 10460 }, { "epoch": 11.06, "learning_rate": 2.237050739957717e-05, "loss": 0.0327, "step": 10462 }, { "epoch": 11.06, "learning_rate": 2.236522198731501e-05, "loss": 0.0793, "step": 10464 }, { "epoch": 11.06, "learning_rate": 2.2359936575052858e-05, "loss": 0.0506, "step": 10466 }, { "epoch": 11.07, "learning_rate": 2.23546511627907e-05, "loss": 0.0744, "step": 10468 }, { "epoch": 11.07, "learning_rate": 2.2349365750528543e-05, "loss": 0.0531, "step": 10470 }, { "epoch": 11.07, "learning_rate": 2.2344080338266386e-05, "loss": 0.1243, "step": 10472 }, { "epoch": 11.07, "learning_rate": 2.233879492600423e-05, "loss": 0.0289, "step": 10474 }, { "epoch": 11.07, "learning_rate": 2.2333509513742074e-05, "loss": 0.0458, "step": 10476 }, { "epoch": 11.08, "learning_rate": 2.2328224101479917e-05, "loss": 0.0888, "step": 10478 }, { "epoch": 11.08, "learning_rate": 2.232293868921776e-05, "loss": 0.0729, "step": 10480 }, { "epoch": 11.08, "learning_rate": 2.2317653276955602e-05, "loss": 0.054, "step": 10482 }, { "epoch": 11.08, "learning_rate": 2.231236786469345e-05, "loss": 0.0521, "step": 10484 }, { "epoch": 11.08, "learning_rate": 2.230708245243129e-05, "loss": 0.0531, "step": 10486 }, { "epoch": 11.09, "learning_rate": 2.2301797040169134e-05, "loss": 0.0542, "step": 10488 }, { "epoch": 11.09, "learning_rate": 2.2296511627906976e-05, "loss": 0.0279, "step": 10490 }, { "epoch": 11.09, "learning_rate": 2.2291226215644823e-05, "loss": 0.0912, "step": 10492 }, { "epoch": 11.09, "learning_rate": 2.2285940803382665e-05, "loss": 0.0324, "step": 10494 }, { "epoch": 11.1, "learning_rate": 2.2280655391120508e-05, "loss": 0.0314, "step": 10496 }, { "epoch": 11.1, "learning_rate": 2.227536997885835e-05, "loss": 0.1059, "step": 10498 }, { "epoch": 11.1, "learning_rate": 2.2270084566596193e-05, "loss": 0.042, "step": 10500 }, { "epoch": 11.1, "eval_cer": 0.04098033627814192, "eval_loss": 0.8228550553321838, "eval_runtime": 124.2745, "eval_samples_per_second": 6.767, "eval_steps_per_second": 0.853, "step": 10500 }, { "epoch": 11.1, "learning_rate": 2.226479915433404e-05, "loss": 0.0726, "step": 10502 }, { "epoch": 11.1, "learning_rate": 2.2259513742071882e-05, "loss": 0.0406, "step": 10504 }, { "epoch": 11.11, "learning_rate": 2.2254228329809725e-05, "loss": 0.0535, "step": 10506 }, { "epoch": 11.11, "learning_rate": 2.2248942917547567e-05, "loss": 0.0596, "step": 10508 }, { "epoch": 11.11, "learning_rate": 2.2243657505285413e-05, "loss": 0.0626, "step": 10510 }, { "epoch": 11.11, "learning_rate": 2.2238372093023256e-05, "loss": 0.0582, "step": 10512 }, { "epoch": 11.11, "learning_rate": 2.2233086680761102e-05, "loss": 0.1281, "step": 10514 }, { "epoch": 11.12, "learning_rate": 2.2227801268498945e-05, "loss": 0.0823, "step": 10516 }, { "epoch": 11.12, "learning_rate": 2.2222515856236787e-05, "loss": 0.0629, "step": 10518 }, { "epoch": 11.12, "learning_rate": 2.2217230443974633e-05, "loss": 0.0487, "step": 10520 }, { "epoch": 11.12, "learning_rate": 2.2211945031712476e-05, "loss": 0.0708, "step": 10522 }, { "epoch": 11.12, "learning_rate": 2.220665961945032e-05, "loss": 0.073, "step": 10524 }, { "epoch": 11.13, "learning_rate": 2.220137420718816e-05, "loss": 0.1439, "step": 10526 }, { "epoch": 11.13, "learning_rate": 2.2196088794926008e-05, "loss": 0.0255, "step": 10528 }, { "epoch": 11.13, "learning_rate": 2.219080338266385e-05, "loss": 0.0569, "step": 10530 }, { "epoch": 11.13, "learning_rate": 2.2185517970401693e-05, "loss": 0.0554, "step": 10532 }, { "epoch": 11.14, "learning_rate": 2.2180232558139536e-05, "loss": 0.0647, "step": 10534 }, { "epoch": 11.14, "learning_rate": 2.2174947145877378e-05, "loss": 0.0534, "step": 10536 }, { "epoch": 11.14, "learning_rate": 2.2169661733615224e-05, "loss": 0.0495, "step": 10538 }, { "epoch": 11.14, "learning_rate": 2.2164376321353067e-05, "loss": 0.128, "step": 10540 }, { "epoch": 11.14, "learning_rate": 2.215909090909091e-05, "loss": 0.1084, "step": 10542 }, { "epoch": 11.15, "learning_rate": 2.2153805496828752e-05, "loss": 0.0622, "step": 10544 }, { "epoch": 11.15, "learning_rate": 2.21485200845666e-05, "loss": 0.0289, "step": 10546 }, { "epoch": 11.15, "learning_rate": 2.214323467230444e-05, "loss": 0.0682, "step": 10548 }, { "epoch": 11.15, "learning_rate": 2.2137949260042284e-05, "loss": 0.0404, "step": 10550 }, { "epoch": 11.15, "learning_rate": 2.2132663847780126e-05, "loss": 0.0743, "step": 10552 }, { "epoch": 11.16, "learning_rate": 2.212737843551797e-05, "loss": 0.0205, "step": 10554 }, { "epoch": 11.16, "learning_rate": 2.2122093023255815e-05, "loss": 0.0295, "step": 10556 }, { "epoch": 11.16, "learning_rate": 2.2116807610993658e-05, "loss": 0.0505, "step": 10558 }, { "epoch": 11.16, "learning_rate": 2.21115221987315e-05, "loss": 0.0379, "step": 10560 }, { "epoch": 11.16, "learning_rate": 2.2106236786469346e-05, "loss": 0.0194, "step": 10562 }, { "epoch": 11.17, "learning_rate": 2.210095137420719e-05, "loss": 0.0343, "step": 10564 }, { "epoch": 11.17, "learning_rate": 2.2095665961945032e-05, "loss": 0.077, "step": 10566 }, { "epoch": 11.17, "learning_rate": 2.2090380549682878e-05, "loss": 0.0756, "step": 10568 }, { "epoch": 11.17, "learning_rate": 2.208509513742072e-05, "loss": 0.049, "step": 10570 }, { "epoch": 11.18, "learning_rate": 2.2079809725158563e-05, "loss": 0.0582, "step": 10572 }, { "epoch": 11.18, "learning_rate": 2.207452431289641e-05, "loss": 0.0347, "step": 10574 }, { "epoch": 11.18, "learning_rate": 2.2069238900634252e-05, "loss": 0.0778, "step": 10576 }, { "epoch": 11.18, "learning_rate": 2.2063953488372095e-05, "loss": 0.0305, "step": 10578 }, { "epoch": 11.18, "learning_rate": 2.2058668076109937e-05, "loss": 0.098, "step": 10580 }, { "epoch": 11.19, "learning_rate": 2.2053382663847783e-05, "loss": 0.0289, "step": 10582 }, { "epoch": 11.19, "learning_rate": 2.2048097251585626e-05, "loss": 0.0603, "step": 10584 }, { "epoch": 11.19, "learning_rate": 2.204281183932347e-05, "loss": 0.0724, "step": 10586 }, { "epoch": 11.19, "learning_rate": 2.203752642706131e-05, "loss": 0.0864, "step": 10588 }, { "epoch": 11.19, "learning_rate": 2.2032241014799154e-05, "loss": 0.1251, "step": 10590 }, { "epoch": 11.2, "learning_rate": 2.2026955602537e-05, "loss": 0.0161, "step": 10592 }, { "epoch": 11.2, "learning_rate": 2.2021670190274843e-05, "loss": 0.0633, "step": 10594 }, { "epoch": 11.2, "learning_rate": 2.2016384778012685e-05, "loss": 0.0709, "step": 10596 }, { "epoch": 11.2, "learning_rate": 2.2011099365750528e-05, "loss": 0.0971, "step": 10598 }, { "epoch": 11.21, "learning_rate": 2.2005813953488374e-05, "loss": 0.0582, "step": 10600 }, { "epoch": 11.21, "learning_rate": 2.2000528541226217e-05, "loss": 0.0354, "step": 10602 }, { "epoch": 11.21, "learning_rate": 2.199524312896406e-05, "loss": 0.0754, "step": 10604 }, { "epoch": 11.21, "learning_rate": 2.1989957716701902e-05, "loss": 0.0231, "step": 10606 }, { "epoch": 11.21, "learning_rate": 2.1984672304439745e-05, "loss": 0.0977, "step": 10608 }, { "epoch": 11.22, "learning_rate": 2.197938689217759e-05, "loss": 0.0853, "step": 10610 }, { "epoch": 11.22, "learning_rate": 2.1974101479915434e-05, "loss": 0.0304, "step": 10612 }, { "epoch": 11.22, "learning_rate": 2.1968816067653276e-05, "loss": 0.0818, "step": 10614 }, { "epoch": 11.22, "learning_rate": 2.1963530655391122e-05, "loss": 0.1034, "step": 10616 }, { "epoch": 11.22, "learning_rate": 2.1958245243128965e-05, "loss": 0.0743, "step": 10618 }, { "epoch": 11.23, "learning_rate": 2.195295983086681e-05, "loss": 0.0717, "step": 10620 }, { "epoch": 11.23, "learning_rate": 2.1947674418604654e-05, "loss": 0.0263, "step": 10622 }, { "epoch": 11.23, "learning_rate": 2.1942389006342496e-05, "loss": 0.065, "step": 10624 }, { "epoch": 11.23, "learning_rate": 2.193710359408034e-05, "loss": 0.1063, "step": 10626 }, { "epoch": 11.23, "learning_rate": 2.1931818181818185e-05, "loss": 0.0699, "step": 10628 }, { "epoch": 11.24, "learning_rate": 2.1926532769556028e-05, "loss": 0.0324, "step": 10630 }, { "epoch": 11.24, "learning_rate": 2.192124735729387e-05, "loss": 0.0283, "step": 10632 }, { "epoch": 11.24, "learning_rate": 2.1915961945031713e-05, "loss": 0.0296, "step": 10634 }, { "epoch": 11.24, "learning_rate": 2.191067653276956e-05, "loss": 0.0686, "step": 10636 }, { "epoch": 11.25, "learning_rate": 2.1905391120507402e-05, "loss": 0.0577, "step": 10638 }, { "epoch": 11.25, "learning_rate": 2.1900105708245245e-05, "loss": 0.0288, "step": 10640 }, { "epoch": 11.25, "learning_rate": 2.1894820295983087e-05, "loss": 0.1001, "step": 10642 }, { "epoch": 11.25, "learning_rate": 2.188953488372093e-05, "loss": 0.0518, "step": 10644 }, { "epoch": 11.25, "learning_rate": 2.1884249471458776e-05, "loss": 0.0222, "step": 10646 }, { "epoch": 11.26, "learning_rate": 2.187896405919662e-05, "loss": 0.0773, "step": 10648 }, { "epoch": 11.26, "learning_rate": 2.187367864693446e-05, "loss": 0.0045, "step": 10650 }, { "epoch": 11.26, "learning_rate": 2.1868393234672304e-05, "loss": 0.0173, "step": 10652 }, { "epoch": 11.26, "learning_rate": 2.1863107822410147e-05, "loss": 0.0316, "step": 10654 }, { "epoch": 11.26, "learning_rate": 2.1857822410147993e-05, "loss": 0.1172, "step": 10656 }, { "epoch": 11.27, "learning_rate": 2.1852536997885835e-05, "loss": 0.0507, "step": 10658 }, { "epoch": 11.27, "learning_rate": 2.1847251585623678e-05, "loss": 0.0951, "step": 10660 }, { "epoch": 11.27, "learning_rate": 2.184196617336152e-05, "loss": 0.0199, "step": 10662 }, { "epoch": 11.27, "learning_rate": 2.1836680761099367e-05, "loss": 0.0583, "step": 10664 }, { "epoch": 11.27, "learning_rate": 2.183139534883721e-05, "loss": 0.041, "step": 10666 }, { "epoch": 11.28, "learning_rate": 2.1826109936575052e-05, "loss": 0.0624, "step": 10668 }, { "epoch": 11.28, "learning_rate": 2.1820824524312898e-05, "loss": 0.0803, "step": 10670 }, { "epoch": 11.28, "learning_rate": 2.181553911205074e-05, "loss": 0.1347, "step": 10672 }, { "epoch": 11.28, "learning_rate": 2.1810253699788587e-05, "loss": 0.0725, "step": 10674 }, { "epoch": 11.29, "learning_rate": 2.180496828752643e-05, "loss": 0.0355, "step": 10676 }, { "epoch": 11.29, "learning_rate": 2.1799682875264272e-05, "loss": 0.0703, "step": 10678 }, { "epoch": 11.29, "learning_rate": 2.1794397463002115e-05, "loss": 0.0373, "step": 10680 }, { "epoch": 11.29, "learning_rate": 2.178911205073996e-05, "loss": 0.0535, "step": 10682 }, { "epoch": 11.29, "learning_rate": 2.1783826638477804e-05, "loss": 0.0635, "step": 10684 }, { "epoch": 11.3, "learning_rate": 2.1778541226215646e-05, "loss": 0.0548, "step": 10686 }, { "epoch": 11.3, "learning_rate": 2.177325581395349e-05, "loss": 0.0797, "step": 10688 }, { "epoch": 11.3, "learning_rate": 2.176797040169133e-05, "loss": 0.1646, "step": 10690 }, { "epoch": 11.3, "learning_rate": 2.1762684989429178e-05, "loss": 0.0078, "step": 10692 }, { "epoch": 11.3, "learning_rate": 2.175739957716702e-05, "loss": 0.039, "step": 10694 }, { "epoch": 11.31, "learning_rate": 2.1752114164904863e-05, "loss": 0.0683, "step": 10696 }, { "epoch": 11.31, "learning_rate": 2.1746828752642706e-05, "loss": 0.0309, "step": 10698 }, { "epoch": 11.31, "learning_rate": 2.1741543340380552e-05, "loss": 0.0709, "step": 10700 }, { "epoch": 11.31, "learning_rate": 2.1736257928118394e-05, "loss": 0.0945, "step": 10702 }, { "epoch": 11.32, "learning_rate": 2.1730972515856237e-05, "loss": 0.0391, "step": 10704 }, { "epoch": 11.32, "learning_rate": 2.172568710359408e-05, "loss": 0.0402, "step": 10706 }, { "epoch": 11.32, "learning_rate": 2.1720401691331922e-05, "loss": 0.1571, "step": 10708 }, { "epoch": 11.32, "learning_rate": 2.171511627906977e-05, "loss": 0.0544, "step": 10710 }, { "epoch": 11.32, "learning_rate": 2.170983086680761e-05, "loss": 0.0227, "step": 10712 }, { "epoch": 11.33, "learning_rate": 2.1704545454545454e-05, "loss": 0.0334, "step": 10714 }, { "epoch": 11.33, "learning_rate": 2.1699260042283297e-05, "loss": 0.041, "step": 10716 }, { "epoch": 11.33, "learning_rate": 2.1693974630021143e-05, "loss": 0.108, "step": 10718 }, { "epoch": 11.33, "learning_rate": 2.1688689217758985e-05, "loss": 0.0199, "step": 10720 }, { "epoch": 11.33, "learning_rate": 2.168340380549683e-05, "loss": 0.0907, "step": 10722 }, { "epoch": 11.34, "learning_rate": 2.1678118393234674e-05, "loss": 0.0606, "step": 10724 }, { "epoch": 11.34, "learning_rate": 2.1672832980972517e-05, "loss": 0.0314, "step": 10726 }, { "epoch": 11.34, "learning_rate": 2.1667547568710363e-05, "loss": 0.0559, "step": 10728 }, { "epoch": 11.34, "learning_rate": 2.1662262156448205e-05, "loss": 0.0155, "step": 10730 }, { "epoch": 11.34, "learning_rate": 2.1656976744186048e-05, "loss": 0.068, "step": 10732 }, { "epoch": 11.35, "learning_rate": 2.165169133192389e-05, "loss": 0.037, "step": 10734 }, { "epoch": 11.35, "learning_rate": 2.1646405919661737e-05, "loss": 0.0204, "step": 10736 }, { "epoch": 11.35, "learning_rate": 2.164112050739958e-05, "loss": 0.0349, "step": 10738 }, { "epoch": 11.35, "learning_rate": 2.1635835095137422e-05, "loss": 0.0516, "step": 10740 }, { "epoch": 11.36, "learning_rate": 2.1630549682875265e-05, "loss": 0.0482, "step": 10742 }, { "epoch": 11.36, "learning_rate": 2.1625264270613107e-05, "loss": 0.022, "step": 10744 }, { "epoch": 11.36, "learning_rate": 2.1619978858350954e-05, "loss": 0.0717, "step": 10746 }, { "epoch": 11.36, "learning_rate": 2.1614693446088796e-05, "loss": 0.0435, "step": 10748 }, { "epoch": 11.36, "learning_rate": 2.160940803382664e-05, "loss": 0.0449, "step": 10750 }, { "epoch": 11.37, "learning_rate": 2.160412262156448e-05, "loss": 0.0319, "step": 10752 }, { "epoch": 11.37, "learning_rate": 2.1598837209302328e-05, "loss": 0.0255, "step": 10754 }, { "epoch": 11.37, "learning_rate": 2.159355179704017e-05, "loss": 0.1095, "step": 10756 }, { "epoch": 11.37, "learning_rate": 2.1588266384778013e-05, "loss": 0.0939, "step": 10758 }, { "epoch": 11.37, "learning_rate": 2.1582980972515856e-05, "loss": 0.055, "step": 10760 }, { "epoch": 11.38, "learning_rate": 2.15776955602537e-05, "loss": 0.1075, "step": 10762 }, { "epoch": 11.38, "learning_rate": 2.1572410147991544e-05, "loss": 0.0374, "step": 10764 }, { "epoch": 11.38, "learning_rate": 2.1567124735729387e-05, "loss": 0.0753, "step": 10766 }, { "epoch": 11.38, "learning_rate": 2.156183932346723e-05, "loss": 0.1265, "step": 10768 }, { "epoch": 11.38, "learning_rate": 2.1556553911205072e-05, "loss": 0.0542, "step": 10770 }, { "epoch": 11.39, "learning_rate": 2.155126849894292e-05, "loss": 0.0173, "step": 10772 }, { "epoch": 11.39, "learning_rate": 2.154598308668076e-05, "loss": 0.0681, "step": 10774 }, { "epoch": 11.39, "learning_rate": 2.1540697674418607e-05, "loss": 0.0296, "step": 10776 }, { "epoch": 11.39, "learning_rate": 2.153541226215645e-05, "loss": 0.0658, "step": 10778 }, { "epoch": 11.4, "learning_rate": 2.1530126849894293e-05, "loss": 0.1054, "step": 10780 }, { "epoch": 11.4, "learning_rate": 2.152484143763214e-05, "loss": 0.1244, "step": 10782 }, { "epoch": 11.4, "learning_rate": 2.151955602536998e-05, "loss": 0.026, "step": 10784 }, { "epoch": 11.4, "learning_rate": 2.1514270613107824e-05, "loss": 0.0251, "step": 10786 }, { "epoch": 11.4, "learning_rate": 2.1508985200845667e-05, "loss": 0.0284, "step": 10788 }, { "epoch": 11.41, "learning_rate": 2.1503699788583513e-05, "loss": 0.0288, "step": 10790 }, { "epoch": 11.41, "learning_rate": 2.1498414376321355e-05, "loss": 0.0914, "step": 10792 }, { "epoch": 11.41, "learning_rate": 2.1493128964059198e-05, "loss": 0.0433, "step": 10794 }, { "epoch": 11.41, "learning_rate": 2.148784355179704e-05, "loss": 0.0501, "step": 10796 }, { "epoch": 11.41, "learning_rate": 2.1482558139534883e-05, "loss": 0.0046, "step": 10798 }, { "epoch": 11.42, "learning_rate": 2.147727272727273e-05, "loss": 0.0498, "step": 10800 }, { "epoch": 11.42, "learning_rate": 2.1471987315010572e-05, "loss": 0.0928, "step": 10802 }, { "epoch": 11.42, "learning_rate": 2.1466701902748415e-05, "loss": 0.0529, "step": 10804 }, { "epoch": 11.42, "learning_rate": 2.1461416490486257e-05, "loss": 0.0392, "step": 10806 }, { "epoch": 11.42, "learning_rate": 2.1456131078224103e-05, "loss": 0.0596, "step": 10808 }, { "epoch": 11.43, "learning_rate": 2.1450845665961946e-05, "loss": 0.0378, "step": 10810 }, { "epoch": 11.43, "learning_rate": 2.144556025369979e-05, "loss": 0.0409, "step": 10812 }, { "epoch": 11.43, "learning_rate": 2.144027484143763e-05, "loss": 0.0267, "step": 10814 }, { "epoch": 11.43, "learning_rate": 2.1434989429175474e-05, "loss": 0.0452, "step": 10816 }, { "epoch": 11.44, "learning_rate": 2.142970401691332e-05, "loss": 0.0531, "step": 10818 }, { "epoch": 11.44, "learning_rate": 2.1424418604651163e-05, "loss": 0.0386, "step": 10820 }, { "epoch": 11.44, "learning_rate": 2.1419133192389006e-05, "loss": 0.063, "step": 10822 }, { "epoch": 11.44, "learning_rate": 2.141384778012685e-05, "loss": 0.0325, "step": 10824 }, { "epoch": 11.44, "learning_rate": 2.1408562367864694e-05, "loss": 0.0868, "step": 10826 }, { "epoch": 11.45, "learning_rate": 2.1403276955602537e-05, "loss": 0.0804, "step": 10828 }, { "epoch": 11.45, "learning_rate": 2.1397991543340383e-05, "loss": 0.0758, "step": 10830 }, { "epoch": 11.45, "learning_rate": 2.1392706131078226e-05, "loss": 0.1376, "step": 10832 }, { "epoch": 11.45, "learning_rate": 2.138742071881607e-05, "loss": 0.0455, "step": 10834 }, { "epoch": 11.45, "learning_rate": 2.1382135306553914e-05, "loss": 0.0557, "step": 10836 }, { "epoch": 11.46, "learning_rate": 2.1376849894291757e-05, "loss": 0.004, "step": 10838 }, { "epoch": 11.46, "learning_rate": 2.13715644820296e-05, "loss": 0.0539, "step": 10840 }, { "epoch": 11.46, "learning_rate": 2.1366279069767442e-05, "loss": 0.1415, "step": 10842 }, { "epoch": 11.46, "learning_rate": 2.136099365750529e-05, "loss": 0.0817, "step": 10844 }, { "epoch": 11.47, "learning_rate": 2.135570824524313e-05, "loss": 0.0372, "step": 10846 }, { "epoch": 11.47, "learning_rate": 2.1350422832980974e-05, "loss": 0.0595, "step": 10848 }, { "epoch": 11.47, "learning_rate": 2.1345137420718817e-05, "loss": 0.04, "step": 10850 }, { "epoch": 11.47, "learning_rate": 2.133985200845666e-05, "loss": 0.0356, "step": 10852 }, { "epoch": 11.47, "learning_rate": 2.1334566596194505e-05, "loss": 0.0702, "step": 10854 }, { "epoch": 11.48, "learning_rate": 2.1329281183932348e-05, "loss": 0.0667, "step": 10856 }, { "epoch": 11.48, "learning_rate": 2.132399577167019e-05, "loss": 0.0514, "step": 10858 }, { "epoch": 11.48, "learning_rate": 2.1318710359408033e-05, "loss": 0.1231, "step": 10860 }, { "epoch": 11.48, "learning_rate": 2.131342494714588e-05, "loss": 0.0405, "step": 10862 }, { "epoch": 11.48, "learning_rate": 2.1308139534883722e-05, "loss": 0.0731, "step": 10864 }, { "epoch": 11.49, "learning_rate": 2.1302854122621565e-05, "loss": 0.0312, "step": 10866 }, { "epoch": 11.49, "learning_rate": 2.1297568710359407e-05, "loss": 0.0644, "step": 10868 }, { "epoch": 11.49, "learning_rate": 2.129228329809725e-05, "loss": 0.0827, "step": 10870 }, { "epoch": 11.49, "learning_rate": 2.1286997885835096e-05, "loss": 0.0253, "step": 10872 }, { "epoch": 11.49, "learning_rate": 2.128171247357294e-05, "loss": 0.0523, "step": 10874 }, { "epoch": 11.5, "learning_rate": 2.127642706131078e-05, "loss": 0.0856, "step": 10876 }, { "epoch": 11.5, "learning_rate": 2.1271141649048627e-05, "loss": 0.065, "step": 10878 }, { "epoch": 11.5, "learning_rate": 2.126585623678647e-05, "loss": 0.0138, "step": 10880 }, { "epoch": 11.5, "learning_rate": 2.1260570824524316e-05, "loss": 0.0285, "step": 10882 }, { "epoch": 11.51, "learning_rate": 2.125528541226216e-05, "loss": 0.0584, "step": 10884 }, { "epoch": 11.51, "learning_rate": 2.125e-05, "loss": 0.0262, "step": 10886 }, { "epoch": 11.51, "learning_rate": 2.1244714587737844e-05, "loss": 0.0238, "step": 10888 }, { "epoch": 11.51, "learning_rate": 2.123942917547569e-05, "loss": 0.0802, "step": 10890 }, { "epoch": 11.51, "learning_rate": 2.1234143763213533e-05, "loss": 0.0672, "step": 10892 }, { "epoch": 11.52, "learning_rate": 2.1228858350951376e-05, "loss": 0.0377, "step": 10894 }, { "epoch": 11.52, "learning_rate": 2.1223572938689218e-05, "loss": 0.028, "step": 10896 }, { "epoch": 11.52, "learning_rate": 2.1218287526427064e-05, "loss": 0.0638, "step": 10898 }, { "epoch": 11.52, "learning_rate": 2.1213002114164907e-05, "loss": 0.0875, "step": 10900 }, { "epoch": 11.52, "learning_rate": 2.120771670190275e-05, "loss": 0.0416, "step": 10902 }, { "epoch": 11.53, "learning_rate": 2.1202431289640592e-05, "loss": 0.006, "step": 10904 }, { "epoch": 11.53, "learning_rate": 2.1197145877378435e-05, "loss": 0.0266, "step": 10906 }, { "epoch": 11.53, "learning_rate": 2.119186046511628e-05, "loss": 0.0754, "step": 10908 }, { "epoch": 11.53, "learning_rate": 2.1186575052854124e-05, "loss": 0.0288, "step": 10910 }, { "epoch": 11.53, "learning_rate": 2.1181289640591966e-05, "loss": 0.0511, "step": 10912 }, { "epoch": 11.54, "learning_rate": 2.117600422832981e-05, "loss": 0.0647, "step": 10914 }, { "epoch": 11.54, "learning_rate": 2.1170718816067655e-05, "loss": 0.0274, "step": 10916 }, { "epoch": 11.54, "learning_rate": 2.1165433403805498e-05, "loss": 0.0344, "step": 10918 }, { "epoch": 11.54, "learning_rate": 2.116014799154334e-05, "loss": 0.0904, "step": 10920 }, { "epoch": 11.55, "learning_rate": 2.1154862579281183e-05, "loss": 0.0463, "step": 10922 }, { "epoch": 11.55, "learning_rate": 2.1149577167019026e-05, "loss": 0.139, "step": 10924 }, { "epoch": 11.55, "learning_rate": 2.1144291754756872e-05, "loss": 0.0338, "step": 10926 }, { "epoch": 11.55, "learning_rate": 2.1139006342494715e-05, "loss": 0.0309, "step": 10928 }, { "epoch": 11.55, "learning_rate": 2.113372093023256e-05, "loss": 0.0738, "step": 10930 }, { "epoch": 11.56, "learning_rate": 2.1128435517970403e-05, "loss": 0.0942, "step": 10932 }, { "epoch": 11.56, "learning_rate": 2.1123150105708246e-05, "loss": 0.0721, "step": 10934 }, { "epoch": 11.56, "learning_rate": 2.1117864693446092e-05, "loss": 0.1223, "step": 10936 }, { "epoch": 11.56, "learning_rate": 2.1112579281183935e-05, "loss": 0.0341, "step": 10938 }, { "epoch": 11.56, "learning_rate": 2.1107293868921777e-05, "loss": 0.0784, "step": 10940 }, { "epoch": 11.57, "learning_rate": 2.110200845665962e-05, "loss": 0.0329, "step": 10942 }, { "epoch": 11.57, "learning_rate": 2.1096723044397466e-05, "loss": 0.0208, "step": 10944 }, { "epoch": 11.57, "learning_rate": 2.109143763213531e-05, "loss": 0.0544, "step": 10946 }, { "epoch": 11.57, "learning_rate": 2.108615221987315e-05, "loss": 0.0241, "step": 10948 }, { "epoch": 11.58, "learning_rate": 2.1080866807610994e-05, "loss": 0.0443, "step": 10950 }, { "epoch": 11.58, "learning_rate": 2.107558139534884e-05, "loss": 0.0468, "step": 10952 }, { "epoch": 11.58, "learning_rate": 2.1070295983086683e-05, "loss": 0.0101, "step": 10954 }, { "epoch": 11.58, "learning_rate": 2.1065010570824526e-05, "loss": 0.132, "step": 10956 }, { "epoch": 11.58, "learning_rate": 2.1059725158562368e-05, "loss": 0.0627, "step": 10958 }, { "epoch": 11.59, "learning_rate": 2.105443974630021e-05, "loss": 0.033, "step": 10960 }, { "epoch": 11.59, "learning_rate": 2.1049154334038057e-05, "loss": 0.0594, "step": 10962 }, { "epoch": 11.59, "learning_rate": 2.10438689217759e-05, "loss": 0.0433, "step": 10964 }, { "epoch": 11.59, "learning_rate": 2.1038583509513742e-05, "loss": 0.0288, "step": 10966 }, { "epoch": 11.59, "learning_rate": 2.1033298097251585e-05, "loss": 0.0383, "step": 10968 }, { "epoch": 11.6, "learning_rate": 2.102801268498943e-05, "loss": 0.118, "step": 10970 }, { "epoch": 11.6, "learning_rate": 2.1022727272727274e-05, "loss": 0.0767, "step": 10972 }, { "epoch": 11.6, "learning_rate": 2.1017441860465116e-05, "loss": 0.1311, "step": 10974 }, { "epoch": 11.6, "learning_rate": 2.101215644820296e-05, "loss": 0.0383, "step": 10976 }, { "epoch": 11.6, "learning_rate": 2.10068710359408e-05, "loss": 0.0315, "step": 10978 }, { "epoch": 11.61, "learning_rate": 2.1001585623678648e-05, "loss": 0.1211, "step": 10980 }, { "epoch": 11.61, "learning_rate": 2.099630021141649e-05, "loss": 0.1321, "step": 10982 }, { "epoch": 11.61, "learning_rate": 2.0991014799154336e-05, "loss": 0.0749, "step": 10984 }, { "epoch": 11.61, "learning_rate": 2.098572938689218e-05, "loss": 0.0581, "step": 10986 }, { "epoch": 11.62, "learning_rate": 2.0980443974630022e-05, "loss": 0.0501, "step": 10988 }, { "epoch": 11.62, "learning_rate": 2.0975158562367868e-05, "loss": 0.1586, "step": 10990 }, { "epoch": 11.62, "learning_rate": 2.096987315010571e-05, "loss": 0.1585, "step": 10992 }, { "epoch": 11.62, "learning_rate": 2.0964587737843553e-05, "loss": 0.016, "step": 10994 }, { "epoch": 11.62, "learning_rate": 2.0959302325581396e-05, "loss": 0.0378, "step": 10996 }, { "epoch": 11.63, "learning_rate": 2.0954016913319242e-05, "loss": 0.0257, "step": 10998 }, { "epoch": 11.63, "learning_rate": 2.0948731501057085e-05, "loss": 0.0754, "step": 11000 }, { "epoch": 11.63, "eval_cer": 0.027586206896551724, "eval_loss": 0.871033787727356, "eval_runtime": 128.8039, "eval_samples_per_second": 6.529, "eval_steps_per_second": 0.823, "step": 11000 }, { "epoch": 11.63, "learning_rate": 2.0943446088794927e-05, "loss": 0.0617, "step": 11002 }, { "epoch": 11.63, "learning_rate": 2.093816067653277e-05, "loss": 0.0261, "step": 11004 }, { "epoch": 11.63, "learning_rate": 2.0932875264270613e-05, "loss": 0.0115, "step": 11006 }, { "epoch": 11.64, "learning_rate": 2.092758985200846e-05, "loss": 0.0594, "step": 11008 }, { "epoch": 11.64, "learning_rate": 2.09223044397463e-05, "loss": 0.0841, "step": 11010 }, { "epoch": 11.64, "learning_rate": 2.0917019027484144e-05, "loss": 0.0745, "step": 11012 }, { "epoch": 11.64, "learning_rate": 2.0911733615221987e-05, "loss": 0.0111, "step": 11014 }, { "epoch": 11.64, "learning_rate": 2.0906448202959833e-05, "loss": 0.0268, "step": 11016 }, { "epoch": 11.65, "learning_rate": 2.0901162790697675e-05, "loss": 0.0558, "step": 11018 }, { "epoch": 11.65, "learning_rate": 2.0895877378435518e-05, "loss": 0.0958, "step": 11020 }, { "epoch": 11.65, "learning_rate": 2.089059196617336e-05, "loss": 0.0142, "step": 11022 }, { "epoch": 11.65, "learning_rate": 2.0885306553911203e-05, "loss": 0.013, "step": 11024 }, { "epoch": 11.66, "learning_rate": 2.088002114164905e-05, "loss": 0.0168, "step": 11026 }, { "epoch": 11.66, "learning_rate": 2.0874735729386892e-05, "loss": 0.0582, "step": 11028 }, { "epoch": 11.66, "learning_rate": 2.0869450317124735e-05, "loss": 0.1415, "step": 11030 }, { "epoch": 11.66, "learning_rate": 2.086416490486258e-05, "loss": 0.0226, "step": 11032 }, { "epoch": 11.66, "learning_rate": 2.0858879492600424e-05, "loss": 0.0594, "step": 11034 }, { "epoch": 11.67, "learning_rate": 2.0853594080338266e-05, "loss": 0.0904, "step": 11036 }, { "epoch": 11.67, "learning_rate": 2.0848308668076112e-05, "loss": 0.0244, "step": 11038 }, { "epoch": 11.67, "learning_rate": 2.0843023255813955e-05, "loss": 0.0395, "step": 11040 }, { "epoch": 11.67, "learning_rate": 2.0837737843551798e-05, "loss": 0.0772, "step": 11042 }, { "epoch": 11.67, "learning_rate": 2.0832452431289644e-05, "loss": 0.0739, "step": 11044 }, { "epoch": 11.68, "learning_rate": 2.0827167019027486e-05, "loss": 0.0474, "step": 11046 }, { "epoch": 11.68, "learning_rate": 2.082188160676533e-05, "loss": 0.0723, "step": 11048 }, { "epoch": 11.68, "learning_rate": 2.0816596194503172e-05, "loss": 0.0151, "step": 11050 }, { "epoch": 11.68, "learning_rate": 2.0811310782241018e-05, "loss": 0.024, "step": 11052 }, { "epoch": 11.68, "learning_rate": 2.080602536997886e-05, "loss": 0.0285, "step": 11054 }, { "epoch": 11.69, "learning_rate": 2.0800739957716703e-05, "loss": 0.146, "step": 11056 }, { "epoch": 11.69, "learning_rate": 2.0795454545454546e-05, "loss": 0.056, "step": 11058 }, { "epoch": 11.69, "learning_rate": 2.079016913319239e-05, "loss": 0.0106, "step": 11060 }, { "epoch": 11.69, "learning_rate": 2.0784883720930235e-05, "loss": 0.0663, "step": 11062 }, { "epoch": 11.7, "learning_rate": 2.0779598308668077e-05, "loss": 0.1001, "step": 11064 }, { "epoch": 11.7, "learning_rate": 2.077431289640592e-05, "loss": 0.0979, "step": 11066 }, { "epoch": 11.7, "learning_rate": 2.0769027484143763e-05, "loss": 0.0581, "step": 11068 }, { "epoch": 11.7, "learning_rate": 2.076374207188161e-05, "loss": 0.0471, "step": 11070 }, { "epoch": 11.7, "learning_rate": 2.075845665961945e-05, "loss": 0.0102, "step": 11072 }, { "epoch": 11.71, "learning_rate": 2.0753171247357294e-05, "loss": 0.0279, "step": 11074 }, { "epoch": 11.71, "learning_rate": 2.0747885835095137e-05, "loss": 0.0497, "step": 11076 }, { "epoch": 11.71, "learning_rate": 2.074260042283298e-05, "loss": 0.0217, "step": 11078 }, { "epoch": 11.71, "learning_rate": 2.0737315010570825e-05, "loss": 0.0711, "step": 11080 }, { "epoch": 11.71, "learning_rate": 2.0732029598308668e-05, "loss": 0.0176, "step": 11082 }, { "epoch": 11.72, "learning_rate": 2.072674418604651e-05, "loss": 0.116, "step": 11084 }, { "epoch": 11.72, "learning_rate": 2.0721458773784357e-05, "loss": 0.0255, "step": 11086 }, { "epoch": 11.72, "learning_rate": 2.07161733615222e-05, "loss": 0.0766, "step": 11088 }, { "epoch": 11.72, "learning_rate": 2.0710887949260045e-05, "loss": 0.0453, "step": 11090 }, { "epoch": 11.73, "learning_rate": 2.0705602536997888e-05, "loss": 0.0803, "step": 11092 }, { "epoch": 11.73, "learning_rate": 2.070031712473573e-05, "loss": 0.0871, "step": 11094 }, { "epoch": 11.73, "learning_rate": 2.0695031712473573e-05, "loss": 0.0988, "step": 11096 }, { "epoch": 11.73, "learning_rate": 2.068974630021142e-05, "loss": 0.0257, "step": 11098 }, { "epoch": 11.73, "learning_rate": 2.0684460887949262e-05, "loss": 0.1108, "step": 11100 }, { "epoch": 11.74, "learning_rate": 2.0679175475687105e-05, "loss": 0.0522, "step": 11102 }, { "epoch": 11.74, "learning_rate": 2.0673890063424948e-05, "loss": 0.0438, "step": 11104 }, { "epoch": 11.74, "learning_rate": 2.0668604651162794e-05, "loss": 0.0334, "step": 11106 }, { "epoch": 11.74, "learning_rate": 2.0663319238900636e-05, "loss": 0.0322, "step": 11108 }, { "epoch": 11.74, "learning_rate": 2.065803382663848e-05, "loss": 0.0284, "step": 11110 }, { "epoch": 11.75, "learning_rate": 2.065274841437632e-05, "loss": 0.0106, "step": 11112 }, { "epoch": 11.75, "learning_rate": 2.0647463002114164e-05, "loss": 0.0164, "step": 11114 }, { "epoch": 11.75, "learning_rate": 2.064217758985201e-05, "loss": 0.0614, "step": 11116 }, { "epoch": 11.75, "learning_rate": 2.0636892177589853e-05, "loss": 0.0688, "step": 11118 }, { "epoch": 11.75, "learning_rate": 2.0631606765327696e-05, "loss": 0.0633, "step": 11120 }, { "epoch": 11.76, "learning_rate": 2.062632135306554e-05, "loss": 0.1126, "step": 11122 }, { "epoch": 11.76, "learning_rate": 2.0621035940803384e-05, "loss": 0.004, "step": 11124 }, { "epoch": 11.76, "learning_rate": 2.0615750528541227e-05, "loss": 0.0379, "step": 11126 }, { "epoch": 11.76, "learning_rate": 2.061046511627907e-05, "loss": 0.085, "step": 11128 }, { "epoch": 11.77, "learning_rate": 2.0605179704016912e-05, "loss": 0.0781, "step": 11130 }, { "epoch": 11.77, "learning_rate": 2.0599894291754755e-05, "loss": 0.0755, "step": 11132 }, { "epoch": 11.77, "learning_rate": 2.05946088794926e-05, "loss": 0.0244, "step": 11134 }, { "epoch": 11.77, "learning_rate": 2.0589323467230444e-05, "loss": 0.0323, "step": 11136 }, { "epoch": 11.77, "learning_rate": 2.0584038054968287e-05, "loss": 0.0872, "step": 11138 }, { "epoch": 11.78, "learning_rate": 2.0578752642706133e-05, "loss": 0.0902, "step": 11140 }, { "epoch": 11.78, "learning_rate": 2.0573467230443975e-05, "loss": 0.0517, "step": 11142 }, { "epoch": 11.78, "learning_rate": 2.056818181818182e-05, "loss": 0.1477, "step": 11144 }, { "epoch": 11.78, "learning_rate": 2.0562896405919664e-05, "loss": 0.0487, "step": 11146 }, { "epoch": 11.78, "learning_rate": 2.0557610993657507e-05, "loss": 0.0588, "step": 11148 }, { "epoch": 11.79, "learning_rate": 2.055232558139535e-05, "loss": 0.0118, "step": 11150 }, { "epoch": 11.79, "learning_rate": 2.0547040169133195e-05, "loss": 0.0789, "step": 11152 }, { "epoch": 11.79, "learning_rate": 2.0541754756871038e-05, "loss": 0.0406, "step": 11154 }, { "epoch": 11.79, "learning_rate": 2.053646934460888e-05, "loss": 0.0636, "step": 11156 }, { "epoch": 11.79, "learning_rate": 2.0531183932346723e-05, "loss": 0.0327, "step": 11158 }, { "epoch": 11.8, "learning_rate": 2.052589852008457e-05, "loss": 0.0546, "step": 11160 }, { "epoch": 11.8, "learning_rate": 2.0520613107822412e-05, "loss": 0.0355, "step": 11162 }, { "epoch": 11.8, "learning_rate": 2.0515327695560255e-05, "loss": 0.053, "step": 11164 }, { "epoch": 11.8, "learning_rate": 2.0510042283298097e-05, "loss": 0.0439, "step": 11166 }, { "epoch": 11.81, "learning_rate": 2.050475687103594e-05, "loss": 0.0155, "step": 11168 }, { "epoch": 11.81, "learning_rate": 2.0499471458773786e-05, "loss": 0.0532, "step": 11170 }, { "epoch": 11.81, "learning_rate": 2.049418604651163e-05, "loss": 0.0108, "step": 11172 }, { "epoch": 11.81, "learning_rate": 2.048890063424947e-05, "loss": 0.0571, "step": 11174 }, { "epoch": 11.81, "learning_rate": 2.0483615221987314e-05, "loss": 0.0961, "step": 11176 }, { "epoch": 11.82, "learning_rate": 2.047832980972516e-05, "loss": 0.0407, "step": 11178 }, { "epoch": 11.82, "learning_rate": 2.0473044397463003e-05, "loss": 0.0304, "step": 11180 }, { "epoch": 11.82, "learning_rate": 2.0467758985200846e-05, "loss": 0.0345, "step": 11182 }, { "epoch": 11.82, "learning_rate": 2.0462473572938688e-05, "loss": 0.019, "step": 11184 }, { "epoch": 11.82, "learning_rate": 2.045718816067653e-05, "loss": 0.071, "step": 11186 }, { "epoch": 11.83, "learning_rate": 2.0451902748414377e-05, "loss": 0.2046, "step": 11188 }, { "epoch": 11.83, "learning_rate": 2.044661733615222e-05, "loss": 0.0732, "step": 11190 }, { "epoch": 11.83, "learning_rate": 2.0441331923890066e-05, "loss": 0.0416, "step": 11192 }, { "epoch": 11.83, "learning_rate": 2.043604651162791e-05, "loss": 0.031, "step": 11194 }, { "epoch": 11.84, "learning_rate": 2.043076109936575e-05, "loss": 0.0405, "step": 11196 }, { "epoch": 11.84, "learning_rate": 2.0425475687103597e-05, "loss": 0.0323, "step": 11198 }, { "epoch": 11.84, "learning_rate": 2.042019027484144e-05, "loss": 0.063, "step": 11200 }, { "epoch": 11.84, "learning_rate": 2.0414904862579282e-05, "loss": 0.1026, "step": 11202 }, { "epoch": 11.84, "learning_rate": 2.0409619450317125e-05, "loss": 0.0508, "step": 11204 }, { "epoch": 11.85, "learning_rate": 2.040433403805497e-05, "loss": 0.0286, "step": 11206 }, { "epoch": 11.85, "learning_rate": 2.0399048625792814e-05, "loss": 0.0337, "step": 11208 }, { "epoch": 11.85, "learning_rate": 2.0393763213530657e-05, "loss": 0.061, "step": 11210 }, { "epoch": 11.85, "learning_rate": 2.03884778012685e-05, "loss": 0.0663, "step": 11212 }, { "epoch": 11.85, "learning_rate": 2.0383192389006345e-05, "loss": 0.091, "step": 11214 }, { "epoch": 11.86, "learning_rate": 2.0377906976744188e-05, "loss": 0.0043, "step": 11216 }, { "epoch": 11.86, "learning_rate": 2.037262156448203e-05, "loss": 0.0518, "step": 11218 }, { "epoch": 11.86, "learning_rate": 2.0367336152219873e-05, "loss": 0.0485, "step": 11220 }, { "epoch": 11.86, "learning_rate": 2.0362050739957716e-05, "loss": 0.0537, "step": 11222 }, { "epoch": 11.86, "learning_rate": 2.0356765327695562e-05, "loss": 0.0262, "step": 11224 }, { "epoch": 11.87, "learning_rate": 2.0351479915433405e-05, "loss": 0.1172, "step": 11226 }, { "epoch": 11.87, "learning_rate": 2.0346194503171247e-05, "loss": 0.0423, "step": 11228 }, { "epoch": 11.87, "learning_rate": 2.034355179704017e-05, "loss": 0.0751, "step": 11230 }, { "epoch": 11.87, "learning_rate": 2.0338266384778013e-05, "loss": 0.0498, "step": 11232 }, { "epoch": 11.88, "learning_rate": 2.033298097251586e-05, "loss": 0.0332, "step": 11234 }, { "epoch": 11.88, "learning_rate": 2.0327695560253702e-05, "loss": 0.0439, "step": 11236 }, { "epoch": 11.88, "learning_rate": 2.0322410147991544e-05, "loss": 0.0399, "step": 11238 }, { "epoch": 11.88, "learning_rate": 2.0317124735729387e-05, "loss": 0.0337, "step": 11240 }, { "epoch": 11.88, "learning_rate": 2.0311839323467233e-05, "loss": 0.0582, "step": 11242 }, { "epoch": 11.89, "learning_rate": 2.0306553911205076e-05, "loss": 0.1019, "step": 11244 }, { "epoch": 11.89, "learning_rate": 2.030126849894292e-05, "loss": 0.0754, "step": 11246 }, { "epoch": 11.89, "learning_rate": 2.029598308668076e-05, "loss": 0.0471, "step": 11248 }, { "epoch": 11.89, "learning_rate": 2.0290697674418604e-05, "loss": 0.0311, "step": 11250 }, { "epoch": 11.89, "learning_rate": 2.028541226215645e-05, "loss": 0.0573, "step": 11252 }, { "epoch": 11.9, "learning_rate": 2.0280126849894293e-05, "loss": 0.0474, "step": 11254 }, { "epoch": 11.9, "learning_rate": 2.0274841437632135e-05, "loss": 0.0533, "step": 11256 }, { "epoch": 11.9, "learning_rate": 2.0269556025369978e-05, "loss": 0.1002, "step": 11258 }, { "epoch": 11.9, "learning_rate": 2.0264270613107824e-05, "loss": 0.045, "step": 11260 }, { "epoch": 11.9, "learning_rate": 2.0258985200845667e-05, "loss": 0.0355, "step": 11262 }, { "epoch": 11.91, "learning_rate": 2.025369978858351e-05, "loss": 0.0453, "step": 11264 }, { "epoch": 11.91, "learning_rate": 2.0248414376321352e-05, "loss": 0.0356, "step": 11266 }, { "epoch": 11.91, "learning_rate": 2.0243128964059198e-05, "loss": 0.0951, "step": 11268 }, { "epoch": 11.91, "learning_rate": 2.023784355179704e-05, "loss": 0.0593, "step": 11270 }, { "epoch": 11.92, "learning_rate": 2.0232558139534883e-05, "loss": 0.046, "step": 11272 }, { "epoch": 11.92, "learning_rate": 2.022727272727273e-05, "loss": 0.0185, "step": 11274 }, { "epoch": 11.92, "learning_rate": 2.0221987315010572e-05, "loss": 0.041, "step": 11276 }, { "epoch": 11.92, "learning_rate": 2.0216701902748418e-05, "loss": 0.0857, "step": 11278 }, { "epoch": 11.92, "learning_rate": 2.021141649048626e-05, "loss": 0.0111, "step": 11280 }, { "epoch": 11.93, "learning_rate": 2.0206131078224104e-05, "loss": 0.0913, "step": 11282 }, { "epoch": 11.93, "learning_rate": 2.0200845665961946e-05, "loss": 0.0448, "step": 11284 }, { "epoch": 11.93, "learning_rate": 2.019556025369979e-05, "loss": 0.0471, "step": 11286 }, { "epoch": 11.93, "learning_rate": 2.0190274841437635e-05, "loss": 0.0396, "step": 11288 }, { "epoch": 11.93, "learning_rate": 2.0184989429175478e-05, "loss": 0.0769, "step": 11290 }, { "epoch": 11.94, "learning_rate": 2.017970401691332e-05, "loss": 0.053, "step": 11292 }, { "epoch": 11.94, "learning_rate": 2.0174418604651163e-05, "loss": 0.1192, "step": 11294 }, { "epoch": 11.94, "learning_rate": 2.016913319238901e-05, "loss": 0.0102, "step": 11296 }, { "epoch": 11.94, "learning_rate": 2.0163847780126852e-05, "loss": 0.0832, "step": 11298 }, { "epoch": 11.95, "learning_rate": 2.0158562367864694e-05, "loss": 0.0162, "step": 11300 }, { "epoch": 11.95, "learning_rate": 2.0153276955602537e-05, "loss": 0.0205, "step": 11302 }, { "epoch": 11.95, "learning_rate": 2.014799154334038e-05, "loss": 0.016, "step": 11304 }, { "epoch": 11.95, "learning_rate": 2.0142706131078226e-05, "loss": 0.0455, "step": 11306 }, { "epoch": 11.95, "learning_rate": 2.013742071881607e-05, "loss": 0.0433, "step": 11308 }, { "epoch": 11.96, "learning_rate": 2.013213530655391e-05, "loss": 0.0466, "step": 11310 }, { "epoch": 11.96, "learning_rate": 2.0126849894291754e-05, "loss": 0.0151, "step": 11312 }, { "epoch": 11.96, "learning_rate": 2.01215644820296e-05, "loss": 0.0353, "step": 11314 }, { "epoch": 11.96, "learning_rate": 2.0116279069767443e-05, "loss": 0.1275, "step": 11316 }, { "epoch": 11.96, "learning_rate": 2.0110993657505285e-05, "loss": 0.1457, "step": 11318 }, { "epoch": 11.97, "learning_rate": 2.0105708245243128e-05, "loss": 0.0438, "step": 11320 }, { "epoch": 11.97, "learning_rate": 2.0100422832980974e-05, "loss": 0.0343, "step": 11322 }, { "epoch": 11.97, "learning_rate": 2.0095137420718817e-05, "loss": 0.1007, "step": 11324 }, { "epoch": 11.97, "learning_rate": 2.0089852008456663e-05, "loss": 0.0562, "step": 11326 }, { "epoch": 11.97, "learning_rate": 2.0084566596194505e-05, "loss": 0.0126, "step": 11328 }, { "epoch": 11.98, "learning_rate": 2.0079281183932348e-05, "loss": 0.0698, "step": 11330 }, { "epoch": 11.98, "learning_rate": 2.007399577167019e-05, "loss": 0.0589, "step": 11332 }, { "epoch": 11.98, "learning_rate": 2.0068710359408037e-05, "loss": 0.1048, "step": 11334 }, { "epoch": 11.98, "learning_rate": 2.006342494714588e-05, "loss": 0.0315, "step": 11336 }, { "epoch": 11.99, "learning_rate": 2.0058139534883722e-05, "loss": 0.0187, "step": 11338 }, { "epoch": 11.99, "learning_rate": 2.0052854122621565e-05, "loss": 0.0887, "step": 11340 }, { "epoch": 11.99, "learning_rate": 2.004756871035941e-05, "loss": 0.069, "step": 11342 }, { "epoch": 11.99, "learning_rate": 2.0042283298097253e-05, "loss": 0.0561, "step": 11344 }, { "epoch": 11.99, "learning_rate": 2.0036997885835096e-05, "loss": 0.0311, "step": 11346 }, { "epoch": 12.0, "learning_rate": 2.003171247357294e-05, "loss": 0.0373, "step": 11348 }, { "epoch": 12.0, "learning_rate": 2.002642706131078e-05, "loss": 0.0202, "step": 11350 }, { "epoch": 12.0, "learning_rate": 2.0021141649048628e-05, "loss": 0.0274, "step": 11352 }, { "epoch": 12.0, "learning_rate": 2.001585623678647e-05, "loss": 0.0598, "step": 11354 }, { "epoch": 12.0, "learning_rate": 2.0010570824524313e-05, "loss": 0.0263, "step": 11356 }, { "epoch": 12.01, "learning_rate": 2.0005285412262156e-05, "loss": 0.0716, "step": 11358 }, { "epoch": 12.01, "learning_rate": 2e-05, "loss": 0.0086, "step": 11360 }, { "epoch": 12.01, "learning_rate": 1.9994714587737844e-05, "loss": 0.0221, "step": 11362 }, { "epoch": 12.01, "learning_rate": 1.9989429175475687e-05, "loss": 0.037, "step": 11364 }, { "epoch": 12.01, "learning_rate": 1.998414376321353e-05, "loss": 0.0976, "step": 11366 }, { "epoch": 12.02, "learning_rate": 1.9978858350951372e-05, "loss": 0.0408, "step": 11368 }, { "epoch": 12.02, "learning_rate": 1.997357293868922e-05, "loss": 0.025, "step": 11370 }, { "epoch": 12.02, "learning_rate": 1.996828752642706e-05, "loss": 0.0182, "step": 11372 }, { "epoch": 12.02, "learning_rate": 1.9963002114164904e-05, "loss": 0.0515, "step": 11374 }, { "epoch": 12.03, "learning_rate": 1.995771670190275e-05, "loss": 0.0557, "step": 11376 }, { "epoch": 12.03, "learning_rate": 1.9952431289640592e-05, "loss": 0.0922, "step": 11378 }, { "epoch": 12.03, "learning_rate": 1.994714587737844e-05, "loss": 0.0551, "step": 11380 }, { "epoch": 12.03, "learning_rate": 1.994186046511628e-05, "loss": 0.1137, "step": 11382 }, { "epoch": 12.03, "learning_rate": 1.9936575052854124e-05, "loss": 0.0116, "step": 11384 }, { "epoch": 12.04, "learning_rate": 1.9931289640591967e-05, "loss": 0.0234, "step": 11386 }, { "epoch": 12.04, "learning_rate": 1.9926004228329813e-05, "loss": 0.0043, "step": 11388 }, { "epoch": 12.04, "learning_rate": 1.9920718816067655e-05, "loss": 0.0436, "step": 11390 }, { "epoch": 12.04, "learning_rate": 1.9915433403805498e-05, "loss": 0.0186, "step": 11392 }, { "epoch": 12.04, "learning_rate": 1.991014799154334e-05, "loss": 0.0104, "step": 11394 }, { "epoch": 12.05, "learning_rate": 1.9904862579281187e-05, "loss": 0.0784, "step": 11396 }, { "epoch": 12.05, "learning_rate": 1.989957716701903e-05, "loss": 0.0521, "step": 11398 }, { "epoch": 12.05, "learning_rate": 1.9894291754756872e-05, "loss": 0.0724, "step": 11400 }, { "epoch": 12.05, "learning_rate": 1.9889006342494715e-05, "loss": 0.0619, "step": 11402 }, { "epoch": 12.05, "learning_rate": 1.9883720930232557e-05, "loss": 0.0655, "step": 11404 }, { "epoch": 12.06, "learning_rate": 1.9878435517970403e-05, "loss": 0.0641, "step": 11406 }, { "epoch": 12.06, "learning_rate": 1.9873150105708246e-05, "loss": 0.0981, "step": 11408 }, { "epoch": 12.06, "learning_rate": 1.986786469344609e-05, "loss": 0.0054, "step": 11410 }, { "epoch": 12.06, "learning_rate": 1.986257928118393e-05, "loss": 0.0116, "step": 11412 }, { "epoch": 12.07, "learning_rate": 1.9857293868921777e-05, "loss": 0.0746, "step": 11414 }, { "epoch": 12.07, "learning_rate": 1.985200845665962e-05, "loss": 0.0258, "step": 11416 }, { "epoch": 12.07, "learning_rate": 1.9846723044397463e-05, "loss": 0.0716, "step": 11418 }, { "epoch": 12.07, "learning_rate": 1.9841437632135305e-05, "loss": 0.0096, "step": 11420 }, { "epoch": 12.07, "learning_rate": 1.9836152219873148e-05, "loss": 0.0508, "step": 11422 }, { "epoch": 12.08, "learning_rate": 1.9830866807610994e-05, "loss": 0.0379, "step": 11424 }, { "epoch": 12.08, "learning_rate": 1.9825581395348837e-05, "loss": 0.0356, "step": 11426 }, { "epoch": 12.08, "learning_rate": 1.9820295983086683e-05, "loss": 0.0137, "step": 11428 }, { "epoch": 12.08, "learning_rate": 1.9815010570824526e-05, "loss": 0.0478, "step": 11430 }, { "epoch": 12.08, "learning_rate": 1.980972515856237e-05, "loss": 0.0576, "step": 11432 }, { "epoch": 12.09, "learning_rate": 1.9804439746300214e-05, "loss": 0.0398, "step": 11434 }, { "epoch": 12.09, "learning_rate": 1.9799154334038057e-05, "loss": 0.0909, "step": 11436 }, { "epoch": 12.09, "learning_rate": 1.97938689217759e-05, "loss": 0.0252, "step": 11438 }, { "epoch": 12.09, "learning_rate": 1.9788583509513742e-05, "loss": 0.0412, "step": 11440 }, { "epoch": 12.1, "learning_rate": 1.978329809725159e-05, "loss": 0.0248, "step": 11442 }, { "epoch": 12.1, "learning_rate": 1.977801268498943e-05, "loss": 0.0725, "step": 11444 }, { "epoch": 12.1, "learning_rate": 1.9772727272727274e-05, "loss": 0.03, "step": 11446 }, { "epoch": 12.1, "learning_rate": 1.9767441860465116e-05, "loss": 0.0491, "step": 11448 }, { "epoch": 12.1, "learning_rate": 1.9762156448202962e-05, "loss": 0.0061, "step": 11450 }, { "epoch": 12.11, "learning_rate": 1.9756871035940805e-05, "loss": 0.0138, "step": 11452 }, { "epoch": 12.11, "learning_rate": 1.9751585623678648e-05, "loss": 0.0164, "step": 11454 }, { "epoch": 12.11, "learning_rate": 1.974630021141649e-05, "loss": 0.0317, "step": 11456 }, { "epoch": 12.11, "learning_rate": 1.9741014799154333e-05, "loss": 0.0404, "step": 11458 }, { "epoch": 12.11, "learning_rate": 1.973572938689218e-05, "loss": 0.0162, "step": 11460 }, { "epoch": 12.12, "learning_rate": 1.9730443974630022e-05, "loss": 0.0183, "step": 11462 }, { "epoch": 12.12, "learning_rate": 1.9725158562367865e-05, "loss": 0.0297, "step": 11464 }, { "epoch": 12.12, "learning_rate": 1.9719873150105707e-05, "loss": 0.038, "step": 11466 }, { "epoch": 12.12, "learning_rate": 1.9714587737843553e-05, "loss": 0.0497, "step": 11468 }, { "epoch": 12.12, "learning_rate": 1.9709302325581396e-05, "loss": 0.0859, "step": 11470 }, { "epoch": 12.13, "learning_rate": 1.970401691331924e-05, "loss": 0.0235, "step": 11472 }, { "epoch": 12.13, "learning_rate": 1.969873150105708e-05, "loss": 0.0481, "step": 11474 }, { "epoch": 12.13, "learning_rate": 1.9693446088794927e-05, "loss": 0.0667, "step": 11476 }, { "epoch": 12.13, "learning_rate": 1.968816067653277e-05, "loss": 0.0276, "step": 11478 }, { "epoch": 12.14, "learning_rate": 1.9682875264270613e-05, "loss": 0.0065, "step": 11480 }, { "epoch": 12.14, "learning_rate": 1.967758985200846e-05, "loss": 0.0434, "step": 11482 }, { "epoch": 12.14, "learning_rate": 1.96723044397463e-05, "loss": 0.0375, "step": 11484 }, { "epoch": 12.14, "learning_rate": 1.9667019027484148e-05, "loss": 0.0296, "step": 11486 }, { "epoch": 12.14, "learning_rate": 1.966173361522199e-05, "loss": 0.0439, "step": 11488 }, { "epoch": 12.15, "learning_rate": 1.9656448202959833e-05, "loss": 0.0253, "step": 11490 }, { "epoch": 12.15, "learning_rate": 1.9651162790697676e-05, "loss": 0.0521, "step": 11492 }, { "epoch": 12.15, "learning_rate": 1.9645877378435518e-05, "loss": 0.0333, "step": 11494 }, { "epoch": 12.15, "learning_rate": 1.9640591966173364e-05, "loss": 0.0429, "step": 11496 }, { "epoch": 12.15, "learning_rate": 1.9635306553911207e-05, "loss": 0.1049, "step": 11498 }, { "epoch": 12.16, "learning_rate": 1.963002114164905e-05, "loss": 0.129, "step": 11500 }, { "epoch": 12.16, "eval_cer": 0.059048161869478484, "eval_loss": 0.6092634797096252, "eval_runtime": 125.9114, "eval_samples_per_second": 6.679, "eval_steps_per_second": 0.842, "step": 11500 }, { "epoch": 12.16, "learning_rate": 1.9624735729386892e-05, "loss": 0.0586, "step": 11502 }, { "epoch": 12.16, "learning_rate": 1.961945031712474e-05, "loss": 0.0584, "step": 11504 }, { "epoch": 12.16, "learning_rate": 1.961416490486258e-05, "loss": 0.0592, "step": 11506 }, { "epoch": 12.16, "learning_rate": 1.9608879492600424e-05, "loss": 0.0383, "step": 11508 }, { "epoch": 12.17, "learning_rate": 1.9603594080338266e-05, "loss": 0.0369, "step": 11510 }, { "epoch": 12.17, "learning_rate": 1.959830866807611e-05, "loss": 0.0529, "step": 11512 }, { "epoch": 12.17, "learning_rate": 1.9593023255813955e-05, "loss": 0.0497, "step": 11514 }, { "epoch": 12.17, "learning_rate": 1.9587737843551798e-05, "loss": 0.0366, "step": 11516 }, { "epoch": 12.18, "learning_rate": 1.958245243128964e-05, "loss": 0.044, "step": 11518 }, { "epoch": 12.18, "learning_rate": 1.9577167019027483e-05, "loss": 0.0366, "step": 11520 }, { "epoch": 12.18, "learning_rate": 1.957188160676533e-05, "loss": 0.0423, "step": 11522 }, { "epoch": 12.18, "learning_rate": 1.9566596194503172e-05, "loss": 0.0449, "step": 11524 }, { "epoch": 12.18, "learning_rate": 1.9561310782241014e-05, "loss": 0.0332, "step": 11526 }, { "epoch": 12.19, "learning_rate": 1.9556025369978857e-05, "loss": 0.0076, "step": 11528 }, { "epoch": 12.19, "learning_rate": 1.9550739957716703e-05, "loss": 0.035, "step": 11530 }, { "epoch": 12.19, "learning_rate": 1.9545454545454546e-05, "loss": 0.0676, "step": 11532 }, { "epoch": 12.19, "learning_rate": 1.9540169133192392e-05, "loss": 0.0041, "step": 11534 }, { "epoch": 12.19, "learning_rate": 1.9534883720930235e-05, "loss": 0.0308, "step": 11536 }, { "epoch": 12.2, "learning_rate": 1.9529598308668077e-05, "loss": 0.0438, "step": 11538 }, { "epoch": 12.2, "learning_rate": 1.9524312896405923e-05, "loss": 0.0225, "step": 11540 }, { "epoch": 12.2, "learning_rate": 1.9519027484143766e-05, "loss": 0.0332, "step": 11542 }, { "epoch": 12.2, "learning_rate": 1.951374207188161e-05, "loss": 0.0264, "step": 11544 }, { "epoch": 12.21, "learning_rate": 1.950845665961945e-05, "loss": 0.0327, "step": 11546 }, { "epoch": 12.21, "learning_rate": 1.9503171247357294e-05, "loss": 0.0606, "step": 11548 }, { "epoch": 12.21, "learning_rate": 1.949788583509514e-05, "loss": 0.0591, "step": 11550 }, { "epoch": 12.21, "learning_rate": 1.9492600422832983e-05, "loss": 0.0971, "step": 11552 }, { "epoch": 12.21, "learning_rate": 1.9487315010570825e-05, "loss": 0.0467, "step": 11554 }, { "epoch": 12.22, "learning_rate": 1.9482029598308668e-05, "loss": 0.0173, "step": 11556 }, { "epoch": 12.22, "learning_rate": 1.9476744186046514e-05, "loss": 0.0635, "step": 11558 }, { "epoch": 12.22, "learning_rate": 1.9471458773784357e-05, "loss": 0.0159, "step": 11560 }, { "epoch": 12.22, "learning_rate": 1.94661733615222e-05, "loss": 0.0232, "step": 11562 }, { "epoch": 12.22, "learning_rate": 1.9460887949260042e-05, "loss": 0.022, "step": 11564 }, { "epoch": 12.23, "learning_rate": 1.9455602536997885e-05, "loss": 0.0191, "step": 11566 }, { "epoch": 12.23, "learning_rate": 1.945031712473573e-05, "loss": 0.0209, "step": 11568 }, { "epoch": 12.23, "learning_rate": 1.9445031712473574e-05, "loss": 0.0111, "step": 11570 }, { "epoch": 12.23, "learning_rate": 1.9439746300211416e-05, "loss": 0.0271, "step": 11572 }, { "epoch": 12.23, "learning_rate": 1.943446088794926e-05, "loss": 0.0219, "step": 11574 }, { "epoch": 12.24, "learning_rate": 1.9429175475687105e-05, "loss": 0.0302, "step": 11576 }, { "epoch": 12.24, "learning_rate": 1.9423890063424948e-05, "loss": 0.059, "step": 11578 }, { "epoch": 12.24, "learning_rate": 1.941860465116279e-05, "loss": 0.0582, "step": 11580 }, { "epoch": 12.24, "learning_rate": 1.9413319238900633e-05, "loss": 0.0951, "step": 11582 }, { "epoch": 12.25, "learning_rate": 1.940803382663848e-05, "loss": 0.0089, "step": 11584 }, { "epoch": 12.25, "learning_rate": 1.9402748414376322e-05, "loss": 0.0225, "step": 11586 }, { "epoch": 12.25, "learning_rate": 1.9397463002114168e-05, "loss": 0.1043, "step": 11588 }, { "epoch": 12.25, "learning_rate": 1.939217758985201e-05, "loss": 0.0561, "step": 11590 }, { "epoch": 12.25, "learning_rate": 1.9386892177589853e-05, "loss": 0.0499, "step": 11592 }, { "epoch": 12.26, "learning_rate": 1.93816067653277e-05, "loss": 0.0464, "step": 11594 }, { "epoch": 12.26, "learning_rate": 1.9376321353065542e-05, "loss": 0.0498, "step": 11596 }, { "epoch": 12.26, "learning_rate": 1.9371035940803385e-05, "loss": 0.0296, "step": 11598 }, { "epoch": 12.26, "learning_rate": 1.9365750528541227e-05, "loss": 0.0946, "step": 11600 }, { "epoch": 12.26, "learning_rate": 1.936046511627907e-05, "loss": 0.094, "step": 11602 }, { "epoch": 12.27, "learning_rate": 1.9355179704016916e-05, "loss": 0.1682, "step": 11604 }, { "epoch": 12.27, "learning_rate": 1.934989429175476e-05, "loss": 0.0512, "step": 11606 }, { "epoch": 12.27, "learning_rate": 1.93446088794926e-05, "loss": 0.1028, "step": 11608 }, { "epoch": 12.27, "learning_rate": 1.9339323467230444e-05, "loss": 0.1174, "step": 11610 }, { "epoch": 12.27, "learning_rate": 1.933403805496829e-05, "loss": 0.0504, "step": 11612 }, { "epoch": 12.28, "learning_rate": 1.9328752642706133e-05, "loss": 0.0252, "step": 11614 }, { "epoch": 12.28, "learning_rate": 1.9323467230443975e-05, "loss": 0.0987, "step": 11616 }, { "epoch": 12.28, "learning_rate": 1.9318181818181818e-05, "loss": 0.0166, "step": 11618 }, { "epoch": 12.28, "learning_rate": 1.931289640591966e-05, "loss": 0.1345, "step": 11620 }, { "epoch": 12.29, "learning_rate": 1.9307610993657507e-05, "loss": 0.018, "step": 11622 }, { "epoch": 12.29, "learning_rate": 1.930232558139535e-05, "loss": 0.0405, "step": 11624 }, { "epoch": 12.29, "learning_rate": 1.9297040169133192e-05, "loss": 0.0127, "step": 11626 }, { "epoch": 12.29, "learning_rate": 1.9291754756871035e-05, "loss": 0.0211, "step": 11628 }, { "epoch": 12.29, "learning_rate": 1.928646934460888e-05, "loss": 0.2615, "step": 11630 }, { "epoch": 12.3, "learning_rate": 1.9281183932346724e-05, "loss": 0.0352, "step": 11632 }, { "epoch": 12.3, "learning_rate": 1.9275898520084566e-05, "loss": 0.062, "step": 11634 }, { "epoch": 12.3, "learning_rate": 1.9270613107822412e-05, "loss": 0.0076, "step": 11636 }, { "epoch": 12.3, "learning_rate": 1.9265327695560255e-05, "loss": 0.1104, "step": 11638 }, { "epoch": 12.3, "learning_rate": 1.9260042283298098e-05, "loss": 0.0247, "step": 11640 }, { "epoch": 12.31, "learning_rate": 1.9254756871035944e-05, "loss": 0.0621, "step": 11642 }, { "epoch": 12.31, "learning_rate": 1.9249471458773786e-05, "loss": 0.0341, "step": 11644 }, { "epoch": 12.31, "learning_rate": 1.924418604651163e-05, "loss": 0.0243, "step": 11646 }, { "epoch": 12.31, "learning_rate": 1.9238900634249475e-05, "loss": 0.0353, "step": 11648 }, { "epoch": 12.32, "learning_rate": 1.9233615221987318e-05, "loss": 0.0273, "step": 11650 }, { "epoch": 12.32, "learning_rate": 1.922832980972516e-05, "loss": 0.0557, "step": 11652 }, { "epoch": 12.32, "learning_rate": 1.9223044397463003e-05, "loss": 0.0112, "step": 11654 }, { "epoch": 12.32, "learning_rate": 1.9217758985200846e-05, "loss": 0.0363, "step": 11656 }, { "epoch": 12.32, "learning_rate": 1.9212473572938692e-05, "loss": 0.0698, "step": 11658 }, { "epoch": 12.33, "learning_rate": 1.9207188160676534e-05, "loss": 0.16, "step": 11660 }, { "epoch": 12.33, "learning_rate": 1.9201902748414377e-05, "loss": 0.089, "step": 11662 }, { "epoch": 12.33, "learning_rate": 1.919661733615222e-05, "loss": 0.0624, "step": 11664 }, { "epoch": 12.33, "learning_rate": 1.9191331923890062e-05, "loss": 0.0254, "step": 11666 }, { "epoch": 12.33, "learning_rate": 1.918604651162791e-05, "loss": 0.0436, "step": 11668 }, { "epoch": 12.34, "learning_rate": 1.918076109936575e-05, "loss": 0.0417, "step": 11670 }, { "epoch": 12.34, "learning_rate": 1.9175475687103594e-05, "loss": 0.032, "step": 11672 }, { "epoch": 12.34, "learning_rate": 1.9170190274841437e-05, "loss": 0.0656, "step": 11674 }, { "epoch": 12.34, "learning_rate": 1.9164904862579283e-05, "loss": 0.0597, "step": 11676 }, { "epoch": 12.34, "learning_rate": 1.9159619450317125e-05, "loss": 0.0349, "step": 11678 }, { "epoch": 12.35, "learning_rate": 1.9154334038054968e-05, "loss": 0.0432, "step": 11680 }, { "epoch": 12.35, "learning_rate": 1.914904862579281e-05, "loss": 0.0244, "step": 11682 }, { "epoch": 12.35, "learning_rate": 1.9143763213530653e-05, "loss": 0.0721, "step": 11684 }, { "epoch": 12.35, "learning_rate": 1.91384778012685e-05, "loss": 0.0318, "step": 11686 }, { "epoch": 12.36, "learning_rate": 1.9133192389006342e-05, "loss": 0.0717, "step": 11688 }, { "epoch": 12.36, "learning_rate": 1.9127906976744188e-05, "loss": 0.0474, "step": 11690 }, { "epoch": 12.36, "learning_rate": 1.912262156448203e-05, "loss": 0.0258, "step": 11692 }, { "epoch": 12.36, "learning_rate": 1.9117336152219877e-05, "loss": 0.0452, "step": 11694 }, { "epoch": 12.36, "learning_rate": 1.911205073995772e-05, "loss": 0.0167, "step": 11696 }, { "epoch": 12.37, "learning_rate": 1.9106765327695562e-05, "loss": 0.0303, "step": 11698 }, { "epoch": 12.37, "learning_rate": 1.9101479915433405e-05, "loss": 0.0828, "step": 11700 }, { "epoch": 12.37, "learning_rate": 1.9096194503171247e-05, "loss": 0.015, "step": 11702 }, { "epoch": 12.37, "learning_rate": 1.9090909090909094e-05, "loss": 0.036, "step": 11704 }, { "epoch": 12.37, "learning_rate": 1.9085623678646936e-05, "loss": 0.0045, "step": 11706 }, { "epoch": 12.38, "learning_rate": 1.908033826638478e-05, "loss": 0.0425, "step": 11708 }, { "epoch": 12.38, "learning_rate": 1.907505285412262e-05, "loss": 0.1253, "step": 11710 }, { "epoch": 12.38, "learning_rate": 1.9069767441860468e-05, "loss": 0.051, "step": 11712 }, { "epoch": 12.38, "learning_rate": 1.906448202959831e-05, "loss": 0.0164, "step": 11714 }, { "epoch": 12.38, "learning_rate": 1.9059196617336153e-05, "loss": 0.0591, "step": 11716 }, { "epoch": 12.39, "learning_rate": 1.9053911205073996e-05, "loss": 0.039, "step": 11718 }, { "epoch": 12.39, "learning_rate": 1.904862579281184e-05, "loss": 0.0519, "step": 11720 }, { "epoch": 12.39, "learning_rate": 1.9043340380549684e-05, "loss": 0.0243, "step": 11722 }, { "epoch": 12.39, "learning_rate": 1.9038054968287527e-05, "loss": 0.0222, "step": 11724 }, { "epoch": 12.4, "learning_rate": 1.903276955602537e-05, "loss": 0.0417, "step": 11726 }, { "epoch": 12.4, "learning_rate": 1.9027484143763212e-05, "loss": 0.0052, "step": 11728 }, { "epoch": 12.4, "learning_rate": 1.902219873150106e-05, "loss": 0.1375, "step": 11730 }, { "epoch": 12.4, "learning_rate": 1.90169133192389e-05, "loss": 0.0676, "step": 11732 }, { "epoch": 12.4, "learning_rate": 1.9011627906976744e-05, "loss": 0.0273, "step": 11734 }, { "epoch": 12.41, "learning_rate": 1.9006342494714586e-05, "loss": 0.0784, "step": 11736 }, { "epoch": 12.41, "learning_rate": 1.9001057082452433e-05, "loss": 0.0367, "step": 11738 }, { "epoch": 12.41, "learning_rate": 1.8995771670190275e-05, "loss": 0.0331, "step": 11740 }, { "epoch": 12.41, "learning_rate": 1.8990486257928118e-05, "loss": 0.0026, "step": 11742 }, { "epoch": 12.41, "learning_rate": 1.8985200845665964e-05, "loss": 0.0098, "step": 11744 }, { "epoch": 12.42, "learning_rate": 1.8979915433403807e-05, "loss": 0.0093, "step": 11746 }, { "epoch": 12.42, "learning_rate": 1.8974630021141653e-05, "loss": 0.0568, "step": 11748 }, { "epoch": 12.42, "learning_rate": 1.8969344608879495e-05, "loss": 0.0363, "step": 11750 }, { "epoch": 12.42, "learning_rate": 1.8964059196617338e-05, "loss": 0.0386, "step": 11752 }, { "epoch": 12.42, "learning_rate": 1.895877378435518e-05, "loss": 0.0281, "step": 11754 }, { "epoch": 12.43, "learning_rate": 1.8953488372093023e-05, "loss": 0.018, "step": 11756 }, { "epoch": 12.43, "learning_rate": 1.894820295983087e-05, "loss": 0.0129, "step": 11758 }, { "epoch": 12.43, "learning_rate": 1.8942917547568712e-05, "loss": 0.0137, "step": 11760 }, { "epoch": 12.43, "learning_rate": 1.8937632135306555e-05, "loss": 0.0773, "step": 11762 }, { "epoch": 12.44, "learning_rate": 1.8932346723044397e-05, "loss": 0.069, "step": 11764 }, { "epoch": 12.44, "learning_rate": 1.8927061310782243e-05, "loss": 0.0699, "step": 11766 }, { "epoch": 12.44, "learning_rate": 1.8921775898520086e-05, "loss": 0.0518, "step": 11768 }, { "epoch": 12.44, "learning_rate": 1.891649048625793e-05, "loss": 0.0252, "step": 11770 }, { "epoch": 12.44, "learning_rate": 1.891120507399577e-05, "loss": 0.0087, "step": 11772 }, { "epoch": 12.45, "learning_rate": 1.8905919661733614e-05, "loss": 0.0321, "step": 11774 }, { "epoch": 12.45, "learning_rate": 1.890063424947146e-05, "loss": 0.0232, "step": 11776 }, { "epoch": 12.45, "learning_rate": 1.8895348837209303e-05, "loss": 0.0225, "step": 11778 }, { "epoch": 12.45, "learning_rate": 1.8890063424947146e-05, "loss": 0.043, "step": 11780 }, { "epoch": 12.45, "learning_rate": 1.8884778012684988e-05, "loss": 0.0319, "step": 11782 }, { "epoch": 12.46, "learning_rate": 1.8879492600422834e-05, "loss": 0.0238, "step": 11784 }, { "epoch": 12.46, "learning_rate": 1.8874207188160677e-05, "loss": 0.044, "step": 11786 }, { "epoch": 12.46, "learning_rate": 1.886892177589852e-05, "loss": 0.0721, "step": 11788 }, { "epoch": 12.46, "learning_rate": 1.8863636363636362e-05, "loss": 0.1443, "step": 11790 }, { "epoch": 12.47, "learning_rate": 1.885835095137421e-05, "loss": 0.1707, "step": 11792 }, { "epoch": 12.47, "learning_rate": 1.885306553911205e-05, "loss": 0.0278, "step": 11794 }, { "epoch": 12.47, "learning_rate": 1.8847780126849897e-05, "loss": 0.0472, "step": 11796 }, { "epoch": 12.47, "learning_rate": 1.884249471458774e-05, "loss": 0.0468, "step": 11798 }, { "epoch": 12.47, "learning_rate": 1.8837209302325582e-05, "loss": 0.0368, "step": 11800 }, { "epoch": 12.48, "learning_rate": 1.883192389006343e-05, "loss": 0.0713, "step": 11802 }, { "epoch": 12.48, "learning_rate": 1.882663847780127e-05, "loss": 0.0581, "step": 11804 }, { "epoch": 12.48, "learning_rate": 1.8821353065539114e-05, "loss": 0.1298, "step": 11806 }, { "epoch": 12.48, "learning_rate": 1.8816067653276956e-05, "loss": 0.1243, "step": 11808 }, { "epoch": 12.48, "learning_rate": 1.88107822410148e-05, "loss": 0.078, "step": 11810 }, { "epoch": 12.49, "learning_rate": 1.8805496828752645e-05, "loss": 0.0132, "step": 11812 }, { "epoch": 12.49, "learning_rate": 1.8800211416490488e-05, "loss": 0.0336, "step": 11814 }, { "epoch": 12.49, "learning_rate": 1.879492600422833e-05, "loss": 0.0503, "step": 11816 }, { "epoch": 12.49, "learning_rate": 1.8789640591966173e-05, "loss": 0.0343, "step": 11818 }, { "epoch": 12.49, "learning_rate": 1.878435517970402e-05, "loss": 0.0146, "step": 11820 }, { "epoch": 12.5, "learning_rate": 1.8779069767441862e-05, "loss": 0.0561, "step": 11822 }, { "epoch": 12.5, "learning_rate": 1.8773784355179705e-05, "loss": 0.141, "step": 11824 }, { "epoch": 12.5, "learning_rate": 1.8768498942917547e-05, "loss": 0.0475, "step": 11826 }, { "epoch": 12.5, "learning_rate": 1.876321353065539e-05, "loss": 0.0365, "step": 11828 }, { "epoch": 12.51, "learning_rate": 1.8757928118393236e-05, "loss": 0.0063, "step": 11830 }, { "epoch": 12.51, "learning_rate": 1.875264270613108e-05, "loss": 0.0754, "step": 11832 }, { "epoch": 12.51, "learning_rate": 1.874735729386892e-05, "loss": 0.06, "step": 11834 }, { "epoch": 12.51, "learning_rate": 1.8742071881606764e-05, "loss": 0.1181, "step": 11836 }, { "epoch": 12.51, "learning_rate": 1.873678646934461e-05, "loss": 0.0563, "step": 11838 }, { "epoch": 12.52, "learning_rate": 1.8731501057082453e-05, "loss": 0.0358, "step": 11840 }, { "epoch": 12.52, "learning_rate": 1.8726215644820295e-05, "loss": 0.0087, "step": 11842 }, { "epoch": 12.52, "learning_rate": 1.8720930232558138e-05, "loss": 0.0112, "step": 11844 }, { "epoch": 12.52, "learning_rate": 1.8715644820295984e-05, "loss": 0.0286, "step": 11846 }, { "epoch": 12.52, "learning_rate": 1.8710359408033827e-05, "loss": 0.0281, "step": 11848 }, { "epoch": 12.53, "learning_rate": 1.8705073995771673e-05, "loss": 0.0756, "step": 11850 }, { "epoch": 12.53, "learning_rate": 1.8699788583509516e-05, "loss": 0.0139, "step": 11852 }, { "epoch": 12.53, "learning_rate": 1.8694503171247358e-05, "loss": 0.0402, "step": 11854 }, { "epoch": 12.53, "learning_rate": 1.8689217758985204e-05, "loss": 0.023, "step": 11856 }, { "epoch": 12.53, "learning_rate": 1.8683932346723047e-05, "loss": 0.0187, "step": 11858 }, { "epoch": 12.54, "learning_rate": 1.867864693446089e-05, "loss": 0.0562, "step": 11860 }, { "epoch": 12.54, "learning_rate": 1.8673361522198732e-05, "loss": 0.0147, "step": 11862 }, { "epoch": 12.54, "learning_rate": 1.8668076109936575e-05, "loss": 0.0217, "step": 11864 }, { "epoch": 12.54, "learning_rate": 1.866279069767442e-05, "loss": 0.0082, "step": 11866 }, { "epoch": 12.55, "learning_rate": 1.8657505285412264e-05, "loss": 0.039, "step": 11868 }, { "epoch": 12.55, "learning_rate": 1.8652219873150106e-05, "loss": 0.0157, "step": 11870 }, { "epoch": 12.55, "learning_rate": 1.864693446088795e-05, "loss": 0.0795, "step": 11872 }, { "epoch": 12.55, "learning_rate": 1.8641649048625795e-05, "loss": 0.0533, "step": 11874 }, { "epoch": 12.55, "learning_rate": 1.8636363636363638e-05, "loss": 0.0508, "step": 11876 }, { "epoch": 12.56, "learning_rate": 1.863107822410148e-05, "loss": 0.0334, "step": 11878 }, { "epoch": 12.56, "learning_rate": 1.8625792811839323e-05, "loss": 0.022, "step": 11880 }, { "epoch": 12.56, "learning_rate": 1.8620507399577166e-05, "loss": 0.0166, "step": 11882 }, { "epoch": 12.56, "learning_rate": 1.8615221987315012e-05, "loss": 0.0703, "step": 11884 }, { "epoch": 12.56, "learning_rate": 1.8609936575052855e-05, "loss": 0.0374, "step": 11886 }, { "epoch": 12.57, "learning_rate": 1.8604651162790697e-05, "loss": 0.0318, "step": 11888 }, { "epoch": 12.57, "learning_rate": 1.859936575052854e-05, "loss": 0.1932, "step": 11890 }, { "epoch": 12.57, "learning_rate": 1.8594080338266386e-05, "loss": 0.0277, "step": 11892 }, { "epoch": 12.57, "learning_rate": 1.858879492600423e-05, "loss": 0.0648, "step": 11894 }, { "epoch": 12.58, "learning_rate": 1.858350951374207e-05, "loss": 0.0769, "step": 11896 }, { "epoch": 12.58, "learning_rate": 1.8578224101479917e-05, "loss": 0.0774, "step": 11898 }, { "epoch": 12.58, "learning_rate": 1.857293868921776e-05, "loss": 0.0401, "step": 11900 }, { "epoch": 12.58, "learning_rate": 1.8567653276955606e-05, "loss": 0.0181, "step": 11902 }, { "epoch": 12.58, "learning_rate": 1.856236786469345e-05, "loss": 0.0538, "step": 11904 }, { "epoch": 12.59, "learning_rate": 1.855708245243129e-05, "loss": 0.0022, "step": 11906 }, { "epoch": 12.59, "learning_rate": 1.8551797040169134e-05, "loss": 0.0727, "step": 11908 }, { "epoch": 12.59, "learning_rate": 1.854651162790698e-05, "loss": 0.003, "step": 11910 }, { "epoch": 12.59, "learning_rate": 1.8541226215644823e-05, "loss": 0.0089, "step": 11912 }, { "epoch": 12.59, "learning_rate": 1.8535940803382666e-05, "loss": 0.0329, "step": 11914 }, { "epoch": 12.6, "learning_rate": 1.8530655391120508e-05, "loss": 0.0231, "step": 11916 }, { "epoch": 12.6, "learning_rate": 1.852536997885835e-05, "loss": 0.0387, "step": 11918 }, { "epoch": 12.6, "learning_rate": 1.8520084566596197e-05, "loss": 0.061, "step": 11920 }, { "epoch": 12.6, "learning_rate": 1.851479915433404e-05, "loss": 0.0585, "step": 11922 }, { "epoch": 12.6, "learning_rate": 1.8509513742071882e-05, "loss": 0.0702, "step": 11924 }, { "epoch": 12.61, "learning_rate": 1.8504228329809725e-05, "loss": 0.0598, "step": 11926 }, { "epoch": 12.61, "learning_rate": 1.849894291754757e-05, "loss": 0.024, "step": 11928 }, { "epoch": 12.61, "learning_rate": 1.8493657505285414e-05, "loss": 0.0682, "step": 11930 }, { "epoch": 12.61, "learning_rate": 1.8488372093023256e-05, "loss": 0.0347, "step": 11932 }, { "epoch": 12.62, "learning_rate": 1.84830866807611e-05, "loss": 0.0395, "step": 11934 }, { "epoch": 12.62, "learning_rate": 1.847780126849894e-05, "loss": 0.0333, "step": 11936 }, { "epoch": 12.62, "learning_rate": 1.8472515856236788e-05, "loss": 0.0432, "step": 11938 }, { "epoch": 12.62, "learning_rate": 1.846723044397463e-05, "loss": 0.0321, "step": 11940 }, { "epoch": 12.62, "learning_rate": 1.8461945031712473e-05, "loss": 0.0308, "step": 11942 }, { "epoch": 12.63, "learning_rate": 1.8456659619450316e-05, "loss": 0.0345, "step": 11944 }, { "epoch": 12.63, "learning_rate": 1.8451374207188162e-05, "loss": 0.0509, "step": 11946 }, { "epoch": 12.63, "learning_rate": 1.8446088794926004e-05, "loss": 0.0276, "step": 11948 }, { "epoch": 12.63, "learning_rate": 1.8440803382663847e-05, "loss": 0.0526, "step": 11950 }, { "epoch": 12.63, "learning_rate": 1.8435517970401693e-05, "loss": 0.0226, "step": 11952 }, { "epoch": 12.64, "learning_rate": 1.8430232558139536e-05, "loss": 0.0062, "step": 11954 }, { "epoch": 12.64, "learning_rate": 1.8424947145877382e-05, "loss": 0.0682, "step": 11956 }, { "epoch": 12.64, "learning_rate": 1.8419661733615225e-05, "loss": 0.0851, "step": 11958 }, { "epoch": 12.64, "learning_rate": 1.8414376321353067e-05, "loss": 0.0219, "step": 11960 }, { "epoch": 12.64, "learning_rate": 1.840909090909091e-05, "loss": 0.0094, "step": 11962 }, { "epoch": 12.65, "learning_rate": 1.8403805496828756e-05, "loss": 0.0361, "step": 11964 }, { "epoch": 12.65, "learning_rate": 1.83985200845666e-05, "loss": 0.1176, "step": 11966 }, { "epoch": 12.65, "learning_rate": 1.839323467230444e-05, "loss": 0.0238, "step": 11968 }, { "epoch": 12.65, "learning_rate": 1.8387949260042284e-05, "loss": 0.0309, "step": 11970 }, { "epoch": 12.66, "learning_rate": 1.8382663847780127e-05, "loss": 0.0262, "step": 11972 }, { "epoch": 12.66, "learning_rate": 1.8377378435517973e-05, "loss": 0.0233, "step": 11974 }, { "epoch": 12.66, "learning_rate": 1.8372093023255815e-05, "loss": 0.0249, "step": 11976 }, { "epoch": 12.66, "learning_rate": 1.8366807610993658e-05, "loss": 0.1753, "step": 11978 }, { "epoch": 12.66, "learning_rate": 1.83615221987315e-05, "loss": 0.0459, "step": 11980 }, { "epoch": 12.67, "learning_rate": 1.8356236786469347e-05, "loss": 0.0319, "step": 11982 }, { "epoch": 12.67, "learning_rate": 1.835095137420719e-05, "loss": 0.088, "step": 11984 }, { "epoch": 12.67, "learning_rate": 1.8345665961945032e-05, "loss": 0.0274, "step": 11986 }, { "epoch": 12.67, "learning_rate": 1.8340380549682875e-05, "loss": 0.0498, "step": 11988 }, { "epoch": 12.67, "learning_rate": 1.8335095137420718e-05, "loss": 0.0145, "step": 11990 }, { "epoch": 12.68, "learning_rate": 1.8329809725158564e-05, "loss": 0.0204, "step": 11992 }, { "epoch": 12.68, "learning_rate": 1.8324524312896406e-05, "loss": 0.0554, "step": 11994 }, { "epoch": 12.68, "learning_rate": 1.831923890063425e-05, "loss": 0.0453, "step": 11996 }, { "epoch": 12.68, "learning_rate": 1.831395348837209e-05, "loss": 0.0281, "step": 11998 }, { "epoch": 12.68, "learning_rate": 1.8308668076109938e-05, "loss": 0.0519, "step": 12000 }, { "epoch": 12.68, "eval_cer": 0.03493872898261613, "eval_loss": 0.7152830958366394, "eval_runtime": 126.707, "eval_samples_per_second": 6.637, "eval_steps_per_second": 0.837, "step": 12000 }, { "epoch": 12.69, "learning_rate": 1.830338266384778e-05, "loss": 0.0397, "step": 12002 }, { "epoch": 12.69, "learning_rate": 1.8298097251585626e-05, "loss": 0.0566, "step": 12004 }, { "epoch": 12.69, "learning_rate": 1.829281183932347e-05, "loss": 0.0203, "step": 12006 }, { "epoch": 12.69, "learning_rate": 1.8287526427061312e-05, "loss": 0.0205, "step": 12008 }, { "epoch": 12.7, "learning_rate": 1.8282241014799158e-05, "loss": 0.0227, "step": 12010 }, { "epoch": 12.7, "learning_rate": 1.8276955602537e-05, "loss": 0.0056, "step": 12012 }, { "epoch": 12.7, "learning_rate": 1.8271670190274843e-05, "loss": 0.0421, "step": 12014 }, { "epoch": 12.7, "learning_rate": 1.8266384778012686e-05, "loss": 0.0131, "step": 12016 }, { "epoch": 12.7, "learning_rate": 1.826109936575053e-05, "loss": 0.0164, "step": 12018 }, { "epoch": 12.71, "learning_rate": 1.8255813953488375e-05, "loss": 0.0469, "step": 12020 }, { "epoch": 12.71, "learning_rate": 1.8250528541226217e-05, "loss": 0.0405, "step": 12022 }, { "epoch": 12.71, "learning_rate": 1.824524312896406e-05, "loss": 0.0053, "step": 12024 }, { "epoch": 12.71, "learning_rate": 1.8239957716701903e-05, "loss": 0.0114, "step": 12026 }, { "epoch": 12.71, "learning_rate": 1.823467230443975e-05, "loss": 0.0044, "step": 12028 }, { "epoch": 12.72, "learning_rate": 1.822938689217759e-05, "loss": 0.0255, "step": 12030 }, { "epoch": 12.72, "learning_rate": 1.8224101479915434e-05, "loss": 0.0303, "step": 12032 }, { "epoch": 12.72, "learning_rate": 1.8218816067653277e-05, "loss": 0.0222, "step": 12034 }, { "epoch": 12.72, "learning_rate": 1.821353065539112e-05, "loss": 0.0577, "step": 12036 }, { "epoch": 12.73, "learning_rate": 1.8208245243128965e-05, "loss": 0.0462, "step": 12038 }, { "epoch": 12.73, "learning_rate": 1.8202959830866808e-05, "loss": 0.0222, "step": 12040 }, { "epoch": 12.73, "learning_rate": 1.819767441860465e-05, "loss": 0.0696, "step": 12042 }, { "epoch": 12.73, "learning_rate": 1.8192389006342493e-05, "loss": 0.0139, "step": 12044 }, { "epoch": 12.73, "learning_rate": 1.818710359408034e-05, "loss": 0.0103, "step": 12046 }, { "epoch": 12.74, "learning_rate": 1.8181818181818182e-05, "loss": 0.0333, "step": 12048 }, { "epoch": 12.74, "learning_rate": 1.8176532769556025e-05, "loss": 0.0181, "step": 12050 }, { "epoch": 12.74, "learning_rate": 1.8171247357293867e-05, "loss": 0.0193, "step": 12052 }, { "epoch": 12.74, "learning_rate": 1.8165961945031713e-05, "loss": 0.051, "step": 12054 }, { "epoch": 12.74, "learning_rate": 1.8160676532769556e-05, "loss": 0.0071, "step": 12056 }, { "epoch": 12.75, "learning_rate": 1.8155391120507402e-05, "loss": 0.0019, "step": 12058 }, { "epoch": 12.75, "learning_rate": 1.8150105708245245e-05, "loss": 0.0435, "step": 12060 }, { "epoch": 12.75, "learning_rate": 1.8144820295983088e-05, "loss": 0.0204, "step": 12062 }, { "epoch": 12.75, "learning_rate": 1.8139534883720934e-05, "loss": 0.0172, "step": 12064 }, { "epoch": 12.75, "learning_rate": 1.8134249471458776e-05, "loss": 0.0118, "step": 12066 }, { "epoch": 12.76, "learning_rate": 1.812896405919662e-05, "loss": 0.0241, "step": 12068 }, { "epoch": 12.76, "learning_rate": 1.812367864693446e-05, "loss": 0.0647, "step": 12070 }, { "epoch": 12.76, "learning_rate": 1.8118393234672304e-05, "loss": 0.0233, "step": 12072 }, { "epoch": 12.76, "learning_rate": 1.811310782241015e-05, "loss": 0.0086, "step": 12074 }, { "epoch": 12.77, "learning_rate": 1.8107822410147993e-05, "loss": 0.0489, "step": 12076 }, { "epoch": 12.77, "learning_rate": 1.8102536997885836e-05, "loss": 0.0581, "step": 12078 }, { "epoch": 12.77, "learning_rate": 1.809725158562368e-05, "loss": 0.0231, "step": 12080 }, { "epoch": 12.77, "learning_rate": 1.8091966173361524e-05, "loss": 0.041, "step": 12082 }, { "epoch": 12.77, "learning_rate": 1.8086680761099367e-05, "loss": 0.034, "step": 12084 }, { "epoch": 12.78, "learning_rate": 1.808139534883721e-05, "loss": 0.0653, "step": 12086 }, { "epoch": 12.78, "learning_rate": 1.8076109936575052e-05, "loss": 0.0455, "step": 12088 }, { "epoch": 12.78, "learning_rate": 1.8070824524312895e-05, "loss": 0.0227, "step": 12090 }, { "epoch": 12.78, "learning_rate": 1.806553911205074e-05, "loss": 0.1526, "step": 12092 }, { "epoch": 12.78, "learning_rate": 1.8060253699788584e-05, "loss": 0.0399, "step": 12094 }, { "epoch": 12.79, "learning_rate": 1.8054968287526427e-05, "loss": 0.0096, "step": 12096 }, { "epoch": 12.79, "learning_rate": 1.804968287526427e-05, "loss": 0.081, "step": 12098 }, { "epoch": 12.79, "learning_rate": 1.8044397463002115e-05, "loss": 0.0674, "step": 12100 }, { "epoch": 12.79, "learning_rate": 1.8039112050739958e-05, "loss": 0.0201, "step": 12102 }, { "epoch": 12.79, "learning_rate": 1.80338266384778e-05, "loss": 0.0162, "step": 12104 }, { "epoch": 12.8, "learning_rate": 1.8028541226215647e-05, "loss": 0.1635, "step": 12106 }, { "epoch": 12.8, "learning_rate": 1.802325581395349e-05, "loss": 0.0519, "step": 12108 }, { "epoch": 12.8, "learning_rate": 1.8017970401691332e-05, "loss": 0.0668, "step": 12110 }, { "epoch": 12.8, "learning_rate": 1.8012684989429178e-05, "loss": 0.0052, "step": 12112 }, { "epoch": 12.81, "learning_rate": 1.800739957716702e-05, "loss": 0.0597, "step": 12114 }, { "epoch": 12.81, "learning_rate": 1.8002114164904863e-05, "loss": 0.0481, "step": 12116 }, { "epoch": 12.81, "learning_rate": 1.799682875264271e-05, "loss": 0.0407, "step": 12118 }, { "epoch": 12.81, "learning_rate": 1.7991543340380552e-05, "loss": 0.0751, "step": 12120 }, { "epoch": 12.81, "learning_rate": 1.7986257928118395e-05, "loss": 0.0375, "step": 12122 }, { "epoch": 12.82, "learning_rate": 1.7980972515856237e-05, "loss": 0.0605, "step": 12124 }, { "epoch": 12.82, "learning_rate": 1.797568710359408e-05, "loss": 0.0272, "step": 12126 }, { "epoch": 12.82, "learning_rate": 1.7970401691331926e-05, "loss": 0.0457, "step": 12128 }, { "epoch": 12.82, "learning_rate": 1.796511627906977e-05, "loss": 0.0282, "step": 12130 }, { "epoch": 12.82, "learning_rate": 1.795983086680761e-05, "loss": 0.0617, "step": 12132 }, { "epoch": 12.83, "learning_rate": 1.7954545454545454e-05, "loss": 0.0221, "step": 12134 }, { "epoch": 12.83, "learning_rate": 1.79492600422833e-05, "loss": 0.0818, "step": 12136 }, { "epoch": 12.83, "learning_rate": 1.7943974630021143e-05, "loss": 0.0797, "step": 12138 }, { "epoch": 12.83, "learning_rate": 1.7938689217758986e-05, "loss": 0.0649, "step": 12140 }, { "epoch": 12.84, "learning_rate": 1.7933403805496828e-05, "loss": 0.0281, "step": 12142 }, { "epoch": 12.84, "learning_rate": 1.792811839323467e-05, "loss": 0.0125, "step": 12144 }, { "epoch": 12.84, "learning_rate": 1.7922832980972517e-05, "loss": 0.0172, "step": 12146 }, { "epoch": 12.84, "learning_rate": 1.791754756871036e-05, "loss": 0.023, "step": 12148 }, { "epoch": 12.84, "learning_rate": 1.7912262156448202e-05, "loss": 0.0441, "step": 12150 }, { "epoch": 12.85, "learning_rate": 1.7906976744186045e-05, "loss": 0.0704, "step": 12152 }, { "epoch": 12.85, "learning_rate": 1.790169133192389e-05, "loss": 0.1245, "step": 12154 }, { "epoch": 12.85, "learning_rate": 1.7896405919661734e-05, "loss": 0.0597, "step": 12156 }, { "epoch": 12.85, "learning_rate": 1.7891120507399576e-05, "loss": 0.07, "step": 12158 }, { "epoch": 12.85, "learning_rate": 1.7885835095137422e-05, "loss": 0.069, "step": 12160 }, { "epoch": 12.86, "learning_rate": 1.7880549682875265e-05, "loss": 0.0785, "step": 12162 }, { "epoch": 12.86, "learning_rate": 1.787526427061311e-05, "loss": 0.0217, "step": 12164 }, { "epoch": 12.86, "learning_rate": 1.7869978858350954e-05, "loss": 0.0645, "step": 12166 }, { "epoch": 12.86, "learning_rate": 1.7864693446088797e-05, "loss": 0.0546, "step": 12168 }, { "epoch": 12.86, "learning_rate": 1.785940803382664e-05, "loss": 0.0539, "step": 12170 }, { "epoch": 12.87, "learning_rate": 1.7854122621564485e-05, "loss": 0.1025, "step": 12172 }, { "epoch": 12.87, "learning_rate": 1.7848837209302328e-05, "loss": 0.055, "step": 12174 }, { "epoch": 12.87, "learning_rate": 1.784355179704017e-05, "loss": 0.0331, "step": 12176 }, { "epoch": 12.87, "learning_rate": 1.7838266384778013e-05, "loss": 0.0556, "step": 12178 }, { "epoch": 12.88, "learning_rate": 1.7832980972515856e-05, "loss": 0.0582, "step": 12180 }, { "epoch": 12.88, "learning_rate": 1.7827695560253702e-05, "loss": 0.072, "step": 12182 }, { "epoch": 12.88, "learning_rate": 1.7822410147991545e-05, "loss": 0.0276, "step": 12184 }, { "epoch": 12.88, "learning_rate": 1.7817124735729387e-05, "loss": 0.0059, "step": 12186 }, { "epoch": 12.88, "learning_rate": 1.781183932346723e-05, "loss": 0.0242, "step": 12188 }, { "epoch": 12.89, "learning_rate": 1.7806553911205076e-05, "loss": 0.0488, "step": 12190 }, { "epoch": 12.89, "learning_rate": 1.780126849894292e-05, "loss": 0.0418, "step": 12192 }, { "epoch": 12.89, "learning_rate": 1.779598308668076e-05, "loss": 0.0164, "step": 12194 }, { "epoch": 12.89, "learning_rate": 1.7790697674418604e-05, "loss": 0.0877, "step": 12196 }, { "epoch": 12.89, "learning_rate": 1.7785412262156447e-05, "loss": 0.1477, "step": 12198 }, { "epoch": 12.9, "learning_rate": 1.7780126849894293e-05, "loss": 0.0141, "step": 12200 }, { "epoch": 12.9, "learning_rate": 1.7774841437632136e-05, "loss": 0.0179, "step": 12202 }, { "epoch": 12.9, "learning_rate": 1.7769556025369978e-05, "loss": 0.0325, "step": 12204 }, { "epoch": 12.9, "learning_rate": 1.776427061310782e-05, "loss": 0.0368, "step": 12206 }, { "epoch": 12.9, "learning_rate": 1.7758985200845667e-05, "loss": 0.0178, "step": 12208 }, { "epoch": 12.91, "learning_rate": 1.775369978858351e-05, "loss": 0.038, "step": 12210 }, { "epoch": 12.91, "learning_rate": 1.7748414376321352e-05, "loss": 0.011, "step": 12212 }, { "epoch": 12.91, "learning_rate": 1.77431289640592e-05, "loss": 0.0098, "step": 12214 }, { "epoch": 12.91, "learning_rate": 1.773784355179704e-05, "loss": 0.0482, "step": 12216 }, { "epoch": 12.92, "learning_rate": 1.7732558139534887e-05, "loss": 0.0697, "step": 12218 }, { "epoch": 12.92, "learning_rate": 1.772727272727273e-05, "loss": 0.0124, "step": 12220 }, { "epoch": 12.92, "learning_rate": 1.7721987315010572e-05, "loss": 0.1065, "step": 12222 }, { "epoch": 12.92, "learning_rate": 1.7716701902748415e-05, "loss": 0.1214, "step": 12224 }, { "epoch": 12.92, "learning_rate": 1.771141649048626e-05, "loss": 0.0112, "step": 12226 }, { "epoch": 12.93, "learning_rate": 1.7706131078224104e-05, "loss": 0.0301, "step": 12228 }, { "epoch": 12.93, "learning_rate": 1.7700845665961946e-05, "loss": 0.1167, "step": 12230 }, { "epoch": 12.93, "learning_rate": 1.769556025369979e-05, "loss": 0.0479, "step": 12232 }, { "epoch": 12.93, "learning_rate": 1.7690274841437632e-05, "loss": 0.0196, "step": 12234 }, { "epoch": 12.93, "learning_rate": 1.7684989429175478e-05, "loss": 0.0172, "step": 12236 }, { "epoch": 12.94, "learning_rate": 1.767970401691332e-05, "loss": 0.0264, "step": 12238 }, { "epoch": 12.94, "learning_rate": 1.7674418604651163e-05, "loss": 0.121, "step": 12240 }, { "epoch": 12.94, "learning_rate": 1.7669133192389006e-05, "loss": 0.0175, "step": 12242 }, { "epoch": 12.94, "learning_rate": 1.7663847780126852e-05, "loss": 0.0459, "step": 12244 }, { "epoch": 12.95, "learning_rate": 1.7658562367864695e-05, "loss": 0.0183, "step": 12246 }, { "epoch": 12.95, "learning_rate": 1.7653276955602537e-05, "loss": 0.0601, "step": 12248 }, { "epoch": 12.95, "learning_rate": 1.764799154334038e-05, "loss": 0.0104, "step": 12250 }, { "epoch": 12.95, "learning_rate": 1.7642706131078223e-05, "loss": 0.0101, "step": 12252 }, { "epoch": 12.95, "learning_rate": 1.763742071881607e-05, "loss": 0.0273, "step": 12254 }, { "epoch": 12.96, "learning_rate": 1.763213530655391e-05, "loss": 0.0343, "step": 12256 }, { "epoch": 12.96, "learning_rate": 1.7626849894291754e-05, "loss": 0.0275, "step": 12258 }, { "epoch": 12.96, "learning_rate": 1.7621564482029597e-05, "loss": 0.0227, "step": 12260 }, { "epoch": 12.96, "learning_rate": 1.7616279069767443e-05, "loss": 0.0143, "step": 12262 }, { "epoch": 12.96, "learning_rate": 1.7610993657505285e-05, "loss": 0.0261, "step": 12264 }, { "epoch": 12.97, "learning_rate": 1.760570824524313e-05, "loss": 0.0837, "step": 12266 }, { "epoch": 12.97, "learning_rate": 1.7600422832980974e-05, "loss": 0.052, "step": 12268 }, { "epoch": 12.97, "learning_rate": 1.7595137420718817e-05, "loss": 0.0346, "step": 12270 }, { "epoch": 12.97, "learning_rate": 1.7589852008456663e-05, "loss": 0.0276, "step": 12272 }, { "epoch": 12.97, "learning_rate": 1.7584566596194506e-05, "loss": 0.0328, "step": 12274 }, { "epoch": 12.98, "learning_rate": 1.7579281183932348e-05, "loss": 0.0346, "step": 12276 }, { "epoch": 12.98, "learning_rate": 1.757399577167019e-05, "loss": 0.0911, "step": 12278 }, { "epoch": 12.98, "learning_rate": 1.7568710359408037e-05, "loss": 0.0286, "step": 12280 }, { "epoch": 12.98, "learning_rate": 1.756342494714588e-05, "loss": 0.0165, "step": 12282 }, { "epoch": 12.99, "learning_rate": 1.7558139534883722e-05, "loss": 0.0112, "step": 12284 }, { "epoch": 12.99, "learning_rate": 1.7552854122621565e-05, "loss": 0.0442, "step": 12286 }, { "epoch": 12.99, "learning_rate": 1.7547568710359408e-05, "loss": 0.0113, "step": 12288 }, { "epoch": 12.99, "learning_rate": 1.7542283298097254e-05, "loss": 0.0397, "step": 12290 }, { "epoch": 12.99, "learning_rate": 1.7536997885835096e-05, "loss": 0.0113, "step": 12292 }, { "epoch": 13.0, "learning_rate": 1.753171247357294e-05, "loss": 0.0359, "step": 12294 }, { "epoch": 13.0, "learning_rate": 1.7526427061310782e-05, "loss": 0.0269, "step": 12296 }, { "epoch": 13.0, "learning_rate": 1.7521141649048628e-05, "loss": 0.0205, "step": 12298 }, { "epoch": 13.0, "learning_rate": 1.751585623678647e-05, "loss": 0.006, "step": 12300 }, { "epoch": 13.0, "learning_rate": 1.7510570824524313e-05, "loss": 0.0539, "step": 12302 }, { "epoch": 13.01, "learning_rate": 1.7505285412262156e-05, "loss": 0.0533, "step": 12304 }, { "epoch": 13.01, "learning_rate": 1.75e-05, "loss": 0.067, "step": 12306 }, { "epoch": 13.01, "learning_rate": 1.7494714587737845e-05, "loss": 0.0075, "step": 12308 }, { "epoch": 13.01, "learning_rate": 1.7489429175475687e-05, "loss": 0.0233, "step": 12310 }, { "epoch": 13.01, "learning_rate": 1.748414376321353e-05, "loss": 0.0528, "step": 12312 }, { "epoch": 13.02, "learning_rate": 1.7478858350951376e-05, "loss": 0.0479, "step": 12314 }, { "epoch": 13.02, "learning_rate": 1.747357293868922e-05, "loss": 0.1347, "step": 12316 }, { "epoch": 13.02, "learning_rate": 1.746828752642706e-05, "loss": 0.0294, "step": 12318 }, { "epoch": 13.02, "learning_rate": 1.7463002114164907e-05, "loss": 0.0273, "step": 12320 }, { "epoch": 13.03, "learning_rate": 1.745771670190275e-05, "loss": 0.0432, "step": 12322 }, { "epoch": 13.03, "learning_rate": 1.7452431289640593e-05, "loss": 0.106, "step": 12324 }, { "epoch": 13.03, "learning_rate": 1.744714587737844e-05, "loss": 0.0854, "step": 12326 }, { "epoch": 13.03, "learning_rate": 1.744186046511628e-05, "loss": 0.0735, "step": 12328 }, { "epoch": 13.03, "learning_rate": 1.7436575052854124e-05, "loss": 0.0072, "step": 12330 }, { "epoch": 13.04, "learning_rate": 1.7431289640591967e-05, "loss": 0.0377, "step": 12332 }, { "epoch": 13.04, "learning_rate": 1.7426004228329813e-05, "loss": 0.0226, "step": 12334 }, { "epoch": 13.04, "learning_rate": 1.7420718816067655e-05, "loss": 0.0568, "step": 12336 }, { "epoch": 13.04, "learning_rate": 1.7415433403805498e-05, "loss": 0.0474, "step": 12338 }, { "epoch": 13.04, "learning_rate": 1.741014799154334e-05, "loss": 0.0052, "step": 12340 }, { "epoch": 13.05, "learning_rate": 1.7404862579281183e-05, "loss": 0.0388, "step": 12342 }, { "epoch": 13.05, "learning_rate": 1.739957716701903e-05, "loss": 0.0411, "step": 12344 }, { "epoch": 13.05, "learning_rate": 1.7394291754756872e-05, "loss": 0.0087, "step": 12346 }, { "epoch": 13.05, "learning_rate": 1.7389006342494715e-05, "loss": 0.0273, "step": 12348 }, { "epoch": 13.05, "learning_rate": 1.7383720930232558e-05, "loss": 0.0399, "step": 12350 }, { "epoch": 13.06, "learning_rate": 1.73784355179704e-05, "loss": 0.003, "step": 12352 }, { "epoch": 13.06, "learning_rate": 1.7373150105708246e-05, "loss": 0.0108, "step": 12354 }, { "epoch": 13.06, "learning_rate": 1.736786469344609e-05, "loss": 0.0944, "step": 12356 }, { "epoch": 13.06, "learning_rate": 1.736257928118393e-05, "loss": 0.088, "step": 12358 }, { "epoch": 13.07, "learning_rate": 1.7357293868921774e-05, "loss": 0.0058, "step": 12360 }, { "epoch": 13.07, "learning_rate": 1.735200845665962e-05, "loss": 0.0372, "step": 12362 }, { "epoch": 13.07, "learning_rate": 1.7346723044397463e-05, "loss": 0.0398, "step": 12364 }, { "epoch": 13.07, "learning_rate": 1.7341437632135306e-05, "loss": 0.0178, "step": 12366 }, { "epoch": 13.07, "learning_rate": 1.7336152219873152e-05, "loss": 0.0135, "step": 12368 }, { "epoch": 13.08, "learning_rate": 1.7330866807610994e-05, "loss": 0.0077, "step": 12370 }, { "epoch": 13.08, "learning_rate": 1.732558139534884e-05, "loss": 0.016, "step": 12372 }, { "epoch": 13.08, "learning_rate": 1.7320295983086683e-05, "loss": 0.1078, "step": 12374 }, { "epoch": 13.08, "learning_rate": 1.7315010570824526e-05, "loss": 0.0113, "step": 12376 }, { "epoch": 13.08, "learning_rate": 1.730972515856237e-05, "loss": 0.032, "step": 12378 }, { "epoch": 13.09, "learning_rate": 1.7304439746300215e-05, "loss": 0.0365, "step": 12380 }, { "epoch": 13.09, "learning_rate": 1.7299154334038057e-05, "loss": 0.0738, "step": 12382 }, { "epoch": 13.09, "learning_rate": 1.72938689217759e-05, "loss": 0.0072, "step": 12384 }, { "epoch": 13.09, "learning_rate": 1.7288583509513743e-05, "loss": 0.0046, "step": 12386 }, { "epoch": 13.1, "learning_rate": 1.7283298097251585e-05, "loss": 0.0382, "step": 12388 }, { "epoch": 13.1, "learning_rate": 1.727801268498943e-05, "loss": 0.0071, "step": 12390 }, { "epoch": 13.1, "learning_rate": 1.7272727272727274e-05, "loss": 0.005, "step": 12392 }, { "epoch": 13.1, "learning_rate": 1.7267441860465117e-05, "loss": 0.1507, "step": 12394 }, { "epoch": 13.1, "learning_rate": 1.726215644820296e-05, "loss": 0.0189, "step": 12396 }, { "epoch": 13.11, "learning_rate": 1.7256871035940805e-05, "loss": 0.0148, "step": 12398 }, { "epoch": 13.11, "learning_rate": 1.7251585623678648e-05, "loss": 0.0264, "step": 12400 }, { "epoch": 13.11, "learning_rate": 1.724630021141649e-05, "loss": 0.0133, "step": 12402 }, { "epoch": 13.11, "learning_rate": 1.7241014799154333e-05, "loss": 0.0567, "step": 12404 }, { "epoch": 13.11, "learning_rate": 1.7235729386892176e-05, "loss": 0.0368, "step": 12406 }, { "epoch": 13.12, "learning_rate": 1.7230443974630022e-05, "loss": 0.0566, "step": 12408 }, { "epoch": 13.12, "learning_rate": 1.7225158562367865e-05, "loss": 0.0335, "step": 12410 }, { "epoch": 13.12, "learning_rate": 1.7219873150105707e-05, "loss": 0.0466, "step": 12412 }, { "epoch": 13.12, "learning_rate": 1.721458773784355e-05, "loss": 0.0176, "step": 12414 }, { "epoch": 13.12, "learning_rate": 1.7209302325581396e-05, "loss": 0.0085, "step": 12416 }, { "epoch": 13.13, "learning_rate": 1.720401691331924e-05, "loss": 0.0475, "step": 12418 }, { "epoch": 13.13, "learning_rate": 1.719873150105708e-05, "loss": 0.0202, "step": 12420 }, { "epoch": 13.13, "learning_rate": 1.7193446088794928e-05, "loss": 0.0277, "step": 12422 }, { "epoch": 13.13, "learning_rate": 1.718816067653277e-05, "loss": 0.0702, "step": 12424 }, { "epoch": 13.14, "learning_rate": 1.7182875264270616e-05, "loss": 0.0043, "step": 12426 }, { "epoch": 13.14, "learning_rate": 1.717758985200846e-05, "loss": 0.0044, "step": 12428 }, { "epoch": 13.14, "learning_rate": 1.71723044397463e-05, "loss": 0.0139, "step": 12430 }, { "epoch": 13.14, "learning_rate": 1.7167019027484144e-05, "loss": 0.0196, "step": 12432 }, { "epoch": 13.14, "learning_rate": 1.716173361522199e-05, "loss": 0.0219, "step": 12434 }, { "epoch": 13.15, "learning_rate": 1.7156448202959833e-05, "loss": 0.007, "step": 12436 }, { "epoch": 13.15, "learning_rate": 1.7151162790697676e-05, "loss": 0.063, "step": 12438 }, { "epoch": 13.15, "learning_rate": 1.714587737843552e-05, "loss": 0.0267, "step": 12440 }, { "epoch": 13.15, "learning_rate": 1.714059196617336e-05, "loss": 0.0312, "step": 12442 }, { "epoch": 13.15, "learning_rate": 1.7135306553911207e-05, "loss": 0.0269, "step": 12444 }, { "epoch": 13.16, "learning_rate": 1.713002114164905e-05, "loss": 0.0248, "step": 12446 }, { "epoch": 13.16, "learning_rate": 1.7124735729386892e-05, "loss": 0.0452, "step": 12448 }, { "epoch": 13.16, "learning_rate": 1.7119450317124735e-05, "loss": 0.0167, "step": 12450 }, { "epoch": 13.16, "learning_rate": 1.711416490486258e-05, "loss": 0.0416, "step": 12452 }, { "epoch": 13.16, "learning_rate": 1.7108879492600424e-05, "loss": 0.017, "step": 12454 }, { "epoch": 13.17, "learning_rate": 1.7103594080338267e-05, "loss": 0.0101, "step": 12456 }, { "epoch": 13.17, "learning_rate": 1.709830866807611e-05, "loss": 0.0318, "step": 12458 }, { "epoch": 13.17, "learning_rate": 1.7093023255813952e-05, "loss": 0.0352, "step": 12460 }, { "epoch": 13.17, "learning_rate": 1.7087737843551798e-05, "loss": 0.0109, "step": 12462 }, { "epoch": 13.18, "learning_rate": 1.708245243128964e-05, "loss": 0.0253, "step": 12464 }, { "epoch": 13.18, "learning_rate": 1.7077167019027483e-05, "loss": 0.0144, "step": 12466 }, { "epoch": 13.18, "learning_rate": 1.7071881606765326e-05, "loss": 0.0045, "step": 12468 }, { "epoch": 13.18, "learning_rate": 1.7066596194503172e-05, "loss": 0.0683, "step": 12470 }, { "epoch": 13.18, "learning_rate": 1.7061310782241015e-05, "loss": 0.0128, "step": 12472 }, { "epoch": 13.19, "learning_rate": 1.705602536997886e-05, "loss": 0.0172, "step": 12474 }, { "epoch": 13.19, "learning_rate": 1.7050739957716703e-05, "loss": 0.0041, "step": 12476 }, { "epoch": 13.19, "learning_rate": 1.7045454545454546e-05, "loss": 0.044, "step": 12478 }, { "epoch": 13.19, "learning_rate": 1.7040169133192392e-05, "loss": 0.0309, "step": 12480 }, { "epoch": 13.19, "learning_rate": 1.7034883720930235e-05, "loss": 0.0039, "step": 12482 }, { "epoch": 13.2, "learning_rate": 1.7029598308668078e-05, "loss": 0.0094, "step": 12484 }, { "epoch": 13.2, "learning_rate": 1.702431289640592e-05, "loss": 0.0042, "step": 12486 }, { "epoch": 13.2, "learning_rate": 1.7019027484143766e-05, "loss": 0.0219, "step": 12488 }, { "epoch": 13.2, "learning_rate": 1.701374207188161e-05, "loss": 0.0145, "step": 12490 }, { "epoch": 13.21, "learning_rate": 1.700845665961945e-05, "loss": 0.0336, "step": 12492 }, { "epoch": 13.21, "learning_rate": 1.7003171247357294e-05, "loss": 0.0401, "step": 12494 }, { "epoch": 13.21, "learning_rate": 1.6997885835095137e-05, "loss": 0.0391, "step": 12496 }, { "epoch": 13.21, "learning_rate": 1.6992600422832983e-05, "loss": 0.038, "step": 12498 }, { "epoch": 13.21, "learning_rate": 1.6987315010570826e-05, "loss": 0.0196, "step": 12500 }, { "epoch": 13.21, "eval_cer": 0.06223995440296381, "eval_loss": 0.755074143409729, "eval_runtime": 124.4091, "eval_samples_per_second": 6.76, "eval_steps_per_second": 0.852, "step": 12500 }, { "epoch": 13.22, "learning_rate": 1.698202959830867e-05, "loss": 0.007, "step": 12502 }, { "epoch": 13.22, "learning_rate": 1.697674418604651e-05, "loss": 0.025, "step": 12504 }, { "epoch": 13.22, "learning_rate": 1.6971458773784357e-05, "loss": 0.029, "step": 12506 }, { "epoch": 13.22, "learning_rate": 1.69661733615222e-05, "loss": 0.0127, "step": 12508 }, { "epoch": 13.22, "learning_rate": 1.6960887949260042e-05, "loss": 0.0133, "step": 12510 }, { "epoch": 13.23, "learning_rate": 1.6955602536997885e-05, "loss": 0.0413, "step": 12512 }, { "epoch": 13.23, "learning_rate": 1.6950317124735728e-05, "loss": 0.0103, "step": 12514 }, { "epoch": 13.23, "learning_rate": 1.6945031712473574e-05, "loss": 0.0451, "step": 12516 }, { "epoch": 13.23, "learning_rate": 1.6939746300211416e-05, "loss": 0.0842, "step": 12518 }, { "epoch": 13.23, "learning_rate": 1.693446088794926e-05, "loss": 0.0592, "step": 12520 }, { "epoch": 13.24, "learning_rate": 1.6929175475687102e-05, "loss": 0.0405, "step": 12522 }, { "epoch": 13.24, "learning_rate": 1.6923890063424948e-05, "loss": 0.0995, "step": 12524 }, { "epoch": 13.24, "learning_rate": 1.691860465116279e-05, "loss": 0.0079, "step": 12526 }, { "epoch": 13.24, "learning_rate": 1.6913319238900637e-05, "loss": 0.0506, "step": 12528 }, { "epoch": 13.25, "learning_rate": 1.690803382663848e-05, "loss": 0.0021, "step": 12530 }, { "epoch": 13.25, "learning_rate": 1.6902748414376322e-05, "loss": 0.0799, "step": 12532 }, { "epoch": 13.25, "learning_rate": 1.6897463002114168e-05, "loss": 0.0053, "step": 12534 }, { "epoch": 13.25, "learning_rate": 1.689217758985201e-05, "loss": 0.0199, "step": 12536 }, { "epoch": 13.25, "learning_rate": 1.6886892177589853e-05, "loss": 0.0066, "step": 12538 }, { "epoch": 13.26, "learning_rate": 1.6881606765327696e-05, "loss": 0.015, "step": 12540 }, { "epoch": 13.26, "learning_rate": 1.6876321353065542e-05, "loss": 0.0059, "step": 12542 }, { "epoch": 13.26, "learning_rate": 1.6871035940803385e-05, "loss": 0.0768, "step": 12544 }, { "epoch": 13.26, "learning_rate": 1.6865750528541227e-05, "loss": 0.0812, "step": 12546 }, { "epoch": 13.26, "learning_rate": 1.686046511627907e-05, "loss": 0.0314, "step": 12548 }, { "epoch": 13.27, "learning_rate": 1.6855179704016913e-05, "loss": 0.0307, "step": 12550 }, { "epoch": 13.27, "learning_rate": 1.684989429175476e-05, "loss": 0.0369, "step": 12552 }, { "epoch": 13.27, "learning_rate": 1.68446088794926e-05, "loss": 0.0515, "step": 12554 }, { "epoch": 13.27, "learning_rate": 1.6839323467230444e-05, "loss": 0.0093, "step": 12556 }, { "epoch": 13.27, "learning_rate": 1.6834038054968287e-05, "loss": 0.0179, "step": 12558 }, { "epoch": 13.28, "learning_rate": 1.6828752642706133e-05, "loss": 0.0736, "step": 12560 }, { "epoch": 13.28, "learning_rate": 1.6823467230443976e-05, "loss": 0.0317, "step": 12562 }, { "epoch": 13.28, "learning_rate": 1.6818181818181818e-05, "loss": 0.0386, "step": 12564 }, { "epoch": 13.28, "learning_rate": 1.681289640591966e-05, "loss": 0.0064, "step": 12566 }, { "epoch": 13.29, "learning_rate": 1.6807610993657504e-05, "loss": 0.0181, "step": 12568 }, { "epoch": 13.29, "learning_rate": 1.680232558139535e-05, "loss": 0.0671, "step": 12570 }, { "epoch": 13.29, "learning_rate": 1.6797040169133192e-05, "loss": 0.0633, "step": 12572 }, { "epoch": 13.29, "learning_rate": 1.6791754756871035e-05, "loss": 0.0319, "step": 12574 }, { "epoch": 13.29, "learning_rate": 1.678646934460888e-05, "loss": 0.0428, "step": 12576 }, { "epoch": 13.3, "learning_rate": 1.6781183932346724e-05, "loss": 0.0128, "step": 12578 }, { "epoch": 13.3, "learning_rate": 1.6775898520084566e-05, "loss": 0.0144, "step": 12580 }, { "epoch": 13.3, "learning_rate": 1.6770613107822412e-05, "loss": 0.0642, "step": 12582 }, { "epoch": 13.3, "learning_rate": 1.6765327695560255e-05, "loss": 0.0288, "step": 12584 }, { "epoch": 13.3, "learning_rate": 1.6760042283298098e-05, "loss": 0.0676, "step": 12586 }, { "epoch": 13.31, "learning_rate": 1.6754756871035944e-05, "loss": 0.0565, "step": 12588 }, { "epoch": 13.31, "learning_rate": 1.6749471458773787e-05, "loss": 0.0209, "step": 12590 }, { "epoch": 13.31, "learning_rate": 1.674418604651163e-05, "loss": 0.0521, "step": 12592 }, { "epoch": 13.31, "learning_rate": 1.6738900634249472e-05, "loss": 0.0486, "step": 12594 }, { "epoch": 13.32, "learning_rate": 1.6733615221987318e-05, "loss": 0.0414, "step": 12596 }, { "epoch": 13.32, "learning_rate": 1.672832980972516e-05, "loss": 0.1086, "step": 12598 }, { "epoch": 13.32, "learning_rate": 1.6723044397463003e-05, "loss": 0.0127, "step": 12600 }, { "epoch": 13.32, "learning_rate": 1.6717758985200846e-05, "loss": 0.0137, "step": 12602 }, { "epoch": 13.32, "learning_rate": 1.671247357293869e-05, "loss": 0.0251, "step": 12604 }, { "epoch": 13.33, "learning_rate": 1.6707188160676535e-05, "loss": 0.0726, "step": 12606 }, { "epoch": 13.33, "learning_rate": 1.6701902748414377e-05, "loss": 0.0432, "step": 12608 }, { "epoch": 13.33, "learning_rate": 1.669661733615222e-05, "loss": 0.0551, "step": 12610 }, { "epoch": 13.33, "learning_rate": 1.6691331923890063e-05, "loss": 0.022, "step": 12612 }, { "epoch": 13.33, "learning_rate": 1.668604651162791e-05, "loss": 0.0516, "step": 12614 }, { "epoch": 13.34, "learning_rate": 1.668076109936575e-05, "loss": 0.0044, "step": 12616 }, { "epoch": 13.34, "learning_rate": 1.6675475687103594e-05, "loss": 0.0202, "step": 12618 }, { "epoch": 13.34, "learning_rate": 1.6670190274841437e-05, "loss": 0.0538, "step": 12620 }, { "epoch": 13.34, "learning_rate": 1.666490486257928e-05, "loss": 0.0165, "step": 12622 }, { "epoch": 13.34, "learning_rate": 1.6659619450317125e-05, "loss": 0.0322, "step": 12624 }, { "epoch": 13.35, "learning_rate": 1.6654334038054968e-05, "loss": 0.043, "step": 12626 }, { "epoch": 13.35, "learning_rate": 1.664904862579281e-05, "loss": 0.0144, "step": 12628 }, { "epoch": 13.35, "learning_rate": 1.6643763213530657e-05, "loss": 0.0054, "step": 12630 }, { "epoch": 13.35, "learning_rate": 1.66384778012685e-05, "loss": 0.0574, "step": 12632 }, { "epoch": 13.36, "learning_rate": 1.6633192389006346e-05, "loss": 0.0266, "step": 12634 }, { "epoch": 13.36, "learning_rate": 1.6627906976744188e-05, "loss": 0.0316, "step": 12636 }, { "epoch": 13.36, "learning_rate": 1.662262156448203e-05, "loss": 0.0876, "step": 12638 }, { "epoch": 13.36, "learning_rate": 1.6617336152219874e-05, "loss": 0.0855, "step": 12640 }, { "epoch": 13.36, "learning_rate": 1.661205073995772e-05, "loss": 0.1275, "step": 12642 }, { "epoch": 13.37, "learning_rate": 1.6606765327695562e-05, "loss": 0.0203, "step": 12644 }, { "epoch": 13.37, "learning_rate": 1.6601479915433405e-05, "loss": 0.0323, "step": 12646 }, { "epoch": 13.37, "learning_rate": 1.6596194503171248e-05, "loss": 0.0605, "step": 12648 }, { "epoch": 13.37, "learning_rate": 1.6590909090909094e-05, "loss": 0.0931, "step": 12650 }, { "epoch": 13.37, "learning_rate": 1.6585623678646936e-05, "loss": 0.0625, "step": 12652 }, { "epoch": 13.38, "learning_rate": 1.658033826638478e-05, "loss": 0.1087, "step": 12654 }, { "epoch": 13.38, "learning_rate": 1.6575052854122622e-05, "loss": 0.0316, "step": 12656 }, { "epoch": 13.38, "learning_rate": 1.6569767441860464e-05, "loss": 0.0726, "step": 12658 }, { "epoch": 13.38, "learning_rate": 1.656448202959831e-05, "loss": 0.0508, "step": 12660 }, { "epoch": 13.38, "learning_rate": 1.6559196617336153e-05, "loss": 0.0491, "step": 12662 }, { "epoch": 13.39, "learning_rate": 1.6553911205073996e-05, "loss": 0.0286, "step": 12664 }, { "epoch": 13.39, "learning_rate": 1.654862579281184e-05, "loss": 0.0067, "step": 12666 }, { "epoch": 13.39, "learning_rate": 1.6543340380549685e-05, "loss": 0.0397, "step": 12668 }, { "epoch": 13.39, "learning_rate": 1.6538054968287527e-05, "loss": 0.0147, "step": 12670 }, { "epoch": 13.4, "learning_rate": 1.653276955602537e-05, "loss": 0.0627, "step": 12672 }, { "epoch": 13.4, "learning_rate": 1.6527484143763213e-05, "loss": 0.0232, "step": 12674 }, { "epoch": 13.4, "learning_rate": 1.6522198731501055e-05, "loss": 0.0828, "step": 12676 }, { "epoch": 13.4, "learning_rate": 1.65169133192389e-05, "loss": 0.0694, "step": 12678 }, { "epoch": 13.4, "learning_rate": 1.6511627906976744e-05, "loss": 0.0384, "step": 12680 }, { "epoch": 13.41, "learning_rate": 1.650634249471459e-05, "loss": 0.0524, "step": 12682 }, { "epoch": 13.41, "learning_rate": 1.6501057082452433e-05, "loss": 0.0372, "step": 12684 }, { "epoch": 13.41, "learning_rate": 1.6495771670190275e-05, "loss": 0.0427, "step": 12686 }, { "epoch": 13.41, "learning_rate": 1.649048625792812e-05, "loss": 0.0364, "step": 12688 }, { "epoch": 13.41, "learning_rate": 1.6485200845665964e-05, "loss": 0.0206, "step": 12690 }, { "epoch": 13.42, "learning_rate": 1.6479915433403807e-05, "loss": 0.0788, "step": 12692 }, { "epoch": 13.42, "learning_rate": 1.647463002114165e-05, "loss": 0.0328, "step": 12694 }, { "epoch": 13.42, "learning_rate": 1.6469344608879496e-05, "loss": 0.0387, "step": 12696 }, { "epoch": 13.42, "learning_rate": 1.6464059196617338e-05, "loss": 0.0256, "step": 12698 }, { "epoch": 13.42, "learning_rate": 1.645877378435518e-05, "loss": 0.0057, "step": 12700 }, { "epoch": 13.43, "learning_rate": 1.6453488372093024e-05, "loss": 0.0207, "step": 12702 }, { "epoch": 13.43, "learning_rate": 1.6448202959830866e-05, "loss": 0.026, "step": 12704 }, { "epoch": 13.43, "learning_rate": 1.6442917547568712e-05, "loss": 0.0282, "step": 12706 }, { "epoch": 13.43, "learning_rate": 1.6437632135306555e-05, "loss": 0.0158, "step": 12708 }, { "epoch": 13.44, "learning_rate": 1.6432346723044398e-05, "loss": 0.0053, "step": 12710 }, { "epoch": 13.44, "learning_rate": 1.642706131078224e-05, "loss": 0.0359, "step": 12712 }, { "epoch": 13.44, "learning_rate": 1.6421775898520086e-05, "loss": 0.0409, "step": 12714 }, { "epoch": 13.44, "learning_rate": 1.641649048625793e-05, "loss": 0.0491, "step": 12716 }, { "epoch": 13.44, "learning_rate": 1.641120507399577e-05, "loss": 0.0802, "step": 12718 }, { "epoch": 13.45, "learning_rate": 1.6405919661733614e-05, "loss": 0.014, "step": 12720 }, { "epoch": 13.45, "learning_rate": 1.6400634249471457e-05, "loss": 0.0092, "step": 12722 }, { "epoch": 13.45, "learning_rate": 1.6395348837209303e-05, "loss": 0.017, "step": 12724 }, { "epoch": 13.45, "learning_rate": 1.6390063424947146e-05, "loss": 0.0035, "step": 12726 }, { "epoch": 13.45, "learning_rate": 1.638477801268499e-05, "loss": 0.0399, "step": 12728 }, { "epoch": 13.46, "learning_rate": 1.637949260042283e-05, "loss": 0.047, "step": 12730 }, { "epoch": 13.46, "learning_rate": 1.6374207188160677e-05, "loss": 0.0686, "step": 12732 }, { "epoch": 13.46, "learning_rate": 1.636892177589852e-05, "loss": 0.0192, "step": 12734 }, { "epoch": 13.46, "learning_rate": 1.6363636363636366e-05, "loss": 0.0117, "step": 12736 }, { "epoch": 13.47, "learning_rate": 1.635835095137421e-05, "loss": 0.0389, "step": 12738 }, { "epoch": 13.47, "learning_rate": 1.635306553911205e-05, "loss": 0.013, "step": 12740 }, { "epoch": 13.47, "learning_rate": 1.6347780126849897e-05, "loss": 0.0425, "step": 12742 }, { "epoch": 13.47, "learning_rate": 1.634249471458774e-05, "loss": 0.0402, "step": 12744 }, { "epoch": 13.47, "learning_rate": 1.6337209302325583e-05, "loss": 0.0108, "step": 12746 }, { "epoch": 13.48, "learning_rate": 1.6331923890063425e-05, "loss": 0.0307, "step": 12748 }, { "epoch": 13.48, "learning_rate": 1.632663847780127e-05, "loss": 0.0325, "step": 12750 }, { "epoch": 13.48, "learning_rate": 1.6321353065539114e-05, "loss": 0.0307, "step": 12752 }, { "epoch": 13.48, "learning_rate": 1.6316067653276957e-05, "loss": 0.0683, "step": 12754 }, { "epoch": 13.48, "learning_rate": 1.63107822410148e-05, "loss": 0.1128, "step": 12756 }, { "epoch": 13.49, "learning_rate": 1.6305496828752642e-05, "loss": 0.0099, "step": 12758 }, { "epoch": 13.49, "learning_rate": 1.6300211416490488e-05, "loss": 0.0266, "step": 12760 }, { "epoch": 13.49, "learning_rate": 1.629492600422833e-05, "loss": 0.0508, "step": 12762 }, { "epoch": 13.49, "learning_rate": 1.6289640591966173e-05, "loss": 0.0086, "step": 12764 }, { "epoch": 13.49, "learning_rate": 1.6284355179704016e-05, "loss": 0.0065, "step": 12766 }, { "epoch": 13.5, "learning_rate": 1.6279069767441862e-05, "loss": 0.0293, "step": 12768 }, { "epoch": 13.5, "learning_rate": 1.6273784355179705e-05, "loss": 0.0284, "step": 12770 }, { "epoch": 13.5, "learning_rate": 1.6268498942917548e-05, "loss": 0.049, "step": 12772 }, { "epoch": 13.5, "learning_rate": 1.626321353065539e-05, "loss": 0.0055, "step": 12774 }, { "epoch": 13.51, "learning_rate": 1.6257928118393233e-05, "loss": 0.0229, "step": 12776 }, { "epoch": 13.51, "learning_rate": 1.625264270613108e-05, "loss": 0.025, "step": 12778 }, { "epoch": 13.51, "learning_rate": 1.624735729386892e-05, "loss": 0.0119, "step": 12780 }, { "epoch": 13.51, "learning_rate": 1.6242071881606764e-05, "loss": 0.0036, "step": 12782 }, { "epoch": 13.51, "learning_rate": 1.623678646934461e-05, "loss": 0.0205, "step": 12784 }, { "epoch": 13.52, "learning_rate": 1.6231501057082453e-05, "loss": 0.0826, "step": 12786 }, { "epoch": 13.52, "learning_rate": 1.6226215644820296e-05, "loss": 0.0318, "step": 12788 }, { "epoch": 13.52, "learning_rate": 1.6220930232558142e-05, "loss": 0.06, "step": 12790 }, { "epoch": 13.52, "learning_rate": 1.6215644820295984e-05, "loss": 0.0567, "step": 12792 }, { "epoch": 13.52, "learning_rate": 1.6210359408033827e-05, "loss": 0.0042, "step": 12794 }, { "epoch": 13.53, "learning_rate": 1.6205073995771673e-05, "loss": 0.0224, "step": 12796 }, { "epoch": 13.53, "learning_rate": 1.6199788583509516e-05, "loss": 0.0903, "step": 12798 }, { "epoch": 13.53, "learning_rate": 1.619450317124736e-05, "loss": 0.0358, "step": 12800 }, { "epoch": 13.53, "learning_rate": 1.61892177589852e-05, "loss": 0.0387, "step": 12802 }, { "epoch": 13.53, "learning_rate": 1.6183932346723047e-05, "loss": 0.012, "step": 12804 }, { "epoch": 13.54, "learning_rate": 1.617864693446089e-05, "loss": 0.0068, "step": 12806 }, { "epoch": 13.54, "learning_rate": 1.6173361522198733e-05, "loss": 0.0562, "step": 12808 }, { "epoch": 13.54, "learning_rate": 1.6168076109936575e-05, "loss": 0.0161, "step": 12810 }, { "epoch": 13.54, "learning_rate": 1.6162790697674418e-05, "loss": 0.1477, "step": 12812 }, { "epoch": 13.55, "learning_rate": 1.6157505285412264e-05, "loss": 0.0068, "step": 12814 }, { "epoch": 13.55, "learning_rate": 1.6152219873150107e-05, "loss": 0.0012, "step": 12816 }, { "epoch": 13.55, "learning_rate": 1.614693446088795e-05, "loss": 0.0062, "step": 12818 }, { "epoch": 13.55, "learning_rate": 1.6141649048625792e-05, "loss": 0.0027, "step": 12820 }, { "epoch": 13.55, "learning_rate": 1.6136363636363638e-05, "loss": 0.0334, "step": 12822 }, { "epoch": 13.56, "learning_rate": 1.613107822410148e-05, "loss": 0.0741, "step": 12824 }, { "epoch": 13.56, "learning_rate": 1.6125792811839323e-05, "loss": 0.0322, "step": 12826 }, { "epoch": 13.56, "learning_rate": 1.6120507399577166e-05, "loss": 0.0601, "step": 12828 }, { "epoch": 13.56, "learning_rate": 1.611522198731501e-05, "loss": 0.0885, "step": 12830 }, { "epoch": 13.56, "learning_rate": 1.6109936575052855e-05, "loss": 0.0439, "step": 12832 }, { "epoch": 13.57, "learning_rate": 1.6104651162790697e-05, "loss": 0.0516, "step": 12834 }, { "epoch": 13.57, "learning_rate": 1.609936575052854e-05, "loss": 0.0019, "step": 12836 }, { "epoch": 13.57, "learning_rate": 1.6094080338266386e-05, "loss": 0.0077, "step": 12838 }, { "epoch": 13.57, "learning_rate": 1.608879492600423e-05, "loss": 0.0339, "step": 12840 }, { "epoch": 13.58, "learning_rate": 1.6083509513742075e-05, "loss": 0.095, "step": 12842 }, { "epoch": 13.58, "learning_rate": 1.6078224101479918e-05, "loss": 0.0199, "step": 12844 }, { "epoch": 13.58, "learning_rate": 1.607293868921776e-05, "loss": 0.0221, "step": 12846 }, { "epoch": 13.58, "learning_rate": 1.6067653276955603e-05, "loss": 0.0254, "step": 12848 }, { "epoch": 13.58, "learning_rate": 1.606236786469345e-05, "loss": 0.0099, "step": 12850 }, { "epoch": 13.59, "learning_rate": 1.605708245243129e-05, "loss": 0.0319, "step": 12852 }, { "epoch": 13.59, "learning_rate": 1.6051797040169134e-05, "loss": 0.0054, "step": 12854 }, { "epoch": 13.59, "learning_rate": 1.6046511627906977e-05, "loss": 0.0582, "step": 12856 }, { "epoch": 13.59, "learning_rate": 1.6041226215644823e-05, "loss": 0.0241, "step": 12858 }, { "epoch": 13.59, "learning_rate": 1.6035940803382666e-05, "loss": 0.0605, "step": 12860 }, { "epoch": 13.6, "learning_rate": 1.603065539112051e-05, "loss": 0.0041, "step": 12862 }, { "epoch": 13.6, "learning_rate": 1.602536997885835e-05, "loss": 0.0449, "step": 12864 }, { "epoch": 13.6, "learning_rate": 1.6020084566596194e-05, "loss": 0.037, "step": 12866 }, { "epoch": 13.6, "learning_rate": 1.601479915433404e-05, "loss": 0.0217, "step": 12868 }, { "epoch": 13.6, "learning_rate": 1.6009513742071882e-05, "loss": 0.0166, "step": 12870 }, { "epoch": 13.61, "learning_rate": 1.6004228329809725e-05, "loss": 0.0344, "step": 12872 }, { "epoch": 13.61, "learning_rate": 1.5998942917547568e-05, "loss": 0.0757, "step": 12874 }, { "epoch": 13.61, "learning_rate": 1.5993657505285414e-05, "loss": 0.0141, "step": 12876 }, { "epoch": 13.61, "learning_rate": 1.5988372093023257e-05, "loss": 0.0195, "step": 12878 }, { "epoch": 13.62, "learning_rate": 1.59830866807611e-05, "loss": 0.0059, "step": 12880 }, { "epoch": 13.62, "learning_rate": 1.5977801268498942e-05, "loss": 0.0134, "step": 12882 }, { "epoch": 13.62, "learning_rate": 1.5972515856236785e-05, "loss": 0.0369, "step": 12884 }, { "epoch": 13.62, "learning_rate": 1.596723044397463e-05, "loss": 0.0202, "step": 12886 }, { "epoch": 13.62, "learning_rate": 1.5961945031712473e-05, "loss": 0.0057, "step": 12888 }, { "epoch": 13.63, "learning_rate": 1.5956659619450316e-05, "loss": 0.0126, "step": 12890 }, { "epoch": 13.63, "learning_rate": 1.5951374207188162e-05, "loss": 0.0295, "step": 12892 }, { "epoch": 13.63, "learning_rate": 1.5946088794926005e-05, "loss": 0.0557, "step": 12894 }, { "epoch": 13.63, "learning_rate": 1.594080338266385e-05, "loss": 0.0056, "step": 12896 }, { "epoch": 13.63, "learning_rate": 1.5935517970401693e-05, "loss": 0.0762, "step": 12898 }, { "epoch": 13.64, "learning_rate": 1.5930232558139536e-05, "loss": 0.1435, "step": 12900 }, { "epoch": 13.64, "learning_rate": 1.592494714587738e-05, "loss": 0.0182, "step": 12902 }, { "epoch": 13.64, "learning_rate": 1.5919661733615225e-05, "loss": 0.0355, "step": 12904 }, { "epoch": 13.64, "learning_rate": 1.5914376321353067e-05, "loss": 0.0059, "step": 12906 }, { "epoch": 13.64, "learning_rate": 1.590909090909091e-05, "loss": 0.0151, "step": 12908 }, { "epoch": 13.65, "learning_rate": 1.5903805496828753e-05, "loss": 0.0204, "step": 12910 }, { "epoch": 13.65, "learning_rate": 1.58985200845666e-05, "loss": 0.006, "step": 12912 }, { "epoch": 13.65, "learning_rate": 1.589323467230444e-05, "loss": 0.0701, "step": 12914 }, { "epoch": 13.65, "learning_rate": 1.5887949260042284e-05, "loss": 0.0503, "step": 12916 }, { "epoch": 13.66, "learning_rate": 1.5882663847780127e-05, "loss": 0.0082, "step": 12918 }, { "epoch": 13.66, "learning_rate": 1.587737843551797e-05, "loss": 0.0953, "step": 12920 }, { "epoch": 13.66, "learning_rate": 1.5872093023255816e-05, "loss": 0.0477, "step": 12922 }, { "epoch": 13.66, "learning_rate": 1.586680761099366e-05, "loss": 0.0165, "step": 12924 }, { "epoch": 13.66, "learning_rate": 1.58615221987315e-05, "loss": 0.0203, "step": 12926 }, { "epoch": 13.67, "learning_rate": 1.5856236786469344e-05, "loss": 0.0265, "step": 12928 }, { "epoch": 13.67, "learning_rate": 1.585095137420719e-05, "loss": 0.0096, "step": 12930 }, { "epoch": 13.67, "learning_rate": 1.5845665961945032e-05, "loss": 0.001, "step": 12932 }, { "epoch": 13.67, "learning_rate": 1.5840380549682875e-05, "loss": 0.0594, "step": 12934 }, { "epoch": 13.67, "learning_rate": 1.5835095137420718e-05, "loss": 0.0072, "step": 12936 }, { "epoch": 13.68, "learning_rate": 1.582980972515856e-05, "loss": 0.0511, "step": 12938 }, { "epoch": 13.68, "learning_rate": 1.5824524312896406e-05, "loss": 0.0159, "step": 12940 }, { "epoch": 13.68, "learning_rate": 1.581923890063425e-05, "loss": 0.0259, "step": 12942 }, { "epoch": 13.68, "learning_rate": 1.5813953488372095e-05, "loss": 0.1291, "step": 12944 }, { "epoch": 13.68, "learning_rate": 1.5808668076109938e-05, "loss": 0.043, "step": 12946 }, { "epoch": 13.69, "learning_rate": 1.580338266384778e-05, "loss": 0.0402, "step": 12948 }, { "epoch": 13.69, "learning_rate": 1.5798097251585627e-05, "loss": 0.0065, "step": 12950 }, { "epoch": 13.69, "learning_rate": 1.579281183932347e-05, "loss": 0.0221, "step": 12952 }, { "epoch": 13.69, "learning_rate": 1.5787526427061312e-05, "loss": 0.0411, "step": 12954 }, { "epoch": 13.7, "learning_rate": 1.5782241014799155e-05, "loss": 0.0816, "step": 12956 }, { "epoch": 13.7, "learning_rate": 1.5776955602537e-05, "loss": 0.0357, "step": 12958 }, { "epoch": 13.7, "learning_rate": 1.5771670190274843e-05, "loss": 0.0548, "step": 12960 }, { "epoch": 13.7, "learning_rate": 1.5766384778012686e-05, "loss": 0.0553, "step": 12962 }, { "epoch": 13.7, "learning_rate": 1.576109936575053e-05, "loss": 0.0188, "step": 12964 }, { "epoch": 13.71, "learning_rate": 1.5755813953488375e-05, "loss": 0.066, "step": 12966 }, { "epoch": 13.71, "learning_rate": 1.5750528541226217e-05, "loss": 0.0554, "step": 12968 }, { "epoch": 13.71, "learning_rate": 1.574524312896406e-05, "loss": 0.0076, "step": 12970 }, { "epoch": 13.71, "learning_rate": 1.5739957716701903e-05, "loss": 0.0499, "step": 12972 }, { "epoch": 13.71, "learning_rate": 1.5734672304439745e-05, "loss": 0.0239, "step": 12974 }, { "epoch": 13.72, "learning_rate": 1.572938689217759e-05, "loss": 0.009, "step": 12976 }, { "epoch": 13.72, "learning_rate": 1.5724101479915434e-05, "loss": 0.0518, "step": 12978 }, { "epoch": 13.72, "learning_rate": 1.5718816067653277e-05, "loss": 0.0334, "step": 12980 }, { "epoch": 13.72, "learning_rate": 1.571353065539112e-05, "loss": 0.0415, "step": 12982 }, { "epoch": 13.73, "learning_rate": 1.5708245243128966e-05, "loss": 0.0461, "step": 12984 }, { "epoch": 13.73, "learning_rate": 1.5702959830866808e-05, "loss": 0.0394, "step": 12986 }, { "epoch": 13.73, "learning_rate": 1.569767441860465e-05, "loss": 0.0133, "step": 12988 }, { "epoch": 13.73, "learning_rate": 1.5692389006342494e-05, "loss": 0.0237, "step": 12990 }, { "epoch": 13.73, "learning_rate": 1.5687103594080336e-05, "loss": 0.0313, "step": 12992 }, { "epoch": 13.74, "learning_rate": 1.5681818181818182e-05, "loss": 0.0376, "step": 12994 }, { "epoch": 13.74, "learning_rate": 1.5676532769556025e-05, "loss": 0.0207, "step": 12996 }, { "epoch": 13.74, "learning_rate": 1.567124735729387e-05, "loss": 0.0201, "step": 12998 }, { "epoch": 13.74, "learning_rate": 1.5665961945031714e-05, "loss": 0.0495, "step": 13000 }, { "epoch": 13.74, "eval_cer": 0.04098033627814192, "eval_loss": 0.49177786707878113, "eval_runtime": 126.4592, "eval_samples_per_second": 6.65, "eval_steps_per_second": 0.838, "step": 13000 }, { "epoch": 13.74, "learning_rate": 1.566067653276956e-05, "loss": 0.0182, "step": 13002 }, { "epoch": 13.75, "learning_rate": 1.5655391120507402e-05, "loss": 0.0455, "step": 13004 }, { "epoch": 13.75, "learning_rate": 1.5650105708245245e-05, "loss": 0.0239, "step": 13006 }, { "epoch": 13.75, "learning_rate": 1.5644820295983088e-05, "loss": 0.0461, "step": 13008 }, { "epoch": 13.75, "learning_rate": 1.563953488372093e-05, "loss": 0.0214, "step": 13010 }, { "epoch": 13.75, "learning_rate": 1.5634249471458776e-05, "loss": 0.0747, "step": 13012 }, { "epoch": 13.76, "learning_rate": 1.562896405919662e-05, "loss": 0.0303, "step": 13014 }, { "epoch": 13.76, "learning_rate": 1.5623678646934462e-05, "loss": 0.0234, "step": 13016 }, { "epoch": 13.76, "learning_rate": 1.5618393234672305e-05, "loss": 0.0324, "step": 13018 }, { "epoch": 13.76, "learning_rate": 1.5613107822410147e-05, "loss": 0.0018, "step": 13020 }, { "epoch": 13.77, "learning_rate": 1.5607822410147993e-05, "loss": 0.0218, "step": 13022 }, { "epoch": 13.77, "learning_rate": 1.5602536997885836e-05, "loss": 0.0367, "step": 13024 }, { "epoch": 13.77, "learning_rate": 1.559725158562368e-05, "loss": 0.0582, "step": 13026 }, { "epoch": 13.77, "learning_rate": 1.559196617336152e-05, "loss": 0.0355, "step": 13028 }, { "epoch": 13.77, "learning_rate": 1.5586680761099367e-05, "loss": 0.0429, "step": 13030 }, { "epoch": 13.78, "learning_rate": 1.558139534883721e-05, "loss": 0.0299, "step": 13032 }, { "epoch": 13.78, "learning_rate": 1.5576109936575053e-05, "loss": 0.0404, "step": 13034 }, { "epoch": 13.78, "learning_rate": 1.5570824524312895e-05, "loss": 0.0406, "step": 13036 }, { "epoch": 13.78, "learning_rate": 1.5565539112050738e-05, "loss": 0.0185, "step": 13038 }, { "epoch": 13.78, "learning_rate": 1.5560253699788584e-05, "loss": 0.0204, "step": 13040 }, { "epoch": 13.79, "learning_rate": 1.5554968287526427e-05, "loss": 0.0058, "step": 13042 }, { "epoch": 13.79, "learning_rate": 1.554968287526427e-05, "loss": 0.0612, "step": 13044 }, { "epoch": 13.79, "learning_rate": 1.5544397463002115e-05, "loss": 0.0198, "step": 13046 }, { "epoch": 13.79, "learning_rate": 1.5539112050739958e-05, "loss": 0.0196, "step": 13048 }, { "epoch": 13.79, "learning_rate": 1.5533826638477804e-05, "loss": 0.0012, "step": 13050 }, { "epoch": 13.8, "learning_rate": 1.5528541226215647e-05, "loss": 0.029, "step": 13052 }, { "epoch": 13.8, "learning_rate": 1.552325581395349e-05, "loss": 0.0228, "step": 13054 }, { "epoch": 13.8, "learning_rate": 1.5517970401691332e-05, "loss": 0.0367, "step": 13056 }, { "epoch": 13.8, "learning_rate": 1.5512684989429178e-05, "loss": 0.0608, "step": 13058 }, { "epoch": 13.81, "learning_rate": 1.550739957716702e-05, "loss": 0.0569, "step": 13060 }, { "epoch": 13.81, "learning_rate": 1.5502114164904864e-05, "loss": 0.0054, "step": 13062 }, { "epoch": 13.81, "learning_rate": 1.5496828752642706e-05, "loss": 0.0158, "step": 13064 }, { "epoch": 13.81, "learning_rate": 1.5491543340380552e-05, "loss": 0.0341, "step": 13066 }, { "epoch": 13.81, "learning_rate": 1.5486257928118395e-05, "loss": 0.0202, "step": 13068 }, { "epoch": 13.82, "learning_rate": 1.5480972515856238e-05, "loss": 0.0623, "step": 13070 }, { "epoch": 13.82, "learning_rate": 1.547568710359408e-05, "loss": 0.0062, "step": 13072 }, { "epoch": 13.82, "learning_rate": 1.5470401691331923e-05, "loss": 0.0139, "step": 13074 }, { "epoch": 13.82, "learning_rate": 1.546511627906977e-05, "loss": 0.0107, "step": 13076 }, { "epoch": 13.82, "learning_rate": 1.5459830866807612e-05, "loss": 0.0138, "step": 13078 }, { "epoch": 13.83, "learning_rate": 1.5454545454545454e-05, "loss": 0.0398, "step": 13080 }, { "epoch": 13.83, "learning_rate": 1.5449260042283297e-05, "loss": 0.0256, "step": 13082 }, { "epoch": 13.83, "learning_rate": 1.5443974630021143e-05, "loss": 0.0062, "step": 13084 }, { "epoch": 13.83, "learning_rate": 1.5438689217758986e-05, "loss": 0.0163, "step": 13086 }, { "epoch": 13.84, "learning_rate": 1.543340380549683e-05, "loss": 0.0525, "step": 13088 }, { "epoch": 13.84, "learning_rate": 1.542811839323467e-05, "loss": 0.1588, "step": 13090 }, { "epoch": 13.84, "learning_rate": 1.5422832980972514e-05, "loss": 0.0386, "step": 13092 }, { "epoch": 13.84, "learning_rate": 1.541754756871036e-05, "loss": 0.0213, "step": 13094 }, { "epoch": 13.84, "learning_rate": 1.5412262156448203e-05, "loss": 0.0187, "step": 13096 }, { "epoch": 13.85, "learning_rate": 1.5406976744186045e-05, "loss": 0.0402, "step": 13098 }, { "epoch": 13.85, "learning_rate": 1.540169133192389e-05, "loss": 0.0738, "step": 13100 }, { "epoch": 13.85, "learning_rate": 1.5396405919661734e-05, "loss": 0.0397, "step": 13102 }, { "epoch": 13.85, "learning_rate": 1.539112050739958e-05, "loss": 0.0111, "step": 13104 }, { "epoch": 13.85, "learning_rate": 1.5385835095137423e-05, "loss": 0.0102, "step": 13106 }, { "epoch": 13.86, "learning_rate": 1.5380549682875265e-05, "loss": 0.089, "step": 13108 }, { "epoch": 13.86, "learning_rate": 1.5375264270613108e-05, "loss": 0.0461, "step": 13110 }, { "epoch": 13.86, "learning_rate": 1.5369978858350954e-05, "loss": 0.0456, "step": 13112 }, { "epoch": 13.86, "learning_rate": 1.5364693446088797e-05, "loss": 0.0304, "step": 13114 }, { "epoch": 13.86, "learning_rate": 1.535940803382664e-05, "loss": 0.0566, "step": 13116 }, { "epoch": 13.87, "learning_rate": 1.5354122621564482e-05, "loss": 0.0024, "step": 13118 }, { "epoch": 13.87, "learning_rate": 1.5348837209302328e-05, "loss": 0.0571, "step": 13120 }, { "epoch": 13.87, "learning_rate": 1.534355179704017e-05, "loss": 0.0012, "step": 13122 }, { "epoch": 13.87, "learning_rate": 1.5338266384778014e-05, "loss": 0.0618, "step": 13124 }, { "epoch": 13.88, "learning_rate": 1.5332980972515856e-05, "loss": 0.035, "step": 13126 }, { "epoch": 13.88, "learning_rate": 1.53276955602537e-05, "loss": 0.0343, "step": 13128 }, { "epoch": 13.88, "learning_rate": 1.5322410147991545e-05, "loss": 0.0398, "step": 13130 }, { "epoch": 13.88, "learning_rate": 1.5317124735729388e-05, "loss": 0.1157, "step": 13132 }, { "epoch": 13.88, "learning_rate": 1.531183932346723e-05, "loss": 0.0126, "step": 13134 }, { "epoch": 13.89, "learning_rate": 1.5306553911205073e-05, "loss": 0.0197, "step": 13136 }, { "epoch": 13.89, "learning_rate": 1.530126849894292e-05, "loss": 0.0207, "step": 13138 }, { "epoch": 13.89, "learning_rate": 1.529598308668076e-05, "loss": 0.0249, "step": 13140 }, { "epoch": 13.89, "learning_rate": 1.5290697674418604e-05, "loss": 0.0116, "step": 13142 }, { "epoch": 13.89, "learning_rate": 1.5285412262156447e-05, "loss": 0.0212, "step": 13144 }, { "epoch": 13.9, "learning_rate": 1.528012684989429e-05, "loss": 0.0302, "step": 13146 }, { "epoch": 13.9, "learning_rate": 1.5274841437632136e-05, "loss": 0.0309, "step": 13148 }, { "epoch": 13.9, "learning_rate": 1.526955602536998e-05, "loss": 0.0287, "step": 13150 }, { "epoch": 13.9, "learning_rate": 1.5264270613107824e-05, "loss": 0.0315, "step": 13152 }, { "epoch": 13.9, "learning_rate": 1.5258985200845667e-05, "loss": 0.0123, "step": 13154 }, { "epoch": 13.91, "learning_rate": 1.5253699788583512e-05, "loss": 0.0107, "step": 13156 }, { "epoch": 13.91, "learning_rate": 1.5248414376321354e-05, "loss": 0.0162, "step": 13158 }, { "epoch": 13.91, "learning_rate": 1.5243128964059197e-05, "loss": 0.0362, "step": 13160 }, { "epoch": 13.91, "learning_rate": 1.523784355179704e-05, "loss": 0.0339, "step": 13162 }, { "epoch": 13.92, "learning_rate": 1.5232558139534884e-05, "loss": 0.0635, "step": 13164 }, { "epoch": 13.92, "learning_rate": 1.5227272727272728e-05, "loss": 0.024, "step": 13166 }, { "epoch": 13.92, "learning_rate": 1.5221987315010573e-05, "loss": 0.023, "step": 13168 }, { "epoch": 13.92, "learning_rate": 1.5216701902748415e-05, "loss": 0.0562, "step": 13170 }, { "epoch": 13.92, "learning_rate": 1.5211416490486258e-05, "loss": 0.0143, "step": 13172 }, { "epoch": 13.93, "learning_rate": 1.5206131078224104e-05, "loss": 0.0143, "step": 13174 }, { "epoch": 13.93, "learning_rate": 1.5200845665961947e-05, "loss": 0.0185, "step": 13176 }, { "epoch": 13.93, "learning_rate": 1.519556025369979e-05, "loss": 0.0204, "step": 13178 }, { "epoch": 13.93, "learning_rate": 1.5190274841437632e-05, "loss": 0.0322, "step": 13180 }, { "epoch": 13.93, "learning_rate": 1.5184989429175475e-05, "loss": 0.047, "step": 13182 }, { "epoch": 13.94, "learning_rate": 1.517970401691332e-05, "loss": 0.0296, "step": 13184 }, { "epoch": 13.94, "learning_rate": 1.5174418604651163e-05, "loss": 0.0115, "step": 13186 }, { "epoch": 13.94, "learning_rate": 1.5169133192389006e-05, "loss": 0.0086, "step": 13188 }, { "epoch": 13.94, "learning_rate": 1.516384778012685e-05, "loss": 0.0098, "step": 13190 }, { "epoch": 13.95, "learning_rate": 1.5158562367864695e-05, "loss": 0.0352, "step": 13192 }, { "epoch": 13.95, "learning_rate": 1.5153276955602538e-05, "loss": 0.003, "step": 13194 }, { "epoch": 13.95, "learning_rate": 1.5147991543340382e-05, "loss": 0.0179, "step": 13196 }, { "epoch": 13.95, "learning_rate": 1.5142706131078225e-05, "loss": 0.0103, "step": 13198 }, { "epoch": 13.95, "learning_rate": 1.5137420718816067e-05, "loss": 0.0212, "step": 13200 }, { "epoch": 13.96, "learning_rate": 1.5132135306553913e-05, "loss": 0.0327, "step": 13202 }, { "epoch": 13.96, "learning_rate": 1.5126849894291756e-05, "loss": 0.0174, "step": 13204 }, { "epoch": 13.96, "learning_rate": 1.5121564482029599e-05, "loss": 0.0536, "step": 13206 }, { "epoch": 13.96, "learning_rate": 1.5116279069767441e-05, "loss": 0.0491, "step": 13208 }, { "epoch": 13.96, "learning_rate": 1.5110993657505287e-05, "loss": 0.0246, "step": 13210 }, { "epoch": 13.97, "learning_rate": 1.510570824524313e-05, "loss": 0.0733, "step": 13212 }, { "epoch": 13.97, "learning_rate": 1.5100422832980973e-05, "loss": 0.005, "step": 13214 }, { "epoch": 13.97, "learning_rate": 1.5095137420718815e-05, "loss": 0.0153, "step": 13216 }, { "epoch": 13.97, "learning_rate": 1.508985200845666e-05, "loss": 0.0374, "step": 13218 }, { "epoch": 13.97, "learning_rate": 1.5084566596194504e-05, "loss": 0.035, "step": 13220 }, { "epoch": 13.98, "learning_rate": 1.5079281183932348e-05, "loss": 0.0276, "step": 13222 }, { "epoch": 13.98, "learning_rate": 1.5073995771670191e-05, "loss": 0.0337, "step": 13224 }, { "epoch": 13.98, "learning_rate": 1.5068710359408034e-05, "loss": 0.0762, "step": 13226 }, { "epoch": 13.98, "learning_rate": 1.506342494714588e-05, "loss": 0.0072, "step": 13228 }, { "epoch": 13.99, "learning_rate": 1.5058139534883723e-05, "loss": 0.0254, "step": 13230 }, { "epoch": 13.99, "learning_rate": 1.5052854122621565e-05, "loss": 0.0068, "step": 13232 }, { "epoch": 13.99, "learning_rate": 1.5047568710359408e-05, "loss": 0.018, "step": 13234 }, { "epoch": 13.99, "learning_rate": 1.504228329809725e-05, "loss": 0.0269, "step": 13236 }, { "epoch": 13.99, "learning_rate": 1.5036997885835097e-05, "loss": 0.054, "step": 13238 }, { "epoch": 14.0, "learning_rate": 1.503171247357294e-05, "loss": 0.0689, "step": 13240 }, { "epoch": 14.0, "learning_rate": 1.5026427061310782e-05, "loss": 0.0259, "step": 13242 }, { "epoch": 14.0, "learning_rate": 1.5021141649048626e-05, "loss": 0.0249, "step": 13244 }, { "epoch": 14.0, "learning_rate": 1.501585623678647e-05, "loss": 0.0192, "step": 13246 }, { "epoch": 14.0, "learning_rate": 1.5010570824524315e-05, "loss": 0.0105, "step": 13248 }, { "epoch": 14.01, "learning_rate": 1.5005285412262158e-05, "loss": 0.0108, "step": 13250 }, { "epoch": 14.01, "learning_rate": 1.5e-05, "loss": 0.0063, "step": 13252 }, { "epoch": 14.01, "learning_rate": 1.4994714587737843e-05, "loss": 0.0109, "step": 13254 }, { "epoch": 14.01, "learning_rate": 1.4989429175475689e-05, "loss": 0.0026, "step": 13256 }, { "epoch": 14.01, "learning_rate": 1.4984143763213532e-05, "loss": 0.0211, "step": 13258 }, { "epoch": 14.02, "learning_rate": 1.4978858350951374e-05, "loss": 0.0473, "step": 13260 }, { "epoch": 14.02, "learning_rate": 1.4973572938689217e-05, "loss": 0.0033, "step": 13262 }, { "epoch": 14.02, "learning_rate": 1.4968287526427063e-05, "loss": 0.0232, "step": 13264 }, { "epoch": 14.02, "learning_rate": 1.4963002114164906e-05, "loss": 0.0317, "step": 13266 }, { "epoch": 14.03, "learning_rate": 1.4957716701902749e-05, "loss": 0.0207, "step": 13268 }, { "epoch": 14.03, "learning_rate": 1.4952431289640593e-05, "loss": 0.0295, "step": 13270 }, { "epoch": 14.03, "learning_rate": 1.4947145877378436e-05, "loss": 0.0027, "step": 13272 }, { "epoch": 14.03, "learning_rate": 1.4941860465116282e-05, "loss": 0.0149, "step": 13274 }, { "epoch": 14.03, "learning_rate": 1.4936575052854124e-05, "loss": 0.0253, "step": 13276 }, { "epoch": 14.04, "learning_rate": 1.4931289640591967e-05, "loss": 0.0139, "step": 13278 }, { "epoch": 14.04, "learning_rate": 1.492600422832981e-05, "loss": 0.0864, "step": 13280 }, { "epoch": 14.04, "learning_rate": 1.4920718816067656e-05, "loss": 0.0691, "step": 13282 }, { "epoch": 14.04, "learning_rate": 1.4915433403805498e-05, "loss": 0.015, "step": 13284 }, { "epoch": 14.04, "learning_rate": 1.4910147991543341e-05, "loss": 0.0429, "step": 13286 }, { "epoch": 14.05, "learning_rate": 1.4904862579281184e-05, "loss": 0.0207, "step": 13288 }, { "epoch": 14.05, "learning_rate": 1.4899577167019026e-05, "loss": 0.0256, "step": 13290 }, { "epoch": 14.05, "learning_rate": 1.4894291754756872e-05, "loss": 0.0107, "step": 13292 }, { "epoch": 14.05, "learning_rate": 1.4889006342494715e-05, "loss": 0.0192, "step": 13294 }, { "epoch": 14.05, "learning_rate": 1.488372093023256e-05, "loss": 0.0007, "step": 13296 }, { "epoch": 14.06, "learning_rate": 1.4878435517970402e-05, "loss": 0.087, "step": 13298 }, { "epoch": 14.06, "learning_rate": 1.4873150105708247e-05, "loss": 0.003, "step": 13300 }, { "epoch": 14.06, "learning_rate": 1.4867864693446091e-05, "loss": 0.0655, "step": 13302 }, { "epoch": 14.06, "learning_rate": 1.4862579281183934e-05, "loss": 0.0009, "step": 13304 }, { "epoch": 14.07, "learning_rate": 1.4857293868921776e-05, "loss": 0.0076, "step": 13306 }, { "epoch": 14.07, "learning_rate": 1.4852008456659619e-05, "loss": 0.0231, "step": 13308 }, { "epoch": 14.07, "learning_rate": 1.4846723044397465e-05, "loss": 0.0266, "step": 13310 }, { "epoch": 14.07, "learning_rate": 1.4841437632135308e-05, "loss": 0.0252, "step": 13312 }, { "epoch": 14.07, "learning_rate": 1.483615221987315e-05, "loss": 0.0915, "step": 13314 }, { "epoch": 14.08, "learning_rate": 1.4830866807610993e-05, "loss": 0.0135, "step": 13316 }, { "epoch": 14.08, "learning_rate": 1.4825581395348839e-05, "loss": 0.019, "step": 13318 }, { "epoch": 14.08, "learning_rate": 1.4820295983086682e-05, "loss": 0.0622, "step": 13320 }, { "epoch": 14.08, "learning_rate": 1.4815010570824524e-05, "loss": 0.0025, "step": 13322 }, { "epoch": 14.08, "learning_rate": 1.4809725158562369e-05, "loss": 0.0052, "step": 13324 }, { "epoch": 14.09, "learning_rate": 1.4804439746300211e-05, "loss": 0.0065, "step": 13326 }, { "epoch": 14.09, "learning_rate": 1.4799154334038057e-05, "loss": 0.063, "step": 13328 }, { "epoch": 14.09, "learning_rate": 1.47938689217759e-05, "loss": 0.0188, "step": 13330 }, { "epoch": 14.09, "learning_rate": 1.4788583509513743e-05, "loss": 0.0182, "step": 13332 }, { "epoch": 14.1, "learning_rate": 1.4783298097251585e-05, "loss": 0.021, "step": 13334 }, { "epoch": 14.1, "learning_rate": 1.4778012684989432e-05, "loss": 0.0099, "step": 13336 }, { "epoch": 14.1, "learning_rate": 1.4772727272727274e-05, "loss": 0.0057, "step": 13338 }, { "epoch": 14.1, "learning_rate": 1.4767441860465117e-05, "loss": 0.0193, "step": 13340 }, { "epoch": 14.1, "learning_rate": 1.476215644820296e-05, "loss": 0.0487, "step": 13342 }, { "epoch": 14.11, "learning_rate": 1.4756871035940802e-05, "loss": 0.0194, "step": 13344 }, { "epoch": 14.11, "learning_rate": 1.4751585623678648e-05, "loss": 0.0289, "step": 13346 }, { "epoch": 14.11, "learning_rate": 1.4746300211416491e-05, "loss": 0.0224, "step": 13348 }, { "epoch": 14.11, "learning_rate": 1.4741014799154335e-05, "loss": 0.0137, "step": 13350 }, { "epoch": 14.11, "learning_rate": 1.4735729386892178e-05, "loss": 0.0165, "step": 13352 }, { "epoch": 14.12, "learning_rate": 1.473044397463002e-05, "loss": 0.0291, "step": 13354 }, { "epoch": 14.12, "learning_rate": 1.4725158562367867e-05, "loss": 0.0642, "step": 13356 }, { "epoch": 14.12, "learning_rate": 1.471987315010571e-05, "loss": 0.0494, "step": 13358 }, { "epoch": 14.12, "learning_rate": 1.4714587737843552e-05, "loss": 0.0077, "step": 13360 }, { "epoch": 14.12, "learning_rate": 1.4709302325581395e-05, "loss": 0.0222, "step": 13362 }, { "epoch": 14.13, "learning_rate": 1.470401691331924e-05, "loss": 0.0185, "step": 13364 }, { "epoch": 14.13, "learning_rate": 1.4698731501057083e-05, "loss": 0.0556, "step": 13366 }, { "epoch": 14.13, "learning_rate": 1.4693446088794926e-05, "loss": 0.0324, "step": 13368 }, { "epoch": 14.13, "learning_rate": 1.4688160676532769e-05, "loss": 0.0105, "step": 13370 }, { "epoch": 14.14, "learning_rate": 1.4682875264270613e-05, "loss": 0.0087, "step": 13372 }, { "epoch": 14.14, "learning_rate": 1.4677589852008458e-05, "loss": 0.0375, "step": 13374 }, { "epoch": 14.14, "learning_rate": 1.4672304439746302e-05, "loss": 0.0014, "step": 13376 }, { "epoch": 14.14, "learning_rate": 1.4667019027484145e-05, "loss": 0.019, "step": 13378 }, { "epoch": 14.14, "learning_rate": 1.4661733615221987e-05, "loss": 0.0439, "step": 13380 }, { "epoch": 14.15, "learning_rate": 1.4656448202959833e-05, "loss": 0.017, "step": 13382 }, { "epoch": 14.15, "learning_rate": 1.4651162790697676e-05, "loss": 0.0047, "step": 13384 }, { "epoch": 14.15, "learning_rate": 1.4645877378435519e-05, "loss": 0.0519, "step": 13386 }, { "epoch": 14.15, "learning_rate": 1.4640591966173361e-05, "loss": 0.0107, "step": 13388 }, { "epoch": 14.15, "learning_rate": 1.4635306553911204e-05, "loss": 0.0064, "step": 13390 }, { "epoch": 14.16, "learning_rate": 1.463002114164905e-05, "loss": 0.0832, "step": 13392 }, { "epoch": 14.16, "learning_rate": 1.4624735729386893e-05, "loss": 0.0079, "step": 13394 }, { "epoch": 14.16, "learning_rate": 1.4619450317124735e-05, "loss": 0.0031, "step": 13396 }, { "epoch": 14.16, "learning_rate": 1.461416490486258e-05, "loss": 0.0436, "step": 13398 }, { "epoch": 14.16, "learning_rate": 1.4608879492600424e-05, "loss": 0.0132, "step": 13400 }, { "epoch": 14.17, "learning_rate": 1.4603594080338267e-05, "loss": 0.0417, "step": 13402 }, { "epoch": 14.17, "learning_rate": 1.4598308668076111e-05, "loss": 0.0505, "step": 13404 }, { "epoch": 14.17, "learning_rate": 1.4593023255813954e-05, "loss": 0.0369, "step": 13406 }, { "epoch": 14.17, "learning_rate": 1.4587737843551796e-05, "loss": 0.072, "step": 13408 }, { "epoch": 14.18, "learning_rate": 1.4582452431289643e-05, "loss": 0.0264, "step": 13410 }, { "epoch": 14.18, "learning_rate": 1.4577167019027485e-05, "loss": 0.0184, "step": 13412 }, { "epoch": 14.18, "learning_rate": 1.4571881606765328e-05, "loss": 0.0311, "step": 13414 }, { "epoch": 14.18, "learning_rate": 1.456659619450317e-05, "loss": 0.0128, "step": 13416 }, { "epoch": 14.18, "learning_rate": 1.4561310782241017e-05, "loss": 0.0044, "step": 13418 }, { "epoch": 14.19, "learning_rate": 1.455602536997886e-05, "loss": 0.007, "step": 13420 }, { "epoch": 14.19, "learning_rate": 1.4550739957716702e-05, "loss": 0.0169, "step": 13422 }, { "epoch": 14.19, "learning_rate": 1.4545454545454545e-05, "loss": 0.0117, "step": 13424 }, { "epoch": 14.19, "learning_rate": 1.4540169133192389e-05, "loss": 0.0298, "step": 13426 }, { "epoch": 14.19, "learning_rate": 1.4534883720930233e-05, "loss": 0.0403, "step": 13428 }, { "epoch": 14.2, "learning_rate": 1.4529598308668078e-05, "loss": 0.0188, "step": 13430 }, { "epoch": 14.2, "learning_rate": 1.452431289640592e-05, "loss": 0.0029, "step": 13432 }, { "epoch": 14.2, "learning_rate": 1.4519027484143763e-05, "loss": 0.0201, "step": 13434 }, { "epoch": 14.2, "learning_rate": 1.4513742071881609e-05, "loss": 0.0215, "step": 13436 }, { "epoch": 14.21, "learning_rate": 1.4508456659619452e-05, "loss": 0.0227, "step": 13438 }, { "epoch": 14.21, "learning_rate": 1.4503171247357294e-05, "loss": 0.0074, "step": 13440 }, { "epoch": 14.21, "learning_rate": 1.4497885835095137e-05, "loss": 0.0111, "step": 13442 }, { "epoch": 14.21, "learning_rate": 1.449260042283298e-05, "loss": 0.0031, "step": 13444 }, { "epoch": 14.21, "learning_rate": 1.4487315010570826e-05, "loss": 0.0568, "step": 13446 }, { "epoch": 14.22, "learning_rate": 1.4482029598308669e-05, "loss": 0.0475, "step": 13448 }, { "epoch": 14.22, "learning_rate": 1.4476744186046511e-05, "loss": 0.0066, "step": 13450 }, { "epoch": 14.22, "learning_rate": 1.4471458773784356e-05, "loss": 0.0605, "step": 13452 }, { "epoch": 14.22, "learning_rate": 1.44661733615222e-05, "loss": 0.0407, "step": 13454 }, { "epoch": 14.22, "learning_rate": 1.4460887949260044e-05, "loss": 0.0279, "step": 13456 }, { "epoch": 14.23, "learning_rate": 1.4455602536997887e-05, "loss": 0.009, "step": 13458 }, { "epoch": 14.23, "learning_rate": 1.445031712473573e-05, "loss": 0.0206, "step": 13460 }, { "epoch": 14.23, "learning_rate": 1.4445031712473572e-05, "loss": 0.1183, "step": 13462 }, { "epoch": 14.23, "learning_rate": 1.4439746300211418e-05, "loss": 0.039, "step": 13464 }, { "epoch": 14.23, "learning_rate": 1.4434460887949261e-05, "loss": 0.0591, "step": 13466 }, { "epoch": 14.24, "learning_rate": 1.4429175475687104e-05, "loss": 0.0715, "step": 13468 }, { "epoch": 14.24, "learning_rate": 1.4423890063424946e-05, "loss": 0.024, "step": 13470 }, { "epoch": 14.24, "learning_rate": 1.4418604651162792e-05, "loss": 0.0236, "step": 13472 }, { "epoch": 14.24, "learning_rate": 1.4413319238900635e-05, "loss": 0.0232, "step": 13474 }, { "epoch": 14.25, "learning_rate": 1.4408033826638478e-05, "loss": 0.0394, "step": 13476 }, { "epoch": 14.25, "learning_rate": 1.4402748414376322e-05, "loss": 0.0025, "step": 13478 }, { "epoch": 14.25, "learning_rate": 1.4397463002114165e-05, "loss": 0.0709, "step": 13480 }, { "epoch": 14.25, "learning_rate": 1.439217758985201e-05, "loss": 0.0878, "step": 13482 }, { "epoch": 14.25, "learning_rate": 1.4386892177589854e-05, "loss": 0.008, "step": 13484 }, { "epoch": 14.26, "learning_rate": 1.4381606765327696e-05, "loss": 0.02, "step": 13486 }, { "epoch": 14.26, "learning_rate": 1.4376321353065539e-05, "loss": 0.0175, "step": 13488 }, { "epoch": 14.26, "learning_rate": 1.4371035940803385e-05, "loss": 0.018, "step": 13490 }, { "epoch": 14.26, "learning_rate": 1.4365750528541228e-05, "loss": 0.0914, "step": 13492 }, { "epoch": 14.26, "learning_rate": 1.436046511627907e-05, "loss": 0.0175, "step": 13494 }, { "epoch": 14.27, "learning_rate": 1.4355179704016913e-05, "loss": 0.0415, "step": 13496 }, { "epoch": 14.27, "learning_rate": 1.4349894291754756e-05, "loss": 0.0276, "step": 13498 }, { "epoch": 14.27, "learning_rate": 1.4344608879492602e-05, "loss": 0.0012, "step": 13500 }, { "epoch": 14.27, "eval_cer": 0.09563978341407808, "eval_loss": 1.003995656967163, "eval_runtime": 120.9517, "eval_samples_per_second": 6.953, "eval_steps_per_second": 0.876, "step": 13500 }, { "epoch": 14.27, "learning_rate": 1.4339323467230444e-05, "loss": 0.1183, "step": 13502 }, { "epoch": 14.27, "learning_rate": 1.4334038054968287e-05, "loss": 0.062, "step": 13504 }, { "epoch": 14.28, "learning_rate": 1.4328752642706131e-05, "loss": 0.0552, "step": 13506 }, { "epoch": 14.28, "learning_rate": 1.4323467230443976e-05, "loss": 0.0046, "step": 13508 }, { "epoch": 14.28, "learning_rate": 1.431818181818182e-05, "loss": 0.0243, "step": 13510 }, { "epoch": 14.28, "learning_rate": 1.4312896405919663e-05, "loss": 0.1092, "step": 13512 }, { "epoch": 14.29, "learning_rate": 1.4307610993657506e-05, "loss": 0.1191, "step": 13514 }, { "epoch": 14.29, "learning_rate": 1.4302325581395348e-05, "loss": 0.0201, "step": 13516 }, { "epoch": 14.29, "learning_rate": 1.4297040169133194e-05, "loss": 0.0286, "step": 13518 }, { "epoch": 14.29, "learning_rate": 1.4291754756871037e-05, "loss": 0.0275, "step": 13520 }, { "epoch": 14.29, "learning_rate": 1.428646934460888e-05, "loss": 0.0763, "step": 13522 }, { "epoch": 14.3, "learning_rate": 1.4281183932346722e-05, "loss": 0.1111, "step": 13524 }, { "epoch": 14.3, "learning_rate": 1.4275898520084568e-05, "loss": 0.0027, "step": 13526 }, { "epoch": 14.3, "learning_rate": 1.4270613107822411e-05, "loss": 0.0311, "step": 13528 }, { "epoch": 14.3, "learning_rate": 1.4265327695560254e-05, "loss": 0.0032, "step": 13530 }, { "epoch": 14.3, "learning_rate": 1.4260042283298098e-05, "loss": 0.0262, "step": 13532 }, { "epoch": 14.31, "learning_rate": 1.425475687103594e-05, "loss": 0.0174, "step": 13534 }, { "epoch": 14.31, "learning_rate": 1.4249471458773787e-05, "loss": 0.0109, "step": 13536 }, { "epoch": 14.31, "learning_rate": 1.424418604651163e-05, "loss": 0.0691, "step": 13538 }, { "epoch": 14.31, "learning_rate": 1.4238900634249472e-05, "loss": 0.1051, "step": 13540 }, { "epoch": 14.32, "learning_rate": 1.4233615221987315e-05, "loss": 0.1075, "step": 13542 }, { "epoch": 14.32, "learning_rate": 1.422832980972516e-05, "loss": 0.0403, "step": 13544 }, { "epoch": 14.32, "learning_rate": 1.4223044397463003e-05, "loss": 0.0249, "step": 13546 }, { "epoch": 14.32, "learning_rate": 1.4217758985200846e-05, "loss": 0.0125, "step": 13548 }, { "epoch": 14.32, "learning_rate": 1.4212473572938689e-05, "loss": 0.0637, "step": 13550 }, { "epoch": 14.33, "learning_rate": 1.4207188160676532e-05, "loss": 0.0158, "step": 13552 }, { "epoch": 14.33, "learning_rate": 1.4201902748414378e-05, "loss": 0.0537, "step": 13554 }, { "epoch": 14.33, "learning_rate": 1.419661733615222e-05, "loss": 0.027, "step": 13556 }, { "epoch": 14.33, "learning_rate": 1.4191331923890065e-05, "loss": 0.0184, "step": 13558 }, { "epoch": 14.33, "learning_rate": 1.4186046511627907e-05, "loss": 0.0254, "step": 13560 }, { "epoch": 14.34, "learning_rate": 1.4180761099365752e-05, "loss": 0.0231, "step": 13562 }, { "epoch": 14.34, "learning_rate": 1.4175475687103596e-05, "loss": 0.0292, "step": 13564 }, { "epoch": 14.34, "learning_rate": 1.4170190274841439e-05, "loss": 0.0091, "step": 13566 }, { "epoch": 14.34, "learning_rate": 1.4164904862579281e-05, "loss": 0.0139, "step": 13568 }, { "epoch": 14.34, "learning_rate": 1.4159619450317124e-05, "loss": 0.0491, "step": 13570 }, { "epoch": 14.35, "learning_rate": 1.415433403805497e-05, "loss": 0.0261, "step": 13572 }, { "epoch": 14.35, "learning_rate": 1.4149048625792813e-05, "loss": 0.0122, "step": 13574 }, { "epoch": 14.35, "learning_rate": 1.4143763213530655e-05, "loss": 0.0122, "step": 13576 }, { "epoch": 14.35, "learning_rate": 1.4138477801268498e-05, "loss": 0.049, "step": 13578 }, { "epoch": 14.36, "learning_rate": 1.4133192389006344e-05, "loss": 0.0109, "step": 13580 }, { "epoch": 14.36, "learning_rate": 1.4127906976744187e-05, "loss": 0.1036, "step": 13582 }, { "epoch": 14.36, "learning_rate": 1.412262156448203e-05, "loss": 0.0188, "step": 13584 }, { "epoch": 14.36, "learning_rate": 1.4117336152219874e-05, "loss": 0.0124, "step": 13586 }, { "epoch": 14.36, "learning_rate": 1.4112050739957717e-05, "loss": 0.0425, "step": 13588 }, { "epoch": 14.37, "learning_rate": 1.4106765327695563e-05, "loss": 0.0123, "step": 13590 }, { "epoch": 14.37, "learning_rate": 1.4101479915433405e-05, "loss": 0.0958, "step": 13592 }, { "epoch": 14.37, "learning_rate": 1.4096194503171248e-05, "loss": 0.0522, "step": 13594 }, { "epoch": 14.37, "learning_rate": 1.409090909090909e-05, "loss": 0.0211, "step": 13596 }, { "epoch": 14.37, "learning_rate": 1.4085623678646937e-05, "loss": 0.0055, "step": 13598 }, { "epoch": 14.38, "learning_rate": 1.408033826638478e-05, "loss": 0.0014, "step": 13600 }, { "epoch": 14.38, "learning_rate": 1.4075052854122622e-05, "loss": 0.0414, "step": 13602 }, { "epoch": 14.38, "learning_rate": 1.4069767441860465e-05, "loss": 0.0068, "step": 13604 }, { "epoch": 14.38, "learning_rate": 1.4064482029598307e-05, "loss": 0.0163, "step": 13606 }, { "epoch": 14.38, "learning_rate": 1.4059196617336153e-05, "loss": 0.0078, "step": 13608 }, { "epoch": 14.39, "learning_rate": 1.4053911205073996e-05, "loss": 0.0214, "step": 13610 }, { "epoch": 14.39, "learning_rate": 1.404862579281184e-05, "loss": 0.0221, "step": 13612 }, { "epoch": 14.39, "learning_rate": 1.4043340380549683e-05, "loss": 0.0048, "step": 13614 }, { "epoch": 14.39, "learning_rate": 1.403805496828753e-05, "loss": 0.0113, "step": 13616 }, { "epoch": 14.4, "learning_rate": 1.4032769556025372e-05, "loss": 0.0915, "step": 13618 }, { "epoch": 14.4, "learning_rate": 1.4027484143763215e-05, "loss": 0.0354, "step": 13620 }, { "epoch": 14.4, "learning_rate": 1.4022198731501057e-05, "loss": 0.0057, "step": 13622 }, { "epoch": 14.4, "learning_rate": 1.40169133192389e-05, "loss": 0.003, "step": 13624 }, { "epoch": 14.4, "learning_rate": 1.4011627906976746e-05, "loss": 0.0131, "step": 13626 }, { "epoch": 14.41, "learning_rate": 1.4006342494714589e-05, "loss": 0.0242, "step": 13628 }, { "epoch": 14.41, "learning_rate": 1.4001057082452431e-05, "loss": 0.018, "step": 13630 }, { "epoch": 14.41, "learning_rate": 1.3995771670190274e-05, "loss": 0.0172, "step": 13632 }, { "epoch": 14.41, "learning_rate": 1.399048625792812e-05, "loss": 0.0313, "step": 13634 }, { "epoch": 14.41, "learning_rate": 1.3985200845665963e-05, "loss": 0.0219, "step": 13636 }, { "epoch": 14.42, "learning_rate": 1.3979915433403807e-05, "loss": 0.0044, "step": 13638 }, { "epoch": 14.42, "learning_rate": 1.397463002114165e-05, "loss": 0.0299, "step": 13640 }, { "epoch": 14.42, "learning_rate": 1.3969344608879492e-05, "loss": 0.0146, "step": 13642 }, { "epoch": 14.42, "learning_rate": 1.3964059196617338e-05, "loss": 0.0114, "step": 13644 }, { "epoch": 14.42, "learning_rate": 1.3958773784355181e-05, "loss": 0.1177, "step": 13646 }, { "epoch": 14.43, "learning_rate": 1.3953488372093024e-05, "loss": 0.0769, "step": 13648 }, { "epoch": 14.43, "learning_rate": 1.3948202959830866e-05, "loss": 0.0479, "step": 13650 }, { "epoch": 14.43, "learning_rate": 1.3942917547568712e-05, "loss": 0.0186, "step": 13652 }, { "epoch": 14.43, "learning_rate": 1.3937632135306555e-05, "loss": 0.0156, "step": 13654 }, { "epoch": 14.44, "learning_rate": 1.3932346723044398e-05, "loss": 0.0296, "step": 13656 }, { "epoch": 14.44, "learning_rate": 1.392706131078224e-05, "loss": 0.0376, "step": 13658 }, { "epoch": 14.44, "learning_rate": 1.3921775898520085e-05, "loss": 0.0162, "step": 13660 }, { "epoch": 14.44, "learning_rate": 1.391649048625793e-05, "loss": 0.02, "step": 13662 }, { "epoch": 14.44, "learning_rate": 1.3911205073995774e-05, "loss": 0.0107, "step": 13664 }, { "epoch": 14.45, "learning_rate": 1.3905919661733616e-05, "loss": 0.0131, "step": 13666 }, { "epoch": 14.45, "learning_rate": 1.3900634249471459e-05, "loss": 0.0253, "step": 13668 }, { "epoch": 14.45, "learning_rate": 1.3895348837209305e-05, "loss": 0.0241, "step": 13670 }, { "epoch": 14.45, "learning_rate": 1.3890063424947148e-05, "loss": 0.0411, "step": 13672 }, { "epoch": 14.45, "learning_rate": 1.388477801268499e-05, "loss": 0.0222, "step": 13674 }, { "epoch": 14.46, "learning_rate": 1.3879492600422833e-05, "loss": 0.0701, "step": 13676 }, { "epoch": 14.46, "learning_rate": 1.3874207188160676e-05, "loss": 0.0616, "step": 13678 }, { "epoch": 14.46, "learning_rate": 1.3868921775898522e-05, "loss": 0.0431, "step": 13680 }, { "epoch": 14.46, "learning_rate": 1.3863636363636364e-05, "loss": 0.0509, "step": 13682 }, { "epoch": 14.47, "learning_rate": 1.3858350951374207e-05, "loss": 0.0369, "step": 13684 }, { "epoch": 14.47, "learning_rate": 1.3853065539112051e-05, "loss": 0.0113, "step": 13686 }, { "epoch": 14.47, "learning_rate": 1.3847780126849896e-05, "loss": 0.051, "step": 13688 }, { "epoch": 14.47, "learning_rate": 1.3842494714587738e-05, "loss": 0.0501, "step": 13690 }, { "epoch": 14.47, "learning_rate": 1.3837209302325583e-05, "loss": 0.0243, "step": 13692 }, { "epoch": 14.48, "learning_rate": 1.3831923890063426e-05, "loss": 0.0398, "step": 13694 }, { "epoch": 14.48, "learning_rate": 1.3826638477801268e-05, "loss": 0.0111, "step": 13696 }, { "epoch": 14.48, "learning_rate": 1.3821353065539114e-05, "loss": 0.0178, "step": 13698 }, { "epoch": 14.48, "learning_rate": 1.3816067653276957e-05, "loss": 0.0069, "step": 13700 }, { "epoch": 14.48, "learning_rate": 1.38107822410148e-05, "loss": 0.0212, "step": 13702 }, { "epoch": 14.49, "learning_rate": 1.3805496828752642e-05, "loss": 0.0212, "step": 13704 }, { "epoch": 14.49, "learning_rate": 1.3800211416490485e-05, "loss": 0.0104, "step": 13706 }, { "epoch": 14.49, "learning_rate": 1.3794926004228331e-05, "loss": 0.0028, "step": 13708 }, { "epoch": 14.49, "learning_rate": 1.3789640591966174e-05, "loss": 0.0084, "step": 13710 }, { "epoch": 14.49, "learning_rate": 1.3784355179704016e-05, "loss": 0.0075, "step": 13712 }, { "epoch": 14.5, "learning_rate": 1.377906976744186e-05, "loss": 0.0149, "step": 13714 }, { "epoch": 14.5, "learning_rate": 1.3773784355179705e-05, "loss": 0.0051, "step": 13716 }, { "epoch": 14.5, "learning_rate": 1.376849894291755e-05, "loss": 0.0321, "step": 13718 }, { "epoch": 14.5, "learning_rate": 1.3763213530655392e-05, "loss": 0.0161, "step": 13720 }, { "epoch": 14.51, "learning_rate": 1.3757928118393235e-05, "loss": 0.0121, "step": 13722 }, { "epoch": 14.51, "learning_rate": 1.3752642706131077e-05, "loss": 0.0102, "step": 13724 }, { "epoch": 14.51, "learning_rate": 1.3747357293868924e-05, "loss": 0.005, "step": 13726 }, { "epoch": 14.51, "learning_rate": 1.3742071881606766e-05, "loss": 0.0073, "step": 13728 }, { "epoch": 14.51, "learning_rate": 1.3736786469344609e-05, "loss": 0.0379, "step": 13730 }, { "epoch": 14.52, "learning_rate": 1.3731501057082452e-05, "loss": 0.0563, "step": 13732 }, { "epoch": 14.52, "learning_rate": 1.3726215644820298e-05, "loss": 0.0341, "step": 13734 }, { "epoch": 14.52, "learning_rate": 1.372093023255814e-05, "loss": 0.0172, "step": 13736 }, { "epoch": 14.52, "learning_rate": 1.3715644820295983e-05, "loss": 0.0016, "step": 13738 }, { "epoch": 14.52, "learning_rate": 1.3710359408033827e-05, "loss": 0.0094, "step": 13740 }, { "epoch": 14.53, "learning_rate": 1.370507399577167e-05, "loss": 0.0734, "step": 13742 }, { "epoch": 14.53, "learning_rate": 1.3699788583509516e-05, "loss": 0.0563, "step": 13744 }, { "epoch": 14.53, "learning_rate": 1.3694503171247359e-05, "loss": 0.0231, "step": 13746 }, { "epoch": 14.53, "learning_rate": 1.3689217758985201e-05, "loss": 0.0068, "step": 13748 }, { "epoch": 14.53, "learning_rate": 1.3683932346723044e-05, "loss": 0.0094, "step": 13750 }, { "epoch": 14.54, "learning_rate": 1.367864693446089e-05, "loss": 0.0178, "step": 13752 }, { "epoch": 14.54, "learning_rate": 1.3673361522198733e-05, "loss": 0.0592, "step": 13754 }, { "epoch": 14.54, "learning_rate": 1.3668076109936575e-05, "loss": 0.0183, "step": 13756 }, { "epoch": 14.54, "learning_rate": 1.3662790697674418e-05, "loss": 0.0065, "step": 13758 }, { "epoch": 14.55, "learning_rate": 1.365750528541226e-05, "loss": 0.0021, "step": 13760 }, { "epoch": 14.55, "learning_rate": 1.3652219873150107e-05, "loss": 0.0198, "step": 13762 }, { "epoch": 14.55, "learning_rate": 1.364693446088795e-05, "loss": 0.0197, "step": 13764 }, { "epoch": 14.55, "learning_rate": 1.3641649048625794e-05, "loss": 0.0105, "step": 13766 }, { "epoch": 14.55, "learning_rate": 1.3636363636363637e-05, "loss": 0.0388, "step": 13768 }, { "epoch": 14.56, "learning_rate": 1.3631078224101481e-05, "loss": 0.0011, "step": 13770 }, { "epoch": 14.56, "learning_rate": 1.3625792811839325e-05, "loss": 0.0006, "step": 13772 }, { "epoch": 14.56, "learning_rate": 1.3620507399577168e-05, "loss": 0.0233, "step": 13774 }, { "epoch": 14.56, "learning_rate": 1.361522198731501e-05, "loss": 0.0077, "step": 13776 }, { "epoch": 14.56, "learning_rate": 1.3609936575052853e-05, "loss": 0.003, "step": 13778 }, { "epoch": 14.57, "learning_rate": 1.36046511627907e-05, "loss": 0.0128, "step": 13780 }, { "epoch": 14.57, "learning_rate": 1.3599365750528542e-05, "loss": 0.0146, "step": 13782 }, { "epoch": 14.57, "learning_rate": 1.3594080338266385e-05, "loss": 0.0699, "step": 13784 }, { "epoch": 14.57, "learning_rate": 1.3588794926004227e-05, "loss": 0.0145, "step": 13786 }, { "epoch": 14.58, "learning_rate": 1.3583509513742073e-05, "loss": 0.0151, "step": 13788 }, { "epoch": 14.58, "learning_rate": 1.3578224101479916e-05, "loss": 0.039, "step": 13790 }, { "epoch": 14.58, "learning_rate": 1.3572938689217759e-05, "loss": 0.0277, "step": 13792 }, { "epoch": 14.58, "learning_rate": 1.3567653276955603e-05, "loss": 0.0039, "step": 13794 }, { "epoch": 14.58, "learning_rate": 1.3562367864693446e-05, "loss": 0.0024, "step": 13796 }, { "epoch": 14.59, "learning_rate": 1.3557082452431292e-05, "loss": 0.0033, "step": 13798 }, { "epoch": 14.59, "learning_rate": 1.3551797040169135e-05, "loss": 0.0173, "step": 13800 }, { "epoch": 14.59, "learning_rate": 1.3546511627906977e-05, "loss": 0.0648, "step": 13802 }, { "epoch": 14.59, "learning_rate": 1.354122621564482e-05, "loss": 0.0049, "step": 13804 }, { "epoch": 14.59, "learning_rate": 1.3535940803382666e-05, "loss": 0.0268, "step": 13806 }, { "epoch": 14.6, "learning_rate": 1.3530655391120509e-05, "loss": 0.0133, "step": 13808 }, { "epoch": 14.6, "learning_rate": 1.3525369978858351e-05, "loss": 0.0039, "step": 13810 }, { "epoch": 14.6, "learning_rate": 1.3520084566596194e-05, "loss": 0.0697, "step": 13812 }, { "epoch": 14.6, "learning_rate": 1.3514799154334037e-05, "loss": 0.0069, "step": 13814 }, { "epoch": 14.6, "learning_rate": 1.3509513742071883e-05, "loss": 0.0679, "step": 13816 }, { "epoch": 14.61, "learning_rate": 1.3504228329809725e-05, "loss": 0.0769, "step": 13818 }, { "epoch": 14.61, "learning_rate": 1.349894291754757e-05, "loss": 0.0052, "step": 13820 }, { "epoch": 14.61, "learning_rate": 1.3493657505285412e-05, "loss": 0.0194, "step": 13822 }, { "epoch": 14.61, "learning_rate": 1.3488372093023258e-05, "loss": 0.004, "step": 13824 }, { "epoch": 14.62, "learning_rate": 1.3483086680761101e-05, "loss": 0.0115, "step": 13826 }, { "epoch": 14.62, "learning_rate": 1.3477801268498944e-05, "loss": 0.0701, "step": 13828 }, { "epoch": 14.62, "learning_rate": 1.3472515856236786e-05, "loss": 0.0325, "step": 13830 }, { "epoch": 14.62, "learning_rate": 1.3467230443974629e-05, "loss": 0.0148, "step": 13832 }, { "epoch": 14.62, "learning_rate": 1.3461945031712475e-05, "loss": 0.0282, "step": 13834 }, { "epoch": 14.63, "learning_rate": 1.3456659619450318e-05, "loss": 0.0206, "step": 13836 }, { "epoch": 14.63, "learning_rate": 1.345137420718816e-05, "loss": 0.0577, "step": 13838 }, { "epoch": 14.63, "learning_rate": 1.3446088794926003e-05, "loss": 0.0582, "step": 13840 }, { "epoch": 14.63, "learning_rate": 1.344080338266385e-05, "loss": 0.0143, "step": 13842 }, { "epoch": 14.63, "learning_rate": 1.3435517970401692e-05, "loss": 0.0237, "step": 13844 }, { "epoch": 14.64, "learning_rate": 1.3430232558139536e-05, "loss": 0.0128, "step": 13846 }, { "epoch": 14.64, "learning_rate": 1.3424947145877379e-05, "loss": 0.0338, "step": 13848 }, { "epoch": 14.64, "learning_rate": 1.3419661733615222e-05, "loss": 0.0459, "step": 13850 }, { "epoch": 14.64, "learning_rate": 1.3414376321353068e-05, "loss": 0.0092, "step": 13852 }, { "epoch": 14.64, "learning_rate": 1.340909090909091e-05, "loss": 0.069, "step": 13854 }, { "epoch": 14.65, "learning_rate": 1.3403805496828753e-05, "loss": 0.0052, "step": 13856 }, { "epoch": 14.65, "learning_rate": 1.3398520084566596e-05, "loss": 0.0128, "step": 13858 }, { "epoch": 14.65, "learning_rate": 1.3393234672304442e-05, "loss": 0.0054, "step": 13860 }, { "epoch": 14.65, "learning_rate": 1.3387949260042284e-05, "loss": 0.0318, "step": 13862 }, { "epoch": 14.66, "learning_rate": 1.3382663847780127e-05, "loss": 0.0147, "step": 13864 }, { "epoch": 14.66, "learning_rate": 1.337737843551797e-05, "loss": 0.0183, "step": 13866 }, { "epoch": 14.66, "learning_rate": 1.3372093023255814e-05, "loss": 0.0115, "step": 13868 }, { "epoch": 14.66, "learning_rate": 1.3366807610993659e-05, "loss": 0.0046, "step": 13870 }, { "epoch": 14.66, "learning_rate": 1.3361522198731501e-05, "loss": 0.0153, "step": 13872 }, { "epoch": 14.67, "learning_rate": 1.3356236786469346e-05, "loss": 0.0257, "step": 13874 }, { "epoch": 14.67, "learning_rate": 1.3350951374207188e-05, "loss": 0.0091, "step": 13876 }, { "epoch": 14.67, "learning_rate": 1.3345665961945034e-05, "loss": 0.0428, "step": 13878 }, { "epoch": 14.67, "learning_rate": 1.3340380549682877e-05, "loss": 0.006, "step": 13880 }, { "epoch": 14.67, "learning_rate": 1.333509513742072e-05, "loss": 0.0107, "step": 13882 }, { "epoch": 14.68, "learning_rate": 1.3329809725158562e-05, "loss": 0.0074, "step": 13884 }, { "epoch": 14.68, "learning_rate": 1.3324524312896405e-05, "loss": 0.0027, "step": 13886 }, { "epoch": 14.68, "learning_rate": 1.3319238900634251e-05, "loss": 0.0179, "step": 13888 }, { "epoch": 14.68, "learning_rate": 1.3313953488372094e-05, "loss": 0.0392, "step": 13890 }, { "epoch": 14.68, "learning_rate": 1.3308668076109936e-05, "loss": 0.0578, "step": 13892 }, { "epoch": 14.69, "learning_rate": 1.3303382663847779e-05, "loss": 0.0031, "step": 13894 }, { "epoch": 14.69, "learning_rate": 1.3298097251585625e-05, "loss": 0.0141, "step": 13896 }, { "epoch": 14.69, "learning_rate": 1.3292811839323468e-05, "loss": 0.023, "step": 13898 }, { "epoch": 14.69, "learning_rate": 1.3287526427061312e-05, "loss": 0.0334, "step": 13900 }, { "epoch": 14.7, "learning_rate": 1.3282241014799155e-05, "loss": 0.0172, "step": 13902 }, { "epoch": 14.7, "learning_rate": 1.3276955602536997e-05, "loss": 0.0193, "step": 13904 }, { "epoch": 14.7, "learning_rate": 1.3271670190274844e-05, "loss": 0.012, "step": 13906 }, { "epoch": 14.7, "learning_rate": 1.3266384778012686e-05, "loss": 0.0029, "step": 13908 }, { "epoch": 14.7, "learning_rate": 1.3261099365750529e-05, "loss": 0.0846, "step": 13910 }, { "epoch": 14.71, "learning_rate": 1.3255813953488372e-05, "loss": 0.0243, "step": 13912 }, { "epoch": 14.71, "learning_rate": 1.3250528541226218e-05, "loss": 0.0498, "step": 13914 }, { "epoch": 14.71, "learning_rate": 1.324524312896406e-05, "loss": 0.0711, "step": 13916 }, { "epoch": 14.71, "learning_rate": 1.3239957716701903e-05, "loss": 0.0096, "step": 13918 }, { "epoch": 14.71, "learning_rate": 1.3234672304439746e-05, "loss": 0.01, "step": 13920 }, { "epoch": 14.72, "learning_rate": 1.322938689217759e-05, "loss": 0.0635, "step": 13922 }, { "epoch": 14.72, "learning_rate": 1.3224101479915434e-05, "loss": 0.004, "step": 13924 }, { "epoch": 14.72, "learning_rate": 1.3218816067653279e-05, "loss": 0.0302, "step": 13926 }, { "epoch": 14.72, "learning_rate": 1.3213530655391121e-05, "loss": 0.0196, "step": 13928 }, { "epoch": 14.73, "learning_rate": 1.3208245243128964e-05, "loss": 0.0089, "step": 13930 }, { "epoch": 14.73, "learning_rate": 1.320295983086681e-05, "loss": 0.0592, "step": 13932 }, { "epoch": 14.73, "learning_rate": 1.3197674418604653e-05, "loss": 0.0242, "step": 13934 }, { "epoch": 14.73, "learning_rate": 1.3192389006342495e-05, "loss": 0.0562, "step": 13936 }, { "epoch": 14.73, "learning_rate": 1.3187103594080338e-05, "loss": 0.1009, "step": 13938 }, { "epoch": 14.74, "learning_rate": 1.318181818181818e-05, "loss": 0.0292, "step": 13940 }, { "epoch": 14.74, "learning_rate": 1.3176532769556027e-05, "loss": 0.0123, "step": 13942 }, { "epoch": 14.74, "learning_rate": 1.317124735729387e-05, "loss": 0.002, "step": 13944 }, { "epoch": 14.74, "learning_rate": 1.3165961945031712e-05, "loss": 0.0556, "step": 13946 }, { "epoch": 14.74, "learning_rate": 1.3160676532769557e-05, "loss": 0.0065, "step": 13948 }, { "epoch": 14.75, "learning_rate": 1.3155391120507401e-05, "loss": 0.0098, "step": 13950 }, { "epoch": 14.75, "learning_rate": 1.3150105708245244e-05, "loss": 0.0191, "step": 13952 }, { "epoch": 14.75, "learning_rate": 1.3144820295983088e-05, "loss": 0.0013, "step": 13954 }, { "epoch": 14.75, "learning_rate": 1.313953488372093e-05, "loss": 0.0402, "step": 13956 }, { "epoch": 14.75, "learning_rate": 1.3134249471458773e-05, "loss": 0.0177, "step": 13958 }, { "epoch": 14.76, "learning_rate": 1.312896405919662e-05, "loss": 0.0181, "step": 13960 }, { "epoch": 14.76, "learning_rate": 1.3123678646934462e-05, "loss": 0.0154, "step": 13962 }, { "epoch": 14.76, "learning_rate": 1.3118393234672305e-05, "loss": 0.0194, "step": 13964 }, { "epoch": 14.76, "learning_rate": 1.3113107822410147e-05, "loss": 0.0338, "step": 13966 }, { "epoch": 14.77, "learning_rate": 1.3107822410147993e-05, "loss": 0.0014, "step": 13968 }, { "epoch": 14.77, "learning_rate": 1.3102536997885836e-05, "loss": 0.0198, "step": 13970 }, { "epoch": 14.77, "learning_rate": 1.3097251585623679e-05, "loss": 0.0695, "step": 13972 }, { "epoch": 14.77, "learning_rate": 1.3091966173361521e-05, "loss": 0.0272, "step": 13974 }, { "epoch": 14.77, "learning_rate": 1.3086680761099366e-05, "loss": 0.0626, "step": 13976 }, { "epoch": 14.78, "learning_rate": 1.308139534883721e-05, "loss": 0.011, "step": 13978 }, { "epoch": 14.78, "learning_rate": 1.3076109936575055e-05, "loss": 0.017, "step": 13980 }, { "epoch": 14.78, "learning_rate": 1.3070824524312897e-05, "loss": 0.0119, "step": 13982 }, { "epoch": 14.78, "learning_rate": 1.306553911205074e-05, "loss": 0.02, "step": 13984 }, { "epoch": 14.78, "learning_rate": 1.3060253699788586e-05, "loss": 0.0353, "step": 13986 }, { "epoch": 14.79, "learning_rate": 1.3054968287526429e-05, "loss": 0.039, "step": 13988 }, { "epoch": 14.79, "learning_rate": 1.3049682875264271e-05, "loss": 0.0174, "step": 13990 }, { "epoch": 14.79, "learning_rate": 1.3044397463002114e-05, "loss": 0.0433, "step": 13992 }, { "epoch": 14.79, "learning_rate": 1.3039112050739957e-05, "loss": 0.0249, "step": 13994 }, { "epoch": 14.79, "learning_rate": 1.3033826638477803e-05, "loss": 0.0049, "step": 13996 }, { "epoch": 14.8, "learning_rate": 1.3028541226215645e-05, "loss": 0.0084, "step": 13998 }, { "epoch": 14.8, "learning_rate": 1.3023255813953488e-05, "loss": 0.043, "step": 14000 }, { "epoch": 14.8, "eval_cer": 0.11011684240524366, "eval_loss": 0.7147426009178162, "eval_runtime": 118.439, "eval_samples_per_second": 7.101, "eval_steps_per_second": 0.895, "step": 14000 }, { "epoch": 14.8, "learning_rate": 1.3017970401691332e-05, "loss": 0.0128, "step": 14002 }, { "epoch": 14.8, "learning_rate": 1.3012684989429177e-05, "loss": 0.0234, "step": 14004 }, { "epoch": 14.81, "learning_rate": 1.3007399577167021e-05, "loss": 0.0215, "step": 14006 }, { "epoch": 14.81, "learning_rate": 1.3002114164904864e-05, "loss": 0.0191, "step": 14008 }, { "epoch": 14.81, "learning_rate": 1.2996828752642706e-05, "loss": 0.0338, "step": 14010 }, { "epoch": 14.81, "learning_rate": 1.299154334038055e-05, "loss": 0.0133, "step": 14012 }, { "epoch": 14.81, "learning_rate": 1.2986257928118395e-05, "loss": 0.0235, "step": 14014 }, { "epoch": 14.82, "learning_rate": 1.2980972515856238e-05, "loss": 0.0398, "step": 14016 }, { "epoch": 14.82, "learning_rate": 1.297568710359408e-05, "loss": 0.0036, "step": 14018 }, { "epoch": 14.82, "learning_rate": 1.2970401691331923e-05, "loss": 0.0039, "step": 14020 }, { "epoch": 14.82, "learning_rate": 1.296511627906977e-05, "loss": 0.031, "step": 14022 }, { "epoch": 14.82, "learning_rate": 1.2959830866807612e-05, "loss": 0.0258, "step": 14024 }, { "epoch": 14.83, "learning_rate": 1.2954545454545455e-05, "loss": 0.0334, "step": 14026 }, { "epoch": 14.83, "learning_rate": 1.2949260042283299e-05, "loss": 0.025, "step": 14028 }, { "epoch": 14.83, "learning_rate": 1.2943974630021142e-05, "loss": 0.0311, "step": 14030 }, { "epoch": 14.83, "learning_rate": 1.2938689217758988e-05, "loss": 0.008, "step": 14032 }, { "epoch": 14.84, "learning_rate": 1.293340380549683e-05, "loss": 0.0442, "step": 14034 }, { "epoch": 14.84, "learning_rate": 1.2928118393234673e-05, "loss": 0.0072, "step": 14036 }, { "epoch": 14.84, "learning_rate": 1.2922832980972516e-05, "loss": 0.0121, "step": 14038 }, { "epoch": 14.84, "learning_rate": 1.2917547568710358e-05, "loss": 0.0383, "step": 14040 }, { "epoch": 14.84, "learning_rate": 1.2912262156448204e-05, "loss": 0.0156, "step": 14042 }, { "epoch": 14.85, "learning_rate": 1.2906976744186047e-05, "loss": 0.0085, "step": 14044 }, { "epoch": 14.85, "learning_rate": 1.290169133192389e-05, "loss": 0.0195, "step": 14046 }, { "epoch": 14.85, "learning_rate": 1.2896405919661732e-05, "loss": 0.027, "step": 14048 }, { "epoch": 14.85, "learning_rate": 1.2891120507399579e-05, "loss": 0.0062, "step": 14050 }, { "epoch": 14.85, "learning_rate": 1.2885835095137421e-05, "loss": 0.0284, "step": 14052 }, { "epoch": 14.86, "learning_rate": 1.2880549682875266e-05, "loss": 0.0132, "step": 14054 }, { "epoch": 14.86, "learning_rate": 1.2875264270613108e-05, "loss": 0.0049, "step": 14056 }, { "epoch": 14.86, "learning_rate": 1.2869978858350951e-05, "loss": 0.1056, "step": 14058 }, { "epoch": 14.86, "learning_rate": 1.2864693446088797e-05, "loss": 0.0352, "step": 14060 }, { "epoch": 14.86, "learning_rate": 1.285940803382664e-05, "loss": 0.0039, "step": 14062 }, { "epoch": 14.87, "learning_rate": 1.2854122621564482e-05, "loss": 0.0626, "step": 14064 }, { "epoch": 14.87, "learning_rate": 1.2848837209302325e-05, "loss": 0.0113, "step": 14066 }, { "epoch": 14.87, "learning_rate": 1.2843551797040171e-05, "loss": 0.0759, "step": 14068 }, { "epoch": 14.87, "learning_rate": 1.2838266384778014e-05, "loss": 0.045, "step": 14070 }, { "epoch": 14.88, "learning_rate": 1.2832980972515856e-05, "loss": 0.0482, "step": 14072 }, { "epoch": 14.88, "learning_rate": 1.2827695560253699e-05, "loss": 0.0117, "step": 14074 }, { "epoch": 14.88, "learning_rate": 1.2822410147991543e-05, "loss": 0.0192, "step": 14076 }, { "epoch": 14.88, "learning_rate": 1.2817124735729388e-05, "loss": 0.0201, "step": 14078 }, { "epoch": 14.88, "learning_rate": 1.281183932346723e-05, "loss": 0.0157, "step": 14080 }, { "epoch": 14.89, "learning_rate": 1.2806553911205075e-05, "loss": 0.0105, "step": 14082 }, { "epoch": 14.89, "learning_rate": 1.2801268498942918e-05, "loss": 0.0715, "step": 14084 }, { "epoch": 14.89, "learning_rate": 1.2795983086680764e-05, "loss": 0.0033, "step": 14086 }, { "epoch": 14.89, "learning_rate": 1.2790697674418606e-05, "loss": 0.0399, "step": 14088 }, { "epoch": 14.89, "learning_rate": 1.2785412262156449e-05, "loss": 0.0123, "step": 14090 }, { "epoch": 14.9, "learning_rate": 1.2780126849894292e-05, "loss": 0.0195, "step": 14092 }, { "epoch": 14.9, "learning_rate": 1.2774841437632134e-05, "loss": 0.0414, "step": 14094 }, { "epoch": 14.9, "learning_rate": 1.276955602536998e-05, "loss": 0.0498, "step": 14096 }, { "epoch": 14.9, "learning_rate": 1.2764270613107823e-05, "loss": 0.0189, "step": 14098 }, { "epoch": 14.9, "learning_rate": 1.2758985200845666e-05, "loss": 0.0561, "step": 14100 }, { "epoch": 14.91, "learning_rate": 1.2753699788583508e-05, "loss": 0.0084, "step": 14102 }, { "epoch": 14.91, "learning_rate": 1.2748414376321354e-05, "loss": 0.0024, "step": 14104 }, { "epoch": 14.91, "learning_rate": 1.2743128964059197e-05, "loss": 0.0021, "step": 14106 }, { "epoch": 14.91, "learning_rate": 1.2737843551797041e-05, "loss": 0.0055, "step": 14108 }, { "epoch": 14.92, "learning_rate": 1.2732558139534884e-05, "loss": 0.0232, "step": 14110 }, { "epoch": 14.92, "learning_rate": 1.2727272727272727e-05, "loss": 0.0115, "step": 14112 }, { "epoch": 14.92, "learning_rate": 1.2721987315010573e-05, "loss": 0.0262, "step": 14114 }, { "epoch": 14.92, "learning_rate": 1.2716701902748416e-05, "loss": 0.0173, "step": 14116 }, { "epoch": 14.92, "learning_rate": 1.2711416490486258e-05, "loss": 0.0069, "step": 14118 }, { "epoch": 14.93, "learning_rate": 1.27061310782241e-05, "loss": 0.0197, "step": 14120 }, { "epoch": 14.93, "learning_rate": 1.2700845665961947e-05, "loss": 0.0512, "step": 14122 }, { "epoch": 14.93, "learning_rate": 1.269556025369979e-05, "loss": 0.0183, "step": 14124 }, { "epoch": 14.93, "learning_rate": 1.2690274841437632e-05, "loss": 0.0081, "step": 14126 }, { "epoch": 14.93, "learning_rate": 1.2684989429175475e-05, "loss": 0.0204, "step": 14128 }, { "epoch": 14.94, "learning_rate": 1.267970401691332e-05, "loss": 0.0542, "step": 14130 }, { "epoch": 14.94, "learning_rate": 1.2674418604651164e-05, "loss": 0.0014, "step": 14132 }, { "epoch": 14.94, "learning_rate": 1.2669133192389008e-05, "loss": 0.0283, "step": 14134 }, { "epoch": 14.94, "learning_rate": 1.266384778012685e-05, "loss": 0.0025, "step": 14136 }, { "epoch": 14.95, "learning_rate": 1.2658562367864693e-05, "loss": 0.0045, "step": 14138 }, { "epoch": 14.95, "learning_rate": 1.265327695560254e-05, "loss": 0.0321, "step": 14140 }, { "epoch": 14.95, "learning_rate": 1.2647991543340382e-05, "loss": 0.0172, "step": 14142 }, { "epoch": 14.95, "learning_rate": 1.2642706131078225e-05, "loss": 0.0381, "step": 14144 }, { "epoch": 14.95, "learning_rate": 1.2637420718816067e-05, "loss": 0.0101, "step": 14146 }, { "epoch": 14.96, "learning_rate": 1.263213530655391e-05, "loss": 0.0131, "step": 14148 }, { "epoch": 14.96, "learning_rate": 1.2626849894291756e-05, "loss": 0.0297, "step": 14150 }, { "epoch": 14.96, "learning_rate": 1.2621564482029599e-05, "loss": 0.0712, "step": 14152 }, { "epoch": 14.96, "learning_rate": 1.2616279069767442e-05, "loss": 0.0195, "step": 14154 }, { "epoch": 14.96, "learning_rate": 1.2610993657505286e-05, "loss": 0.0028, "step": 14156 }, { "epoch": 14.97, "learning_rate": 1.260570824524313e-05, "loss": 0.0212, "step": 14158 }, { "epoch": 14.97, "learning_rate": 1.2600422832980973e-05, "loss": 0.0071, "step": 14160 }, { "epoch": 14.97, "learning_rate": 1.2595137420718817e-05, "loss": 0.0148, "step": 14162 }, { "epoch": 14.97, "learning_rate": 1.258985200845666e-05, "loss": 0.0342, "step": 14164 }, { "epoch": 14.97, "learning_rate": 1.2584566596194503e-05, "loss": 0.0767, "step": 14166 }, { "epoch": 14.98, "learning_rate": 1.2579281183932349e-05, "loss": 0.021, "step": 14168 }, { "epoch": 14.98, "learning_rate": 1.2573995771670191e-05, "loss": 0.0338, "step": 14170 }, { "epoch": 14.98, "learning_rate": 1.2568710359408034e-05, "loss": 0.0312, "step": 14172 }, { "epoch": 14.98, "learning_rate": 1.2563424947145877e-05, "loss": 0.0186, "step": 14174 }, { "epoch": 14.99, "learning_rate": 1.2558139534883723e-05, "loss": 0.0552, "step": 14176 }, { "epoch": 14.99, "learning_rate": 1.2552854122621565e-05, "loss": 0.0098, "step": 14178 }, { "epoch": 14.99, "learning_rate": 1.2547568710359408e-05, "loss": 0.0243, "step": 14180 }, { "epoch": 14.99, "learning_rate": 1.254228329809725e-05, "loss": 0.0223, "step": 14182 }, { "epoch": 14.99, "learning_rate": 1.2536997885835095e-05, "loss": 0.0292, "step": 14184 }, { "epoch": 15.0, "learning_rate": 1.253171247357294e-05, "loss": 0.0551, "step": 14186 }, { "epoch": 15.0, "learning_rate": 1.2526427061310784e-05, "loss": 0.0148, "step": 14188 }, { "epoch": 15.0, "learning_rate": 1.2521141649048627e-05, "loss": 0.0025, "step": 14190 }, { "epoch": 15.0, "learning_rate": 1.251585623678647e-05, "loss": 0.0089, "step": 14192 }, { "epoch": 15.0, "learning_rate": 1.2510570824524315e-05, "loss": 0.0835, "step": 14194 }, { "epoch": 15.01, "learning_rate": 1.2505285412262158e-05, "loss": 0.0132, "step": 14196 }, { "epoch": 15.01, "learning_rate": 1.25e-05, "loss": 0.0119, "step": 14198 }, { "epoch": 15.01, "learning_rate": 1.2494714587737843e-05, "loss": 0.0015, "step": 14200 }, { "epoch": 15.01, "learning_rate": 1.2489429175475688e-05, "loss": 0.0044, "step": 14202 }, { "epoch": 15.01, "learning_rate": 1.248414376321353e-05, "loss": 0.0366, "step": 14204 }, { "epoch": 15.02, "learning_rate": 1.2478858350951375e-05, "loss": 0.1134, "step": 14206 }, { "epoch": 15.02, "learning_rate": 1.2473572938689217e-05, "loss": 0.0088, "step": 14208 }, { "epoch": 15.02, "learning_rate": 1.2468287526427062e-05, "loss": 0.0351, "step": 14210 }, { "epoch": 15.02, "learning_rate": 1.2463002114164906e-05, "loss": 0.0116, "step": 14212 }, { "epoch": 15.03, "learning_rate": 1.245771670190275e-05, "loss": 0.0377, "step": 14214 }, { "epoch": 15.03, "learning_rate": 1.2452431289640593e-05, "loss": 0.0145, "step": 14216 }, { "epoch": 15.03, "learning_rate": 1.2447145877378436e-05, "loss": 0.0409, "step": 14218 }, { "epoch": 15.03, "learning_rate": 1.244186046511628e-05, "loss": 0.0149, "step": 14220 }, { "epoch": 15.03, "learning_rate": 1.2436575052854123e-05, "loss": 0.0156, "step": 14222 }, { "epoch": 15.04, "learning_rate": 1.2431289640591967e-05, "loss": 0.011, "step": 14224 }, { "epoch": 15.04, "learning_rate": 1.242600422832981e-05, "loss": 0.0027, "step": 14226 }, { "epoch": 15.04, "learning_rate": 1.2420718816067654e-05, "loss": 0.0122, "step": 14228 }, { "epoch": 15.04, "learning_rate": 1.2415433403805497e-05, "loss": 0.0271, "step": 14230 }, { "epoch": 15.04, "learning_rate": 1.2410147991543341e-05, "loss": 0.0151, "step": 14232 }, { "epoch": 15.05, "learning_rate": 1.2404862579281184e-05, "loss": 0.0016, "step": 14234 }, { "epoch": 15.05, "learning_rate": 1.2399577167019028e-05, "loss": 0.0097, "step": 14236 }, { "epoch": 15.05, "learning_rate": 1.2394291754756873e-05, "loss": 0.0016, "step": 14238 }, { "epoch": 15.05, "learning_rate": 1.2389006342494715e-05, "loss": 0.0188, "step": 14240 }, { "epoch": 15.05, "learning_rate": 1.238372093023256e-05, "loss": 0.0048, "step": 14242 }, { "epoch": 15.06, "learning_rate": 1.2378435517970402e-05, "loss": 0.0289, "step": 14244 }, { "epoch": 15.06, "learning_rate": 1.2373150105708247e-05, "loss": 0.0386, "step": 14246 }, { "epoch": 15.06, "learning_rate": 1.236786469344609e-05, "loss": 0.0394, "step": 14248 }, { "epoch": 15.06, "learning_rate": 1.2362579281183934e-05, "loss": 0.0077, "step": 14250 }, { "epoch": 15.07, "learning_rate": 1.2357293868921776e-05, "loss": 0.007, "step": 14252 }, { "epoch": 15.07, "learning_rate": 1.2352008456659619e-05, "loss": 0.0025, "step": 14254 }, { "epoch": 15.07, "learning_rate": 1.2346723044397463e-05, "loss": 0.018, "step": 14256 }, { "epoch": 15.07, "learning_rate": 1.2341437632135306e-05, "loss": 0.0049, "step": 14258 }, { "epoch": 15.07, "learning_rate": 1.233615221987315e-05, "loss": 0.0545, "step": 14260 }, { "epoch": 15.08, "learning_rate": 1.2330866807610993e-05, "loss": 0.0179, "step": 14262 }, { "epoch": 15.08, "learning_rate": 1.2325581395348838e-05, "loss": 0.0084, "step": 14264 }, { "epoch": 15.08, "learning_rate": 1.2320295983086682e-05, "loss": 0.0726, "step": 14266 }, { "epoch": 15.08, "learning_rate": 1.2315010570824526e-05, "loss": 0.0107, "step": 14268 }, { "epoch": 15.08, "learning_rate": 1.2309725158562369e-05, "loss": 0.0246, "step": 14270 }, { "epoch": 15.09, "learning_rate": 1.2304439746300212e-05, "loss": 0.0071, "step": 14272 }, { "epoch": 15.09, "learning_rate": 1.2299154334038056e-05, "loss": 0.0165, "step": 14274 }, { "epoch": 15.09, "learning_rate": 1.2293868921775899e-05, "loss": 0.0012, "step": 14276 }, { "epoch": 15.09, "learning_rate": 1.2288583509513743e-05, "loss": 0.0039, "step": 14278 }, { "epoch": 15.1, "learning_rate": 1.2283298097251586e-05, "loss": 0.0154, "step": 14280 }, { "epoch": 15.1, "learning_rate": 1.227801268498943e-05, "loss": 0.009, "step": 14282 }, { "epoch": 15.1, "learning_rate": 1.2272727272727273e-05, "loss": 0.0496, "step": 14284 }, { "epoch": 15.1, "learning_rate": 1.2267441860465115e-05, "loss": 0.0129, "step": 14286 }, { "epoch": 15.1, "learning_rate": 1.226215644820296e-05, "loss": 0.0179, "step": 14288 }, { "epoch": 15.11, "learning_rate": 1.2256871035940804e-05, "loss": 0.0161, "step": 14290 }, { "epoch": 15.11, "learning_rate": 1.2251585623678648e-05, "loss": 0.0031, "step": 14292 }, { "epoch": 15.11, "learning_rate": 1.2246300211416491e-05, "loss": 0.0035, "step": 14294 }, { "epoch": 15.11, "learning_rate": 1.2241014799154336e-05, "loss": 0.0109, "step": 14296 }, { "epoch": 15.11, "learning_rate": 1.2235729386892178e-05, "loss": 0.0152, "step": 14298 }, { "epoch": 15.12, "learning_rate": 1.2230443974630023e-05, "loss": 0.0054, "step": 14300 }, { "epoch": 15.12, "learning_rate": 1.2225158562367865e-05, "loss": 0.0019, "step": 14302 }, { "epoch": 15.12, "learning_rate": 1.2219873150105708e-05, "loss": 0.0168, "step": 14304 }, { "epoch": 15.12, "learning_rate": 1.2214587737843552e-05, "loss": 0.01, "step": 14306 }, { "epoch": 15.12, "learning_rate": 1.2209302325581395e-05, "loss": 0.0634, "step": 14308 }, { "epoch": 15.13, "learning_rate": 1.220401691331924e-05, "loss": 0.0297, "step": 14310 }, { "epoch": 15.13, "learning_rate": 1.2198731501057082e-05, "loss": 0.0023, "step": 14312 }, { "epoch": 15.13, "learning_rate": 1.2193446088794926e-05, "loss": 0.0024, "step": 14314 }, { "epoch": 15.13, "learning_rate": 1.218816067653277e-05, "loss": 0.0011, "step": 14316 }, { "epoch": 15.14, "learning_rate": 1.2182875264270615e-05, "loss": 0.019, "step": 14318 }, { "epoch": 15.14, "learning_rate": 1.2177589852008458e-05, "loss": 0.014, "step": 14320 }, { "epoch": 15.14, "learning_rate": 1.21723044397463e-05, "loss": 0.0491, "step": 14322 }, { "epoch": 15.14, "learning_rate": 1.2167019027484145e-05, "loss": 0.0111, "step": 14324 }, { "epoch": 15.14, "learning_rate": 1.2161733615221987e-05, "loss": 0.1101, "step": 14326 }, { "epoch": 15.15, "learning_rate": 1.2156448202959832e-05, "loss": 0.0013, "step": 14328 }, { "epoch": 15.15, "learning_rate": 1.2151162790697674e-05, "loss": 0.0017, "step": 14330 }, { "epoch": 15.15, "learning_rate": 1.2145877378435519e-05, "loss": 0.0016, "step": 14332 }, { "epoch": 15.15, "learning_rate": 1.2140591966173362e-05, "loss": 0.0131, "step": 14334 }, { "epoch": 15.15, "learning_rate": 1.2135306553911206e-05, "loss": 0.0167, "step": 14336 }, { "epoch": 15.16, "learning_rate": 1.2130021141649049e-05, "loss": 0.0574, "step": 14338 }, { "epoch": 15.16, "learning_rate": 1.2124735729386893e-05, "loss": 0.0031, "step": 14340 }, { "epoch": 15.16, "learning_rate": 1.2119450317124736e-05, "loss": 0.0017, "step": 14342 }, { "epoch": 15.16, "learning_rate": 1.211416490486258e-05, "loss": 0.0011, "step": 14344 }, { "epoch": 15.16, "learning_rate": 1.2108879492600424e-05, "loss": 0.0199, "step": 14346 }, { "epoch": 15.17, "learning_rate": 1.2103594080338267e-05, "loss": 0.0643, "step": 14348 }, { "epoch": 15.17, "learning_rate": 1.2098308668076111e-05, "loss": 0.0007, "step": 14350 }, { "epoch": 15.17, "learning_rate": 1.2093023255813954e-05, "loss": 0.0077, "step": 14352 }, { "epoch": 15.17, "learning_rate": 1.2087737843551798e-05, "loss": 0.0107, "step": 14354 }, { "epoch": 15.18, "learning_rate": 1.2082452431289641e-05, "loss": 0.0029, "step": 14356 }, { "epoch": 15.18, "learning_rate": 1.2077167019027484e-05, "loss": 0.0179, "step": 14358 }, { "epoch": 15.18, "learning_rate": 1.2071881606765328e-05, "loss": 0.0099, "step": 14360 }, { "epoch": 15.18, "learning_rate": 1.206659619450317e-05, "loss": 0.0019, "step": 14362 }, { "epoch": 15.18, "learning_rate": 1.2061310782241015e-05, "loss": 0.0512, "step": 14364 }, { "epoch": 15.19, "learning_rate": 1.2056025369978858e-05, "loss": 0.0281, "step": 14366 }, { "epoch": 15.19, "learning_rate": 1.2050739957716702e-05, "loss": 0.0039, "step": 14368 }, { "epoch": 15.19, "learning_rate": 1.2045454545454547e-05, "loss": 0.0081, "step": 14370 }, { "epoch": 15.19, "learning_rate": 1.2040169133192391e-05, "loss": 0.038, "step": 14372 }, { "epoch": 15.19, "learning_rate": 1.2034883720930234e-05, "loss": 0.0053, "step": 14374 }, { "epoch": 15.2, "learning_rate": 1.2029598308668076e-05, "loss": 0.0346, "step": 14376 }, { "epoch": 15.2, "learning_rate": 1.202431289640592e-05, "loss": 0.0205, "step": 14378 }, { "epoch": 15.2, "learning_rate": 1.2019027484143763e-05, "loss": 0.0112, "step": 14380 }, { "epoch": 15.2, "learning_rate": 1.2013742071881608e-05, "loss": 0.0561, "step": 14382 }, { "epoch": 15.21, "learning_rate": 1.200845665961945e-05, "loss": 0.0052, "step": 14384 }, { "epoch": 15.21, "learning_rate": 1.2003171247357295e-05, "loss": 0.0107, "step": 14386 }, { "epoch": 15.21, "learning_rate": 1.1997885835095137e-05, "loss": 0.0043, "step": 14388 }, { "epoch": 15.21, "learning_rate": 1.1992600422832982e-05, "loss": 0.0613, "step": 14390 }, { "epoch": 15.21, "learning_rate": 1.1987315010570824e-05, "loss": 0.0243, "step": 14392 }, { "epoch": 15.22, "learning_rate": 1.1982029598308669e-05, "loss": 0.0275, "step": 14394 }, { "epoch": 15.22, "learning_rate": 1.1976744186046513e-05, "loss": 0.0478, "step": 14396 }, { "epoch": 15.22, "learning_rate": 1.1971458773784356e-05, "loss": 0.0442, "step": 14398 }, { "epoch": 15.22, "learning_rate": 1.19661733615222e-05, "loss": 0.0173, "step": 14400 }, { "epoch": 15.22, "learning_rate": 1.1960887949260043e-05, "loss": 0.082, "step": 14402 }, { "epoch": 15.23, "learning_rate": 1.1955602536997887e-05, "loss": 0.0412, "step": 14404 }, { "epoch": 15.23, "learning_rate": 1.195031712473573e-05, "loss": 0.0079, "step": 14406 }, { "epoch": 15.23, "learning_rate": 1.1945031712473574e-05, "loss": 0.0347, "step": 14408 }, { "epoch": 15.23, "learning_rate": 1.1939746300211417e-05, "loss": 0.0082, "step": 14410 }, { "epoch": 15.23, "learning_rate": 1.193446088794926e-05, "loss": 0.0088, "step": 14412 }, { "epoch": 15.24, "learning_rate": 1.1929175475687104e-05, "loss": 0.0102, "step": 14414 }, { "epoch": 15.24, "learning_rate": 1.1923890063424947e-05, "loss": 0.0421, "step": 14416 }, { "epoch": 15.24, "learning_rate": 1.1918604651162791e-05, "loss": 0.0322, "step": 14418 }, { "epoch": 15.24, "learning_rate": 1.1913319238900635e-05, "loss": 0.0186, "step": 14420 }, { "epoch": 15.25, "learning_rate": 1.190803382663848e-05, "loss": 0.0491, "step": 14422 }, { "epoch": 15.25, "learning_rate": 1.1902748414376322e-05, "loss": 0.0272, "step": 14424 }, { "epoch": 15.25, "learning_rate": 1.1897463002114167e-05, "loss": 0.0482, "step": 14426 }, { "epoch": 15.25, "learning_rate": 1.189217758985201e-05, "loss": 0.0494, "step": 14428 }, { "epoch": 15.25, "learning_rate": 1.1886892177589852e-05, "loss": 0.0158, "step": 14430 }, { "epoch": 15.26, "learning_rate": 1.1881606765327696e-05, "loss": 0.0274, "step": 14432 }, { "epoch": 15.26, "learning_rate": 1.1876321353065539e-05, "loss": 0.0167, "step": 14434 }, { "epoch": 15.26, "learning_rate": 1.1871035940803384e-05, "loss": 0.1149, "step": 14436 }, { "epoch": 15.26, "learning_rate": 1.1865750528541226e-05, "loss": 0.0095, "step": 14438 }, { "epoch": 15.26, "learning_rate": 1.186046511627907e-05, "loss": 0.0087, "step": 14440 }, { "epoch": 15.27, "learning_rate": 1.1855179704016913e-05, "loss": 0.0084, "step": 14442 }, { "epoch": 15.27, "learning_rate": 1.1849894291754758e-05, "loss": 0.0193, "step": 14444 }, { "epoch": 15.27, "learning_rate": 1.18446088794926e-05, "loss": 0.0106, "step": 14446 }, { "epoch": 15.27, "learning_rate": 1.1839323467230445e-05, "loss": 0.0039, "step": 14448 }, { "epoch": 15.27, "learning_rate": 1.1834038054968289e-05, "loss": 0.0293, "step": 14450 }, { "epoch": 15.28, "learning_rate": 1.1828752642706132e-05, "loss": 0.0379, "step": 14452 }, { "epoch": 15.28, "learning_rate": 1.1823467230443976e-05, "loss": 0.0193, "step": 14454 }, { "epoch": 15.28, "learning_rate": 1.1818181818181819e-05, "loss": 0.0078, "step": 14456 }, { "epoch": 15.28, "learning_rate": 1.1812896405919663e-05, "loss": 0.0086, "step": 14458 }, { "epoch": 15.29, "learning_rate": 1.1807610993657506e-05, "loss": 0.0157, "step": 14460 }, { "epoch": 15.29, "learning_rate": 1.1802325581395348e-05, "loss": 0.0359, "step": 14462 }, { "epoch": 15.29, "learning_rate": 1.1797040169133193e-05, "loss": 0.0405, "step": 14464 }, { "epoch": 15.29, "learning_rate": 1.1791754756871035e-05, "loss": 0.0195, "step": 14466 }, { "epoch": 15.29, "learning_rate": 1.178646934460888e-05, "loss": 0.0071, "step": 14468 }, { "epoch": 15.3, "learning_rate": 1.1781183932346722e-05, "loss": 0.0268, "step": 14470 }, { "epoch": 15.3, "learning_rate": 1.1775898520084567e-05, "loss": 0.0203, "step": 14472 }, { "epoch": 15.3, "learning_rate": 1.1770613107822411e-05, "loss": 0.0033, "step": 14474 }, { "epoch": 15.3, "learning_rate": 1.1765327695560256e-05, "loss": 0.0172, "step": 14476 }, { "epoch": 15.3, "learning_rate": 1.1760042283298098e-05, "loss": 0.007, "step": 14478 }, { "epoch": 15.31, "learning_rate": 1.1754756871035941e-05, "loss": 0.0289, "step": 14480 }, { "epoch": 15.31, "learning_rate": 1.1749471458773785e-05, "loss": 0.0525, "step": 14482 }, { "epoch": 15.31, "learning_rate": 1.1744186046511628e-05, "loss": 0.0071, "step": 14484 }, { "epoch": 15.31, "learning_rate": 1.1738900634249472e-05, "loss": 0.0152, "step": 14486 }, { "epoch": 15.32, "learning_rate": 1.1733615221987315e-05, "loss": 0.0126, "step": 14488 }, { "epoch": 15.32, "learning_rate": 1.172832980972516e-05, "loss": 0.0154, "step": 14490 }, { "epoch": 15.32, "learning_rate": 1.1723044397463002e-05, "loss": 0.0023, "step": 14492 }, { "epoch": 15.32, "learning_rate": 1.1717758985200846e-05, "loss": 0.0349, "step": 14494 }, { "epoch": 15.32, "learning_rate": 1.1712473572938689e-05, "loss": 0.0262, "step": 14496 }, { "epoch": 15.33, "learning_rate": 1.1707188160676533e-05, "loss": 0.031, "step": 14498 }, { "epoch": 15.33, "learning_rate": 1.1701902748414378e-05, "loss": 0.0121, "step": 14500 }, { "epoch": 15.33, "eval_cer": 0.04895981761185523, "eval_loss": 0.6679000854492188, "eval_runtime": 125.3329, "eval_samples_per_second": 6.71, "eval_steps_per_second": 0.846, "step": 14500 }, { "epoch": 15.33, "learning_rate": 1.169661733615222e-05, "loss": 0.0094, "step": 14502 }, { "epoch": 15.33, "learning_rate": 1.1691331923890065e-05, "loss": 0.0073, "step": 14504 }, { "epoch": 15.33, "learning_rate": 1.1686046511627907e-05, "loss": 0.0042, "step": 14506 }, { "epoch": 15.34, "learning_rate": 1.1680761099365752e-05, "loss": 0.0144, "step": 14508 }, { "epoch": 15.34, "learning_rate": 1.1675475687103595e-05, "loss": 0.0017, "step": 14510 }, { "epoch": 15.34, "learning_rate": 1.1670190274841439e-05, "loss": 0.0413, "step": 14512 }, { "epoch": 15.34, "learning_rate": 1.1664904862579282e-05, "loss": 0.0259, "step": 14514 }, { "epoch": 15.34, "learning_rate": 1.1659619450317124e-05, "loss": 0.0061, "step": 14516 }, { "epoch": 15.35, "learning_rate": 1.1654334038054969e-05, "loss": 0.0018, "step": 14518 }, { "epoch": 15.35, "learning_rate": 1.1649048625792811e-05, "loss": 0.0007, "step": 14520 }, { "epoch": 15.35, "learning_rate": 1.1643763213530656e-05, "loss": 0.0474, "step": 14522 }, { "epoch": 15.35, "learning_rate": 1.16384778012685e-05, "loss": 0.0409, "step": 14524 }, { "epoch": 15.36, "learning_rate": 1.1633192389006343e-05, "loss": 0.0263, "step": 14526 }, { "epoch": 15.36, "learning_rate": 1.1627906976744187e-05, "loss": 0.0323, "step": 14528 }, { "epoch": 15.36, "learning_rate": 1.1622621564482031e-05, "loss": 0.0008, "step": 14530 }, { "epoch": 15.36, "learning_rate": 1.1617336152219874e-05, "loss": 0.0061, "step": 14532 }, { "epoch": 15.36, "learning_rate": 1.1612050739957717e-05, "loss": 0.0196, "step": 14534 }, { "epoch": 15.37, "learning_rate": 1.1606765327695561e-05, "loss": 0.0093, "step": 14536 }, { "epoch": 15.37, "learning_rate": 1.1601479915433404e-05, "loss": 0.0166, "step": 14538 }, { "epoch": 15.37, "learning_rate": 1.1596194503171248e-05, "loss": 0.0296, "step": 14540 }, { "epoch": 15.37, "learning_rate": 1.159090909090909e-05, "loss": 0.0298, "step": 14542 }, { "epoch": 15.37, "learning_rate": 1.1585623678646935e-05, "loss": 0.0354, "step": 14544 }, { "epoch": 15.38, "learning_rate": 1.1580338266384778e-05, "loss": 0.0048, "step": 14546 }, { "epoch": 15.38, "learning_rate": 1.1575052854122622e-05, "loss": 0.0014, "step": 14548 }, { "epoch": 15.38, "learning_rate": 1.1569767441860465e-05, "loss": 0.0582, "step": 14550 }, { "epoch": 15.38, "learning_rate": 1.156448202959831e-05, "loss": 0.0271, "step": 14552 }, { "epoch": 15.38, "learning_rate": 1.1559196617336154e-05, "loss": 0.0263, "step": 14554 }, { "epoch": 15.39, "learning_rate": 1.1553911205073996e-05, "loss": 0.029, "step": 14556 }, { "epoch": 15.39, "learning_rate": 1.154862579281184e-05, "loss": 0.0028, "step": 14558 }, { "epoch": 15.39, "learning_rate": 1.1543340380549683e-05, "loss": 0.0368, "step": 14560 }, { "epoch": 15.39, "learning_rate": 1.1538054968287528e-05, "loss": 0.0295, "step": 14562 }, { "epoch": 15.4, "learning_rate": 1.153276955602537e-05, "loss": 0.0139, "step": 14564 }, { "epoch": 15.4, "learning_rate": 1.1527484143763215e-05, "loss": 0.0055, "step": 14566 }, { "epoch": 15.4, "learning_rate": 1.1522198731501057e-05, "loss": 0.0277, "step": 14568 }, { "epoch": 15.4, "learning_rate": 1.15169133192389e-05, "loss": 0.0919, "step": 14570 }, { "epoch": 15.4, "learning_rate": 1.1511627906976744e-05, "loss": 0.033, "step": 14572 }, { "epoch": 15.41, "learning_rate": 1.1506342494714587e-05, "loss": 0.0078, "step": 14574 }, { "epoch": 15.41, "learning_rate": 1.1501057082452431e-05, "loss": 0.0171, "step": 14576 }, { "epoch": 15.41, "learning_rate": 1.1495771670190276e-05, "loss": 0.0377, "step": 14578 }, { "epoch": 15.41, "learning_rate": 1.149048625792812e-05, "loss": 0.0205, "step": 14580 }, { "epoch": 15.41, "learning_rate": 1.1485200845665963e-05, "loss": 0.0259, "step": 14582 }, { "epoch": 15.42, "learning_rate": 1.1479915433403807e-05, "loss": 0.0213, "step": 14584 }, { "epoch": 15.42, "learning_rate": 1.147463002114165e-05, "loss": 0.0033, "step": 14586 }, { "epoch": 15.42, "learning_rate": 1.1469344608879493e-05, "loss": 0.0379, "step": 14588 }, { "epoch": 15.42, "learning_rate": 1.1464059196617337e-05, "loss": 0.0388, "step": 14590 }, { "epoch": 15.42, "learning_rate": 1.145877378435518e-05, "loss": 0.0007, "step": 14592 }, { "epoch": 15.43, "learning_rate": 1.1453488372093024e-05, "loss": 0.0107, "step": 14594 }, { "epoch": 15.43, "learning_rate": 1.1448202959830867e-05, "loss": 0.018, "step": 14596 }, { "epoch": 15.43, "learning_rate": 1.1442917547568711e-05, "loss": 0.0355, "step": 14598 }, { "epoch": 15.43, "learning_rate": 1.1437632135306554e-05, "loss": 0.0101, "step": 14600 }, { "epoch": 15.44, "learning_rate": 1.1432346723044398e-05, "loss": 0.0064, "step": 14602 }, { "epoch": 15.44, "learning_rate": 1.1427061310782242e-05, "loss": 0.0077, "step": 14604 }, { "epoch": 15.44, "learning_rate": 1.1421775898520085e-05, "loss": 0.003, "step": 14606 }, { "epoch": 15.44, "learning_rate": 1.141649048625793e-05, "loss": 0.0049, "step": 14608 }, { "epoch": 15.44, "learning_rate": 1.1411205073995772e-05, "loss": 0.0184, "step": 14610 }, { "epoch": 15.45, "learning_rate": 1.1405919661733616e-05, "loss": 0.0231, "step": 14612 }, { "epoch": 15.45, "learning_rate": 1.140063424947146e-05, "loss": 0.0215, "step": 14614 }, { "epoch": 15.45, "learning_rate": 1.1395348837209304e-05, "loss": 0.0232, "step": 14616 }, { "epoch": 15.45, "learning_rate": 1.1390063424947146e-05, "loss": 0.0191, "step": 14618 }, { "epoch": 15.45, "learning_rate": 1.138477801268499e-05, "loss": 0.0127, "step": 14620 }, { "epoch": 15.46, "learning_rate": 1.1379492600422833e-05, "loss": 0.031, "step": 14622 }, { "epoch": 15.46, "learning_rate": 1.1374207188160676e-05, "loss": 0.0854, "step": 14624 }, { "epoch": 15.46, "learning_rate": 1.136892177589852e-05, "loss": 0.0194, "step": 14626 }, { "epoch": 15.46, "learning_rate": 1.1363636363636365e-05, "loss": 0.0172, "step": 14628 }, { "epoch": 15.47, "learning_rate": 1.1358350951374207e-05, "loss": 0.0028, "step": 14630 }, { "epoch": 15.47, "learning_rate": 1.1353065539112052e-05, "loss": 0.0134, "step": 14632 }, { "epoch": 15.47, "learning_rate": 1.1347780126849896e-05, "loss": 0.0056, "step": 14634 }, { "epoch": 15.47, "learning_rate": 1.1342494714587739e-05, "loss": 0.0076, "step": 14636 }, { "epoch": 15.47, "learning_rate": 1.1337209302325581e-05, "loss": 0.0103, "step": 14638 }, { "epoch": 15.48, "learning_rate": 1.1331923890063426e-05, "loss": 0.0054, "step": 14640 }, { "epoch": 15.48, "learning_rate": 1.1326638477801268e-05, "loss": 0.0255, "step": 14642 }, { "epoch": 15.48, "learning_rate": 1.1321353065539113e-05, "loss": 0.0609, "step": 14644 }, { "epoch": 15.48, "learning_rate": 1.1316067653276955e-05, "loss": 0.0092, "step": 14646 }, { "epoch": 15.48, "learning_rate": 1.13107822410148e-05, "loss": 0.013, "step": 14648 }, { "epoch": 15.49, "learning_rate": 1.1305496828752642e-05, "loss": 0.024, "step": 14650 }, { "epoch": 15.49, "learning_rate": 1.1300211416490487e-05, "loss": 0.0104, "step": 14652 }, { "epoch": 15.49, "learning_rate": 1.129492600422833e-05, "loss": 0.0106, "step": 14654 }, { "epoch": 15.49, "learning_rate": 1.1289640591966174e-05, "loss": 0.0158, "step": 14656 }, { "epoch": 15.49, "learning_rate": 1.1284355179704018e-05, "loss": 0.0783, "step": 14658 }, { "epoch": 15.5, "learning_rate": 1.1279069767441861e-05, "loss": 0.0025, "step": 14660 }, { "epoch": 15.5, "learning_rate": 1.1273784355179705e-05, "loss": 0.055, "step": 14662 }, { "epoch": 15.5, "learning_rate": 1.1268498942917548e-05, "loss": 0.0038, "step": 14664 }, { "epoch": 15.5, "learning_rate": 1.1263213530655392e-05, "loss": 0.0024, "step": 14666 }, { "epoch": 15.51, "learning_rate": 1.1257928118393235e-05, "loss": 0.0337, "step": 14668 }, { "epoch": 15.51, "learning_rate": 1.125264270613108e-05, "loss": 0.0129, "step": 14670 }, { "epoch": 15.51, "learning_rate": 1.1247357293868922e-05, "loss": 0.0065, "step": 14672 }, { "epoch": 15.51, "learning_rate": 1.1242071881606765e-05, "loss": 0.0291, "step": 14674 }, { "epoch": 15.51, "learning_rate": 1.1236786469344609e-05, "loss": 0.0127, "step": 14676 }, { "epoch": 15.52, "learning_rate": 1.1231501057082452e-05, "loss": 0.0149, "step": 14678 }, { "epoch": 15.52, "learning_rate": 1.1226215644820296e-05, "loss": 0.0389, "step": 14680 }, { "epoch": 15.52, "learning_rate": 1.122093023255814e-05, "loss": 0.0657, "step": 14682 }, { "epoch": 15.52, "learning_rate": 1.1215644820295985e-05, "loss": 0.0156, "step": 14684 }, { "epoch": 15.52, "learning_rate": 1.1210359408033828e-05, "loss": 0.0137, "step": 14686 }, { "epoch": 15.53, "learning_rate": 1.1205073995771672e-05, "loss": 0.0564, "step": 14688 }, { "epoch": 15.53, "learning_rate": 1.1199788583509515e-05, "loss": 0.0165, "step": 14690 }, { "epoch": 15.53, "learning_rate": 1.1194503171247357e-05, "loss": 0.0152, "step": 14692 }, { "epoch": 15.53, "learning_rate": 1.1189217758985202e-05, "loss": 0.0335, "step": 14694 }, { "epoch": 15.53, "learning_rate": 1.1183932346723044e-05, "loss": 0.0033, "step": 14696 }, { "epoch": 15.54, "learning_rate": 1.1178646934460889e-05, "loss": 0.0171, "step": 14698 }, { "epoch": 15.54, "learning_rate": 1.1173361522198731e-05, "loss": 0.0164, "step": 14700 }, { "epoch": 15.54, "learning_rate": 1.1168076109936576e-05, "loss": 0.0208, "step": 14702 }, { "epoch": 15.54, "learning_rate": 1.1162790697674418e-05, "loss": 0.0062, "step": 14704 }, { "epoch": 15.55, "learning_rate": 1.1157505285412263e-05, "loss": 0.0031, "step": 14706 }, { "epoch": 15.55, "learning_rate": 1.1152219873150107e-05, "loss": 0.0039, "step": 14708 }, { "epoch": 15.55, "learning_rate": 1.114693446088795e-05, "loss": 0.04, "step": 14710 }, { "epoch": 15.55, "learning_rate": 1.1141649048625794e-05, "loss": 0.0146, "step": 14712 }, { "epoch": 15.55, "learning_rate": 1.1136363636363637e-05, "loss": 0.0118, "step": 14714 }, { "epoch": 15.56, "learning_rate": 1.1131078224101481e-05, "loss": 0.001, "step": 14716 }, { "epoch": 15.56, "learning_rate": 1.1125792811839324e-05, "loss": 0.0444, "step": 14718 }, { "epoch": 15.56, "learning_rate": 1.1120507399577168e-05, "loss": 0.0169, "step": 14720 }, { "epoch": 15.56, "learning_rate": 1.1115221987315011e-05, "loss": 0.0395, "step": 14722 }, { "epoch": 15.56, "learning_rate": 1.1109936575052855e-05, "loss": 0.0146, "step": 14724 }, { "epoch": 15.57, "learning_rate": 1.1104651162790698e-05, "loss": 0.0048, "step": 14726 }, { "epoch": 15.57, "learning_rate": 1.109936575052854e-05, "loss": 0.0195, "step": 14728 }, { "epoch": 15.57, "learning_rate": 1.1094080338266385e-05, "loss": 0.0036, "step": 14730 }, { "epoch": 15.57, "learning_rate": 1.1088794926004228e-05, "loss": 0.0068, "step": 14732 }, { "epoch": 15.58, "learning_rate": 1.1083509513742072e-05, "loss": 0.0082, "step": 14734 }, { "epoch": 15.58, "learning_rate": 1.1078224101479916e-05, "loss": 0.0098, "step": 14736 }, { "epoch": 15.58, "learning_rate": 1.107293868921776e-05, "loss": 0.004, "step": 14738 }, { "epoch": 15.58, "learning_rate": 1.1067653276955603e-05, "loss": 0.0357, "step": 14740 }, { "epoch": 15.58, "learning_rate": 1.1062367864693448e-05, "loss": 0.0158, "step": 14742 }, { "epoch": 15.59, "learning_rate": 1.105708245243129e-05, "loss": 0.0059, "step": 14744 }, { "epoch": 15.59, "learning_rate": 1.1051797040169133e-05, "loss": 0.0125, "step": 14746 }, { "epoch": 15.59, "learning_rate": 1.1046511627906977e-05, "loss": 0.0443, "step": 14748 }, { "epoch": 15.59, "learning_rate": 1.104122621564482e-05, "loss": 0.0017, "step": 14750 }, { "epoch": 15.59, "learning_rate": 1.1035940803382664e-05, "loss": 0.062, "step": 14752 }, { "epoch": 15.6, "learning_rate": 1.1030655391120507e-05, "loss": 0.006, "step": 14754 }, { "epoch": 15.6, "learning_rate": 1.1025369978858352e-05, "loss": 0.0143, "step": 14756 }, { "epoch": 15.6, "learning_rate": 1.1020084566596194e-05, "loss": 0.0055, "step": 14758 }, { "epoch": 15.6, "learning_rate": 1.1014799154334039e-05, "loss": 0.0177, "step": 14760 }, { "epoch": 15.6, "learning_rate": 1.1009513742071883e-05, "loss": 0.0077, "step": 14762 }, { "epoch": 15.61, "learning_rate": 1.1004228329809726e-05, "loss": 0.0037, "step": 14764 }, { "epoch": 15.61, "learning_rate": 1.099894291754757e-05, "loss": 0.0334, "step": 14766 }, { "epoch": 15.61, "learning_rate": 1.0993657505285413e-05, "loss": 0.0067, "step": 14768 }, { "epoch": 15.61, "learning_rate": 1.0988372093023257e-05, "loss": 0.0158, "step": 14770 }, { "epoch": 15.62, "learning_rate": 1.09830866807611e-05, "loss": 0.0067, "step": 14772 }, { "epoch": 15.62, "learning_rate": 1.0977801268498944e-05, "loss": 0.0298, "step": 14774 }, { "epoch": 15.62, "learning_rate": 1.0972515856236787e-05, "loss": 0.0008, "step": 14776 }, { "epoch": 15.62, "learning_rate": 1.0967230443974631e-05, "loss": 0.0064, "step": 14778 }, { "epoch": 15.62, "learning_rate": 1.0961945031712474e-05, "loss": 0.0055, "step": 14780 }, { "epoch": 15.63, "learning_rate": 1.0956659619450316e-05, "loss": 0.0701, "step": 14782 }, { "epoch": 15.63, "learning_rate": 1.095137420718816e-05, "loss": 0.0278, "step": 14784 }, { "epoch": 15.63, "learning_rate": 1.0946088794926005e-05, "loss": 0.0189, "step": 14786 }, { "epoch": 15.63, "learning_rate": 1.094080338266385e-05, "loss": 0.0136, "step": 14788 }, { "epoch": 15.63, "learning_rate": 1.0935517970401692e-05, "loss": 0.0049, "step": 14790 }, { "epoch": 15.64, "learning_rate": 1.0930232558139537e-05, "loss": 0.0078, "step": 14792 }, { "epoch": 15.64, "learning_rate": 1.092494714587738e-05, "loss": 0.0471, "step": 14794 }, { "epoch": 15.64, "learning_rate": 1.0919661733615222e-05, "loss": 0.0093, "step": 14796 }, { "epoch": 15.64, "learning_rate": 1.0914376321353066e-05, "loss": 0.0332, "step": 14798 }, { "epoch": 15.64, "learning_rate": 1.0909090909090909e-05, "loss": 0.046, "step": 14800 }, { "epoch": 15.65, "learning_rate": 1.0903805496828753e-05, "loss": 0.0186, "step": 14802 }, { "epoch": 15.65, "learning_rate": 1.0898520084566596e-05, "loss": 0.0015, "step": 14804 }, { "epoch": 15.65, "learning_rate": 1.089323467230444e-05, "loss": 0.0335, "step": 14806 }, { "epoch": 15.65, "learning_rate": 1.0887949260042283e-05, "loss": 0.0217, "step": 14808 }, { "epoch": 15.66, "learning_rate": 1.0882663847780127e-05, "loss": 0.0014, "step": 14810 }, { "epoch": 15.66, "learning_rate": 1.0877378435517972e-05, "loss": 0.0465, "step": 14812 }, { "epoch": 15.66, "learning_rate": 1.0872093023255814e-05, "loss": 0.0107, "step": 14814 }, { "epoch": 15.66, "learning_rate": 1.0866807610993659e-05, "loss": 0.0903, "step": 14816 }, { "epoch": 15.66, "learning_rate": 1.0861522198731501e-05, "loss": 0.0038, "step": 14818 }, { "epoch": 15.67, "learning_rate": 1.0856236786469346e-05, "loss": 0.0018, "step": 14820 }, { "epoch": 15.67, "learning_rate": 1.0850951374207188e-05, "loss": 0.0343, "step": 14822 }, { "epoch": 15.67, "learning_rate": 1.0845665961945033e-05, "loss": 0.0074, "step": 14824 }, { "epoch": 15.67, "learning_rate": 1.0840380549682875e-05, "loss": 0.0257, "step": 14826 }, { "epoch": 15.67, "learning_rate": 1.083509513742072e-05, "loss": 0.0017, "step": 14828 }, { "epoch": 15.68, "learning_rate": 1.0829809725158563e-05, "loss": 0.0189, "step": 14830 }, { "epoch": 15.68, "learning_rate": 1.0824524312896405e-05, "loss": 0.0674, "step": 14832 }, { "epoch": 15.68, "learning_rate": 1.081923890063425e-05, "loss": 0.012, "step": 14834 }, { "epoch": 15.68, "learning_rate": 1.0813953488372092e-05, "loss": 0.034, "step": 14836 }, { "epoch": 15.68, "learning_rate": 1.0808668076109937e-05, "loss": 0.0246, "step": 14838 }, { "epoch": 15.69, "learning_rate": 1.0803382663847781e-05, "loss": 0.0021, "step": 14840 }, { "epoch": 15.69, "learning_rate": 1.0798097251585625e-05, "loss": 0.0245, "step": 14842 }, { "epoch": 15.69, "learning_rate": 1.0792811839323468e-05, "loss": 0.0014, "step": 14844 }, { "epoch": 15.69, "learning_rate": 1.0787526427061312e-05, "loss": 0.0155, "step": 14846 }, { "epoch": 15.7, "learning_rate": 1.0782241014799155e-05, "loss": 0.0208, "step": 14848 }, { "epoch": 15.7, "learning_rate": 1.0776955602536998e-05, "loss": 0.0056, "step": 14850 }, { "epoch": 15.7, "learning_rate": 1.0771670190274842e-05, "loss": 0.0019, "step": 14852 }, { "epoch": 15.7, "learning_rate": 1.0766384778012685e-05, "loss": 0.031, "step": 14854 }, { "epoch": 15.7, "learning_rate": 1.0761099365750529e-05, "loss": 0.0575, "step": 14856 }, { "epoch": 15.71, "learning_rate": 1.0755813953488372e-05, "loss": 0.0129, "step": 14858 }, { "epoch": 15.71, "learning_rate": 1.0750528541226216e-05, "loss": 0.0061, "step": 14860 }, { "epoch": 15.71, "learning_rate": 1.0745243128964059e-05, "loss": 0.0038, "step": 14862 }, { "epoch": 15.71, "learning_rate": 1.0739957716701903e-05, "loss": 0.0207, "step": 14864 }, { "epoch": 15.71, "learning_rate": 1.0734672304439748e-05, "loss": 0.0242, "step": 14866 }, { "epoch": 15.72, "learning_rate": 1.072938689217759e-05, "loss": 0.0162, "step": 14868 }, { "epoch": 15.72, "learning_rate": 1.0724101479915435e-05, "loss": 0.0286, "step": 14870 }, { "epoch": 15.72, "learning_rate": 1.0718816067653277e-05, "loss": 0.0316, "step": 14872 }, { "epoch": 15.72, "learning_rate": 1.0713530655391122e-05, "loss": 0.0073, "step": 14874 }, { "epoch": 15.73, "learning_rate": 1.0708245243128964e-05, "loss": 0.0109, "step": 14876 }, { "epoch": 15.73, "learning_rate": 1.0702959830866809e-05, "loss": 0.0047, "step": 14878 }, { "epoch": 15.73, "learning_rate": 1.0697674418604651e-05, "loss": 0.0354, "step": 14880 }, { "epoch": 15.73, "learning_rate": 1.0692389006342496e-05, "loss": 0.0226, "step": 14882 }, { "epoch": 15.73, "learning_rate": 1.0687103594080338e-05, "loss": 0.017, "step": 14884 }, { "epoch": 15.74, "learning_rate": 1.0681818181818181e-05, "loss": 0.003, "step": 14886 }, { "epoch": 15.74, "learning_rate": 1.0676532769556025e-05, "loss": 0.0335, "step": 14888 }, { "epoch": 15.74, "learning_rate": 1.067124735729387e-05, "loss": 0.0027, "step": 14890 }, { "epoch": 15.74, "learning_rate": 1.0665961945031714e-05, "loss": 0.0122, "step": 14892 }, { "epoch": 15.74, "learning_rate": 1.0660676532769557e-05, "loss": 0.0095, "step": 14894 }, { "epoch": 15.75, "learning_rate": 1.0655391120507401e-05, "loss": 0.0042, "step": 14896 }, { "epoch": 15.75, "learning_rate": 1.0652748414376323e-05, "loss": 0.1013, "step": 14898 }, { "epoch": 15.75, "learning_rate": 1.0647463002114165e-05, "loss": 0.0157, "step": 14900 }, { "epoch": 15.75, "learning_rate": 1.064217758985201e-05, "loss": 0.0084, "step": 14902 }, { "epoch": 15.75, "learning_rate": 1.0636892177589852e-05, "loss": 0.0251, "step": 14904 }, { "epoch": 15.76, "learning_rate": 1.0631606765327697e-05, "loss": 0.0067, "step": 14906 }, { "epoch": 15.76, "learning_rate": 1.062632135306554e-05, "loss": 0.0134, "step": 14908 }, { "epoch": 15.76, "learning_rate": 1.0621035940803384e-05, "loss": 0.0021, "step": 14910 }, { "epoch": 15.76, "learning_rate": 1.0615750528541226e-05, "loss": 0.0048, "step": 14912 }, { "epoch": 15.77, "learning_rate": 1.0610465116279069e-05, "loss": 0.0014, "step": 14914 }, { "epoch": 15.77, "learning_rate": 1.0605179704016913e-05, "loss": 0.0111, "step": 14916 }, { "epoch": 15.77, "learning_rate": 1.0599894291754758e-05, "loss": 0.0058, "step": 14918 }, { "epoch": 15.77, "learning_rate": 1.0594608879492602e-05, "loss": 0.0129, "step": 14920 }, { "epoch": 15.77, "learning_rate": 1.0589323467230445e-05, "loss": 0.0626, "step": 14922 }, { "epoch": 15.78, "learning_rate": 1.0584038054968289e-05, "loss": 0.0111, "step": 14924 }, { "epoch": 15.78, "learning_rate": 1.0578752642706132e-05, "loss": 0.0184, "step": 14926 }, { "epoch": 15.78, "learning_rate": 1.0573467230443976e-05, "loss": 0.0208, "step": 14928 }, { "epoch": 15.78, "learning_rate": 1.0568181818181819e-05, "loss": 0.0076, "step": 14930 }, { "epoch": 15.78, "learning_rate": 1.0562896405919661e-05, "loss": 0.0395, "step": 14932 }, { "epoch": 15.79, "learning_rate": 1.0557610993657506e-05, "loss": 0.0079, "step": 14934 }, { "epoch": 15.79, "learning_rate": 1.0552325581395349e-05, "loss": 0.0393, "step": 14936 }, { "epoch": 15.79, "learning_rate": 1.0547040169133193e-05, "loss": 0.0174, "step": 14938 }, { "epoch": 15.79, "learning_rate": 1.0541754756871036e-05, "loss": 0.0146, "step": 14940 }, { "epoch": 15.79, "learning_rate": 1.053646934460888e-05, "loss": 0.0084, "step": 14942 }, { "epoch": 15.8, "learning_rate": 1.0531183932346724e-05, "loss": 0.0514, "step": 14944 }, { "epoch": 15.8, "learning_rate": 1.0525898520084567e-05, "loss": 0.0202, "step": 14946 }, { "epoch": 15.8, "learning_rate": 1.0520613107822411e-05, "loss": 0.011, "step": 14948 }, { "epoch": 15.8, "learning_rate": 1.0515327695560254e-05, "loss": 0.0199, "step": 14950 }, { "epoch": 15.81, "learning_rate": 1.0510042283298098e-05, "loss": 0.0292, "step": 14952 }, { "epoch": 15.81, "learning_rate": 1.0504756871035941e-05, "loss": 0.0477, "step": 14954 }, { "epoch": 15.81, "learning_rate": 1.0499471458773785e-05, "loss": 0.0244, "step": 14956 }, { "epoch": 15.81, "learning_rate": 1.0494186046511628e-05, "loss": 0.0155, "step": 14958 }, { "epoch": 15.81, "learning_rate": 1.0488900634249472e-05, "loss": 0.029, "step": 14960 }, { "epoch": 15.82, "learning_rate": 1.0483615221987315e-05, "loss": 0.0276, "step": 14962 }, { "epoch": 15.82, "learning_rate": 1.0478329809725158e-05, "loss": 0.0275, "step": 14964 }, { "epoch": 15.82, "learning_rate": 1.0473044397463002e-05, "loss": 0.0165, "step": 14966 }, { "epoch": 15.82, "learning_rate": 1.0467758985200845e-05, "loss": 0.0428, "step": 14968 }, { "epoch": 15.82, "learning_rate": 1.046247357293869e-05, "loss": 0.0454, "step": 14970 }, { "epoch": 15.83, "learning_rate": 1.0457188160676534e-05, "loss": 0.1425, "step": 14972 }, { "epoch": 15.83, "learning_rate": 1.0451902748414378e-05, "loss": 0.0476, "step": 14974 }, { "epoch": 15.83, "learning_rate": 1.044661733615222e-05, "loss": 0.0332, "step": 14976 }, { "epoch": 15.83, "learning_rate": 1.0441331923890065e-05, "loss": 0.0403, "step": 14978 }, { "epoch": 15.84, "learning_rate": 1.0436046511627908e-05, "loss": 0.0198, "step": 14980 }, { "epoch": 15.84, "learning_rate": 1.043076109936575e-05, "loss": 0.0125, "step": 14982 }, { "epoch": 15.84, "learning_rate": 1.0425475687103595e-05, "loss": 0.0084, "step": 14984 }, { "epoch": 15.84, "learning_rate": 1.0420190274841437e-05, "loss": 0.0013, "step": 14986 }, { "epoch": 15.84, "learning_rate": 1.0414904862579282e-05, "loss": 0.0103, "step": 14988 }, { "epoch": 15.85, "learning_rate": 1.0409619450317124e-05, "loss": 0.0311, "step": 14990 }, { "epoch": 15.85, "learning_rate": 1.0404334038054969e-05, "loss": 0.0204, "step": 14992 }, { "epoch": 15.85, "learning_rate": 1.0399048625792811e-05, "loss": 0.0037, "step": 14994 }, { "epoch": 15.85, "learning_rate": 1.0393763213530656e-05, "loss": 0.0013, "step": 14996 }, { "epoch": 15.85, "learning_rate": 1.03884778012685e-05, "loss": 0.0351, "step": 14998 }, { "epoch": 15.86, "learning_rate": 1.0383192389006343e-05, "loss": 0.0054, "step": 15000 }, { "epoch": 15.86, "eval_cer": 0.0335138216015959, "eval_loss": 0.8025035262107849, "eval_runtime": 125.5761, "eval_samples_per_second": 6.697, "eval_steps_per_second": 0.844, "step": 15000 }, { "epoch": 15.86, "learning_rate": 1.0377906976744187e-05, "loss": 0.0153, "step": 15002 }, { "epoch": 15.86, "learning_rate": 1.037262156448203e-05, "loss": 0.0195, "step": 15004 }, { "epoch": 15.86, "learning_rate": 1.0367336152219874e-05, "loss": 0.0055, "step": 15006 }, { "epoch": 15.86, "learning_rate": 1.0362050739957717e-05, "loss": 0.0015, "step": 15008 }, { "epoch": 15.87, "learning_rate": 1.0356765327695561e-05, "loss": 0.0036, "step": 15010 }, { "epoch": 15.87, "learning_rate": 1.0351479915433404e-05, "loss": 0.0304, "step": 15012 }, { "epoch": 15.87, "learning_rate": 1.0346194503171248e-05, "loss": 0.021, "step": 15014 }, { "epoch": 15.87, "learning_rate": 1.0340909090909091e-05, "loss": 0.0065, "step": 15016 }, { "epoch": 15.88, "learning_rate": 1.0335623678646934e-05, "loss": 0.006, "step": 15018 }, { "epoch": 15.88, "learning_rate": 1.0330338266384778e-05, "loss": 0.0214, "step": 15020 }, { "epoch": 15.88, "learning_rate": 1.0325052854122622e-05, "loss": 0.0925, "step": 15022 }, { "epoch": 15.88, "learning_rate": 1.0319767441860467e-05, "loss": 0.0072, "step": 15024 }, { "epoch": 15.88, "learning_rate": 1.031448202959831e-05, "loss": 0.0101, "step": 15026 }, { "epoch": 15.89, "learning_rate": 1.0309196617336154e-05, "loss": 0.0601, "step": 15028 }, { "epoch": 15.89, "learning_rate": 1.0303911205073996e-05, "loss": 0.0107, "step": 15030 }, { "epoch": 15.89, "learning_rate": 1.029862579281184e-05, "loss": 0.0073, "step": 15032 }, { "epoch": 15.89, "learning_rate": 1.0293340380549683e-05, "loss": 0.0511, "step": 15034 }, { "epoch": 15.89, "learning_rate": 1.0288054968287526e-05, "loss": 0.0102, "step": 15036 }, { "epoch": 15.9, "learning_rate": 1.028276955602537e-05, "loss": 0.0081, "step": 15038 }, { "epoch": 15.9, "learning_rate": 1.0277484143763213e-05, "loss": 0.0111, "step": 15040 }, { "epoch": 15.9, "learning_rate": 1.0272198731501058e-05, "loss": 0.0022, "step": 15042 }, { "epoch": 15.9, "learning_rate": 1.02669133192389e-05, "loss": 0.0098, "step": 15044 }, { "epoch": 15.9, "learning_rate": 1.0261627906976745e-05, "loss": 0.014, "step": 15046 }, { "epoch": 15.91, "learning_rate": 1.0256342494714589e-05, "loss": 0.0183, "step": 15048 }, { "epoch": 15.91, "learning_rate": 1.0251057082452432e-05, "loss": 0.004, "step": 15050 }, { "epoch": 15.91, "learning_rate": 1.0245771670190276e-05, "loss": 0.0045, "step": 15052 }, { "epoch": 15.91, "learning_rate": 1.0240486257928119e-05, "loss": 0.0175, "step": 15054 }, { "epoch": 15.92, "learning_rate": 1.0235200845665963e-05, "loss": 0.0855, "step": 15056 }, { "epoch": 15.92, "learning_rate": 1.0229915433403806e-05, "loss": 0.0145, "step": 15058 }, { "epoch": 15.92, "learning_rate": 1.022463002114165e-05, "loss": 0.0268, "step": 15060 }, { "epoch": 15.92, "learning_rate": 1.0219344608879493e-05, "loss": 0.0131, "step": 15062 }, { "epoch": 15.92, "learning_rate": 1.0214059196617337e-05, "loss": 0.0231, "step": 15064 }, { "epoch": 15.93, "learning_rate": 1.020877378435518e-05, "loss": 0.0278, "step": 15066 }, { "epoch": 15.93, "learning_rate": 1.0203488372093024e-05, "loss": 0.0021, "step": 15068 }, { "epoch": 15.93, "learning_rate": 1.0198202959830867e-05, "loss": 0.007, "step": 15070 }, { "epoch": 15.93, "learning_rate": 1.019291754756871e-05, "loss": 0.0337, "step": 15072 }, { "epoch": 15.93, "learning_rate": 1.0187632135306554e-05, "loss": 0.0458, "step": 15074 }, { "epoch": 15.94, "learning_rate": 1.0182346723044398e-05, "loss": 0.0022, "step": 15076 }, { "epoch": 15.94, "learning_rate": 1.0177061310782243e-05, "loss": 0.0135, "step": 15078 }, { "epoch": 15.94, "learning_rate": 1.0171775898520085e-05, "loss": 0.0165, "step": 15080 }, { "epoch": 15.94, "learning_rate": 1.016649048625793e-05, "loss": 0.0712, "step": 15082 }, { "epoch": 15.95, "learning_rate": 1.0161205073995772e-05, "loss": 0.024, "step": 15084 }, { "epoch": 15.95, "learning_rate": 1.0155919661733617e-05, "loss": 0.0021, "step": 15086 }, { "epoch": 15.95, "learning_rate": 1.015063424947146e-05, "loss": 0.0026, "step": 15088 }, { "epoch": 15.95, "learning_rate": 1.0145348837209302e-05, "loss": 0.0148, "step": 15090 }, { "epoch": 15.95, "learning_rate": 1.0140063424947146e-05, "loss": 0.0023, "step": 15092 }, { "epoch": 15.96, "learning_rate": 1.0134778012684989e-05, "loss": 0.0029, "step": 15094 }, { "epoch": 15.96, "learning_rate": 1.0129492600422833e-05, "loss": 0.0083, "step": 15096 }, { "epoch": 15.96, "learning_rate": 1.0124207188160676e-05, "loss": 0.0047, "step": 15098 }, { "epoch": 15.96, "learning_rate": 1.011892177589852e-05, "loss": 0.0348, "step": 15100 }, { "epoch": 15.96, "learning_rate": 1.0113636363636365e-05, "loss": 0.0033, "step": 15102 }, { "epoch": 15.97, "learning_rate": 1.0108350951374209e-05, "loss": 0.0005, "step": 15104 }, { "epoch": 15.97, "learning_rate": 1.0103065539112052e-05, "loss": 0.0072, "step": 15106 }, { "epoch": 15.97, "learning_rate": 1.0097780126849894e-05, "loss": 0.0082, "step": 15108 }, { "epoch": 15.97, "learning_rate": 1.0092494714587739e-05, "loss": 0.0054, "step": 15110 }, { "epoch": 15.97, "learning_rate": 1.0087209302325581e-05, "loss": 0.0637, "step": 15112 }, { "epoch": 15.98, "learning_rate": 1.0081923890063426e-05, "loss": 0.0627, "step": 15114 }, { "epoch": 15.98, "learning_rate": 1.0076638477801269e-05, "loss": 0.0063, "step": 15116 }, { "epoch": 15.98, "learning_rate": 1.0071353065539113e-05, "loss": 0.0118, "step": 15118 }, { "epoch": 15.98, "learning_rate": 1.0066067653276956e-05, "loss": 0.0127, "step": 15120 }, { "epoch": 15.99, "learning_rate": 1.00607822410148e-05, "loss": 0.0117, "step": 15122 }, { "epoch": 15.99, "learning_rate": 1.0055496828752643e-05, "loss": 0.0017, "step": 15124 }, { "epoch": 15.99, "learning_rate": 1.0050211416490487e-05, "loss": 0.0018, "step": 15126 }, { "epoch": 15.99, "learning_rate": 1.0044926004228331e-05, "loss": 0.0018, "step": 15128 }, { "epoch": 15.99, "learning_rate": 1.0039640591966174e-05, "loss": 0.0301, "step": 15130 }, { "epoch": 16.0, "learning_rate": 1.0034355179704018e-05, "loss": 0.074, "step": 15132 }, { "epoch": 16.0, "learning_rate": 1.0029069767441861e-05, "loss": 0.0381, "step": 15134 }, { "epoch": 16.0, "learning_rate": 1.0023784355179705e-05, "loss": 0.0129, "step": 15136 }, { "epoch": 16.0, "learning_rate": 1.0018498942917548e-05, "loss": 0.001, "step": 15138 }, { "epoch": 16.0, "learning_rate": 1.001321353065539e-05, "loss": 0.0034, "step": 15140 }, { "epoch": 16.01, "learning_rate": 1.0007928118393235e-05, "loss": 0.014, "step": 15142 }, { "epoch": 16.01, "learning_rate": 1.0002642706131078e-05, "loss": 0.0118, "step": 15144 }, { "epoch": 16.01, "learning_rate": 9.997357293868922e-06, "loss": 0.0318, "step": 15146 }, { "epoch": 16.01, "learning_rate": 9.992071881606765e-06, "loss": 0.0006, "step": 15148 }, { "epoch": 16.01, "learning_rate": 9.98678646934461e-06, "loss": 0.0669, "step": 15150 }, { "epoch": 16.02, "learning_rate": 9.981501057082452e-06, "loss": 0.0426, "step": 15152 }, { "epoch": 16.02, "learning_rate": 9.976215644820296e-06, "loss": 0.0088, "step": 15154 }, { "epoch": 16.02, "learning_rate": 9.97093023255814e-06, "loss": 0.0141, "step": 15156 }, { "epoch": 16.02, "learning_rate": 9.965644820295983e-06, "loss": 0.0079, "step": 15158 }, { "epoch": 16.03, "learning_rate": 9.960359408033828e-06, "loss": 0.0047, "step": 15160 }, { "epoch": 16.03, "learning_rate": 9.95507399577167e-06, "loss": 0.0025, "step": 15162 }, { "epoch": 16.03, "learning_rate": 9.949788583509515e-06, "loss": 0.0007, "step": 15164 }, { "epoch": 16.03, "learning_rate": 9.944503171247357e-06, "loss": 0.0009, "step": 15166 }, { "epoch": 16.03, "learning_rate": 9.939217758985202e-06, "loss": 0.0158, "step": 15168 }, { "epoch": 16.04, "learning_rate": 9.933932346723044e-06, "loss": 0.023, "step": 15170 }, { "epoch": 16.04, "learning_rate": 9.928646934460889e-06, "loss": 0.0184, "step": 15172 }, { "epoch": 16.04, "learning_rate": 9.923361522198731e-06, "loss": 0.0449, "step": 15174 }, { "epoch": 16.04, "learning_rate": 9.918076109936574e-06, "loss": 0.0215, "step": 15176 }, { "epoch": 16.04, "learning_rate": 9.912790697674418e-06, "loss": 0.0148, "step": 15178 }, { "epoch": 16.05, "learning_rate": 9.907505285412263e-06, "loss": 0.0143, "step": 15180 }, { "epoch": 16.05, "learning_rate": 9.902219873150107e-06, "loss": 0.0121, "step": 15182 }, { "epoch": 16.05, "learning_rate": 9.89693446088795e-06, "loss": 0.0063, "step": 15184 }, { "epoch": 16.05, "learning_rate": 9.891649048625794e-06, "loss": 0.0178, "step": 15186 }, { "epoch": 16.05, "learning_rate": 9.886363636363637e-06, "loss": 0.0131, "step": 15188 }, { "epoch": 16.06, "learning_rate": 9.881078224101481e-06, "loss": 0.0121, "step": 15190 }, { "epoch": 16.06, "learning_rate": 9.875792811839324e-06, "loss": 0.0007, "step": 15192 }, { "epoch": 16.06, "learning_rate": 9.870507399577167e-06, "loss": 0.0022, "step": 15194 }, { "epoch": 16.06, "learning_rate": 9.865221987315011e-06, "loss": 0.0076, "step": 15196 }, { "epoch": 16.07, "learning_rate": 9.859936575052854e-06, "loss": 0.0417, "step": 15198 }, { "epoch": 16.07, "learning_rate": 9.854651162790698e-06, "loss": 0.0325, "step": 15200 }, { "epoch": 16.07, "learning_rate": 9.84936575052854e-06, "loss": 0.022, "step": 15202 }, { "epoch": 16.07, "learning_rate": 9.844080338266385e-06, "loss": 0.0188, "step": 15204 }, { "epoch": 16.07, "learning_rate": 9.83879492600423e-06, "loss": 0.0085, "step": 15206 }, { "epoch": 16.08, "learning_rate": 9.833509513742074e-06, "loss": 0.003, "step": 15208 }, { "epoch": 16.08, "learning_rate": 9.828224101479916e-06, "loss": 0.0306, "step": 15210 }, { "epoch": 16.08, "learning_rate": 9.822938689217759e-06, "loss": 0.0018, "step": 15212 }, { "epoch": 16.08, "learning_rate": 9.817653276955603e-06, "loss": 0.0404, "step": 15214 }, { "epoch": 16.08, "learning_rate": 9.812367864693446e-06, "loss": 0.0178, "step": 15216 }, { "epoch": 16.09, "learning_rate": 9.80708245243129e-06, "loss": 0.0063, "step": 15218 }, { "epoch": 16.09, "learning_rate": 9.801797040169133e-06, "loss": 0.0222, "step": 15220 }, { "epoch": 16.09, "learning_rate": 9.796511627906978e-06, "loss": 0.0006, "step": 15222 }, { "epoch": 16.09, "learning_rate": 9.79122621564482e-06, "loss": 0.0088, "step": 15224 }, { "epoch": 16.1, "learning_rate": 9.785940803382665e-06, "loss": 0.014, "step": 15226 }, { "epoch": 16.1, "learning_rate": 9.780655391120507e-06, "loss": 0.001, "step": 15228 }, { "epoch": 16.1, "learning_rate": 9.775369978858352e-06, "loss": 0.0105, "step": 15230 }, { "epoch": 16.1, "learning_rate": 9.770084566596196e-06, "loss": 0.0037, "step": 15232 }, { "epoch": 16.1, "learning_rate": 9.764799154334039e-06, "loss": 0.0118, "step": 15234 }, { "epoch": 16.11, "learning_rate": 9.759513742071883e-06, "loss": 0.0172, "step": 15236 }, { "epoch": 16.11, "learning_rate": 9.754228329809726e-06, "loss": 0.0048, "step": 15238 }, { "epoch": 16.11, "learning_rate": 9.74894291754757e-06, "loss": 0.0003, "step": 15240 }, { "epoch": 16.11, "learning_rate": 9.743657505285413e-06, "loss": 0.0022, "step": 15242 }, { "epoch": 16.11, "learning_rate": 9.738372093023257e-06, "loss": 0.0045, "step": 15244 }, { "epoch": 16.12, "learning_rate": 9.7330866807611e-06, "loss": 0.0046, "step": 15246 }, { "epoch": 16.12, "learning_rate": 9.727801268498942e-06, "loss": 0.005, "step": 15248 }, { "epoch": 16.12, "learning_rate": 9.722515856236787e-06, "loss": 0.0402, "step": 15250 }, { "epoch": 16.12, "learning_rate": 9.71723044397463e-06, "loss": 0.0045, "step": 15252 }, { "epoch": 16.12, "learning_rate": 9.711945031712474e-06, "loss": 0.0552, "step": 15254 }, { "epoch": 16.13, "learning_rate": 9.706659619450317e-06, "loss": 0.0035, "step": 15256 }, { "epoch": 16.13, "learning_rate": 9.701374207188161e-06, "loss": 0.0225, "step": 15258 }, { "epoch": 16.13, "learning_rate": 9.696088794926005e-06, "loss": 0.069, "step": 15260 }, { "epoch": 16.13, "learning_rate": 9.69080338266385e-06, "loss": 0.0067, "step": 15262 }, { "epoch": 16.14, "learning_rate": 9.685517970401692e-06, "loss": 0.0032, "step": 15264 }, { "epoch": 16.14, "learning_rate": 9.680232558139535e-06, "loss": 0.0054, "step": 15266 }, { "epoch": 16.14, "learning_rate": 9.67494714587738e-06, "loss": 0.0207, "step": 15268 }, { "epoch": 16.14, "learning_rate": 9.669661733615222e-06, "loss": 0.0059, "step": 15270 }, { "epoch": 16.14, "learning_rate": 9.664376321353066e-06, "loss": 0.0024, "step": 15272 }, { "epoch": 16.15, "learning_rate": 9.659090909090909e-06, "loss": 0.0056, "step": 15274 }, { "epoch": 16.15, "learning_rate": 9.653805496828753e-06, "loss": 0.0047, "step": 15276 }, { "epoch": 16.15, "learning_rate": 9.648520084566596e-06, "loss": 0.0111, "step": 15278 }, { "epoch": 16.15, "learning_rate": 9.64323467230444e-06, "loss": 0.0028, "step": 15280 }, { "epoch": 16.15, "learning_rate": 9.637949260042283e-06, "loss": 0.001, "step": 15282 }, { "epoch": 16.16, "learning_rate": 9.632663847780127e-06, "loss": 0.0099, "step": 15284 }, { "epoch": 16.16, "learning_rate": 9.627378435517972e-06, "loss": 0.0105, "step": 15286 }, { "epoch": 16.16, "learning_rate": 9.622093023255814e-06, "loss": 0.0016, "step": 15288 }, { "epoch": 16.16, "learning_rate": 9.616807610993659e-06, "loss": 0.0077, "step": 15290 }, { "epoch": 16.16, "learning_rate": 9.611522198731502e-06, "loss": 0.0269, "step": 15292 }, { "epoch": 16.17, "learning_rate": 9.606236786469346e-06, "loss": 0.0276, "step": 15294 }, { "epoch": 16.17, "learning_rate": 9.600951374207189e-06, "loss": 0.0022, "step": 15296 }, { "epoch": 16.17, "learning_rate": 9.595665961945031e-06, "loss": 0.0045, "step": 15298 }, { "epoch": 16.17, "learning_rate": 9.590380549682876e-06, "loss": 0.0041, "step": 15300 }, { "epoch": 16.18, "learning_rate": 9.585095137420718e-06, "loss": 0.0145, "step": 15302 }, { "epoch": 16.18, "learning_rate": 9.579809725158563e-06, "loss": 0.0014, "step": 15304 }, { "epoch": 16.18, "learning_rate": 9.574524312896405e-06, "loss": 0.0012, "step": 15306 }, { "epoch": 16.18, "learning_rate": 9.56923890063425e-06, "loss": 0.0182, "step": 15308 }, { "epoch": 16.18, "learning_rate": 9.563953488372094e-06, "loss": 0.0387, "step": 15310 }, { "epoch": 16.19, "learning_rate": 9.558668076109938e-06, "loss": 0.0252, "step": 15312 }, { "epoch": 16.19, "learning_rate": 9.553382663847781e-06, "loss": 0.0138, "step": 15314 }, { "epoch": 16.19, "learning_rate": 9.548097251585624e-06, "loss": 0.0383, "step": 15316 }, { "epoch": 16.19, "learning_rate": 9.542811839323468e-06, "loss": 0.0089, "step": 15318 }, { "epoch": 16.19, "learning_rate": 9.53752642706131e-06, "loss": 0.0074, "step": 15320 }, { "epoch": 16.2, "learning_rate": 9.532241014799155e-06, "loss": 0.0173, "step": 15322 }, { "epoch": 16.2, "learning_rate": 9.526955602536998e-06, "loss": 0.0107, "step": 15324 }, { "epoch": 16.2, "learning_rate": 9.521670190274842e-06, "loss": 0.0162, "step": 15326 }, { "epoch": 16.2, "learning_rate": 9.516384778012685e-06, "loss": 0.0182, "step": 15328 }, { "epoch": 16.21, "learning_rate": 9.51109936575053e-06, "loss": 0.0373, "step": 15330 }, { "epoch": 16.21, "learning_rate": 9.505813953488372e-06, "loss": 0.0092, "step": 15332 }, { "epoch": 16.21, "learning_rate": 9.500528541226216e-06, "loss": 0.0283, "step": 15334 }, { "epoch": 16.21, "learning_rate": 9.495243128964059e-06, "loss": 0.0025, "step": 15336 }, { "epoch": 16.21, "learning_rate": 9.489957716701903e-06, "loss": 0.0232, "step": 15338 }, { "epoch": 16.22, "learning_rate": 9.484672304439748e-06, "loss": 0.0578, "step": 15340 }, { "epoch": 16.22, "learning_rate": 9.47938689217759e-06, "loss": 0.0006, "step": 15342 }, { "epoch": 16.22, "learning_rate": 9.474101479915435e-06, "loss": 0.0544, "step": 15344 }, { "epoch": 16.22, "learning_rate": 9.468816067653277e-06, "loss": 0.0355, "step": 15346 }, { "epoch": 16.22, "learning_rate": 9.463530655391122e-06, "loss": 0.0202, "step": 15348 }, { "epoch": 16.23, "learning_rate": 9.458245243128964e-06, "loss": 0.0019, "step": 15350 }, { "epoch": 16.23, "learning_rate": 9.452959830866807e-06, "loss": 0.0169, "step": 15352 }, { "epoch": 16.23, "learning_rate": 9.447674418604651e-06, "loss": 0.0172, "step": 15354 }, { "epoch": 16.23, "learning_rate": 9.442389006342494e-06, "loss": 0.0081, "step": 15356 }, { "epoch": 16.23, "learning_rate": 9.437103594080338e-06, "loss": 0.0047, "step": 15358 }, { "epoch": 16.24, "learning_rate": 9.431818181818181e-06, "loss": 0.0017, "step": 15360 }, { "epoch": 16.24, "learning_rate": 9.426532769556026e-06, "loss": 0.0409, "step": 15362 }, { "epoch": 16.24, "learning_rate": 9.42124735729387e-06, "loss": 0.004, "step": 15364 }, { "epoch": 16.24, "learning_rate": 9.415961945031714e-06, "loss": 0.0238, "step": 15366 }, { "epoch": 16.25, "learning_rate": 9.410676532769557e-06, "loss": 0.0093, "step": 15368 }, { "epoch": 16.25, "learning_rate": 9.4053911205074e-06, "loss": 0.0115, "step": 15370 }, { "epoch": 16.25, "learning_rate": 9.400105708245244e-06, "loss": 0.0043, "step": 15372 }, { "epoch": 16.25, "learning_rate": 9.394820295983087e-06, "loss": 0.0007, "step": 15374 }, { "epoch": 16.25, "learning_rate": 9.389534883720931e-06, "loss": 0.0024, "step": 15376 }, { "epoch": 16.26, "learning_rate": 9.384249471458774e-06, "loss": 0.0156, "step": 15378 }, { "epoch": 16.26, "learning_rate": 9.378964059196618e-06, "loss": 0.019, "step": 15380 }, { "epoch": 16.26, "learning_rate": 9.37367864693446e-06, "loss": 0.0122, "step": 15382 }, { "epoch": 16.26, "learning_rate": 9.368393234672305e-06, "loss": 0.014, "step": 15384 }, { "epoch": 16.26, "learning_rate": 9.363107822410148e-06, "loss": 0.0191, "step": 15386 }, { "epoch": 16.27, "learning_rate": 9.357822410147992e-06, "loss": 0.0772, "step": 15388 }, { "epoch": 16.27, "learning_rate": 9.352536997885836e-06, "loss": 0.0306, "step": 15390 }, { "epoch": 16.27, "learning_rate": 9.347251585623679e-06, "loss": 0.0118, "step": 15392 }, { "epoch": 16.27, "learning_rate": 9.341966173361523e-06, "loss": 0.0154, "step": 15394 }, { "epoch": 16.27, "learning_rate": 9.336680761099366e-06, "loss": 0.0113, "step": 15396 }, { "epoch": 16.28, "learning_rate": 9.33139534883721e-06, "loss": 0.0242, "step": 15398 }, { "epoch": 16.28, "learning_rate": 9.326109936575053e-06, "loss": 0.0293, "step": 15400 }, { "epoch": 16.28, "learning_rate": 9.320824524312898e-06, "loss": 0.0092, "step": 15402 }, { "epoch": 16.28, "learning_rate": 9.31553911205074e-06, "loss": 0.0074, "step": 15404 }, { "epoch": 16.29, "learning_rate": 9.310253699788583e-06, "loss": 0.0031, "step": 15406 }, { "epoch": 16.29, "learning_rate": 9.304968287526427e-06, "loss": 0.0091, "step": 15408 }, { "epoch": 16.29, "learning_rate": 9.29968287526427e-06, "loss": 0.0013, "step": 15410 }, { "epoch": 16.29, "learning_rate": 9.294397463002114e-06, "loss": 0.0035, "step": 15412 }, { "epoch": 16.29, "learning_rate": 9.289112050739959e-06, "loss": 0.0462, "step": 15414 }, { "epoch": 16.3, "learning_rate": 9.283826638477803e-06, "loss": 0.0069, "step": 15416 }, { "epoch": 16.3, "learning_rate": 9.278541226215646e-06, "loss": 0.0014, "step": 15418 }, { "epoch": 16.3, "learning_rate": 9.27325581395349e-06, "loss": 0.0758, "step": 15420 }, { "epoch": 16.3, "learning_rate": 9.267970401691333e-06, "loss": 0.0031, "step": 15422 }, { "epoch": 16.3, "learning_rate": 9.262684989429175e-06, "loss": 0.0355, "step": 15424 }, { "epoch": 16.31, "learning_rate": 9.25739957716702e-06, "loss": 0.0242, "step": 15426 }, { "epoch": 16.31, "learning_rate": 9.252114164904862e-06, "loss": 0.002, "step": 15428 }, { "epoch": 16.31, "learning_rate": 9.246828752642707e-06, "loss": 0.0156, "step": 15430 }, { "epoch": 16.31, "learning_rate": 9.24154334038055e-06, "loss": 0.0538, "step": 15432 }, { "epoch": 16.32, "learning_rate": 9.236257928118394e-06, "loss": 0.0066, "step": 15434 }, { "epoch": 16.32, "learning_rate": 9.230972515856237e-06, "loss": 0.0657, "step": 15436 }, { "epoch": 16.32, "learning_rate": 9.225687103594081e-06, "loss": 0.0134, "step": 15438 }, { "epoch": 16.32, "learning_rate": 9.220401691331924e-06, "loss": 0.0129, "step": 15440 }, { "epoch": 16.32, "learning_rate": 9.215116279069768e-06, "loss": 0.0293, "step": 15442 }, { "epoch": 16.33, "learning_rate": 9.209830866807612e-06, "loss": 0.0413, "step": 15444 }, { "epoch": 16.33, "learning_rate": 9.204545454545455e-06, "loss": 0.0044, "step": 15446 }, { "epoch": 16.33, "learning_rate": 9.1992600422833e-06, "loss": 0.0584, "step": 15448 }, { "epoch": 16.33, "learning_rate": 9.193974630021142e-06, "loss": 0.1936, "step": 15450 }, { "epoch": 16.33, "learning_rate": 9.188689217758986e-06, "loss": 0.0127, "step": 15452 }, { "epoch": 16.34, "learning_rate": 9.183403805496829e-06, "loss": 0.0157, "step": 15454 }, { "epoch": 16.34, "learning_rate": 9.178118393234673e-06, "loss": 0.0288, "step": 15456 }, { "epoch": 16.34, "learning_rate": 9.172832980972516e-06, "loss": 0.0008, "step": 15458 }, { "epoch": 16.34, "learning_rate": 9.167547568710359e-06, "loss": 0.0394, "step": 15460 }, { "epoch": 16.34, "learning_rate": 9.162262156448203e-06, "loss": 0.0032, "step": 15462 }, { "epoch": 16.35, "learning_rate": 9.156976744186046e-06, "loss": 0.035, "step": 15464 }, { "epoch": 16.35, "learning_rate": 9.15169133192389e-06, "loss": 0.0011, "step": 15466 }, { "epoch": 16.35, "learning_rate": 9.146405919661735e-06, "loss": 0.0099, "step": 15468 }, { "epoch": 16.35, "learning_rate": 9.141120507399579e-06, "loss": 0.0036, "step": 15470 }, { "epoch": 16.36, "learning_rate": 9.135835095137422e-06, "loss": 0.0008, "step": 15472 }, { "epoch": 16.36, "learning_rate": 9.130549682875264e-06, "loss": 0.0411, "step": 15474 }, { "epoch": 16.36, "learning_rate": 9.125264270613109e-06, "loss": 0.0199, "step": 15476 }, { "epoch": 16.36, "learning_rate": 9.119978858350951e-06, "loss": 0.0192, "step": 15478 }, { "epoch": 16.36, "learning_rate": 9.114693446088796e-06, "loss": 0.0034, "step": 15480 }, { "epoch": 16.37, "learning_rate": 9.109408033826638e-06, "loss": 0.064, "step": 15482 }, { "epoch": 16.37, "learning_rate": 9.104122621564483e-06, "loss": 0.0287, "step": 15484 }, { "epoch": 16.37, "learning_rate": 9.098837209302325e-06, "loss": 0.0124, "step": 15486 }, { "epoch": 16.37, "learning_rate": 9.09355179704017e-06, "loss": 0.035, "step": 15488 }, { "epoch": 16.37, "learning_rate": 9.088266384778012e-06, "loss": 0.0143, "step": 15490 }, { "epoch": 16.38, "learning_rate": 9.082980972515857e-06, "loss": 0.0335, "step": 15492 }, { "epoch": 16.38, "learning_rate": 9.077695560253701e-06, "loss": 0.0384, "step": 15494 }, { "epoch": 16.38, "learning_rate": 9.072410147991544e-06, "loss": 0.039, "step": 15496 }, { "epoch": 16.38, "learning_rate": 9.067124735729388e-06, "loss": 0.0183, "step": 15498 }, { "epoch": 16.38, "learning_rate": 9.06183932346723e-06, "loss": 0.0215, "step": 15500 }, { "epoch": 16.38, "eval_cer": 0.04058136221145626, "eval_loss": 0.7027328014373779, "eval_runtime": 129.2071, "eval_samples_per_second": 6.509, "eval_steps_per_second": 0.82, "step": 15500 }, { "epoch": 16.39, "learning_rate": 9.056553911205075e-06, "loss": 0.0107, "step": 15502 }, { "epoch": 16.39, "learning_rate": 9.051268498942918e-06, "loss": 0.0201, "step": 15504 }, { "epoch": 16.39, "learning_rate": 9.045983086680762e-06, "loss": 0.0449, "step": 15506 }, { "epoch": 16.39, "learning_rate": 9.040697674418605e-06, "loss": 0.0063, "step": 15508 }, { "epoch": 16.4, "learning_rate": 9.035412262156448e-06, "loss": 0.0025, "step": 15510 }, { "epoch": 16.4, "learning_rate": 9.030126849894292e-06, "loss": 0.0022, "step": 15512 }, { "epoch": 16.4, "learning_rate": 9.024841437632135e-06, "loss": 0.0365, "step": 15514 }, { "epoch": 16.4, "learning_rate": 9.019556025369979e-06, "loss": 0.0839, "step": 15516 }, { "epoch": 16.4, "learning_rate": 9.014270613107823e-06, "loss": 0.0562, "step": 15518 }, { "epoch": 16.41, "learning_rate": 9.008985200845666e-06, "loss": 0.0425, "step": 15520 }, { "epoch": 16.41, "learning_rate": 9.00369978858351e-06, "loss": 0.0228, "step": 15522 }, { "epoch": 16.41, "learning_rate": 8.998414376321355e-06, "loss": 0.0055, "step": 15524 }, { "epoch": 16.41, "learning_rate": 8.993128964059197e-06, "loss": 0.01, "step": 15526 }, { "epoch": 16.41, "learning_rate": 8.98784355179704e-06, "loss": 0.0198, "step": 15528 }, { "epoch": 16.42, "learning_rate": 8.982558139534884e-06, "loss": 0.0038, "step": 15530 }, { "epoch": 16.42, "learning_rate": 8.977272727272727e-06, "loss": 0.0086, "step": 15532 }, { "epoch": 16.42, "learning_rate": 8.971987315010571e-06, "loss": 0.0288, "step": 15534 }, { "epoch": 16.42, "learning_rate": 8.966701902748414e-06, "loss": 0.0007, "step": 15536 }, { "epoch": 16.42, "learning_rate": 8.961416490486259e-06, "loss": 0.0206, "step": 15538 }, { "epoch": 16.43, "learning_rate": 8.956131078224101e-06, "loss": 0.0066, "step": 15540 }, { "epoch": 16.43, "learning_rate": 8.950845665961946e-06, "loss": 0.0089, "step": 15542 }, { "epoch": 16.43, "learning_rate": 8.945560253699788e-06, "loss": 0.0103, "step": 15544 }, { "epoch": 16.43, "learning_rate": 8.940274841437633e-06, "loss": 0.0196, "step": 15546 }, { "epoch": 16.44, "learning_rate": 8.934989429175477e-06, "loss": 0.02, "step": 15548 }, { "epoch": 16.44, "learning_rate": 8.92970401691332e-06, "loss": 0.0585, "step": 15550 }, { "epoch": 16.44, "learning_rate": 8.924418604651164e-06, "loss": 0.0204, "step": 15552 }, { "epoch": 16.44, "learning_rate": 8.919133192389007e-06, "loss": 0.0195, "step": 15554 }, { "epoch": 16.44, "learning_rate": 8.913847780126851e-06, "loss": 0.0055, "step": 15556 }, { "epoch": 16.45, "learning_rate": 8.908562367864694e-06, "loss": 0.037, "step": 15558 }, { "epoch": 16.45, "learning_rate": 8.903276955602538e-06, "loss": 0.005, "step": 15560 }, { "epoch": 16.45, "learning_rate": 8.89799154334038e-06, "loss": 0.0041, "step": 15562 }, { "epoch": 16.45, "learning_rate": 8.892706131078223e-06, "loss": 0.0289, "step": 15564 }, { "epoch": 16.45, "learning_rate": 8.887420718816068e-06, "loss": 0.0018, "step": 15566 }, { "epoch": 16.46, "learning_rate": 8.88213530655391e-06, "loss": 0.0025, "step": 15568 }, { "epoch": 16.46, "learning_rate": 8.876849894291755e-06, "loss": 0.0317, "step": 15570 }, { "epoch": 16.46, "learning_rate": 8.8715644820296e-06, "loss": 0.0246, "step": 15572 }, { "epoch": 16.46, "learning_rate": 8.866279069767444e-06, "loss": 0.0013, "step": 15574 }, { "epoch": 16.47, "learning_rate": 8.860993657505286e-06, "loss": 0.0173, "step": 15576 }, { "epoch": 16.47, "learning_rate": 8.85570824524313e-06, "loss": 0.018, "step": 15578 }, { "epoch": 16.47, "learning_rate": 8.850422832980973e-06, "loss": 0.0055, "step": 15580 }, { "epoch": 16.47, "learning_rate": 8.845137420718816e-06, "loss": 0.0109, "step": 15582 }, { "epoch": 16.47, "learning_rate": 8.83985200845666e-06, "loss": 0.0013, "step": 15584 }, { "epoch": 16.48, "learning_rate": 8.834566596194503e-06, "loss": 0.0017, "step": 15586 }, { "epoch": 16.48, "learning_rate": 8.829281183932347e-06, "loss": 0.0039, "step": 15588 }, { "epoch": 16.48, "learning_rate": 8.82399577167019e-06, "loss": 0.002, "step": 15590 }, { "epoch": 16.48, "learning_rate": 8.818710359408034e-06, "loss": 0.0175, "step": 15592 }, { "epoch": 16.48, "learning_rate": 8.813424947145877e-06, "loss": 0.0164, "step": 15594 }, { "epoch": 16.49, "learning_rate": 8.808139534883721e-06, "loss": 0.0724, "step": 15596 }, { "epoch": 16.49, "learning_rate": 8.802854122621566e-06, "loss": 0.0191, "step": 15598 }, { "epoch": 16.49, "learning_rate": 8.797568710359408e-06, "loss": 0.053, "step": 15600 }, { "epoch": 16.49, "learning_rate": 8.792283298097253e-06, "loss": 0.0191, "step": 15602 }, { "epoch": 16.49, "learning_rate": 8.786997885835095e-06, "loss": 0.0064, "step": 15604 }, { "epoch": 16.5, "learning_rate": 8.78171247357294e-06, "loss": 0.0036, "step": 15606 }, { "epoch": 16.5, "learning_rate": 8.776427061310782e-06, "loss": 0.0846, "step": 15608 }, { "epoch": 16.5, "learning_rate": 8.771141649048627e-06, "loss": 0.0507, "step": 15610 }, { "epoch": 16.5, "learning_rate": 8.76585623678647e-06, "loss": 0.0054, "step": 15612 }, { "epoch": 16.51, "learning_rate": 8.760570824524314e-06, "loss": 0.0069, "step": 15614 }, { "epoch": 16.51, "learning_rate": 8.755285412262157e-06, "loss": 0.0042, "step": 15616 }, { "epoch": 16.51, "learning_rate": 8.75e-06, "loss": 0.0057, "step": 15618 }, { "epoch": 16.51, "learning_rate": 8.744714587737844e-06, "loss": 0.0052, "step": 15620 }, { "epoch": 16.51, "learning_rate": 8.739429175475688e-06, "loss": 0.0015, "step": 15622 }, { "epoch": 16.52, "learning_rate": 8.73414376321353e-06, "loss": 0.0027, "step": 15624 }, { "epoch": 16.52, "learning_rate": 8.728858350951375e-06, "loss": 0.0023, "step": 15626 }, { "epoch": 16.52, "learning_rate": 8.72357293868922e-06, "loss": 0.0057, "step": 15628 }, { "epoch": 16.52, "learning_rate": 8.718287526427062e-06, "loss": 0.0022, "step": 15630 }, { "epoch": 16.52, "learning_rate": 8.713002114164906e-06, "loss": 0.0113, "step": 15632 }, { "epoch": 16.53, "learning_rate": 8.707716701902749e-06, "loss": 0.0017, "step": 15634 }, { "epoch": 16.53, "learning_rate": 8.702431289640592e-06, "loss": 0.0328, "step": 15636 }, { "epoch": 16.53, "learning_rate": 8.697145877378436e-06, "loss": 0.0094, "step": 15638 }, { "epoch": 16.53, "learning_rate": 8.691860465116279e-06, "loss": 0.0201, "step": 15640 }, { "epoch": 16.53, "learning_rate": 8.686575052854123e-06, "loss": 0.0059, "step": 15642 }, { "epoch": 16.54, "learning_rate": 8.681289640591966e-06, "loss": 0.0061, "step": 15644 }, { "epoch": 16.54, "learning_rate": 8.67600422832981e-06, "loss": 0.0123, "step": 15646 }, { "epoch": 16.54, "learning_rate": 8.670718816067653e-06, "loss": 0.038, "step": 15648 }, { "epoch": 16.54, "learning_rate": 8.665433403805497e-06, "loss": 0.0219, "step": 15650 }, { "epoch": 16.55, "learning_rate": 8.660147991543342e-06, "loss": 0.0099, "step": 15652 }, { "epoch": 16.55, "learning_rate": 8.654862579281184e-06, "loss": 0.019, "step": 15654 }, { "epoch": 16.55, "learning_rate": 8.649577167019029e-06, "loss": 0.001, "step": 15656 }, { "epoch": 16.55, "learning_rate": 8.644291754756871e-06, "loss": 0.0327, "step": 15658 }, { "epoch": 16.55, "learning_rate": 8.639006342494716e-06, "loss": 0.0166, "step": 15660 }, { "epoch": 16.56, "learning_rate": 8.633720930232558e-06, "loss": 0.0084, "step": 15662 }, { "epoch": 16.56, "learning_rate": 8.628435517970403e-06, "loss": 0.0051, "step": 15664 }, { "epoch": 16.56, "learning_rate": 8.623150105708245e-06, "loss": 0.0303, "step": 15666 }, { "epoch": 16.56, "learning_rate": 8.617864693446088e-06, "loss": 0.0372, "step": 15668 }, { "epoch": 16.56, "learning_rate": 8.612579281183932e-06, "loss": 0.0384, "step": 15670 }, { "epoch": 16.57, "learning_rate": 8.607293868921775e-06, "loss": 0.0705, "step": 15672 }, { "epoch": 16.57, "learning_rate": 8.60200845665962e-06, "loss": 0.012, "step": 15674 }, { "epoch": 16.57, "learning_rate": 8.596723044397464e-06, "loss": 0.0019, "step": 15676 }, { "epoch": 16.57, "learning_rate": 8.591437632135308e-06, "loss": 0.0009, "step": 15678 }, { "epoch": 16.58, "learning_rate": 8.58615221987315e-06, "loss": 0.0035, "step": 15680 }, { "epoch": 16.58, "learning_rate": 8.580866807610995e-06, "loss": 0.0016, "step": 15682 }, { "epoch": 16.58, "learning_rate": 8.575581395348838e-06, "loss": 0.0076, "step": 15684 }, { "epoch": 16.58, "learning_rate": 8.57029598308668e-06, "loss": 0.0049, "step": 15686 }, { "epoch": 16.58, "learning_rate": 8.565010570824525e-06, "loss": 0.0144, "step": 15688 }, { "epoch": 16.59, "learning_rate": 8.559725158562368e-06, "loss": 0.0011, "step": 15690 }, { "epoch": 16.59, "learning_rate": 8.554439746300212e-06, "loss": 0.0017, "step": 15692 }, { "epoch": 16.59, "learning_rate": 8.549154334038055e-06, "loss": 0.0122, "step": 15694 }, { "epoch": 16.59, "learning_rate": 8.543868921775899e-06, "loss": 0.0038, "step": 15696 }, { "epoch": 16.59, "learning_rate": 8.538583509513742e-06, "loss": 0.0005, "step": 15698 }, { "epoch": 16.6, "learning_rate": 8.533298097251586e-06, "loss": 0.0341, "step": 15700 }, { "epoch": 16.6, "learning_rate": 8.52801268498943e-06, "loss": 0.0007, "step": 15702 }, { "epoch": 16.6, "learning_rate": 8.522727272727273e-06, "loss": 0.0677, "step": 15704 }, { "epoch": 16.6, "learning_rate": 8.517441860465117e-06, "loss": 0.0053, "step": 15706 }, { "epoch": 16.6, "learning_rate": 8.51215644820296e-06, "loss": 0.0009, "step": 15708 }, { "epoch": 16.61, "learning_rate": 8.506871035940804e-06, "loss": 0.0008, "step": 15710 }, { "epoch": 16.61, "learning_rate": 8.501585623678647e-06, "loss": 0.0008, "step": 15712 }, { "epoch": 16.61, "learning_rate": 8.496300211416491e-06, "loss": 0.0045, "step": 15714 }, { "epoch": 16.61, "learning_rate": 8.491014799154334e-06, "loss": 0.018, "step": 15716 }, { "epoch": 16.62, "learning_rate": 8.485729386892179e-06, "loss": 0.0387, "step": 15718 }, { "epoch": 16.62, "learning_rate": 8.480443974630021e-06, "loss": 0.0098, "step": 15720 }, { "epoch": 16.62, "learning_rate": 8.475158562367864e-06, "loss": 0.0036, "step": 15722 }, { "epoch": 16.62, "learning_rate": 8.469873150105708e-06, "loss": 0.0045, "step": 15724 }, { "epoch": 16.62, "learning_rate": 8.464587737843551e-06, "loss": 0.023, "step": 15726 }, { "epoch": 16.63, "learning_rate": 8.459302325581395e-06, "loss": 0.0013, "step": 15728 }, { "epoch": 16.63, "learning_rate": 8.45401691331924e-06, "loss": 0.0501, "step": 15730 }, { "epoch": 16.63, "learning_rate": 8.448731501057084e-06, "loss": 0.0244, "step": 15732 }, { "epoch": 16.63, "learning_rate": 8.443446088794927e-06, "loss": 0.003, "step": 15734 }, { "epoch": 16.63, "learning_rate": 8.438160676532771e-06, "loss": 0.0093, "step": 15736 }, { "epoch": 16.64, "learning_rate": 8.432875264270614e-06, "loss": 0.0012, "step": 15738 }, { "epoch": 16.64, "learning_rate": 8.427589852008456e-06, "loss": 0.0012, "step": 15740 }, { "epoch": 16.64, "learning_rate": 8.4223044397463e-06, "loss": 0.0007, "step": 15742 }, { "epoch": 16.64, "learning_rate": 8.417019027484143e-06, "loss": 0.003, "step": 15744 }, { "epoch": 16.64, "learning_rate": 8.411733615221988e-06, "loss": 0.0197, "step": 15746 }, { "epoch": 16.65, "learning_rate": 8.40644820295983e-06, "loss": 0.0114, "step": 15748 }, { "epoch": 16.65, "learning_rate": 8.401162790697675e-06, "loss": 0.0009, "step": 15750 }, { "epoch": 16.65, "learning_rate": 8.395877378435517e-06, "loss": 0.0244, "step": 15752 }, { "epoch": 16.65, "learning_rate": 8.390591966173362e-06, "loss": 0.0402, "step": 15754 }, { "epoch": 16.66, "learning_rate": 8.385306553911206e-06, "loss": 0.0028, "step": 15756 }, { "epoch": 16.66, "learning_rate": 8.380021141649049e-06, "loss": 0.0157, "step": 15758 }, { "epoch": 16.66, "learning_rate": 8.374735729386893e-06, "loss": 0.0052, "step": 15760 }, { "epoch": 16.66, "learning_rate": 8.369450317124736e-06, "loss": 0.0008, "step": 15762 }, { "epoch": 16.66, "learning_rate": 8.36416490486258e-06, "loss": 0.0569, "step": 15764 }, { "epoch": 16.67, "learning_rate": 8.358879492600423e-06, "loss": 0.0022, "step": 15766 }, { "epoch": 16.67, "learning_rate": 8.353594080338267e-06, "loss": 0.0005, "step": 15768 }, { "epoch": 16.67, "learning_rate": 8.34830866807611e-06, "loss": 0.0443, "step": 15770 }, { "epoch": 16.67, "learning_rate": 8.343023255813954e-06, "loss": 0.0037, "step": 15772 }, { "epoch": 16.67, "learning_rate": 8.337737843551797e-06, "loss": 0.0637, "step": 15774 }, { "epoch": 16.68, "learning_rate": 8.33245243128964e-06, "loss": 0.0059, "step": 15776 }, { "epoch": 16.68, "learning_rate": 8.327167019027484e-06, "loss": 0.0065, "step": 15778 }, { "epoch": 16.68, "learning_rate": 8.321881606765328e-06, "loss": 0.0043, "step": 15780 }, { "epoch": 16.68, "learning_rate": 8.316596194503173e-06, "loss": 0.0005, "step": 15782 }, { "epoch": 16.68, "learning_rate": 8.311310782241015e-06, "loss": 0.0015, "step": 15784 }, { "epoch": 16.69, "learning_rate": 8.30602536997886e-06, "loss": 0.0041, "step": 15786 }, { "epoch": 16.69, "learning_rate": 8.300739957716703e-06, "loss": 0.0269, "step": 15788 }, { "epoch": 16.69, "learning_rate": 8.295454545454547e-06, "loss": 0.0046, "step": 15790 }, { "epoch": 16.69, "learning_rate": 8.29016913319239e-06, "loss": 0.029, "step": 15792 }, { "epoch": 16.7, "learning_rate": 8.284883720930232e-06, "loss": 0.0044, "step": 15794 }, { "epoch": 16.7, "learning_rate": 8.279598308668077e-06, "loss": 0.0115, "step": 15796 }, { "epoch": 16.7, "learning_rate": 8.27431289640592e-06, "loss": 0.0472, "step": 15798 }, { "epoch": 16.7, "learning_rate": 8.269027484143764e-06, "loss": 0.0076, "step": 15800 }, { "epoch": 16.7, "learning_rate": 8.263742071881606e-06, "loss": 0.0065, "step": 15802 }, { "epoch": 16.71, "learning_rate": 8.25845665961945e-06, "loss": 0.0278, "step": 15804 }, { "epoch": 16.71, "learning_rate": 8.253171247357295e-06, "loss": 0.0006, "step": 15806 }, { "epoch": 16.71, "learning_rate": 8.247885835095138e-06, "loss": 0.0354, "step": 15808 }, { "epoch": 16.71, "learning_rate": 8.242600422832982e-06, "loss": 0.0012, "step": 15810 }, { "epoch": 16.71, "learning_rate": 8.237315010570825e-06, "loss": 0.0095, "step": 15812 }, { "epoch": 16.72, "learning_rate": 8.232029598308669e-06, "loss": 0.0546, "step": 15814 }, { "epoch": 16.72, "learning_rate": 8.226744186046512e-06, "loss": 0.0153, "step": 15816 }, { "epoch": 16.72, "learning_rate": 8.221458773784356e-06, "loss": 0.0081, "step": 15818 }, { "epoch": 16.72, "learning_rate": 8.216173361522199e-06, "loss": 0.0276, "step": 15820 }, { "epoch": 16.73, "learning_rate": 8.210887949260043e-06, "loss": 0.0224, "step": 15822 }, { "epoch": 16.73, "learning_rate": 8.205602536997886e-06, "loss": 0.0207, "step": 15824 }, { "epoch": 16.73, "learning_rate": 8.200317124735729e-06, "loss": 0.0153, "step": 15826 }, { "epoch": 16.73, "learning_rate": 8.195031712473573e-06, "loss": 0.0632, "step": 15828 }, { "epoch": 16.73, "learning_rate": 8.189746300211416e-06, "loss": 0.0229, "step": 15830 }, { "epoch": 16.74, "learning_rate": 8.18446088794926e-06, "loss": 0.004, "step": 15832 }, { "epoch": 16.74, "learning_rate": 8.179175475687104e-06, "loss": 0.0379, "step": 15834 }, { "epoch": 16.74, "learning_rate": 8.173890063424949e-06, "loss": 0.0025, "step": 15836 }, { "epoch": 16.74, "learning_rate": 8.168604651162791e-06, "loss": 0.0197, "step": 15838 }, { "epoch": 16.74, "learning_rate": 8.163319238900636e-06, "loss": 0.0026, "step": 15840 }, { "epoch": 16.75, "learning_rate": 8.158033826638478e-06, "loss": 0.0005, "step": 15842 }, { "epoch": 16.75, "learning_rate": 8.152748414376321e-06, "loss": 0.0158, "step": 15844 }, { "epoch": 16.75, "learning_rate": 8.147463002114165e-06, "loss": 0.0148, "step": 15846 }, { "epoch": 16.75, "learning_rate": 8.142177589852008e-06, "loss": 0.025, "step": 15848 }, { "epoch": 16.75, "learning_rate": 8.136892177589852e-06, "loss": 0.0016, "step": 15850 }, { "epoch": 16.76, "learning_rate": 8.131606765327695e-06, "loss": 0.0008, "step": 15852 }, { "epoch": 16.76, "learning_rate": 8.12632135306554e-06, "loss": 0.0041, "step": 15854 }, { "epoch": 16.76, "learning_rate": 8.121035940803382e-06, "loss": 0.0118, "step": 15856 }, { "epoch": 16.76, "learning_rate": 8.115750528541227e-06, "loss": 0.0253, "step": 15858 }, { "epoch": 16.77, "learning_rate": 8.110465116279071e-06, "loss": 0.0133, "step": 15860 }, { "epoch": 16.77, "learning_rate": 8.105179704016914e-06, "loss": 0.0063, "step": 15862 }, { "epoch": 16.77, "learning_rate": 8.099894291754758e-06, "loss": 0.0136, "step": 15864 }, { "epoch": 16.77, "learning_rate": 8.0946088794926e-06, "loss": 0.003, "step": 15866 }, { "epoch": 16.77, "learning_rate": 8.089323467230445e-06, "loss": 0.0086, "step": 15868 }, { "epoch": 16.78, "learning_rate": 8.084038054968288e-06, "loss": 0.003, "step": 15870 }, { "epoch": 16.78, "learning_rate": 8.078752642706132e-06, "loss": 0.0041, "step": 15872 }, { "epoch": 16.78, "learning_rate": 8.073467230443975e-06, "loss": 0.0026, "step": 15874 }, { "epoch": 16.78, "learning_rate": 8.068181818181819e-06, "loss": 0.0174, "step": 15876 }, { "epoch": 16.78, "learning_rate": 8.062896405919662e-06, "loss": 0.0183, "step": 15878 }, { "epoch": 16.79, "learning_rate": 8.057610993657504e-06, "loss": 0.005, "step": 15880 }, { "epoch": 16.79, "learning_rate": 8.052325581395349e-06, "loss": 0.0087, "step": 15882 }, { "epoch": 16.79, "learning_rate": 8.047040169133193e-06, "loss": 0.0174, "step": 15884 }, { "epoch": 16.79, "learning_rate": 8.041754756871037e-06, "loss": 0.0063, "step": 15886 }, { "epoch": 16.79, "learning_rate": 8.03646934460888e-06, "loss": 0.0044, "step": 15888 }, { "epoch": 16.8, "learning_rate": 8.031183932346724e-06, "loss": 0.0145, "step": 15890 }, { "epoch": 16.8, "learning_rate": 8.025898520084567e-06, "loss": 0.0314, "step": 15892 }, { "epoch": 16.8, "learning_rate": 8.020613107822412e-06, "loss": 0.0004, "step": 15894 }, { "epoch": 16.8, "learning_rate": 8.015327695560254e-06, "loss": 0.0327, "step": 15896 }, { "epoch": 16.81, "learning_rate": 8.010042283298097e-06, "loss": 0.0079, "step": 15898 }, { "epoch": 16.81, "learning_rate": 8.004756871035941e-06, "loss": 0.0295, "step": 15900 }, { "epoch": 16.81, "learning_rate": 7.999471458773784e-06, "loss": 0.0153, "step": 15902 }, { "epoch": 16.81, "learning_rate": 7.994186046511628e-06, "loss": 0.0069, "step": 15904 }, { "epoch": 16.81, "learning_rate": 7.988900634249471e-06, "loss": 0.001, "step": 15906 }, { "epoch": 16.82, "learning_rate": 7.983615221987315e-06, "loss": 0.0023, "step": 15908 }, { "epoch": 16.82, "learning_rate": 7.978329809725158e-06, "loss": 0.0239, "step": 15910 }, { "epoch": 16.82, "learning_rate": 7.973044397463002e-06, "loss": 0.0301, "step": 15912 }, { "epoch": 16.82, "learning_rate": 7.967758985200847e-06, "loss": 0.0061, "step": 15914 }, { "epoch": 16.82, "learning_rate": 7.96247357293869e-06, "loss": 0.0106, "step": 15916 }, { "epoch": 16.83, "learning_rate": 7.957188160676534e-06, "loss": 0.0008, "step": 15918 }, { "epoch": 16.83, "learning_rate": 7.951902748414376e-06, "loss": 0.0302, "step": 15920 }, { "epoch": 16.83, "learning_rate": 7.94661733615222e-06, "loss": 0.014, "step": 15922 }, { "epoch": 16.83, "learning_rate": 7.941331923890063e-06, "loss": 0.007, "step": 15924 }, { "epoch": 16.84, "learning_rate": 7.936046511627908e-06, "loss": 0.0229, "step": 15926 }, { "epoch": 16.84, "learning_rate": 7.93076109936575e-06, "loss": 0.0071, "step": 15928 }, { "epoch": 16.84, "learning_rate": 7.925475687103595e-06, "loss": 0.0022, "step": 15930 }, { "epoch": 16.84, "learning_rate": 7.920190274841438e-06, "loss": 0.0043, "step": 15932 }, { "epoch": 16.84, "learning_rate": 7.91490486257928e-06, "loss": 0.023, "step": 15934 }, { "epoch": 16.85, "learning_rate": 7.909619450317125e-06, "loss": 0.0016, "step": 15936 }, { "epoch": 16.85, "learning_rate": 7.904334038054969e-06, "loss": 0.0009, "step": 15938 }, { "epoch": 16.85, "learning_rate": 7.899048625792813e-06, "loss": 0.0116, "step": 15940 }, { "epoch": 16.85, "learning_rate": 7.893763213530656e-06, "loss": 0.0317, "step": 15942 }, { "epoch": 16.85, "learning_rate": 7.8884778012685e-06, "loss": 0.0041, "step": 15944 }, { "epoch": 16.86, "learning_rate": 7.883192389006343e-06, "loss": 0.0323, "step": 15946 }, { "epoch": 16.86, "learning_rate": 7.877906976744187e-06, "loss": 0.0025, "step": 15948 }, { "epoch": 16.86, "learning_rate": 7.87262156448203e-06, "loss": 0.0025, "step": 15950 }, { "epoch": 16.86, "learning_rate": 7.867336152219873e-06, "loss": 0.0026, "step": 15952 }, { "epoch": 16.86, "learning_rate": 7.862050739957717e-06, "loss": 0.0046, "step": 15954 }, { "epoch": 16.87, "learning_rate": 7.85676532769556e-06, "loss": 0.0028, "step": 15956 }, { "epoch": 16.87, "learning_rate": 7.851479915433404e-06, "loss": 0.0106, "step": 15958 }, { "epoch": 16.87, "learning_rate": 7.846194503171247e-06, "loss": 0.0023, "step": 15960 }, { "epoch": 16.87, "learning_rate": 7.840909090909091e-06, "loss": 0.0007, "step": 15962 }, { "epoch": 16.88, "learning_rate": 7.835623678646936e-06, "loss": 0.0119, "step": 15964 }, { "epoch": 16.88, "learning_rate": 7.83033826638478e-06, "loss": 0.0067, "step": 15966 }, { "epoch": 16.88, "learning_rate": 7.825052854122623e-06, "loss": 0.0019, "step": 15968 }, { "epoch": 16.88, "learning_rate": 7.819767441860465e-06, "loss": 0.0025, "step": 15970 }, { "epoch": 16.88, "learning_rate": 7.81448202959831e-06, "loss": 0.007, "step": 15972 }, { "epoch": 16.89, "learning_rate": 7.809196617336152e-06, "loss": 0.0025, "step": 15974 }, { "epoch": 16.89, "learning_rate": 7.803911205073997e-06, "loss": 0.0012, "step": 15976 }, { "epoch": 16.89, "learning_rate": 7.79862579281184e-06, "loss": 0.0012, "step": 15978 }, { "epoch": 16.89, "learning_rate": 7.793340380549684e-06, "loss": 0.005, "step": 15980 }, { "epoch": 16.89, "learning_rate": 7.788054968287526e-06, "loss": 0.0006, "step": 15982 }, { "epoch": 16.9, "learning_rate": 7.782769556025369e-06, "loss": 0.0004, "step": 15984 }, { "epoch": 16.9, "learning_rate": 7.777484143763213e-06, "loss": 0.002, "step": 15986 }, { "epoch": 16.9, "learning_rate": 7.772198731501058e-06, "loss": 0.0265, "step": 15988 }, { "epoch": 16.9, "learning_rate": 7.766913319238902e-06, "loss": 0.0074, "step": 15990 }, { "epoch": 16.9, "learning_rate": 7.761627906976745e-06, "loss": 0.0025, "step": 15992 }, { "epoch": 16.91, "learning_rate": 7.756342494714589e-06, "loss": 0.0256, "step": 15994 }, { "epoch": 16.91, "learning_rate": 7.751057082452432e-06, "loss": 0.0007, "step": 15996 }, { "epoch": 16.91, "learning_rate": 7.745771670190276e-06, "loss": 0.0028, "step": 15998 }, { "epoch": 16.91, "learning_rate": 7.740486257928119e-06, "loss": 0.0029, "step": 16000 }, { "epoch": 16.91, "eval_cer": 0.06184098033627814, "eval_loss": 0.898295521736145, "eval_runtime": 124.9218, "eval_samples_per_second": 6.732, "eval_steps_per_second": 0.849, "step": 16000 }, { "epoch": 16.92, "learning_rate": 7.735200845665962e-06, "loss": 0.0036, "step": 16002 }, { "epoch": 16.92, "learning_rate": 7.729915433403806e-06, "loss": 0.0021, "step": 16004 }, { "epoch": 16.92, "learning_rate": 7.724630021141649e-06, "loss": 0.0006, "step": 16006 }, { "epoch": 16.92, "learning_rate": 7.719344608879493e-06, "loss": 0.0126, "step": 16008 }, { "epoch": 16.92, "learning_rate": 7.714059196617336e-06, "loss": 0.0391, "step": 16010 }, { "epoch": 16.93, "learning_rate": 7.70877378435518e-06, "loss": 0.0153, "step": 16012 }, { "epoch": 16.93, "learning_rate": 7.703488372093023e-06, "loss": 0.0036, "step": 16014 }, { "epoch": 16.93, "learning_rate": 7.698202959830867e-06, "loss": 0.0109, "step": 16016 }, { "epoch": 16.93, "learning_rate": 7.692917547568711e-06, "loss": 0.0096, "step": 16018 }, { "epoch": 16.93, "learning_rate": 7.687632135306554e-06, "loss": 0.0146, "step": 16020 }, { "epoch": 16.94, "learning_rate": 7.682346723044398e-06, "loss": 0.0411, "step": 16022 }, { "epoch": 16.94, "learning_rate": 7.677061310782241e-06, "loss": 0.0046, "step": 16024 }, { "epoch": 16.94, "learning_rate": 7.671775898520085e-06, "loss": 0.0061, "step": 16026 }, { "epoch": 16.94, "learning_rate": 7.666490486257928e-06, "loss": 0.0031, "step": 16028 }, { "epoch": 16.95, "learning_rate": 7.661205073995772e-06, "loss": 0.0096, "step": 16030 }, { "epoch": 16.95, "learning_rate": 7.655919661733615e-06, "loss": 0.0065, "step": 16032 }, { "epoch": 16.95, "learning_rate": 7.65063424947146e-06, "loss": 0.0007, "step": 16034 }, { "epoch": 16.95, "learning_rate": 7.645348837209302e-06, "loss": 0.0067, "step": 16036 }, { "epoch": 16.95, "learning_rate": 7.640063424947145e-06, "loss": 0.0016, "step": 16038 }, { "epoch": 16.96, "learning_rate": 7.63477801268499e-06, "loss": 0.0039, "step": 16040 }, { "epoch": 16.96, "learning_rate": 7.629492600422834e-06, "loss": 0.017, "step": 16042 }, { "epoch": 16.96, "learning_rate": 7.624207188160677e-06, "loss": 0.004, "step": 16044 }, { "epoch": 16.96, "learning_rate": 7.61892177589852e-06, "loss": 0.0162, "step": 16046 }, { "epoch": 16.96, "learning_rate": 7.613636363636364e-06, "loss": 0.0003, "step": 16048 }, { "epoch": 16.97, "learning_rate": 7.608350951374208e-06, "loss": 0.0022, "step": 16050 }, { "epoch": 16.97, "learning_rate": 7.603065539112052e-06, "loss": 0.0284, "step": 16052 }, { "epoch": 16.97, "learning_rate": 7.597780126849895e-06, "loss": 0.0081, "step": 16054 }, { "epoch": 16.97, "learning_rate": 7.592494714587737e-06, "loss": 0.0081, "step": 16056 }, { "epoch": 16.97, "learning_rate": 7.587209302325582e-06, "loss": 0.0119, "step": 16058 }, { "epoch": 16.98, "learning_rate": 7.581923890063425e-06, "loss": 0.0114, "step": 16060 }, { "epoch": 16.98, "learning_rate": 7.576638477801269e-06, "loss": 0.0038, "step": 16062 }, { "epoch": 16.98, "learning_rate": 7.571353065539112e-06, "loss": 0.0042, "step": 16064 }, { "epoch": 16.98, "learning_rate": 7.566067653276957e-06, "loss": 0.0329, "step": 16066 }, { "epoch": 16.99, "learning_rate": 7.560782241014799e-06, "loss": 0.0011, "step": 16068 }, { "epoch": 16.99, "learning_rate": 7.555496828752644e-06, "loss": 0.0017, "step": 16070 }, { "epoch": 16.99, "learning_rate": 7.550211416490486e-06, "loss": 0.0037, "step": 16072 }, { "epoch": 16.99, "learning_rate": 7.54492600422833e-06, "loss": 0.0032, "step": 16074 }, { "epoch": 16.99, "learning_rate": 7.539640591966174e-06, "loss": 0.0028, "step": 16076 }, { "epoch": 17.0, "learning_rate": 7.534355179704017e-06, "loss": 0.0038, "step": 16078 }, { "epoch": 17.0, "learning_rate": 7.529069767441861e-06, "loss": 0.026, "step": 16080 }, { "epoch": 17.0, "learning_rate": 7.523784355179704e-06, "loss": 0.0007, "step": 16082 }, { "epoch": 17.0, "learning_rate": 7.518498942917548e-06, "loss": 0.001, "step": 16084 }, { "epoch": 17.0, "learning_rate": 7.513213530655391e-06, "loss": 0.021, "step": 16086 }, { "epoch": 17.01, "learning_rate": 7.507928118393235e-06, "loss": 0.0175, "step": 16088 }, { "epoch": 17.01, "learning_rate": 7.502642706131079e-06, "loss": 0.0011, "step": 16090 }, { "epoch": 17.01, "learning_rate": 7.4973572938689215e-06, "loss": 0.0009, "step": 16092 }, { "epoch": 17.01, "learning_rate": 7.492071881606766e-06, "loss": 0.0044, "step": 16094 }, { "epoch": 17.01, "learning_rate": 7.4867864693446086e-06, "loss": 0.0152, "step": 16096 }, { "epoch": 17.02, "learning_rate": 7.481501057082453e-06, "loss": 0.0012, "step": 16098 }, { "epoch": 17.02, "learning_rate": 7.4762156448202964e-06, "loss": 0.0022, "step": 16100 }, { "epoch": 17.02, "learning_rate": 7.470930232558141e-06, "loss": 0.0017, "step": 16102 }, { "epoch": 17.02, "learning_rate": 7.4656448202959835e-06, "loss": 0.0019, "step": 16104 }, { "epoch": 17.03, "learning_rate": 7.460359408033828e-06, "loss": 0.0055, "step": 16106 }, { "epoch": 17.03, "learning_rate": 7.4550739957716705e-06, "loss": 0.0068, "step": 16108 }, { "epoch": 17.03, "learning_rate": 7.449788583509513e-06, "loss": 0.0033, "step": 16110 }, { "epoch": 17.03, "learning_rate": 7.4445031712473576e-06, "loss": 0.0175, "step": 16112 }, { "epoch": 17.03, "learning_rate": 7.439217758985201e-06, "loss": 0.0005, "step": 16114 }, { "epoch": 17.04, "learning_rate": 7.4339323467230454e-06, "loss": 0.0096, "step": 16116 }, { "epoch": 17.04, "learning_rate": 7.428646934460888e-06, "loss": 0.004, "step": 16118 }, { "epoch": 17.04, "learning_rate": 7.4233615221987325e-06, "loss": 0.0256, "step": 16120 }, { "epoch": 17.04, "learning_rate": 7.418076109936575e-06, "loss": 0.013, "step": 16122 }, { "epoch": 17.04, "learning_rate": 7.4127906976744195e-06, "loss": 0.0216, "step": 16124 }, { "epoch": 17.05, "learning_rate": 7.407505285412262e-06, "loss": 0.0032, "step": 16126 }, { "epoch": 17.05, "learning_rate": 7.402219873150106e-06, "loss": 0.0082, "step": 16128 }, { "epoch": 17.05, "learning_rate": 7.39693446088795e-06, "loss": 0.0007, "step": 16130 }, { "epoch": 17.05, "learning_rate": 7.391649048625793e-06, "loss": 0.0019, "step": 16132 }, { "epoch": 17.05, "learning_rate": 7.386363636363637e-06, "loss": 0.0007, "step": 16134 }, { "epoch": 17.06, "learning_rate": 7.38107822410148e-06, "loss": 0.0008, "step": 16136 }, { "epoch": 17.06, "learning_rate": 7.375792811839324e-06, "loss": 0.0093, "step": 16138 }, { "epoch": 17.06, "learning_rate": 7.370507399577168e-06, "loss": 0.0072, "step": 16140 }, { "epoch": 17.06, "learning_rate": 7.36522198731501e-06, "loss": 0.0097, "step": 16142 }, { "epoch": 17.07, "learning_rate": 7.359936575052855e-06, "loss": 0.0028, "step": 16144 }, { "epoch": 17.07, "learning_rate": 7.354651162790697e-06, "loss": 0.0212, "step": 16146 }, { "epoch": 17.07, "learning_rate": 7.349365750528542e-06, "loss": 0.0076, "step": 16148 }, { "epoch": 17.07, "learning_rate": 7.344080338266384e-06, "loss": 0.0006, "step": 16150 }, { "epoch": 17.07, "learning_rate": 7.338794926004229e-06, "loss": 0.0008, "step": 16152 }, { "epoch": 17.08, "learning_rate": 7.333509513742072e-06, "loss": 0.0017, "step": 16154 }, { "epoch": 17.08, "learning_rate": 7.328224101479917e-06, "loss": 0.0008, "step": 16156 }, { "epoch": 17.08, "learning_rate": 7.322938689217759e-06, "loss": 0.005, "step": 16158 }, { "epoch": 17.08, "learning_rate": 7.317653276955602e-06, "loss": 0.0022, "step": 16160 }, { "epoch": 17.08, "learning_rate": 7.312367864693446e-06, "loss": 0.001, "step": 16162 }, { "epoch": 17.09, "learning_rate": 7.30708245243129e-06, "loss": 0.0022, "step": 16164 }, { "epoch": 17.09, "learning_rate": 7.301797040169133e-06, "loss": 0.0132, "step": 16166 }, { "epoch": 17.09, "learning_rate": 7.296511627906977e-06, "loss": 0.0175, "step": 16168 }, { "epoch": 17.09, "learning_rate": 7.291226215644821e-06, "loss": 0.0066, "step": 16170 }, { "epoch": 17.1, "learning_rate": 7.285940803382664e-06, "loss": 0.0012, "step": 16172 }, { "epoch": 17.1, "learning_rate": 7.280655391120508e-06, "loss": 0.0157, "step": 16174 }, { "epoch": 17.1, "learning_rate": 7.275369978858351e-06, "loss": 0.0004, "step": 16176 }, { "epoch": 17.1, "learning_rate": 7.2700845665961945e-06, "loss": 0.0009, "step": 16178 }, { "epoch": 17.1, "learning_rate": 7.264799154334039e-06, "loss": 0.0012, "step": 16180 }, { "epoch": 17.11, "learning_rate": 7.2595137420718815e-06, "loss": 0.0054, "step": 16182 }, { "epoch": 17.11, "learning_rate": 7.254228329809726e-06, "loss": 0.0018, "step": 16184 }, { "epoch": 17.11, "learning_rate": 7.248942917547569e-06, "loss": 0.0034, "step": 16186 }, { "epoch": 17.11, "learning_rate": 7.243657505285413e-06, "loss": 0.0015, "step": 16188 }, { "epoch": 17.11, "learning_rate": 7.238372093023256e-06, "loss": 0.0046, "step": 16190 }, { "epoch": 17.12, "learning_rate": 7.2330866807611e-06, "loss": 0.0049, "step": 16192 }, { "epoch": 17.12, "learning_rate": 7.2278012684989435e-06, "loss": 0.0222, "step": 16194 }, { "epoch": 17.12, "learning_rate": 7.222515856236786e-06, "loss": 0.0074, "step": 16196 }, { "epoch": 17.12, "learning_rate": 7.2172304439746305e-06, "loss": 0.0017, "step": 16198 }, { "epoch": 17.12, "learning_rate": 7.211945031712473e-06, "loss": 0.0007, "step": 16200 }, { "epoch": 17.13, "learning_rate": 7.2066596194503176e-06, "loss": 0.0201, "step": 16202 }, { "epoch": 17.13, "learning_rate": 7.201374207188161e-06, "loss": 0.0021, "step": 16204 }, { "epoch": 17.13, "learning_rate": 7.196088794926005e-06, "loss": 0.0318, "step": 16206 }, { "epoch": 17.13, "learning_rate": 7.190803382663848e-06, "loss": 0.0256, "step": 16208 }, { "epoch": 17.14, "learning_rate": 7.1855179704016925e-06, "loss": 0.0006, "step": 16210 }, { "epoch": 17.14, "learning_rate": 7.180232558139535e-06, "loss": 0.0008, "step": 16212 }, { "epoch": 17.14, "learning_rate": 7.174947145877378e-06, "loss": 0.0055, "step": 16214 }, { "epoch": 17.14, "learning_rate": 7.169661733615222e-06, "loss": 0.0084, "step": 16216 }, { "epoch": 17.14, "learning_rate": 7.164376321353066e-06, "loss": 0.0011, "step": 16218 }, { "epoch": 17.15, "learning_rate": 7.15909090909091e-06, "loss": 0.0011, "step": 16220 }, { "epoch": 17.15, "learning_rate": 7.153805496828753e-06, "loss": 0.0394, "step": 16222 }, { "epoch": 17.15, "learning_rate": 7.148520084566597e-06, "loss": 0.0011, "step": 16224 }, { "epoch": 17.15, "learning_rate": 7.14323467230444e-06, "loss": 0.0027, "step": 16226 }, { "epoch": 17.15, "learning_rate": 7.137949260042284e-06, "loss": 0.0044, "step": 16228 }, { "epoch": 17.16, "learning_rate": 7.132663847780127e-06, "loss": 0.0014, "step": 16230 }, { "epoch": 17.16, "learning_rate": 7.12737843551797e-06, "loss": 0.0003, "step": 16232 }, { "epoch": 17.16, "learning_rate": 7.122093023255815e-06, "loss": 0.0099, "step": 16234 }, { "epoch": 17.16, "learning_rate": 7.116807610993657e-06, "loss": 0.0033, "step": 16236 }, { "epoch": 17.16, "learning_rate": 7.111522198731502e-06, "loss": 0.0008, "step": 16238 }, { "epoch": 17.17, "learning_rate": 7.106236786469344e-06, "loss": 0.0005, "step": 16240 }, { "epoch": 17.17, "learning_rate": 7.100951374207189e-06, "loss": 0.0012, "step": 16242 }, { "epoch": 17.17, "learning_rate": 7.095665961945032e-06, "loss": 0.0004, "step": 16244 }, { "epoch": 17.17, "learning_rate": 7.090380549682876e-06, "loss": 0.0134, "step": 16246 }, { "epoch": 17.18, "learning_rate": 7.085095137420719e-06, "loss": 0.0119, "step": 16248 }, { "epoch": 17.18, "learning_rate": 7.079809725158562e-06, "loss": 0.0072, "step": 16250 }, { "epoch": 17.18, "learning_rate": 7.074524312896406e-06, "loss": 0.0175, "step": 16252 }, { "epoch": 17.18, "learning_rate": 7.069238900634249e-06, "loss": 0.0004, "step": 16254 }, { "epoch": 17.18, "learning_rate": 7.063953488372093e-06, "loss": 0.0064, "step": 16256 }, { "epoch": 17.19, "learning_rate": 7.058668076109937e-06, "loss": 0.0226, "step": 16258 }, { "epoch": 17.19, "learning_rate": 7.053382663847781e-06, "loss": 0.0171, "step": 16260 }, { "epoch": 17.19, "learning_rate": 7.048097251585624e-06, "loss": 0.0052, "step": 16262 }, { "epoch": 17.19, "learning_rate": 7.042811839323468e-06, "loss": 0.0022, "step": 16264 }, { "epoch": 17.19, "learning_rate": 7.037526427061311e-06, "loss": 0.0127, "step": 16266 }, { "epoch": 17.2, "learning_rate": 7.032241014799154e-06, "loss": 0.0005, "step": 16268 }, { "epoch": 17.2, "learning_rate": 7.026955602536998e-06, "loss": 0.0222, "step": 16270 }, { "epoch": 17.2, "learning_rate": 7.0216701902748416e-06, "loss": 0.0011, "step": 16272 }, { "epoch": 17.2, "learning_rate": 7.016384778012686e-06, "loss": 0.0004, "step": 16274 }, { "epoch": 17.21, "learning_rate": 7.011099365750529e-06, "loss": 0.0028, "step": 16276 }, { "epoch": 17.21, "learning_rate": 7.005813953488373e-06, "loss": 0.0113, "step": 16278 }, { "epoch": 17.21, "learning_rate": 7.000528541226216e-06, "loss": 0.0286, "step": 16280 }, { "epoch": 17.21, "learning_rate": 6.99524312896406e-06, "loss": 0.0013, "step": 16282 }, { "epoch": 17.21, "learning_rate": 6.9899577167019035e-06, "loss": 0.0129, "step": 16284 }, { "epoch": 17.22, "learning_rate": 6.984672304439746e-06, "loss": 0.0817, "step": 16286 }, { "epoch": 17.22, "learning_rate": 6.9793868921775905e-06, "loss": 0.0009, "step": 16288 }, { "epoch": 17.22, "learning_rate": 6.974101479915433e-06, "loss": 0.0052, "step": 16290 }, { "epoch": 17.22, "learning_rate": 6.968816067653278e-06, "loss": 0.0012, "step": 16292 }, { "epoch": 17.22, "learning_rate": 6.96353065539112e-06, "loss": 0.014, "step": 16294 }, { "epoch": 17.23, "learning_rate": 6.958245243128965e-06, "loss": 0.0027, "step": 16296 }, { "epoch": 17.23, "learning_rate": 6.952959830866808e-06, "loss": 0.0091, "step": 16298 }, { "epoch": 17.23, "learning_rate": 6.9476744186046525e-06, "loss": 0.0008, "step": 16300 }, { "epoch": 17.23, "learning_rate": 6.942389006342495e-06, "loss": 0.0025, "step": 16302 }, { "epoch": 17.23, "learning_rate": 6.937103594080338e-06, "loss": 0.0054, "step": 16304 }, { "epoch": 17.24, "learning_rate": 6.931818181818182e-06, "loss": 0.0019, "step": 16306 }, { "epoch": 17.24, "learning_rate": 6.926532769556026e-06, "loss": 0.0156, "step": 16308 }, { "epoch": 17.24, "learning_rate": 6.921247357293869e-06, "loss": 0.0007, "step": 16310 }, { "epoch": 17.24, "learning_rate": 6.915961945031713e-06, "loss": 0.0075, "step": 16312 }, { "epoch": 17.25, "learning_rate": 6.910676532769557e-06, "loss": 0.0142, "step": 16314 }, { "epoch": 17.25, "learning_rate": 6.9053911205074e-06, "loss": 0.0168, "step": 16316 }, { "epoch": 17.25, "learning_rate": 6.9001057082452425e-06, "loss": 0.0031, "step": 16318 }, { "epoch": 17.25, "learning_rate": 6.894820295983087e-06, "loss": 0.002, "step": 16320 }, { "epoch": 17.25, "learning_rate": 6.88953488372093e-06, "loss": 0.0541, "step": 16322 }, { "epoch": 17.26, "learning_rate": 6.884249471458775e-06, "loss": 0.0205, "step": 16324 }, { "epoch": 17.26, "learning_rate": 6.878964059196617e-06, "loss": 0.0156, "step": 16326 }, { "epoch": 17.26, "learning_rate": 6.873678646934462e-06, "loss": 0.011, "step": 16328 }, { "epoch": 17.26, "learning_rate": 6.8683932346723044e-06, "loss": 0.0022, "step": 16330 }, { "epoch": 17.26, "learning_rate": 6.863107822410149e-06, "loss": 0.0043, "step": 16332 }, { "epoch": 17.27, "learning_rate": 6.8578224101479915e-06, "loss": 0.0053, "step": 16334 }, { "epoch": 17.27, "learning_rate": 6.852536997885835e-06, "loss": 0.02, "step": 16336 }, { "epoch": 17.27, "learning_rate": 6.847251585623679e-06, "loss": 0.0137, "step": 16338 }, { "epoch": 17.27, "learning_rate": 6.841966173361522e-06, "loss": 0.0012, "step": 16340 }, { "epoch": 17.27, "learning_rate": 6.836680761099366e-06, "loss": 0.0009, "step": 16342 }, { "epoch": 17.28, "learning_rate": 6.831395348837209e-06, "loss": 0.0044, "step": 16344 }, { "epoch": 17.28, "learning_rate": 6.8261099365750534e-06, "loss": 0.0085, "step": 16346 }, { "epoch": 17.28, "learning_rate": 6.820824524312897e-06, "loss": 0.0034, "step": 16348 }, { "epoch": 17.28, "learning_rate": 6.8155391120507405e-06, "loss": 0.0038, "step": 16350 }, { "epoch": 17.29, "learning_rate": 6.810253699788584e-06, "loss": 0.0003, "step": 16352 }, { "epoch": 17.29, "learning_rate": 6.804968287526427e-06, "loss": 0.007, "step": 16354 }, { "epoch": 17.29, "learning_rate": 6.799682875264271e-06, "loss": 0.0029, "step": 16356 }, { "epoch": 17.29, "learning_rate": 6.794397463002114e-06, "loss": 0.0007, "step": 16358 }, { "epoch": 17.29, "learning_rate": 6.789112050739958e-06, "loss": 0.0047, "step": 16360 }, { "epoch": 17.3, "learning_rate": 6.7838266384778016e-06, "loss": 0.015, "step": 16362 }, { "epoch": 17.3, "learning_rate": 6.778541226215646e-06, "loss": 0.0027, "step": 16364 }, { "epoch": 17.3, "learning_rate": 6.773255813953489e-06, "loss": 0.0054, "step": 16366 }, { "epoch": 17.3, "learning_rate": 6.767970401691333e-06, "loss": 0.0051, "step": 16368 }, { "epoch": 17.3, "learning_rate": 6.762684989429176e-06, "loss": 0.0084, "step": 16370 }, { "epoch": 17.31, "learning_rate": 6.757399577167018e-06, "loss": 0.0005, "step": 16372 }, { "epoch": 17.31, "learning_rate": 6.752114164904863e-06, "loss": 0.016, "step": 16374 }, { "epoch": 17.31, "learning_rate": 6.746828752642706e-06, "loss": 0.0017, "step": 16376 }, { "epoch": 17.31, "learning_rate": 6.7415433403805506e-06, "loss": 0.0436, "step": 16378 }, { "epoch": 17.32, "learning_rate": 6.736257928118393e-06, "loss": 0.0104, "step": 16380 }, { "epoch": 17.32, "learning_rate": 6.730972515856238e-06, "loss": 0.001, "step": 16382 }, { "epoch": 17.32, "learning_rate": 6.72568710359408e-06, "loss": 0.0574, "step": 16384 }, { "epoch": 17.32, "learning_rate": 6.720401691331925e-06, "loss": 0.0078, "step": 16386 }, { "epoch": 17.32, "learning_rate": 6.715116279069768e-06, "loss": 0.0009, "step": 16388 }, { "epoch": 17.33, "learning_rate": 6.709830866807611e-06, "loss": 0.0058, "step": 16390 }, { "epoch": 17.33, "learning_rate": 6.704545454545455e-06, "loss": 0.0005, "step": 16392 }, { "epoch": 17.33, "learning_rate": 6.699260042283298e-06, "loss": 0.0012, "step": 16394 }, { "epoch": 17.33, "learning_rate": 6.693974630021142e-06, "loss": 0.0379, "step": 16396 }, { "epoch": 17.33, "learning_rate": 6.688689217758985e-06, "loss": 0.0008, "step": 16398 }, { "epoch": 17.34, "learning_rate": 6.683403805496829e-06, "loss": 0.0317, "step": 16400 }, { "epoch": 17.34, "learning_rate": 6.678118393234673e-06, "loss": 0.0498, "step": 16402 }, { "epoch": 17.34, "learning_rate": 6.672832980972517e-06, "loss": 0.0172, "step": 16404 }, { "epoch": 17.34, "learning_rate": 6.66754756871036e-06, "loss": 0.0015, "step": 16406 }, { "epoch": 17.34, "learning_rate": 6.6622621564482025e-06, "loss": 0.0068, "step": 16408 }, { "epoch": 17.35, "learning_rate": 6.656976744186047e-06, "loss": 0.0037, "step": 16410 }, { "epoch": 17.35, "learning_rate": 6.6516913319238895e-06, "loss": 0.0012, "step": 16412 }, { "epoch": 17.35, "learning_rate": 6.646405919661734e-06, "loss": 0.0078, "step": 16414 }, { "epoch": 17.35, "learning_rate": 6.641120507399577e-06, "loss": 0.0044, "step": 16416 }, { "epoch": 17.36, "learning_rate": 6.635835095137422e-06, "loss": 0.0033, "step": 16418 }, { "epoch": 17.36, "learning_rate": 6.6305496828752644e-06, "loss": 0.0222, "step": 16420 }, { "epoch": 17.36, "learning_rate": 6.625264270613109e-06, "loss": 0.0202, "step": 16422 }, { "epoch": 17.36, "learning_rate": 6.6199788583509515e-06, "loss": 0.0009, "step": 16424 }, { "epoch": 17.36, "learning_rate": 6.614693446088795e-06, "loss": 0.0109, "step": 16426 }, { "epoch": 17.37, "learning_rate": 6.609408033826639e-06, "loss": 0.0003, "step": 16428 }, { "epoch": 17.37, "learning_rate": 6.604122621564482e-06, "loss": 0.0081, "step": 16430 }, { "epoch": 17.37, "learning_rate": 6.598837209302326e-06, "loss": 0.0067, "step": 16432 }, { "epoch": 17.37, "learning_rate": 6.593551797040169e-06, "loss": 0.0026, "step": 16434 }, { "epoch": 17.37, "learning_rate": 6.5882663847780134e-06, "loss": 0.0021, "step": 16436 }, { "epoch": 17.38, "learning_rate": 6.582980972515856e-06, "loss": 0.0019, "step": 16438 }, { "epoch": 17.38, "learning_rate": 6.5776955602537005e-06, "loss": 0.0143, "step": 16440 }, { "epoch": 17.38, "learning_rate": 6.572410147991544e-06, "loss": 0.0131, "step": 16442 }, { "epoch": 17.38, "learning_rate": 6.567124735729387e-06, "loss": 0.0088, "step": 16444 }, { "epoch": 17.38, "learning_rate": 6.561839323467231e-06, "loss": 0.0022, "step": 16446 }, { "epoch": 17.39, "learning_rate": 6.556553911205074e-06, "loss": 0.0029, "step": 16448 }, { "epoch": 17.39, "learning_rate": 6.551268498942918e-06, "loss": 0.0072, "step": 16450 }, { "epoch": 17.39, "learning_rate": 6.545983086680761e-06, "loss": 0.0004, "step": 16452 }, { "epoch": 17.39, "learning_rate": 6.540697674418605e-06, "loss": 0.0018, "step": 16454 }, { "epoch": 17.4, "learning_rate": 6.535412262156449e-06, "loss": 0.0021, "step": 16456 }, { "epoch": 17.4, "learning_rate": 6.530126849894293e-06, "loss": 0.0125, "step": 16458 }, { "epoch": 17.4, "learning_rate": 6.524841437632136e-06, "loss": 0.0024, "step": 16460 }, { "epoch": 17.4, "learning_rate": 6.519556025369978e-06, "loss": 0.0167, "step": 16462 }, { "epoch": 17.4, "learning_rate": 6.514270613107823e-06, "loss": 0.001, "step": 16464 }, { "epoch": 17.41, "learning_rate": 6.508985200845666e-06, "loss": 0.0194, "step": 16466 }, { "epoch": 17.41, "learning_rate": 6.503699788583511e-06, "loss": 0.0352, "step": 16468 }, { "epoch": 17.41, "learning_rate": 6.498414376321353e-06, "loss": 0.0035, "step": 16470 }, { "epoch": 17.41, "learning_rate": 6.493128964059198e-06, "loss": 0.007, "step": 16472 }, { "epoch": 17.41, "learning_rate": 6.48784355179704e-06, "loss": 0.0011, "step": 16474 }, { "epoch": 17.42, "learning_rate": 6.482558139534885e-06, "loss": 0.0042, "step": 16476 }, { "epoch": 17.42, "learning_rate": 6.477272727272727e-06, "loss": 0.004, "step": 16478 }, { "epoch": 17.42, "learning_rate": 6.471987315010571e-06, "loss": 0.0087, "step": 16480 }, { "epoch": 17.42, "learning_rate": 6.466701902748415e-06, "loss": 0.0063, "step": 16482 }, { "epoch": 17.42, "learning_rate": 6.461416490486258e-06, "loss": 0.0038, "step": 16484 }, { "epoch": 17.43, "learning_rate": 6.456131078224102e-06, "loss": 0.0028, "step": 16486 }, { "epoch": 17.43, "learning_rate": 6.450845665961945e-06, "loss": 0.0137, "step": 16488 }, { "epoch": 17.43, "learning_rate": 6.445560253699789e-06, "loss": 0.0012, "step": 16490 }, { "epoch": 17.43, "learning_rate": 6.440274841437633e-06, "loss": 0.0024, "step": 16492 }, { "epoch": 17.44, "learning_rate": 6.4349894291754755e-06, "loss": 0.028, "step": 16494 }, { "epoch": 17.44, "learning_rate": 6.42970401691332e-06, "loss": 0.0153, "step": 16496 }, { "epoch": 17.44, "learning_rate": 6.4244186046511625e-06, "loss": 0.0074, "step": 16498 }, { "epoch": 17.44, "learning_rate": 6.419133192389007e-06, "loss": 0.0004, "step": 16500 }, { "epoch": 17.44, "eval_cer": 0.04713593616414933, "eval_loss": 0.8180399537086487, "eval_runtime": 126.9565, "eval_samples_per_second": 6.624, "eval_steps_per_second": 0.835, "step": 16500 }, { "epoch": 17.44, "learning_rate": 6.4138477801268495e-06, "loss": 0.0022, "step": 16502 }, { "epoch": 17.45, "learning_rate": 6.408562367864694e-06, "loss": 0.011, "step": 16504 }, { "epoch": 17.45, "learning_rate": 6.4032769556025374e-06, "loss": 0.0024, "step": 16506 }, { "epoch": 17.45, "learning_rate": 6.397991543340382e-06, "loss": 0.0023, "step": 16508 }, { "epoch": 17.45, "learning_rate": 6.3927061310782245e-06, "loss": 0.0145, "step": 16510 }, { "epoch": 17.45, "learning_rate": 6.387420718816067e-06, "loss": 0.0111, "step": 16512 }, { "epoch": 17.46, "learning_rate": 6.3821353065539115e-06, "loss": 0.0011, "step": 16514 }, { "epoch": 17.46, "learning_rate": 6.376849894291754e-06, "loss": 0.0246, "step": 16516 }, { "epoch": 17.46, "learning_rate": 6.3715644820295985e-06, "loss": 0.0247, "step": 16518 }, { "epoch": 17.46, "learning_rate": 6.366279069767442e-06, "loss": 0.0227, "step": 16520 }, { "epoch": 17.47, "learning_rate": 6.360993657505286e-06, "loss": 0.0443, "step": 16522 }, { "epoch": 17.47, "learning_rate": 6.355708245243129e-06, "loss": 0.0018, "step": 16524 }, { "epoch": 17.47, "learning_rate": 6.3504228329809735e-06, "loss": 0.0099, "step": 16526 }, { "epoch": 17.47, "learning_rate": 6.345137420718816e-06, "loss": 0.0018, "step": 16528 }, { "epoch": 17.47, "learning_rate": 6.33985200845666e-06, "loss": 0.1091, "step": 16530 }, { "epoch": 17.48, "learning_rate": 6.334566596194504e-06, "loss": 0.0205, "step": 16532 }, { "epoch": 17.48, "learning_rate": 6.329281183932347e-06, "loss": 0.0005, "step": 16534 }, { "epoch": 17.48, "learning_rate": 6.323995771670191e-06, "loss": 0.0016, "step": 16536 }, { "epoch": 17.48, "learning_rate": 6.318710359408034e-06, "loss": 0.0003, "step": 16538 }, { "epoch": 17.48, "learning_rate": 6.313424947145878e-06, "loss": 0.0024, "step": 16540 }, { "epoch": 17.49, "learning_rate": 6.308139534883721e-06, "loss": 0.0043, "step": 16542 }, { "epoch": 17.49, "learning_rate": 6.302854122621565e-06, "loss": 0.0262, "step": 16544 }, { "epoch": 17.49, "learning_rate": 6.297568710359409e-06, "loss": 0.0064, "step": 16546 }, { "epoch": 17.49, "learning_rate": 6.292283298097251e-06, "loss": 0.0018, "step": 16548 }, { "epoch": 17.49, "learning_rate": 6.286997885835096e-06, "loss": 0.0133, "step": 16550 }, { "epoch": 17.5, "learning_rate": 6.281712473572938e-06, "loss": 0.0052, "step": 16552 }, { "epoch": 17.5, "learning_rate": 6.276427061310783e-06, "loss": 0.0006, "step": 16554 }, { "epoch": 17.5, "learning_rate": 6.271141649048625e-06, "loss": 0.0827, "step": 16556 }, { "epoch": 17.5, "learning_rate": 6.26585623678647e-06, "loss": 0.0039, "step": 16558 }, { "epoch": 17.51, "learning_rate": 6.260570824524313e-06, "loss": 0.0007, "step": 16560 }, { "epoch": 17.51, "learning_rate": 6.255285412262158e-06, "loss": 0.0035, "step": 16562 }, { "epoch": 17.51, "learning_rate": 6.25e-06, "loss": 0.0046, "step": 16564 }, { "epoch": 17.51, "learning_rate": 6.244714587737844e-06, "loss": 0.008, "step": 16566 }, { "epoch": 17.51, "learning_rate": 6.239429175475687e-06, "loss": 0.0105, "step": 16568 }, { "epoch": 17.52, "learning_rate": 6.234143763213531e-06, "loss": 0.0166, "step": 16570 }, { "epoch": 17.52, "learning_rate": 6.228858350951375e-06, "loss": 0.0054, "step": 16572 }, { "epoch": 17.52, "learning_rate": 6.223572938689218e-06, "loss": 0.0008, "step": 16574 }, { "epoch": 17.52, "learning_rate": 6.218287526427061e-06, "loss": 0.0012, "step": 16576 }, { "epoch": 17.52, "learning_rate": 6.213002114164905e-06, "loss": 0.0022, "step": 16578 }, { "epoch": 17.53, "learning_rate": 6.2077167019027484e-06, "loss": 0.0016, "step": 16580 }, { "epoch": 17.53, "learning_rate": 6.202431289640592e-06, "loss": 0.0014, "step": 16582 }, { "epoch": 17.53, "learning_rate": 6.197145877378436e-06, "loss": 0.0407, "step": 16584 }, { "epoch": 17.53, "learning_rate": 6.19186046511628e-06, "loss": 0.0259, "step": 16586 }, { "epoch": 17.53, "learning_rate": 6.186575052854123e-06, "loss": 0.0132, "step": 16588 }, { "epoch": 17.54, "learning_rate": 6.181289640591967e-06, "loss": 0.0078, "step": 16590 }, { "epoch": 17.54, "learning_rate": 6.1760042283298096e-06, "loss": 0.0276, "step": 16592 }, { "epoch": 17.54, "learning_rate": 6.170718816067653e-06, "loss": 0.0355, "step": 16594 }, { "epoch": 17.54, "learning_rate": 6.165433403805497e-06, "loss": 0.0024, "step": 16596 }, { "epoch": 17.55, "learning_rate": 6.160147991543341e-06, "loss": 0.0605, "step": 16598 }, { "epoch": 17.55, "learning_rate": 6.1548625792811845e-06, "loss": 0.0178, "step": 16600 }, { "epoch": 17.55, "learning_rate": 6.149577167019028e-06, "loss": 0.0032, "step": 16602 }, { "epoch": 17.55, "learning_rate": 6.1442917547568715e-06, "loss": 0.009, "step": 16604 }, { "epoch": 17.55, "learning_rate": 6.139006342494715e-06, "loss": 0.0005, "step": 16606 }, { "epoch": 17.56, "learning_rate": 6.133720930232558e-06, "loss": 0.0131, "step": 16608 }, { "epoch": 17.56, "learning_rate": 6.128435517970402e-06, "loss": 0.0034, "step": 16610 }, { "epoch": 17.56, "learning_rate": 6.123150105708246e-06, "loss": 0.0038, "step": 16612 }, { "epoch": 17.56, "learning_rate": 6.117864693446089e-06, "loss": 0.0088, "step": 16614 }, { "epoch": 17.56, "learning_rate": 6.112579281183933e-06, "loss": 0.001, "step": 16616 }, { "epoch": 17.57, "learning_rate": 6.107293868921776e-06, "loss": 0.0009, "step": 16618 }, { "epoch": 17.57, "learning_rate": 6.10200845665962e-06, "loss": 0.0175, "step": 16620 }, { "epoch": 17.57, "learning_rate": 6.096723044397463e-06, "loss": 0.0262, "step": 16622 }, { "epoch": 17.57, "learning_rate": 6.0914376321353075e-06, "loss": 0.0154, "step": 16624 }, { "epoch": 17.58, "learning_rate": 6.08615221987315e-06, "loss": 0.018, "step": 16626 }, { "epoch": 17.58, "learning_rate": 6.080866807610994e-06, "loss": 0.0041, "step": 16628 }, { "epoch": 17.58, "learning_rate": 6.075581395348837e-06, "loss": 0.0035, "step": 16630 }, { "epoch": 17.58, "learning_rate": 6.070295983086681e-06, "loss": 0.0379, "step": 16632 }, { "epoch": 17.58, "learning_rate": 6.065010570824524e-06, "loss": 0.0023, "step": 16634 }, { "epoch": 17.59, "learning_rate": 6.059725158562368e-06, "loss": 0.0011, "step": 16636 }, { "epoch": 17.59, "learning_rate": 6.054439746300212e-06, "loss": 0.0198, "step": 16638 }, { "epoch": 17.59, "learning_rate": 6.049154334038056e-06, "loss": 0.0122, "step": 16640 }, { "epoch": 17.59, "learning_rate": 6.043868921775899e-06, "loss": 0.0144, "step": 16642 }, { "epoch": 17.59, "learning_rate": 6.038583509513742e-06, "loss": 0.0008, "step": 16644 }, { "epoch": 17.6, "learning_rate": 6.033298097251585e-06, "loss": 0.0019, "step": 16646 }, { "epoch": 17.6, "learning_rate": 6.028012684989429e-06, "loss": 0.0022, "step": 16648 }, { "epoch": 17.6, "learning_rate": 6.022727272727273e-06, "loss": 0.0007, "step": 16650 }, { "epoch": 17.6, "learning_rate": 6.017441860465117e-06, "loss": 0.0312, "step": 16652 }, { "epoch": 17.6, "learning_rate": 6.01215644820296e-06, "loss": 0.0004, "step": 16654 }, { "epoch": 17.61, "learning_rate": 6.006871035940804e-06, "loss": 0.0004, "step": 16656 }, { "epoch": 17.61, "learning_rate": 6.001585623678647e-06, "loss": 0.0015, "step": 16658 }, { "epoch": 17.61, "learning_rate": 5.996300211416491e-06, "loss": 0.0029, "step": 16660 }, { "epoch": 17.61, "learning_rate": 5.991014799154334e-06, "loss": 0.0024, "step": 16662 }, { "epoch": 17.62, "learning_rate": 5.985729386892178e-06, "loss": 0.022, "step": 16664 }, { "epoch": 17.62, "learning_rate": 5.9804439746300214e-06, "loss": 0.0022, "step": 16666 }, { "epoch": 17.62, "learning_rate": 5.975158562367865e-06, "loss": 0.0008, "step": 16668 }, { "epoch": 17.62, "learning_rate": 5.9698731501057085e-06, "loss": 0.0008, "step": 16670 }, { "epoch": 17.62, "learning_rate": 5.964587737843552e-06, "loss": 0.0016, "step": 16672 }, { "epoch": 17.63, "learning_rate": 5.9593023255813955e-06, "loss": 0.0059, "step": 16674 }, { "epoch": 17.63, "learning_rate": 5.95401691331924e-06, "loss": 0.0033, "step": 16676 }, { "epoch": 17.63, "learning_rate": 5.948731501057083e-06, "loss": 0.0013, "step": 16678 }, { "epoch": 17.63, "learning_rate": 5.943446088794926e-06, "loss": 0.0049, "step": 16680 }, { "epoch": 17.63, "learning_rate": 5.9381606765327696e-06, "loss": 0.0126, "step": 16682 }, { "epoch": 17.64, "learning_rate": 5.932875264270613e-06, "loss": 0.0058, "step": 16684 }, { "epoch": 17.64, "learning_rate": 5.927589852008457e-06, "loss": 0.0008, "step": 16686 }, { "epoch": 17.64, "learning_rate": 5.9223044397463e-06, "loss": 0.0017, "step": 16688 }, { "epoch": 17.64, "learning_rate": 5.9170190274841445e-06, "loss": 0.003, "step": 16690 }, { "epoch": 17.64, "learning_rate": 5.911733615221988e-06, "loss": 0.0069, "step": 16692 }, { "epoch": 17.65, "learning_rate": 5.9064482029598315e-06, "loss": 0.0078, "step": 16694 }, { "epoch": 17.65, "learning_rate": 5.901162790697674e-06, "loss": 0.0326, "step": 16696 }, { "epoch": 17.65, "learning_rate": 5.895877378435518e-06, "loss": 0.0114, "step": 16698 }, { "epoch": 17.65, "learning_rate": 5.890591966173361e-06, "loss": 0.0008, "step": 16700 }, { "epoch": 17.66, "learning_rate": 5.885306553911206e-06, "loss": 0.0004, "step": 16702 }, { "epoch": 17.66, "learning_rate": 5.880021141649049e-06, "loss": 0.0994, "step": 16704 }, { "epoch": 17.66, "learning_rate": 5.874735729386893e-06, "loss": 0.0003, "step": 16706 }, { "epoch": 17.66, "learning_rate": 5.869450317124736e-06, "loss": 0.016, "step": 16708 }, { "epoch": 17.66, "learning_rate": 5.86416490486258e-06, "loss": 0.0019, "step": 16710 }, { "epoch": 17.67, "learning_rate": 5.858879492600423e-06, "loss": 0.0014, "step": 16712 }, { "epoch": 17.67, "learning_rate": 5.853594080338267e-06, "loss": 0.0013, "step": 16714 }, { "epoch": 17.67, "learning_rate": 5.84830866807611e-06, "loss": 0.0011, "step": 16716 }, { "epoch": 17.67, "learning_rate": 5.843023255813954e-06, "loss": 0.0334, "step": 16718 }, { "epoch": 17.67, "learning_rate": 5.837737843551797e-06, "loss": 0.0304, "step": 16720 }, { "epoch": 17.68, "learning_rate": 5.832452431289641e-06, "loss": 0.0167, "step": 16722 }, { "epoch": 17.68, "learning_rate": 5.827167019027484e-06, "loss": 0.0008, "step": 16724 }, { "epoch": 17.68, "learning_rate": 5.821881606765328e-06, "loss": 0.0021, "step": 16726 }, { "epoch": 17.68, "learning_rate": 5.816596194503171e-06, "loss": 0.0056, "step": 16728 }, { "epoch": 17.68, "learning_rate": 5.811310782241016e-06, "loss": 0.0006, "step": 16730 }, { "epoch": 17.69, "learning_rate": 5.806025369978858e-06, "loss": 0.0135, "step": 16732 }, { "epoch": 17.69, "learning_rate": 5.800739957716702e-06, "loss": 0.0047, "step": 16734 }, { "epoch": 17.69, "learning_rate": 5.795454545454545e-06, "loss": 0.0223, "step": 16736 }, { "epoch": 17.69, "learning_rate": 5.790169133192389e-06, "loss": 0.0008, "step": 16738 }, { "epoch": 17.7, "learning_rate": 5.7848837209302324e-06, "loss": 0.0048, "step": 16740 }, { "epoch": 17.7, "learning_rate": 5.779598308668077e-06, "loss": 0.0023, "step": 16742 }, { "epoch": 17.7, "learning_rate": 5.77431289640592e-06, "loss": 0.0018, "step": 16744 }, { "epoch": 17.7, "learning_rate": 5.769027484143764e-06, "loss": 0.0167, "step": 16746 }, { "epoch": 17.7, "learning_rate": 5.763742071881607e-06, "loss": 0.0047, "step": 16748 }, { "epoch": 17.71, "learning_rate": 5.75845665961945e-06, "loss": 0.0176, "step": 16750 }, { "epoch": 17.71, "learning_rate": 5.7531712473572936e-06, "loss": 0.0423, "step": 16752 }, { "epoch": 17.71, "learning_rate": 5.747885835095138e-06, "loss": 0.0035, "step": 16754 }, { "epoch": 17.71, "learning_rate": 5.7426004228329814e-06, "loss": 0.0093, "step": 16756 }, { "epoch": 17.71, "learning_rate": 5.737315010570825e-06, "loss": 0.0033, "step": 16758 }, { "epoch": 17.72, "learning_rate": 5.7320295983086685e-06, "loss": 0.0033, "step": 16760 }, { "epoch": 17.72, "learning_rate": 5.726744186046512e-06, "loss": 0.0085, "step": 16762 }, { "epoch": 17.72, "learning_rate": 5.7214587737843555e-06, "loss": 0.0017, "step": 16764 }, { "epoch": 17.72, "learning_rate": 5.716173361522199e-06, "loss": 0.0031, "step": 16766 }, { "epoch": 17.73, "learning_rate": 5.7108879492600425e-06, "loss": 0.0007, "step": 16768 }, { "epoch": 17.73, "learning_rate": 5.705602536997886e-06, "loss": 0.0165, "step": 16770 }, { "epoch": 17.73, "learning_rate": 5.70031712473573e-06, "loss": 0.0498, "step": 16772 }, { "epoch": 17.73, "learning_rate": 5.695031712473573e-06, "loss": 0.0039, "step": 16774 }, { "epoch": 17.73, "learning_rate": 5.689746300211417e-06, "loss": 0.0053, "step": 16776 }, { "epoch": 17.74, "learning_rate": 5.68446088794926e-06, "loss": 0.0161, "step": 16778 }, { "epoch": 17.74, "learning_rate": 5.679175475687104e-06, "loss": 0.0134, "step": 16780 }, { "epoch": 17.74, "learning_rate": 5.673890063424948e-06, "loss": 0.0224, "step": 16782 }, { "epoch": 17.74, "learning_rate": 5.668604651162791e-06, "loss": 0.0039, "step": 16784 }, { "epoch": 17.74, "learning_rate": 5.663319238900634e-06, "loss": 0.0132, "step": 16786 }, { "epoch": 17.75, "learning_rate": 5.658033826638478e-06, "loss": 0.0004, "step": 16788 }, { "epoch": 17.75, "learning_rate": 5.652748414376321e-06, "loss": 0.0033, "step": 16790 }, { "epoch": 17.75, "learning_rate": 5.647463002114165e-06, "loss": 0.0037, "step": 16792 }, { "epoch": 17.75, "learning_rate": 5.642177589852009e-06, "loss": 0.0128, "step": 16794 }, { "epoch": 17.75, "learning_rate": 5.636892177589853e-06, "loss": 0.009, "step": 16796 }, { "epoch": 17.76, "learning_rate": 5.631606765327696e-06, "loss": 0.005, "step": 16798 }, { "epoch": 17.76, "learning_rate": 5.62632135306554e-06, "loss": 0.0005, "step": 16800 }, { "epoch": 17.76, "learning_rate": 5.621035940803382e-06, "loss": 0.0268, "step": 16802 }, { "epoch": 17.76, "learning_rate": 5.615750528541226e-06, "loss": 0.0753, "step": 16804 }, { "epoch": 17.77, "learning_rate": 5.61046511627907e-06, "loss": 0.0016, "step": 16806 }, { "epoch": 17.77, "learning_rate": 5.605179704016914e-06, "loss": 0.009, "step": 16808 }, { "epoch": 17.77, "learning_rate": 5.599894291754757e-06, "loss": 0.0004, "step": 16810 }, { "epoch": 17.77, "learning_rate": 5.594608879492601e-06, "loss": 0.0033, "step": 16812 }, { "epoch": 17.77, "learning_rate": 5.589323467230444e-06, "loss": 0.0091, "step": 16814 }, { "epoch": 17.78, "learning_rate": 5.584038054968288e-06, "loss": 0.0008, "step": 16816 }, { "epoch": 17.78, "learning_rate": 5.578752642706131e-06, "loss": 0.0018, "step": 16818 }, { "epoch": 17.78, "learning_rate": 5.573467230443975e-06, "loss": 0.0183, "step": 16820 }, { "epoch": 17.78, "learning_rate": 5.568181818181818e-06, "loss": 0.0005, "step": 16822 }, { "epoch": 17.78, "learning_rate": 5.562896405919662e-06, "loss": 0.0205, "step": 16824 }, { "epoch": 17.79, "learning_rate": 5.5576109936575054e-06, "loss": 0.0003, "step": 16826 }, { "epoch": 17.79, "learning_rate": 5.552325581395349e-06, "loss": 0.0037, "step": 16828 }, { "epoch": 17.79, "learning_rate": 5.5470401691331925e-06, "loss": 0.0097, "step": 16830 }, { "epoch": 17.79, "learning_rate": 5.541754756871036e-06, "loss": 0.0008, "step": 16832 }, { "epoch": 17.79, "learning_rate": 5.53646934460888e-06, "loss": 0.0003, "step": 16834 }, { "epoch": 17.8, "learning_rate": 5.531183932346724e-06, "loss": 0.009, "step": 16836 }, { "epoch": 17.8, "learning_rate": 5.5258985200845665e-06, "loss": 0.0004, "step": 16838 }, { "epoch": 17.8, "learning_rate": 5.52061310782241e-06, "loss": 0.0005, "step": 16840 }, { "epoch": 17.8, "learning_rate": 5.5153276955602536e-06, "loss": 0.0223, "step": 16842 }, { "epoch": 17.81, "learning_rate": 5.510042283298097e-06, "loss": 0.0009, "step": 16844 }, { "epoch": 17.81, "learning_rate": 5.5047568710359415e-06, "loss": 0.003, "step": 16846 }, { "epoch": 17.81, "learning_rate": 5.499471458773785e-06, "loss": 0.0013, "step": 16848 }, { "epoch": 17.81, "learning_rate": 5.4941860465116285e-06, "loss": 0.0194, "step": 16850 }, { "epoch": 17.81, "learning_rate": 5.488900634249472e-06, "loss": 0.0211, "step": 16852 }, { "epoch": 17.82, "learning_rate": 5.4836152219873155e-06, "loss": 0.0007, "step": 16854 }, { "epoch": 17.82, "learning_rate": 5.478329809725158e-06, "loss": 0.0041, "step": 16856 }, { "epoch": 17.82, "learning_rate": 5.4730443974630026e-06, "loss": 0.0142, "step": 16858 }, { "epoch": 17.82, "learning_rate": 5.467758985200846e-06, "loss": 0.0151, "step": 16860 }, { "epoch": 17.82, "learning_rate": 5.46247357293869e-06, "loss": 0.0087, "step": 16862 }, { "epoch": 17.83, "learning_rate": 5.457188160676533e-06, "loss": 0.0009, "step": 16864 }, { "epoch": 17.83, "learning_rate": 5.451902748414377e-06, "loss": 0.0349, "step": 16866 }, { "epoch": 17.83, "learning_rate": 5.44661733615222e-06, "loss": 0.0041, "step": 16868 }, { "epoch": 17.83, "learning_rate": 5.441331923890064e-06, "loss": 0.0016, "step": 16870 }, { "epoch": 17.84, "learning_rate": 5.436046511627907e-06, "loss": 0.0013, "step": 16872 }, { "epoch": 17.84, "learning_rate": 5.430761099365751e-06, "loss": 0.0016, "step": 16874 }, { "epoch": 17.84, "learning_rate": 5.425475687103594e-06, "loss": 0.0088, "step": 16876 }, { "epoch": 17.84, "learning_rate": 5.420190274841438e-06, "loss": 0.0036, "step": 16878 }, { "epoch": 17.84, "learning_rate": 5.414904862579281e-06, "loss": 0.003, "step": 16880 }, { "epoch": 17.85, "learning_rate": 5.409619450317125e-06, "loss": 0.0217, "step": 16882 }, { "epoch": 17.85, "learning_rate": 5.404334038054968e-06, "loss": 0.0257, "step": 16884 }, { "epoch": 17.85, "learning_rate": 5.399048625792813e-06, "loss": 0.0067, "step": 16886 }, { "epoch": 17.85, "learning_rate": 5.393763213530656e-06, "loss": 0.0016, "step": 16888 }, { "epoch": 17.85, "learning_rate": 5.388477801268499e-06, "loss": 0.0016, "step": 16890 }, { "epoch": 17.86, "learning_rate": 5.383192389006342e-06, "loss": 0.001, "step": 16892 }, { "epoch": 17.86, "learning_rate": 5.377906976744186e-06, "loss": 0.0179, "step": 16894 }, { "epoch": 17.86, "learning_rate": 5.372621564482029e-06, "loss": 0.0012, "step": 16896 }, { "epoch": 17.86, "learning_rate": 5.367336152219874e-06, "loss": 0.0012, "step": 16898 }, { "epoch": 17.86, "learning_rate": 5.362050739957717e-06, "loss": 0.0142, "step": 16900 }, { "epoch": 17.87, "learning_rate": 5.356765327695561e-06, "loss": 0.0005, "step": 16902 }, { "epoch": 17.87, "learning_rate": 5.351479915433404e-06, "loss": 0.0021, "step": 16904 }, { "epoch": 17.87, "learning_rate": 5.346194503171248e-06, "loss": 0.0076, "step": 16906 }, { "epoch": 17.87, "learning_rate": 5.3409090909090905e-06, "loss": 0.0066, "step": 16908 }, { "epoch": 17.88, "learning_rate": 5.335623678646935e-06, "loss": 0.0152, "step": 16910 }, { "epoch": 17.88, "learning_rate": 5.330338266384778e-06, "loss": 0.0005, "step": 16912 }, { "epoch": 17.88, "learning_rate": 5.325052854122622e-06, "loss": 0.017, "step": 16914 }, { "epoch": 17.88, "learning_rate": 5.3197674418604654e-06, "loss": 0.0011, "step": 16916 }, { "epoch": 17.88, "learning_rate": 5.314482029598309e-06, "loss": 0.0013, "step": 16918 }, { "epoch": 17.89, "learning_rate": 5.3091966173361525e-06, "loss": 0.0104, "step": 16920 }, { "epoch": 17.89, "learning_rate": 5.303911205073996e-06, "loss": 0.0287, "step": 16922 }, { "epoch": 17.89, "learning_rate": 5.2986257928118395e-06, "loss": 0.0144, "step": 16924 }, { "epoch": 17.89, "learning_rate": 5.293340380549683e-06, "loss": 0.0144, "step": 16926 }, { "epoch": 17.89, "learning_rate": 5.2880549682875265e-06, "loss": 0.0034, "step": 16928 }, { "epoch": 17.9, "learning_rate": 5.28276955602537e-06, "loss": 0.0273, "step": 16930 }, { "epoch": 17.9, "learning_rate": 5.277484143763214e-06, "loss": 0.0043, "step": 16932 }, { "epoch": 17.9, "learning_rate": 5.272198731501057e-06, "loss": 0.0009, "step": 16934 }, { "epoch": 17.9, "learning_rate": 5.266913319238901e-06, "loss": 0.0027, "step": 16936 }, { "epoch": 17.9, "learning_rate": 5.261627906976745e-06, "loss": 0.0109, "step": 16938 }, { "epoch": 17.91, "learning_rate": 5.2563424947145885e-06, "loss": 0.0119, "step": 16940 }, { "epoch": 17.91, "learning_rate": 5.251057082452432e-06, "loss": 0.0007, "step": 16942 }, { "epoch": 17.91, "learning_rate": 5.245771670190275e-06, "loss": 0.0151, "step": 16944 }, { "epoch": 17.91, "learning_rate": 5.240486257928118e-06, "loss": 0.0134, "step": 16946 }, { "epoch": 17.92, "learning_rate": 5.235200845665962e-06, "loss": 0.0042, "step": 16948 }, { "epoch": 17.92, "learning_rate": 5.229915433403806e-06, "loss": 0.0037, "step": 16950 }, { "epoch": 17.92, "learning_rate": 5.22463002114165e-06, "loss": 0.002, "step": 16952 }, { "epoch": 17.92, "learning_rate": 5.219344608879493e-06, "loss": 0.0198, "step": 16954 }, { "epoch": 17.92, "learning_rate": 5.214059196617337e-06, "loss": 0.0402, "step": 16956 }, { "epoch": 17.93, "learning_rate": 5.20877378435518e-06, "loss": 0.0003, "step": 16958 }, { "epoch": 17.93, "learning_rate": 5.203488372093023e-06, "loss": 0.0013, "step": 16960 }, { "epoch": 17.93, "learning_rate": 5.198202959830867e-06, "loss": 0.0019, "step": 16962 }, { "epoch": 17.93, "learning_rate": 5.192917547568711e-06, "loss": 0.0023, "step": 16964 }, { "epoch": 17.93, "learning_rate": 5.187632135306554e-06, "loss": 0.0011, "step": 16966 }, { "epoch": 17.94, "learning_rate": 5.182346723044398e-06, "loss": 0.0008, "step": 16968 }, { "epoch": 17.94, "learning_rate": 5.177061310782241e-06, "loss": 0.0268, "step": 16970 }, { "epoch": 17.94, "learning_rate": 5.171775898520085e-06, "loss": 0.0004, "step": 16972 }, { "epoch": 17.94, "learning_rate": 5.166490486257928e-06, "loss": 0.0063, "step": 16974 }, { "epoch": 17.95, "learning_rate": 5.161205073995772e-06, "loss": 0.0004, "step": 16976 }, { "epoch": 17.95, "learning_rate": 5.155919661733615e-06, "loss": 0.0024, "step": 16978 }, { "epoch": 17.95, "learning_rate": 5.150634249471459e-06, "loss": 0.0006, "step": 16980 }, { "epoch": 17.95, "learning_rate": 5.145348837209302e-06, "loss": 0.0141, "step": 16982 }, { "epoch": 17.95, "learning_rate": 5.140063424947146e-06, "loss": 0.0014, "step": 16984 }, { "epoch": 17.96, "learning_rate": 5.134778012684989e-06, "loss": 0.0016, "step": 16986 }, { "epoch": 17.96, "learning_rate": 5.129492600422833e-06, "loss": 0.0181, "step": 16988 }, { "epoch": 17.96, "learning_rate": 5.124207188160677e-06, "loss": 0.0015, "step": 16990 }, { "epoch": 17.96, "learning_rate": 5.118921775898521e-06, "loss": 0.0016, "step": 16992 }, { "epoch": 17.96, "learning_rate": 5.113636363636364e-06, "loss": 0.0005, "step": 16994 }, { "epoch": 17.97, "learning_rate": 5.108350951374207e-06, "loss": 0.0269, "step": 16996 }, { "epoch": 17.97, "learning_rate": 5.1030655391120505e-06, "loss": 0.0113, "step": 16998 }, { "epoch": 17.97, "learning_rate": 5.097780126849894e-06, "loss": 0.0008, "step": 17000 }, { "epoch": 17.97, "eval_cer": 0.057395269307495014, "eval_loss": 0.671505331993103, "eval_runtime": 126.1941, "eval_samples_per_second": 6.664, "eval_steps_per_second": 0.84, "step": 17000 }, { "epoch": 17.97, "learning_rate": 5.092494714587738e-06, "loss": 0.0013, "step": 17002 }, { "epoch": 17.97, "learning_rate": 5.087209302325582e-06, "loss": 0.0057, "step": 17004 }, { "epoch": 17.98, "learning_rate": 5.0819238900634255e-06, "loss": 0.0048, "step": 17006 }, { "epoch": 17.98, "learning_rate": 5.076638477801269e-06, "loss": 0.0005, "step": 17008 }, { "epoch": 17.98, "learning_rate": 5.0713530655391125e-06, "loss": 0.0004, "step": 17010 }, { "epoch": 17.98, "learning_rate": 5.066067653276956e-06, "loss": 0.0017, "step": 17012 }, { "epoch": 17.99, "learning_rate": 5.0607822410147995e-06, "loss": 0.0067, "step": 17014 }, { "epoch": 17.99, "learning_rate": 5.055496828752643e-06, "loss": 0.0015, "step": 17016 }, { "epoch": 17.99, "learning_rate": 5.0502114164904866e-06, "loss": 0.0008, "step": 17018 }, { "epoch": 17.99, "learning_rate": 5.04492600422833e-06, "loss": 0.0005, "step": 17020 }, { "epoch": 17.99, "learning_rate": 5.039640591966174e-06, "loss": 0.0115, "step": 17022 }, { "epoch": 18.0, "learning_rate": 5.034355179704017e-06, "loss": 0.0005, "step": 17024 }, { "epoch": 18.0, "learning_rate": 5.029069767441861e-06, "loss": 0.0053, "step": 17026 }, { "epoch": 18.0, "learning_rate": 5.023784355179704e-06, "loss": 0.011, "step": 17028 }, { "epoch": 18.0, "learning_rate": 5.018498942917548e-06, "loss": 0.0015, "step": 17030 }, { "epoch": 18.0, "learning_rate": 5.013213530655391e-06, "loss": 0.0004, "step": 17032 }, { "epoch": 18.01, "learning_rate": 5.007928118393235e-06, "loss": 0.0022, "step": 17034 }, { "epoch": 18.01, "learning_rate": 5.002642706131078e-06, "loss": 0.0011, "step": 17036 }, { "epoch": 18.01, "learning_rate": 4.997357293868922e-06, "loss": 0.001, "step": 17038 }, { "epoch": 18.01, "learning_rate": 4.992071881606765e-06, "loss": 0.025, "step": 17040 }, { "epoch": 18.01, "learning_rate": 4.98678646934461e-06, "loss": 0.011, "step": 17042 }, { "epoch": 18.02, "learning_rate": 4.981501057082453e-06, "loss": 0.0004, "step": 17044 }, { "epoch": 18.02, "learning_rate": 4.976215644820297e-06, "loss": 0.0018, "step": 17046 }, { "epoch": 18.02, "learning_rate": 4.970930232558139e-06, "loss": 0.0041, "step": 17048 }, { "epoch": 18.02, "learning_rate": 4.965644820295983e-06, "loss": 0.0008, "step": 17050 }, { "epoch": 18.03, "learning_rate": 4.960359408033826e-06, "loss": 0.0003, "step": 17052 }, { "epoch": 18.03, "learning_rate": 4.955073995771671e-06, "loss": 0.0204, "step": 17054 }, { "epoch": 18.03, "learning_rate": 4.949788583509514e-06, "loss": 0.0025, "step": 17056 }, { "epoch": 18.03, "learning_rate": 4.944503171247358e-06, "loss": 0.025, "step": 17058 }, { "epoch": 18.03, "learning_rate": 4.939217758985201e-06, "loss": 0.0006, "step": 17060 }, { "epoch": 18.04, "learning_rate": 4.933932346723045e-06, "loss": 0.0175, "step": 17062 }, { "epoch": 18.04, "learning_rate": 4.928646934460888e-06, "loss": 0.0113, "step": 17064 }, { "epoch": 18.04, "learning_rate": 4.923361522198732e-06, "loss": 0.0055, "step": 17066 }, { "epoch": 18.04, "learning_rate": 4.918076109936575e-06, "loss": 0.0003, "step": 17068 }, { "epoch": 18.04, "learning_rate": 4.912790697674419e-06, "loss": 0.0046, "step": 17070 }, { "epoch": 18.05, "learning_rate": 4.907505285412262e-06, "loss": 0.0007, "step": 17072 }, { "epoch": 18.05, "learning_rate": 4.902219873150106e-06, "loss": 0.0011, "step": 17074 }, { "epoch": 18.05, "learning_rate": 4.8969344608879494e-06, "loss": 0.0246, "step": 17076 }, { "epoch": 18.05, "learning_rate": 4.891649048625793e-06, "loss": 0.0042, "step": 17078 }, { "epoch": 18.05, "learning_rate": 4.8863636363636365e-06, "loss": 0.0019, "step": 17080 }, { "epoch": 18.06, "learning_rate": 4.881078224101481e-06, "loss": 0.0004, "step": 17082 }, { "epoch": 18.06, "learning_rate": 4.8757928118393235e-06, "loss": 0.028, "step": 17084 }, { "epoch": 18.06, "learning_rate": 4.870507399577167e-06, "loss": 0.0627, "step": 17086 }, { "epoch": 18.06, "learning_rate": 4.8652219873150105e-06, "loss": 0.0159, "step": 17088 }, { "epoch": 18.07, "learning_rate": 4.859936575052854e-06, "loss": 0.0006, "step": 17090 }, { "epoch": 18.07, "learning_rate": 4.854651162790698e-06, "loss": 0.0037, "step": 17092 }, { "epoch": 18.07, "learning_rate": 4.849365750528542e-06, "loss": 0.0007, "step": 17094 }, { "epoch": 18.07, "learning_rate": 4.8440803382663855e-06, "loss": 0.0038, "step": 17096 }, { "epoch": 18.07, "learning_rate": 4.838794926004229e-06, "loss": 0.0006, "step": 17098 }, { "epoch": 18.08, "learning_rate": 4.8335095137420725e-06, "loss": 0.0147, "step": 17100 }, { "epoch": 18.08, "learning_rate": 4.828224101479915e-06, "loss": 0.0053, "step": 17102 }, { "epoch": 18.08, "learning_rate": 4.822938689217759e-06, "loss": 0.0038, "step": 17104 }, { "epoch": 18.08, "learning_rate": 4.817653276955603e-06, "loss": 0.0136, "step": 17106 }, { "epoch": 18.08, "learning_rate": 4.812367864693447e-06, "loss": 0.0513, "step": 17108 }, { "epoch": 18.09, "learning_rate": 4.80708245243129e-06, "loss": 0.0018, "step": 17110 }, { "epoch": 18.09, "learning_rate": 4.801797040169134e-06, "loss": 0.0075, "step": 17112 }, { "epoch": 18.09, "learning_rate": 4.796511627906977e-06, "loss": 0.0059, "step": 17114 }, { "epoch": 18.09, "learning_rate": 4.791226215644821e-06, "loss": 0.0047, "step": 17116 }, { "epoch": 18.1, "learning_rate": 4.785940803382663e-06, "loss": 0.0012, "step": 17118 }, { "epoch": 18.1, "learning_rate": 4.780655391120508e-06, "loss": 0.0162, "step": 17120 }, { "epoch": 18.1, "learning_rate": 4.775369978858351e-06, "loss": 0.0011, "step": 17122 }, { "epoch": 18.1, "learning_rate": 4.770084566596195e-06, "loss": 0.0051, "step": 17124 }, { "epoch": 18.1, "learning_rate": 4.764799154334038e-06, "loss": 0.0012, "step": 17126 }, { "epoch": 18.11, "learning_rate": 4.759513742071882e-06, "loss": 0.0008, "step": 17128 }, { "epoch": 18.11, "learning_rate": 4.754228329809725e-06, "loss": 0.0004, "step": 17130 }, { "epoch": 18.11, "learning_rate": 4.748942917547569e-06, "loss": 0.0211, "step": 17132 }, { "epoch": 18.11, "learning_rate": 4.743657505285413e-06, "loss": 0.0265, "step": 17134 }, { "epoch": 18.11, "learning_rate": 4.738372093023256e-06, "loss": 0.0032, "step": 17136 }, { "epoch": 18.12, "learning_rate": 4.733086680761099e-06, "loss": 0.0003, "step": 17138 }, { "epoch": 18.12, "learning_rate": 4.727801268498943e-06, "loss": 0.01, "step": 17140 }, { "epoch": 18.12, "learning_rate": 4.722515856236786e-06, "loss": 0.0005, "step": 17142 }, { "epoch": 18.12, "learning_rate": 4.71723044397463e-06, "loss": 0.0007, "step": 17144 }, { "epoch": 18.12, "learning_rate": 4.711945031712474e-06, "loss": 0.0078, "step": 17146 }, { "epoch": 18.13, "learning_rate": 4.706659619450318e-06, "loss": 0.0009, "step": 17148 }, { "epoch": 18.13, "learning_rate": 4.701374207188161e-06, "loss": 0.0004, "step": 17150 }, { "epoch": 18.13, "learning_rate": 4.696088794926005e-06, "loss": 0.0005, "step": 17152 }, { "epoch": 18.13, "learning_rate": 4.6908033826638475e-06, "loss": 0.0021, "step": 17154 }, { "epoch": 18.14, "learning_rate": 4.685517970401691e-06, "loss": 0.002, "step": 17156 }, { "epoch": 18.14, "learning_rate": 4.6802325581395345e-06, "loss": 0.0031, "step": 17158 }, { "epoch": 18.14, "learning_rate": 4.674947145877379e-06, "loss": 0.0044, "step": 17160 }, { "epoch": 18.14, "learning_rate": 4.669661733615222e-06, "loss": 0.0011, "step": 17162 }, { "epoch": 18.14, "learning_rate": 4.664376321353066e-06, "loss": 0.004, "step": 17164 }, { "epoch": 18.15, "learning_rate": 4.6590909090909095e-06, "loss": 0.0062, "step": 17166 }, { "epoch": 18.15, "learning_rate": 4.653805496828753e-06, "loss": 0.0312, "step": 17168 }, { "epoch": 18.15, "learning_rate": 4.6485200845665965e-06, "loss": 0.0007, "step": 17170 }, { "epoch": 18.15, "learning_rate": 4.64323467230444e-06, "loss": 0.0013, "step": 17172 }, { "epoch": 18.15, "learning_rate": 4.6379492600422835e-06, "loss": 0.0014, "step": 17174 }, { "epoch": 18.16, "learning_rate": 4.632663847780127e-06, "loss": 0.0465, "step": 17176 }, { "epoch": 18.16, "learning_rate": 4.6273784355179706e-06, "loss": 0.0061, "step": 17178 }, { "epoch": 18.16, "learning_rate": 4.622093023255814e-06, "loss": 0.0014, "step": 17180 }, { "epoch": 18.16, "learning_rate": 4.616807610993658e-06, "loss": 0.0049, "step": 17182 }, { "epoch": 18.16, "learning_rate": 4.611522198731501e-06, "loss": 0.0227, "step": 17184 }, { "epoch": 18.17, "learning_rate": 4.6062367864693455e-06, "loss": 0.0047, "step": 17186 }, { "epoch": 18.17, "learning_rate": 4.600951374207189e-06, "loss": 0.0009, "step": 17188 }, { "epoch": 18.17, "learning_rate": 4.595665961945032e-06, "loss": 0.0007, "step": 17190 }, { "epoch": 18.17, "learning_rate": 4.590380549682875e-06, "loss": 0.0038, "step": 17192 }, { "epoch": 18.18, "learning_rate": 4.585095137420719e-06, "loss": 0.0075, "step": 17194 }, { "epoch": 18.18, "learning_rate": 4.579809725158562e-06, "loss": 0.001, "step": 17196 }, { "epoch": 18.18, "learning_rate": 4.574524312896407e-06, "loss": 0.0007, "step": 17198 }, { "epoch": 18.18, "learning_rate": 4.56923890063425e-06, "loss": 0.0105, "step": 17200 }, { "epoch": 18.18, "learning_rate": 4.563953488372094e-06, "loss": 0.0005, "step": 17202 }, { "epoch": 18.19, "learning_rate": 4.558668076109937e-06, "loss": 0.0021, "step": 17204 }, { "epoch": 18.19, "learning_rate": 4.55338266384778e-06, "loss": 0.001, "step": 17206 }, { "epoch": 18.19, "learning_rate": 4.548097251585623e-06, "loss": 0.0008, "step": 17208 }, { "epoch": 18.19, "learning_rate": 4.542811839323467e-06, "loss": 0.0007, "step": 17210 }, { "epoch": 18.19, "learning_rate": 4.537526427061311e-06, "loss": 0.0013, "step": 17212 }, { "epoch": 18.2, "learning_rate": 4.532241014799155e-06, "loss": 0.0004, "step": 17214 }, { "epoch": 18.2, "learning_rate": 4.526955602536998e-06, "loss": 0.004, "step": 17216 }, { "epoch": 18.2, "learning_rate": 4.521670190274842e-06, "loss": 0.0193, "step": 17218 }, { "epoch": 18.2, "learning_rate": 4.516384778012685e-06, "loss": 0.002, "step": 17220 }, { "epoch": 18.21, "learning_rate": 4.511099365750529e-06, "loss": 0.0038, "step": 17222 }, { "epoch": 18.21, "learning_rate": 4.505813953488372e-06, "loss": 0.0089, "step": 17224 }, { "epoch": 18.21, "learning_rate": 4.500528541226216e-06, "loss": 0.0007, "step": 17226 }, { "epoch": 18.21, "learning_rate": 4.495243128964059e-06, "loss": 0.0006, "step": 17228 }, { "epoch": 18.21, "learning_rate": 4.489957716701903e-06, "loss": 0.0071, "step": 17230 }, { "epoch": 18.22, "learning_rate": 4.484672304439746e-06, "loss": 0.0081, "step": 17232 }, { "epoch": 18.22, "learning_rate": 4.47938689217759e-06, "loss": 0.0114, "step": 17234 }, { "epoch": 18.22, "learning_rate": 4.4741014799154334e-06, "loss": 0.0005, "step": 17236 }, { "epoch": 18.22, "learning_rate": 4.468816067653278e-06, "loss": 0.0018, "step": 17238 }, { "epoch": 18.22, "learning_rate": 4.463530655391121e-06, "loss": 0.0116, "step": 17240 }, { "epoch": 18.23, "learning_rate": 4.458245243128964e-06, "loss": 0.001, "step": 17242 }, { "epoch": 18.23, "learning_rate": 4.4529598308668075e-06, "loss": 0.0005, "step": 17244 }, { "epoch": 18.23, "learning_rate": 4.447674418604651e-06, "loss": 0.0117, "step": 17246 }, { "epoch": 18.23, "learning_rate": 4.4423890063424945e-06, "loss": 0.0073, "step": 17248 }, { "epoch": 18.23, "learning_rate": 4.437103594080338e-06, "loss": 0.0003, "step": 17250 }, { "epoch": 18.24, "learning_rate": 4.4318181818181824e-06, "loss": 0.0015, "step": 17252 }, { "epoch": 18.24, "learning_rate": 4.426532769556026e-06, "loss": 0.0032, "step": 17254 }, { "epoch": 18.24, "learning_rate": 4.4212473572938695e-06, "loss": 0.0025, "step": 17256 }, { "epoch": 18.24, "learning_rate": 4.415961945031713e-06, "loss": 0.0003, "step": 17258 }, { "epoch": 18.25, "learning_rate": 4.410676532769556e-06, "loss": 0.0003, "step": 17260 }, { "epoch": 18.25, "learning_rate": 4.405391120507399e-06, "loss": 0.0066, "step": 17262 }, { "epoch": 18.25, "learning_rate": 4.4001057082452435e-06, "loss": 0.0071, "step": 17264 }, { "epoch": 18.25, "learning_rate": 4.394820295983087e-06, "loss": 0.0018, "step": 17266 }, { "epoch": 18.25, "learning_rate": 4.389534883720931e-06, "loss": 0.0005, "step": 17268 }, { "epoch": 18.26, "learning_rate": 4.384249471458774e-06, "loss": 0.0029, "step": 17270 }, { "epoch": 18.26, "learning_rate": 4.378964059196618e-06, "loss": 0.0448, "step": 17272 }, { "epoch": 18.26, "learning_rate": 4.373678646934461e-06, "loss": 0.0132, "step": 17274 }, { "epoch": 18.26, "learning_rate": 4.368393234672305e-06, "loss": 0.0203, "step": 17276 }, { "epoch": 18.26, "learning_rate": 4.363107822410148e-06, "loss": 0.0503, "step": 17278 }, { "epoch": 18.27, "learning_rate": 4.357822410147992e-06, "loss": 0.0411, "step": 17280 }, { "epoch": 18.27, "learning_rate": 4.352536997885835e-06, "loss": 0.0203, "step": 17282 }, { "epoch": 18.27, "learning_rate": 4.347251585623679e-06, "loss": 0.0053, "step": 17284 }, { "epoch": 18.27, "learning_rate": 4.341966173361522e-06, "loss": 0.0148, "step": 17286 }, { "epoch": 18.27, "learning_rate": 4.336680761099366e-06, "loss": 0.016, "step": 17288 }, { "epoch": 18.28, "learning_rate": 4.33139534883721e-06, "loss": 0.0002, "step": 17290 }, { "epoch": 18.28, "learning_rate": 4.326109936575054e-06, "loss": 0.0004, "step": 17292 }, { "epoch": 18.28, "learning_rate": 4.320824524312896e-06, "loss": 0.0252, "step": 17294 }, { "epoch": 18.28, "learning_rate": 4.31553911205074e-06, "loss": 0.0075, "step": 17296 }, { "epoch": 18.29, "learning_rate": 4.310253699788583e-06, "loss": 0.0197, "step": 17298 }, { "epoch": 18.29, "learning_rate": 4.304968287526427e-06, "loss": 0.0009, "step": 17300 }, { "epoch": 18.29, "learning_rate": 4.29968287526427e-06, "loss": 0.0005, "step": 17302 }, { "epoch": 18.29, "learning_rate": 4.294397463002115e-06, "loss": 0.001, "step": 17304 }, { "epoch": 18.29, "learning_rate": 4.289112050739958e-06, "loss": 0.0006, "step": 17306 }, { "epoch": 18.3, "learning_rate": 4.283826638477802e-06, "loss": 0.0085, "step": 17308 }, { "epoch": 18.3, "learning_rate": 4.278541226215645e-06, "loss": 0.0671, "step": 17310 }, { "epoch": 18.3, "learning_rate": 4.273255813953488e-06, "loss": 0.0034, "step": 17312 }, { "epoch": 18.3, "learning_rate": 4.2679704016913315e-06, "loss": 0.0012, "step": 17314 }, { "epoch": 18.3, "learning_rate": 4.262684989429176e-06, "loss": 0.0012, "step": 17316 }, { "epoch": 18.31, "learning_rate": 4.257399577167019e-06, "loss": 0.0005, "step": 17318 }, { "epoch": 18.31, "learning_rate": 4.252114164904863e-06, "loss": 0.0085, "step": 17320 }, { "epoch": 18.31, "learning_rate": 4.246828752642706e-06, "loss": 0.0024, "step": 17322 }, { "epoch": 18.31, "learning_rate": 4.24154334038055e-06, "loss": 0.0004, "step": 17324 }, { "epoch": 18.32, "learning_rate": 4.2362579281183935e-06, "loss": 0.0435, "step": 17326 }, { "epoch": 18.32, "learning_rate": 4.230972515856237e-06, "loss": 0.0032, "step": 17328 }, { "epoch": 18.32, "learning_rate": 4.2256871035940805e-06, "loss": 0.0005, "step": 17330 }, { "epoch": 18.32, "learning_rate": 4.220401691331924e-06, "loss": 0.0004, "step": 17332 }, { "epoch": 18.32, "learning_rate": 4.2151162790697675e-06, "loss": 0.0007, "step": 17334 }, { "epoch": 18.33, "learning_rate": 4.209830866807611e-06, "loss": 0.0007, "step": 17336 }, { "epoch": 18.33, "learning_rate": 4.2045454545454546e-06, "loss": 0.0017, "step": 17338 }, { "epoch": 18.33, "learning_rate": 4.199260042283298e-06, "loss": 0.0008, "step": 17340 }, { "epoch": 18.33, "learning_rate": 4.193974630021142e-06, "loss": 0.0004, "step": 17342 }, { "epoch": 18.33, "learning_rate": 4.188689217758986e-06, "loss": 0.0133, "step": 17344 }, { "epoch": 18.34, "learning_rate": 4.1834038054968295e-06, "loss": 0.0081, "step": 17346 }, { "epoch": 18.34, "learning_rate": 4.178118393234672e-06, "loss": 0.0019, "step": 17348 }, { "epoch": 18.34, "learning_rate": 4.172832980972516e-06, "loss": 0.0026, "step": 17350 }, { "epoch": 18.34, "learning_rate": 4.167547568710359e-06, "loss": 0.0139, "step": 17352 }, { "epoch": 18.34, "learning_rate": 4.162262156448203e-06, "loss": 0.0442, "step": 17354 }, { "epoch": 18.35, "learning_rate": 4.156976744186047e-06, "loss": 0.0009, "step": 17356 }, { "epoch": 18.35, "learning_rate": 4.151691331923891e-06, "loss": 0.0009, "step": 17358 }, { "epoch": 18.35, "learning_rate": 4.146405919661734e-06, "loss": 0.0005, "step": 17360 }, { "epoch": 18.35, "learning_rate": 4.141120507399578e-06, "loss": 0.0018, "step": 17362 }, { "epoch": 18.36, "learning_rate": 4.135835095137421e-06, "loss": 0.0144, "step": 17364 }, { "epoch": 18.36, "learning_rate": 4.130549682875264e-06, "loss": 0.0023, "step": 17366 }, { "epoch": 18.36, "learning_rate": 4.125264270613108e-06, "loss": 0.0035, "step": 17368 }, { "epoch": 18.36, "learning_rate": 4.119978858350952e-06, "loss": 0.0004, "step": 17370 }, { "epoch": 18.36, "learning_rate": 4.114693446088795e-06, "loss": 0.0549, "step": 17372 }, { "epoch": 18.37, "learning_rate": 4.109408033826639e-06, "loss": 0.0007, "step": 17374 }, { "epoch": 18.37, "learning_rate": 4.104122621564482e-06, "loss": 0.006, "step": 17376 }, { "epoch": 18.37, "learning_rate": 4.098837209302326e-06, "loss": 0.0004, "step": 17378 }, { "epoch": 18.37, "learning_rate": 4.093551797040169e-06, "loss": 0.003, "step": 17380 }, { "epoch": 18.37, "learning_rate": 4.088266384778013e-06, "loss": 0.0023, "step": 17382 }, { "epoch": 18.38, "learning_rate": 4.082980972515856e-06, "loss": 0.0006, "step": 17384 }, { "epoch": 18.38, "learning_rate": 4.0776955602537e-06, "loss": 0.0006, "step": 17386 }, { "epoch": 18.38, "learning_rate": 4.072410147991543e-06, "loss": 0.0074, "step": 17388 }, { "epoch": 18.38, "learning_rate": 4.067124735729387e-06, "loss": 0.0004, "step": 17390 }, { "epoch": 18.38, "learning_rate": 4.06183932346723e-06, "loss": 0.0004, "step": 17392 }, { "epoch": 18.39, "learning_rate": 4.056553911205074e-06, "loss": 0.0005, "step": 17394 }, { "epoch": 18.39, "learning_rate": 4.051268498942918e-06, "loss": 0.0007, "step": 17396 }, { "epoch": 18.39, "learning_rate": 4.045983086680762e-06, "loss": 0.0033, "step": 17398 }, { "epoch": 18.39, "learning_rate": 4.0406976744186045e-06, "loss": 0.0014, "step": 17400 }, { "epoch": 18.4, "learning_rate": 4.035412262156448e-06, "loss": 0.0006, "step": 17402 }, { "epoch": 18.4, "learning_rate": 4.0301268498942915e-06, "loss": 0.0051, "step": 17404 }, { "epoch": 18.4, "learning_rate": 4.024841437632135e-06, "loss": 0.0025, "step": 17406 }, { "epoch": 18.4, "learning_rate": 4.019556025369979e-06, "loss": 0.0002, "step": 17408 }, { "epoch": 18.4, "learning_rate": 4.014270613107823e-06, "loss": 0.0013, "step": 17410 }, { "epoch": 18.41, "learning_rate": 4.0089852008456664e-06, "loss": 0.0169, "step": 17412 }, { "epoch": 18.41, "learning_rate": 4.00369978858351e-06, "loss": 0.0014, "step": 17414 }, { "epoch": 18.41, "learning_rate": 3.9984143763213535e-06, "loss": 0.0007, "step": 17416 }, { "epoch": 18.41, "learning_rate": 3.993128964059196e-06, "loss": 0.0175, "step": 17418 }, { "epoch": 18.41, "learning_rate": 3.9878435517970405e-06, "loss": 0.0028, "step": 17420 }, { "epoch": 18.42, "learning_rate": 3.982558139534884e-06, "loss": 0.0155, "step": 17422 }, { "epoch": 18.42, "learning_rate": 3.9772727272727275e-06, "loss": 0.0009, "step": 17424 }, { "epoch": 18.42, "learning_rate": 3.971987315010571e-06, "loss": 0.0016, "step": 17426 }, { "epoch": 18.42, "learning_rate": 3.966701902748415e-06, "loss": 0.0008, "step": 17428 }, { "epoch": 18.42, "learning_rate": 3.961416490486258e-06, "loss": 0.0006, "step": 17430 }, { "epoch": 18.43, "learning_rate": 3.956131078224102e-06, "loss": 0.0017, "step": 17432 }, { "epoch": 18.43, "learning_rate": 3.950845665961945e-06, "loss": 0.0009, "step": 17434 }, { "epoch": 18.43, "learning_rate": 3.945560253699789e-06, "loss": 0.0063, "step": 17436 }, { "epoch": 18.43, "learning_rate": 3.940274841437632e-06, "loss": 0.0134, "step": 17438 }, { "epoch": 18.44, "learning_rate": 3.934989429175476e-06, "loss": 0.0009, "step": 17440 }, { "epoch": 18.44, "learning_rate": 3.929704016913319e-06, "loss": 0.0045, "step": 17442 }, { "epoch": 18.44, "learning_rate": 3.924418604651163e-06, "loss": 0.0083, "step": 17444 }, { "epoch": 18.44, "learning_rate": 3.919133192389006e-06, "loss": 0.0052, "step": 17446 }, { "epoch": 18.44, "learning_rate": 3.913847780126851e-06, "loss": 0.0009, "step": 17448 }, { "epoch": 18.45, "learning_rate": 3.908562367864694e-06, "loss": 0.0041, "step": 17450 }, { "epoch": 18.45, "learning_rate": 3.903276955602537e-06, "loss": 0.0017, "step": 17452 }, { "epoch": 18.45, "learning_rate": 3.89799154334038e-06, "loss": 0.0007, "step": 17454 }, { "epoch": 18.45, "learning_rate": 3.892706131078224e-06, "loss": 0.0005, "step": 17456 }, { "epoch": 18.45, "learning_rate": 3.887420718816067e-06, "loss": 0.0188, "step": 17458 }, { "epoch": 18.46, "learning_rate": 3.882135306553912e-06, "loss": 0.0023, "step": 17460 }, { "epoch": 18.46, "learning_rate": 3.876849894291755e-06, "loss": 0.0008, "step": 17462 }, { "epoch": 18.46, "learning_rate": 3.871564482029599e-06, "loss": 0.0056, "step": 17464 }, { "epoch": 18.46, "learning_rate": 3.866279069767442e-06, "loss": 0.0029, "step": 17466 }, { "epoch": 18.47, "learning_rate": 3.860993657505286e-06, "loss": 0.0027, "step": 17468 }, { "epoch": 18.47, "learning_rate": 3.8557082452431285e-06, "loss": 0.0302, "step": 17470 }, { "epoch": 18.47, "learning_rate": 3.850422832980973e-06, "loss": 0.0005, "step": 17472 }, { "epoch": 18.47, "learning_rate": 3.845137420718816e-06, "loss": 0.0003, "step": 17474 }, { "epoch": 18.47, "learning_rate": 3.83985200845666e-06, "loss": 0.0004, "step": 17476 }, { "epoch": 18.48, "learning_rate": 3.834566596194503e-06, "loss": 0.0151, "step": 17478 }, { "epoch": 18.48, "learning_rate": 3.829281183932347e-06, "loss": 0.0004, "step": 17480 }, { "epoch": 18.48, "learning_rate": 3.82399577167019e-06, "loss": 0.0031, "step": 17482 }, { "epoch": 18.48, "learning_rate": 3.818710359408034e-06, "loss": 0.0012, "step": 17484 }, { "epoch": 18.48, "learning_rate": 3.813424947145878e-06, "loss": 0.0023, "step": 17486 }, { "epoch": 18.49, "learning_rate": 3.808139534883721e-06, "loss": 0.0004, "step": 17488 }, { "epoch": 18.49, "learning_rate": 3.8028541226215645e-06, "loss": 0.0143, "step": 17490 }, { "epoch": 18.49, "learning_rate": 3.797568710359408e-06, "loss": 0.0003, "step": 17492 }, { "epoch": 18.49, "learning_rate": 3.7922832980972515e-06, "loss": 0.0016, "step": 17494 }, { "epoch": 18.49, "learning_rate": 3.7869978858350955e-06, "loss": 0.0011, "step": 17496 }, { "epoch": 18.5, "learning_rate": 3.781712473572939e-06, "loss": 0.074, "step": 17498 }, { "epoch": 18.5, "learning_rate": 3.7764270613107825e-06, "loss": 0.0004, "step": 17500 }, { "epoch": 18.5, "eval_cer": 0.06377885437446566, "eval_loss": 0.9183065891265869, "eval_runtime": 124.5586, "eval_samples_per_second": 6.752, "eval_steps_per_second": 0.851, "step": 17500 }, { "epoch": 18.5, "learning_rate": 3.771141649048626e-06, "loss": 0.0004, "step": 17502 }, { "epoch": 18.5, "learning_rate": 3.76585623678647e-06, "loss": 0.001, "step": 17504 }, { "epoch": 18.51, "learning_rate": 3.7605708245243126e-06, "loss": 0.0004, "step": 17506 }, { "epoch": 18.51, "learning_rate": 3.7552854122621566e-06, "loss": 0.0011, "step": 17508 }, { "epoch": 18.51, "learning_rate": 3.75e-06, "loss": 0.0004, "step": 17510 }, { "epoch": 18.51, "learning_rate": 3.7447145877378436e-06, "loss": 0.0005, "step": 17512 }, { "epoch": 18.51, "learning_rate": 3.739429175475687e-06, "loss": 0.0184, "step": 17514 }, { "epoch": 18.52, "learning_rate": 3.734143763213531e-06, "loss": 0.0004, "step": 17516 }, { "epoch": 18.52, "learning_rate": 3.7288583509513746e-06, "loss": 0.0003, "step": 17518 }, { "epoch": 18.52, "learning_rate": 3.723572938689218e-06, "loss": 0.0005, "step": 17520 }, { "epoch": 18.52, "learning_rate": 3.7182875264270616e-06, "loss": 0.0016, "step": 17522 }, { "epoch": 18.52, "learning_rate": 3.7130021141649047e-06, "loss": 0.0025, "step": 17524 }, { "epoch": 18.53, "learning_rate": 3.7077167019027482e-06, "loss": 0.0025, "step": 17526 }, { "epoch": 18.53, "learning_rate": 3.702431289640592e-06, "loss": 0.0006, "step": 17528 }, { "epoch": 18.53, "learning_rate": 3.6971458773784357e-06, "loss": 0.0008, "step": 17530 }, { "epoch": 18.53, "learning_rate": 3.6918604651162792e-06, "loss": 0.0327, "step": 17532 }, { "epoch": 18.53, "learning_rate": 3.6865750528541227e-06, "loss": 0.0004, "step": 17534 }, { "epoch": 18.54, "learning_rate": 3.6812896405919667e-06, "loss": 0.0006, "step": 17536 }, { "epoch": 18.54, "learning_rate": 3.67600422832981e-06, "loss": 0.0035, "step": 17538 }, { "epoch": 18.54, "learning_rate": 3.6707188160676533e-06, "loss": 0.001, "step": 17540 }, { "epoch": 18.54, "learning_rate": 3.665433403805497e-06, "loss": 0.0108, "step": 17542 }, { "epoch": 18.55, "learning_rate": 3.6601479915433403e-06, "loss": 0.0286, "step": 17544 }, { "epoch": 18.55, "learning_rate": 3.654862579281184e-06, "loss": 0.0024, "step": 17546 }, { "epoch": 18.55, "learning_rate": 3.649577167019028e-06, "loss": 0.0091, "step": 17548 }, { "epoch": 18.55, "learning_rate": 3.6442917547568713e-06, "loss": 0.0008, "step": 17550 }, { "epoch": 18.55, "learning_rate": 3.639006342494715e-06, "loss": 0.0015, "step": 17552 }, { "epoch": 18.56, "learning_rate": 3.6337209302325583e-06, "loss": 0.0009, "step": 17554 }, { "epoch": 18.56, "learning_rate": 3.6284355179704023e-06, "loss": 0.001, "step": 17556 }, { "epoch": 18.56, "learning_rate": 3.623150105708245e-06, "loss": 0.0136, "step": 17558 }, { "epoch": 18.56, "learning_rate": 3.617864693446089e-06, "loss": 0.0007, "step": 17560 }, { "epoch": 18.56, "learning_rate": 3.6125792811839324e-06, "loss": 0.0004, "step": 17562 }, { "epoch": 18.57, "learning_rate": 3.607293868921776e-06, "loss": 0.0346, "step": 17564 }, { "epoch": 18.57, "learning_rate": 3.6020084566596195e-06, "loss": 0.0172, "step": 17566 }, { "epoch": 18.57, "learning_rate": 3.5967230443974634e-06, "loss": 0.0003, "step": 17568 }, { "epoch": 18.57, "learning_rate": 3.591437632135307e-06, "loss": 0.0003, "step": 17570 }, { "epoch": 18.58, "learning_rate": 3.5861522198731504e-06, "loss": 0.0004, "step": 17572 }, { "epoch": 18.58, "learning_rate": 3.580866807610994e-06, "loss": 0.0006, "step": 17574 }, { "epoch": 18.58, "learning_rate": 3.575581395348837e-06, "loss": 0.0066, "step": 17576 }, { "epoch": 18.58, "learning_rate": 3.5702959830866806e-06, "loss": 0.001, "step": 17578 }, { "epoch": 18.58, "learning_rate": 3.5650105708245245e-06, "loss": 0.0041, "step": 17580 }, { "epoch": 18.59, "learning_rate": 3.559725158562368e-06, "loss": 0.0005, "step": 17582 }, { "epoch": 18.59, "learning_rate": 3.5544397463002115e-06, "loss": 0.0005, "step": 17584 }, { "epoch": 18.59, "learning_rate": 3.549154334038055e-06, "loss": 0.0006, "step": 17586 }, { "epoch": 18.59, "learning_rate": 3.543868921775899e-06, "loss": 0.0005, "step": 17588 }, { "epoch": 18.59, "learning_rate": 3.5385835095137425e-06, "loss": 0.0015, "step": 17590 }, { "epoch": 18.6, "learning_rate": 3.533298097251586e-06, "loss": 0.0029, "step": 17592 }, { "epoch": 18.6, "learning_rate": 3.528012684989429e-06, "loss": 0.0004, "step": 17594 }, { "epoch": 18.6, "learning_rate": 3.5227272727272726e-06, "loss": 0.002, "step": 17596 }, { "epoch": 18.6, "learning_rate": 3.517441860465116e-06, "loss": 0.0069, "step": 17598 }, { "epoch": 18.6, "learning_rate": 3.51215644820296e-06, "loss": 0.0047, "step": 17600 }, { "epoch": 18.61, "learning_rate": 3.5068710359408036e-06, "loss": 0.0003, "step": 17602 }, { "epoch": 18.61, "learning_rate": 3.501585623678647e-06, "loss": 0.0008, "step": 17604 }, { "epoch": 18.61, "learning_rate": 3.4963002114164907e-06, "loss": 0.0004, "step": 17606 }, { "epoch": 18.61, "learning_rate": 3.4910147991543346e-06, "loss": 0.0092, "step": 17608 }, { "epoch": 18.62, "learning_rate": 3.485729386892178e-06, "loss": 0.0017, "step": 17610 }, { "epoch": 18.62, "learning_rate": 3.4804439746300212e-06, "loss": 0.0029, "step": 17612 }, { "epoch": 18.62, "learning_rate": 3.4751585623678647e-06, "loss": 0.001, "step": 17614 }, { "epoch": 18.62, "learning_rate": 3.4698731501057083e-06, "loss": 0.0018, "step": 17616 }, { "epoch": 18.62, "learning_rate": 3.4645877378435518e-06, "loss": 0.0067, "step": 17618 }, { "epoch": 18.63, "learning_rate": 3.4593023255813957e-06, "loss": 0.0005, "step": 17620 }, { "epoch": 18.63, "learning_rate": 3.4540169133192392e-06, "loss": 0.0138, "step": 17622 }, { "epoch": 18.63, "learning_rate": 3.4487315010570828e-06, "loss": 0.0013, "step": 17624 }, { "epoch": 18.63, "learning_rate": 3.4434460887949263e-06, "loss": 0.0004, "step": 17626 }, { "epoch": 18.63, "learning_rate": 3.4381606765327694e-06, "loss": 0.0006, "step": 17628 }, { "epoch": 18.64, "learning_rate": 3.432875264270613e-06, "loss": 0.0005, "step": 17630 }, { "epoch": 18.64, "learning_rate": 3.427589852008457e-06, "loss": 0.0012, "step": 17632 }, { "epoch": 18.64, "learning_rate": 3.4223044397463003e-06, "loss": 0.0254, "step": 17634 }, { "epoch": 18.64, "learning_rate": 3.417019027484144e-06, "loss": 0.0005, "step": 17636 }, { "epoch": 18.64, "learning_rate": 3.4117336152219874e-06, "loss": 0.0011, "step": 17638 }, { "epoch": 18.65, "learning_rate": 3.4064482029598313e-06, "loss": 0.0041, "step": 17640 }, { "epoch": 18.65, "learning_rate": 3.401162790697675e-06, "loss": 0.0004, "step": 17642 }, { "epoch": 18.65, "learning_rate": 3.3958773784355184e-06, "loss": 0.0005, "step": 17644 }, { "epoch": 18.65, "learning_rate": 3.3905919661733615e-06, "loss": 0.0051, "step": 17646 }, { "epoch": 18.66, "learning_rate": 3.385306553911205e-06, "loss": 0.0032, "step": 17648 }, { "epoch": 18.66, "learning_rate": 3.3800211416490485e-06, "loss": 0.0003, "step": 17650 }, { "epoch": 18.66, "learning_rate": 3.3747357293868924e-06, "loss": 0.004, "step": 17652 }, { "epoch": 18.66, "learning_rate": 3.369450317124736e-06, "loss": 0.007, "step": 17654 }, { "epoch": 18.66, "learning_rate": 3.3641649048625795e-06, "loss": 0.0007, "step": 17656 }, { "epoch": 18.67, "learning_rate": 3.358879492600423e-06, "loss": 0.0014, "step": 17658 }, { "epoch": 18.67, "learning_rate": 3.353594080338267e-06, "loss": 0.0004, "step": 17660 }, { "epoch": 18.67, "learning_rate": 3.3483086680761104e-06, "loss": 0.002, "step": 17662 }, { "epoch": 18.67, "learning_rate": 3.3430232558139535e-06, "loss": 0.0007, "step": 17664 }, { "epoch": 18.67, "learning_rate": 3.337737843551797e-06, "loss": 0.0006, "step": 17666 }, { "epoch": 18.68, "learning_rate": 3.3324524312896406e-06, "loss": 0.0192, "step": 17668 }, { "epoch": 18.68, "learning_rate": 3.327167019027484e-06, "loss": 0.0065, "step": 17670 }, { "epoch": 18.68, "learning_rate": 3.321881606765328e-06, "loss": 0.0002, "step": 17672 }, { "epoch": 18.68, "learning_rate": 3.3165961945031716e-06, "loss": 0.036, "step": 17674 }, { "epoch": 18.68, "learning_rate": 3.311310782241015e-06, "loss": 0.0006, "step": 17676 }, { "epoch": 18.69, "learning_rate": 3.3060253699788586e-06, "loss": 0.0006, "step": 17678 }, { "epoch": 18.69, "learning_rate": 3.3007399577167025e-06, "loss": 0.0006, "step": 17680 }, { "epoch": 18.69, "learning_rate": 3.295454545454545e-06, "loss": 0.0004, "step": 17682 }, { "epoch": 18.69, "learning_rate": 3.290169133192389e-06, "loss": 0.0024, "step": 17684 }, { "epoch": 18.7, "learning_rate": 3.2848837209302327e-06, "loss": 0.0004, "step": 17686 }, { "epoch": 18.7, "learning_rate": 3.279598308668076e-06, "loss": 0.0011, "step": 17688 }, { "epoch": 18.7, "learning_rate": 3.2743128964059197e-06, "loss": 0.0004, "step": 17690 }, { "epoch": 18.7, "learning_rate": 3.2690274841437636e-06, "loss": 0.0007, "step": 17692 }, { "epoch": 18.7, "learning_rate": 3.263742071881607e-06, "loss": 0.0026, "step": 17694 }, { "epoch": 18.71, "learning_rate": 3.2584566596194507e-06, "loss": 0.0361, "step": 17696 }, { "epoch": 18.71, "learning_rate": 3.253171247357294e-06, "loss": 0.0045, "step": 17698 }, { "epoch": 18.71, "learning_rate": 3.2478858350951373e-06, "loss": 0.0287, "step": 17700 }, { "epoch": 18.71, "learning_rate": 3.242600422832981e-06, "loss": 0.0095, "step": 17702 }, { "epoch": 18.71, "learning_rate": 3.2373150105708248e-06, "loss": 0.0004, "step": 17704 }, { "epoch": 18.72, "learning_rate": 3.2320295983086683e-06, "loss": 0.0014, "step": 17706 }, { "epoch": 18.72, "learning_rate": 3.226744186046512e-06, "loss": 0.0005, "step": 17708 }, { "epoch": 18.72, "learning_rate": 3.2214587737843553e-06, "loss": 0.002, "step": 17710 }, { "epoch": 18.72, "learning_rate": 3.2161733615221992e-06, "loss": 0.0339, "step": 17712 }, { "epoch": 18.73, "learning_rate": 3.2108879492600428e-06, "loss": 0.0011, "step": 17714 }, { "epoch": 18.73, "learning_rate": 3.205602536997886e-06, "loss": 0.0011, "step": 17716 }, { "epoch": 18.73, "learning_rate": 3.2003171247357294e-06, "loss": 0.0005, "step": 17718 }, { "epoch": 18.73, "learning_rate": 3.195031712473573e-06, "loss": 0.0005, "step": 17720 }, { "epoch": 18.73, "learning_rate": 3.1897463002114164e-06, "loss": 0.0006, "step": 17722 }, { "epoch": 18.74, "learning_rate": 3.1844608879492604e-06, "loss": 0.0003, "step": 17724 }, { "epoch": 18.74, "learning_rate": 3.179175475687104e-06, "loss": 0.0003, "step": 17726 }, { "epoch": 18.74, "learning_rate": 3.1738900634249474e-06, "loss": 0.0022, "step": 17728 }, { "epoch": 18.74, "learning_rate": 3.168604651162791e-06, "loss": 0.0022, "step": 17730 }, { "epoch": 18.74, "learning_rate": 3.163319238900635e-06, "loss": 0.0323, "step": 17732 }, { "epoch": 18.75, "learning_rate": 3.1580338266384775e-06, "loss": 0.0049, "step": 17734 }, { "epoch": 18.75, "learning_rate": 3.1527484143763215e-06, "loss": 0.0007, "step": 17736 }, { "epoch": 18.75, "learning_rate": 3.147463002114165e-06, "loss": 0.0311, "step": 17738 }, { "epoch": 18.75, "learning_rate": 3.1421775898520085e-06, "loss": 0.0039, "step": 17740 }, { "epoch": 18.75, "learning_rate": 3.136892177589852e-06, "loss": 0.0193, "step": 17742 }, { "epoch": 18.76, "learning_rate": 3.131606765327696e-06, "loss": 0.0006, "step": 17744 }, { "epoch": 18.76, "learning_rate": 3.1263213530655395e-06, "loss": 0.0074, "step": 17746 }, { "epoch": 18.76, "learning_rate": 3.1210359408033826e-06, "loss": 0.0008, "step": 17748 }, { "epoch": 18.76, "learning_rate": 3.1157505285412265e-06, "loss": 0.0019, "step": 17750 }, { "epoch": 18.77, "learning_rate": 3.11046511627907e-06, "loss": 0.0037, "step": 17752 }, { "epoch": 18.77, "learning_rate": 3.1051797040169136e-06, "loss": 0.0207, "step": 17754 }, { "epoch": 18.77, "learning_rate": 3.099894291754757e-06, "loss": 0.0131, "step": 17756 }, { "epoch": 18.77, "learning_rate": 3.0946088794926006e-06, "loss": 0.0034, "step": 17758 }, { "epoch": 18.77, "learning_rate": 3.089323467230444e-06, "loss": 0.0005, "step": 17760 }, { "epoch": 18.78, "learning_rate": 3.0840380549682876e-06, "loss": 0.0027, "step": 17762 }, { "epoch": 18.78, "learning_rate": 3.0787526427061316e-06, "loss": 0.0404, "step": 17764 }, { "epoch": 18.78, "learning_rate": 3.0734672304439747e-06, "loss": 0.0173, "step": 17766 }, { "epoch": 18.78, "learning_rate": 3.068181818181818e-06, "loss": 0.0083, "step": 17768 }, { "epoch": 18.78, "learning_rate": 3.062896405919662e-06, "loss": 0.0021, "step": 17770 }, { "epoch": 18.79, "learning_rate": 3.0576109936575056e-06, "loss": 0.0009, "step": 17772 }, { "epoch": 18.79, "learning_rate": 3.0523255813953487e-06, "loss": 0.0009, "step": 17774 }, { "epoch": 18.79, "learning_rate": 3.0470401691331927e-06, "loss": 0.0037, "step": 17776 }, { "epoch": 18.79, "learning_rate": 3.041754756871036e-06, "loss": 0.001, "step": 17778 }, { "epoch": 18.79, "learning_rate": 3.0364693446088797e-06, "loss": 0.0078, "step": 17780 }, { "epoch": 18.8, "learning_rate": 3.0311839323467232e-06, "loss": 0.0026, "step": 17782 }, { "epoch": 18.8, "learning_rate": 3.0258985200845668e-06, "loss": 0.0069, "step": 17784 }, { "epoch": 18.8, "learning_rate": 3.0206131078224103e-06, "loss": 0.0006, "step": 17786 }, { "epoch": 18.8, "learning_rate": 3.015327695560254e-06, "loss": 0.0007, "step": 17788 }, { "epoch": 18.81, "learning_rate": 3.0100422832980977e-06, "loss": 0.0286, "step": 17790 }, { "epoch": 18.81, "learning_rate": 3.004756871035941e-06, "loss": 0.0008, "step": 17792 }, { "epoch": 18.81, "learning_rate": 2.9994714587737843e-06, "loss": 0.0005, "step": 17794 }, { "epoch": 18.81, "learning_rate": 2.9941860465116283e-06, "loss": 0.0003, "step": 17796 }, { "epoch": 18.81, "learning_rate": 2.988900634249472e-06, "loss": 0.0008, "step": 17798 }, { "epoch": 18.82, "learning_rate": 2.983615221987315e-06, "loss": 0.0005, "step": 17800 }, { "epoch": 18.82, "learning_rate": 2.978329809725159e-06, "loss": 0.0007, "step": 17802 }, { "epoch": 18.82, "learning_rate": 2.9730443974630024e-06, "loss": 0.0058, "step": 17804 }, { "epoch": 18.82, "learning_rate": 2.967758985200846e-06, "loss": 0.0025, "step": 17806 }, { "epoch": 18.82, "learning_rate": 2.9624735729386894e-06, "loss": 0.0277, "step": 17808 }, { "epoch": 18.83, "learning_rate": 2.957188160676533e-06, "loss": 0.002, "step": 17810 }, { "epoch": 18.83, "learning_rate": 2.9519027484143764e-06, "loss": 0.0102, "step": 17812 }, { "epoch": 18.83, "learning_rate": 2.94661733615222e-06, "loss": 0.0009, "step": 17814 }, { "epoch": 18.83, "learning_rate": 2.941331923890064e-06, "loss": 0.0006, "step": 17816 }, { "epoch": 18.84, "learning_rate": 2.936046511627907e-06, "loss": 0.0009, "step": 17818 }, { "epoch": 18.84, "learning_rate": 2.9307610993657505e-06, "loss": 0.0026, "step": 17820 }, { "epoch": 18.84, "learning_rate": 2.9254756871035944e-06, "loss": 0.0004, "step": 17822 }, { "epoch": 18.84, "learning_rate": 2.920190274841438e-06, "loss": 0.001, "step": 17824 }, { "epoch": 18.84, "learning_rate": 2.914904862579281e-06, "loss": 0.0005, "step": 17826 }, { "epoch": 18.85, "learning_rate": 2.909619450317125e-06, "loss": 0.0021, "step": 17828 }, { "epoch": 18.85, "learning_rate": 2.9043340380549685e-06, "loss": 0.0007, "step": 17830 }, { "epoch": 18.85, "learning_rate": 2.899048625792812e-06, "loss": 0.0008, "step": 17832 }, { "epoch": 18.85, "learning_rate": 2.8937632135306556e-06, "loss": 0.0005, "step": 17834 }, { "epoch": 18.85, "learning_rate": 2.888477801268499e-06, "loss": 0.0023, "step": 17836 }, { "epoch": 18.86, "learning_rate": 2.8831923890063426e-06, "loss": 0.0118, "step": 17838 }, { "epoch": 18.86, "learning_rate": 2.877906976744186e-06, "loss": 0.0006, "step": 17840 }, { "epoch": 18.86, "learning_rate": 2.87262156448203e-06, "loss": 0.0119, "step": 17842 }, { "epoch": 18.86, "learning_rate": 2.867336152219873e-06, "loss": 0.0009, "step": 17844 }, { "epoch": 18.86, "learning_rate": 2.8620507399577167e-06, "loss": 0.0013, "step": 17846 }, { "epoch": 18.87, "learning_rate": 2.8567653276955606e-06, "loss": 0.0023, "step": 17848 }, { "epoch": 18.87, "learning_rate": 2.851479915433404e-06, "loss": 0.0005, "step": 17850 }, { "epoch": 18.87, "learning_rate": 2.8461945031712476e-06, "loss": 0.0621, "step": 17852 }, { "epoch": 18.87, "learning_rate": 2.840909090909091e-06, "loss": 0.0032, "step": 17854 }, { "epoch": 18.88, "learning_rate": 2.8356236786469347e-06, "loss": 0.005, "step": 17856 }, { "epoch": 18.88, "learning_rate": 2.830338266384778e-06, "loss": 0.0021, "step": 17858 }, { "epoch": 18.88, "learning_rate": 2.8250528541226217e-06, "loss": 0.0103, "step": 17860 }, { "epoch": 18.88, "learning_rate": 2.8197674418604652e-06, "loss": 0.0012, "step": 17862 }, { "epoch": 18.88, "learning_rate": 2.8144820295983088e-06, "loss": 0.0004, "step": 17864 }, { "epoch": 18.89, "learning_rate": 2.8091966173361523e-06, "loss": 0.0003, "step": 17866 }, { "epoch": 18.89, "learning_rate": 2.8039112050739962e-06, "loss": 0.0003, "step": 17868 }, { "epoch": 18.89, "learning_rate": 2.7986257928118393e-06, "loss": 0.0013, "step": 17870 }, { "epoch": 18.89, "learning_rate": 2.793340380549683e-06, "loss": 0.0006, "step": 17872 }, { "epoch": 18.89, "learning_rate": 2.7880549682875268e-06, "loss": 0.0406, "step": 17874 }, { "epoch": 18.9, "learning_rate": 2.7827695560253703e-06, "loss": 0.0008, "step": 17876 }, { "epoch": 18.9, "learning_rate": 2.777484143763214e-06, "loss": 0.0009, "step": 17878 }, { "epoch": 18.9, "learning_rate": 2.772198731501057e-06, "loss": 0.0007, "step": 17880 }, { "epoch": 18.9, "learning_rate": 2.766913319238901e-06, "loss": 0.0006, "step": 17882 }, { "epoch": 18.9, "learning_rate": 2.7616279069767444e-06, "loss": 0.0003, "step": 17884 }, { "epoch": 18.91, "learning_rate": 2.756342494714588e-06, "loss": 0.002, "step": 17886 }, { "epoch": 18.91, "learning_rate": 2.7510570824524314e-06, "loss": 0.007, "step": 17888 }, { "epoch": 18.91, "learning_rate": 2.745771670190275e-06, "loss": 0.006, "step": 17890 }, { "epoch": 18.91, "learning_rate": 2.7404862579281184e-06, "loss": 0.0042, "step": 17892 }, { "epoch": 18.92, "learning_rate": 2.7352008456659624e-06, "loss": 0.0003, "step": 17894 }, { "epoch": 18.92, "learning_rate": 2.7299154334038055e-06, "loss": 0.0058, "step": 17896 }, { "epoch": 18.92, "learning_rate": 2.724630021141649e-06, "loss": 0.0012, "step": 17898 }, { "epoch": 18.92, "learning_rate": 2.719344608879493e-06, "loss": 0.0025, "step": 17900 }, { "epoch": 18.92, "learning_rate": 2.7140591966173364e-06, "loss": 0.0011, "step": 17902 }, { "epoch": 18.93, "learning_rate": 2.70877378435518e-06, "loss": 0.0003, "step": 17904 }, { "epoch": 18.93, "learning_rate": 2.703488372093023e-06, "loss": 0.0016, "step": 17906 }, { "epoch": 18.93, "learning_rate": 2.698202959830867e-06, "loss": 0.0005, "step": 17908 }, { "epoch": 18.93, "learning_rate": 2.6929175475687105e-06, "loss": 0.0006, "step": 17910 }, { "epoch": 18.93, "learning_rate": 2.687632135306554e-06, "loss": 0.0004, "step": 17912 }, { "epoch": 18.94, "learning_rate": 2.6823467230443976e-06, "loss": 0.0141, "step": 17914 }, { "epoch": 18.94, "learning_rate": 2.677061310782241e-06, "loss": 0.0009, "step": 17916 }, { "epoch": 18.94, "learning_rate": 2.6717758985200846e-06, "loss": 0.0004, "step": 17918 }, { "epoch": 18.94, "learning_rate": 2.6664904862579285e-06, "loss": 0.0007, "step": 17920 }, { "epoch": 18.95, "learning_rate": 2.661205073995772e-06, "loss": 0.0022, "step": 17922 }, { "epoch": 18.95, "learning_rate": 2.655919661733615e-06, "loss": 0.0007, "step": 17924 }, { "epoch": 18.95, "learning_rate": 2.6506342494714587e-06, "loss": 0.0018, "step": 17926 }, { "epoch": 18.95, "learning_rate": 2.6453488372093026e-06, "loss": 0.0003, "step": 17928 }, { "epoch": 18.95, "learning_rate": 2.640063424947146e-06, "loss": 0.0008, "step": 17930 }, { "epoch": 18.96, "learning_rate": 2.6347780126849892e-06, "loss": 0.0012, "step": 17932 }, { "epoch": 18.96, "learning_rate": 2.629492600422833e-06, "loss": 0.0013, "step": 17934 }, { "epoch": 18.96, "learning_rate": 2.6242071881606767e-06, "loss": 0.0003, "step": 17936 }, { "epoch": 18.96, "learning_rate": 2.61892177589852e-06, "loss": 0.0012, "step": 17938 }, { "epoch": 18.96, "learning_rate": 2.6136363636363637e-06, "loss": 0.0004, "step": 17940 }, { "epoch": 18.97, "learning_rate": 2.6083509513742072e-06, "loss": 0.0146, "step": 17942 }, { "epoch": 18.97, "learning_rate": 2.6030655391120508e-06, "loss": 0.0003, "step": 17944 }, { "epoch": 18.97, "learning_rate": 2.5977801268498947e-06, "loss": 0.0022, "step": 17946 }, { "epoch": 18.97, "learning_rate": 2.5924947145877382e-06, "loss": 0.0004, "step": 17948 }, { "epoch": 18.97, "learning_rate": 2.5872093023255813e-06, "loss": 0.0008, "step": 17950 }, { "epoch": 18.98, "learning_rate": 2.581923890063425e-06, "loss": 0.0023, "step": 17952 }, { "epoch": 18.98, "learning_rate": 2.5766384778012688e-06, "loss": 0.0045, "step": 17954 }, { "epoch": 18.98, "learning_rate": 2.5713530655391123e-06, "loss": 0.0004, "step": 17956 }, { "epoch": 18.98, "learning_rate": 2.5660676532769554e-06, "loss": 0.0008, "step": 17958 }, { "epoch": 18.99, "learning_rate": 2.5607822410147993e-06, "loss": 0.0017, "step": 17960 }, { "epoch": 18.99, "learning_rate": 2.555496828752643e-06, "loss": 0.0004, "step": 17962 }, { "epoch": 18.99, "learning_rate": 2.5502114164904864e-06, "loss": 0.0005, "step": 17964 }, { "epoch": 18.99, "learning_rate": 2.5449260042283303e-06, "loss": 0.001, "step": 17966 }, { "epoch": 18.99, "learning_rate": 2.5396405919661734e-06, "loss": 0.001, "step": 17968 }, { "epoch": 19.0, "learning_rate": 2.534355179704017e-06, "loss": 0.0009, "step": 17970 }, { "epoch": 19.0, "learning_rate": 2.5290697674418604e-06, "loss": 0.0006, "step": 17972 }, { "epoch": 19.0, "learning_rate": 2.5237843551797044e-06, "loss": 0.0012, "step": 17974 }, { "epoch": 19.0, "learning_rate": 2.5184989429175475e-06, "loss": 0.0093, "step": 17976 }, { "epoch": 19.0, "learning_rate": 2.513213530655391e-06, "loss": 0.001, "step": 17978 }, { "epoch": 19.01, "learning_rate": 2.507928118393235e-06, "loss": 0.0007, "step": 17980 }, { "epoch": 19.01, "learning_rate": 2.5026427061310784e-06, "loss": 0.0007, "step": 17982 }, { "epoch": 19.01, "learning_rate": 2.4973572938689215e-06, "loss": 0.0007, "step": 17984 }, { "epoch": 19.01, "learning_rate": 2.4920718816067655e-06, "loss": 0.0109, "step": 17986 }, { "epoch": 19.01, "learning_rate": 2.486786469344609e-06, "loss": 0.0338, "step": 17988 }, { "epoch": 19.02, "learning_rate": 2.4815010570824525e-06, "loss": 0.0005, "step": 17990 }, { "epoch": 19.02, "learning_rate": 2.4762156448202965e-06, "loss": 0.0145, "step": 17992 }, { "epoch": 19.02, "learning_rate": 2.4709302325581396e-06, "loss": 0.0005, "step": 17994 }, { "epoch": 19.02, "learning_rate": 2.465644820295983e-06, "loss": 0.011, "step": 17996 }, { "epoch": 19.03, "learning_rate": 2.4603594080338266e-06, "loss": 0.003, "step": 17998 }, { "epoch": 19.03, "learning_rate": 2.4550739957716705e-06, "loss": 0.0014, "step": 18000 }, { "epoch": 19.03, "eval_cer": 0.05528640638358507, "eval_loss": 0.782919704914093, "eval_runtime": 125.7137, "eval_samples_per_second": 6.69, "eval_steps_per_second": 0.843, "step": 18000 } ], "max_steps": 18920, "num_train_epochs": 20, "total_flos": 2.130711393070083e+20, "trial_name": null, "trial_params": null }