{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1000.0, "eval_steps": 200, "global_step": 5000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.4, "grad_norm": null, "learning_rate": 5e-05, "loss": 15.0136, "step": 2 }, { "epoch": 0.8, "grad_norm": 49.684608459472656, "learning_rate": 4.9980000000000006e-05, "loss": 11.701, "step": 4 }, { "epoch": 1.2, "grad_norm": 127.0980453491211, "learning_rate": 4.996e-05, "loss": 10.924, "step": 6 }, { "epoch": 1.6, "grad_norm": 34.73715591430664, "learning_rate": 4.9940000000000006e-05, "loss": 6.777, "step": 8 }, { "epoch": 2.0, "grad_norm": 44.66195297241211, "learning_rate": 4.992e-05, "loss": 6.4118, "step": 10 }, { "epoch": 2.4, "grad_norm": 21.1611328125, "learning_rate": 4.99e-05, "loss": 5.0251, "step": 12 }, { "epoch": 2.8, "grad_norm": 18.735519409179688, "learning_rate": 4.9880000000000004e-05, "loss": 4.4427, "step": 14 }, { "epoch": 3.2, "grad_norm": 17.76988410949707, "learning_rate": 4.986e-05, "loss": 4.3027, "step": 16 }, { "epoch": 3.6, "grad_norm": 29.88725471496582, "learning_rate": 4.9840000000000004e-05, "loss": 4.3483, "step": 18 }, { "epoch": 4.0, "grad_norm": 9.712175369262695, "learning_rate": 4.982e-05, "loss": 3.7752, "step": 20 }, { "epoch": 4.4, "grad_norm": 9.271561622619629, "learning_rate": 4.9800000000000004e-05, "loss": 3.8542, "step": 22 }, { "epoch": 4.8, "grad_norm": 9.395713806152344, "learning_rate": 4.978e-05, "loss": 3.6511, "step": 24 }, { "epoch": 5.2, "grad_norm": 9.891660690307617, "learning_rate": 4.976e-05, "loss": 3.6388, "step": 26 }, { "epoch": 5.6, "grad_norm": 10.07371997833252, "learning_rate": 4.974e-05, "loss": 3.5038, "step": 28 }, { "epoch": 6.0, "grad_norm": 6.219523906707764, "learning_rate": 4.972e-05, "loss": 3.5689, "step": 30 }, { "epoch": 6.4, "grad_norm": 7.127007961273193, "learning_rate": 4.97e-05, "loss": 3.3218, "step": 32 }, { "epoch": 6.8, "grad_norm": 
7.027449607849121, "learning_rate": 4.9680000000000005e-05, "loss": 3.3653, "step": 34 }, { "epoch": 7.2, "grad_norm": 8.678634643554688, "learning_rate": 4.966e-05, "loss": 3.1652, "step": 36 }, { "epoch": 7.6, "grad_norm": 7.660609722137451, "learning_rate": 4.9640000000000006e-05, "loss": 3.3788, "step": 38 }, { "epoch": 8.0, "grad_norm": 10.358582496643066, "learning_rate": 4.962e-05, "loss": 3.3855, "step": 40 }, { "epoch": 8.4, "grad_norm": 10.276606559753418, "learning_rate": 4.96e-05, "loss": 3.2163, "step": 42 }, { "epoch": 8.8, "grad_norm": 8.291080474853516, "learning_rate": 4.958e-05, "loss": 3.2291, "step": 44 }, { "epoch": 9.2, "grad_norm": 6.494446277618408, "learning_rate": 4.956e-05, "loss": 2.967, "step": 46 }, { "epoch": 9.6, "grad_norm": 11.704506874084473, "learning_rate": 4.9540000000000003e-05, "loss": 3.1076, "step": 48 }, { "epoch": 10.0, "grad_norm": 5.962319850921631, "learning_rate": 4.952e-05, "loss": 2.9828, "step": 50 }, { "epoch": 10.4, "grad_norm": 8.854808807373047, "learning_rate": 4.9500000000000004e-05, "loss": 3.0108, "step": 52 }, { "epoch": 10.8, "grad_norm": 8.931464195251465, "learning_rate": 4.948000000000001e-05, "loss": 2.9091, "step": 54 }, { "epoch": 11.2, "grad_norm": 5.261834144592285, "learning_rate": 4.946e-05, "loss": 2.9687, "step": 56 }, { "epoch": 11.6, "grad_norm": 6.691362380981445, "learning_rate": 4.944e-05, "loss": 2.8329, "step": 58 }, { "epoch": 12.0, "grad_norm": 8.342561721801758, "learning_rate": 4.942e-05, "loss": 2.9541, "step": 60 }, { "epoch": 12.4, "grad_norm": 7.324896812438965, "learning_rate": 4.94e-05, "loss": 2.6798, "step": 62 }, { "epoch": 12.8, "grad_norm": 6.073739051818848, "learning_rate": 4.9380000000000005e-05, "loss": 2.8208, "step": 64 }, { "epoch": 13.2, "grad_norm": 8.276163101196289, "learning_rate": 4.936e-05, "loss": 3.0621, "step": 66 }, { "epoch": 13.6, "grad_norm": 18.59242820739746, "learning_rate": 4.9340000000000005e-05, "loss": 2.8045, "step": 68 }, { "epoch": 14.0, 
"grad_norm": 8.741026878356934, "learning_rate": 4.932e-05, "loss": 2.8315, "step": 70 }, { "epoch": 14.4, "grad_norm": 15.741567611694336, "learning_rate": 4.93e-05, "loss": 2.7767, "step": 72 }, { "epoch": 14.8, "grad_norm": 12.403434753417969, "learning_rate": 4.928e-05, "loss": 2.7444, "step": 74 }, { "epoch": 15.2, "grad_norm": 14.471048355102539, "learning_rate": 4.926e-05, "loss": 2.9654, "step": 76 }, { "epoch": 15.6, "grad_norm": 10.648104667663574, "learning_rate": 4.924e-05, "loss": 2.7352, "step": 78 }, { "epoch": 16.0, "grad_norm": 18.217269897460938, "learning_rate": 4.9220000000000006e-05, "loss": 2.5891, "step": 80 }, { "epoch": 16.4, "grad_norm": 13.469452857971191, "learning_rate": 4.92e-05, "loss": 2.694, "step": 82 }, { "epoch": 16.8, "grad_norm": 9.214739799499512, "learning_rate": 4.918000000000001e-05, "loss": 2.5167, "step": 84 }, { "epoch": 17.2, "grad_norm": 6.7329301834106445, "learning_rate": 4.9160000000000004e-05, "loss": 2.5829, "step": 86 }, { "epoch": 17.6, "grad_norm": 35.808902740478516, "learning_rate": 4.914e-05, "loss": 2.6473, "step": 88 }, { "epoch": 18.0, "grad_norm": 10.69314956665039, "learning_rate": 4.9120000000000004e-05, "loss": 2.4711, "step": 90 }, { "epoch": 18.4, "grad_norm": 9.859249114990234, "learning_rate": 4.91e-05, "loss": 2.4506, "step": 92 }, { "epoch": 18.8, "grad_norm": 9.507281303405762, "learning_rate": 4.9080000000000004e-05, "loss": 2.3622, "step": 94 }, { "epoch": 19.2, "grad_norm": 8.016356468200684, "learning_rate": 4.906e-05, "loss": 2.1735, "step": 96 }, { "epoch": 19.6, "grad_norm": 6.360136985778809, "learning_rate": 4.9040000000000005e-05, "loss": 2.3155, "step": 98 }, { "epoch": 20.0, "grad_norm": 7.803518772125244, "learning_rate": 4.902e-05, "loss": 2.3425, "step": 100 }, { "epoch": 20.4, "grad_norm": 7.230980396270752, "learning_rate": 4.9e-05, "loss": 2.2642, "step": 102 }, { "epoch": 20.8, "grad_norm": 9.346038818359375, "learning_rate": 4.898e-05, "loss": 2.2588, "step": 104 }, { 
"epoch": 21.2, "grad_norm": 7.534197807312012, "learning_rate": 4.896e-05, "loss": 2.0746, "step": 106 }, { "epoch": 21.6, "grad_norm": 6.142523765563965, "learning_rate": 4.894e-05, "loss": 2.162, "step": 108 }, { "epoch": 22.0, "grad_norm": 8.614611625671387, "learning_rate": 4.8920000000000006e-05, "loss": 2.1573, "step": 110 }, { "epoch": 22.4, "grad_norm": 10.614283561706543, "learning_rate": 4.89e-05, "loss": 2.1414, "step": 112 }, { "epoch": 22.8, "grad_norm": 10.460810661315918, "learning_rate": 4.8880000000000006e-05, "loss": 2.2815, "step": 114 }, { "epoch": 23.2, "grad_norm": 6.170535087585449, "learning_rate": 4.886e-05, "loss": 2.027, "step": 116 }, { "epoch": 23.6, "grad_norm": 5.691027641296387, "learning_rate": 4.884e-05, "loss": 2.1516, "step": 118 }, { "epoch": 24.0, "grad_norm": 15.287314414978027, "learning_rate": 4.8820000000000004e-05, "loss": 2.0142, "step": 120 }, { "epoch": 24.4, "grad_norm": 8.013864517211914, "learning_rate": 4.88e-05, "loss": 2.0917, "step": 122 }, { "epoch": 24.8, "grad_norm": 11.503437042236328, "learning_rate": 4.8780000000000004e-05, "loss": 2.1307, "step": 124 }, { "epoch": 25.2, "grad_norm": 15.792810440063477, "learning_rate": 4.876e-05, "loss": 2.0419, "step": 126 }, { "epoch": 25.6, "grad_norm": 11.666516304016113, "learning_rate": 4.8740000000000004e-05, "loss": 2.1891, "step": 128 }, { "epoch": 26.0, "grad_norm": 6.6844682693481445, "learning_rate": 4.872000000000001e-05, "loss": 2.0272, "step": 130 }, { "epoch": 26.4, "grad_norm": 11.392362594604492, "learning_rate": 4.87e-05, "loss": 2.048, "step": 132 }, { "epoch": 26.8, "grad_norm": 8.699398040771484, "learning_rate": 4.868e-05, "loss": 1.9478, "step": 134 }, { "epoch": 27.2, "grad_norm": 12.196807861328125, "learning_rate": 4.866e-05, "loss": 1.7973, "step": 136 }, { "epoch": 27.6, "grad_norm": 7.32382869720459, "learning_rate": 4.864e-05, "loss": 2.0374, "step": 138 }, { "epoch": 28.0, "grad_norm": 6.675863742828369, "learning_rate": 
4.8620000000000005e-05, "loss": 1.9698, "step": 140 }, { "epoch": 28.4, "grad_norm": 7.29475736618042, "learning_rate": 4.86e-05, "loss": 1.9659, "step": 142 }, { "epoch": 28.8, "grad_norm": 5.971595764160156, "learning_rate": 4.8580000000000006e-05, "loss": 1.9064, "step": 144 }, { "epoch": 29.2, "grad_norm": 8.920109748840332, "learning_rate": 4.856e-05, "loss": 1.7365, "step": 146 }, { "epoch": 29.6, "grad_norm": 11.016731262207031, "learning_rate": 4.854e-05, "loss": 2.1336, "step": 148 }, { "epoch": 30.0, "grad_norm": 7.597679138183594, "learning_rate": 4.852e-05, "loss": 1.8777, "step": 150 }, { "epoch": 30.4, "grad_norm": 6.2793402671813965, "learning_rate": 4.85e-05, "loss": 1.9479, "step": 152 }, { "epoch": 30.8, "grad_norm": 7.849404811859131, "learning_rate": 4.8480000000000003e-05, "loss": 1.8329, "step": 154 }, { "epoch": 31.2, "grad_norm": 9.55136775970459, "learning_rate": 4.846e-05, "loss": 1.8551, "step": 156 }, { "epoch": 31.6, "grad_norm": 8.940347671508789, "learning_rate": 4.8440000000000004e-05, "loss": 1.7816, "step": 158 }, { "epoch": 32.0, "grad_norm": 8.635571479797363, "learning_rate": 4.842000000000001e-05, "loss": 1.9009, "step": 160 }, { "epoch": 32.4, "grad_norm": 7.238102912902832, "learning_rate": 4.8400000000000004e-05, "loss": 1.688, "step": 162 }, { "epoch": 32.8, "grad_norm": 8.034041404724121, "learning_rate": 4.838e-05, "loss": 1.8203, "step": 164 }, { "epoch": 33.2, "grad_norm": 5.368892192840576, "learning_rate": 4.836e-05, "loss": 1.9755, "step": 166 }, { "epoch": 33.6, "grad_norm": 10.670844078063965, "learning_rate": 4.834e-05, "loss": 2.0307, "step": 168 }, { "epoch": 34.0, "grad_norm": 7.037011623382568, "learning_rate": 4.8320000000000005e-05, "loss": 1.5716, "step": 170 }, { "epoch": 34.4, "grad_norm": 9.43834400177002, "learning_rate": 4.83e-05, "loss": 1.9199, "step": 172 }, { "epoch": 34.8, "grad_norm": 7.6983208656311035, "learning_rate": 4.8280000000000005e-05, "loss": 1.5518, "step": 174 }, { "epoch": 35.2, 
"grad_norm": 8.454316139221191, "learning_rate": 4.826e-05, "loss": 1.8398, "step": 176 }, { "epoch": 35.6, "grad_norm": 10.421158790588379, "learning_rate": 4.824e-05, "loss": 1.6981, "step": 178 }, { "epoch": 36.0, "grad_norm": 5.223074436187744, "learning_rate": 4.822e-05, "loss": 1.857, "step": 180 }, { "epoch": 36.4, "grad_norm": 11.528409004211426, "learning_rate": 4.82e-05, "loss": 1.7822, "step": 182 }, { "epoch": 36.8, "grad_norm": 7.677820205688477, "learning_rate": 4.818e-05, "loss": 1.6692, "step": 184 }, { "epoch": 37.2, "grad_norm": 6.263685703277588, "learning_rate": 4.816e-05, "loss": 1.6247, "step": 186 }, { "epoch": 37.6, "grad_norm": 7.266116142272949, "learning_rate": 4.814e-05, "loss": 1.6027, "step": 188 }, { "epoch": 38.0, "grad_norm": 9.865347862243652, "learning_rate": 4.812000000000001e-05, "loss": 1.9097, "step": 190 }, { "epoch": 38.4, "grad_norm": 15.372934341430664, "learning_rate": 4.8100000000000004e-05, "loss": 1.6605, "step": 192 }, { "epoch": 38.8, "grad_norm": 9.946341514587402, "learning_rate": 4.808e-05, "loss": 1.911, "step": 194 }, { "epoch": 39.2, "grad_norm": 7.970291614532471, "learning_rate": 4.8060000000000004e-05, "loss": 1.8492, "step": 196 }, { "epoch": 39.6, "grad_norm": 10.375287055969238, "learning_rate": 4.804e-05, "loss": 1.6653, "step": 198 }, { "epoch": 40.0, "grad_norm": 7.815904140472412, "learning_rate": 4.8020000000000004e-05, "loss": 1.6377, "step": 200 }, { "epoch": 40.0, "eval_cer": 1.3523316062176165, "eval_loss": 3.0961663722991943, "eval_runtime": 14.2542, "eval_samples_per_second": 0.702, "eval_steps_per_second": 0.14, "step": 200 }, { "epoch": 40.4, "grad_norm": 5.183942794799805, "learning_rate": 4.8e-05, "loss": 1.5704, "step": 202 }, { "epoch": 40.8, "grad_norm": 14.287017822265625, "learning_rate": 4.7980000000000005e-05, "loss": 1.7588, "step": 204 }, { "epoch": 41.2, "grad_norm": 8.08150577545166, "learning_rate": 4.796e-05, "loss": 1.5937, "step": 206 }, { "epoch": 41.6, "grad_norm": 
6.277037620544434, "learning_rate": 4.794e-05, "loss": 1.4366, "step": 208 }, { "epoch": 42.0, "grad_norm": 11.282824516296387, "learning_rate": 4.792e-05, "loss": 1.7153, "step": 210 }, { "epoch": 42.4, "grad_norm": 8.427130699157715, "learning_rate": 4.79e-05, "loss": 1.6514, "step": 212 }, { "epoch": 42.8, "grad_norm": 6.836592674255371, "learning_rate": 4.788e-05, "loss": 1.4515, "step": 214 }, { "epoch": 43.2, "grad_norm": 7.578636169433594, "learning_rate": 4.7860000000000006e-05, "loss": 1.6989, "step": 216 }, { "epoch": 43.6, "grad_norm": 10.145289421081543, "learning_rate": 4.784e-05, "loss": 1.5635, "step": 218 }, { "epoch": 44.0, "grad_norm": 8.619582176208496, "learning_rate": 4.7820000000000006e-05, "loss": 1.5683, "step": 220 }, { "epoch": 44.4, "grad_norm": 6.797476291656494, "learning_rate": 4.78e-05, "loss": 1.4495, "step": 222 }, { "epoch": 44.8, "grad_norm": 10.675323486328125, "learning_rate": 4.778e-05, "loss": 1.5185, "step": 224 }, { "epoch": 45.2, "grad_norm": 5.305678844451904, "learning_rate": 4.7760000000000004e-05, "loss": 1.4058, "step": 226 }, { "epoch": 45.6, "grad_norm": 6.72518253326416, "learning_rate": 4.774e-05, "loss": 1.5476, "step": 228 }, { "epoch": 46.0, "grad_norm": 8.866070747375488, "learning_rate": 4.7720000000000004e-05, "loss": 1.5456, "step": 230 }, { "epoch": 46.4, "grad_norm": 11.276784896850586, "learning_rate": 4.77e-05, "loss": 1.5525, "step": 232 }, { "epoch": 46.8, "grad_norm": 12.82331657409668, "learning_rate": 4.7680000000000004e-05, "loss": 1.4452, "step": 234 }, { "epoch": 47.2, "grad_norm": 6.922698497772217, "learning_rate": 4.766000000000001e-05, "loss": 1.5323, "step": 236 }, { "epoch": 47.6, "grad_norm": 11.840755462646484, "learning_rate": 4.7640000000000005e-05, "loss": 1.6155, "step": 238 }, { "epoch": 48.0, "grad_norm": 8.395027160644531, "learning_rate": 4.762e-05, "loss": 1.4552, "step": 240 }, { "epoch": 48.4, "grad_norm": 8.875897407531738, "learning_rate": 4.76e-05, "loss": 1.3785, "step": 
242 }, { "epoch": 48.8, "grad_norm": 5.54985237121582, "learning_rate": 4.758e-05, "loss": 1.4475, "step": 244 }, { "epoch": 49.2, "grad_norm": 5.359694004058838, "learning_rate": 4.7560000000000005e-05, "loss": 1.5333, "step": 246 }, { "epoch": 49.6, "grad_norm": 7.369774341583252, "learning_rate": 4.754e-05, "loss": 1.5244, "step": 248 }, { "epoch": 50.0, "grad_norm": 7.076777935028076, "learning_rate": 4.7520000000000006e-05, "loss": 1.3297, "step": 250 }, { "epoch": 50.4, "grad_norm": 13.234743118286133, "learning_rate": 4.75e-05, "loss": 1.3432, "step": 252 }, { "epoch": 50.8, "grad_norm": 10.082717895507812, "learning_rate": 4.748e-05, "loss": 1.5458, "step": 254 }, { "epoch": 51.2, "grad_norm": 6.331541061401367, "learning_rate": 4.746e-05, "loss": 1.399, "step": 256 }, { "epoch": 51.6, "grad_norm": 9.98861026763916, "learning_rate": 4.744e-05, "loss": 1.5399, "step": 258 }, { "epoch": 52.0, "grad_norm": 7.388329982757568, "learning_rate": 4.742e-05, "loss": 1.3386, "step": 260 }, { "epoch": 52.4, "grad_norm": 5.0084943771362305, "learning_rate": 4.74e-05, "loss": 1.3894, "step": 262 }, { "epoch": 52.8, "grad_norm": 9.229616165161133, "learning_rate": 4.7380000000000004e-05, "loss": 1.2003, "step": 264 }, { "epoch": 53.2, "grad_norm": 7.136425495147705, "learning_rate": 4.736000000000001e-05, "loss": 1.3176, "step": 266 }, { "epoch": 53.6, "grad_norm": 8.369108200073242, "learning_rate": 4.7340000000000004e-05, "loss": 1.3253, "step": 268 }, { "epoch": 54.0, "grad_norm": 4.811428070068359, "learning_rate": 4.732e-05, "loss": 1.3407, "step": 270 }, { "epoch": 54.4, "grad_norm": 4.642618656158447, "learning_rate": 4.73e-05, "loss": 1.1019, "step": 272 }, { "epoch": 54.8, "grad_norm": 8.8253755569458, "learning_rate": 4.728e-05, "loss": 1.4383, "step": 274 }, { "epoch": 55.2, "grad_norm": 7.9131646156311035, "learning_rate": 4.7260000000000005e-05, "loss": 1.2026, "step": 276 }, { "epoch": 55.6, "grad_norm": 6.536884784698486, "learning_rate": 4.724e-05, 
"loss": 1.1712, "step": 278 }, { "epoch": 56.0, "grad_norm": 9.874418258666992, "learning_rate": 4.7220000000000005e-05, "loss": 1.2421, "step": 280 }, { "epoch": 56.4, "grad_norm": 10.34627914428711, "learning_rate": 4.72e-05, "loss": 1.2225, "step": 282 }, { "epoch": 56.8, "grad_norm": 7.703983306884766, "learning_rate": 4.718e-05, "loss": 1.215, "step": 284 }, { "epoch": 57.2, "grad_norm": 5.581977367401123, "learning_rate": 4.716e-05, "loss": 1.123, "step": 286 }, { "epoch": 57.6, "grad_norm": 7.364421367645264, "learning_rate": 4.714e-05, "loss": 1.0888, "step": 288 }, { "epoch": 58.0, "grad_norm": 9.032854080200195, "learning_rate": 4.712e-05, "loss": 1.1085, "step": 290 }, { "epoch": 58.4, "grad_norm": 6.511216640472412, "learning_rate": 4.71e-05, "loss": 1.1576, "step": 292 }, { "epoch": 58.8, "grad_norm": 7.004978179931641, "learning_rate": 4.708e-05, "loss": 1.2069, "step": 294 }, { "epoch": 59.2, "grad_norm": 15.359365463256836, "learning_rate": 4.706000000000001e-05, "loss": 1.1824, "step": 296 }, { "epoch": 59.6, "grad_norm": 14.307708740234375, "learning_rate": 4.7040000000000004e-05, "loss": 1.3109, "step": 298 }, { "epoch": 60.0, "grad_norm": 8.387164115905762, "learning_rate": 4.702e-05, "loss": 1.1117, "step": 300 }, { "epoch": 60.4, "grad_norm": 8.697118759155273, "learning_rate": 4.7e-05, "loss": 1.1582, "step": 302 }, { "epoch": 60.8, "grad_norm": 7.9323859214782715, "learning_rate": 4.698e-05, "loss": 1.0815, "step": 304 }, { "epoch": 61.2, "grad_norm": 4.937358379364014, "learning_rate": 4.6960000000000004e-05, "loss": 1.0168, "step": 306 }, { "epoch": 61.6, "grad_norm": 5.674488544464111, "learning_rate": 4.694e-05, "loss": 1.0296, "step": 308 }, { "epoch": 62.0, "grad_norm": 8.039836883544922, "learning_rate": 4.6920000000000005e-05, "loss": 1.1417, "step": 310 }, { "epoch": 62.4, "grad_norm": 5.33099889755249, "learning_rate": 4.69e-05, "loss": 1.076, "step": 312 }, { "epoch": 62.8, "grad_norm": 6.6786322593688965, "learning_rate": 
4.688e-05, "loss": 0.9831, "step": 314 }, { "epoch": 63.2, "grad_norm": 24.683210372924805, "learning_rate": 4.686e-05, "loss": 1.2451, "step": 316 }, { "epoch": 63.6, "grad_norm": 7.963212013244629, "learning_rate": 4.684e-05, "loss": 1.2019, "step": 318 }, { "epoch": 64.0, "grad_norm": 15.025751113891602, "learning_rate": 4.682e-05, "loss": 1.2215, "step": 320 }, { "epoch": 64.4, "grad_norm": 6.122192859649658, "learning_rate": 4.6800000000000006e-05, "loss": 1.1068, "step": 322 }, { "epoch": 64.8, "grad_norm": 12.217151641845703, "learning_rate": 4.678e-05, "loss": 1.1743, "step": 324 }, { "epoch": 65.2, "grad_norm": 8.359797477722168, "learning_rate": 4.6760000000000006e-05, "loss": 1.1179, "step": 326 }, { "epoch": 65.6, "grad_norm": 6.854281902313232, "learning_rate": 4.674e-05, "loss": 1.112, "step": 328 }, { "epoch": 66.0, "grad_norm": 4.675491809844971, "learning_rate": 4.672e-05, "loss": 1.1006, "step": 330 }, { "epoch": 66.4, "grad_norm": 6.421262741088867, "learning_rate": 4.6700000000000003e-05, "loss": 1.1157, "step": 332 }, { "epoch": 66.8, "grad_norm": 5.516520977020264, "learning_rate": 4.668e-05, "loss": 0.8967, "step": 334 }, { "epoch": 67.2, "grad_norm": 7.117183685302734, "learning_rate": 4.6660000000000004e-05, "loss": 0.9784, "step": 336 }, { "epoch": 67.6, "grad_norm": 6.1140313148498535, "learning_rate": 4.664e-05, "loss": 0.9273, "step": 338 }, { "epoch": 68.0, "grad_norm": 4.55072546005249, "learning_rate": 4.6620000000000004e-05, "loss": 0.9545, "step": 340 }, { "epoch": 68.4, "grad_norm": 6.126168251037598, "learning_rate": 4.660000000000001e-05, "loss": 1.0331, "step": 342 }, { "epoch": 68.8, "grad_norm": 6.012630939483643, "learning_rate": 4.6580000000000005e-05, "loss": 0.9731, "step": 344 }, { "epoch": 69.2, "grad_norm": 5.366358757019043, "learning_rate": 4.656e-05, "loss": 0.9958, "step": 346 }, { "epoch": 69.6, "grad_norm": 3.7810704708099365, "learning_rate": 4.654e-05, "loss": 0.812, "step": 348 }, { "epoch": 70.0, "grad_norm": 
4.86328649520874, "learning_rate": 4.652e-05, "loss": 0.9613, "step": 350 }, { "epoch": 70.4, "grad_norm": 7.402370929718018, "learning_rate": 4.6500000000000005e-05, "loss": 0.8087, "step": 352 }, { "epoch": 70.8, "grad_norm": 6.665915012359619, "learning_rate": 4.648e-05, "loss": 0.9995, "step": 354 }, { "epoch": 71.2, "grad_norm": 7.111016273498535, "learning_rate": 4.6460000000000006e-05, "loss": 0.9208, "step": 356 }, { "epoch": 71.6, "grad_norm": 9.72551441192627, "learning_rate": 4.644e-05, "loss": 0.8678, "step": 358 }, { "epoch": 72.0, "grad_norm": 8.900708198547363, "learning_rate": 4.642e-05, "loss": 1.035, "step": 360 }, { "epoch": 72.4, "grad_norm": 5.16693115234375, "learning_rate": 4.64e-05, "loss": 0.7226, "step": 362 }, { "epoch": 72.8, "grad_norm": 5.154110908508301, "learning_rate": 4.638e-05, "loss": 0.9605, "step": 364 }, { "epoch": 73.2, "grad_norm": 6.085078239440918, "learning_rate": 4.636e-05, "loss": 0.8533, "step": 366 }, { "epoch": 73.6, "grad_norm": 6.434233665466309, "learning_rate": 4.634e-05, "loss": 0.7296, "step": 368 }, { "epoch": 74.0, "grad_norm": 6.530635356903076, "learning_rate": 4.6320000000000004e-05, "loss": 1.0567, "step": 370 }, { "epoch": 74.4, "grad_norm": 10.707755088806152, "learning_rate": 4.630000000000001e-05, "loss": 0.8352, "step": 372 }, { "epoch": 74.8, "grad_norm": 8.428587913513184, "learning_rate": 4.6280000000000004e-05, "loss": 0.9734, "step": 374 }, { "epoch": 75.2, "grad_norm": 10.632957458496094, "learning_rate": 4.626e-05, "loss": 1.042, "step": 376 }, { "epoch": 75.6, "grad_norm": 7.95598030090332, "learning_rate": 4.624e-05, "loss": 0.8348, "step": 378 }, { "epoch": 76.0, "grad_norm": 6.667850494384766, "learning_rate": 4.622e-05, "loss": 0.9917, "step": 380 }, { "epoch": 76.4, "grad_norm": 5.5978193283081055, "learning_rate": 4.6200000000000005e-05, "loss": 0.8493, "step": 382 }, { "epoch": 76.8, "grad_norm": 6.053537368774414, "learning_rate": 4.618e-05, "loss": 0.891, "step": 384 }, { "epoch": 
77.2, "grad_norm": 6.258749961853027, "learning_rate": 4.6160000000000005e-05, "loss": 0.9184, "step": 386 }, { "epoch": 77.6, "grad_norm": 10.063704490661621, "learning_rate": 4.614e-05, "loss": 0.9033, "step": 388 }, { "epoch": 78.0, "grad_norm": 7.901361465454102, "learning_rate": 4.612e-05, "loss": 0.9363, "step": 390 }, { "epoch": 78.4, "grad_norm": 6.6293840408325195, "learning_rate": 4.61e-05, "loss": 0.7804, "step": 392 }, { "epoch": 78.8, "grad_norm": 6.199061393737793, "learning_rate": 4.608e-05, "loss": 0.8607, "step": 394 }, { "epoch": 79.2, "grad_norm": 6.37863826751709, "learning_rate": 4.606e-05, "loss": 0.9559, "step": 396 }, { "epoch": 79.6, "grad_norm": 4.380728244781494, "learning_rate": 4.604e-05, "loss": 0.8286, "step": 398 }, { "epoch": 80.0, "grad_norm": 11.867618560791016, "learning_rate": 4.602e-05, "loss": 0.7885, "step": 400 }, { "epoch": 80.0, "eval_cer": 0.9844559585492227, "eval_loss": 3.8706846237182617, "eval_runtime": 10.1422, "eval_samples_per_second": 0.986, "eval_steps_per_second": 0.197, "step": 400 }, { "epoch": 80.4, "grad_norm": 5.309966564178467, "learning_rate": 4.600000000000001e-05, "loss": 0.8136, "step": 402 }, { "epoch": 80.8, "grad_norm": 6.934682846069336, "learning_rate": 4.5980000000000004e-05, "loss": 0.7915, "step": 404 }, { "epoch": 81.2, "grad_norm": 5.952892303466797, "learning_rate": 4.596e-05, "loss": 0.8422, "step": 406 }, { "epoch": 81.6, "grad_norm": 6.603396415710449, "learning_rate": 4.594e-05, "loss": 0.7312, "step": 408 }, { "epoch": 82.0, "grad_norm": 7.942108154296875, "learning_rate": 4.592e-05, "loss": 0.8218, "step": 410 }, { "epoch": 82.4, "grad_norm": 11.193021774291992, "learning_rate": 4.5900000000000004e-05, "loss": 0.6923, "step": 412 }, { "epoch": 82.8, "grad_norm": 10.165032386779785, "learning_rate": 4.588e-05, "loss": 0.9594, "step": 414 }, { "epoch": 83.2, "grad_norm": 7.621891021728516, "learning_rate": 4.5860000000000005e-05, "loss": 0.8482, "step": 416 }, { "epoch": 83.6, 
"grad_norm": 11.457491874694824, "learning_rate": 4.584e-05, "loss": 0.7546, "step": 418 }, { "epoch": 84.0, "grad_norm": 10.69454288482666, "learning_rate": 4.5820000000000005e-05, "loss": 0.8709, "step": 420 }, { "epoch": 84.4, "grad_norm": 5.449646472930908, "learning_rate": 4.58e-05, "loss": 0.6618, "step": 422 }, { "epoch": 84.8, "grad_norm": 5.637418270111084, "learning_rate": 4.578e-05, "loss": 0.883, "step": 424 }, { "epoch": 85.2, "grad_norm": 21.261632919311523, "learning_rate": 4.576e-05, "loss": 0.8184, "step": 426 }, { "epoch": 85.6, "grad_norm": 7.270489692687988, "learning_rate": 4.574e-05, "loss": 0.7435, "step": 428 }, { "epoch": 86.0, "grad_norm": 12.779277801513672, "learning_rate": 4.572e-05, "loss": 0.9857, "step": 430 }, { "epoch": 86.4, "grad_norm": 9.577957153320312, "learning_rate": 4.5700000000000006e-05, "loss": 0.8363, "step": 432 }, { "epoch": 86.8, "grad_norm": 12.401692390441895, "learning_rate": 4.568e-05, "loss": 1.0277, "step": 434 }, { "epoch": 87.2, "grad_norm": 6.231203556060791, "learning_rate": 4.566e-05, "loss": 0.9051, "step": 436 }, { "epoch": 87.6, "grad_norm": 10.290390968322754, "learning_rate": 4.564e-05, "loss": 0.8766, "step": 438 }, { "epoch": 88.0, "grad_norm": 7.919069766998291, "learning_rate": 4.562e-05, "loss": 0.8685, "step": 440 }, { "epoch": 88.4, "grad_norm": 8.157342910766602, "learning_rate": 4.5600000000000004e-05, "loss": 0.8485, "step": 442 }, { "epoch": 88.8, "grad_norm": 7.64354133605957, "learning_rate": 4.558e-05, "loss": 0.8091, "step": 444 }, { "epoch": 89.2, "grad_norm": 8.296390533447266, "learning_rate": 4.5560000000000004e-05, "loss": 0.9552, "step": 446 }, { "epoch": 89.6, "grad_norm": 7.538454055786133, "learning_rate": 4.554000000000001e-05, "loss": 0.7956, "step": 448 }, { "epoch": 90.0, "grad_norm": 6.705000877380371, "learning_rate": 4.5520000000000005e-05, "loss": 0.7903, "step": 450 }, { "epoch": 90.4, "grad_norm": 5.8832573890686035, "learning_rate": 4.55e-05, "loss": 0.8337, "step": 
452 }, { "epoch": 90.8, "grad_norm": 7.217807769775391, "learning_rate": 4.548e-05, "loss": 0.8209, "step": 454 }, { "epoch": 91.2, "grad_norm": 7.400946617126465, "learning_rate": 4.546e-05, "loss": 0.8294, "step": 456 }, { "epoch": 91.6, "grad_norm": 35.36799240112305, "learning_rate": 4.5440000000000005e-05, "loss": 1.0173, "step": 458 }, { "epoch": 92.0, "grad_norm": 13.412359237670898, "learning_rate": 4.542e-05, "loss": 0.8212, "step": 460 }, { "epoch": 92.4, "grad_norm": 7.765072822570801, "learning_rate": 4.5400000000000006e-05, "loss": 0.72, "step": 462 }, { "epoch": 92.8, "grad_norm": 4.0667643547058105, "learning_rate": 4.538e-05, "loss": 0.7562, "step": 464 }, { "epoch": 93.2, "grad_norm": 6.551666259765625, "learning_rate": 4.536e-05, "loss": 0.8337, "step": 466 }, { "epoch": 93.6, "grad_norm": 6.05645751953125, "learning_rate": 4.534e-05, "loss": 0.7663, "step": 468 }, { "epoch": 94.0, "grad_norm": 5.71702766418457, "learning_rate": 4.532e-05, "loss": 0.791, "step": 470 }, { "epoch": 94.4, "grad_norm": 4.935368061065674, "learning_rate": 4.53e-05, "loss": 0.6659, "step": 472 }, { "epoch": 94.8, "grad_norm": 6.581076145172119, "learning_rate": 4.528e-05, "loss": 0.7337, "step": 474 }, { "epoch": 95.2, "grad_norm": 5.02113151550293, "learning_rate": 4.5260000000000004e-05, "loss": 0.6605, "step": 476 }, { "epoch": 95.6, "grad_norm": 6.294024467468262, "learning_rate": 4.524000000000001e-05, "loss": 0.8004, "step": 478 }, { "epoch": 96.0, "grad_norm": 10.649039268493652, "learning_rate": 4.5220000000000004e-05, "loss": 0.638, "step": 480 }, { "epoch": 96.4, "grad_norm": 6.017673492431641, "learning_rate": 4.52e-05, "loss": 0.7136, "step": 482 }, { "epoch": 96.8, "grad_norm": 4.729225158691406, "learning_rate": 4.518e-05, "loss": 0.6726, "step": 484 }, { "epoch": 97.2, "grad_norm": 4.915314197540283, "learning_rate": 4.516e-05, "loss": 0.7259, "step": 486 }, { "epoch": 97.6, "grad_norm": 7.431918621063232, "learning_rate": 4.5140000000000005e-05, "loss": 
0.5909, "step": 488 }, { "epoch": 98.0, "grad_norm": 62.33146667480469, "learning_rate": 4.512e-05, "loss": 0.6472, "step": 490 }, { "epoch": 98.4, "grad_norm": 4.831386089324951, "learning_rate": 4.5100000000000005e-05, "loss": 0.6815, "step": 492 }, { "epoch": 98.8, "grad_norm": 10.517301559448242, "learning_rate": 4.508e-05, "loss": 0.6494, "step": 494 }, { "epoch": 99.2, "grad_norm": 4.930273056030273, "learning_rate": 4.506e-05, "loss": 0.4983, "step": 496 }, { "epoch": 99.6, "grad_norm": 4.601551532745361, "learning_rate": 4.504e-05, "loss": 0.683, "step": 498 }, { "epoch": 100.0, "grad_norm": 7.453440189361572, "learning_rate": 4.502e-05, "loss": 0.6366, "step": 500 }, { "epoch": 100.4, "grad_norm": 4.724945068359375, "learning_rate": 4.5e-05, "loss": 0.64, "step": 502 }, { "epoch": 100.8, "grad_norm": 3.9395129680633545, "learning_rate": 4.498e-05, "loss": 0.5618, "step": 504 }, { "epoch": 101.2, "grad_norm": 6.729037284851074, "learning_rate": 4.496e-05, "loss": 0.5693, "step": 506 }, { "epoch": 101.6, "grad_norm": 5.272979736328125, "learning_rate": 4.494000000000001e-05, "loss": 0.63, "step": 508 }, { "epoch": 102.0, "grad_norm": 8.960346221923828, "learning_rate": 4.4920000000000004e-05, "loss": 0.6785, "step": 510 }, { "epoch": 102.4, "grad_norm": 8.266894340515137, "learning_rate": 4.49e-05, "loss": 0.5824, "step": 512 }, { "epoch": 102.8, "grad_norm": 5.965224742889404, "learning_rate": 4.488e-05, "loss": 0.5581, "step": 514 }, { "epoch": 103.2, "grad_norm": 7.83219051361084, "learning_rate": 4.486e-05, "loss": 0.7593, "step": 516 }, { "epoch": 103.6, "grad_norm": 3.9604523181915283, "learning_rate": 4.4840000000000004e-05, "loss": 0.5372, "step": 518 }, { "epoch": 104.0, "grad_norm": 5.798244476318359, "learning_rate": 4.482e-05, "loss": 0.6626, "step": 520 }, { "epoch": 104.4, "grad_norm": 6.5943284034729, "learning_rate": 4.4800000000000005e-05, "loss": 0.5507, "step": 522 }, { "epoch": 104.8, "grad_norm": 10.946722030639648, "learning_rate": 
4.478e-05, "loss": 0.7684, "step": 524 }, { "epoch": 105.2, "grad_norm": 8.626599311828613, "learning_rate": 4.4760000000000005e-05, "loss": 0.6427, "step": 526 }, { "epoch": 105.6, "grad_norm": 3.8266408443450928, "learning_rate": 4.474e-05, "loss": 0.5768, "step": 528 }, { "epoch": 106.0, "grad_norm": 6.957334995269775, "learning_rate": 4.472e-05, "loss": 0.6519, "step": 530 }, { "epoch": 106.4, "grad_norm": 35.23234176635742, "learning_rate": 4.47e-05, "loss": 0.6476, "step": 532 }, { "epoch": 106.8, "grad_norm": 4.853867530822754, "learning_rate": 4.468e-05, "loss": 0.6786, "step": 534 }, { "epoch": 107.2, "grad_norm": 4.731429576873779, "learning_rate": 4.466e-05, "loss": 0.5365, "step": 536 }, { "epoch": 107.6, "grad_norm": 5.534473419189453, "learning_rate": 4.4640000000000006e-05, "loss": 0.8592, "step": 538 }, { "epoch": 108.0, "grad_norm": 7.266269207000732, "learning_rate": 4.462e-05, "loss": 0.603, "step": 540 }, { "epoch": 108.4, "grad_norm": 5.702962398529053, "learning_rate": 4.46e-05, "loss": 0.6186, "step": 542 }, { "epoch": 108.8, "grad_norm": 8.743338584899902, "learning_rate": 4.458e-05, "loss": 0.6254, "step": 544 }, { "epoch": 109.2, "grad_norm": 6.263076305389404, "learning_rate": 4.456e-05, "loss": 0.6659, "step": 546 }, { "epoch": 109.6, "grad_norm": 5.996551990509033, "learning_rate": 4.4540000000000004e-05, "loss": 0.5969, "step": 548 }, { "epoch": 110.0, "grad_norm": 10.580172538757324, "learning_rate": 4.452e-05, "loss": 0.5821, "step": 550 }, { "epoch": 110.4, "grad_norm": 27.180540084838867, "learning_rate": 4.4500000000000004e-05, "loss": 0.6207, "step": 552 }, { "epoch": 110.8, "grad_norm": 7.355681419372559, "learning_rate": 4.448e-05, "loss": 0.6267, "step": 554 }, { "epoch": 111.2, "grad_norm": 6.613978862762451, "learning_rate": 4.4460000000000005e-05, "loss": 0.6288, "step": 556 }, { "epoch": 111.6, "grad_norm": 7.290516376495361, "learning_rate": 4.444e-05, "loss": 0.5438, "step": 558 }, { "epoch": 112.0, "grad_norm": 
9.501524925231934, "learning_rate": 4.442e-05, "loss": 0.7205, "step": 560 }, { "epoch": 112.4, "grad_norm": 10.028887748718262, "learning_rate": 4.44e-05, "loss": 0.5184, "step": 562 }, { "epoch": 112.8, "grad_norm": 8.010947227478027, "learning_rate": 4.438e-05, "loss": 0.7519, "step": 564 }, { "epoch": 113.2, "grad_norm": 6.318701267242432, "learning_rate": 4.436e-05, "loss": 0.6628, "step": 566 }, { "epoch": 113.6, "grad_norm": 5.48253059387207, "learning_rate": 4.4340000000000006e-05, "loss": 0.4274, "step": 568 }, { "epoch": 114.0, "grad_norm": 4.281514644622803, "learning_rate": 4.432e-05, "loss": 0.5881, "step": 570 }, { "epoch": 114.4, "grad_norm": 5.825370788574219, "learning_rate": 4.43e-05, "loss": 0.623, "step": 572 }, { "epoch": 114.8, "grad_norm": 4.9794464111328125, "learning_rate": 4.428e-05, "loss": 0.4493, "step": 574 }, { "epoch": 115.2, "grad_norm": 4.465465545654297, "learning_rate": 4.426e-05, "loss": 0.5063, "step": 576 }, { "epoch": 115.6, "grad_norm": 4.500620365142822, "learning_rate": 4.424e-05, "loss": 0.4278, "step": 578 }, { "epoch": 116.0, "grad_norm": 4.524998664855957, "learning_rate": 4.422e-05, "loss": 0.495, "step": 580 }, { "epoch": 116.4, "grad_norm": 5.147426128387451, "learning_rate": 4.4200000000000004e-05, "loss": 0.5836, "step": 582 }, { "epoch": 116.8, "grad_norm": 8.192828178405762, "learning_rate": 4.418000000000001e-05, "loss": 0.5445, "step": 584 }, { "epoch": 117.2, "grad_norm": 10.884695053100586, "learning_rate": 4.4160000000000004e-05, "loss": 0.521, "step": 586 }, { "epoch": 117.6, "grad_norm": 20.870441436767578, "learning_rate": 4.414e-05, "loss": 0.5945, "step": 588 }, { "epoch": 118.0, "grad_norm": 13.276383399963379, "learning_rate": 4.412e-05, "loss": 0.6055, "step": 590 }, { "epoch": 118.4, "grad_norm": 8.099759101867676, "learning_rate": 4.41e-05, "loss": 0.6538, "step": 592 }, { "epoch": 118.8, "grad_norm": 7.6346845626831055, "learning_rate": 4.4080000000000005e-05, "loss": 0.5938, "step": 594 }, { 
"epoch": 119.2, "grad_norm": 11.216561317443848, "learning_rate": 4.406e-05, "loss": 0.5067, "step": 596 }, { "epoch": 119.6, "grad_norm": 5.136045455932617, "learning_rate": 4.4040000000000005e-05, "loss": 0.6133, "step": 598 }, { "epoch": 120.0, "grad_norm": 5.624964714050293, "learning_rate": 4.402e-05, "loss": 0.5971, "step": 600 }, { "epoch": 120.0, "eval_cer": 1.6373056994818653, "eval_loss": 4.126744270324707, "eval_runtime": 12.1196, "eval_samples_per_second": 0.825, "eval_steps_per_second": 0.165, "step": 600 }, { "epoch": 120.4, "grad_norm": 3.5531275272369385, "learning_rate": 4.4000000000000006e-05, "loss": 0.4577, "step": 602 }, { "epoch": 120.8, "grad_norm": 3.850210428237915, "learning_rate": 4.398e-05, "loss": 0.4971, "step": 604 }, { "epoch": 121.2, "grad_norm": 5.581485748291016, "learning_rate": 4.396e-05, "loss": 0.474, "step": 606 }, { "epoch": 121.6, "grad_norm": 9.454375267028809, "learning_rate": 4.394e-05, "loss": 0.5761, "step": 608 }, { "epoch": 122.0, "grad_norm": 11.871817588806152, "learning_rate": 4.392e-05, "loss": 0.5484, "step": 610 }, { "epoch": 122.4, "grad_norm": 7.668302536010742, "learning_rate": 4.39e-05, "loss": 0.4879, "step": 612 }, { "epoch": 122.8, "grad_norm": 4.763530731201172, "learning_rate": 4.388000000000001e-05, "loss": 0.5895, "step": 614 }, { "epoch": 123.2, "grad_norm": 5.7320475578308105, "learning_rate": 4.3860000000000004e-05, "loss": 0.4605, "step": 616 }, { "epoch": 123.6, "grad_norm": 9.247602462768555, "learning_rate": 4.384e-05, "loss": 0.529, "step": 618 }, { "epoch": 124.0, "grad_norm": 12.229979515075684, "learning_rate": 4.382e-05, "loss": 0.5058, "step": 620 }, { "epoch": 124.4, "grad_norm": 6.2679123878479, "learning_rate": 4.38e-05, "loss": 0.5174, "step": 622 }, { "epoch": 124.8, "grad_norm": 9.075543403625488, "learning_rate": 4.3780000000000004e-05, "loss": 0.4529, "step": 624 }, { "epoch": 125.2, "grad_norm": 6.712457656860352, "learning_rate": 4.376e-05, "loss": 0.5557, "step": 626 }, { 
"epoch": 125.6, "grad_norm": 5.796508312225342, "learning_rate": 4.3740000000000005e-05, "loss": 0.5687, "step": 628 }, { "epoch": 126.0, "grad_norm": 6.2599263191223145, "learning_rate": 4.372e-05, "loss": 0.6222, "step": 630 }, { "epoch": 126.4, "grad_norm": 3.747432231903076, "learning_rate": 4.3700000000000005e-05, "loss": 0.4284, "step": 632 }, { "epoch": 126.8, "grad_norm": 4.780229568481445, "learning_rate": 4.368e-05, "loss": 0.3579, "step": 634 }, { "epoch": 127.2, "grad_norm": 3.9507830142974854, "learning_rate": 4.366e-05, "loss": 0.6348, "step": 636 }, { "epoch": 127.6, "grad_norm": 5.6048455238342285, "learning_rate": 4.364e-05, "loss": 0.4674, "step": 638 }, { "epoch": 128.0, "grad_norm": 5.384023189544678, "learning_rate": 4.362e-05, "loss": 0.5081, "step": 640 }, { "epoch": 128.4, "grad_norm": 7.066680908203125, "learning_rate": 4.36e-05, "loss": 0.5708, "step": 642 }, { "epoch": 128.8, "grad_norm": 8.362913131713867, "learning_rate": 4.3580000000000006e-05, "loss": 0.6078, "step": 644 }, { "epoch": 129.2, "grad_norm": 6.050140857696533, "learning_rate": 4.356e-05, "loss": 0.257, "step": 646 }, { "epoch": 129.6, "grad_norm": 7.49799108505249, "learning_rate": 4.354e-05, "loss": 0.5757, "step": 648 }, { "epoch": 130.0, "grad_norm": 7.89163875579834, "learning_rate": 4.352e-05, "loss": 0.6344, "step": 650 }, { "epoch": 130.4, "grad_norm": 6.083957195281982, "learning_rate": 4.35e-05, "loss": 0.401, "step": 652 }, { "epoch": 130.8, "grad_norm": 9.174875259399414, "learning_rate": 4.3480000000000004e-05, "loss": 0.5934, "step": 654 }, { "epoch": 131.2, "grad_norm": 5.738006591796875, "learning_rate": 4.346e-05, "loss": 0.4404, "step": 656 }, { "epoch": 131.6, "grad_norm": 6.542445659637451, "learning_rate": 4.3440000000000004e-05, "loss": 0.4314, "step": 658 }, { "epoch": 132.0, "grad_norm": 5.116391658782959, "learning_rate": 4.342e-05, "loss": 0.3826, "step": 660 }, { "epoch": 132.4, "grad_norm": 6.853465557098389, "learning_rate": 
4.3400000000000005e-05, "loss": 0.3808, "step": 662 }, { "epoch": 132.8, "grad_norm": 12.942069053649902, "learning_rate": 4.338e-05, "loss": 0.6073, "step": 664 }, { "epoch": 133.2, "grad_norm": 7.706568241119385, "learning_rate": 4.336e-05, "loss": 0.5283, "step": 666 }, { "epoch": 133.6, "grad_norm": 6.301263332366943, "learning_rate": 4.334e-05, "loss": 0.4935, "step": 668 }, { "epoch": 134.0, "grad_norm": 7.179783344268799, "learning_rate": 4.332e-05, "loss": 0.4651, "step": 670 }, { "epoch": 134.4, "grad_norm": 6.358160018920898, "learning_rate": 4.33e-05, "loss": 0.5385, "step": 672 }, { "epoch": 134.8, "grad_norm": 4.190439701080322, "learning_rate": 4.3280000000000006e-05, "loss": 0.384, "step": 674 }, { "epoch": 135.2, "grad_norm": 4.561495304107666, "learning_rate": 4.326e-05, "loss": 0.4414, "step": 676 }, { "epoch": 135.6, "grad_norm": 8.13863754272461, "learning_rate": 4.324e-05, "loss": 0.4714, "step": 678 }, { "epoch": 136.0, "grad_norm": 6.7273149490356445, "learning_rate": 4.3219999999999996e-05, "loss": 0.5042, "step": 680 }, { "epoch": 136.4, "grad_norm": 8.344538688659668, "learning_rate": 4.32e-05, "loss": 0.4009, "step": 682 }, { "epoch": 136.8, "grad_norm": 6.040705680847168, "learning_rate": 4.318e-05, "loss": 0.5367, "step": 684 }, { "epoch": 137.2, "grad_norm": 17.42331314086914, "learning_rate": 4.316e-05, "loss": 0.6962, "step": 686 }, { "epoch": 137.6, "grad_norm": 8.37028694152832, "learning_rate": 4.3140000000000004e-05, "loss": 0.5799, "step": 688 }, { "epoch": 138.0, "grad_norm": 8.41966438293457, "learning_rate": 4.312000000000001e-05, "loss": 0.5009, "step": 690 }, { "epoch": 138.4, "grad_norm": 6.029211521148682, "learning_rate": 4.3100000000000004e-05, "loss": 0.7605, "step": 692 }, { "epoch": 138.8, "grad_norm": 5.410158634185791, "learning_rate": 4.308e-05, "loss": 0.3634, "step": 694 }, { "epoch": 139.2, "grad_norm": 4.348201751708984, "learning_rate": 4.306e-05, "loss": 0.5489, "step": 696 }, { "epoch": 139.6, "grad_norm": 
3.9824752807617188, "learning_rate": 4.304e-05, "loss": 0.4561, "step": 698 }, { "epoch": 140.0, "grad_norm": 8.944303512573242, "learning_rate": 4.3020000000000005e-05, "loss": 0.4707, "step": 700 }, { "epoch": 140.4, "grad_norm": 5.632055282592773, "learning_rate": 4.3e-05, "loss": 0.5999, "step": 702 }, { "epoch": 140.8, "grad_norm": 14.070127487182617, "learning_rate": 4.2980000000000005e-05, "loss": 0.4984, "step": 704 }, { "epoch": 141.2, "grad_norm": 16.78105926513672, "learning_rate": 4.296e-05, "loss": 0.9005, "step": 706 }, { "epoch": 141.6, "grad_norm": 9.091694831848145, "learning_rate": 4.2940000000000006e-05, "loss": 0.9385, "step": 708 }, { "epoch": 142.0, "grad_norm": 14.769110679626465, "learning_rate": 4.292e-05, "loss": 0.9469, "step": 710 }, { "epoch": 142.4, "grad_norm": 17.92959213256836, "learning_rate": 4.29e-05, "loss": 1.4199, "step": 712 }, { "epoch": 142.8, "grad_norm": 9.447157859802246, "learning_rate": 4.288e-05, "loss": 0.769, "step": 714 }, { "epoch": 143.2, "grad_norm": 3.9615840911865234, "learning_rate": 4.286e-05, "loss": 0.5201, "step": 716 }, { "epoch": 143.6, "grad_norm": 5.852191925048828, "learning_rate": 4.284e-05, "loss": 0.5292, "step": 718 }, { "epoch": 144.0, "grad_norm": 5.750957012176514, "learning_rate": 4.282000000000001e-05, "loss": 0.5614, "step": 720 }, { "epoch": 144.4, "grad_norm": 15.76643180847168, "learning_rate": 4.2800000000000004e-05, "loss": 0.6244, "step": 722 }, { "epoch": 144.8, "grad_norm": 6.512340545654297, "learning_rate": 4.278e-05, "loss": 0.6127, "step": 724 }, { "epoch": 145.2, "grad_norm": 4.311427116394043, "learning_rate": 4.276e-05, "loss": 0.4129, "step": 726 }, { "epoch": 145.6, "grad_norm": 4.978346347808838, "learning_rate": 4.274e-05, "loss": 0.5292, "step": 728 }, { "epoch": 146.0, "grad_norm": 14.831403732299805, "learning_rate": 4.2720000000000004e-05, "loss": 0.5616, "step": 730 }, { "epoch": 146.4, "grad_norm": 5.795246124267578, "learning_rate": 4.27e-05, "loss": 0.4344, 
"step": 732 }, { "epoch": 146.8, "grad_norm": 5.248664855957031, "learning_rate": 4.2680000000000005e-05, "loss": 0.3454, "step": 734 }, { "epoch": 147.2, "grad_norm": 15.053390502929688, "learning_rate": 4.266e-05, "loss": 0.7484, "step": 736 }, { "epoch": 147.6, "grad_norm": 5.366586208343506, "learning_rate": 4.2640000000000005e-05, "loss": 0.4029, "step": 738 }, { "epoch": 148.0, "grad_norm": 6.9862284660339355, "learning_rate": 4.262e-05, "loss": 0.6294, "step": 740 }, { "epoch": 148.4, "grad_norm": 7.036198139190674, "learning_rate": 4.26e-05, "loss": 0.5422, "step": 742 }, { "epoch": 148.8, "grad_norm": 3.3616418838500977, "learning_rate": 4.258e-05, "loss": 0.4209, "step": 744 }, { "epoch": 149.2, "grad_norm": 4.155647277832031, "learning_rate": 4.256e-05, "loss": 0.4292, "step": 746 }, { "epoch": 149.6, "grad_norm": 8.1836519241333, "learning_rate": 4.254e-05, "loss": 0.4118, "step": 748 }, { "epoch": 150.0, "grad_norm": 4.456229209899902, "learning_rate": 4.2520000000000006e-05, "loss": 0.3975, "step": 750 }, { "epoch": 150.4, "grad_norm": 4.303975582122803, "learning_rate": 4.25e-05, "loss": 0.4041, "step": 752 }, { "epoch": 150.8, "grad_norm": 6.465883731842041, "learning_rate": 4.248e-05, "loss": 0.4129, "step": 754 }, { "epoch": 151.2, "grad_norm": 7.509527683258057, "learning_rate": 4.246e-05, "loss": 0.467, "step": 756 }, { "epoch": 151.6, "grad_norm": 4.677668571472168, "learning_rate": 4.244e-05, "loss": 0.4123, "step": 758 }, { "epoch": 152.0, "grad_norm": 5.819883823394775, "learning_rate": 4.2420000000000004e-05, "loss": 0.3235, "step": 760 }, { "epoch": 152.4, "grad_norm": 4.089188575744629, "learning_rate": 4.24e-05, "loss": 0.3023, "step": 762 }, { "epoch": 152.8, "grad_norm": 54.53089904785156, "learning_rate": 4.2380000000000004e-05, "loss": 0.4599, "step": 764 }, { "epoch": 153.2, "grad_norm": 4.931365013122559, "learning_rate": 4.236e-05, "loss": 0.3929, "step": 766 }, { "epoch": 153.6, "grad_norm": 6.279163360595703, "learning_rate": 
4.2340000000000005e-05, "loss": 0.4158, "step": 768 }, { "epoch": 154.0, "grad_norm": 9.854475021362305, "learning_rate": 4.232e-05, "loss": 0.4534, "step": 770 }, { "epoch": 154.4, "grad_norm": 16.251379013061523, "learning_rate": 4.23e-05, "loss": 0.5205, "step": 772 }, { "epoch": 154.8, "grad_norm": 22.267152786254883, "learning_rate": 4.228e-05, "loss": 0.7859, "step": 774 }, { "epoch": 155.2, "grad_norm": 38.15674591064453, "learning_rate": 4.226e-05, "loss": 0.6345, "step": 776 }, { "epoch": 155.6, "grad_norm": 5.160548210144043, "learning_rate": 4.224e-05, "loss": 0.4549, "step": 778 }, { "epoch": 156.0, "grad_norm": 14.376232147216797, "learning_rate": 4.2220000000000006e-05, "loss": 0.5506, "step": 780 }, { "epoch": 156.4, "grad_norm": 5.283304214477539, "learning_rate": 4.22e-05, "loss": 0.4041, "step": 782 }, { "epoch": 156.8, "grad_norm": 5.564827919006348, "learning_rate": 4.2180000000000006e-05, "loss": 0.4409, "step": 784 }, { "epoch": 157.2, "grad_norm": 3.5995585918426514, "learning_rate": 4.2159999999999996e-05, "loss": 0.3096, "step": 786 }, { "epoch": 157.6, "grad_norm": 5.079690933227539, "learning_rate": 4.214e-05, "loss": 0.4685, "step": 788 }, { "epoch": 158.0, "grad_norm": 4.095869064331055, "learning_rate": 4.212e-05, "loss": 0.3982, "step": 790 }, { "epoch": 158.4, "grad_norm": 11.241134643554688, "learning_rate": 4.21e-05, "loss": 0.5044, "step": 792 }, { "epoch": 158.8, "grad_norm": 5.442983627319336, "learning_rate": 4.2080000000000004e-05, "loss": 0.4976, "step": 794 }, { "epoch": 159.2, "grad_norm": 4.381131172180176, "learning_rate": 4.206e-05, "loss": 0.2917, "step": 796 }, { "epoch": 159.6, "grad_norm": 8.443836212158203, "learning_rate": 4.2040000000000004e-05, "loss": 0.4055, "step": 798 }, { "epoch": 160.0, "grad_norm": 5.472066402435303, "learning_rate": 4.202e-05, "loss": 0.3386, "step": 800 }, { "epoch": 160.0, "eval_cer": 0.8238341968911918, "eval_loss": 5.2229905128479, "eval_runtime": 10.8153, "eval_samples_per_second": 
0.925, "eval_steps_per_second": 0.185, "step": 800 }, { "epoch": 160.4, "grad_norm": 7.8397321701049805, "learning_rate": 4.2e-05, "loss": 0.5112, "step": 802 }, { "epoch": 160.8, "grad_norm": 6.320011138916016, "learning_rate": 4.198e-05, "loss": 0.3019, "step": 804 }, { "epoch": 161.2, "grad_norm": 37.264060974121094, "learning_rate": 4.196e-05, "loss": 0.6536, "step": 806 }, { "epoch": 161.6, "grad_norm": 3.84326171875, "learning_rate": 4.194e-05, "loss": 0.3039, "step": 808 }, { "epoch": 162.0, "grad_norm": 7.04636812210083, "learning_rate": 4.1920000000000005e-05, "loss": 0.5139, "step": 810 }, { "epoch": 162.4, "grad_norm": 6.51361083984375, "learning_rate": 4.19e-05, "loss": 0.444, "step": 812 }, { "epoch": 162.8, "grad_norm": 5.69830322265625, "learning_rate": 4.1880000000000006e-05, "loss": 0.2855, "step": 814 }, { "epoch": 163.2, "grad_norm": 3.7491302490234375, "learning_rate": 4.186e-05, "loss": 0.4162, "step": 816 }, { "epoch": 163.6, "grad_norm": 3.095339059829712, "learning_rate": 4.184e-05, "loss": 0.3416, "step": 818 }, { "epoch": 164.0, "grad_norm": 5.921448707580566, "learning_rate": 4.182e-05, "loss": 0.2932, "step": 820 }, { "epoch": 164.4, "grad_norm": 6.84466552734375, "learning_rate": 4.18e-05, "loss": 0.3855, "step": 822 }, { "epoch": 164.8, "grad_norm": 5.921712875366211, "learning_rate": 4.178e-05, "loss": 0.5926, "step": 824 }, { "epoch": 165.2, "grad_norm": 5.396553039550781, "learning_rate": 4.176000000000001e-05, "loss": 0.3847, "step": 826 }, { "epoch": 165.6, "grad_norm": 8.610844612121582, "learning_rate": 4.1740000000000004e-05, "loss": 0.5524, "step": 828 }, { "epoch": 166.0, "grad_norm": 8.62914752960205, "learning_rate": 4.172e-05, "loss": 0.538, "step": 830 }, { "epoch": 166.4, "grad_norm": 5.01972770690918, "learning_rate": 4.1710000000000006e-05, "loss": 0.3925, "step": 832 }, { "epoch": 166.8, "grad_norm": 10.730269432067871, "learning_rate": 4.169e-05, "loss": 0.4757, "step": 834 }, { "epoch": 167.2, "grad_norm": 
6.459101676940918, "learning_rate": 4.1670000000000006e-05, "loss": 0.4385, "step": 836 }, { "epoch": 167.6, "grad_norm": 7.109572410583496, "learning_rate": 4.165e-05, "loss": 0.476, "step": 838 }, { "epoch": 168.0, "grad_norm": 10.757712364196777, "learning_rate": 4.163e-05, "loss": 0.4992, "step": 840 }, { "epoch": 168.4, "grad_norm": 7.3937458992004395, "learning_rate": 4.161e-05, "loss": 0.5778, "step": 842 }, { "epoch": 168.8, "grad_norm": 6.3021440505981445, "learning_rate": 4.159e-05, "loss": 0.4303, "step": 844 }, { "epoch": 169.2, "grad_norm": 3.6656627655029297, "learning_rate": 4.1570000000000003e-05, "loss": 0.3308, "step": 846 }, { "epoch": 169.6, "grad_norm": 2.997938632965088, "learning_rate": 4.155e-05, "loss": 0.2596, "step": 848 }, { "epoch": 170.0, "grad_norm": 6.958062648773193, "learning_rate": 4.1530000000000004e-05, "loss": 0.4394, "step": 850 }, { "epoch": 170.4, "grad_norm": 6.756046772003174, "learning_rate": 4.151000000000001e-05, "loss": 0.3677, "step": 852 }, { "epoch": 170.8, "grad_norm": 3.2689120769500732, "learning_rate": 4.1490000000000004e-05, "loss": 0.2873, "step": 854 }, { "epoch": 171.2, "grad_norm": 5.314024448394775, "learning_rate": 4.147e-05, "loss": 0.3909, "step": 856 }, { "epoch": 171.6, "grad_norm": 3.7270002365112305, "learning_rate": 4.145e-05, "loss": 0.3909, "step": 858 }, { "epoch": 172.0, "grad_norm": 6.9819512367248535, "learning_rate": 4.143e-05, "loss": 0.2846, "step": 860 }, { "epoch": 172.4, "grad_norm": 2.962066411972046, "learning_rate": 4.1410000000000005e-05, "loss": 0.2619, "step": 862 }, { "epoch": 172.8, "grad_norm": 3.569014072418213, "learning_rate": 4.139e-05, "loss": 0.2781, "step": 864 }, { "epoch": 173.2, "grad_norm": 3.527249574661255, "learning_rate": 4.1370000000000005e-05, "loss": 0.3842, "step": 866 }, { "epoch": 173.6, "grad_norm": 4.2352399826049805, "learning_rate": 4.135e-05, "loss": 0.335, "step": 868 }, { "epoch": 174.0, "grad_norm": 6.447229385375977, "learning_rate": 4.133e-05, 
"loss": 0.3222, "step": 870 }, { "epoch": 174.4, "grad_norm": 11.106180191040039, "learning_rate": 4.131e-05, "loss": 0.3268, "step": 872 }, { "epoch": 174.8, "grad_norm": 3.3395438194274902, "learning_rate": 4.129e-05, "loss": 0.3935, "step": 874 }, { "epoch": 175.2, "grad_norm": 3.479788064956665, "learning_rate": 4.127e-05, "loss": 0.2443, "step": 876 }, { "epoch": 175.6, "grad_norm": 4.073863506317139, "learning_rate": 4.125e-05, "loss": 0.2563, "step": 878 }, { "epoch": 176.0, "grad_norm": 5.981196403503418, "learning_rate": 4.123e-05, "loss": 0.4802, "step": 880 }, { "epoch": 176.4, "grad_norm": 4.173013210296631, "learning_rate": 4.121000000000001e-05, "loss": 0.2914, "step": 882 }, { "epoch": 176.8, "grad_norm": 2.971569776535034, "learning_rate": 4.1190000000000004e-05, "loss": 0.3114, "step": 884 }, { "epoch": 177.2, "grad_norm": 6.021455764770508, "learning_rate": 4.117e-05, "loss": 0.3764, "step": 886 }, { "epoch": 177.6, "grad_norm": 4.878323554992676, "learning_rate": 4.115e-05, "loss": 0.3873, "step": 888 }, { "epoch": 178.0, "grad_norm": 3.8538734912872314, "learning_rate": 4.113e-05, "loss": 0.2837, "step": 890 }, { "epoch": 178.4, "grad_norm": 3.9948248863220215, "learning_rate": 4.1110000000000005e-05, "loss": 0.3094, "step": 892 }, { "epoch": 178.8, "grad_norm": 5.816072940826416, "learning_rate": 4.109e-05, "loss": 0.2934, "step": 894 }, { "epoch": 179.2, "grad_norm": 3.113535165786743, "learning_rate": 4.1070000000000005e-05, "loss": 0.3557, "step": 896 }, { "epoch": 179.6, "grad_norm": 6.137686729431152, "learning_rate": 4.105e-05, "loss": 0.4045, "step": 898 }, { "epoch": 180.0, "grad_norm": 3.5662405490875244, "learning_rate": 4.103e-05, "loss": 0.3519, "step": 900 }, { "epoch": 180.4, "grad_norm": 3.8303089141845703, "learning_rate": 4.101e-05, "loss": 0.3214, "step": 902 }, { "epoch": 180.8, "grad_norm": 9.166275024414062, "learning_rate": 4.099e-05, "loss": 0.4848, "step": 904 }, { "epoch": 181.2, "grad_norm": 10.577348709106445, 
"learning_rate": 4.097e-05, "loss": 0.384, "step": 906 }, { "epoch": 181.6, "grad_norm": 16.208290100097656, "learning_rate": 4.095e-05, "loss": 0.3965, "step": 908 }, { "epoch": 182.0, "grad_norm": 10.099300384521484, "learning_rate": 4.093e-05, "loss": 0.5272, "step": 910 }, { "epoch": 182.4, "grad_norm": 10.671839714050293, "learning_rate": 4.0910000000000006e-05, "loss": 0.43, "step": 912 }, { "epoch": 182.8, "grad_norm": 5.3112006187438965, "learning_rate": 4.089e-05, "loss": 0.3257, "step": 914 }, { "epoch": 183.2, "grad_norm": 4.102912902832031, "learning_rate": 4.087e-05, "loss": 0.3654, "step": 916 }, { "epoch": 183.6, "grad_norm": 5.3000102043151855, "learning_rate": 4.085e-05, "loss": 0.3863, "step": 918 }, { "epoch": 184.0, "grad_norm": 5.4898786544799805, "learning_rate": 4.083e-05, "loss": 0.3195, "step": 920 }, { "epoch": 184.4, "grad_norm": 5.776346683502197, "learning_rate": 4.0810000000000004e-05, "loss": 0.4312, "step": 922 }, { "epoch": 184.8, "grad_norm": 4.6862568855285645, "learning_rate": 4.079e-05, "loss": 0.3065, "step": 924 }, { "epoch": 185.2, "grad_norm": 10.953518867492676, "learning_rate": 4.0770000000000004e-05, "loss": 0.4489, "step": 926 }, { "epoch": 185.6, "grad_norm": 8.787673950195312, "learning_rate": 4.075e-05, "loss": 0.4034, "step": 928 }, { "epoch": 186.0, "grad_norm": 9.470020294189453, "learning_rate": 4.0730000000000005e-05, "loss": 0.3861, "step": 930 }, { "epoch": 186.4, "grad_norm": 4.024227142333984, "learning_rate": 4.071e-05, "loss": 0.3834, "step": 932 }, { "epoch": 186.8, "grad_norm": 5.861637592315674, "learning_rate": 4.069e-05, "loss": 0.3243, "step": 934 }, { "epoch": 187.2, "grad_norm": 3.784006357192993, "learning_rate": 4.067e-05, "loss": 0.389, "step": 936 }, { "epoch": 187.6, "grad_norm": 14.943076133728027, "learning_rate": 4.065e-05, "loss": 0.3693, "step": 938 }, { "epoch": 188.0, "grad_norm": 4.281869888305664, "learning_rate": 4.063e-05, "loss": 0.4012, "step": 940 }, { "epoch": 188.4, "grad_norm": 
27.98006248474121, "learning_rate": 4.0610000000000006e-05, "loss": 0.3778, "step": 942 }, { "epoch": 188.8, "grad_norm": 4.209466934204102, "learning_rate": 4.059e-05, "loss": 0.3249, "step": 944 }, { "epoch": 189.2, "grad_norm": 4.692903518676758, "learning_rate": 4.057e-05, "loss": 0.3127, "step": 946 }, { "epoch": 189.6, "grad_norm": 5.356520175933838, "learning_rate": 4.055e-05, "loss": 0.3758, "step": 948 }, { "epoch": 190.0, "grad_norm": 3.5437092781066895, "learning_rate": 4.053e-05, "loss": 0.3021, "step": 950 }, { "epoch": 190.4, "grad_norm": 4.578685760498047, "learning_rate": 4.0510000000000003e-05, "loss": 0.2827, "step": 952 }, { "epoch": 190.8, "grad_norm": 12.183197975158691, "learning_rate": 4.049e-05, "loss": 0.326, "step": 954 }, { "epoch": 191.2, "grad_norm": 8.277548789978027, "learning_rate": 4.0470000000000004e-05, "loss": 0.5601, "step": 956 }, { "epoch": 191.6, "grad_norm": 5.226196765899658, "learning_rate": 4.045000000000001e-05, "loss": 0.3032, "step": 958 }, { "epoch": 192.0, "grad_norm": 9.146272659301758, "learning_rate": 4.0430000000000004e-05, "loss": 0.4875, "step": 960 }, { "epoch": 192.4, "grad_norm": 3.903257369995117, "learning_rate": 4.041e-05, "loss": 0.315, "step": 962 }, { "epoch": 192.8, "grad_norm": 7.387937545776367, "learning_rate": 4.039e-05, "loss": 0.3239, "step": 964 }, { "epoch": 193.2, "grad_norm": 3.028843402862549, "learning_rate": 4.037e-05, "loss": 0.3818, "step": 966 }, { "epoch": 193.6, "grad_norm": 3.2991795539855957, "learning_rate": 4.0350000000000005e-05, "loss": 0.285, "step": 968 }, { "epoch": 194.0, "grad_norm": 5.26383113861084, "learning_rate": 4.033e-05, "loss": 0.3087, "step": 970 }, { "epoch": 194.4, "grad_norm": 4.984470844268799, "learning_rate": 4.0310000000000005e-05, "loss": 0.309, "step": 972 }, { "epoch": 194.8, "grad_norm": 4.118978977203369, "learning_rate": 4.029e-05, "loss": 0.3237, "step": 974 }, { "epoch": 195.2, "grad_norm": 4.815092086791992, "learning_rate": 4.027e-05, "loss": 
0.3025, "step": 976 }, { "epoch": 195.6, "grad_norm": 4.258254528045654, "learning_rate": 4.025e-05, "loss": 0.3048, "step": 978 }, { "epoch": 196.0, "grad_norm": 33.65522003173828, "learning_rate": 4.023e-05, "loss": 0.4716, "step": 980 }, { "epoch": 196.4, "grad_norm": 5.594071865081787, "learning_rate": 4.021e-05, "loss": 0.4904, "step": 982 }, { "epoch": 196.8, "grad_norm": 5.004247188568115, "learning_rate": 4.019e-05, "loss": 0.364, "step": 984 }, { "epoch": 197.2, "grad_norm": 1.9569050073623657, "learning_rate": 4.017e-05, "loss": 0.1931, "step": 986 }, { "epoch": 197.6, "grad_norm": 4.696444511413574, "learning_rate": 4.015000000000001e-05, "loss": 0.4191, "step": 988 }, { "epoch": 198.0, "grad_norm": 3.742265462875366, "learning_rate": 4.0130000000000004e-05, "loss": 0.2607, "step": 990 }, { "epoch": 198.4, "grad_norm": 7.665576457977295, "learning_rate": 4.011e-05, "loss": 0.3239, "step": 992 }, { "epoch": 198.8, "grad_norm": 3.5105903148651123, "learning_rate": 4.009e-05, "loss": 0.234, "step": 994 }, { "epoch": 199.2, "grad_norm": 4.643097400665283, "learning_rate": 4.007e-05, "loss": 0.3226, "step": 996 }, { "epoch": 199.6, "grad_norm": 3.738030195236206, "learning_rate": 4.0050000000000004e-05, "loss": 0.3312, "step": 998 }, { "epoch": 200.0, "grad_norm": 3.953299045562744, "learning_rate": 4.003e-05, "loss": 0.3001, "step": 1000 }, { "epoch": 200.0, "eval_cer": 0.7409326424870466, "eval_loss": 4.802455902099609, "eval_runtime": 7.4095, "eval_samples_per_second": 1.35, "eval_steps_per_second": 0.27, "step": 1000 }, { "epoch": 200.4, "grad_norm": 3.3567326068878174, "learning_rate": 4.0010000000000005e-05, "loss": 0.2681, "step": 1002 }, { "epoch": 200.8, "grad_norm": 8.357306480407715, "learning_rate": 3.999e-05, "loss": 0.3738, "step": 1004 }, { "epoch": 201.2, "grad_norm": 10.400415420532227, "learning_rate": 3.9970000000000005e-05, "loss": 0.2868, "step": 1006 }, { "epoch": 201.6, "grad_norm": 4.407706260681152, "learning_rate": 3.995e-05, "loss": 
0.4007, "step": 1008 }, { "epoch": 202.0, "grad_norm": 9.267544746398926, "learning_rate": 3.993e-05, "loss": 0.9026, "step": 1010 }, { "epoch": 202.4, "grad_norm": 5.621496677398682, "learning_rate": 3.991e-05, "loss": 0.4489, "step": 1012 }, { "epoch": 202.8, "grad_norm": 3.558072090148926, "learning_rate": 3.989e-05, "loss": 0.2298, "step": 1014 }, { "epoch": 203.2, "grad_norm": 2.9873435497283936, "learning_rate": 3.987e-05, "loss": 0.1987, "step": 1016 }, { "epoch": 203.6, "grad_norm": 5.322385787963867, "learning_rate": 3.9850000000000006e-05, "loss": 0.2895, "step": 1018 }, { "epoch": 204.0, "grad_norm": 7.5180888175964355, "learning_rate": 3.983e-05, "loss": 0.3463, "step": 1020 }, { "epoch": 204.4, "grad_norm": 6.719485759735107, "learning_rate": 3.981e-05, "loss": 0.3048, "step": 1022 }, { "epoch": 204.8, "grad_norm": 6.660239219665527, "learning_rate": 3.979e-05, "loss": 0.4609, "step": 1024 }, { "epoch": 205.2, "grad_norm": 5.768293857574463, "learning_rate": 3.977e-05, "loss": 0.249, "step": 1026 }, { "epoch": 205.6, "grad_norm": 3.0675160884857178, "learning_rate": 3.9750000000000004e-05, "loss": 0.2354, "step": 1028 }, { "epoch": 206.0, "grad_norm": 11.294059753417969, "learning_rate": 3.973e-05, "loss": 0.3684, "step": 1030 }, { "epoch": 206.4, "grad_norm": 3.6871910095214844, "learning_rate": 3.9710000000000004e-05, "loss": 0.2779, "step": 1032 }, { "epoch": 206.8, "grad_norm": 6.131875038146973, "learning_rate": 3.969e-05, "loss": 0.3156, "step": 1034 }, { "epoch": 207.2, "grad_norm": 2.9990296363830566, "learning_rate": 3.9670000000000005e-05, "loss": 0.2331, "step": 1036 }, { "epoch": 207.6, "grad_norm": 18.472679138183594, "learning_rate": 3.965e-05, "loss": 0.2604, "step": 1038 }, { "epoch": 208.0, "grad_norm": 6.045041561126709, "learning_rate": 3.963e-05, "loss": 0.3675, "step": 1040 }, { "epoch": 208.4, "grad_norm": 3.6173176765441895, "learning_rate": 3.961e-05, "loss": 0.2397, "step": 1042 }, { "epoch": 208.8, "grad_norm": 
7.163242340087891, "learning_rate": 3.959e-05, "loss": 0.3302, "step": 1044 }, { "epoch": 209.2, "grad_norm": 3.0550074577331543, "learning_rate": 3.957e-05, "loss": 0.2007, "step": 1046 }, { "epoch": 209.6, "grad_norm": 3.970686197280884, "learning_rate": 3.9550000000000006e-05, "loss": 0.2095, "step": 1048 }, { "epoch": 210.0, "grad_norm": 5.858161449432373, "learning_rate": 3.953e-05, "loss": 0.3541, "step": 1050 }, { "epoch": 210.4, "grad_norm": 8.229304313659668, "learning_rate": 3.951e-05, "loss": 0.3848, "step": 1052 }, { "epoch": 210.8, "grad_norm": 9.068227767944336, "learning_rate": 3.9489999999999996e-05, "loss": 0.3379, "step": 1054 }, { "epoch": 211.2, "grad_norm": 7.604948043823242, "learning_rate": 3.947e-05, "loss": 0.337, "step": 1056 }, { "epoch": 211.6, "grad_norm": 7.514566898345947, "learning_rate": 3.9450000000000003e-05, "loss": 0.3186, "step": 1058 }, { "epoch": 212.0, "grad_norm": 3.127157688140869, "learning_rate": 3.943e-05, "loss": 0.3489, "step": 1060 }, { "epoch": 212.4, "grad_norm": 5.082261085510254, "learning_rate": 3.9410000000000004e-05, "loss": 0.2547, "step": 1062 }, { "epoch": 212.8, "grad_norm": 39.13839340209961, "learning_rate": 3.939e-05, "loss": 0.2956, "step": 1064 }, { "epoch": 213.2, "grad_norm": 5.479771614074707, "learning_rate": 3.9370000000000004e-05, "loss": 0.4028, "step": 1066 }, { "epoch": 213.6, "grad_norm": 8.011462211608887, "learning_rate": 3.935e-05, "loss": 0.3623, "step": 1068 }, { "epoch": 214.0, "grad_norm": 13.533524513244629, "learning_rate": 3.933e-05, "loss": 0.4352, "step": 1070 }, { "epoch": 214.4, "grad_norm": 12.545663833618164, "learning_rate": 3.931e-05, "loss": 0.3419, "step": 1072 }, { "epoch": 214.8, "grad_norm": 8.803689956665039, "learning_rate": 3.9290000000000005e-05, "loss": 0.5071, "step": 1074 }, { "epoch": 215.2, "grad_norm": 16.848220825195312, "learning_rate": 3.927e-05, "loss": 0.5142, "step": 1076 }, { "epoch": 215.6, "grad_norm": 11.03078556060791, "learning_rate": 
3.9250000000000005e-05, "loss": 0.5675, "step": 1078 }, { "epoch": 216.0, "grad_norm": 5.382646560668945, "learning_rate": 3.923e-05, "loss": 0.3914, "step": 1080 }, { "epoch": 216.4, "grad_norm": 16.76767349243164, "learning_rate": 3.921e-05, "loss": 0.9283, "step": 1082 }, { "epoch": 216.8, "grad_norm": 10.466293334960938, "learning_rate": 3.919e-05, "loss": 0.6276, "step": 1084 }, { "epoch": 217.2, "grad_norm": 6.433579921722412, "learning_rate": 3.917e-05, "loss": 0.3376, "step": 1086 }, { "epoch": 217.6, "grad_norm": 29.134021759033203, "learning_rate": 3.915e-05, "loss": 0.4844, "step": 1088 }, { "epoch": 218.0, "grad_norm": 6.599155426025391, "learning_rate": 3.913e-05, "loss": 0.3107, "step": 1090 }, { "epoch": 218.4, "grad_norm": 4.010183334350586, "learning_rate": 3.911e-05, "loss": 0.3493, "step": 1092 }, { "epoch": 218.8, "grad_norm": 99.7288589477539, "learning_rate": 3.909000000000001e-05, "loss": 0.2908, "step": 1094 }, { "epoch": 219.2, "grad_norm": 4.7976250648498535, "learning_rate": 3.9070000000000004e-05, "loss": 0.3458, "step": 1096 }, { "epoch": 219.6, "grad_norm": 2.492018461227417, "learning_rate": 3.905e-05, "loss": 0.2232, "step": 1098 }, { "epoch": 220.0, "grad_norm": 2.7591733932495117, "learning_rate": 3.903e-05, "loss": 0.2541, "step": 1100 }, { "epoch": 220.4, "grad_norm": 3.919055700302124, "learning_rate": 3.901e-05, "loss": 0.2872, "step": 1102 }, { "epoch": 220.8, "grad_norm": 3.4302217960357666, "learning_rate": 3.8990000000000004e-05, "loss": 0.2587, "step": 1104 }, { "epoch": 221.2, "grad_norm": 4.06305456161499, "learning_rate": 3.897e-05, "loss": 0.2339, "step": 1106 }, { "epoch": 221.6, "grad_norm": 2.6143205165863037, "learning_rate": 3.8950000000000005e-05, "loss": 0.2275, "step": 1108 }, { "epoch": 222.0, "grad_norm": 4.553073883056641, "learning_rate": 3.893e-05, "loss": 0.3883, "step": 1110 }, { "epoch": 222.4, "grad_norm": 4.542922019958496, "learning_rate": 3.8910000000000005e-05, "loss": 0.257, "step": 1112 }, { 
"epoch": 222.8, "grad_norm": 5.6276092529296875, "learning_rate": 3.889e-05, "loss": 0.3673, "step": 1114 }, { "epoch": 223.2, "grad_norm": 2.930835008621216, "learning_rate": 3.887e-05, "loss": 0.2694, "step": 1116 }, { "epoch": 223.6, "grad_norm": 2.3071775436401367, "learning_rate": 3.885e-05, "loss": 0.2537, "step": 1118 }, { "epoch": 224.0, "grad_norm": 3.7658379077911377, "learning_rate": 3.883e-05, "loss": 0.286, "step": 1120 }, { "epoch": 224.4, "grad_norm": 3.5590548515319824, "learning_rate": 3.881e-05, "loss": 0.1906, "step": 1122 }, { "epoch": 224.8, "grad_norm": 3.773296594619751, "learning_rate": 3.8790000000000006e-05, "loss": 0.3078, "step": 1124 }, { "epoch": 225.2, "grad_norm": 2.050581216812134, "learning_rate": 3.877e-05, "loss": 0.2273, "step": 1126 }, { "epoch": 225.6, "grad_norm": 3.5804643630981445, "learning_rate": 3.875e-05, "loss": 0.2926, "step": 1128 }, { "epoch": 226.0, "grad_norm": 3.1876752376556396, "learning_rate": 3.873e-05, "loss": 0.2541, "step": 1130 }, { "epoch": 226.4, "grad_norm": 3.945162773132324, "learning_rate": 3.871e-05, "loss": 0.2563, "step": 1132 }, { "epoch": 226.8, "grad_norm": 7.031915664672852, "learning_rate": 3.8690000000000004e-05, "loss": 0.339, "step": 1134 }, { "epoch": 227.2, "grad_norm": 3.9610836505889893, "learning_rate": 3.867e-05, "loss": 0.2121, "step": 1136 }, { "epoch": 227.6, "grad_norm": 7.1289963722229, "learning_rate": 3.8650000000000004e-05, "loss": 0.2645, "step": 1138 }, { "epoch": 228.0, "grad_norm": 4.613805294036865, "learning_rate": 3.863e-05, "loss": 0.297, "step": 1140 }, { "epoch": 228.4, "grad_norm": 4.274087905883789, "learning_rate": 3.8610000000000005e-05, "loss": 0.282, "step": 1142 }, { "epoch": 228.8, "grad_norm": 4.384876728057861, "learning_rate": 3.859e-05, "loss": 0.2474, "step": 1144 }, { "epoch": 229.2, "grad_norm": 5.20656681060791, "learning_rate": 3.857e-05, "loss": 0.3751, "step": 1146 }, { "epoch": 229.6, "grad_norm": 3.3969218730926514, "learning_rate": 3.855e-05, 
"loss": 0.1836, "step": 1148 }, { "epoch": 230.0, "grad_norm": 3.364154577255249, "learning_rate": 3.853e-05, "loss": 0.3004, "step": 1150 }, { "epoch": 230.4, "grad_norm": 5.171586036682129, "learning_rate": 3.851e-05, "loss": 0.2444, "step": 1152 }, { "epoch": 230.8, "grad_norm": 5.799811840057373, "learning_rate": 3.8490000000000006e-05, "loss": 0.2185, "step": 1154 }, { "epoch": 231.2, "grad_norm": 6.083639621734619, "learning_rate": 3.847e-05, "loss": 0.2861, "step": 1156 }, { "epoch": 231.6, "grad_norm": 3.7272956371307373, "learning_rate": 3.845e-05, "loss": 0.2717, "step": 1158 }, { "epoch": 232.0, "grad_norm": 4.329137802124023, "learning_rate": 3.8429999999999996e-05, "loss": 0.2602, "step": 1160 }, { "epoch": 232.4, "grad_norm": 4.413944244384766, "learning_rate": 3.841e-05, "loss": 0.2011, "step": 1162 }, { "epoch": 232.8, "grad_norm": 3.931156873703003, "learning_rate": 3.8390000000000003e-05, "loss": 0.2651, "step": 1164 }, { "epoch": 233.2, "grad_norm": 5.2897257804870605, "learning_rate": 3.837e-05, "loss": 0.2615, "step": 1166 }, { "epoch": 233.6, "grad_norm": 4.5546674728393555, "learning_rate": 3.8350000000000004e-05, "loss": 0.2094, "step": 1168 }, { "epoch": 234.0, "grad_norm": 3.8186538219451904, "learning_rate": 3.833e-05, "loss": 0.3153, "step": 1170 }, { "epoch": 234.4, "grad_norm": 3.316904067993164, "learning_rate": 3.8310000000000004e-05, "loss": 0.2352, "step": 1172 }, { "epoch": 234.8, "grad_norm": 4.222316265106201, "learning_rate": 3.829e-05, "loss": 0.234, "step": 1174 }, { "epoch": 235.2, "grad_norm": 8.480021476745605, "learning_rate": 3.827e-05, "loss": 0.2356, "step": 1176 }, { "epoch": 235.6, "grad_norm": 6.756654262542725, "learning_rate": 3.825e-05, "loss": 0.3142, "step": 1178 }, { "epoch": 236.0, "grad_norm": 3.7328832149505615, "learning_rate": 3.823e-05, "loss": 0.3489, "step": 1180 }, { "epoch": 236.4, "grad_norm": 5.274710655212402, "learning_rate": 3.821e-05, "loss": 0.2233, "step": 1182 }, { "epoch": 236.8, 
"grad_norm": 6.157715797424316, "learning_rate": 3.8190000000000005e-05, "loss": 0.3511, "step": 1184 }, { "epoch": 237.2, "grad_norm": 5.26965856552124, "learning_rate": 3.817e-05, "loss": 0.2319, "step": 1186 }, { "epoch": 237.6, "grad_norm": 3.7509121894836426, "learning_rate": 3.8150000000000006e-05, "loss": 0.3101, "step": 1188 }, { "epoch": 238.0, "grad_norm": 4.383106231689453, "learning_rate": 3.8129999999999996e-05, "loss": 0.3157, "step": 1190 }, { "epoch": 238.4, "grad_norm": 2.763468027114868, "learning_rate": 3.811e-05, "loss": 0.2371, "step": 1192 }, { "epoch": 238.8, "grad_norm": 5.298147201538086, "learning_rate": 3.809e-05, "loss": 0.3751, "step": 1194 }, { "epoch": 239.2, "grad_norm": 4.905087471008301, "learning_rate": 3.807e-05, "loss": 0.2113, "step": 1196 }, { "epoch": 239.6, "grad_norm": 3.809682846069336, "learning_rate": 3.805e-05, "loss": 0.1827, "step": 1198 }, { "epoch": 240.0, "grad_norm": 19.97643280029297, "learning_rate": 3.803000000000001e-05, "loss": 0.7716, "step": 1200 }, { "epoch": 240.0, "eval_cer": 0.694300518134715, "eval_loss": 3.659235954284668, "eval_runtime": 10.2179, "eval_samples_per_second": 0.979, "eval_steps_per_second": 0.196, "step": 1200 }, { "epoch": 240.4, "grad_norm": 3.243964433670044, "learning_rate": 3.8010000000000004e-05, "loss": 0.2496, "step": 1202 }, { "epoch": 240.8, "grad_norm": 5.125737190246582, "learning_rate": 3.799e-05, "loss": 0.4178, "step": 1204 }, { "epoch": 241.2, "grad_norm": 9.332146644592285, "learning_rate": 3.797e-05, "loss": 0.3246, "step": 1206 }, { "epoch": 241.6, "grad_norm": 9.066664695739746, "learning_rate": 3.795e-05, "loss": 0.3466, "step": 1208 }, { "epoch": 242.0, "grad_norm": 24.088499069213867, "learning_rate": 3.7930000000000004e-05, "loss": 0.4467, "step": 1210 }, { "epoch": 242.4, "grad_norm": 10.76351547241211, "learning_rate": 3.791e-05, "loss": 0.3909, "step": 1212 }, { "epoch": 242.8, "grad_norm": 3.6657209396362305, "learning_rate": 3.7890000000000005e-05, "loss": 
0.2672, "step": 1214 }, { "epoch": 243.2, "grad_norm": 4.568761825561523, "learning_rate": 3.787e-05, "loss": 0.3177, "step": 1216 }, { "epoch": 243.6, "grad_norm": 3.740229845046997, "learning_rate": 3.7850000000000005e-05, "loss": 0.2277, "step": 1218 }, { "epoch": 244.0, "grad_norm": 2.7282726764678955, "learning_rate": 3.783e-05, "loss": 0.2114, "step": 1220 }, { "epoch": 244.4, "grad_norm": 5.805517673492432, "learning_rate": 3.781e-05, "loss": 0.297, "step": 1222 }, { "epoch": 244.8, "grad_norm": 2.7406880855560303, "learning_rate": 3.779e-05, "loss": 0.227, "step": 1224 }, { "epoch": 245.2, "grad_norm": 9.11500358581543, "learning_rate": 3.777e-05, "loss": 0.257, "step": 1226 }, { "epoch": 245.6, "grad_norm": 7.219081878662109, "learning_rate": 3.775e-05, "loss": 0.2907, "step": 1228 }, { "epoch": 246.0, "grad_norm": 6.072811603546143, "learning_rate": 3.7730000000000006e-05, "loss": 0.3287, "step": 1230 }, { "epoch": 246.4, "grad_norm": 4.9385199546813965, "learning_rate": 3.771e-05, "loss": 0.3258, "step": 1232 }, { "epoch": 246.8, "grad_norm": 7.658295154571533, "learning_rate": 3.769e-05, "loss": 0.2299, "step": 1234 }, { "epoch": 247.2, "grad_norm": 2.7563042640686035, "learning_rate": 3.767e-05, "loss": 0.2529, "step": 1236 }, { "epoch": 247.6, "grad_norm": 8.167572975158691, "learning_rate": 3.765e-05, "loss": 0.3218, "step": 1238 }, { "epoch": 248.0, "grad_norm": 2.6507465839385986, "learning_rate": 3.7630000000000004e-05, "loss": 0.2071, "step": 1240 }, { "epoch": 248.4, "grad_norm": 3.1226236820220947, "learning_rate": 3.761e-05, "loss": 0.17, "step": 1242 }, { "epoch": 248.8, "grad_norm": 5.431761741638184, "learning_rate": 3.7590000000000004e-05, "loss": 0.2925, "step": 1244 }, { "epoch": 249.2, "grad_norm": 5.579361438751221, "learning_rate": 3.757e-05, "loss": 0.2446, "step": 1246 }, { "epoch": 249.6, "grad_norm": 3.6551694869995117, "learning_rate": 3.7550000000000005e-05, "loss": 0.2126, "step": 1248 }, { "epoch": 250.0, "grad_norm": 
3.2459394931793213, "learning_rate": 3.753e-05, "loss": 0.2035, "step": 1250 }, { "epoch": 250.4, "grad_norm": 4.810949802398682, "learning_rate": 3.751e-05, "loss": 0.2693, "step": 1252 }, { "epoch": 250.8, "grad_norm": 4.148841381072998, "learning_rate": 3.749e-05, "loss": 0.2059, "step": 1254 }, { "epoch": 251.2, "grad_norm": 3.1036810874938965, "learning_rate": 3.747e-05, "loss": 0.2232, "step": 1256 }, { "epoch": 251.6, "grad_norm": 4.422311305999756, "learning_rate": 3.745e-05, "loss": 0.2483, "step": 1258 }, { "epoch": 252.0, "grad_norm": 3.690707206726074, "learning_rate": 3.7430000000000006e-05, "loss": 0.3028, "step": 1260 }, { "epoch": 252.4, "grad_norm": 4.438896656036377, "learning_rate": 3.741e-05, "loss": 0.2793, "step": 1262 }, { "epoch": 252.8, "grad_norm": 3.033923625946045, "learning_rate": 3.739e-05, "loss": 0.291, "step": 1264 }, { "epoch": 253.2, "grad_norm": 2.898509979248047, "learning_rate": 3.7369999999999996e-05, "loss": 0.1662, "step": 1266 }, { "epoch": 253.6, "grad_norm": 3.010573625564575, "learning_rate": 3.735e-05, "loss": 0.1718, "step": 1268 }, { "epoch": 254.0, "grad_norm": 2.4309346675872803, "learning_rate": 3.7330000000000003e-05, "loss": 0.2498, "step": 1270 }, { "epoch": 254.4, "grad_norm": 3.305250883102417, "learning_rate": 3.731e-05, "loss": 0.218, "step": 1272 }, { "epoch": 254.8, "grad_norm": 4.035239219665527, "learning_rate": 3.7290000000000004e-05, "loss": 0.2192, "step": 1274 }, { "epoch": 255.2, "grad_norm": 3.371534585952759, "learning_rate": 3.727e-05, "loss": 0.2052, "step": 1276 }, { "epoch": 255.6, "grad_norm": 4.402913570404053, "learning_rate": 3.7250000000000004e-05, "loss": 0.2361, "step": 1278 }, { "epoch": 256.0, "grad_norm": 4.0324225425720215, "learning_rate": 3.723e-05, "loss": 0.2612, "step": 1280 }, { "epoch": 256.4, "grad_norm": 3.957167863845825, "learning_rate": 3.721e-05, "loss": 0.2234, "step": 1282 }, { "epoch": 256.8, "grad_norm": 3.2119905948638916, "learning_rate": 3.719e-05, "loss": 
0.2074, "step": 1284 }, { "epoch": 257.2, "grad_norm": 2.756065607070923, "learning_rate": 3.717e-05, "loss": 0.1436, "step": 1286 }, { "epoch": 257.6, "grad_norm": 7.600184917449951, "learning_rate": 3.715e-05, "loss": 0.2015, "step": 1288 }, { "epoch": 258.0, "grad_norm": 5.387580394744873, "learning_rate": 3.7130000000000005e-05, "loss": 0.3074, "step": 1290 }, { "epoch": 258.4, "grad_norm": 4.440196990966797, "learning_rate": 3.711e-05, "loss": 0.2174, "step": 1292 }, { "epoch": 258.8, "grad_norm": 5.929399013519287, "learning_rate": 3.7090000000000006e-05, "loss": 0.209, "step": 1294 }, { "epoch": 259.2, "grad_norm": 3.0978758335113525, "learning_rate": 3.707e-05, "loss": 0.255, "step": 1296 }, { "epoch": 259.6, "grad_norm": 6.41616153717041, "learning_rate": 3.705e-05, "loss": 0.2119, "step": 1298 }, { "epoch": 260.0, "grad_norm": 6.491250991821289, "learning_rate": 3.703e-05, "loss": 0.2928, "step": 1300 }, { "epoch": 260.4, "grad_norm": 3.819356679916382, "learning_rate": 3.701e-05, "loss": 0.1843, "step": 1302 }, { "epoch": 260.8, "grad_norm": 2.3542838096618652, "learning_rate": 3.699e-05, "loss": 0.1993, "step": 1304 }, { "epoch": 261.2, "grad_norm": 20.27495002746582, "learning_rate": 3.697e-05, "loss": 0.2043, "step": 1306 }, { "epoch": 261.6, "grad_norm": 5.29311990737915, "learning_rate": 3.6950000000000004e-05, "loss": 0.2897, "step": 1308 }, { "epoch": 262.0, "grad_norm": 2.2187821865081787, "learning_rate": 3.693e-05, "loss": 0.1656, "step": 1310 }, { "epoch": 262.4, "grad_norm": 9.03425121307373, "learning_rate": 3.691e-05, "loss": 0.2557, "step": 1312 }, { "epoch": 262.8, "grad_norm": 18.437767028808594, "learning_rate": 3.689e-05, "loss": 0.3026, "step": 1314 }, { "epoch": 263.2, "grad_norm": 3.226881742477417, "learning_rate": 3.6870000000000004e-05, "loss": 0.3038, "step": 1316 }, { "epoch": 263.6, "grad_norm": 3.9146432876586914, "learning_rate": 3.685e-05, "loss": 0.2789, "step": 1318 }, { "epoch": 264.0, "grad_norm": 3.8228628635406494, 
"learning_rate": 3.6830000000000005e-05, "loss": 0.1945, "step": 1320 }, { "epoch": 264.4, "grad_norm": 3.494779586791992, "learning_rate": 3.681e-05, "loss": 0.203, "step": 1322 }, { "epoch": 264.8, "grad_norm": 2.6736092567443848, "learning_rate": 3.6790000000000005e-05, "loss": 0.1992, "step": 1324 }, { "epoch": 265.2, "grad_norm": 4.036677360534668, "learning_rate": 3.677e-05, "loss": 0.2368, "step": 1326 }, { "epoch": 265.6, "grad_norm": 5.371213912963867, "learning_rate": 3.675e-05, "loss": 0.2533, "step": 1328 }, { "epoch": 266.0, "grad_norm": 12.527331352233887, "learning_rate": 3.673e-05, "loss": 0.3302, "step": 1330 }, { "epoch": 266.4, "grad_norm": 4.46913480758667, "learning_rate": 3.671e-05, "loss": 0.2386, "step": 1332 }, { "epoch": 266.8, "grad_norm": 4.542766571044922, "learning_rate": 3.669e-05, "loss": 0.2763, "step": 1334 }, { "epoch": 267.2, "grad_norm": 7.278443336486816, "learning_rate": 3.6670000000000006e-05, "loss": 0.2091, "step": 1336 }, { "epoch": 267.6, "grad_norm": 3.8118326663970947, "learning_rate": 3.665e-05, "loss": 0.2061, "step": 1338 }, { "epoch": 268.0, "grad_norm": 5.112471580505371, "learning_rate": 3.663e-05, "loss": 0.3629, "step": 1340 }, { "epoch": 268.4, "grad_norm": 3.544633150100708, "learning_rate": 3.661e-05, "loss": 0.2497, "step": 1342 }, { "epoch": 268.8, "grad_norm": 4.489765644073486, "learning_rate": 3.659e-05, "loss": 0.2771, "step": 1344 }, { "epoch": 269.2, "grad_norm": 11.77910327911377, "learning_rate": 3.6570000000000004e-05, "loss": 0.1626, "step": 1346 }, { "epoch": 269.6, "grad_norm": 18.77377700805664, "learning_rate": 3.655e-05, "loss": 0.2999, "step": 1348 }, { "epoch": 270.0, "grad_norm": 4.783228397369385, "learning_rate": 3.6530000000000004e-05, "loss": 0.2516, "step": 1350 }, { "epoch": 270.4, "grad_norm": 4.325599670410156, "learning_rate": 3.651e-05, "loss": 0.1946, "step": 1352 }, { "epoch": 270.8, "grad_norm": 2.995680332183838, "learning_rate": 3.6490000000000005e-05, "loss": 0.2303, 
"step": 1354 }, { "epoch": 271.2, "grad_norm": 4.4513373374938965, "learning_rate": 3.647e-05, "loss": 0.4388, "step": 1356 }, { "epoch": 271.6, "grad_norm": 3.957228660583496, "learning_rate": 3.645e-05, "loss": 0.1617, "step": 1358 }, { "epoch": 272.0, "grad_norm": 59.26689147949219, "learning_rate": 3.643e-05, "loss": 0.2284, "step": 1360 }, { "epoch": 272.4, "grad_norm": 2.4353671073913574, "learning_rate": 3.641e-05, "loss": 0.1805, "step": 1362 }, { "epoch": 272.8, "grad_norm": 6.833852767944336, "learning_rate": 3.639e-05, "loss": 0.2528, "step": 1364 }, { "epoch": 273.2, "grad_norm": 3.1908679008483887, "learning_rate": 3.6370000000000006e-05, "loss": 0.1478, "step": 1366 }, { "epoch": 273.6, "grad_norm": 350.72735595703125, "learning_rate": 3.635e-05, "loss": 0.231, "step": 1368 }, { "epoch": 274.0, "grad_norm": 5.273377418518066, "learning_rate": 3.6330000000000006e-05, "loss": 0.2131, "step": 1370 }, { "epoch": 274.4, "grad_norm": 5.034257411956787, "learning_rate": 3.6309999999999996e-05, "loss": 0.2262, "step": 1372 }, { "epoch": 274.8, "grad_norm": 4.61154842376709, "learning_rate": 3.629e-05, "loss": 0.2897, "step": 1374 }, { "epoch": 275.2, "grad_norm": 2.9386649131774902, "learning_rate": 3.6270000000000003e-05, "loss": 0.2835, "step": 1376 }, { "epoch": 275.6, "grad_norm": 3.499753475189209, "learning_rate": 3.625e-05, "loss": 0.217, "step": 1378 }, { "epoch": 276.0, "grad_norm": 7.77516508102417, "learning_rate": 3.6230000000000004e-05, "loss": 0.3286, "step": 1380 }, { "epoch": 276.4, "grad_norm": 5.217374324798584, "learning_rate": 3.622e-05, "loss": 0.2711, "step": 1382 }, { "epoch": 276.8, "grad_norm": 5.361811637878418, "learning_rate": 3.62e-05, "loss": 0.1904, "step": 1384 }, { "epoch": 277.2, "grad_norm": 3.2903220653533936, "learning_rate": 3.618e-05, "loss": 0.1915, "step": 1386 }, { "epoch": 277.6, "grad_norm": 3.0300111770629883, "learning_rate": 3.616e-05, "loss": 0.1931, "step": 1388 }, { "epoch": 278.0, "grad_norm": 
4.807421684265137, "learning_rate": 3.614e-05, "loss": 0.2193, "step": 1390 }, { "epoch": 278.4, "grad_norm": 4.757713317871094, "learning_rate": 3.6120000000000007e-05, "loss": 0.1941, "step": 1392 }, { "epoch": 278.8, "grad_norm": 3.549058675765991, "learning_rate": 3.61e-05, "loss": 0.1918, "step": 1394 }, { "epoch": 279.2, "grad_norm": 3.05086350440979, "learning_rate": 3.608e-05, "loss": 0.1967, "step": 1396 }, { "epoch": 279.6, "grad_norm": 5.165313720703125, "learning_rate": 3.606e-05, "loss": 0.2269, "step": 1398 }, { "epoch": 280.0, "grad_norm": 2.8271372318267822, "learning_rate": 3.604e-05, "loss": 0.2038, "step": 1400 }, { "epoch": 280.0, "eval_cer": 0.7564766839378239, "eval_loss": 4.3533453941345215, "eval_runtime": 8.6554, "eval_samples_per_second": 1.155, "eval_steps_per_second": 0.231, "step": 1400 }, { "epoch": 280.4, "grad_norm": 3.9113049507141113, "learning_rate": 3.6020000000000004e-05, "loss": 0.2595, "step": 1402 }, { "epoch": 280.8, "grad_norm": 2.887913942337036, "learning_rate": 3.6e-05, "loss": 0.1902, "step": 1404 }, { "epoch": 281.2, "grad_norm": 3.3010952472686768, "learning_rate": 3.5980000000000004e-05, "loss": 0.2129, "step": 1406 }, { "epoch": 281.6, "grad_norm": 4.3039093017578125, "learning_rate": 3.596e-05, "loss": 0.2534, "step": 1408 }, { "epoch": 282.0, "grad_norm": 3.491180181503296, "learning_rate": 3.594e-05, "loss": 0.2264, "step": 1410 }, { "epoch": 282.4, "grad_norm": 2.889333963394165, "learning_rate": 3.592e-05, "loss": 0.2048, "step": 1412 }, { "epoch": 282.8, "grad_norm": 1.7048636674880981, "learning_rate": 3.59e-05, "loss": 0.1405, "step": 1414 }, { "epoch": 283.2, "grad_norm": 4.122111797332764, "learning_rate": 3.588e-05, "loss": 0.1817, "step": 1416 }, { "epoch": 283.6, "grad_norm": 3.1915600299835205, "learning_rate": 3.586e-05, "loss": 0.2362, "step": 1418 }, { "epoch": 284.0, "grad_norm": 3.3390262126922607, "learning_rate": 3.584e-05, "loss": 0.1682, "step": 1420 }, { "epoch": 284.4, "grad_norm": 
3.4338111877441406, "learning_rate": 3.5820000000000006e-05, "loss": 0.1577, "step": 1422 }, { "epoch": 284.8, "grad_norm": 14.567124366760254, "learning_rate": 3.58e-05, "loss": 0.3024, "step": 1424 }, { "epoch": 285.2, "grad_norm": 3.6370203495025635, "learning_rate": 3.578e-05, "loss": 0.3107, "step": 1426 }, { "epoch": 285.6, "grad_norm": 12.201949119567871, "learning_rate": 3.5759999999999996e-05, "loss": 0.3859, "step": 1428 }, { "epoch": 286.0, "grad_norm": 3.960357427597046, "learning_rate": 3.574e-05, "loss": 0.27, "step": 1430 }, { "epoch": 286.4, "grad_norm": 6.261948585510254, "learning_rate": 3.5720000000000004e-05, "loss": 0.2668, "step": 1432 }, { "epoch": 286.8, "grad_norm": 5.581904888153076, "learning_rate": 3.57e-05, "loss": 0.2869, "step": 1434 }, { "epoch": 287.2, "grad_norm": 4.065515995025635, "learning_rate": 3.5680000000000004e-05, "loss": 0.1633, "step": 1436 }, { "epoch": 287.6, "grad_norm": 9.160911560058594, "learning_rate": 3.566e-05, "loss": 0.2643, "step": 1438 }, { "epoch": 288.0, "grad_norm": 3.630946397781372, "learning_rate": 3.5640000000000004e-05, "loss": 0.2225, "step": 1440 }, { "epoch": 288.4, "grad_norm": 6.089304447174072, "learning_rate": 3.562e-05, "loss": 0.2138, "step": 1442 }, { "epoch": 288.8, "grad_norm": 8.57037353515625, "learning_rate": 3.56e-05, "loss": 0.2421, "step": 1444 }, { "epoch": 289.2, "grad_norm": 5.543829441070557, "learning_rate": 3.558e-05, "loss": 0.2298, "step": 1446 }, { "epoch": 289.6, "grad_norm": 3.366126298904419, "learning_rate": 3.5560000000000005e-05, "loss": 0.2143, "step": 1448 }, { "epoch": 290.0, "grad_norm": 3.924182415008545, "learning_rate": 3.554e-05, "loss": 0.1636, "step": 1450 }, { "epoch": 290.4, "grad_norm": 4.297432899475098, "learning_rate": 3.5520000000000006e-05, "loss": 0.2103, "step": 1452 }, { "epoch": 290.8, "grad_norm": 3.7211902141571045, "learning_rate": 3.55e-05, "loss": 0.1943, "step": 1454 }, { "epoch": 291.2, "grad_norm": 2.7519145011901855, "learning_rate": 
3.548e-05, "loss": 0.2306, "step": 1456 }, { "epoch": 291.6, "grad_norm": 2.303471565246582, "learning_rate": 3.546e-05, "loss": 0.1664, "step": 1458 }, { "epoch": 292.0, "grad_norm": 3.3286373615264893, "learning_rate": 3.544e-05, "loss": 0.1822, "step": 1460 }, { "epoch": 292.4, "grad_norm": 5.508766174316406, "learning_rate": 3.542e-05, "loss": 0.1878, "step": 1462 }, { "epoch": 292.8, "grad_norm": 4.348577976226807, "learning_rate": 3.54e-05, "loss": 0.2137, "step": 1464 }, { "epoch": 293.2, "grad_norm": 4.8452887535095215, "learning_rate": 3.5380000000000003e-05, "loss": 0.194, "step": 1466 }, { "epoch": 293.6, "grad_norm": 4.1087188720703125, "learning_rate": 3.536000000000001e-05, "loss": 0.2126, "step": 1468 }, { "epoch": 294.0, "grad_norm": 9.476184844970703, "learning_rate": 3.5340000000000004e-05, "loss": 0.2821, "step": 1470 }, { "epoch": 294.4, "grad_norm": 3.5999913215637207, "learning_rate": 3.532e-05, "loss": 0.2014, "step": 1472 }, { "epoch": 294.8, "grad_norm": 7.104425430297852, "learning_rate": 3.53e-05, "loss": 0.2412, "step": 1474 }, { "epoch": 295.2, "grad_norm": 4.14633321762085, "learning_rate": 3.528e-05, "loss": 0.2403, "step": 1476 }, { "epoch": 295.6, "grad_norm": 3.0593507289886475, "learning_rate": 3.5260000000000005e-05, "loss": 0.2227, "step": 1478 }, { "epoch": 296.0, "grad_norm": 4.490056037902832, "learning_rate": 3.524e-05, "loss": 0.2387, "step": 1480 }, { "epoch": 296.4, "grad_norm": 4.980740070343018, "learning_rate": 3.5220000000000005e-05, "loss": 0.1691, "step": 1482 }, { "epoch": 296.8, "grad_norm": 4.288209438323975, "learning_rate": 3.52e-05, "loss": 0.2156, "step": 1484 }, { "epoch": 297.2, "grad_norm": 3.74086856842041, "learning_rate": 3.518e-05, "loss": 0.2628, "step": 1486 }, { "epoch": 297.6, "grad_norm": 3.970950126647949, "learning_rate": 3.516e-05, "loss": 0.1643, "step": 1488 }, { "epoch": 298.0, "grad_norm": 1.9123245477676392, "learning_rate": 3.514e-05, "loss": 0.1501, "step": 1490 }, { "epoch": 298.4, 
"grad_norm": 3.738727331161499, "learning_rate": 3.512e-05, "loss": 0.1639, "step": 1492 }, { "epoch": 298.8, "grad_norm": 6.217358112335205, "learning_rate": 3.51e-05, "loss": 0.1582, "step": 1494 }, { "epoch": 299.2, "grad_norm": 3.7663512229919434, "learning_rate": 3.508e-05, "loss": 0.2647, "step": 1496 }, { "epoch": 299.6, "grad_norm": 4.492752552032471, "learning_rate": 3.5060000000000007e-05, "loss": 0.1914, "step": 1498 }, { "epoch": 300.0, "grad_norm": 2.1234936714172363, "learning_rate": 3.504e-05, "loss": 0.158, "step": 1500 }, { "epoch": 300.4, "grad_norm": 2.944578170776367, "learning_rate": 3.502e-05, "loss": 0.2069, "step": 1502 }, { "epoch": 300.8, "grad_norm": 4.446110248565674, "learning_rate": 3.5e-05, "loss": 0.142, "step": 1504 }, { "epoch": 301.2, "grad_norm": 8.160805702209473, "learning_rate": 3.498e-05, "loss": 0.196, "step": 1506 }, { "epoch": 301.6, "grad_norm": 3.6933579444885254, "learning_rate": 3.4960000000000004e-05, "loss": 0.1896, "step": 1508 }, { "epoch": 302.0, "grad_norm": 7.617312908172607, "learning_rate": 3.494e-05, "loss": 0.1958, "step": 1510 }, { "epoch": 302.4, "grad_norm": 3.231534481048584, "learning_rate": 3.4920000000000004e-05, "loss": 0.1691, "step": 1512 }, { "epoch": 302.8, "grad_norm": 4.182994365692139, "learning_rate": 3.49e-05, "loss": 0.1798, "step": 1514 }, { "epoch": 303.2, "grad_norm": 3.8814966678619385, "learning_rate": 3.4880000000000005e-05, "loss": 0.1431, "step": 1516 }, { "epoch": 303.6, "grad_norm": 4.101096153259277, "learning_rate": 3.486e-05, "loss": 0.1458, "step": 1518 }, { "epoch": 304.0, "grad_norm": 3.9785642623901367, "learning_rate": 3.484e-05, "loss": 0.246, "step": 1520 }, { "epoch": 304.4, "grad_norm": 3.7394511699676514, "learning_rate": 3.482e-05, "loss": 0.1572, "step": 1522 }, { "epoch": 304.8, "grad_norm": 2.8834211826324463, "learning_rate": 3.48e-05, "loss": 0.1339, "step": 1524 }, { "epoch": 305.2, "grad_norm": 1.892931342124939, "learning_rate": 3.478e-05, "loss": 0.1303, 
"step": 1526 }, { "epoch": 305.6, "grad_norm": 1.9035749435424805, "learning_rate": 3.4760000000000006e-05, "loss": 0.2136, "step": 1528 }, { "epoch": 306.0, "grad_norm": 2.2680091857910156, "learning_rate": 3.474e-05, "loss": 0.1622, "step": 1530 }, { "epoch": 306.4, "grad_norm": 4.67875337600708, "learning_rate": 3.472e-05, "loss": 0.3408, "step": 1532 }, { "epoch": 306.8, "grad_norm": 4.48759651184082, "learning_rate": 3.4699999999999996e-05, "loss": 0.1614, "step": 1534 }, { "epoch": 307.2, "grad_norm": 3.07967472076416, "learning_rate": 3.468e-05, "loss": 0.2193, "step": 1536 }, { "epoch": 307.6, "grad_norm": 7.475008487701416, "learning_rate": 3.4660000000000004e-05, "loss": 0.1914, "step": 1538 }, { "epoch": 308.0, "grad_norm": 5.759007453918457, "learning_rate": 3.464e-05, "loss": 0.2361, "step": 1540 }, { "epoch": 308.4, "grad_norm": 4.998610019683838, "learning_rate": 3.4620000000000004e-05, "loss": 0.2068, "step": 1542 }, { "epoch": 308.8, "grad_norm": 6.882718086242676, "learning_rate": 3.46e-05, "loss": 0.1853, "step": 1544 }, { "epoch": 309.2, "grad_norm": 6.003058433532715, "learning_rate": 3.4580000000000004e-05, "loss": 0.2367, "step": 1546 }, { "epoch": 309.6, "grad_norm": 6.925650596618652, "learning_rate": 3.456e-05, "loss": 0.204, "step": 1548 }, { "epoch": 310.0, "grad_norm": 7.221513748168945, "learning_rate": 3.454e-05, "loss": 0.4014, "step": 1550 }, { "epoch": 310.4, "grad_norm": 3.0065932273864746, "learning_rate": 3.452e-05, "loss": 0.1864, "step": 1552 }, { "epoch": 310.8, "grad_norm": 7.513803005218506, "learning_rate": 3.45e-05, "loss": 0.3213, "step": 1554 }, { "epoch": 311.2, "grad_norm": 3.3990230560302734, "learning_rate": 3.448e-05, "loss": 0.13, "step": 1556 }, { "epoch": 311.6, "grad_norm": 3.864299774169922, "learning_rate": 3.4460000000000005e-05, "loss": 0.1786, "step": 1558 }, { "epoch": 312.0, "grad_norm": 2.488945722579956, "learning_rate": 3.444e-05, "loss": 0.1534, "step": 1560 }, { "epoch": 312.4, "grad_norm": 
3.447606325149536, "learning_rate": 3.442e-05, "loss": 0.1577, "step": 1562 }, { "epoch": 312.8, "grad_norm": 2.97577166557312, "learning_rate": 3.4399999999999996e-05, "loss": 0.1544, "step": 1564 }, { "epoch": 313.2, "grad_norm": 3.9868626594543457, "learning_rate": 3.438e-05, "loss": 0.2341, "step": 1566 }, { "epoch": 313.6, "grad_norm": 2.807943105697632, "learning_rate": 3.436e-05, "loss": 0.1804, "step": 1568 }, { "epoch": 314.0, "grad_norm": 2.624800443649292, "learning_rate": 3.434e-05, "loss": 0.1871, "step": 1570 }, { "epoch": 314.4, "grad_norm": 2.625075578689575, "learning_rate": 3.4320000000000003e-05, "loss": 0.1254, "step": 1572 }, { "epoch": 314.8, "grad_norm": 4.675913333892822, "learning_rate": 3.430000000000001e-05, "loss": 0.1725, "step": 1574 }, { "epoch": 315.2, "grad_norm": 3.236999273300171, "learning_rate": 3.4280000000000004e-05, "loss": 0.1989, "step": 1576 }, { "epoch": 315.6, "grad_norm": 6.421444892883301, "learning_rate": 3.426e-05, "loss": 0.2155, "step": 1578 }, { "epoch": 316.0, "grad_norm": 2.6597115993499756, "learning_rate": 3.424e-05, "loss": 0.2099, "step": 1580 }, { "epoch": 316.4, "grad_norm": 4.0817179679870605, "learning_rate": 3.422e-05, "loss": 0.2205, "step": 1582 }, { "epoch": 316.8, "grad_norm": 3.3092260360717773, "learning_rate": 3.4200000000000005e-05, "loss": 0.2492, "step": 1584 }, { "epoch": 317.2, "grad_norm": 5.16715145111084, "learning_rate": 3.418e-05, "loss": 0.2472, "step": 1586 }, { "epoch": 317.6, "grad_norm": 3.3597195148468018, "learning_rate": 3.4160000000000005e-05, "loss": 0.188, "step": 1588 }, { "epoch": 318.0, "grad_norm": 5.051392555236816, "learning_rate": 3.414e-05, "loss": 0.1997, "step": 1590 }, { "epoch": 318.4, "grad_norm": 4.864868640899658, "learning_rate": 3.412e-05, "loss": 0.227, "step": 1592 }, { "epoch": 318.8, "grad_norm": 3.4974377155303955, "learning_rate": 3.41e-05, "loss": 0.1855, "step": 1594 }, { "epoch": 319.2, "grad_norm": 2.8796896934509277, "learning_rate": 3.408e-05, 
"loss": 0.1847, "step": 1596 }, { "epoch": 319.6, "grad_norm": 5.714700222015381, "learning_rate": 3.406e-05, "loss": 0.179, "step": 1598 }, { "epoch": 320.0, "grad_norm": 2.2135703563690186, "learning_rate": 3.404e-05, "loss": 0.1515, "step": 1600 }, { "epoch": 320.0, "eval_cer": 0.7927461139896373, "eval_loss": 5.463683128356934, "eval_runtime": 7.219, "eval_samples_per_second": 1.385, "eval_steps_per_second": 0.277, "step": 1600 }, { "epoch": 320.4, "grad_norm": 5.897749900817871, "learning_rate": 3.402e-05, "loss": 0.1576, "step": 1602 }, { "epoch": 320.8, "grad_norm": 8.979398727416992, "learning_rate": 3.4000000000000007e-05, "loss": 0.1857, "step": 1604 }, { "epoch": 321.2, "grad_norm": 6.596129417419434, "learning_rate": 3.398e-05, "loss": 0.265, "step": 1606 }, { "epoch": 321.6, "grad_norm": 2.8837380409240723, "learning_rate": 3.396e-05, "loss": 0.1066, "step": 1608 }, { "epoch": 322.0, "grad_norm": 5.109397888183594, "learning_rate": 3.394e-05, "loss": 0.3006, "step": 1610 }, { "epoch": 322.4, "grad_norm": 6.885828495025635, "learning_rate": 3.392e-05, "loss": 0.1402, "step": 1612 }, { "epoch": 322.8, "grad_norm": 4.138067722320557, "learning_rate": 3.3900000000000004e-05, "loss": 0.3075, "step": 1614 }, { "epoch": 323.2, "grad_norm": 4.533945560455322, "learning_rate": 3.388e-05, "loss": 0.1756, "step": 1616 }, { "epoch": 323.6, "grad_norm": 4.8230881690979, "learning_rate": 3.3860000000000004e-05, "loss": 0.2355, "step": 1618 }, { "epoch": 324.0, "grad_norm": 3.423970937728882, "learning_rate": 3.384e-05, "loss": 0.1278, "step": 1620 }, { "epoch": 324.4, "grad_norm": 3.2299673557281494, "learning_rate": 3.3820000000000005e-05, "loss": 0.1229, "step": 1622 }, { "epoch": 324.8, "grad_norm": 6.278626918792725, "learning_rate": 3.38e-05, "loss": 0.2736, "step": 1624 }, { "epoch": 325.2, "grad_norm": 2.765869140625, "learning_rate": 3.378e-05, "loss": 0.208, "step": 1626 }, { "epoch": 325.6, "grad_norm": 3.2809343338012695, "learning_rate": 3.376e-05, 
"loss": 0.1674, "step": 1628 }, { "epoch": 326.0, "grad_norm": 1.7131997346878052, "learning_rate": 3.374e-05, "loss": 0.1213, "step": 1630 }, { "epoch": 326.4, "grad_norm": 5.318551063537598, "learning_rate": 3.372e-05, "loss": 0.1453, "step": 1632 }, { "epoch": 326.8, "grad_norm": 4.351783275604248, "learning_rate": 3.3700000000000006e-05, "loss": 0.2043, "step": 1634 }, { "epoch": 327.2, "grad_norm": 4.660512447357178, "learning_rate": 3.368e-05, "loss": 0.2205, "step": 1636 }, { "epoch": 327.6, "grad_norm": 10.070477485656738, "learning_rate": 3.366e-05, "loss": 0.1395, "step": 1638 }, { "epoch": 328.0, "grad_norm": 2.6522252559661865, "learning_rate": 3.3639999999999996e-05, "loss": 0.1491, "step": 1640 }, { "epoch": 328.4, "grad_norm": 3.3944950103759766, "learning_rate": 3.362e-05, "loss": 0.1716, "step": 1642 }, { "epoch": 328.8, "grad_norm": 3.597280979156494, "learning_rate": 3.3600000000000004e-05, "loss": 0.1251, "step": 1644 }, { "epoch": 329.2, "grad_norm": 1.6378886699676514, "learning_rate": 3.358e-05, "loss": 0.0866, "step": 1646 }, { "epoch": 329.6, "grad_norm": 4.193557262420654, "learning_rate": 3.3560000000000004e-05, "loss": 0.2193, "step": 1648 }, { "epoch": 330.0, "grad_norm": 5.30430793762207, "learning_rate": 3.354e-05, "loss": 0.2401, "step": 1650 }, { "epoch": 330.4, "grad_norm": 5.312755584716797, "learning_rate": 3.3520000000000004e-05, "loss": 0.2145, "step": 1652 }, { "epoch": 330.8, "grad_norm": 4.96132230758667, "learning_rate": 3.35e-05, "loss": 0.1633, "step": 1654 }, { "epoch": 331.2, "grad_norm": 3.945343494415283, "learning_rate": 3.348e-05, "loss": 0.1605, "step": 1656 }, { "epoch": 331.6, "grad_norm": 3.2530901432037354, "learning_rate": 3.346e-05, "loss": 0.1762, "step": 1658 }, { "epoch": 332.0, "grad_norm": 4.97373628616333, "learning_rate": 3.344e-05, "loss": 0.2072, "step": 1660 }, { "epoch": 332.4, "grad_norm": 3.8797354698181152, "learning_rate": 3.342e-05, "loss": 0.2073, "step": 1662 }, { "epoch": 332.8, 
"grad_norm": 4.142359733581543, "learning_rate": 3.3400000000000005e-05, "loss": 0.1619, "step": 1664 }, { "epoch": 333.2, "grad_norm": 4.184179782867432, "learning_rate": 3.338e-05, "loss": 0.1155, "step": 1666 }, { "epoch": 333.6, "grad_norm": 3.18281888961792, "learning_rate": 3.336e-05, "loss": 0.1597, "step": 1668 }, { "epoch": 334.0, "grad_norm": 4.98223876953125, "learning_rate": 3.3339999999999996e-05, "loss": 0.1599, "step": 1670 }, { "epoch": 334.4, "grad_norm": 2.958751678466797, "learning_rate": 3.332e-05, "loss": 0.141, "step": 1672 }, { "epoch": 334.8, "grad_norm": 1.8969042301177979, "learning_rate": 3.33e-05, "loss": 0.145, "step": 1674 }, { "epoch": 335.2, "grad_norm": 3.110189914703369, "learning_rate": 3.328e-05, "loss": 0.168, "step": 1676 }, { "epoch": 335.6, "grad_norm": 3.959944009780884, "learning_rate": 3.3260000000000003e-05, "loss": 0.1555, "step": 1678 }, { "epoch": 336.0, "grad_norm": 3.2756810188293457, "learning_rate": 3.324e-05, "loss": 0.1884, "step": 1680 }, { "epoch": 336.4, "grad_norm": 4.222044944763184, "learning_rate": 3.3220000000000004e-05, "loss": 0.1976, "step": 1682 }, { "epoch": 336.8, "grad_norm": 2.0300333499908447, "learning_rate": 3.32e-05, "loss": 0.1465, "step": 1684 }, { "epoch": 337.2, "grad_norm": 6.307195663452148, "learning_rate": 3.318e-05, "loss": 0.1608, "step": 1686 }, { "epoch": 337.6, "grad_norm": 3.4754626750946045, "learning_rate": 3.316e-05, "loss": 0.1545, "step": 1688 }, { "epoch": 338.0, "grad_norm": 2.9157328605651855, "learning_rate": 3.314e-05, "loss": 0.1557, "step": 1690 }, { "epoch": 338.4, "grad_norm": 3.257925271987915, "learning_rate": 3.312e-05, "loss": 0.1678, "step": 1692 }, { "epoch": 338.8, "grad_norm": 2.9700069427490234, "learning_rate": 3.3100000000000005e-05, "loss": 0.1435, "step": 1694 }, { "epoch": 339.2, "grad_norm": 1.923651933670044, "learning_rate": 3.308e-05, "loss": 0.0995, "step": 1696 }, { "epoch": 339.6, "grad_norm": 3.6840572357177734, "learning_rate": 
3.3060000000000005e-05, "loss": 0.1883, "step": 1698 }, { "epoch": 340.0, "grad_norm": 2.541073799133301, "learning_rate": 3.304e-05, "loss": 0.1184, "step": 1700 }, { "epoch": 340.4, "grad_norm": 2.4867494106292725, "learning_rate": 3.302e-05, "loss": 0.1613, "step": 1702 }, { "epoch": 340.8, "grad_norm": 3.4906351566314697, "learning_rate": 3.3e-05, "loss": 0.1239, "step": 1704 }, { "epoch": 341.2, "grad_norm": 2.9451887607574463, "learning_rate": 3.298e-05, "loss": 0.172, "step": 1706 }, { "epoch": 341.6, "grad_norm": 3.453885078430176, "learning_rate": 3.296e-05, "loss": 0.1515, "step": 1708 }, { "epoch": 342.0, "grad_norm": 3.525286912918091, "learning_rate": 3.2940000000000006e-05, "loss": 0.1953, "step": 1710 }, { "epoch": 342.4, "grad_norm": 2.4129929542541504, "learning_rate": 3.292e-05, "loss": 0.15, "step": 1712 }, { "epoch": 342.8, "grad_norm": 2.678140640258789, "learning_rate": 3.29e-05, "loss": 0.1732, "step": 1714 }, { "epoch": 343.2, "grad_norm": 4.180364608764648, "learning_rate": 3.288e-05, "loss": 0.1386, "step": 1716 }, { "epoch": 343.6, "grad_norm": 2.7738051414489746, "learning_rate": 3.286e-05, "loss": 0.1831, "step": 1718 }, { "epoch": 344.0, "grad_norm": 3.1170198917388916, "learning_rate": 3.2840000000000004e-05, "loss": 0.1201, "step": 1720 }, { "epoch": 344.4, "grad_norm": 2.4444096088409424, "learning_rate": 3.282e-05, "loss": 0.1101, "step": 1722 }, { "epoch": 344.8, "grad_norm": 1.651002049446106, "learning_rate": 3.2800000000000004e-05, "loss": 0.1664, "step": 1724 }, { "epoch": 345.2, "grad_norm": 6.079354763031006, "learning_rate": 3.278e-05, "loss": 0.199, "step": 1726 }, { "epoch": 345.6, "grad_norm": 5.296341419219971, "learning_rate": 3.2760000000000005e-05, "loss": 0.2135, "step": 1728 }, { "epoch": 346.0, "grad_norm": 5.279977798461914, "learning_rate": 3.274e-05, "loss": 0.1775, "step": 1730 }, { "epoch": 346.4, "grad_norm": 3.6159985065460205, "learning_rate": 3.272e-05, "loss": 0.1274, "step": 1732 }, { "epoch": 346.8, 
"grad_norm": 4.214212417602539, "learning_rate": 3.27e-05, "loss": 0.1704, "step": 1734 }, { "epoch": 347.2, "grad_norm": 2.6386454105377197, "learning_rate": 3.268e-05, "loss": 0.1553, "step": 1736 }, { "epoch": 347.6, "grad_norm": 5.681671619415283, "learning_rate": 3.266e-05, "loss": 0.2622, "step": 1738 }, { "epoch": 348.0, "grad_norm": 3.4936113357543945, "learning_rate": 3.2640000000000006e-05, "loss": 0.1209, "step": 1740 }, { "epoch": 348.4, "grad_norm": 6.470088481903076, "learning_rate": 3.262e-05, "loss": 0.1763, "step": 1742 }, { "epoch": 348.8, "grad_norm": 4.852290153503418, "learning_rate": 3.26e-05, "loss": 0.1909, "step": 1744 }, { "epoch": 349.2, "grad_norm": 4.112585067749023, "learning_rate": 3.2579999999999996e-05, "loss": 0.1474, "step": 1746 }, { "epoch": 349.6, "grad_norm": 1.8468984365463257, "learning_rate": 3.256e-05, "loss": 0.1143, "step": 1748 }, { "epoch": 350.0, "grad_norm": 4.566800594329834, "learning_rate": 3.2540000000000004e-05, "loss": 0.1972, "step": 1750 }, { "epoch": 350.4, "grad_norm": 2.2017741203308105, "learning_rate": 3.252e-05, "loss": 0.171, "step": 1752 }, { "epoch": 350.8, "grad_norm": 3.770983934402466, "learning_rate": 3.2500000000000004e-05, "loss": 0.1259, "step": 1754 }, { "epoch": 351.2, "grad_norm": 3.9944915771484375, "learning_rate": 3.248e-05, "loss": 0.241, "step": 1756 }, { "epoch": 351.6, "grad_norm": 3.9045469760894775, "learning_rate": 3.2460000000000004e-05, "loss": 0.1821, "step": 1758 }, { "epoch": 352.0, "grad_norm": 3.7421658039093018, "learning_rate": 3.244e-05, "loss": 0.1442, "step": 1760 }, { "epoch": 352.4, "grad_norm": 1.5044286251068115, "learning_rate": 3.242e-05, "loss": 0.1274, "step": 1762 }, { "epoch": 352.8, "grad_norm": 2.8305540084838867, "learning_rate": 3.24e-05, "loss": 0.1344, "step": 1764 }, { "epoch": 353.2, "grad_norm": 1.7906485795974731, "learning_rate": 3.238e-05, "loss": 0.1367, "step": 1766 }, { "epoch": 353.6, "grad_norm": 5.86719274520874, "learning_rate": 3.236e-05, 
"loss": 0.1842, "step": 1768 }, { "epoch": 354.0, "grad_norm": 3.7178423404693604, "learning_rate": 3.2340000000000005e-05, "loss": 0.1885, "step": 1770 }, { "epoch": 354.4, "grad_norm": 2.0382328033447266, "learning_rate": 3.232e-05, "loss": 0.0996, "step": 1772 }, { "epoch": 354.8, "grad_norm": 1.9932969808578491, "learning_rate": 3.2300000000000006e-05, "loss": 0.1378, "step": 1774 }, { "epoch": 355.2, "grad_norm": 2.879995584487915, "learning_rate": 3.2279999999999996e-05, "loss": 0.1062, "step": 1776 }, { "epoch": 355.6, "grad_norm": 3.9957191944122314, "learning_rate": 3.226e-05, "loss": 0.1409, "step": 1778 }, { "epoch": 356.0, "grad_norm": 4.236739635467529, "learning_rate": 3.224e-05, "loss": 0.2104, "step": 1780 }, { "epoch": 356.4, "grad_norm": 1.7207732200622559, "learning_rate": 3.222e-05, "loss": 0.0727, "step": 1782 }, { "epoch": 356.8, "grad_norm": 3.362875461578369, "learning_rate": 3.2200000000000003e-05, "loss": 0.1964, "step": 1784 }, { "epoch": 357.2, "grad_norm": 2.998486042022705, "learning_rate": 3.218e-05, "loss": 0.2187, "step": 1786 }, { "epoch": 357.6, "grad_norm": 6.288384914398193, "learning_rate": 3.2160000000000004e-05, "loss": 0.1799, "step": 1788 }, { "epoch": 358.0, "grad_norm": 2.7052111625671387, "learning_rate": 3.214e-05, "loss": 0.0905, "step": 1790 }, { "epoch": 358.4, "grad_norm": 5.407663822174072, "learning_rate": 3.212e-05, "loss": 0.1265, "step": 1792 }, { "epoch": 358.8, "grad_norm": 6.812448978424072, "learning_rate": 3.21e-05, "loss": 0.1757, "step": 1794 }, { "epoch": 359.2, "grad_norm": 1.3381348848342896, "learning_rate": 3.208e-05, "loss": 0.1837, "step": 1796 }, { "epoch": 359.6, "grad_norm": 2.509676218032837, "learning_rate": 3.206e-05, "loss": 0.1214, "step": 1798 }, { "epoch": 360.0, "grad_norm": 6.054287433624268, "learning_rate": 3.2040000000000005e-05, "loss": 0.2434, "step": 1800 }, { "epoch": 360.0, "eval_cer": 0.7564766839378239, "eval_loss": 4.898867130279541, "eval_runtime": 7.6225, 
"eval_samples_per_second": 1.312, "eval_steps_per_second": 0.262, "step": 1800 }, { "epoch": 360.4, "grad_norm": 3.0615079402923584, "learning_rate": 3.202e-05, "loss": 0.1641, "step": 1802 }, { "epoch": 360.8, "grad_norm": 2.685194492340088, "learning_rate": 3.2000000000000005e-05, "loss": 0.1416, "step": 1804 }, { "epoch": 361.2, "grad_norm": 3.331611394882202, "learning_rate": 3.198e-05, "loss": 0.1261, "step": 1806 }, { "epoch": 361.6, "grad_norm": 3.8755078315734863, "learning_rate": 3.196e-05, "loss": 0.1717, "step": 1808 }, { "epoch": 362.0, "grad_norm": 4.241562843322754, "learning_rate": 3.194e-05, "loss": 0.1607, "step": 1810 }, { "epoch": 362.4, "grad_norm": 4.34427547454834, "learning_rate": 3.192e-05, "loss": 0.1379, "step": 1812 }, { "epoch": 362.8, "grad_norm": 2.3105342388153076, "learning_rate": 3.19e-05, "loss": 0.154, "step": 1814 }, { "epoch": 363.2, "grad_norm": 3.1316983699798584, "learning_rate": 3.188e-05, "loss": 0.1451, "step": 1816 }, { "epoch": 363.6, "grad_norm": 7.79421854019165, "learning_rate": 3.186e-05, "loss": 0.1871, "step": 1818 }, { "epoch": 364.0, "grad_norm": 5.24039888381958, "learning_rate": 3.184e-05, "loss": 0.1757, "step": 1820 }, { "epoch": 364.4, "grad_norm": 3.3378076553344727, "learning_rate": 3.182e-05, "loss": 0.1221, "step": 1822 }, { "epoch": 364.8, "grad_norm": 3.119875431060791, "learning_rate": 3.18e-05, "loss": 0.1725, "step": 1824 }, { "epoch": 365.2, "grad_norm": 3.362198829650879, "learning_rate": 3.1780000000000004e-05, "loss": 0.1141, "step": 1826 }, { "epoch": 365.6, "grad_norm": 6.064019680023193, "learning_rate": 3.176e-05, "loss": 0.1928, "step": 1828 }, { "epoch": 366.0, "grad_norm": 2.840318441390991, "learning_rate": 3.1740000000000004e-05, "loss": 0.1235, "step": 1830 }, { "epoch": 366.4, "grad_norm": 4.153942584991455, "learning_rate": 3.172e-05, "loss": 0.1234, "step": 1832 }, { "epoch": 366.8, "grad_norm": 2.190415143966675, "learning_rate": 3.1700000000000005e-05, "loss": 0.1107, "step": 1834 
}, { "epoch": 367.2, "grad_norm": 3.9375882148742676, "learning_rate": 3.168e-05, "loss": 0.1481, "step": 1836 }, { "epoch": 367.6, "grad_norm": 3.44856595993042, "learning_rate": 3.166e-05, "loss": 0.1165, "step": 1838 }, { "epoch": 368.0, "grad_norm": 3.9315924644470215, "learning_rate": 3.164e-05, "loss": 0.1558, "step": 1840 }, { "epoch": 368.4, "grad_norm": 3.2634127140045166, "learning_rate": 3.162e-05, "loss": 0.134, "step": 1842 }, { "epoch": 368.8, "grad_norm": 2.156912326812744, "learning_rate": 3.16e-05, "loss": 0.1317, "step": 1844 }, { "epoch": 369.2, "grad_norm": 3.0183212757110596, "learning_rate": 3.1580000000000006e-05, "loss": 0.1034, "step": 1846 }, { "epoch": 369.6, "grad_norm": 2.838207244873047, "learning_rate": 3.156e-05, "loss": 0.1738, "step": 1848 }, { "epoch": 370.0, "grad_norm": 3.2733356952667236, "learning_rate": 3.154e-05, "loss": 0.161, "step": 1850 }, { "epoch": 370.4, "grad_norm": 2.209399461746216, "learning_rate": 3.1519999999999996e-05, "loss": 0.1017, "step": 1852 }, { "epoch": 370.8, "grad_norm": 4.579075813293457, "learning_rate": 3.15e-05, "loss": 0.1989, "step": 1854 }, { "epoch": 371.2, "grad_norm": 3.323669910430908, "learning_rate": 3.1480000000000004e-05, "loss": 0.1016, "step": 1856 }, { "epoch": 371.6, "grad_norm": 4.919926166534424, "learning_rate": 3.146e-05, "loss": 0.2089, "step": 1858 }, { "epoch": 372.0, "grad_norm": 3.828839063644409, "learning_rate": 3.1440000000000004e-05, "loss": 0.1434, "step": 1860 }, { "epoch": 372.4, "grad_norm": 4.172984600067139, "learning_rate": 3.142e-05, "loss": 0.11, "step": 1862 }, { "epoch": 372.8, "grad_norm": 3.361454963684082, "learning_rate": 3.1400000000000004e-05, "loss": 0.1452, "step": 1864 }, { "epoch": 373.2, "grad_norm": 2.2560362815856934, "learning_rate": 3.138e-05, "loss": 0.1227, "step": 1866 }, { "epoch": 373.6, "grad_norm": 2.284684419631958, "learning_rate": 3.136e-05, "loss": 0.1119, "step": 1868 }, { "epoch": 374.0, "grad_norm": 3.5740270614624023, 
"learning_rate": 3.134e-05, "loss": 0.1502, "step": 1870 }, { "epoch": 374.4, "grad_norm": 2.5053253173828125, "learning_rate": 3.132e-05, "loss": 0.1057, "step": 1872 }, { "epoch": 374.8, "grad_norm": 5.028293609619141, "learning_rate": 3.13e-05, "loss": 0.1474, "step": 1874 }, { "epoch": 375.2, "grad_norm": 4.547641277313232, "learning_rate": 3.1280000000000005e-05, "loss": 0.1516, "step": 1876 }, { "epoch": 375.6, "grad_norm": 3.9409918785095215, "learning_rate": 3.126e-05, "loss": 0.1748, "step": 1878 }, { "epoch": 376.0, "grad_norm": 1.020080327987671, "learning_rate": 3.1240000000000006e-05, "loss": 0.0979, "step": 1880 }, { "epoch": 376.4, "grad_norm": 2.8897788524627686, "learning_rate": 3.122e-05, "loss": 0.1223, "step": 1882 }, { "epoch": 376.8, "grad_norm": 3.696582555770874, "learning_rate": 3.12e-05, "loss": 0.1284, "step": 1884 }, { "epoch": 377.2, "grad_norm": 8.42935848236084, "learning_rate": 3.118e-05, "loss": 0.1561, "step": 1886 }, { "epoch": 377.6, "grad_norm": 2.810959815979004, "learning_rate": 3.116e-05, "loss": 0.1526, "step": 1888 }, { "epoch": 378.0, "grad_norm": 1.78050696849823, "learning_rate": 3.1140000000000003e-05, "loss": 0.1289, "step": 1890 }, { "epoch": 378.4, "grad_norm": 2.001422166824341, "learning_rate": 3.112e-05, "loss": 0.1316, "step": 1892 }, { "epoch": 378.8, "grad_norm": 2.8434829711914062, "learning_rate": 3.1100000000000004e-05, "loss": 0.0968, "step": 1894 }, { "epoch": 379.2, "grad_norm": 2.774887800216675, "learning_rate": 3.108e-05, "loss": 0.1712, "step": 1896 }, { "epoch": 379.6, "grad_norm": 3.0647776126861572, "learning_rate": 3.106e-05, "loss": 0.1746, "step": 1898 }, { "epoch": 380.0, "grad_norm": 2.039196491241455, "learning_rate": 3.104e-05, "loss": 0.1193, "step": 1900 }, { "epoch": 380.4, "grad_norm": 2.4039220809936523, "learning_rate": 3.102e-05, "loss": 0.1115, "step": 1902 }, { "epoch": 380.8, "grad_norm": 3.0284810066223145, "learning_rate": 3.1e-05, "loss": 0.2595, "step": 1904 }, { "epoch": 
381.2, "grad_norm": 4.8089752197265625, "learning_rate": 3.0980000000000005e-05, "loss": 0.1687, "step": 1906 }, { "epoch": 381.6, "grad_norm": 2.0946786403656006, "learning_rate": 3.096e-05, "loss": 0.0988, "step": 1908 }, { "epoch": 382.0, "grad_norm": 5.5917863845825195, "learning_rate": 3.0940000000000005e-05, "loss": 0.2241, "step": 1910 }, { "epoch": 382.4, "grad_norm": 3.4670252799987793, "learning_rate": 3.092e-05, "loss": 0.1414, "step": 1912 }, { "epoch": 382.8, "grad_norm": 1.4226624965667725, "learning_rate": 3.09e-05, "loss": 0.1389, "step": 1914 }, { "epoch": 383.2, "grad_norm": 1.4152538776397705, "learning_rate": 3.088e-05, "loss": 0.1549, "step": 1916 }, { "epoch": 383.6, "grad_norm": 2.132115602493286, "learning_rate": 3.086e-05, "loss": 0.0913, "step": 1918 }, { "epoch": 384.0, "grad_norm": 5.9196977615356445, "learning_rate": 3.084e-05, "loss": 0.1772, "step": 1920 }, { "epoch": 384.4, "grad_norm": 2.9233782291412354, "learning_rate": 3.082e-05, "loss": 0.1506, "step": 1922 }, { "epoch": 384.8, "grad_norm": 1.75539231300354, "learning_rate": 3.08e-05, "loss": 0.0833, "step": 1924 }, { "epoch": 385.2, "grad_norm": 2.975764274597168, "learning_rate": 3.078e-05, "loss": 0.1803, "step": 1926 }, { "epoch": 385.6, "grad_norm": 3.345104217529297, "learning_rate": 3.076e-05, "loss": 0.1409, "step": 1928 }, { "epoch": 386.0, "grad_norm": 2.457251787185669, "learning_rate": 3.074e-05, "loss": 0.1017, "step": 1930 }, { "epoch": 386.4, "grad_norm": 6.230168342590332, "learning_rate": 3.072e-05, "loss": 0.219, "step": 1932 }, { "epoch": 386.8, "grad_norm": 2.308485984802246, "learning_rate": 3.07e-05, "loss": 0.1187, "step": 1934 }, { "epoch": 387.2, "grad_norm": 3.2631239891052246, "learning_rate": 3.0680000000000004e-05, "loss": 0.1452, "step": 1936 }, { "epoch": 387.6, "grad_norm": 2.702026605606079, "learning_rate": 3.066e-05, "loss": 0.1571, "step": 1938 }, { "epoch": 388.0, "grad_norm": 1.7952728271484375, "learning_rate": 3.0640000000000005e-05, 
"loss": 0.1163, "step": 1940 }, { "epoch": 388.4, "grad_norm": 2.0206494331359863, "learning_rate": 3.062e-05, "loss": 0.1348, "step": 1942 }, { "epoch": 388.8, "grad_norm": 4.8599982261657715, "learning_rate": 3.06e-05, "loss": 0.1966, "step": 1944 }, { "epoch": 389.2, "grad_norm": 4.036508083343506, "learning_rate": 3.058e-05, "loss": 0.143, "step": 1946 }, { "epoch": 389.6, "grad_norm": 1.5852596759796143, "learning_rate": 3.056e-05, "loss": 0.1503, "step": 1948 }, { "epoch": 390.0, "grad_norm": 3.403186321258545, "learning_rate": 3.054e-05, "loss": 0.134, "step": 1950 }, { "epoch": 390.4, "grad_norm": 5.940028190612793, "learning_rate": 3.0520000000000006e-05, "loss": 0.1922, "step": 1952 }, { "epoch": 390.8, "grad_norm": 3.5208024978637695, "learning_rate": 3.05e-05, "loss": 0.1788, "step": 1954 }, { "epoch": 391.2, "grad_norm": 3.1141529083251953, "learning_rate": 3.0480000000000003e-05, "loss": 0.096, "step": 1956 }, { "epoch": 391.6, "grad_norm": 4.077508449554443, "learning_rate": 3.046e-05, "loss": 0.1605, "step": 1958 }, { "epoch": 392.0, "grad_norm": 2.1439621448516846, "learning_rate": 3.0440000000000003e-05, "loss": 0.1271, "step": 1960 }, { "epoch": 392.4, "grad_norm": 5.507186412811279, "learning_rate": 3.0420000000000004e-05, "loss": 0.1735, "step": 1962 }, { "epoch": 392.8, "grad_norm": 3.3453614711761475, "learning_rate": 3.04e-05, "loss": 0.1634, "step": 1964 }, { "epoch": 393.2, "grad_norm": 2.7352888584136963, "learning_rate": 3.0380000000000004e-05, "loss": 0.1346, "step": 1966 }, { "epoch": 393.6, "grad_norm": 2.0383083820343018, "learning_rate": 3.036e-05, "loss": 0.1273, "step": 1968 }, { "epoch": 394.0, "grad_norm": 3.690504789352417, "learning_rate": 3.034e-05, "loss": 0.1381, "step": 1970 }, { "epoch": 394.4, "grad_norm": 4.1608757972717285, "learning_rate": 3.0320000000000004e-05, "loss": 0.1502, "step": 1972 }, { "epoch": 394.8, "grad_norm": 3.455759048461914, "learning_rate": 3.03e-05, "loss": 0.1484, "step": 1974 }, { "epoch": 
395.2, "grad_norm": 4.038259983062744, "learning_rate": 3.028e-05, "loss": 0.1193, "step": 1976 }, { "epoch": 395.6, "grad_norm": 3.2492103576660156, "learning_rate": 3.0259999999999998e-05, "loss": 0.1744, "step": 1978 }, { "epoch": 396.0, "grad_norm": 1.5181330442428589, "learning_rate": 3.0240000000000002e-05, "loss": 0.0857, "step": 1980 }, { "epoch": 396.4, "grad_norm": 3.133068084716797, "learning_rate": 3.0220000000000005e-05, "loss": 0.1074, "step": 1982 }, { "epoch": 396.8, "grad_norm": 2.414536952972412, "learning_rate": 3.02e-05, "loss": 0.1219, "step": 1984 }, { "epoch": 397.2, "grad_norm": 2.1002447605133057, "learning_rate": 3.0180000000000002e-05, "loss": 0.1365, "step": 1986 }, { "epoch": 397.6, "grad_norm": 6.065481662750244, "learning_rate": 3.016e-05, "loss": 0.1293, "step": 1988 }, { "epoch": 398.0, "grad_norm": 2.854368209838867, "learning_rate": 3.0140000000000003e-05, "loss": 0.1343, "step": 1990 }, { "epoch": 398.4, "grad_norm": 5.03422212600708, "learning_rate": 3.0120000000000003e-05, "loss": 0.1403, "step": 1992 }, { "epoch": 398.8, "grad_norm": 5.6399078369140625, "learning_rate": 3.01e-05, "loss": 0.1719, "step": 1994 }, { "epoch": 399.2, "grad_norm": 4.262267589569092, "learning_rate": 3.0080000000000003e-05, "loss": 0.1139, "step": 1996 }, { "epoch": 399.6, "grad_norm": 5.227204322814941, "learning_rate": 3.006e-05, "loss": 0.1214, "step": 1998 }, { "epoch": 400.0, "grad_norm": 3.9355087280273438, "learning_rate": 3.004e-05, "loss": 0.1473, "step": 2000 }, { "epoch": 400.0, "eval_cer": 0.7616580310880829, "eval_loss": 6.240771770477295, "eval_runtime": 10.1506, "eval_samples_per_second": 0.985, "eval_steps_per_second": 0.197, "step": 2000 }, { "epoch": 400.4, "grad_norm": 3.103576183319092, "learning_rate": 3.0020000000000004e-05, "loss": 0.1785, "step": 2002 }, { "epoch": 400.8, "grad_norm": 4.65574836730957, "learning_rate": 3e-05, "loss": 0.1284, "step": 2004 }, { "epoch": 401.2, "grad_norm": 2.1072275638580322, "learning_rate": 
2.998e-05, "loss": 0.1068, "step": 2006 }, { "epoch": 401.6, "grad_norm": 1.3174000978469849, "learning_rate": 2.9959999999999998e-05, "loss": 0.0815, "step": 2008 }, { "epoch": 402.0, "grad_norm": 3.112102508544922, "learning_rate": 2.994e-05, "loss": 0.1977, "step": 2010 }, { "epoch": 402.4, "grad_norm": 2.263845920562744, "learning_rate": 2.9920000000000005e-05, "loss": 0.1348, "step": 2012 }, { "epoch": 402.8, "grad_norm": 2.766692876815796, "learning_rate": 2.9900000000000002e-05, "loss": 0.1255, "step": 2014 }, { "epoch": 403.2, "grad_norm": 2.23176646232605, "learning_rate": 2.9880000000000002e-05, "loss": 0.1442, "step": 2016 }, { "epoch": 403.6, "grad_norm": 1.9494329690933228, "learning_rate": 2.986e-05, "loss": 0.1146, "step": 2018 }, { "epoch": 404.0, "grad_norm": 5.363874435424805, "learning_rate": 2.9840000000000002e-05, "loss": 0.1214, "step": 2020 }, { "epoch": 404.4, "grad_norm": 3.4357447624206543, "learning_rate": 2.9820000000000002e-05, "loss": 0.1342, "step": 2022 }, { "epoch": 404.8, "grad_norm": 7.587038993835449, "learning_rate": 2.98e-05, "loss": 0.1999, "step": 2024 }, { "epoch": 405.2, "grad_norm": 2.659700393676758, "learning_rate": 2.9780000000000003e-05, "loss": 0.1408, "step": 2026 }, { "epoch": 405.6, "grad_norm": 4.327391624450684, "learning_rate": 2.976e-05, "loss": 0.1266, "step": 2028 }, { "epoch": 406.0, "grad_norm": 4.79611873626709, "learning_rate": 2.974e-05, "loss": 0.1461, "step": 2030 }, { "epoch": 406.4, "grad_norm": 2.158862590789795, "learning_rate": 2.9720000000000003e-05, "loss": 0.1357, "step": 2032 }, { "epoch": 406.8, "grad_norm": 2.4708826541900635, "learning_rate": 2.97e-05, "loss": 0.1163, "step": 2034 }, { "epoch": 407.2, "grad_norm": 2.922783136367798, "learning_rate": 2.9680000000000004e-05, "loss": 0.1832, "step": 2036 }, { "epoch": 407.6, "grad_norm": 4.7164692878723145, "learning_rate": 2.9659999999999997e-05, "loss": 0.1139, "step": 2038 }, { "epoch": 408.0, "grad_norm": 3.0394704341888428, 
"learning_rate": 2.964e-05, "loss": 0.1274, "step": 2040 }, { "epoch": 408.4, "grad_norm": 0.9801866412162781, "learning_rate": 2.9620000000000004e-05, "loss": 0.0813, "step": 2042 }, { "epoch": 408.8, "grad_norm": 3.053173780441284, "learning_rate": 2.96e-05, "loss": 0.1837, "step": 2044 }, { "epoch": 409.2, "grad_norm": 2.467465400695801, "learning_rate": 2.958e-05, "loss": 0.1429, "step": 2046 }, { "epoch": 409.6, "grad_norm": 3.369722843170166, "learning_rate": 2.9559999999999998e-05, "loss": 0.0949, "step": 2048 }, { "epoch": 410.0, "grad_norm": 2.210752010345459, "learning_rate": 2.9540000000000002e-05, "loss": 0.1009, "step": 2050 }, { "epoch": 410.4, "grad_norm": 6.935453414916992, "learning_rate": 2.9520000000000002e-05, "loss": 0.1679, "step": 2052 }, { "epoch": 410.8, "grad_norm": 7.696201801300049, "learning_rate": 2.95e-05, "loss": 0.1278, "step": 2054 }, { "epoch": 411.2, "grad_norm": 1.3658827543258667, "learning_rate": 2.9480000000000002e-05, "loss": 0.1132, "step": 2056 }, { "epoch": 411.6, "grad_norm": 5.29360294342041, "learning_rate": 2.946e-05, "loss": 0.1291, "step": 2058 }, { "epoch": 412.0, "grad_norm": 2.792808771133423, "learning_rate": 2.944e-05, "loss": 0.1322, "step": 2060 }, { "epoch": 412.4, "grad_norm": 6.45595121383667, "learning_rate": 2.9420000000000003e-05, "loss": 0.2103, "step": 2062 }, { "epoch": 412.8, "grad_norm": 1.8413951396942139, "learning_rate": 2.94e-05, "loss": 0.0915, "step": 2064 }, { "epoch": 413.2, "grad_norm": 2.8356518745422363, "learning_rate": 2.9380000000000003e-05, "loss": 0.0796, "step": 2066 }, { "epoch": 413.6, "grad_norm": 5.992844104766846, "learning_rate": 2.9360000000000003e-05, "loss": 0.1075, "step": 2068 }, { "epoch": 414.0, "grad_norm": 2.3731396198272705, "learning_rate": 2.934e-05, "loss": 0.1374, "step": 2070 }, { "epoch": 414.4, "grad_norm": 3.5058887004852295, "learning_rate": 2.9320000000000004e-05, "loss": 0.1277, "step": 2072 }, { "epoch": 414.8, "grad_norm": 1.2598580121994019, 
"learning_rate": 2.93e-05, "loss": 0.0687, "step": 2074 }, { "epoch": 415.2, "grad_norm": 7.589585304260254, "learning_rate": 2.928e-05, "loss": 0.1908, "step": 2076 }, { "epoch": 415.6, "grad_norm": 2.706092357635498, "learning_rate": 2.9260000000000004e-05, "loss": 0.1686, "step": 2078 }, { "epoch": 416.0, "grad_norm": 4.14879846572876, "learning_rate": 2.924e-05, "loss": 0.1473, "step": 2080 }, { "epoch": 416.4, "grad_norm": 7.4718546867370605, "learning_rate": 2.922e-05, "loss": 0.2445, "step": 2082 }, { "epoch": 416.8, "grad_norm": 2.774493455886841, "learning_rate": 2.9199999999999998e-05, "loss": 0.1167, "step": 2084 }, { "epoch": 417.2, "grad_norm": 3.767965316772461, "learning_rate": 2.9180000000000002e-05, "loss": 0.1602, "step": 2086 }, { "epoch": 417.6, "grad_norm": 3.5170719623565674, "learning_rate": 2.9160000000000005e-05, "loss": 0.1052, "step": 2088 }, { "epoch": 418.0, "grad_norm": 3.2805016040802, "learning_rate": 2.9140000000000002e-05, "loss": 0.1072, "step": 2090 }, { "epoch": 418.4, "grad_norm": 8.42609977722168, "learning_rate": 2.9120000000000002e-05, "loss": 0.1836, "step": 2092 }, { "epoch": 418.8, "grad_norm": 3.1477766036987305, "learning_rate": 2.91e-05, "loss": 0.1168, "step": 2094 }, { "epoch": 419.2, "grad_norm": 4.623281478881836, "learning_rate": 2.9080000000000003e-05, "loss": 0.1143, "step": 2096 }, { "epoch": 419.6, "grad_norm": 3.7914562225341797, "learning_rate": 2.9060000000000003e-05, "loss": 0.1385, "step": 2098 }, { "epoch": 420.0, "grad_norm": 1.30825674533844, "learning_rate": 2.904e-05, "loss": 0.0906, "step": 2100 }, { "epoch": 420.4, "grad_norm": 2.613701105117798, "learning_rate": 2.9020000000000003e-05, "loss": 0.1061, "step": 2102 }, { "epoch": 420.8, "grad_norm": 3.7864463329315186, "learning_rate": 2.9e-05, "loss": 0.18, "step": 2104 }, { "epoch": 421.2, "grad_norm": 1.8628921508789062, "learning_rate": 2.898e-05, "loss": 0.0821, "step": 2106 }, { "epoch": 421.6, "grad_norm": 5.049961090087891, "learning_rate": 
2.8960000000000004e-05, "loss": 0.1457, "step": 2108 }, { "epoch": 422.0, "grad_norm": 3.4079642295837402, "learning_rate": 2.894e-05, "loss": 0.1589, "step": 2110 }, { "epoch": 422.4, "grad_norm": 4.242629528045654, "learning_rate": 2.8920000000000004e-05, "loss": 0.0962, "step": 2112 }, { "epoch": 422.8, "grad_norm": 3.024545192718506, "learning_rate": 2.8899999999999998e-05, "loss": 0.132, "step": 2114 }, { "epoch": 423.2, "grad_norm": 3.9388020038604736, "learning_rate": 2.888e-05, "loss": 0.1404, "step": 2116 }, { "epoch": 423.6, "grad_norm": 4.817683219909668, "learning_rate": 2.8860000000000005e-05, "loss": 0.1694, "step": 2118 }, { "epoch": 424.0, "grad_norm": 1.6105481386184692, "learning_rate": 2.8840000000000002e-05, "loss": 0.1226, "step": 2120 }, { "epoch": 424.4, "grad_norm": 1.840665578842163, "learning_rate": 2.8820000000000002e-05, "loss": 0.1118, "step": 2122 }, { "epoch": 424.8, "grad_norm": 4.849315643310547, "learning_rate": 2.88e-05, "loss": 0.1711, "step": 2124 }, { "epoch": 425.2, "grad_norm": 2.858804702758789, "learning_rate": 2.8780000000000002e-05, "loss": 0.0908, "step": 2126 }, { "epoch": 425.6, "grad_norm": 2.7389400005340576, "learning_rate": 2.8760000000000002e-05, "loss": 0.1732, "step": 2128 }, { "epoch": 426.0, "grad_norm": 8.67746639251709, "learning_rate": 2.874e-05, "loss": 0.1217, "step": 2130 }, { "epoch": 426.4, "grad_norm": 2.3466663360595703, "learning_rate": 2.8720000000000003e-05, "loss": 0.129, "step": 2132 }, { "epoch": 426.8, "grad_norm": 2.3738059997558594, "learning_rate": 2.87e-05, "loss": 0.1369, "step": 2134 }, { "epoch": 427.2, "grad_norm": 4.255991458892822, "learning_rate": 2.868e-05, "loss": 0.1084, "step": 2136 }, { "epoch": 427.6, "grad_norm": 2.923297643661499, "learning_rate": 2.8660000000000003e-05, "loss": 0.0868, "step": 2138 }, { "epoch": 428.0, "grad_norm": 1.8945319652557373, "learning_rate": 2.864e-05, "loss": 0.1806, "step": 2140 }, { "epoch": 428.4, "grad_norm": 1.4484467506408691, 
"learning_rate": 2.8620000000000004e-05, "loss": 0.1384, "step": 2142 }, { "epoch": 428.8, "grad_norm": 4.060105323791504, "learning_rate": 2.86e-05, "loss": 0.099, "step": 2144 }, { "epoch": 429.2, "grad_norm": 3.4062278270721436, "learning_rate": 2.858e-05, "loss": 0.115, "step": 2146 }, { "epoch": 429.6, "grad_norm": 2.151989221572876, "learning_rate": 2.8560000000000004e-05, "loss": 0.1295, "step": 2148 }, { "epoch": 430.0, "grad_norm": 2.7934505939483643, "learning_rate": 2.854e-05, "loss": 0.1027, "step": 2150 }, { "epoch": 430.4, "grad_norm": 3.2139978408813477, "learning_rate": 2.852e-05, "loss": 0.1271, "step": 2152 }, { "epoch": 430.8, "grad_norm": 2.441424608230591, "learning_rate": 2.8499999999999998e-05, "loss": 0.1245, "step": 2154 }, { "epoch": 431.2, "grad_norm": 3.661648988723755, "learning_rate": 2.8480000000000002e-05, "loss": 0.1073, "step": 2156 }, { "epoch": 431.6, "grad_norm": 2.0183136463165283, "learning_rate": 2.8460000000000002e-05, "loss": 0.1104, "step": 2158 }, { "epoch": 432.0, "grad_norm": 2.86687970161438, "learning_rate": 2.844e-05, "loss": 0.1155, "step": 2160 }, { "epoch": 432.4, "grad_norm": 1.1998648643493652, "learning_rate": 2.8420000000000002e-05, "loss": 0.0843, "step": 2162 }, { "epoch": 432.8, "grad_norm": 2.1123528480529785, "learning_rate": 2.84e-05, "loss": 0.1068, "step": 2164 }, { "epoch": 433.2, "grad_norm": 0.95002681016922, "learning_rate": 2.8380000000000003e-05, "loss": 0.105, "step": 2166 }, { "epoch": 433.6, "grad_norm": 4.858355522155762, "learning_rate": 2.8360000000000003e-05, "loss": 0.1247, "step": 2168 }, { "epoch": 434.0, "grad_norm": 2.5363845825195312, "learning_rate": 2.834e-05, "loss": 0.1487, "step": 2170 }, { "epoch": 434.4, "grad_norm": 3.457606554031372, "learning_rate": 2.8320000000000003e-05, "loss": 0.1135, "step": 2172 }, { "epoch": 434.8, "grad_norm": 4.463535308837891, "learning_rate": 2.83e-05, "loss": 0.1728, "step": 2174 }, { "epoch": 435.2, "grad_norm": 3.531446695327759, 
"learning_rate": 2.828e-05, "loss": 0.1086, "step": 2176 }, { "epoch": 435.6, "grad_norm": 6.251543998718262, "learning_rate": 2.8260000000000004e-05, "loss": 0.2333, "step": 2178 }, { "epoch": 436.0, "grad_norm": 2.766528606414795, "learning_rate": 2.824e-05, "loss": 0.1253, "step": 2180 }, { "epoch": 436.4, "grad_norm": 7.038033485412598, "learning_rate": 2.822e-05, "loss": 0.2545, "step": 2182 }, { "epoch": 436.8, "grad_norm": 7.4704132080078125, "learning_rate": 2.8199999999999998e-05, "loss": 0.2455, "step": 2184 }, { "epoch": 437.2, "grad_norm": 6.29686975479126, "learning_rate": 2.818e-05, "loss": 0.3344, "step": 2186 }, { "epoch": 437.6, "grad_norm": 6.082864761352539, "learning_rate": 2.816e-05, "loss": 0.2231, "step": 2188 }, { "epoch": 438.0, "grad_norm": 3.985015392303467, "learning_rate": 2.8139999999999998e-05, "loss": 0.1792, "step": 2190 }, { "epoch": 438.4, "grad_norm": 2.9146018028259277, "learning_rate": 2.8120000000000002e-05, "loss": 0.1493, "step": 2192 }, { "epoch": 438.8, "grad_norm": 3.9818902015686035, "learning_rate": 2.8100000000000005e-05, "loss": 0.2604, "step": 2194 }, { "epoch": 439.2, "grad_norm": 1.0820796489715576, "learning_rate": 2.8080000000000002e-05, "loss": 0.1089, "step": 2196 }, { "epoch": 439.6, "grad_norm": 4.060903549194336, "learning_rate": 2.8060000000000002e-05, "loss": 0.116, "step": 2198 }, { "epoch": 440.0, "grad_norm": 2.9101788997650146, "learning_rate": 2.804e-05, "loss": 0.1748, "step": 2200 }, { "epoch": 440.0, "eval_cer": 0.844559585492228, "eval_loss": 5.342930793762207, "eval_runtime": 9.5766, "eval_samples_per_second": 1.044, "eval_steps_per_second": 0.209, "step": 2200 }, { "epoch": 440.4, "grad_norm": 7.607762813568115, "learning_rate": 2.8020000000000003e-05, "loss": 0.3311, "step": 2202 }, { "epoch": 440.8, "grad_norm": 6.977325916290283, "learning_rate": 2.8000000000000003e-05, "loss": 0.189, "step": 2204 }, { "epoch": 441.2, "grad_norm": 2.7992708683013916, "learning_rate": 2.798e-05, "loss": 
0.1259, "step": 2206 }, { "epoch": 441.6, "grad_norm": 1.903342843055725, "learning_rate": 2.7960000000000003e-05, "loss": 0.1041, "step": 2208 }, { "epoch": 442.0, "grad_norm": 3.4426167011260986, "learning_rate": 2.794e-05, "loss": 0.1601, "step": 2210 }, { "epoch": 442.4, "grad_norm": 1.311985969543457, "learning_rate": 2.792e-05, "loss": 0.0779, "step": 2212 }, { "epoch": 442.8, "grad_norm": 1.354495644569397, "learning_rate": 2.7900000000000004e-05, "loss": 0.0842, "step": 2214 }, { "epoch": 443.2, "grad_norm": 2.3051438331604004, "learning_rate": 2.788e-05, "loss": 0.124, "step": 2216 }, { "epoch": 443.6, "grad_norm": 5.303744316101074, "learning_rate": 2.7860000000000004e-05, "loss": 0.1536, "step": 2218 }, { "epoch": 444.0, "grad_norm": 4.015014171600342, "learning_rate": 2.7839999999999998e-05, "loss": 0.0827, "step": 2220 }, { "epoch": 444.4, "grad_norm": 5.616124629974365, "learning_rate": 2.782e-05, "loss": 0.1113, "step": 2222 }, { "epoch": 444.8, "grad_norm": 2.1459877490997314, "learning_rate": 2.7800000000000005e-05, "loss": 0.1251, "step": 2224 }, { "epoch": 445.2, "grad_norm": 4.136318206787109, "learning_rate": 2.778e-05, "loss": 0.1046, "step": 2226 }, { "epoch": 445.6, "grad_norm": 3.397648811340332, "learning_rate": 2.7760000000000002e-05, "loss": 0.1269, "step": 2228 }, { "epoch": 446.0, "grad_norm": 2.5913991928100586, "learning_rate": 2.774e-05, "loss": 0.0948, "step": 2230 }, { "epoch": 446.4, "grad_norm": 1.386391043663025, "learning_rate": 2.7720000000000002e-05, "loss": 0.0993, "step": 2232 }, { "epoch": 446.8, "grad_norm": 4.5972490310668945, "learning_rate": 2.7700000000000002e-05, "loss": 0.1483, "step": 2234 }, { "epoch": 447.2, "grad_norm": 2.5993785858154297, "learning_rate": 2.768e-05, "loss": 0.155, "step": 2236 }, { "epoch": 447.6, "grad_norm": 4.264490604400635, "learning_rate": 2.7660000000000003e-05, "loss": 0.1052, "step": 2238 }, { "epoch": 448.0, "grad_norm": 2.487481117248535, "learning_rate": 2.764e-05, "loss": 0.1335, 
"step": 2240 }, { "epoch": 448.4, "grad_norm": 1.0610066652297974, "learning_rate": 2.762e-05, "loss": 0.1398, "step": 2242 }, { "epoch": 448.8, "grad_norm": 1.1737905740737915, "learning_rate": 2.7600000000000003e-05, "loss": 0.072, "step": 2244 }, { "epoch": 449.2, "grad_norm": 5.444261074066162, "learning_rate": 2.758e-05, "loss": 0.1002, "step": 2246 }, { "epoch": 449.6, "grad_norm": 4.881010055541992, "learning_rate": 2.7560000000000004e-05, "loss": 0.1678, "step": 2248 }, { "epoch": 450.0, "grad_norm": 5.786609172821045, "learning_rate": 2.754e-05, "loss": 0.1541, "step": 2250 }, { "epoch": 450.4, "grad_norm": 1.9890432357788086, "learning_rate": 2.752e-05, "loss": 0.0955, "step": 2252 }, { "epoch": 450.8, "grad_norm": 3.460513114929199, "learning_rate": 2.7500000000000004e-05, "loss": 0.1521, "step": 2254 }, { "epoch": 451.2, "grad_norm": 2.2203564643859863, "learning_rate": 2.748e-05, "loss": 0.1178, "step": 2256 }, { "epoch": 451.6, "grad_norm": 3.564667224884033, "learning_rate": 2.746e-05, "loss": 0.1426, "step": 2258 }, { "epoch": 452.0, "grad_norm": 2.946467638015747, "learning_rate": 2.7439999999999998e-05, "loss": 0.1201, "step": 2260 }, { "epoch": 452.4, "grad_norm": 2.517930269241333, "learning_rate": 2.7420000000000002e-05, "loss": 0.1089, "step": 2262 }, { "epoch": 452.8, "grad_norm": 1.3680473566055298, "learning_rate": 2.7400000000000002e-05, "loss": 0.0866, "step": 2264 }, { "epoch": 453.2, "grad_norm": 5.366114139556885, "learning_rate": 2.738e-05, "loss": 0.1105, "step": 2266 }, { "epoch": 453.6, "grad_norm": 3.502387762069702, "learning_rate": 2.7360000000000002e-05, "loss": 0.1578, "step": 2268 }, { "epoch": 454.0, "grad_norm": 2.756788492202759, "learning_rate": 2.734e-05, "loss": 0.1241, "step": 2270 }, { "epoch": 454.4, "grad_norm": 4.074233531951904, "learning_rate": 2.7320000000000003e-05, "loss": 0.1439, "step": 2272 }, { "epoch": 454.8, "grad_norm": 3.0822720527648926, "learning_rate": 2.7300000000000003e-05, "loss": 0.0947, "step": 
2274 }, { "epoch": 455.2, "grad_norm": 3.721374034881592, "learning_rate": 2.728e-05, "loss": 0.0974, "step": 2276 }, { "epoch": 455.6, "grad_norm": 1.5979372262954712, "learning_rate": 2.7260000000000003e-05, "loss": 0.1125, "step": 2278 }, { "epoch": 456.0, "grad_norm": 1.068957805633545, "learning_rate": 2.724e-05, "loss": 0.1221, "step": 2280 }, { "epoch": 456.4, "grad_norm": 1.1962614059448242, "learning_rate": 2.722e-05, "loss": 0.0743, "step": 2282 }, { "epoch": 456.8, "grad_norm": 7.841579437255859, "learning_rate": 2.7200000000000004e-05, "loss": 0.1794, "step": 2284 }, { "epoch": 457.2, "grad_norm": 3.5426013469696045, "learning_rate": 2.718e-05, "loss": 0.1504, "step": 2286 }, { "epoch": 457.6, "grad_norm": 5.391930103302002, "learning_rate": 2.716e-05, "loss": 0.186, "step": 2288 }, { "epoch": 458.0, "grad_norm": 1.9637701511383057, "learning_rate": 2.7139999999999998e-05, "loss": 0.0673, "step": 2290 }, { "epoch": 458.4, "grad_norm": 6.227233409881592, "learning_rate": 2.712e-05, "loss": 0.1645, "step": 2292 }, { "epoch": 458.8, "grad_norm": 2.795668363571167, "learning_rate": 2.7100000000000005e-05, "loss": 0.1321, "step": 2294 }, { "epoch": 459.2, "grad_norm": 5.905117511749268, "learning_rate": 2.7079999999999998e-05, "loss": 0.1379, "step": 2296 }, { "epoch": 459.6, "grad_norm": 5.319226264953613, "learning_rate": 2.7060000000000002e-05, "loss": 0.1483, "step": 2298 }, { "epoch": 460.0, "grad_norm": 9.204543113708496, "learning_rate": 2.704e-05, "loss": 0.1946, "step": 2300 }, { "epoch": 460.4, "grad_norm": 6.897572994232178, "learning_rate": 2.7020000000000002e-05, "loss": 0.2434, "step": 2302 }, { "epoch": 460.8, "grad_norm": 15.994668006896973, "learning_rate": 2.7000000000000002e-05, "loss": 0.2446, "step": 2304 }, { "epoch": 461.2, "grad_norm": 37.115692138671875, "learning_rate": 2.698e-05, "loss": 0.2989, "step": 2306 }, { "epoch": 461.6, "grad_norm": 20.312076568603516, "learning_rate": 2.6960000000000003e-05, "loss": 0.8728, "step": 2308 
}, { "epoch": 462.0, "grad_norm": 8.775638580322266, "learning_rate": 2.694e-05, "loss": 0.3515, "step": 2310 }, { "epoch": 462.4, "grad_norm": 6.640770435333252, "learning_rate": 2.692e-05, "loss": 0.2602, "step": 2312 }, { "epoch": 462.8, "grad_norm": 10.17358112335205, "learning_rate": 2.6900000000000003e-05, "loss": 0.3542, "step": 2314 }, { "epoch": 463.2, "grad_norm": 6.9356207847595215, "learning_rate": 2.688e-05, "loss": 0.3974, "step": 2316 }, { "epoch": 463.6, "grad_norm": 13.696517944335938, "learning_rate": 2.686e-05, "loss": 0.1867, "step": 2318 }, { "epoch": 464.0, "grad_norm": 77.05476379394531, "learning_rate": 2.6840000000000004e-05, "loss": 0.1885, "step": 2320 }, { "epoch": 464.4, "grad_norm": 6.0848283767700195, "learning_rate": 2.682e-05, "loss": 0.2405, "step": 2322 }, { "epoch": 464.8, "grad_norm": 3.521810531616211, "learning_rate": 2.6800000000000004e-05, "loss": 0.1441, "step": 2324 }, { "epoch": 465.2, "grad_norm": 8.553693771362305, "learning_rate": 2.678e-05, "loss": 0.2243, "step": 2326 }, { "epoch": 465.6, "grad_norm": 3.225001335144043, "learning_rate": 2.676e-05, "loss": 0.1717, "step": 2328 }, { "epoch": 466.0, "grad_norm": 11.779875755310059, "learning_rate": 2.6740000000000005e-05, "loss": 0.3818, "step": 2330 }, { "epoch": 466.4, "grad_norm": 4.481483459472656, "learning_rate": 2.672e-05, "loss": 0.2252, "step": 2332 }, { "epoch": 466.8, "grad_norm": 3.4556853771209717, "learning_rate": 2.6700000000000002e-05, "loss": 0.1696, "step": 2334 }, { "epoch": 467.2, "grad_norm": 3.769174337387085, "learning_rate": 2.668e-05, "loss": 0.1678, "step": 2336 }, { "epoch": 467.6, "grad_norm": 5.973076343536377, "learning_rate": 2.6660000000000002e-05, "loss": 0.2523, "step": 2338 }, { "epoch": 468.0, "grad_norm": 2.8578996658325195, "learning_rate": 2.6640000000000002e-05, "loss": 0.1983, "step": 2340 }, { "epoch": 468.4, "grad_norm": 5.111087799072266, "learning_rate": 2.662e-05, "loss": 0.1354, "step": 2342 }, { "epoch": 468.8, 
"grad_norm": 7.505328178405762, "learning_rate": 2.6600000000000003e-05, "loss": 0.2694, "step": 2344 }, { "epoch": 469.2, "grad_norm": 3.969907760620117, "learning_rate": 2.658e-05, "loss": 0.1299, "step": 2346 }, { "epoch": 469.6, "grad_norm": 2.1442630290985107, "learning_rate": 2.6560000000000003e-05, "loss": 0.1741, "step": 2348 }, { "epoch": 470.0, "grad_norm": 1.8014036417007446, "learning_rate": 2.6540000000000003e-05, "loss": 0.1126, "step": 2350 }, { "epoch": 470.4, "grad_norm": 5.656866073608398, "learning_rate": 2.652e-05, "loss": 0.1666, "step": 2352 }, { "epoch": 470.8, "grad_norm": 2.227996587753296, "learning_rate": 2.6500000000000004e-05, "loss": 0.1017, "step": 2354 }, { "epoch": 471.2, "grad_norm": 6.71688175201416, "learning_rate": 2.648e-05, "loss": 0.167, "step": 2356 }, { "epoch": 471.6, "grad_norm": 5.240406036376953, "learning_rate": 2.646e-05, "loss": 0.2111, "step": 2358 }, { "epoch": 472.0, "grad_norm": 3.054079294204712, "learning_rate": 2.6440000000000004e-05, "loss": 0.1257, "step": 2360 }, { "epoch": 472.4, "grad_norm": 2.4196925163269043, "learning_rate": 2.642e-05, "loss": 0.1381, "step": 2362 }, { "epoch": 472.8, "grad_norm": 2.720067262649536, "learning_rate": 2.64e-05, "loss": 0.1242, "step": 2364 }, { "epoch": 473.2, "grad_norm": 4.917180061340332, "learning_rate": 2.6379999999999998e-05, "loss": 0.1431, "step": 2366 }, { "epoch": 473.6, "grad_norm": 1.2334229946136475, "learning_rate": 2.6360000000000002e-05, "loss": 0.1079, "step": 2368 }, { "epoch": 474.0, "grad_norm": 3.5581812858581543, "learning_rate": 2.6340000000000002e-05, "loss": 0.1284, "step": 2370 }, { "epoch": 474.4, "grad_norm": 5.021975994110107, "learning_rate": 2.632e-05, "loss": 0.1416, "step": 2372 }, { "epoch": 474.8, "grad_norm": 1.8159092664718628, "learning_rate": 2.6300000000000002e-05, "loss": 0.1233, "step": 2374 }, { "epoch": 475.2, "grad_norm": 2.765962600708008, "learning_rate": 2.628e-05, "loss": 0.1151, "step": 2376 }, { "epoch": 475.6, 
"grad_norm": 3.0692317485809326, "learning_rate": 2.6260000000000003e-05, "loss": 0.1048, "step": 2378 }, { "epoch": 476.0, "grad_norm": 4.45041036605835, "learning_rate": 2.6240000000000003e-05, "loss": 0.1426, "step": 2380 }, { "epoch": 476.4, "grad_norm": 1.0940873622894287, "learning_rate": 2.622e-05, "loss": 0.0763, "step": 2382 }, { "epoch": 476.8, "grad_norm": 2.6101717948913574, "learning_rate": 2.6200000000000003e-05, "loss": 0.1237, "step": 2384 }, { "epoch": 477.2, "grad_norm": 3.5323994159698486, "learning_rate": 2.618e-05, "loss": 0.205, "step": 2386 }, { "epoch": 477.6, "grad_norm": 4.156005859375, "learning_rate": 2.616e-05, "loss": 0.1116, "step": 2388 }, { "epoch": 478.0, "grad_norm": 1.5155543088912964, "learning_rate": 2.6140000000000004e-05, "loss": 0.0755, "step": 2390 }, { "epoch": 478.4, "grad_norm": 2.25032114982605, "learning_rate": 2.612e-05, "loss": 0.1247, "step": 2392 }, { "epoch": 478.8, "grad_norm": 5.559423923492432, "learning_rate": 2.61e-05, "loss": 0.1003, "step": 2394 }, { "epoch": 479.2, "grad_norm": 1.6602033376693726, "learning_rate": 2.6079999999999998e-05, "loss": 0.0933, "step": 2396 }, { "epoch": 479.6, "grad_norm": 5.814101696014404, "learning_rate": 2.606e-05, "loss": 0.1473, "step": 2398 }, { "epoch": 480.0, "grad_norm": 3.911107063293457, "learning_rate": 2.6040000000000005e-05, "loss": 0.1368, "step": 2400 }, { "epoch": 480.0, "eval_cer": 0.7461139896373057, "eval_loss": 5.558980941772461, "eval_runtime": 8.5004, "eval_samples_per_second": 1.176, "eval_steps_per_second": 0.235, "step": 2400 }, { "epoch": 480.4, "grad_norm": 4.684641361236572, "learning_rate": 2.602e-05, "loss": 0.1292, "step": 2402 }, { "epoch": 480.8, "grad_norm": 2.9970955848693848, "learning_rate": 2.6000000000000002e-05, "loss": 0.1221, "step": 2404 }, { "epoch": 481.2, "grad_norm": 1.0695799589157104, "learning_rate": 2.598e-05, "loss": 0.0628, "step": 2406 }, { "epoch": 481.6, "grad_norm": 3.955312728881836, "learning_rate": 
2.5960000000000002e-05, "loss": 0.2547, "step": 2408 }, { "epoch": 482.0, "grad_norm": 2.7364110946655273, "learning_rate": 2.5940000000000002e-05, "loss": 0.1042, "step": 2410 }, { "epoch": 482.4, "grad_norm": 1.6540870666503906, "learning_rate": 2.592e-05, "loss": 0.133, "step": 2412 }, { "epoch": 482.8, "grad_norm": 4.270384788513184, "learning_rate": 2.5900000000000003e-05, "loss": 0.1298, "step": 2414 }, { "epoch": 483.2, "grad_norm": 2.7965283393859863, "learning_rate": 2.588e-05, "loss": 0.1073, "step": 2416 }, { "epoch": 483.6, "grad_norm": 4.552781581878662, "learning_rate": 2.586e-05, "loss": 0.1055, "step": 2418 }, { "epoch": 484.0, "grad_norm": 2.225459337234497, "learning_rate": 2.5840000000000003e-05, "loss": 0.1604, "step": 2420 }, { "epoch": 484.4, "grad_norm": 1.2065198421478271, "learning_rate": 2.582e-05, "loss": 0.0694, "step": 2422 }, { "epoch": 484.8, "grad_norm": 2.888120412826538, "learning_rate": 2.58e-05, "loss": 0.17, "step": 2424 }, { "epoch": 485.2, "grad_norm": 1.2008005380630493, "learning_rate": 2.5779999999999997e-05, "loss": 0.0807, "step": 2426 }, { "epoch": 485.6, "grad_norm": 2.7486467361450195, "learning_rate": 2.576e-05, "loss": 0.0984, "step": 2428 }, { "epoch": 486.0, "grad_norm": 1.5352399349212646, "learning_rate": 2.5740000000000004e-05, "loss": 0.2387, "step": 2430 }, { "epoch": 486.4, "grad_norm": 0.9989357590675354, "learning_rate": 2.572e-05, "loss": 0.099, "step": 2432 }, { "epoch": 486.8, "grad_norm": 1.921958327293396, "learning_rate": 2.57e-05, "loss": 0.1045, "step": 2434 }, { "epoch": 487.2, "grad_norm": 3.524778127670288, "learning_rate": 2.5679999999999998e-05, "loss": 0.1357, "step": 2436 }, { "epoch": 487.6, "grad_norm": 2.2660274505615234, "learning_rate": 2.566e-05, "loss": 0.1749, "step": 2438 }, { "epoch": 488.0, "grad_norm": 1.9576343297958374, "learning_rate": 2.5640000000000002e-05, "loss": 0.0904, "step": 2440 }, { "epoch": 488.4, "grad_norm": 2.3125100135803223, "learning_rate": 2.562e-05, "loss": 
0.1167, "step": 2442 }, { "epoch": 488.8, "grad_norm": 55.50243377685547, "learning_rate": 2.5600000000000002e-05, "loss": 0.1864, "step": 2444 }, { "epoch": 489.2, "grad_norm": 4.1762542724609375, "learning_rate": 2.5580000000000002e-05, "loss": 0.1168, "step": 2446 }, { "epoch": 489.6, "grad_norm": 5.206878185272217, "learning_rate": 2.556e-05, "loss": 0.1332, "step": 2448 }, { "epoch": 490.0, "grad_norm": 3.7076449394226074, "learning_rate": 2.5540000000000003e-05, "loss": 0.1543, "step": 2450 }, { "epoch": 490.4, "grad_norm": 3.7791595458984375, "learning_rate": 2.552e-05, "loss": 0.2857, "step": 2452 }, { "epoch": 490.8, "grad_norm": 2.294548749923706, "learning_rate": 2.5500000000000003e-05, "loss": 0.0937, "step": 2454 }, { "epoch": 491.2, "grad_norm": 3.1516354084014893, "learning_rate": 2.5480000000000003e-05, "loss": 0.1229, "step": 2456 }, { "epoch": 491.6, "grad_norm": 2.718092679977417, "learning_rate": 2.546e-05, "loss": 0.1233, "step": 2458 }, { "epoch": 492.0, "grad_norm": 2.926225423812866, "learning_rate": 2.5440000000000004e-05, "loss": 0.114, "step": 2460 }, { "epoch": 492.4, "grad_norm": 4.610673427581787, "learning_rate": 2.542e-05, "loss": 0.1654, "step": 2462 }, { "epoch": 492.8, "grad_norm": 4.791942119598389, "learning_rate": 2.54e-05, "loss": 0.1268, "step": 2464 }, { "epoch": 493.2, "grad_norm": 3.9224584102630615, "learning_rate": 2.5380000000000004e-05, "loss": 0.1319, "step": 2466 }, { "epoch": 493.6, "grad_norm": 1.5530719757080078, "learning_rate": 2.536e-05, "loss": 0.0978, "step": 2468 }, { "epoch": 494.0, "grad_norm": 3.076397657394409, "learning_rate": 2.534e-05, "loss": 0.0923, "step": 2470 }, { "epoch": 494.4, "grad_norm": 1.4830809831619263, "learning_rate": 2.5319999999999998e-05, "loss": 0.0919, "step": 2472 }, { "epoch": 494.8, "grad_norm": 2.5039913654327393, "learning_rate": 2.5300000000000002e-05, "loss": 0.0883, "step": 2474 }, { "epoch": 495.2, "grad_norm": 5.806745529174805, "learning_rate": 2.5280000000000005e-05, 
"loss": 0.1825, "step": 2476 }, { "epoch": 495.6, "grad_norm": 4.73628568649292, "learning_rate": 2.526e-05, "loss": 0.1412, "step": 2478 }, { "epoch": 496.0, "grad_norm": 3.454263925552368, "learning_rate": 2.5240000000000002e-05, "loss": 0.2092, "step": 2480 }, { "epoch": 496.4, "grad_norm": 3.812835931777954, "learning_rate": 2.522e-05, "loss": 0.1318, "step": 2482 }, { "epoch": 496.8, "grad_norm": 2.7497341632843018, "learning_rate": 2.5200000000000003e-05, "loss": 0.1254, "step": 2484 }, { "epoch": 497.2, "grad_norm": 2.41111159324646, "learning_rate": 2.5180000000000003e-05, "loss": 0.1669, "step": 2486 }, { "epoch": 497.6, "grad_norm": 5.573232650756836, "learning_rate": 2.516e-05, "loss": 0.1725, "step": 2488 }, { "epoch": 498.0, "grad_norm": 4.965357303619385, "learning_rate": 2.5140000000000003e-05, "loss": 0.2103, "step": 2490 }, { "epoch": 498.4, "grad_norm": 2.43125319480896, "learning_rate": 2.512e-05, "loss": 0.1217, "step": 2492 }, { "epoch": 498.8, "grad_norm": 7.352945327758789, "learning_rate": 2.51e-05, "loss": 0.1278, "step": 2494 }, { "epoch": 499.2, "grad_norm": 4.72855281829834, "learning_rate": 2.5080000000000004e-05, "loss": 0.1891, "step": 2496 }, { "epoch": 499.6, "grad_norm": 6.205315113067627, "learning_rate": 2.506e-05, "loss": 0.1326, "step": 2498 }, { "epoch": 500.0, "grad_norm": 3.9389426708221436, "learning_rate": 2.504e-05, "loss": 0.145, "step": 2500 }, { "epoch": 500.4, "grad_norm": 2.37660813331604, "learning_rate": 2.5019999999999998e-05, "loss": 0.0906, "step": 2502 }, { "epoch": 500.8, "grad_norm": 1.68324613571167, "learning_rate": 2.5e-05, "loss": 0.1086, "step": 2504 }, { "epoch": 501.2, "grad_norm": 4.37831449508667, "learning_rate": 2.498e-05, "loss": 0.1332, "step": 2506 }, { "epoch": 501.6, "grad_norm": 2.8366785049438477, "learning_rate": 2.496e-05, "loss": 0.1022, "step": 2508 }, { "epoch": 502.0, "grad_norm": 2.1639950275421143, "learning_rate": 2.4940000000000002e-05, "loss": 0.1666, "step": 2510 }, { "epoch": 
502.4, "grad_norm": 1.7866286039352417, "learning_rate": 2.4920000000000002e-05, "loss": 0.1471, "step": 2512 }, { "epoch": 502.8, "grad_norm": 3.455711841583252, "learning_rate": 2.4900000000000002e-05, "loss": 0.0849, "step": 2514 }, { "epoch": 503.2, "grad_norm": 3.003446578979492, "learning_rate": 2.488e-05, "loss": 0.1411, "step": 2516 }, { "epoch": 503.6, "grad_norm": 1.7913579940795898, "learning_rate": 2.486e-05, "loss": 0.0741, "step": 2518 }, { "epoch": 504.0, "grad_norm": 2.321124792098999, "learning_rate": 2.4840000000000003e-05, "loss": 0.1278, "step": 2520 }, { "epoch": 504.4, "grad_norm": 1.3931304216384888, "learning_rate": 2.4820000000000003e-05, "loss": 0.0865, "step": 2522 }, { "epoch": 504.8, "grad_norm": 2.296494483947754, "learning_rate": 2.48e-05, "loss": 0.1026, "step": 2524 }, { "epoch": 505.2, "grad_norm": 18.643718719482422, "learning_rate": 2.478e-05, "loss": 0.1575, "step": 2526 }, { "epoch": 505.6, "grad_norm": 5.901914119720459, "learning_rate": 2.476e-05, "loss": 0.1366, "step": 2528 }, { "epoch": 506.0, "grad_norm": 2.0314862728118896, "learning_rate": 2.4740000000000004e-05, "loss": 0.1021, "step": 2530 }, { "epoch": 506.4, "grad_norm": 7.161267280578613, "learning_rate": 2.472e-05, "loss": 0.1336, "step": 2532 }, { "epoch": 506.8, "grad_norm": 5.649757385253906, "learning_rate": 2.47e-05, "loss": 0.1108, "step": 2534 }, { "epoch": 507.2, "grad_norm": 3.4043819904327393, "learning_rate": 2.468e-05, "loss": 0.1111, "step": 2536 }, { "epoch": 507.6, "grad_norm": 4.375669479370117, "learning_rate": 2.466e-05, "loss": 0.1037, "step": 2538 }, { "epoch": 508.0, "grad_norm": 2.1790218353271484, "learning_rate": 2.464e-05, "loss": 0.0772, "step": 2540 }, { "epoch": 508.4, "grad_norm": 5.11190128326416, "learning_rate": 2.462e-05, "loss": 0.1388, "step": 2542 }, { "epoch": 508.8, "grad_norm": 3.799504518508911, "learning_rate": 2.46e-05, "loss": 0.1198, "step": 2544 }, { "epoch": 509.2, "grad_norm": 2.7028844356536865, "learning_rate": 
2.4580000000000002e-05, "loss": 0.0971, "step": 2546 }, { "epoch": 509.6, "grad_norm": 3.309697151184082, "learning_rate": 2.4560000000000002e-05, "loss": 0.1655, "step": 2548 }, { "epoch": 510.0, "grad_norm": 3.145717144012451, "learning_rate": 2.4540000000000002e-05, "loss": 0.1297, "step": 2550 }, { "epoch": 510.4, "grad_norm": 3.5902652740478516, "learning_rate": 2.4520000000000002e-05, "loss": 0.1567, "step": 2552 }, { "epoch": 510.8, "grad_norm": 2.754152536392212, "learning_rate": 2.45e-05, "loss": 0.1422, "step": 2554 }, { "epoch": 511.2, "grad_norm": 2.6286187171936035, "learning_rate": 2.448e-05, "loss": 0.1112, "step": 2556 }, { "epoch": 511.6, "grad_norm": 3.495182991027832, "learning_rate": 2.4460000000000003e-05, "loss": 0.1182, "step": 2558 }, { "epoch": 512.0, "grad_norm": 2.852360963821411, "learning_rate": 2.4440000000000003e-05, "loss": 0.1152, "step": 2560 }, { "epoch": 512.4, "grad_norm": 2.074434757232666, "learning_rate": 2.442e-05, "loss": 0.0837, "step": 2562 }, { "epoch": 512.8, "grad_norm": 2.0009803771972656, "learning_rate": 2.44e-05, "loss": 0.1198, "step": 2564 }, { "epoch": 513.2, "grad_norm": 2.142988443374634, "learning_rate": 2.438e-05, "loss": 0.0778, "step": 2566 }, { "epoch": 513.6, "grad_norm": 1.4459530115127563, "learning_rate": 2.4360000000000004e-05, "loss": 0.0945, "step": 2568 }, { "epoch": 514.0, "grad_norm": 1.9289487600326538, "learning_rate": 2.434e-05, "loss": 0.1056, "step": 2570 }, { "epoch": 514.4, "grad_norm": 1.9753559827804565, "learning_rate": 2.432e-05, "loss": 0.1024, "step": 2572 }, { "epoch": 514.8, "grad_norm": 1.686347246170044, "learning_rate": 2.43e-05, "loss": 0.0929, "step": 2574 }, { "epoch": 515.2, "grad_norm": 1.996042013168335, "learning_rate": 2.428e-05, "loss": 0.1105, "step": 2576 }, { "epoch": 515.6, "grad_norm": 2.0957796573638916, "learning_rate": 2.426e-05, "loss": 0.0677, "step": 2578 }, { "epoch": 516.0, "grad_norm": 1.7301092147827148, "learning_rate": 2.4240000000000002e-05, "loss": 
0.0877, "step": 2580 }, { "epoch": 516.4, "grad_norm": 0.621410071849823, "learning_rate": 2.4220000000000002e-05, "loss": 0.0712, "step": 2582 }, { "epoch": 516.8, "grad_norm": 4.836117267608643, "learning_rate": 2.4200000000000002e-05, "loss": 0.0934, "step": 2584 }, { "epoch": 517.2, "grad_norm": 3.3124561309814453, "learning_rate": 2.418e-05, "loss": 0.1267, "step": 2586 }, { "epoch": 517.6, "grad_norm": 3.9416232109069824, "learning_rate": 2.4160000000000002e-05, "loss": 0.0942, "step": 2588 }, { "epoch": 518.0, "grad_norm": 2.6292450428009033, "learning_rate": 2.4140000000000003e-05, "loss": 0.1269, "step": 2590 }, { "epoch": 518.4, "grad_norm": 3.8789076805114746, "learning_rate": 2.412e-05, "loss": 0.1418, "step": 2592 }, { "epoch": 518.8, "grad_norm": 2.269087791442871, "learning_rate": 2.41e-05, "loss": 0.085, "step": 2594 }, { "epoch": 519.2, "grad_norm": 2.7680087089538574, "learning_rate": 2.408e-05, "loss": 0.1074, "step": 2596 }, { "epoch": 519.6, "grad_norm": 1.231184482574463, "learning_rate": 2.4060000000000003e-05, "loss": 0.0695, "step": 2598 }, { "epoch": 520.0, "grad_norm": 3.4624996185302734, "learning_rate": 2.404e-05, "loss": 0.1288, "step": 2600 }, { "epoch": 520.0, "eval_cer": 0.7512953367875648, "eval_loss": 5.26657772064209, "eval_runtime": 10.2121, "eval_samples_per_second": 0.979, "eval_steps_per_second": 0.196, "step": 2600 }, { "epoch": 520.4, "grad_norm": 1.9507189989089966, "learning_rate": 2.402e-05, "loss": 0.0735, "step": 2602 }, { "epoch": 520.8, "grad_norm": 2.8180816173553467, "learning_rate": 2.4e-05, "loss": 0.1307, "step": 2604 }, { "epoch": 521.2, "grad_norm": 4.875729560852051, "learning_rate": 2.398e-05, "loss": 0.0902, "step": 2606 }, { "epoch": 521.6, "grad_norm": 3.008120059967041, "learning_rate": 2.396e-05, "loss": 0.0865, "step": 2608 }, { "epoch": 522.0, "grad_norm": 0.9475916624069214, "learning_rate": 2.394e-05, "loss": 0.0703, "step": 2610 }, { "epoch": 522.4, "grad_norm": 1.7550231218338013, "learning_rate": 
2.392e-05, "loss": 0.0646, "step": 2612 }, { "epoch": 522.8, "grad_norm": 2.7088842391967773, "learning_rate": 2.39e-05, "loss": 0.096, "step": 2614 }, { "epoch": 523.2, "grad_norm": 1.4161086082458496, "learning_rate": 2.3880000000000002e-05, "loss": 0.0964, "step": 2616 }, { "epoch": 523.6, "grad_norm": 1.264827847480774, "learning_rate": 2.3860000000000002e-05, "loss": 0.0662, "step": 2618 }, { "epoch": 524.0, "grad_norm": 1.4094135761260986, "learning_rate": 2.3840000000000002e-05, "loss": 0.0965, "step": 2620 }, { "epoch": 524.4, "grad_norm": 1.4363828897476196, "learning_rate": 2.3820000000000002e-05, "loss": 0.0779, "step": 2622 }, { "epoch": 524.8, "grad_norm": 1.4233641624450684, "learning_rate": 2.38e-05, "loss": 0.0705, "step": 2624 }, { "epoch": 525.2, "grad_norm": 2.480970621109009, "learning_rate": 2.3780000000000003e-05, "loss": 0.0897, "step": 2626 }, { "epoch": 525.6, "grad_norm": 0.6600705981254578, "learning_rate": 2.3760000000000003e-05, "loss": 0.0793, "step": 2628 }, { "epoch": 526.0, "grad_norm": 3.6763248443603516, "learning_rate": 2.374e-05, "loss": 0.096, "step": 2630 }, { "epoch": 526.4, "grad_norm": 3.463775634765625, "learning_rate": 2.372e-05, "loss": 0.0831, "step": 2632 }, { "epoch": 526.8, "grad_norm": 6.810688018798828, "learning_rate": 2.37e-05, "loss": 0.1137, "step": 2634 }, { "epoch": 527.2, "grad_norm": 2.3730781078338623, "learning_rate": 2.3680000000000004e-05, "loss": 0.1175, "step": 2636 }, { "epoch": 527.6, "grad_norm": 2.6050760746002197, "learning_rate": 2.366e-05, "loss": 0.0802, "step": 2638 }, { "epoch": 528.0, "grad_norm": 4.423994541168213, "learning_rate": 2.364e-05, "loss": 0.1108, "step": 2640 }, { "epoch": 528.4, "grad_norm": 5.180148601531982, "learning_rate": 2.362e-05, "loss": 0.0932, "step": 2642 }, { "epoch": 528.8, "grad_norm": 2.3446710109710693, "learning_rate": 2.36e-05, "loss": 0.1016, "step": 2644 }, { "epoch": 529.2, "grad_norm": 2.5139336585998535, "learning_rate": 2.358e-05, "loss": 0.0751, 
"step": 2646 }, { "epoch": 529.6, "grad_norm": 1.2989625930786133, "learning_rate": 2.356e-05, "loss": 0.0662, "step": 2648 }, { "epoch": 530.0, "grad_norm": 2.553877830505371, "learning_rate": 2.354e-05, "loss": 0.1177, "step": 2650 }, { "epoch": 530.4, "grad_norm": 1.8396165370941162, "learning_rate": 2.3520000000000002e-05, "loss": 0.0662, "step": 2652 }, { "epoch": 530.8, "grad_norm": 4.663384437561035, "learning_rate": 2.35e-05, "loss": 0.1178, "step": 2654 }, { "epoch": 531.2, "grad_norm": 2.8219828605651855, "learning_rate": 2.3480000000000002e-05, "loss": 0.1132, "step": 2656 }, { "epoch": 531.6, "grad_norm": 1.2275285720825195, "learning_rate": 2.3460000000000002e-05, "loss": 0.1046, "step": 2658 }, { "epoch": 532.0, "grad_norm": 1.2770496606826782, "learning_rate": 2.344e-05, "loss": 0.0782, "step": 2660 }, { "epoch": 532.4, "grad_norm": 1.856634497642517, "learning_rate": 2.342e-05, "loss": 0.0969, "step": 2662 }, { "epoch": 532.8, "grad_norm": 1.8828840255737305, "learning_rate": 2.3400000000000003e-05, "loss": 0.0841, "step": 2664 }, { "epoch": 533.2, "grad_norm": 2.2948920726776123, "learning_rate": 2.3380000000000003e-05, "loss": 0.0583, "step": 2666 }, { "epoch": 533.6, "grad_norm": 0.8606839776039124, "learning_rate": 2.336e-05, "loss": 0.0738, "step": 2668 }, { "epoch": 534.0, "grad_norm": 2.4591057300567627, "learning_rate": 2.334e-05, "loss": 0.1054, "step": 2670 }, { "epoch": 534.4, "grad_norm": 3.764620304107666, "learning_rate": 2.332e-05, "loss": 0.0764, "step": 2672 }, { "epoch": 534.8, "grad_norm": 2.255633592605591, "learning_rate": 2.3300000000000004e-05, "loss": 0.0842, "step": 2674 }, { "epoch": 535.2, "grad_norm": 2.200331687927246, "learning_rate": 2.328e-05, "loss": 0.0778, "step": 2676 }, { "epoch": 535.6, "grad_norm": 2.497239589691162, "learning_rate": 2.326e-05, "loss": 0.0728, "step": 2678 }, { "epoch": 536.0, "grad_norm": 3.267695188522339, "learning_rate": 2.324e-05, "loss": 0.1139, "step": 2680 }, { "epoch": 536.4, 
"grad_norm": 1.8557711839675903, "learning_rate": 2.322e-05, "loss": 0.098, "step": 2682 }, { "epoch": 536.8, "grad_norm": 2.2791478633880615, "learning_rate": 2.32e-05, "loss": 0.0896, "step": 2684 }, { "epoch": 537.2, "grad_norm": 2.8376824855804443, "learning_rate": 2.318e-05, "loss": 0.0781, "step": 2686 }, { "epoch": 537.6, "grad_norm": 2.163323163986206, "learning_rate": 2.3160000000000002e-05, "loss": 0.0969, "step": 2688 }, { "epoch": 538.0, "grad_norm": 5.7494797706604, "learning_rate": 2.3140000000000002e-05, "loss": 0.0955, "step": 2690 }, { "epoch": 538.4, "grad_norm": 2.4649808406829834, "learning_rate": 2.312e-05, "loss": 0.11, "step": 2692 }, { "epoch": 538.8, "grad_norm": 3.0863876342773438, "learning_rate": 2.3100000000000002e-05, "loss": 0.0843, "step": 2694 }, { "epoch": 539.2, "grad_norm": 3.6312317848205566, "learning_rate": 2.3080000000000003e-05, "loss": 0.1019, "step": 2696 }, { "epoch": 539.6, "grad_norm": 2.468548059463501, "learning_rate": 2.306e-05, "loss": 0.0822, "step": 2698 }, { "epoch": 540.0, "grad_norm": 1.6652857065200806, "learning_rate": 2.304e-05, "loss": 0.0961, "step": 2700 }, { "epoch": 540.4, "grad_norm": 3.8361330032348633, "learning_rate": 2.302e-05, "loss": 0.101, "step": 2702 }, { "epoch": 540.8, "grad_norm": 4.310556411743164, "learning_rate": 2.3000000000000003e-05, "loss": 0.1128, "step": 2704 }, { "epoch": 541.2, "grad_norm": 3.1835808753967285, "learning_rate": 2.298e-05, "loss": 0.0987, "step": 2706 }, { "epoch": 541.6, "grad_norm": 3.3024141788482666, "learning_rate": 2.296e-05, "loss": 0.1063, "step": 2708 }, { "epoch": 542.0, "grad_norm": 1.6458234786987305, "learning_rate": 2.294e-05, "loss": 0.1126, "step": 2710 }, { "epoch": 542.4, "grad_norm": 1.724668025970459, "learning_rate": 2.292e-05, "loss": 0.0867, "step": 2712 }, { "epoch": 542.8, "grad_norm": 1.4865473508834839, "learning_rate": 2.29e-05, "loss": 0.079, "step": 2714 }, { "epoch": 543.2, "grad_norm": 3.5208725929260254, "learning_rate": 2.288e-05, 
"loss": 0.1429, "step": 2716 }, { "epoch": 543.6, "grad_norm": 3.6930909156799316, "learning_rate": 2.286e-05, "loss": 0.1269, "step": 2718 }, { "epoch": 544.0, "grad_norm": 2.0226941108703613, "learning_rate": 2.284e-05, "loss": 0.0607, "step": 2720 }, { "epoch": 544.4, "grad_norm": 4.909393310546875, "learning_rate": 2.282e-05, "loss": 0.1087, "step": 2722 }, { "epoch": 544.8, "grad_norm": 4.9799346923828125, "learning_rate": 2.2800000000000002e-05, "loss": 0.1221, "step": 2724 }, { "epoch": 545.2, "grad_norm": 1.59001624584198, "learning_rate": 2.2780000000000002e-05, "loss": 0.0683, "step": 2726 }, { "epoch": 545.6, "grad_norm": 3.707362651824951, "learning_rate": 2.2760000000000002e-05, "loss": 0.1001, "step": 2728 }, { "epoch": 546.0, "grad_norm": 1.9665006399154663, "learning_rate": 2.274e-05, "loss": 0.0704, "step": 2730 }, { "epoch": 546.4, "grad_norm": 1.5304112434387207, "learning_rate": 2.2720000000000003e-05, "loss": 0.0922, "step": 2732 }, { "epoch": 546.8, "grad_norm": 3.010896682739258, "learning_rate": 2.2700000000000003e-05, "loss": 0.0888, "step": 2734 }, { "epoch": 547.2, "grad_norm": 2.2305309772491455, "learning_rate": 2.268e-05, "loss": 0.0824, "step": 2736 }, { "epoch": 547.6, "grad_norm": 2.85485577583313, "learning_rate": 2.266e-05, "loss": 0.1067, "step": 2738 }, { "epoch": 548.0, "grad_norm": 1.1896988153457642, "learning_rate": 2.264e-05, "loss": 0.0712, "step": 2740 }, { "epoch": 548.4, "grad_norm": 5.146761417388916, "learning_rate": 2.2620000000000004e-05, "loss": 0.1166, "step": 2742 }, { "epoch": 548.8, "grad_norm": 2.2142999172210693, "learning_rate": 2.26e-05, "loss": 0.0759, "step": 2744 }, { "epoch": 549.2, "grad_norm": 1.410355567932129, "learning_rate": 2.258e-05, "loss": 0.0963, "step": 2746 }, { "epoch": 549.6, "grad_norm": 4.55780029296875, "learning_rate": 2.256e-05, "loss": 0.1034, "step": 2748 }, { "epoch": 550.0, "grad_norm": 4.3058037757873535, "learning_rate": 2.254e-05, "loss": 0.1235, "step": 2750 }, { "epoch": 
550.4, "grad_norm": 4.704401969909668, "learning_rate": 2.252e-05, "loss": 0.0778, "step": 2752 }, { "epoch": 550.8, "grad_norm": 4.31204080581665, "learning_rate": 2.25e-05, "loss": 0.1301, "step": 2754 }, { "epoch": 551.2, "grad_norm": 2.2332754135131836, "learning_rate": 2.248e-05, "loss": 0.0945, "step": 2756 }, { "epoch": 551.6, "grad_norm": 6.21327543258667, "learning_rate": 2.2460000000000002e-05, "loss": 0.1265, "step": 2758 }, { "epoch": 552.0, "grad_norm": 1.6358875036239624, "learning_rate": 2.244e-05, "loss": 0.0797, "step": 2760 }, { "epoch": 552.4, "grad_norm": 2.589110851287842, "learning_rate": 2.2420000000000002e-05, "loss": 0.1043, "step": 2762 }, { "epoch": 552.8, "grad_norm": 2.670912742614746, "learning_rate": 2.2400000000000002e-05, "loss": 0.0964, "step": 2764 }, { "epoch": 553.2, "grad_norm": 2.675179958343506, "learning_rate": 2.2380000000000003e-05, "loss": 0.0827, "step": 2766 }, { "epoch": 553.6, "grad_norm": 2.216668128967285, "learning_rate": 2.236e-05, "loss": 0.0799, "step": 2768 }, { "epoch": 554.0, "grad_norm": 1.6410869359970093, "learning_rate": 2.234e-05, "loss": 0.0771, "step": 2770 }, { "epoch": 554.4, "grad_norm": 2.870657205581665, "learning_rate": 2.2320000000000003e-05, "loss": 0.097, "step": 2772 }, { "epoch": 554.8, "grad_norm": 1.0376278162002563, "learning_rate": 2.23e-05, "loss": 0.058, "step": 2774 }, { "epoch": 555.2, "grad_norm": 4.137859344482422, "learning_rate": 2.228e-05, "loss": 0.1185, "step": 2776 }, { "epoch": 555.6, "grad_norm": 2.4561498165130615, "learning_rate": 2.226e-05, "loss": 0.0795, "step": 2778 }, { "epoch": 556.0, "grad_norm": 4.502532482147217, "learning_rate": 2.224e-05, "loss": 0.1177, "step": 2780 }, { "epoch": 556.4, "grad_norm": 3.1879873275756836, "learning_rate": 2.222e-05, "loss": 0.1276, "step": 2782 }, { "epoch": 556.8, "grad_norm": 1.2662545442581177, "learning_rate": 2.22e-05, "loss": 0.0819, "step": 2784 }, { "epoch": 557.2, "grad_norm": 2.5692007541656494, "learning_rate": 
2.218e-05, "loss": 0.0993, "step": 2786 }, { "epoch": 557.6, "grad_norm": 5.032308101654053, "learning_rate": 2.216e-05, "loss": 0.1457, "step": 2788 }, { "epoch": 558.0, "grad_norm": 1.7276628017425537, "learning_rate": 2.214e-05, "loss": 0.0813, "step": 2790 }, { "epoch": 558.4, "grad_norm": 2.6543221473693848, "learning_rate": 2.212e-05, "loss": 0.0744, "step": 2792 }, { "epoch": 558.8, "grad_norm": 4.194149494171143, "learning_rate": 2.2100000000000002e-05, "loss": 0.1281, "step": 2794 }, { "epoch": 559.2, "grad_norm": 1.7024481296539307, "learning_rate": 2.2080000000000002e-05, "loss": 0.0777, "step": 2796 }, { "epoch": 559.6, "grad_norm": 3.592214584350586, "learning_rate": 2.206e-05, "loss": 0.105, "step": 2798 }, { "epoch": 560.0, "grad_norm": 1.345398187637329, "learning_rate": 2.2040000000000002e-05, "loss": 0.061, "step": 2800 }, { "epoch": 560.0, "eval_cer": 0.7046632124352331, "eval_loss": 5.871425151824951, "eval_runtime": 7.5732, "eval_samples_per_second": 1.32, "eval_steps_per_second": 0.264, "step": 2800 }, { "epoch": 560.4, "grad_norm": 1.9446215629577637, "learning_rate": 2.2020000000000003e-05, "loss": 0.0785, "step": 2802 }, { "epoch": 560.8, "grad_norm": 1.662397861480713, "learning_rate": 2.2000000000000003e-05, "loss": 0.079, "step": 2804 }, { "epoch": 561.2, "grad_norm": 3.255864143371582, "learning_rate": 2.198e-05, "loss": 0.1031, "step": 2806 }, { "epoch": 561.6, "grad_norm": 4.21778678894043, "learning_rate": 2.196e-05, "loss": 0.1246, "step": 2808 }, { "epoch": 562.0, "grad_norm": 3.8382344245910645, "learning_rate": 2.1940000000000003e-05, "loss": 0.0801, "step": 2810 }, { "epoch": 562.4, "grad_norm": 1.351887583732605, "learning_rate": 2.192e-05, "loss": 0.0687, "step": 2812 }, { "epoch": 562.8, "grad_norm": 2.213937520980835, "learning_rate": 2.19e-05, "loss": 0.0733, "step": 2814 }, { "epoch": 563.2, "grad_norm": 2.0447607040405273, "learning_rate": 2.188e-05, "loss": 0.0831, "step": 2816 }, { "epoch": 563.6, "grad_norm": 
2.6329054832458496, "learning_rate": 2.186e-05, "loss": 0.0703, "step": 2818 }, { "epoch": 564.0, "grad_norm": 2.7810661792755127, "learning_rate": 2.184e-05, "loss": 0.1188, "step": 2820 }, { "epoch": 564.4, "grad_norm": 1.217483401298523, "learning_rate": 2.182e-05, "loss": 0.0744, "step": 2822 }, { "epoch": 564.8, "grad_norm": 2.893791913986206, "learning_rate": 2.18e-05, "loss": 0.0969, "step": 2824 }, { "epoch": 565.2, "grad_norm": 2.8382651805877686, "learning_rate": 2.178e-05, "loss": 0.0907, "step": 2826 }, { "epoch": 565.6, "grad_norm": 1.3939740657806396, "learning_rate": 2.176e-05, "loss": 0.0704, "step": 2828 }, { "epoch": 566.0, "grad_norm": 1.3342844247817993, "learning_rate": 2.1740000000000002e-05, "loss": 0.0584, "step": 2830 }, { "epoch": 566.4, "grad_norm": 3.3584325313568115, "learning_rate": 2.1720000000000002e-05, "loss": 0.0686, "step": 2832 }, { "epoch": 566.8, "grad_norm": 2.186462879180908, "learning_rate": 2.1700000000000002e-05, "loss": 0.0668, "step": 2834 }, { "epoch": 567.2, "grad_norm": 3.376781702041626, "learning_rate": 2.168e-05, "loss": 0.1144, "step": 2836 }, { "epoch": 567.6, "grad_norm": 4.04083251953125, "learning_rate": 2.166e-05, "loss": 0.1134, "step": 2838 }, { "epoch": 568.0, "grad_norm": 3.4629011154174805, "learning_rate": 2.1640000000000003e-05, "loss": 0.1043, "step": 2840 }, { "epoch": 568.4, "grad_norm": 1.264782428741455, "learning_rate": 2.162e-05, "loss": 0.0623, "step": 2842 }, { "epoch": 568.8, "grad_norm": 4.419800758361816, "learning_rate": 2.16e-05, "loss": 0.1421, "step": 2844 }, { "epoch": 569.2, "grad_norm": 2.84981107711792, "learning_rate": 2.158e-05, "loss": 0.0861, "step": 2846 }, { "epoch": 569.6, "grad_norm": 1.9858825206756592, "learning_rate": 2.1560000000000004e-05, "loss": 0.092, "step": 2848 }, { "epoch": 570.0, "grad_norm": 5.713150978088379, "learning_rate": 2.154e-05, "loss": 0.1344, "step": 2850 }, { "epoch": 570.4, "grad_norm": 6.728931903839111, "learning_rate": 2.152e-05, "loss": 
0.1809, "step": 2852 }, { "epoch": 570.8, "grad_norm": 2.6093764305114746, "learning_rate": 2.15e-05, "loss": 0.1083, "step": 2854 }, { "epoch": 571.2, "grad_norm": 7.2618818283081055, "learning_rate": 2.148e-05, "loss": 0.0921, "step": 2856 }, { "epoch": 571.6, "grad_norm": 1.9876304864883423, "learning_rate": 2.146e-05, "loss": 0.1267, "step": 2858 }, { "epoch": 572.0, "grad_norm": 2.192747116088867, "learning_rate": 2.144e-05, "loss": 0.0996, "step": 2860 }, { "epoch": 572.4, "grad_norm": 41.23218536376953, "learning_rate": 2.142e-05, "loss": 0.2353, "step": 2862 }, { "epoch": 572.8, "grad_norm": 3.3676564693450928, "learning_rate": 2.1400000000000002e-05, "loss": 0.0811, "step": 2864 }, { "epoch": 573.2, "grad_norm": 1.328993558883667, "learning_rate": 2.138e-05, "loss": 0.098, "step": 2866 }, { "epoch": 573.6, "grad_norm": 2.340534210205078, "learning_rate": 2.1360000000000002e-05, "loss": 0.0756, "step": 2868 }, { "epoch": 574.0, "grad_norm": 2.8786284923553467, "learning_rate": 2.1340000000000002e-05, "loss": 0.0854, "step": 2870 }, { "epoch": 574.4, "grad_norm": 0.8964816927909851, "learning_rate": 2.1320000000000003e-05, "loss": 0.0578, "step": 2872 }, { "epoch": 574.8, "grad_norm": 0.8548876047134399, "learning_rate": 2.13e-05, "loss": 0.0861, "step": 2874 }, { "epoch": 575.2, "grad_norm": 2.7419703006744385, "learning_rate": 2.128e-05, "loss": 0.0963, "step": 2876 }, { "epoch": 575.6, "grad_norm": 1.8067907094955444, "learning_rate": 2.1260000000000003e-05, "loss": 0.0951, "step": 2878 }, { "epoch": 576.0, "grad_norm": 3.687648057937622, "learning_rate": 2.124e-05, "loss": 0.0871, "step": 2880 }, { "epoch": 576.4, "grad_norm": 1.9315521717071533, "learning_rate": 2.122e-05, "loss": 0.0922, "step": 2882 }, { "epoch": 576.8, "grad_norm": 1.7751024961471558, "learning_rate": 2.12e-05, "loss": 0.0655, "step": 2884 }, { "epoch": 577.2, "grad_norm": 1.4915128946304321, "learning_rate": 2.118e-05, "loss": 0.0876, "step": 2886 }, { "epoch": 577.6, "grad_norm": 
6.069519519805908, "learning_rate": 2.116e-05, "loss": 0.1054, "step": 2888 }, { "epoch": 578.0, "grad_norm": 2.5972280502319336, "learning_rate": 2.114e-05, "loss": 0.0884, "step": 2890 }, { "epoch": 578.4, "grad_norm": 4.94214391708374, "learning_rate": 2.112e-05, "loss": 0.1114, "step": 2892 }, { "epoch": 578.8, "grad_norm": 6.188068389892578, "learning_rate": 2.11e-05, "loss": 0.137, "step": 2894 }, { "epoch": 579.2, "grad_norm": 1.7973812818527222, "learning_rate": 2.1079999999999998e-05, "loss": 0.083, "step": 2896 }, { "epoch": 579.6, "grad_norm": 9.86609935760498, "learning_rate": 2.106e-05, "loss": 0.1612, "step": 2898 }, { "epoch": 580.0, "grad_norm": 7.7617645263671875, "learning_rate": 2.1040000000000002e-05, "loss": 0.1526, "step": 2900 }, { "epoch": 580.4, "grad_norm": 1.8411521911621094, "learning_rate": 2.1020000000000002e-05, "loss": 0.0778, "step": 2902 }, { "epoch": 580.8, "grad_norm": 2.9589316844940186, "learning_rate": 2.1e-05, "loss": 0.0895, "step": 2904 }, { "epoch": 581.2, "grad_norm": 1.977992057800293, "learning_rate": 2.098e-05, "loss": 0.0755, "step": 2906 }, { "epoch": 581.6, "grad_norm": 1.6778709888458252, "learning_rate": 2.0960000000000003e-05, "loss": 0.0801, "step": 2908 }, { "epoch": 582.0, "grad_norm": 3.223924398422241, "learning_rate": 2.0940000000000003e-05, "loss": 0.0813, "step": 2910 }, { "epoch": 582.4, "grad_norm": 2.0674643516540527, "learning_rate": 2.092e-05, "loss": 0.0784, "step": 2912 }, { "epoch": 582.8, "grad_norm": 2.0526344776153564, "learning_rate": 2.09e-05, "loss": 0.0764, "step": 2914 }, { "epoch": 583.2, "grad_norm": 1.7967157363891602, "learning_rate": 2.0880000000000003e-05, "loss": 0.0885, "step": 2916 }, { "epoch": 583.6, "grad_norm": 1.5187805891036987, "learning_rate": 2.086e-05, "loss": 0.0613, "step": 2918 }, { "epoch": 584.0, "grad_norm": 2.5726311206817627, "learning_rate": 2.084e-05, "loss": 0.0992, "step": 2920 }, { "epoch": 584.4, "grad_norm": 3.439004421234131, "learning_rate": 2.082e-05, 
"loss": 0.0689, "step": 2922 }, { "epoch": 584.8, "grad_norm": 1.4500306844711304, "learning_rate": 2.08e-05, "loss": 0.0836, "step": 2924 }, { "epoch": 585.2, "grad_norm": 1.1707394123077393, "learning_rate": 2.078e-05, "loss": 0.0719, "step": 2926 }, { "epoch": 585.6, "grad_norm": 1.1638984680175781, "learning_rate": 2.076e-05, "loss": 0.0578, "step": 2928 }, { "epoch": 586.0, "grad_norm": 3.594942331314087, "learning_rate": 2.074e-05, "loss": 0.0911, "step": 2930 }, { "epoch": 586.4, "grad_norm": 1.3355311155319214, "learning_rate": 2.072e-05, "loss": 0.0621, "step": 2932 }, { "epoch": 586.8, "grad_norm": 2.641371965408325, "learning_rate": 2.07e-05, "loss": 0.0918, "step": 2934 }, { "epoch": 587.2, "grad_norm": 1.1724873781204224, "learning_rate": 2.0680000000000002e-05, "loss": 0.0623, "step": 2936 }, { "epoch": 587.6, "grad_norm": 2.1476640701293945, "learning_rate": 2.0660000000000002e-05, "loss": 0.0695, "step": 2938 }, { "epoch": 588.0, "grad_norm": 2.2305967807769775, "learning_rate": 2.0640000000000002e-05, "loss": 0.0997, "step": 2940 }, { "epoch": 588.4, "grad_norm": 2.564176082611084, "learning_rate": 2.062e-05, "loss": 0.0791, "step": 2942 }, { "epoch": 588.8, "grad_norm": 2.4028632640838623, "learning_rate": 2.06e-05, "loss": 0.0763, "step": 2944 }, { "epoch": 589.2, "grad_norm": 2.985494375228882, "learning_rate": 2.0580000000000003e-05, "loss": 0.0875, "step": 2946 }, { "epoch": 589.6, "grad_norm": 1.8035310506820679, "learning_rate": 2.0560000000000003e-05, "loss": 0.0796, "step": 2948 }, { "epoch": 590.0, "grad_norm": 1.1875872611999512, "learning_rate": 2.054e-05, "loss": 0.0642, "step": 2950 }, { "epoch": 590.4, "grad_norm": 1.9262006282806396, "learning_rate": 2.052e-05, "loss": 0.0646, "step": 2952 }, { "epoch": 590.8, "grad_norm": 2.1944353580474854, "learning_rate": 2.05e-05, "loss": 0.0703, "step": 2954 }, { "epoch": 591.2, "grad_norm": 3.2228546142578125, "learning_rate": 2.048e-05, "loss": 0.0996, "step": 2956 }, { "epoch": 591.6, 
"grad_norm": 2.125375986099243, "learning_rate": 2.046e-05, "loss": 0.086, "step": 2958 }, { "epoch": 592.0, "grad_norm": 2.6495003700256348, "learning_rate": 2.044e-05, "loss": 0.0827, "step": 2960 }, { "epoch": 592.4, "grad_norm": 2.7390058040618896, "learning_rate": 2.042e-05, "loss": 0.0613, "step": 2962 }, { "epoch": 592.8, "grad_norm": 3.926757335662842, "learning_rate": 2.04e-05, "loss": 0.0984, "step": 2964 }, { "epoch": 593.2, "grad_norm": 5.078751087188721, "learning_rate": 2.038e-05, "loss": 0.0765, "step": 2966 }, { "epoch": 593.6, "grad_norm": 2.6536545753479004, "learning_rate": 2.036e-05, "loss": 0.0597, "step": 2968 }, { "epoch": 594.0, "grad_norm": 2.3138556480407715, "learning_rate": 2.0340000000000002e-05, "loss": 0.0784, "step": 2970 }, { "epoch": 594.4, "grad_norm": 5.304313659667969, "learning_rate": 2.032e-05, "loss": 0.0949, "step": 2972 }, { "epoch": 594.8, "grad_norm": 1.7252670526504517, "learning_rate": 2.0300000000000002e-05, "loss": 0.0821, "step": 2974 }, { "epoch": 595.2, "grad_norm": 3.77689790725708, "learning_rate": 2.0280000000000002e-05, "loss": 0.0891, "step": 2976 }, { "epoch": 595.6, "grad_norm": 2.0150749683380127, "learning_rate": 2.0260000000000003e-05, "loss": 0.1201, "step": 2978 }, { "epoch": 596.0, "grad_norm": 1.5162644386291504, "learning_rate": 2.024e-05, "loss": 0.0681, "step": 2980 }, { "epoch": 596.4, "grad_norm": 1.1821938753128052, "learning_rate": 2.022e-05, "loss": 0.0596, "step": 2982 }, { "epoch": 596.8, "grad_norm": 0.9755553603172302, "learning_rate": 2.0200000000000003e-05, "loss": 0.0701, "step": 2984 }, { "epoch": 597.2, "grad_norm": 0.7641944289207458, "learning_rate": 2.0180000000000003e-05, "loss": 0.0591, "step": 2986 }, { "epoch": 597.6, "grad_norm": 2.5395467281341553, "learning_rate": 2.016e-05, "loss": 0.0802, "step": 2988 }, { "epoch": 598.0, "grad_norm": 1.3853689432144165, "learning_rate": 2.014e-05, "loss": 0.072, "step": 2990 }, { "epoch": 598.4, "grad_norm": 1.2771015167236328, 
"learning_rate": 2.012e-05, "loss": 0.0722, "step": 2992 }, { "epoch": 598.8, "grad_norm": 3.8211328983306885, "learning_rate": 2.01e-05, "loss": 0.0834, "step": 2994 }, { "epoch": 599.2, "grad_norm": 2.301374912261963, "learning_rate": 2.008e-05, "loss": 0.0815, "step": 2996 }, { "epoch": 599.6, "grad_norm": 2.2846601009368896, "learning_rate": 2.006e-05, "loss": 0.0714, "step": 2998 }, { "epoch": 600.0, "grad_norm": 1.8128670454025269, "learning_rate": 2.004e-05, "loss": 0.0558, "step": 3000 }, { "epoch": 600.0, "eval_cer": 0.6269430051813472, "eval_loss": 5.516363620758057, "eval_runtime": 6.3729, "eval_samples_per_second": 1.569, "eval_steps_per_second": 0.314, "step": 3000 }, { "epoch": 600.4, "grad_norm": 2.410027265548706, "learning_rate": 2.002e-05, "loss": 0.0813, "step": 3002 }, { "epoch": 600.8, "grad_norm": 1.9932230710983276, "learning_rate": 2e-05, "loss": 0.0768, "step": 3004 }, { "epoch": 601.2, "grad_norm": 1.9289451837539673, "learning_rate": 1.9980000000000002e-05, "loss": 0.0833, "step": 3006 }, { "epoch": 601.6, "grad_norm": 6.6780290603637695, "learning_rate": 1.9960000000000002e-05, "loss": 0.1068, "step": 3008 }, { "epoch": 602.0, "grad_norm": 7.2567667961120605, "learning_rate": 1.994e-05, "loss": 0.119, "step": 3010 }, { "epoch": 602.4, "grad_norm": 4.05262565612793, "learning_rate": 1.992e-05, "loss": 0.0914, "step": 3012 }, { "epoch": 602.8, "grad_norm": 1.735781192779541, "learning_rate": 1.9900000000000003e-05, "loss": 0.0633, "step": 3014 }, { "epoch": 603.2, "grad_norm": 2.209965467453003, "learning_rate": 1.9880000000000003e-05, "loss": 0.0765, "step": 3016 }, { "epoch": 603.6, "grad_norm": 3.677443265914917, "learning_rate": 1.986e-05, "loss": 0.0767, "step": 3018 }, { "epoch": 604.0, "grad_norm": 2.7224485874176025, "learning_rate": 1.984e-05, "loss": 0.1001, "step": 3020 }, { "epoch": 604.4, "grad_norm": 1.8268851041793823, "learning_rate": 1.982e-05, "loss": 0.099, "step": 3022 }, { "epoch": 604.8, "grad_norm": 
2.747781276702881, "learning_rate": 1.9800000000000004e-05, "loss": 0.0827, "step": 3024 }, { "epoch": 605.2, "grad_norm": 0.8385090231895447, "learning_rate": 1.978e-05, "loss": 0.0587, "step": 3026 }, { "epoch": 605.6, "grad_norm": 2.899277687072754, "learning_rate": 1.976e-05, "loss": 0.0826, "step": 3028 }, { "epoch": 606.0, "grad_norm": 4.240220069885254, "learning_rate": 1.974e-05, "loss": 0.0962, "step": 3030 }, { "epoch": 606.4, "grad_norm": 1.8706060647964478, "learning_rate": 1.972e-05, "loss": 0.0694, "step": 3032 }, { "epoch": 606.8, "grad_norm": 9.396303176879883, "learning_rate": 1.97e-05, "loss": 0.1515, "step": 3034 }, { "epoch": 607.2, "grad_norm": 1.7086679935455322, "learning_rate": 1.968e-05, "loss": 0.0637, "step": 3036 }, { "epoch": 607.6, "grad_norm": 5.888516902923584, "learning_rate": 1.966e-05, "loss": 0.1391, "step": 3038 }, { "epoch": 608.0, "grad_norm": 2.6486706733703613, "learning_rate": 1.9640000000000002e-05, "loss": 0.0623, "step": 3040 }, { "epoch": 608.4, "grad_norm": 1.5447965860366821, "learning_rate": 1.9620000000000002e-05, "loss": 0.0754, "step": 3042 }, { "epoch": 608.8, "grad_norm": 1.7744275331497192, "learning_rate": 1.9600000000000002e-05, "loss": 0.0694, "step": 3044 }, { "epoch": 609.2, "grad_norm": 1.8987764120101929, "learning_rate": 1.9580000000000002e-05, "loss": 0.0653, "step": 3046 }, { "epoch": 609.6, "grad_norm": 4.255063056945801, "learning_rate": 1.956e-05, "loss": 0.1021, "step": 3048 }, { "epoch": 610.0, "grad_norm": 4.186433792114258, "learning_rate": 1.954e-05, "loss": 0.0777, "step": 3050 }, { "epoch": 610.4, "grad_norm": 4.8806610107421875, "learning_rate": 1.9520000000000003e-05, "loss": 0.0908, "step": 3052 }, { "epoch": 610.8, "grad_norm": 1.4175288677215576, "learning_rate": 1.9500000000000003e-05, "loss": 0.0594, "step": 3054 }, { "epoch": 611.2, "grad_norm": 1.0401084423065186, "learning_rate": 1.948e-05, "loss": 0.0528, "step": 3056 }, { "epoch": 611.6, "grad_norm": 0.9244334101676941, 
"learning_rate": 1.946e-05, "loss": 0.0721, "step": 3058 }, { "epoch": 612.0, "grad_norm": 1.8958861827850342, "learning_rate": 1.944e-05, "loss": 0.0642, "step": 3060 }, { "epoch": 612.4, "grad_norm": 1.6624629497528076, "learning_rate": 1.942e-05, "loss": 0.0816, "step": 3062 }, { "epoch": 612.8, "grad_norm": 1.756848931312561, "learning_rate": 1.94e-05, "loss": 0.0528, "step": 3064 }, { "epoch": 613.2, "grad_norm": 2.9185791015625, "learning_rate": 1.938e-05, "loss": 0.0943, "step": 3066 }, { "epoch": 613.6, "grad_norm": 3.751755714416504, "learning_rate": 1.936e-05, "loss": 0.1202, "step": 3068 }, { "epoch": 614.0, "grad_norm": 0.803933322429657, "learning_rate": 1.934e-05, "loss": 0.0642, "step": 3070 }, { "epoch": 614.4, "grad_norm": 2.000929355621338, "learning_rate": 1.932e-05, "loss": 0.0932, "step": 3072 }, { "epoch": 614.8, "grad_norm": 1.1023443937301636, "learning_rate": 1.93e-05, "loss": 0.0634, "step": 3074 }, { "epoch": 615.2, "grad_norm": 1.8340427875518799, "learning_rate": 1.9280000000000002e-05, "loss": 0.0568, "step": 3076 }, { "epoch": 615.6, "grad_norm": 1.8976824283599854, "learning_rate": 1.9260000000000002e-05, "loss": 0.0698, "step": 3078 }, { "epoch": 616.0, "grad_norm": 1.0702615976333618, "learning_rate": 1.924e-05, "loss": 0.0477, "step": 3080 }, { "epoch": 616.4, "grad_norm": 2.470982551574707, "learning_rate": 1.9220000000000002e-05, "loss": 0.077, "step": 3082 }, { "epoch": 616.8, "grad_norm": 2.0277113914489746, "learning_rate": 1.9200000000000003e-05, "loss": 0.0554, "step": 3084 }, { "epoch": 617.2, "grad_norm": 1.9138801097869873, "learning_rate": 1.918e-05, "loss": 0.0917, "step": 3086 }, { "epoch": 617.6, "grad_norm": 1.4285691976547241, "learning_rate": 1.916e-05, "loss": 0.0564, "step": 3088 }, { "epoch": 618.0, "grad_norm": 1.5668582916259766, "learning_rate": 1.914e-05, "loss": 0.0737, "step": 3090 }, { "epoch": 618.4, "grad_norm": 2.30855131149292, "learning_rate": 1.9120000000000003e-05, "loss": 0.0619, "step": 3092 }, 
{ "epoch": 618.8, "grad_norm": 2.1190710067749023, "learning_rate": 1.91e-05, "loss": 0.0796, "step": 3094 }, { "epoch": 619.2, "grad_norm": 1.2604186534881592, "learning_rate": 1.908e-05, "loss": 0.052, "step": 3096 }, { "epoch": 619.6, "grad_norm": 3.291710138320923, "learning_rate": 1.906e-05, "loss": 0.0795, "step": 3098 }, { "epoch": 620.0, "grad_norm": 2.501821517944336, "learning_rate": 1.904e-05, "loss": 0.0706, "step": 3100 }, { "epoch": 620.4, "grad_norm": 2.6770129203796387, "learning_rate": 1.902e-05, "loss": 0.1121, "step": 3102 }, { "epoch": 620.8, "grad_norm": 3.1167821884155273, "learning_rate": 1.9e-05, "loss": 0.1265, "step": 3104 }, { "epoch": 621.2, "grad_norm": 1.264593482017517, "learning_rate": 1.898e-05, "loss": 0.0608, "step": 3106 }, { "epoch": 621.6, "grad_norm": 2.2165138721466064, "learning_rate": 1.896e-05, "loss": 0.0844, "step": 3108 }, { "epoch": 622.0, "grad_norm": 3.4200451374053955, "learning_rate": 1.894e-05, "loss": 0.1287, "step": 3110 }, { "epoch": 622.4, "grad_norm": 1.4842395782470703, "learning_rate": 1.8920000000000002e-05, "loss": 0.0647, "step": 3112 }, { "epoch": 622.8, "grad_norm": 2.9566657543182373, "learning_rate": 1.8900000000000002e-05, "loss": 0.0741, "step": 3114 }, { "epoch": 623.2, "grad_norm": 4.631263256072998, "learning_rate": 1.888e-05, "loss": 0.0809, "step": 3116 }, { "epoch": 623.6, "grad_norm": 4.397104263305664, "learning_rate": 1.886e-05, "loss": 0.0817, "step": 3118 }, { "epoch": 624.0, "grad_norm": 6.764240741729736, "learning_rate": 1.8840000000000003e-05, "loss": 0.1184, "step": 3120 }, { "epoch": 624.4, "grad_norm": 1.8579261302947998, "learning_rate": 1.8820000000000003e-05, "loss": 0.0956, "step": 3122 }, { "epoch": 624.8, "grad_norm": 1.2099709510803223, "learning_rate": 1.88e-05, "loss": 0.0557, "step": 3124 }, { "epoch": 625.2, "grad_norm": 2.5030956268310547, "learning_rate": 1.878e-05, "loss": 0.0643, "step": 3126 }, { "epoch": 625.6, "grad_norm": 2.1166775226593018, "learning_rate": 
1.876e-05, "loss": 0.0724, "step": 3128 }, { "epoch": 626.0, "grad_norm": 2.751803398132324, "learning_rate": 1.8740000000000004e-05, "loss": 0.0668, "step": 3130 }, { "epoch": 626.4, "grad_norm": 1.4526809453964233, "learning_rate": 1.872e-05, "loss": 0.08, "step": 3132 }, { "epoch": 626.8, "grad_norm": 1.7340513467788696, "learning_rate": 1.87e-05, "loss": 0.0553, "step": 3134 }, { "epoch": 627.2, "grad_norm": 0.8968878388404846, "learning_rate": 1.868e-05, "loss": 0.0532, "step": 3136 }, { "epoch": 627.6, "grad_norm": 0.7971158027648926, "learning_rate": 1.866e-05, "loss": 0.0606, "step": 3138 }, { "epoch": 628.0, "grad_norm": 3.1907966136932373, "learning_rate": 1.864e-05, "loss": 0.0721, "step": 3140 }, { "epoch": 628.4, "grad_norm": 1.922194480895996, "learning_rate": 1.862e-05, "loss": 0.038, "step": 3142 }, { "epoch": 628.8, "grad_norm": 3.887855291366577, "learning_rate": 1.86e-05, "loss": 0.1196, "step": 3144 }, { "epoch": 629.2, "grad_norm": 3.857783317565918, "learning_rate": 1.858e-05, "loss": 0.0773, "step": 3146 }, { "epoch": 629.6, "grad_norm": 1.4349216222763062, "learning_rate": 1.856e-05, "loss": 0.0518, "step": 3148 }, { "epoch": 630.0, "grad_norm": 3.0156397819519043, "learning_rate": 1.8540000000000002e-05, "loss": 0.0845, "step": 3150 }, { "epoch": 630.4, "grad_norm": 2.9045908451080322, "learning_rate": 1.8520000000000002e-05, "loss": 0.0474, "step": 3152 }, { "epoch": 630.8, "grad_norm": 2.2335829734802246, "learning_rate": 1.85e-05, "loss": 0.089, "step": 3154 }, { "epoch": 631.2, "grad_norm": 3.2819840908050537, "learning_rate": 1.848e-05, "loss": 0.1133, "step": 3156 }, { "epoch": 631.6, "grad_norm": 3.800731658935547, "learning_rate": 1.846e-05, "loss": 0.0866, "step": 3158 }, { "epoch": 632.0, "grad_norm": 2.8835740089416504, "learning_rate": 1.8440000000000003e-05, "loss": 0.0816, "step": 3160 }, { "epoch": 632.4, "grad_norm": 1.4655286073684692, "learning_rate": 1.842e-05, "loss": 0.0764, "step": 3162 }, { "epoch": 632.8, 
"grad_norm": 1.1423026323318481, "learning_rate": 1.84e-05, "loss": 0.0497, "step": 3164 }, { "epoch": 633.2, "grad_norm": 2.776040554046631, "learning_rate": 1.838e-05, "loss": 0.0477, "step": 3166 }, { "epoch": 633.6, "grad_norm": 2.4027938842773438, "learning_rate": 1.8360000000000004e-05, "loss": 0.0783, "step": 3168 }, { "epoch": 634.0, "grad_norm": 2.875760316848755, "learning_rate": 1.834e-05, "loss": 0.0535, "step": 3170 }, { "epoch": 634.4, "grad_norm": 3.3752312660217285, "learning_rate": 1.832e-05, "loss": 0.0649, "step": 3172 }, { "epoch": 634.8, "grad_norm": 1.9681396484375, "learning_rate": 1.83e-05, "loss": 0.0728, "step": 3174 }, { "epoch": 635.2, "grad_norm": 1.207863450050354, "learning_rate": 1.828e-05, "loss": 0.0512, "step": 3176 }, { "epoch": 635.6, "grad_norm": 3.8591196537017822, "learning_rate": 1.826e-05, "loss": 0.0811, "step": 3178 }, { "epoch": 636.0, "grad_norm": 2.640810966491699, "learning_rate": 1.824e-05, "loss": 0.0695, "step": 3180 }, { "epoch": 636.4, "grad_norm": 3.92535662651062, "learning_rate": 1.8220000000000002e-05, "loss": 0.0673, "step": 3182 }, { "epoch": 636.8, "grad_norm": 1.2887781858444214, "learning_rate": 1.8200000000000002e-05, "loss": 0.076, "step": 3184 }, { "epoch": 637.2, "grad_norm": 1.8367034196853638, "learning_rate": 1.818e-05, "loss": 0.0602, "step": 3186 }, { "epoch": 637.6, "grad_norm": 2.28827166557312, "learning_rate": 1.8160000000000002e-05, "loss": 0.0617, "step": 3188 }, { "epoch": 638.0, "grad_norm": 2.0312983989715576, "learning_rate": 1.8140000000000003e-05, "loss": 0.0667, "step": 3190 }, { "epoch": 638.4, "grad_norm": 1.1325631141662598, "learning_rate": 1.812e-05, "loss": 0.0568, "step": 3192 }, { "epoch": 638.8, "grad_norm": 1.4695326089859009, "learning_rate": 1.81e-05, "loss": 0.0556, "step": 3194 }, { "epoch": 639.2, "grad_norm": 1.8509931564331055, "learning_rate": 1.808e-05, "loss": 0.0583, "step": 3196 }, { "epoch": 639.6, "grad_norm": 3.354156255722046, "learning_rate": 
1.8060000000000003e-05, "loss": 0.0757, "step": 3198 }, { "epoch": 640.0, "grad_norm": 2.054307222366333, "learning_rate": 1.804e-05, "loss": 0.0534, "step": 3200 }, { "epoch": 640.0, "eval_cer": 0.694300518134715, "eval_loss": 5.564584732055664, "eval_runtime": 6.179, "eval_samples_per_second": 1.618, "eval_steps_per_second": 0.324, "step": 3200 }, { "epoch": 640.4, "grad_norm": 2.526193857192993, "learning_rate": 1.802e-05, "loss": 0.0483, "step": 3202 }, { "epoch": 640.8, "grad_norm": 2.3064937591552734, "learning_rate": 1.8e-05, "loss": 0.0872, "step": 3204 }, { "epoch": 641.2, "grad_norm": 2.982731819152832, "learning_rate": 1.798e-05, "loss": 0.0605, "step": 3206 }, { "epoch": 641.6, "grad_norm": 3.833238363265991, "learning_rate": 1.796e-05, "loss": 0.0644, "step": 3208 }, { "epoch": 642.0, "grad_norm": 1.4992573261260986, "learning_rate": 1.794e-05, "loss": 0.0567, "step": 3210 }, { "epoch": 642.4, "grad_norm": 2.4911949634552, "learning_rate": 1.792e-05, "loss": 0.0654, "step": 3212 }, { "epoch": 642.8, "grad_norm": 1.5535213947296143, "learning_rate": 1.79e-05, "loss": 0.0514, "step": 3214 }, { "epoch": 643.2, "grad_norm": 0.9769728779792786, "learning_rate": 1.7879999999999998e-05, "loss": 0.056, "step": 3216 }, { "epoch": 643.6, "grad_norm": 2.9892325401306152, "learning_rate": 1.7860000000000002e-05, "loss": 0.0621, "step": 3218 }, { "epoch": 644.0, "grad_norm": 5.668875217437744, "learning_rate": 1.7840000000000002e-05, "loss": 0.0788, "step": 3220 }, { "epoch": 644.4, "grad_norm": 2.7565388679504395, "learning_rate": 1.7820000000000002e-05, "loss": 0.0757, "step": 3222 }, { "epoch": 644.8, "grad_norm": 5.525135040283203, "learning_rate": 1.78e-05, "loss": 0.083, "step": 3224 }, { "epoch": 645.2, "grad_norm": 3.678616523742676, "learning_rate": 1.7780000000000003e-05, "loss": 0.0913, "step": 3226 }, { "epoch": 645.6, "grad_norm": 1.4903839826583862, "learning_rate": 1.7760000000000003e-05, "loss": 0.0513, "step": 3228 }, { "epoch": 646.0, "grad_norm": 
3.8204829692840576, "learning_rate": 1.774e-05, "loss": 0.0612, "step": 3230 }, { "epoch": 646.4, "grad_norm": 1.5227696895599365, "learning_rate": 1.772e-05, "loss": 0.0535, "step": 3232 }, { "epoch": 646.8, "grad_norm": 2.8465957641601562, "learning_rate": 1.77e-05, "loss": 0.0762, "step": 3234 }, { "epoch": 647.2, "grad_norm": 2.4469046592712402, "learning_rate": 1.7680000000000004e-05, "loss": 0.0924, "step": 3236 }, { "epoch": 647.6, "grad_norm": 3.4114606380462646, "learning_rate": 1.766e-05, "loss": 0.0679, "step": 3238 }, { "epoch": 648.0, "grad_norm": 3.5804762840270996, "learning_rate": 1.764e-05, "loss": 0.0849, "step": 3240 }, { "epoch": 648.4, "grad_norm": 2.079221487045288, "learning_rate": 1.762e-05, "loss": 0.0923, "step": 3242 }, { "epoch": 648.8, "grad_norm": 1.9177277088165283, "learning_rate": 1.76e-05, "loss": 0.0672, "step": 3244 }, { "epoch": 649.2, "grad_norm": 3.34010910987854, "learning_rate": 1.758e-05, "loss": 0.0669, "step": 3246 }, { "epoch": 649.6, "grad_norm": 2.247480869293213, "learning_rate": 1.756e-05, "loss": 0.0552, "step": 3248 }, { "epoch": 650.0, "grad_norm": 2.2847900390625, "learning_rate": 1.754e-05, "loss": 0.0553, "step": 3250 }, { "epoch": 650.4, "grad_norm": 3.302842617034912, "learning_rate": 1.752e-05, "loss": 0.0915, "step": 3252 }, { "epoch": 650.8, "grad_norm": 2.670706033706665, "learning_rate": 1.75e-05, "loss": 0.0506, "step": 3254 }, { "epoch": 651.2, "grad_norm": 2.300001859664917, "learning_rate": 1.7480000000000002e-05, "loss": 0.0585, "step": 3256 }, { "epoch": 651.6, "grad_norm": 0.5808373689651489, "learning_rate": 1.7460000000000002e-05, "loss": 0.0308, "step": 3258 }, { "epoch": 652.0, "grad_norm": 0.8896394371986389, "learning_rate": 1.7440000000000002e-05, "loss": 0.0348, "step": 3260 }, { "epoch": 652.4, "grad_norm": 1.3267570734024048, "learning_rate": 1.742e-05, "loss": 0.0459, "step": 3262 }, { "epoch": 652.8, "grad_norm": 4.301355838775635, "learning_rate": 1.74e-05, "loss": 0.0619, "step": 
3264 }, { "epoch": 653.2, "grad_norm": 3.899325370788574, "learning_rate": 1.7380000000000003e-05, "loss": 0.0725, "step": 3266 }, { "epoch": 653.6, "grad_norm": 0.7974644899368286, "learning_rate": 1.736e-05, "loss": 0.047, "step": 3268 }, { "epoch": 654.0, "grad_norm": 1.7369194030761719, "learning_rate": 1.734e-05, "loss": 0.0525, "step": 3270 }, { "epoch": 654.4, "grad_norm": 2.257744073867798, "learning_rate": 1.732e-05, "loss": 0.055, "step": 3272 }, { "epoch": 654.8, "grad_norm": 2.1294498443603516, "learning_rate": 1.73e-05, "loss": 0.0498, "step": 3274 }, { "epoch": 655.2, "grad_norm": 2.529773473739624, "learning_rate": 1.728e-05, "loss": 0.0578, "step": 3276 }, { "epoch": 655.6, "grad_norm": 1.5116692781448364, "learning_rate": 1.726e-05, "loss": 0.0423, "step": 3278 }, { "epoch": 656.0, "grad_norm": 1.3055908679962158, "learning_rate": 1.724e-05, "loss": 0.074, "step": 3280 }, { "epoch": 656.4, "grad_norm": 1.4156906604766846, "learning_rate": 1.722e-05, "loss": 0.0551, "step": 3282 }, { "epoch": 656.8, "grad_norm": 1.2462130784988403, "learning_rate": 1.7199999999999998e-05, "loss": 0.0442, "step": 3284 }, { "epoch": 657.2, "grad_norm": 3.489969253540039, "learning_rate": 1.718e-05, "loss": 0.0785, "step": 3286 }, { "epoch": 657.6, "grad_norm": 3.3948590755462646, "learning_rate": 1.7160000000000002e-05, "loss": 0.0778, "step": 3288 }, { "epoch": 658.0, "grad_norm": 1.5263768434524536, "learning_rate": 1.7140000000000002e-05, "loss": 0.0434, "step": 3290 }, { "epoch": 658.4, "grad_norm": 1.3201247453689575, "learning_rate": 1.712e-05, "loss": 0.0492, "step": 3292 }, { "epoch": 658.8, "grad_norm": 1.6828140020370483, "learning_rate": 1.7100000000000002e-05, "loss": 0.0533, "step": 3294 }, { "epoch": 659.2, "grad_norm": 1.8429982662200928, "learning_rate": 1.7080000000000002e-05, "loss": 0.0564, "step": 3296 }, { "epoch": 659.6, "grad_norm": 1.5047435760498047, "learning_rate": 1.706e-05, "loss": 0.0423, "step": 3298 }, { "epoch": 660.0, "grad_norm": 
2.1918718814849854, "learning_rate": 1.704e-05, "loss": 0.0625, "step": 3300 }, { "epoch": 660.4, "grad_norm": 2.714839458465576, "learning_rate": 1.702e-05, "loss": 0.0886, "step": 3302 }, { "epoch": 660.8, "grad_norm": 2.9184587001800537, "learning_rate": 1.7000000000000003e-05, "loss": 0.0765, "step": 3304 }, { "epoch": 661.2, "grad_norm": 4.13349723815918, "learning_rate": 1.698e-05, "loss": 0.0566, "step": 3306 }, { "epoch": 661.6, "grad_norm": 3.0959644317626953, "learning_rate": 1.696e-05, "loss": 0.0405, "step": 3308 }, { "epoch": 662.0, "grad_norm": 1.8493571281433105, "learning_rate": 1.694e-05, "loss": 0.0919, "step": 3310 }, { "epoch": 662.4, "grad_norm": 1.457541823387146, "learning_rate": 1.692e-05, "loss": 0.0423, "step": 3312 }, { "epoch": 662.8, "grad_norm": 2.615905523300171, "learning_rate": 1.69e-05, "loss": 0.0581, "step": 3314 }, { "epoch": 663.2, "grad_norm": 5.194374084472656, "learning_rate": 1.688e-05, "loss": 0.0992, "step": 3316 }, { "epoch": 663.6, "grad_norm": 1.268939733505249, "learning_rate": 1.686e-05, "loss": 0.0406, "step": 3318 }, { "epoch": 664.0, "grad_norm": 5.491112232208252, "learning_rate": 1.684e-05, "loss": 0.062, "step": 3320 }, { "epoch": 664.4, "grad_norm": 2.642009735107422, "learning_rate": 1.6819999999999998e-05, "loss": 0.0742, "step": 3322 }, { "epoch": 664.8, "grad_norm": 3.079897403717041, "learning_rate": 1.6800000000000002e-05, "loss": 0.0457, "step": 3324 }, { "epoch": 665.2, "grad_norm": 3.489657402038574, "learning_rate": 1.6780000000000002e-05, "loss": 0.1405, "step": 3326 }, { "epoch": 665.6, "grad_norm": 3.0785460472106934, "learning_rate": 1.6760000000000002e-05, "loss": 0.0672, "step": 3328 }, { "epoch": 666.0, "grad_norm": 1.661453366279602, "learning_rate": 1.674e-05, "loss": 0.073, "step": 3330 }, { "epoch": 666.4, "grad_norm": 2.980323553085327, "learning_rate": 1.672e-05, "loss": 0.0551, "step": 3332 }, { "epoch": 666.8, "grad_norm": 1.9340627193450928, "learning_rate": 1.6700000000000003e-05, 
"loss": 0.0516, "step": 3334 }, { "epoch": 667.2, "grad_norm": 1.74107027053833, "learning_rate": 1.668e-05, "loss": 0.0313, "step": 3336 }, { "epoch": 667.6, "grad_norm": 1.3743219375610352, "learning_rate": 1.666e-05, "loss": 0.0393, "step": 3338 }, { "epoch": 668.0, "grad_norm": 0.9528849124908447, "learning_rate": 1.664e-05, "loss": 0.0478, "step": 3340 }, { "epoch": 668.4, "grad_norm": 2.4443249702453613, "learning_rate": 1.662e-05, "loss": 0.042, "step": 3342 }, { "epoch": 668.8, "grad_norm": 2.6746790409088135, "learning_rate": 1.66e-05, "loss": 0.0447, "step": 3344 }, { "epoch": 669.2, "grad_norm": 1.8048741817474365, "learning_rate": 1.658e-05, "loss": 0.0759, "step": 3346 }, { "epoch": 669.6, "grad_norm": 1.1573213338851929, "learning_rate": 1.656e-05, "loss": 0.0323, "step": 3348 }, { "epoch": 670.0, "grad_norm": 4.609884738922119, "learning_rate": 1.654e-05, "loss": 0.0766, "step": 3350 }, { "epoch": 670.4, "grad_norm": 1.673646330833435, "learning_rate": 1.652e-05, "loss": 0.0595, "step": 3352 }, { "epoch": 670.8, "grad_norm": 1.6454784870147705, "learning_rate": 1.65e-05, "loss": 0.0455, "step": 3354 }, { "epoch": 671.2, "grad_norm": 3.1113955974578857, "learning_rate": 1.648e-05, "loss": 0.058, "step": 3356 }, { "epoch": 671.6, "grad_norm": 3.156810760498047, "learning_rate": 1.646e-05, "loss": 0.05, "step": 3358 }, { "epoch": 672.0, "grad_norm": 1.0151987075805664, "learning_rate": 1.644e-05, "loss": 0.0831, "step": 3360 }, { "epoch": 672.4, "grad_norm": 0.7404467463493347, "learning_rate": 1.6420000000000002e-05, "loss": 0.0572, "step": 3362 }, { "epoch": 672.8, "grad_norm": 0.6863143444061279, "learning_rate": 1.6400000000000002e-05, "loss": 0.0372, "step": 3364 }, { "epoch": 673.2, "grad_norm": 2.685533046722412, "learning_rate": 1.6380000000000002e-05, "loss": 0.0853, "step": 3366 }, { "epoch": 673.6, "grad_norm": 1.1501761674880981, "learning_rate": 1.636e-05, "loss": 0.0346, "step": 3368 }, { "epoch": 674.0, "grad_norm": 2.7288224697113037, 
"learning_rate": 1.634e-05, "loss": 0.0546, "step": 3370 }, { "epoch": 674.4, "grad_norm": 1.6842821836471558, "learning_rate": 1.6320000000000003e-05, "loss": 0.049, "step": 3372 }, { "epoch": 674.8, "grad_norm": 2.206166982650757, "learning_rate": 1.63e-05, "loss": 0.0385, "step": 3374 }, { "epoch": 675.2, "grad_norm": 2.3264284133911133, "learning_rate": 1.628e-05, "loss": 0.0585, "step": 3376 }, { "epoch": 675.6, "grad_norm": 1.4030497074127197, "learning_rate": 1.626e-05, "loss": 0.0337, "step": 3378 }, { "epoch": 676.0, "grad_norm": 2.835793972015381, "learning_rate": 1.624e-05, "loss": 0.0603, "step": 3380 }, { "epoch": 676.4, "grad_norm": 1.0228796005249023, "learning_rate": 1.622e-05, "loss": 0.0438, "step": 3382 }, { "epoch": 676.8, "grad_norm": 3.7952167987823486, "learning_rate": 1.62e-05, "loss": 0.062, "step": 3384 }, { "epoch": 677.2, "grad_norm": 4.7880539894104, "learning_rate": 1.618e-05, "loss": 0.0883, "step": 3386 }, { "epoch": 677.6, "grad_norm": 1.4195492267608643, "learning_rate": 1.616e-05, "loss": 0.034, "step": 3388 }, { "epoch": 678.0, "grad_norm": 1.9061038494110107, "learning_rate": 1.6139999999999998e-05, "loss": 0.0453, "step": 3390 }, { "epoch": 678.4, "grad_norm": 3.0448527336120605, "learning_rate": 1.612e-05, "loss": 0.0492, "step": 3392 }, { "epoch": 678.8, "grad_norm": 2.576571464538574, "learning_rate": 1.6100000000000002e-05, "loss": 0.0763, "step": 3394 }, { "epoch": 679.2, "grad_norm": 1.6183745861053467, "learning_rate": 1.6080000000000002e-05, "loss": 0.0571, "step": 3396 }, { "epoch": 679.6, "grad_norm": 2.4504337310791016, "learning_rate": 1.606e-05, "loss": 0.046, "step": 3398 }, { "epoch": 680.0, "grad_norm": 4.533170223236084, "learning_rate": 1.604e-05, "loss": 0.0841, "step": 3400 }, { "epoch": 680.0, "eval_cer": 0.6476683937823834, "eval_loss": 5.557984828948975, "eval_runtime": 6.2609, "eval_samples_per_second": 1.597, "eval_steps_per_second": 0.319, "step": 3400 }, { "epoch": 680.4, "grad_norm": 
2.4834582805633545, "learning_rate": 1.6020000000000002e-05, "loss": 0.0547, "step": 3402 }, { "epoch": 680.8, "grad_norm": 3.069660186767578, "learning_rate": 1.6000000000000003e-05, "loss": 0.0413, "step": 3404 }, { "epoch": 681.2, "grad_norm": 3.3005783557891846, "learning_rate": 1.598e-05, "loss": 0.0726, "step": 3406 }, { "epoch": 681.6, "grad_norm": 3.1302666664123535, "learning_rate": 1.596e-05, "loss": 0.0619, "step": 3408 }, { "epoch": 682.0, "grad_norm": 1.7724065780639648, "learning_rate": 1.594e-05, "loss": 0.0477, "step": 3410 }, { "epoch": 682.4, "grad_norm": 4.448440074920654, "learning_rate": 1.592e-05, "loss": 0.0671, "step": 3412 }, { "epoch": 682.8, "grad_norm": 3.585568428039551, "learning_rate": 1.59e-05, "loss": 0.0586, "step": 3414 }, { "epoch": 683.2, "grad_norm": 2.2051546573638916, "learning_rate": 1.588e-05, "loss": 0.0663, "step": 3416 }, { "epoch": 683.6, "grad_norm": 1.253649115562439, "learning_rate": 1.586e-05, "loss": 0.0498, "step": 3418 }, { "epoch": 684.0, "grad_norm": 0.9743872284889221, "learning_rate": 1.584e-05, "loss": 0.034, "step": 3420 }, { "epoch": 684.4, "grad_norm": 1.2514783143997192, "learning_rate": 1.582e-05, "loss": 0.0409, "step": 3422 }, { "epoch": 684.8, "grad_norm": 0.7249080538749695, "learning_rate": 1.58e-05, "loss": 0.0637, "step": 3424 }, { "epoch": 685.2, "grad_norm": 1.8865165710449219, "learning_rate": 1.578e-05, "loss": 0.0402, "step": 3426 }, { "epoch": 685.6, "grad_norm": 2.7285850048065186, "learning_rate": 1.5759999999999998e-05, "loss": 0.0651, "step": 3428 }, { "epoch": 686.0, "grad_norm": 2.7466542720794678, "learning_rate": 1.5740000000000002e-05, "loss": 0.0506, "step": 3430 }, { "epoch": 686.4, "grad_norm": 2.3403427600860596, "learning_rate": 1.5720000000000002e-05, "loss": 0.045, "step": 3432 }, { "epoch": 686.8, "grad_norm": 1.507604956626892, "learning_rate": 1.5700000000000002e-05, "loss": 0.0463, "step": 3434 }, { "epoch": 687.2, "grad_norm": 0.5008689165115356, "learning_rate": 
1.568e-05, "loss": 0.0327, "step": 3436 }, { "epoch": 687.6, "grad_norm": 3.3805477619171143, "learning_rate": 1.566e-05, "loss": 0.0581, "step": 3438 }, { "epoch": 688.0, "grad_norm": 6.175005912780762, "learning_rate": 1.5640000000000003e-05, "loss": 0.0808, "step": 3440 }, { "epoch": 688.4, "grad_norm": 2.1275577545166016, "learning_rate": 1.5620000000000003e-05, "loss": 0.0613, "step": 3442 }, { "epoch": 688.8, "grad_norm": 3.0625457763671875, "learning_rate": 1.56e-05, "loss": 0.0445, "step": 3444 }, { "epoch": 689.2, "grad_norm": 0.9347559213638306, "learning_rate": 1.558e-05, "loss": 0.0311, "step": 3446 }, { "epoch": 689.6, "grad_norm": 2.666185140609741, "learning_rate": 1.556e-05, "loss": 0.0613, "step": 3448 }, { "epoch": 690.0, "grad_norm": 3.775283098220825, "learning_rate": 1.554e-05, "loss": 0.062, "step": 3450 }, { "epoch": 690.4, "grad_norm": 3.1734230518341064, "learning_rate": 1.552e-05, "loss": 0.0524, "step": 3452 }, { "epoch": 690.8, "grad_norm": 0.9543465375900269, "learning_rate": 1.55e-05, "loss": 0.0319, "step": 3454 }, { "epoch": 691.2, "grad_norm": 1.0880846977233887, "learning_rate": 1.548e-05, "loss": 0.0676, "step": 3456 }, { "epoch": 691.6, "grad_norm": 1.5534535646438599, "learning_rate": 1.546e-05, "loss": 0.0518, "step": 3458 }, { "epoch": 692.0, "grad_norm": 3.327502489089966, "learning_rate": 1.544e-05, "loss": 0.0502, "step": 3460 }, { "epoch": 692.4, "grad_norm": 1.5540556907653809, "learning_rate": 1.542e-05, "loss": 0.0481, "step": 3462 }, { "epoch": 692.8, "grad_norm": 2.46077561378479, "learning_rate": 1.54e-05, "loss": 0.0368, "step": 3464 }, { "epoch": 693.2, "grad_norm": 2.452298402786255, "learning_rate": 1.538e-05, "loss": 0.049, "step": 3466 }, { "epoch": 693.6, "grad_norm": 1.732583999633789, "learning_rate": 1.536e-05, "loss": 0.0452, "step": 3468 }, { "epoch": 694.0, "grad_norm": 1.7842763662338257, "learning_rate": 1.5340000000000002e-05, "loss": 0.0388, "step": 3470 }, { "epoch": 694.4, "grad_norm": 
1.764836072921753, "learning_rate": 1.5320000000000002e-05, "loss": 0.0466, "step": 3472 }, { "epoch": 694.8, "grad_norm": 0.9146240949630737, "learning_rate": 1.53e-05, "loss": 0.0477, "step": 3474 }, { "epoch": 695.2, "grad_norm": 1.3537386655807495, "learning_rate": 1.528e-05, "loss": 0.0441, "step": 3476 }, { "epoch": 695.6, "grad_norm": 5.33474063873291, "learning_rate": 1.5260000000000003e-05, "loss": 0.0796, "step": 3478 }, { "epoch": 696.0, "grad_norm": 1.9927090406417847, "learning_rate": 1.5240000000000001e-05, "loss": 0.0188, "step": 3480 }, { "epoch": 696.4, "grad_norm": 2.5529420375823975, "learning_rate": 1.5220000000000002e-05, "loss": 0.0618, "step": 3482 }, { "epoch": 696.8, "grad_norm": 2.286226511001587, "learning_rate": 1.52e-05, "loss": 0.0418, "step": 3484 }, { "epoch": 697.2, "grad_norm": 1.5779316425323486, "learning_rate": 1.518e-05, "loss": 0.0319, "step": 3486 }, { "epoch": 697.6, "grad_norm": 2.8948466777801514, "learning_rate": 1.5160000000000002e-05, "loss": 0.032, "step": 3488 }, { "epoch": 698.0, "grad_norm": 1.9238014221191406, "learning_rate": 1.514e-05, "loss": 0.0305, "step": 3490 }, { "epoch": 698.4, "grad_norm": 2.4406802654266357, "learning_rate": 1.5120000000000001e-05, "loss": 0.0258, "step": 3492 }, { "epoch": 698.8, "grad_norm": 3.3354783058166504, "learning_rate": 1.51e-05, "loss": 0.0971, "step": 3494 }, { "epoch": 699.2, "grad_norm": 0.7927127480506897, "learning_rate": 1.508e-05, "loss": 0.0297, "step": 3496 }, { "epoch": 699.6, "grad_norm": 1.7944114208221436, "learning_rate": 1.5060000000000001e-05, "loss": 0.0633, "step": 3498 }, { "epoch": 700.0, "grad_norm": 1.9360566139221191, "learning_rate": 1.5040000000000002e-05, "loss": 0.0476, "step": 3500 }, { "epoch": 700.4, "grad_norm": 1.5522773265838623, "learning_rate": 1.502e-05, "loss": 0.0483, "step": 3502 }, { "epoch": 700.8, "grad_norm": 2.5774755477905273, "learning_rate": 1.5e-05, "loss": 0.0545, "step": 3504 }, { "epoch": 701.2, "grad_norm": 
1.7503174543380737, "learning_rate": 1.4979999999999999e-05, "loss": 0.0549, "step": 3506 }, { "epoch": 701.6, "grad_norm": 1.1969586610794067, "learning_rate": 1.4960000000000002e-05, "loss": 0.0498, "step": 3508 }, { "epoch": 702.0, "grad_norm": 3.777215003967285, "learning_rate": 1.4940000000000001e-05, "loss": 0.0379, "step": 3510 }, { "epoch": 702.4, "grad_norm": 2.0928001403808594, "learning_rate": 1.4920000000000001e-05, "loss": 0.0461, "step": 3512 }, { "epoch": 702.8, "grad_norm": 4.167431831359863, "learning_rate": 1.49e-05, "loss": 0.0684, "step": 3514 }, { "epoch": 703.2, "grad_norm": 2.54809832572937, "learning_rate": 1.488e-05, "loss": 0.0511, "step": 3516 }, { "epoch": 703.6, "grad_norm": 0.9506762027740479, "learning_rate": 1.4860000000000002e-05, "loss": 0.0269, "step": 3518 }, { "epoch": 704.0, "grad_norm": 3.2696914672851562, "learning_rate": 1.4840000000000002e-05, "loss": 0.074, "step": 3520 }, { "epoch": 704.4, "grad_norm": 1.5293852090835571, "learning_rate": 1.482e-05, "loss": 0.0508, "step": 3522 }, { "epoch": 704.8, "grad_norm": 1.7822108268737793, "learning_rate": 1.48e-05, "loss": 0.0427, "step": 3524 }, { "epoch": 705.2, "grad_norm": 2.5883612632751465, "learning_rate": 1.4779999999999999e-05, "loss": 0.0672, "step": 3526 }, { "epoch": 705.6, "grad_norm": 2.658252000808716, "learning_rate": 1.4760000000000001e-05, "loss": 0.0534, "step": 3528 }, { "epoch": 706.0, "grad_norm": 7.12090539932251, "learning_rate": 1.4740000000000001e-05, "loss": 0.0842, "step": 3530 }, { "epoch": 706.4, "grad_norm": 2.513805866241455, "learning_rate": 1.472e-05, "loss": 0.0488, "step": 3532 }, { "epoch": 706.8, "grad_norm": 1.365681767463684, "learning_rate": 1.47e-05, "loss": 0.0438, "step": 3534 }, { "epoch": 707.2, "grad_norm": 2.5658419132232666, "learning_rate": 1.4680000000000002e-05, "loss": 0.0394, "step": 3536 }, { "epoch": 707.6, "grad_norm": 2.740206718444824, "learning_rate": 1.4660000000000002e-05, "loss": 0.0808, "step": 3538 }, { "epoch": 
708.0, "grad_norm": 1.317673921585083, "learning_rate": 1.464e-05, "loss": 0.0306, "step": 3540 }, { "epoch": 708.4, "grad_norm": 2.239718437194824, "learning_rate": 1.462e-05, "loss": 0.0298, "step": 3542 }, { "epoch": 708.8, "grad_norm": 1.1097630262374878, "learning_rate": 1.4599999999999999e-05, "loss": 0.024, "step": 3544 }, { "epoch": 709.2, "grad_norm": 1.8984092473983765, "learning_rate": 1.4580000000000003e-05, "loss": 0.0388, "step": 3546 }, { "epoch": 709.6, "grad_norm": 3.055222988128662, "learning_rate": 1.4560000000000001e-05, "loss": 0.038, "step": 3548 }, { "epoch": 710.0, "grad_norm": 1.9284411668777466, "learning_rate": 1.4540000000000001e-05, "loss": 0.0466, "step": 3550 }, { "epoch": 710.4, "grad_norm": 4.265026092529297, "learning_rate": 1.452e-05, "loss": 0.0701, "step": 3552 }, { "epoch": 710.8, "grad_norm": 1.456832766532898, "learning_rate": 1.45e-05, "loss": 0.0393, "step": 3554 }, { "epoch": 711.2, "grad_norm": 2.666119337081909, "learning_rate": 1.4480000000000002e-05, "loss": 0.049, "step": 3556 }, { "epoch": 711.6, "grad_norm": 0.8895554542541504, "learning_rate": 1.4460000000000002e-05, "loss": 0.0355, "step": 3558 }, { "epoch": 712.0, "grad_norm": 4.842914581298828, "learning_rate": 1.444e-05, "loss": 0.0608, "step": 3560 }, { "epoch": 712.4, "grad_norm": 0.9188222885131836, "learning_rate": 1.4420000000000001e-05, "loss": 0.0365, "step": 3562 }, { "epoch": 712.8, "grad_norm": 1.5632437467575073, "learning_rate": 1.44e-05, "loss": 0.0534, "step": 3564 }, { "epoch": 713.2, "grad_norm": 2.743556022644043, "learning_rate": 1.4380000000000001e-05, "loss": 0.0394, "step": 3566 }, { "epoch": 713.6, "grad_norm": 2.8603475093841553, "learning_rate": 1.4360000000000001e-05, "loss": 0.0434, "step": 3568 }, { "epoch": 714.0, "grad_norm": 3.9810242652893066, "learning_rate": 1.434e-05, "loss": 0.0427, "step": 3570 }, { "epoch": 714.4, "grad_norm": 1.4751412868499756, "learning_rate": 1.432e-05, "loss": 0.0451, "step": 3572 }, { "epoch": 714.8, 
"grad_norm": 4.460021018981934, "learning_rate": 1.43e-05, "loss": 0.0433, "step": 3574 }, { "epoch": 715.2, "grad_norm": 2.362337350845337, "learning_rate": 1.4280000000000002e-05, "loss": 0.0514, "step": 3576 }, { "epoch": 715.6, "grad_norm": 1.4668232202529907, "learning_rate": 1.426e-05, "loss": 0.0358, "step": 3578 }, { "epoch": 716.0, "grad_norm": 1.801436185836792, "learning_rate": 1.4240000000000001e-05, "loss": 0.0383, "step": 3580 }, { "epoch": 716.4, "grad_norm": 3.0168445110321045, "learning_rate": 1.422e-05, "loss": 0.0453, "step": 3582 }, { "epoch": 716.8, "grad_norm": 1.4068840742111206, "learning_rate": 1.42e-05, "loss": 0.037, "step": 3584 }, { "epoch": 717.2, "grad_norm": 2.679334878921509, "learning_rate": 1.4180000000000001e-05, "loss": 0.0408, "step": 3586 }, { "epoch": 717.6, "grad_norm": 0.957267165184021, "learning_rate": 1.4160000000000002e-05, "loss": 0.0348, "step": 3588 }, { "epoch": 718.0, "grad_norm": 1.4081228971481323, "learning_rate": 1.414e-05, "loss": 0.0368, "step": 3590 }, { "epoch": 718.4, "grad_norm": 1.8122862577438354, "learning_rate": 1.412e-05, "loss": 0.035, "step": 3592 }, { "epoch": 718.8, "grad_norm": 2.707129716873169, "learning_rate": 1.4099999999999999e-05, "loss": 0.0472, "step": 3594 }, { "epoch": 719.2, "grad_norm": 2.8922290802001953, "learning_rate": 1.408e-05, "loss": 0.0377, "step": 3596 }, { "epoch": 719.6, "grad_norm": 1.6573847532272339, "learning_rate": 1.4060000000000001e-05, "loss": 0.0253, "step": 3598 }, { "epoch": 720.0, "grad_norm": 3.196274757385254, "learning_rate": 1.4040000000000001e-05, "loss": 0.0365, "step": 3600 }, { "epoch": 720.0, "eval_cer": 0.8497409326424871, "eval_loss": 5.65879487991333, "eval_runtime": 10.866, "eval_samples_per_second": 0.92, "eval_steps_per_second": 0.184, "step": 3600 }, { "epoch": 720.4, "grad_norm": 1.7962344884872437, "learning_rate": 1.402e-05, "loss": 0.0468, "step": 3602 }, { "epoch": 720.8, "grad_norm": 2.260091781616211, "learning_rate": 
1.4000000000000001e-05, "loss": 0.045, "step": 3604 }, { "epoch": 721.2, "grad_norm": 1.119921326637268, "learning_rate": 1.3980000000000002e-05, "loss": 0.0405, "step": 3606 }, { "epoch": 721.6, "grad_norm": 2.756131172180176, "learning_rate": 1.396e-05, "loss": 0.0416, "step": 3608 }, { "epoch": 722.0, "grad_norm": 7.076635360717773, "learning_rate": 1.394e-05, "loss": 0.1504, "step": 3610 }, { "epoch": 722.4, "grad_norm": 2.316617012023926, "learning_rate": 1.3919999999999999e-05, "loss": 0.0372, "step": 3612 }, { "epoch": 722.8, "grad_norm": 3.734356164932251, "learning_rate": 1.3900000000000002e-05, "loss": 0.0606, "step": 3614 }, { "epoch": 723.2, "grad_norm": 4.033563137054443, "learning_rate": 1.3880000000000001e-05, "loss": 0.0634, "step": 3616 }, { "epoch": 723.6, "grad_norm": 2.0772087574005127, "learning_rate": 1.3860000000000001e-05, "loss": 0.0437, "step": 3618 }, { "epoch": 724.0, "grad_norm": 3.0050809383392334, "learning_rate": 1.384e-05, "loss": 0.0503, "step": 3620 }, { "epoch": 724.4, "grad_norm": 1.5410784482955933, "learning_rate": 1.382e-05, "loss": 0.0456, "step": 3622 }, { "epoch": 724.8, "grad_norm": 1.9508724212646484, "learning_rate": 1.3800000000000002e-05, "loss": 0.0413, "step": 3624 }, { "epoch": 725.2, "grad_norm": 1.156769037246704, "learning_rate": 1.3780000000000002e-05, "loss": 0.0216, "step": 3626 }, { "epoch": 725.6, "grad_norm": 3.211069345474243, "learning_rate": 1.376e-05, "loss": 0.0523, "step": 3628 }, { "epoch": 726.0, "grad_norm": 0.7225372791290283, "learning_rate": 1.374e-05, "loss": 0.0208, "step": 3630 }, { "epoch": 726.4, "grad_norm": 4.186135292053223, "learning_rate": 1.3719999999999999e-05, "loss": 0.033, "step": 3632 }, { "epoch": 726.8, "grad_norm": 5.8144330978393555, "learning_rate": 1.3700000000000001e-05, "loss": 0.0681, "step": 3634 }, { "epoch": 727.2, "grad_norm": 1.7166060209274292, "learning_rate": 1.3680000000000001e-05, "loss": 0.0248, "step": 3636 }, { "epoch": 727.6, "grad_norm": 
1.638479471206665, "learning_rate": 1.3660000000000001e-05, "loss": 0.0363, "step": 3638 }, { "epoch": 728.0, "grad_norm": 2.228649139404297, "learning_rate": 1.364e-05, "loss": 0.0551, "step": 3640 }, { "epoch": 728.4, "grad_norm": 1.6497563123703003, "learning_rate": 1.362e-05, "loss": 0.0282, "step": 3642 }, { "epoch": 728.8, "grad_norm": 2.02925705909729, "learning_rate": 1.3600000000000002e-05, "loss": 0.0306, "step": 3644 }, { "epoch": 729.2, "grad_norm": 2.455268621444702, "learning_rate": 1.358e-05, "loss": 0.0635, "step": 3646 }, { "epoch": 729.6, "grad_norm": 2.110567331314087, "learning_rate": 1.356e-05, "loss": 0.0389, "step": 3648 }, { "epoch": 730.0, "grad_norm": 1.3118997812271118, "learning_rate": 1.3539999999999999e-05, "loss": 0.0151, "step": 3650 }, { "epoch": 730.4, "grad_norm": 1.4547139406204224, "learning_rate": 1.352e-05, "loss": 0.0243, "step": 3652 }, { "epoch": 730.8, "grad_norm": 1.8997750282287598, "learning_rate": 1.3500000000000001e-05, "loss": 0.0518, "step": 3654 }, { "epoch": 731.2, "grad_norm": 0.6570135951042175, "learning_rate": 1.3480000000000001e-05, "loss": 0.0267, "step": 3656 }, { "epoch": 731.6, "grad_norm": 4.083014488220215, "learning_rate": 1.346e-05, "loss": 0.0571, "step": 3658 }, { "epoch": 732.0, "grad_norm": 3.44917893409729, "learning_rate": 1.344e-05, "loss": 0.0394, "step": 3660 }, { "epoch": 732.4, "grad_norm": 1.9882771968841553, "learning_rate": 1.3420000000000002e-05, "loss": 0.0447, "step": 3662 }, { "epoch": 732.8, "grad_norm": 0.8310521841049194, "learning_rate": 1.3400000000000002e-05, "loss": 0.0166, "step": 3664 }, { "epoch": 733.2, "grad_norm": 1.7363057136535645, "learning_rate": 1.338e-05, "loss": 0.0329, "step": 3666 }, { "epoch": 733.6, "grad_norm": 0.7819139361381531, "learning_rate": 1.336e-05, "loss": 0.0228, "step": 3668 }, { "epoch": 734.0, "grad_norm": 1.479896068572998, "learning_rate": 1.334e-05, "loss": 0.0365, "step": 3670 }, { "epoch": 734.4, "grad_norm": 2.3137943744659424, 
"learning_rate": 1.3320000000000001e-05, "loss": 0.0589, "step": 3672 }, { "epoch": 734.8, "grad_norm": 1.152243971824646, "learning_rate": 1.3300000000000001e-05, "loss": 0.025, "step": 3674 }, { "epoch": 735.2, "grad_norm": 3.76621675491333, "learning_rate": 1.3280000000000002e-05, "loss": 0.0542, "step": 3676 }, { "epoch": 735.6, "grad_norm": 0.8568558096885681, "learning_rate": 1.326e-05, "loss": 0.0417, "step": 3678 }, { "epoch": 736.0, "grad_norm": 1.9573228359222412, "learning_rate": 1.324e-05, "loss": 0.041, "step": 3680 }, { "epoch": 736.4, "grad_norm": 1.7299160957336426, "learning_rate": 1.3220000000000002e-05, "loss": 0.0351, "step": 3682 }, { "epoch": 736.8, "grad_norm": 0.627434492111206, "learning_rate": 1.32e-05, "loss": 0.0516, "step": 3684 }, { "epoch": 737.2, "grad_norm": 2.167555332183838, "learning_rate": 1.3180000000000001e-05, "loss": 0.073, "step": 3686 }, { "epoch": 737.6, "grad_norm": 2.4491517543792725, "learning_rate": 1.316e-05, "loss": 0.0347, "step": 3688 }, { "epoch": 738.0, "grad_norm": 0.5564351081848145, "learning_rate": 1.314e-05, "loss": 0.037, "step": 3690 }, { "epoch": 738.4, "grad_norm": 0.8658536076545715, "learning_rate": 1.3120000000000001e-05, "loss": 0.0215, "step": 3692 }, { "epoch": 738.8, "grad_norm": 2.1979689598083496, "learning_rate": 1.3100000000000002e-05, "loss": 0.0401, "step": 3694 }, { "epoch": 739.2, "grad_norm": 2.0984628200531006, "learning_rate": 1.308e-05, "loss": 0.0371, "step": 3696 }, { "epoch": 739.6, "grad_norm": 1.7856910228729248, "learning_rate": 1.306e-05, "loss": 0.0296, "step": 3698 }, { "epoch": 740.0, "grad_norm": 1.3292806148529053, "learning_rate": 1.3039999999999999e-05, "loss": 0.0223, "step": 3700 }, { "epoch": 740.4, "grad_norm": 6.729653835296631, "learning_rate": 1.3020000000000002e-05, "loss": 0.074, "step": 3702 }, { "epoch": 740.8, "grad_norm": 1.4632220268249512, "learning_rate": 1.3000000000000001e-05, "loss": 0.0436, "step": 3704 }, { "epoch": 741.2, "grad_norm": 
2.630584955215454, "learning_rate": 1.2980000000000001e-05, "loss": 0.0433, "step": 3706 }, { "epoch": 741.6, "grad_norm": 4.350565433502197, "learning_rate": 1.296e-05, "loss": 0.0406, "step": 3708 }, { "epoch": 742.0, "grad_norm": 1.5024784803390503, "learning_rate": 1.294e-05, "loss": 0.0485, "step": 3710 }, { "epoch": 742.4, "grad_norm": 2.028130292892456, "learning_rate": 1.2920000000000002e-05, "loss": 0.0594, "step": 3712 }, { "epoch": 742.8, "grad_norm": 1.1201485395431519, "learning_rate": 1.29e-05, "loss": 0.022, "step": 3714 }, { "epoch": 743.2, "grad_norm": 1.2075583934783936, "learning_rate": 1.288e-05, "loss": 0.0265, "step": 3716 }, { "epoch": 743.6, "grad_norm": 2.774137258529663, "learning_rate": 1.286e-05, "loss": 0.0383, "step": 3718 }, { "epoch": 744.0, "grad_norm": 5.416350364685059, "learning_rate": 1.2839999999999999e-05, "loss": 0.0786, "step": 3720 }, { "epoch": 744.4, "grad_norm": 2.6670773029327393, "learning_rate": 1.2820000000000001e-05, "loss": 0.0325, "step": 3722 }, { "epoch": 744.8, "grad_norm": 2.1593785285949707, "learning_rate": 1.2800000000000001e-05, "loss": 0.0413, "step": 3724 }, { "epoch": 745.2, "grad_norm": 2.002650022506714, "learning_rate": 1.278e-05, "loss": 0.0312, "step": 3726 }, { "epoch": 745.6, "grad_norm": 1.936164140701294, "learning_rate": 1.276e-05, "loss": 0.0243, "step": 3728 }, { "epoch": 746.0, "grad_norm": 1.8547717332839966, "learning_rate": 1.2740000000000002e-05, "loss": 0.0399, "step": 3730 }, { "epoch": 746.4, "grad_norm": 3.1501171588897705, "learning_rate": 1.2720000000000002e-05, "loss": 0.032, "step": 3732 }, { "epoch": 746.8, "grad_norm": 0.8723810911178589, "learning_rate": 1.27e-05, "loss": 0.027, "step": 3734 }, { "epoch": 747.2, "grad_norm": 3.4539148807525635, "learning_rate": 1.268e-05, "loss": 0.0482, "step": 3736 }, { "epoch": 747.6, "grad_norm": 1.3060643672943115, "learning_rate": 1.2659999999999999e-05, "loss": 0.0269, "step": 3738 }, { "epoch": 748.0, "grad_norm": 2.7551488876342773, 
"learning_rate": 1.2640000000000003e-05, "loss": 0.0284, "step": 3740 }, { "epoch": 748.4, "grad_norm": 1.0009137392044067, "learning_rate": 1.2620000000000001e-05, "loss": 0.0247, "step": 3742 }, { "epoch": 748.8, "grad_norm": 1.5129976272583008, "learning_rate": 1.2600000000000001e-05, "loss": 0.048, "step": 3744 }, { "epoch": 749.2, "grad_norm": 3.6371476650238037, "learning_rate": 1.258e-05, "loss": 0.0371, "step": 3746 }, { "epoch": 749.6, "grad_norm": 1.6782655715942383, "learning_rate": 1.256e-05, "loss": 0.029, "step": 3748 }, { "epoch": 750.0, "grad_norm": 3.2354347705841064, "learning_rate": 1.2540000000000002e-05, "loss": 0.0329, "step": 3750 }, { "epoch": 750.4, "grad_norm": 1.7548147439956665, "learning_rate": 1.252e-05, "loss": 0.0275, "step": 3752 }, { "epoch": 750.8, "grad_norm": 2.688228130340576, "learning_rate": 1.25e-05, "loss": 0.0432, "step": 3754 }, { "epoch": 751.2, "grad_norm": 3.3512911796569824, "learning_rate": 1.248e-05, "loss": 0.0497, "step": 3756 }, { "epoch": 751.6, "grad_norm": 2.4893031120300293, "learning_rate": 1.2460000000000001e-05, "loss": 0.0387, "step": 3758 }, { "epoch": 752.0, "grad_norm": 3.603759527206421, "learning_rate": 1.244e-05, "loss": 0.053, "step": 3760 }, { "epoch": 752.4, "grad_norm": 3.1649422645568848, "learning_rate": 1.2420000000000001e-05, "loss": 0.0508, "step": 3762 }, { "epoch": 752.8, "grad_norm": 1.535531759262085, "learning_rate": 1.24e-05, "loss": 0.0332, "step": 3764 }, { "epoch": 753.2, "grad_norm": 1.706977367401123, "learning_rate": 1.238e-05, "loss": 0.0319, "step": 3766 }, { "epoch": 753.6, "grad_norm": 2.5066232681274414, "learning_rate": 1.236e-05, "loss": 0.0433, "step": 3768 }, { "epoch": 754.0, "grad_norm": 1.8006137609481812, "learning_rate": 1.234e-05, "loss": 0.0249, "step": 3770 }, { "epoch": 754.4, "grad_norm": 1.7781040668487549, "learning_rate": 1.232e-05, "loss": 0.0261, "step": 3772 }, { "epoch": 754.8, "grad_norm": 3.1421170234680176, "learning_rate": 1.23e-05, "loss": 0.038, 
"step": 3774 }, { "epoch": 755.2, "grad_norm": 0.9355373382568359, "learning_rate": 1.2280000000000001e-05, "loss": 0.0232, "step": 3776 }, { "epoch": 755.6, "grad_norm": 3.1055400371551514, "learning_rate": 1.2260000000000001e-05, "loss": 0.0495, "step": 3778 }, { "epoch": 756.0, "grad_norm": 4.146120071411133, "learning_rate": 1.224e-05, "loss": 0.0401, "step": 3780 }, { "epoch": 756.4, "grad_norm": 0.9135686159133911, "learning_rate": 1.2220000000000002e-05, "loss": 0.0188, "step": 3782 }, { "epoch": 756.8, "grad_norm": 3.9991183280944824, "learning_rate": 1.22e-05, "loss": 0.0428, "step": 3784 }, { "epoch": 757.2, "grad_norm": 0.7097906470298767, "learning_rate": 1.2180000000000002e-05, "loss": 0.0276, "step": 3786 }, { "epoch": 757.6, "grad_norm": 1.9780782461166382, "learning_rate": 1.216e-05, "loss": 0.0184, "step": 3788 }, { "epoch": 758.0, "grad_norm": 0.5671160221099854, "learning_rate": 1.214e-05, "loss": 0.0545, "step": 3790 }, { "epoch": 758.4, "grad_norm": 1.1512974500656128, "learning_rate": 1.2120000000000001e-05, "loss": 0.0469, "step": 3792 }, { "epoch": 758.8, "grad_norm": 1.1992844343185425, "learning_rate": 1.2100000000000001e-05, "loss": 0.03, "step": 3794 }, { "epoch": 759.2, "grad_norm": 0.6173650026321411, "learning_rate": 1.2080000000000001e-05, "loss": 0.012, "step": 3796 }, { "epoch": 759.6, "grad_norm": 1.8645148277282715, "learning_rate": 1.206e-05, "loss": 0.0111, "step": 3798 }, { "epoch": 760.0, "grad_norm": 3.421665668487549, "learning_rate": 1.204e-05, "loss": 0.0581, "step": 3800 }, { "epoch": 760.0, "eval_cer": 0.9844559585492227, "eval_loss": 5.778679370880127, "eval_runtime": 11.2312, "eval_samples_per_second": 0.89, "eval_steps_per_second": 0.178, "step": 3800 }, { "epoch": 760.4, "grad_norm": 1.852907419204712, "learning_rate": 1.202e-05, "loss": 0.0181, "step": 3802 }, { "epoch": 760.8, "grad_norm": 2.911541223526001, "learning_rate": 1.2e-05, "loss": 0.0248, "step": 3804 }, { "epoch": 761.2, "grad_norm": 
1.0590224266052246, "learning_rate": 1.198e-05, "loss": 0.0286, "step": 3806 }, { "epoch": 761.6, "grad_norm": 0.9174668788909912, "learning_rate": 1.196e-05, "loss": 0.0395, "step": 3808 }, { "epoch": 762.0, "grad_norm": 2.8939170837402344, "learning_rate": 1.1940000000000001e-05, "loss": 0.0179, "step": 3810 }, { "epoch": 762.4, "grad_norm": 2.3052783012390137, "learning_rate": 1.1920000000000001e-05, "loss": 0.0212, "step": 3812 }, { "epoch": 762.8, "grad_norm": 2.168642997741699, "learning_rate": 1.19e-05, "loss": 0.0123, "step": 3814 }, { "epoch": 763.2, "grad_norm": 2.467606544494629, "learning_rate": 1.1880000000000001e-05, "loss": 0.027, "step": 3816 }, { "epoch": 763.6, "grad_norm": 1.4321223497390747, "learning_rate": 1.186e-05, "loss": 0.0491, "step": 3818 }, { "epoch": 764.0, "grad_norm": 1.4398454427719116, "learning_rate": 1.1840000000000002e-05, "loss": 0.0266, "step": 3820 }, { "epoch": 764.4, "grad_norm": 0.6096422672271729, "learning_rate": 1.182e-05, "loss": 0.0167, "step": 3822 }, { "epoch": 764.8, "grad_norm": 4.199224472045898, "learning_rate": 1.18e-05, "loss": 0.0519, "step": 3824 }, { "epoch": 765.2, "grad_norm": 0.9572998881340027, "learning_rate": 1.178e-05, "loss": 0.0192, "step": 3826 }, { "epoch": 765.6, "grad_norm": 2.169630289077759, "learning_rate": 1.1760000000000001e-05, "loss": 0.048, "step": 3828 }, { "epoch": 766.0, "grad_norm": 1.042709231376648, "learning_rate": 1.1740000000000001e-05, "loss": 0.0278, "step": 3830 }, { "epoch": 766.4, "grad_norm": 0.6761509776115417, "learning_rate": 1.172e-05, "loss": 0.0162, "step": 3832 }, { "epoch": 766.8, "grad_norm": 4.036219120025635, "learning_rate": 1.1700000000000001e-05, "loss": 0.0512, "step": 3834 }, { "epoch": 767.2, "grad_norm": 2.1031031608581543, "learning_rate": 1.168e-05, "loss": 0.0137, "step": 3836 }, { "epoch": 767.6, "grad_norm": 1.6351547241210938, "learning_rate": 1.166e-05, "loss": 0.0142, "step": 3838 }, { "epoch": 768.0, "grad_norm": 3.770040988922119, 
"learning_rate": 1.164e-05, "loss": 0.0282, "step": 3840 }, { "epoch": 768.4, "grad_norm": 2.080451726913452, "learning_rate": 1.162e-05, "loss": 0.033, "step": 3842 }, { "epoch": 768.8, "grad_norm": 2.1268391609191895, "learning_rate": 1.16e-05, "loss": 0.0394, "step": 3844 }, { "epoch": 769.2, "grad_norm": 1.0406523942947388, "learning_rate": 1.1580000000000001e-05, "loss": 0.015, "step": 3846 }, { "epoch": 769.6, "grad_norm": 3.5559639930725098, "learning_rate": 1.156e-05, "loss": 0.0311, "step": 3848 }, { "epoch": 770.0, "grad_norm": 2.2618443965911865, "learning_rate": 1.1540000000000001e-05, "loss": 0.0315, "step": 3850 }, { "epoch": 770.4, "grad_norm": 1.2544986009597778, "learning_rate": 1.152e-05, "loss": 0.0432, "step": 3852 }, { "epoch": 770.8, "grad_norm": 0.5581336617469788, "learning_rate": 1.1500000000000002e-05, "loss": 0.0184, "step": 3854 }, { "epoch": 771.2, "grad_norm": 2.5634474754333496, "learning_rate": 1.148e-05, "loss": 0.0318, "step": 3856 }, { "epoch": 771.6, "grad_norm": 1.0599069595336914, "learning_rate": 1.146e-05, "loss": 0.0175, "step": 3858 }, { "epoch": 772.0, "grad_norm": 1.590185523033142, "learning_rate": 1.144e-05, "loss": 0.0158, "step": 3860 }, { "epoch": 772.4, "grad_norm": 3.1218271255493164, "learning_rate": 1.142e-05, "loss": 0.036, "step": 3862 }, { "epoch": 772.8, "grad_norm": 3.128950834274292, "learning_rate": 1.1400000000000001e-05, "loss": 0.0483, "step": 3864 }, { "epoch": 773.2, "grad_norm": 1.823964238166809, "learning_rate": 1.1380000000000001e-05, "loss": 0.0341, "step": 3866 }, { "epoch": 773.6, "grad_norm": 0.7947022318840027, "learning_rate": 1.1360000000000001e-05, "loss": 0.0355, "step": 3868 }, { "epoch": 774.0, "grad_norm": 0.8914023041725159, "learning_rate": 1.134e-05, "loss": 0.0258, "step": 3870 }, { "epoch": 774.4, "grad_norm": 3.3237674236297607, "learning_rate": 1.132e-05, "loss": 0.05, "step": 3872 }, { "epoch": 774.8, "grad_norm": 0.47215649485588074, "learning_rate": 1.13e-05, "loss": 0.0273, 
"step": 3874 }, { "epoch": 775.2, "grad_norm": 0.7916866540908813, "learning_rate": 1.128e-05, "loss": 0.0186, "step": 3876 }, { "epoch": 775.6, "grad_norm": 1.0351872444152832, "learning_rate": 1.126e-05, "loss": 0.0258, "step": 3878 }, { "epoch": 776.0, "grad_norm": 2.316249132156372, "learning_rate": 1.124e-05, "loss": 0.033, "step": 3880 }, { "epoch": 776.4, "grad_norm": 2.5310611724853516, "learning_rate": 1.122e-05, "loss": 0.0318, "step": 3882 }, { "epoch": 776.8, "grad_norm": 0.5155133605003357, "learning_rate": 1.1200000000000001e-05, "loss": 0.0124, "step": 3884 }, { "epoch": 777.2, "grad_norm": 2.533953905105591, "learning_rate": 1.118e-05, "loss": 0.0166, "step": 3886 }, { "epoch": 777.6, "grad_norm": 1.3026554584503174, "learning_rate": 1.1160000000000002e-05, "loss": 0.0187, "step": 3888 }, { "epoch": 778.0, "grad_norm": 1.945618987083435, "learning_rate": 1.114e-05, "loss": 0.026, "step": 3890 }, { "epoch": 778.4, "grad_norm": 2.576169967651367, "learning_rate": 1.112e-05, "loss": 0.0336, "step": 3892 }, { "epoch": 778.8, "grad_norm": 1.329588532447815, "learning_rate": 1.11e-05, "loss": 0.0241, "step": 3894 }, { "epoch": 779.2, "grad_norm": 1.8523677587509155, "learning_rate": 1.108e-05, "loss": 0.0188, "step": 3896 }, { "epoch": 779.6, "grad_norm": 4.367215633392334, "learning_rate": 1.106e-05, "loss": 0.0342, "step": 3898 }, { "epoch": 780.0, "grad_norm": 2.299787998199463, "learning_rate": 1.1040000000000001e-05, "loss": 0.0346, "step": 3900 }, { "epoch": 780.4, "grad_norm": 2.3877906799316406, "learning_rate": 1.1020000000000001e-05, "loss": 0.0352, "step": 3902 }, { "epoch": 780.8, "grad_norm": 1.2404804229736328, "learning_rate": 1.1000000000000001e-05, "loss": 0.0133, "step": 3904 }, { "epoch": 781.2, "grad_norm": 2.430363893508911, "learning_rate": 1.098e-05, "loss": 0.0224, "step": 3906 }, { "epoch": 781.6, "grad_norm": 0.5327134132385254, "learning_rate": 1.096e-05, "loss": 0.0148, "step": 3908 }, { "epoch": 782.0, "grad_norm": 
7.333368301391602, "learning_rate": 1.094e-05, "loss": 0.0943, "step": 3910 }, { "epoch": 782.4, "grad_norm": 5.356266975402832, "learning_rate": 1.092e-05, "loss": 0.0773, "step": 3912 }, { "epoch": 782.8, "grad_norm": 0.8153685331344604, "learning_rate": 1.09e-05, "loss": 0.0167, "step": 3914 }, { "epoch": 783.2, "grad_norm": 2.9928181171417236, "learning_rate": 1.088e-05, "loss": 0.0331, "step": 3916 }, { "epoch": 783.6, "grad_norm": 1.7017661333084106, "learning_rate": 1.0860000000000001e-05, "loss": 0.0204, "step": 3918 }, { "epoch": 784.0, "grad_norm": 2.1701276302337646, "learning_rate": 1.084e-05, "loss": 0.0357, "step": 3920 }, { "epoch": 784.4, "grad_norm": 1.3080952167510986, "learning_rate": 1.0820000000000001e-05, "loss": 0.0177, "step": 3922 }, { "epoch": 784.8, "grad_norm": 3.71382999420166, "learning_rate": 1.08e-05, "loss": 0.0305, "step": 3924 }, { "epoch": 785.2, "grad_norm": 1.1258769035339355, "learning_rate": 1.0780000000000002e-05, "loss": 0.016, "step": 3926 }, { "epoch": 785.6, "grad_norm": 1.1905940771102905, "learning_rate": 1.076e-05, "loss": 0.0132, "step": 3928 }, { "epoch": 786.0, "grad_norm": 0.6586520671844482, "learning_rate": 1.074e-05, "loss": 0.0329, "step": 3930 }, { "epoch": 786.4, "grad_norm": 4.179177284240723, "learning_rate": 1.072e-05, "loss": 0.0368, "step": 3932 }, { "epoch": 786.8, "grad_norm": 3.0890908241271973, "learning_rate": 1.0700000000000001e-05, "loss": 0.0501, "step": 3934 }, { "epoch": 787.2, "grad_norm": 2.491922616958618, "learning_rate": 1.0680000000000001e-05, "loss": 0.025, "step": 3936 }, { "epoch": 787.6, "grad_norm": 1.0971635580062866, "learning_rate": 1.0660000000000001e-05, "loss": 0.0222, "step": 3938 }, { "epoch": 788.0, "grad_norm": 1.7721463441848755, "learning_rate": 1.064e-05, "loss": 0.0305, "step": 3940 }, { "epoch": 788.4, "grad_norm": 1.4886889457702637, "learning_rate": 1.062e-05, "loss": 0.0178, "step": 3942 }, { "epoch": 788.8, "grad_norm": 1.9491239786148071, "learning_rate": 
1.06e-05, "loss": 0.0318, "step": 3944 }, { "epoch": 789.2, "grad_norm": 1.1356430053710938, "learning_rate": 1.058e-05, "loss": 0.0139, "step": 3946 }, { "epoch": 789.6, "grad_norm": 2.166321039199829, "learning_rate": 1.056e-05, "loss": 0.0279, "step": 3948 }, { "epoch": 790.0, "grad_norm": 2.244081974029541, "learning_rate": 1.0539999999999999e-05, "loss": 0.0265, "step": 3950 }, { "epoch": 790.4, "grad_norm": 0.8772174119949341, "learning_rate": 1.0520000000000001e-05, "loss": 0.0163, "step": 3952 }, { "epoch": 790.8, "grad_norm": 2.823695182800293, "learning_rate": 1.05e-05, "loss": 0.0305, "step": 3954 }, { "epoch": 791.2, "grad_norm": 2.895740032196045, "learning_rate": 1.0480000000000001e-05, "loss": 0.0348, "step": 3956 }, { "epoch": 791.6, "grad_norm": 1.1300901174545288, "learning_rate": 1.046e-05, "loss": 0.0207, "step": 3958 }, { "epoch": 792.0, "grad_norm": 0.8884359002113342, "learning_rate": 1.0440000000000002e-05, "loss": 0.0195, "step": 3960 }, { "epoch": 792.4, "grad_norm": 2.368915319442749, "learning_rate": 1.042e-05, "loss": 0.028, "step": 3962 }, { "epoch": 792.8, "grad_norm": 2.6876330375671387, "learning_rate": 1.04e-05, "loss": 0.0281, "step": 3964 }, { "epoch": 793.2, "grad_norm": 1.0367575883865356, "learning_rate": 1.038e-05, "loss": 0.0507, "step": 3966 }, { "epoch": 793.6, "grad_norm": 3.168985605239868, "learning_rate": 1.036e-05, "loss": 0.0495, "step": 3968 }, { "epoch": 794.0, "grad_norm": 1.229306697845459, "learning_rate": 1.0340000000000001e-05, "loss": 0.0301, "step": 3970 }, { "epoch": 794.4, "grad_norm": 1.0520139932632446, "learning_rate": 1.0320000000000001e-05, "loss": 0.021, "step": 3972 }, { "epoch": 794.8, "grad_norm": 2.676933526992798, "learning_rate": 1.03e-05, "loss": 0.0309, "step": 3974 }, { "epoch": 795.2, "grad_norm": 4.34064245223999, "learning_rate": 1.0280000000000002e-05, "loss": 0.0249, "step": 3976 }, { "epoch": 795.6, "grad_norm": 5.3315510749816895, "learning_rate": 1.026e-05, "loss": 0.0309, "step": 
3978 }, { "epoch": 796.0, "grad_norm": 1.3164293766021729, "learning_rate": 1.024e-05, "loss": 0.023, "step": 3980 }, { "epoch": 796.4, "grad_norm": 1.9977514743804932, "learning_rate": 1.022e-05, "loss": 0.027, "step": 3982 }, { "epoch": 796.8, "grad_norm": 1.7131800651550293, "learning_rate": 1.02e-05, "loss": 0.028, "step": 3984 }, { "epoch": 797.2, "grad_norm": 4.3003363609313965, "learning_rate": 1.018e-05, "loss": 0.0347, "step": 3986 }, { "epoch": 797.6, "grad_norm": 1.341761589050293, "learning_rate": 1.016e-05, "loss": 0.0123, "step": 3988 }, { "epoch": 798.0, "grad_norm": 3.1462953090667725, "learning_rate": 1.0140000000000001e-05, "loss": 0.0444, "step": 3990 }, { "epoch": 798.4, "grad_norm": 0.8807296752929688, "learning_rate": 1.012e-05, "loss": 0.0219, "step": 3992 }, { "epoch": 798.8, "grad_norm": 2.3460042476654053, "learning_rate": 1.0100000000000002e-05, "loss": 0.0362, "step": 3994 }, { "epoch": 799.2, "grad_norm": 0.8241341710090637, "learning_rate": 1.008e-05, "loss": 0.0091, "step": 3996 }, { "epoch": 799.6, "grad_norm": 3.496680974960327, "learning_rate": 1.006e-05, "loss": 0.0345, "step": 3998 }, { "epoch": 800.0, "grad_norm": 3.266923666000366, "learning_rate": 1.004e-05, "loss": 0.0417, "step": 4000 }, { "epoch": 800.0, "eval_cer": 0.9689119170984456, "eval_loss": 5.907256126403809, "eval_runtime": 10.2447, "eval_samples_per_second": 0.976, "eval_steps_per_second": 0.195, "step": 4000 }, { "epoch": 800.4, "grad_norm": 1.3846269845962524, "learning_rate": 1.002e-05, "loss": 0.022, "step": 4002 }, { "epoch": 800.8, "grad_norm": 1.1469746828079224, "learning_rate": 1e-05, "loss": 0.025, "step": 4004 }, { "epoch": 801.2, "grad_norm": 2.7114078998565674, "learning_rate": 9.980000000000001e-06, "loss": 0.0361, "step": 4006 }, { "epoch": 801.6, "grad_norm": 2.8197381496429443, "learning_rate": 9.96e-06, "loss": 0.0232, "step": 4008 }, { "epoch": 802.0, "grad_norm": 1.0120513439178467, "learning_rate": 9.940000000000001e-06, "loss": 0.0306, 
"step": 4010 }, { "epoch": 802.4, "grad_norm": 1.4894874095916748, "learning_rate": 9.92e-06, "loss": 0.0306, "step": 4012 }, { "epoch": 802.8, "grad_norm": 1.369505763053894, "learning_rate": 9.900000000000002e-06, "loss": 0.0172, "step": 4014 }, { "epoch": 803.2, "grad_norm": 0.9297989010810852, "learning_rate": 9.88e-06, "loss": 0.0212, "step": 4016 }, { "epoch": 803.6, "grad_norm": 0.5790768265724182, "learning_rate": 9.86e-06, "loss": 0.0166, "step": 4018 }, { "epoch": 804.0, "grad_norm": 0.6795037984848022, "learning_rate": 9.84e-06, "loss": 0.0147, "step": 4020 }, { "epoch": 804.4, "grad_norm": 0.1836501955986023, "learning_rate": 9.820000000000001e-06, "loss": 0.0243, "step": 4022 }, { "epoch": 804.8, "grad_norm": 5.362781047821045, "learning_rate": 9.800000000000001e-06, "loss": 0.0379, "step": 4024 }, { "epoch": 805.2, "grad_norm": 0.6634941101074219, "learning_rate": 9.78e-06, "loss": 0.0207, "step": 4026 }, { "epoch": 805.6, "grad_norm": 1.424170732498169, "learning_rate": 9.760000000000001e-06, "loss": 0.0254, "step": 4028 }, { "epoch": 806.0, "grad_norm": 2.7897703647613525, "learning_rate": 9.74e-06, "loss": 0.0303, "step": 4030 }, { "epoch": 806.4, "grad_norm": 0.7810472846031189, "learning_rate": 9.72e-06, "loss": 0.0235, "step": 4032 }, { "epoch": 806.8, "grad_norm": 2.362274408340454, "learning_rate": 9.7e-06, "loss": 0.0288, "step": 4034 }, { "epoch": 807.2, "grad_norm": 2.253084897994995, "learning_rate": 9.68e-06, "loss": 0.0327, "step": 4036 }, { "epoch": 807.6, "grad_norm": 0.9703194499015808, "learning_rate": 9.66e-06, "loss": 0.0176, "step": 4038 }, { "epoch": 808.0, "grad_norm": 1.4522712230682373, "learning_rate": 9.640000000000001e-06, "loss": 0.0176, "step": 4040 }, { "epoch": 808.4, "grad_norm": 1.024231195449829, "learning_rate": 9.62e-06, "loss": 0.0078, "step": 4042 }, { "epoch": 808.8, "grad_norm": 1.745613694190979, "learning_rate": 9.600000000000001e-06, "loss": 0.0509, "step": 4044 }, { "epoch": 809.2, "grad_norm": 
2.158809185028076, "learning_rate": 9.58e-06, "loss": 0.0147, "step": 4046 }, { "epoch": 809.6, "grad_norm": 1.7091212272644043, "learning_rate": 9.560000000000002e-06, "loss": 0.0287, "step": 4048 }, { "epoch": 810.0, "grad_norm": 1.1262223720550537, "learning_rate": 9.54e-06, "loss": 0.0166, "step": 4050 }, { "epoch": 810.4, "grad_norm": 1.4952987432479858, "learning_rate": 9.52e-06, "loss": 0.0309, "step": 4052 }, { "epoch": 810.8, "grad_norm": 0.879986047744751, "learning_rate": 9.5e-06, "loss": 0.0249, "step": 4054 }, { "epoch": 811.2, "grad_norm": 1.7666233777999878, "learning_rate": 9.48e-06, "loss": 0.0293, "step": 4056 }, { "epoch": 811.6, "grad_norm": 0.7960854768753052, "learning_rate": 9.460000000000001e-06, "loss": 0.0247, "step": 4058 }, { "epoch": 812.0, "grad_norm": 1.274422287940979, "learning_rate": 9.44e-06, "loss": 0.0195, "step": 4060 }, { "epoch": 812.4, "grad_norm": 0.5529813766479492, "learning_rate": 9.420000000000001e-06, "loss": 0.0157, "step": 4062 }, { "epoch": 812.8, "grad_norm": 1.8585835695266724, "learning_rate": 9.4e-06, "loss": 0.0336, "step": 4064 }, { "epoch": 813.2, "grad_norm": 1.7958025932312012, "learning_rate": 9.38e-06, "loss": 0.0449, "step": 4066 }, { "epoch": 813.6, "grad_norm": 2.7267844676971436, "learning_rate": 9.36e-06, "loss": 0.0264, "step": 4068 }, { "epoch": 814.0, "grad_norm": 1.7481924295425415, "learning_rate": 9.34e-06, "loss": 0.029, "step": 4070 }, { "epoch": 814.4, "grad_norm": 2.172417640686035, "learning_rate": 9.32e-06, "loss": 0.0297, "step": 4072 }, { "epoch": 814.8, "grad_norm": 0.9762948751449585, "learning_rate": 9.3e-06, "loss": 0.0115, "step": 4074 }, { "epoch": 815.2, "grad_norm": 2.6577563285827637, "learning_rate": 9.28e-06, "loss": 0.0246, "step": 4076 }, { "epoch": 815.6, "grad_norm": 0.8015798926353455, "learning_rate": 9.260000000000001e-06, "loss": 0.0267, "step": 4078 }, { "epoch": 816.0, "grad_norm": 1.0584297180175781, "learning_rate": 9.24e-06, "loss": 0.011, "step": 4080 }, { 
"epoch": 816.4, "grad_norm": 1.0849862098693848, "learning_rate": 9.220000000000002e-06, "loss": 0.0231, "step": 4082 }, { "epoch": 816.8, "grad_norm": 0.9697434306144714, "learning_rate": 9.2e-06, "loss": 0.012, "step": 4084 }, { "epoch": 817.2, "grad_norm": 2.6315059661865234, "learning_rate": 9.180000000000002e-06, "loss": 0.0553, "step": 4086 }, { "epoch": 817.6, "grad_norm": 3.3184351921081543, "learning_rate": 9.16e-06, "loss": 0.0535, "step": 4088 }, { "epoch": 818.0, "grad_norm": 1.1517833471298218, "learning_rate": 9.14e-06, "loss": 0.014, "step": 4090 }, { "epoch": 818.4, "grad_norm": 4.32451868057251, "learning_rate": 9.12e-06, "loss": 0.0376, "step": 4092 }, { "epoch": 818.8, "grad_norm": 1.1398695707321167, "learning_rate": 9.100000000000001e-06, "loss": 0.0398, "step": 4094 }, { "epoch": 819.2, "grad_norm": 2.0802507400512695, "learning_rate": 9.080000000000001e-06, "loss": 0.0252, "step": 4096 }, { "epoch": 819.6, "grad_norm": 0.9045012593269348, "learning_rate": 9.06e-06, "loss": 0.0174, "step": 4098 }, { "epoch": 820.0, "grad_norm": 2.1668124198913574, "learning_rate": 9.04e-06, "loss": 0.0211, "step": 4100 }, { "epoch": 820.4, "grad_norm": 2.2313766479492188, "learning_rate": 9.02e-06, "loss": 0.0362, "step": 4102 }, { "epoch": 820.8, "grad_norm": 0.9132230281829834, "learning_rate": 9e-06, "loss": 0.0077, "step": 4104 }, { "epoch": 821.2, "grad_norm": 2.8791136741638184, "learning_rate": 8.98e-06, "loss": 0.0157, "step": 4106 }, { "epoch": 821.6, "grad_norm": 1.9534212350845337, "learning_rate": 8.96e-06, "loss": 0.0134, "step": 4108 }, { "epoch": 822.0, "grad_norm": 0.4635048508644104, "learning_rate": 8.939999999999999e-06, "loss": 0.0069, "step": 4110 }, { "epoch": 822.4, "grad_norm": 3.4828341007232666, "learning_rate": 8.920000000000001e-06, "loss": 0.0353, "step": 4112 }, { "epoch": 822.8, "grad_norm": 0.8256353139877319, "learning_rate": 8.9e-06, "loss": 0.013, "step": 4114 }, { "epoch": 823.2, "grad_norm": 2.4521946907043457, 
"learning_rate": 8.880000000000001e-06, "loss": 0.016, "step": 4116 }, { "epoch": 823.6, "grad_norm": 1.7152241468429565, "learning_rate": 8.86e-06, "loss": 0.0237, "step": 4118 }, { "epoch": 824.0, "grad_norm": 1.822773814201355, "learning_rate": 8.840000000000002e-06, "loss": 0.0197, "step": 4120 }, { "epoch": 824.4, "grad_norm": 0.6199412941932678, "learning_rate": 8.82e-06, "loss": 0.0204, "step": 4122 }, { "epoch": 824.8, "grad_norm": 0.9337736368179321, "learning_rate": 8.8e-06, "loss": 0.0086, "step": 4124 }, { "epoch": 825.2, "grad_norm": 0.8277480006217957, "learning_rate": 8.78e-06, "loss": 0.0086, "step": 4126 }, { "epoch": 825.6, "grad_norm": 1.116439938545227, "learning_rate": 8.76e-06, "loss": 0.0132, "step": 4128 }, { "epoch": 826.0, "grad_norm": 1.8832006454467773, "learning_rate": 8.740000000000001e-06, "loss": 0.0295, "step": 4130 }, { "epoch": 826.4, "grad_norm": 2.929202079772949, "learning_rate": 8.720000000000001e-06, "loss": 0.0221, "step": 4132 }, { "epoch": 826.8, "grad_norm": 0.5216739177703857, "learning_rate": 8.7e-06, "loss": 0.0065, "step": 4134 }, { "epoch": 827.2, "grad_norm": 1.4088107347488403, "learning_rate": 8.68e-06, "loss": 0.0131, "step": 4136 }, { "epoch": 827.6, "grad_norm": 1.7594072818756104, "learning_rate": 8.66e-06, "loss": 0.0156, "step": 4138 }, { "epoch": 828.0, "grad_norm": 1.8217796087265015, "learning_rate": 8.64e-06, "loss": 0.0146, "step": 4140 }, { "epoch": 828.4, "grad_norm": 0.7106614112854004, "learning_rate": 8.62e-06, "loss": 0.0176, "step": 4142 }, { "epoch": 828.8, "grad_norm": 2.4765520095825195, "learning_rate": 8.599999999999999e-06, "loss": 0.0275, "step": 4144 }, { "epoch": 829.2, "grad_norm": 0.6357381343841553, "learning_rate": 8.580000000000001e-06, "loss": 0.0211, "step": 4146 }, { "epoch": 829.6, "grad_norm": 2.847214460372925, "learning_rate": 8.56e-06, "loss": 0.0365, "step": 4148 }, { "epoch": 830.0, "grad_norm": 1.0173203945159912, "learning_rate": 8.540000000000001e-06, "loss": 0.0108, 
"step": 4150 }, { "epoch": 830.4, "grad_norm": 1.3322340250015259, "learning_rate": 8.52e-06, "loss": 0.0148, "step": 4152 }, { "epoch": 830.8, "grad_norm": 1.3917874097824097, "learning_rate": 8.500000000000002e-06, "loss": 0.0229, "step": 4154 }, { "epoch": 831.2, "grad_norm": 2.691869020462036, "learning_rate": 8.48e-06, "loss": 0.0275, "step": 4156 }, { "epoch": 831.6, "grad_norm": 1.337558627128601, "learning_rate": 8.46e-06, "loss": 0.0122, "step": 4158 }, { "epoch": 832.0, "grad_norm": 1.4767327308654785, "learning_rate": 8.44e-06, "loss": 0.0364, "step": 4160 }, { "epoch": 832.4, "grad_norm": 3.814605474472046, "learning_rate": 8.42e-06, "loss": 0.0347, "step": 4162 }, { "epoch": 832.8, "grad_norm": 2.818389654159546, "learning_rate": 8.400000000000001e-06, "loss": 0.0198, "step": 4164 }, { "epoch": 833.2, "grad_norm": 3.2514357566833496, "learning_rate": 8.380000000000001e-06, "loss": 0.0284, "step": 4166 }, { "epoch": 833.6, "grad_norm": 1.4297858476638794, "learning_rate": 8.36e-06, "loss": 0.0098, "step": 4168 }, { "epoch": 834.0, "grad_norm": 0.7793368697166443, "learning_rate": 8.34e-06, "loss": 0.0083, "step": 4170 }, { "epoch": 834.4, "grad_norm": 1.9074702262878418, "learning_rate": 8.32e-06, "loss": 0.0198, "step": 4172 }, { "epoch": 834.8, "grad_norm": 1.2998321056365967, "learning_rate": 8.3e-06, "loss": 0.0144, "step": 4174 }, { "epoch": 835.2, "grad_norm": 3.494831085205078, "learning_rate": 8.28e-06, "loss": 0.0288, "step": 4176 }, { "epoch": 835.6, "grad_norm": 2.5131473541259766, "learning_rate": 8.26e-06, "loss": 0.0242, "step": 4178 }, { "epoch": 836.0, "grad_norm": 0.6369240283966064, "learning_rate": 8.24e-06, "loss": 0.0277, "step": 4180 }, { "epoch": 836.4, "grad_norm": 1.439581274986267, "learning_rate": 8.22e-06, "loss": 0.0094, "step": 4182 }, { "epoch": 836.8, "grad_norm": 3.580019950866699, "learning_rate": 8.200000000000001e-06, "loss": 0.0278, "step": 4184 }, { "epoch": 837.2, "grad_norm": 2.0335276126861572, "learning_rate": 
8.18e-06, "loss": 0.0238, "step": 4186 }, { "epoch": 837.6, "grad_norm": 0.9112706780433655, "learning_rate": 8.160000000000001e-06, "loss": 0.0193, "step": 4188 }, { "epoch": 838.0, "grad_norm": 2.0161759853363037, "learning_rate": 8.14e-06, "loss": 0.0197, "step": 4190 }, { "epoch": 838.4, "grad_norm": 2.0969629287719727, "learning_rate": 8.12e-06, "loss": 0.0287, "step": 4192 }, { "epoch": 838.8, "grad_norm": 2.8399927616119385, "learning_rate": 8.1e-06, "loss": 0.0115, "step": 4194 }, { "epoch": 839.2, "grad_norm": 1.3836287260055542, "learning_rate": 8.08e-06, "loss": 0.0126, "step": 4196 }, { "epoch": 839.6, "grad_norm": 2.9628820419311523, "learning_rate": 8.06e-06, "loss": 0.0298, "step": 4198 }, { "epoch": 840.0, "grad_norm": 1.019310474395752, "learning_rate": 8.040000000000001e-06, "loss": 0.0135, "step": 4200 }, { "epoch": 840.0, "eval_cer": 0.844559585492228, "eval_loss": 5.9253082275390625, "eval_runtime": 7.463, "eval_samples_per_second": 1.34, "eval_steps_per_second": 0.268, "step": 4200 }, { "epoch": 840.4, "grad_norm": 2.4139511585235596, "learning_rate": 8.02e-06, "loss": 0.0346, "step": 4202 }, { "epoch": 840.8, "grad_norm": 1.4437613487243652, "learning_rate": 8.000000000000001e-06, "loss": 0.0168, "step": 4204 }, { "epoch": 841.2, "grad_norm": 2.834376335144043, "learning_rate": 7.98e-06, "loss": 0.016, "step": 4206 }, { "epoch": 841.6, "grad_norm": 2.1421449184417725, "learning_rate": 7.96e-06, "loss": 0.0173, "step": 4208 }, { "epoch": 842.0, "grad_norm": 2.9387009143829346, "learning_rate": 7.94e-06, "loss": 0.0174, "step": 4210 }, { "epoch": 842.4, "grad_norm": 1.325251817703247, "learning_rate": 7.92e-06, "loss": 0.0206, "step": 4212 }, { "epoch": 842.8, "grad_norm": 0.90850430727005, "learning_rate": 7.9e-06, "loss": 0.0169, "step": 4214 }, { "epoch": 843.2, "grad_norm": 2.198197841644287, "learning_rate": 7.879999999999999e-06, "loss": 0.0162, "step": 4216 }, { "epoch": 843.6, "grad_norm": 1.20256769657135, "learning_rate": 
7.860000000000001e-06, "loss": 0.015, "step": 4218 }, { "epoch": 844.0, "grad_norm": 2.3890042304992676, "learning_rate": 7.84e-06, "loss": 0.0315, "step": 4220 }, { "epoch": 844.4, "grad_norm": 1.4771133661270142, "learning_rate": 7.820000000000001e-06, "loss": 0.0292, "step": 4222 }, { "epoch": 844.8, "grad_norm": 2.676724433898926, "learning_rate": 7.8e-06, "loss": 0.0246, "step": 4224 }, { "epoch": 845.2, "grad_norm": 1.807215929031372, "learning_rate": 7.78e-06, "loss": 0.0353, "step": 4226 }, { "epoch": 845.6, "grad_norm": 0.5129318237304688, "learning_rate": 7.76e-06, "loss": 0.022, "step": 4228 }, { "epoch": 846.0, "grad_norm": 0.6781650185585022, "learning_rate": 7.74e-06, "loss": 0.0241, "step": 4230 }, { "epoch": 846.4, "grad_norm": 1.218066692352295, "learning_rate": 7.72e-06, "loss": 0.0203, "step": 4232 }, { "epoch": 846.8, "grad_norm": 1.0814565420150757, "learning_rate": 7.7e-06, "loss": 0.0233, "step": 4234 }, { "epoch": 847.2, "grad_norm": 1.4272456169128418, "learning_rate": 7.68e-06, "loss": 0.0243, "step": 4236 }, { "epoch": 847.6, "grad_norm": 2.8030571937561035, "learning_rate": 7.660000000000001e-06, "loss": 0.0203, "step": 4238 }, { "epoch": 848.0, "grad_norm": 0.8668378591537476, "learning_rate": 7.64e-06, "loss": 0.0058, "step": 4240 }, { "epoch": 848.4, "grad_norm": 3.6039226055145264, "learning_rate": 7.620000000000001e-06, "loss": 0.0296, "step": 4242 }, { "epoch": 848.8, "grad_norm": 3.5025687217712402, "learning_rate": 7.6e-06, "loss": 0.0624, "step": 4244 }, { "epoch": 849.2, "grad_norm": 2.8646485805511475, "learning_rate": 7.580000000000001e-06, "loss": 0.0328, "step": 4246 }, { "epoch": 849.6, "grad_norm": 1.1786781549453735, "learning_rate": 7.5600000000000005e-06, "loss": 0.0062, "step": 4248 }, { "epoch": 850.0, "grad_norm": 1.288277268409729, "learning_rate": 7.54e-06, "loss": 0.0205, "step": 4250 }, { "epoch": 850.4, "grad_norm": 1.4911863803863525, "learning_rate": 7.520000000000001e-06, "loss": 0.0156, "step": 4252 }, { 
"epoch": 850.8, "grad_norm": 0.6840441226959229, "learning_rate": 7.5e-06, "loss": 0.0114, "step": 4254 }, { "epoch": 851.2, "grad_norm": 2.2678558826446533, "learning_rate": 7.480000000000001e-06, "loss": 0.0374, "step": 4256 }, { "epoch": 851.6, "grad_norm": 2.3372793197631836, "learning_rate": 7.4600000000000006e-06, "loss": 0.0252, "step": 4258 }, { "epoch": 852.0, "grad_norm": 1.0053930282592773, "learning_rate": 7.44e-06, "loss": 0.0102, "step": 4260 }, { "epoch": 852.4, "grad_norm": 2.01884388923645, "learning_rate": 7.420000000000001e-06, "loss": 0.0242, "step": 4262 }, { "epoch": 852.8, "grad_norm": 2.876995801925659, "learning_rate": 7.4e-06, "loss": 0.0248, "step": 4264 }, { "epoch": 853.2, "grad_norm": 2.5536487102508545, "learning_rate": 7.3800000000000005e-06, "loss": 0.0195, "step": 4266 }, { "epoch": 853.6, "grad_norm": 1.3914456367492676, "learning_rate": 7.36e-06, "loss": 0.0113, "step": 4268 }, { "epoch": 854.0, "grad_norm": 0.9604011178016663, "learning_rate": 7.340000000000001e-06, "loss": 0.0327, "step": 4270 }, { "epoch": 854.4, "grad_norm": 1.3072509765625, "learning_rate": 7.32e-06, "loss": 0.0083, "step": 4272 }, { "epoch": 854.8, "grad_norm": 2.6690924167633057, "learning_rate": 7.2999999999999996e-06, "loss": 0.0178, "step": 4274 }, { "epoch": 855.2, "grad_norm": 4.206664562225342, "learning_rate": 7.280000000000001e-06, "loss": 0.0255, "step": 4276 }, { "epoch": 855.6, "grad_norm": 1.0589569807052612, "learning_rate": 7.26e-06, "loss": 0.0132, "step": 4278 }, { "epoch": 856.0, "grad_norm": 4.262838840484619, "learning_rate": 7.240000000000001e-06, "loss": 0.0332, "step": 4280 }, { "epoch": 856.4, "grad_norm": 1.3161221742630005, "learning_rate": 7.22e-06, "loss": 0.0075, "step": 4282 }, { "epoch": 856.8, "grad_norm": 2.856611490249634, "learning_rate": 7.2e-06, "loss": 0.0181, "step": 4284 }, { "epoch": 857.2, "grad_norm": 1.036173939704895, "learning_rate": 7.180000000000001e-06, "loss": 0.0128, "step": 4286 }, { "epoch": 857.6, 
"grad_norm": 1.582527756690979, "learning_rate": 7.16e-06, "loss": 0.0101, "step": 4288 }, { "epoch": 858.0, "grad_norm": 2.660861015319824, "learning_rate": 7.140000000000001e-06, "loss": 0.0136, "step": 4290 }, { "epoch": 858.4, "grad_norm": 3.4294912815093994, "learning_rate": 7.1200000000000004e-06, "loss": 0.0394, "step": 4292 }, { "epoch": 858.8, "grad_norm": 2.211803674697876, "learning_rate": 7.1e-06, "loss": 0.0479, "step": 4294 }, { "epoch": 859.2, "grad_norm": 1.455553412437439, "learning_rate": 7.080000000000001e-06, "loss": 0.0099, "step": 4296 }, { "epoch": 859.6, "grad_norm": 1.473196268081665, "learning_rate": 7.06e-06, "loss": 0.0219, "step": 4298 }, { "epoch": 860.0, "grad_norm": 1.2194198369979858, "learning_rate": 7.04e-06, "loss": 0.0252, "step": 4300 }, { "epoch": 860.4, "grad_norm": 2.2912774085998535, "learning_rate": 7.0200000000000006e-06, "loss": 0.0132, "step": 4302 }, { "epoch": 860.8, "grad_norm": 0.6153225898742676, "learning_rate": 7.000000000000001e-06, "loss": 0.0141, "step": 4304 }, { "epoch": 861.2, "grad_norm": 2.718421697616577, "learning_rate": 6.98e-06, "loss": 0.0139, "step": 4306 }, { "epoch": 861.6, "grad_norm": 0.5412417650222778, "learning_rate": 6.9599999999999994e-06, "loss": 0.0146, "step": 4308 }, { "epoch": 862.0, "grad_norm": 4.5048933029174805, "learning_rate": 6.9400000000000005e-06, "loss": 0.0417, "step": 4310 }, { "epoch": 862.4, "grad_norm": 0.8604233860969543, "learning_rate": 6.92e-06, "loss": 0.0094, "step": 4312 }, { "epoch": 862.8, "grad_norm": 1.3983564376831055, "learning_rate": 6.900000000000001e-06, "loss": 0.0104, "step": 4314 }, { "epoch": 863.2, "grad_norm": 3.4860808849334717, "learning_rate": 6.88e-06, "loss": 0.0193, "step": 4316 }, { "epoch": 863.6, "grad_norm": 1.8917955160140991, "learning_rate": 6.8599999999999995e-06, "loss": 0.0084, "step": 4318 }, { "epoch": 864.0, "grad_norm": 4.438288688659668, "learning_rate": 6.840000000000001e-06, "loss": 0.0225, "step": 4320 }, { "epoch": 864.4, 
"grad_norm": 0.09430878609418869, "learning_rate": 6.82e-06, "loss": 0.0137, "step": 4322 }, { "epoch": 864.8, "grad_norm": 2.1289584636688232, "learning_rate": 6.800000000000001e-06, "loss": 0.0317, "step": 4324 }, { "epoch": 865.2, "grad_norm": 3.224700689315796, "learning_rate": 6.78e-06, "loss": 0.0186, "step": 4326 }, { "epoch": 865.6, "grad_norm": 1.4260207414627075, "learning_rate": 6.76e-06, "loss": 0.0124, "step": 4328 }, { "epoch": 866.0, "grad_norm": 0.7236484885215759, "learning_rate": 6.740000000000001e-06, "loss": 0.0089, "step": 4330 }, { "epoch": 866.4, "grad_norm": 1.3708550930023193, "learning_rate": 6.72e-06, "loss": 0.0158, "step": 4332 }, { "epoch": 866.8, "grad_norm": 1.6548798084259033, "learning_rate": 6.700000000000001e-06, "loss": 0.023, "step": 4334 }, { "epoch": 867.2, "grad_norm": 1.5152397155761719, "learning_rate": 6.68e-06, "loss": 0.0214, "step": 4336 }, { "epoch": 867.6, "grad_norm": 2.9019618034362793, "learning_rate": 6.660000000000001e-06, "loss": 0.0146, "step": 4338 }, { "epoch": 868.0, "grad_norm": 2.3361549377441406, "learning_rate": 6.640000000000001e-06, "loss": 0.0137, "step": 4340 }, { "epoch": 868.4, "grad_norm": 1.7312577962875366, "learning_rate": 6.62e-06, "loss": 0.0207, "step": 4342 }, { "epoch": 868.8, "grad_norm": 0.5571810007095337, "learning_rate": 6.6e-06, "loss": 0.0117, "step": 4344 }, { "epoch": 869.2, "grad_norm": 2.253033399581909, "learning_rate": 6.58e-06, "loss": 0.0224, "step": 4346 }, { "epoch": 869.6, "grad_norm": 0.6948078274726868, "learning_rate": 6.560000000000001e-06, "loss": 0.0057, "step": 4348 }, { "epoch": 870.0, "grad_norm": 2.1356987953186035, "learning_rate": 6.54e-06, "loss": 0.0062, "step": 4350 }, { "epoch": 870.4, "grad_norm": 2.5592103004455566, "learning_rate": 6.519999999999999e-06, "loss": 0.0312, "step": 4352 }, { "epoch": 870.8, "grad_norm": 3.284168004989624, "learning_rate": 6.5000000000000004e-06, "loss": 0.0566, "step": 4354 }, { "epoch": 871.2, "grad_norm": 
1.9325273036956787, "learning_rate": 6.48e-06, "loss": 0.0172, "step": 4356 }, { "epoch": 871.6, "grad_norm": 0.7977363467216492, "learning_rate": 6.460000000000001e-06, "loss": 0.0104, "step": 4358 }, { "epoch": 872.0, "grad_norm": 1.1264784336090088, "learning_rate": 6.44e-06, "loss": 0.0155, "step": 4360 }, { "epoch": 872.4, "grad_norm": 1.3069642782211304, "learning_rate": 6.4199999999999995e-06, "loss": 0.0189, "step": 4362 }, { "epoch": 872.8, "grad_norm": 1.3915576934814453, "learning_rate": 6.4000000000000006e-06, "loss": 0.0143, "step": 4364 }, { "epoch": 873.2, "grad_norm": 1.5095793008804321, "learning_rate": 6.38e-06, "loss": 0.014, "step": 4366 }, { "epoch": 873.6, "grad_norm": 0.9059845209121704, "learning_rate": 6.360000000000001e-06, "loss": 0.0144, "step": 4368 }, { "epoch": 874.0, "grad_norm": 1.144038438796997, "learning_rate": 6.34e-06, "loss": 0.0074, "step": 4370 }, { "epoch": 874.4, "grad_norm": 1.1205127239227295, "learning_rate": 6.320000000000001e-06, "loss": 0.0111, "step": 4372 }, { "epoch": 874.8, "grad_norm": 0.568921685218811, "learning_rate": 6.300000000000001e-06, "loss": 0.0218, "step": 4374 }, { "epoch": 875.2, "grad_norm": 1.9434576034545898, "learning_rate": 6.28e-06, "loss": 0.0186, "step": 4376 }, { "epoch": 875.6, "grad_norm": 1.5082075595855713, "learning_rate": 6.26e-06, "loss": 0.0145, "step": 4378 }, { "epoch": 876.0, "grad_norm": 0.2783375084400177, "learning_rate": 6.24e-06, "loss": 0.0099, "step": 4380 }, { "epoch": 876.4, "grad_norm": 3.715101957321167, "learning_rate": 6.22e-06, "loss": 0.0165, "step": 4382 }, { "epoch": 876.8, "grad_norm": 2.3051416873931885, "learning_rate": 6.2e-06, "loss": 0.0149, "step": 4384 }, { "epoch": 877.2, "grad_norm": 3.224658966064453, "learning_rate": 6.18e-06, "loss": 0.0353, "step": 4386 }, { "epoch": 877.6, "grad_norm": 1.7567092180252075, "learning_rate": 6.16e-06, "loss": 0.018, "step": 4388 }, { "epoch": 878.0, "grad_norm": 0.8809515237808228, "learning_rate": 
6.1400000000000005e-06, "loss": 0.0062, "step": 4390 }, { "epoch": 878.4, "grad_norm": 1.1139260530471802, "learning_rate": 6.12e-06, "loss": 0.0162, "step": 4392 }, { "epoch": 878.8, "grad_norm": 4.15202522277832, "learning_rate": 6.1e-06, "loss": 0.0404, "step": 4394 }, { "epoch": 879.2, "grad_norm": 3.8586695194244385, "learning_rate": 6.08e-06, "loss": 0.0347, "step": 4396 }, { "epoch": 879.6, "grad_norm": 1.594111680984497, "learning_rate": 6.0600000000000004e-06, "loss": 0.0186, "step": 4398 }, { "epoch": 880.0, "grad_norm": 2.0479559898376465, "learning_rate": 6.040000000000001e-06, "loss": 0.0141, "step": 4400 }, { "epoch": 880.0, "eval_cer": 0.8652849740932642, "eval_loss": 5.798274993896484, "eval_runtime": 10.4953, "eval_samples_per_second": 0.953, "eval_steps_per_second": 0.191, "step": 4400 }, { "epoch": 880.4, "grad_norm": 1.1258996725082397, "learning_rate": 6.02e-06, "loss": 0.0057, "step": 4402 }, { "epoch": 880.8, "grad_norm": 4.044236183166504, "learning_rate": 6e-06, "loss": 0.0345, "step": 4404 }, { "epoch": 881.2, "grad_norm": 0.5549332499504089, "learning_rate": 5.98e-06, "loss": 0.0066, "step": 4406 }, { "epoch": 881.6, "grad_norm": 2.7585976123809814, "learning_rate": 5.9600000000000005e-06, "loss": 0.0203, "step": 4408 }, { "epoch": 882.0, "grad_norm": 1.167089581489563, "learning_rate": 5.940000000000001e-06, "loss": 0.0172, "step": 4410 }, { "epoch": 882.4, "grad_norm": 2.0702669620513916, "learning_rate": 5.920000000000001e-06, "loss": 0.0177, "step": 4412 }, { "epoch": 882.8, "grad_norm": 1.7911534309387207, "learning_rate": 5.9e-06, "loss": 0.012, "step": 4414 }, { "epoch": 883.2, "grad_norm": 1.1880173683166504, "learning_rate": 5.8800000000000005e-06, "loss": 0.0216, "step": 4416 }, { "epoch": 883.6, "grad_norm": 3.531724691390991, "learning_rate": 5.86e-06, "loss": 0.0237, "step": 4418 }, { "epoch": 884.0, "grad_norm": 0.9561977982521057, "learning_rate": 5.84e-06, "loss": 0.007, "step": 4420 }, { "epoch": 884.4, "grad_norm": 
0.5346122980117798, "learning_rate": 5.82e-06, "loss": 0.007, "step": 4422 }, { "epoch": 884.8, "grad_norm": 1.8492122888565063, "learning_rate": 5.8e-06, "loss": 0.0209, "step": 4424 }, { "epoch": 885.2, "grad_norm": 3.3925983905792236, "learning_rate": 5.78e-06, "loss": 0.0182, "step": 4426 }, { "epoch": 885.6, "grad_norm": 0.8583513498306274, "learning_rate": 5.76e-06, "loss": 0.0052, "step": 4428 }, { "epoch": 886.0, "grad_norm": 0.6618106365203857, "learning_rate": 5.74e-06, "loss": 0.0052, "step": 4430 }, { "epoch": 886.4, "grad_norm": 1.825413465499878, "learning_rate": 5.72e-06, "loss": 0.0177, "step": 4432 }, { "epoch": 886.8, "grad_norm": 3.904254674911499, "learning_rate": 5.7000000000000005e-06, "loss": 0.0263, "step": 4434 }, { "epoch": 887.2, "grad_norm": 2.677150249481201, "learning_rate": 5.680000000000001e-06, "loss": 0.024, "step": 4436 }, { "epoch": 887.6, "grad_norm": 1.256636619567871, "learning_rate": 5.66e-06, "loss": 0.0131, "step": 4438 }, { "epoch": 888.0, "grad_norm": 1.1774131059646606, "learning_rate": 5.64e-06, "loss": 0.0071, "step": 4440 }, { "epoch": 888.4, "grad_norm": 1.6445457935333252, "learning_rate": 5.62e-06, "loss": 0.0218, "step": 4442 }, { "epoch": 888.8, "grad_norm": 2.4621102809906006, "learning_rate": 5.600000000000001e-06, "loss": 0.0244, "step": 4444 }, { "epoch": 889.2, "grad_norm": 0.6779671907424927, "learning_rate": 5.580000000000001e-06, "loss": 0.0113, "step": 4446 }, { "epoch": 889.6, "grad_norm": 0.7202679514884949, "learning_rate": 5.56e-06, "loss": 0.0222, "step": 4448 }, { "epoch": 890.0, "grad_norm": 1.722650170326233, "learning_rate": 5.54e-06, "loss": 0.0222, "step": 4450 }, { "epoch": 890.4, "grad_norm": 2.8887250423431396, "learning_rate": 5.5200000000000005e-06, "loss": 0.0207, "step": 4452 }, { "epoch": 890.8, "grad_norm": 4.800909042358398, "learning_rate": 5.500000000000001e-06, "loss": 0.0458, "step": 4454 }, { "epoch": 891.2, "grad_norm": 2.4763264656066895, "learning_rate": 5.48e-06, "loss": 
0.028, "step": 4456 }, { "epoch": 891.6, "grad_norm": 1.1716160774230957, "learning_rate": 5.46e-06, "loss": 0.0114, "step": 4458 }, { "epoch": 892.0, "grad_norm": 1.124992847442627, "learning_rate": 5.44e-06, "loss": 0.0073, "step": 4460 }, { "epoch": 892.4, "grad_norm": 0.7944567203521729, "learning_rate": 5.42e-06, "loss": 0.0134, "step": 4462 }, { "epoch": 892.8, "grad_norm": 0.3993642032146454, "learning_rate": 5.4e-06, "loss": 0.0036, "step": 4464 }, { "epoch": 893.2, "grad_norm": 0.759009063243866, "learning_rate": 5.38e-06, "loss": 0.0144, "step": 4466 }, { "epoch": 893.6, "grad_norm": 1.2472556829452515, "learning_rate": 5.36e-06, "loss": 0.0046, "step": 4468 }, { "epoch": 894.0, "grad_norm": 1.43532395362854, "learning_rate": 5.3400000000000005e-06, "loss": 0.0228, "step": 4470 }, { "epoch": 894.4, "grad_norm": 1.3911241292953491, "learning_rate": 5.32e-06, "loss": 0.0083, "step": 4472 }, { "epoch": 894.8, "grad_norm": 0.3438008725643158, "learning_rate": 5.3e-06, "loss": 0.0038, "step": 4474 }, { "epoch": 895.2, "grad_norm": 0.6154689192771912, "learning_rate": 5.28e-06, "loss": 0.0042, "step": 4476 }, { "epoch": 895.6, "grad_norm": 3.7437009811401367, "learning_rate": 5.2600000000000005e-06, "loss": 0.012, "step": 4478 }, { "epoch": 896.0, "grad_norm": 1.0637949705123901, "learning_rate": 5.240000000000001e-06, "loss": 0.011, "step": 4480 }, { "epoch": 896.4, "grad_norm": 1.552032709121704, "learning_rate": 5.220000000000001e-06, "loss": 0.0229, "step": 4482 }, { "epoch": 896.8, "grad_norm": 1.4537209272384644, "learning_rate": 5.2e-06, "loss": 0.0251, "step": 4484 }, { "epoch": 897.2, "grad_norm": 0.9003869295120239, "learning_rate": 5.18e-06, "loss": 0.0046, "step": 4486 }, { "epoch": 897.6, "grad_norm": 0.3489748239517212, "learning_rate": 5.1600000000000006e-06, "loss": 0.0216, "step": 4488 }, { "epoch": 898.0, "grad_norm": 2.4235432147979736, "learning_rate": 5.140000000000001e-06, "loss": 0.0257, "step": 4490 }, { "epoch": 898.4, "grad_norm": 
1.3054066896438599, "learning_rate": 5.12e-06, "loss": 0.0127, "step": 4492 }, { "epoch": 898.8, "grad_norm": 1.6292197704315186, "learning_rate": 5.1e-06, "loss": 0.0126, "step": 4494 }, { "epoch": 899.2, "grad_norm": 2.2787368297576904, "learning_rate": 5.08e-06, "loss": 0.0183, "step": 4496 }, { "epoch": 899.6, "grad_norm": 1.0262376070022583, "learning_rate": 5.06e-06, "loss": 0.007, "step": 4498 }, { "epoch": 900.0, "grad_norm": 2.0793135166168213, "learning_rate": 5.04e-06, "loss": 0.0159, "step": 4500 }, { "epoch": 900.4, "grad_norm": 2.5996837615966797, "learning_rate": 5.02e-06, "loss": 0.0218, "step": 4502 }, { "epoch": 900.8, "grad_norm": 0.4176289439201355, "learning_rate": 5e-06, "loss": 0.0047, "step": 4504 }, { "epoch": 901.2, "grad_norm": 3.1083590984344482, "learning_rate": 4.98e-06, "loss": 0.0083, "step": 4506 }, { "epoch": 901.6, "grad_norm": 3.026745319366455, "learning_rate": 4.96e-06, "loss": 0.0146, "step": 4508 }, { "epoch": 902.0, "grad_norm": 1.6012837886810303, "learning_rate": 4.94e-06, "loss": 0.0072, "step": 4510 }, { "epoch": 902.4, "grad_norm": 1.67831552028656, "learning_rate": 4.92e-06, "loss": 0.0038, "step": 4512 }, { "epoch": 902.8, "grad_norm": 3.0897653102874756, "learning_rate": 4.9000000000000005e-06, "loss": 0.0235, "step": 4514 }, { "epoch": 903.2, "grad_norm": 4.091286659240723, "learning_rate": 4.880000000000001e-06, "loss": 0.0237, "step": 4516 }, { "epoch": 903.6, "grad_norm": 0.49504801630973816, "learning_rate": 4.86e-06, "loss": 0.0051, "step": 4518 }, { "epoch": 904.0, "grad_norm": 1.0421103239059448, "learning_rate": 4.84e-06, "loss": 0.0218, "step": 4520 }, { "epoch": 904.4, "grad_norm": 1.028603434562683, "learning_rate": 4.8200000000000004e-06, "loss": 0.011, "step": 4522 }, { "epoch": 904.8, "grad_norm": 0.2970399558544159, "learning_rate": 4.800000000000001e-06, "loss": 0.0051, "step": 4524 }, { "epoch": 905.2, "grad_norm": 2.364382028579712, "learning_rate": 4.780000000000001e-06, "loss": 0.0189, "step": 
4526 }, { "epoch": 905.6, "grad_norm": 0.4422241151332855, "learning_rate": 4.76e-06, "loss": 0.0109, "step": 4528 }, { "epoch": 906.0, "grad_norm": 2.1608760356903076, "learning_rate": 4.74e-06, "loss": 0.0131, "step": 4530 }, { "epoch": 906.4, "grad_norm": 0.12161199003458023, "learning_rate": 4.72e-06, "loss": 0.009, "step": 4532 }, { "epoch": 906.8, "grad_norm": 1.4804952144622803, "learning_rate": 4.7e-06, "loss": 0.0238, "step": 4534 }, { "epoch": 907.2, "grad_norm": 0.7341527342796326, "learning_rate": 4.68e-06, "loss": 0.01, "step": 4536 }, { "epoch": 907.6, "grad_norm": 1.3554614782333374, "learning_rate": 4.66e-06, "loss": 0.0205, "step": 4538 }, { "epoch": 908.0, "grad_norm": 0.9035517573356628, "learning_rate": 4.64e-06, "loss": 0.0081, "step": 4540 }, { "epoch": 908.4, "grad_norm": 1.6760250329971313, "learning_rate": 4.62e-06, "loss": 0.0199, "step": 4542 }, { "epoch": 908.8, "grad_norm": 0.5212812423706055, "learning_rate": 4.6e-06, "loss": 0.005, "step": 4544 }, { "epoch": 909.2, "grad_norm": 0.7475099563598633, "learning_rate": 4.58e-06, "loss": 0.0125, "step": 4546 }, { "epoch": 909.6, "grad_norm": 2.2574195861816406, "learning_rate": 4.56e-06, "loss": 0.0105, "step": 4548 }, { "epoch": 910.0, "grad_norm": 2.4152660369873047, "learning_rate": 4.540000000000001e-06, "loss": 0.0131, "step": 4550 }, { "epoch": 910.4, "grad_norm": 3.0419113636016846, "learning_rate": 4.52e-06, "loss": 0.0193, "step": 4552 }, { "epoch": 910.8, "grad_norm": 0.5520228147506714, "learning_rate": 4.5e-06, "loss": 0.0097, "step": 4554 }, { "epoch": 911.2, "grad_norm": 2.9290459156036377, "learning_rate": 4.48e-06, "loss": 0.0136, "step": 4556 }, { "epoch": 911.6, "grad_norm": 0.7498840689659119, "learning_rate": 4.4600000000000005e-06, "loss": 0.0142, "step": 4558 }, { "epoch": 912.0, "grad_norm": 1.4279303550720215, "learning_rate": 4.440000000000001e-06, "loss": 0.0147, "step": 4560 }, { "epoch": 912.4, "grad_norm": 2.443671226501465, "learning_rate": 
4.420000000000001e-06, "loss": 0.0205, "step": 4562 }, { "epoch": 912.8, "grad_norm": 3.3235387802124023, "learning_rate": 4.4e-06, "loss": 0.0183, "step": 4564 }, { "epoch": 913.2, "grad_norm": 2.6960608959198, "learning_rate": 4.38e-06, "loss": 0.0175, "step": 4566 }, { "epoch": 913.6, "grad_norm": 1.0370121002197266, "learning_rate": 4.360000000000001e-06, "loss": 0.0114, "step": 4568 }, { "epoch": 914.0, "grad_norm": 2.447061061859131, "learning_rate": 4.34e-06, "loss": 0.0107, "step": 4570 }, { "epoch": 914.4, "grad_norm": 3.4953832626342773, "learning_rate": 4.32e-06, "loss": 0.0327, "step": 4572 }, { "epoch": 914.8, "grad_norm": 0.5625460147857666, "learning_rate": 4.2999999999999995e-06, "loss": 0.0037, "step": 4574 }, { "epoch": 915.2, "grad_norm": 0.9331218004226685, "learning_rate": 4.28e-06, "loss": 0.0137, "step": 4576 }, { "epoch": 915.6, "grad_norm": 0.46286335587501526, "learning_rate": 4.26e-06, "loss": 0.0068, "step": 4578 }, { "epoch": 916.0, "grad_norm": 1.9197884798049927, "learning_rate": 4.24e-06, "loss": 0.0042, "step": 4580 }, { "epoch": 916.4, "grad_norm": 2.4258196353912354, "learning_rate": 4.22e-06, "loss": 0.0047, "step": 4582 }, { "epoch": 916.8, "grad_norm": 2.6092746257781982, "learning_rate": 4.2000000000000004e-06, "loss": 0.0078, "step": 4584 }, { "epoch": 917.2, "grad_norm": 3.272501230239868, "learning_rate": 4.18e-06, "loss": 0.0202, "step": 4586 }, { "epoch": 917.6, "grad_norm": 0.6734169721603394, "learning_rate": 4.16e-06, "loss": 0.0033, "step": 4588 }, { "epoch": 918.0, "grad_norm": 1.7746845483779907, "learning_rate": 4.14e-06, "loss": 0.0111, "step": 4590 }, { "epoch": 918.4, "grad_norm": 3.426013469696045, "learning_rate": 4.12e-06, "loss": 0.0067, "step": 4592 }, { "epoch": 918.8, "grad_norm": 2.978816032409668, "learning_rate": 4.1000000000000006e-06, "loss": 0.0101, "step": 4594 }, { "epoch": 919.2, "grad_norm": 0.4398278295993805, "learning_rate": 4.080000000000001e-06, "loss": 0.0103, "step": 4596 }, { "epoch": 
919.6, "grad_norm": 0.9343242645263672, "learning_rate": 4.06e-06, "loss": 0.0085, "step": 4598 }, { "epoch": 920.0, "grad_norm": 1.3646279573440552, "learning_rate": 4.04e-06, "loss": 0.0048, "step": 4600 }, { "epoch": 920.0, "eval_cer": 0.9689119170984456, "eval_loss": 5.954517364501953, "eval_runtime": 11.4023, "eval_samples_per_second": 0.877, "eval_steps_per_second": 0.175, "step": 4600 }, { "epoch": 920.4, "grad_norm": 4.039776802062988, "learning_rate": 4.0200000000000005e-06, "loss": 0.0306, "step": 4602 }, { "epoch": 920.8, "grad_norm": 4.049306869506836, "learning_rate": 4.000000000000001e-06, "loss": 0.0393, "step": 4604 }, { "epoch": 921.2, "grad_norm": 1.1815578937530518, "learning_rate": 3.98e-06, "loss": 0.0063, "step": 4606 }, { "epoch": 921.6, "grad_norm": 1.9774863719940186, "learning_rate": 3.96e-06, "loss": 0.0075, "step": 4608 }, { "epoch": 922.0, "grad_norm": 0.40581199526786804, "learning_rate": 3.9399999999999995e-06, "loss": 0.0173, "step": 4610 }, { "epoch": 922.4, "grad_norm": 2.215247392654419, "learning_rate": 3.92e-06, "loss": 0.0153, "step": 4612 }, { "epoch": 922.8, "grad_norm": 2.829404354095459, "learning_rate": 3.9e-06, "loss": 0.0072, "step": 4614 }, { "epoch": 923.2, "grad_norm": 3.7368335723876953, "learning_rate": 3.88e-06, "loss": 0.0138, "step": 4616 }, { "epoch": 923.6, "grad_norm": 1.0594358444213867, "learning_rate": 3.86e-06, "loss": 0.0042, "step": 4618 }, { "epoch": 924.0, "grad_norm": 0.3466489315032959, "learning_rate": 3.84e-06, "loss": 0.0166, "step": 4620 }, { "epoch": 924.4, "grad_norm": 0.5670722723007202, "learning_rate": 3.82e-06, "loss": 0.0045, "step": 4622 }, { "epoch": 924.8, "grad_norm": 1.0566874742507935, "learning_rate": 3.8e-06, "loss": 0.0108, "step": 4624 }, { "epoch": 925.2, "grad_norm": 0.4031703770160675, "learning_rate": 3.7800000000000002e-06, "loss": 0.0256, "step": 4626 }, { "epoch": 925.6, "grad_norm": 1.4180141687393188, "learning_rate": 3.7600000000000004e-06, "loss": 0.012, "step": 4628 
}, { "epoch": 926.0, "grad_norm": 0.3982624411582947, "learning_rate": 3.7400000000000006e-06, "loss": 0.0127, "step": 4630 }, { "epoch": 926.4, "grad_norm": 4.25453519821167, "learning_rate": 3.72e-06, "loss": 0.0227, "step": 4632 }, { "epoch": 926.8, "grad_norm": 1.3402272462844849, "learning_rate": 3.7e-06, "loss": 0.0046, "step": 4634 }, { "epoch": 927.2, "grad_norm": 0.6791250109672546, "learning_rate": 3.68e-06, "loss": 0.0096, "step": 4636 }, { "epoch": 927.6, "grad_norm": 0.48612600564956665, "learning_rate": 3.66e-06, "loss": 0.0114, "step": 4638 }, { "epoch": 928.0, "grad_norm": 0.5941303968429565, "learning_rate": 3.6400000000000003e-06, "loss": 0.0077, "step": 4640 }, { "epoch": 928.4, "grad_norm": 1.3463650941848755, "learning_rate": 3.6200000000000005e-06, "loss": 0.0078, "step": 4642 }, { "epoch": 928.8, "grad_norm": 1.1942932605743408, "learning_rate": 3.6e-06, "loss": 0.0064, "step": 4644 }, { "epoch": 929.2, "grad_norm": 1.2843832969665527, "learning_rate": 3.58e-06, "loss": 0.0091, "step": 4646 }, { "epoch": 929.6, "grad_norm": 2.2332606315612793, "learning_rate": 3.5600000000000002e-06, "loss": 0.0104, "step": 4648 }, { "epoch": 930.0, "grad_norm": 0.8830119967460632, "learning_rate": 3.5400000000000004e-06, "loss": 0.0116, "step": 4650 }, { "epoch": 930.4, "grad_norm": 0.8229765295982361, "learning_rate": 3.52e-06, "loss": 0.0041, "step": 4652 }, { "epoch": 930.8, "grad_norm": 4.314882755279541, "learning_rate": 3.5000000000000004e-06, "loss": 0.0412, "step": 4654 }, { "epoch": 931.2, "grad_norm": 1.2482166290283203, "learning_rate": 3.4799999999999997e-06, "loss": 0.0087, "step": 4656 }, { "epoch": 931.6, "grad_norm": 1.5826668739318848, "learning_rate": 3.46e-06, "loss": 0.0103, "step": 4658 }, { "epoch": 932.0, "grad_norm": 2.994452476501465, "learning_rate": 3.44e-06, "loss": 0.0163, "step": 4660 }, { "epoch": 932.4, "grad_norm": 3.2297141551971436, "learning_rate": 3.4200000000000003e-06, "loss": 0.024, "step": 4662 }, { "epoch": 932.8, 
"grad_norm": 0.5197581052780151, "learning_rate": 3.4000000000000005e-06, "loss": 0.0061, "step": 4664 }, { "epoch": 933.2, "grad_norm": 2.319756507873535, "learning_rate": 3.38e-06, "loss": 0.0097, "step": 4666 }, { "epoch": 933.6, "grad_norm": 0.2052745670080185, "learning_rate": 3.36e-06, "loss": 0.0195, "step": 4668 }, { "epoch": 934.0, "grad_norm": 2.839472532272339, "learning_rate": 3.34e-06, "loss": 0.0147, "step": 4670 }, { "epoch": 934.4, "grad_norm": 0.18924733996391296, "learning_rate": 3.3200000000000004e-06, "loss": 0.0063, "step": 4672 }, { "epoch": 934.8, "grad_norm": 1.064115047454834, "learning_rate": 3.3e-06, "loss": 0.005, "step": 4674 }, { "epoch": 935.2, "grad_norm": 1.7354036569595337, "learning_rate": 3.2800000000000004e-06, "loss": 0.0181, "step": 4676 }, { "epoch": 935.6, "grad_norm": 2.7428698539733887, "learning_rate": 3.2599999999999997e-06, "loss": 0.016, "step": 4678 }, { "epoch": 936.0, "grad_norm": 2.011568784713745, "learning_rate": 3.24e-06, "loss": 0.0085, "step": 4680 }, { "epoch": 936.4, "grad_norm": 3.586052179336548, "learning_rate": 3.22e-06, "loss": 0.0163, "step": 4682 }, { "epoch": 936.8, "grad_norm": 0.3156156837940216, "learning_rate": 3.2000000000000003e-06, "loss": 0.0145, "step": 4684 }, { "epoch": 937.2, "grad_norm": 1.766697645187378, "learning_rate": 3.1800000000000005e-06, "loss": 0.011, "step": 4686 }, { "epoch": 937.6, "grad_norm": 1.1104477643966675, "learning_rate": 3.1600000000000007e-06, "loss": 0.0073, "step": 4688 }, { "epoch": 938.0, "grad_norm": 2.3930845260620117, "learning_rate": 3.14e-06, "loss": 0.0081, "step": 4690 }, { "epoch": 938.4, "grad_norm": 4.70937967300415, "learning_rate": 3.12e-06, "loss": 0.0226, "step": 4692 }, { "epoch": 938.8, "grad_norm": 0.8315094113349915, "learning_rate": 3.1e-06, "loss": 0.0085, "step": 4694 }, { "epoch": 939.2, "grad_norm": 1.0491383075714111, "learning_rate": 3.08e-06, "loss": 0.0042, "step": 4696 }, { "epoch": 939.6, "grad_norm": 2.8985812664031982, 
"learning_rate": 3.06e-06, "loss": 0.0191, "step": 4698 }, { "epoch": 940.0, "grad_norm": 5.367652893066406, "learning_rate": 3.04e-06, "loss": 0.0146, "step": 4700 }, { "epoch": 940.4, "grad_norm": 0.6323106288909912, "learning_rate": 3.0200000000000003e-06, "loss": 0.008, "step": 4702 }, { "epoch": 940.8, "grad_norm": 1.2798386812210083, "learning_rate": 3e-06, "loss": 0.0074, "step": 4704 }, { "epoch": 941.2, "grad_norm": 1.4427399635314941, "learning_rate": 2.9800000000000003e-06, "loss": 0.0158, "step": 4706 }, { "epoch": 941.6, "grad_norm": 0.71954745054245, "learning_rate": 2.9600000000000005e-06, "loss": 0.0097, "step": 4708 }, { "epoch": 942.0, "grad_norm": 1.0171699523925781, "learning_rate": 2.9400000000000002e-06, "loss": 0.0069, "step": 4710 }, { "epoch": 942.4, "grad_norm": 0.5885002017021179, "learning_rate": 2.92e-06, "loss": 0.0073, "step": 4712 }, { "epoch": 942.8, "grad_norm": 5.477886199951172, "learning_rate": 2.9e-06, "loss": 0.0176, "step": 4714 }, { "epoch": 943.2, "grad_norm": 2.774256467819214, "learning_rate": 2.88e-06, "loss": 0.0107, "step": 4716 }, { "epoch": 943.6, "grad_norm": 2.310032844543457, "learning_rate": 2.86e-06, "loss": 0.0189, "step": 4718 }, { "epoch": 944.0, "grad_norm": 1.2971998453140259, "learning_rate": 2.8400000000000003e-06, "loss": 0.0047, "step": 4720 }, { "epoch": 944.4, "grad_norm": 1.0555007457733154, "learning_rate": 2.82e-06, "loss": 0.0123, "step": 4722 }, { "epoch": 944.8, "grad_norm": 0.7609583139419556, "learning_rate": 2.8000000000000003e-06, "loss": 0.0046, "step": 4724 }, { "epoch": 945.2, "grad_norm": 3.1408016681671143, "learning_rate": 2.78e-06, "loss": 0.0112, "step": 4726 }, { "epoch": 945.6, "grad_norm": 1.2770349979400635, "learning_rate": 2.7600000000000003e-06, "loss": 0.0043, "step": 4728 }, { "epoch": 946.0, "grad_norm": 0.9990266561508179, "learning_rate": 2.74e-06, "loss": 0.0257, "step": 4730 }, { "epoch": 946.4, "grad_norm": 0.6022937297821045, "learning_rate": 2.72e-06, "loss": 0.0028, 
"step": 4732 }, { "epoch": 946.8, "grad_norm": 2.36944842338562, "learning_rate": 2.7e-06, "loss": 0.0219, "step": 4734 }, { "epoch": 947.2, "grad_norm": 0.2630894184112549, "learning_rate": 2.68e-06, "loss": 0.0072, "step": 4736 }, { "epoch": 947.6, "grad_norm": 0.35513773560523987, "learning_rate": 2.66e-06, "loss": 0.0036, "step": 4738 }, { "epoch": 948.0, "grad_norm": 1.1686034202575684, "learning_rate": 2.64e-06, "loss": 0.0082, "step": 4740 }, { "epoch": 948.4, "grad_norm": 1.9305801391601562, "learning_rate": 2.6200000000000003e-06, "loss": 0.0115, "step": 4742 }, { "epoch": 948.8, "grad_norm": 1.35567307472229, "learning_rate": 2.6e-06, "loss": 0.0084, "step": 4744 }, { "epoch": 949.2, "grad_norm": 2.533832311630249, "learning_rate": 2.5800000000000003e-06, "loss": 0.0087, "step": 4746 }, { "epoch": 949.6, "grad_norm": 2.063406467437744, "learning_rate": 2.56e-06, "loss": 0.0122, "step": 4748 }, { "epoch": 950.0, "grad_norm": 0.696678876876831, "learning_rate": 2.54e-06, "loss": 0.0052, "step": 4750 }, { "epoch": 950.4, "grad_norm": 0.9523385763168335, "learning_rate": 2.52e-06, "loss": 0.0122, "step": 4752 }, { "epoch": 950.8, "grad_norm": 0.48982834815979004, "learning_rate": 2.5e-06, "loss": 0.0091, "step": 4754 }, { "epoch": 951.2, "grad_norm": 1.5091735124588013, "learning_rate": 2.48e-06, "loss": 0.0034, "step": 4756 }, { "epoch": 951.6, "grad_norm": 0.442278116941452, "learning_rate": 2.46e-06, "loss": 0.0121, "step": 4758 }, { "epoch": 952.0, "grad_norm": 2.0062875747680664, "learning_rate": 2.4400000000000004e-06, "loss": 0.0151, "step": 4760 }, { "epoch": 952.4, "grad_norm": 1.3136907815933228, "learning_rate": 2.42e-06, "loss": 0.0046, "step": 4762 }, { "epoch": 952.8, "grad_norm": 0.8090879321098328, "learning_rate": 2.4000000000000003e-06, "loss": 0.007, "step": 4764 }, { "epoch": 953.2, "grad_norm": 0.7339707612991333, "learning_rate": 2.38e-06, "loss": 0.0065, "step": 4766 }, { "epoch": 953.6, "grad_norm": 1.6637178659439087, "learning_rate": 
2.36e-06, "loss": 0.0165, "step": 4768 }, { "epoch": 954.0, "grad_norm": 0.2670665681362152, "learning_rate": 2.34e-06, "loss": 0.0061, "step": 4770 }, { "epoch": 954.4, "grad_norm": 0.13886696100234985, "learning_rate": 2.32e-06, "loss": 0.0023, "step": 4772 }, { "epoch": 954.8, "grad_norm": 2.034649133682251, "learning_rate": 2.3e-06, "loss": 0.004, "step": 4774 }, { "epoch": 955.2, "grad_norm": 1.9137920141220093, "learning_rate": 2.28e-06, "loss": 0.0153, "step": 4776 }, { "epoch": 955.6, "grad_norm": 0.24256832897663116, "learning_rate": 2.26e-06, "loss": 0.0116, "step": 4778 }, { "epoch": 956.0, "grad_norm": 1.928824782371521, "learning_rate": 2.24e-06, "loss": 0.0127, "step": 4780 }, { "epoch": 956.4, "grad_norm": 0.12544772028923035, "learning_rate": 2.2200000000000003e-06, "loss": 0.0016, "step": 4782 }, { "epoch": 956.8, "grad_norm": 0.23463402688503265, "learning_rate": 2.2e-06, "loss": 0.0135, "step": 4784 }, { "epoch": 957.2, "grad_norm": 2.4359560012817383, "learning_rate": 2.1800000000000003e-06, "loss": 0.0215, "step": 4786 }, { "epoch": 957.6, "grad_norm": 0.8816166520118713, "learning_rate": 2.16e-06, "loss": 0.008, "step": 4788 }, { "epoch": 958.0, "grad_norm": 1.7936108112335205, "learning_rate": 2.14e-06, "loss": 0.0064, "step": 4790 }, { "epoch": 958.4, "grad_norm": 2.0341930389404297, "learning_rate": 2.12e-06, "loss": 0.009, "step": 4792 }, { "epoch": 958.8, "grad_norm": 1.1488474607467651, "learning_rate": 2.1000000000000002e-06, "loss": 0.0091, "step": 4794 }, { "epoch": 959.2, "grad_norm": 1.5733258724212646, "learning_rate": 2.08e-06, "loss": 0.0037, "step": 4796 }, { "epoch": 959.6, "grad_norm": 1.8026502132415771, "learning_rate": 2.06e-06, "loss": 0.0122, "step": 4798 }, { "epoch": 960.0, "grad_norm": 0.30048415064811707, "learning_rate": 2.0400000000000004e-06, "loss": 0.0032, "step": 4800 }, { "epoch": 960.0, "eval_cer": 0.8808290155440415, "eval_loss": 6.038042068481445, "eval_runtime": 11.4002, "eval_samples_per_second": 0.877, 
"eval_steps_per_second": 0.175, "step": 4800 }, { "epoch": 960.4, "grad_norm": 1.9587512016296387, "learning_rate": 2.02e-06, "loss": 0.0093, "step": 4802 }, { "epoch": 960.8, "grad_norm": 2.5855283737182617, "learning_rate": 2.0000000000000003e-06, "loss": 0.0172, "step": 4804 }, { "epoch": 961.2, "grad_norm": 0.2956543266773224, "learning_rate": 1.98e-06, "loss": 0.0071, "step": 4806 }, { "epoch": 961.6, "grad_norm": 0.1730463057756424, "learning_rate": 1.96e-06, "loss": 0.0065, "step": 4808 }, { "epoch": 962.0, "grad_norm": 0.25830745697021484, "learning_rate": 1.94e-06, "loss": 0.0024, "step": 4810 }, { "epoch": 962.4, "grad_norm": 1.794001817703247, "learning_rate": 1.92e-06, "loss": 0.0091, "step": 4812 }, { "epoch": 962.8, "grad_norm": 0.7759162783622742, "learning_rate": 1.9e-06, "loss": 0.0203, "step": 4814 }, { "epoch": 963.2, "grad_norm": 1.2889721393585205, "learning_rate": 1.8800000000000002e-06, "loss": 0.0084, "step": 4816 }, { "epoch": 963.6, "grad_norm": 0.36591535806655884, "learning_rate": 1.86e-06, "loss": 0.0153, "step": 4818 }, { "epoch": 964.0, "grad_norm": 3.2849414348602295, "learning_rate": 1.84e-06, "loss": 0.0098, "step": 4820 }, { "epoch": 964.4, "grad_norm": 1.1506483554840088, "learning_rate": 1.8200000000000002e-06, "loss": 0.0058, "step": 4822 }, { "epoch": 964.8, "grad_norm": 2.263983964920044, "learning_rate": 1.8e-06, "loss": 0.0122, "step": 4824 }, { "epoch": 965.2, "grad_norm": 1.530084490776062, "learning_rate": 1.7800000000000001e-06, "loss": 0.004, "step": 4826 }, { "epoch": 965.6, "grad_norm": 0.3724777102470398, "learning_rate": 1.76e-06, "loss": 0.0044, "step": 4828 }, { "epoch": 966.0, "grad_norm": 1.9066413640975952, "learning_rate": 1.7399999999999999e-06, "loss": 0.0085, "step": 4830 }, { "epoch": 966.4, "grad_norm": 5.192115306854248, "learning_rate": 1.72e-06, "loss": 0.01, "step": 4832 }, { "epoch": 966.8, "grad_norm": 0.6623052954673767, "learning_rate": 1.7000000000000002e-06, "loss": 0.0042, "step": 4834 }, { 
"epoch": 967.2, "grad_norm": 0.3544906675815582, "learning_rate": 1.68e-06, "loss": 0.0061, "step": 4836 }, { "epoch": 967.6, "grad_norm": 1.2825881242752075, "learning_rate": 1.6600000000000002e-06, "loss": 0.0109, "step": 4838 }, { "epoch": 968.0, "grad_norm": 0.8549659252166748, "learning_rate": 1.6400000000000002e-06, "loss": 0.009, "step": 4840 }, { "epoch": 968.4, "grad_norm": 1.2293105125427246, "learning_rate": 1.62e-06, "loss": 0.0114, "step": 4842 }, { "epoch": 968.8, "grad_norm": 2.285867691040039, "learning_rate": 1.6000000000000001e-06, "loss": 0.0161, "step": 4844 }, { "epoch": 969.2, "grad_norm": 2.2078604698181152, "learning_rate": 1.5800000000000003e-06, "loss": 0.007, "step": 4846 }, { "epoch": 969.6, "grad_norm": 0.5466961860656738, "learning_rate": 1.56e-06, "loss": 0.0066, "step": 4848 }, { "epoch": 970.0, "grad_norm": 0.8512510061264038, "learning_rate": 1.54e-06, "loss": 0.0045, "step": 4850 }, { "epoch": 970.4, "grad_norm": 2.7270822525024414, "learning_rate": 1.52e-06, "loss": 0.0202, "step": 4852 }, { "epoch": 970.8, "grad_norm": 0.2759383022785187, "learning_rate": 1.5e-06, "loss": 0.0049, "step": 4854 }, { "epoch": 971.2, "grad_norm": 1.9304689168930054, "learning_rate": 1.4800000000000002e-06, "loss": 0.0138, "step": 4856 }, { "epoch": 971.6, "grad_norm": 2.8767435550689697, "learning_rate": 1.46e-06, "loss": 0.0083, "step": 4858 }, { "epoch": 972.0, "grad_norm": 0.4171920120716095, "learning_rate": 1.44e-06, "loss": 0.0032, "step": 4860 }, { "epoch": 972.4, "grad_norm": 1.9236071109771729, "learning_rate": 1.4200000000000002e-06, "loss": 0.0112, "step": 4862 }, { "epoch": 972.8, "grad_norm": 7.305168151855469, "learning_rate": 1.4000000000000001e-06, "loss": 0.0131, "step": 4864 }, { "epoch": 973.2, "grad_norm": 2.853778839111328, "learning_rate": 1.3800000000000001e-06, "loss": 0.0081, "step": 4866 }, { "epoch": 973.6, "grad_norm": 0.5795276165008545, "learning_rate": 1.36e-06, "loss": 0.0031, "step": 4868 }, { "epoch": 974.0, 
"grad_norm": 1.1775046586990356, "learning_rate": 1.34e-06, "loss": 0.011, "step": 4870 }, { "epoch": 974.4, "grad_norm": 1.683487057685852, "learning_rate": 1.32e-06, "loss": 0.0128, "step": 4872 }, { "epoch": 974.8, "grad_norm": 3.1353728771209717, "learning_rate": 1.3e-06, "loss": 0.0149, "step": 4874 }, { "epoch": 975.2, "grad_norm": 0.9901724457740784, "learning_rate": 1.28e-06, "loss": 0.0055, "step": 4876 }, { "epoch": 975.6, "grad_norm": 2.057359457015991, "learning_rate": 1.26e-06, "loss": 0.0168, "step": 4878 }, { "epoch": 976.0, "grad_norm": 1.212866187095642, "learning_rate": 1.24e-06, "loss": 0.0102, "step": 4880 }, { "epoch": 976.4, "grad_norm": 2.332064390182495, "learning_rate": 1.2200000000000002e-06, "loss": 0.0069, "step": 4882 }, { "epoch": 976.8, "grad_norm": 2.1847922801971436, "learning_rate": 1.2000000000000002e-06, "loss": 0.0122, "step": 4884 }, { "epoch": 977.2, "grad_norm": 1.0398973226547241, "learning_rate": 1.18e-06, "loss": 0.0058, "step": 4886 }, { "epoch": 977.6, "grad_norm": 1.4135427474975586, "learning_rate": 1.16e-06, "loss": 0.0081, "step": 4888 }, { "epoch": 978.0, "grad_norm": 4.063148498535156, "learning_rate": 1.14e-06, "loss": 0.0084, "step": 4890 }, { "epoch": 978.4, "grad_norm": 5.841921806335449, "learning_rate": 1.12e-06, "loss": 0.0151, "step": 4892 }, { "epoch": 978.8, "grad_norm": 1.1487469673156738, "learning_rate": 1.1e-06, "loss": 0.0034, "step": 4894 }, { "epoch": 979.2, "grad_norm": 0.17215222120285034, "learning_rate": 1.08e-06, "loss": 0.0024, "step": 4896 }, { "epoch": 979.6, "grad_norm": 0.20423412322998047, "learning_rate": 1.06e-06, "loss": 0.0086, "step": 4898 }, { "epoch": 980.0, "grad_norm": 1.6678364276885986, "learning_rate": 1.04e-06, "loss": 0.0081, "step": 4900 }, { "epoch": 980.4, "grad_norm": 0.22323819994926453, "learning_rate": 1.0200000000000002e-06, "loss": 0.0072, "step": 4902 }, { "epoch": 980.8, "grad_norm": 4.96053409576416, "learning_rate": 1.0000000000000002e-06, "loss": 0.0104, 
"step": 4904 }, { "epoch": 981.2, "grad_norm": 0.06163514405488968, "learning_rate": 9.8e-07, "loss": 0.0037, "step": 4906 }, { "epoch": 981.6, "grad_norm": 0.0944143757224083, "learning_rate": 9.6e-07, "loss": 0.0011, "step": 4908 }, { "epoch": 982.0, "grad_norm": 0.20509199798107147, "learning_rate": 9.400000000000001e-07, "loss": 0.0092, "step": 4910 }, { "epoch": 982.4, "grad_norm": 6.270112991333008, "learning_rate": 9.2e-07, "loss": 0.0388, "step": 4912 }, { "epoch": 982.8, "grad_norm": 0.7087854743003845, "learning_rate": 9e-07, "loss": 0.0068, "step": 4914 }, { "epoch": 983.2, "grad_norm": 3.2817344665527344, "learning_rate": 8.8e-07, "loss": 0.0159, "step": 4916 }, { "epoch": 983.6, "grad_norm": 5.324103355407715, "learning_rate": 8.6e-07, "loss": 0.0284, "step": 4918 }, { "epoch": 984.0, "grad_norm": 4.407694339752197, "learning_rate": 8.4e-07, "loss": 0.0185, "step": 4920 }, { "epoch": 984.4, "grad_norm": 2.2648439407348633, "learning_rate": 8.200000000000001e-07, "loss": 0.0077, "step": 4922 }, { "epoch": 984.8, "grad_norm": 1.6710631847381592, "learning_rate": 8.000000000000001e-07, "loss": 0.0069, "step": 4924 }, { "epoch": 985.2, "grad_norm": 2.066645860671997, "learning_rate": 7.8e-07, "loss": 0.0114, "step": 4926 }, { "epoch": 985.6, "grad_norm": 1.1103792190551758, "learning_rate": 7.6e-07, "loss": 0.0038, "step": 4928 }, { "epoch": 986.0, "grad_norm": 0.6761166453361511, "learning_rate": 7.400000000000001e-07, "loss": 0.0096, "step": 4930 }, { "epoch": 986.4, "grad_norm": 0.36638203263282776, "learning_rate": 7.2e-07, "loss": 0.012, "step": 4932 }, { "epoch": 986.8, "grad_norm": 3.7784104347229004, "learning_rate": 7.000000000000001e-07, "loss": 0.0141, "step": 4934 }, { "epoch": 987.2, "grad_norm": 2.78393816947937, "learning_rate": 6.8e-07, "loss": 0.0232, "step": 4936 }, { "epoch": 987.6, "grad_norm": 1.3821086883544922, "learning_rate": 6.6e-07, "loss": 0.0123, "step": 4938 }, { "epoch": 988.0, "grad_norm": 0.611269474029541, "learning_rate": 
6.4e-07, "loss": 0.007, "step": 4940 }, { "epoch": 988.4, "grad_norm": 0.44068822264671326, "learning_rate": 6.2e-07, "loss": 0.0102, "step": 4942 }, { "epoch": 988.8, "grad_norm": 1.3738272190093994, "learning_rate": 6.000000000000001e-07, "loss": 0.0048, "step": 4944 }, { "epoch": 989.2, "grad_norm": 1.7122091054916382, "learning_rate": 5.8e-07, "loss": 0.0086, "step": 4946 }, { "epoch": 989.6, "grad_norm": 2.672187566757202, "learning_rate": 5.6e-07, "loss": 0.0062, "step": 4948 }, { "epoch": 990.0, "grad_norm": 1.8074374198913574, "learning_rate": 5.4e-07, "loss": 0.011, "step": 4950 }, { "epoch": 990.4, "grad_norm": 0.5707859396934509, "learning_rate": 5.2e-07, "loss": 0.0068, "step": 4952 }, { "epoch": 990.8, "grad_norm": 0.5525256991386414, "learning_rate": 5.000000000000001e-07, "loss": 0.0086, "step": 4954 }, { "epoch": 991.2, "grad_norm": 1.2856701612472534, "learning_rate": 4.8e-07, "loss": 0.0139, "step": 4956 }, { "epoch": 991.6, "grad_norm": 1.334687352180481, "learning_rate": 4.6e-07, "loss": 0.0064, "step": 4958 }, { "epoch": 992.0, "grad_norm": 0.2152119278907776, "learning_rate": 4.4e-07, "loss": 0.0039, "step": 4960 }, { "epoch": 992.4, "grad_norm": 1.2984968423843384, "learning_rate": 4.2e-07, "loss": 0.0134, "step": 4962 }, { "epoch": 992.8, "grad_norm": 2.512779474258423, "learning_rate": 4.0000000000000003e-07, "loss": 0.0142, "step": 4964 }, { "epoch": 993.2, "grad_norm": 0.4058573246002197, "learning_rate": 3.8e-07, "loss": 0.0033, "step": 4966 }, { "epoch": 993.6, "grad_norm": 0.6961707472801208, "learning_rate": 3.6e-07, "loss": 0.0061, "step": 4968 }, { "epoch": 994.0, "grad_norm": 1.231079339981079, "learning_rate": 3.4e-07, "loss": 0.0137, "step": 4970 }, { "epoch": 994.4, "grad_norm": 1.9893370866775513, "learning_rate": 3.2e-07, "loss": 0.0079, "step": 4972 }, { "epoch": 994.8, "grad_norm": 3.5322604179382324, "learning_rate": 3.0000000000000004e-07, "loss": 0.0107, "step": 4974 }, { "epoch": 995.2, "grad_norm": 0.2526344358921051, 
"learning_rate": 2.8e-07, "loss": 0.0029, "step": 4976 }, { "epoch": 995.6, "grad_norm": 0.4448156952857971, "learning_rate": 2.6e-07, "loss": 0.0072, "step": 4978 }, { "epoch": 996.0, "grad_norm": 0.7669633626937866, "learning_rate": 2.4e-07, "loss": 0.0128, "step": 4980 }, { "epoch": 996.4, "grad_norm": 0.5753944516181946, "learning_rate": 2.2e-07, "loss": 0.0158, "step": 4982 }, { "epoch": 996.8, "grad_norm": 0.17449146509170532, "learning_rate": 2.0000000000000002e-07, "loss": 0.0052, "step": 4984 }, { "epoch": 997.2, "grad_norm": 0.1937945932149887, "learning_rate": 1.8e-07, "loss": 0.0079, "step": 4986 }, { "epoch": 997.6, "grad_norm": 0.4388817250728607, "learning_rate": 1.6e-07, "loss": 0.0157, "step": 4988 }, { "epoch": 998.0, "grad_norm": 0.3047299087047577, "learning_rate": 1.4e-07, "loss": 0.0127, "step": 4990 }, { "epoch": 998.4, "grad_norm": 1.254011869430542, "learning_rate": 1.2e-07, "loss": 0.0101, "step": 4992 }, { "epoch": 998.8, "grad_norm": 0.19459804892539978, "learning_rate": 1.0000000000000001e-07, "loss": 0.0053, "step": 4994 }, { "epoch": 999.2, "grad_norm": 4.005507469177246, "learning_rate": 8e-08, "loss": 0.0148, "step": 4996 }, { "epoch": 999.6, "grad_norm": 0.2429426908493042, "learning_rate": 6e-08, "loss": 0.0048, "step": 4998 }, { "epoch": 1000.0, "grad_norm": 0.936008870601654, "learning_rate": 4e-08, "loss": 0.0099, "step": 5000 }, { "epoch": 1000.0, "eval_cer": 0.8549222797927462, "eval_loss": 6.012123107910156, "eval_runtime": 10.8045, "eval_samples_per_second": 0.926, "eval_steps_per_second": 0.185, "step": 5000 } ], "logging_steps": 2, "max_steps": 5000, "num_input_tokens_seen": 0, "num_train_epochs": 1000, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.539581802643456e+19, "train_batch_size": 8, "trial_name": null, "trial_params": null 
}