{ "best_metric": 1.0, "best_model_checkpoint": "./wavbert_fongbe/checkpoint-5200", "epoch": 1.8220042046250877, "eval_steps": 100, "global_step": 5200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "grad_norm": Infinity, "learning_rate": 0.0, "loss": 9.8297, "step": 1 }, { "epoch": 0.0, "grad_norm": Infinity, "learning_rate": 0.0, "loss": 9.9301, "step": 2 }, { "epoch": 0.0, "grad_norm": 34.74740982055664, "learning_rate": 6e-07, "loss": 10.394, "step": 3 }, { "epoch": 0.0, "grad_norm": 25.30831527709961, "learning_rate": 1.2e-06, "loss": 8.5656, "step": 4 }, { "epoch": 0.0, "grad_norm": 38.40359115600586, "learning_rate": 1.8e-06, "loss": 11.2407, "step": 5 }, { "epoch": 0.0, "grad_norm": 28.476762771606445, "learning_rate": 2.4e-06, "loss": 8.109, "step": 6 }, { "epoch": 0.0, "grad_norm": Infinity, "learning_rate": 2.4e-06, "loss": 11.0636, "step": 7 }, { "epoch": 0.0, "grad_norm": 34.81722640991211, "learning_rate": 2.9999999999999997e-06, "loss": 9.1192, "step": 8 }, { "epoch": 0.0, "grad_norm": 53.64858627319336, "learning_rate": 3.6e-06, "loss": 11.2103, "step": 9 }, { "epoch": 0.0, "grad_norm": 34.31063461303711, "learning_rate": 4.2e-06, "loss": 7.7964, "step": 10 }, { "epoch": 0.0, "grad_norm": 40.947654724121094, "learning_rate": 4.8e-06, "loss": 8.9611, "step": 11 }, { "epoch": 0.0, "grad_norm": 54.53398513793945, "learning_rate": 5.399999999999999e-06, "loss": 8.7025, "step": 12 }, { "epoch": 0.0, "grad_norm": 74.66500091552734, "learning_rate": 5.999999999999999e-06, "loss": 9.4326, "step": 13 }, { "epoch": 0.0, "grad_norm": 44.239418029785156, "learning_rate": 6.599999999999999e-06, "loss": 6.5448, "step": 14 }, { "epoch": 0.01, "grad_norm": 43.60188293457031, "learning_rate": 7.2e-06, "loss": 5.5535, "step": 15 }, { "epoch": 0.01, "grad_norm": 49.674495697021484, "learning_rate": 7.799999999999998e-06, "loss": 5.7704, "step": 16 }, { "epoch": 0.01, "grad_norm": 
70.16291046142578, "learning_rate": 8.4e-06, "loss": 5.6929, "step": 17 }, { "epoch": 0.01, "grad_norm": 37.02568817138672, "learning_rate": 8.999999999999999e-06, "loss": 3.9512, "step": 18 }, { "epoch": 0.01, "grad_norm": 8.776915550231934, "learning_rate": 9.6e-06, "loss": 3.7522, "step": 19 }, { "epoch": 0.01, "grad_norm": 10.268403053283691, "learning_rate": 1.02e-05, "loss": 3.557, "step": 20 }, { "epoch": 0.01, "grad_norm": 21.770320892333984, "learning_rate": 1.0799999999999998e-05, "loss": 3.8904, "step": 21 }, { "epoch": 0.01, "grad_norm": 12.886055946350098, "learning_rate": 1.14e-05, "loss": 3.4933, "step": 22 }, { "epoch": 0.01, "grad_norm": 20.950607299804688, "learning_rate": 1.1999999999999999e-05, "loss": 3.4173, "step": 23 }, { "epoch": 0.01, "grad_norm": 22.54509925842285, "learning_rate": 1.26e-05, "loss": 3.5335, "step": 24 }, { "epoch": 0.01, "grad_norm": 13.497519493103027, "learning_rate": 1.3199999999999997e-05, "loss": 2.9166, "step": 25 }, { "epoch": 0.01, "grad_norm": 11.36432933807373, "learning_rate": 1.3799999999999998e-05, "loss": 3.6862, "step": 26 }, { "epoch": 0.01, "grad_norm": 21.900163650512695, "learning_rate": 1.44e-05, "loss": 3.3244, "step": 27 }, { "epoch": 0.01, "grad_norm": null, "learning_rate": 1.44e-05, "loss": 3.1803, "step": 28 }, { "epoch": 0.01, "grad_norm": 8.995499610900879, "learning_rate": 1.4999999999999999e-05, "loss": 3.4045, "step": 29 }, { "epoch": 0.01, "grad_norm": 11.834275245666504, "learning_rate": 1.5599999999999996e-05, "loss": 3.1981, "step": 30 }, { "epoch": 0.01, "grad_norm": 6.6730780601501465, "learning_rate": 1.6199999999999997e-05, "loss": 3.2896, "step": 31 }, { "epoch": 0.01, "grad_norm": 14.79286003112793, "learning_rate": 1.68e-05, "loss": 3.1652, "step": 32 }, { "epoch": 0.01, "grad_norm": 5.442844867706299, "learning_rate": 1.74e-05, "loss": 3.102, "step": 33 }, { "epoch": 0.01, "grad_norm": 6.1123127937316895, "learning_rate": 1.7999999999999997e-05, "loss": 3.1152, "step": 34 }, { 
"epoch": 0.01, "grad_norm": 5.981231212615967, "learning_rate": 1.8599999999999998e-05, "loss": 3.1492, "step": 35 }, { "epoch": 0.01, "grad_norm": 6.901692867279053, "learning_rate": 1.92e-05, "loss": 3.1973, "step": 36 }, { "epoch": 0.01, "grad_norm": 17.36743927001953, "learning_rate": 1.98e-05, "loss": 3.0872, "step": 37 }, { "epoch": 0.01, "grad_norm": 11.805011749267578, "learning_rate": 2.04e-05, "loss": 2.8694, "step": 38 }, { "epoch": 0.01, "grad_norm": 8.926337242126465, "learning_rate": 2.1e-05, "loss": 3.5693, "step": 39 }, { "epoch": 0.01, "grad_norm": 7.315157890319824, "learning_rate": 2.1599999999999996e-05, "loss": 2.776, "step": 40 }, { "epoch": 0.01, "grad_norm": 8.084774017333984, "learning_rate": 2.2199999999999998e-05, "loss": 2.8658, "step": 41 }, { "epoch": 0.01, "grad_norm": 10.755606651306152, "learning_rate": 2.28e-05, "loss": 2.7053, "step": 42 }, { "epoch": 0.02, "grad_norm": 7.2747626304626465, "learning_rate": 2.34e-05, "loss": 2.6037, "step": 43 }, { "epoch": 0.02, "grad_norm": 9.70805835723877, "learning_rate": 2.3999999999999997e-05, "loss": 3.1741, "step": 44 }, { "epoch": 0.02, "grad_norm": 18.0622615814209, "learning_rate": 2.4599999999999998e-05, "loss": 3.0041, "step": 45 }, { "epoch": 0.02, "grad_norm": 14.248910903930664, "learning_rate": 2.52e-05, "loss": 2.6704, "step": 46 }, { "epoch": 0.02, "grad_norm": 7.843019485473633, "learning_rate": 2.5799999999999997e-05, "loss": 2.5436, "step": 47 }, { "epoch": 0.02, "grad_norm": 10.0279541015625, "learning_rate": 2.6399999999999995e-05, "loss": 2.5453, "step": 48 }, { "epoch": 0.02, "grad_norm": 47.676048278808594, "learning_rate": 2.6999999999999996e-05, "loss": 2.4351, "step": 49 }, { "epoch": 0.02, "grad_norm": 10.947311401367188, "learning_rate": 2.7599999999999997e-05, "loss": 2.8235, "step": 50 }, { "epoch": 0.02, "grad_norm": 5.891237735748291, "learning_rate": 2.8199999999999998e-05, "loss": 3.0562, "step": 51 }, { "epoch": 0.02, "grad_norm": 7.135880947113037, 
"learning_rate": 2.88e-05, "loss": 3.0568, "step": 52 }, { "epoch": 0.02, "grad_norm": 6.591911315917969, "learning_rate": 2.94e-05, "loss": 2.8868, "step": 53 }, { "epoch": 0.02, "grad_norm": 7.592108726501465, "learning_rate": 2.9999999999999997e-05, "loss": 2.8324, "step": 54 }, { "epoch": 0.02, "grad_norm": 7.499967098236084, "learning_rate": 3.06e-05, "loss": 2.9309, "step": 55 }, { "epoch": 0.02, "grad_norm": 13.009428977966309, "learning_rate": 3.119999999999999e-05, "loss": 2.9151, "step": 56 }, { "epoch": 0.02, "grad_norm": 10.796736717224121, "learning_rate": 3.1799999999999994e-05, "loss": 2.9887, "step": 57 }, { "epoch": 0.02, "grad_norm": 6.898942947387695, "learning_rate": 3.2399999999999995e-05, "loss": 2.7634, "step": 58 }, { "epoch": 0.02, "grad_norm": 4.915682792663574, "learning_rate": 3.2999999999999996e-05, "loss": 2.3548, "step": 59 }, { "epoch": 0.02, "grad_norm": 6.942079067230225, "learning_rate": 3.36e-05, "loss": 2.8065, "step": 60 }, { "epoch": 0.02, "grad_norm": 9.648048400878906, "learning_rate": 3.42e-05, "loss": 2.7106, "step": 61 }, { "epoch": 0.02, "grad_norm": 11.939984321594238, "learning_rate": 3.48e-05, "loss": 2.6683, "step": 62 }, { "epoch": 0.02, "grad_norm": 8.364795684814453, "learning_rate": 3.539999999999999e-05, "loss": 3.1089, "step": 63 }, { "epoch": 0.02, "grad_norm": 6.097488880157471, "learning_rate": 3.5999999999999994e-05, "loss": 2.5706, "step": 64 }, { "epoch": 0.02, "grad_norm": 6.490532398223877, "learning_rate": 3.6599999999999995e-05, "loss": 2.7647, "step": 65 }, { "epoch": 0.02, "grad_norm": 7.321822166442871, "learning_rate": 3.7199999999999996e-05, "loss": 2.9021, "step": 66 }, { "epoch": 0.02, "grad_norm": 8.508000373840332, "learning_rate": 3.78e-05, "loss": 2.4467, "step": 67 }, { "epoch": 0.02, "grad_norm": 8.116604804992676, "learning_rate": 3.84e-05, "loss": 2.2756, "step": 68 }, { "epoch": 0.02, "grad_norm": 11.946988105773926, "learning_rate": 3.9e-05, "loss": 2.9973, "step": 69 }, { "epoch": 
0.02, "grad_norm": 11.239936828613281, "learning_rate": 3.96e-05, "loss": 2.6493, "step": 70 }, { "epoch": 0.02, "grad_norm": 9.772220611572266, "learning_rate": 4.02e-05, "loss": 2.5128, "step": 71 }, { "epoch": 0.03, "grad_norm": 12.785487174987793, "learning_rate": 4.08e-05, "loss": 1.8853, "step": 72 }, { "epoch": 0.03, "grad_norm": 8.324564933776855, "learning_rate": 4.14e-05, "loss": 2.7886, "step": 73 }, { "epoch": 0.03, "grad_norm": 11.687192916870117, "learning_rate": 4.2e-05, "loss": 1.838, "step": 74 }, { "epoch": 0.03, "grad_norm": 9.597622871398926, "learning_rate": 4.259999999999999e-05, "loss": 1.8491, "step": 75 }, { "epoch": 0.03, "grad_norm": 7.367631912231445, "learning_rate": 4.319999999999999e-05, "loss": 2.6484, "step": 76 }, { "epoch": 0.03, "grad_norm": 5.516895771026611, "learning_rate": 4.3799999999999994e-05, "loss": 2.1669, "step": 77 }, { "epoch": 0.03, "grad_norm": 7.077077388763428, "learning_rate": 4.4399999999999995e-05, "loss": 2.3611, "step": 78 }, { "epoch": 0.03, "grad_norm": 6.426537990570068, "learning_rate": 4.4999999999999996e-05, "loss": 1.8982, "step": 79 }, { "epoch": 0.03, "grad_norm": 5.794361114501953, "learning_rate": 4.56e-05, "loss": 1.7709, "step": 80 }, { "epoch": 0.03, "grad_norm": 6.885998725891113, "learning_rate": 4.62e-05, "loss": 1.7069, "step": 81 }, { "epoch": 0.03, "grad_norm": 7.63657283782959, "learning_rate": 4.68e-05, "loss": 1.6549, "step": 82 }, { "epoch": 0.03, "grad_norm": 6.502758979797363, "learning_rate": 4.7399999999999993e-05, "loss": 1.5249, "step": 83 }, { "epoch": 0.03, "grad_norm": 7.67425537109375, "learning_rate": 4.7999999999999994e-05, "loss": 1.6269, "step": 84 }, { "epoch": 0.03, "grad_norm": 5.933468818664551, "learning_rate": 4.8599999999999995e-05, "loss": 1.535, "step": 85 }, { "epoch": 0.03, "grad_norm": 6.353681564331055, "learning_rate": 4.9199999999999997e-05, "loss": 1.2357, "step": 86 }, { "epoch": 0.03, "grad_norm": 8.016544342041016, "learning_rate": 4.98e-05, "loss": 
1.2392, "step": 87 }, { "epoch": 0.03, "grad_norm": 6.43305778503418, "learning_rate": 5.04e-05, "loss": 1.5339, "step": 88 }, { "epoch": 0.03, "grad_norm": 8.496820449829102, "learning_rate": 5.1e-05, "loss": 1.4414, "step": 89 }, { "epoch": 0.03, "grad_norm": 12.498025894165039, "learning_rate": 5.1599999999999994e-05, "loss": 1.0707, "step": 90 }, { "epoch": 0.03, "grad_norm": 6.985462188720703, "learning_rate": 5.2199999999999995e-05, "loss": 1.5268, "step": 91 }, { "epoch": 0.03, "grad_norm": 10.915468215942383, "learning_rate": 5.279999999999999e-05, "loss": 1.4841, "step": 92 }, { "epoch": 0.03, "grad_norm": 12.632097244262695, "learning_rate": 5.339999999999999e-05, "loss": 2.197, "step": 93 }, { "epoch": 0.03, "grad_norm": 11.778656959533691, "learning_rate": 5.399999999999999e-05, "loss": 1.1379, "step": 94 }, { "epoch": 0.03, "grad_norm": 11.089698791503906, "learning_rate": 5.459999999999999e-05, "loss": 1.3323, "step": 95 }, { "epoch": 0.03, "grad_norm": 8.403290748596191, "learning_rate": 5.519999999999999e-05, "loss": 1.0058, "step": 96 }, { "epoch": 0.03, "grad_norm": 12.170100212097168, "learning_rate": 5.5799999999999994e-05, "loss": 1.3709, "step": 97 }, { "epoch": 0.03, "grad_norm": 13.481610298156738, "learning_rate": 5.6399999999999995e-05, "loss": 1.5149, "step": 98 }, { "epoch": 0.03, "grad_norm": 8.465336799621582, "learning_rate": 5.6999999999999996e-05, "loss": 1.1589, "step": 99 }, { "epoch": 0.04, "grad_norm": 8.697178840637207, "learning_rate": 5.76e-05, "loss": 1.338, "step": 100 }, { "epoch": 0.04, "eval_loss": 1.2086584568023682, "eval_runtime": 51.6602, "eval_samples_per_second": 41.967, "eval_steps_per_second": 10.492, "eval_wer": 0.8778758000345961, "step": 100 }, { "epoch": 0.04, "grad_norm": 9.466093063354492, "learning_rate": 5.82e-05, "loss": 1.9736, "step": 101 }, { "epoch": 0.04, "grad_norm": 6.502716064453125, "learning_rate": 5.88e-05, "loss": 1.2357, "step": 102 }, { "epoch": 0.04, "grad_norm": 6.822862148284912, 
"learning_rate": 5.94e-05, "loss": 0.969, "step": 103 }, { "epoch": 0.04, "grad_norm": 4.4939680099487305, "learning_rate": 5.9999999999999995e-05, "loss": 0.8472, "step": 104 }, { "epoch": 0.04, "grad_norm": 5.335927963256836, "learning_rate": 6.0599999999999996e-05, "loss": 1.232, "step": 105 }, { "epoch": 0.04, "grad_norm": 4.711673736572266, "learning_rate": 6.12e-05, "loss": 1.0268, "step": 106 }, { "epoch": 0.04, "grad_norm": 6.31601619720459, "learning_rate": 6.18e-05, "loss": 0.9878, "step": 107 }, { "epoch": 0.04, "grad_norm": 7.08448600769043, "learning_rate": 6.239999999999999e-05, "loss": 1.2664, "step": 108 }, { "epoch": 0.04, "grad_norm": 4.570302486419678, "learning_rate": 6.299999999999999e-05, "loss": 0.5733, "step": 109 }, { "epoch": 0.04, "grad_norm": 44.946353912353516, "learning_rate": 6.359999999999999e-05, "loss": 1.6091, "step": 110 }, { "epoch": 0.04, "grad_norm": 14.037195205688477, "learning_rate": 6.419999999999999e-05, "loss": 1.5667, "step": 111 }, { "epoch": 0.04, "grad_norm": 9.980315208435059, "learning_rate": 6.479999999999999e-05, "loss": 1.8256, "step": 112 }, { "epoch": 0.04, "grad_norm": 13.4607572555542, "learning_rate": 6.539999999999999e-05, "loss": 1.2149, "step": 113 }, { "epoch": 0.04, "grad_norm": 10.761940002441406, "learning_rate": 6.599999999999999e-05, "loss": 1.4465, "step": 114 }, { "epoch": 0.04, "grad_norm": 8.355905532836914, "learning_rate": 6.659999999999999e-05, "loss": 0.873, "step": 115 }, { "epoch": 0.04, "grad_norm": 8.082406997680664, "learning_rate": 6.72e-05, "loss": 0.8309, "step": 116 }, { "epoch": 0.04, "grad_norm": 7.375432968139648, "learning_rate": 6.78e-05, "loss": 1.1447, "step": 117 }, { "epoch": 0.04, "grad_norm": 13.326135635375977, "learning_rate": 6.84e-05, "loss": 0.9311, "step": 118 }, { "epoch": 0.04, "grad_norm": 8.355464935302734, "learning_rate": 6.9e-05, "loss": 0.9014, "step": 119 }, { "epoch": 0.04, "grad_norm": 16.993934631347656, "learning_rate": 6.96e-05, "loss": 1.152, "step": 
120 }, { "epoch": 0.04, "grad_norm": 7.030590057373047, "learning_rate": 7.02e-05, "loss": 0.6491, "step": 121 }, { "epoch": 0.04, "grad_norm": 10.766969680786133, "learning_rate": 7.079999999999999e-05, "loss": 1.738, "step": 122 }, { "epoch": 0.04, "grad_norm": 16.502492904663086, "learning_rate": 7.139999999999999e-05, "loss": 1.0539, "step": 123 }, { "epoch": 0.04, "grad_norm": 10.595362663269043, "learning_rate": 7.199999999999999e-05, "loss": 1.0724, "step": 124 }, { "epoch": 0.04, "grad_norm": 7.935666084289551, "learning_rate": 7.259999999999999e-05, "loss": 0.795, "step": 125 }, { "epoch": 0.04, "grad_norm": 7.293322563171387, "learning_rate": 7.319999999999999e-05, "loss": 1.279, "step": 126 }, { "epoch": 0.04, "grad_norm": 14.84443187713623, "learning_rate": 7.379999999999999e-05, "loss": 1.993, "step": 127 }, { "epoch": 0.04, "grad_norm": 8.164588928222656, "learning_rate": 7.439999999999999e-05, "loss": 1.0304, "step": 128 }, { "epoch": 0.05, "grad_norm": 7.177072048187256, "learning_rate": 7.5e-05, "loss": 1.1268, "step": 129 }, { "epoch": 0.05, "grad_norm": 8.665708541870117, "learning_rate": 7.56e-05, "loss": 0.911, "step": 130 }, { "epoch": 0.05, "grad_norm": 7.278268337249756, "learning_rate": 7.62e-05, "loss": 0.8871, "step": 131 }, { "epoch": 0.05, "grad_norm": 4.9616780281066895, "learning_rate": 7.68e-05, "loss": 0.7107, "step": 132 }, { "epoch": 0.05, "grad_norm": 10.167497634887695, "learning_rate": 7.74e-05, "loss": 1.962, "step": 133 }, { "epoch": 0.05, "grad_norm": 17.286006927490234, "learning_rate": 7.8e-05, "loss": 2.3033, "step": 134 }, { "epoch": 0.05, "grad_norm": 5.155051231384277, "learning_rate": 7.86e-05, "loss": 0.7892, "step": 135 }, { "epoch": 0.05, "grad_norm": 13.249787330627441, "learning_rate": 7.92e-05, "loss": 0.6655, "step": 136 }, { "epoch": 0.05, "grad_norm": 13.192652702331543, "learning_rate": 7.98e-05, "loss": 0.9772, "step": 137 }, { "epoch": 0.05, "grad_norm": 6.216400623321533, "learning_rate": 8.04e-05, 
"loss": 1.4718, "step": 138 }, { "epoch": 0.05, "grad_norm": 9.961997985839844, "learning_rate": 8.1e-05, "loss": 1.1065, "step": 139 }, { "epoch": 0.05, "grad_norm": 9.072415351867676, "learning_rate": 8.16e-05, "loss": 1.6112, "step": 140 }, { "epoch": 0.05, "grad_norm": 8.392301559448242, "learning_rate": 8.22e-05, "loss": 1.284, "step": 141 }, { "epoch": 0.05, "grad_norm": 7.51064920425415, "learning_rate": 8.28e-05, "loss": 1.2114, "step": 142 }, { "epoch": 0.05, "grad_norm": 19.81893539428711, "learning_rate": 8.34e-05, "loss": 0.909, "step": 143 }, { "epoch": 0.05, "grad_norm": 7.77775239944458, "learning_rate": 8.4e-05, "loss": 1.4994, "step": 144 }, { "epoch": 0.05, "grad_norm": 6.78767204284668, "learning_rate": 8.459999999999998e-05, "loss": 0.6961, "step": 145 }, { "epoch": 0.05, "grad_norm": 11.081228256225586, "learning_rate": 8.519999999999998e-05, "loss": 1.8639, "step": 146 }, { "epoch": 0.05, "grad_norm": 11.193565368652344, "learning_rate": 8.579999999999998e-05, "loss": 1.6451, "step": 147 }, { "epoch": 0.05, "grad_norm": 8.930937767028809, "learning_rate": 8.639999999999999e-05, "loss": 0.9557, "step": 148 }, { "epoch": 0.05, "grad_norm": 20.329538345336914, "learning_rate": 8.699999999999999e-05, "loss": 0.9914, "step": 149 }, { "epoch": 0.05, "grad_norm": NaN, "learning_rate": 8.699999999999999e-05, "loss": 0.2003, "step": 150 }, { "epoch": 0.05, "grad_norm": 8.920183181762695, "learning_rate": 8.759999999999999e-05, "loss": 2.1297, "step": 151 }, { "epoch": 0.05, "grad_norm": 4.7198567390441895, "learning_rate": 8.819999999999999e-05, "loss": 1.0552, "step": 152 }, { "epoch": 0.05, "grad_norm": 5.2832512855529785, "learning_rate": 8.879999999999999e-05, "loss": 1.0677, "step": 153 }, { "epoch": 0.05, "grad_norm": 7.0598530769348145, "learning_rate": 8.939999999999999e-05, "loss": 0.9668, "step": 154 }, { "epoch": 0.05, "grad_norm": 7.0460052490234375, "learning_rate": 8.999999999999999e-05, "loss": 0.917, "step": 155 }, { "epoch": 0.05, 
"grad_norm": 4.30267333984375, "learning_rate": 9.059999999999999e-05, "loss": 0.8791, "step": 156 }, { "epoch": 0.06, "grad_norm": 15.747892379760742, "learning_rate": 9.12e-05, "loss": 1.0952, "step": 157 }, { "epoch": 0.06, "grad_norm": 4.930239200592041, "learning_rate": 9.18e-05, "loss": 0.7907, "step": 158 }, { "epoch": 0.06, "grad_norm": 9.977339744567871, "learning_rate": 9.24e-05, "loss": 1.6181, "step": 159 }, { "epoch": 0.06, "grad_norm": 5.140440464019775, "learning_rate": 9.3e-05, "loss": 0.8012, "step": 160 }, { "epoch": 0.06, "grad_norm": 8.982709884643555, "learning_rate": 9.36e-05, "loss": 0.8466, "step": 161 }, { "epoch": 0.06, "grad_norm": 5.337733268737793, "learning_rate": 9.419999999999999e-05, "loss": 0.9134, "step": 162 }, { "epoch": 0.06, "grad_norm": 5.364322662353516, "learning_rate": 9.479999999999999e-05, "loss": 0.4464, "step": 163 }, { "epoch": 0.06, "grad_norm": 13.642416954040527, "learning_rate": 9.539999999999999e-05, "loss": 1.9781, "step": 164 }, { "epoch": 0.06, "grad_norm": 6.412764549255371, "learning_rate": 9.599999999999999e-05, "loss": 0.7324, "step": 165 }, { "epoch": 0.06, "grad_norm": 7.758469581604004, "learning_rate": 9.659999999999999e-05, "loss": 1.5962, "step": 166 }, { "epoch": 0.06, "grad_norm": 6.5638041496276855, "learning_rate": 9.719999999999999e-05, "loss": 1.0662, "step": 167 }, { "epoch": 0.06, "grad_norm": 5.04262638092041, "learning_rate": 9.779999999999999e-05, "loss": 1.106, "step": 168 }, { "epoch": 0.06, "grad_norm": 6.401612758636475, "learning_rate": 9.839999999999999e-05, "loss": 1.0118, "step": 169 }, { "epoch": 0.06, "grad_norm": 7.277603626251221, "learning_rate": 9.9e-05, "loss": 1.5725, "step": 170 }, { "epoch": 0.06, "grad_norm": 7.579121112823486, "learning_rate": 9.96e-05, "loss": 1.0403, "step": 171 }, { "epoch": 0.06, "grad_norm": 16.09749412536621, "learning_rate": 0.0001002, "loss": 1.1833, "step": 172 }, { "epoch": 0.06, "grad_norm": 10.291646957397461, "learning_rate": 0.0001008, 
"loss": 2.9008, "step": 173 }, { "epoch": 0.06, "grad_norm": 7.372079372406006, "learning_rate": 0.0001014, "loss": 0.779, "step": 174 }, { "epoch": 0.06, "grad_norm": 15.55083179473877, "learning_rate": 0.000102, "loss": 1.0366, "step": 175 }, { "epoch": 0.06, "grad_norm": 10.298460006713867, "learning_rate": 0.0001026, "loss": 1.7753, "step": 176 }, { "epoch": 0.06, "grad_norm": 7.792703151702881, "learning_rate": 0.00010319999999999999, "loss": 1.3867, "step": 177 }, { "epoch": 0.06, "grad_norm": 5.284561634063721, "learning_rate": 0.00010379999999999999, "loss": 1.2554, "step": 178 }, { "epoch": 0.06, "grad_norm": 8.849788665771484, "learning_rate": 0.00010439999999999999, "loss": 1.087, "step": 179 }, { "epoch": 0.06, "grad_norm": 5.7863664627075195, "learning_rate": 0.00010499999999999999, "loss": 1.003, "step": 180 }, { "epoch": 0.06, "grad_norm": 6.806325912475586, "learning_rate": 0.00010559999999999998, "loss": 0.7003, "step": 181 }, { "epoch": 0.06, "grad_norm": 8.477960586547852, "learning_rate": 0.00010619999999999998, "loss": 0.9081, "step": 182 }, { "epoch": 0.06, "grad_norm": 7.785075664520264, "learning_rate": 0.00010679999999999998, "loss": 1.1227, "step": 183 }, { "epoch": 0.06, "grad_norm": 6.257397174835205, "learning_rate": 0.00010739999999999998, "loss": 0.8996, "step": 184 }, { "epoch": 0.06, "grad_norm": 8.024823188781738, "learning_rate": 0.00010799999999999998, "loss": 1.2054, "step": 185 }, { "epoch": 0.07, "grad_norm": 7.594734191894531, "learning_rate": 0.00010859999999999998, "loss": 1.2464, "step": 186 }, { "epoch": 0.07, "grad_norm": 9.327756881713867, "learning_rate": 0.00010919999999999998, "loss": 1.1473, "step": 187 }, { "epoch": 0.07, "grad_norm": 15.395828247070312, "learning_rate": 0.00010979999999999999, "loss": 1.7228, "step": 188 }, { "epoch": 0.07, "grad_norm": 8.292728424072266, "learning_rate": 0.00011039999999999999, "loss": 1.3052, "step": 189 }, { "epoch": 0.07, "grad_norm": 6.423582077026367, "learning_rate": 
0.00011099999999999999, "loss": 0.7601, "step": 190 }, { "epoch": 0.07, "grad_norm": 8.61807918548584, "learning_rate": 0.00011159999999999999, "loss": 0.6694, "step": 191 }, { "epoch": 0.07, "grad_norm": 10.768646240234375, "learning_rate": 0.00011219999999999999, "loss": 1.2744, "step": 192 }, { "epoch": 0.07, "grad_norm": 6.7909836769104, "learning_rate": 0.00011279999999999999, "loss": 0.8817, "step": 193 }, { "epoch": 0.07, "grad_norm": 9.817007064819336, "learning_rate": 0.00011339999999999999, "loss": 1.3541, "step": 194 }, { "epoch": 0.07, "grad_norm": 6.27230978012085, "learning_rate": 0.00011399999999999999, "loss": 0.5326, "step": 195 }, { "epoch": 0.07, "grad_norm": 9.24206256866455, "learning_rate": 0.0001146, "loss": 0.8079, "step": 196 }, { "epoch": 0.07, "grad_norm": 7.788369655609131, "learning_rate": 0.0001152, "loss": 0.6113, "step": 197 }, { "epoch": 0.07, "grad_norm": 16.040983200073242, "learning_rate": 0.0001158, "loss": 1.1879, "step": 198 }, { "epoch": 0.07, "grad_norm": 8.447440147399902, "learning_rate": 0.0001164, "loss": 1.5101, "step": 199 }, { "epoch": 0.07, "grad_norm": 12.108735084533691, "learning_rate": 0.000117, "loss": 1.9649, "step": 200 }, { "epoch": 0.07, "eval_loss": 1.0153309106826782, "eval_runtime": 50.5715, "eval_samples_per_second": 42.87, "eval_steps_per_second": 10.718, "eval_wer": 0.805137519460301, "step": 200 }, { "epoch": 0.07, "grad_norm": 8.759153366088867, "learning_rate": 0.0001176, "loss": 1.3547, "step": 201 }, { "epoch": 0.07, "grad_norm": 6.493929386138916, "learning_rate": 0.0001182, "loss": 2.0605, "step": 202 }, { "epoch": 0.07, "grad_norm": 6.16959285736084, "learning_rate": 0.0001188, "loss": 1.6697, "step": 203 }, { "epoch": 0.07, "grad_norm": 6.1440348625183105, "learning_rate": 0.0001194, "loss": 1.4012, "step": 204 }, { "epoch": 0.07, "grad_norm": 8.175420761108398, "learning_rate": 0.00011999999999999999, "loss": 1.164, "step": 205 }, { "epoch": 0.07, "grad_norm": 7.481700420379639, 
"learning_rate": 0.00012059999999999999, "loss": 1.0755, "step": 206 }, { "epoch": 0.07, "grad_norm": 5.793044567108154, "learning_rate": 0.00012119999999999999, "loss": 0.8223, "step": 207 }, { "epoch": 0.07, "grad_norm": 7.168550968170166, "learning_rate": 0.00012179999999999999, "loss": 1.309, "step": 208 }, { "epoch": 0.07, "grad_norm": 9.77776050567627, "learning_rate": 0.0001224, "loss": 1.1608, "step": 209 }, { "epoch": 0.07, "grad_norm": 6.302064418792725, "learning_rate": 0.00012299999999999998, "loss": 0.7877, "step": 210 }, { "epoch": 0.07, "grad_norm": 5.7911152839660645, "learning_rate": 0.0001236, "loss": 0.7164, "step": 211 }, { "epoch": 0.07, "grad_norm": 6.755216121673584, "learning_rate": 0.00012419999999999998, "loss": 1.3267, "step": 212 }, { "epoch": 0.07, "grad_norm": 5.61589241027832, "learning_rate": 0.00012479999999999997, "loss": 1.1473, "step": 213 }, { "epoch": 0.07, "grad_norm": 6.926543712615967, "learning_rate": 0.00012539999999999999, "loss": 1.4776, "step": 214 }, { "epoch": 0.08, "grad_norm": 30.6265869140625, "learning_rate": 0.00012599999999999997, "loss": 1.4803, "step": 215 }, { "epoch": 0.08, "grad_norm": 10.334158897399902, "learning_rate": 0.0001266, "loss": 1.579, "step": 216 }, { "epoch": 0.08, "grad_norm": 8.379430770874023, "learning_rate": 0.00012719999999999997, "loss": 1.0805, "step": 217 }, { "epoch": 0.08, "grad_norm": 5.460320472717285, "learning_rate": 0.0001278, "loss": 0.7705, "step": 218 }, { "epoch": 0.08, "grad_norm": 7.094217777252197, "learning_rate": 0.00012839999999999998, "loss": 1.061, "step": 219 }, { "epoch": 0.08, "grad_norm": 7.368185043334961, "learning_rate": 0.000129, "loss": 1.1021, "step": 220 }, { "epoch": 0.08, "grad_norm": 8.67043685913086, "learning_rate": 0.00012959999999999998, "loss": 1.84, "step": 221 }, { "epoch": 0.08, "grad_norm": 9.372278213500977, "learning_rate": 0.0001302, "loss": 1.2919, "step": 222 }, { "epoch": 0.08, "grad_norm": 10.148419380187988, "learning_rate": 
0.00013079999999999998, "loss": 1.3467, "step": 223 }, { "epoch": 0.08, "grad_norm": 9.848054885864258, "learning_rate": 0.0001314, "loss": 1.018, "step": 224 }, { "epoch": 0.08, "grad_norm": 13.819363594055176, "learning_rate": 0.00013199999999999998, "loss": 1.8879, "step": 225 }, { "epoch": 0.08, "grad_norm": 8.070960998535156, "learning_rate": 0.0001326, "loss": 2.0939, "step": 226 }, { "epoch": 0.08, "grad_norm": 7.248965263366699, "learning_rate": 0.00013319999999999999, "loss": 1.4703, "step": 227 }, { "epoch": 0.08, "grad_norm": 6.5253472328186035, "learning_rate": 0.0001338, "loss": 1.6318, "step": 228 }, { "epoch": 0.08, "grad_norm": 8.237483024597168, "learning_rate": 0.0001344, "loss": 1.053, "step": 229 }, { "epoch": 0.08, "grad_norm": 6.78982400894165, "learning_rate": 0.000135, "loss": 1.1394, "step": 230 }, { "epoch": 0.08, "grad_norm": 7.947813510894775, "learning_rate": 0.0001356, "loss": 1.0707, "step": 231 }, { "epoch": 0.08, "grad_norm": 6.178483486175537, "learning_rate": 0.0001362, "loss": 1.246, "step": 232 }, { "epoch": 0.08, "grad_norm": 6.992203712463379, "learning_rate": 0.0001368, "loss": 0.8734, "step": 233 }, { "epoch": 0.08, "grad_norm": 6.598153114318848, "learning_rate": 0.0001374, "loss": 1.5, "step": 234 }, { "epoch": 0.08, "grad_norm": 6.893948078155518, "learning_rate": 0.000138, "loss": 0.902, "step": 235 }, { "epoch": 0.08, "grad_norm": null, "learning_rate": 0.000138, "loss": 2.4242, "step": 236 }, { "epoch": 0.08, "grad_norm": 10.769316673278809, "learning_rate": 0.0001386, "loss": 1.7384, "step": 237 }, { "epoch": 0.08, "grad_norm": 8.3101167678833, "learning_rate": 0.0001392, "loss": 0.9501, "step": 238 }, { "epoch": 0.08, "grad_norm": 7.548043251037598, "learning_rate": 0.00013979999999999998, "loss": 0.9354, "step": 239 }, { "epoch": 0.08, "grad_norm": 7.125143527984619, "learning_rate": 0.0001404, "loss": 0.5763, "step": 240 }, { "epoch": 0.08, "grad_norm": 6.7880330085754395, "learning_rate": 0.00014099999999999998, 
"loss": 0.7926, "step": 241 }, { "epoch": 0.08, "grad_norm": 5.474691867828369, "learning_rate": 0.00014159999999999997, "loss": 0.6569, "step": 242 }, { "epoch": 0.09, "grad_norm": 6.943902492523193, "learning_rate": 0.0001422, "loss": 0.5578, "step": 243 }, { "epoch": 0.09, "grad_norm": 6.652454853057861, "learning_rate": 0.00014279999999999997, "loss": 1.0422, "step": 244 }, { "epoch": 0.09, "grad_norm": 5.662099838256836, "learning_rate": 0.0001434, "loss": 0.9684, "step": 245 }, { "epoch": 0.09, "grad_norm": 8.081048965454102, "learning_rate": 0.00014399999999999998, "loss": 1.0536, "step": 246 }, { "epoch": 0.09, "grad_norm": 6.202698230743408, "learning_rate": 0.0001446, "loss": 0.9664, "step": 247 }, { "epoch": 0.09, "grad_norm": 6.779542922973633, "learning_rate": 0.00014519999999999998, "loss": 1.0268, "step": 248 }, { "epoch": 0.09, "grad_norm": 11.181925773620605, "learning_rate": 0.0001458, "loss": 1.184, "step": 249 }, { "epoch": 0.09, "grad_norm": 11.750948905944824, "learning_rate": 0.00014639999999999998, "loss": 1.8029, "step": 250 }, { "epoch": 0.09, "grad_norm": 14.2088041305542, "learning_rate": 0.000147, "loss": 2.2772, "step": 251 }, { "epoch": 0.09, "grad_norm": 7.257857322692871, "learning_rate": 0.00014759999999999998, "loss": 1.2177, "step": 252 }, { "epoch": 0.09, "grad_norm": 5.847878456115723, "learning_rate": 0.0001482, "loss": 1.4445, "step": 253 }, { "epoch": 0.09, "grad_norm": 5.302583694458008, "learning_rate": 0.00014879999999999998, "loss": 1.1606, "step": 254 }, { "epoch": 0.09, "grad_norm": 5.7541680335998535, "learning_rate": 0.0001494, "loss": 1.2038, "step": 255 }, { "epoch": 0.09, "grad_norm": 4.664234638214111, "learning_rate": 0.00015, "loss": 1.3001, "step": 256 }, { "epoch": 0.09, "grad_norm": 5.489469051361084, "learning_rate": 0.00015059999999999997, "loss": 0.766, "step": 257 }, { "epoch": 0.09, "grad_norm": 9.820629119873047, "learning_rate": 0.0001512, "loss": 0.6067, "step": 258 }, { "epoch": 0.09, "grad_norm": 
6.636170864105225, "learning_rate": 0.00015179999999999998, "loss": 0.9712, "step": 259 }, { "epoch": 0.09, "grad_norm": 4.902698993682861, "learning_rate": 0.0001524, "loss": 0.8656, "step": 260 }, { "epoch": 0.09, "grad_norm": 12.261871337890625, "learning_rate": 0.00015299999999999998, "loss": 1.3325, "step": 261 }, { "epoch": 0.09, "grad_norm": 10.92815113067627, "learning_rate": 0.0001536, "loss": 1.4591, "step": 262 }, { "epoch": 0.09, "grad_norm": 5.674909591674805, "learning_rate": 0.00015419999999999998, "loss": 0.5967, "step": 263 }, { "epoch": 0.09, "grad_norm": 7.12188196182251, "learning_rate": 0.0001548, "loss": 0.5938, "step": 264 }, { "epoch": 0.09, "grad_norm": 7.955028057098389, "learning_rate": 0.00015539999999999998, "loss": 0.7309, "step": 265 }, { "epoch": 0.09, "grad_norm": 8.135099411010742, "learning_rate": 0.000156, "loss": 0.9604, "step": 266 }, { "epoch": 0.09, "grad_norm": 17.383867263793945, "learning_rate": 0.00015659999999999998, "loss": 0.966, "step": 267 }, { "epoch": 0.09, "grad_norm": 7.752403736114502, "learning_rate": 0.0001572, "loss": 1.5528, "step": 268 }, { "epoch": 0.09, "grad_norm": 9.291197776794434, "learning_rate": 0.0001578, "loss": 0.7933, "step": 269 }, { "epoch": 0.09, "grad_norm": 7.345749855041504, "learning_rate": 0.0001584, "loss": 0.7673, "step": 270 }, { "epoch": 0.09, "grad_norm": 4.077545166015625, "learning_rate": 0.000159, "loss": 0.4076, "step": 271 }, { "epoch": 0.1, "grad_norm": 7.044876575469971, "learning_rate": 0.0001596, "loss": 2.2965, "step": 272 }, { "epoch": 0.1, "grad_norm": 8.072477340698242, "learning_rate": 0.0001602, "loss": 1.0095, "step": 273 }, { "epoch": 0.1, "grad_norm": 7.533301830291748, "learning_rate": 0.0001608, "loss": 1.6276, "step": 274 }, { "epoch": 0.1, "grad_norm": 10.649826049804688, "learning_rate": 0.0001614, "loss": 1.1563, "step": 275 }, { "epoch": 0.1, "grad_norm": 6.723114013671875, "learning_rate": 0.000162, "loss": 1.5564, "step": 276 }, { "epoch": 0.1, 
"grad_norm": 4.508934497833252, "learning_rate": 0.0001626, "loss": 1.1316, "step": 277 }, { "epoch": 0.1, "grad_norm": 6.111855983734131, "learning_rate": 0.0001632, "loss": 1.0925, "step": 278 }, { "epoch": 0.1, "grad_norm": 7.4623613357543945, "learning_rate": 0.0001638, "loss": 0.7827, "step": 279 }, { "epoch": 0.1, "grad_norm": 4.680227756500244, "learning_rate": 0.0001644, "loss": 0.9112, "step": 280 }, { "epoch": 0.1, "grad_norm": 11.717487335205078, "learning_rate": 0.000165, "loss": 1.1662, "step": 281 }, { "epoch": 0.1, "grad_norm": 4.690805912017822, "learning_rate": 0.0001656, "loss": 1.0872, "step": 282 }, { "epoch": 0.1, "grad_norm": 8.276322364807129, "learning_rate": 0.0001662, "loss": 1.0738, "step": 283 }, { "epoch": 0.1, "grad_norm": 5.989705562591553, "learning_rate": 0.0001668, "loss": 1.2949, "step": 284 }, { "epoch": 0.1, "grad_norm": 5.663930416107178, "learning_rate": 0.0001674, "loss": 0.5814, "step": 285 }, { "epoch": 0.1, "grad_norm": 6.9047698974609375, "learning_rate": 0.000168, "loss": 1.0676, "step": 286 }, { "epoch": 0.1, "grad_norm": 7.110291957855225, "learning_rate": 0.0001686, "loss": 1.227, "step": 287 }, { "epoch": 0.1, "grad_norm": 8.15951156616211, "learning_rate": 0.00016919999999999997, "loss": 0.9983, "step": 288 }, { "epoch": 0.1, "grad_norm": 8.399914741516113, "learning_rate": 0.00016979999999999998, "loss": 0.4559, "step": 289 }, { "epoch": 0.1, "grad_norm": 7.747352123260498, "learning_rate": 0.00017039999999999997, "loss": 1.0402, "step": 290 }, { "epoch": 0.1, "grad_norm": 5.47703218460083, "learning_rate": 0.00017099999999999998, "loss": 1.167, "step": 291 }, { "epoch": 0.1, "grad_norm": 6.136383533477783, "learning_rate": 0.00017159999999999997, "loss": 0.7194, "step": 292 }, { "epoch": 0.1, "grad_norm": 6.892377853393555, "learning_rate": 0.00017219999999999998, "loss": 0.9781, "step": 293 }, { "epoch": 0.1, "grad_norm": 14.462456703186035, "learning_rate": 0.00017279999999999997, "loss": 0.7622, "step": 294 }, 
{ "epoch": 0.1, "grad_norm": 7.86848783493042, "learning_rate": 0.00017339999999999996, "loss": 1.2177, "step": 295 }, { "epoch": 0.1, "grad_norm": 7.074151992797852, "learning_rate": 0.00017399999999999997, "loss": 1.0429, "step": 296 }, { "epoch": 0.1, "grad_norm": 6.4072675704956055, "learning_rate": 0.00017459999999999996, "loss": 0.6744, "step": 297 }, { "epoch": 0.1, "grad_norm": 12.912552833557129, "learning_rate": 0.00017519999999999998, "loss": 0.888, "step": 298 }, { "epoch": 0.1, "grad_norm": 10.346461296081543, "learning_rate": 0.00017579999999999996, "loss": 0.7609, "step": 299 }, { "epoch": 0.11, "grad_norm": 8.770968437194824, "learning_rate": 0.00017639999999999998, "loss": 0.905, "step": 300 }, { "epoch": 0.11, "eval_loss": 1.0725961923599243, "eval_runtime": 51.0006, "eval_samples_per_second": 42.509, "eval_steps_per_second": 10.627, "eval_wer": 0.8373983739837398, "step": 300 }, { "epoch": 0.11, "grad_norm": 6.282247066497803, "learning_rate": 0.00017699999999999997, "loss": 1.1397, "step": 301 }, { "epoch": 0.11, "grad_norm": 4.363534927368164, "learning_rate": 0.00017759999999999998, "loss": 0.9052, "step": 302 }, { "epoch": 0.11, "grad_norm": 7.095592021942139, "learning_rate": 0.00017819999999999997, "loss": 0.7081, "step": 303 }, { "epoch": 0.11, "grad_norm": 6.591117858886719, "learning_rate": 0.00017879999999999998, "loss": 1.198, "step": 304 }, { "epoch": 0.11, "grad_norm": 7.835860252380371, "learning_rate": 0.00017939999999999997, "loss": 1.4741, "step": 305 }, { "epoch": 0.11, "grad_norm": 5.496611595153809, "learning_rate": 0.00017999999999999998, "loss": 0.7443, "step": 306 }, { "epoch": 0.11, "grad_norm": 5.134020805358887, "learning_rate": 0.00018059999999999997, "loss": 0.6569, "step": 307 }, { "epoch": 0.11, "grad_norm": 4.844152450561523, "learning_rate": 0.00018119999999999999, "loss": 1.3045, "step": 308 }, { "epoch": 0.11, "grad_norm": 4.551060676574707, "learning_rate": 0.00018179999999999997, "loss": 0.564, "step": 309 }, { 
"epoch": 0.11, "grad_norm": 6.43617582321167, "learning_rate": 0.0001824, "loss": 1.0049, "step": 310 }, { "epoch": 0.11, "grad_norm": 5.678737640380859, "learning_rate": 0.00018299999999999998, "loss": 0.5783, "step": 311 }, { "epoch": 0.11, "grad_norm": 4.169671058654785, "learning_rate": 0.0001836, "loss": 0.6863, "step": 312 }, { "epoch": 0.11, "grad_norm": 8.256979942321777, "learning_rate": 0.00018419999999999998, "loss": 1.0964, "step": 313 }, { "epoch": 0.11, "grad_norm": 8.771745681762695, "learning_rate": 0.0001848, "loss": 1.2424, "step": 314 }, { "epoch": 0.11, "grad_norm": 7.281775951385498, "learning_rate": 0.00018539999999999998, "loss": 2.047, "step": 315 }, { "epoch": 0.11, "grad_norm": 5.94806432723999, "learning_rate": 0.000186, "loss": 1.0021, "step": 316 }, { "epoch": 0.11, "grad_norm": 11.512472152709961, "learning_rate": 0.00018659999999999998, "loss": 1.3445, "step": 317 }, { "epoch": 0.11, "grad_norm": 5.457605838775635, "learning_rate": 0.0001872, "loss": 0.9049, "step": 318 }, { "epoch": 0.11, "grad_norm": 7.318243026733398, "learning_rate": 0.00018779999999999998, "loss": 1.0203, "step": 319 }, { "epoch": 0.11, "grad_norm": 6.3816118240356445, "learning_rate": 0.00018839999999999997, "loss": 1.0586, "step": 320 }, { "epoch": 0.11, "grad_norm": 6.394790172576904, "learning_rate": 0.00018899999999999999, "loss": 0.9951, "step": 321 }, { "epoch": 0.11, "grad_norm": 8.764740943908691, "learning_rate": 0.00018959999999999997, "loss": 0.9491, "step": 322 }, { "epoch": 0.11, "grad_norm": 7.136832237243652, "learning_rate": 0.0001902, "loss": 1.068, "step": 323 }, { "epoch": 0.11, "grad_norm": 18.01349639892578, "learning_rate": 0.00019079999999999998, "loss": 1.7603, "step": 324 }, { "epoch": 0.11, "grad_norm": 7.3441386222839355, "learning_rate": 0.0001914, "loss": 0.8072, "step": 325 }, { "epoch": 0.11, "grad_norm": 7.931651592254639, "learning_rate": 0.00019199999999999998, "loss": 1.7458, "step": 326 }, { "epoch": 0.11, "grad_norm": 
6.381895542144775, "learning_rate": 0.0001926, "loss": 1.2449, "step": 327 }, { "epoch": 0.11, "grad_norm": 6.183620452880859, "learning_rate": 0.00019319999999999998, "loss": 0.9892, "step": 328 }, { "epoch": 0.12, "grad_norm": 5.490755558013916, "learning_rate": 0.0001938, "loss": 1.1006, "step": 329 }, { "epoch": 0.12, "grad_norm": 5.9131011962890625, "learning_rate": 0.00019439999999999998, "loss": 1.2143, "step": 330 }, { "epoch": 0.12, "grad_norm": 4.422755718231201, "learning_rate": 0.000195, "loss": 1.1383, "step": 331 }, { "epoch": 0.12, "grad_norm": 4.351710796356201, "learning_rate": 0.00019559999999999998, "loss": 0.9472, "step": 332 }, { "epoch": 0.12, "grad_norm": 4.6042375564575195, "learning_rate": 0.0001962, "loss": 0.5255, "step": 333 }, { "epoch": 0.12, "grad_norm": 4.7591094970703125, "learning_rate": 0.00019679999999999999, "loss": 1.0898, "step": 334 }, { "epoch": 0.12, "grad_norm": 17.369613647460938, "learning_rate": 0.0001974, "loss": 0.8401, "step": 335 }, { "epoch": 0.12, "grad_norm": 8.167240142822266, "learning_rate": 0.000198, "loss": 1.9533, "step": 336 }, { "epoch": 0.12, "grad_norm": 7.36649227142334, "learning_rate": 0.0001986, "loss": 1.0694, "step": 337 }, { "epoch": 0.12, "grad_norm": 18.003772735595703, "learning_rate": 0.0001992, "loss": 0.8262, "step": 338 }, { "epoch": 0.12, "grad_norm": 7.110168933868408, "learning_rate": 0.0001998, "loss": 1.0649, "step": 339 }, { "epoch": 0.12, "grad_norm": 11.043972969055176, "learning_rate": 0.0002004, "loss": 0.9792, "step": 340 }, { "epoch": 0.12, "grad_norm": 4.065829753875732, "learning_rate": 0.000201, "loss": 0.5039, "step": 341 }, { "epoch": 0.12, "grad_norm": 8.094096183776855, "learning_rate": 0.0002016, "loss": 0.9588, "step": 342 }, { "epoch": 0.12, "grad_norm": 9.9978609085083, "learning_rate": 0.0002022, "loss": 1.2725, "step": 343 }, { "epoch": 0.12, "grad_norm": 13.194984436035156, "learning_rate": 0.0002028, "loss": 1.3617, "step": 344 }, { "epoch": 0.12, "grad_norm": 
8.760825157165527, "learning_rate": 0.00020339999999999998, "loss": 1.0096, "step": 345 }, { "epoch": 0.12, "grad_norm": 13.242958068847656, "learning_rate": 0.000204, "loss": 1.2532, "step": 346 }, { "epoch": 0.12, "grad_norm": 10.69446086883545, "learning_rate": 0.00020459999999999999, "loss": 0.9107, "step": 347 }, { "epoch": 0.12, "grad_norm": 14.089339256286621, "learning_rate": 0.0002052, "loss": 1.0958, "step": 348 }, { "epoch": 0.12, "grad_norm": 10.158014297485352, "learning_rate": 0.0002058, "loss": 1.3443, "step": 349 }, { "epoch": 0.12, "grad_norm": 277.1723937988281, "learning_rate": 0.00020639999999999998, "loss": 1.2569, "step": 350 }, { "epoch": 0.12, "grad_norm": 15.648301124572754, "learning_rate": 0.00020699999999999996, "loss": 2.473, "step": 351 }, { "epoch": 0.12, "grad_norm": 5.679229259490967, "learning_rate": 0.00020759999999999998, "loss": 1.3066, "step": 352 }, { "epoch": 0.12, "grad_norm": 6.558560371398926, "learning_rate": 0.00020819999999999996, "loss": 1.0691, "step": 353 }, { "epoch": 0.12, "grad_norm": 3.9980814456939697, "learning_rate": 0.00020879999999999998, "loss": 0.9881, "step": 354 }, { "epoch": 0.12, "grad_norm": 4.269549369812012, "learning_rate": 0.00020939999999999997, "loss": 0.9437, "step": 355 }, { "epoch": 0.12, "grad_norm": 5.434672832489014, "learning_rate": 0.00020999999999999998, "loss": 1.139, "step": 356 }, { "epoch": 0.13, "grad_norm": 3.682892084121704, "learning_rate": 0.00021059999999999997, "loss": 0.9274, "step": 357 }, { "epoch": 0.13, "grad_norm": 4.203353404998779, "learning_rate": 0.00021119999999999996, "loss": 0.9735, "step": 358 }, { "epoch": 0.13, "grad_norm": 4.374783039093018, "learning_rate": 0.00021179999999999997, "loss": 0.7179, "step": 359 }, { "epoch": 0.13, "grad_norm": 6.496840476989746, "learning_rate": 0.00021239999999999996, "loss": 0.6335, "step": 360 }, { "epoch": 0.13, "grad_norm": 4.820166110992432, "learning_rate": 0.00021299999999999997, "loss": 0.7724, "step": 361 }, { 
"epoch": 0.13, "grad_norm": 4.941391468048096, "learning_rate": 0.00021359999999999996, "loss": 0.6864, "step": 362 }, { "epoch": 0.13, "grad_norm": 4.761062145233154, "learning_rate": 0.00021419999999999998, "loss": 1.3754, "step": 363 }, { "epoch": 0.13, "grad_norm": 8.317700386047363, "learning_rate": 0.00021479999999999996, "loss": 1.2307, "step": 364 }, { "epoch": 0.13, "grad_norm": 7.029150009155273, "learning_rate": 0.00021539999999999998, "loss": 0.7201, "step": 365 }, { "epoch": 0.13, "grad_norm": 11.40591812133789, "learning_rate": 0.00021599999999999996, "loss": 1.1349, "step": 366 }, { "epoch": 0.13, "grad_norm": 7.807318687438965, "learning_rate": 0.00021659999999999998, "loss": 1.4664, "step": 367 }, { "epoch": 0.13, "grad_norm": 7.696564197540283, "learning_rate": 0.00021719999999999997, "loss": 0.9169, "step": 368 }, { "epoch": 0.13, "grad_norm": 6.1474151611328125, "learning_rate": 0.00021779999999999998, "loss": 1.0351, "step": 369 }, { "epoch": 0.13, "grad_norm": 4.485285758972168, "learning_rate": 0.00021839999999999997, "loss": 0.5278, "step": 370 }, { "epoch": 0.13, "grad_norm": 4.146036624908447, "learning_rate": 0.00021899999999999998, "loss": 0.6379, "step": 371 }, { "epoch": 0.13, "grad_norm": 7.9090800285339355, "learning_rate": 0.00021959999999999997, "loss": 1.071, "step": 372 }, { "epoch": 0.13, "grad_norm": 9.113584518432617, "learning_rate": 0.00022019999999999999, "loss": 0.8886, "step": 373 }, { "epoch": 0.13, "grad_norm": 6.609561920166016, "learning_rate": 0.00022079999999999997, "loss": 0.7262, "step": 374 }, { "epoch": 0.13, "grad_norm": 8.190186500549316, "learning_rate": 0.0002214, "loss": 0.7478, "step": 375 }, { "epoch": 0.13, "grad_norm": 6.347649097442627, "learning_rate": 0.00022199999999999998, "loss": 1.5713, "step": 376 }, { "epoch": 0.13, "grad_norm": 6.180627346038818, "learning_rate": 0.0002226, "loss": 1.8954, "step": 377 }, { "epoch": 0.13, "grad_norm": 5.399271488189697, "learning_rate": 0.00022319999999999998, 
"loss": 1.382, "step": 378 }, { "epoch": 0.13, "grad_norm": 6.500628471374512, "learning_rate": 0.0002238, "loss": 1.1642, "step": 379 }, { "epoch": 0.13, "grad_norm": 8.18594741821289, "learning_rate": 0.00022439999999999998, "loss": 1.3459, "step": 380 }, { "epoch": 0.13, "grad_norm": 3.980257272720337, "learning_rate": 0.000225, "loss": 0.6223, "step": 381 }, { "epoch": 0.13, "grad_norm": 5.482236385345459, "learning_rate": 0.00022559999999999998, "loss": 1.2836, "step": 382 }, { "epoch": 0.13, "grad_norm": 4.334828853607178, "learning_rate": 0.00022619999999999997, "loss": 0.9142, "step": 383 }, { "epoch": 0.13, "grad_norm": 3.967797040939331, "learning_rate": 0.00022679999999999998, "loss": 0.5278, "step": 384 }, { "epoch": 0.13, "grad_norm": 4.172309398651123, "learning_rate": 0.00022739999999999997, "loss": 0.6968, "step": 385 }, { "epoch": 0.14, "grad_norm": 3.380971670150757, "learning_rate": 0.00022799999999999999, "loss": 0.6464, "step": 386 }, { "epoch": 0.14, "grad_norm": 7.23440408706665, "learning_rate": 0.00022859999999999997, "loss": 1.8373, "step": 387 }, { "epoch": 0.14, "grad_norm": 6.774138450622559, "learning_rate": 0.0002292, "loss": 1.0644, "step": 388 }, { "epoch": 0.14, "grad_norm": 8.141240119934082, "learning_rate": 0.00022979999999999997, "loss": 1.4621, "step": 389 }, { "epoch": 0.14, "grad_norm": 8.437649726867676, "learning_rate": 0.0002304, "loss": 1.0165, "step": 390 }, { "epoch": 0.14, "grad_norm": 6.1060791015625, "learning_rate": 0.00023099999999999998, "loss": 1.5574, "step": 391 }, { "epoch": 0.14, "grad_norm": 6.36237907409668, "learning_rate": 0.0002316, "loss": 0.8218, "step": 392 }, { "epoch": 0.14, "grad_norm": 7.044564247131348, "learning_rate": 0.00023219999999999998, "loss": 1.0601, "step": 393 }, { "epoch": 0.14, "grad_norm": 4.057678699493408, "learning_rate": 0.0002328, "loss": 0.8001, "step": 394 }, { "epoch": 0.14, "grad_norm": 6.342691421508789, "learning_rate": 0.00023339999999999998, "loss": 0.5296, "step": 395 
}, { "epoch": 0.14, "grad_norm": 7.52527379989624, "learning_rate": 0.000234, "loss": 0.7155, "step": 396 }, { "epoch": 0.14, "grad_norm": 8.385841369628906, "learning_rate": 0.00023459999999999998, "loss": 0.7998, "step": 397 }, { "epoch": 0.14, "grad_norm": 8.12092399597168, "learning_rate": 0.0002352, "loss": 1.0234, "step": 398 }, { "epoch": 0.14, "grad_norm": 14.459668159484863, "learning_rate": 0.00023579999999999999, "loss": 0.457, "step": 399 }, { "epoch": 0.14, "grad_norm": 9.308672904968262, "learning_rate": 0.0002364, "loss": 2.0688, "step": 400 }, { "epoch": 0.14, "eval_loss": 1.1829124689102173, "eval_runtime": 50.8154, "eval_samples_per_second": 42.664, "eval_steps_per_second": 10.666, "eval_wer": 0.8181975436775645, "step": 400 }, { "epoch": 0.14, "grad_norm": 4.8363261222839355, "learning_rate": 0.000237, "loss": 1.5778, "step": 401 }, { "epoch": 0.14, "grad_norm": 4.74521541595459, "learning_rate": 0.0002376, "loss": 0.9614, "step": 402 }, { "epoch": 0.14, "grad_norm": 4.534332752227783, "learning_rate": 0.0002382, "loss": 0.8923, "step": 403 }, { "epoch": 0.14, "grad_norm": 5.4116997718811035, "learning_rate": 0.0002388, "loss": 1.2341, "step": 404 }, { "epoch": 0.14, "grad_norm": 6.503213882446289, "learning_rate": 0.0002394, "loss": 1.2941, "step": 405 }, { "epoch": 0.14, "grad_norm": 7.444057941436768, "learning_rate": 0.00023999999999999998, "loss": 0.8984, "step": 406 }, { "epoch": 0.14, "grad_norm": 5.511045932769775, "learning_rate": 0.0002406, "loss": 0.7948, "step": 407 }, { "epoch": 0.14, "grad_norm": 7.183623313903809, "learning_rate": 0.00024119999999999998, "loss": 1.095, "step": 408 }, { "epoch": 0.14, "grad_norm": 6.442593574523926, "learning_rate": 0.0002418, "loss": 1.6099, "step": 409 }, { "epoch": 0.14, "grad_norm": 3.9881575107574463, "learning_rate": 0.00024239999999999998, "loss": 0.8797, "step": 410 }, { "epoch": 0.14, "grad_norm": 10.62885570526123, "learning_rate": 0.000243, "loss": 1.0199, "step": 411 }, { "epoch": 0.14, 
"grad_norm": 7.63018798828125, "learning_rate": 0.00024359999999999999, "loss": 0.9954, "step": 412 }, { "epoch": 0.14, "grad_norm": 6.816760540008545, "learning_rate": 0.00024419999999999997, "loss": 0.9977, "step": 413 }, { "epoch": 0.15, "grad_norm": 15.825918197631836, "learning_rate": 0.0002448, "loss": 3.4223, "step": 414 }, { "epoch": 0.15, "grad_norm": 5.994204998016357, "learning_rate": 0.00024539999999999995, "loss": 1.4219, "step": 415 }, { "epoch": 0.15, "grad_norm": 7.240597248077393, "learning_rate": 0.00024599999999999996, "loss": 1.4543, "step": 416 }, { "epoch": 0.15, "grad_norm": 4.940434455871582, "learning_rate": 0.0002466, "loss": 0.6018, "step": 417 }, { "epoch": 0.15, "grad_norm": 10.660022735595703, "learning_rate": 0.0002472, "loss": 1.586, "step": 418 }, { "epoch": 0.15, "grad_norm": 6.618219375610352, "learning_rate": 0.00024779999999999995, "loss": 1.0557, "step": 419 }, { "epoch": 0.15, "grad_norm": 5.79208517074585, "learning_rate": 0.00024839999999999997, "loss": 0.8003, "step": 420 }, { "epoch": 0.15, "grad_norm": 5.461696147918701, "learning_rate": 0.000249, "loss": 1.1184, "step": 421 }, { "epoch": 0.15, "grad_norm": 4.876696586608887, "learning_rate": 0.00024959999999999994, "loss": 0.8481, "step": 422 }, { "epoch": 0.15, "grad_norm": 12.002443313598633, "learning_rate": 0.00025019999999999996, "loss": 1.8712, "step": 423 }, { "epoch": 0.15, "grad_norm": 6.414361476898193, "learning_rate": 0.00025079999999999997, "loss": 1.0893, "step": 424 }, { "epoch": 0.15, "grad_norm": NaN, "learning_rate": 0.00025079999999999997, "loss": 0.2905, "step": 425 }, { "epoch": 0.15, "grad_norm": 5.970865249633789, "learning_rate": 0.0002514, "loss": 1.5082, "step": 426 }, { "epoch": 0.15, "grad_norm": 5.599332332611084, "learning_rate": 0.00025199999999999995, "loss": 1.8596, "step": 427 }, { "epoch": 0.15, "grad_norm": 5.235030651092529, "learning_rate": 0.00025259999999999996, "loss": 1.3534, "step": 428 }, { "epoch": 0.15, "grad_norm": 
5.113259792327881, "learning_rate": 0.0002532, "loss": 1.1064, "step": 429 }, { "epoch": 0.15, "grad_norm": 4.495222091674805, "learning_rate": 0.0002538, "loss": 1.2008, "step": 430 }, { "epoch": 0.15, "grad_norm": 4.514638900756836, "learning_rate": 0.00025439999999999995, "loss": 1.0261, "step": 431 }, { "epoch": 0.15, "grad_norm": 5.7902960777282715, "learning_rate": 0.00025499999999999996, "loss": 1.0724, "step": 432 }, { "epoch": 0.15, "grad_norm": 8.044351577758789, "learning_rate": 0.0002556, "loss": 1.1708, "step": 433 }, { "epoch": 0.15, "grad_norm": 5.15432071685791, "learning_rate": 0.0002562, "loss": 1.1937, "step": 434 }, { "epoch": 0.15, "grad_norm": 7.8752593994140625, "learning_rate": 0.00025679999999999995, "loss": 0.8926, "step": 435 }, { "epoch": 0.15, "grad_norm": 11.0122652053833, "learning_rate": 0.00025739999999999997, "loss": 0.9854, "step": 436 }, { "epoch": 0.15, "grad_norm": 6.583104133605957, "learning_rate": 0.000258, "loss": 1.3097, "step": 437 }, { "epoch": 0.15, "grad_norm": 5.033292770385742, "learning_rate": 0.0002586, "loss": 1.1236, "step": 438 }, { "epoch": 0.15, "grad_norm": 8.683174133300781, "learning_rate": 0.00025919999999999996, "loss": 1.2291, "step": 439 }, { "epoch": 0.15, "grad_norm": 5.946511268615723, "learning_rate": 0.00025979999999999997, "loss": 0.9983, "step": 440 }, { "epoch": 0.15, "grad_norm": 12.835111618041992, "learning_rate": 0.0002604, "loss": 1.512, "step": 441 }, { "epoch": 0.15, "grad_norm": 8.656193733215332, "learning_rate": 0.000261, "loss": 0.9368, "step": 442 }, { "epoch": 0.16, "grad_norm": 7.563745498657227, "learning_rate": 0.00026159999999999996, "loss": 1.2435, "step": 443 }, { "epoch": 0.16, "grad_norm": 6.136525630950928, "learning_rate": 0.0002622, "loss": 0.4889, "step": 444 }, { "epoch": 0.16, "grad_norm": 5.84512996673584, "learning_rate": 0.0002628, "loss": 0.8539, "step": 445 }, { "epoch": 0.16, "grad_norm": 6.192235946655273, "learning_rate": 0.00026339999999999995, "loss": 1.1632, 
"step": 446 }, { "epoch": 0.16, "grad_norm": 9.75316333770752, "learning_rate": 0.00026399999999999997, "loss": 1.2679, "step": 447 }, { "epoch": 0.16, "grad_norm": 5.024916648864746, "learning_rate": 0.0002646, "loss": 1.1505, "step": 448 }, { "epoch": 0.16, "grad_norm": 7.302039623260498, "learning_rate": 0.0002652, "loss": 1.0232, "step": 449 }, { "epoch": 0.16, "grad_norm": 5.526060104370117, "learning_rate": 0.00026579999999999996, "loss": 0.9171, "step": 450 }, { "epoch": 0.16, "grad_norm": 4.972250938415527, "learning_rate": 0.00026639999999999997, "loss": 1.7666, "step": 451 }, { "epoch": 0.16, "grad_norm": 5.7761054039001465, "learning_rate": 0.000267, "loss": 1.3188, "step": 452 }, { "epoch": 0.16, "grad_norm": 4.936556339263916, "learning_rate": 0.0002676, "loss": 0.8506, "step": 453 }, { "epoch": 0.16, "grad_norm": 4.347964286804199, "learning_rate": 0.00026819999999999996, "loss": 1.0834, "step": 454 }, { "epoch": 0.16, "grad_norm": 3.9066038131713867, "learning_rate": 0.0002688, "loss": 1.0587, "step": 455 }, { "epoch": 0.16, "grad_norm": 10.331660270690918, "learning_rate": 0.0002694, "loss": 1.6772, "step": 456 }, { "epoch": 0.16, "grad_norm": 6.844241142272949, "learning_rate": 0.00027, "loss": 1.0441, "step": 457 }, { "epoch": 0.16, "grad_norm": 7.055009365081787, "learning_rate": 0.00027059999999999996, "loss": 1.4951, "step": 458 }, { "epoch": 0.16, "grad_norm": 5.321764945983887, "learning_rate": 0.0002712, "loss": 0.9027, "step": 459 }, { "epoch": 0.16, "grad_norm": 6.612902641296387, "learning_rate": 0.0002718, "loss": 0.9197, "step": 460 }, { "epoch": 0.16, "grad_norm": 7.686680316925049, "learning_rate": 0.0002724, "loss": 0.8149, "step": 461 }, { "epoch": 0.16, "grad_norm": 5.893856048583984, "learning_rate": 0.00027299999999999997, "loss": 0.7485, "step": 462 }, { "epoch": 0.16, "grad_norm": 5.316108703613281, "learning_rate": 0.0002736, "loss": 0.8778, "step": 463 }, { "epoch": 0.16, "grad_norm": 8.641250610351562, "learning_rate": 
0.0002742, "loss": 1.4959, "step": 464 }, { "epoch": 0.16, "grad_norm": 6.9227447509765625, "learning_rate": 0.0002748, "loss": 0.9793, "step": 465 }, { "epoch": 0.16, "grad_norm": 17.23862075805664, "learning_rate": 0.00027539999999999997, "loss": 1.0452, "step": 466 }, { "epoch": 0.16, "grad_norm": 6.343098163604736, "learning_rate": 0.000276, "loss": 1.271, "step": 467 }, { "epoch": 0.16, "grad_norm": 9.62498664855957, "learning_rate": 0.0002766, "loss": 0.9764, "step": 468 }, { "epoch": 0.16, "grad_norm": 7.0901336669921875, "learning_rate": 0.0002772, "loss": 1.3291, "step": 469 }, { "epoch": 0.16, "grad_norm": 8.628947257995605, "learning_rate": 0.0002778, "loss": 1.2843, "step": 470 }, { "epoch": 0.17, "grad_norm": 6.520139217376709, "learning_rate": 0.0002784, "loss": 0.8886, "step": 471 }, { "epoch": 0.17, "grad_norm": 12.442567825317383, "learning_rate": 0.000279, "loss": 1.1537, "step": 472 }, { "epoch": 0.17, "grad_norm": 6.3325347900390625, "learning_rate": 0.00027959999999999997, "loss": 1.3338, "step": 473 }, { "epoch": 0.17, "grad_norm": 7.008511066436768, "learning_rate": 0.0002802, "loss": 1.2572, "step": 474 }, { "epoch": 0.17, "grad_norm": 10.975461959838867, "learning_rate": 0.0002808, "loss": 1.2421, "step": 475 }, { "epoch": 0.17, "grad_norm": 6.61250114440918, "learning_rate": 0.00028139999999999996, "loss": 2.1506, "step": 476 }, { "epoch": 0.17, "grad_norm": 4.0594916343688965, "learning_rate": 0.00028199999999999997, "loss": 1.5596, "step": 477 }, { "epoch": 0.17, "grad_norm": 4.184998989105225, "learning_rate": 0.0002826, "loss": 1.3744, "step": 478 }, { "epoch": 0.17, "grad_norm": 5.103187084197998, "learning_rate": 0.00028319999999999994, "loss": 0.8721, "step": 479 }, { "epoch": 0.17, "grad_norm": 5.862779140472412, "learning_rate": 0.00028379999999999996, "loss": 1.2812, "step": 480 }, { "epoch": 0.17, "grad_norm": 4.681532859802246, "learning_rate": 0.0002844, "loss": 0.7297, "step": 481 }, { "epoch": 0.17, "grad_norm": 
3.8014369010925293, "learning_rate": 0.000285, "loss": 0.6603, "step": 482 }, { "epoch": 0.17, "grad_norm": 6.400700092315674, "learning_rate": 0.00028559999999999995, "loss": 0.8617, "step": 483 }, { "epoch": 0.17, "grad_norm": 7.437506675720215, "learning_rate": 0.00028619999999999996, "loss": 0.9853, "step": 484 }, { "epoch": 0.17, "grad_norm": 7.865484237670898, "learning_rate": 0.0002868, "loss": 1.0431, "step": 485 }, { "epoch": 0.17, "grad_norm": 11.700081825256348, "learning_rate": 0.00028739999999999994, "loss": 2.4413, "step": 486 }, { "epoch": 0.17, "grad_norm": 6.08707332611084, "learning_rate": 0.00028799999999999995, "loss": 1.2467, "step": 487 }, { "epoch": 0.17, "grad_norm": 5.360887050628662, "learning_rate": 0.00028859999999999997, "loss": 1.1013, "step": 488 }, { "epoch": 0.17, "grad_norm": 5.402688980102539, "learning_rate": 0.0002892, "loss": 1.5754, "step": 489 }, { "epoch": 0.17, "grad_norm": 5.761510848999023, "learning_rate": 0.00028979999999999994, "loss": 1.1612, "step": 490 }, { "epoch": 0.17, "grad_norm": 4.469000339508057, "learning_rate": 0.00029039999999999996, "loss": 0.9344, "step": 491 }, { "epoch": 0.17, "grad_norm": 8.59809684753418, "learning_rate": 0.00029099999999999997, "loss": 1.902, "step": 492 }, { "epoch": 0.17, "grad_norm": 10.263930320739746, "learning_rate": 0.0002916, "loss": 0.6227, "step": 493 }, { "epoch": 0.17, "grad_norm": 5.8753981590271, "learning_rate": 0.00029219999999999995, "loss": 0.4528, "step": 494 }, { "epoch": 0.17, "grad_norm": 6.5920562744140625, "learning_rate": 0.00029279999999999996, "loss": 0.6706, "step": 495 }, { "epoch": 0.17, "grad_norm": 10.204047203063965, "learning_rate": 0.0002934, "loss": 1.4762, "step": 496 }, { "epoch": 0.17, "grad_norm": 14.272616386413574, "learning_rate": 0.000294, "loss": 2.0472, "step": 497 }, { "epoch": 0.17, "grad_norm": 8.78574275970459, "learning_rate": 0.00029459999999999995, "loss": 1.101, "step": 498 }, { "epoch": 0.17, "grad_norm": 6.776339530944824, 
"learning_rate": 0.00029519999999999997, "loss": 1.1936, "step": 499 }, { "epoch": 0.18, "grad_norm": 6.81951379776001, "learning_rate": 0.0002958, "loss": 1.9781, "step": 500 }, { "epoch": 0.18, "eval_loss": 1.4941623210906982, "eval_runtime": 50.4072, "eval_samples_per_second": 43.01, "eval_steps_per_second": 10.752, "eval_wer": 0.8929250994637606, "step": 500 }, { "epoch": 0.18, "grad_norm": 8.40855884552002, "learning_rate": 0.0002964, "loss": 2.3229, "step": 501 }, { "epoch": 0.18, "grad_norm": 9.991337776184082, "learning_rate": 0.00029699999999999996, "loss": 1.5519, "step": 502 }, { "epoch": 0.18, "grad_norm": 4.177870273590088, "learning_rate": 0.00029759999999999997, "loss": 1.3197, "step": 503 }, { "epoch": 0.18, "grad_norm": 4.229679107666016, "learning_rate": 0.0002982, "loss": 1.2668, "step": 504 }, { "epoch": 0.18, "grad_norm": 5.256344795227051, "learning_rate": 0.0002988, "loss": 1.2646, "step": 505 }, { "epoch": 0.18, "grad_norm": 3.3408243656158447, "learning_rate": 0.00029939999999999996, "loss": 0.8331, "step": 506 }, { "epoch": 0.18, "grad_norm": 3.4926090240478516, "learning_rate": 0.0003, "loss": 0.7081, "step": 507 }, { "epoch": 0.18, "grad_norm": 4.9362640380859375, "learning_rate": 0.0002999627883899777, "loss": 1.1574, "step": 508 }, { "epoch": 0.18, "grad_norm": 3.2679216861724854, "learning_rate": 0.0002999255767799553, "loss": 0.546, "step": 509 }, { "epoch": 0.18, "grad_norm": 10.322508811950684, "learning_rate": 0.000299888365169933, "loss": 0.9139, "step": 510 }, { "epoch": 0.18, "grad_norm": 4.090631484985352, "learning_rate": 0.0002998511535599107, "loss": 0.5386, "step": 511 }, { "epoch": 0.18, "grad_norm": 5.046153545379639, "learning_rate": 0.0002998139419498883, "loss": 0.7433, "step": 512 }, { "epoch": 0.18, "grad_norm": 10.831178665161133, "learning_rate": 0.00029977673033986603, "loss": 1.5149, "step": 513 }, { "epoch": 0.18, "grad_norm": 7.8567633628845215, "learning_rate": 0.0002997395187298437, "loss": 1.0058, "step": 
514 }, { "epoch": 0.18, "grad_norm": 4.131727695465088, "learning_rate": 0.0002997023071198214, "loss": 0.3814, "step": 515 }, { "epoch": 0.18, "grad_norm": 11.242587089538574, "learning_rate": 0.00029966509550979903, "loss": 1.0306, "step": 516 }, { "epoch": 0.18, "grad_norm": 8.567095756530762, "learning_rate": 0.0002996278838997767, "loss": 1.1139, "step": 517 }, { "epoch": 0.18, "grad_norm": 6.56288480758667, "learning_rate": 0.0002995906722897544, "loss": 1.6086, "step": 518 }, { "epoch": 0.18, "grad_norm": 7.899925231933594, "learning_rate": 0.00029955346067973203, "loss": 1.5415, "step": 519 }, { "epoch": 0.18, "grad_norm": 6.132180213928223, "learning_rate": 0.00029951624906970973, "loss": 1.2357, "step": 520 }, { "epoch": 0.18, "grad_norm": 4.827673435211182, "learning_rate": 0.00029947903745968743, "loss": 0.7701, "step": 521 }, { "epoch": 0.18, "grad_norm": 12.996684074401855, "learning_rate": 0.0002994418258496651, "loss": 1.4232, "step": 522 }, { "epoch": 0.18, "grad_norm": 6.13612174987793, "learning_rate": 0.00029940461423964273, "loss": 0.9823, "step": 523 }, { "epoch": 0.18, "grad_norm": 6.259670734405518, "learning_rate": 0.0002993674026296204, "loss": 1.2875, "step": 524 }, { "epoch": 0.18, "grad_norm": NaN, "learning_rate": 0.0002993674026296204, "loss": 0.694, "step": 525 }, { "epoch": 0.18, "grad_norm": 6.94905424118042, "learning_rate": 0.0002993301910195981, "loss": 1.9648, "step": 526 }, { "epoch": 0.18, "grad_norm": 3.61449933052063, "learning_rate": 0.0002992929794095758, "loss": 1.4221, "step": 527 }, { "epoch": 0.19, "grad_norm": 3.9974653720855713, "learning_rate": 0.00029925576779955344, "loss": 1.28, "step": 528 }, { "epoch": 0.19, "grad_norm": 4.458034992218018, "learning_rate": 0.00029921855618953114, "loss": 0.9629, "step": 529 }, { "epoch": 0.19, "grad_norm": 4.363546371459961, "learning_rate": 0.0002991813445795088, "loss": 0.8967, "step": 530 }, { "epoch": 0.19, "grad_norm": 6.293959140777588, "learning_rate": 
0.00029914413296948644, "loss": 1.022, "step": 531 }, { "epoch": 0.19, "grad_norm": 3.35537052154541, "learning_rate": 0.00029910692135946414, "loss": 0.6593, "step": 532 }, { "epoch": 0.19, "grad_norm": 4.888710021972656, "learning_rate": 0.0002990697097494418, "loss": 0.9094, "step": 533 }, { "epoch": 0.19, "grad_norm": 5.332656383514404, "learning_rate": 0.0002990324981394195, "loss": 1.7321, "step": 534 }, { "epoch": 0.19, "grad_norm": 8.324007034301758, "learning_rate": 0.00029899528652939714, "loss": 1.4225, "step": 535 }, { "epoch": 0.19, "grad_norm": 5.71950101852417, "learning_rate": 0.0002989580749193748, "loss": 0.626, "step": 536 }, { "epoch": 0.19, "grad_norm": 6.610879421234131, "learning_rate": 0.0002989208633093525, "loss": 1.0425, "step": 537 }, { "epoch": 0.19, "grad_norm": 6.266473770141602, "learning_rate": 0.00029888365169933014, "loss": 1.2166, "step": 538 }, { "epoch": 0.19, "grad_norm": 5.074027061462402, "learning_rate": 0.00029884644008930784, "loss": 0.9753, "step": 539 }, { "epoch": 0.19, "grad_norm": 5.849838733673096, "learning_rate": 0.00029880922847928554, "loss": 1.1584, "step": 540 }, { "epoch": 0.19, "grad_norm": 4.537852764129639, "learning_rate": 0.0002987720168692632, "loss": 0.907, "step": 541 }, { "epoch": 0.19, "grad_norm": 5.0589985847473145, "learning_rate": 0.00029873480525924084, "loss": 0.6799, "step": 542 }, { "epoch": 0.19, "grad_norm": 6.327163219451904, "learning_rate": 0.0002986975936492185, "loss": 1.8219, "step": 543 }, { "epoch": 0.19, "grad_norm": 7.557645797729492, "learning_rate": 0.0002986603820391962, "loss": 1.4392, "step": 544 }, { "epoch": 0.19, "grad_norm": 5.596197128295898, "learning_rate": 0.0002986231704291739, "loss": 1.1227, "step": 545 }, { "epoch": 0.19, "grad_norm": 5.67082405090332, "learning_rate": 0.00029858595881915155, "loss": 1.029, "step": 546 }, { "epoch": 0.19, "grad_norm": 7.202919006347656, "learning_rate": 0.00029854874720912925, "loss": 0.736, "step": 547 }, { "epoch": 0.19, 
"grad_norm": 6.42061185836792, "learning_rate": 0.0002985115355991069, "loss": 1.1307, "step": 548 }, { "epoch": 0.19, "grad_norm": 7.092513561248779, "learning_rate": 0.00029847432398908455, "loss": 1.2644, "step": 549 }, { "epoch": 0.19, "grad_norm": 5.857559680938721, "learning_rate": 0.00029843711237906225, "loss": 0.8481, "step": 550 }, { "epoch": 0.19, "grad_norm": 5.834782123565674, "learning_rate": 0.0002983999007690399, "loss": 1.6768, "step": 551 }, { "epoch": 0.19, "grad_norm": 6.073868751525879, "learning_rate": 0.0002983626891590176, "loss": 1.9901, "step": 552 }, { "epoch": 0.19, "grad_norm": 3.636265754699707, "learning_rate": 0.00029832547754899525, "loss": 0.855, "step": 553 }, { "epoch": 0.19, "grad_norm": 4.978489398956299, "learning_rate": 0.00029828826593897295, "loss": 1.5895, "step": 554 }, { "epoch": 0.19, "grad_norm": 3.5552754402160645, "learning_rate": 0.0002982510543289506, "loss": 1.0363, "step": 555 }, { "epoch": 0.19, "grad_norm": 6.47123908996582, "learning_rate": 0.00029821384271892825, "loss": 1.305, "step": 556 }, { "epoch": 0.2, "grad_norm": 3.921194076538086, "learning_rate": 0.00029817663110890595, "loss": 1.571, "step": 557 }, { "epoch": 0.2, "grad_norm": 3.359584331512451, "learning_rate": 0.00029813941949888366, "loss": 1.0663, "step": 558 }, { "epoch": 0.2, "grad_norm": 5.973052501678467, "learning_rate": 0.0002981022078888613, "loss": 1.1589, "step": 559 }, { "epoch": 0.2, "grad_norm": 6.5657124519348145, "learning_rate": 0.00029806499627883895, "loss": 0.8758, "step": 560 }, { "epoch": 0.2, "grad_norm": 3.143813371658325, "learning_rate": 0.00029802778466881666, "loss": 0.9989, "step": 561 }, { "epoch": 0.2, "grad_norm": 3.4931013584136963, "learning_rate": 0.0002979905730587943, "loss": 0.5586, "step": 562 }, { "epoch": 0.2, "grad_norm": 4.445590496063232, "learning_rate": 0.000297953361448772, "loss": 1.5847, "step": 563 }, { "epoch": 0.2, "grad_norm": 7.393503665924072, "learning_rate": 0.00029791614983874966, "loss": 
1.226, "step": 564 }, { "epoch": 0.2, "grad_norm": 4.212005138397217, "learning_rate": 0.00029787893822872736, "loss": 1.1496, "step": 565 }, { "epoch": 0.2, "grad_norm": 6.066720962524414, "learning_rate": 0.000297841726618705, "loss": 1.0976, "step": 566 }, { "epoch": 0.2, "grad_norm": 6.2609734535217285, "learning_rate": 0.00029780451500868266, "loss": 0.8392, "step": 567 }, { "epoch": 0.2, "grad_norm": 3.803570508956909, "learning_rate": 0.00029776730339866036, "loss": 0.7692, "step": 568 }, { "epoch": 0.2, "grad_norm": 5.159245491027832, "learning_rate": 0.000297730091788638, "loss": 0.8532, "step": 569 }, { "epoch": 0.2, "grad_norm": 4.429141998291016, "learning_rate": 0.0002976928801786157, "loss": 0.7445, "step": 570 }, { "epoch": 0.2, "grad_norm": 9.767818450927734, "learning_rate": 0.0002976556685685934, "loss": 1.5825, "step": 571 }, { "epoch": 0.2, "grad_norm": 6.809798717498779, "learning_rate": 0.00029761845695857106, "loss": 1.5131, "step": 572 }, { "epoch": 0.2, "grad_norm": 20.902917861938477, "learning_rate": 0.0002975812453485487, "loss": 1.8095, "step": 573 }, { "epoch": 0.2, "grad_norm": 5.394655704498291, "learning_rate": 0.0002975440337385264, "loss": 1.2016, "step": 574 }, { "epoch": 0.2, "grad_norm": NaN, "learning_rate": 0.0002975440337385264, "loss": 0.531, "step": 575 }, { "epoch": 0.2, "grad_norm": 9.339009284973145, "learning_rate": 0.00029750682212850406, "loss": 3.3416, "step": 576 }, { "epoch": 0.2, "grad_norm": 5.734882354736328, "learning_rate": 0.00029746961051848177, "loss": 2.2444, "step": 577 }, { "epoch": 0.2, "grad_norm": 3.9475908279418945, "learning_rate": 0.0002974323989084594, "loss": 1.2054, "step": 578 }, { "epoch": 0.2, "grad_norm": 3.528080463409424, "learning_rate": 0.00029739518729843706, "loss": 1.3752, "step": 579 }, { "epoch": 0.2, "grad_norm": 3.754209041595459, "learning_rate": 0.00029735797568841477, "loss": 0.7848, "step": 580 }, { "epoch": 0.2, "grad_norm": 3.216021776199341, "learning_rate": 
0.0002973207640783924, "loss": 0.6936, "step": 581 }, { "epoch": 0.2, "grad_norm": 3.249885082244873, "learning_rate": 0.0002972835524683701, "loss": 0.8303, "step": 582 }, { "epoch": 0.2, "grad_norm": 4.777367115020752, "learning_rate": 0.00029724634085834777, "loss": 0.6014, "step": 583 }, { "epoch": 0.2, "grad_norm": 9.312623023986816, "learning_rate": 0.00029720912924832547, "loss": 1.1901, "step": 584 }, { "epoch": 0.2, "grad_norm": 4.88458776473999, "learning_rate": 0.0002971719176383031, "loss": 1.1543, "step": 585 }, { "epoch": 0.21, "grad_norm": 4.8880615234375, "learning_rate": 0.00029713470602828077, "loss": 1.0548, "step": 586 }, { "epoch": 0.21, "grad_norm": 4.879009246826172, "learning_rate": 0.00029709749441825847, "loss": 1.0961, "step": 587 }, { "epoch": 0.21, "grad_norm": 4.066569805145264, "learning_rate": 0.0002970602828082361, "loss": 0.6876, "step": 588 }, { "epoch": 0.21, "grad_norm": 5.1321892738342285, "learning_rate": 0.0002970230711982138, "loss": 1.0765, "step": 589 }, { "epoch": 0.21, "grad_norm": 4.651081085205078, "learning_rate": 0.0002969858595881915, "loss": 0.6319, "step": 590 }, { "epoch": 0.21, "grad_norm": 4.676162242889404, "learning_rate": 0.00029694864797816917, "loss": 0.8857, "step": 591 }, { "epoch": 0.21, "grad_norm": 4.472525119781494, "learning_rate": 0.0002969114363681468, "loss": 1.2666, "step": 592 }, { "epoch": 0.21, "grad_norm": 5.793959140777588, "learning_rate": 0.0002968742247581245, "loss": 0.8785, "step": 593 }, { "epoch": 0.21, "grad_norm": 7.9074931144714355, "learning_rate": 0.00029683701314810217, "loss": 1.4696, "step": 594 }, { "epoch": 0.21, "grad_norm": 4.401146411895752, "learning_rate": 0.0002967998015380799, "loss": 0.5815, "step": 595 }, { "epoch": 0.21, "grad_norm": 7.1119513511657715, "learning_rate": 0.0002967625899280575, "loss": 1.0864, "step": 596 }, { "epoch": 0.21, "grad_norm": 7.709166049957275, "learning_rate": 0.0002967253783180352, "loss": 1.6677, "step": 597 }, { "epoch": 0.21, 
"grad_norm": 7.580686092376709, "learning_rate": 0.0002966881667080129, "loss": 1.2696, "step": 598 }, { "epoch": 0.21, "grad_norm": 5.200621128082275, "learning_rate": 0.0002966509550979905, "loss": 0.6652, "step": 599 }, { "epoch": 0.21, "grad_norm": 8.517735481262207, "learning_rate": 0.0002966137434879682, "loss": 1.5409, "step": 600 }, { "epoch": 0.21, "eval_loss": 1.319638729095459, "eval_runtime": 51.1269, "eval_samples_per_second": 42.404, "eval_steps_per_second": 10.601, "eval_wer": 0.909271752291991, "step": 600 }, { "epoch": 0.21, "grad_norm": 12.722813606262207, "learning_rate": 0.0002965765318779459, "loss": 2.4553, "step": 601 }, { "epoch": 0.21, "grad_norm": 5.727902412414551, "learning_rate": 0.0002965393202679236, "loss": 1.4027, "step": 602 }, { "epoch": 0.21, "grad_norm": 6.360785007476807, "learning_rate": 0.00029650210865790123, "loss": 1.1641, "step": 603 }, { "epoch": 0.21, "grad_norm": 4.100715637207031, "learning_rate": 0.0002964648970478789, "loss": 1.0047, "step": 604 }, { "epoch": 0.21, "grad_norm": 5.119365692138672, "learning_rate": 0.0002964276854378566, "loss": 1.1648, "step": 605 }, { "epoch": 0.21, "grad_norm": 5.546939373016357, "learning_rate": 0.0002963904738278343, "loss": 1.3271, "step": 606 }, { "epoch": 0.21, "grad_norm": 4.981604099273682, "learning_rate": 0.00029635326221781193, "loss": 1.2142, "step": 607 }, { "epoch": 0.21, "grad_norm": 6.839823246002197, "learning_rate": 0.00029631605060778963, "loss": 1.6792, "step": 608 }, { "epoch": 0.21, "grad_norm": 7.334500789642334, "learning_rate": 0.0002962788389977673, "loss": 1.2355, "step": 609 }, { "epoch": 0.21, "grad_norm": 4.874765872955322, "learning_rate": 0.00029624162738774493, "loss": 0.9184, "step": 610 }, { "epoch": 0.21, "grad_norm": 3.3265280723571777, "learning_rate": 0.00029620441577772263, "loss": 0.6558, "step": 611 }, { "epoch": 0.21, "grad_norm": 3.5789058208465576, "learning_rate": 0.0002961672041677003, "loss": 0.8824, "step": 612 }, { "epoch": 0.21, 
"grad_norm": 3.2460458278656006, "learning_rate": 0.000296129992557678, "loss": 1.0423, "step": 613 }, { "epoch": 0.22, "grad_norm": 6.92147970199585, "learning_rate": 0.00029609278094765563, "loss": 0.7398, "step": 614 }, { "epoch": 0.22, "grad_norm": 3.5256998538970947, "learning_rate": 0.00029605556933763334, "loss": 0.6854, "step": 615 }, { "epoch": 0.22, "grad_norm": 4.17782735824585, "learning_rate": 0.000296018357727611, "loss": 1.0298, "step": 616 }, { "epoch": 0.22, "grad_norm": 8.018060684204102, "learning_rate": 0.00029598114611758863, "loss": 2.2126, "step": 617 }, { "epoch": 0.22, "grad_norm": 7.263321876525879, "learning_rate": 0.00029594393450756634, "loss": 0.9248, "step": 618 }, { "epoch": 0.22, "grad_norm": 6.241823196411133, "learning_rate": 0.00029590672289754404, "loss": 1.1756, "step": 619 }, { "epoch": 0.22, "grad_norm": 4.013612747192383, "learning_rate": 0.0002958695112875217, "loss": 0.882, "step": 620 }, { "epoch": 0.22, "grad_norm": 4.5317792892456055, "learning_rate": 0.00029583229967749934, "loss": 1.0661, "step": 621 }, { "epoch": 0.22, "grad_norm": 8.019999504089355, "learning_rate": 0.00029579508806747704, "loss": 0.9743, "step": 622 }, { "epoch": 0.22, "grad_norm": 10.188430786132812, "learning_rate": 0.0002957578764574547, "loss": 1.3912, "step": 623 }, { "epoch": 0.22, "grad_norm": 6.105098724365234, "learning_rate": 0.0002957206648474324, "loss": 0.8787, "step": 624 }, { "epoch": 0.22, "grad_norm": 8.475005149841309, "learning_rate": 0.00029568345323741004, "loss": 2.6934, "step": 625 }, { "epoch": 0.22, "grad_norm": 3.846712589263916, "learning_rate": 0.00029564624162738774, "loss": 1.4757, "step": 626 }, { "epoch": 0.22, "grad_norm": 3.2802951335906982, "learning_rate": 0.0002956090300173654, "loss": 0.9426, "step": 627 }, { "epoch": 0.22, "grad_norm": 3.0451314449310303, "learning_rate": 0.00029557181840734304, "loss": 0.8553, "step": 628 }, { "epoch": 0.22, "grad_norm": 4.837835788726807, "learning_rate": 
0.00029553460679732074, "loss": 1.0035, "step": 629 }, { "epoch": 0.22, "grad_norm": 3.3272557258605957, "learning_rate": 0.0002954973951872984, "loss": 1.7012, "step": 630 }, { "epoch": 0.22, "grad_norm": 3.1126511096954346, "learning_rate": 0.0002954601835772761, "loss": 0.8654, "step": 631 }, { "epoch": 0.22, "grad_norm": 4.995132923126221, "learning_rate": 0.00029542297196725374, "loss": 0.9708, "step": 632 }, { "epoch": 0.22, "grad_norm": 5.542963027954102, "learning_rate": 0.00029538576035723145, "loss": 1.0861, "step": 633 }, { "epoch": 0.22, "grad_norm": 3.722038984298706, "learning_rate": 0.0002953485487472091, "loss": 1.012, "step": 634 }, { "epoch": 0.22, "grad_norm": 4.468753337860107, "learning_rate": 0.00029531133713718674, "loss": 1.139, "step": 635 }, { "epoch": 0.22, "grad_norm": 4.822729110717773, "learning_rate": 0.00029527412552716445, "loss": 1.1173, "step": 636 }, { "epoch": 0.22, "grad_norm": 4.884435653686523, "learning_rate": 0.00029523691391714215, "loss": 1.0669, "step": 637 }, { "epoch": 0.22, "grad_norm": 4.150437831878662, "learning_rate": 0.0002951997023071198, "loss": 0.871, "step": 638 }, { "epoch": 0.22, "grad_norm": 4.12093448638916, "learning_rate": 0.0002951624906970975, "loss": 0.6693, "step": 639 }, { "epoch": 0.22, "grad_norm": 7.429576396942139, "learning_rate": 0.00029512527908707515, "loss": 1.0517, "step": 640 }, { "epoch": 0.22, "grad_norm": 3.6653037071228027, "learning_rate": 0.0002950880674770528, "loss": 0.8529, "step": 641 }, { "epoch": 0.22, "grad_norm": 2.8426244258880615, "learning_rate": 0.0002950508558670305, "loss": 0.7732, "step": 642 }, { "epoch": 0.23, "grad_norm": 7.198141574859619, "learning_rate": 0.00029501364425700815, "loss": 1.6585, "step": 643 }, { "epoch": 0.23, "grad_norm": 7.202881813049316, "learning_rate": 0.00029497643264698585, "loss": 1.4548, "step": 644 }, { "epoch": 0.23, "grad_norm": 6.224870681762695, "learning_rate": 0.0002949392210369635, "loss": 1.6397, "step": 645 }, { "epoch": 0.23, 
"grad_norm": 3.892850160598755, "learning_rate": 0.00029490200942694115, "loss": 1.0224, "step": 646 }, { "epoch": 0.23, "grad_norm": 6.112697601318359, "learning_rate": 0.00029486479781691885, "loss": 0.8937, "step": 647 }, { "epoch": 0.23, "grad_norm": 10.07210636138916, "learning_rate": 0.0002948275862068965, "loss": 1.0152, "step": 648 }, { "epoch": 0.23, "grad_norm": 6.240150451660156, "learning_rate": 0.0002947903745968742, "loss": 1.0351, "step": 649 }, { "epoch": 0.23, "grad_norm": 9.268518447875977, "learning_rate": 0.0002947531629868519, "loss": 0.9157, "step": 650 }, { "epoch": 0.23, "grad_norm": 4.466054916381836, "learning_rate": 0.00029471595137682956, "loss": 1.6634, "step": 651 }, { "epoch": 0.23, "grad_norm": 5.009162425994873, "learning_rate": 0.0002946787397668072, "loss": 1.2699, "step": 652 }, { "epoch": 0.23, "grad_norm": 4.312174320220947, "learning_rate": 0.00029464152815678485, "loss": 1.2239, "step": 653 }, { "epoch": 0.23, "grad_norm": 3.121635675430298, "learning_rate": 0.00029460431654676256, "loss": 0.9369, "step": 654 }, { "epoch": 0.23, "grad_norm": 4.073437213897705, "learning_rate": 0.00029456710493674026, "loss": 1.1005, "step": 655 }, { "epoch": 0.23, "grad_norm": 3.3540802001953125, "learning_rate": 0.0002945298933267179, "loss": 1.0565, "step": 656 }, { "epoch": 0.23, "grad_norm": 4.2247724533081055, "learning_rate": 0.0002944926817166956, "loss": 0.7415, "step": 657 }, { "epoch": 0.23, "grad_norm": 3.133108139038086, "learning_rate": 0.00029445547010667326, "loss": 0.4355, "step": 658 }, { "epoch": 0.23, "grad_norm": 5.775803089141846, "learning_rate": 0.0002944182584966509, "loss": 1.0846, "step": 659 }, { "epoch": 0.23, "grad_norm": 6.800411701202393, "learning_rate": 0.0002943810468866286, "loss": 1.1723, "step": 660 }, { "epoch": 0.23, "grad_norm": 5.831652641296387, "learning_rate": 0.00029434383527660626, "loss": 0.8722, "step": 661 }, { "epoch": 0.23, "grad_norm": 5.7799882888793945, "learning_rate": 
0.00029430662366658396, "loss": 1.0813, "step": 662 }, { "epoch": 0.23, "grad_norm": 9.425108909606934, "learning_rate": 0.0002942694120565616, "loss": 2.4706, "step": 663 }, { "epoch": 0.23, "grad_norm": 7.258337020874023, "learning_rate": 0.0002942322004465393, "loss": 1.0422, "step": 664 }, { "epoch": 0.23, "grad_norm": 6.805200576782227, "learning_rate": 0.00029419498883651696, "loss": 0.8724, "step": 665 }, { "epoch": 0.23, "grad_norm": 5.374764442443848, "learning_rate": 0.0002941577772264946, "loss": 0.8653, "step": 666 }, { "epoch": 0.23, "grad_norm": 30.488962173461914, "learning_rate": 0.0002941205656164723, "loss": 2.5592, "step": 667 }, { "epoch": 0.23, "grad_norm": 9.162358283996582, "learning_rate": 0.00029408335400645, "loss": 1.6403, "step": 668 }, { "epoch": 0.23, "grad_norm": 6.208140850067139, "learning_rate": 0.00029404614239642767, "loss": 1.0371, "step": 669 }, { "epoch": 0.23, "grad_norm": 4.24813175201416, "learning_rate": 0.0002940089307864053, "loss": 0.6602, "step": 670 }, { "epoch": 0.24, "grad_norm": 6.219293594360352, "learning_rate": 0.00029397171917638296, "loss": 1.2641, "step": 671 }, { "epoch": 0.24, "grad_norm": 10.14699935913086, "learning_rate": 0.00029393450756636067, "loss": 1.5108, "step": 672 }, { "epoch": 0.24, "grad_norm": 8.945874214172363, "learning_rate": 0.00029389729595633837, "loss": 0.8659, "step": 673 }, { "epoch": 0.24, "grad_norm": 9.979119300842285, "learning_rate": 0.000293860084346316, "loss": 0.6198, "step": 674 }, { "epoch": 0.24, "grad_norm": 10.116988182067871, "learning_rate": 0.0002938228727362937, "loss": 1.1682, "step": 675 }, { "epoch": 0.24, "grad_norm": 7.378312587738037, "learning_rate": 0.00029378566112627137, "loss": 2.0674, "step": 676 }, { "epoch": 0.24, "grad_norm": 7.4542012214660645, "learning_rate": 0.000293748449516249, "loss": 1.8939, "step": 677 }, { "epoch": 0.24, "grad_norm": 6.61478328704834, "learning_rate": 0.0002937112379062267, "loss": 1.6356, "step": 678 }, { "epoch": 0.24, 
"grad_norm": 5.97426176071167, "learning_rate": 0.00029367402629620437, "loss": 0.8941, "step": 679 }, { "epoch": 0.24, "grad_norm": 3.164785385131836, "learning_rate": 0.0002936368146861821, "loss": 0.6823, "step": 680 }, { "epoch": 0.24, "grad_norm": 4.324411869049072, "learning_rate": 0.0002935996030761598, "loss": 0.6235, "step": 681 }, { "epoch": 0.24, "grad_norm": 4.255089282989502, "learning_rate": 0.0002935623914661374, "loss": 0.9023, "step": 682 }, { "epoch": 0.24, "grad_norm": 4.362627029418945, "learning_rate": 0.0002935251798561151, "loss": 1.3556, "step": 683 }, { "epoch": 0.24, "grad_norm": 3.432969570159912, "learning_rate": 0.0002934879682460927, "loss": 0.8282, "step": 684 }, { "epoch": 0.24, "grad_norm": 2.972658395767212, "learning_rate": 0.0002934507566360704, "loss": 1.0565, "step": 685 }, { "epoch": 0.24, "grad_norm": 4.701770305633545, "learning_rate": 0.00029341354502604813, "loss": 1.2406, "step": 686 }, { "epoch": 0.24, "grad_norm": 4.701326370239258, "learning_rate": 0.0002933763334160258, "loss": 0.7059, "step": 687 }, { "epoch": 0.24, "grad_norm": 3.311460256576538, "learning_rate": 0.0002933391218060034, "loss": 0.9308, "step": 688 }, { "epoch": 0.24, "grad_norm": 5.932095050811768, "learning_rate": 0.00029330191019598113, "loss": 1.8754, "step": 689 }, { "epoch": 0.24, "grad_norm": 4.051765441894531, "learning_rate": 0.0002932646985859588, "loss": 0.7799, "step": 690 }, { "epoch": 0.24, "grad_norm": 4.459011554718018, "learning_rate": 0.0002932274869759365, "loss": 0.8546, "step": 691 }, { "epoch": 0.24, "grad_norm": 4.098698616027832, "learning_rate": 0.00029319027536591413, "loss": 0.5279, "step": 692 }, { "epoch": 0.24, "grad_norm": 4.640591621398926, "learning_rate": 0.00029315306375589183, "loss": 0.9102, "step": 693 }, { "epoch": 0.24, "grad_norm": 6.9347004890441895, "learning_rate": 0.0002931158521458695, "loss": 0.7036, "step": 694 }, { "epoch": 0.24, "grad_norm": 4.845668792724609, "learning_rate": 0.00029307864053584713, 
"loss": 0.5378, "step": 695 }, { "epoch": 0.24, "grad_norm": 5.8146281242370605, "learning_rate": 0.00029304142892582483, "loss": 1.1422, "step": 696 }, { "epoch": 0.24, "grad_norm": 6.487931251525879, "learning_rate": 0.0002930042173158025, "loss": 1.0314, "step": 697 }, { "epoch": 0.24, "grad_norm": 6.625868797302246, "learning_rate": 0.0002929670057057802, "loss": 1.5303, "step": 698 }, { "epoch": 0.24, "grad_norm": 14.07421588897705, "learning_rate": 0.0002929297940957579, "loss": 2.0771, "step": 699 }, { "epoch": 0.25, "grad_norm": 7.190564155578613, "learning_rate": 0.00029289258248573554, "loss": 1.1797, "step": 700 }, { "epoch": 0.25, "eval_loss": 1.2068666219711304, "eval_runtime": 51.0804, "eval_samples_per_second": 42.443, "eval_steps_per_second": 10.611, "eval_wer": 0.8056564608199274, "step": 700 }, { "epoch": 0.25, "grad_norm": 6.680876731872559, "learning_rate": 0.0002928553708757132, "loss": 2.1408, "step": 701 }, { "epoch": 0.25, "grad_norm": 3.9025561809539795, "learning_rate": 0.00029281815926569083, "loss": 1.5205, "step": 702 }, { "epoch": 0.25, "grad_norm": 4.40712833404541, "learning_rate": 0.00029278094765566854, "loss": 1.4955, "step": 703 }, { "epoch": 0.25, "grad_norm": 3.2378780841827393, "learning_rate": 0.00029274373604564624, "loss": 0.9419, "step": 704 }, { "epoch": 0.25, "grad_norm": 3.353944778442383, "learning_rate": 0.0002927065244356239, "loss": 1.0087, "step": 705 }, { "epoch": 0.25, "grad_norm": 4.185000896453857, "learning_rate": 0.0002926693128256016, "loss": 1.2567, "step": 706 }, { "epoch": 0.25, "grad_norm": 3.718811511993408, "learning_rate": 0.00029263210121557924, "loss": 0.8007, "step": 707 }, { "epoch": 0.25, "grad_norm": 4.834765911102295, "learning_rate": 0.0002925948896055569, "loss": 0.9665, "step": 708 }, { "epoch": 0.25, "grad_norm": 5.415790557861328, "learning_rate": 0.0002925576779955346, "loss": 0.6987, "step": 709 }, { "epoch": 0.25, "grad_norm": 3.706892490386963, "learning_rate": 0.00029252046638551224, 
"loss": 0.7334, "step": 710 }, { "epoch": 0.25, "grad_norm": 3.0536246299743652, "learning_rate": 0.00029248325477548994, "loss": 0.9511, "step": 711 }, { "epoch": 0.25, "grad_norm": 4.1888933181762695, "learning_rate": 0.0002924460431654676, "loss": 1.2429, "step": 712 }, { "epoch": 0.25, "grad_norm": 3.904407262802124, "learning_rate": 0.0002924088315554453, "loss": 1.1109, "step": 713 }, { "epoch": 0.25, "grad_norm": 4.773108005523682, "learning_rate": 0.00029237161994542294, "loss": 0.9709, "step": 714 }, { "epoch": 0.25, "grad_norm": 2.608194351196289, "learning_rate": 0.0002923344083354006, "loss": 0.402, "step": 715 }, { "epoch": 0.25, "grad_norm": 5.155666351318359, "learning_rate": 0.0002922971967253783, "loss": 1.4452, "step": 716 }, { "epoch": 0.25, "grad_norm": 11.795857429504395, "learning_rate": 0.000292259985115356, "loss": 3.5267, "step": 717 }, { "epoch": 0.25, "grad_norm": 4.475924491882324, "learning_rate": 0.00029222277350533365, "loss": 0.6192, "step": 718 }, { "epoch": 0.25, "grad_norm": 4.139604568481445, "learning_rate": 0.0002921855618953113, "loss": 0.7032, "step": 719 }, { "epoch": 0.25, "grad_norm": 5.846983909606934, "learning_rate": 0.00029214835028528894, "loss": 1.1969, "step": 720 }, { "epoch": 0.25, "grad_norm": 4.0570387840271, "learning_rate": 0.00029211113867526665, "loss": 0.6138, "step": 721 }, { "epoch": 0.25, "grad_norm": 4.584964275360107, "learning_rate": 0.00029207392706524435, "loss": 0.6882, "step": 722 }, { "epoch": 0.25, "grad_norm": 8.10388469696045, "learning_rate": 0.000292036715455222, "loss": 0.3923, "step": 723 }, { "epoch": 0.25, "grad_norm": 6.056919097900391, "learning_rate": 0.0002919995038451997, "loss": 1.0729, "step": 724 }, { "epoch": 0.25, "grad_norm": 7.191409111022949, "learning_rate": 0.00029196229223517735, "loss": 1.0135, "step": 725 }, { "epoch": 0.25, "grad_norm": 6.211596488952637, "learning_rate": 0.000291925080625155, "loss": 2.1364, "step": 726 }, { "epoch": 0.25, "grad_norm": 
3.2215120792388916, "learning_rate": 0.0002918878690151327, "loss": 0.9194, "step": 727 }, { "epoch": 0.26, "grad_norm": 3.045462131500244, "learning_rate": 0.00029185065740511035, "loss": 0.7808, "step": 728 }, { "epoch": 0.26, "grad_norm": 2.9927256107330322, "learning_rate": 0.00029181344579508805, "loss": 0.7217, "step": 729 }, { "epoch": 0.26, "grad_norm": 3.7817091941833496, "learning_rate": 0.0002917762341850657, "loss": 0.724, "step": 730 }, { "epoch": 0.26, "grad_norm": 3.1122050285339355, "learning_rate": 0.0002917390225750434, "loss": 0.7772, "step": 731 }, { "epoch": 0.26, "grad_norm": 4.8279852867126465, "learning_rate": 0.00029170181096502105, "loss": 1.2088, "step": 732 }, { "epoch": 0.26, "grad_norm": 3.453472852706909, "learning_rate": 0.0002916645993549987, "loss": 0.6257, "step": 733 }, { "epoch": 0.26, "grad_norm": 4.543291091918945, "learning_rate": 0.0002916273877449764, "loss": 0.7873, "step": 734 }, { "epoch": 0.26, "grad_norm": 3.899169445037842, "learning_rate": 0.0002915901761349541, "loss": 0.7794, "step": 735 }, { "epoch": 0.26, "grad_norm": 4.045598030090332, "learning_rate": 0.00029155296452493176, "loss": 0.4835, "step": 736 }, { "epoch": 0.26, "grad_norm": 4.990403175354004, "learning_rate": 0.0002915157529149094, "loss": 1.0899, "step": 737 }, { "epoch": 0.26, "grad_norm": 7.7346110343933105, "learning_rate": 0.0002914785413048871, "loss": 1.0977, "step": 738 }, { "epoch": 0.26, "grad_norm": 3.3284356594085693, "learning_rate": 0.00029144132969486476, "loss": 0.4629, "step": 739 }, { "epoch": 0.26, "grad_norm": 3.3941850662231445, "learning_rate": 0.00029140411808484246, "loss": 0.9461, "step": 740 }, { "epoch": 0.26, "grad_norm": 3.9711191654205322, "learning_rate": 0.0002913669064748201, "loss": 1.1025, "step": 741 }, { "epoch": 0.26, "grad_norm": 19.457284927368164, "learning_rate": 0.0002913296948647978, "loss": 2.8168, "step": 742 }, { "epoch": 0.26, "grad_norm": 5.4924750328063965, "learning_rate": 0.00029129248325477546, 
"loss": 1.3301, "step": 743 }, { "epoch": 0.26, "grad_norm": 5.154421806335449, "learning_rate": 0.0002912552716447531, "loss": 0.8412, "step": 744 }, { "epoch": 0.26, "grad_norm": 5.290253162384033, "learning_rate": 0.0002912180600347308, "loss": 0.9267, "step": 745 }, { "epoch": 0.26, "grad_norm": 4.45554780960083, "learning_rate": 0.00029118084842470846, "loss": 0.6853, "step": 746 }, { "epoch": 0.26, "grad_norm": 5.069665431976318, "learning_rate": 0.00029114363681468616, "loss": 0.6652, "step": 747 }, { "epoch": 0.26, "grad_norm": 4.831662178039551, "learning_rate": 0.00029110642520466387, "loss": 0.7098, "step": 748 }, { "epoch": 0.26, "grad_norm": 4.927679538726807, "learning_rate": 0.0002910692135946415, "loss": 0.7609, "step": 749 }, { "epoch": 0.26, "grad_norm": 3.876534938812256, "learning_rate": 0.00029103200198461916, "loss": 0.7453, "step": 750 }, { "epoch": 0.26, "grad_norm": 4.562718868255615, "learning_rate": 0.00029099479037459687, "loss": 1.7246, "step": 751 }, { "epoch": 0.26, "grad_norm": 3.7739195823669434, "learning_rate": 0.0002909575787645745, "loss": 1.3101, "step": 752 }, { "epoch": 0.26, "grad_norm": 3.139188051223755, "learning_rate": 0.0002909203671545522, "loss": 1.1718, "step": 753 }, { "epoch": 0.26, "grad_norm": 2.7190756797790527, "learning_rate": 0.00029088315554452987, "loss": 0.7607, "step": 754 }, { "epoch": 0.26, "grad_norm": 3.391061544418335, "learning_rate": 0.00029084594393450757, "loss": 0.937, "step": 755 }, { "epoch": 0.26, "grad_norm": 2.2688794136047363, "learning_rate": 0.0002908087323244852, "loss": 0.4747, "step": 756 }, { "epoch": 0.27, "grad_norm": 2.645151376724243, "learning_rate": 0.00029077152071446287, "loss": 0.6512, "step": 757 }, { "epoch": 0.27, "grad_norm": 5.204065322875977, "learning_rate": 0.00029073430910444057, "loss": 0.7823, "step": 758 }, { "epoch": 0.27, "grad_norm": 4.213620185852051, "learning_rate": 0.0002906970974944182, "loss": 1.1063, "step": 759 }, { "epoch": 0.27, "grad_norm": 
7.5675129890441895, "learning_rate": 0.0002906598858843959, "loss": 1.9301, "step": 760 }, { "epoch": 0.27, "grad_norm": 3.451794385910034, "learning_rate": 0.00029062267427437357, "loss": 0.7759, "step": 761 }, { "epoch": 0.27, "grad_norm": 3.535918712615967, "learning_rate": 0.0002905854626643512, "loss": 0.7944, "step": 762 }, { "epoch": 0.27, "grad_norm": 5.344732761383057, "learning_rate": 0.0002905482510543289, "loss": 1.0658, "step": 763 }, { "epoch": 0.27, "grad_norm": 3.685420274734497, "learning_rate": 0.0002905110394443066, "loss": 0.5017, "step": 764 }, { "epoch": 0.27, "grad_norm": 5.2186055183410645, "learning_rate": 0.00029047382783428427, "loss": 2.056, "step": 765 }, { "epoch": 0.27, "grad_norm": 6.418582439422607, "learning_rate": 0.000290436616224262, "loss": 1.7033, "step": 766 }, { "epoch": 0.27, "grad_norm": 6.189699649810791, "learning_rate": 0.0002903994046142396, "loss": 1.5274, "step": 767 }, { "epoch": 0.27, "grad_norm": 6.971052646636963, "learning_rate": 0.00029036219300421727, "loss": 0.889, "step": 768 }, { "epoch": 0.27, "grad_norm": 5.264997959136963, "learning_rate": 0.000290324981394195, "loss": 0.7537, "step": 769 }, { "epoch": 0.27, "grad_norm": 9.451274871826172, "learning_rate": 0.0002902877697841726, "loss": 0.6874, "step": 770 }, { "epoch": 0.27, "grad_norm": 4.780513763427734, "learning_rate": 0.0002902505581741503, "loss": 0.9426, "step": 771 }, { "epoch": 0.27, "grad_norm": 7.542695999145508, "learning_rate": 0.000290213346564128, "loss": 1.5722, "step": 772 }, { "epoch": 0.27, "grad_norm": 9.267386436462402, "learning_rate": 0.0002901761349541057, "loss": 0.9906, "step": 773 }, { "epoch": 0.27, "grad_norm": 4.18009090423584, "learning_rate": 0.00029013892334408333, "loss": 0.9282, "step": 774 }, { "epoch": 0.27, "grad_norm": 15.079828262329102, "learning_rate": 0.000290101711734061, "loss": 2.3483, "step": 775 }, { "epoch": 0.27, "grad_norm": 6.639895439147949, "learning_rate": 0.0002900645001240387, "loss": 1.7343, 
"step": 776 }, { "epoch": 0.27, "grad_norm": 5.052302360534668, "learning_rate": 0.00029002728851401633, "loss": 1.4465, "step": 777 }, { "epoch": 0.27, "grad_norm": 4.000107765197754, "learning_rate": 0.00028999007690399403, "loss": 0.9901, "step": 778 }, { "epoch": 0.27, "grad_norm": 3.0668177604675293, "learning_rate": 0.0002899528652939717, "loss": 0.8454, "step": 779 }, { "epoch": 0.27, "grad_norm": 2.6663155555725098, "learning_rate": 0.0002899156536839494, "loss": 0.7529, "step": 780 }, { "epoch": 0.27, "grad_norm": 3.555816650390625, "learning_rate": 0.00028987844207392703, "loss": 0.6824, "step": 781 }, { "epoch": 0.27, "grad_norm": 4.055764675140381, "learning_rate": 0.00028984123046390473, "loss": 1.1432, "step": 782 }, { "epoch": 0.27, "grad_norm": 4.053997039794922, "learning_rate": 0.0002898040188538824, "loss": 1.0801, "step": 783 }, { "epoch": 0.27, "grad_norm": 3.9936132431030273, "learning_rate": 0.0002897668072438601, "loss": 1.2037, "step": 784 }, { "epoch": 0.28, "grad_norm": 4.756950378417969, "learning_rate": 0.00028972959563383773, "loss": 1.5041, "step": 785 }, { "epoch": 0.28, "grad_norm": 3.913256883621216, "learning_rate": 0.0002896923840238154, "loss": 0.6958, "step": 786 }, { "epoch": 0.28, "grad_norm": 4.463769912719727, "learning_rate": 0.0002896551724137931, "loss": 1.1124, "step": 787 }, { "epoch": 0.28, "grad_norm": 4.305164813995361, "learning_rate": 0.00028961796080377073, "loss": 1.2586, "step": 788 }, { "epoch": 0.28, "grad_norm": 2.605264663696289, "learning_rate": 0.00028958074919374844, "loss": 0.4304, "step": 789 }, { "epoch": 0.28, "grad_norm": 5.45980167388916, "learning_rate": 0.0002895435375837261, "loss": 1.613, "step": 790 }, { "epoch": 0.28, "grad_norm": 4.24216365814209, "learning_rate": 0.0002895063259737038, "loss": 1.0244, "step": 791 }, { "epoch": 0.28, "grad_norm": 3.666222333908081, "learning_rate": 0.00028946911436368144, "loss": 0.6473, "step": 792 }, { "epoch": 0.28, "grad_norm": 4.788506031036377, 
"learning_rate": 0.0002894319027536591, "loss": 0.8939, "step": 793 }, { "epoch": 0.28, "grad_norm": 4.17887020111084, "learning_rate": 0.0002893946911436368, "loss": 1.0748, "step": 794 }, { "epoch": 0.28, "grad_norm": 5.128133773803711, "learning_rate": 0.0002893574795336145, "loss": 0.864, "step": 795 }, { "epoch": 0.28, "grad_norm": 5.645290851593018, "learning_rate": 0.00028932026792359214, "loss": 1.656, "step": 796 }, { "epoch": 0.28, "grad_norm": 5.542060375213623, "learning_rate": 0.00028928305631356984, "loss": 1.016, "step": 797 }, { "epoch": 0.28, "grad_norm": 3.639885425567627, "learning_rate": 0.0002892458447035475, "loss": 0.6474, "step": 798 }, { "epoch": 0.28, "grad_norm": 4.543883800506592, "learning_rate": 0.00028920863309352514, "loss": 0.895, "step": 799 }, { "epoch": 0.28, "grad_norm": 6.362172603607178, "learning_rate": 0.00028917142148350284, "loss": 2.4732, "step": 800 }, { "epoch": 0.28, "eval_loss": 0.987212598323822, "eval_runtime": 50.7349, "eval_samples_per_second": 42.732, "eval_steps_per_second": 10.683, "eval_wer": 0.7189932537623248, "step": 800 }, { "epoch": 0.28, "grad_norm": 4.333866596221924, "learning_rate": 0.0002891342098734805, "loss": 1.3953, "step": 801 }, { "epoch": 0.28, "grad_norm": 4.874297142028809, "learning_rate": 0.0002890969982634582, "loss": 1.6125, "step": 802 }, { "epoch": 0.28, "grad_norm": 4.766399383544922, "learning_rate": 0.00028905978665343584, "loss": 1.0827, "step": 803 }, { "epoch": 0.28, "grad_norm": 4.576672077178955, "learning_rate": 0.0002890225750434135, "loss": 0.8876, "step": 804 }, { "epoch": 0.28, "grad_norm": 2.9895853996276855, "learning_rate": 0.0002889853634333912, "loss": 0.7372, "step": 805 }, { "epoch": 0.28, "grad_norm": 5.91171407699585, "learning_rate": 0.00028894815182336884, "loss": 2.1011, "step": 806 }, { "epoch": 0.28, "grad_norm": 5.903454303741455, "learning_rate": 0.00028891094021334655, "loss": 1.009, "step": 807 }, { "epoch": 0.28, "grad_norm": 5.659839630126953, 
"learning_rate": 0.00028887372860332425, "loss": 0.8499, "step": 808 }, { "epoch": 0.28, "grad_norm": 5.591513156890869, "learning_rate": 0.0002888365169933019, "loss": 0.9237, "step": 809 }, { "epoch": 0.28, "grad_norm": 6.837412357330322, "learning_rate": 0.00028879930538327955, "loss": 1.2858, "step": 810 }, { "epoch": 0.28, "grad_norm": 5.4030585289001465, "learning_rate": 0.0002887620937732572, "loss": 1.3576, "step": 811 }, { "epoch": 0.28, "grad_norm": 4.567882537841797, "learning_rate": 0.0002887248821632349, "loss": 1.4714, "step": 812 }, { "epoch": 0.28, "grad_norm": 3.555302381515503, "learning_rate": 0.0002886876705532126, "loss": 0.4524, "step": 813 }, { "epoch": 0.29, "grad_norm": 2.767725706100464, "learning_rate": 0.00028865045894319025, "loss": 0.4047, "step": 814 }, { "epoch": 0.29, "grad_norm": 4.69365930557251, "learning_rate": 0.00028861324733316795, "loss": 0.835, "step": 815 }, { "epoch": 0.29, "grad_norm": 6.788897514343262, "learning_rate": 0.0002885760357231456, "loss": 1.5947, "step": 816 }, { "epoch": 0.29, "grad_norm": 3.98279070854187, "learning_rate": 0.00028853882411312325, "loss": 0.8821, "step": 817 }, { "epoch": 0.29, "grad_norm": 4.854119777679443, "learning_rate": 0.00028850161250310095, "loss": 0.9184, "step": 818 }, { "epoch": 0.29, "grad_norm": 4.863973617553711, "learning_rate": 0.0002884644008930786, "loss": 0.9386, "step": 819 }, { "epoch": 0.29, "grad_norm": 7.1155266761779785, "learning_rate": 0.0002884271892830563, "loss": 1.128, "step": 820 }, { "epoch": 0.29, "grad_norm": 5.065074920654297, "learning_rate": 0.00028838997767303395, "loss": 0.7205, "step": 821 }, { "epoch": 0.29, "grad_norm": 4.343176364898682, "learning_rate": 0.00028835276606301166, "loss": 0.6354, "step": 822 }, { "epoch": 0.29, "grad_norm": 5.124356746673584, "learning_rate": 0.0002883155544529893, "loss": 0.6844, "step": 823 }, { "epoch": 0.29, "grad_norm": 5.882658958435059, "learning_rate": 0.00028827834284296695, "loss": 1.1918, "step": 824 }, { 
"epoch": 0.29, "grad_norm": 6.812358379364014, "learning_rate": 0.00028824113123294466, "loss": 1.0781, "step": 825 }, { "epoch": 0.29, "grad_norm": 4.14019250869751, "learning_rate": 0.00028820391962292236, "loss": 1.4099, "step": 826 }, { "epoch": 0.29, "grad_norm": 4.570350646972656, "learning_rate": 0.0002881667080129, "loss": 1.0887, "step": 827 }, { "epoch": 0.29, "grad_norm": 3.517451047897339, "learning_rate": 0.00028812949640287766, "loss": 0.6863, "step": 828 }, { "epoch": 0.29, "grad_norm": 5.128011703491211, "learning_rate": 0.0002880922847928553, "loss": 0.8951, "step": 829 }, { "epoch": 0.29, "grad_norm": 6.206776142120361, "learning_rate": 0.000288055073182833, "loss": 0.6493, "step": 830 }, { "epoch": 0.29, "grad_norm": 2.9472367763519287, "learning_rate": 0.0002880178615728107, "loss": 0.6847, "step": 831 }, { "epoch": 0.29, "grad_norm": 4.013580799102783, "learning_rate": 0.00028798064996278836, "loss": 0.553, "step": 832 }, { "epoch": 0.29, "grad_norm": 2.9942123889923096, "learning_rate": 0.00028794343835276606, "loss": 1.1586, "step": 833 }, { "epoch": 0.29, "grad_norm": 5.667693138122559, "learning_rate": 0.0002879062267427437, "loss": 1.1743, "step": 834 }, { "epoch": 0.29, "grad_norm": 3.7603282928466797, "learning_rate": 0.00028786901513272136, "loss": 0.8572, "step": 835 }, { "epoch": 0.29, "grad_norm": 3.591860294342041, "learning_rate": 0.00028783180352269906, "loss": 0.6201, "step": 836 }, { "epoch": 0.29, "grad_norm": 4.3808979988098145, "learning_rate": 0.0002877945919126767, "loss": 0.7408, "step": 837 }, { "epoch": 0.29, "grad_norm": 2.802478313446045, "learning_rate": 0.0002877573803026544, "loss": 0.5719, "step": 838 }, { "epoch": 0.29, "grad_norm": 3.7019598484039307, "learning_rate": 0.0002877201686926321, "loss": 0.7113, "step": 839 }, { "epoch": 0.29, "grad_norm": 4.928816318511963, "learning_rate": 0.00028768295708260977, "loss": 1.0754, "step": 840 }, { "epoch": 0.29, "grad_norm": 4.269992828369141, "learning_rate": 
0.0002876457454725874, "loss": 0.5499, "step": 841 }, { "epoch": 0.3, "grad_norm": 5.93079948425293, "learning_rate": 0.00028760853386256506, "loss": 0.9921, "step": 842 }, { "epoch": 0.3, "grad_norm": 3.9162845611572266, "learning_rate": 0.00028757132225254277, "loss": 0.4579, "step": 843 }, { "epoch": 0.3, "grad_norm": 4.143979549407959, "learning_rate": 0.00028753411064252047, "loss": 0.5102, "step": 844 }, { "epoch": 0.3, "grad_norm": 7.756710052490234, "learning_rate": 0.0002874968990324981, "loss": 0.9789, "step": 845 }, { "epoch": 0.3, "grad_norm": 9.213879585266113, "learning_rate": 0.00028745968742247577, "loss": 1.778, "step": 846 }, { "epoch": 0.3, "grad_norm": 6.540843486785889, "learning_rate": 0.00028742247581245347, "loss": 2.657, "step": 847 }, { "epoch": 0.3, "grad_norm": 3.8558125495910645, "learning_rate": 0.0002873852642024311, "loss": 0.6151, "step": 848 }, { "epoch": 0.3, "grad_norm": 4.576344013214111, "learning_rate": 0.0002873480525924088, "loss": 0.6595, "step": 849 }, { "epoch": 0.3, "grad_norm": 10.87994384765625, "learning_rate": 0.00028731084098238647, "loss": 0.8652, "step": 850 }, { "epoch": 0.3, "grad_norm": 3.8747811317443848, "learning_rate": 0.0002872736293723642, "loss": 1.7178, "step": 851 }, { "epoch": 0.3, "grad_norm": 4.656328201293945, "learning_rate": 0.0002872364177623418, "loss": 1.8586, "step": 852 }, { "epoch": 0.3, "grad_norm": 3.2285523414611816, "learning_rate": 0.00028719920615231947, "loss": 1.324, "step": 853 }, { "epoch": 0.3, "grad_norm": 5.688818454742432, "learning_rate": 0.0002871619945422972, "loss": 1.2527, "step": 854 }, { "epoch": 0.3, "grad_norm": 4.996300220489502, "learning_rate": 0.0002871247829322748, "loss": 0.8493, "step": 855 }, { "epoch": 0.3, "grad_norm": 4.3291850090026855, "learning_rate": 0.0002870875713222525, "loss": 0.9196, "step": 856 }, { "epoch": 0.3, "grad_norm": 3.986222267150879, "learning_rate": 0.00028705035971223023, "loss": 0.9148, "step": 857 }, { "epoch": 0.3, "grad_norm": 
3.2459797859191895, "learning_rate": 0.0002870131481022079, "loss": 0.6929, "step": 858 }, { "epoch": 0.3, "grad_norm": 4.315959930419922, "learning_rate": 0.0002869759364921855, "loss": 0.9603, "step": 859 }, { "epoch": 0.3, "grad_norm": 3.7050576210021973, "learning_rate": 0.0002869387248821632, "loss": 0.9597, "step": 860 }, { "epoch": 0.3, "grad_norm": 3.6986582279205322, "learning_rate": 0.0002869015132721409, "loss": 0.8796, "step": 861 }, { "epoch": 0.3, "grad_norm": 2.9992449283599854, "learning_rate": 0.0002868643016621186, "loss": 0.7877, "step": 862 }, { "epoch": 0.3, "grad_norm": 3.221742630004883, "learning_rate": 0.00028682709005209623, "loss": 1.0128, "step": 863 }, { "epoch": 0.3, "grad_norm": 3.570732593536377, "learning_rate": 0.00028678987844207393, "loss": 0.5447, "step": 864 }, { "epoch": 0.3, "grad_norm": 5.575841903686523, "learning_rate": 0.0002867526668320516, "loss": 1.1387, "step": 865 }, { "epoch": 0.3, "grad_norm": 3.252793788909912, "learning_rate": 0.00028671545522202923, "loss": 0.659, "step": 866 }, { "epoch": 0.3, "grad_norm": 4.882000923156738, "learning_rate": 0.00028667824361200693, "loss": 0.4831, "step": 867 }, { "epoch": 0.3, "grad_norm": 4.8467326164245605, "learning_rate": 0.0002866410320019846, "loss": 1.0795, "step": 868 }, { "epoch": 0.3, "grad_norm": 6.496115684509277, "learning_rate": 0.0002866038203919623, "loss": 1.1627, "step": 869 }, { "epoch": 0.3, "grad_norm": 4.330075263977051, "learning_rate": 0.00028656660878193993, "loss": 0.5286, "step": 870 }, { "epoch": 0.31, "grad_norm": 7.284979820251465, "learning_rate": 0.0002865293971719176, "loss": 1.3597, "step": 871 }, { "epoch": 0.31, "grad_norm": 3.145815372467041, "learning_rate": 0.0002864921855618953, "loss": 0.5407, "step": 872 }, { "epoch": 0.31, "grad_norm": 6.272894382476807, "learning_rate": 0.00028645497395187293, "loss": 1.104, "step": 873 }, { "epoch": 0.31, "grad_norm": 5.079260349273682, "learning_rate": 0.00028641776234185064, "loss": 0.9572, 
"step": 874 }, { "epoch": 0.31, "grad_norm": 9.606317520141602, "learning_rate": 0.00028638055073182834, "loss": 0.9342, "step": 875 }, { "epoch": 0.31, "grad_norm": 4.756997585296631, "learning_rate": 0.000286343339121806, "loss": 1.7561, "step": 876 }, { "epoch": 0.31, "grad_norm": 3.379587173461914, "learning_rate": 0.00028630612751178364, "loss": 0.8307, "step": 877 }, { "epoch": 0.31, "grad_norm": 2.6264638900756836, "learning_rate": 0.0002862689159017613, "loss": 0.7802, "step": 878 }, { "epoch": 0.31, "grad_norm": 4.880773544311523, "learning_rate": 0.000286231704291739, "loss": 0.7768, "step": 879 }, { "epoch": 0.31, "grad_norm": 4.7575883865356445, "learning_rate": 0.0002861944926817167, "loss": 1.4659, "step": 880 }, { "epoch": 0.31, "grad_norm": 3.334516763687134, "learning_rate": 0.00028615728107169434, "loss": 1.3035, "step": 881 }, { "epoch": 0.31, "grad_norm": 3.547193765640259, "learning_rate": 0.00028612006946167204, "loss": 0.5167, "step": 882 }, { "epoch": 0.31, "grad_norm": 2.4741361141204834, "learning_rate": 0.0002860828578516497, "loss": 0.582, "step": 883 }, { "epoch": 0.31, "grad_norm": 3.9844954013824463, "learning_rate": 0.00028604564624162734, "loss": 0.5924, "step": 884 }, { "epoch": 0.31, "grad_norm": 2.942441940307617, "learning_rate": 0.00028600843463160504, "loss": 0.6893, "step": 885 }, { "epoch": 0.31, "grad_norm": 4.320768356323242, "learning_rate": 0.0002859712230215827, "loss": 0.8256, "step": 886 }, { "epoch": 0.31, "grad_norm": 4.074968338012695, "learning_rate": 0.0002859340114115604, "loss": 1.0719, "step": 887 }, { "epoch": 0.31, "grad_norm": 3.55584716796875, "learning_rate": 0.00028589679980153804, "loss": 1.0453, "step": 888 }, { "epoch": 0.31, "grad_norm": 4.58350133895874, "learning_rate": 0.00028585958819151575, "loss": 0.4654, "step": 889 }, { "epoch": 0.31, "grad_norm": 5.6324543952941895, "learning_rate": 0.0002858223765814934, "loss": 1.0127, "step": 890 }, { "epoch": 0.31, "grad_norm": 4.809018611907959, 
"learning_rate": 0.00028578516497147104, "loss": 1.1784, "step": 891 }, { "epoch": 0.31, "grad_norm": 4.3140645027160645, "learning_rate": 0.00028574795336144875, "loss": 0.9088, "step": 892 }, { "epoch": 0.31, "grad_norm": 5.170782566070557, "learning_rate": 0.00028571074175142645, "loss": 1.2303, "step": 893 }, { "epoch": 0.31, "grad_norm": 3.350752592086792, "learning_rate": 0.0002856735301414041, "loss": 0.8323, "step": 894 }, { "epoch": 0.31, "grad_norm": 4.265800476074219, "learning_rate": 0.00028563631853138175, "loss": 0.9239, "step": 895 }, { "epoch": 0.31, "grad_norm": 3.861973762512207, "learning_rate": 0.00028559910692135945, "loss": 0.5933, "step": 896 }, { "epoch": 0.31, "grad_norm": 5.536797046661377, "learning_rate": 0.0002855618953113371, "loss": 0.5246, "step": 897 }, { "epoch": 0.31, "grad_norm": 6.882984638214111, "learning_rate": 0.0002855246837013148, "loss": 0.782, "step": 898 }, { "epoch": 0.31, "grad_norm": 3.6057496070861816, "learning_rate": 0.00028548747209129245, "loss": 0.8962, "step": 899 }, { "epoch": 0.32, "grad_norm": 10.800071716308594, "learning_rate": 0.00028545026048127015, "loss": 0.6917, "step": 900 }, { "epoch": 0.32, "eval_loss": 0.9631061553955078, "eval_runtime": 51.078, "eval_samples_per_second": 42.445, "eval_steps_per_second": 10.611, "eval_wer": 0.7361183186299948, "step": 900 }, { "epoch": 0.32, "grad_norm": 3.3842124938964844, "learning_rate": 0.0002854130488712478, "loss": 1.3777, "step": 901 }, { "epoch": 0.32, "grad_norm": 3.1951818466186523, "learning_rate": 0.00028537583726122545, "loss": 1.0767, "step": 902 }, { "epoch": 0.32, "grad_norm": 2.5450639724731445, "learning_rate": 0.00028533862565120315, "loss": 0.714, "step": 903 }, { "epoch": 0.32, "grad_norm": 2.42718505859375, "learning_rate": 0.0002853014140411808, "loss": 0.5684, "step": 904 }, { "epoch": 0.32, "grad_norm": 5.6511759757995605, "learning_rate": 0.0002852642024311585, "loss": 1.1985, "step": 905 }, { "epoch": 0.32, "grad_norm": 
3.286999225616455, "learning_rate": 0.0002852269908211362, "loss": 0.9587, "step": 906 }, { "epoch": 0.32, "grad_norm": 3.237370014190674, "learning_rate": 0.00028518977921111386, "loss": 0.4929, "step": 907 }, { "epoch": 0.32, "grad_norm": 3.4097900390625, "learning_rate": 0.0002851525676010915, "loss": 0.8108, "step": 908 }, { "epoch": 0.32, "grad_norm": 3.5256292819976807, "learning_rate": 0.00028511535599106915, "loss": 1.0193, "step": 909 }, { "epoch": 0.32, "grad_norm": 3.9239373207092285, "learning_rate": 0.00028507814438104686, "loss": 1.0205, "step": 910 }, { "epoch": 0.32, "grad_norm": 6.755927085876465, "learning_rate": 0.00028504093277102456, "loss": 1.2051, "step": 911 }, { "epoch": 0.32, "grad_norm": 2.922299385070801, "learning_rate": 0.0002850037211610022, "loss": 0.6658, "step": 912 }, { "epoch": 0.32, "grad_norm": 3.0863943099975586, "learning_rate": 0.00028496650955097986, "loss": 0.8485, "step": 913 }, { "epoch": 0.32, "grad_norm": 3.762369394302368, "learning_rate": 0.00028492929794095756, "loss": 0.8154, "step": 914 }, { "epoch": 0.32, "grad_norm": 2.955242395401001, "learning_rate": 0.0002848920863309352, "loss": 0.8395, "step": 915 }, { "epoch": 0.32, "grad_norm": 4.491308212280273, "learning_rate": 0.0002848548747209129, "loss": 0.7699, "step": 916 }, { "epoch": 0.32, "grad_norm": 6.529202938079834, "learning_rate": 0.00028481766311089056, "loss": 2.1599, "step": 917 }, { "epoch": 0.32, "grad_norm": 2.4373815059661865, "learning_rate": 0.00028478045150086826, "loss": 0.3231, "step": 918 }, { "epoch": 0.32, "grad_norm": 6.301473140716553, "learning_rate": 0.0002847432398908459, "loss": 0.4571, "step": 919 }, { "epoch": 0.32, "grad_norm": 4.855682373046875, "learning_rate": 0.00028470602828082356, "loss": 0.5254, "step": 920 }, { "epoch": 0.32, "grad_norm": 3.8739993572235107, "learning_rate": 0.00028466881667080126, "loss": 0.5581, "step": 921 }, { "epoch": 0.32, "grad_norm": 5.096916198730469, "learning_rate": 0.0002846316050607789, "loss": 
0.8322, "step": 922 }, { "epoch": 0.32, "grad_norm": 6.568998336791992, "learning_rate": 0.0002845943934507566, "loss": 0.4273, "step": 923 }, { "epoch": 0.32, "grad_norm": 6.560142993927002, "learning_rate": 0.0002845571818407343, "loss": 0.9331, "step": 924 }, { "epoch": 0.32, "grad_norm": 3.5637927055358887, "learning_rate": 0.00028451997023071197, "loss": 0.3868, "step": 925 }, { "epoch": 0.32, "grad_norm": 6.0603837966918945, "learning_rate": 0.0002844827586206896, "loss": 1.7794, "step": 926 }, { "epoch": 0.32, "grad_norm": 6.264075756072998, "learning_rate": 0.0002844455470106673, "loss": 1.1889, "step": 927 }, { "epoch": 0.33, "grad_norm": 4.174050331115723, "learning_rate": 0.00028440833540064497, "loss": 0.9896, "step": 928 }, { "epoch": 0.33, "grad_norm": 2.8910465240478516, "learning_rate": 0.00028437112379062267, "loss": 0.7981, "step": 929 }, { "epoch": 0.33, "grad_norm": 3.8150928020477295, "learning_rate": 0.0002843339121806003, "loss": 0.9314, "step": 930 }, { "epoch": 0.33, "grad_norm": 5.244769096374512, "learning_rate": 0.000284296700570578, "loss": 0.7626, "step": 931 }, { "epoch": 0.33, "grad_norm": 2.8552732467651367, "learning_rate": 0.00028425948896055567, "loss": 0.6992, "step": 932 }, { "epoch": 0.33, "grad_norm": 3.5984230041503906, "learning_rate": 0.0002842222773505333, "loss": 1.0394, "step": 933 }, { "epoch": 0.33, "grad_norm": 7.442532062530518, "learning_rate": 0.000284185065740511, "loss": 1.0194, "step": 934 }, { "epoch": 0.33, "grad_norm": 4.221444606781006, "learning_rate": 0.00028414785413048867, "loss": 1.116, "step": 935 }, { "epoch": 0.33, "grad_norm": 3.7174394130706787, "learning_rate": 0.00028411064252046637, "loss": 1.2957, "step": 936 }, { "epoch": 0.33, "grad_norm": 4.641437530517578, "learning_rate": 0.000284073430910444, "loss": 0.9645, "step": 937 }, { "epoch": 0.33, "grad_norm": 3.738492965698242, "learning_rate": 0.0002840362193004217, "loss": 1.0473, "step": 938 }, { "epoch": 0.33, "grad_norm": 
4.242377758026123, "learning_rate": 0.00028399900769039937, "loss": 0.9279, "step": 939 }, { "epoch": 0.33, "grad_norm": 5.320493221282959, "learning_rate": 0.0002839617960803771, "loss": 0.6088, "step": 940 }, { "epoch": 0.33, "grad_norm": 4.363595962524414, "learning_rate": 0.0002839245844703547, "loss": 0.9191, "step": 941 }, { "epoch": 0.33, "grad_norm": 3.4770991802215576, "learning_rate": 0.0002838873728603324, "loss": 0.6281, "step": 942 }, { "epoch": 0.33, "grad_norm": 7.542201519012451, "learning_rate": 0.0002838501612503101, "loss": 2.1469, "step": 943 }, { "epoch": 0.33, "grad_norm": 7.0690202713012695, "learning_rate": 0.0002838129496402877, "loss": 1.5071, "step": 944 }, { "epoch": 0.33, "grad_norm": 5.570958614349365, "learning_rate": 0.00028377573803026543, "loss": 0.7142, "step": 945 }, { "epoch": 0.33, "grad_norm": 6.251632213592529, "learning_rate": 0.0002837385264202431, "loss": 1.2639, "step": 946 }, { "epoch": 0.33, "grad_norm": 4.675748348236084, "learning_rate": 0.0002837013148102208, "loss": 0.9949, "step": 947 }, { "epoch": 0.33, "grad_norm": 4.878772258758545, "learning_rate": 0.00028366410320019843, "loss": 0.9438, "step": 948 }, { "epoch": 0.33, "grad_norm": 4.873948574066162, "learning_rate": 0.00028362689159017613, "loss": 0.8105, "step": 949 }, { "epoch": 0.33, "grad_norm": 2.9942407608032227, "learning_rate": 0.0002835896799801538, "loss": 0.7128, "step": 950 }, { "epoch": 0.33, "grad_norm": 4.542831897735596, "learning_rate": 0.00028355246837013143, "loss": 1.2922, "step": 951 }, { "epoch": 0.33, "grad_norm": 2.6374423503875732, "learning_rate": 0.00028351525676010913, "loss": 0.912, "step": 952 }, { "epoch": 0.33, "grad_norm": 3.016441822052002, "learning_rate": 0.00028347804515008683, "loss": 0.9093, "step": 953 }, { "epoch": 0.33, "grad_norm": 2.8477137088775635, "learning_rate": 0.0002834408335400645, "loss": 0.7698, "step": 954 }, { "epoch": 0.33, "grad_norm": 4.108532428741455, "learning_rate": 0.00028340362193004213, "loss": 
1.0006, "step": 955 }, { "epoch": 0.33, "grad_norm": 5.331216335296631, "learning_rate": 0.00028336641032001983, "loss": 1.0243, "step": 956 }, { "epoch": 0.34, "grad_norm": 3.312037467956543, "learning_rate": 0.0002833291987099975, "loss": 0.6803, "step": 957 }, { "epoch": 0.34, "grad_norm": 3.1434242725372314, "learning_rate": 0.0002832919870999752, "loss": 0.3665, "step": 958 }, { "epoch": 0.34, "grad_norm": 5.74772834777832, "learning_rate": 0.00028325477548995283, "loss": 1.1212, "step": 959 }, { "epoch": 0.34, "grad_norm": 3.111370325088501, "learning_rate": 0.00028321756387993054, "loss": 0.4247, "step": 960 }, { "epoch": 0.34, "grad_norm": 4.159219741821289, "learning_rate": 0.0002831803522699082, "loss": 0.8041, "step": 961 }, { "epoch": 0.34, "grad_norm": 5.435075759887695, "learning_rate": 0.00028314314065988583, "loss": 2.0447, "step": 962 }, { "epoch": 0.34, "grad_norm": 4.009119987487793, "learning_rate": 0.00028310592904986354, "loss": 0.5044, "step": 963 }, { "epoch": 0.34, "grad_norm": 3.7699248790740967, "learning_rate": 0.0002830687174398412, "loss": 0.3857, "step": 964 }, { "epoch": 0.34, "grad_norm": 2.6067960262298584, "learning_rate": 0.0002830315058298189, "loss": 0.3094, "step": 965 }, { "epoch": 0.34, "grad_norm": 2.571464776992798, "learning_rate": 0.00028299429421979654, "loss": 0.3871, "step": 966 }, { "epoch": 0.34, "grad_norm": 9.788469314575195, "learning_rate": 0.00028295708260977424, "loss": 1.292, "step": 967 }, { "epoch": 0.34, "grad_norm": 4.8277058601379395, "learning_rate": 0.0002829198709997519, "loss": 0.5882, "step": 968 }, { "epoch": 0.34, "grad_norm": 5.459136486053467, "learning_rate": 0.00028288265938972954, "loss": 1.0885, "step": 969 }, { "epoch": 0.34, "grad_norm": 3.053255558013916, "learning_rate": 0.00028284544777970724, "loss": 0.4897, "step": 970 }, { "epoch": 0.34, "grad_norm": 4.948587417602539, "learning_rate": 0.00028280823616968494, "loss": 1.4657, "step": 971 }, { "epoch": 0.34, "grad_norm": 
6.1781206130981445, "learning_rate": 0.0002827710245596626, "loss": 0.903, "step": 972 }, { "epoch": 0.34, "grad_norm": 7.313625812530518, "learning_rate": 0.0002827338129496403, "loss": 2.1275, "step": 973 }, { "epoch": 0.34, "grad_norm": 5.631922721862793, "learning_rate": 0.00028269660133961794, "loss": 1.1911, "step": 974 }, { "epoch": 0.34, "grad_norm": 11.282684326171875, "learning_rate": 0.0002826593897295956, "loss": 2.9956, "step": 975 }, { "epoch": 0.34, "grad_norm": 7.1073317527771, "learning_rate": 0.0002826221781195733, "loss": 2.037, "step": 976 }, { "epoch": 0.34, "grad_norm": 18.51368522644043, "learning_rate": 0.00028258496650955094, "loss": 2.3501, "step": 977 }, { "epoch": 0.34, "grad_norm": 3.6810758113861084, "learning_rate": 0.00028254775489952865, "loss": 1.236, "step": 978 }, { "epoch": 0.34, "grad_norm": 3.3135673999786377, "learning_rate": 0.0002825105432895063, "loss": 0.8256, "step": 979 }, { "epoch": 0.34, "grad_norm": 3.1581242084503174, "learning_rate": 0.000282473331679484, "loss": 0.7498, "step": 980 }, { "epoch": 0.34, "grad_norm": 3.0917932987213135, "learning_rate": 0.00028243612006946165, "loss": 0.7558, "step": 981 }, { "epoch": 0.34, "grad_norm": 3.050999164581299, "learning_rate": 0.0002823989084594393, "loss": 0.7646, "step": 982 }, { "epoch": 0.34, "grad_norm": 4.006269454956055, "learning_rate": 0.000282361696849417, "loss": 0.5423, "step": 983 }, { "epoch": 0.34, "grad_norm": 3.775635242462158, "learning_rate": 0.0002823244852393947, "loss": 0.5903, "step": 984 }, { "epoch": 0.35, "grad_norm": 2.8768789768218994, "learning_rate": 0.00028228727362937235, "loss": 0.8378, "step": 985 }, { "epoch": 0.35, "grad_norm": 5.105539321899414, "learning_rate": 0.00028225006201935, "loss": 0.7609, "step": 986 }, { "epoch": 0.35, "grad_norm": 3.927220344543457, "learning_rate": 0.00028221285040932765, "loss": 0.5829, "step": 987 }, { "epoch": 0.35, "grad_norm": 4.363030433654785, "learning_rate": 0.00028217563879930535, "loss": 0.5635, 
"step": 988 }, { "epoch": 0.35, "grad_norm": 3.495129108428955, "learning_rate": 0.00028213842718928305, "loss": 0.8921, "step": 989 }, { "epoch": 0.35, "grad_norm": 4.773660659790039, "learning_rate": 0.0002821012155792607, "loss": 1.5913, "step": 990 }, { "epoch": 0.35, "grad_norm": 3.911088228225708, "learning_rate": 0.0002820640039692384, "loss": 1.5924, "step": 991 }, { "epoch": 0.35, "grad_norm": 4.539827823638916, "learning_rate": 0.00028202679235921605, "loss": 0.8482, "step": 992 }, { "epoch": 0.35, "grad_norm": 3.734384298324585, "learning_rate": 0.0002819895807491937, "loss": 0.8819, "step": 993 }, { "epoch": 0.35, "grad_norm": 5.120278835296631, "learning_rate": 0.0002819523691391714, "loss": 0.8981, "step": 994 }, { "epoch": 0.35, "grad_norm": 4.272894382476807, "learning_rate": 0.00028191515752914905, "loss": 0.7838, "step": 995 }, { "epoch": 0.35, "grad_norm": 5.84804105758667, "learning_rate": 0.00028187794591912676, "loss": 0.46, "step": 996 }, { "epoch": 0.35, "grad_norm": 9.430734634399414, "learning_rate": 0.0002818407343091044, "loss": 0.6851, "step": 997 }, { "epoch": 0.35, "grad_norm": 3.4790775775909424, "learning_rate": 0.0002818035226990821, "loss": 0.6983, "step": 998 }, { "epoch": 0.35, "grad_norm": 3.3269646167755127, "learning_rate": 0.00028176631108905976, "loss": 0.2755, "step": 999 }, { "epoch": 0.35, "grad_norm": 9.969862937927246, "learning_rate": 0.0002817290994790374, "loss": 1.5394, "step": 1000 }, { "epoch": 0.35, "eval_loss": 0.8975178599357605, "eval_runtime": 51.1763, "eval_samples_per_second": 42.363, "eval_steps_per_second": 10.591, "eval_wer": 0.7244421380384016, "step": 1000 }, { "epoch": 0.35, "grad_norm": 8.002113342285156, "learning_rate": 0.0002816918878690151, "loss": 1.4136, "step": 1001 }, { "epoch": 0.35, "grad_norm": 2.763845920562744, "learning_rate": 0.0002816546762589928, "loss": 0.6863, "step": 1002 }, { "epoch": 0.35, "grad_norm": 3.5052077770233154, "learning_rate": 0.00028161746464897046, "loss": 1.2483, 
"step": 1003 }, { "epoch": 0.35, "grad_norm": 3.184037685394287, "learning_rate": 0.0002815802530389481, "loss": 0.7632, "step": 1004 }, { "epoch": 0.35, "grad_norm": 2.645569086074829, "learning_rate": 0.0002815430414289258, "loss": 0.5727, "step": 1005 }, { "epoch": 0.35, "grad_norm": 2.4027628898620605, "learning_rate": 0.00028150582981890346, "loss": 0.5045, "step": 1006 }, { "epoch": 0.35, "grad_norm": 3.2467432022094727, "learning_rate": 0.00028146861820888116, "loss": 0.4922, "step": 1007 }, { "epoch": 0.35, "grad_norm": 3.64123272895813, "learning_rate": 0.0002814314065988588, "loss": 0.5161, "step": 1008 }, { "epoch": 0.35, "grad_norm": 3.1751201152801514, "learning_rate": 0.0002813941949888365, "loss": 1.003, "step": 1009 }, { "epoch": 0.35, "grad_norm": 3.5006515979766846, "learning_rate": 0.00028135698337881416, "loss": 0.6148, "step": 1010 }, { "epoch": 0.35, "grad_norm": 2.372243881225586, "learning_rate": 0.0002813197717687918, "loss": 0.5718, "step": 1011 }, { "epoch": 0.35, "grad_norm": 5.657968044281006, "learning_rate": 0.0002812825601587695, "loss": 1.1868, "step": 1012 }, { "epoch": 0.35, "grad_norm": 4.172140598297119, "learning_rate": 0.00028124534854874716, "loss": 0.7887, "step": 1013 }, { "epoch": 0.36, "grad_norm": 4.085210800170898, "learning_rate": 0.00028120813693872487, "loss": 0.9341, "step": 1014 }, { "epoch": 0.36, "grad_norm": 2.6052396297454834, "learning_rate": 0.00028117092532870257, "loss": 0.6269, "step": 1015 }, { "epoch": 0.36, "grad_norm": 5.70512580871582, "learning_rate": 0.0002811337137186802, "loss": 1.2359, "step": 1016 }, { "epoch": 0.36, "grad_norm": 8.653592109680176, "learning_rate": 0.00028109650210865787, "loss": 1.07, "step": 1017 }, { "epoch": 0.36, "grad_norm": 5.807731628417969, "learning_rate": 0.0002810592904986355, "loss": 0.8944, "step": 1018 }, { "epoch": 0.36, "grad_norm": 3.5197043418884277, "learning_rate": 0.0002810220788886132, "loss": 0.9289, "step": 1019 }, { "epoch": 0.36, "grad_norm": 
3.4789111614227295, "learning_rate": 0.0002809848672785909, "loss": 0.9998, "step": 1020 }, { "epoch": 0.36, "grad_norm": 5.096599102020264, "learning_rate": 0.00028094765566856857, "loss": 0.939, "step": 1021 }, { "epoch": 0.36, "grad_norm": 3.344417095184326, "learning_rate": 0.0002809104440585463, "loss": 1.0038, "step": 1022 }, { "epoch": 0.36, "grad_norm": 3.7382874488830566, "learning_rate": 0.0002808732324485239, "loss": 1.4407, "step": 1023 }, { "epoch": 0.36, "grad_norm": 2.9947118759155273, "learning_rate": 0.00028083602083850157, "loss": 0.5359, "step": 1024 }, { "epoch": 0.36, "grad_norm": 3.5792839527130127, "learning_rate": 0.0002807988092284793, "loss": 0.5672, "step": 1025 }, { "epoch": 0.36, "grad_norm": 3.1083927154541016, "learning_rate": 0.0002807615976184569, "loss": 0.9157, "step": 1026 }, { "epoch": 0.36, "grad_norm": 2.818694829940796, "learning_rate": 0.0002807243860084346, "loss": 0.9535, "step": 1027 }, { "epoch": 0.36, "grad_norm": 3.34951114654541, "learning_rate": 0.0002806871743984123, "loss": 1.2982, "step": 1028 }, { "epoch": 0.36, "grad_norm": 3.1415767669677734, "learning_rate": 0.0002806499627883899, "loss": 0.6431, "step": 1029 }, { "epoch": 0.36, "grad_norm": 3.301403284072876, "learning_rate": 0.0002806127511783676, "loss": 1.0773, "step": 1030 }, { "epoch": 0.36, "grad_norm": 3.992616653442383, "learning_rate": 0.0002805755395683453, "loss": 0.9013, "step": 1031 }, { "epoch": 0.36, "grad_norm": 3.0520200729370117, "learning_rate": 0.000280538327958323, "loss": 0.5579, "step": 1032 }, { "epoch": 0.36, "grad_norm": 2.932624340057373, "learning_rate": 0.0002805011163483007, "loss": 1.0126, "step": 1033 }, { "epoch": 0.36, "grad_norm": 3.583620309829712, "learning_rate": 0.00028046390473827833, "loss": 0.5483, "step": 1034 }, { "epoch": 0.36, "grad_norm": 3.1285436153411865, "learning_rate": 0.000280426693128256, "loss": 0.4759, "step": 1035 }, { "epoch": 0.36, "grad_norm": 2.9035234451293945, "learning_rate": 
0.0002803894815182336, "loss": 0.3353, "step": 1036 }, { "epoch": 0.36, "grad_norm": 3.205357789993286, "learning_rate": 0.00028035226990821133, "loss": 1.0646, "step": 1037 }, { "epoch": 0.36, "grad_norm": 4.966174602508545, "learning_rate": 0.00028031505829818903, "loss": 2.3098, "step": 1038 }, { "epoch": 0.36, "grad_norm": 5.033513069152832, "learning_rate": 0.0002802778466881667, "loss": 0.9838, "step": 1039 }, { "epoch": 0.36, "grad_norm": 3.171358823776245, "learning_rate": 0.0002802406350781444, "loss": 0.5542, "step": 1040 }, { "epoch": 0.36, "grad_norm": 3.6895203590393066, "learning_rate": 0.00028020342346812203, "loss": 0.8648, "step": 1041 }, { "epoch": 0.37, "grad_norm": 5.115908145904541, "learning_rate": 0.0002801662118580997, "loss": 0.8138, "step": 1042 }, { "epoch": 0.37, "grad_norm": 4.41919469833374, "learning_rate": 0.0002801290002480774, "loss": 0.6559, "step": 1043 }, { "epoch": 0.37, "grad_norm": 3.736941337585449, "learning_rate": 0.00028009178863805503, "loss": 0.7366, "step": 1044 }, { "epoch": 0.37, "grad_norm": 4.1285319328308105, "learning_rate": 0.00028005457702803274, "loss": 0.5219, "step": 1045 }, { "epoch": 0.37, "grad_norm": 4.831568241119385, "learning_rate": 0.0002800173654180104, "loss": 0.5053, "step": 1046 }, { "epoch": 0.37, "grad_norm": 4.898710250854492, "learning_rate": 0.0002799801538079881, "loss": 1.0036, "step": 1047 }, { "epoch": 0.37, "grad_norm": 5.523413181304932, "learning_rate": 0.00027994294219796574, "loss": 1.398, "step": 1048 }, { "epoch": 0.37, "grad_norm": 3.2347447872161865, "learning_rate": 0.0002799057305879434, "loss": 0.4556, "step": 1049 }, { "epoch": 0.37, "grad_norm": 5.337017059326172, "learning_rate": 0.0002798685189779211, "loss": 1.328, "step": 1050 }, { "epoch": 0.37, "grad_norm": 4.164899826049805, "learning_rate": 0.0002798313073678988, "loss": 1.2521, "step": 1051 }, { "epoch": 0.37, "grad_norm": 2.6211090087890625, "learning_rate": 0.00027979409575787644, "loss": 1.0136, "step": 1052 }, 
{ "epoch": 0.37, "grad_norm": 2.821180820465088, "learning_rate": 0.0002797568841478541, "loss": 0.9512, "step": 1053 }, { "epoch": 0.37, "grad_norm": 3.4610302448272705, "learning_rate": 0.00027971967253783174, "loss": 0.7071, "step": 1054 }, { "epoch": 0.37, "grad_norm": 2.7207629680633545, "learning_rate": 0.00027968246092780944, "loss": 0.6617, "step": 1055 }, { "epoch": 0.37, "grad_norm": 4.068587779998779, "learning_rate": 0.00027964524931778714, "loss": 0.7244, "step": 1056 }, { "epoch": 0.37, "grad_norm": 3.698821544647217, "learning_rate": 0.0002796080377077648, "loss": 0.8169, "step": 1057 }, { "epoch": 0.37, "grad_norm": 2.56290864944458, "learning_rate": 0.0002795708260977425, "loss": 0.5039, "step": 1058 }, { "epoch": 0.37, "grad_norm": 3.410244941711426, "learning_rate": 0.00027953361448772014, "loss": 0.7284, "step": 1059 }, { "epoch": 0.37, "grad_norm": 2.4827516078948975, "learning_rate": 0.0002794964028776978, "loss": 0.675, "step": 1060 }, { "epoch": 0.37, "grad_norm": 2.349926471710205, "learning_rate": 0.0002794591912676755, "loss": 0.3923, "step": 1061 }, { "epoch": 0.37, "grad_norm": 4.622032165527344, "learning_rate": 0.00027942197965765314, "loss": 0.7249, "step": 1062 }, { "epoch": 0.37, "grad_norm": 4.842290878295898, "learning_rate": 0.00027938476804763085, "loss": 0.5359, "step": 1063 }, { "epoch": 0.37, "grad_norm": 3.4208180904388428, "learning_rate": 0.00027934755643760855, "loss": 1.2509, "step": 1064 }, { "epoch": 0.37, "grad_norm": 3.2167344093322754, "learning_rate": 0.0002793103448275862, "loss": 0.3897, "step": 1065 }, { "epoch": 0.37, "grad_norm": 6.12382698059082, "learning_rate": 0.00027927313321756385, "loss": 0.8148, "step": 1066 }, { "epoch": 0.37, "grad_norm": 3.50907301902771, "learning_rate": 0.0002792359216075415, "loss": 0.3775, "step": 1067 }, { "epoch": 0.37, "grad_norm": 4.680488109588623, "learning_rate": 0.0002791987099975192, "loss": 0.7939, "step": 1068 }, { "epoch": 0.37, "grad_norm": 5.077605247497559, 
"learning_rate": 0.0002791614983874969, "loss": 0.8747, "step": 1069 }, { "epoch": 0.37, "grad_norm": 8.638588905334473, "learning_rate": 0.00027912428677747455, "loss": 1.3101, "step": 1070 }, { "epoch": 0.38, "grad_norm": 6.577147006988525, "learning_rate": 0.0002790870751674522, "loss": 2.0627, "step": 1071 }, { "epoch": 0.38, "grad_norm": 5.956910133361816, "learning_rate": 0.0002790498635574299, "loss": 1.1331, "step": 1072 }, { "epoch": 0.38, "grad_norm": 8.133764266967773, "learning_rate": 0.00027901265194740755, "loss": 1.0287, "step": 1073 }, { "epoch": 0.38, "grad_norm": 11.1182279586792, "learning_rate": 0.00027897544033738525, "loss": 1.0364, "step": 1074 }, { "epoch": 0.38, "grad_norm": 3.266451835632324, "learning_rate": 0.0002789382287273629, "loss": 0.6508, "step": 1075 }, { "epoch": 0.38, "grad_norm": 14.238346099853516, "learning_rate": 0.0002789010171173406, "loss": 2.2819, "step": 1076 }, { "epoch": 0.38, "grad_norm": 3.7446582317352295, "learning_rate": 0.00027886380550731825, "loss": 0.922, "step": 1077 }, { "epoch": 0.38, "grad_norm": 4.616486072540283, "learning_rate": 0.0002788265938972959, "loss": 1.1808, "step": 1078 }, { "epoch": 0.38, "grad_norm": 3.3517651557922363, "learning_rate": 0.0002787893822872736, "loss": 1.3126, "step": 1079 }, { "epoch": 0.38, "grad_norm": 2.9327385425567627, "learning_rate": 0.00027875217067725125, "loss": 0.805, "step": 1080 }, { "epoch": 0.38, "grad_norm": 5.411552906036377, "learning_rate": 0.00027871495906722896, "loss": 0.8779, "step": 1081 }, { "epoch": 0.38, "grad_norm": 4.122033596038818, "learning_rate": 0.00027867774745720666, "loss": 1.3039, "step": 1082 }, { "epoch": 0.38, "grad_norm": 3.9304003715515137, "learning_rate": 0.0002786405358471843, "loss": 1.1372, "step": 1083 }, { "epoch": 0.38, "grad_norm": 3.756955862045288, "learning_rate": 0.00027860332423716196, "loss": 1.0457, "step": 1084 }, { "epoch": 0.38, "grad_norm": 4.7756242752075195, "learning_rate": 0.00027856611262713966, "loss": 
0.8883, "step": 1085 }, { "epoch": 0.38, "grad_norm": 5.649970054626465, "learning_rate": 0.0002785289010171173, "loss": 2.0304, "step": 1086 }, { "epoch": 0.38, "grad_norm": 4.984282970428467, "learning_rate": 0.000278491689407095, "loss": 1.1931, "step": 1087 }, { "epoch": 0.38, "grad_norm": 4.8974289894104, "learning_rate": 0.00027845447779707266, "loss": 0.7087, "step": 1088 }, { "epoch": 0.38, "grad_norm": 3.27323579788208, "learning_rate": 0.00027841726618705036, "loss": 0.4356, "step": 1089 }, { "epoch": 0.38, "grad_norm": 6.953647136688232, "learning_rate": 0.000278380054577028, "loss": 1.3964, "step": 1090 }, { "epoch": 0.38, "grad_norm": 2.8305001258850098, "learning_rate": 0.00027834284296700566, "loss": 0.6397, "step": 1091 }, { "epoch": 0.38, "grad_norm": 5.174875736236572, "learning_rate": 0.00027830563135698336, "loss": 0.6587, "step": 1092 }, { "epoch": 0.38, "grad_norm": 3.3889546394348145, "learning_rate": 0.000278268419746961, "loss": 0.6804, "step": 1093 }, { "epoch": 0.38, "grad_norm": 6.435478210449219, "learning_rate": 0.0002782312081369387, "loss": 1.0459, "step": 1094 }, { "epoch": 0.38, "grad_norm": 9.19431209564209, "learning_rate": 0.00027819399652691636, "loss": 1.5923, "step": 1095 }, { "epoch": 0.38, "grad_norm": 2.4626758098602295, "learning_rate": 0.000278156784916894, "loss": 0.7921, "step": 1096 }, { "epoch": 0.38, "grad_norm": 5.5386457443237305, "learning_rate": 0.0002781195733068717, "loss": 0.9378, "step": 1097 }, { "epoch": 0.38, "grad_norm": 4.50201940536499, "learning_rate": 0.00027808236169684936, "loss": 1.7841, "step": 1098 }, { "epoch": 0.39, "grad_norm": 3.9906821250915527, "learning_rate": 0.00027804515008682707, "loss": 0.7195, "step": 1099 }, { "epoch": 0.39, "grad_norm": 6.178668022155762, "learning_rate": 0.00027800793847680477, "loss": 0.5866, "step": 1100 }, { "epoch": 0.39, "eval_loss": 0.858989417552948, "eval_runtime": 50.9765, "eval_samples_per_second": 42.529, "eval_steps_per_second": 10.632, "eval_wer": 
0.7215014703338523, "step": 1100 }, { "epoch": 0.39, "grad_norm": 4.665261268615723, "learning_rate": 0.0002779707268667824, "loss": 1.8917, "step": 1101 }, { "epoch": 0.39, "grad_norm": 2.5688321590423584, "learning_rate": 0.00027793351525676007, "loss": 1.1618, "step": 1102 }, { "epoch": 0.39, "grad_norm": 2.149432420730591, "learning_rate": 0.00027789630364673777, "loss": 0.7372, "step": 1103 }, { "epoch": 0.39, "grad_norm": 2.077228307723999, "learning_rate": 0.0002778590920367154, "loss": 0.7426, "step": 1104 }, { "epoch": 0.39, "grad_norm": 2.8044614791870117, "learning_rate": 0.0002778218804266931, "loss": 1.0178, "step": 1105 }, { "epoch": 0.39, "grad_norm": 3.4295260906219482, "learning_rate": 0.00027778466881667077, "loss": 0.6801, "step": 1106 }, { "epoch": 0.39, "grad_norm": 3.1818017959594727, "learning_rate": 0.00027774745720664847, "loss": 0.5587, "step": 1107 }, { "epoch": 0.39, "grad_norm": 3.3339715003967285, "learning_rate": 0.0002777102455966261, "loss": 0.7365, "step": 1108 }, { "epoch": 0.39, "grad_norm": 4.128452301025391, "learning_rate": 0.00027767303398660377, "loss": 1.1199, "step": 1109 }, { "epoch": 0.39, "grad_norm": 3.8575356006622314, "learning_rate": 0.00027763582237658147, "loss": 1.1287, "step": 1110 }, { "epoch": 0.39, "grad_norm": 3.410104751586914, "learning_rate": 0.0002775986107665591, "loss": 0.6495, "step": 1111 }, { "epoch": 0.39, "grad_norm": 4.475276947021484, "learning_rate": 0.0002775613991565368, "loss": 0.7845, "step": 1112 }, { "epoch": 0.39, "grad_norm": 2.5406651496887207, "learning_rate": 0.00027752418754651447, "loss": 0.476, "step": 1113 }, { "epoch": 0.39, "grad_norm": 3.599978446960449, "learning_rate": 0.0002774869759364922, "loss": 0.9411, "step": 1114 }, { "epoch": 0.39, "grad_norm": 2.8049709796905518, "learning_rate": 0.0002774497643264698, "loss": 0.7746, "step": 1115 }, { "epoch": 0.39, "grad_norm": 5.1293182373046875, "learning_rate": 0.00027741255271644753, "loss": 1.4263, "step": 1116 }, { "epoch": 
0.39, "grad_norm": 4.558226585388184, "learning_rate": 0.0002773753411064252, "loss": 0.8449, "step": 1117 }, { "epoch": 0.39, "grad_norm": 3.3060882091522217, "learning_rate": 0.0002773381294964029, "loss": 0.7272, "step": 1118 }, { "epoch": 0.39, "grad_norm": 6.07926607131958, "learning_rate": 0.00027730091788638053, "loss": 0.9118, "step": 1119 }, { "epoch": 0.39, "grad_norm": 4.133337020874023, "learning_rate": 0.0002772637062763582, "loss": 0.483, "step": 1120 }, { "epoch": 0.39, "grad_norm": 5.365363121032715, "learning_rate": 0.0002772264946663359, "loss": 0.8083, "step": 1121 }, { "epoch": 0.39, "grad_norm": 3.9955897331237793, "learning_rate": 0.00027718928305631353, "loss": 1.1863, "step": 1122 }, { "epoch": 0.39, "grad_norm": 3.5983023643493652, "learning_rate": 0.00027715207144629123, "loss": 0.7021, "step": 1123 }, { "epoch": 0.39, "grad_norm": 3.3443710803985596, "learning_rate": 0.0002771148598362689, "loss": 0.6462, "step": 1124 }, { "epoch": 0.39, "grad_norm": 3.6084046363830566, "learning_rate": 0.0002770776482262466, "loss": 0.723, "step": 1125 }, { "epoch": 0.39, "grad_norm": 3.003911256790161, "learning_rate": 0.00027704043661622423, "loss": 1.1826, "step": 1126 }, { "epoch": 0.39, "grad_norm": 3.219900608062744, "learning_rate": 0.0002770032250062019, "loss": 1.0835, "step": 1127 }, { "epoch": 0.4, "grad_norm": 2.942505121231079, "learning_rate": 0.0002769660133961796, "loss": 0.9503, "step": 1128 }, { "epoch": 0.4, "grad_norm": 3.2380802631378174, "learning_rate": 0.0002769288017861573, "loss": 0.8017, "step": 1129 }, { "epoch": 0.4, "grad_norm": 2.9053637981414795, "learning_rate": 0.00027689159017613493, "loss": 0.6966, "step": 1130 }, { "epoch": 0.4, "grad_norm": 3.158212900161743, "learning_rate": 0.00027685437856611264, "loss": 0.4872, "step": 1131 }, { "epoch": 0.4, "grad_norm": 3.566776990890503, "learning_rate": 0.0002768171669560903, "loss": 0.7114, "step": 1132 }, { "epoch": 0.4, "grad_norm": 2.0422821044921875, "learning_rate": 
0.00027677995534606793, "loss": 0.6142, "step": 1133 }, { "epoch": 0.4, "grad_norm": 1.843910813331604, "learning_rate": 0.00027674274373604564, "loss": 0.4941, "step": 1134 }, { "epoch": 0.4, "grad_norm": 4.197638034820557, "learning_rate": 0.0002767055321260233, "loss": 1.1024, "step": 1135 }, { "epoch": 0.4, "grad_norm": 2.642378568649292, "learning_rate": 0.000276668320516001, "loss": 0.5026, "step": 1136 }, { "epoch": 0.4, "grad_norm": 4.609485626220703, "learning_rate": 0.00027663110890597864, "loss": 1.2151, "step": 1137 }, { "epoch": 0.4, "grad_norm": 2.2409887313842773, "learning_rate": 0.0002765938972959563, "loss": 0.3389, "step": 1138 }, { "epoch": 0.4, "grad_norm": 4.53881311416626, "learning_rate": 0.000276556685685934, "loss": 0.9275, "step": 1139 }, { "epoch": 0.4, "grad_norm": 5.820657730102539, "learning_rate": 0.00027651947407591164, "loss": 0.7122, "step": 1140 }, { "epoch": 0.4, "grad_norm": 4.550056457519531, "learning_rate": 0.00027648226246588934, "loss": 1.9592, "step": 1141 }, { "epoch": 0.4, "grad_norm": 3.5938735008239746, "learning_rate": 0.000276445050855867, "loss": 0.765, "step": 1142 }, { "epoch": 0.4, "grad_norm": 3.5738070011138916, "learning_rate": 0.0002764078392458447, "loss": 0.6004, "step": 1143 }, { "epoch": 0.4, "grad_norm": 3.4456381797790527, "learning_rate": 0.00027637062763582234, "loss": 0.5931, "step": 1144 }, { "epoch": 0.4, "grad_norm": 7.033105850219727, "learning_rate": 0.0002763334160258, "loss": 0.8942, "step": 1145 }, { "epoch": 0.4, "grad_norm": 3.166602611541748, "learning_rate": 0.0002762962044157777, "loss": 0.5482, "step": 1146 }, { "epoch": 0.4, "grad_norm": 4.609311103820801, "learning_rate": 0.0002762589928057554, "loss": 0.9272, "step": 1147 }, { "epoch": 0.4, "grad_norm": 3.320896625518799, "learning_rate": 0.00027622178119573304, "loss": 0.6604, "step": 1148 }, { "epoch": 0.4, "grad_norm": 6.4265875816345215, "learning_rate": 0.00027618456958571075, "loss": 1.238, "step": 1149 }, { "epoch": 0.4, 
"grad_norm": 8.044411659240723, "learning_rate": 0.0002761473579756884, "loss": 0.9712, "step": 1150 }, { "epoch": 0.4, "grad_norm": 7.890890121459961, "learning_rate": 0.00027611014636566604, "loss": 2.561, "step": 1151 }, { "epoch": 0.4, "grad_norm": 3.3083295822143555, "learning_rate": 0.00027607293475564375, "loss": 1.3886, "step": 1152 }, { "epoch": 0.4, "grad_norm": 3.2230350971221924, "learning_rate": 0.0002760357231456214, "loss": 0.9359, "step": 1153 }, { "epoch": 0.4, "grad_norm": 4.595873832702637, "learning_rate": 0.0002759985115355991, "loss": 1.0819, "step": 1154 }, { "epoch": 0.4, "grad_norm": 3.82143497467041, "learning_rate": 0.00027596129992557675, "loss": 0.9526, "step": 1155 }, { "epoch": 0.41, "grad_norm": 3.152099132537842, "learning_rate": 0.00027592408831555445, "loss": 0.5482, "step": 1156 }, { "epoch": 0.41, "grad_norm": 2.9585154056549072, "learning_rate": 0.0002758868767055321, "loss": 1.1054, "step": 1157 }, { "epoch": 0.41, "grad_norm": 2.825305700302124, "learning_rate": 0.00027584966509550975, "loss": 0.7547, "step": 1158 }, { "epoch": 0.41, "grad_norm": 2.47904634475708, "learning_rate": 0.00027581245348548745, "loss": 0.7835, "step": 1159 }, { "epoch": 0.41, "grad_norm": 2.429239511489868, "learning_rate": 0.00027577524187546515, "loss": 1.1264, "step": 1160 }, { "epoch": 0.41, "grad_norm": 3.2548816204071045, "learning_rate": 0.0002757380302654428, "loss": 0.7843, "step": 1161 }, { "epoch": 0.41, "grad_norm": 5.171494007110596, "learning_rate": 0.00027570081865542045, "loss": 0.8388, "step": 1162 }, { "epoch": 0.41, "grad_norm": 4.407280921936035, "learning_rate": 0.00027566360704539815, "loss": 0.5067, "step": 1163 }, { "epoch": 0.41, "grad_norm": 2.2985994815826416, "learning_rate": 0.0002756263954353758, "loss": 0.7189, "step": 1164 }, { "epoch": 0.41, "grad_norm": 3.708470106124878, "learning_rate": 0.0002755891838253535, "loss": 0.7342, "step": 1165 }, { "epoch": 0.41, "grad_norm": 3.204012393951416, "learning_rate": 
0.00027555197221533115, "loss": 0.5299, "step": 1166 }, { "epoch": 0.41, "grad_norm": 4.998049259185791, "learning_rate": 0.00027551476060530886, "loss": 0.3688, "step": 1167 }, { "epoch": 0.41, "grad_norm": 2.720385789871216, "learning_rate": 0.0002754775489952865, "loss": 0.7139, "step": 1168 }, { "epoch": 0.41, "grad_norm": 4.8262248039245605, "learning_rate": 0.00027544033738526415, "loss": 0.4921, "step": 1169 }, { "epoch": 0.41, "grad_norm": 3.7257118225097656, "learning_rate": 0.00027540312577524186, "loss": 0.7639, "step": 1170 }, { "epoch": 0.41, "grad_norm": 5.001168251037598, "learning_rate": 0.0002753659141652195, "loss": 0.6115, "step": 1171 }, { "epoch": 0.41, "grad_norm": 3.162776470184326, "learning_rate": 0.0002753287025551972, "loss": 0.2777, "step": 1172 }, { "epoch": 0.41, "grad_norm": 5.089084148406982, "learning_rate": 0.0002752914909451749, "loss": 1.4753, "step": 1173 }, { "epoch": 0.41, "grad_norm": 2.25437068939209, "learning_rate": 0.00027525427933515256, "loss": 0.2112, "step": 1174 }, { "epoch": 0.41, "grad_norm": 5.870993137359619, "learning_rate": 0.0002752170677251302, "loss": 0.6987, "step": 1175 }, { "epoch": 0.41, "grad_norm": 3.5580074787139893, "learning_rate": 0.00027517985611510786, "loss": 0.8806, "step": 1176 }, { "epoch": 0.41, "grad_norm": 5.758315563201904, "learning_rate": 0.00027514264450508556, "loss": 1.0525, "step": 1177 }, { "epoch": 0.41, "grad_norm": 3.0043349266052246, "learning_rate": 0.00027510543289506326, "loss": 1.0657, "step": 1178 }, { "epoch": 0.41, "grad_norm": 4.536125183105469, "learning_rate": 0.0002750682212850409, "loss": 1.1375, "step": 1179 }, { "epoch": 0.41, "grad_norm": 22.37661361694336, "learning_rate": 0.00027503100967501856, "loss": 4.7936, "step": 1180 }, { "epoch": 0.41, "grad_norm": 3.035590171813965, "learning_rate": 0.00027499379806499626, "loss": 0.9853, "step": 1181 }, { "epoch": 0.41, "grad_norm": 2.8873214721679688, "learning_rate": 0.0002749565864549739, "loss": 0.5476, "step": 
1182 }, { "epoch": 0.41, "grad_norm": 2.164041042327881, "learning_rate": 0.0002749193748449516, "loss": 0.3647, "step": 1183 }, { "epoch": 0.41, "grad_norm": 3.414952039718628, "learning_rate": 0.00027488216323492926, "loss": 1.0755, "step": 1184 }, { "epoch": 0.42, "grad_norm": 4.234614849090576, "learning_rate": 0.00027484495162490697, "loss": 1.3185, "step": 1185 }, { "epoch": 0.42, "grad_norm": 2.8845980167388916, "learning_rate": 0.0002748077400148846, "loss": 0.7329, "step": 1186 }, { "epoch": 0.42, "grad_norm": 2.165452718734741, "learning_rate": 0.00027477052840486226, "loss": 0.4976, "step": 1187 }, { "epoch": 0.42, "grad_norm": 3.6230342388153076, "learning_rate": 0.00027473331679483997, "loss": 1.2895, "step": 1188 }, { "epoch": 0.42, "grad_norm": 2.947673797607422, "learning_rate": 0.0002746961051848176, "loss": 0.7188, "step": 1189 }, { "epoch": 0.42, "grad_norm": 3.158074140548706, "learning_rate": 0.0002746588935747953, "loss": 0.608, "step": 1190 }, { "epoch": 0.42, "grad_norm": 2.541929006576538, "learning_rate": 0.000274621681964773, "loss": 0.6903, "step": 1191 }, { "epoch": 0.42, "grad_norm": 2.1511118412017822, "learning_rate": 0.00027458447035475067, "loss": 0.3223, "step": 1192 }, { "epoch": 0.42, "grad_norm": 4.088685989379883, "learning_rate": 0.0002745472587447283, "loss": 0.9799, "step": 1193 }, { "epoch": 0.42, "grad_norm": 4.290481090545654, "learning_rate": 0.00027451004713470597, "loss": 0.7953, "step": 1194 }, { "epoch": 0.42, "grad_norm": 4.309513092041016, "learning_rate": 0.00027447283552468367, "loss": 0.7685, "step": 1195 }, { "epoch": 0.42, "grad_norm": 3.8015859127044678, "learning_rate": 0.0002744356239146614, "loss": 0.5714, "step": 1196 }, { "epoch": 0.42, "grad_norm": 5.4302568435668945, "learning_rate": 0.000274398412304639, "loss": 0.6369, "step": 1197 }, { "epoch": 0.42, "grad_norm": 4.336185932159424, "learning_rate": 0.0002743612006946167, "loss": 0.3784, "step": 1198 }, { "epoch": 0.42, "grad_norm": 
3.980928659439087, "learning_rate": 0.0002743239890845944, "loss": 0.5137, "step": 1199 }, { "epoch": 0.42, "grad_norm": 7.486271381378174, "learning_rate": 0.000274286777474572, "loss": 0.4206, "step": 1200 }, { "epoch": 0.42, "eval_loss": 0.7752430438995361, "eval_runtime": 49.918, "eval_samples_per_second": 43.431, "eval_steps_per_second": 10.858, "eval_wer": 0.6498875627054143, "step": 1200 }, { "epoch": 0.42, "grad_norm": 3.420276641845703, "learning_rate": 0.0002742495658645497, "loss": 1.3815, "step": 1201 }, { "epoch": 0.42, "grad_norm": 3.3125431537628174, "learning_rate": 0.0002742123542545274, "loss": 1.3345, "step": 1202 }, { "epoch": 0.42, "grad_norm": 6.291903495788574, "learning_rate": 0.0002741751426445051, "loss": 1.1505, "step": 1203 }, { "epoch": 0.42, "grad_norm": 3.3073489665985107, "learning_rate": 0.0002741379310344827, "loss": 0.5659, "step": 1204 }, { "epoch": 0.42, "grad_norm": 2.7754597663879395, "learning_rate": 0.00027410071942446043, "loss": 0.8152, "step": 1205 }, { "epoch": 0.42, "grad_norm": 2.6730542182922363, "learning_rate": 0.0002740635078144381, "loss": 1.41, "step": 1206 }, { "epoch": 0.42, "grad_norm": 4.539772987365723, "learning_rate": 0.0002740262962044157, "loss": 1.0475, "step": 1207 }, { "epoch": 0.42, "grad_norm": 2.224508762359619, "learning_rate": 0.00027398908459439343, "loss": 0.5957, "step": 1208 }, { "epoch": 0.42, "grad_norm": 2.530787467956543, "learning_rate": 0.00027395187298437113, "loss": 0.6799, "step": 1209 }, { "epoch": 0.42, "grad_norm": 2.891000509262085, "learning_rate": 0.0002739146613743488, "loss": 1.2861, "step": 1210 }, { "epoch": 0.42, "grad_norm": 3.0431253910064697, "learning_rate": 0.00027387744976432643, "loss": 0.5401, "step": 1211 }, { "epoch": 0.42, "grad_norm": 2.738537549972534, "learning_rate": 0.0002738402381543041, "loss": 0.8909, "step": 1212 }, { "epoch": 0.43, "grad_norm": 4.205295085906982, "learning_rate": 0.0002738030265442818, "loss": 0.8714, "step": 1213 }, { "epoch": 0.43, 
"grad_norm": 2.5161707401275635, "learning_rate": 0.0002737658149342595, "loss": 0.5855, "step": 1214 }, { "epoch": 0.43, "grad_norm": 3.841719150543213, "learning_rate": 0.00027372860332423713, "loss": 0.9467, "step": 1215 }, { "epoch": 0.43, "grad_norm": 3.4140450954437256, "learning_rate": 0.00027369139171421484, "loss": 1.0405, "step": 1216 }, { "epoch": 0.43, "grad_norm": 5.440001487731934, "learning_rate": 0.0002736541801041925, "loss": 0.5273, "step": 1217 }, { "epoch": 0.43, "grad_norm": 3.5466346740722656, "learning_rate": 0.00027361696849417013, "loss": 0.8691, "step": 1218 }, { "epoch": 0.43, "grad_norm": 4.198976516723633, "learning_rate": 0.00027357975688414784, "loss": 0.9419, "step": 1219 }, { "epoch": 0.43, "grad_norm": 3.357349395751953, "learning_rate": 0.0002735425452741255, "loss": 0.7402, "step": 1220 }, { "epoch": 0.43, "grad_norm": 5.02566385269165, "learning_rate": 0.0002735053336641032, "loss": 0.7176, "step": 1221 }, { "epoch": 0.43, "grad_norm": 6.563841819763184, "learning_rate": 0.00027346812205408084, "loss": 2.2978, "step": 1222 }, { "epoch": 0.43, "grad_norm": 3.9487085342407227, "learning_rate": 0.00027343091044405854, "loss": 0.6593, "step": 1223 }, { "epoch": 0.43, "grad_norm": 4.61473274230957, "learning_rate": 0.0002733936988340362, "loss": 1.9731, "step": 1224 }, { "epoch": 0.43, "grad_norm": 5.610717296600342, "learning_rate": 0.00027335648722401384, "loss": 2.6307, "step": 1225 }, { "epoch": 0.43, "grad_norm": 4.044619083404541, "learning_rate": 0.00027331927561399154, "loss": 1.2597, "step": 1226 }, { "epoch": 0.43, "grad_norm": 4.211724758148193, "learning_rate": 0.00027328206400396924, "loss": 1.6048, "step": 1227 }, { "epoch": 0.43, "grad_norm": 2.5721964836120605, "learning_rate": 0.0002732448523939469, "loss": 0.686, "step": 1228 }, { "epoch": 0.43, "grad_norm": 2.3949391841888428, "learning_rate": 0.00027320764078392454, "loss": 0.6873, "step": 1229 }, { "epoch": 0.43, "grad_norm": 18.452733993530273, "learning_rate": 
0.00027317042917390224, "loss": 3.4468, "step": 1230 }, { "epoch": 0.43, "grad_norm": 3.9526307582855225, "learning_rate": 0.0002731332175638799, "loss": 0.6906, "step": 1231 }, { "epoch": 0.43, "grad_norm": 4.902743339538574, "learning_rate": 0.0002730960059538576, "loss": 0.5812, "step": 1232 }, { "epoch": 0.43, "grad_norm": 3.806962013244629, "learning_rate": 0.00027305879434383524, "loss": 0.6684, "step": 1233 }, { "epoch": 0.43, "grad_norm": 4.608047008514404, "learning_rate": 0.00027302158273381295, "loss": 0.8382, "step": 1234 }, { "epoch": 0.43, "grad_norm": 8.201254844665527, "learning_rate": 0.0002729843711237906, "loss": 0.645, "step": 1235 }, { "epoch": 0.43, "grad_norm": 3.0138301849365234, "learning_rate": 0.00027294715951376824, "loss": 0.7516, "step": 1236 }, { "epoch": 0.43, "grad_norm": 4.389562606811523, "learning_rate": 0.00027290994790374595, "loss": 1.2125, "step": 1237 }, { "epoch": 0.43, "grad_norm": 2.5127413272857666, "learning_rate": 0.0002728727362937236, "loss": 0.4319, "step": 1238 }, { "epoch": 0.43, "grad_norm": 3.1524224281311035, "learning_rate": 0.0002728355246837013, "loss": 0.586, "step": 1239 }, { "epoch": 0.43, "grad_norm": 5.574815273284912, "learning_rate": 0.000272798313073679, "loss": 0.8344, "step": 1240 }, { "epoch": 0.43, "grad_norm": 4.405158996582031, "learning_rate": 0.00027276110146365665, "loss": 0.8623, "step": 1241 }, { "epoch": 0.44, "grad_norm": 3.2911202907562256, "learning_rate": 0.0002727238898536343, "loss": 1.0748, "step": 1242 }, { "epoch": 0.44, "grad_norm": 4.5247392654418945, "learning_rate": 0.00027268667824361195, "loss": 0.5555, "step": 1243 }, { "epoch": 0.44, "grad_norm": 4.708747386932373, "learning_rate": 0.00027264946663358965, "loss": 0.988, "step": 1244 }, { "epoch": 0.44, "grad_norm": 3.7301928997039795, "learning_rate": 0.00027261225502356735, "loss": 0.7501, "step": 1245 }, { "epoch": 0.44, "grad_norm": 4.694626331329346, "learning_rate": 0.000272575043413545, "loss": 1.3788, "step": 1246 
}, { "epoch": 0.44, "grad_norm": 4.803793907165527, "learning_rate": 0.0002725378318035227, "loss": 0.8157, "step": 1247 }, { "epoch": 0.44, "grad_norm": 3.017162561416626, "learning_rate": 0.00027250062019350035, "loss": 0.7995, "step": 1248 }, { "epoch": 0.44, "grad_norm": 3.8873093128204346, "learning_rate": 0.000272463408583478, "loss": 1.574, "step": 1249 }, { "epoch": 0.44, "grad_norm": 2.3652150630950928, "learning_rate": 0.0002724261969734557, "loss": 0.3168, "step": 1250 }, { "epoch": 0.44, "grad_norm": 4.411014556884766, "learning_rate": 0.00027238898536343335, "loss": 1.3121, "step": 1251 }, { "epoch": 0.44, "grad_norm": 2.4930648803710938, "learning_rate": 0.00027235177375341106, "loss": 0.7999, "step": 1252 }, { "epoch": 0.44, "grad_norm": 5.38267183303833, "learning_rate": 0.0002723145621433887, "loss": 0.9561, "step": 1253 }, { "epoch": 0.44, "grad_norm": 2.609616756439209, "learning_rate": 0.00027227735053336635, "loss": 0.8172, "step": 1254 }, { "epoch": 0.44, "grad_norm": 2.3476998805999756, "learning_rate": 0.00027224013892334406, "loss": 1.4765, "step": 1255 }, { "epoch": 0.44, "grad_norm": 2.823747158050537, "learning_rate": 0.0002722029273133217, "loss": 0.5941, "step": 1256 }, { "epoch": 0.44, "grad_norm": 2.738922595977783, "learning_rate": 0.0002721657157032994, "loss": 0.8431, "step": 1257 }, { "epoch": 0.44, "grad_norm": 3.6117281913757324, "learning_rate": 0.0002721285040932771, "loss": 0.8236, "step": 1258 }, { "epoch": 0.44, "grad_norm": 4.166642665863037, "learning_rate": 0.00027209129248325476, "loss": 0.8192, "step": 1259 }, { "epoch": 0.44, "grad_norm": 2.9680120944976807, "learning_rate": 0.0002720540808732324, "loss": 0.5717, "step": 1260 }, { "epoch": 0.44, "grad_norm": 3.276177406311035, "learning_rate": 0.0002720168692632101, "loss": 0.9233, "step": 1261 }, { "epoch": 0.44, "grad_norm": 3.2780745029449463, "learning_rate": 0.00027197965765318776, "loss": 0.7867, "step": 1262 }, { "epoch": 0.44, "grad_norm": 5.421667575836182, 
"learning_rate": 0.00027194244604316546, "loss": 0.8406, "step": 1263 }, { "epoch": 0.44, "grad_norm": 3.3682520389556885, "learning_rate": 0.0002719052344331431, "loss": 0.5538, "step": 1264 }, { "epoch": 0.44, "grad_norm": 2.914537191390991, "learning_rate": 0.0002718680228231208, "loss": 0.7114, "step": 1265 }, { "epoch": 0.44, "grad_norm": 4.220479965209961, "learning_rate": 0.00027183081121309846, "loss": 0.8671, "step": 1266 }, { "epoch": 0.44, "grad_norm": 5.633809566497803, "learning_rate": 0.0002717935996030761, "loss": 1.037, "step": 1267 }, { "epoch": 0.44, "grad_norm": 3.3338539600372314, "learning_rate": 0.0002717563879930538, "loss": 0.5996, "step": 1268 }, { "epoch": 0.44, "grad_norm": 6.159158706665039, "learning_rate": 0.00027171917638303146, "loss": 0.692, "step": 1269 }, { "epoch": 0.44, "grad_norm": 3.33358097076416, "learning_rate": 0.00027168196477300917, "loss": 0.6227, "step": 1270 }, { "epoch": 0.45, "grad_norm": 3.7529525756835938, "learning_rate": 0.0002716447531629868, "loss": 0.5452, "step": 1271 }, { "epoch": 0.45, "grad_norm": 3.7959513664245605, "learning_rate": 0.0002716075415529645, "loss": 0.6587, "step": 1272 }, { "epoch": 0.45, "grad_norm": 3.419649600982666, "learning_rate": 0.00027157032994294217, "loss": 0.8046, "step": 1273 }, { "epoch": 0.45, "grad_norm": 3.8088862895965576, "learning_rate": 0.00027153311833291987, "loss": 0.8147, "step": 1274 }, { "epoch": 0.45, "grad_norm": 4.987424373626709, "learning_rate": 0.0002714959067228975, "loss": 0.8777, "step": 1275 }, { "epoch": 0.45, "grad_norm": 3.6485095024108887, "learning_rate": 0.0002714586951128752, "loss": 1.4681, "step": 1276 }, { "epoch": 0.45, "grad_norm": 4.164416313171387, "learning_rate": 0.00027142148350285287, "loss": 0.8455, "step": 1277 }, { "epoch": 0.45, "grad_norm": 4.422079086303711, "learning_rate": 0.0002713842718928305, "loss": 1.0435, "step": 1278 }, { "epoch": 0.45, "grad_norm": 2.755207061767578, "learning_rate": 0.0002713470602828082, "loss": 
0.6229, "step": 1279 }, { "epoch": 0.45, "grad_norm": 2.0500385761260986, "learning_rate": 0.00027130984867278587, "loss": 0.6549, "step": 1280 }, { "epoch": 0.45, "grad_norm": 2.116751194000244, "learning_rate": 0.00027127263706276357, "loss": 0.6984, "step": 1281 }, { "epoch": 0.45, "grad_norm": 2.164412498474121, "learning_rate": 0.0002712354254527412, "loss": 0.6526, "step": 1282 }, { "epoch": 0.45, "grad_norm": 2.9012765884399414, "learning_rate": 0.0002711982138427189, "loss": 0.7316, "step": 1283 }, { "epoch": 0.45, "grad_norm": 2.856905937194824, "learning_rate": 0.00027116100223269657, "loss": 0.6951, "step": 1284 }, { "epoch": 0.45, "grad_norm": 2.1080093383789062, "learning_rate": 0.0002711237906226742, "loss": 0.5701, "step": 1285 }, { "epoch": 0.45, "grad_norm": 1.3872121572494507, "learning_rate": 0.0002710865790126519, "loss": 0.3921, "step": 1286 }, { "epoch": 0.45, "grad_norm": 5.119579792022705, "learning_rate": 0.0002710493674026296, "loss": 0.4822, "step": 1287 }, { "epoch": 0.45, "grad_norm": 3.3475329875946045, "learning_rate": 0.0002710121557926073, "loss": 0.4675, "step": 1288 }, { "epoch": 0.45, "grad_norm": 4.7075090408325195, "learning_rate": 0.000270974944182585, "loss": 0.8302, "step": 1289 }, { "epoch": 0.45, "grad_norm": 5.20393180847168, "learning_rate": 0.00027093773257256263, "loss": 1.3705, "step": 1290 }, { "epoch": 0.45, "grad_norm": 3.181398391723633, "learning_rate": 0.0002709005209625403, "loss": 0.5417, "step": 1291 }, { "epoch": 0.45, "grad_norm": NaN, "learning_rate": 0.0002709005209625403, "loss": 0.097, "step": 1292 }, { "epoch": 0.45, "grad_norm": 3.0050837993621826, "learning_rate": 0.000270863309352518, "loss": 0.5245, "step": 1293 }, { "epoch": 0.45, "grad_norm": 4.091763973236084, "learning_rate": 0.00027082609774249563, "loss": 1.1929, "step": 1294 }, { "epoch": 0.45, "grad_norm": 3.52555513381958, "learning_rate": 0.00027078888613247333, "loss": 1.1448, "step": 1295 }, { "epoch": 0.45, "grad_norm": 
5.984481334686279, "learning_rate": 0.000270751674522451, "loss": 0.7998, "step": 1296 }, { "epoch": 0.45, "grad_norm": 6.075478553771973, "learning_rate": 0.00027071446291242863, "loss": 1.3081, "step": 1297 }, { "epoch": 0.45, "grad_norm": 16.068614959716797, "learning_rate": 0.00027067725130240633, "loss": 0.6254, "step": 1298 }, { "epoch": 0.46, "grad_norm": 3.1314620971679688, "learning_rate": 0.000270640039692384, "loss": 0.4411, "step": 1299 }, { "epoch": 0.46, "grad_norm": 4.841050624847412, "learning_rate": 0.0002706028280823617, "loss": 0.5328, "step": 1300 }, { "epoch": 0.46, "eval_loss": 0.718771755695343, "eval_runtime": 50.1715, "eval_samples_per_second": 43.212, "eval_steps_per_second": 10.803, "eval_wer": 0.6280055353745027, "step": 1300 }, { "epoch": 0.46, "grad_norm": 2.5195000171661377, "learning_rate": 0.00027056561647233933, "loss": 1.2308, "step": 1301 }, { "epoch": 0.46, "grad_norm": 3.1528189182281494, "learning_rate": 0.00027052840486231703, "loss": 1.2859, "step": 1302 }, { "epoch": 0.46, "grad_norm": 2.554877519607544, "learning_rate": 0.0002704911932522947, "loss": 0.6787, "step": 1303 }, { "epoch": 0.46, "grad_norm": 3.0567691326141357, "learning_rate": 0.00027045398164227233, "loss": 0.8472, "step": 1304 }, { "epoch": 0.46, "grad_norm": 1.9456758499145508, "learning_rate": 0.00027041677003225003, "loss": 0.5319, "step": 1305 }, { "epoch": 0.46, "grad_norm": 2.8807168006896973, "learning_rate": 0.00027037955842222774, "loss": 0.5714, "step": 1306 }, { "epoch": 0.46, "grad_norm": 3.7060537338256836, "learning_rate": 0.0002703423468122054, "loss": 0.8815, "step": 1307 }, { "epoch": 0.46, "grad_norm": 4.672762870788574, "learning_rate": 0.0002703051352021831, "loss": 0.9812, "step": 1308 }, { "epoch": 0.46, "grad_norm": 2.651334285736084, "learning_rate": 0.00027026792359216074, "loss": 0.58, "step": 1309 }, { "epoch": 0.46, "grad_norm": 5.646411418914795, "learning_rate": 0.0002702307119821384, "loss": 1.1246, "step": 1310 }, { "epoch": 
0.46, "grad_norm": 2.932492971420288, "learning_rate": 0.0002701935003721161, "loss": 0.6828, "step": 1311 }, { "epoch": 0.46, "grad_norm": 3.369302749633789, "learning_rate": 0.00027015628876209374, "loss": 1.0535, "step": 1312 }, { "epoch": 0.46, "grad_norm": 3.5227344036102295, "learning_rate": 0.00027011907715207144, "loss": 0.7979, "step": 1313 }, { "epoch": 0.46, "grad_norm": 3.5197794437408447, "learning_rate": 0.0002700818655420491, "loss": 0.818, "step": 1314 }, { "epoch": 0.46, "grad_norm": 2.7741096019744873, "learning_rate": 0.0002700446539320268, "loss": 0.2863, "step": 1315 }, { "epoch": 0.46, "grad_norm": 5.058023929595947, "learning_rate": 0.00027000744232200444, "loss": 0.9948, "step": 1316 }, { "epoch": 0.46, "grad_norm": 4.746575832366943, "learning_rate": 0.0002699702307119821, "loss": 1.0099, "step": 1317 }, { "epoch": 0.46, "grad_norm": 3.513601064682007, "learning_rate": 0.0002699330191019598, "loss": 0.5189, "step": 1318 }, { "epoch": 0.46, "grad_norm": 3.361132860183716, "learning_rate": 0.0002698958074919375, "loss": 0.7626, "step": 1319 }, { "epoch": 0.46, "grad_norm": 3.748187780380249, "learning_rate": 0.00026985859588191514, "loss": 0.4882, "step": 1320 }, { "epoch": 0.46, "grad_norm": 3.661416530609131, "learning_rate": 0.0002698213842718928, "loss": 0.5571, "step": 1321 }, { "epoch": 0.46, "grad_norm": 6.057156562805176, "learning_rate": 0.00026978417266187044, "loss": 0.6306, "step": 1322 }, { "epoch": 0.46, "grad_norm": 4.695087432861328, "learning_rate": 0.00026974696105184814, "loss": 0.617, "step": 1323 }, { "epoch": 0.46, "grad_norm": 3.9815876483917236, "learning_rate": 0.00026970974944182585, "loss": 0.4741, "step": 1324 }, { "epoch": 0.46, "grad_norm": 5.866700649261475, "learning_rate": 0.0002696725378318035, "loss": 0.7968, "step": 1325 }, { "epoch": 0.46, "grad_norm": 3.9536242485046387, "learning_rate": 0.0002696353262217812, "loss": 1.1302, "step": 1326 }, { "epoch": 0.46, "grad_norm": 3.9558680057525635, 
"learning_rate": 0.00026959811461175885, "loss": 0.7709, "step": 1327 }, { "epoch": 0.47, "grad_norm": 2.1161468029022217, "learning_rate": 0.0002695609030017365, "loss": 0.8176, "step": 1328 }, { "epoch": 0.47, "grad_norm": 3.3528337478637695, "learning_rate": 0.0002695236913917142, "loss": 1.0837, "step": 1329 }, { "epoch": 0.47, "grad_norm": 2.7077548503875732, "learning_rate": 0.00026948647978169185, "loss": 1.0823, "step": 1330 }, { "epoch": 0.47, "grad_norm": 2.875311851501465, "learning_rate": 0.00026944926817166955, "loss": 0.7376, "step": 1331 }, { "epoch": 0.47, "grad_norm": 3.183072090148926, "learning_rate": 0.0002694120565616472, "loss": 0.9196, "step": 1332 }, { "epoch": 0.47, "grad_norm": 2.7492129802703857, "learning_rate": 0.0002693748449516249, "loss": 0.4203, "step": 1333 }, { "epoch": 0.47, "grad_norm": 3.388514518737793, "learning_rate": 0.00026933763334160255, "loss": 1.483, "step": 1334 }, { "epoch": 0.47, "grad_norm": 1.9908661842346191, "learning_rate": 0.0002693004217315802, "loss": 0.3537, "step": 1335 }, { "epoch": 0.47, "grad_norm": 1.7505980730056763, "learning_rate": 0.0002692632101215579, "loss": 0.3952, "step": 1336 }, { "epoch": 0.47, "grad_norm": 8.66080379486084, "learning_rate": 0.0002692259985115356, "loss": 2.9207, "step": 1337 }, { "epoch": 0.47, "grad_norm": 3.433154821395874, "learning_rate": 0.00026918878690151325, "loss": 0.8745, "step": 1338 }, { "epoch": 0.47, "grad_norm": 2.7627408504486084, "learning_rate": 0.0002691515752914909, "loss": 0.3728, "step": 1339 }, { "epoch": 0.47, "grad_norm": 2.7169649600982666, "learning_rate": 0.0002691143636814686, "loss": 0.4879, "step": 1340 }, { "epoch": 0.47, "grad_norm": 3.4643096923828125, "learning_rate": 0.00026907715207144625, "loss": 0.4795, "step": 1341 }, { "epoch": 0.47, "grad_norm": 4.340239524841309, "learning_rate": 0.00026903994046142396, "loss": 0.8171, "step": 1342 }, { "epoch": 0.47, "grad_norm": 3.2308506965637207, "learning_rate": 0.0002690027288514016, "loss": 
0.7844, "step": 1343 }, { "epoch": 0.47, "grad_norm": 4.7775092124938965, "learning_rate": 0.0002689655172413793, "loss": 0.7076, "step": 1344 }, { "epoch": 0.47, "grad_norm": 4.886669635772705, "learning_rate": 0.00026892830563135696, "loss": 0.5041, "step": 1345 }, { "epoch": 0.47, "grad_norm": 4.971267223358154, "learning_rate": 0.0002688910940213346, "loss": 0.8198, "step": 1346 }, { "epoch": 0.47, "grad_norm": 2.96894907951355, "learning_rate": 0.0002688538824113123, "loss": 0.4911, "step": 1347 }, { "epoch": 0.47, "grad_norm": 4.252577781677246, "learning_rate": 0.00026881667080128996, "loss": 0.7421, "step": 1348 }, { "epoch": 0.47, "grad_norm": 5.704499244689941, "learning_rate": 0.00026877945919126766, "loss": 0.8419, "step": 1349 }, { "epoch": 0.47, "grad_norm": 6.062288761138916, "learning_rate": 0.00026874224758124536, "loss": 2.6257, "step": 1350 }, { "epoch": 0.47, "grad_norm": 2.9042396545410156, "learning_rate": 0.000268705035971223, "loss": 1.0895, "step": 1351 }, { "epoch": 0.47, "grad_norm": 3.361873149871826, "learning_rate": 0.00026866782436120066, "loss": 1.0075, "step": 1352 }, { "epoch": 0.47, "grad_norm": 1.925755262374878, "learning_rate": 0.0002686306127511783, "loss": 0.5891, "step": 1353 }, { "epoch": 0.47, "grad_norm": 1.8617517948150635, "learning_rate": 0.000268593401141156, "loss": 0.6051, "step": 1354 }, { "epoch": 0.47, "grad_norm": 3.268332004547119, "learning_rate": 0.0002685561895311337, "loss": 1.3279, "step": 1355 }, { "epoch": 0.48, "grad_norm": 2.6798906326293945, "learning_rate": 0.00026851897792111136, "loss": 0.7166, "step": 1356 }, { "epoch": 0.48, "grad_norm": 3.4666085243225098, "learning_rate": 0.00026848176631108907, "loss": 0.8236, "step": 1357 }, { "epoch": 0.48, "grad_norm": 4.445189952850342, "learning_rate": 0.0002684445547010667, "loss": 1.0953, "step": 1358 }, { "epoch": 0.48, "grad_norm": 3.491154909133911, "learning_rate": 0.00026840734309104436, "loss": 0.86, "step": 1359 }, { "epoch": 0.48, "grad_norm": 
1.873551368713379, "learning_rate": 0.00026837013148102207, "loss": 0.3485, "step": 1360 }, { "epoch": 0.48, "grad_norm": 4.520420551300049, "learning_rate": 0.0002683329198709997, "loss": 0.5189, "step": 1361 }, { "epoch": 0.48, "grad_norm": 2.5249836444854736, "learning_rate": 0.0002682957082609774, "loss": 0.6036, "step": 1362 }, { "epoch": 0.48, "grad_norm": 6.196778774261475, "learning_rate": 0.00026825849665095507, "loss": 1.0194, "step": 1363 }, { "epoch": 0.48, "grad_norm": 2.7715699672698975, "learning_rate": 0.0002682212850409327, "loss": 0.6127, "step": 1364 }, { "epoch": 0.48, "grad_norm": 4.2750773429870605, "learning_rate": 0.0002681840734309104, "loss": 0.5814, "step": 1365 }, { "epoch": 0.48, "grad_norm": 6.068713188171387, "learning_rate": 0.00026814686182088807, "loss": 0.8995, "step": 1366 }, { "epoch": 0.48, "grad_norm": 2.5809454917907715, "learning_rate": 0.00026810965021086577, "loss": 0.4658, "step": 1367 }, { "epoch": 0.48, "grad_norm": 8.29049301147461, "learning_rate": 0.0002680724386008435, "loss": 0.942, "step": 1368 }, { "epoch": 0.48, "grad_norm": 2.825986385345459, "learning_rate": 0.0002680352269908211, "loss": 0.2832, "step": 1369 }, { "epoch": 0.48, "grad_norm": 4.205315113067627, "learning_rate": 0.00026799801538079877, "loss": 0.3227, "step": 1370 }, { "epoch": 0.48, "grad_norm": 3.076242685317993, "learning_rate": 0.0002679608037707764, "loss": 0.6167, "step": 1371 }, { "epoch": 0.48, "grad_norm": 3.471637487411499, "learning_rate": 0.0002679235921607541, "loss": 0.4435, "step": 1372 }, { "epoch": 0.48, "grad_norm": 6.81670618057251, "learning_rate": 0.0002678863805507318, "loss": 0.6166, "step": 1373 }, { "epoch": 0.48, "grad_norm": 10.90646743774414, "learning_rate": 0.0002678491689407095, "loss": 1.4561, "step": 1374 }, { "epoch": 0.48, "grad_norm": 10.379186630249023, "learning_rate": 0.0002678119573306872, "loss": 1.1457, "step": 1375 }, { "epoch": 0.48, "grad_norm": 10.573211669921875, "learning_rate": 
0.0002677747457206648, "loss": 1.8288, "step": 1376 }, { "epoch": 0.48, "grad_norm": 3.0730042457580566, "learning_rate": 0.0002677375341106425, "loss": 1.1138, "step": 1377 }, { "epoch": 0.48, "grad_norm": 3.4380507469177246, "learning_rate": 0.0002677003225006202, "loss": 0.6603, "step": 1378 }, { "epoch": 0.48, "grad_norm": 2.709672212600708, "learning_rate": 0.0002676631108905978, "loss": 0.7853, "step": 1379 }, { "epoch": 0.48, "grad_norm": 3.5759897232055664, "learning_rate": 0.00026762589928057553, "loss": 0.7393, "step": 1380 }, { "epoch": 0.48, "grad_norm": 2.016786813735962, "learning_rate": 0.0002675886876705532, "loss": 0.9876, "step": 1381 }, { "epoch": 0.48, "grad_norm": 2.079416036605835, "learning_rate": 0.0002675514760605309, "loss": 0.4571, "step": 1382 }, { "epoch": 0.48, "grad_norm": 3.269665241241455, "learning_rate": 0.00026751426445050853, "loss": 0.9098, "step": 1383 }, { "epoch": 0.48, "grad_norm": 3.512655735015869, "learning_rate": 0.0002674770528404862, "loss": 1.1042, "step": 1384 }, { "epoch": 0.49, "grad_norm": 2.725264072418213, "learning_rate": 0.0002674398412304639, "loss": 0.8414, "step": 1385 }, { "epoch": 0.49, "grad_norm": 4.0862860679626465, "learning_rate": 0.0002674026296204416, "loss": 1.6329, "step": 1386 }, { "epoch": 0.49, "grad_norm": 4.442559242248535, "learning_rate": 0.00026736541801041923, "loss": 1.1877, "step": 1387 }, { "epoch": 0.49, "grad_norm": 3.18272066116333, "learning_rate": 0.0002673282064003969, "loss": 0.8395, "step": 1388 }, { "epoch": 0.49, "grad_norm": 3.8753631114959717, "learning_rate": 0.0002672909947903746, "loss": 0.5428, "step": 1389 }, { "epoch": 0.49, "grad_norm": 3.923710823059082, "learning_rate": 0.00026725378318035223, "loss": 1.0107, "step": 1390 }, { "epoch": 0.49, "grad_norm": 2.947282552719116, "learning_rate": 0.00026721657157032994, "loss": 0.4316, "step": 1391 }, { "epoch": 0.49, "grad_norm": 3.441121816635132, "learning_rate": 0.0002671793599603076, "loss": 1.4819, "step": 1392 }, 
{ "epoch": 0.49, "grad_norm": 2.639099597930908, "learning_rate": 0.0002671421483502853, "loss": 0.4009, "step": 1393 }, { "epoch": 0.49, "grad_norm": 4.281423091888428, "learning_rate": 0.00026710493674026294, "loss": 0.8285, "step": 1394 }, { "epoch": 0.49, "grad_norm": 5.369678020477295, "learning_rate": 0.0002670677251302406, "loss": 1.3822, "step": 1395 }, { "epoch": 0.49, "grad_norm": 3.5579185485839844, "learning_rate": 0.0002670305135202183, "loss": 0.4678, "step": 1396 }, { "epoch": 0.49, "grad_norm": 4.262704849243164, "learning_rate": 0.00026699330191019594, "loss": 0.5735, "step": 1397 }, { "epoch": 0.49, "grad_norm": 3.989433526992798, "learning_rate": 0.00026695609030017364, "loss": 0.6595, "step": 1398 }, { "epoch": 0.49, "grad_norm": 7.13302755355835, "learning_rate": 0.00026691887869015134, "loss": 1.1522, "step": 1399 }, { "epoch": 0.49, "grad_norm": 10.057518005371094, "learning_rate": 0.000266881667080129, "loss": 1.3733, "step": 1400 }, { "epoch": 0.49, "eval_loss": 0.6848714351654053, "eval_runtime": 50.8029, "eval_samples_per_second": 42.675, "eval_steps_per_second": 10.669, "eval_wer": 0.6019719771665801, "step": 1400 }, { "epoch": 0.49, "grad_norm": 3.617882251739502, "learning_rate": 0.00026684445547010664, "loss": 1.3938, "step": 1401 }, { "epoch": 0.49, "grad_norm": 2.1804263591766357, "learning_rate": 0.0002668072438600843, "loss": 0.9542, "step": 1402 }, { "epoch": 0.49, "grad_norm": 2.778125286102295, "learning_rate": 0.000266770032250062, "loss": 0.6878, "step": 1403 }, { "epoch": 0.49, "grad_norm": 2.387033700942993, "learning_rate": 0.0002667328206400397, "loss": 0.6723, "step": 1404 }, { "epoch": 0.49, "grad_norm": 3.006556272506714, "learning_rate": 0.00026669560903001734, "loss": 0.8387, "step": 1405 }, { "epoch": 0.49, "grad_norm": 1.9671204090118408, "learning_rate": 0.000266658397419995, "loss": 0.6155, "step": 1406 }, { "epoch": 0.49, "grad_norm": 2.056529998779297, "learning_rate": 0.0002666211858099727, "loss": 0.5556, 
"step": 1407 }, { "epoch": 0.49, "grad_norm": 1.5942492485046387, "learning_rate": 0.00026658397419995034, "loss": 0.3429, "step": 1408 }, { "epoch": 0.49, "grad_norm": 6.031838417053223, "learning_rate": 0.00026654676258992805, "loss": 1.1818, "step": 1409 }, { "epoch": 0.49, "grad_norm": 2.842709541320801, "learning_rate": 0.0002665095509799057, "loss": 0.9802, "step": 1410 }, { "epoch": 0.49, "grad_norm": 2.389888286590576, "learning_rate": 0.0002664723393698834, "loss": 0.4917, "step": 1411 }, { "epoch": 0.49, "grad_norm": 3.805605173110962, "learning_rate": 0.00026643512775986105, "loss": 0.6546, "step": 1412 }, { "epoch": 0.5, "grad_norm": 4.952422142028809, "learning_rate": 0.0002663979161498387, "loss": 0.6714, "step": 1413 }, { "epoch": 0.5, "grad_norm": 3.914050340652466, "learning_rate": 0.0002663607045398164, "loss": 0.6369, "step": 1414 }, { "epoch": 0.5, "grad_norm": 2.893771171569824, "learning_rate": 0.00026632349292979405, "loss": 0.721, "step": 1415 }, { "epoch": 0.5, "grad_norm": 4.831104755401611, "learning_rate": 0.00026628628131977175, "loss": 1.0663, "step": 1416 }, { "epoch": 0.5, "grad_norm": 2.74212384223938, "learning_rate": 0.00026624906970974945, "loss": 0.6219, "step": 1417 }, { "epoch": 0.5, "grad_norm": 5.251964569091797, "learning_rate": 0.0002662118580997271, "loss": 1.5889, "step": 1418 }, { "epoch": 0.5, "grad_norm": 4.620336532592773, "learning_rate": 0.00026617464648970475, "loss": 1.05, "step": 1419 }, { "epoch": 0.5, "grad_norm": 5.674115180969238, "learning_rate": 0.00026613743487968245, "loss": 0.7279, "step": 1420 }, { "epoch": 0.5, "grad_norm": 3.6265783309936523, "learning_rate": 0.0002661002232696601, "loss": 0.4997, "step": 1421 }, { "epoch": 0.5, "grad_norm": 4.726874828338623, "learning_rate": 0.0002660630116596378, "loss": 0.9038, "step": 1422 }, { "epoch": 0.5, "grad_norm": 5.306706428527832, "learning_rate": 0.00026602580004961545, "loss": 1.3036, "step": 1423 }, { "epoch": 0.5, "grad_norm": 4.389394283294678, 
"learning_rate": 0.00026598858843959316, "loss": 1.1654, "step": 1424 }, { "epoch": 0.5, "grad_norm": 3.7293262481689453, "learning_rate": 0.0002659513768295708, "loss": 0.4846, "step": 1425 }, { "epoch": 0.5, "grad_norm": 3.0907764434814453, "learning_rate": 0.00026591416521954845, "loss": 1.5577, "step": 1426 }, { "epoch": 0.5, "grad_norm": 4.340789318084717, "learning_rate": 0.00026587695360952616, "loss": 0.8854, "step": 1427 }, { "epoch": 0.5, "grad_norm": 5.936422348022461, "learning_rate": 0.0002658397419995038, "loss": 1.3372, "step": 1428 }, { "epoch": 0.5, "grad_norm": 2.7207283973693848, "learning_rate": 0.0002658025303894815, "loss": 0.9362, "step": 1429 }, { "epoch": 0.5, "grad_norm": 2.6525309085845947, "learning_rate": 0.00026576531877945916, "loss": 0.7226, "step": 1430 }, { "epoch": 0.5, "grad_norm": 2.381458282470703, "learning_rate": 0.00026572810716943686, "loss": 0.6479, "step": 1431 }, { "epoch": 0.5, "grad_norm": 2.5098137855529785, "learning_rate": 0.0002656908955594145, "loss": 0.6157, "step": 1432 }, { "epoch": 0.5, "grad_norm": 2.9847960472106934, "learning_rate": 0.00026565368394939216, "loss": 0.5089, "step": 1433 }, { "epoch": 0.5, "grad_norm": 4.012515544891357, "learning_rate": 0.00026561647233936986, "loss": 0.4906, "step": 1434 }, { "epoch": 0.5, "grad_norm": 2.7779381275177, "learning_rate": 0.00026557926072934756, "loss": 0.5565, "step": 1435 }, { "epoch": 0.5, "grad_norm": 3.0392794609069824, "learning_rate": 0.0002655420491193252, "loss": 0.631, "step": 1436 }, { "epoch": 0.5, "grad_norm": 5.07166051864624, "learning_rate": 0.00026550483750930286, "loss": 0.4397, "step": 1437 }, { "epoch": 0.5, "grad_norm": 2.711935520172119, "learning_rate": 0.00026546762589928056, "loss": 0.5008, "step": 1438 }, { "epoch": 0.5, "grad_norm": 2.4669036865234375, "learning_rate": 0.0002654304142892582, "loss": 0.5278, "step": 1439 }, { "epoch": 0.5, "grad_norm": 4.123307228088379, "learning_rate": 0.0002653932026792359, "loss": 0.7766, "step": 
1440 }, { "epoch": 0.5, "grad_norm": 3.164463758468628, "learning_rate": 0.00026535599106921356, "loss": 0.6879, "step": 1441 }, { "epoch": 0.51, "grad_norm": 4.237036228179932, "learning_rate": 0.00026531877945919127, "loss": 0.6142, "step": 1442 }, { "epoch": 0.51, "grad_norm": 3.148812770843506, "learning_rate": 0.0002652815678491689, "loss": 0.469, "step": 1443 }, { "epoch": 0.51, "grad_norm": 5.390077590942383, "learning_rate": 0.00026524435623914656, "loss": 0.7129, "step": 1444 }, { "epoch": 0.51, "grad_norm": 3.447877883911133, "learning_rate": 0.00026520714462912427, "loss": 1.0652, "step": 1445 }, { "epoch": 0.51, "grad_norm": 4.019307613372803, "learning_rate": 0.0002651699330191019, "loss": 0.6862, "step": 1446 }, { "epoch": 0.51, "grad_norm": 5.470080375671387, "learning_rate": 0.0002651327214090796, "loss": 0.9423, "step": 1447 }, { "epoch": 0.51, "grad_norm": 4.0855607986450195, "learning_rate": 0.00026509550979905727, "loss": 0.7612, "step": 1448 }, { "epoch": 0.51, "grad_norm": 4.5919671058654785, "learning_rate": 0.00026505829818903497, "loss": 0.7675, "step": 1449 }, { "epoch": 0.51, "grad_norm": 3.587916612625122, "learning_rate": 0.0002650210865790126, "loss": 0.2524, "step": 1450 }, { "epoch": 0.51, "grad_norm": 6.112603664398193, "learning_rate": 0.0002649838749689903, "loss": 1.4048, "step": 1451 }, { "epoch": 0.51, "grad_norm": 2.97434139251709, "learning_rate": 0.00026494666335896797, "loss": 1.2854, "step": 1452 }, { "epoch": 0.51, "grad_norm": 2.173609972000122, "learning_rate": 0.00026490945174894567, "loss": 0.5143, "step": 1453 }, { "epoch": 0.51, "grad_norm": 2.1337573528289795, "learning_rate": 0.0002648722401389233, "loss": 0.7012, "step": 1454 }, { "epoch": 0.51, "grad_norm": 10.74719524383545, "learning_rate": 0.00026483502852890097, "loss": 2.8888, "step": 1455 }, { "epoch": 0.51, "grad_norm": 2.577202558517456, "learning_rate": 0.0002647978169188787, "loss": 0.5115, "step": 1456 }, { "epoch": 0.51, "grad_norm": 
2.5629165172576904, "learning_rate": 0.0002647606053088563, "loss": 0.9478, "step": 1457 }, { "epoch": 0.51, "grad_norm": 2.9556891918182373, "learning_rate": 0.000264723393698834, "loss": 0.5669, "step": 1458 }, { "epoch": 0.51, "grad_norm": 2.905815601348877, "learning_rate": 0.0002646861820888117, "loss": 0.732, "step": 1459 }, { "epoch": 0.51, "grad_norm": 3.5102758407592773, "learning_rate": 0.0002646489704787894, "loss": 0.875, "step": 1460 }, { "epoch": 0.51, "grad_norm": 2.5579817295074463, "learning_rate": 0.000264611758868767, "loss": 0.5173, "step": 1461 }, { "epoch": 0.51, "grad_norm": 3.0412824153900146, "learning_rate": 0.0002645745472587447, "loss": 0.95, "step": 1462 }, { "epoch": 0.51, "grad_norm": 2.4420363903045654, "learning_rate": 0.0002645373356487224, "loss": 0.5622, "step": 1463 }, { "epoch": 0.51, "grad_norm": 3.9669206142425537, "learning_rate": 0.0002645001240387001, "loss": 1.3098, "step": 1464 }, { "epoch": 0.51, "grad_norm": 3.515052080154419, "learning_rate": 0.00026446291242867773, "loss": 0.6694, "step": 1465 }, { "epoch": 0.51, "grad_norm": 4.764845848083496, "learning_rate": 0.00026442570081865543, "loss": 0.4299, "step": 1466 }, { "epoch": 0.51, "grad_norm": 3.278150796890259, "learning_rate": 0.0002643884892086331, "loss": 0.5183, "step": 1467 }, { "epoch": 0.51, "grad_norm": 5.361281394958496, "learning_rate": 0.00026435127759861073, "loss": 0.8904, "step": 1468 }, { "epoch": 0.51, "grad_norm": 4.450416088104248, "learning_rate": 0.00026431406598858843, "loss": 0.5072, "step": 1469 }, { "epoch": 0.52, "grad_norm": 3.9510233402252197, "learning_rate": 0.0002642768543785661, "loss": 0.5038, "step": 1470 }, { "epoch": 0.52, "grad_norm": 4.09872579574585, "learning_rate": 0.0002642396427685438, "loss": 0.8801, "step": 1471 }, { "epoch": 0.52, "grad_norm": 3.2074294090270996, "learning_rate": 0.00026420243115852143, "loss": 0.5852, "step": 1472 }, { "epoch": 0.52, "grad_norm": 6.999640941619873, "learning_rate": 
0.00026416521954849913, "loss": 0.4915, "step": 1473 }, { "epoch": 0.52, "grad_norm": 3.909586191177368, "learning_rate": 0.0002641280079384768, "loss": 0.811, "step": 1474 }, { "epoch": 0.52, "grad_norm": 8.129749298095703, "learning_rate": 0.00026409079632845443, "loss": 2.2248, "step": 1475 }, { "epoch": 0.52, "grad_norm": 4.023890018463135, "learning_rate": 0.00026405358471843213, "loss": 1.2998, "step": 1476 }, { "epoch": 0.52, "grad_norm": 3.378636598587036, "learning_rate": 0.0002640163731084098, "loss": 0.7717, "step": 1477 }, { "epoch": 0.52, "grad_norm": 2.6731226444244385, "learning_rate": 0.0002639791614983875, "loss": 0.7744, "step": 1478 }, { "epoch": 0.52, "grad_norm": 4.031921863555908, "learning_rate": 0.00026394194988836513, "loss": 0.9579, "step": 1479 }, { "epoch": 0.52, "grad_norm": 3.2059433460235596, "learning_rate": 0.0002639047382783428, "loss": 0.7075, "step": 1480 }, { "epoch": 0.52, "grad_norm": 2.67105770111084, "learning_rate": 0.0002638675266683205, "loss": 0.7341, "step": 1481 }, { "epoch": 0.52, "grad_norm": 2.016000986099243, "learning_rate": 0.0002638303150582982, "loss": 0.4283, "step": 1482 }, { "epoch": 0.52, "grad_norm": 2.9530158042907715, "learning_rate": 0.00026379310344827584, "loss": 0.6266, "step": 1483 }, { "epoch": 0.52, "grad_norm": 2.5564568042755127, "learning_rate": 0.00026375589183825354, "loss": 0.7194, "step": 1484 }, { "epoch": 0.52, "grad_norm": 2.650444269180298, "learning_rate": 0.0002637186802282312, "loss": 0.5366, "step": 1485 }, { "epoch": 0.52, "grad_norm": 2.470327854156494, "learning_rate": 0.00026368146861820884, "loss": 0.6349, "step": 1486 }, { "epoch": 0.52, "grad_norm": 3.796874761581421, "learning_rate": 0.00026364425700818654, "loss": 0.6011, "step": 1487 }, { "epoch": 0.52, "grad_norm": 3.449688673019409, "learning_rate": 0.0002636070453981642, "loss": 0.6161, "step": 1488 }, { "epoch": 0.52, "grad_norm": 2.23581600189209, "learning_rate": 0.0002635698337881419, "loss": 0.4338, "step": 1489 }, 
{ "epoch": 0.52, "grad_norm": 5.239893436431885, "learning_rate": 0.00026353262217811954, "loss": 0.9053, "step": 1490 }, { "epoch": 0.52, "grad_norm": 3.8986151218414307, "learning_rate": 0.00026349541056809724, "loss": 0.3529, "step": 1491 }, { "epoch": 0.52, "grad_norm": 3.6103169918060303, "learning_rate": 0.0002634581989580749, "loss": 0.8344, "step": 1492 }, { "epoch": 0.52, "grad_norm": 4.4422993659973145, "learning_rate": 0.00026342098734805254, "loss": 0.6958, "step": 1493 }, { "epoch": 0.52, "grad_norm": 3.637202501296997, "learning_rate": 0.00026338377573803024, "loss": 0.9728, "step": 1494 }, { "epoch": 0.52, "grad_norm": 4.514386177062988, "learning_rate": 0.00026334656412800795, "loss": 0.625, "step": 1495 }, { "epoch": 0.52, "grad_norm": 3.6551239490509033, "learning_rate": 0.0002633093525179856, "loss": 0.7805, "step": 1496 }, { "epoch": 0.52, "grad_norm": 4.161332130432129, "learning_rate": 0.00026327214090796324, "loss": 0.7602, "step": 1497 }, { "epoch": 0.52, "grad_norm": 3.9714465141296387, "learning_rate": 0.00026323492929794095, "loss": 0.5906, "step": 1498 }, { "epoch": 0.53, "grad_norm": 3.682180404663086, "learning_rate": 0.0002631977176879186, "loss": 0.4266, "step": 1499 }, { "epoch": 0.53, "grad_norm": 3.590712785720825, "learning_rate": 0.0002631605060778963, "loss": 0.4957, "step": 1500 }, { "epoch": 0.53, "eval_loss": 0.735007107257843, "eval_runtime": 50.1513, "eval_samples_per_second": 43.229, "eval_steps_per_second": 10.807, "eval_wer": 0.630254281266217, "step": 1500 }, { "epoch": 0.53, "grad_norm": 3.7293701171875, "learning_rate": 0.00026312329446787395, "loss": 0.8107, "step": 1501 }, { "epoch": 0.53, "grad_norm": 3.134906053543091, "learning_rate": 0.00026308608285785165, "loss": 0.6081, "step": 1502 }, { "epoch": 0.53, "grad_norm": 3.5551223754882812, "learning_rate": 0.0002630488712478293, "loss": 0.7375, "step": 1503 }, { "epoch": 0.53, "grad_norm": 5.314273357391357, "learning_rate": 0.00026301165963780695, "loss": 
0.8512, "step": 1504 }, { "epoch": 0.53, "grad_norm": 2.985286235809326, "learning_rate": 0.00026297444802778465, "loss": 0.5388, "step": 1505 }, { "epoch": 0.53, "grad_norm": 2.9923386573791504, "learning_rate": 0.0002629372364177623, "loss": 0.9692, "step": 1506 }, { "epoch": 0.53, "grad_norm": 2.3773350715637207, "learning_rate": 0.00026290002480774, "loss": 0.9575, "step": 1507 }, { "epoch": 0.53, "grad_norm": 3.891386032104492, "learning_rate": 0.0002628628131977177, "loss": 0.8126, "step": 1508 }, { "epoch": 0.53, "grad_norm": 2.9968199729919434, "learning_rate": 0.00026282560158769535, "loss": 0.5441, "step": 1509 }, { "epoch": 0.53, "grad_norm": 2.9973716735839844, "learning_rate": 0.000262788389977673, "loss": 0.618, "step": 1510 }, { "epoch": 0.53, "grad_norm": 3.8595619201660156, "learning_rate": 0.00026275117836765065, "loss": 0.904, "step": 1511 }, { "epoch": 0.53, "grad_norm": 3.169881820678711, "learning_rate": 0.00026271396675762835, "loss": 0.5045, "step": 1512 }, { "epoch": 0.53, "grad_norm": 2.6832542419433594, "learning_rate": 0.00026267675514760606, "loss": 0.3136, "step": 1513 }, { "epoch": 0.53, "grad_norm": 2.8257126808166504, "learning_rate": 0.0002626395435375837, "loss": 0.4304, "step": 1514 }, { "epoch": 0.53, "grad_norm": 2.0919361114501953, "learning_rate": 0.0002626023319275614, "loss": 0.3809, "step": 1515 }, { "epoch": 0.53, "grad_norm": 3.3260436058044434, "learning_rate": 0.00026256512031753906, "loss": 0.5717, "step": 1516 }, { "epoch": 0.53, "grad_norm": 3.5611228942871094, "learning_rate": 0.0002625279087075167, "loss": 0.5815, "step": 1517 }, { "epoch": 0.53, "grad_norm": 5.097376346588135, "learning_rate": 0.0002624906970974944, "loss": 0.7399, "step": 1518 }, { "epoch": 0.53, "grad_norm": 3.300809621810913, "learning_rate": 0.00026245348548747206, "loss": 0.7625, "step": 1519 }, { "epoch": 0.53, "grad_norm": 6.034483432769775, "learning_rate": 0.00026241627387744976, "loss": 1.3396, "step": 1520 }, { "epoch": 0.53, 
"grad_norm": 3.987584114074707, "learning_rate": 0.0002623790622674274, "loss": 0.8065, "step": 1521 }, { "epoch": 0.53, "grad_norm": 3.958810567855835, "learning_rate": 0.00026234185065740506, "loss": 0.5565, "step": 1522 }, { "epoch": 0.53, "grad_norm": 3.113748550415039, "learning_rate": 0.00026230463904738276, "loss": 0.4262, "step": 1523 }, { "epoch": 0.53, "grad_norm": 3.8435819149017334, "learning_rate": 0.0002622674274373604, "loss": 0.6072, "step": 1524 }, { "epoch": 0.53, "grad_norm": 3.7314817905426025, "learning_rate": 0.0002622302158273381, "loss": 0.6308, "step": 1525 }, { "epoch": 0.53, "grad_norm": 2.7327356338500977, "learning_rate": 0.0002621930042173158, "loss": 0.904, "step": 1526 }, { "epoch": 0.54, "grad_norm": 3.118708848953247, "learning_rate": 0.00026215579260729346, "loss": 0.784, "step": 1527 }, { "epoch": 0.54, "grad_norm": 2.2013180255889893, "learning_rate": 0.0002621185809972711, "loss": 0.8928, "step": 1528 }, { "epoch": 0.54, "grad_norm": 2.631147861480713, "learning_rate": 0.00026208136938724876, "loss": 0.6634, "step": 1529 }, { "epoch": 0.54, "grad_norm": 1.8624056577682495, "learning_rate": 0.00026204415777722646, "loss": 0.3717, "step": 1530 }, { "epoch": 0.54, "grad_norm": 2.32458233833313, "learning_rate": 0.00026200694616720417, "loss": 0.4845, "step": 1531 }, { "epoch": 0.54, "grad_norm": 2.4256484508514404, "learning_rate": 0.0002619697345571818, "loss": 0.6534, "step": 1532 }, { "epoch": 0.54, "grad_norm": 3.6999833583831787, "learning_rate": 0.0002619325229471595, "loss": 0.6113, "step": 1533 }, { "epoch": 0.54, "grad_norm": 2.6705386638641357, "learning_rate": 0.00026189531133713717, "loss": 0.6816, "step": 1534 }, { "epoch": 0.54, "grad_norm": 2.588336706161499, "learning_rate": 0.0002618580997271148, "loss": 0.5221, "step": 1535 }, { "epoch": 0.54, "grad_norm": 3.662912368774414, "learning_rate": 0.0002618208881170925, "loss": 1.0771, "step": 1536 }, { "epoch": 0.54, "grad_norm": 3.598870277404785, "learning_rate": 
0.00026178367650707017, "loss": 0.3864, "step": 1537 }, { "epoch": 0.54, "grad_norm": 4.303178310394287, "learning_rate": 0.00026174646489704787, "loss": 0.8527, "step": 1538 }, { "epoch": 0.54, "grad_norm": 1.631732702255249, "learning_rate": 0.0002617092532870255, "loss": 0.2559, "step": 1539 }, { "epoch": 0.54, "grad_norm": 3.9511115550994873, "learning_rate": 0.0002616720416770032, "loss": 1.2759, "step": 1540 }, { "epoch": 0.54, "grad_norm": 4.213435173034668, "learning_rate": 0.00026163483006698087, "loss": 0.9761, "step": 1541 }, { "epoch": 0.54, "grad_norm": 4.040883541107178, "learning_rate": 0.0002615976184569585, "loss": 1.2503, "step": 1542 }, { "epoch": 0.54, "grad_norm": 3.2061383724212646, "learning_rate": 0.0002615604068469362, "loss": 0.4728, "step": 1543 }, { "epoch": 0.54, "grad_norm": 3.8172144889831543, "learning_rate": 0.0002615231952369139, "loss": 0.8298, "step": 1544 }, { "epoch": 0.54, "grad_norm": 4.939277648925781, "learning_rate": 0.0002614859836268916, "loss": 1.1306, "step": 1545 }, { "epoch": 0.54, "grad_norm": 2.502394676208496, "learning_rate": 0.0002614487720168692, "loss": 0.4795, "step": 1546 }, { "epoch": 0.54, "grad_norm": 2.375518321990967, "learning_rate": 0.00026141156040684687, "loss": 0.3351, "step": 1547 }, { "epoch": 0.54, "grad_norm": 6.979621410369873, "learning_rate": 0.0002613743487968246, "loss": 1.9284, "step": 1548 }, { "epoch": 0.54, "grad_norm": 2.901841878890991, "learning_rate": 0.0002613371371868023, "loss": 0.4371, "step": 1549 }, { "epoch": 0.54, "grad_norm": 4.348433971405029, "learning_rate": 0.0002612999255767799, "loss": 0.6329, "step": 1550 }, { "epoch": 0.54, "grad_norm": 3.033573865890503, "learning_rate": 0.00026126271396675763, "loss": 1.1795, "step": 1551 }, { "epoch": 0.54, "grad_norm": 2.457150459289551, "learning_rate": 0.0002612255023567353, "loss": 0.5499, "step": 1552 }, { "epoch": 0.54, "grad_norm": 1.8844290971755981, "learning_rate": 0.0002611882907467129, "loss": 0.7523, "step": 1553 }, 
{ "epoch": 0.54, "grad_norm": 2.3162200450897217, "learning_rate": 0.00026115107913669063, "loss": 0.884, "step": 1554 }, { "epoch": 0.54, "grad_norm": 2.550788164138794, "learning_rate": 0.0002611138675266683, "loss": 0.7523, "step": 1555 }, { "epoch": 0.55, "grad_norm": 3.001537561416626, "learning_rate": 0.000261076655916646, "loss": 0.5522, "step": 1556 }, { "epoch": 0.55, "grad_norm": 2.1897246837615967, "learning_rate": 0.0002610394443066237, "loss": 0.4203, "step": 1557 }, { "epoch": 0.55, "grad_norm": 2.3803975582122803, "learning_rate": 0.00026100223269660133, "loss": 0.9045, "step": 1558 }, { "epoch": 0.55, "grad_norm": 3.0036349296569824, "learning_rate": 0.000260965021086579, "loss": 0.6177, "step": 1559 }, { "epoch": 0.55, "grad_norm": 4.269506454467773, "learning_rate": 0.00026092780947655663, "loss": 1.6503, "step": 1560 }, { "epoch": 0.55, "grad_norm": 2.8251953125, "learning_rate": 0.00026089059786653433, "loss": 0.6453, "step": 1561 }, { "epoch": 0.55, "grad_norm": 3.577118396759033, "learning_rate": 0.00026085338625651204, "loss": 1.0677, "step": 1562 }, { "epoch": 0.55, "grad_norm": 2.7220301628112793, "learning_rate": 0.0002608161746464897, "loss": 1.0965, "step": 1563 }, { "epoch": 0.55, "grad_norm": 2.421391010284424, "learning_rate": 0.00026077896303646733, "loss": 0.4751, "step": 1564 }, { "epoch": 0.55, "grad_norm": 3.9482858180999756, "learning_rate": 0.00026074175142644504, "loss": 0.5636, "step": 1565 }, { "epoch": 0.55, "grad_norm": 4.108609199523926, "learning_rate": 0.0002607045398164227, "loss": 0.6971, "step": 1566 }, { "epoch": 0.55, "grad_norm": 2.993597984313965, "learning_rate": 0.0002606673282064004, "loss": 0.6064, "step": 1567 }, { "epoch": 0.55, "grad_norm": 3.803035259246826, "learning_rate": 0.00026063011659637804, "loss": 0.5257, "step": 1568 }, { "epoch": 0.55, "grad_norm": 3.5908119678497314, "learning_rate": 0.00026059290498635574, "loss": 0.8194, "step": 1569 }, { "epoch": 0.55, "grad_norm": 2.1012234687805176, 
"learning_rate": 0.0002605556933763334, "loss": 0.5269, "step": 1570 }, { "epoch": 0.55, "grad_norm": 2.691065549850464, "learning_rate": 0.00026051848176631104, "loss": 0.638, "step": 1571 }, { "epoch": 0.55, "grad_norm": 4.872591018676758, "learning_rate": 0.00026048127015628874, "loss": 0.8042, "step": 1572 }, { "epoch": 0.55, "grad_norm": 4.684198379516602, "learning_rate": 0.0002604440585462664, "loss": 0.8874, "step": 1573 }, { "epoch": 0.55, "grad_norm": 4.133639812469482, "learning_rate": 0.0002604068469362441, "loss": 0.933, "step": 1574 }, { "epoch": 0.55, "grad_norm": 5.2042012214660645, "learning_rate": 0.0002603696353262218, "loss": 1.4783, "step": 1575 }, { "epoch": 0.55, "grad_norm": 4.421505928039551, "learning_rate": 0.00026033242371619944, "loss": 1.4112, "step": 1576 }, { "epoch": 0.55, "grad_norm": 4.258162975311279, "learning_rate": 0.0002602952121061771, "loss": 1.1223, "step": 1577 }, { "epoch": 0.55, "grad_norm": 3.783501386642456, "learning_rate": 0.00026025800049615474, "loss": 0.6567, "step": 1578 }, { "epoch": 0.55, "grad_norm": 3.9202888011932373, "learning_rate": 0.00026022078888613244, "loss": 1.1697, "step": 1579 }, { "epoch": 0.55, "grad_norm": 2.2230772972106934, "learning_rate": 0.00026018357727611015, "loss": 0.6542, "step": 1580 }, { "epoch": 0.55, "grad_norm": 2.5005552768707275, "learning_rate": 0.0002601463656660878, "loss": 0.484, "step": 1581 }, { "epoch": 0.55, "grad_norm": 2.2738864421844482, "learning_rate": 0.0002601091540560655, "loss": 0.5697, "step": 1582 }, { "epoch": 0.55, "grad_norm": 2.813744306564331, "learning_rate": 0.00026007194244604315, "loss": 0.7979, "step": 1583 }, { "epoch": 0.56, "grad_norm": 2.8571112155914307, "learning_rate": 0.0002600347308360208, "loss": 0.6285, "step": 1584 }, { "epoch": 0.56, "grad_norm": 2.8317923545837402, "learning_rate": 0.0002599975192259985, "loss": 0.4895, "step": 1585 }, { "epoch": 0.56, "grad_norm": 3.4439423084259033, "learning_rate": 0.00025996030761597615, "loss": 
0.4935, "step": 1586 }, { "epoch": 0.56, "grad_norm": 2.2733328342437744, "learning_rate": 0.00025992309600595385, "loss": 0.8773, "step": 1587 }, { "epoch": 0.56, "grad_norm": 2.249488592147827, "learning_rate": 0.0002598858843959315, "loss": 0.4, "step": 1588 }, { "epoch": 0.56, "grad_norm": 1.9486145973205566, "learning_rate": 0.00025984867278590915, "loss": 0.48, "step": 1589 }, { "epoch": 0.56, "grad_norm": 4.063397407531738, "learning_rate": 0.00025981146117588685, "loss": 0.7096, "step": 1590 }, { "epoch": 0.56, "grad_norm": 3.8297181129455566, "learning_rate": 0.0002597742495658645, "loss": 0.5382, "step": 1591 }, { "epoch": 0.56, "grad_norm": 3.13065505027771, "learning_rate": 0.0002597370379558422, "loss": 0.6512, "step": 1592 }, { "epoch": 0.56, "grad_norm": 3.807405710220337, "learning_rate": 0.0002596998263458199, "loss": 0.4922, "step": 1593 }, { "epoch": 0.56, "grad_norm": 4.6079254150390625, "learning_rate": 0.00025966261473579755, "loss": 0.5638, "step": 1594 }, { "epoch": 0.56, "grad_norm": 2.535735607147217, "learning_rate": 0.0002596254031257752, "loss": 0.7087, "step": 1595 }, { "epoch": 0.56, "grad_norm": 2.7222447395324707, "learning_rate": 0.0002595881915157529, "loss": 0.4831, "step": 1596 }, { "epoch": 0.56, "grad_norm": 3.0065388679504395, "learning_rate": 0.00025955097990573055, "loss": 0.7503, "step": 1597 }, { "epoch": 0.56, "grad_norm": 3.3113796710968018, "learning_rate": 0.00025951376829570826, "loss": 0.5052, "step": 1598 }, { "epoch": 0.56, "grad_norm": 4.114770412445068, "learning_rate": 0.0002594765566856859, "loss": 0.6825, "step": 1599 }, { "epoch": 0.56, "grad_norm": 3.18342924118042, "learning_rate": 0.0002594393450756636, "loss": 0.4084, "step": 1600 }, { "epoch": 0.56, "eval_loss": 0.7260645627975464, "eval_runtime": 49.8256, "eval_samples_per_second": 43.512, "eval_steps_per_second": 10.878, "eval_wer": 0.6434872859366891, "step": 1600 }, { "epoch": 0.56, "grad_norm": 2.540189504623413, "learning_rate": 
0.00025940213346564126, "loss": 0.9917, "step": 1601 }, { "epoch": 0.56, "grad_norm": 5.168649673461914, "learning_rate": 0.0002593649218556189, "loss": 1.1896, "step": 1602 }, { "epoch": 0.56, "grad_norm": 3.469858407974243, "learning_rate": 0.0002593277102455966, "loss": 0.588, "step": 1603 }, { "epoch": 0.56, "grad_norm": 2.415588617324829, "learning_rate": 0.00025929049863557426, "loss": 1.2277, "step": 1604 }, { "epoch": 0.56, "grad_norm": 1.8117973804473877, "learning_rate": 0.00025925328702555196, "loss": 0.5193, "step": 1605 }, { "epoch": 0.56, "grad_norm": 2.193356990814209, "learning_rate": 0.0002592160754155296, "loss": 0.5361, "step": 1606 }, { "epoch": 0.56, "grad_norm": 4.278368949890137, "learning_rate": 0.0002591788638055073, "loss": 0.7885, "step": 1607 }, { "epoch": 0.56, "grad_norm": 2.5760748386383057, "learning_rate": 0.00025914165219548496, "loss": 0.6032, "step": 1608 }, { "epoch": 0.56, "grad_norm": 2.544609308242798, "learning_rate": 0.00025910444058546266, "loss": 0.7552, "step": 1609 }, { "epoch": 0.56, "grad_norm": 3.166447639465332, "learning_rate": 0.0002590672289754403, "loss": 0.9139, "step": 1610 }, { "epoch": 0.56, "grad_norm": 5.605889797210693, "learning_rate": 0.000259030017365418, "loss": 1.2701, "step": 1611 }, { "epoch": 0.56, "grad_norm": 3.2346339225769043, "learning_rate": 0.00025899280575539566, "loss": 0.807, "step": 1612 }, { "epoch": 0.57, "grad_norm": 2.9761223793029785, "learning_rate": 0.0002589555941453733, "loss": 0.6815, "step": 1613 }, { "epoch": 0.57, "grad_norm": 2.0151445865631104, "learning_rate": 0.000258918382535351, "loss": 0.4635, "step": 1614 }, { "epoch": 0.57, "grad_norm": 1.8519450426101685, "learning_rate": 0.00025888117092532866, "loss": 0.3119, "step": 1615 }, { "epoch": 0.57, "grad_norm": 4.429759502410889, "learning_rate": 0.00025884395931530637, "loss": 0.7836, "step": 1616 }, { "epoch": 0.57, "grad_norm": 17.506669998168945, "learning_rate": 0.000258806747705284, "loss": 0.8412, "step": 1617 
}, { "epoch": 0.57, "grad_norm": 5.09642219543457, "learning_rate": 0.0002587695360952617, "loss": 0.8578, "step": 1618 }, { "epoch": 0.57, "grad_norm": 7.458622932434082, "learning_rate": 0.00025873232448523937, "loss": 0.9713, "step": 1619 }, { "epoch": 0.57, "grad_norm": 4.237120628356934, "learning_rate": 0.000258695112875217, "loss": 1.0024, "step": 1620 }, { "epoch": 0.57, "grad_norm": 4.813836097717285, "learning_rate": 0.0002586579012651947, "loss": 0.8012, "step": 1621 }, { "epoch": 0.57, "grad_norm": 6.002732753753662, "learning_rate": 0.00025862068965517237, "loss": 1.0697, "step": 1622 }, { "epoch": 0.57, "grad_norm": 5.764606475830078, "learning_rate": 0.00025858347804515007, "loss": 0.8174, "step": 1623 }, { "epoch": 0.57, "grad_norm": 3.286224842071533, "learning_rate": 0.00025854626643512777, "loss": 0.3334, "step": 1624 }, { "epoch": 0.57, "grad_norm": 3.0024983882904053, "learning_rate": 0.0002585090548251054, "loss": 0.3869, "step": 1625 }, { "epoch": 0.57, "grad_norm": 2.4190189838409424, "learning_rate": 0.00025847184321508307, "loss": 0.7463, "step": 1626 }, { "epoch": 0.57, "grad_norm": 2.419149875640869, "learning_rate": 0.0002584346316050608, "loss": 0.8414, "step": 1627 }, { "epoch": 0.57, "grad_norm": 2.5799694061279297, "learning_rate": 0.0002583974199950384, "loss": 0.8311, "step": 1628 }, { "epoch": 0.57, "grad_norm": 1.2012771368026733, "learning_rate": 0.0002583602083850161, "loss": 0.2703, "step": 1629 }, { "epoch": 0.57, "grad_norm": 2.410515785217285, "learning_rate": 0.0002583229967749938, "loss": 0.7188, "step": 1630 }, { "epoch": 0.57, "grad_norm": 2.3258681297302246, "learning_rate": 0.0002582857851649714, "loss": 0.4769, "step": 1631 }, { "epoch": 0.57, "grad_norm": 3.23000431060791, "learning_rate": 0.0002582485735549491, "loss": 0.3131, "step": 1632 }, { "epoch": 0.57, "grad_norm": 3.1836373805999756, "learning_rate": 0.0002582113619449268, "loss": 0.91, "step": 1633 }, { "epoch": 0.57, "grad_norm": 2.7543880939483643, 
"learning_rate": 0.0002581741503349045, "loss": 1.0753, "step": 1634 }, { "epoch": 0.57, "grad_norm": 2.235100030899048, "learning_rate": 0.0002581369387248821, "loss": 0.5141, "step": 1635 }, { "epoch": 0.57, "grad_norm": 2.6288204193115234, "learning_rate": 0.00025809972711485983, "loss": 0.8646, "step": 1636 }, { "epoch": 0.57, "grad_norm": 3.652747869491577, "learning_rate": 0.0002580625155048375, "loss": 1.2427, "step": 1637 }, { "epoch": 0.57, "grad_norm": 5.494747161865234, "learning_rate": 0.0002580253038948151, "loss": 0.784, "step": 1638 }, { "epoch": 0.57, "grad_norm": 2.1231942176818848, "learning_rate": 0.00025798809228479283, "loss": 0.462, "step": 1639 }, { "epoch": 0.57, "grad_norm": 4.513335704803467, "learning_rate": 0.00025795088067477053, "loss": 0.7612, "step": 1640 }, { "epoch": 0.57, "grad_norm": 3.353682279586792, "learning_rate": 0.0002579136690647482, "loss": 0.6168, "step": 1641 }, { "epoch": 0.58, "grad_norm": 2.6810176372528076, "learning_rate": 0.0002578764574547259, "loss": 0.6426, "step": 1642 }, { "epoch": 0.58, "grad_norm": 6.6810126304626465, "learning_rate": 0.00025783924584470353, "loss": 2.6482, "step": 1643 }, { "epoch": 0.58, "grad_norm": 3.389218330383301, "learning_rate": 0.0002578020342346812, "loss": 0.3808, "step": 1644 }, { "epoch": 0.58, "grad_norm": 3.735605239868164, "learning_rate": 0.0002577648226246589, "loss": 0.5647, "step": 1645 }, { "epoch": 0.58, "grad_norm": 40.25916290283203, "learning_rate": 0.00025772761101463653, "loss": 0.6441, "step": 1646 }, { "epoch": 0.58, "grad_norm": 2.7484211921691895, "learning_rate": 0.00025769039940461423, "loss": 0.7046, "step": 1647 }, { "epoch": 0.58, "grad_norm": 3.8884170055389404, "learning_rate": 0.0002576531877945919, "loss": 0.9336, "step": 1648 }, { "epoch": 0.58, "grad_norm": 5.420833110809326, "learning_rate": 0.0002576159761845696, "loss": 1.2645, "step": 1649 }, { "epoch": 0.58, "grad_norm": 4.9601593017578125, "learning_rate": 0.00025757876457454723, "loss": 
0.7021, "step": 1650 }, { "epoch": 0.58, "grad_norm": 2.882598400115967, "learning_rate": 0.0002575415529645249, "loss": 1.571, "step": 1651 }, { "epoch": 0.58, "grad_norm": 2.408437490463257, "learning_rate": 0.0002575043413545026, "loss": 0.6583, "step": 1652 }, { "epoch": 0.58, "grad_norm": 2.7199838161468506, "learning_rate": 0.0002574671297444803, "loss": 1.0358, "step": 1653 }, { "epoch": 0.58, "grad_norm": 2.46736216545105, "learning_rate": 0.00025742991813445794, "loss": 0.7032, "step": 1654 }, { "epoch": 0.58, "grad_norm": 1.8680689334869385, "learning_rate": 0.0002573927065244356, "loss": 0.3942, "step": 1655 }, { "epoch": 0.58, "grad_norm": 2.436976671218872, "learning_rate": 0.0002573554949144133, "loss": 0.928, "step": 1656 }, { "epoch": 0.58, "grad_norm": 1.8157801628112793, "learning_rate": 0.00025731828330439094, "loss": 0.5851, "step": 1657 }, { "epoch": 0.58, "grad_norm": 2.2245826721191406, "learning_rate": 0.00025728107169436864, "loss": 0.829, "step": 1658 }, { "epoch": 0.58, "grad_norm": 2.383336067199707, "learning_rate": 0.0002572438600843463, "loss": 0.5824, "step": 1659 }, { "epoch": 0.58, "grad_norm": 2.794093370437622, "learning_rate": 0.000257206648474324, "loss": 0.7381, "step": 1660 }, { "epoch": 0.58, "grad_norm": 6.125317573547363, "learning_rate": 0.00025716943686430164, "loss": 0.6719, "step": 1661 }, { "epoch": 0.58, "grad_norm": 4.031485080718994, "learning_rate": 0.0002571322252542793, "loss": 1.0915, "step": 1662 }, { "epoch": 0.58, "grad_norm": 2.9629569053649902, "learning_rate": 0.000257095013644257, "loss": 0.8506, "step": 1663 }, { "epoch": 0.58, "grad_norm": 3.803297281265259, "learning_rate": 0.00025705780203423464, "loss": 0.6614, "step": 1664 }, { "epoch": 0.58, "grad_norm": 7.903172016143799, "learning_rate": 0.00025702059042421234, "loss": 1.5172, "step": 1665 }, { "epoch": 0.58, "grad_norm": 4.132394313812256, "learning_rate": 0.00025698337881419, "loss": 0.6185, "step": 1666 }, { "epoch": 0.58, "grad_norm": 
1.9272037744522095, "learning_rate": 0.0002569461672041677, "loss": 0.2627, "step": 1667 }, { "epoch": 0.58, "grad_norm": 3.5171618461608887, "learning_rate": 0.00025690895559414534, "loss": 0.8559, "step": 1668 }, { "epoch": 0.58, "grad_norm": 3.361370325088501, "learning_rate": 0.000256871743984123, "loss": 0.6559, "step": 1669 }, { "epoch": 0.59, "grad_norm": 4.13026762008667, "learning_rate": 0.0002568345323741007, "loss": 1.6543, "step": 1670 }, { "epoch": 0.59, "grad_norm": 3.8648040294647217, "learning_rate": 0.0002567973207640784, "loss": 0.784, "step": 1671 }, { "epoch": 0.59, "grad_norm": 4.692941188812256, "learning_rate": 0.00025676010915405605, "loss": 0.5386, "step": 1672 }, { "epoch": 0.59, "grad_norm": 2.755861520767212, "learning_rate": 0.0002567228975440337, "loss": 0.2788, "step": 1673 }, { "epoch": 0.59, "grad_norm": 5.936482906341553, "learning_rate": 0.0002566856859340114, "loss": 1.7085, "step": 1674 }, { "epoch": 0.59, "grad_norm": NaN, "learning_rate": 0.0002566856859340114, "loss": 0.1353, "step": 1675 }, { "epoch": 0.59, "grad_norm": 2.012645959854126, "learning_rate": 0.00025664847432398905, "loss": 0.9096, "step": 1676 }, { "epoch": 0.59, "grad_norm": 2.613264799118042, "learning_rate": 0.00025661126271396675, "loss": 0.5428, "step": 1677 }, { "epoch": 0.59, "grad_norm": 3.0009877681732178, "learning_rate": 0.0002565740511039444, "loss": 0.5408, "step": 1678 }, { "epoch": 0.59, "grad_norm": 3.448207378387451, "learning_rate": 0.0002565368394939221, "loss": 0.5128, "step": 1679 }, { "epoch": 0.59, "grad_norm": 1.7628707885742188, "learning_rate": 0.00025649962788389975, "loss": 0.2766, "step": 1680 }, { "epoch": 0.59, "grad_norm": 18.24505043029785, "learning_rate": 0.0002564624162738774, "loss": 4.3011, "step": 1681 }, { "epoch": 0.59, "grad_norm": 2.7175798416137695, "learning_rate": 0.0002564252046638551, "loss": 0.9797, "step": 1682 }, { "epoch": 0.59, "grad_norm": 2.9099786281585693, "learning_rate": 0.00025638799305383275, "loss": 
0.6673, "step": 1683 }, { "epoch": 0.59, "grad_norm": 2.798135280609131, "learning_rate": 0.00025635078144381045, "loss": 0.6912, "step": 1684 }, { "epoch": 0.59, "grad_norm": 3.6960597038269043, "learning_rate": 0.00025631356983378816, "loss": 0.3386, "step": 1685 }, { "epoch": 0.59, "grad_norm": 1.622521162033081, "learning_rate": 0.0002562763582237658, "loss": 0.3305, "step": 1686 }, { "epoch": 0.59, "grad_norm": 1.7763274908065796, "learning_rate": 0.00025623914661374346, "loss": 0.3437, "step": 1687 }, { "epoch": 0.59, "grad_norm": 2.253156900405884, "learning_rate": 0.0002562019350037211, "loss": 0.374, "step": 1688 }, { "epoch": 0.59, "grad_norm": 3.9651002883911133, "learning_rate": 0.0002561647233936988, "loss": 0.5962, "step": 1689 }, { "epoch": 0.59, "grad_norm": 3.5626463890075684, "learning_rate": 0.0002561275117836765, "loss": 0.7751, "step": 1690 }, { "epoch": 0.59, "grad_norm": 3.0992138385772705, "learning_rate": 0.00025609030017365416, "loss": 0.7268, "step": 1691 }, { "epoch": 0.59, "grad_norm": 3.610893487930298, "learning_rate": 0.00025605308856363186, "loss": 0.3764, "step": 1692 }, { "epoch": 0.59, "grad_norm": 5.090748310089111, "learning_rate": 0.0002560158769536095, "loss": 1.1423, "step": 1693 }, { "epoch": 0.59, "grad_norm": 4.872439861297607, "learning_rate": 0.00025597866534358716, "loss": 0.7031, "step": 1694 }, { "epoch": 0.59, "grad_norm": 4.71915340423584, "learning_rate": 0.00025594145373356486, "loss": 1.1228, "step": 1695 }, { "epoch": 0.59, "grad_norm": 3.626314163208008, "learning_rate": 0.0002559042421235425, "loss": 0.5694, "step": 1696 }, { "epoch": 0.59, "grad_norm": 6.56594181060791, "learning_rate": 0.0002558670305135202, "loss": 1.0496, "step": 1697 }, { "epoch": 0.59, "grad_norm": 6.181338310241699, "learning_rate": 0.00025582981890349786, "loss": 2.4125, "step": 1698 }, { "epoch": 0.6, "grad_norm": 2.708691358566284, "learning_rate": 0.00025579260729347556, "loss": 0.3454, "step": 1699 }, { "epoch": 0.6, "grad_norm": 
3.9663429260253906, "learning_rate": 0.0002557553956834532, "loss": 0.5035, "step": 1700 }, { "epoch": 0.6, "eval_loss": 0.7400824427604675, "eval_runtime": 50.2926, "eval_samples_per_second": 43.108, "eval_steps_per_second": 10.777, "eval_wer": 0.6105345096004151, "step": 1700 }, { "epoch": 0.6, "grad_norm": 2.6513137817382812, "learning_rate": 0.00025571818407343086, "loss": 1.472, "step": 1701 }, { "epoch": 0.6, "grad_norm": 1.8909034729003906, "learning_rate": 0.00025568097246340856, "loss": 0.7735, "step": 1702 }, { "epoch": 0.6, "grad_norm": 1.8732975721359253, "learning_rate": 0.00025564376085338627, "loss": 0.6441, "step": 1703 }, { "epoch": 0.6, "grad_norm": 1.949060082435608, "learning_rate": 0.0002556065492433639, "loss": 0.6141, "step": 1704 }, { "epoch": 0.6, "grad_norm": 1.6796951293945312, "learning_rate": 0.00025556933763334157, "loss": 0.3948, "step": 1705 }, { "epoch": 0.6, "grad_norm": 2.200995922088623, "learning_rate": 0.0002555321260233192, "loss": 0.4608, "step": 1706 }, { "epoch": 0.6, "grad_norm": 2.7621562480926514, "learning_rate": 0.0002554949144132969, "loss": 0.6803, "step": 1707 }, { "epoch": 0.6, "grad_norm": 4.656009197235107, "learning_rate": 0.0002554577028032746, "loss": 0.7062, "step": 1708 }, { "epoch": 0.6, "grad_norm": 3.225820302963257, "learning_rate": 0.00025542049119325227, "loss": 1.0558, "step": 1709 }, { "epoch": 0.6, "grad_norm": 3.8542261123657227, "learning_rate": 0.00025538327958322997, "loss": 1.0637, "step": 1710 }, { "epoch": 0.6, "grad_norm": 2.9913618564605713, "learning_rate": 0.0002553460679732076, "loss": 0.4731, "step": 1711 }, { "epoch": 0.6, "grad_norm": 2.990241289138794, "learning_rate": 0.00025530885636318527, "loss": 0.6727, "step": 1712 }, { "epoch": 0.6, "grad_norm": 2.4291019439697266, "learning_rate": 0.00025527164475316297, "loss": 0.4963, "step": 1713 }, { "epoch": 0.6, "grad_norm": 2.358236312866211, "learning_rate": 0.0002552344331431406, "loss": 0.3102, "step": 1714 }, { "epoch": 0.6, 
"grad_norm": 2.8951985836029053, "learning_rate": 0.0002551972215331183, "loss": 0.6374, "step": 1715 }, { "epoch": 0.6, "grad_norm": 3.7932424545288086, "learning_rate": 0.00025516000992309597, "loss": 0.3472, "step": 1716 }, { "epoch": 0.6, "grad_norm": 3.9364991188049316, "learning_rate": 0.0002551227983130737, "loss": 1.3303, "step": 1717 }, { "epoch": 0.6, "grad_norm": 2.639146566390991, "learning_rate": 0.0002550855867030513, "loss": 0.2635, "step": 1718 }, { "epoch": 0.6, "grad_norm": 3.6595664024353027, "learning_rate": 0.00025504837509302897, "loss": 0.4851, "step": 1719 }, { "epoch": 0.6, "grad_norm": 3.7507216930389404, "learning_rate": 0.0002550111634830067, "loss": 0.7927, "step": 1720 }, { "epoch": 0.6, "grad_norm": 6.605137348175049, "learning_rate": 0.0002549739518729844, "loss": 1.9443, "step": 1721 }, { "epoch": 0.6, "grad_norm": 3.5037612915039062, "learning_rate": 0.000254936740262962, "loss": 0.4611, "step": 1722 }, { "epoch": 0.6, "grad_norm": 2.2894933223724365, "learning_rate": 0.0002548995286529397, "loss": 0.4025, "step": 1723 }, { "epoch": 0.6, "grad_norm": 3.477785587310791, "learning_rate": 0.0002548623170429174, "loss": 1.0904, "step": 1724 }, { "epoch": 0.6, "grad_norm": 1.9483599662780762, "learning_rate": 0.000254825105432895, "loss": 0.276, "step": 1725 }, { "epoch": 0.6, "grad_norm": 2.0058553218841553, "learning_rate": 0.00025478789382287273, "loss": 0.5288, "step": 1726 }, { "epoch": 0.61, "grad_norm": 3.122645616531372, "learning_rate": 0.0002547506822128504, "loss": 0.5928, "step": 1727 }, { "epoch": 0.61, "grad_norm": 2.18483829498291, "learning_rate": 0.0002547134706028281, "loss": 0.579, "step": 1728 }, { "epoch": 0.61, "grad_norm": 2.4548044204711914, "learning_rate": 0.00025467625899280573, "loss": 0.5435, "step": 1729 }, { "epoch": 0.61, "grad_norm": 4.351948261260986, "learning_rate": 0.0002546390473827834, "loss": 0.7439, "step": 1730 }, { "epoch": 0.61, "grad_norm": 2.7342119216918945, "learning_rate": 
0.0002546018357727611, "loss": 0.5848, "step": 1731 }, { "epoch": 0.61, "grad_norm": 2.438290596008301, "learning_rate": 0.00025456462416273873, "loss": 0.4699, "step": 1732 }, { "epoch": 0.61, "grad_norm": 2.6222329139709473, "learning_rate": 0.00025452741255271643, "loss": 0.267, "step": 1733 }, { "epoch": 0.61, "grad_norm": 3.6478686332702637, "learning_rate": 0.00025449020094269414, "loss": 0.5306, "step": 1734 }, { "epoch": 0.61, "grad_norm": 3.7666268348693848, "learning_rate": 0.0002544529893326718, "loss": 0.676, "step": 1735 }, { "epoch": 0.61, "grad_norm": 3.8781347274780273, "learning_rate": 0.00025441577772264943, "loss": 0.7332, "step": 1736 }, { "epoch": 0.61, "grad_norm": 5.474586486816406, "learning_rate": 0.0002543785661126271, "loss": 1.1677, "step": 1737 }, { "epoch": 0.61, "grad_norm": 3.475315809249878, "learning_rate": 0.0002543413545026048, "loss": 0.4186, "step": 1738 }, { "epoch": 0.61, "grad_norm": 2.4638478755950928, "learning_rate": 0.0002543041428925825, "loss": 0.5006, "step": 1739 }, { "epoch": 0.61, "grad_norm": 2.56996750831604, "learning_rate": 0.00025426693128256014, "loss": 0.3888, "step": 1740 }, { "epoch": 0.61, "grad_norm": 6.963738918304443, "learning_rate": 0.00025422971967253784, "loss": 0.6737, "step": 1741 }, { "epoch": 0.61, "grad_norm": 2.476889133453369, "learning_rate": 0.0002541925080625155, "loss": 0.2271, "step": 1742 }, { "epoch": 0.61, "grad_norm": 3.8654210567474365, "learning_rate": 0.00025415529645249314, "loss": 1.5383, "step": 1743 }, { "epoch": 0.61, "grad_norm": 6.272684097290039, "learning_rate": 0.00025411808484247084, "loss": 1.1683, "step": 1744 }, { "epoch": 0.61, "grad_norm": 2.0367603302001953, "learning_rate": 0.0002540808732324485, "loss": 0.1529, "step": 1745 }, { "epoch": 0.61, "grad_norm": 4.270781993865967, "learning_rate": 0.0002540436616224262, "loss": 0.3932, "step": 1746 }, { "epoch": 0.61, "grad_norm": 3.1253650188446045, "learning_rate": 0.00025400645001240384, "loss": 0.2344, "step": 
1747 }, { "epoch": 0.61, "grad_norm": 4.384814739227295, "learning_rate": 0.0002539692384023815, "loss": 0.6099, "step": 1748 }, { "epoch": 0.61, "grad_norm": 3.7127060890197754, "learning_rate": 0.0002539320267923592, "loss": 0.7978, "step": 1749 }, { "epoch": 0.61, "grad_norm": 8.662912368774414, "learning_rate": 0.00025389481518233684, "loss": 1.5128, "step": 1750 }, { "epoch": 0.61, "grad_norm": 4.237752914428711, "learning_rate": 0.00025385760357231454, "loss": 1.3841, "step": 1751 }, { "epoch": 0.61, "grad_norm": 6.472276210784912, "learning_rate": 0.00025382039196229225, "loss": 1.1862, "step": 1752 }, { "epoch": 0.61, "grad_norm": 5.888750076293945, "learning_rate": 0.0002537831803522699, "loss": 1.139, "step": 1753 }, { "epoch": 0.61, "grad_norm": 4.32738733291626, "learning_rate": 0.00025374596874224754, "loss": 0.6998, "step": 1754 }, { "epoch": 0.61, "grad_norm": 4.718627452850342, "learning_rate": 0.0002537087571322252, "loss": 1.035, "step": 1755 }, { "epoch": 0.62, "grad_norm": 2.6345198154449463, "learning_rate": 0.0002536715455222029, "loss": 0.8893, "step": 1756 }, { "epoch": 0.62, "grad_norm": 2.600464105606079, "learning_rate": 0.0002536343339121806, "loss": 0.7878, "step": 1757 }, { "epoch": 0.62, "grad_norm": 3.2108471393585205, "learning_rate": 0.00025359712230215825, "loss": 0.8279, "step": 1758 }, { "epoch": 0.62, "grad_norm": 3.6531155109405518, "learning_rate": 0.00025355991069213595, "loss": 1.1393, "step": 1759 }, { "epoch": 0.62, "grad_norm": 3.240154504776001, "learning_rate": 0.0002535226990821136, "loss": 0.5395, "step": 1760 }, { "epoch": 0.62, "grad_norm": 1.4271785020828247, "learning_rate": 0.00025348548747209125, "loss": 0.2189, "step": 1761 }, { "epoch": 0.62, "grad_norm": 2.7750396728515625, "learning_rate": 0.00025344827586206895, "loss": 0.7323, "step": 1762 }, { "epoch": 0.62, "grad_norm": 3.3077313899993896, "learning_rate": 0.0002534110642520466, "loss": 0.4554, "step": 1763 }, { "epoch": 0.62, "grad_norm": 
2.405827760696411, "learning_rate": 0.0002533738526420243, "loss": 0.6688, "step": 1764 }, { "epoch": 0.62, "grad_norm": 2.642482042312622, "learning_rate": 0.00025333664103200195, "loss": 0.2194, "step": 1765 }, { "epoch": 0.62, "grad_norm": 2.6313064098358154, "learning_rate": 0.00025329942942197965, "loss": 0.6633, "step": 1766 }, { "epoch": 0.62, "grad_norm": 10.008116722106934, "learning_rate": 0.0002532622178119573, "loss": 2.5217, "step": 1767 }, { "epoch": 0.62, "grad_norm": 4.850469589233398, "learning_rate": 0.00025322500620193495, "loss": 1.1002, "step": 1768 }, { "epoch": 0.62, "grad_norm": 4.958583354949951, "learning_rate": 0.00025318779459191265, "loss": 0.5638, "step": 1769 }, { "epoch": 0.62, "grad_norm": 2.5077438354492188, "learning_rate": 0.00025315058298189036, "loss": 0.4724, "step": 1770 }, { "epoch": 0.62, "grad_norm": 5.4337687492370605, "learning_rate": 0.000253113371371868, "loss": 1.4577, "step": 1771 }, { "epoch": 0.62, "grad_norm": 3.459052801132202, "learning_rate": 0.00025307615976184565, "loss": 0.6951, "step": 1772 }, { "epoch": 0.62, "grad_norm": 8.31182861328125, "learning_rate": 0.00025303894815182336, "loss": 1.0552, "step": 1773 }, { "epoch": 0.62, "grad_norm": 3.019193172454834, "learning_rate": 0.000253001736541801, "loss": 0.2639, "step": 1774 }, { "epoch": 0.62, "grad_norm": NaN, "learning_rate": 0.000253001736541801, "loss": 0.1885, "step": 1775 }, { "epoch": 0.62, "grad_norm": 4.729272842407227, "learning_rate": 0.0002529645249317787, "loss": 1.1689, "step": 1776 }, { "epoch": 0.62, "grad_norm": 2.806692361831665, "learning_rate": 0.00025292731332175636, "loss": 0.9207, "step": 1777 }, { "epoch": 0.62, "grad_norm": 3.110698699951172, "learning_rate": 0.00025289010171173406, "loss": 0.6481, "step": 1778 }, { "epoch": 0.62, "grad_norm": 3.653905153274536, "learning_rate": 0.0002528528901017117, "loss": 0.7404, "step": 1779 }, { "epoch": 0.62, "grad_norm": 3.4081614017486572, "learning_rate": 0.00025281567849168936, "loss": 
0.7589, "step": 1780 }, { "epoch": 0.62, "grad_norm": 2.3491172790527344, "learning_rate": 0.00025277846688166706, "loss": 0.5285, "step": 1781 }, { "epoch": 0.62, "grad_norm": 2.7228968143463135, "learning_rate": 0.0002527412552716447, "loss": 0.947, "step": 1782 }, { "epoch": 0.62, "grad_norm": 3.284248113632202, "learning_rate": 0.0002527040436616224, "loss": 0.4871, "step": 1783 }, { "epoch": 0.63, "grad_norm": 3.298612117767334, "learning_rate": 0.0002526668320516001, "loss": 0.6996, "step": 1784 }, { "epoch": 0.63, "grad_norm": 1.53757643699646, "learning_rate": 0.00025262962044157776, "loss": 0.3541, "step": 1785 }, { "epoch": 0.63, "grad_norm": 2.7956089973449707, "learning_rate": 0.0002525924088315554, "loss": 0.3465, "step": 1786 }, { "epoch": 0.63, "grad_norm": 4.37371826171875, "learning_rate": 0.0002525551972215331, "loss": 0.5631, "step": 1787 }, { "epoch": 0.63, "grad_norm": 3.794459819793701, "learning_rate": 0.00025251798561151076, "loss": 0.8957, "step": 1788 }, { "epoch": 0.63, "grad_norm": 2.2786507606506348, "learning_rate": 0.00025248077400148847, "loss": 0.5798, "step": 1789 }, { "epoch": 0.63, "grad_norm": 2.390256404876709, "learning_rate": 0.0002524435623914661, "loss": 0.4864, "step": 1790 }, { "epoch": 0.63, "grad_norm": 4.0629777908325195, "learning_rate": 0.00025240635078144376, "loss": 0.6592, "step": 1791 }, { "epoch": 0.63, "grad_norm": 3.212480068206787, "learning_rate": 0.00025236913917142147, "loss": 1.3613, "step": 1792 }, { "epoch": 0.63, "grad_norm": 4.64257287979126, "learning_rate": 0.0002523319275613991, "loss": 0.4347, "step": 1793 }, { "epoch": 0.63, "grad_norm": 4.668285369873047, "learning_rate": 0.0002522947159513768, "loss": 0.7722, "step": 1794 }, { "epoch": 0.63, "grad_norm": 3.8437860012054443, "learning_rate": 0.00025225750434135447, "loss": 0.3561, "step": 1795 }, { "epoch": 0.63, "grad_norm": 3.092412233352661, "learning_rate": 0.00025222029273133217, "loss": 0.4881, "step": 1796 }, { "epoch": 0.63, "grad_norm": 
1.986132264137268, "learning_rate": 0.0002521830811213098, "loss": 0.1549, "step": 1797 }, { "epoch": 0.63, "grad_norm": 2.6645147800445557, "learning_rate": 0.00025214586951128747, "loss": 0.4356, "step": 1798 }, { "epoch": 0.63, "grad_norm": 3.3920583724975586, "learning_rate": 0.00025210865790126517, "loss": 0.3476, "step": 1799 }, { "epoch": 0.63, "grad_norm": 3.257504463195801, "learning_rate": 0.0002520714462912429, "loss": 0.6923, "step": 1800 }, { "epoch": 0.63, "eval_loss": 0.6619295477867126, "eval_runtime": 50.7331, "eval_samples_per_second": 42.733, "eval_steps_per_second": 10.683, "eval_wer": 0.5835495588998443, "step": 1800 }, { "epoch": 0.63, "grad_norm": 3.3466756343841553, "learning_rate": 0.0002520342346812205, "loss": 0.6547, "step": 1801 }, { "epoch": 0.63, "grad_norm": 3.27302885055542, "learning_rate": 0.0002519970230711982, "loss": 1.0436, "step": 1802 }, { "epoch": 0.63, "grad_norm": 2.4561309814453125, "learning_rate": 0.0002519598114611759, "loss": 0.8965, "step": 1803 }, { "epoch": 0.63, "grad_norm": 3.3739259243011475, "learning_rate": 0.0002519225998511535, "loss": 0.705, "step": 1804 }, { "epoch": 0.63, "grad_norm": 2.3616445064544678, "learning_rate": 0.0002518853882411312, "loss": 0.8433, "step": 1805 }, { "epoch": 0.63, "grad_norm": 2.9913322925567627, "learning_rate": 0.0002518481766311089, "loss": 0.4734, "step": 1806 }, { "epoch": 0.63, "grad_norm": 2.3768415451049805, "learning_rate": 0.0002518109650210866, "loss": 0.4895, "step": 1807 }, { "epoch": 0.63, "grad_norm": 2.2762320041656494, "learning_rate": 0.0002517737534110642, "loss": 0.621, "step": 1808 }, { "epoch": 0.63, "grad_norm": 3.1981852054595947, "learning_rate": 0.00025173654180104193, "loss": 0.5364, "step": 1809 }, { "epoch": 0.63, "grad_norm": 2.924687147140503, "learning_rate": 0.0002516993301910196, "loss": 0.2811, "step": 1810 }, { "epoch": 0.63, "grad_norm": 2.7158939838409424, "learning_rate": 0.0002516621185809972, "loss": 0.6797, "step": 1811 }, { "epoch": 
0.63, "grad_norm": 2.226498603820801, "learning_rate": 0.00025162490697097493, "loss": 0.3998, "step": 1812 }, { "epoch": 0.64, "grad_norm": 2.0350279808044434, "learning_rate": 0.0002515876953609526, "loss": 0.2877, "step": 1813 }, { "epoch": 0.64, "grad_norm": 4.871344089508057, "learning_rate": 0.0002515504837509303, "loss": 0.6181, "step": 1814 }, { "epoch": 0.64, "grad_norm": 3.3787949085235596, "learning_rate": 0.00025151327214090793, "loss": 0.7396, "step": 1815 }, { "epoch": 0.64, "grad_norm": 2.748871326446533, "learning_rate": 0.0002514760605308856, "loss": 0.2645, "step": 1816 }, { "epoch": 0.64, "grad_norm": 3.798830986022949, "learning_rate": 0.0002514388489208633, "loss": 0.5294, "step": 1817 }, { "epoch": 0.64, "grad_norm": 4.125420570373535, "learning_rate": 0.000251401637310841, "loss": 0.565, "step": 1818 }, { "epoch": 0.64, "grad_norm": 4.471548080444336, "learning_rate": 0.00025136442570081863, "loss": 0.5297, "step": 1819 }, { "epoch": 0.64, "grad_norm": 1.885577917098999, "learning_rate": 0.00025132721409079633, "loss": 0.3337, "step": 1820 }, { "epoch": 0.64, "grad_norm": 1.8695416450500488, "learning_rate": 0.000251290002480774, "loss": 0.2729, "step": 1821 }, { "epoch": 0.64, "grad_norm": 4.600594520568848, "learning_rate": 0.00025125279087075163, "loss": 1.8971, "step": 1822 }, { "epoch": 0.64, "grad_norm": 6.382452011108398, "learning_rate": 0.00025121557926072933, "loss": 2.4557, "step": 1823 }, { "epoch": 0.64, "grad_norm": 7.36460542678833, "learning_rate": 0.000251178367650707, "loss": 2.337, "step": 1824 }, { "epoch": 0.64, "grad_norm": 5.724518299102783, "learning_rate": 0.0002511411560406847, "loss": 1.9224, "step": 1825 }, { "epoch": 0.64, "grad_norm": 3.918628454208374, "learning_rate": 0.00025110394443066234, "loss": 1.1783, "step": 1826 }, { "epoch": 0.64, "grad_norm": 3.5602731704711914, "learning_rate": 0.00025106673282064004, "loss": 1.2087, "step": 1827 }, { "epoch": 0.64, "grad_norm": 1.9868627786636353, "learning_rate": 
0.0002510295212106177, "loss": 0.5769, "step": 1828 }, { "epoch": 0.64, "grad_norm": 3.202514410018921, "learning_rate": 0.00025099230960059534, "loss": 1.2293, "step": 1829 }, { "epoch": 0.64, "grad_norm": 1.9510756731033325, "learning_rate": 0.00025095509799057304, "loss": 0.4025, "step": 1830 }, { "epoch": 0.64, "grad_norm": 2.144929885864258, "learning_rate": 0.00025091788638055074, "loss": 0.4471, "step": 1831 }, { "epoch": 0.64, "grad_norm": 1.4620763063430786, "learning_rate": 0.0002508806747705284, "loss": 0.2977, "step": 1832 }, { "epoch": 0.64, "grad_norm": 2.262373208999634, "learning_rate": 0.00025084346316050604, "loss": 0.6991, "step": 1833 }, { "epoch": 0.64, "grad_norm": 3.069397211074829, "learning_rate": 0.00025080625155048374, "loss": 0.4164, "step": 1834 }, { "epoch": 0.64, "grad_norm": 13.504817008972168, "learning_rate": 0.0002507690399404614, "loss": 3.6967, "step": 1835 }, { "epoch": 0.64, "grad_norm": 3.4897937774658203, "learning_rate": 0.0002507318283304391, "loss": 0.8833, "step": 1836 }, { "epoch": 0.64, "grad_norm": 3.5838449001312256, "learning_rate": 0.00025069461672041674, "loss": 0.6879, "step": 1837 }, { "epoch": 0.64, "grad_norm": 4.084447860717773, "learning_rate": 0.00025065740511039444, "loss": 0.9948, "step": 1838 }, { "epoch": 0.64, "grad_norm": 3.7266480922698975, "learning_rate": 0.0002506201935003721, "loss": 0.978, "step": 1839 }, { "epoch": 0.64, "grad_norm": 2.592857599258423, "learning_rate": 0.00025058298189034974, "loss": 0.4904, "step": 1840 }, { "epoch": 0.65, "grad_norm": 3.836439609527588, "learning_rate": 0.00025054577028032744, "loss": 0.4452, "step": 1841 }, { "epoch": 0.65, "grad_norm": 4.47084379196167, "learning_rate": 0.0002505085586703051, "loss": 0.7633, "step": 1842 }, { "epoch": 0.65, "grad_norm": 3.29998517036438, "learning_rate": 0.0002504713470602828, "loss": 1.2435, "step": 1843 }, { "epoch": 0.65, "grad_norm": 2.52744460105896, "learning_rate": 0.0002504341354502605, "loss": 0.3426, "step": 1844 
}, { "epoch": 0.65, "grad_norm": 2.3847482204437256, "learning_rate": 0.00025039692384023815, "loss": 0.2179, "step": 1845 }, { "epoch": 0.65, "grad_norm": 5.141510009765625, "learning_rate": 0.0002503597122302158, "loss": 0.6102, "step": 1846 }, { "epoch": 0.65, "grad_norm": 4.136857509613037, "learning_rate": 0.00025032250062019345, "loss": 1.0573, "step": 1847 }, { "epoch": 0.65, "grad_norm": 2.8954155445098877, "learning_rate": 0.00025028528901017115, "loss": 0.4377, "step": 1848 }, { "epoch": 0.65, "grad_norm": 2.4569027423858643, "learning_rate": 0.00025024807740014885, "loss": 0.2517, "step": 1849 }, { "epoch": 0.65, "grad_norm": 4.620333671569824, "learning_rate": 0.0002502108657901265, "loss": 1.7501, "step": 1850 }, { "epoch": 0.65, "grad_norm": 2.291147470474243, "learning_rate": 0.0002501736541801042, "loss": 1.2419, "step": 1851 }, { "epoch": 0.65, "grad_norm": 3.5810606479644775, "learning_rate": 0.00025013644257008185, "loss": 1.0098, "step": 1852 }, { "epoch": 0.65, "grad_norm": 3.492764472961426, "learning_rate": 0.0002500992309600595, "loss": 0.8263, "step": 1853 }, { "epoch": 0.65, "grad_norm": 2.030451536178589, "learning_rate": 0.0002500620193500372, "loss": 0.7977, "step": 1854 }, { "epoch": 0.65, "grad_norm": 3.7013792991638184, "learning_rate": 0.00025002480774001485, "loss": 0.7732, "step": 1855 }, { "epoch": 0.65, "grad_norm": 2.162282943725586, "learning_rate": 0.00024998759612999255, "loss": 0.8146, "step": 1856 }, { "epoch": 0.65, "grad_norm": 3.0203332901000977, "learning_rate": 0.0002499503845199702, "loss": 0.8234, "step": 1857 }, { "epoch": 0.65, "grad_norm": 2.6350369453430176, "learning_rate": 0.00024991317290994785, "loss": 0.3615, "step": 1858 }, { "epoch": 0.65, "grad_norm": 3.785946846008301, "learning_rate": 0.00024987596129992556, "loss": 0.6719, "step": 1859 }, { "epoch": 0.65, "grad_norm": 4.30195426940918, "learning_rate": 0.0002498387496899032, "loss": 0.4817, "step": 1860 }, { "epoch": 0.65, "grad_norm": 
2.285341262817383, "learning_rate": 0.0002498015380798809, "loss": 0.4845, "step": 1861 }, { "epoch": 0.65, "grad_norm": 2.9334990978240967, "learning_rate": 0.0002497643264698586, "loss": 0.6951, "step": 1862 }, { "epoch": 0.65, "grad_norm": 1.6328988075256348, "learning_rate": 0.00024972711485983626, "loss": 0.3517, "step": 1863 }, { "epoch": 0.65, "grad_norm": 3.6665728092193604, "learning_rate": 0.0002496899032498139, "loss": 1.0003, "step": 1864 }, { "epoch": 0.65, "grad_norm": 2.054015636444092, "learning_rate": 0.00024965269163979156, "loss": 0.4366, "step": 1865 }, { "epoch": 0.65, "grad_norm": 3.34871506690979, "learning_rate": 0.00024961548002976926, "loss": 0.3632, "step": 1866 }, { "epoch": 0.65, "grad_norm": 1.7466020584106445, "learning_rate": 0.00024957826841974696, "loss": 0.2544, "step": 1867 }, { "epoch": 0.65, "grad_norm": 2.3395614624023438, "learning_rate": 0.0002495410568097246, "loss": 0.2607, "step": 1868 }, { "epoch": 0.65, "grad_norm": 5.0486836433410645, "learning_rate": 0.0002495038451997023, "loss": 0.6904, "step": 1869 }, { "epoch": 0.66, "grad_norm": 5.611022472381592, "learning_rate": 0.00024946663358967996, "loss": 0.643, "step": 1870 }, { "epoch": 0.66, "grad_norm": 1.7561452388763428, "learning_rate": 0.0002494294219796576, "loss": 0.2442, "step": 1871 }, { "epoch": 0.66, "grad_norm": 5.131147384643555, "learning_rate": 0.0002493922103696353, "loss": 0.6293, "step": 1872 }, { "epoch": 0.66, "grad_norm": 6.987359523773193, "learning_rate": 0.00024935499875961296, "loss": 0.5611, "step": 1873 }, { "epoch": 0.66, "grad_norm": 5.005058765411377, "learning_rate": 0.00024931778714959066, "loss": 1.9181, "step": 1874 }, { "epoch": 0.66, "grad_norm": 5.872385501861572, "learning_rate": 0.0002492805755395683, "loss": 0.4414, "step": 1875 }, { "epoch": 0.66, "grad_norm": 3.087235927581787, "learning_rate": 0.000249243363929546, "loss": 1.1648, "step": 1876 }, { "epoch": 0.66, "grad_norm": 2.866982936859131, "learning_rate": 
0.00024920615231952367, "loss": 0.8997, "step": 1877 }, { "epoch": 0.66, "grad_norm": 3.6549713611602783, "learning_rate": 0.0002491689407095013, "loss": 0.7768, "step": 1878 }, { "epoch": 0.66, "grad_norm": 3.923379421234131, "learning_rate": 0.000249131729099479, "loss": 0.7754, "step": 1879 }, { "epoch": 0.66, "grad_norm": 3.286487340927124, "learning_rate": 0.0002490945174894567, "loss": 0.5886, "step": 1880 }, { "epoch": 0.66, "grad_norm": 2.357010841369629, "learning_rate": 0.00024905730587943437, "loss": 0.5951, "step": 1881 }, { "epoch": 0.66, "grad_norm": 1.6229811906814575, "learning_rate": 0.000249020094269412, "loss": 0.4376, "step": 1882 }, { "epoch": 0.66, "grad_norm": 2.4344239234924316, "learning_rate": 0.0002489828826593897, "loss": 0.7466, "step": 1883 }, { "epoch": 0.66, "grad_norm": 6.589191436767578, "learning_rate": 0.00024894567104936737, "loss": 0.6586, "step": 1884 }, { "epoch": 0.66, "grad_norm": 3.2534403800964355, "learning_rate": 0.00024890845943934507, "loss": 0.7062, "step": 1885 }, { "epoch": 0.66, "grad_norm": 2.400862455368042, "learning_rate": 0.0002488712478293227, "loss": 0.306, "step": 1886 }, { "epoch": 0.66, "grad_norm": 2.253848075866699, "learning_rate": 0.0002488340362193004, "loss": 0.6433, "step": 1887 }, { "epoch": 0.66, "grad_norm": 2.553795337677002, "learning_rate": 0.00024879682460927807, "loss": 0.4091, "step": 1888 }, { "epoch": 0.66, "grad_norm": 2.4395694732666016, "learning_rate": 0.0002487596129992557, "loss": 0.5482, "step": 1889 }, { "epoch": 0.66, "grad_norm": 3.8321399688720703, "learning_rate": 0.0002487224013892334, "loss": 0.6352, "step": 1890 }, { "epoch": 0.66, "grad_norm": 2.5882153511047363, "learning_rate": 0.00024868518977921107, "loss": 0.4969, "step": 1891 }, { "epoch": 0.66, "grad_norm": 4.412918567657471, "learning_rate": 0.0002486479781691888, "loss": 1.0272, "step": 1892 }, { "epoch": 0.66, "grad_norm": 5.513781547546387, "learning_rate": 0.0002486107665591665, "loss": 1.2136, "step": 1893 
}, { "epoch": 0.66, "grad_norm": 2.6732091903686523, "learning_rate": 0.0002485735549491441, "loss": 0.3256, "step": 1894 }, { "epoch": 0.66, "grad_norm": 3.6457021236419678, "learning_rate": 0.0002485363433391218, "loss": 0.4514, "step": 1895 }, { "epoch": 0.66, "grad_norm": 3.561769485473633, "learning_rate": 0.0002484991317290994, "loss": 0.6833, "step": 1896 }, { "epoch": 0.66, "grad_norm": 2.9853644371032715, "learning_rate": 0.0002484619201190771, "loss": 1.2547, "step": 1897 }, { "epoch": 0.67, "grad_norm": 1.9570162296295166, "learning_rate": 0.00024842470850905483, "loss": 0.2446, "step": 1898 }, { "epoch": 0.67, "grad_norm": 2.1816675662994385, "learning_rate": 0.0002483874968990325, "loss": 0.4237, "step": 1899 }, { "epoch": 0.67, "grad_norm": 8.543338775634766, "learning_rate": 0.0002483502852890101, "loss": 1.1266, "step": 1900 }, { "epoch": 0.67, "eval_loss": 0.6919850707054138, "eval_runtime": 51.2516, "eval_samples_per_second": 42.301, "eval_steps_per_second": 10.575, "eval_wer": 0.6012800553537451, "step": 1900 }, { "epoch": 0.67, "grad_norm": 2.464935302734375, "learning_rate": 0.00024831307367898783, "loss": 0.9893, "step": 1901 }, { "epoch": 0.67, "grad_norm": 2.831908941268921, "learning_rate": 0.0002482758620689655, "loss": 0.9191, "step": 1902 }, { "epoch": 0.67, "grad_norm": 4.71111536026001, "learning_rate": 0.0002482386504589432, "loss": 1.5041, "step": 1903 }, { "epoch": 0.67, "grad_norm": 2.486187219619751, "learning_rate": 0.00024820143884892083, "loss": 0.4669, "step": 1904 }, { "epoch": 0.67, "grad_norm": 2.497227191925049, "learning_rate": 0.00024816422723889853, "loss": 0.8797, "step": 1905 }, { "epoch": 0.67, "grad_norm": 3.937802791595459, "learning_rate": 0.0002481270156288762, "loss": 0.7039, "step": 1906 }, { "epoch": 0.67, "grad_norm": 2.5400185585021973, "learning_rate": 0.00024808980401885383, "loss": 0.7996, "step": 1907 }, { "epoch": 0.67, "grad_norm": 2.81622576713562, "learning_rate": 0.00024805259240883153, "loss": 
0.5797, "step": 1908 }, { "epoch": 0.67, "grad_norm": 2.533505916595459, "learning_rate": 0.0002480153807988092, "loss": 0.7682, "step": 1909 }, { "epoch": 0.67, "grad_norm": 2.5406887531280518, "learning_rate": 0.0002479781691887869, "loss": 0.8529, "step": 1910 }, { "epoch": 0.67, "grad_norm": 2.635380268096924, "learning_rate": 0.0002479409575787646, "loss": 0.5315, "step": 1911 }, { "epoch": 0.67, "grad_norm": 3.293522834777832, "learning_rate": 0.00024790374596874224, "loss": 0.762, "step": 1912 }, { "epoch": 0.67, "grad_norm": 3.7569010257720947, "learning_rate": 0.0002478665343587199, "loss": 0.8445, "step": 1913 }, { "epoch": 0.67, "grad_norm": 4.208041667938232, "learning_rate": 0.00024782932274869753, "loss": 0.7308, "step": 1914 }, { "epoch": 0.67, "grad_norm": 2.8499300479888916, "learning_rate": 0.00024779211113867524, "loss": 0.7733, "step": 1915 }, { "epoch": 0.67, "grad_norm": 1.9192874431610107, "learning_rate": 0.00024775489952865294, "loss": 0.3927, "step": 1916 }, { "epoch": 0.67, "grad_norm": 2.4525372982025146, "learning_rate": 0.0002477176879186306, "loss": 0.2387, "step": 1917 }, { "epoch": 0.67, "grad_norm": 2.631014347076416, "learning_rate": 0.0002476804763086083, "loss": 0.7168, "step": 1918 }, { "epoch": 0.67, "grad_norm": 1.9727121591567993, "learning_rate": 0.00024764326469858594, "loss": 0.5055, "step": 1919 }, { "epoch": 0.67, "grad_norm": 8.828970909118652, "learning_rate": 0.0002476060530885636, "loss": 1.4732, "step": 1920 }, { "epoch": 0.67, "grad_norm": 4.060351848602295, "learning_rate": 0.0002475688414785413, "loss": 1.418, "step": 1921 }, { "epoch": 0.67, "grad_norm": 2.0158917903900146, "learning_rate": 0.00024753162986851894, "loss": 0.2002, "step": 1922 }, { "epoch": 0.67, "grad_norm": 3.4808578491210938, "learning_rate": 0.00024749441825849664, "loss": 0.5887, "step": 1923 }, { "epoch": 0.67, "grad_norm": 5.383602142333984, "learning_rate": 0.0002474572066484743, "loss": 1.5568, "step": 1924 }, { "epoch": 0.67, 
"grad_norm": 2.389026165008545, "learning_rate": 0.000247419995038452, "loss": 0.3765, "step": 1925 }, { "epoch": 0.67, "grad_norm": 2.9787375926971436, "learning_rate": 0.00024738278342842964, "loss": 0.9215, "step": 1926 }, { "epoch": 0.68, "grad_norm": 1.7015265226364136, "learning_rate": 0.0002473455718184073, "loss": 0.5214, "step": 1927 }, { "epoch": 0.68, "grad_norm": 1.7144050598144531, "learning_rate": 0.000247308360208385, "loss": 0.6626, "step": 1928 }, { "epoch": 0.68, "grad_norm": 1.8831634521484375, "learning_rate": 0.0002472711485983627, "loss": 0.7753, "step": 1929 }, { "epoch": 0.68, "grad_norm": 2.180262565612793, "learning_rate": 0.00024723393698834035, "loss": 0.5357, "step": 1930 }, { "epoch": 0.68, "grad_norm": 2.9402575492858887, "learning_rate": 0.000247196725378318, "loss": 0.4598, "step": 1931 }, { "epoch": 0.68, "grad_norm": 2.6572482585906982, "learning_rate": 0.0002471595137682957, "loss": 0.5624, "step": 1932 }, { "epoch": 0.68, "grad_norm": 2.5817763805389404, "learning_rate": 0.00024712230215827335, "loss": 0.6215, "step": 1933 }, { "epoch": 0.68, "grad_norm": 3.858384132385254, "learning_rate": 0.00024708509054825105, "loss": 0.879, "step": 1934 }, { "epoch": 0.68, "grad_norm": 2.5925073623657227, "learning_rate": 0.0002470478789382287, "loss": 0.7597, "step": 1935 }, { "epoch": 0.68, "grad_norm": 1.7075647115707397, "learning_rate": 0.0002470106673282064, "loss": 0.3585, "step": 1936 }, { "epoch": 0.68, "grad_norm": 2.2654693126678467, "learning_rate": 0.00024697345571818405, "loss": 0.4981, "step": 1937 }, { "epoch": 0.68, "grad_norm": 2.9442179203033447, "learning_rate": 0.0002469362441081617, "loss": 0.5214, "step": 1938 }, { "epoch": 0.68, "grad_norm": 2.89056396484375, "learning_rate": 0.0002468990324981394, "loss": 0.4043, "step": 1939 }, { "epoch": 0.68, "grad_norm": 2.2005202770233154, "learning_rate": 0.00024686182088811705, "loss": 0.2301, "step": 1940 }, { "epoch": 0.68, "grad_norm": 4.534944534301758, "learning_rate": 
0.00024682460927809475, "loss": 0.8182, "step": 1941 }, { "epoch": 0.68, "grad_norm": 3.317547082901001, "learning_rate": 0.0002467873976680724, "loss": 0.8466, "step": 1942 }, { "epoch": 0.68, "grad_norm": 3.431687593460083, "learning_rate": 0.0002467501860580501, "loss": 0.4799, "step": 1943 }, { "epoch": 0.68, "grad_norm": 2.7903997898101807, "learning_rate": 0.00024671297444802775, "loss": 0.4264, "step": 1944 }, { "epoch": 0.68, "grad_norm": 2.921651840209961, "learning_rate": 0.0002466757628380054, "loss": 0.3343, "step": 1945 }, { "epoch": 0.68, "grad_norm": 4.756716728210449, "learning_rate": 0.0002466385512279831, "loss": 1.4059, "step": 1946 }, { "epoch": 0.68, "grad_norm": 2.8157451152801514, "learning_rate": 0.0002466013396179608, "loss": 0.2149, "step": 1947 }, { "epoch": 0.68, "grad_norm": 4.292349338531494, "learning_rate": 0.00024656412800793846, "loss": 0.6947, "step": 1948 }, { "epoch": 0.68, "grad_norm": 5.523711204528809, "learning_rate": 0.0002465269163979161, "loss": 1.8716, "step": 1949 }, { "epoch": 0.68, "grad_norm": 5.184019088745117, "learning_rate": 0.0002464897047878938, "loss": 1.5156, "step": 1950 }, { "epoch": 0.68, "grad_norm": 9.27737808227539, "learning_rate": 0.00024645249317787146, "loss": 1.8832, "step": 1951 }, { "epoch": 0.68, "grad_norm": 11.288307189941406, "learning_rate": 0.00024641528156784916, "loss": 1.2195, "step": 1952 }, { "epoch": 0.68, "grad_norm": 6.941704273223877, "learning_rate": 0.0002463780699578268, "loss": 1.0644, "step": 1953 }, { "epoch": 0.68, "grad_norm": 5.17105770111084, "learning_rate": 0.0002463408583478045, "loss": 1.0977, "step": 1954 }, { "epoch": 0.69, "grad_norm": 2.853844165802002, "learning_rate": 0.00024630364673778216, "loss": 0.734, "step": 1955 }, { "epoch": 0.69, "grad_norm": 3.7086217403411865, "learning_rate": 0.0002462664351277598, "loss": 0.9751, "step": 1956 }, { "epoch": 0.69, "grad_norm": 2.8849620819091797, "learning_rate": 0.0002462292235177375, "loss": 0.6382, "step": 1957 }, 
{ "epoch": 0.69, "grad_norm": 2.1819937229156494, "learning_rate": 0.00024619201190771516, "loss": 0.5407, "step": 1958 }, { "epoch": 0.69, "grad_norm": 3.5230162143707275, "learning_rate": 0.00024615480029769286, "loss": 1.6646, "step": 1959 }, { "epoch": 0.69, "grad_norm": 2.293956995010376, "learning_rate": 0.00024611758868767057, "loss": 0.5963, "step": 1960 }, { "epoch": 0.69, "grad_norm": 2.532461643218994, "learning_rate": 0.0002460803770776482, "loss": 0.7947, "step": 1961 }, { "epoch": 0.69, "grad_norm": 2.1214241981506348, "learning_rate": 0.00024604316546762586, "loss": 0.6076, "step": 1962 }, { "epoch": 0.69, "grad_norm": 3.7289674282073975, "learning_rate": 0.00024600595385760357, "loss": 0.9689, "step": 1963 }, { "epoch": 0.69, "grad_norm": 2.5347867012023926, "learning_rate": 0.0002459687422475812, "loss": 0.3691, "step": 1964 }, { "epoch": 0.69, "grad_norm": 1.878653645515442, "learning_rate": 0.0002459315306375589, "loss": 0.3384, "step": 1965 }, { "epoch": 0.69, "grad_norm": 1.7828047275543213, "learning_rate": 0.00024589431902753657, "loss": 0.6173, "step": 1966 }, { "epoch": 0.69, "grad_norm": 2.254936456680298, "learning_rate": 0.00024585710741751427, "loss": 0.8577, "step": 1967 }, { "epoch": 0.69, "grad_norm": 1.9848906993865967, "learning_rate": 0.0002458198958074919, "loss": 0.2193, "step": 1968 }, { "epoch": 0.69, "grad_norm": 3.952481985092163, "learning_rate": 0.00024578268419746957, "loss": 1.0369, "step": 1969 }, { "epoch": 0.69, "grad_norm": 3.548969030380249, "learning_rate": 0.00024574547258744727, "loss": 0.749, "step": 1970 }, { "epoch": 0.69, "grad_norm": 2.2937052249908447, "learning_rate": 0.0002457082609774249, "loss": 0.4634, "step": 1971 }, { "epoch": 0.69, "grad_norm": 2.3762097358703613, "learning_rate": 0.0002456710493674026, "loss": 0.2366, "step": 1972 }, { "epoch": 0.69, "grad_norm": 5.370176792144775, "learning_rate": 0.00024563383775738027, "loss": 1.8996, "step": 1973 }, { "epoch": 0.69, "grad_norm": 
3.1844329833984375, "learning_rate": 0.0002455966261473579, "loss": 0.3431, "step": 1974 }, { "epoch": 0.69, "grad_norm": 2.5123300552368164, "learning_rate": 0.0002455594145373356, "loss": 0.2331, "step": 1975 }, { "epoch": 0.69, "grad_norm": 2.754368782043457, "learning_rate": 0.0002455222029273133, "loss": 1.245, "step": 1976 }, { "epoch": 0.69, "grad_norm": 3.0486223697662354, "learning_rate": 0.000245484991317291, "loss": 0.6925, "step": 1977 }, { "epoch": 0.69, "grad_norm": 2.9800477027893066, "learning_rate": 0.0002454477797072687, "loss": 1.1662, "step": 1978 }, { "epoch": 0.69, "grad_norm": 1.9747953414916992, "learning_rate": 0.0002454105680972463, "loss": 0.3735, "step": 1979 }, { "epoch": 0.69, "grad_norm": 2.3765416145324707, "learning_rate": 0.000245373356487224, "loss": 0.7723, "step": 1980 }, { "epoch": 0.69, "grad_norm": 3.1413557529449463, "learning_rate": 0.0002453361448772017, "loss": 0.666, "step": 1981 }, { "epoch": 0.69, "grad_norm": 2.1634583473205566, "learning_rate": 0.0002452989332671793, "loss": 0.3816, "step": 1982 }, { "epoch": 0.69, "grad_norm": 1.8471031188964844, "learning_rate": 0.00024526172165715703, "loss": 0.6517, "step": 1983 }, { "epoch": 0.7, "grad_norm": 3.5720808506011963, "learning_rate": 0.0002452245100471347, "loss": 0.4428, "step": 1984 }, { "epoch": 0.7, "grad_norm": 2.9162683486938477, "learning_rate": 0.0002451872984371124, "loss": 0.4646, "step": 1985 }, { "epoch": 0.7, "grad_norm": 3.0276424884796143, "learning_rate": 0.00024515008682709003, "loss": 0.4527, "step": 1986 }, { "epoch": 0.7, "grad_norm": 3.1987595558166504, "learning_rate": 0.0002451128752170677, "loss": 0.631, "step": 1987 }, { "epoch": 0.7, "grad_norm": 1.891221284866333, "learning_rate": 0.0002450756636070454, "loss": 0.4301, "step": 1988 }, { "epoch": 0.7, "grad_norm": 2.999260425567627, "learning_rate": 0.00024503845199702303, "loss": 0.7106, "step": 1989 }, { "epoch": 0.7, "grad_norm": 4.418884754180908, "learning_rate": 0.00024500124038700073, 
"loss": 0.8794, "step": 1990 }, { "epoch": 0.7, "grad_norm": 2.5227293968200684, "learning_rate": 0.0002449640287769784, "loss": 0.6786, "step": 1991 }, { "epoch": 0.7, "grad_norm": 3.8759443759918213, "learning_rate": 0.0002449268171669561, "loss": 0.9838, "step": 1992 }, { "epoch": 0.7, "grad_norm": 5.6880059242248535, "learning_rate": 0.00024488960555693373, "loss": 1.1084, "step": 1993 }, { "epoch": 0.7, "grad_norm": 5.6343183517456055, "learning_rate": 0.00024485239394691143, "loss": 0.8567, "step": 1994 }, { "epoch": 0.7, "grad_norm": 2.4194846153259277, "learning_rate": 0.0002448151823368891, "loss": 0.2741, "step": 1995 }, { "epoch": 0.7, "grad_norm": 2.982990264892578, "learning_rate": 0.0002447779707268668, "loss": 0.3463, "step": 1996 }, { "epoch": 0.7, "grad_norm": 3.392058849334717, "learning_rate": 0.00024474075911684444, "loss": 0.6745, "step": 1997 }, { "epoch": 0.7, "grad_norm": 4.701278209686279, "learning_rate": 0.0002447035475068221, "loss": 1.1925, "step": 1998 }, { "epoch": 0.7, "grad_norm": 3.1057417392730713, "learning_rate": 0.0002446663358967998, "loss": 0.225, "step": 1999 }, { "epoch": 0.7, "grad_norm": 2.4700098037719727, "learning_rate": 0.00024462912428677744, "loss": 0.2833, "step": 2000 }, { "epoch": 0.7, "eval_loss": 0.8290925025939941, "eval_runtime": 50.9678, "eval_samples_per_second": 42.537, "eval_steps_per_second": 10.634, "eval_wer": 0.6420169520844144, "step": 2000 }, { "epoch": 0.7, "grad_norm": 7.508627414703369, "learning_rate": 0.00024459191267675514, "loss": 1.6429, "step": 2001 }, { "epoch": 0.7, "grad_norm": 3.5678176879882812, "learning_rate": 0.0002445547010667328, "loss": 1.0799, "step": 2002 }, { "epoch": 0.7, "grad_norm": 2.7853446006774902, "learning_rate": 0.0002445174894567105, "loss": 0.7929, "step": 2003 }, { "epoch": 0.7, "grad_norm": 2.158714532852173, "learning_rate": 0.00024448027784668814, "loss": 0.6522, "step": 2004 }, { "epoch": 0.7, "grad_norm": 1.3752233982086182, "learning_rate": 
0.0002444430662366658, "loss": 0.407, "step": 2005 }, { "epoch": 0.7, "grad_norm": 2.856546401977539, "learning_rate": 0.0002444058546266435, "loss": 0.649, "step": 2006 }, { "epoch": 0.7, "grad_norm": 2.812596321105957, "learning_rate": 0.0002443686430166212, "loss": 0.4625, "step": 2007 }, { "epoch": 0.7, "grad_norm": 2.5984840393066406, "learning_rate": 0.00024433143140659884, "loss": 0.3318, "step": 2008 }, { "epoch": 0.7, "grad_norm": 2.0432894229888916, "learning_rate": 0.00024429421979657654, "loss": 0.3098, "step": 2009 }, { "epoch": 0.7, "grad_norm": 2.646289587020874, "learning_rate": 0.0002442570081865542, "loss": 0.4007, "step": 2010 }, { "epoch": 0.7, "grad_norm": 2.5235435962677, "learning_rate": 0.00024421979657653184, "loss": 0.3968, "step": 2011 }, { "epoch": 0.7, "grad_norm": 4.276270866394043, "learning_rate": 0.00024418258496650954, "loss": 1.1839, "step": 2012 }, { "epoch": 0.71, "grad_norm": 3.5912859439849854, "learning_rate": 0.0002441453733564872, "loss": 0.5202, "step": 2013 }, { "epoch": 0.71, "grad_norm": 4.616440296173096, "learning_rate": 0.00024410816174646487, "loss": 1.8386, "step": 2014 }, { "epoch": 0.71, "grad_norm": 4.167287349700928, "learning_rate": 0.00024407095013644255, "loss": 0.7833, "step": 2015 }, { "epoch": 0.71, "grad_norm": 3.9030117988586426, "learning_rate": 0.00024403373852642022, "loss": 0.9212, "step": 2016 }, { "epoch": 0.71, "grad_norm": 5.886682987213135, "learning_rate": 0.0002439965269163979, "loss": 0.3937, "step": 2017 }, { "epoch": 0.71, "grad_norm": 2.818249464035034, "learning_rate": 0.00024395931530637555, "loss": 0.305, "step": 2018 }, { "epoch": 0.71, "grad_norm": 4.023711204528809, "learning_rate": 0.00024392210369635325, "loss": 0.5857, "step": 2019 }, { "epoch": 0.71, "grad_norm": 3.1700754165649414, "learning_rate": 0.00024388489208633092, "loss": 0.5954, "step": 2020 }, { "epoch": 0.71, "grad_norm": 4.295510768890381, "learning_rate": 0.00024384768047630857, "loss": 0.742, "step": 2021 }, { 
"epoch": 0.71, "grad_norm": 4.545529365539551, "learning_rate": 0.00024381046886628628, "loss": 0.9577, "step": 2022 }, { "epoch": 0.71, "grad_norm": 2.260254383087158, "learning_rate": 0.00024377325725626392, "loss": 0.2636, "step": 2023 }, { "epoch": 0.71, "grad_norm": 2.4573659896850586, "learning_rate": 0.0002437360456462416, "loss": 0.313, "step": 2024 }, { "epoch": 0.71, "grad_norm": 4.888119220733643, "learning_rate": 0.0002436988340362193, "loss": 0.3761, "step": 2025 }, { "epoch": 0.71, "grad_norm": 3.143555164337158, "learning_rate": 0.00024366162242619695, "loss": 1.1434, "step": 2026 }, { "epoch": 0.71, "grad_norm": 2.872159719467163, "learning_rate": 0.00024362441081617463, "loss": 0.8784, "step": 2027 }, { "epoch": 0.71, "grad_norm": 15.59583568572998, "learning_rate": 0.00024358719920615228, "loss": 6.3413, "step": 2028 }, { "epoch": 0.71, "grad_norm": 3.5535459518432617, "learning_rate": 0.00024354998759612998, "loss": 0.6744, "step": 2029 }, { "epoch": 0.71, "grad_norm": 3.3724849224090576, "learning_rate": 0.00024351277598610766, "loss": 0.6494, "step": 2030 }, { "epoch": 0.71, "grad_norm": 12.914846420288086, "learning_rate": 0.0002434755643760853, "loss": 4.2932, "step": 2031 }, { "epoch": 0.71, "grad_norm": 2.6765973567962646, "learning_rate": 0.000243438352766063, "loss": 0.3652, "step": 2032 }, { "epoch": 0.71, "grad_norm": 3.295680284500122, "learning_rate": 0.00024340114115604068, "loss": 0.6002, "step": 2033 }, { "epoch": 0.71, "grad_norm": 2.5459048748016357, "learning_rate": 0.00024336392954601833, "loss": 0.4525, "step": 2034 }, { "epoch": 0.71, "grad_norm": 3.9346370697021484, "learning_rate": 0.000243326717935996, "loss": 0.4579, "step": 2035 }, { "epoch": 0.71, "grad_norm": 3.241410732269287, "learning_rate": 0.00024328950632597368, "loss": 0.7072, "step": 2036 }, { "epoch": 0.71, "grad_norm": 1.559287667274475, "learning_rate": 0.00024325229471595136, "loss": 0.2728, "step": 2037 }, { "epoch": 0.71, "grad_norm": 2.5284101963043213, 
"learning_rate": 0.00024321508310592903, "loss": 0.5484, "step": 2038 }, { "epoch": 0.71, "grad_norm": 3.7311956882476807, "learning_rate": 0.00024317787149590668, "loss": 0.8191, "step": 2039 }, { "epoch": 0.71, "grad_norm": 12.553634643554688, "learning_rate": 0.00024314065988588439, "loss": 1.2444, "step": 2040 }, { "epoch": 0.72, "grad_norm": 6.129791259765625, "learning_rate": 0.00024310344827586203, "loss": 0.8698, "step": 2041 }, { "epoch": 0.72, "grad_norm": 2.7819488048553467, "learning_rate": 0.0002430662366658397, "loss": 0.4961, "step": 2042 }, { "epoch": 0.72, "grad_norm": 3.355811357498169, "learning_rate": 0.0002430290250558174, "loss": 0.5451, "step": 2043 }, { "epoch": 0.72, "grad_norm": 3.024493932723999, "learning_rate": 0.00024299181344579506, "loss": 0.7597, "step": 2044 }, { "epoch": 0.72, "grad_norm": 3.637416124343872, "learning_rate": 0.00024295460183577274, "loss": 0.7338, "step": 2045 }, { "epoch": 0.72, "grad_norm": 2.891045570373535, "learning_rate": 0.0002429173902257504, "loss": 0.3468, "step": 2046 }, { "epoch": 0.72, "grad_norm": 5.023557186126709, "learning_rate": 0.0002428801786157281, "loss": 0.424, "step": 2047 }, { "epoch": 0.72, "grad_norm": 3.320523262023926, "learning_rate": 0.00024284296700570577, "loss": 0.3529, "step": 2048 }, { "epoch": 0.72, "grad_norm": 7.243979454040527, "learning_rate": 0.00024280575539568341, "loss": 2.7054, "step": 2049 }, { "epoch": 0.72, "grad_norm": 1.9604123830795288, "learning_rate": 0.00024276854378566112, "loss": 0.2114, "step": 2050 }, { "epoch": 0.72, "grad_norm": 4.573541164398193, "learning_rate": 0.0002427313321756388, "loss": 1.3774, "step": 2051 }, { "epoch": 0.72, "grad_norm": 2.40055251121521, "learning_rate": 0.00024269412056561644, "loss": 0.8643, "step": 2052 }, { "epoch": 0.72, "grad_norm": 3.0245659351348877, "learning_rate": 0.00024265690895559414, "loss": 0.4089, "step": 2053 }, { "epoch": 0.72, "grad_norm": 3.8446860313415527, "learning_rate": 0.0002426196973455718, "loss": 
0.8581, "step": 2054 }, { "epoch": 0.72, "grad_norm": 2.76045823097229, "learning_rate": 0.00024258248573554947, "loss": 0.4217, "step": 2055 }, { "epoch": 0.72, "grad_norm": 2.384861707687378, "learning_rate": 0.00024254527412552714, "loss": 0.5413, "step": 2056 }, { "epoch": 0.72, "grad_norm": 1.9222930669784546, "learning_rate": 0.00024250806251550482, "loss": 0.3125, "step": 2057 }, { "epoch": 0.72, "grad_norm": 3.540152072906494, "learning_rate": 0.0002424708509054825, "loss": 1.8312, "step": 2058 }, { "epoch": 0.72, "grad_norm": 2.780682325363159, "learning_rate": 0.00024243363929546014, "loss": 0.6677, "step": 2059 }, { "epoch": 0.72, "grad_norm": 3.9040141105651855, "learning_rate": 0.00024239642768543782, "loss": 0.7344, "step": 2060 }, { "epoch": 0.72, "grad_norm": 4.254075050354004, "learning_rate": 0.00024235921607541552, "loss": 0.9292, "step": 2061 }, { "epoch": 0.72, "grad_norm": 3.1678977012634277, "learning_rate": 0.00024232200446539317, "loss": 1.1215, "step": 2062 }, { "epoch": 0.72, "grad_norm": 2.6753976345062256, "learning_rate": 0.00024228479285537085, "loss": 0.4991, "step": 2063 }, { "epoch": 0.72, "grad_norm": 4.377896308898926, "learning_rate": 0.00024224758124534855, "loss": 0.5978, "step": 2064 }, { "epoch": 0.72, "grad_norm": 3.7869486808776855, "learning_rate": 0.0002422103696353262, "loss": 0.8578, "step": 2065 }, { "epoch": 0.72, "grad_norm": 3.29856014251709, "learning_rate": 0.00024217315802530388, "loss": 1.6217, "step": 2066 }, { "epoch": 0.72, "grad_norm": 3.67818021774292, "learning_rate": 0.00024213594641528152, "loss": 0.5943, "step": 2067 }, { "epoch": 0.72, "grad_norm": 4.469092845916748, "learning_rate": 0.00024209873480525923, "loss": 1.1634, "step": 2068 }, { "epoch": 0.72, "grad_norm": 2.7627439498901367, "learning_rate": 0.0002420615231952369, "loss": 0.591, "step": 2069 }, { "epoch": 0.73, "grad_norm": 2.3488175868988037, "learning_rate": 0.00024202431158521455, "loss": 0.2379, "step": 2070 }, { "epoch": 0.73, 
"grad_norm": 3.005652904510498, "learning_rate": 0.00024198709997519225, "loss": 0.2445, "step": 2071 }, { "epoch": 0.73, "grad_norm": 4.345755100250244, "learning_rate": 0.0002419498883651699, "loss": 0.7675, "step": 2072 }, { "epoch": 0.73, "grad_norm": 7.79234504699707, "learning_rate": 0.00024191267675514758, "loss": 0.6093, "step": 2073 }, { "epoch": 0.73, "grad_norm": 6.379326820373535, "learning_rate": 0.00024187546514512528, "loss": 1.4354, "step": 2074 }, { "epoch": 0.73, "grad_norm": 3.6521170139312744, "learning_rate": 0.00024183825353510293, "loss": 0.7191, "step": 2075 }, { "epoch": 0.73, "grad_norm": 3.690326452255249, "learning_rate": 0.0002418010419250806, "loss": 1.3118, "step": 2076 }, { "epoch": 0.73, "grad_norm": 4.035961627960205, "learning_rate": 0.00024176383031505828, "loss": 1.0016, "step": 2077 }, { "epoch": 0.73, "grad_norm": 4.05783748626709, "learning_rate": 0.00024172661870503596, "loss": 0.6541, "step": 2078 }, { "epoch": 0.73, "grad_norm": 1.9695038795471191, "learning_rate": 0.00024168940709501363, "loss": 0.4606, "step": 2079 }, { "epoch": 0.73, "grad_norm": 1.9987822771072388, "learning_rate": 0.00024165219548499128, "loss": 0.6701, "step": 2080 }, { "epoch": 0.73, "grad_norm": 2.004821538925171, "learning_rate": 0.00024161498387496896, "loss": 0.4184, "step": 2081 }, { "epoch": 0.73, "grad_norm": 2.6122078895568848, "learning_rate": 0.00024157777226494666, "loss": 0.3568, "step": 2082 }, { "epoch": 0.73, "grad_norm": 3.2118942737579346, "learning_rate": 0.0002415405606549243, "loss": 0.4723, "step": 2083 }, { "epoch": 0.73, "grad_norm": 2.2981133460998535, "learning_rate": 0.00024150334904490199, "loss": 0.5278, "step": 2084 }, { "epoch": 0.73, "grad_norm": 3.0347321033477783, "learning_rate": 0.00024146613743487963, "loss": 0.8326, "step": 2085 }, { "epoch": 0.73, "grad_norm": 3.122243881225586, "learning_rate": 0.00024142892582485734, "loss": 0.6792, "step": 2086 }, { "epoch": 0.73, "grad_norm": 3.2204928398132324, 
"learning_rate": 0.000241391714214835, "loss": 0.6001, "step": 2087 }, { "epoch": 0.73, "grad_norm": 4.347808837890625, "learning_rate": 0.00024135450260481266, "loss": 1.9925, "step": 2088 }, { "epoch": 0.73, "grad_norm": 2.3485236167907715, "learning_rate": 0.00024131729099479036, "loss": 0.3445, "step": 2089 }, { "epoch": 0.73, "grad_norm": 2.8277785778045654, "learning_rate": 0.000241280079384768, "loss": 0.5412, "step": 2090 }, { "epoch": 0.73, "grad_norm": 1.600690484046936, "learning_rate": 0.0002412428677747457, "loss": 0.1389, "step": 2091 }, { "epoch": 0.73, "grad_norm": 1.351870059967041, "learning_rate": 0.0002412056561647234, "loss": 0.1752, "step": 2092 }, { "epoch": 0.73, "grad_norm": 3.944504499435425, "learning_rate": 0.00024116844455470104, "loss": 0.5421, "step": 2093 }, { "epoch": 0.73, "grad_norm": 2.7904212474823, "learning_rate": 0.00024113123294467872, "loss": 0.481, "step": 2094 }, { "epoch": 0.73, "grad_norm": 4.337879180908203, "learning_rate": 0.00024109402133465642, "loss": 1.1637, "step": 2095 }, { "epoch": 0.73, "grad_norm": 3.739457607269287, "learning_rate": 0.00024105680972463407, "loss": 0.6533, "step": 2096 }, { "epoch": 0.73, "grad_norm": 1.9053199291229248, "learning_rate": 0.00024101959811461174, "loss": 0.326, "step": 2097 }, { "epoch": 0.74, "grad_norm": 10.310988426208496, "learning_rate": 0.0002409823865045894, "loss": 1.6222, "step": 2098 }, { "epoch": 0.74, "grad_norm": 2.2947075366973877, "learning_rate": 0.0002409451748945671, "loss": 0.2548, "step": 2099 }, { "epoch": 0.74, "grad_norm": 3.7225122451782227, "learning_rate": 0.00024090796328454477, "loss": 0.2533, "step": 2100 }, { "epoch": 0.74, "eval_loss": 0.7072895169258118, "eval_runtime": 50.5869, "eval_samples_per_second": 42.857, "eval_steps_per_second": 10.714, "eval_wer": 0.5960906417574814, "step": 2100 }, { "epoch": 0.74, "grad_norm": 4.087221622467041, "learning_rate": 0.00024087075167452242, "loss": 0.9829, "step": 2101 }, { "epoch": 0.74, "grad_norm": 
5.295423984527588, "learning_rate": 0.0002408335400645001, "loss": 1.442, "step": 2102 }, { "epoch": 0.74, "grad_norm": 1.7258617877960205, "learning_rate": 0.00024079632845447777, "loss": 0.4635, "step": 2103 }, { "epoch": 0.74, "grad_norm": 2.04913067817688, "learning_rate": 0.00024075911684445545, "loss": 0.462, "step": 2104 }, { "epoch": 0.74, "grad_norm": 2.6161487102508545, "learning_rate": 0.00024072190523443312, "loss": 0.6827, "step": 2105 }, { "epoch": 0.74, "grad_norm": 3.9730608463287354, "learning_rate": 0.00024068469362441077, "loss": 0.9988, "step": 2106 }, { "epoch": 0.74, "grad_norm": 2.8048250675201416, "learning_rate": 0.00024064748201438847, "loss": 1.0969, "step": 2107 }, { "epoch": 0.74, "grad_norm": 2.332301616668701, "learning_rate": 0.00024061027040436615, "loss": 0.4455, "step": 2108 }, { "epoch": 0.74, "grad_norm": 3.4177818298339844, "learning_rate": 0.0002405730587943438, "loss": 2.1572, "step": 2109 }, { "epoch": 0.74, "grad_norm": 5.298447132110596, "learning_rate": 0.0002405358471843215, "loss": 0.3914, "step": 2110 }, { "epoch": 0.74, "grad_norm": 2.914675235748291, "learning_rate": 0.00024049863557429915, "loss": 1.0013, "step": 2111 }, { "epoch": 0.74, "grad_norm": 4.703636646270752, "learning_rate": 0.00024046142396427683, "loss": 0.4293, "step": 2112 }, { "epoch": 0.74, "grad_norm": 2.7955198287963867, "learning_rate": 0.00024042421235425453, "loss": 0.5958, "step": 2113 }, { "epoch": 0.74, "grad_norm": 2.913386583328247, "learning_rate": 0.00024038700074423218, "loss": 0.7706, "step": 2114 }, { "epoch": 0.74, "grad_norm": 3.9934210777282715, "learning_rate": 0.00024034978913420985, "loss": 0.4863, "step": 2115 }, { "epoch": 0.74, "grad_norm": 1.5534229278564453, "learning_rate": 0.0002403125775241875, "loss": 0.2289, "step": 2116 }, { "epoch": 0.74, "grad_norm": 3.206246852874756, "learning_rate": 0.0002402753659141652, "loss": 1.0357, "step": 2117 }, { "epoch": 0.74, "grad_norm": 3.4681451320648193, "learning_rate": 
0.00024023815430414288, "loss": 0.9145, "step": 2118 }, { "epoch": 0.74, "grad_norm": 3.2907421588897705, "learning_rate": 0.00024020094269412053, "loss": 0.6565, "step": 2119 }, { "epoch": 0.74, "grad_norm": 2.354326009750366, "learning_rate": 0.00024016373108409823, "loss": 0.5887, "step": 2120 }, { "epoch": 0.74, "grad_norm": 3.702387571334839, "learning_rate": 0.0002401265194740759, "loss": 0.4698, "step": 2121 }, { "epoch": 0.74, "grad_norm": 2.9041430950164795, "learning_rate": 0.00024008930786405356, "loss": 0.5025, "step": 2122 }, { "epoch": 0.74, "grad_norm": 2.8005261421203613, "learning_rate": 0.00024005209625403123, "loss": 0.4798, "step": 2123 }, { "epoch": 0.74, "grad_norm": 2.7690351009368896, "learning_rate": 0.0002400148846440089, "loss": 0.2449, "step": 2124 }, { "epoch": 0.74, "grad_norm": 5.324347496032715, "learning_rate": 0.00023997767303398658, "loss": 1.9174, "step": 2125 }, { "epoch": 0.74, "grad_norm": 2.6067378520965576, "learning_rate": 0.00023994046142396426, "loss": 0.6974, "step": 2126 }, { "epoch": 0.75, "grad_norm": 2.2820823192596436, "learning_rate": 0.0002399032498139419, "loss": 0.3203, "step": 2127 }, { "epoch": 0.75, "grad_norm": 1.06552255153656, "learning_rate": 0.0002398660382039196, "loss": 0.2081, "step": 2128 }, { "epoch": 0.75, "grad_norm": 2.3411970138549805, "learning_rate": 0.00023982882659389726, "loss": 0.6395, "step": 2129 }, { "epoch": 0.75, "grad_norm": 2.0925540924072266, "learning_rate": 0.00023979161498387494, "loss": 0.6678, "step": 2130 }, { "epoch": 0.75, "grad_norm": 2.3923542499542236, "learning_rate": 0.00023975440337385264, "loss": 0.5511, "step": 2131 }, { "epoch": 0.75, "grad_norm": 2.139909267425537, "learning_rate": 0.0002397171917638303, "loss": 0.4857, "step": 2132 }, { "epoch": 0.75, "grad_norm": 5.286214828491211, "learning_rate": 0.00023967998015380796, "loss": 0.7586, "step": 2133 }, { "epoch": 0.75, "grad_norm": 6.681207656860352, "learning_rate": 0.0002396427685437856, "loss": 0.7079, 
"step": 2134 }, { "epoch": 0.75, "grad_norm": 3.236323595046997, "learning_rate": 0.00023960555693376332, "loss": 0.6949, "step": 2135 }, { "epoch": 0.75, "grad_norm": 2.150146722793579, "learning_rate": 0.000239568345323741, "loss": 0.7396, "step": 2136 }, { "epoch": 0.75, "grad_norm": 2.668715715408325, "learning_rate": 0.00023953113371371864, "loss": 0.7919, "step": 2137 }, { "epoch": 0.75, "grad_norm": 2.448146104812622, "learning_rate": 0.00023949392210369634, "loss": 0.4682, "step": 2138 }, { "epoch": 0.75, "grad_norm": 2.377995014190674, "learning_rate": 0.00023945671049367402, "loss": 0.6805, "step": 2139 }, { "epoch": 0.75, "grad_norm": 2.0972490310668945, "learning_rate": 0.00023941949888365167, "loss": 0.388, "step": 2140 }, { "epoch": 0.75, "grad_norm": 4.356851100921631, "learning_rate": 0.00023938228727362937, "loss": 0.9222, "step": 2141 }, { "epoch": 0.75, "grad_norm": 3.6163222789764404, "learning_rate": 0.00023934507566360702, "loss": 0.3178, "step": 2142 }, { "epoch": 0.75, "grad_norm": 3.244340181350708, "learning_rate": 0.0002393078640535847, "loss": 0.414, "step": 2143 }, { "epoch": 0.75, "grad_norm": 2.176497220993042, "learning_rate": 0.00023927065244356237, "loss": 0.3438, "step": 2144 }, { "epoch": 0.75, "grad_norm": 3.1383144855499268, "learning_rate": 0.00023923344083354005, "loss": 0.3254, "step": 2145 }, { "epoch": 0.75, "grad_norm": 2.8384389877319336, "learning_rate": 0.00023919622922351772, "loss": 0.3931, "step": 2146 }, { "epoch": 0.75, "grad_norm": 4.945198059082031, "learning_rate": 0.00023915901761349537, "loss": 0.6616, "step": 2147 }, { "epoch": 0.75, "grad_norm": 2.2696187496185303, "learning_rate": 0.00023912180600347305, "loss": 0.1269, "step": 2148 }, { "epoch": 0.75, "grad_norm": 4.868678092956543, "learning_rate": 0.00023908459439345075, "loss": 1.2393, "step": 2149 }, { "epoch": 0.75, "grad_norm": 4.27547025680542, "learning_rate": 0.0002390473827834284, "loss": 0.3182, "step": 2150 }, { "epoch": 0.75, "grad_norm": 
4.301406383514404, "learning_rate": 0.00023901017117340607, "loss": 1.3776, "step": 2151 }, { "epoch": 0.75, "grad_norm": 2.5143606662750244, "learning_rate": 0.00023897295956338378, "loss": 0.5273, "step": 2152 }, { "epoch": 0.75, "grad_norm": 2.4446358680725098, "learning_rate": 0.00023893574795336143, "loss": 0.7657, "step": 2153 }, { "epoch": 0.75, "grad_norm": 2.8754398822784424, "learning_rate": 0.0002388985363433391, "loss": 0.8753, "step": 2154 }, { "epoch": 0.76, "grad_norm": 3.9459950923919678, "learning_rate": 0.00023886132473331675, "loss": 1.1413, "step": 2155 }, { "epoch": 0.76, "grad_norm": 4.867654800415039, "learning_rate": 0.00023882411312329445, "loss": 0.612, "step": 2156 }, { "epoch": 0.76, "grad_norm": 2.5319912433624268, "learning_rate": 0.00023878690151327213, "loss": 0.633, "step": 2157 }, { "epoch": 0.76, "grad_norm": 3.040459632873535, "learning_rate": 0.00023874968990324978, "loss": 0.3897, "step": 2158 }, { "epoch": 0.76, "grad_norm": 3.5026168823242188, "learning_rate": 0.00023871247829322748, "loss": 0.6734, "step": 2159 }, { "epoch": 0.76, "grad_norm": 2.2834792137145996, "learning_rate": 0.00023867526668320513, "loss": 0.4584, "step": 2160 }, { "epoch": 0.76, "grad_norm": 1.716819167137146, "learning_rate": 0.0002386380550731828, "loss": 0.4489, "step": 2161 }, { "epoch": 0.76, "grad_norm": 3.5152437686920166, "learning_rate": 0.0002386008434631605, "loss": 1.6475, "step": 2162 }, { "epoch": 0.76, "grad_norm": 2.483433961868286, "learning_rate": 0.00023856363185313816, "loss": 0.2928, "step": 2163 }, { "epoch": 0.76, "grad_norm": 3.2262301445007324, "learning_rate": 0.00023852642024311583, "loss": 0.5461, "step": 2164 }, { "epoch": 0.76, "grad_norm": 3.866180896759033, "learning_rate": 0.0002384892086330935, "loss": 0.4271, "step": 2165 }, { "epoch": 0.76, "grad_norm": 2.7532100677490234, "learning_rate": 0.00023845199702307118, "loss": 0.3409, "step": 2166 }, { "epoch": 0.76, "grad_norm": 3.9978840351104736, "learning_rate": 
0.00023841478541304886, "loss": 1.1862, "step": 2167 }, { "epoch": 0.76, "grad_norm": 1.0184621810913086, "learning_rate": 0.0002383775738030265, "loss": 0.1015, "step": 2168 }, { "epoch": 0.76, "grad_norm": 2.473518133163452, "learning_rate": 0.00023834036219300418, "loss": 0.4473, "step": 2169 }, { "epoch": 0.76, "grad_norm": 2.6468117237091064, "learning_rate": 0.0002383031505829819, "loss": 0.49, "step": 2170 }, { "epoch": 0.76, "grad_norm": 4.993438720703125, "learning_rate": 0.00023826593897295954, "loss": 0.6108, "step": 2171 }, { "epoch": 0.76, "grad_norm": 3.0172436237335205, "learning_rate": 0.0002382287273629372, "loss": 0.4763, "step": 2172 }, { "epoch": 0.76, "grad_norm": 5.393608570098877, "learning_rate": 0.0002381915157529149, "loss": 0.4268, "step": 2173 }, { "epoch": 0.76, "grad_norm": 4.117353439331055, "learning_rate": 0.00023815430414289256, "loss": 0.7324, "step": 2174 }, { "epoch": 0.76, "grad_norm": 4.40974235534668, "learning_rate": 0.00023811709253287024, "loss": 0.3379, "step": 2175 }, { "epoch": 0.76, "grad_norm": 5.692419052124023, "learning_rate": 0.0002380798809228479, "loss": 1.1812, "step": 2176 }, { "epoch": 0.76, "grad_norm": 5.502355098724365, "learning_rate": 0.0002380426693128256, "loss": 1.0161, "step": 2177 }, { "epoch": 0.76, "grad_norm": 2.216310501098633, "learning_rate": 0.00023800545770280324, "loss": 0.488, "step": 2178 }, { "epoch": 0.76, "grad_norm": 2.1364212036132812, "learning_rate": 0.00023796824609278091, "loss": 0.528, "step": 2179 }, { "epoch": 0.76, "grad_norm": 2.4948229789733887, "learning_rate": 0.00023793103448275862, "loss": 0.6491, "step": 2180 }, { "epoch": 0.76, "grad_norm": 1.0776448249816895, "learning_rate": 0.00023789382287273627, "loss": 0.1683, "step": 2181 }, { "epoch": 0.76, "grad_norm": 2.0587196350097656, "learning_rate": 0.00023785661126271394, "loss": 0.4055, "step": 2182 }, { "epoch": 0.76, "grad_norm": 2.165605306625366, "learning_rate": 0.00023781939965269164, "loss": 0.7523, "step": 
2183 }, { "epoch": 0.77, "grad_norm": 2.3306822776794434, "learning_rate": 0.0002377821880426693, "loss": 0.2969, "step": 2184 }, { "epoch": 0.77, "grad_norm": 3.706977128982544, "learning_rate": 0.00023774497643264697, "loss": 0.7947, "step": 2185 }, { "epoch": 0.77, "grad_norm": 3.0041239261627197, "learning_rate": 0.00023770776482262462, "loss": 0.6185, "step": 2186 }, { "epoch": 0.77, "grad_norm": 1.9066877365112305, "learning_rate": 0.00023767055321260232, "loss": 0.3012, "step": 2187 }, { "epoch": 0.77, "grad_norm": 3.505563974380493, "learning_rate": 0.00023763334160258, "loss": 0.321, "step": 2188 }, { "epoch": 0.77, "grad_norm": 4.046771049499512, "learning_rate": 0.00023759612999255765, "loss": 1.1728, "step": 2189 }, { "epoch": 0.77, "grad_norm": 3.1281588077545166, "learning_rate": 0.00023755891838253532, "loss": 0.4046, "step": 2190 }, { "epoch": 0.77, "grad_norm": 3.7986767292022705, "learning_rate": 0.000237521706772513, "loss": 0.8822, "step": 2191 }, { "epoch": 0.77, "grad_norm": 3.6561527252197266, "learning_rate": 0.00023748449516249067, "loss": 0.6199, "step": 2192 }, { "epoch": 0.77, "grad_norm": 4.620680332183838, "learning_rate": 0.00023744728355246835, "loss": 0.9583, "step": 2193 }, { "epoch": 0.77, "grad_norm": 3.1020591259002686, "learning_rate": 0.00023741007194244602, "loss": 0.464, "step": 2194 }, { "epoch": 0.77, "grad_norm": 3.0864760875701904, "learning_rate": 0.0002373728603324237, "loss": 0.5002, "step": 2195 }, { "epoch": 0.77, "grad_norm": 3.774919033050537, "learning_rate": 0.00023733564872240138, "loss": 0.7486, "step": 2196 }, { "epoch": 0.77, "grad_norm": 3.6889686584472656, "learning_rate": 0.00023729843711237902, "loss": 0.5443, "step": 2197 }, { "epoch": 0.77, "grad_norm": 3.7951676845550537, "learning_rate": 0.00023726122550235673, "loss": 1.2228, "step": 2198 }, { "epoch": 0.77, "grad_norm": 4.316519260406494, "learning_rate": 0.00023722401389233438, "loss": 1.6501, "step": 2199 }, { "epoch": 0.77, "grad_norm": 
2.2464311122894287, "learning_rate": 0.00023718680228231205, "loss": 0.3804, "step": 2200 }, { "epoch": 0.77, "eval_loss": 0.6850249171257019, "eval_runtime": 50.5746, "eval_samples_per_second": 42.867, "eval_steps_per_second": 10.717, "eval_wer": 0.5974744853831517, "step": 2200 }, { "epoch": 0.77, "grad_norm": 8.690844535827637, "learning_rate": 0.00023714959067228976, "loss": 2.5763, "step": 2201 }, { "epoch": 0.77, "grad_norm": 1.9838602542877197, "learning_rate": 0.0002371123790622674, "loss": 0.7831, "step": 2202 }, { "epoch": 0.77, "grad_norm": 2.0435171127319336, "learning_rate": 0.00023707516745224508, "loss": 0.5686, "step": 2203 }, { "epoch": 0.77, "grad_norm": 2.6351277828216553, "learning_rate": 0.00023703795584222273, "loss": 0.9036, "step": 2204 }, { "epoch": 0.77, "grad_norm": 2.1336121559143066, "learning_rate": 0.00023700074423220043, "loss": 0.6445, "step": 2205 }, { "epoch": 0.77, "grad_norm": 9.804746627807617, "learning_rate": 0.0002369635326221781, "loss": 2.4538, "step": 2206 }, { "epoch": 0.77, "grad_norm": 2.9941658973693848, "learning_rate": 0.00023692632101215576, "loss": 0.5441, "step": 2207 }, { "epoch": 0.77, "grad_norm": 2.662944793701172, "learning_rate": 0.00023688910940213346, "loss": 0.2834, "step": 2208 }, { "epoch": 0.77, "grad_norm": 2.271035671234131, "learning_rate": 0.00023685189779211113, "loss": 0.373, "step": 2209 }, { "epoch": 0.77, "grad_norm": 2.727461099624634, "learning_rate": 0.00023681468618208878, "loss": 0.4508, "step": 2210 }, { "epoch": 0.77, "grad_norm": 2.807447910308838, "learning_rate": 0.00023677747457206646, "loss": 0.5131, "step": 2211 }, { "epoch": 0.78, "grad_norm": 3.0257365703582764, "learning_rate": 0.00023674026296204413, "loss": 0.4543, "step": 2212 }, { "epoch": 0.78, "grad_norm": 3.6574392318725586, "learning_rate": 0.0002367030513520218, "loss": 0.8255, "step": 2213 }, { "epoch": 0.78, "grad_norm": 5.633670330047607, "learning_rate": 0.00023666583974199949, "loss": 0.7957, "step": 2214 }, { 
"epoch": 0.78, "grad_norm": 3.175776720046997, "learning_rate": 0.00023662862813197716, "loss": 0.7773, "step": 2215 }, { "epoch": 0.78, "grad_norm": 2.383112907409668, "learning_rate": 0.00023659141652195484, "loss": 0.4066, "step": 2216 }, { "epoch": 0.78, "grad_norm": 2.62146258354187, "learning_rate": 0.0002365542049119325, "loss": 0.8173, "step": 2217 }, { "epoch": 0.78, "grad_norm": 48.06704330444336, "learning_rate": 0.00023651699330191016, "loss": 0.7836, "step": 2218 }, { "epoch": 0.78, "grad_norm": 3.3420729637145996, "learning_rate": 0.00023647978169188787, "loss": 0.2653, "step": 2219 }, { "epoch": 0.78, "grad_norm": 3.6878349781036377, "learning_rate": 0.00023644257008186551, "loss": 1.2167, "step": 2220 }, { "epoch": 0.78, "grad_norm": 2.163689613342285, "learning_rate": 0.0002364053584718432, "loss": 0.2967, "step": 2221 }, { "epoch": 0.78, "grad_norm": 2.2857308387756348, "learning_rate": 0.0002363681468618209, "loss": 0.2476, "step": 2222 }, { "epoch": 0.78, "grad_norm": 2.968120574951172, "learning_rate": 0.00023633093525179854, "loss": 0.4314, "step": 2223 }, { "epoch": 0.78, "grad_norm": 4.3224382400512695, "learning_rate": 0.00023629372364177622, "loss": 0.6707, "step": 2224 }, { "epoch": 0.78, "grad_norm": 5.102696895599365, "learning_rate": 0.00023625651203175387, "loss": 0.7693, "step": 2225 }, { "epoch": 0.78, "grad_norm": 4.199782371520996, "learning_rate": 0.00023621930042173157, "loss": 1.5092, "step": 2226 }, { "epoch": 0.78, "grad_norm": 4.694085121154785, "learning_rate": 0.00023618208881170924, "loss": 1.4029, "step": 2227 }, { "epoch": 0.78, "grad_norm": 2.0905003547668457, "learning_rate": 0.0002361448772016869, "loss": 0.5829, "step": 2228 }, { "epoch": 0.78, "grad_norm": 2.0144598484039307, "learning_rate": 0.0002361076655916646, "loss": 0.6812, "step": 2229 }, { "epoch": 0.78, "grad_norm": 2.288403034210205, "learning_rate": 0.00023607045398164224, "loss": 0.5396, "step": 2230 }, { "epoch": 0.78, "grad_norm": 1.9742906093597412, 
"learning_rate": 0.00023603324237161992, "loss": 0.3993, "step": 2231 }, { "epoch": 0.78, "grad_norm": 2.8184242248535156, "learning_rate": 0.0002359960307615976, "loss": 0.6374, "step": 2232 }, { "epoch": 0.78, "grad_norm": 1.8115568161010742, "learning_rate": 0.00023595881915157527, "loss": 0.6224, "step": 2233 }, { "epoch": 0.78, "grad_norm": 1.4084148406982422, "learning_rate": 0.00023592160754155295, "loss": 0.3404, "step": 2234 }, { "epoch": 0.78, "grad_norm": 2.8344879150390625, "learning_rate": 0.0002358843959315306, "loss": 0.7027, "step": 2235 }, { "epoch": 0.78, "grad_norm": 3.2461719512939453, "learning_rate": 0.0002358471843215083, "loss": 0.4054, "step": 2236 }, { "epoch": 0.78, "grad_norm": 3.190657377243042, "learning_rate": 0.00023580997271148598, "loss": 0.7986, "step": 2237 }, { "epoch": 0.78, "grad_norm": 1.7157618999481201, "learning_rate": 0.00023577276110146362, "loss": 0.3132, "step": 2238 }, { "epoch": 0.78, "grad_norm": 2.2127954959869385, "learning_rate": 0.0002357355494914413, "loss": 0.3935, "step": 2239 }, { "epoch": 0.78, "grad_norm": 3.468662738800049, "learning_rate": 0.000235698337881419, "loss": 0.6404, "step": 2240 }, { "epoch": 0.79, "grad_norm": 2.6316206455230713, "learning_rate": 0.00023566112627139665, "loss": 0.5468, "step": 2241 }, { "epoch": 0.79, "grad_norm": 3.6631693840026855, "learning_rate": 0.00023562391466137433, "loss": 0.7712, "step": 2242 }, { "epoch": 0.79, "grad_norm": 4.116514205932617, "learning_rate": 0.00023558670305135198, "loss": 0.7397, "step": 2243 }, { "epoch": 0.79, "grad_norm": 2.6557669639587402, "learning_rate": 0.00023554949144132968, "loss": 0.5441, "step": 2244 }, { "epoch": 0.79, "grad_norm": 3.5287699699401855, "learning_rate": 0.00023551227983130735, "loss": 0.4418, "step": 2245 }, { "epoch": 0.79, "grad_norm": 3.1134016513824463, "learning_rate": 0.000235475068221285, "loss": 0.4333, "step": 2246 }, { "epoch": 0.79, "grad_norm": 4.637173652648926, "learning_rate": 0.0002354378566112627, 
"loss": 0.5619, "step": 2247 }, { "epoch": 0.79, "grad_norm": 4.621034145355225, "learning_rate": 0.00023540064500124035, "loss": 0.8087, "step": 2248 }, { "epoch": 0.79, "grad_norm": 3.706472635269165, "learning_rate": 0.00023536343339121803, "loss": 0.3778, "step": 2249 }, { "epoch": 0.79, "grad_norm": 6.068023204803467, "learning_rate": 0.00023532622178119573, "loss": 0.5066, "step": 2250 }, { "epoch": 0.79, "grad_norm": 3.4167532920837402, "learning_rate": 0.00023528901017117338, "loss": 1.2843, "step": 2251 }, { "epoch": 0.79, "grad_norm": 1.9952954053878784, "learning_rate": 0.00023525179856115106, "loss": 0.7945, "step": 2252 }, { "epoch": 0.79, "grad_norm": 3.9978554248809814, "learning_rate": 0.00023521458695112876, "loss": 0.5449, "step": 2253 }, { "epoch": 0.79, "grad_norm": 2.489971876144409, "learning_rate": 0.0002351773753411064, "loss": 0.6356, "step": 2254 }, { "epoch": 0.79, "grad_norm": 2.167726993560791, "learning_rate": 0.00023514016373108409, "loss": 0.4688, "step": 2255 }, { "epoch": 0.79, "grad_norm": 2.427248477935791, "learning_rate": 0.00023510295212106173, "loss": 0.7861, "step": 2256 }, { "epoch": 0.79, "grad_norm": 2.5070106983184814, "learning_rate": 0.00023506574051103944, "loss": 0.9434, "step": 2257 }, { "epoch": 0.79, "grad_norm": 2.904707908630371, "learning_rate": 0.0002350285289010171, "loss": 0.7917, "step": 2258 }, { "epoch": 0.79, "grad_norm": 4.7793803215026855, "learning_rate": 0.00023499131729099476, "loss": 1.5236, "step": 2259 }, { "epoch": 0.79, "grad_norm": 3.193175792694092, "learning_rate": 0.00023495410568097244, "loss": 0.9601, "step": 2260 }, { "epoch": 0.79, "grad_norm": 1.6871867179870605, "learning_rate": 0.0002349168940709501, "loss": 0.3522, "step": 2261 }, { "epoch": 0.79, "grad_norm": 2.900092124938965, "learning_rate": 0.0002348796824609278, "loss": 0.9403, "step": 2262 }, { "epoch": 0.79, "grad_norm": 3.3442814350128174, "learning_rate": 0.00023484247085090546, "loss": 0.807, "step": 2263 }, { "epoch": 
0.79, "grad_norm": 2.9604439735412598, "learning_rate": 0.0002348052592408831, "loss": 0.6192, "step": 2264 }, { "epoch": 0.79, "grad_norm": 5.023985862731934, "learning_rate": 0.00023476804763086082, "loss": 1.084, "step": 2265 }, { "epoch": 0.79, "grad_norm": 2.1374659538269043, "learning_rate": 0.0002347308360208385, "loss": 0.2996, "step": 2266 }, { "epoch": 0.79, "grad_norm": 2.652350425720215, "learning_rate": 0.00023469362441081614, "loss": 0.5887, "step": 2267 }, { "epoch": 0.79, "grad_norm": 3.5104193687438965, "learning_rate": 0.00023465641280079384, "loss": 0.6121, "step": 2268 }, { "epoch": 0.8, "grad_norm": 4.1145853996276855, "learning_rate": 0.0002346192011907715, "loss": 0.5084, "step": 2269 }, { "epoch": 0.8, "grad_norm": 4.850748538970947, "learning_rate": 0.00023458198958074917, "loss": 0.6602, "step": 2270 }, { "epoch": 0.8, "grad_norm": 1.4793652296066284, "learning_rate": 0.00023454477797072687, "loss": 0.1197, "step": 2271 }, { "epoch": 0.8, "grad_norm": 1.932457447052002, "learning_rate": 0.00023450756636070452, "loss": 0.1678, "step": 2272 }, { "epoch": 0.8, "grad_norm": 1.8412857055664062, "learning_rate": 0.0002344703547506822, "loss": 0.1298, "step": 2273 }, { "epoch": 0.8, "grad_norm": 5.570624828338623, "learning_rate": 0.00023443314314065984, "loss": 0.5715, "step": 2274 }, { "epoch": 0.8, "grad_norm": 3.606353759765625, "learning_rate": 0.00023439593153063755, "loss": 0.3448, "step": 2275 }, { "epoch": 0.8, "grad_norm": 2.9131336212158203, "learning_rate": 0.00023435871992061522, "loss": 1.2812, "step": 2276 }, { "epoch": 0.8, "grad_norm": 2.4033138751983643, "learning_rate": 0.00023432150831059287, "loss": 0.5682, "step": 2277 }, { "epoch": 0.8, "grad_norm": 2.5214476585388184, "learning_rate": 0.00023428429670057057, "loss": 0.734, "step": 2278 }, { "epoch": 0.8, "grad_norm": 3.3294756412506104, "learning_rate": 0.00023424708509054822, "loss": 0.685, "step": 2279 }, { "epoch": 0.8, "grad_norm": 2.0864174365997314, "learning_rate": 
0.0002342098734805259, "loss": 0.6529, "step": 2280 }, { "epoch": 0.8, "grad_norm": 1.4727028608322144, "learning_rate": 0.00023417266187050357, "loss": 0.3351, "step": 2281 }, { "epoch": 0.8, "grad_norm": 1.7328686714172363, "learning_rate": 0.00023413545026048125, "loss": 0.4686, "step": 2282 }, { "epoch": 0.8, "grad_norm": 2.6917636394500732, "learning_rate": 0.00023409823865045893, "loss": 0.4922, "step": 2283 }, { "epoch": 0.8, "grad_norm": 2.5129282474517822, "learning_rate": 0.0002340610270404366, "loss": 0.4395, "step": 2284 }, { "epoch": 0.8, "grad_norm": 2.758345603942871, "learning_rate": 0.00023402381543041425, "loss": 0.5023, "step": 2285 }, { "epoch": 0.8, "grad_norm": 4.338339328765869, "learning_rate": 0.00023398660382039195, "loss": 0.6605, "step": 2286 }, { "epoch": 0.8, "grad_norm": 2.811253786087036, "learning_rate": 0.0002339493922103696, "loss": 0.5628, "step": 2287 }, { "epoch": 0.8, "grad_norm": 2.3005897998809814, "learning_rate": 0.00023391218060034728, "loss": 0.644, "step": 2288 }, { "epoch": 0.8, "grad_norm": 3.50140118598938, "learning_rate": 0.00023387496899032498, "loss": 1.2557, "step": 2289 }, { "epoch": 0.8, "grad_norm": 2.0913150310516357, "learning_rate": 0.00023383775738030263, "loss": 0.4355, "step": 2290 }, { "epoch": 0.8, "grad_norm": 2.851500988006592, "learning_rate": 0.0002338005457702803, "loss": 0.4956, "step": 2291 }, { "epoch": 0.8, "grad_norm": 4.441885471343994, "learning_rate": 0.00023376333416025795, "loss": 0.6377, "step": 2292 }, { "epoch": 0.8, "grad_norm": 2.17043399810791, "learning_rate": 0.00023372612255023566, "loss": 0.5092, "step": 2293 }, { "epoch": 0.8, "grad_norm": 2.0366196632385254, "learning_rate": 0.00023368891094021333, "loss": 0.1851, "step": 2294 }, { "epoch": 0.8, "grad_norm": 3.8057641983032227, "learning_rate": 0.00023365169933019098, "loss": 0.733, "step": 2295 }, { "epoch": 0.8, "grad_norm": 6.428284168243408, "learning_rate": 0.00023361448772016868, "loss": 0.7048, "step": 2296 }, { 
"epoch": 0.8, "grad_norm": 3.9241631031036377, "learning_rate": 0.00023357727611014636, "loss": 1.9665, "step": 2297 }, { "epoch": 0.81, "grad_norm": 4.794024467468262, "learning_rate": 0.000233540064500124, "loss": 0.953, "step": 2298 }, { "epoch": 0.81, "grad_norm": 3.998034954071045, "learning_rate": 0.0002335028528901017, "loss": 1.7438, "step": 2299 }, { "epoch": 0.81, "grad_norm": 4.720144271850586, "learning_rate": 0.00023346564128007936, "loss": 0.7473, "step": 2300 }, { "epoch": 0.81, "eval_loss": 0.8185052275657654, "eval_runtime": 50.5567, "eval_samples_per_second": 42.883, "eval_steps_per_second": 10.721, "eval_wer": 0.7170039785504237, "step": 2300 }, { "epoch": 0.81, "grad_norm": 3.0130574703216553, "learning_rate": 0.00023342842967005704, "loss": 0.6867, "step": 2301 }, { "epoch": 0.81, "grad_norm": 3.1833200454711914, "learning_rate": 0.0002333912180600347, "loss": 0.9255, "step": 2302 }, { "epoch": 0.81, "grad_norm": 3.382704019546509, "learning_rate": 0.0002333540064500124, "loss": 0.7009, "step": 2303 }, { "epoch": 0.81, "grad_norm": 18.74046516418457, "learning_rate": 0.00023331679483999006, "loss": 5.78, "step": 2304 }, { "epoch": 0.81, "grad_norm": 2.770467758178711, "learning_rate": 0.0002332795832299677, "loss": 0.7705, "step": 2305 }, { "epoch": 0.81, "grad_norm": 2.4621565341949463, "learning_rate": 0.0002332423716199454, "loss": 0.7438, "step": 2306 }, { "epoch": 0.81, "grad_norm": 8.334583282470703, "learning_rate": 0.0002332051600099231, "loss": 0.7081, "step": 2307 }, { "epoch": 0.81, "grad_norm": 2.128434658050537, "learning_rate": 0.00023316794839990074, "loss": 0.4139, "step": 2308 }, { "epoch": 0.81, "grad_norm": 5.296910285949707, "learning_rate": 0.00023313073678987842, "loss": 0.8335, "step": 2309 }, { "epoch": 0.81, "grad_norm": 2.945010185241699, "learning_rate": 0.00023309352517985612, "loss": 0.6668, "step": 2310 }, { "epoch": 0.81, "grad_norm": 3.273423433303833, "learning_rate": 0.00023305631356983377, "loss": 0.7138, 
"step": 2311 }, { "epoch": 0.81, "grad_norm": 1.95408034324646, "learning_rate": 0.00023301910195981144, "loss": 0.4044, "step": 2312 }, { "epoch": 0.81, "grad_norm": 7.172848224639893, "learning_rate": 0.0002329818903497891, "loss": 2.0813, "step": 2313 }, { "epoch": 0.81, "grad_norm": 2.5199854373931885, "learning_rate": 0.0002329446787397668, "loss": 0.4251, "step": 2314 }, { "epoch": 0.81, "grad_norm": 2.866971969604492, "learning_rate": 0.00023290746712974447, "loss": 0.5057, "step": 2315 }, { "epoch": 0.81, "grad_norm": 2.345081090927124, "learning_rate": 0.00023287025551972212, "loss": 0.4534, "step": 2316 }, { "epoch": 0.81, "grad_norm": 3.3792688846588135, "learning_rate": 0.00023283304390969982, "loss": 0.6358, "step": 2317 }, { "epoch": 0.81, "grad_norm": 2.9677326679229736, "learning_rate": 0.00023279583229967747, "loss": 1.3059, "step": 2318 }, { "epoch": 0.81, "grad_norm": 3.3454091548919678, "learning_rate": 0.00023275862068965515, "loss": 0.4036, "step": 2319 }, { "epoch": 0.81, "grad_norm": 3.9923884868621826, "learning_rate": 0.00023272140907963285, "loss": 1.2093, "step": 2320 }, { "epoch": 0.81, "grad_norm": 2.1582930088043213, "learning_rate": 0.0002326841974696105, "loss": 0.3846, "step": 2321 }, { "epoch": 0.81, "grad_norm": 3.0936267375946045, "learning_rate": 0.00023264698585958817, "loss": 0.5771, "step": 2322 }, { "epoch": 0.81, "grad_norm": 2.7140424251556396, "learning_rate": 0.00023260977424956582, "loss": 0.3977, "step": 2323 }, { "epoch": 0.81, "grad_norm": 4.4949116706848145, "learning_rate": 0.00023257256263954353, "loss": 1.5159, "step": 2324 }, { "epoch": 0.81, "grad_norm": 2.300839424133301, "learning_rate": 0.0002325353510295212, "loss": 0.3565, "step": 2325 }, { "epoch": 0.81, "grad_norm": 2.1757450103759766, "learning_rate": 0.00023249813941949885, "loss": 0.9017, "step": 2326 }, { "epoch": 0.82, "grad_norm": 2.34287166595459, "learning_rate": 0.00023246092780947653, "loss": 0.5174, "step": 2327 }, { "epoch": 0.82, 
"grad_norm": 2.0251080989837646, "learning_rate": 0.00023242371619945423, "loss": 0.678, "step": 2328 }, { "epoch": 0.82, "grad_norm": 2.359485149383545, "learning_rate": 0.00023238650458943188, "loss": 0.6204, "step": 2329 }, { "epoch": 0.82, "grad_norm": 2.325961112976074, "learning_rate": 0.00023234929297940955, "loss": 0.6806, "step": 2330 }, { "epoch": 0.82, "grad_norm": 2.272260904312134, "learning_rate": 0.0002323120813693872, "loss": 1.1038, "step": 2331 }, { "epoch": 0.82, "grad_norm": 2.266369581222534, "learning_rate": 0.0002322748697593649, "loss": 0.5986, "step": 2332 }, { "epoch": 0.82, "grad_norm": 2.7468202114105225, "learning_rate": 0.00023223765814934258, "loss": 0.6712, "step": 2333 }, { "epoch": 0.82, "grad_norm": 2.0581917762756348, "learning_rate": 0.00023220044653932023, "loss": 0.4981, "step": 2334 }, { "epoch": 0.82, "grad_norm": 3.185770273208618, "learning_rate": 0.00023216323492929793, "loss": 0.8548, "step": 2335 }, { "epoch": 0.82, "grad_norm": 2.0419859886169434, "learning_rate": 0.00023212602331927558, "loss": 0.2562, "step": 2336 }, { "epoch": 0.82, "grad_norm": 3.8037047386169434, "learning_rate": 0.00023208881170925326, "loss": 0.625, "step": 2337 }, { "epoch": 0.82, "grad_norm": 6.072167873382568, "learning_rate": 0.00023205160009923096, "loss": 1.8957, "step": 2338 }, { "epoch": 0.82, "grad_norm": 2.633638858795166, "learning_rate": 0.0002320143884892086, "loss": 0.7044, "step": 2339 }, { "epoch": 0.82, "grad_norm": 2.252713680267334, "learning_rate": 0.00023197717687918628, "loss": 0.481, "step": 2340 }, { "epoch": 0.82, "grad_norm": 2.7188823223114014, "learning_rate": 0.000231939965269164, "loss": 0.6524, "step": 2341 }, { "epoch": 0.82, "grad_norm": 3.263317108154297, "learning_rate": 0.00023190275365914164, "loss": 0.3887, "step": 2342 }, { "epoch": 0.82, "grad_norm": 3.679230213165283, "learning_rate": 0.0002318655420491193, "loss": 0.4308, "step": 2343 }, { "epoch": 0.82, "grad_norm": 3.2064208984375, "learning_rate": 
0.00023182833043909696, "loss": 1.0281, "step": 2344 }, { "epoch": 0.82, "grad_norm": 4.935098171234131, "learning_rate": 0.00023179111882907466, "loss": 0.9202, "step": 2345 }, { "epoch": 0.82, "grad_norm": 4.840257167816162, "learning_rate": 0.00023175390721905234, "loss": 1.914, "step": 2346 }, { "epoch": 0.82, "grad_norm": 3.3795762062072754, "learning_rate": 0.00023171669560903, "loss": 0.6141, "step": 2347 }, { "epoch": 0.82, "grad_norm": 4.437031269073486, "learning_rate": 0.00023167948399900766, "loss": 0.9272, "step": 2348 }, { "epoch": 0.82, "grad_norm": 1.7707891464233398, "learning_rate": 0.00023164227238898534, "loss": 0.2347, "step": 2349 }, { "epoch": 0.82, "grad_norm": 1.9971076250076294, "learning_rate": 0.00023160506077896301, "loss": 0.1552, "step": 2350 }, { "epoch": 0.82, "grad_norm": 4.799489974975586, "learning_rate": 0.0002315678491689407, "loss": 1.1959, "step": 2351 }, { "epoch": 0.82, "grad_norm": 1.57328462600708, "learning_rate": 0.00023153063755891834, "loss": 0.448, "step": 2352 }, { "epoch": 0.82, "grad_norm": 3.8030247688293457, "learning_rate": 0.00023149342594889604, "loss": 1.108, "step": 2353 }, { "epoch": 0.82, "grad_norm": 2.7969067096710205, "learning_rate": 0.00023145621433887372, "loss": 0.9344, "step": 2354 }, { "epoch": 0.83, "grad_norm": 1.8098201751708984, "learning_rate": 0.00023141900272885137, "loss": 0.2884, "step": 2355 }, { "epoch": 0.83, "grad_norm": 3.249875783920288, "learning_rate": 0.00023138179111882907, "loss": 0.6573, "step": 2356 }, { "epoch": 0.83, "grad_norm": 3.7881550788879395, "learning_rate": 0.00023134457950880672, "loss": 1.0587, "step": 2357 }, { "epoch": 0.83, "grad_norm": 3.2492504119873047, "learning_rate": 0.0002313073678987844, "loss": 0.8628, "step": 2358 }, { "epoch": 0.83, "grad_norm": 2.8697025775909424, "learning_rate": 0.0002312701562887621, "loss": 0.622, "step": 2359 }, { "epoch": 0.83, "grad_norm": 3.8673083782196045, "learning_rate": 0.00023123294467873975, "loss": 0.7776, "step": 
2360 }, { "epoch": 0.83, "grad_norm": 3.7203593254089355, "learning_rate": 0.00023119573306871742, "loss": 0.8226, "step": 2361 }, { "epoch": 0.83, "grad_norm": 10.66352653503418, "learning_rate": 0.00023115852145869507, "loss": 2.7217, "step": 2362 }, { "epoch": 0.83, "grad_norm": 2.4907355308532715, "learning_rate": 0.00023112130984867277, "loss": 0.3725, "step": 2363 }, { "epoch": 0.83, "grad_norm": 6.29020357131958, "learning_rate": 0.00023108409823865045, "loss": 2.0326, "step": 2364 }, { "epoch": 0.83, "grad_norm": 3.2957611083984375, "learning_rate": 0.0002310468866286281, "loss": 0.6962, "step": 2365 }, { "epoch": 0.83, "grad_norm": 4.59990930557251, "learning_rate": 0.0002310096750186058, "loss": 0.9934, "step": 2366 }, { "epoch": 0.83, "grad_norm": 3.0413990020751953, "learning_rate": 0.00023097246340858345, "loss": 0.4285, "step": 2367 }, { "epoch": 0.83, "grad_norm": 2.2277538776397705, "learning_rate": 0.00023093525179856112, "loss": 0.4112, "step": 2368 }, { "epoch": 0.83, "grad_norm": 2.3201451301574707, "learning_rate": 0.0002308980401885388, "loss": 0.3224, "step": 2369 }, { "epoch": 0.83, "grad_norm": 3.672980785369873, "learning_rate": 0.00023086082857851648, "loss": 0.4006, "step": 2370 }, { "epoch": 0.83, "grad_norm": 2.6918740272521973, "learning_rate": 0.00023082361696849415, "loss": 0.4209, "step": 2371 }, { "epoch": 0.83, "grad_norm": 4.196498394012451, "learning_rate": 0.00023078640535847183, "loss": 0.6705, "step": 2372 }, { "epoch": 0.83, "grad_norm": 1.965975046157837, "learning_rate": 0.00023074919374844948, "loss": 0.2825, "step": 2373 }, { "epoch": 0.83, "grad_norm": 3.151905059814453, "learning_rate": 0.00023071198213842718, "loss": 0.5709, "step": 2374 }, { "epoch": 0.83, "grad_norm": 9.685364723205566, "learning_rate": 0.00023067477052840483, "loss": 2.5129, "step": 2375 }, { "epoch": 0.83, "grad_norm": 1.8640772104263306, "learning_rate": 0.0002306375589183825, "loss": 0.7282, "step": 2376 }, { "epoch": 0.83, "grad_norm": 
2.197077989578247, "learning_rate": 0.0002306003473083602, "loss": 0.6786, "step": 2377 }, { "epoch": 0.83, "grad_norm": 2.610832691192627, "learning_rate": 0.00023056313569833786, "loss": 0.4671, "step": 2378 }, { "epoch": 0.83, "grad_norm": 2.7640953063964844, "learning_rate": 0.00023052592408831553, "loss": 0.4904, "step": 2379 }, { "epoch": 0.83, "grad_norm": 1.4002923965454102, "learning_rate": 0.00023048871247829318, "loss": 0.3408, "step": 2380 }, { "epoch": 0.83, "grad_norm": 2.018538475036621, "learning_rate": 0.00023045150086827088, "loss": 0.5375, "step": 2381 }, { "epoch": 0.83, "grad_norm": 2.6300442218780518, "learning_rate": 0.00023041428925824856, "loss": 0.4204, "step": 2382 }, { "epoch": 0.83, "grad_norm": 11.127724647521973, "learning_rate": 0.0002303770776482262, "loss": 2.8526, "step": 2383 }, { "epoch": 0.84, "grad_norm": 1.2783316373825073, "learning_rate": 0.0002303398660382039, "loss": 0.1801, "step": 2384 }, { "epoch": 0.84, "grad_norm": 2.074375867843628, "learning_rate": 0.00023030265442818159, "loss": 0.5052, "step": 2385 }, { "epoch": 0.84, "grad_norm": 2.3270208835601807, "learning_rate": 0.00023026544281815923, "loss": 0.306, "step": 2386 }, { "epoch": 0.84, "grad_norm": 4.8103556632995605, "learning_rate": 0.00023022823120813694, "loss": 1.2487, "step": 2387 }, { "epoch": 0.84, "grad_norm": 3.1849074363708496, "learning_rate": 0.0002301910195981146, "loss": 0.5772, "step": 2388 }, { "epoch": 0.84, "grad_norm": 3.9481422901153564, "learning_rate": 0.00023015380798809226, "loss": 1.5224, "step": 2389 }, { "epoch": 0.84, "grad_norm": 2.1063621044158936, "learning_rate": 0.00023011659637806994, "loss": 0.3233, "step": 2390 }, { "epoch": 0.84, "grad_norm": 2.34477162361145, "learning_rate": 0.00023007938476804761, "loss": 0.4639, "step": 2391 }, { "epoch": 0.84, "grad_norm": 1.563861608505249, "learning_rate": 0.0002300421731580253, "loss": 0.203, "step": 2392 }, { "epoch": 0.84, "grad_norm": 3.3610353469848633, "learning_rate": 
0.00023000496154800294, "loss": 0.5348, "step": 2393 }, { "epoch": 0.84, "grad_norm": 2.9858109951019287, "learning_rate": 0.00022996774993798061, "loss": 0.2315, "step": 2394 }, { "epoch": 0.84, "grad_norm": 6.193589210510254, "learning_rate": 0.00022993053832795832, "loss": 3.4532, "step": 2395 }, { "epoch": 0.84, "grad_norm": 6.4159698486328125, "learning_rate": 0.00022989332671793597, "loss": 1.3103, "step": 2396 }, { "epoch": 0.84, "grad_norm": 2.0241963863372803, "learning_rate": 0.00022985611510791364, "loss": 0.1741, "step": 2397 }, { "epoch": 0.84, "grad_norm": 4.740751266479492, "learning_rate": 0.00022981890349789134, "loss": 0.7566, "step": 2398 }, { "epoch": 0.84, "grad_norm": 2.657395362854004, "learning_rate": 0.000229781691887869, "loss": 0.4493, "step": 2399 }, { "epoch": 0.84, "grad_norm": 3.8431265354156494, "learning_rate": 0.00022974448027784667, "loss": 0.6928, "step": 2400 }, { "epoch": 0.84, "eval_loss": 0.6465885639190674, "eval_runtime": 50.1998, "eval_samples_per_second": 43.187, "eval_steps_per_second": 10.797, "eval_wer": 0.5759384189586577, "step": 2400 }, { "epoch": 0.84, "grad_norm": 2.204583168029785, "learning_rate": 0.00022970726866782432, "loss": 0.8461, "step": 2401 }, { "epoch": 0.84, "grad_norm": 2.2085318565368652, "learning_rate": 0.00022967005705780202, "loss": 0.7323, "step": 2402 }, { "epoch": 0.84, "grad_norm": 2.1746833324432373, "learning_rate": 0.0002296328454477797, "loss": 0.5866, "step": 2403 }, { "epoch": 0.84, "grad_norm": 2.1107888221740723, "learning_rate": 0.00022959563383775734, "loss": 0.4081, "step": 2404 }, { "epoch": 0.84, "grad_norm": 3.436554193496704, "learning_rate": 0.00022955842222773505, "loss": 0.5447, "step": 2405 }, { "epoch": 0.84, "grad_norm": 1.6409112215042114, "learning_rate": 0.0002295212106177127, "loss": 0.4951, "step": 2406 }, { "epoch": 0.84, "grad_norm": 2.4227728843688965, "learning_rate": 0.00022948399900769037, "loss": 1.4331, "step": 2407 }, { "epoch": 0.84, "grad_norm": 
1.892978549003601, "learning_rate": 0.00022944678739766808, "loss": 0.3022, "step": 2408 }, { "epoch": 0.84, "grad_norm": 3.203372001647949, "learning_rate": 0.00022940957578764572, "loss": 0.5053, "step": 2409 }, { "epoch": 0.84, "grad_norm": 4.97529411315918, "learning_rate": 0.0002293723641776234, "loss": 2.5665, "step": 2410 }, { "epoch": 0.84, "grad_norm": 3.133910894393921, "learning_rate": 0.00022933515256760105, "loss": 0.5585, "step": 2411 }, { "epoch": 0.85, "grad_norm": 2.481238842010498, "learning_rate": 0.00022929794095757875, "loss": 0.7493, "step": 2412 }, { "epoch": 0.85, "grad_norm": 2.110828399658203, "learning_rate": 0.00022926072934755643, "loss": 0.6066, "step": 2413 }, { "epoch": 0.85, "grad_norm": 2.420081377029419, "learning_rate": 0.00022922351773753408, "loss": 0.4951, "step": 2414 }, { "epoch": 0.85, "grad_norm": 4.824462890625, "learning_rate": 0.00022918630612751175, "loss": 0.6661, "step": 2415 }, { "epoch": 0.85, "grad_norm": 3.048468828201294, "learning_rate": 0.00022914909451748945, "loss": 0.6657, "step": 2416 }, { "epoch": 0.85, "grad_norm": 5.503145217895508, "learning_rate": 0.0002291118829074671, "loss": 0.4398, "step": 2417 }, { "epoch": 0.85, "grad_norm": 2.5302720069885254, "learning_rate": 0.00022907467129744478, "loss": 0.4753, "step": 2418 }, { "epoch": 0.85, "grad_norm": 1.1612811088562012, "learning_rate": 0.00022903745968742245, "loss": 0.143, "step": 2419 }, { "epoch": 0.85, "grad_norm": 8.667667388916016, "learning_rate": 0.00022900024807740013, "loss": 0.6554, "step": 2420 }, { "epoch": 0.85, "grad_norm": 4.002796649932861, "learning_rate": 0.0002289630364673778, "loss": 0.5772, "step": 2421 }, { "epoch": 0.85, "grad_norm": 6.211899280548096, "learning_rate": 0.00022892582485735546, "loss": 1.3073, "step": 2422 }, { "epoch": 0.85, "grad_norm": 3.510040521621704, "learning_rate": 0.00022888861324733316, "loss": 0.6746, "step": 2423 }, { "epoch": 0.85, "grad_norm": 8.804295539855957, "learning_rate": 
0.0002288514016373108, "loss": 2.8085, "step": 2424 }, { "epoch": 0.85, "grad_norm": 1.8787487745285034, "learning_rate": 0.00022881419002728848, "loss": 0.3381, "step": 2425 }, { "epoch": 0.85, "grad_norm": 2.092188596725464, "learning_rate": 0.00022877697841726619, "loss": 0.9032, "step": 2426 }, { "epoch": 0.85, "grad_norm": 3.2084264755249023, "learning_rate": 0.00022873976680724383, "loss": 0.6057, "step": 2427 }, { "epoch": 0.85, "grad_norm": 3.3689112663269043, "learning_rate": 0.0002287025551972215, "loss": 0.9147, "step": 2428 }, { "epoch": 0.85, "grad_norm": 1.9687527418136597, "learning_rate": 0.0002286653435871992, "loss": 0.5522, "step": 2429 }, { "epoch": 0.85, "grad_norm": 2.207888603210449, "learning_rate": 0.00022862813197717686, "loss": 0.6443, "step": 2430 }, { "epoch": 0.85, "grad_norm": 2.4033074378967285, "learning_rate": 0.00022859092036715454, "loss": 0.5481, "step": 2431 }, { "epoch": 0.85, "grad_norm": 1.924738883972168, "learning_rate": 0.00022855370875713219, "loss": 0.2973, "step": 2432 }, { "epoch": 0.85, "grad_norm": 2.1252126693725586, "learning_rate": 0.0002285164971471099, "loss": 0.3515, "step": 2433 }, { "epoch": 0.85, "grad_norm": 3.03769850730896, "learning_rate": 0.00022847928553708756, "loss": 0.5084, "step": 2434 }, { "epoch": 0.85, "grad_norm": 2.743135929107666, "learning_rate": 0.0002284420739270652, "loss": 0.7894, "step": 2435 }, { "epoch": 0.85, "grad_norm": 2.572986602783203, "learning_rate": 0.00022840486231704292, "loss": 0.6504, "step": 2436 }, { "epoch": 0.85, "grad_norm": 2.5167198181152344, "learning_rate": 0.00022836765070702056, "loss": 0.479, "step": 2437 }, { "epoch": 0.85, "grad_norm": 2.302143096923828, "learning_rate": 0.00022833043909699824, "loss": 0.6252, "step": 2438 }, { "epoch": 0.85, "grad_norm": 1.8964296579360962, "learning_rate": 0.00022829322748697592, "loss": 0.2638, "step": 2439 }, { "epoch": 0.85, "grad_norm": 1.6442536115646362, "learning_rate": 0.0002282560158769536, "loss": 0.1767, 
"step": 2440 }, { "epoch": 0.86, "grad_norm": 2.2055866718292236, "learning_rate": 0.00022821880426693127, "loss": 0.2613, "step": 2441 }, { "epoch": 0.86, "grad_norm": 2.640995979309082, "learning_rate": 0.00022818159265690894, "loss": 0.5495, "step": 2442 }, { "epoch": 0.86, "grad_norm": 2.59956955909729, "learning_rate": 0.0002281443810468866, "loss": 0.5332, "step": 2443 }, { "epoch": 0.86, "grad_norm": 4.761136531829834, "learning_rate": 0.0002281071694368643, "loss": 1.1939, "step": 2444 }, { "epoch": 0.86, "grad_norm": 3.0465445518493652, "learning_rate": 0.00022806995782684194, "loss": 0.3178, "step": 2445 }, { "epoch": 0.86, "grad_norm": 2.101271390914917, "learning_rate": 0.00022803274621681962, "loss": 0.5765, "step": 2446 }, { "epoch": 0.86, "grad_norm": 3.0883326530456543, "learning_rate": 0.00022799553460679732, "loss": 0.2466, "step": 2447 }, { "epoch": 0.86, "grad_norm": 2.459491491317749, "learning_rate": 0.00022795832299677497, "loss": 0.3144, "step": 2448 }, { "epoch": 0.86, "grad_norm": 5.362196445465088, "learning_rate": 0.00022792111138675265, "loss": 0.6192, "step": 2449 }, { "epoch": 0.86, "grad_norm": 3.9791548252105713, "learning_rate": 0.0002278838997767303, "loss": 0.548, "step": 2450 }, { "epoch": 0.86, "grad_norm": 2.309065103530884, "learning_rate": 0.000227846688166708, "loss": 1.0592, "step": 2451 }, { "epoch": 0.86, "grad_norm": 3.0640029907226562, "learning_rate": 0.00022780947655668567, "loss": 1.2627, "step": 2452 }, { "epoch": 0.86, "grad_norm": 2.2788937091827393, "learning_rate": 0.00022777226494666332, "loss": 0.7759, "step": 2453 }, { "epoch": 0.86, "grad_norm": 1.9871941804885864, "learning_rate": 0.00022773505333664103, "loss": 0.7314, "step": 2454 }, { "epoch": 0.86, "grad_norm": 2.758849620819092, "learning_rate": 0.0002276978417266187, "loss": 0.5533, "step": 2455 }, { "epoch": 0.86, "grad_norm": 2.1479053497314453, "learning_rate": 0.00022766063011659635, "loss": 0.6265, "step": 2456 }, { "epoch": 0.86, "grad_norm": 
2.5701639652252197, "learning_rate": 0.00022762341850657405, "loss": 0.6011, "step": 2457 }, { "epoch": 0.86, "grad_norm": 2.6615350246429443, "learning_rate": 0.0002275862068965517, "loss": 0.6043, "step": 2458 }, { "epoch": 0.86, "grad_norm": 1.9056451320648193, "learning_rate": 0.00022754899528652938, "loss": 0.4244, "step": 2459 }, { "epoch": 0.86, "grad_norm": 2.3481318950653076, "learning_rate": 0.00022751178367650705, "loss": 0.3759, "step": 2460 }, { "epoch": 0.86, "grad_norm": 1.501145601272583, "learning_rate": 0.00022747457206648473, "loss": 0.3211, "step": 2461 }, { "epoch": 0.86, "grad_norm": 1.2316216230392456, "learning_rate": 0.0002274373604564624, "loss": 0.213, "step": 2462 }, { "epoch": 0.86, "grad_norm": 3.9179749488830566, "learning_rate": 0.00022740014884644005, "loss": 0.5273, "step": 2463 }, { "epoch": 0.86, "grad_norm": 2.6479461193084717, "learning_rate": 0.00022736293723641773, "loss": 0.3405, "step": 2464 }, { "epoch": 0.86, "grad_norm": 3.821375846862793, "learning_rate": 0.00022732572562639543, "loss": 0.4056, "step": 2465 }, { "epoch": 0.86, "grad_norm": 3.5695273876190186, "learning_rate": 0.00022728851401637308, "loss": 0.65, "step": 2466 }, { "epoch": 0.86, "grad_norm": 3.268481969833374, "learning_rate": 0.00022725130240635076, "loss": 0.4224, "step": 2467 }, { "epoch": 0.86, "grad_norm": 3.3104357719421387, "learning_rate": 0.0002272140907963284, "loss": 0.5315, "step": 2468 }, { "epoch": 0.87, "grad_norm": 3.089568853378296, "learning_rate": 0.0002271768791863061, "loss": 0.5053, "step": 2469 }, { "epoch": 0.87, "grad_norm": 4.867792129516602, "learning_rate": 0.00022713966757628378, "loss": 1.8816, "step": 2470 }, { "epoch": 0.87, "grad_norm": 2.6889870166778564, "learning_rate": 0.00022710245596626143, "loss": 0.3397, "step": 2471 }, { "epoch": 0.87, "grad_norm": 5.504836559295654, "learning_rate": 0.00022706524435623914, "loss": 0.8586, "step": 2472 }, { "epoch": 0.87, "grad_norm": 3.099041700363159, "learning_rate": 
0.0002270280327462168, "loss": 0.3921, "step": 2473 }, { "epoch": 0.87, "grad_norm": 3.710519790649414, "learning_rate": 0.00022699082113619446, "loss": 0.3917, "step": 2474 }, { "epoch": 0.87, "grad_norm": 2.710888624191284, "learning_rate": 0.00022695360952617216, "loss": 0.1841, "step": 2475 }, { "epoch": 0.87, "grad_norm": 2.646345376968384, "learning_rate": 0.0002269163979161498, "loss": 0.9153, "step": 2476 }, { "epoch": 0.87, "grad_norm": 2.3178653717041016, "learning_rate": 0.0002268791863061275, "loss": 0.6073, "step": 2477 }, { "epoch": 0.87, "grad_norm": 32.63193130493164, "learning_rate": 0.0002268419746961052, "loss": 6.8604, "step": 2478 }, { "epoch": 0.87, "grad_norm": 2.098633289337158, "learning_rate": 0.00022680476308608284, "loss": 0.6981, "step": 2479 }, { "epoch": 0.87, "grad_norm": 1.7247930765151978, "learning_rate": 0.00022676755147606052, "loss": 0.3698, "step": 2480 }, { "epoch": 0.87, "grad_norm": 3.1738440990448, "learning_rate": 0.00022673033986603816, "loss": 0.5705, "step": 2481 }, { "epoch": 0.87, "grad_norm": 3.0334391593933105, "learning_rate": 0.00022669312825601587, "loss": 0.3283, "step": 2482 }, { "epoch": 0.87, "grad_norm": 1.9234660863876343, "learning_rate": 0.00022665591664599354, "loss": 0.633, "step": 2483 }, { "epoch": 0.87, "grad_norm": 2.026090621948242, "learning_rate": 0.0002266187050359712, "loss": 0.3883, "step": 2484 }, { "epoch": 0.87, "grad_norm": 3.060999631881714, "learning_rate": 0.00022658149342594887, "loss": 0.6174, "step": 2485 }, { "epoch": 0.87, "grad_norm": 3.9415786266326904, "learning_rate": 0.00022654428181592657, "loss": 0.7415, "step": 2486 }, { "epoch": 0.87, "grad_norm": 3.4952282905578613, "learning_rate": 0.00022650707020590422, "loss": 0.7448, "step": 2487 }, { "epoch": 0.87, "grad_norm": 4.7627339363098145, "learning_rate": 0.0002264698585958819, "loss": 0.86, "step": 2488 }, { "epoch": 0.87, "grad_norm": 2.7875306606292725, "learning_rate": 0.00022643264698585954, "loss": 0.6431, "step": 
2489 }, { "epoch": 0.87, "grad_norm": 4.5709052085876465, "learning_rate": 0.00022639543537583725, "loss": 0.4823, "step": 2490 }, { "epoch": 0.87, "grad_norm": 4.550630569458008, "learning_rate": 0.00022635822376581492, "loss": 0.951, "step": 2491 }, { "epoch": 0.87, "grad_norm": 2.399900436401367, "learning_rate": 0.00022632101215579257, "loss": 0.3805, "step": 2492 }, { "epoch": 0.87, "grad_norm": 2.55071759223938, "learning_rate": 0.00022628380054577027, "loss": 0.3896, "step": 2493 }, { "epoch": 0.87, "grad_norm": 3.296781063079834, "learning_rate": 0.00022624658893574792, "loss": 0.3268, "step": 2494 }, { "epoch": 0.87, "grad_norm": 3.0526139736175537, "learning_rate": 0.0002262093773257256, "loss": 0.6264, "step": 2495 }, { "epoch": 0.87, "grad_norm": 4.58652400970459, "learning_rate": 0.0002261721657157033, "loss": 0.5402, "step": 2496 }, { "epoch": 0.87, "grad_norm": 6.031370162963867, "learning_rate": 0.00022613495410568095, "loss": 1.0957, "step": 2497 }, { "epoch": 0.88, "grad_norm": 3.1321797370910645, "learning_rate": 0.00022609774249565863, "loss": 0.4783, "step": 2498 }, { "epoch": 0.88, "grad_norm": 3.8745222091674805, "learning_rate": 0.00022606053088563633, "loss": 0.3831, "step": 2499 }, { "epoch": 0.88, "grad_norm": 3.6690893173217773, "learning_rate": 0.00022602331927561398, "loss": 0.6712, "step": 2500 }, { "epoch": 0.88, "eval_loss": 0.5969449877738953, "eval_runtime": 50.5023, "eval_samples_per_second": 42.929, "eval_steps_per_second": 10.732, "eval_wer": 0.5697111226431413, "step": 2500 }, { "epoch": 0.88, "grad_norm": 3.2524619102478027, "learning_rate": 0.00022598610766559165, "loss": 1.1098, "step": 2501 }, { "epoch": 0.88, "grad_norm": 3.3563032150268555, "learning_rate": 0.0002259488960555693, "loss": 0.8505, "step": 2502 }, { "epoch": 0.88, "grad_norm": 1.617898941040039, "learning_rate": 0.000225911684445547, "loss": 0.5266, "step": 2503 }, { "epoch": 0.88, "grad_norm": 1.1156997680664062, "learning_rate": 0.00022587447283552468, 
"loss": 0.285, "step": 2504 }, { "epoch": 0.88, "grad_norm": 2.1009535789489746, "learning_rate": 0.00022583726122550233, "loss": 0.4942, "step": 2505 }, { "epoch": 0.88, "grad_norm": 2.473078966140747, "learning_rate": 0.00022580004961548, "loss": 0.5901, "step": 2506 }, { "epoch": 0.88, "grad_norm": 2.716780662536621, "learning_rate": 0.00022576283800545768, "loss": 0.834, "step": 2507 }, { "epoch": 0.88, "grad_norm": 2.1853911876678467, "learning_rate": 0.00022572562639543536, "loss": 0.7706, "step": 2508 }, { "epoch": 0.88, "grad_norm": 4.181541919708252, "learning_rate": 0.00022568841478541303, "loss": 0.9358, "step": 2509 }, { "epoch": 0.88, "grad_norm": 3.0345635414123535, "learning_rate": 0.00022565120317539068, "loss": 0.6332, "step": 2510 }, { "epoch": 0.88, "grad_norm": 2.495234966278076, "learning_rate": 0.00022561399156536838, "loss": 0.5035, "step": 2511 }, { "epoch": 0.88, "grad_norm": 2.819892168045044, "learning_rate": 0.00022557677995534603, "loss": 0.4648, "step": 2512 }, { "epoch": 0.88, "grad_norm": 2.7033729553222656, "learning_rate": 0.0002255395683453237, "loss": 0.5328, "step": 2513 }, { "epoch": 0.88, "grad_norm": 1.782414197921753, "learning_rate": 0.0002255023567353014, "loss": 0.5677, "step": 2514 }, { "epoch": 0.88, "grad_norm": 2.6469204425811768, "learning_rate": 0.00022546514512527906, "loss": 0.6008, "step": 2515 }, { "epoch": 0.88, "grad_norm": 3.3379626274108887, "learning_rate": 0.00022542793351525674, "loss": 0.621, "step": 2516 }, { "epoch": 0.88, "grad_norm": 3.2679710388183594, "learning_rate": 0.00022539072190523444, "loss": 0.5374, "step": 2517 }, { "epoch": 0.88, "grad_norm": 1.7875244617462158, "learning_rate": 0.0002253535102952121, "loss": 0.2722, "step": 2518 }, { "epoch": 0.88, "grad_norm": 1.986699104309082, "learning_rate": 0.00022531629868518976, "loss": 0.4861, "step": 2519 }, { "epoch": 0.88, "grad_norm": 4.055794715881348, "learning_rate": 0.0002252790870751674, "loss": 0.8421, "step": 2520 }, { "epoch": 0.88, 
"grad_norm": 3.854576587677002, "learning_rate": 0.00022524187546514511, "loss": 0.2723, "step": 2521 }, { "epoch": 0.88, "grad_norm": 4.405861854553223, "learning_rate": 0.0002252046638551228, "loss": 0.6845, "step": 2522 }, { "epoch": 0.88, "grad_norm": 7.5870161056518555, "learning_rate": 0.00022516745224510044, "loss": 2.0838, "step": 2523 }, { "epoch": 0.88, "grad_norm": 2.9509167671203613, "learning_rate": 0.00022513024063507814, "loss": 0.2952, "step": 2524 }, { "epoch": 0.88, "grad_norm": 2.6641058921813965, "learning_rate": 0.0002250930290250558, "loss": 0.4337, "step": 2525 }, { "epoch": 0.89, "grad_norm": 5.685914993286133, "learning_rate": 0.00022505581741503347, "loss": 1.4865, "step": 2526 }, { "epoch": 0.89, "grad_norm": 2.8381335735321045, "learning_rate": 0.00022501860580501114, "loss": 0.8048, "step": 2527 }, { "epoch": 0.89, "grad_norm": 2.5004918575286865, "learning_rate": 0.00022498139419498882, "loss": 0.8519, "step": 2528 }, { "epoch": 0.89, "grad_norm": 2.1345882415771484, "learning_rate": 0.0002249441825849665, "loss": 0.6214, "step": 2529 }, { "epoch": 0.89, "grad_norm": 3.871791362762451, "learning_rate": 0.00022490697097494417, "loss": 1.7872, "step": 2530 }, { "epoch": 0.89, "grad_norm": 2.4108870029449463, "learning_rate": 0.00022486975936492182, "loss": 0.4127, "step": 2531 }, { "epoch": 0.89, "grad_norm": 2.154738426208496, "learning_rate": 0.00022483254775489952, "loss": 0.3094, "step": 2532 }, { "epoch": 0.89, "grad_norm": 1.8952206373214722, "learning_rate": 0.00022479533614487717, "loss": 0.4161, "step": 2533 }, { "epoch": 0.89, "grad_norm": 2.7930335998535156, "learning_rate": 0.00022475812453485485, "loss": 0.7349, "step": 2534 }, { "epoch": 0.89, "grad_norm": 2.5914433002471924, "learning_rate": 0.00022472091292483255, "loss": 0.6303, "step": 2535 }, { "epoch": 0.89, "grad_norm": 2.9063329696655273, "learning_rate": 0.0002246837013148102, "loss": 0.7971, "step": 2536 }, { "epoch": 0.89, "grad_norm": 2.814877986907959, 
"learning_rate": 0.00022464648970478787, "loss": 0.6404, "step": 2537 }, { "epoch": 0.89, "grad_norm": 2.2921807765960693, "learning_rate": 0.00022460927809476552, "loss": 0.6443, "step": 2538 }, { "epoch": 0.89, "grad_norm": 3.2125370502471924, "learning_rate": 0.00022457206648474322, "loss": 1.599, "step": 2539 }, { "epoch": 0.89, "grad_norm": 5.036359786987305, "learning_rate": 0.0002245348548747209, "loss": 0.8502, "step": 2540 }, { "epoch": 0.89, "grad_norm": 2.4251275062561035, "learning_rate": 0.00022449764326469855, "loss": 0.3993, "step": 2541 }, { "epoch": 0.89, "grad_norm": 3.6843395233154297, "learning_rate": 0.00022446043165467625, "loss": 1.2595, "step": 2542 }, { "epoch": 0.89, "grad_norm": 1.9409282207489014, "learning_rate": 0.00022442322004465393, "loss": 0.213, "step": 2543 }, { "epoch": 0.89, "grad_norm": 2.4829013347625732, "learning_rate": 0.00022438600843463158, "loss": 0.4155, "step": 2544 }, { "epoch": 0.89, "grad_norm": 12.528837203979492, "learning_rate": 0.00022434879682460928, "loss": 2.7458, "step": 2545 }, { "epoch": 0.89, "grad_norm": 3.5831103324890137, "learning_rate": 0.00022431158521458693, "loss": 0.9758, "step": 2546 }, { "epoch": 0.89, "grad_norm": 3.7440414428710938, "learning_rate": 0.0002242743736045646, "loss": 0.8341, "step": 2547 }, { "epoch": 0.89, "grad_norm": 3.1389410495758057, "learning_rate": 0.00022423716199454228, "loss": 0.3766, "step": 2548 }, { "epoch": 0.89, "grad_norm": 3.8604750633239746, "learning_rate": 0.00022419995038451996, "loss": 0.4142, "step": 2549 }, { "epoch": 0.89, "grad_norm": 5.646600246429443, "learning_rate": 0.00022416273877449763, "loss": 0.4988, "step": 2550 }, { "epoch": 0.89, "grad_norm": 2.2489912509918213, "learning_rate": 0.00022412552716447528, "loss": 1.1134, "step": 2551 }, { "epoch": 0.89, "grad_norm": 1.9654885530471802, "learning_rate": 0.00022408831555445296, "loss": 0.4167, "step": 2552 }, { "epoch": 0.89, "grad_norm": 2.2615177631378174, "learning_rate": 
0.00022405110394443066, "loss": 0.6725, "step": 2553 }, { "epoch": 0.89, "grad_norm": 5.323215961456299, "learning_rate": 0.0002240138923344083, "loss": 0.9081, "step": 2554 }, { "epoch": 0.9, "grad_norm": 2.1853933334350586, "learning_rate": 0.00022397668072438598, "loss": 0.5613, "step": 2555 }, { "epoch": 0.9, "grad_norm": 2.7656657695770264, "learning_rate": 0.00022393946911436363, "loss": 0.4834, "step": 2556 }, { "epoch": 0.9, "grad_norm": 2.5364134311676025, "learning_rate": 0.00022390225750434133, "loss": 0.8214, "step": 2557 }, { "epoch": 0.9, "grad_norm": 3.2517011165618896, "learning_rate": 0.000223865045894319, "loss": 0.6773, "step": 2558 }, { "epoch": 0.9, "grad_norm": 6.1538166999816895, "learning_rate": 0.00022382783428429666, "loss": 2.0649, "step": 2559 }, { "epoch": 0.9, "grad_norm": 3.786186695098877, "learning_rate": 0.00022379062267427436, "loss": 0.6274, "step": 2560 }, { "epoch": 0.9, "grad_norm": 2.8489811420440674, "learning_rate": 0.00022375341106425204, "loss": 0.5626, "step": 2561 }, { "epoch": 0.9, "grad_norm": 14.115158081054688, "learning_rate": 0.0002237161994542297, "loss": 3.3753, "step": 2562 }, { "epoch": 0.9, "grad_norm": 2.628080129623413, "learning_rate": 0.0002236789878442074, "loss": 0.333, "step": 2563 }, { "epoch": 0.9, "grad_norm": 2.309471607208252, "learning_rate": 0.00022364177623418504, "loss": 0.3252, "step": 2564 }, { "epoch": 0.9, "grad_norm": 4.064052104949951, "learning_rate": 0.00022360456462416271, "loss": 0.7157, "step": 2565 }, { "epoch": 0.9, "grad_norm": 1.8947696685791016, "learning_rate": 0.00022356735301414042, "loss": 0.3062, "step": 2566 }, { "epoch": 0.9, "grad_norm": 3.9529271125793457, "learning_rate": 0.00022353014140411807, "loss": 0.3285, "step": 2567 }, { "epoch": 0.9, "grad_norm": 2.573422431945801, "learning_rate": 0.00022349292979409574, "loss": 0.462, "step": 2568 }, { "epoch": 0.9, "grad_norm": 3.323622703552246, "learning_rate": 0.0002234557181840734, "loss": 0.4158, "step": 2569 }, { 
"epoch": 0.9, "grad_norm": 3.2690248489379883, "learning_rate": 0.0002234185065740511, "loss": 1.531, "step": 2570 }, { "epoch": 0.9, "grad_norm": 1.335678219795227, "learning_rate": 0.00022338129496402877, "loss": 0.1792, "step": 2571 }, { "epoch": 0.9, "grad_norm": 3.555920362472534, "learning_rate": 0.00022334408335400642, "loss": 0.3438, "step": 2572 }, { "epoch": 0.9, "grad_norm": 1.97938072681427, "learning_rate": 0.0002233068717439841, "loss": 0.1992, "step": 2573 }, { "epoch": 0.9, "grad_norm": 4.451127529144287, "learning_rate": 0.0002232696601339618, "loss": 0.9753, "step": 2574 }, { "epoch": 0.9, "grad_norm": 3.8145601749420166, "learning_rate": 0.00022323244852393944, "loss": 0.5944, "step": 2575 }, { "epoch": 0.9, "grad_norm": 3.062714099884033, "learning_rate": 0.00022319523691391712, "loss": 1.197, "step": 2576 }, { "epoch": 0.9, "grad_norm": 2.360086441040039, "learning_rate": 0.00022315802530389477, "loss": 0.4267, "step": 2577 }, { "epoch": 0.9, "grad_norm": 3.727750301361084, "learning_rate": 0.00022312081369387247, "loss": 1.1279, "step": 2578 }, { "epoch": 0.9, "grad_norm": 1.5544394254684448, "learning_rate": 0.00022308360208385015, "loss": 0.5312, "step": 2579 }, { "epoch": 0.9, "grad_norm": 2.51163387298584, "learning_rate": 0.0002230463904738278, "loss": 0.3286, "step": 2580 }, { "epoch": 0.9, "grad_norm": 1.9520326852798462, "learning_rate": 0.0002230091788638055, "loss": 0.6118, "step": 2581 }, { "epoch": 0.9, "grad_norm": 1.6972302198410034, "learning_rate": 0.00022297196725378315, "loss": 0.289, "step": 2582 }, { "epoch": 0.91, "grad_norm": 2.5757126808166504, "learning_rate": 0.00022293475564376082, "loss": 0.4856, "step": 2583 }, { "epoch": 0.91, "grad_norm": 2.543616533279419, "learning_rate": 0.00022289754403373853, "loss": 0.5646, "step": 2584 }, { "epoch": 0.91, "grad_norm": 3.380565643310547, "learning_rate": 0.00022286033242371618, "loss": 0.5098, "step": 2585 }, { "epoch": 0.91, "grad_norm": 1.8600701093673706, "learning_rate": 
0.00022282312081369385, "loss": 0.4146, "step": 2586 }, { "epoch": 0.91, "grad_norm": 1.9469465017318726, "learning_rate": 0.00022278590920367155, "loss": 0.3405, "step": 2587 }, { "epoch": 0.91, "grad_norm": 6.387368679046631, "learning_rate": 0.0002227486975936492, "loss": 2.7503, "step": 2588 }, { "epoch": 0.91, "grad_norm": 2.1771650314331055, "learning_rate": 0.00022271148598362688, "loss": 0.2495, "step": 2589 }, { "epoch": 0.91, "grad_norm": 3.2582592964172363, "learning_rate": 0.00022267427437360453, "loss": 0.7721, "step": 2590 }, { "epoch": 0.91, "grad_norm": 4.26287317276001, "learning_rate": 0.00022263706276358223, "loss": 0.3901, "step": 2591 }, { "epoch": 0.91, "grad_norm": 4.030311584472656, "learning_rate": 0.0002225998511535599, "loss": 1.1871, "step": 2592 }, { "epoch": 0.91, "grad_norm": 3.2739689350128174, "learning_rate": 0.00022256263954353756, "loss": 0.6562, "step": 2593 }, { "epoch": 0.91, "grad_norm": 2.621596574783325, "learning_rate": 0.00022252542793351523, "loss": 0.3165, "step": 2594 }, { "epoch": 0.91, "grad_norm": 9.08333683013916, "learning_rate": 0.0002224882163234929, "loss": 2.6412, "step": 2595 }, { "epoch": 0.91, "grad_norm": 2.8135602474212646, "learning_rate": 0.00022245100471347058, "loss": 0.5285, "step": 2596 }, { "epoch": 0.91, "grad_norm": 2.7928903102874756, "learning_rate": 0.00022241379310344826, "loss": 0.3371, "step": 2597 }, { "epoch": 0.91, "grad_norm": 1.8602352142333984, "learning_rate": 0.0002223765814934259, "loss": 0.2245, "step": 2598 }, { "epoch": 0.91, "grad_norm": 3.1992337703704834, "learning_rate": 0.0002223393698834036, "loss": 0.5035, "step": 2599 }, { "epoch": 0.91, "grad_norm": 7.390182971954346, "learning_rate": 0.00022230215827338126, "loss": 0.8551, "step": 2600 }, { "epoch": 0.91, "eval_loss": 0.6041048169136047, "eval_runtime": 51.0519, "eval_samples_per_second": 42.467, "eval_steps_per_second": 10.617, "eval_wer": 0.5422072305829442, "step": 2600 }, { "epoch": 0.91, "grad_norm": 
2.8501031398773193, "learning_rate": 0.00022226494666335893, "loss": 0.9814, "step": 2601 }, { "epoch": 0.91, "grad_norm": 3.3961238861083984, "learning_rate": 0.00022222773505333664, "loss": 1.0017, "step": 2602 }, { "epoch": 0.91, "grad_norm": 3.359489917755127, "learning_rate": 0.00022219052344331429, "loss": 0.5428, "step": 2603 }, { "epoch": 0.91, "grad_norm": 3.420226573944092, "learning_rate": 0.00022215331183329196, "loss": 0.854, "step": 2604 }, { "epoch": 0.91, "grad_norm": 1.7213860750198364, "learning_rate": 0.00022211610022326966, "loss": 0.2745, "step": 2605 }, { "epoch": 0.91, "grad_norm": 3.067789077758789, "learning_rate": 0.0002220788886132473, "loss": 0.5725, "step": 2606 }, { "epoch": 0.91, "grad_norm": 2.9434151649475098, "learning_rate": 0.000222041677003225, "loss": 0.4392, "step": 2607 }, { "epoch": 0.91, "grad_norm": 3.986076593399048, "learning_rate": 0.00022200446539320264, "loss": 0.5028, "step": 2608 }, { "epoch": 0.91, "grad_norm": 3.050105094909668, "learning_rate": 0.00022196725378318034, "loss": 0.506, "step": 2609 }, { "epoch": 0.91, "grad_norm": 8.282485961914062, "learning_rate": 0.00022193004217315802, "loss": 2.1749, "step": 2610 }, { "epoch": 0.91, "grad_norm": 4.559239864349365, "learning_rate": 0.00022189283056313567, "loss": 0.4975, "step": 2611 }, { "epoch": 0.92, "grad_norm": 2.908665895462036, "learning_rate": 0.00022185561895311337, "loss": 0.3902, "step": 2612 }, { "epoch": 0.92, "grad_norm": 2.392807722091675, "learning_rate": 0.00022181840734309102, "loss": 0.2691, "step": 2613 }, { "epoch": 0.92, "grad_norm": 3.977257251739502, "learning_rate": 0.0002217811957330687, "loss": 0.5115, "step": 2614 }, { "epoch": 0.92, "grad_norm": 2.195502758026123, "learning_rate": 0.00022174398412304637, "loss": 0.3884, "step": 2615 }, { "epoch": 0.92, "grad_norm": 3.966074228286743, "learning_rate": 0.00022170677251302404, "loss": 0.7567, "step": 2616 }, { "epoch": 0.92, "grad_norm": 1.6738837957382202, "learning_rate": 
0.00022166956090300172, "loss": 0.238, "step": 2617 }, { "epoch": 0.92, "grad_norm": 1.954295039176941, "learning_rate": 0.0002216323492929794, "loss": 0.4697, "step": 2618 }, { "epoch": 0.92, "grad_norm": 2.3093600273132324, "learning_rate": 0.00022159513768295704, "loss": 0.2654, "step": 2619 }, { "epoch": 0.92, "grad_norm": 9.452103614807129, "learning_rate": 0.00022155792607293475, "loss": 0.5994, "step": 2620 }, { "epoch": 0.92, "grad_norm": 2.7643561363220215, "learning_rate": 0.0002215207144629124, "loss": 0.2038, "step": 2621 }, { "epoch": 0.92, "grad_norm": 4.625439167022705, "learning_rate": 0.00022148350285289007, "loss": 0.6295, "step": 2622 }, { "epoch": 0.92, "grad_norm": 6.1190996170043945, "learning_rate": 0.00022144629124286777, "loss": 0.4905, "step": 2623 }, { "epoch": 0.92, "grad_norm": 4.208706378936768, "learning_rate": 0.00022140907963284542, "loss": 1.0413, "step": 2624 }, { "epoch": 0.92, "grad_norm": 6.129053592681885, "learning_rate": 0.0002213718680228231, "loss": 0.58, "step": 2625 }, { "epoch": 0.92, "grad_norm": 2.2219321727752686, "learning_rate": 0.00022133465641280075, "loss": 1.1687, "step": 2626 }, { "epoch": 0.92, "grad_norm": 2.6231391429901123, "learning_rate": 0.00022129744480277845, "loss": 0.6352, "step": 2627 }, { "epoch": 0.92, "grad_norm": 1.8433243036270142, "learning_rate": 0.00022126023319275613, "loss": 0.7305, "step": 2628 }, { "epoch": 0.92, "grad_norm": 2.111185073852539, "learning_rate": 0.00022122302158273378, "loss": 0.4803, "step": 2629 }, { "epoch": 0.92, "grad_norm": 2.3844072818756104, "learning_rate": 0.00022118580997271148, "loss": 0.5074, "step": 2630 }, { "epoch": 0.92, "grad_norm": 1.9091522693634033, "learning_rate": 0.00022114859836268915, "loss": 0.6794, "step": 2631 }, { "epoch": 0.92, "grad_norm": 2.5186028480529785, "learning_rate": 0.0002211113867526668, "loss": 0.6303, "step": 2632 }, { "epoch": 0.92, "grad_norm": 3.9880521297454834, "learning_rate": 0.0002210741751426445, "loss": 0.3124, 
"step": 2633 }, { "epoch": 0.92, "grad_norm": 1.2950042486190796, "learning_rate": 0.00022103696353262215, "loss": 0.1439, "step": 2634 }, { "epoch": 0.92, "grad_norm": 1.8072837591171265, "learning_rate": 0.00022099975192259983, "loss": 0.1895, "step": 2635 }, { "epoch": 0.92, "grad_norm": 2.449474334716797, "learning_rate": 0.0002209625403125775, "loss": 0.5064, "step": 2636 }, { "epoch": 0.92, "grad_norm": 2.273451805114746, "learning_rate": 0.00022092532870255518, "loss": 0.7899, "step": 2637 }, { "epoch": 0.92, "grad_norm": 3.689117193222046, "learning_rate": 0.00022088811709253286, "loss": 0.4124, "step": 2638 }, { "epoch": 0.92, "grad_norm": 3.5043230056762695, "learning_rate": 0.0002208509054825105, "loss": 0.6174, "step": 2639 }, { "epoch": 0.93, "grad_norm": 0.7596065998077393, "learning_rate": 0.0002208136938724882, "loss": 0.0876, "step": 2640 }, { "epoch": 0.93, "grad_norm": 2.0988588333129883, "learning_rate": 0.00022077648226246588, "loss": 0.486, "step": 2641 }, { "epoch": 0.93, "grad_norm": 3.2585132122039795, "learning_rate": 0.00022073927065244353, "loss": 0.7462, "step": 2642 }, { "epoch": 0.93, "grad_norm": 2.4397363662719727, "learning_rate": 0.0002207020590424212, "loss": 0.2855, "step": 2643 }, { "epoch": 0.93, "grad_norm": 3.3539910316467285, "learning_rate": 0.0002206648474323989, "loss": 0.6796, "step": 2644 }, { "epoch": 0.93, "grad_norm": 6.321644306182861, "learning_rate": 0.00022062763582237656, "loss": 1.5811, "step": 2645 }, { "epoch": 0.93, "grad_norm": 5.399683952331543, "learning_rate": 0.00022059042421235424, "loss": 0.7615, "step": 2646 }, { "epoch": 0.93, "grad_norm": 4.508324146270752, "learning_rate": 0.00022055321260233189, "loss": 1.0115, "step": 2647 }, { "epoch": 0.93, "grad_norm": 4.360750198364258, "learning_rate": 0.0002205160009923096, "loss": 0.6244, "step": 2648 }, { "epoch": 0.93, "grad_norm": 2.8480136394500732, "learning_rate": 0.00022047878938228726, "loss": 0.444, "step": 2649 }, { "epoch": 0.93, "grad_norm": 
null, "learning_rate": 0.00022047878938228726, "loss": 0.3347, "step": 2650 }, { "epoch": 0.93, "grad_norm": 2.590242385864258, "learning_rate": 0.0002204415777722649, "loss": 0.506, "step": 2651 }, { "epoch": 0.93, "grad_norm": 2.5283608436584473, "learning_rate": 0.00022040436616224262, "loss": 0.5284, "step": 2652 }, { "epoch": 0.93, "grad_norm": 3.57216477394104, "learning_rate": 0.00022036715455222026, "loss": 1.126, "step": 2653 }, { "epoch": 0.93, "grad_norm": 2.9254002571105957, "learning_rate": 0.00022032994294219794, "loss": 0.7635, "step": 2654 }, { "epoch": 0.93, "grad_norm": 5.162715911865234, "learning_rate": 0.00022029273133217564, "loss": 0.7601, "step": 2655 }, { "epoch": 0.93, "grad_norm": 3.1532511711120605, "learning_rate": 0.0002202555197221533, "loss": 0.941, "step": 2656 }, { "epoch": 0.93, "grad_norm": 2.203185796737671, "learning_rate": 0.00022021830811213097, "loss": 0.4708, "step": 2657 }, { "epoch": 0.93, "grad_norm": 2.62638521194458, "learning_rate": 0.00022018109650210862, "loss": 0.3295, "step": 2658 }, { "epoch": 0.93, "grad_norm": 4.998661994934082, "learning_rate": 0.00022014388489208632, "loss": 1.4867, "step": 2659 }, { "epoch": 0.93, "grad_norm": 3.389922857284546, "learning_rate": 0.000220106673282064, "loss": 1.2759, "step": 2660 }, { "epoch": 0.93, "grad_norm": 5.583156108856201, "learning_rate": 0.00022006946167204164, "loss": 0.4192, "step": 2661 }, { "epoch": 0.93, "grad_norm": 4.610195159912109, "learning_rate": 0.00022003225006201935, "loss": 0.5941, "step": 2662 }, { "epoch": 0.93, "grad_norm": 2.2611424922943115, "learning_rate": 0.00021999503845199702, "loss": 0.7521, "step": 2663 }, { "epoch": 0.93, "grad_norm": 2.2116403579711914, "learning_rate": 0.00021995782684197467, "loss": 0.4965, "step": 2664 }, { "epoch": 0.93, "grad_norm": 3.487800359725952, "learning_rate": 0.00021992061523195235, "loss": 0.7573, "step": 2665 }, { "epoch": 0.93, "grad_norm": 2.6182491779327393, "learning_rate": 0.00021988340362193002, 
"loss": 0.4018, "step": 2666 }, { "epoch": 0.93, "grad_norm": 2.2056775093078613, "learning_rate": 0.0002198461920119077, "loss": 0.3728, "step": 2667 }, { "epoch": 0.93, "grad_norm": 1.8955894708633423, "learning_rate": 0.00021980898040188537, "loss": 0.4454, "step": 2668 }, { "epoch": 0.94, "grad_norm": 3.290935754776001, "learning_rate": 0.00021977176879186302, "loss": 0.8278, "step": 2669 }, { "epoch": 0.94, "grad_norm": 5.875994682312012, "learning_rate": 0.00021973455718184073, "loss": 0.3937, "step": 2670 }, { "epoch": 0.94, "grad_norm": 7.943798065185547, "learning_rate": 0.00021969734557181837, "loss": 2.0068, "step": 2671 }, { "epoch": 0.94, "grad_norm": 5.384636878967285, "learning_rate": 0.00021966013396179605, "loss": 0.6497, "step": 2672 }, { "epoch": 0.94, "grad_norm": 5.006543159484863, "learning_rate": 0.00021962292235177375, "loss": 0.4773, "step": 2673 }, { "epoch": 0.94, "grad_norm": 2.17258358001709, "learning_rate": 0.0002195857107417514, "loss": 0.3188, "step": 2674 }, { "epoch": 0.94, "grad_norm": 4.57729434967041, "learning_rate": 0.00021954849913172908, "loss": 0.857, "step": 2675 }, { "epoch": 0.94, "grad_norm": 2.9307923316955566, "learning_rate": 0.00021951128752170678, "loss": 0.6661, "step": 2676 }, { "epoch": 0.94, "grad_norm": 2.026794910430908, "learning_rate": 0.00021947407591168443, "loss": 0.4217, "step": 2677 }, { "epoch": 0.94, "grad_norm": 2.6623916625976562, "learning_rate": 0.0002194368643016621, "loss": 0.5877, "step": 2678 }, { "epoch": 0.94, "grad_norm": 2.1093175411224365, "learning_rate": 0.00021939965269163975, "loss": 0.3837, "step": 2679 }, { "epoch": 0.94, "grad_norm": 1.958167552947998, "learning_rate": 0.00021936244108161746, "loss": 0.438, "step": 2680 }, { "epoch": 0.94, "grad_norm": 2.3107874393463135, "learning_rate": 0.00021932522947159513, "loss": 0.2563, "step": 2681 }, { "epoch": 0.94, "grad_norm": 2.5440564155578613, "learning_rate": 0.00021928801786157278, "loss": 0.5432, "step": 2682 }, { "epoch": 
0.94, "grad_norm": 2.4444539546966553, "learning_rate": 0.00021925080625155048, "loss": 0.8262, "step": 2683 }, { "epoch": 0.94, "grad_norm": 3.0069375038146973, "learning_rate": 0.00021921359464152813, "loss": 0.8865, "step": 2684 }, { "epoch": 0.94, "grad_norm": 2.6746938228607178, "learning_rate": 0.0002191763830315058, "loss": 0.425, "step": 2685 }, { "epoch": 0.94, "grad_norm": 2.956552028656006, "learning_rate": 0.00021913917142148348, "loss": 0.6153, "step": 2686 }, { "epoch": 0.94, "grad_norm": 2.7592060565948486, "learning_rate": 0.00021910195981146116, "loss": 0.6633, "step": 2687 }, { "epoch": 0.94, "grad_norm": 4.972530841827393, "learning_rate": 0.00021906474820143884, "loss": 0.5743, "step": 2688 }, { "epoch": 0.94, "grad_norm": 2.044546365737915, "learning_rate": 0.0002190275365914165, "loss": 0.2586, "step": 2689 }, { "epoch": 0.94, "grad_norm": 2.6088757514953613, "learning_rate": 0.00021899032498139416, "loss": 0.2942, "step": 2690 }, { "epoch": 0.94, "grad_norm": 2.929062843322754, "learning_rate": 0.00021895311337137186, "loss": 0.4355, "step": 2691 }, { "epoch": 0.94, "grad_norm": 4.158072471618652, "learning_rate": 0.0002189159017613495, "loss": 0.597, "step": 2692 }, { "epoch": 0.94, "grad_norm": 2.822199821472168, "learning_rate": 0.0002188786901513272, "loss": 0.5923, "step": 2693 }, { "epoch": 0.94, "grad_norm": 2.6804251670837402, "learning_rate": 0.0002188414785413049, "loss": 0.3537, "step": 2694 }, { "epoch": 0.94, "grad_norm": 1.3298931121826172, "learning_rate": 0.00021880426693128254, "loss": 0.1007, "step": 2695 }, { "epoch": 0.94, "grad_norm": 1.8608430624008179, "learning_rate": 0.00021876705532126022, "loss": 0.1824, "step": 2696 }, { "epoch": 0.94, "grad_norm": 2.879000663757324, "learning_rate": 0.00021872984371123786, "loss": 0.3099, "step": 2697 }, { "epoch": 0.95, "grad_norm": 4.222035884857178, "learning_rate": 0.00021869263210121557, "loss": 0.6009, "step": 2698 }, { "epoch": 0.95, "grad_norm": 2.855086326599121, 
"learning_rate": 0.00021865542049119324, "loss": 0.6408, "step": 2699 }, { "epoch": 0.95, "grad_norm": 2.642244577407837, "learning_rate": 0.0002186182088811709, "loss": 0.2426, "step": 2700 }, { "epoch": 0.95, "eval_loss": 0.5336953401565552, "eval_runtime": 51.4996, "eval_samples_per_second": 42.097, "eval_steps_per_second": 10.524, "eval_wer": 0.48719944646254976, "step": 2700 }, { "epoch": 0.95, "grad_norm": 2.4252710342407227, "learning_rate": 0.0002185809972711486, "loss": 0.9991, "step": 2701 }, { "epoch": 0.95, "grad_norm": 2.518662452697754, "learning_rate": 0.00021854378566112624, "loss": 0.5021, "step": 2702 }, { "epoch": 0.95, "grad_norm": 2.2588770389556885, "learning_rate": 0.00021850657405110392, "loss": 0.6613, "step": 2703 }, { "epoch": 0.95, "grad_norm": 2.61147141456604, "learning_rate": 0.00021846936244108162, "loss": 0.6095, "step": 2704 }, { "epoch": 0.95, "grad_norm": 2.213031053543091, "learning_rate": 0.00021843215083105927, "loss": 0.3952, "step": 2705 }, { "epoch": 0.95, "grad_norm": 1.7817362546920776, "learning_rate": 0.00021839493922103695, "loss": 0.467, "step": 2706 }, { "epoch": 0.95, "grad_norm": 1.5945087671279907, "learning_rate": 0.00021835772761101462, "loss": 0.2962, "step": 2707 }, { "epoch": 0.95, "grad_norm": 3.3003673553466797, "learning_rate": 0.0002183205160009923, "loss": 0.5996, "step": 2708 }, { "epoch": 0.95, "grad_norm": 2.145582675933838, "learning_rate": 0.00021828330439096997, "loss": 0.4746, "step": 2709 }, { "epoch": 0.95, "grad_norm": 2.858013153076172, "learning_rate": 0.00021824609278094762, "loss": 0.7782, "step": 2710 }, { "epoch": 0.95, "grad_norm": 2.5435516834259033, "learning_rate": 0.0002182088811709253, "loss": 0.5275, "step": 2711 }, { "epoch": 0.95, "grad_norm": 2.836350202560425, "learning_rate": 0.000218171669560903, "loss": 0.4082, "step": 2712 }, { "epoch": 0.95, "grad_norm": 1.8937970399856567, "learning_rate": 0.00021813445795088065, "loss": 0.3822, "step": 2713 }, { "epoch": 0.95, 
"grad_norm": 2.3945305347442627, "learning_rate": 0.00021809724634085833, "loss": 0.371, "step": 2714 }, { "epoch": 0.95, "grad_norm": 1.8893818855285645, "learning_rate": 0.00021806003473083597, "loss": 0.2202, "step": 2715 }, { "epoch": 0.95, "grad_norm": 2.0246646404266357, "learning_rate": 0.00021802282312081368, "loss": 0.1932, "step": 2716 }, { "epoch": 0.95, "grad_norm": 1.8880715370178223, "learning_rate": 0.00021798561151079135, "loss": 0.2989, "step": 2717 }, { "epoch": 0.95, "grad_norm": 5.3225202560424805, "learning_rate": 0.000217948399900769, "loss": 1.991, "step": 2718 }, { "epoch": 0.95, "grad_norm": 5.143655776977539, "learning_rate": 0.0002179111882907467, "loss": 0.3331, "step": 2719 }, { "epoch": 0.95, "grad_norm": 2.7824084758758545, "learning_rate": 0.00021787397668072438, "loss": 0.3482, "step": 2720 }, { "epoch": 0.95, "grad_norm": 1.4806097745895386, "learning_rate": 0.00021783676507070203, "loss": 0.1659, "step": 2721 }, { "epoch": 0.95, "grad_norm": 4.375702381134033, "learning_rate": 0.00021779955346067973, "loss": 0.7355, "step": 2722 }, { "epoch": 0.95, "grad_norm": 3.235441207885742, "learning_rate": 0.00021776234185065738, "loss": 0.5138, "step": 2723 }, { "epoch": 0.95, "grad_norm": 5.325510501861572, "learning_rate": 0.00021772513024063506, "loss": 0.2818, "step": 2724 }, { "epoch": 0.95, "grad_norm": 2.0752792358398438, "learning_rate": 0.00021768791863061276, "loss": 0.1656, "step": 2725 }, { "epoch": 0.96, "grad_norm": 2.6207728385925293, "learning_rate": 0.0002176507070205904, "loss": 1.094, "step": 2726 }, { "epoch": 0.96, "grad_norm": 1.5188002586364746, "learning_rate": 0.00021761349541056808, "loss": 0.2545, "step": 2727 }, { "epoch": 0.96, "grad_norm": 2.4537734985351562, "learning_rate": 0.00021757628380054573, "loss": 0.476, "step": 2728 }, { "epoch": 0.96, "grad_norm": 1.6389610767364502, "learning_rate": 0.00021753907219052343, "loss": 0.3701, "step": 2729 }, { "epoch": 0.96, "grad_norm": 2.6082892417907715, 
"learning_rate": 0.0002175018605805011, "loss": 0.5414, "step": 2730 }, { "epoch": 0.96, "grad_norm": 3.334270477294922, "learning_rate": 0.00021746464897047876, "loss": 0.5533, "step": 2731 }, { "epoch": 0.96, "grad_norm": 5.05968713760376, "learning_rate": 0.00021742743736045644, "loss": 0.7159, "step": 2732 }, { "epoch": 0.96, "grad_norm": 3.399036407470703, "learning_rate": 0.00021739022575043414, "loss": 1.2499, "step": 2733 }, { "epoch": 0.96, "grad_norm": 1.3443745374679565, "learning_rate": 0.0002173530141404118, "loss": 0.176, "step": 2734 }, { "epoch": 0.96, "grad_norm": 2.077892303466797, "learning_rate": 0.00021731580253038946, "loss": 0.2954, "step": 2735 }, { "epoch": 0.96, "grad_norm": 2.6642885208129883, "learning_rate": 0.0002172785909203671, "loss": 0.2387, "step": 2736 }, { "epoch": 0.96, "grad_norm": 4.107677459716797, "learning_rate": 0.00021724137931034481, "loss": 0.6326, "step": 2737 }, { "epoch": 0.96, "grad_norm": 3.31645131111145, "learning_rate": 0.0002172041677003225, "loss": 0.4842, "step": 2738 }, { "epoch": 0.96, "grad_norm": 2.741530418395996, "learning_rate": 0.00021716695609030014, "loss": 0.3405, "step": 2739 }, { "epoch": 0.96, "grad_norm": 1.6625313758850098, "learning_rate": 0.00021712974448027784, "loss": 0.1492, "step": 2740 }, { "epoch": 0.96, "grad_norm": 4.23145866394043, "learning_rate": 0.0002170925328702555, "loss": 0.4991, "step": 2741 }, { "epoch": 0.96, "grad_norm": 3.0887067317962646, "learning_rate": 0.00021705532126023317, "loss": 0.3526, "step": 2742 }, { "epoch": 0.96, "grad_norm": 4.2543463706970215, "learning_rate": 0.00021701810965021087, "loss": 0.6271, "step": 2743 }, { "epoch": 0.96, "grad_norm": 3.850689172744751, "learning_rate": 0.00021698089804018852, "loss": 0.5781, "step": 2744 }, { "epoch": 0.96, "grad_norm": 3.8406736850738525, "learning_rate": 0.0002169436864301662, "loss": 0.5398, "step": 2745 }, { "epoch": 0.96, "grad_norm": 2.770142078399658, "learning_rate": 0.00021690647482014384, "loss": 
0.1511, "step": 2746 }, { "epoch": 0.96, "grad_norm": 2.917293071746826, "learning_rate": 0.00021686926321012154, "loss": 1.3509, "step": 2747 }, { "epoch": 0.96, "grad_norm": 1.7973790168762207, "learning_rate": 0.00021683205160009922, "loss": 0.1045, "step": 2748 }, { "epoch": 0.96, "grad_norm": 3.61274790763855, "learning_rate": 0.00021679483999007687, "loss": 0.3338, "step": 2749 }, { "epoch": 0.96, "grad_norm": 3.060636043548584, "learning_rate": 0.00021675762838005457, "loss": 0.2876, "step": 2750 }, { "epoch": 0.96, "grad_norm": 3.3534083366394043, "learning_rate": 0.00021672041677003225, "loss": 1.3637, "step": 2751 }, { "epoch": 0.96, "grad_norm": 3.5700647830963135, "learning_rate": 0.0002166832051600099, "loss": 0.9309, "step": 2752 }, { "epoch": 0.96, "grad_norm": 1.867060661315918, "learning_rate": 0.00021664599354998757, "loss": 0.4095, "step": 2753 }, { "epoch": 0.96, "grad_norm": 2.7170629501342773, "learning_rate": 0.00021660878193996525, "loss": 0.7493, "step": 2754 }, { "epoch": 0.97, "grad_norm": 2.105668783187866, "learning_rate": 0.00021657157032994292, "loss": 0.6006, "step": 2755 }, { "epoch": 0.97, "grad_norm": 1.1052336692810059, "learning_rate": 0.0002165343587199206, "loss": 0.097, "step": 2756 }, { "epoch": 0.97, "grad_norm": 2.781369924545288, "learning_rate": 0.00021649714710989825, "loss": 0.5647, "step": 2757 }, { "epoch": 0.97, "grad_norm": 2.3035714626312256, "learning_rate": 0.00021645993549987595, "loss": 0.3785, "step": 2758 }, { "epoch": 0.97, "grad_norm": 1.3320902585983276, "learning_rate": 0.0002164227238898536, "loss": 0.2026, "step": 2759 }, { "epoch": 0.97, "grad_norm": 1.859253168106079, "learning_rate": 0.00021638551227983128, "loss": 0.2957, "step": 2760 }, { "epoch": 0.97, "grad_norm": 2.7369508743286133, "learning_rate": 0.00021634830066980898, "loss": 0.6835, "step": 2761 }, { "epoch": 0.97, "grad_norm": 2.1216232776641846, "learning_rate": 0.00021631108905978663, "loss": 0.2972, "step": 2762 }, { "epoch": 0.97, 
"grad_norm": 1.9394500255584717, "learning_rate": 0.0002162738774497643, "loss": 0.5818, "step": 2763 }, { "epoch": 0.97, "grad_norm": 1.9596755504608154, "learning_rate": 0.000216236665839742, "loss": 0.4004, "step": 2764 }, { "epoch": 0.97, "grad_norm": 4.52447509765625, "learning_rate": 0.00021619945422971966, "loss": 0.4622, "step": 2765 }, { "epoch": 0.97, "grad_norm": 2.721889019012451, "learning_rate": 0.00021616224261969733, "loss": 0.2771, "step": 2766 }, { "epoch": 0.97, "grad_norm": 2.8889129161834717, "learning_rate": 0.00021612503100967498, "loss": 0.6328, "step": 2767 }, { "epoch": 0.97, "grad_norm": 3.644382953643799, "learning_rate": 0.00021608781939965268, "loss": 0.5304, "step": 2768 }, { "epoch": 0.97, "grad_norm": 4.635915279388428, "learning_rate": 0.00021605060778963036, "loss": 0.6429, "step": 2769 }, { "epoch": 0.97, "grad_norm": 3.2131359577178955, "learning_rate": 0.000216013396179608, "loss": 1.1534, "step": 2770 }, { "epoch": 0.97, "grad_norm": 2.65970516204834, "learning_rate": 0.0002159761845695857, "loss": 0.3379, "step": 2771 }, { "epoch": 0.97, "grad_norm": 1.9941884279251099, "learning_rate": 0.00021593897295956336, "loss": 0.2206, "step": 2772 }, { "epoch": 0.97, "grad_norm": 3.2422287464141846, "learning_rate": 0.00021590176134954103, "loss": 0.2301, "step": 2773 }, { "epoch": 0.97, "grad_norm": 2.5298879146575928, "learning_rate": 0.0002158645497395187, "loss": 0.2831, "step": 2774 }, { "epoch": 0.97, "grad_norm": 4.899098873138428, "learning_rate": 0.00021582733812949639, "loss": 2.1931, "step": 2775 }, { "epoch": 0.97, "grad_norm": 2.338663101196289, "learning_rate": 0.00021579012651947406, "loss": 0.8356, "step": 2776 }, { "epoch": 0.97, "grad_norm": 2.9031519889831543, "learning_rate": 0.00021575291490945174, "loss": 1.0787, "step": 2777 }, { "epoch": 0.97, "grad_norm": 3.205125093460083, "learning_rate": 0.00021571570329942939, "loss": 0.9548, "step": 2778 }, { "epoch": 0.97, "grad_norm": 2.117971181869507, "learning_rate": 
0.0002156784916894071, "loss": 0.4246, "step": 2779 }, { "epoch": 0.97, "grad_norm": 2.159374713897705, "learning_rate": 0.00021564128007938474, "loss": 0.3248, "step": 2780 }, { "epoch": 0.97, "grad_norm": 2.537348508834839, "learning_rate": 0.00021560406846936241, "loss": 0.7855, "step": 2781 }, { "epoch": 0.97, "grad_norm": 3.984494924545288, "learning_rate": 0.00021556685685934012, "loss": 0.6787, "step": 2782 }, { "epoch": 0.98, "grad_norm": 2.4059112071990967, "learning_rate": 0.00021552964524931777, "loss": 0.5901, "step": 2783 }, { "epoch": 0.98, "grad_norm": 2.266899824142456, "learning_rate": 0.00021549243363929544, "loss": 0.4769, "step": 2784 }, { "epoch": 0.98, "grad_norm": 2.490513563156128, "learning_rate": 0.0002154552220292731, "loss": 0.5872, "step": 2785 }, { "epoch": 0.98, "grad_norm": 2.9180526733398438, "learning_rate": 0.0002154180104192508, "loss": 0.3963, "step": 2786 }, { "epoch": 0.98, "grad_norm": 1.8597452640533447, "learning_rate": 0.00021538079880922847, "loss": 0.5766, "step": 2787 }, { "epoch": 0.98, "grad_norm": 1.8061572313308716, "learning_rate": 0.00021534358719920612, "loss": 0.4378, "step": 2788 }, { "epoch": 0.98, "grad_norm": 2.9399218559265137, "learning_rate": 0.00021530637558918382, "loss": 0.5846, "step": 2789 }, { "epoch": 0.98, "grad_norm": 2.21833872795105, "learning_rate": 0.00021526916397916147, "loss": 0.424, "step": 2790 }, { "epoch": 0.98, "grad_norm": 2.508014678955078, "learning_rate": 0.00021523195236913914, "loss": 0.3813, "step": 2791 }, { "epoch": 0.98, "grad_norm": 1.099500060081482, "learning_rate": 0.00021519474075911685, "loss": 0.2054, "step": 2792 }, { "epoch": 0.98, "grad_norm": 2.6345198154449463, "learning_rate": 0.0002151575291490945, "loss": 0.537, "step": 2793 }, { "epoch": 0.98, "grad_norm": 3.041452646255493, "learning_rate": 0.00021512031753907217, "loss": 0.5931, "step": 2794 }, { "epoch": 0.98, "grad_norm": 2.204749822616577, "learning_rate": 0.00021508310592904985, "loss": 0.418, "step": 
2795 }, { "epoch": 0.98, "grad_norm": 3.116248369216919, "learning_rate": 0.00021504589431902752, "loss": 0.4851, "step": 2796 }, { "epoch": 0.98, "grad_norm": 4.110528469085693, "learning_rate": 0.0002150086827090052, "loss": 0.4382, "step": 2797 }, { "epoch": 0.98, "grad_norm": 3.8598759174346924, "learning_rate": 0.00021497147109898285, "loss": 0.542, "step": 2798 }, { "epoch": 0.98, "grad_norm": 3.3355400562286377, "learning_rate": 0.00021493425948896052, "loss": 0.3713, "step": 2799 }, { "epoch": 0.98, "grad_norm": 5.150740623474121, "learning_rate": 0.00021489704787893823, "loss": 2.0462, "step": 2800 }, { "epoch": 0.98, "eval_loss": 0.5526766180992126, "eval_runtime": 51.4541, "eval_samples_per_second": 42.135, "eval_steps_per_second": 10.534, "eval_wer": 0.49014011416709913, "step": 2800 }, { "epoch": 0.98, "grad_norm": 3.123936414718628, "learning_rate": 0.00021485983626891588, "loss": 0.9954, "step": 2801 }, { "epoch": 0.98, "grad_norm": 25.905872344970703, "learning_rate": 0.00021482262465889355, "loss": 5.7803, "step": 2802 }, { "epoch": 0.98, "grad_norm": 2.5377426147460938, "learning_rate": 0.0002147854130488712, "loss": 0.5039, "step": 2803 }, { "epoch": 0.98, "grad_norm": 1.8195780515670776, "learning_rate": 0.0002147482014388489, "loss": 0.8649, "step": 2804 }, { "epoch": 0.98, "grad_norm": 2.3947935104370117, "learning_rate": 0.00021471098982882658, "loss": 0.7395, "step": 2805 }, { "epoch": 0.98, "grad_norm": 2.462125062942505, "learning_rate": 0.00021467377821880423, "loss": 0.3764, "step": 2806 }, { "epoch": 0.98, "grad_norm": 3.7861380577087402, "learning_rate": 0.00021463656660878193, "loss": 1.1175, "step": 2807 }, { "epoch": 0.98, "grad_norm": 2.9747540950775146, "learning_rate": 0.0002145993549987596, "loss": 0.8438, "step": 2808 }, { "epoch": 0.98, "grad_norm": 2.355682611465454, "learning_rate": 0.00021456214338873725, "loss": 0.6275, "step": 2809 }, { "epoch": 0.98, "grad_norm": 1.986989140510559, "learning_rate": 
0.00021452493177871496, "loss": 0.4578, "step": 2810 }, { "epoch": 0.98, "grad_norm": 3.2945938110351562, "learning_rate": 0.0002144877201686926, "loss": 0.4254, "step": 2811 }, { "epoch": 0.99, "grad_norm": 2.7034192085266113, "learning_rate": 0.00021445050855867028, "loss": 0.8129, "step": 2812 }, { "epoch": 0.99, "grad_norm": 3.6484246253967285, "learning_rate": 0.00021441329694864798, "loss": 0.5471, "step": 2813 }, { "epoch": 0.99, "grad_norm": 9.947596549987793, "learning_rate": 0.00021437608533862563, "loss": 2.901, "step": 2814 }, { "epoch": 0.99, "grad_norm": 2.3516814708709717, "learning_rate": 0.0002143388737286033, "loss": 0.3611, "step": 2815 }, { "epoch": 0.99, "grad_norm": 3.094783067703247, "learning_rate": 0.00021430166211858096, "loss": 0.4288, "step": 2816 }, { "epoch": 0.99, "grad_norm": 2.745293617248535, "learning_rate": 0.00021426445050855866, "loss": 0.4119, "step": 2817 }, { "epoch": 0.99, "grad_norm": 2.0576019287109375, "learning_rate": 0.00021422723889853634, "loss": 0.3339, "step": 2818 }, { "epoch": 0.99, "grad_norm": 1.2697802782058716, "learning_rate": 0.00021419002728851399, "loss": 0.1731, "step": 2819 }, { "epoch": 0.99, "grad_norm": 3.58626127243042, "learning_rate": 0.00021415281567849166, "loss": 0.4412, "step": 2820 }, { "epoch": 0.99, "grad_norm": 2.652714490890503, "learning_rate": 0.00021411560406846936, "loss": 0.2896, "step": 2821 }, { "epoch": 0.99, "grad_norm": 2.500732660293579, "learning_rate": 0.000214078392458447, "loss": 0.2454, "step": 2822 }, { "epoch": 0.99, "grad_norm": 1.2118359804153442, "learning_rate": 0.0002140411808484247, "loss": 0.0464, "step": 2823 }, { "epoch": 0.99, "grad_norm": 2.111560106277466, "learning_rate": 0.00021400396923840236, "loss": 0.3128, "step": 2824 }, { "epoch": 0.99, "grad_norm": 5.749283790588379, "learning_rate": 0.00021396675762838004, "loss": 0.9602, "step": 2825 }, { "epoch": 0.99, "grad_norm": 3.080014228820801, "learning_rate": 0.00021392954601835772, "loss": 0.9871, "step": 
2826 }, { "epoch": 0.99, "grad_norm": 2.2783267498016357, "learning_rate": 0.00021389233440833536, "loss": 0.6355, "step": 2827 }, { "epoch": 0.99, "grad_norm": 1.5478532314300537, "learning_rate": 0.00021385512279831307, "loss": 0.3404, "step": 2828 }, { "epoch": 0.99, "grad_norm": 2.262725353240967, "learning_rate": 0.00021381791118829072, "loss": 0.3556, "step": 2829 }, { "epoch": 0.99, "grad_norm": 2.2624759674072266, "learning_rate": 0.0002137806995782684, "loss": 0.4647, "step": 2830 }, { "epoch": 0.99, "grad_norm": 2.1995623111724854, "learning_rate": 0.0002137434879682461, "loss": 0.4685, "step": 2831 }, { "epoch": 0.99, "grad_norm": 2.1879045963287354, "learning_rate": 0.00021370627635822374, "loss": 0.5635, "step": 2832 }, { "epoch": 0.99, "grad_norm": 1.52578604221344, "learning_rate": 0.00021366906474820142, "loss": 0.3173, "step": 2833 }, { "epoch": 0.99, "grad_norm": 3.537405490875244, "learning_rate": 0.00021363185313817907, "loss": 0.7481, "step": 2834 }, { "epoch": 0.99, "grad_norm": 1.3475868701934814, "learning_rate": 0.00021359464152815677, "loss": 0.2276, "step": 2835 }, { "epoch": 0.99, "grad_norm": 2.945054769515991, "learning_rate": 0.00021355742991813445, "loss": 0.7077, "step": 2836 }, { "epoch": 0.99, "grad_norm": 2.155667304992676, "learning_rate": 0.0002135202183081121, "loss": 0.4307, "step": 2837 }, { "epoch": 0.99, "grad_norm": 4.234096527099609, "learning_rate": 0.0002134830066980898, "loss": 0.3034, "step": 2838 }, { "epoch": 0.99, "grad_norm": 2.2338759899139404, "learning_rate": 0.00021344579508806747, "loss": 0.4335, "step": 2839 }, { "epoch": 1.0, "grad_norm": 4.541741371154785, "learning_rate": 0.00021340858347804512, "loss": 1.6635, "step": 2840 }, { "epoch": 1.0, "grad_norm": 5.528841495513916, "learning_rate": 0.0002133713718680228, "loss": 2.2904, "step": 2841 }, { "epoch": 1.0, "grad_norm": 1.8137770891189575, "learning_rate": 0.00021333416025800047, "loss": 0.2749, "step": 2842 }, { "epoch": 1.0, "grad_norm": 
2.021759033203125, "learning_rate": 0.00021329694864797815, "loss": 0.2995, "step": 2843 }, { "epoch": 1.0, "grad_norm": 4.046627044677734, "learning_rate": 0.00021325973703795583, "loss": 0.5484, "step": 2844 }, { "epoch": 1.0, "grad_norm": 4.797898292541504, "learning_rate": 0.0002132225254279335, "loss": 0.5586, "step": 2845 }, { "epoch": 1.0, "grad_norm": 3.8933186531066895, "learning_rate": 0.00021318531381791118, "loss": 0.2478, "step": 2846 }, { "epoch": 1.0, "grad_norm": 2.601649045944214, "learning_rate": 0.00021314810220788883, "loss": 0.2398, "step": 2847 }, { "epoch": 1.0, "grad_norm": 2.6861727237701416, "learning_rate": 0.0002131108905978665, "loss": 0.3503, "step": 2848 }, { "epoch": 1.0, "grad_norm": 3.729224681854248, "learning_rate": 0.0002130736789878442, "loss": 0.5147, "step": 2849 }, { "epoch": 1.0, "grad_norm": 4.694298267364502, "learning_rate": 0.00021303646737782185, "loss": 1.8265, "step": 2850 }, { "epoch": 1.0, "grad_norm": 2.0727405548095703, "learning_rate": 0.00021299925576779953, "loss": 0.7055, "step": 2851 }, { "epoch": 1.0, "grad_norm": 2.8349599838256836, "learning_rate": 0.00021296204415777723, "loss": 0.373, "step": 2852 }, { "epoch": 1.0, "grad_norm": 1.7524197101593018, "learning_rate": 0.00021292483254775488, "loss": 0.3642, "step": 2853 }, { "epoch": 1.0, "grad_norm": 1.7273423671722412, "learning_rate": 0.00021288762093773256, "loss": 0.4562, "step": 2854 }, { "epoch": 1.0, "grad_norm": 1.9599108695983887, "learning_rate": 0.0002128504093277102, "loss": 0.8411, "step": 2855 }, { "epoch": 1.0, "grad_norm": 2.2834584712982178, "learning_rate": 0.0002128131977176879, "loss": 0.6458, "step": 2856 }, { "epoch": 1.0, "grad_norm": 2.6620326042175293, "learning_rate": 0.00021277598610766558, "loss": 0.7874, "step": 2857 }, { "epoch": 1.0, "grad_norm": 1.7331115007400513, "learning_rate": 0.00021273877449764323, "loss": 0.4673, "step": 2858 }, { "epoch": 1.0, "grad_norm": 1.6492310762405396, "learning_rate": 
0.00021270156288762094, "loss": 0.4048, "step": 2859 }, { "epoch": 1.0, "grad_norm": 1.8001649379730225, "learning_rate": 0.00021266435127759858, "loss": 0.5527, "step": 2860 }, { "epoch": 1.0, "grad_norm": 3.7418673038482666, "learning_rate": 0.00021262713966757626, "loss": 0.8918, "step": 2861 }, { "epoch": 1.0, "grad_norm": 1.890509843826294, "learning_rate": 0.00021258992805755394, "loss": 0.4264, "step": 2862 }, { "epoch": 1.0, "grad_norm": 2.927704334259033, "learning_rate": 0.0002125527164475316, "loss": 0.7042, "step": 2863 }, { "epoch": 1.0, "grad_norm": 2.7126057147979736, "learning_rate": 0.0002125155048375093, "loss": 0.4692, "step": 2864 }, { "epoch": 1.0, "grad_norm": 1.3469655513763428, "learning_rate": 0.00021247829322748696, "loss": 0.3412, "step": 2865 }, { "epoch": 1.0, "grad_norm": 2.2996063232421875, "learning_rate": 0.00021244108161746464, "loss": 0.2926, "step": 2866 }, { "epoch": 1.0, "grad_norm": 1.5315076112747192, "learning_rate": 0.00021240387000744232, "loss": 0.1615, "step": 2867 }, { "epoch": 1.0, "grad_norm": 3.6261684894561768, "learning_rate": 0.00021236665839741996, "loss": 0.7396, "step": 2868 }, { "epoch": 1.01, "grad_norm": 1.5153400897979736, "learning_rate": 0.00021232944678739764, "loss": 0.2164, "step": 2869 }, { "epoch": 1.01, "grad_norm": 3.3273916244506836, "learning_rate": 0.00021229223517737534, "loss": 0.6213, "step": 2870 }, { "epoch": 1.01, "grad_norm": 4.1875104904174805, "learning_rate": 0.000212255023567353, "loss": 0.7009, "step": 2871 }, { "epoch": 1.01, "grad_norm": 1.51561439037323, "learning_rate": 0.00021221781195733067, "loss": 0.1144, "step": 2872 }, { "epoch": 1.01, "grad_norm": 2.509672164916992, "learning_rate": 0.00021218060034730832, "loss": 0.3553, "step": 2873 }, { "epoch": 1.01, "grad_norm": 1.6920043230056763, "learning_rate": 0.00021214338873728602, "loss": 0.1711, "step": 2874 }, { "epoch": 1.01, "grad_norm": 3.063408136367798, "learning_rate": 0.0002121061771272637, "loss": 0.195, "step": 2875 
}, { "epoch": 1.01, "grad_norm": 4.922022819519043, "learning_rate": 0.00021206896551724134, "loss": 0.6325, "step": 2876 }, { "epoch": 1.01, "grad_norm": 3.254242181777954, "learning_rate": 0.00021203175390721905, "loss": 0.4139, "step": 2877 }, { "epoch": 1.01, "grad_norm": 1.8609694242477417, "learning_rate": 0.00021199454229719672, "loss": 0.3271, "step": 2878 }, { "epoch": 1.01, "grad_norm": 4.9110283851623535, "learning_rate": 0.00021195733068717437, "loss": 0.4979, "step": 2879 }, { "epoch": 1.01, "grad_norm": 2.849888324737549, "learning_rate": 0.00021192011907715207, "loss": 0.9667, "step": 2880 }, { "epoch": 1.01, "grad_norm": 3.0285425186157227, "learning_rate": 0.00021188290746712972, "loss": 0.6187, "step": 2881 }, { "epoch": 1.01, "grad_norm": 2.866227388381958, "learning_rate": 0.0002118456958571074, "loss": 0.5702, "step": 2882 }, { "epoch": 1.01, "grad_norm": 2.6997718811035156, "learning_rate": 0.00021180848424708507, "loss": 0.5016, "step": 2883 }, { "epoch": 1.01, "grad_norm": 1.718056559562683, "learning_rate": 0.00021177127263706275, "loss": 0.289, "step": 2884 }, { "epoch": 1.01, "grad_norm": 3.1275880336761475, "learning_rate": 0.00021173406102704043, "loss": 0.376, "step": 2885 }, { "epoch": 1.01, "grad_norm": 4.316718101501465, "learning_rate": 0.00021169684941701807, "loss": 0.8418, "step": 2886 }, { "epoch": 1.01, "grad_norm": 2.493377685546875, "learning_rate": 0.00021165963780699578, "loss": 0.2839, "step": 2887 }, { "epoch": 1.01, "grad_norm": 2.5852391719818115, "learning_rate": 0.00021162242619697345, "loss": 0.4074, "step": 2888 }, { "epoch": 1.01, "grad_norm": 2.4773120880126953, "learning_rate": 0.0002115852145869511, "loss": 0.4167, "step": 2889 }, { "epoch": 1.01, "grad_norm": 1.7587331533432007, "learning_rate": 0.00021154800297692878, "loss": 0.2211, "step": 2890 }, { "epoch": 1.01, "grad_norm": 2.402329444885254, "learning_rate": 0.00021151079136690645, "loss": 0.2386, "step": 2891 }, { "epoch": 1.01, "grad_norm": 
2.4426639080047607, "learning_rate": 0.00021147357975688413, "loss": 0.5694, "step": 2892 }, { "epoch": 1.01, "grad_norm": 1.6130284070968628, "learning_rate": 0.0002114363681468618, "loss": 0.3316, "step": 2893 }, { "epoch": 1.01, "grad_norm": 2.9390599727630615, "learning_rate": 0.00021139915653683945, "loss": 0.2903, "step": 2894 }, { "epoch": 1.01, "grad_norm": 3.764599561691284, "learning_rate": 0.00021136194492681716, "loss": 0.5749, "step": 2895 }, { "epoch": 1.01, "grad_norm": 1.7966651916503906, "learning_rate": 0.00021132473331679483, "loss": 0.1905, "step": 2896 }, { "epoch": 1.02, "grad_norm": 2.91357684135437, "learning_rate": 0.00021128752170677248, "loss": 0.9233, "step": 2897 }, { "epoch": 1.02, "grad_norm": 2.6174521446228027, "learning_rate": 0.00021125031009675018, "loss": 0.3764, "step": 2898 }, { "epoch": 1.02, "grad_norm": 5.59943151473999, "learning_rate": 0.00021121309848672783, "loss": 0.5162, "step": 2899 }, { "epoch": 1.02, "grad_norm": 6.940462589263916, "learning_rate": 0.0002111758868767055, "loss": 0.2316, "step": 2900 }, { "epoch": 1.02, "eval_loss": 0.6327888369560242, "eval_runtime": 51.2836, "eval_samples_per_second": 42.275, "eval_steps_per_second": 10.569, "eval_wer": 0.5484345268984605, "step": 2900 }, { "epoch": 1.02, "grad_norm": 2.0894784927368164, "learning_rate": 0.0002111386752666832, "loss": 0.1064, "step": 2901 }, { "epoch": 1.02, "grad_norm": 3.177110195159912, "learning_rate": 0.00021110146365666086, "loss": 0.3558, "step": 2902 }, { "epoch": 1.02, "grad_norm": 4.781301021575928, "learning_rate": 0.00021106425204663854, "loss": 0.9566, "step": 2903 }, { "epoch": 1.02, "grad_norm": 1.9870142936706543, "learning_rate": 0.00021102704043661618, "loss": 0.3168, "step": 2904 }, { "epoch": 1.02, "grad_norm": 3.021113395690918, "learning_rate": 0.0002109898288265939, "loss": 1.3912, "step": 2905 }, { "epoch": 1.02, "grad_norm": 1.378859519958496, "learning_rate": 0.00021095261721657156, "loss": 0.6019, "step": 2906 }, { 
"epoch": 1.02, "grad_norm": 2.121788740158081, "learning_rate": 0.0002109154056065492, "loss": 0.6099, "step": 2907 }, { "epoch": 1.02, "grad_norm": 1.3554753065109253, "learning_rate": 0.00021087819399652691, "loss": 0.2695, "step": 2908 }, { "epoch": 1.02, "grad_norm": 2.836920738220215, "learning_rate": 0.0002108409823865046, "loss": 0.4332, "step": 2909 }, { "epoch": 1.02, "grad_norm": 2.007983922958374, "learning_rate": 0.00021080377077648224, "loss": 0.3614, "step": 2910 }, { "epoch": 1.02, "grad_norm": 2.3177947998046875, "learning_rate": 0.00021076655916645991, "loss": 0.4861, "step": 2911 }, { "epoch": 1.02, "grad_norm": 2.913270950317383, "learning_rate": 0.0002107293475564376, "loss": 0.5023, "step": 2912 }, { "epoch": 1.02, "grad_norm": 5.127606391906738, "learning_rate": 0.00021069213594641527, "loss": 0.8786, "step": 2913 }, { "epoch": 1.02, "grad_norm": 2.110982894897461, "learning_rate": 0.00021065492433639294, "loss": 0.4902, "step": 2914 }, { "epoch": 1.02, "grad_norm": 4.175901889801025, "learning_rate": 0.0002106177127263706, "loss": 0.4782, "step": 2915 }, { "epoch": 1.02, "grad_norm": 2.3658077716827393, "learning_rate": 0.0002105805011163483, "loss": 0.3401, "step": 2916 }, { "epoch": 1.02, "grad_norm": 3.592463970184326, "learning_rate": 0.00021054328950632594, "loss": 0.882, "step": 2917 }, { "epoch": 1.02, "grad_norm": 2.3057944774627686, "learning_rate": 0.00021050607789630362, "loss": 0.5945, "step": 2918 }, { "epoch": 1.02, "grad_norm": 3.1992056369781494, "learning_rate": 0.00021046886628628132, "loss": 0.7865, "step": 2919 }, { "epoch": 1.02, "grad_norm": 5.556008815765381, "learning_rate": 0.00021043165467625897, "loss": 0.8176, "step": 2920 }, { "epoch": 1.02, "grad_norm": 4.94827127456665, "learning_rate": 0.00021039444306623665, "loss": 0.3772, "step": 2921 }, { "epoch": 1.02, "grad_norm": 2.1931419372558594, "learning_rate": 0.00021035723145621435, "loss": 0.1395, "step": 2922 }, { "epoch": 1.02, "grad_norm": 2.6224441528320312, 
"learning_rate": 0.000210320019846192, "loss": 0.3006, "step": 2923 }, { "epoch": 1.02, "grad_norm": 14.6669282913208, "learning_rate": 0.00021028280823616967, "loss": 3.7282, "step": 2924 }, { "epoch": 1.02, "grad_norm": 2.3192381858825684, "learning_rate": 0.00021024559662614732, "loss": 0.4179, "step": 2925 }, { "epoch": 1.03, "grad_norm": 1.563372254371643, "learning_rate": 0.00021020838501612502, "loss": 0.1219, "step": 2926 }, { "epoch": 1.03, "grad_norm": 1.8706691265106201, "learning_rate": 0.0002101711734061027, "loss": 0.1685, "step": 2927 }, { "epoch": 1.03, "grad_norm": 2.756892204284668, "learning_rate": 0.00021013396179608035, "loss": 0.2088, "step": 2928 }, { "epoch": 1.03, "grad_norm": 4.2238030433654785, "learning_rate": 0.00021009675018605805, "loss": 1.3, "step": 2929 }, { "epoch": 1.03, "grad_norm": 4.2373576164245605, "learning_rate": 0.0002100595385760357, "loss": 0.8429, "step": 2930 }, { "epoch": 1.03, "grad_norm": 1.899154782295227, "learning_rate": 0.00021002232696601338, "loss": 0.524, "step": 2931 }, { "epoch": 1.03, "grad_norm": 2.224109411239624, "learning_rate": 0.00020998511535599105, "loss": 0.6571, "step": 2932 }, { "epoch": 1.03, "grad_norm": 3.5380167961120605, "learning_rate": 0.00020994790374596873, "loss": 0.998, "step": 2933 }, { "epoch": 1.03, "grad_norm": 1.9109605550765991, "learning_rate": 0.0002099106921359464, "loss": 0.2862, "step": 2934 }, { "epoch": 1.03, "grad_norm": 3.0752928256988525, "learning_rate": 0.00020987348052592405, "loss": 0.4658, "step": 2935 }, { "epoch": 1.03, "grad_norm": 2.45890212059021, "learning_rate": 0.00020983626891590173, "loss": 0.7092, "step": 2936 }, { "epoch": 1.03, "grad_norm": 3.076648235321045, "learning_rate": 0.00020979905730587943, "loss": 0.6868, "step": 2937 }, { "epoch": 1.03, "grad_norm": 2.497008800506592, "learning_rate": 0.00020976184569585708, "loss": 0.2683, "step": 2938 }, { "epoch": 1.03, "grad_norm": 1.9469985961914062, "learning_rate": 0.00020972463408583476, "loss": 
0.2959, "step": 2939 }, { "epoch": 1.03, "grad_norm": 1.4684535264968872, "learning_rate": 0.00020968742247581246, "loss": 0.1444, "step": 2940 }, { "epoch": 1.03, "grad_norm": 2.822787046432495, "learning_rate": 0.0002096502108657901, "loss": 0.4749, "step": 2941 }, { "epoch": 1.03, "grad_norm": 2.6502792835235596, "learning_rate": 0.00020961299925576778, "loss": 0.5122, "step": 2942 }, { "epoch": 1.03, "grad_norm": 9.322734832763672, "learning_rate": 0.00020957578764574543, "loss": 2.2989, "step": 2943 }, { "epoch": 1.03, "grad_norm": 2.103727340698242, "learning_rate": 0.00020953857603572313, "loss": 0.444, "step": 2944 }, { "epoch": 1.03, "grad_norm": 3.0473310947418213, "learning_rate": 0.0002095013644257008, "loss": 0.4347, "step": 2945 }, { "epoch": 1.03, "grad_norm": 1.7083921432495117, "learning_rate": 0.00020946415281567846, "loss": 0.3226, "step": 2946 }, { "epoch": 1.03, "grad_norm": 3.072146415710449, "learning_rate": 0.00020942694120565616, "loss": 0.5612, "step": 2947 }, { "epoch": 1.03, "grad_norm": 2.4064691066741943, "learning_rate": 0.0002093897295956338, "loss": 0.1788, "step": 2948 }, { "epoch": 1.03, "grad_norm": 1.3841482400894165, "learning_rate": 0.00020935251798561149, "loss": 0.1171, "step": 2949 }, { "epoch": 1.03, "grad_norm": 2.551501989364624, "learning_rate": 0.0002093153063755892, "loss": 0.616, "step": 2950 }, { "epoch": 1.03, "grad_norm": 3.9594438076019287, "learning_rate": 0.00020927809476556684, "loss": 0.7705, "step": 2951 }, { "epoch": 1.03, "grad_norm": 4.557310104370117, "learning_rate": 0.00020924088315554451, "loss": 1.3497, "step": 2952 }, { "epoch": 1.03, "grad_norm": 2.093480348587036, "learning_rate": 0.0002092036715455222, "loss": 0.2775, "step": 2953 }, { "epoch": 1.04, "grad_norm": 3.45406174659729, "learning_rate": 0.00020916645993549987, "loss": 0.619, "step": 2954 }, { "epoch": 1.04, "grad_norm": 2.039518356323242, "learning_rate": 0.00020912924832547754, "loss": 0.9185, "step": 2955 }, { "epoch": 1.04, 
"grad_norm": 2.3017618656158447, "learning_rate": 0.0002090920367154552, "loss": 0.5944, "step": 2956 }, { "epoch": 1.04, "grad_norm": 1.7939972877502441, "learning_rate": 0.00020905482510543287, "loss": 0.5319, "step": 2957 }, { "epoch": 1.04, "grad_norm": 29.27108383178711, "learning_rate": 0.00020901761349541057, "loss": 5.4024, "step": 2958 }, { "epoch": 1.04, "grad_norm": 3.3019933700561523, "learning_rate": 0.00020898040188538822, "loss": 0.8981, "step": 2959 }, { "epoch": 1.04, "grad_norm": 2.4643125534057617, "learning_rate": 0.0002089431902753659, "loss": 0.8616, "step": 2960 }, { "epoch": 1.04, "grad_norm": 2.9230358600616455, "learning_rate": 0.00020890597866534354, "loss": 0.5856, "step": 2961 }, { "epoch": 1.04, "grad_norm": 1.3859621286392212, "learning_rate": 0.00020886876705532124, "loss": 0.3128, "step": 2962 }, { "epoch": 1.04, "grad_norm": 3.490842342376709, "learning_rate": 0.00020883155544529892, "loss": 0.4453, "step": 2963 }, { "epoch": 1.04, "grad_norm": 2.791914701461792, "learning_rate": 0.00020879434383527657, "loss": 0.6412, "step": 2964 }, { "epoch": 1.04, "grad_norm": 2.955834150314331, "learning_rate": 0.00020875713222525427, "loss": 0.3226, "step": 2965 }, { "epoch": 1.04, "grad_norm": 1.8695831298828125, "learning_rate": 0.00020871992061523195, "loss": 0.3433, "step": 2966 }, { "epoch": 1.04, "grad_norm": 2.185141086578369, "learning_rate": 0.0002086827090052096, "loss": 0.2187, "step": 2967 }, { "epoch": 1.04, "grad_norm": 2.4184865951538086, "learning_rate": 0.0002086454973951873, "loss": 0.2672, "step": 2968 }, { "epoch": 1.04, "grad_norm": 7.882993221282959, "learning_rate": 0.00020860828578516495, "loss": 0.2307, "step": 2969 }, { "epoch": 1.04, "grad_norm": 1.4877923727035522, "learning_rate": 0.00020857107417514262, "loss": 0.1831, "step": 2970 }, { "epoch": 1.04, "grad_norm": 1.8400189876556396, "learning_rate": 0.00020853386256512033, "loss": 0.3421, "step": 2971 }, { "epoch": 1.04, "grad_norm": 1.835120439529419, 
"learning_rate": 0.00020849665095509798, "loss": 0.2359, "step": 2972 }, { "epoch": 1.04, "grad_norm": 2.832714080810547, "learning_rate": 0.00020845943934507565, "loss": 0.4349, "step": 2973 }, { "epoch": 1.04, "grad_norm": 0.8223691582679749, "learning_rate": 0.0002084222277350533, "loss": 0.039, "step": 2974 }, { "epoch": 1.04, "grad_norm": 3.099144697189331, "learning_rate": 0.000208385016125031, "loss": 0.5629, "step": 2975 }, { "epoch": 1.04, "grad_norm": 2.728029251098633, "learning_rate": 0.00020834780451500868, "loss": 0.3675, "step": 2976 }, { "epoch": 1.04, "grad_norm": 2.1751224994659424, "learning_rate": 0.00020831059290498633, "loss": 0.1833, "step": 2977 }, { "epoch": 1.04, "grad_norm": 8.328917503356934, "learning_rate": 0.000208273381294964, "loss": 0.5198, "step": 2978 }, { "epoch": 1.04, "grad_norm": 6.681622505187988, "learning_rate": 0.00020823616968494168, "loss": 2.3852, "step": 2979 }, { "epoch": 1.04, "grad_norm": 3.279292583465576, "learning_rate": 0.00020819895807491935, "loss": 1.1333, "step": 2980 }, { "epoch": 1.04, "grad_norm": 1.996398687362671, "learning_rate": 0.00020816174646489703, "loss": 0.5301, "step": 2981 }, { "epoch": 1.04, "grad_norm": 1.8816949129104614, "learning_rate": 0.00020812453485487468, "loss": 0.4437, "step": 2982 }, { "epoch": 1.05, "grad_norm": 2.7721152305603027, "learning_rate": 0.00020808732324485238, "loss": 0.7248, "step": 2983 }, { "epoch": 1.05, "grad_norm": 1.3053263425827026, "learning_rate": 0.00020805011163483006, "loss": 0.1844, "step": 2984 }, { "epoch": 1.05, "grad_norm": 1.6006126403808594, "learning_rate": 0.0002080129000248077, "loss": 0.3389, "step": 2985 }, { "epoch": 1.05, "grad_norm": 6.084453582763672, "learning_rate": 0.0002079756884147854, "loss": 2.0407, "step": 2986 }, { "epoch": 1.05, "grad_norm": 1.4119192361831665, "learning_rate": 0.00020793847680476306, "loss": 0.2708, "step": 2987 }, { "epoch": 1.05, "grad_norm": 2.911585807800293, "learning_rate": 0.00020790126519474073, "loss": 
0.3626, "step": 2988 }, { "epoch": 1.05, "grad_norm": 1.2379357814788818, "learning_rate": 0.00020786405358471844, "loss": 0.1421, "step": 2989 }, { "epoch": 1.05, "grad_norm": 1.679255485534668, "learning_rate": 0.00020782684197469609, "loss": 0.2169, "step": 2990 }, { "epoch": 1.05, "grad_norm": 1.825289011001587, "learning_rate": 0.00020778963036467376, "loss": 0.1638, "step": 2991 }, { "epoch": 1.05, "grad_norm": 2.66688871383667, "learning_rate": 0.0002077524187546514, "loss": 0.6995, "step": 2992 }, { "epoch": 1.05, "grad_norm": 3.020879030227661, "learning_rate": 0.0002077152071446291, "loss": 0.4142, "step": 2993 }, { "epoch": 1.05, "grad_norm": 2.3098480701446533, "learning_rate": 0.0002076779955346068, "loss": 0.371, "step": 2994 }, { "epoch": 1.05, "grad_norm": 4.051997661590576, "learning_rate": 0.00020764078392458444, "loss": 0.4462, "step": 2995 }, { "epoch": 1.05, "grad_norm": 2.9029579162597656, "learning_rate": 0.00020760357231456214, "loss": 0.3817, "step": 2996 }, { "epoch": 1.05, "grad_norm": 5.890556812286377, "learning_rate": 0.00020756636070453982, "loss": 1.261, "step": 2997 }, { "epoch": 1.05, "grad_norm": 2.089731216430664, "learning_rate": 0.00020752914909451746, "loss": 0.3077, "step": 2998 }, { "epoch": 1.05, "grad_norm": 4.995520114898682, "learning_rate": 0.00020749193748449514, "loss": 1.0239, "step": 2999 }, { "epoch": 1.05, "grad_norm": 4.084024429321289, "learning_rate": 0.00020745472587447282, "loss": 0.6881, "step": 3000 }, { "epoch": 1.05, "eval_loss": 0.6195849776268005, "eval_runtime": 50.8747, "eval_samples_per_second": 42.615, "eval_steps_per_second": 10.654, "eval_wer": 0.5003459609064176, "step": 3000 }, { "epoch": 1.05, "grad_norm": 7.645015239715576, "learning_rate": 0.0002074175142644505, "loss": 0.9345, "step": 3001 }, { "epoch": 1.05, "grad_norm": 3.707676887512207, "learning_rate": 0.00020738030265442817, "loss": 0.3963, "step": 3002 }, { "epoch": 1.05, "grad_norm": 2.314399003982544, "learning_rate": 
0.00020734309104440582, "loss": 0.2629, "step": 3003 }, { "epoch": 1.05, "grad_norm": 5.601327896118164, "learning_rate": 0.00020730587943438352, "loss": 0.5629, "step": 3004 }, { "epoch": 1.05, "grad_norm": 3.5226032733917236, "learning_rate": 0.00020726866782436117, "loss": 1.5056, "step": 3005 }, { "epoch": 1.05, "grad_norm": 1.585764765739441, "learning_rate": 0.00020723145621433884, "loss": 0.4755, "step": 3006 }, { "epoch": 1.05, "grad_norm": 1.83793044090271, "learning_rate": 0.00020719424460431655, "loss": 0.5136, "step": 3007 }, { "epoch": 1.05, "grad_norm": 2.307407855987549, "learning_rate": 0.0002071570329942942, "loss": 0.5605, "step": 3008 }, { "epoch": 1.05, "grad_norm": 1.9383962154388428, "learning_rate": 0.00020711982138427187, "loss": 0.2642, "step": 3009 }, { "epoch": 1.05, "grad_norm": 5.103401184082031, "learning_rate": 0.00020708260977424957, "loss": 0.685, "step": 3010 }, { "epoch": 1.06, "grad_norm": 2.193871259689331, "learning_rate": 0.00020704539816422722, "loss": 0.363, "step": 3011 }, { "epoch": 1.06, "grad_norm": 2.350487232208252, "learning_rate": 0.0002070081865542049, "loss": 0.4608, "step": 3012 }, { "epoch": 1.06, "grad_norm": 1.3571010828018188, "learning_rate": 0.00020697097494418255, "loss": 0.1649, "step": 3013 }, { "epoch": 1.06, "grad_norm": 2.3821628093719482, "learning_rate": 0.00020693376333416025, "loss": 0.2691, "step": 3014 }, { "epoch": 1.06, "grad_norm": 1.663122296333313, "learning_rate": 0.00020689655172413793, "loss": 0.3526, "step": 3015 }, { "epoch": 1.06, "grad_norm": 3.3357670307159424, "learning_rate": 0.00020685934011411557, "loss": 0.3898, "step": 3016 }, { "epoch": 1.06, "grad_norm": 4.479585647583008, "learning_rate": 0.00020682212850409328, "loss": 0.2298, "step": 3017 }, { "epoch": 1.06, "grad_norm": 3.3683953285217285, "learning_rate": 0.00020678491689407093, "loss": 0.4032, "step": 3018 }, { "epoch": 1.06, "grad_norm": 12.573981285095215, "learning_rate": 0.0002067477052840486, "loss": 3.1628, 
"step": 3019 }, { "epoch": 1.06, "grad_norm": 2.957279682159424, "learning_rate": 0.00020671049367402628, "loss": 0.6773, "step": 3020 }, { "epoch": 1.06, "grad_norm": 2.875685453414917, "learning_rate": 0.00020667328206400395, "loss": 0.6258, "step": 3021 }, { "epoch": 1.06, "grad_norm": 4.08288049697876, "learning_rate": 0.00020663607045398163, "loss": 1.0724, "step": 3022 }, { "epoch": 1.06, "grad_norm": 0.861497163772583, "learning_rate": 0.00020659885884395928, "loss": 0.0636, "step": 3023 }, { "epoch": 1.06, "grad_norm": 3.3727078437805176, "learning_rate": 0.00020656164723393695, "loss": 0.5172, "step": 3024 }, { "epoch": 1.06, "grad_norm": 3.8272769451141357, "learning_rate": 0.00020652443562391466, "loss": 0.6818, "step": 3025 }, { "epoch": 1.06, "grad_norm": 0.6726584434509277, "learning_rate": 0.0002064872240138923, "loss": 0.0578, "step": 3026 }, { "epoch": 1.06, "grad_norm": 4.285344123840332, "learning_rate": 0.00020645001240386998, "loss": 0.7979, "step": 3027 }, { "epoch": 1.06, "grad_norm": 7.106195449829102, "learning_rate": 0.00020641280079384768, "loss": 3.3654, "step": 3028 }, { "epoch": 1.06, "grad_norm": 1.9665549993515015, "learning_rate": 0.00020637558918382533, "loss": 0.1718, "step": 3029 }, { "epoch": 1.06, "grad_norm": 1.6774803400039673, "learning_rate": 0.000206338377573803, "loss": 0.6851, "step": 3030 }, { "epoch": 1.06, "grad_norm": 2.010876417160034, "learning_rate": 0.00020630116596378066, "loss": 0.4386, "step": 3031 }, { "epoch": 1.06, "grad_norm": 1.4450249671936035, "learning_rate": 0.00020626395435375836, "loss": 0.3772, "step": 3032 }, { "epoch": 1.06, "grad_norm": 2.3104891777038574, "learning_rate": 0.00020622674274373604, "loss": 0.3893, "step": 3033 }, { "epoch": 1.06, "grad_norm": 1.3111859560012817, "learning_rate": 0.00020618953113371368, "loss": 0.2572, "step": 3034 }, { "epoch": 1.06, "grad_norm": 2.2951879501342773, "learning_rate": 0.0002061523195236914, "loss": 0.3824, "step": 3035 }, { "epoch": 1.06, 
"grad_norm": 2.6300010681152344, "learning_rate": 0.00020611510791366904, "loss": 0.5777, "step": 3036 }, { "epoch": 1.06, "grad_norm": 3.6638519763946533, "learning_rate": 0.0002060778963036467, "loss": 1.1108, "step": 3037 }, { "epoch": 1.06, "grad_norm": 2.498910427093506, "learning_rate": 0.00020604068469362442, "loss": 0.3768, "step": 3038 }, { "epoch": 1.06, "grad_norm": 2.4238033294677734, "learning_rate": 0.00020600347308360206, "loss": 0.5088, "step": 3039 }, { "epoch": 1.07, "grad_norm": 1.8531930446624756, "learning_rate": 0.00020596626147357974, "loss": 0.2276, "step": 3040 }, { "epoch": 1.07, "grad_norm": 2.1931774616241455, "learning_rate": 0.00020592904986355742, "loss": 0.3416, "step": 3041 }, { "epoch": 1.07, "grad_norm": 1.7940934896469116, "learning_rate": 0.0002058918382535351, "loss": 0.2504, "step": 3042 }, { "epoch": 1.07, "grad_norm": 1.3244794607162476, "learning_rate": 0.00020585462664351277, "loss": 0.1899, "step": 3043 }, { "epoch": 1.07, "grad_norm": 2.233159303665161, "learning_rate": 0.00020581741503349042, "loss": 0.4307, "step": 3044 }, { "epoch": 1.07, "grad_norm": 4.452759265899658, "learning_rate": 0.0002057802034234681, "loss": 1.5291, "step": 3045 }, { "epoch": 1.07, "grad_norm": 6.447901248931885, "learning_rate": 0.0002057429918134458, "loss": 0.3252, "step": 3046 }, { "epoch": 1.07, "grad_norm": 6.9542765617370605, "learning_rate": 0.00020570578020342344, "loss": 0.2644, "step": 3047 }, { "epoch": 1.07, "grad_norm": 3.4594507217407227, "learning_rate": 0.00020566856859340112, "loss": 0.6206, "step": 3048 }, { "epoch": 1.07, "grad_norm": 4.8815388679504395, "learning_rate": 0.0002056313569833788, "loss": 1.6332, "step": 3049 }, { "epoch": 1.07, "grad_norm": 4.452385902404785, "learning_rate": 0.00020559414537335647, "loss": 0.9805, "step": 3050 }, { "epoch": 1.07, "grad_norm": 3.0658645629882812, "learning_rate": 0.00020555693376333415, "loss": 0.5303, "step": 3051 }, { "epoch": 1.07, "grad_norm": 4.26853084564209, 
"learning_rate": 0.0002055197221533118, "loss": 0.3217, "step": 3052 }, { "epoch": 1.07, "grad_norm": 2.2705271244049072, "learning_rate": 0.0002054825105432895, "loss": 0.3074, "step": 3053 }, { "epoch": 1.07, "grad_norm": 2.20687198638916, "learning_rate": 0.00020544529893326717, "loss": 0.2464, "step": 3054 }, { "epoch": 1.07, "grad_norm": 3.222644090652466, "learning_rate": 0.00020540808732324482, "loss": 1.1983, "step": 3055 }, { "epoch": 1.07, "grad_norm": 1.951676368713379, "learning_rate": 0.00020537087571322253, "loss": 0.4934, "step": 3056 }, { "epoch": 1.07, "grad_norm": 1.844739317893982, "learning_rate": 0.00020533366410320017, "loss": 0.4015, "step": 3057 }, { "epoch": 1.07, "grad_norm": 2.413743019104004, "learning_rate": 0.00020529645249317785, "loss": 0.4277, "step": 3058 }, { "epoch": 1.07, "grad_norm": 2.190391778945923, "learning_rate": 0.00020525924088315555, "loss": 0.3995, "step": 3059 }, { "epoch": 1.07, "grad_norm": 3.3840878009796143, "learning_rate": 0.0002052220292731332, "loss": 0.5431, "step": 3060 }, { "epoch": 1.07, "grad_norm": 3.4068565368652344, "learning_rate": 0.00020518481766311088, "loss": 0.2978, "step": 3061 }, { "epoch": 1.07, "grad_norm": 1.8028383255004883, "learning_rate": 0.00020514760605308853, "loss": 0.5075, "step": 3062 }, { "epoch": 1.07, "grad_norm": 2.067969799041748, "learning_rate": 0.00020511039444306623, "loss": 0.3496, "step": 3063 }, { "epoch": 1.07, "grad_norm": 3.3436696529388428, "learning_rate": 0.0002050731828330439, "loss": 0.5107, "step": 3064 }, { "epoch": 1.07, "grad_norm": 2.0510003566741943, "learning_rate": 0.00020503597122302155, "loss": 0.3864, "step": 3065 }, { "epoch": 1.07, "grad_norm": 1.5820317268371582, "learning_rate": 0.00020499875961299923, "loss": 0.2344, "step": 3066 }, { "epoch": 1.07, "grad_norm": 2.2191803455352783, "learning_rate": 0.00020496154800297693, "loss": 0.493, "step": 3067 }, { "epoch": 1.07, "grad_norm": 1.7818882465362549, "learning_rate": 0.00020492433639295458, 
"loss": 0.1892, "step": 3068 }, { "epoch": 1.08, "grad_norm": 3.912038564682007, "learning_rate": 0.00020488712478293226, "loss": 0.6714, "step": 3069 }, { "epoch": 1.08, "grad_norm": 2.960134506225586, "learning_rate": 0.00020484991317290993, "loss": 0.264, "step": 3070 }, { "epoch": 1.08, "grad_norm": 2.104323625564575, "learning_rate": 0.0002048127015628876, "loss": 0.5416, "step": 3071 }, { "epoch": 1.08, "grad_norm": 4.2121148109436035, "learning_rate": 0.00020477548995286528, "loss": 1.9044, "step": 3072 }, { "epoch": 1.08, "grad_norm": 2.713250160217285, "learning_rate": 0.00020473827834284293, "loss": 0.3652, "step": 3073 }, { "epoch": 1.08, "grad_norm": 0.9541451334953308, "learning_rate": 0.00020470106673282064, "loss": 0.0514, "step": 3074 }, { "epoch": 1.08, "grad_norm": 9.565132141113281, "learning_rate": 0.00020466385512279828, "loss": 1.9225, "step": 3075 }, { "epoch": 1.08, "grad_norm": 3.1412031650543213, "learning_rate": 0.00020462664351277596, "loss": 0.3966, "step": 3076 }, { "epoch": 1.08, "grad_norm": 1.8707399368286133, "learning_rate": 0.00020458943190275366, "loss": 0.2158, "step": 3077 }, { "epoch": 1.08, "grad_norm": 2.589046001434326, "learning_rate": 0.0002045522202927313, "loss": 0.3275, "step": 3078 }, { "epoch": 1.08, "grad_norm": 4.2253804206848145, "learning_rate": 0.000204515008682709, "loss": 0.3441, "step": 3079 }, { "epoch": 1.08, "grad_norm": 3.184797525405884, "learning_rate": 0.00020447779707268664, "loss": 1.0592, "step": 3080 }, { "epoch": 1.08, "grad_norm": 2.5358669757843018, "learning_rate": 0.00020444058546266434, "loss": 0.6669, "step": 3081 }, { "epoch": 1.08, "grad_norm": 3.5697948932647705, "learning_rate": 0.00020440337385264201, "loss": 0.4798, "step": 3082 }, { "epoch": 1.08, "grad_norm": 1.44380521774292, "learning_rate": 0.00020436616224261966, "loss": 0.4084, "step": 3083 }, { "epoch": 1.08, "grad_norm": 12.404438972473145, "learning_rate": 0.00020432895063259737, "loss": 4.2819, "step": 3084 }, { "epoch": 
1.08, "grad_norm": 2.316073417663574, "learning_rate": 0.00020429173902257504, "loss": 0.6902, "step": 3085 }, { "epoch": 1.08, "grad_norm": 1.3172087669372559, "learning_rate": 0.0002042545274125527, "loss": 0.2745, "step": 3086 }, { "epoch": 1.08, "grad_norm": 2.7705490589141846, "learning_rate": 0.00020421731580253037, "loss": 1.3201, "step": 3087 }, { "epoch": 1.08, "grad_norm": 2.1392929553985596, "learning_rate": 0.00020418010419250804, "loss": 0.2256, "step": 3088 }, { "epoch": 1.08, "grad_norm": 2.1735339164733887, "learning_rate": 0.00020414289258248572, "loss": 0.7259, "step": 3089 }, { "epoch": 1.08, "grad_norm": 1.7704854011535645, "learning_rate": 0.0002041056809724634, "loss": 0.3775, "step": 3090 }, { "epoch": 1.08, "grad_norm": 2.1959261894226074, "learning_rate": 0.00020406846936244107, "loss": 0.3028, "step": 3091 }, { "epoch": 1.08, "grad_norm": 1.4941906929016113, "learning_rate": 0.00020403125775241875, "loss": 0.2983, "step": 3092 }, { "epoch": 1.08, "grad_norm": 1.2572664022445679, "learning_rate": 0.0002039940461423964, "loss": 0.1432, "step": 3093 }, { "epoch": 1.08, "grad_norm": 1.722988247871399, "learning_rate": 0.00020395683453237407, "loss": 0.252, "step": 3094 }, { "epoch": 1.08, "grad_norm": 1.260048508644104, "learning_rate": 0.00020391962292235177, "loss": 0.2426, "step": 3095 }, { "epoch": 1.08, "grad_norm": 1.990040898323059, "learning_rate": 0.00020388241131232942, "loss": 0.2344, "step": 3096 }, { "epoch": 1.09, "grad_norm": 4.270902156829834, "learning_rate": 0.0002038451997023071, "loss": 0.445, "step": 3097 }, { "epoch": 1.09, "grad_norm": 6.141219615936279, "learning_rate": 0.0002038079880922848, "loss": 0.8813, "step": 3098 }, { "epoch": 1.09, "grad_norm": 4.40023946762085, "learning_rate": 0.00020377077648226245, "loss": 0.3991, "step": 3099 }, { "epoch": 1.09, "grad_norm": 2.3631558418273926, "learning_rate": 0.00020373356487224012, "loss": 0.2522, "step": 3100 }, { "epoch": 1.09, "eval_loss": 0.5331594347953796, 
"eval_runtime": 50.9883, "eval_samples_per_second": 42.52, "eval_steps_per_second": 10.63, "eval_wer": 0.46116588825462723, "step": 3100 }, { "epoch": 1.09, "grad_norm": 6.387331485748291, "learning_rate": 0.00020369635326221777, "loss": 0.5592, "step": 3101 }, { "epoch": 1.09, "grad_norm": 3.025975227355957, "learning_rate": 0.00020365914165219548, "loss": 0.1448, "step": 3102 }, { "epoch": 1.09, "grad_norm": 5.364758014678955, "learning_rate": 0.00020362193004217315, "loss": 0.854, "step": 3103 }, { "epoch": 1.09, "grad_norm": 3.2311348915100098, "learning_rate": 0.0002035847184321508, "loss": 0.2019, "step": 3104 }, { "epoch": 1.09, "grad_norm": 2.2923085689544678, "learning_rate": 0.0002035475068221285, "loss": 0.7306, "step": 3105 }, { "epoch": 1.09, "grad_norm": 1.8082457780838013, "learning_rate": 0.00020351029521210615, "loss": 0.74, "step": 3106 }, { "epoch": 1.09, "grad_norm": 3.0178422927856445, "learning_rate": 0.00020347308360208383, "loss": 0.5268, "step": 3107 }, { "epoch": 1.09, "grad_norm": 1.8592593669891357, "learning_rate": 0.0002034358719920615, "loss": 0.3687, "step": 3108 }, { "epoch": 1.09, "grad_norm": 2.91344952583313, "learning_rate": 0.00020339866038203918, "loss": 0.3192, "step": 3109 }, { "epoch": 1.09, "grad_norm": 4.410944938659668, "learning_rate": 0.00020336144877201686, "loss": 0.3268, "step": 3110 }, { "epoch": 1.09, "grad_norm": 2.5860300064086914, "learning_rate": 0.00020332423716199453, "loss": 0.6365, "step": 3111 }, { "epoch": 1.09, "grad_norm": 2.230043411254883, "learning_rate": 0.0002032870255519722, "loss": 0.3995, "step": 3112 }, { "epoch": 1.09, "grad_norm": 2.6025543212890625, "learning_rate": 0.00020324981394194988, "loss": 0.534, "step": 3113 }, { "epoch": 1.09, "grad_norm": 3.757107973098755, "learning_rate": 0.00020321260233192753, "loss": 0.6327, "step": 3114 }, { "epoch": 1.09, "grad_norm": 1.6777108907699585, "learning_rate": 0.0002031753907219052, "loss": 0.1812, "step": 3115 }, { "epoch": 1.09, "grad_norm": 
2.1241323947906494, "learning_rate": 0.0002031381791118829, "loss": 0.3462, "step": 3116 }, { "epoch": 1.09, "grad_norm": 2.4214656352996826, "learning_rate": 0.00020310096750186056, "loss": 0.5252, "step": 3117 }, { "epoch": 1.09, "grad_norm": 1.9669643640518188, "learning_rate": 0.00020306375589183823, "loss": 0.2442, "step": 3118 }, { "epoch": 1.09, "grad_norm": 2.5513925552368164, "learning_rate": 0.00020302654428181588, "loss": 0.7533, "step": 3119 }, { "epoch": 1.09, "grad_norm": 2.9641880989074707, "learning_rate": 0.00020298933267179359, "loss": 0.5389, "step": 3120 }, { "epoch": 1.09, "grad_norm": 2.38806414604187, "learning_rate": 0.00020295212106177126, "loss": 0.1863, "step": 3121 }, { "epoch": 1.09, "grad_norm": 3.5284969806671143, "learning_rate": 0.0002029149094517489, "loss": 0.6842, "step": 3122 }, { "epoch": 1.09, "grad_norm": 3.640097141265869, "learning_rate": 0.00020287769784172661, "loss": 0.9635, "step": 3123 }, { "epoch": 1.09, "grad_norm": 3.8287434577941895, "learning_rate": 0.00020284048623170426, "loss": 1.0185, "step": 3124 }, { "epoch": 1.09, "grad_norm": 2.4016950130462646, "learning_rate": 0.00020280327462168194, "loss": 0.3064, "step": 3125 }, { "epoch": 1.1, "grad_norm": 4.09964656829834, "learning_rate": 0.00020276606301165964, "loss": 0.3252, "step": 3126 }, { "epoch": 1.1, "grad_norm": 1.654166579246521, "learning_rate": 0.0002027288514016373, "loss": 0.2792, "step": 3127 }, { "epoch": 1.1, "grad_norm": 4.5107622146606445, "learning_rate": 0.00020269163979161497, "loss": 0.4426, "step": 3128 }, { "epoch": 1.1, "grad_norm": 2.8204357624053955, "learning_rate": 0.00020265442818159264, "loss": 0.2263, "step": 3129 }, { "epoch": 1.1, "grad_norm": 2.2634711265563965, "learning_rate": 0.00020261721657157032, "loss": 0.7353, "step": 3130 }, { "epoch": 1.1, "grad_norm": 2.142753839492798, "learning_rate": 0.000202580004961548, "loss": 0.6885, "step": 3131 }, { "epoch": 1.1, "grad_norm": 1.8485157489776611, "learning_rate": 
0.00020254279335152564, "loss": 0.4566, "step": 3132 }, { "epoch": 1.1, "grad_norm": 2.232492685317993, "learning_rate": 0.00020250558174150334, "loss": 0.4872, "step": 3133 }, { "epoch": 1.1, "grad_norm": 2.160388231277466, "learning_rate": 0.00020246837013148102, "loss": 0.3397, "step": 3134 }, { "epoch": 1.1, "grad_norm": 1.7792160511016846, "learning_rate": 0.00020243115852145867, "loss": 0.5532, "step": 3135 }, { "epoch": 1.1, "grad_norm": 1.8902424573898315, "learning_rate": 0.00020239394691143634, "loss": 0.3345, "step": 3136 }, { "epoch": 1.1, "grad_norm": 2.7055909633636475, "learning_rate": 0.00020235673530141402, "loss": 0.5375, "step": 3137 }, { "epoch": 1.1, "grad_norm": 1.9411323070526123, "learning_rate": 0.0002023195236913917, "loss": 0.3124, "step": 3138 }, { "epoch": 1.1, "grad_norm": 1.9080373048782349, "learning_rate": 0.00020228231208136937, "loss": 0.2328, "step": 3139 }, { "epoch": 1.1, "grad_norm": 2.3703057765960693, "learning_rate": 0.00020224510047134702, "loss": 0.4152, "step": 3140 }, { "epoch": 1.1, "grad_norm": 3.085244655609131, "learning_rate": 0.00020220788886132472, "loss": 0.6424, "step": 3141 }, { "epoch": 1.1, "grad_norm": 4.0253801345825195, "learning_rate": 0.0002021706772513024, "loss": 0.3865, "step": 3142 }, { "epoch": 1.1, "grad_norm": 2.0360500812530518, "learning_rate": 0.00020213346564128005, "loss": 0.2447, "step": 3143 }, { "epoch": 1.1, "grad_norm": 2.123710870742798, "learning_rate": 0.00020209625403125775, "loss": 0.3863, "step": 3144 }, { "epoch": 1.1, "grad_norm": 2.0543999671936035, "learning_rate": 0.0002020590424212354, "loss": 0.3734, "step": 3145 }, { "epoch": 1.1, "grad_norm": 2.6033356189727783, "learning_rate": 0.00020202183081121308, "loss": 1.1586, "step": 3146 }, { "epoch": 1.1, "grad_norm": 2.208231210708618, "learning_rate": 0.00020198461920119078, "loss": 0.4899, "step": 3147 }, { "epoch": 1.1, "grad_norm": 2.316422700881958, "learning_rate": 0.00020194740759116843, "loss": 0.7458, "step": 3148 }, 
{ "epoch": 1.1, "grad_norm": 2.228898525238037, "learning_rate": 0.0002019101959811461, "loss": 0.355, "step": 3149 }, { "epoch": 1.1, "grad_norm": 3.5317399501800537, "learning_rate": 0.00020187298437112375, "loss": 0.3847, "step": 3150 }, { "epoch": 1.1, "grad_norm": 2.578821897506714, "learning_rate": 0.00020183577276110145, "loss": 0.3668, "step": 3151 }, { "epoch": 1.1, "grad_norm": 4.172102928161621, "learning_rate": 0.00020179856115107913, "loss": 0.5389, "step": 3152 }, { "epoch": 1.1, "grad_norm": 2.7270286083221436, "learning_rate": 0.00020176134954105678, "loss": 0.4602, "step": 3153 }, { "epoch": 1.11, "grad_norm": 3.5921061038970947, "learning_rate": 0.00020172413793103448, "loss": 0.3265, "step": 3154 }, { "epoch": 1.11, "grad_norm": 1.7611098289489746, "learning_rate": 0.00020168692632101216, "loss": 0.359, "step": 3155 }, { "epoch": 1.11, "grad_norm": 2.818851947784424, "learning_rate": 0.0002016497147109898, "loss": 0.8464, "step": 3156 }, { "epoch": 1.11, "grad_norm": 2.597490072250366, "learning_rate": 0.00020161250310096748, "loss": 0.6396, "step": 3157 }, { "epoch": 1.11, "grad_norm": 4.247330665588379, "learning_rate": 0.00020157529149094516, "loss": 1.4697, "step": 3158 }, { "epoch": 1.11, "grad_norm": 2.6339824199676514, "learning_rate": 0.00020153807988092283, "loss": 0.6861, "step": 3159 }, { "epoch": 1.11, "grad_norm": 1.2723904848098755, "learning_rate": 0.0002015008682709005, "loss": 0.2326, "step": 3160 }, { "epoch": 1.11, "grad_norm": 2.37565016746521, "learning_rate": 0.00020146365666087816, "loss": 0.2286, "step": 3161 }, { "epoch": 1.11, "grad_norm": 3.4451630115509033, "learning_rate": 0.00020142644505085586, "loss": 0.9374, "step": 3162 }, { "epoch": 1.11, "grad_norm": 2.457343101501465, "learning_rate": 0.0002013892334408335, "loss": 0.3735, "step": 3163 }, { "epoch": 1.11, "grad_norm": 2.033566951751709, "learning_rate": 0.00020135202183081119, "loss": 0.2974, "step": 3164 }, { "epoch": 1.11, "grad_norm": 1.4532384872436523, 
"learning_rate": 0.0002013148102207889, "loss": 0.2587, "step": 3165 }, { "epoch": 1.11, "grad_norm": 9.672016143798828, "learning_rate": 0.00020127759861076654, "loss": 2.1447, "step": 3166 }, { "epoch": 1.11, "grad_norm": 3.0728158950805664, "learning_rate": 0.0002012403870007442, "loss": 0.4088, "step": 3167 }, { "epoch": 1.11, "grad_norm": 3.4193506240844727, "learning_rate": 0.00020120317539072186, "loss": 0.6485, "step": 3168 }, { "epoch": 1.11, "grad_norm": 2.32649302482605, "learning_rate": 0.00020116596378069956, "loss": 0.5016, "step": 3169 }, { "epoch": 1.11, "grad_norm": 1.4236040115356445, "learning_rate": 0.00020112875217067724, "loss": 0.2539, "step": 3170 }, { "epoch": 1.11, "grad_norm": 2.348944902420044, "learning_rate": 0.0002010915405606549, "loss": 0.3252, "step": 3171 }, { "epoch": 1.11, "grad_norm": 0.6811813712120056, "learning_rate": 0.0002010543289506326, "loss": 0.0438, "step": 3172 }, { "epoch": 1.11, "grad_norm": 2.597822427749634, "learning_rate": 0.00020101711734061027, "loss": 0.3622, "step": 3173 }, { "epoch": 1.11, "grad_norm": 1.7118090391159058, "learning_rate": 0.00020097990573058792, "loss": 0.2325, "step": 3174 }, { "epoch": 1.11, "grad_norm": 2.546022415161133, "learning_rate": 0.00020094269412056562, "loss": 0.2348, "step": 3175 }, { "epoch": 1.11, "grad_norm": 1.8402515649795532, "learning_rate": 0.00020090548251054327, "loss": 0.2659, "step": 3176 }, { "epoch": 1.11, "grad_norm": 3.6556687355041504, "learning_rate": 0.00020086827090052094, "loss": 0.2047, "step": 3177 }, { "epoch": 1.11, "grad_norm": 3.188159942626953, "learning_rate": 0.00020083105929049862, "loss": 0.3726, "step": 3178 }, { "epoch": 1.11, "grad_norm": NaN, "learning_rate": 0.00020083105929049862, "loss": 0.0813, "step": 3179 }, { "epoch": 1.11, "grad_norm": 2.2536139488220215, "learning_rate": 0.0002007938476804763, "loss": 0.7854, "step": 3180 }, { "epoch": 1.11, "grad_norm": 1.8012816905975342, "learning_rate": 0.00020075663607045397, "loss": 0.406, 
"step": 3181 }, { "epoch": 1.11, "grad_norm": 2.1195740699768066, "learning_rate": 0.00020071942446043162, "loss": 0.309, "step": 3182 }, { "epoch": 1.12, "grad_norm": 2.053685188293457, "learning_rate": 0.0002006822128504093, "loss": 0.485, "step": 3183 }, { "epoch": 1.12, "grad_norm": 2.672041654586792, "learning_rate": 0.000200645001240387, "loss": 0.4533, "step": 3184 }, { "epoch": 1.12, "grad_norm": 3.1025266647338867, "learning_rate": 0.00020060778963036465, "loss": 0.9278, "step": 3185 }, { "epoch": 1.12, "grad_norm": 1.895678162574768, "learning_rate": 0.00020057057802034232, "loss": 0.3792, "step": 3186 }, { "epoch": 1.12, "grad_norm": 3.569613218307495, "learning_rate": 0.00020053336641032003, "loss": 0.9807, "step": 3187 }, { "epoch": 1.12, "grad_norm": 2.383301019668579, "learning_rate": 0.00020049615480029767, "loss": 0.4056, "step": 3188 }, { "epoch": 1.12, "grad_norm": 5.517592430114746, "learning_rate": 0.00020045894319027535, "loss": 0.4775, "step": 3189 }, { "epoch": 1.12, "grad_norm": 3.9130759239196777, "learning_rate": 0.000200421731580253, "loss": 1.5079, "step": 3190 }, { "epoch": 1.12, "grad_norm": 1.221430778503418, "learning_rate": 0.0002003845199702307, "loss": 0.1815, "step": 3191 }, { "epoch": 1.12, "grad_norm": 1.0000752210617065, "learning_rate": 0.00020034730836020838, "loss": 0.1435, "step": 3192 }, { "epoch": 1.12, "grad_norm": 2.36952543258667, "learning_rate": 0.00020031009675018603, "loss": 0.4677, "step": 3193 }, { "epoch": 1.12, "grad_norm": 1.628010869026184, "learning_rate": 0.00020027288514016373, "loss": 0.2069, "step": 3194 }, { "epoch": 1.12, "grad_norm": 3.232687473297119, "learning_rate": 0.00020023567353014138, "loss": 0.4343, "step": 3195 }, { "epoch": 1.12, "grad_norm": 2.015259027481079, "learning_rate": 0.00020019846192011905, "loss": 0.3253, "step": 3196 }, { "epoch": 1.12, "grad_norm": 1.6881203651428223, "learning_rate": 0.00020016125031009676, "loss": 0.2276, "step": 3197 }, { "epoch": 1.12, "grad_norm": 
2.478654384613037, "learning_rate": 0.0002001240387000744, "loss": 0.4578, "step": 3198 }, { "epoch": 1.12, "grad_norm": 3.1310973167419434, "learning_rate": 0.00020008682709005208, "loss": 0.3007, "step": 3199 }, { "epoch": 1.12, "grad_norm": 2.1835124492645264, "learning_rate": 0.00020004961548002976, "loss": 0.4325, "step": 3200 }, { "epoch": 1.12, "eval_loss": 0.5288262367248535, "eval_runtime": 51.0509, "eval_samples_per_second": 42.467, "eval_steps_per_second": 10.617, "eval_wer": 0.4644525168655942, "step": 3200 }, { "epoch": 1.12, "grad_norm": 5.164034843444824, "learning_rate": 0.00020001240387000743, "loss": 0.4793, "step": 3201 }, { "epoch": 1.12, "grad_norm": 4.324225902557373, "learning_rate": 0.0001999751922599851, "loss": 1.9038, "step": 3202 }, { "epoch": 1.12, "grad_norm": 3.0840883255004883, "learning_rate": 0.00019993798064996276, "loss": 0.2772, "step": 3203 }, { "epoch": 1.12, "grad_norm": 4.304738521575928, "learning_rate": 0.00019990076903994043, "loss": 0.4444, "step": 3204 }, { "epoch": 1.12, "grad_norm": 2.658390998840332, "learning_rate": 0.00019986355742991814, "loss": 1.3376, "step": 3205 }, { "epoch": 1.12, "grad_norm": 1.7802234888076782, "learning_rate": 0.00019982634581989578, "loss": 0.5389, "step": 3206 }, { "epoch": 1.12, "grad_norm": 2.279994010925293, "learning_rate": 0.00019978913420987346, "loss": 0.5139, "step": 3207 }, { "epoch": 1.12, "grad_norm": 2.162747621536255, "learning_rate": 0.0001997519225998511, "loss": 0.508, "step": 3208 }, { "epoch": 1.12, "grad_norm": 2.005580186843872, "learning_rate": 0.0001997147109898288, "loss": 0.5523, "step": 3209 }, { "epoch": 1.12, "grad_norm": 3.2639243602752686, "learning_rate": 0.0001996774993798065, "loss": 0.7956, "step": 3210 }, { "epoch": 1.13, "grad_norm": 1.1911425590515137, "learning_rate": 0.00019964028776978414, "loss": 0.1047, "step": 3211 }, { "epoch": 1.13, "grad_norm": 2.8164703845977783, "learning_rate": 0.00019960307615976184, "loss": 0.3521, "step": 3212 }, { 
"epoch": 1.13, "grad_norm": 1.5338577032089233, "learning_rate": 0.0001995658645497395, "loss": 0.3544, "step": 3213 }, { "epoch": 1.13, "grad_norm": 2.2264373302459717, "learning_rate": 0.00019952865293971716, "loss": 0.5603, "step": 3214 }, { "epoch": 1.13, "grad_norm": 2.001485824584961, "learning_rate": 0.00019949144132969487, "loss": 0.3046, "step": 3215 }, { "epoch": 1.13, "grad_norm": 4.817356586456299, "learning_rate": 0.00019945422971967252, "loss": 0.527, "step": 3216 }, { "epoch": 1.13, "grad_norm": 2.8212592601776123, "learning_rate": 0.0001994170181096502, "loss": 0.595, "step": 3217 }, { "epoch": 1.13, "grad_norm": 4.72152853012085, "learning_rate": 0.0001993798064996279, "loss": 0.9974, "step": 3218 }, { "epoch": 1.13, "grad_norm": 3.7662367820739746, "learning_rate": 0.00019934259488960554, "loss": 0.8654, "step": 3219 }, { "epoch": 1.13, "grad_norm": 0.8878252506256104, "learning_rate": 0.00019930538327958322, "loss": 0.0876, "step": 3220 }, { "epoch": 1.13, "grad_norm": 4.1150922775268555, "learning_rate": 0.00019926817166956087, "loss": 0.3632, "step": 3221 }, { "epoch": 1.13, "grad_norm": 2.1186084747314453, "learning_rate": 0.00019923096005953857, "loss": 0.4102, "step": 3222 }, { "epoch": 1.13, "grad_norm": 2.5478599071502686, "learning_rate": 0.00019919374844951625, "loss": 0.2458, "step": 3223 }, { "epoch": 1.13, "grad_norm": 2.566505193710327, "learning_rate": 0.0001991565368394939, "loss": 0.2762, "step": 3224 }, { "epoch": 1.13, "grad_norm": 2.5866873264312744, "learning_rate": 0.00019911932522947157, "loss": 0.8733, "step": 3225 }, { "epoch": 1.13, "grad_norm": 1.6467078924179077, "learning_rate": 0.00019908211361944925, "loss": 0.1229, "step": 3226 }, { "epoch": 1.13, "grad_norm": 7.527315139770508, "learning_rate": 0.00019904490200942692, "loss": 0.5953, "step": 3227 }, { "epoch": 1.13, "grad_norm": 2.10206937789917, "learning_rate": 0.0001990076903994046, "loss": 0.4737, "step": 3228 }, { "epoch": 1.13, "grad_norm": 1.532116174697876, 
"learning_rate": 0.00019897047878938225, "loss": 0.1381, "step": 3229 }, { "epoch": 1.13, "grad_norm": 1.7178963422775269, "learning_rate": 0.00019893326717935995, "loss": 0.6589, "step": 3230 }, { "epoch": 1.13, "grad_norm": 1.9948376417160034, "learning_rate": 0.00019889605556933763, "loss": 0.4115, "step": 3231 }, { "epoch": 1.13, "grad_norm": 1.7607015371322632, "learning_rate": 0.00019885884395931527, "loss": 0.3731, "step": 3232 }, { "epoch": 1.13, "grad_norm": 1.7697004079818726, "learning_rate": 0.00019882163234929298, "loss": 0.2709, "step": 3233 }, { "epoch": 1.13, "grad_norm": 1.821341633796692, "learning_rate": 0.00019878442073927063, "loss": 0.2842, "step": 3234 }, { "epoch": 1.13, "grad_norm": 2.076326370239258, "learning_rate": 0.0001987472091292483, "loss": 0.3179, "step": 3235 }, { "epoch": 1.13, "grad_norm": 1.9342252016067505, "learning_rate": 0.000198709997519226, "loss": 0.2932, "step": 3236 }, { "epoch": 1.13, "grad_norm": 2.633678674697876, "learning_rate": 0.00019867278590920365, "loss": 0.5614, "step": 3237 }, { "epoch": 1.13, "grad_norm": 2.345721483230591, "learning_rate": 0.00019863557429918133, "loss": 0.3905, "step": 3238 }, { "epoch": 1.13, "grad_norm": 5.671786785125732, "learning_rate": 0.00019859836268915898, "loss": 0.373, "step": 3239 }, { "epoch": 1.14, "grad_norm": 3.5498249530792236, "learning_rate": 0.00019856115107913668, "loss": 0.3899, "step": 3240 }, { "epoch": 1.14, "grad_norm": 2.3076889514923096, "learning_rate": 0.00019852393946911436, "loss": 0.3927, "step": 3241 }, { "epoch": 1.14, "grad_norm": 1.8654396533966064, "learning_rate": 0.000198486727859092, "loss": 0.4113, "step": 3242 }, { "epoch": 1.14, "grad_norm": 3.3518593311309814, "learning_rate": 0.0001984495162490697, "loss": 0.4291, "step": 3243 }, { "epoch": 1.14, "grad_norm": 3.1347527503967285, "learning_rate": 0.00019841230463904738, "loss": 0.5388, "step": 3244 }, { "epoch": 1.14, "grad_norm": 1.9862194061279297, "learning_rate": 0.00019837509302902503, 
"loss": 0.3185, "step": 3245 }, { "epoch": 1.14, "grad_norm": 3.0931460857391357, "learning_rate": 0.0001983378814190027, "loss": 0.2293, "step": 3246 }, { "epoch": 1.14, "grad_norm": 3.0222811698913574, "learning_rate": 0.00019830066980898038, "loss": 0.675, "step": 3247 }, { "epoch": 1.14, "grad_norm": 4.591670989990234, "learning_rate": 0.00019826345819895806, "loss": 1.6013, "step": 3248 }, { "epoch": 1.14, "grad_norm": 2.511385679244995, "learning_rate": 0.00019822624658893574, "loss": 0.3192, "step": 3249 }, { "epoch": 1.14, "grad_norm": 2.967205047607422, "learning_rate": 0.00019818903497891338, "loss": 0.2893, "step": 3250 }, { "epoch": 1.14, "grad_norm": 1.8851898908615112, "learning_rate": 0.0001981518233688911, "loss": 0.221, "step": 3251 }, { "epoch": 1.14, "grad_norm": 4.387383460998535, "learning_rate": 0.00019811461175886874, "loss": 0.3816, "step": 3252 }, { "epoch": 1.14, "grad_norm": 1.66984224319458, "learning_rate": 0.0001980774001488464, "loss": 0.1179, "step": 3253 }, { "epoch": 1.14, "grad_norm": 2.261944055557251, "learning_rate": 0.00019804018853882411, "loss": 0.23, "step": 3254 }, { "epoch": 1.14, "grad_norm": 2.3139607906341553, "learning_rate": 0.00019800297692880176, "loss": 0.6849, "step": 3255 }, { "epoch": 1.14, "grad_norm": 3.3277676105499268, "learning_rate": 0.00019796576531877944, "loss": 0.821, "step": 3256 }, { "epoch": 1.14, "grad_norm": 1.5088117122650146, "learning_rate": 0.0001979285537087571, "loss": 0.3421, "step": 3257 }, { "epoch": 1.14, "grad_norm": 26.52508544921875, "learning_rate": 0.0001978913420987348, "loss": 5.8404, "step": 3258 }, { "epoch": 1.14, "grad_norm": 2.2505085468292236, "learning_rate": 0.00019785413048871247, "loss": 0.2756, "step": 3259 }, { "epoch": 1.14, "grad_norm": 1.61557137966156, "learning_rate": 0.00019781691887869012, "loss": 0.2101, "step": 3260 }, { "epoch": 1.14, "grad_norm": 1.9748115539550781, "learning_rate": 0.00019777970726866782, "loss": 0.3818, "step": 3261 }, { "epoch": 1.14, 
"grad_norm": 1.2797274589538574, "learning_rate": 0.0001977424956586455, "loss": 0.2122, "step": 3262 }, { "epoch": 1.14, "grad_norm": 2.0891008377075195, "learning_rate": 0.00019770528404862314, "loss": 0.3929, "step": 3263 }, { "epoch": 1.14, "grad_norm": 1.7179592847824097, "learning_rate": 0.00019766807243860085, "loss": 0.2509, "step": 3264 }, { "epoch": 1.14, "grad_norm": 2.773587703704834, "learning_rate": 0.0001976308608285785, "loss": 0.2744, "step": 3265 }, { "epoch": 1.14, "grad_norm": 1.902330756187439, "learning_rate": 0.00019759364921855617, "loss": 0.3719, "step": 3266 }, { "epoch": 1.14, "grad_norm": 3.543816566467285, "learning_rate": 0.00019755643760853385, "loss": 0.6176, "step": 3267 }, { "epoch": 1.15, "grad_norm": 2.187521457672119, "learning_rate": 0.00019751922599851152, "loss": 0.2295, "step": 3268 }, { "epoch": 1.15, "grad_norm": 2.898231029510498, "learning_rate": 0.0001974820143884892, "loss": 0.3294, "step": 3269 }, { "epoch": 1.15, "grad_norm": 1.8710620403289795, "learning_rate": 0.00019744480277846685, "loss": 0.2361, "step": 3270 }, { "epoch": 1.15, "grad_norm": 1.4173840284347534, "learning_rate": 0.00019740759116844452, "loss": 0.2267, "step": 3271 }, { "epoch": 1.15, "grad_norm": 3.5871973037719727, "learning_rate": 0.00019737037955842222, "loss": 0.6848, "step": 3272 }, { "epoch": 1.15, "grad_norm": 3.1234724521636963, "learning_rate": 0.00019733316794839987, "loss": 0.3783, "step": 3273 }, { "epoch": 1.15, "grad_norm": 1.742729902267456, "learning_rate": 0.00019729595633837755, "loss": 0.3094, "step": 3274 }, { "epoch": 1.15, "grad_norm": 3.235287666320801, "learning_rate": 0.00019725874472835525, "loss": 0.5631, "step": 3275 }, { "epoch": 1.15, "grad_norm": 5.217351913452148, "learning_rate": 0.0001972215331183329, "loss": 0.5498, "step": 3276 }, { "epoch": 1.15, "grad_norm": 1.3385629653930664, "learning_rate": 0.00019718432150831058, "loss": 0.066, "step": 3277 }, { "epoch": 1.15, "grad_norm": 5.248405933380127, 
"learning_rate": 0.00019714710989828823, "loss": 0.769, "step": 3278 }, { "epoch": 1.15, "grad_norm": 3.6078615188598633, "learning_rate": 0.00019710989828826593, "loss": 1.5544, "step": 3279 }, { "epoch": 1.15, "grad_norm": 3.464466094970703, "learning_rate": 0.0001970726866782436, "loss": 0.6935, "step": 3280 }, { "epoch": 1.15, "grad_norm": 1.388893723487854, "learning_rate": 0.00019703547506822125, "loss": 0.196, "step": 3281 }, { "epoch": 1.15, "grad_norm": 2.4973855018615723, "learning_rate": 0.00019699826345819896, "loss": 1.0041, "step": 3282 }, { "epoch": 1.15, "grad_norm": 1.7962448596954346, "learning_rate": 0.0001969610518481766, "loss": 0.3729, "step": 3283 }, { "epoch": 1.15, "grad_norm": 2.4133572578430176, "learning_rate": 0.00019692384023815428, "loss": 0.4041, "step": 3284 }, { "epoch": 1.15, "grad_norm": 1.40854012966156, "learning_rate": 0.00019688662862813198, "loss": 0.2944, "step": 3285 }, { "epoch": 1.15, "grad_norm": 2.9127371311187744, "learning_rate": 0.00019684941701810963, "loss": 0.1197, "step": 3286 }, { "epoch": 1.15, "grad_norm": 1.9602265357971191, "learning_rate": 0.0001968122054080873, "loss": 0.3084, "step": 3287 }, { "epoch": 1.15, "grad_norm": 3.038182258605957, "learning_rate": 0.00019677499379806498, "loss": 0.7004, "step": 3288 }, { "epoch": 1.15, "grad_norm": 1.9776606559753418, "learning_rate": 0.00019673778218804266, "loss": 0.5882, "step": 3289 }, { "epoch": 1.15, "grad_norm": 2.051262855529785, "learning_rate": 0.00019670057057802033, "loss": 0.3358, "step": 3290 }, { "epoch": 1.15, "grad_norm": 2.713244676589966, "learning_rate": 0.00019666335896799798, "loss": 0.6035, "step": 3291 }, { "epoch": 1.15, "grad_norm": 1.6439521312713623, "learning_rate": 0.00019662614735797566, "loss": 0.1498, "step": 3292 }, { "epoch": 1.15, "grad_norm": 2.486056327819824, "learning_rate": 0.00019658893574795336, "loss": 0.5642, "step": 3293 }, { "epoch": 1.15, "grad_norm": 1.6842329502105713, "learning_rate": 0.000196551724137931, 
"loss": 0.3027, "step": 3294 }, { "epoch": 1.15, "grad_norm": 5.799333095550537, "learning_rate": 0.0001965145125279087, "loss": 0.771, "step": 3295 }, { "epoch": 1.15, "grad_norm": 3.132808208465576, "learning_rate": 0.00019647730091788636, "loss": 0.5863, "step": 3296 }, { "epoch": 1.16, "grad_norm": 1.502746343612671, "learning_rate": 0.00019644008930786404, "loss": 0.1146, "step": 3297 }, { "epoch": 1.16, "grad_norm": 2.2371695041656494, "learning_rate": 0.00019640287769784171, "loss": 0.1923, "step": 3298 }, { "epoch": 1.16, "grad_norm": 1.7083606719970703, "learning_rate": 0.00019636566608781936, "loss": 0.2605, "step": 3299 }, { "epoch": 1.16, "grad_norm": 1.663743019104004, "learning_rate": 0.00019632845447779707, "loss": 0.2404, "step": 3300 }, { "epoch": 1.16, "eval_loss": 0.53449547290802, "eval_runtime": 51.0865, "eval_samples_per_second": 42.438, "eval_steps_per_second": 10.609, "eval_wer": 0.48806434872859367, "step": 3300 }, { "epoch": 1.16, "grad_norm": 2.146204710006714, "learning_rate": 0.00019629124286777474, "loss": 0.3883, "step": 3301 }, { "epoch": 1.16, "grad_norm": 4.3042311668396, "learning_rate": 0.0001962540312577524, "loss": 0.5085, "step": 3302 }, { "epoch": 1.16, "grad_norm": 1.9416069984436035, "learning_rate": 0.0001962168196477301, "loss": 0.1635, "step": 3303 }, { "epoch": 1.16, "grad_norm": 1.1905418634414673, "learning_rate": 0.00019617960803770774, "loss": 0.0755, "step": 3304 }, { "epoch": 1.16, "grad_norm": 2.9481916427612305, "learning_rate": 0.00019614239642768542, "loss": 1.2842, "step": 3305 }, { "epoch": 1.16, "grad_norm": 4.018986701965332, "learning_rate": 0.00019610518481766312, "loss": 1.239, "step": 3306 }, { "epoch": 1.16, "grad_norm": 2.8045568466186523, "learning_rate": 0.00019606797320764077, "loss": 0.7349, "step": 3307 }, { "epoch": 1.16, "grad_norm": 2.780566692352295, "learning_rate": 0.00019603076159761844, "loss": 0.6553, "step": 3308 }, { "epoch": 1.16, "grad_norm": 1.8182471990585327, "learning_rate": 
0.0001959935499875961, "loss": 0.3462, "step": 3309 }, { "epoch": 1.16, "grad_norm": 2.2723684310913086, "learning_rate": 0.0001959563383775738, "loss": 0.3089, "step": 3310 }, { "epoch": 1.16, "grad_norm": 2.788958787918091, "learning_rate": 0.00019591912676755147, "loss": 0.439, "step": 3311 }, { "epoch": 1.16, "grad_norm": 1.5581282377243042, "learning_rate": 0.00019588191515752912, "loss": 0.1266, "step": 3312 }, { "epoch": 1.16, "grad_norm": 2.770595073699951, "learning_rate": 0.0001958447035475068, "loss": 0.5998, "step": 3313 }, { "epoch": 1.16, "grad_norm": 2.167032480239868, "learning_rate": 0.00019580749193748447, "loss": 0.4174, "step": 3314 }, { "epoch": 1.16, "grad_norm": 3.6844260692596436, "learning_rate": 0.00019577028032746215, "loss": 0.574, "step": 3315 }, { "epoch": 1.16, "grad_norm": 2.024935722351074, "learning_rate": 0.00019573306871743982, "loss": 0.2714, "step": 3316 }, { "epoch": 1.16, "grad_norm": 3.656796932220459, "learning_rate": 0.0001956958571074175, "loss": 0.7618, "step": 3317 }, { "epoch": 1.16, "grad_norm": 3.2108490467071533, "learning_rate": 0.00019565864549739518, "loss": 0.4265, "step": 3318 }, { "epoch": 1.16, "grad_norm": 1.514299750328064, "learning_rate": 0.00019562143388737285, "loss": 0.1267, "step": 3319 }, { "epoch": 1.16, "grad_norm": 4.003231048583984, "learning_rate": 0.0001955842222773505, "loss": 0.4855, "step": 3320 }, { "epoch": 1.16, "grad_norm": 2.6469032764434814, "learning_rate": 0.0001955470106673282, "loss": 0.4387, "step": 3321 }, { "epoch": 1.16, "grad_norm": 1.6862777471542358, "learning_rate": 0.00019550979905730585, "loss": 0.2019, "step": 3322 }, { "epoch": 1.16, "grad_norm": 3.7266769409179688, "learning_rate": 0.00019547258744728353, "loss": 0.6177, "step": 3323 }, { "epoch": 1.16, "grad_norm": 2.3731110095977783, "learning_rate": 0.00019543537583726123, "loss": 0.2896, "step": 3324 }, { "epoch": 1.17, "grad_norm": 3.063096761703491, "learning_rate": 0.00019539816422723888, "loss": 0.3411, "step": 
3325 }, { "epoch": 1.17, "grad_norm": 2.101358413696289, "learning_rate": 0.00019536095261721655, "loss": 0.3822, "step": 3326 }, { "epoch": 1.17, "grad_norm": 4.259075164794922, "learning_rate": 0.0001953237410071942, "loss": 1.7024, "step": 3327 }, { "epoch": 1.17, "grad_norm": 4.619799613952637, "learning_rate": 0.0001952865293971719, "loss": 0.7057, "step": 3328 }, { "epoch": 1.17, "grad_norm": 1.7613863945007324, "learning_rate": 0.00019524931778714958, "loss": 0.1737, "step": 3329 }, { "epoch": 1.17, "grad_norm": 2.4438929557800293, "learning_rate": 0.00019521210617712723, "loss": 0.759, "step": 3330 }, { "epoch": 1.17, "grad_norm": 2.7754616737365723, "learning_rate": 0.00019517489456710493, "loss": 0.7182, "step": 3331 }, { "epoch": 1.17, "grad_norm": 2.5432474613189697, "learning_rate": 0.0001951376829570826, "loss": 0.3178, "step": 3332 }, { "epoch": 1.17, "grad_norm": 1.662652850151062, "learning_rate": 0.00019510047134706026, "loss": 0.268, "step": 3333 }, { "epoch": 1.17, "grad_norm": 1.8840018510818481, "learning_rate": 0.00019506325973703793, "loss": 0.5437, "step": 3334 }, { "epoch": 1.17, "grad_norm": 2.192028760910034, "learning_rate": 0.0001950260481270156, "loss": 0.2108, "step": 3335 }, { "epoch": 1.17, "grad_norm": 1.9820222854614258, "learning_rate": 0.00019498883651699329, "loss": 0.2483, "step": 3336 }, { "epoch": 1.17, "grad_norm": 3.4582228660583496, "learning_rate": 0.00019495162490697096, "loss": 0.6822, "step": 3337 }, { "epoch": 1.17, "grad_norm": 1.7434332370758057, "learning_rate": 0.00019491441329694864, "loss": 0.2123, "step": 3338 }, { "epoch": 1.17, "grad_norm": 2.4500560760498047, "learning_rate": 0.0001948772016869263, "loss": 0.482, "step": 3339 }, { "epoch": 1.17, "grad_norm": 2.2425453662872314, "learning_rate": 0.00019483999007690396, "loss": 0.3816, "step": 3340 }, { "epoch": 1.17, "grad_norm": 3.76282000541687, "learning_rate": 0.00019480277846688164, "loss": 0.3388, "step": 3341 }, { "epoch": 1.17, "grad_norm": 
1.8158988952636719, "learning_rate": 0.00019476556685685934, "loss": 0.2258, "step": 3342 }, { "epoch": 1.17, "grad_norm": 1.5334137678146362, "learning_rate": 0.000194728355246837, "loss": 0.217, "step": 3343 }, { "epoch": 1.17, "grad_norm": 3.0049142837524414, "learning_rate": 0.00019469114363681466, "loss": 0.3094, "step": 3344 }, { "epoch": 1.17, "grad_norm": 0.7647264003753662, "learning_rate": 0.00019465393202679237, "loss": 0.0804, "step": 3345 }, { "epoch": 1.17, "grad_norm": 2.254091739654541, "learning_rate": 0.00019461672041677002, "loss": 0.2055, "step": 3346 }, { "epoch": 1.17, "grad_norm": 3.365161657333374, "learning_rate": 0.0001945795088067477, "loss": 0.6031, "step": 3347 }, { "epoch": 1.17, "grad_norm": 2.869905471801758, "learning_rate": 0.00019454229719672534, "loss": 0.2699, "step": 3348 }, { "epoch": 1.17, "grad_norm": 1.7060647010803223, "learning_rate": 0.00019450508558670304, "loss": 0.1983, "step": 3349 }, { "epoch": 1.17, "grad_norm": 3.431352376937866, "learning_rate": 0.00019446787397668072, "loss": 0.2818, "step": 3350 }, { "epoch": 1.17, "grad_norm": 3.3158788681030273, "learning_rate": 0.00019443066236665837, "loss": 0.6713, "step": 3351 }, { "epoch": 1.17, "grad_norm": 2.5465288162231445, "learning_rate": 0.00019439345075663607, "loss": 0.1836, "step": 3352 }, { "epoch": 1.17, "grad_norm": 7.218588352203369, "learning_rate": 0.00019435623914661372, "loss": 0.5528, "step": 3353 }, { "epoch": 1.18, "grad_norm": 3.608271360397339, "learning_rate": 0.0001943190275365914, "loss": 0.2997, "step": 3354 }, { "epoch": 1.18, "grad_norm": 2.334040403366089, "learning_rate": 0.00019428181592656907, "loss": 0.8903, "step": 3355 }, { "epoch": 1.18, "grad_norm": 3.217416524887085, "learning_rate": 0.00019424460431654675, "loss": 0.8583, "step": 3356 }, { "epoch": 1.18, "grad_norm": 2.663219928741455, "learning_rate": 0.00019420739270652442, "loss": 1.0071, "step": 3357 }, { "epoch": 1.18, "grad_norm": 2.4265856742858887, "learning_rate": 
0.00019417018109650207, "loss": 0.5831, "step": 3358 }, { "epoch": 1.18, "grad_norm": 3.747819185256958, "learning_rate": 0.00019413296948647977, "loss": 0.9428, "step": 3359 }, { "epoch": 1.18, "grad_norm": 1.7516096830368042, "learning_rate": 0.00019409575787645745, "loss": 0.3003, "step": 3360 }, { "epoch": 1.18, "grad_norm": 2.066317558288574, "learning_rate": 0.0001940585462664351, "loss": 0.4669, "step": 3361 }, { "epoch": 1.18, "grad_norm": 1.9735511541366577, "learning_rate": 0.00019402133465641277, "loss": 0.5892, "step": 3362 }, { "epoch": 1.18, "grad_norm": 3.3901679515838623, "learning_rate": 0.00019398412304639048, "loss": 0.6881, "step": 3363 }, { "epoch": 1.18, "grad_norm": 6.7699785232543945, "learning_rate": 0.00019394691143636813, "loss": 0.9592, "step": 3364 }, { "epoch": 1.18, "grad_norm": 3.4974114894866943, "learning_rate": 0.0001939096998263458, "loss": 0.4567, "step": 3365 }, { "epoch": 1.18, "grad_norm": 1.7345564365386963, "learning_rate": 0.00019387248821632345, "loss": 0.2935, "step": 3366 }, { "epoch": 1.18, "grad_norm": 2.886187791824341, "learning_rate": 0.00019383527660630115, "loss": 0.3215, "step": 3367 }, { "epoch": 1.18, "grad_norm": 2.7109858989715576, "learning_rate": 0.00019379806499627883, "loss": 0.6187, "step": 3368 }, { "epoch": 1.18, "grad_norm": 3.9955689907073975, "learning_rate": 0.00019376085338625648, "loss": 0.9804, "step": 3369 }, { "epoch": 1.18, "grad_norm": 2.5214648246765137, "learning_rate": 0.00019372364177623418, "loss": 0.509, "step": 3370 }, { "epoch": 1.18, "grad_norm": 1.90560781955719, "learning_rate": 0.00019368643016621183, "loss": 0.1936, "step": 3371 }, { "epoch": 1.18, "grad_norm": 5.796487331390381, "learning_rate": 0.0001936492185561895, "loss": 1.6628, "step": 3372 }, { "epoch": 1.18, "grad_norm": 2.390986919403076, "learning_rate": 0.0001936120069461672, "loss": 0.1252, "step": 3373 }, { "epoch": 1.18, "grad_norm": 1.6030224561691284, "learning_rate": 0.00019357479533614486, "loss": 0.1826, 
"step": 3374 }, { "epoch": 1.18, "grad_norm": 1.6612952947616577, "learning_rate": 0.00019353758372612253, "loss": 0.3292, "step": 3375 }, { "epoch": 1.18, "grad_norm": 4.129511833190918, "learning_rate": 0.0001935003721161002, "loss": 0.5278, "step": 3376 }, { "epoch": 1.18, "grad_norm": 3.8274450302124023, "learning_rate": 0.00019346316050607788, "loss": 0.537, "step": 3377 }, { "epoch": 1.18, "grad_norm": 1.8161903619766235, "learning_rate": 0.00019342594889605556, "loss": 0.2818, "step": 3378 }, { "epoch": 1.18, "grad_norm": 3.630079746246338, "learning_rate": 0.0001933887372860332, "loss": 0.1704, "step": 3379 }, { "epoch": 1.18, "grad_norm": 3.014974594116211, "learning_rate": 0.0001933515256760109, "loss": 1.1119, "step": 3380 }, { "epoch": 1.18, "grad_norm": 6.352705478668213, "learning_rate": 0.0001933143140659886, "loss": 1.7637, "step": 3381 }, { "epoch": 1.19, "grad_norm": 1.94754159450531, "learning_rate": 0.00019327710245596624, "loss": 0.5617, "step": 3382 }, { "epoch": 1.19, "grad_norm": 1.7361313104629517, "learning_rate": 0.0001932398908459439, "loss": 0.4192, "step": 3383 }, { "epoch": 1.19, "grad_norm": 1.9336076974868774, "learning_rate": 0.0001932026792359216, "loss": 0.5007, "step": 3384 }, { "epoch": 1.19, "grad_norm": 1.9910989999771118, "learning_rate": 0.00019316546762589926, "loss": 0.6072, "step": 3385 }, { "epoch": 1.19, "grad_norm": 1.9290117025375366, "learning_rate": 0.00019312825601587694, "loss": 0.3207, "step": 3386 }, { "epoch": 1.19, "grad_norm": 2.000905990600586, "learning_rate": 0.0001930910444058546, "loss": 0.6482, "step": 3387 }, { "epoch": 1.19, "grad_norm": 2.0836875438690186, "learning_rate": 0.0001930538327958323, "loss": 0.3071, "step": 3388 }, { "epoch": 1.19, "grad_norm": 1.299101710319519, "learning_rate": 0.00019301662118580997, "loss": 0.2281, "step": 3389 }, { "epoch": 1.19, "grad_norm": 2.1644299030303955, "learning_rate": 0.00019297940957578762, "loss": 0.4635, "step": 3390 }, { "epoch": 1.19, "grad_norm": 
1.7756919860839844, "learning_rate": 0.00019294219796576532, "loss": 0.3381, "step": 3391 }, { "epoch": 1.19, "grad_norm": 1.1819500923156738, "learning_rate": 0.00019290498635574297, "loss": 0.0619, "step": 3392 }, { "epoch": 1.19, "grad_norm": 2.2042393684387207, "learning_rate": 0.00019286777474572064, "loss": 0.3169, "step": 3393 }, { "epoch": 1.19, "grad_norm": 1.324364423751831, "learning_rate": 0.00019283056313569835, "loss": 0.0623, "step": 3394 }, { "epoch": 1.19, "grad_norm": 3.631133556365967, "learning_rate": 0.000192793351525676, "loss": 0.4106, "step": 3395 }, { "epoch": 1.19, "grad_norm": 4.347462177276611, "learning_rate": 0.00019275613991565367, "loss": 0.4728, "step": 3396 }, { "epoch": 1.19, "grad_norm": 3.753833770751953, "learning_rate": 0.00019271892830563132, "loss": 0.6239, "step": 3397 }, { "epoch": 1.19, "grad_norm": 3.2457504272460938, "learning_rate": 0.00019268171669560902, "loss": 0.4568, "step": 3398 }, { "epoch": 1.19, "grad_norm": 3.2867891788482666, "learning_rate": 0.0001926445050855867, "loss": 0.8455, "step": 3399 }, { "epoch": 1.19, "grad_norm": 2.0727713108062744, "learning_rate": 0.00019260729347556435, "loss": 0.2855, "step": 3400 }, { "epoch": 1.19, "eval_loss": 0.5019727349281311, "eval_runtime": 51.094, "eval_samples_per_second": 42.432, "eval_steps_per_second": 10.608, "eval_wer": 0.48131811105345096, "step": 3400 }, { "epoch": 1.19, "grad_norm": 0.6999847292900085, "learning_rate": 0.00019257008186554205, "loss": 0.0542, "step": 3401 }, { "epoch": 1.19, "grad_norm": 2.255366563796997, "learning_rate": 0.0001925328702555197, "loss": 0.2321, "step": 3402 }, { "epoch": 1.19, "grad_norm": 3.2004432678222656, "learning_rate": 0.00019249565864549737, "loss": 0.5873, "step": 3403 }, { "epoch": 1.19, "grad_norm": 7.720293998718262, "learning_rate": 0.00019245844703547505, "loss": 1.3688, "step": 3404 }, { "epoch": 1.19, "grad_norm": 2.9082274436950684, "learning_rate": 0.00019242123542545273, "loss": 0.8094, "step": 3405 }, { 
"epoch": 1.19, "grad_norm": 24.3272762298584, "learning_rate": 0.0001923840238154304, "loss": 5.8724, "step": 3406 }, { "epoch": 1.19, "grad_norm": 1.7688156366348267, "learning_rate": 0.00019234681220540808, "loss": 0.2914, "step": 3407 }, { "epoch": 1.19, "grad_norm": 2.372715950012207, "learning_rate": 0.00019230960059538573, "loss": 0.2986, "step": 3408 }, { "epoch": 1.19, "grad_norm": 2.354346990585327, "learning_rate": 0.00019227238898536343, "loss": 0.4223, "step": 3409 }, { "epoch": 1.19, "grad_norm": 1.976027250289917, "learning_rate": 0.00019223517737534108, "loss": 0.2763, "step": 3410 }, { "epoch": 1.2, "grad_norm": 1.1134535074234009, "learning_rate": 0.00019219796576531875, "loss": 0.1037, "step": 3411 }, { "epoch": 1.2, "grad_norm": 2.9392096996307373, "learning_rate": 0.00019216075415529646, "loss": 0.4426, "step": 3412 }, { "epoch": 1.2, "grad_norm": 1.128270149230957, "learning_rate": 0.0001921235425452741, "loss": 0.1267, "step": 3413 }, { "epoch": 1.2, "grad_norm": 0.8088712692260742, "learning_rate": 0.00019208633093525178, "loss": 0.0562, "step": 3414 }, { "epoch": 1.2, "grad_norm": null, "learning_rate": 0.00019208633093525178, "loss": 0.4185, "step": 3415 }, { "epoch": 1.2, "grad_norm": 4.43418550491333, "learning_rate": 0.00019204911932522943, "loss": 0.8585, "step": 3416 }, { "epoch": 1.2, "grad_norm": 2.2035839557647705, "learning_rate": 0.00019201190771520713, "loss": 0.2187, "step": 3417 }, { "epoch": 1.2, "grad_norm": 2.9955644607543945, "learning_rate": 0.0001919746961051848, "loss": 0.2888, "step": 3418 }, { "epoch": 1.2, "grad_norm": 3.610004186630249, "learning_rate": 0.00019193748449516246, "loss": 0.8794, "step": 3419 }, { "epoch": 1.2, "grad_norm": 5.0635809898376465, "learning_rate": 0.00019190027288514016, "loss": 0.5365, "step": 3420 }, { "epoch": 1.2, "grad_norm": 2.6513617038726807, "learning_rate": 0.00019186306127511784, "loss": 0.2151, "step": 3421 }, { "epoch": 1.2, "grad_norm": 3.7692182064056396, "learning_rate": 
0.00019182584966509548, "loss": 0.5967, "step": 3422 }, { "epoch": 1.2, "grad_norm": 4.396376609802246, "learning_rate": 0.0001917886380550732, "loss": 0.6936, "step": 3423 }, { "epoch": 1.2, "grad_norm": 5.046158313751221, "learning_rate": 0.00019175142644505084, "loss": 0.7558, "step": 3424 }, { "epoch": 1.2, "grad_norm": 2.674578905105591, "learning_rate": 0.0001917142148350285, "loss": 0.2655, "step": 3425 }, { "epoch": 1.2, "grad_norm": 3.748619556427002, "learning_rate": 0.0001916770032250062, "loss": 0.7096, "step": 3426 }, { "epoch": 1.2, "grad_norm": 4.511143684387207, "learning_rate": 0.00019163979161498386, "loss": 0.3599, "step": 3427 }, { "epoch": 1.2, "grad_norm": 7.0941481590271, "learning_rate": 0.00019160258000496154, "loss": 0.2173, "step": 3428 }, { "epoch": 1.2, "grad_norm": null, "learning_rate": 0.00019160258000496154, "loss": 0.0813, "step": 3429 }, { "epoch": 1.2, "grad_norm": 2.511932134628296, "learning_rate": 0.0001915653683949392, "loss": 0.896, "step": 3430 }, { "epoch": 1.2, "grad_norm": 1.8741555213928223, "learning_rate": 0.00019152815678491686, "loss": 0.4229, "step": 3431 }, { "epoch": 1.2, "grad_norm": 1.6796114444732666, "learning_rate": 0.00019149094517489457, "loss": 0.4073, "step": 3432 }, { "epoch": 1.2, "grad_norm": 2.2420263290405273, "learning_rate": 0.00019145373356487222, "loss": 0.2323, "step": 3433 }, { "epoch": 1.2, "grad_norm": 1.184206485748291, "learning_rate": 0.0001914165219548499, "loss": 0.1977, "step": 3434 }, { "epoch": 1.2, "grad_norm": 1.924786925315857, "learning_rate": 0.0001913793103448276, "loss": 0.3857, "step": 3435 }, { "epoch": 1.2, "grad_norm": 2.2253284454345703, "learning_rate": 0.00019134209873480524, "loss": 0.3105, "step": 3436 }, { "epoch": 1.2, "grad_norm": 2.1328001022338867, "learning_rate": 0.00019130488712478292, "loss": 0.3734, "step": 3437 }, { "epoch": 1.2, "grad_norm": 2.7778384685516357, "learning_rate": 0.00019126767551476057, "loss": 0.2954, "step": 3438 }, { "epoch": 1.2, 
"grad_norm": 2.7506439685821533, "learning_rate": 0.00019123046390473827, "loss": 0.2154, "step": 3439 }, { "epoch": 1.21, "grad_norm": 1.9881768226623535, "learning_rate": 0.00019119325229471595, "loss": 0.2684, "step": 3440 }, { "epoch": 1.21, "grad_norm": 2.7313220500946045, "learning_rate": 0.0001911560406846936, "loss": 0.2496, "step": 3441 }, { "epoch": 1.21, "grad_norm": 2.945786237716675, "learning_rate": 0.0001911188290746713, "loss": 0.7178, "step": 3442 }, { "epoch": 1.21, "grad_norm": 1.2393323183059692, "learning_rate": 0.00019108161746464895, "loss": 0.1755, "step": 3443 }, { "epoch": 1.21, "grad_norm": 2.6016125679016113, "learning_rate": 0.00019104440585462662, "loss": 0.4143, "step": 3444 }, { "epoch": 1.21, "grad_norm": 4.074510097503662, "learning_rate": 0.00019100719424460432, "loss": 0.4698, "step": 3445 }, { "epoch": 1.21, "grad_norm": 1.408915400505066, "learning_rate": 0.00019096998263458197, "loss": 0.1266, "step": 3446 }, { "epoch": 1.21, "grad_norm": 2.3694286346435547, "learning_rate": 0.00019093277102455965, "loss": 0.3139, "step": 3447 }, { "epoch": 1.21, "grad_norm": 2.525958776473999, "learning_rate": 0.0001908955594145373, "loss": 0.4195, "step": 3448 }, { "epoch": 1.21, "grad_norm": 2.69555401802063, "learning_rate": 0.000190858347804515, "loss": 0.3823, "step": 3449 }, { "epoch": 1.21, "grad_norm": 1.620243787765503, "learning_rate": 0.00019082113619449268, "loss": 0.1779, "step": 3450 }, { "epoch": 1.21, "grad_norm": 4.35145902633667, "learning_rate": 0.00019078392458447033, "loss": 1.6517, "step": 3451 }, { "epoch": 1.21, "grad_norm": 3.985114336013794, "learning_rate": 0.000190746712974448, "loss": 1.7988, "step": 3452 }, { "epoch": 1.21, "grad_norm": 4.146234512329102, "learning_rate": 0.0001907095013644257, "loss": 0.2545, "step": 3453 }, { "epoch": 1.21, "grad_norm": 7.112207412719727, "learning_rate": 0.00019067228975440335, "loss": 0.6047, "step": 3454 }, { "epoch": 1.21, "grad_norm": 2.409411668777466, "learning_rate": 
0.00019063507814438103, "loss": 0.9475, "step": 3455 }, { "epoch": 1.21, "grad_norm": 3.0187294483184814, "learning_rate": 0.00019059786653435868, "loss": 0.7817, "step": 3456 }, { "epoch": 1.21, "grad_norm": 1.8108159303665161, "learning_rate": 0.00019056065492433638, "loss": 0.5218, "step": 3457 }, { "epoch": 1.21, "grad_norm": 1.8068140745162964, "learning_rate": 0.00019052344331431406, "loss": 0.4357, "step": 3458 }, { "epoch": 1.21, "grad_norm": 2.0581214427948, "learning_rate": 0.0001904862317042917, "loss": 0.4965, "step": 3459 }, { "epoch": 1.21, "grad_norm": 2.0165770053863525, "learning_rate": 0.0001904490200942694, "loss": 0.3724, "step": 3460 }, { "epoch": 1.21, "grad_norm": 1.243851900100708, "learning_rate": 0.00019041180848424706, "loss": 0.2379, "step": 3461 }, { "epoch": 1.21, "grad_norm": 2.6917412281036377, "learning_rate": 0.00019037459687422473, "loss": 0.7907, "step": 3462 }, { "epoch": 1.21, "grad_norm": 1.7246170043945312, "learning_rate": 0.00019033738526420243, "loss": 0.4098, "step": 3463 }, { "epoch": 1.21, "grad_norm": 1.3708009719848633, "learning_rate": 0.00019030017365418008, "loss": 0.182, "step": 3464 }, { "epoch": 1.21, "grad_norm": 1.1372557878494263, "learning_rate": 0.00019026296204415776, "loss": 0.2405, "step": 3465 }, { "epoch": 1.21, "grad_norm": 2.3582069873809814, "learning_rate": 0.00019022575043413546, "loss": 0.3401, "step": 3466 }, { "epoch": 1.21, "grad_norm": 5.7276835441589355, "learning_rate": 0.0001901885388241131, "loss": 0.4506, "step": 3467 }, { "epoch": 1.22, "grad_norm": 1.6258736848831177, "learning_rate": 0.0001901513272140908, "loss": 0.2368, "step": 3468 }, { "epoch": 1.22, "grad_norm": 3.5020952224731445, "learning_rate": 0.00019011411560406844, "loss": 0.7847, "step": 3469 }, { "epoch": 1.22, "grad_norm": 4.4619526863098145, "learning_rate": 0.00019007690399404614, "loss": 0.3962, "step": 3470 }, { "epoch": 1.22, "grad_norm": 1.9173682928085327, "learning_rate": 0.00019003969238402381, "loss": 0.194, 
"step": 3471 }, { "epoch": 1.22, "grad_norm": 2.3916375637054443, "learning_rate": 0.00019000248077400146, "loss": 0.3269, "step": 3472 }, { "epoch": 1.22, "grad_norm": 1.9122830629348755, "learning_rate": 0.00018996526916397914, "loss": 0.0976, "step": 3473 }, { "epoch": 1.22, "grad_norm": 1.4700841903686523, "learning_rate": 0.00018992805755395681, "loss": 0.0848, "step": 3474 }, { "epoch": 1.22, "grad_norm": 5.40512228012085, "learning_rate": 0.0001898908459439345, "loss": 0.3826, "step": 3475 }, { "epoch": 1.22, "grad_norm": 9.25135612487793, "learning_rate": 0.00018985363433391217, "loss": 2.3233, "step": 3476 }, { "epoch": 1.22, "grad_norm": 4.043542861938477, "learning_rate": 0.00018981642272388981, "loss": 0.726, "step": 3477 }, { "epoch": 1.22, "grad_norm": 0.8716025948524475, "learning_rate": 0.00018977921111386752, "loss": 0.0542, "step": 3478 }, { "epoch": 1.22, "grad_norm": 1.9265203475952148, "learning_rate": 0.0001897419995038452, "loss": 0.2385, "step": 3479 }, { "epoch": 1.22, "grad_norm": 3.297703981399536, "learning_rate": 0.00018970478789382284, "loss": 0.9332, "step": 3480 }, { "epoch": 1.22, "grad_norm": 4.390806674957275, "learning_rate": 0.00018966757628380054, "loss": 0.9762, "step": 3481 }, { "epoch": 1.22, "grad_norm": 2.7843103408813477, "learning_rate": 0.0001896303646737782, "loss": 0.3961, "step": 3482 }, { "epoch": 1.22, "grad_norm": 2.2936036586761475, "learning_rate": 0.00018959315306375587, "loss": 0.515, "step": 3483 }, { "epoch": 1.22, "grad_norm": 1.33071768283844, "learning_rate": 0.00018955594145373357, "loss": 0.1201, "step": 3484 }, { "epoch": 1.22, "grad_norm": 1.876550555229187, "learning_rate": 0.00018951872984371122, "loss": 0.391, "step": 3485 }, { "epoch": 1.22, "grad_norm": 1.878441333770752, "learning_rate": 0.0001894815182336889, "loss": 0.2876, "step": 3486 }, { "epoch": 1.22, "grad_norm": 2.9820950031280518, "learning_rate": 0.00018944430662366655, "loss": 0.5285, "step": 3487 }, { "epoch": 1.22, "grad_norm": 
2.3062541484832764, "learning_rate": 0.00018940709501364425, "loss": 0.3399, "step": 3488 }, { "epoch": 1.22, "grad_norm": 2.2896065711975098, "learning_rate": 0.00018936988340362192, "loss": 0.3287, "step": 3489 }, { "epoch": 1.22, "grad_norm": 2.6173689365386963, "learning_rate": 0.00018933267179359957, "loss": 0.7003, "step": 3490 }, { "epoch": 1.22, "grad_norm": 1.3826167583465576, "learning_rate": 0.00018929546018357728, "loss": 0.1447, "step": 3491 }, { "epoch": 1.22, "grad_norm": 1.945378303527832, "learning_rate": 0.00018925824857355495, "loss": 0.1798, "step": 3492 }, { "epoch": 1.22, "grad_norm": 3.251554489135742, "learning_rate": 0.0001892210369635326, "loss": 0.4669, "step": 3493 }, { "epoch": 1.22, "grad_norm": 3.7647817134857178, "learning_rate": 0.00018918382535351028, "loss": 0.3134, "step": 3494 }, { "epoch": 1.22, "grad_norm": 1.6382604837417603, "learning_rate": 0.00018914661374348795, "loss": 0.168, "step": 3495 }, { "epoch": 1.22, "grad_norm": 4.437539100646973, "learning_rate": 0.00018910940213346563, "loss": 0.4604, "step": 3496 }, { "epoch": 1.23, "grad_norm": 19.861299514770508, "learning_rate": 0.0001890721905234433, "loss": 1.9396, "step": 3497 }, { "epoch": 1.23, "grad_norm": 1.6166614294052124, "learning_rate": 0.00018903497891342095, "loss": 0.0805, "step": 3498 }, { "epoch": 1.23, "grad_norm": 1.6526031494140625, "learning_rate": 0.00018899776730339865, "loss": 0.2469, "step": 3499 }, { "epoch": 1.23, "grad_norm": 3.7113420963287354, "learning_rate": 0.0001889605556933763, "loss": 0.3856, "step": 3500 }, { "epoch": 1.23, "eval_loss": 0.48135173320770264, "eval_runtime": 51.3793, "eval_samples_per_second": 42.196, "eval_steps_per_second": 10.549, "eval_wer": 0.4328835841549905, "step": 3500 }, { "epoch": 1.23, "grad_norm": 6.976255893707275, "learning_rate": 0.00018892334408335398, "loss": 2.395, "step": 3501 }, { "epoch": 1.23, "grad_norm": 3.2706868648529053, "learning_rate": 0.00018888613247333168, "loss": 0.5762, "step": 3502 }, { 
"epoch": 1.23, "grad_norm": 2.8764538764953613, "learning_rate": 0.00018884892086330933, "loss": 0.2334, "step": 3503 }, { "epoch": 1.23, "grad_norm": 4.566110610961914, "learning_rate": 0.000188811709253287, "loss": 1.624, "step": 3504 }, { "epoch": 1.23, "grad_norm": 3.793048620223999, "learning_rate": 0.00018877449764326466, "loss": 1.0498, "step": 3505 }, { "epoch": 1.23, "grad_norm": 2.507688283920288, "learning_rate": 0.00018873728603324236, "loss": 0.4346, "step": 3506 }, { "epoch": 1.23, "grad_norm": 2.692643404006958, "learning_rate": 0.00018870007442322003, "loss": 0.4612, "step": 3507 }, { "epoch": 1.23, "grad_norm": 1.4150710105895996, "learning_rate": 0.00018866286281319768, "loss": 0.2225, "step": 3508 }, { "epoch": 1.23, "grad_norm": 2.229931116104126, "learning_rate": 0.00018862565120317539, "loss": 0.386, "step": 3509 }, { "epoch": 1.23, "grad_norm": 1.482277750968933, "learning_rate": 0.00018858843959315306, "loss": 0.4351, "step": 3510 }, { "epoch": 1.23, "grad_norm": 2.131032943725586, "learning_rate": 0.0001885512279831307, "loss": 0.2765, "step": 3511 }, { "epoch": 1.23, "grad_norm": 1.5943416357040405, "learning_rate": 0.0001885140163731084, "loss": 0.3301, "step": 3512 }, { "epoch": 1.23, "grad_norm": 1.9223726987838745, "learning_rate": 0.00018847680476308606, "loss": 0.3836, "step": 3513 }, { "epoch": 1.23, "grad_norm": 1.1175265312194824, "learning_rate": 0.00018843959315306374, "loss": 0.208, "step": 3514 }, { "epoch": 1.23, "grad_norm": 2.3698058128356934, "learning_rate": 0.0001884023815430414, "loss": 0.422, "step": 3515 }, { "epoch": 1.23, "grad_norm": 3.7978971004486084, "learning_rate": 0.0001883651699330191, "loss": 1.4153, "step": 3516 }, { "epoch": 1.23, "grad_norm": 2.1298134326934814, "learning_rate": 0.00018832795832299676, "loss": 0.5128, "step": 3517 }, { "epoch": 1.23, "grad_norm": 1.7395635843276978, "learning_rate": 0.00018829074671297441, "loss": 0.1592, "step": 3518 }, { "epoch": 1.23, "grad_norm": 1.4399739503860474, 
"learning_rate": 0.0001882535351029521, "loss": 0.0962, "step": 3519 }, { "epoch": 1.23, "grad_norm": 2.0876457691192627, "learning_rate": 0.0001882163234929298, "loss": 0.5794, "step": 3520 }, { "epoch": 1.23, "grad_norm": 2.5795669555664062, "learning_rate": 0.00018817911188290744, "loss": 0.45, "step": 3521 }, { "epoch": 1.23, "grad_norm": 0.8200573921203613, "learning_rate": 0.00018814190027288512, "loss": 0.0697, "step": 3522 }, { "epoch": 1.23, "grad_norm": 1.8458431959152222, "learning_rate": 0.00018810468866286282, "loss": 0.1935, "step": 3523 }, { "epoch": 1.23, "grad_norm": 1.1191134452819824, "learning_rate": 0.00018806747705284047, "loss": 0.0881, "step": 3524 }, { "epoch": 1.24, "grad_norm": 4.453530311584473, "learning_rate": 0.00018803026544281814, "loss": 0.5468, "step": 3525 }, { "epoch": 1.24, "grad_norm": 1.0060765743255615, "learning_rate": 0.0001879930538327958, "loss": 0.122, "step": 3526 }, { "epoch": 1.24, "grad_norm": 2.2647805213928223, "learning_rate": 0.0001879558422227735, "loss": 0.1266, "step": 3527 }, { "epoch": 1.24, "grad_norm": 3.9961869716644287, "learning_rate": 0.00018791863061275117, "loss": 0.118, "step": 3528 }, { "epoch": 1.24, "grad_norm": 0.9355319142341614, "learning_rate": 0.00018788141900272882, "loss": 0.0574, "step": 3529 }, { "epoch": 1.24, "grad_norm": 3.0905239582061768, "learning_rate": 0.00018784420739270652, "loss": 1.0581, "step": 3530 }, { "epoch": 1.24, "grad_norm": 2.1791467666625977, "learning_rate": 0.00018780699578268417, "loss": 0.6661, "step": 3531 }, { "epoch": 1.24, "grad_norm": 2.5521254539489746, "learning_rate": 0.00018776978417266185, "loss": 0.4533, "step": 3532 }, { "epoch": 1.24, "grad_norm": 2.4842238426208496, "learning_rate": 0.00018773257256263955, "loss": 0.6154, "step": 3533 }, { "epoch": 1.24, "grad_norm": 1.9282567501068115, "learning_rate": 0.0001876953609526172, "loss": 0.3092, "step": 3534 }, { "epoch": 1.24, "grad_norm": 2.4563050270080566, "learning_rate": 0.00018765814934259488, 
"loss": 0.3415, "step": 3535 }, { "epoch": 1.24, "grad_norm": 1.9763050079345703, "learning_rate": 0.00018762093773257255, "loss": 0.4392, "step": 3536 }, { "epoch": 1.24, "grad_norm": 3.0104451179504395, "learning_rate": 0.00018758372612255023, "loss": 0.3869, "step": 3537 }, { "epoch": 1.24, "grad_norm": 2.270270824432373, "learning_rate": 0.0001875465145125279, "loss": 0.2269, "step": 3538 }, { "epoch": 1.24, "grad_norm": 4.0361008644104, "learning_rate": 0.00018750930290250555, "loss": 0.2886, "step": 3539 }, { "epoch": 1.24, "grad_norm": 2.7986207008361816, "learning_rate": 0.00018747209129248323, "loss": 0.4975, "step": 3540 }, { "epoch": 1.24, "grad_norm": 2.5085690021514893, "learning_rate": 0.00018743487968246093, "loss": 0.3265, "step": 3541 }, { "epoch": 1.24, "grad_norm": 2.8387558460235596, "learning_rate": 0.00018739766807243858, "loss": 0.2783, "step": 3542 }, { "epoch": 1.24, "grad_norm": 3.8573365211486816, "learning_rate": 0.00018736045646241625, "loss": 0.6503, "step": 3543 }, { "epoch": 1.24, "grad_norm": 4.418932914733887, "learning_rate": 0.00018732324485239393, "loss": 0.5577, "step": 3544 }, { "epoch": 1.24, "grad_norm": 1.9382532835006714, "learning_rate": 0.0001872860332423716, "loss": 0.4881, "step": 3545 }, { "epoch": 1.24, "grad_norm": 2.3090128898620605, "learning_rate": 0.00018724882163234928, "loss": 0.4069, "step": 3546 }, { "epoch": 1.24, "grad_norm": 2.0739097595214844, "learning_rate": 0.00018721161002232693, "loss": 0.3043, "step": 3547 }, { "epoch": 1.24, "grad_norm": 2.7553725242614746, "learning_rate": 0.00018717439841230463, "loss": 0.299, "step": 3548 }, { "epoch": 1.24, "grad_norm": 4.5599212646484375, "learning_rate": 0.00018713718680228228, "loss": 1.5577, "step": 3549 }, { "epoch": 1.24, "grad_norm": 4.312692642211914, "learning_rate": 0.00018709997519225996, "loss": 0.4333, "step": 3550 }, { "epoch": 1.24, "grad_norm": 2.080211877822876, "learning_rate": 0.00018706276358223766, "loss": 0.1648, "step": 3551 }, { 
"epoch": 1.24, "grad_norm": 4.01967191696167, "learning_rate": 0.0001870255519722153, "loss": 0.4049, "step": 3552 }, { "epoch": 1.24, "grad_norm": 2.141727924346924, "learning_rate": 0.00018698834036219299, "loss": 0.265, "step": 3553 }, { "epoch": 1.25, "grad_norm": 1.7477151155471802, "learning_rate": 0.0001869511287521707, "loss": 0.2846, "step": 3554 }, { "epoch": 1.25, "grad_norm": 2.3710789680480957, "learning_rate": 0.00018691391714214834, "loss": 0.4719, "step": 3555 }, { "epoch": 1.25, "grad_norm": 1.525882601737976, "learning_rate": 0.000186876705532126, "loss": 0.4626, "step": 3556 }, { "epoch": 1.25, "grad_norm": 1.5245758295059204, "learning_rate": 0.00018683949392210366, "loss": 0.367, "step": 3557 }, { "epoch": 1.25, "grad_norm": 3.189377784729004, "learning_rate": 0.00018680228231208136, "loss": 0.8119, "step": 3558 }, { "epoch": 1.25, "grad_norm": 1.608099102973938, "learning_rate": 0.00018676507070205904, "loss": 0.3054, "step": 3559 }, { "epoch": 1.25, "grad_norm": 1.2353789806365967, "learning_rate": 0.0001867278590920367, "loss": 0.1744, "step": 3560 }, { "epoch": 1.25, "grad_norm": 1.7148106098175049, "learning_rate": 0.00018669064748201436, "loss": 0.4698, "step": 3561 }, { "epoch": 1.25, "grad_norm": 2.018219470977783, "learning_rate": 0.00018665343587199204, "loss": 0.2835, "step": 3562 }, { "epoch": 1.25, "grad_norm": 1.603143572807312, "learning_rate": 0.00018661622426196972, "loss": 0.2637, "step": 3563 }, { "epoch": 1.25, "grad_norm": 1.5139524936676025, "learning_rate": 0.0001865790126519474, "loss": 0.3385, "step": 3564 }, { "epoch": 1.25, "grad_norm": 2.398815393447876, "learning_rate": 0.00018654180104192507, "loss": 0.195, "step": 3565 }, { "epoch": 1.25, "grad_norm": 2.6348488330841064, "learning_rate": 0.00018650458943190274, "loss": 0.8485, "step": 3566 }, { "epoch": 1.25, "grad_norm": 3.205677032470703, "learning_rate": 0.00018646737782188042, "loss": 0.6398, "step": 3567 }, { "epoch": 1.25, "grad_norm": 1.6468408107757568, 
"learning_rate": 0.00018643016621185807, "loss": 0.2491, "step": 3568 }, { "epoch": 1.25, "grad_norm": 1.7327412366867065, "learning_rate": 0.00018639295460183577, "loss": 0.2545, "step": 3569 }, { "epoch": 1.25, "grad_norm": 2.0780012607574463, "learning_rate": 0.00018635574299181342, "loss": 0.3401, "step": 3570 }, { "epoch": 1.25, "grad_norm": 1.9040396213531494, "learning_rate": 0.0001863185313817911, "loss": 0.2863, "step": 3571 }, { "epoch": 1.25, "grad_norm": 1.8664028644561768, "learning_rate": 0.0001862813197717688, "loss": 0.1769, "step": 3572 }, { "epoch": 1.25, "grad_norm": 2.1530826091766357, "learning_rate": 0.00018624410816174645, "loss": 0.2482, "step": 3573 }, { "epoch": 1.25, "grad_norm": 4.735560417175293, "learning_rate": 0.00018620689655172412, "loss": 1.8875, "step": 3574 }, { "epoch": 1.25, "grad_norm": 0.7484613656997681, "learning_rate": 0.00018616968494170177, "loss": 0.0572, "step": 3575 }, { "epoch": 1.25, "grad_norm": 2.2754640579223633, "learning_rate": 0.00018613247333167947, "loss": 0.12, "step": 3576 }, { "epoch": 1.25, "grad_norm": 2.0318729877471924, "learning_rate": 0.00018609526172165715, "loss": 0.0707, "step": 3577 }, { "epoch": 1.25, "grad_norm": 4.152217388153076, "learning_rate": 0.0001860580501116348, "loss": 1.4776, "step": 3578 }, { "epoch": 1.25, "grad_norm": 6.380149841308594, "learning_rate": 0.0001860208385016125, "loss": 0.3457, "step": 3579 }, { "epoch": 1.25, "grad_norm": 2.7456815242767334, "learning_rate": 0.00018598362689159018, "loss": 0.8919, "step": 3580 }, { "epoch": 1.25, "grad_norm": 1.7026749849319458, "learning_rate": 0.00018594641528156783, "loss": 0.424, "step": 3581 }, { "epoch": 1.26, "grad_norm": 1.725861668586731, "learning_rate": 0.0001859092036715455, "loss": 0.3761, "step": 3582 }, { "epoch": 1.26, "grad_norm": 1.5452537536621094, "learning_rate": 0.00018587199206152318, "loss": 0.3575, "step": 3583 }, { "epoch": 1.26, "grad_norm": 2.4589481353759766, "learning_rate": 0.00018583478045150085, 
"loss": 0.4449, "step": 3584 }, { "epoch": 1.26, "grad_norm": 1.2494957447052002, "learning_rate": 0.00018579756884147853, "loss": 0.2364, "step": 3585 }, { "epoch": 1.26, "grad_norm": 1.3550739288330078, "learning_rate": 0.0001857603572314562, "loss": 0.2196, "step": 3586 }, { "epoch": 1.26, "grad_norm": 2.515165090560913, "learning_rate": 0.00018572314562143388, "loss": 0.3064, "step": 3587 }, { "epoch": 1.26, "grad_norm": 1.4528539180755615, "learning_rate": 0.00018568593401141153, "loss": 0.2979, "step": 3588 }, { "epoch": 1.26, "grad_norm": 2.131699800491333, "learning_rate": 0.0001856487224013892, "loss": 0.3577, "step": 3589 }, { "epoch": 1.26, "grad_norm": 2.56373929977417, "learning_rate": 0.0001856115107913669, "loss": 0.6694, "step": 3590 }, { "epoch": 1.26, "grad_norm": 3.1758902072906494, "learning_rate": 0.00018557429918134456, "loss": 0.7342, "step": 3591 }, { "epoch": 1.26, "grad_norm": 1.6790735721588135, "learning_rate": 0.00018553708757132223, "loss": 0.2676, "step": 3592 }, { "epoch": 1.26, "grad_norm": 3.171738624572754, "learning_rate": 0.00018549987596129988, "loss": 0.6191, "step": 3593 }, { "epoch": 1.26, "grad_norm": 3.72245454788208, "learning_rate": 0.00018546266435127758, "loss": 0.9003, "step": 3594 }, { "epoch": 1.26, "grad_norm": 3.158806085586548, "learning_rate": 0.00018542545274125526, "loss": 0.5151, "step": 3595 }, { "epoch": 1.26, "grad_norm": 2.433006763458252, "learning_rate": 0.0001853882411312329, "loss": 0.3028, "step": 3596 }, { "epoch": 1.26, "grad_norm": 2.634091377258301, "learning_rate": 0.0001853510295212106, "loss": 0.3578, "step": 3597 }, { "epoch": 1.26, "grad_norm": 1.5862232446670532, "learning_rate": 0.0001853138179111883, "loss": 0.2275, "step": 3598 }, { "epoch": 1.26, "grad_norm": 3.65346097946167, "learning_rate": 0.00018527660630116594, "loss": 0.4811, "step": 3599 }, { "epoch": 1.26, "grad_norm": 2.1591145992279053, "learning_rate": 0.00018523939469114364, "loss": 0.3709, "step": 3600 }, { "epoch": 1.26, 
"eval_loss": 0.5617594122886658, "eval_runtime": 51.0249, "eval_samples_per_second": 42.489, "eval_steps_per_second": 10.622, "eval_wer": 0.5672029060716139, "step": 3600 }, { "epoch": 1.26, "grad_norm": 4.291797161102295, "learning_rate": 0.0001852021830811213, "loss": 0.7502, "step": 3601 }, { "epoch": 1.26, "grad_norm": 2.1709883213043213, "learning_rate": 0.00018516497147109896, "loss": 0.6292, "step": 3602 }, { "epoch": 1.26, "grad_norm": 2.368156909942627, "learning_rate": 0.00018512775986107664, "loss": 0.3489, "step": 3603 }, { "epoch": 1.26, "grad_norm": 4.229372024536133, "learning_rate": 0.00018509054825105432, "loss": 1.2459, "step": 3604 }, { "epoch": 1.26, "grad_norm": 2.126002788543701, "learning_rate": 0.000185053336641032, "loss": 0.7528, "step": 3605 }, { "epoch": 1.26, "grad_norm": 2.2286715507507324, "learning_rate": 0.00018501612503100964, "loss": 0.6335, "step": 3606 }, { "epoch": 1.26, "grad_norm": 2.8708016872406006, "learning_rate": 0.00018497891342098734, "loss": 0.6489, "step": 3607 }, { "epoch": 1.26, "grad_norm": 1.7687532901763916, "learning_rate": 0.00018494170181096502, "loss": 0.6581, "step": 3608 }, { "epoch": 1.26, "grad_norm": 1.8210864067077637, "learning_rate": 0.00018490449020094267, "loss": 0.4258, "step": 3609 }, { "epoch": 1.26, "grad_norm": 1.6010420322418213, "learning_rate": 0.00018486727859092034, "loss": 0.2422, "step": 3610 }, { "epoch": 1.27, "grad_norm": 1.8562211990356445, "learning_rate": 0.00018483006698089805, "loss": 0.3653, "step": 3611 }, { "epoch": 1.27, "grad_norm": 2.263174533843994, "learning_rate": 0.0001847928553708757, "loss": 0.5212, "step": 3612 }, { "epoch": 1.27, "grad_norm": 2.848961591720581, "learning_rate": 0.00018475564376085337, "loss": 0.383, "step": 3613 }, { "epoch": 1.27, "grad_norm": 4.881875514984131, "learning_rate": 0.00018471843215083102, "loss": 0.6873, "step": 3614 }, { "epoch": 1.27, "grad_norm": 3.0648579597473145, "learning_rate": 0.00018468122054080872, "loss": 0.8545, "step": 
3615 }, { "epoch": 1.27, "grad_norm": 2.5609729290008545, "learning_rate": 0.0001846440089307864, "loss": 0.3276, "step": 3616 }, { "epoch": 1.27, "grad_norm": 2.5366320610046387, "learning_rate": 0.00018460679732076405, "loss": 0.3974, "step": 3617 }, { "epoch": 1.27, "grad_norm": 1.6741987466812134, "learning_rate": 0.00018456958571074175, "loss": 0.3688, "step": 3618 }, { "epoch": 1.27, "grad_norm": 2.0672824382781982, "learning_rate": 0.0001845323741007194, "loss": 0.2186, "step": 3619 }, { "epoch": 1.27, "grad_norm": 3.1901285648345947, "learning_rate": 0.00018449516249069707, "loss": 0.3451, "step": 3620 }, { "epoch": 1.27, "grad_norm": 2.1494998931884766, "learning_rate": 0.00018445795088067478, "loss": 0.2902, "step": 3621 }, { "epoch": 1.27, "grad_norm": 5.410013675689697, "learning_rate": 0.00018442073927065243, "loss": 0.2924, "step": 3622 }, { "epoch": 1.27, "grad_norm": 1.7226850986480713, "learning_rate": 0.0001843835276606301, "loss": 0.2274, "step": 3623 }, { "epoch": 1.27, "grad_norm": 1.2159602642059326, "learning_rate": 0.00018434631605060778, "loss": 0.0521, "step": 3624 }, { "epoch": 1.27, "grad_norm": 6.059643268585205, "learning_rate": 0.00018430910444058545, "loss": 0.7462, "step": 3625 }, { "epoch": 1.27, "grad_norm": 1.91728937625885, "learning_rate": 0.00018427189283056313, "loss": 0.1399, "step": 3626 }, { "epoch": 1.27, "grad_norm": 2.763751745223999, "learning_rate": 0.00018423468122054078, "loss": 0.9764, "step": 3627 }, { "epoch": 1.27, "grad_norm": 3.757922887802124, "learning_rate": 0.00018419746961051848, "loss": 0.4835, "step": 3628 }, { "epoch": 1.27, "grad_norm": 4.211630821228027, "learning_rate": 0.00018416025800049616, "loss": 0.4143, "step": 3629 }, { "epoch": 1.27, "grad_norm": 4.662005424499512, "learning_rate": 0.0001841230463904738, "loss": 0.9232, "step": 3630 }, { "epoch": 1.27, "grad_norm": 1.8224451541900635, "learning_rate": 0.00018408583478045148, "loss": 0.2985, "step": 3631 }, { "epoch": 1.27, "grad_norm": 
1.7688192129135132, "learning_rate": 0.00018404862317042916, "loss": 0.2334, "step": 3632 }, { "epoch": 1.27, "grad_norm": 1.983191967010498, "learning_rate": 0.00018401141156040683, "loss": 0.3987, "step": 3633 }, { "epoch": 1.27, "grad_norm": 2.6247076988220215, "learning_rate": 0.0001839741999503845, "loss": 0.5811, "step": 3634 }, { "epoch": 1.27, "grad_norm": 1.944804310798645, "learning_rate": 0.00018393698834036216, "loss": 0.5739, "step": 3635 }, { "epoch": 1.27, "grad_norm": 1.3238880634307861, "learning_rate": 0.00018389977673033986, "loss": 0.2322, "step": 3636 }, { "epoch": 1.27, "grad_norm": 2.190138339996338, "learning_rate": 0.0001838625651203175, "loss": 0.4987, "step": 3637 }, { "epoch": 1.27, "grad_norm": 1.9091345071792603, "learning_rate": 0.00018382535351029518, "loss": 0.3517, "step": 3638 }, { "epoch": 1.28, "grad_norm": 1.6717495918273926, "learning_rate": 0.0001837881419002729, "loss": 0.1578, "step": 3639 }, { "epoch": 1.28, "grad_norm": 2.4669203758239746, "learning_rate": 0.00018375093029025054, "loss": 0.3245, "step": 3640 }, { "epoch": 1.28, "grad_norm": 1.4179534912109375, "learning_rate": 0.0001837137186802282, "loss": 0.2772, "step": 3641 }, { "epoch": 1.28, "grad_norm": 2.291625738143921, "learning_rate": 0.00018367650707020591, "loss": 0.7771, "step": 3642 }, { "epoch": 1.28, "grad_norm": 2.39475417137146, "learning_rate": 0.00018363929546018356, "loss": 0.3678, "step": 3643 }, { "epoch": 1.28, "grad_norm": 1.2829241752624512, "learning_rate": 0.00018360208385016124, "loss": 0.3058, "step": 3644 }, { "epoch": 1.28, "grad_norm": 1.9139297008514404, "learning_rate": 0.0001835648722401389, "loss": 0.1881, "step": 3645 }, { "epoch": 1.28, "grad_norm": 4.035134792327881, "learning_rate": 0.0001835276606301166, "loss": 0.2359, "step": 3646 }, { "epoch": 1.28, "grad_norm": 3.865679979324341, "learning_rate": 0.00018349044902009427, "loss": 0.4176, "step": 3647 }, { "epoch": 1.28, "grad_norm": 7.729085445404053, "learning_rate": 
0.00018345323741007191, "loss": 1.612, "step": 3648 }, { "epoch": 1.28, "grad_norm": 1.2197033166885376, "learning_rate": 0.00018341602580004962, "loss": 0.1282, "step": 3649 }, { "epoch": 1.28, "grad_norm": 1.467267394065857, "learning_rate": 0.00018337881419002727, "loss": 0.1326, "step": 3650 }, { "epoch": 1.28, "grad_norm": 4.615586757659912, "learning_rate": 0.00018334160258000494, "loss": 0.674, "step": 3651 }, { "epoch": 1.28, "grad_norm": 1.724073052406311, "learning_rate": 0.00018330439096998262, "loss": 0.268, "step": 3652 }, { "epoch": 1.28, "grad_norm": 3.465282440185547, "learning_rate": 0.0001832671793599603, "loss": 0.5606, "step": 3653 }, { "epoch": 1.28, "grad_norm": 2.1217899322509766, "learning_rate": 0.00018322996774993797, "loss": 0.1697, "step": 3654 }, { "epoch": 1.28, "grad_norm": 1.2871822118759155, "learning_rate": 0.00018319275613991565, "loss": 0.4938, "step": 3655 }, { "epoch": 1.28, "grad_norm": 2.0756819248199463, "learning_rate": 0.0001831555445298933, "loss": 0.5783, "step": 3656 }, { "epoch": 1.28, "grad_norm": 3.0986368656158447, "learning_rate": 0.000183118332919871, "loss": 0.5325, "step": 3657 }, { "epoch": 1.28, "grad_norm": 1.4617551565170288, "learning_rate": 0.00018308112130984865, "loss": 0.3135, "step": 3658 }, { "epoch": 1.28, "grad_norm": 1.108564019203186, "learning_rate": 0.00018304390969982632, "loss": 0.1062, "step": 3659 }, { "epoch": 1.28, "grad_norm": 3.5125279426574707, "learning_rate": 0.00018300669808980402, "loss": 0.6693, "step": 3660 }, { "epoch": 1.28, "grad_norm": 2.014686346054077, "learning_rate": 0.00018296948647978167, "loss": 0.4059, "step": 3661 }, { "epoch": 1.28, "grad_norm": 1.8392510414123535, "learning_rate": 0.00018293227486975935, "loss": 0.4116, "step": 3662 }, { "epoch": 1.28, "grad_norm": 2.073396921157837, "learning_rate": 0.000182895063259737, "loss": 0.3753, "step": 3663 }, { "epoch": 1.28, "grad_norm": 2.183483362197876, "learning_rate": 0.0001828578516497147, "loss": 0.5035, "step": 
3664 }, { "epoch": 1.28, "grad_norm": 2.8279621601104736, "learning_rate": 0.00018282064003969238, "loss": 0.3977, "step": 3665 }, { "epoch": 1.28, "grad_norm": 2.0163662433624268, "learning_rate": 0.00018278342842967002, "loss": 0.2057, "step": 3666 }, { "epoch": 1.28, "grad_norm": 2.163269519805908, "learning_rate": 0.00018274621681964773, "loss": 0.5138, "step": 3667 }, { "epoch": 1.29, "grad_norm": 1.980743169784546, "learning_rate": 0.0001827090052096254, "loss": 0.3655, "step": 3668 }, { "epoch": 1.29, "grad_norm": 2.201828956604004, "learning_rate": 0.00018267179359960305, "loss": 0.6443, "step": 3669 }, { "epoch": 1.29, "grad_norm": 1.6139568090438843, "learning_rate": 0.00018263458198958075, "loss": 0.1735, "step": 3670 }, { "epoch": 1.29, "grad_norm": 3.5710642337799072, "learning_rate": 0.0001825973703795584, "loss": 0.6014, "step": 3671 }, { "epoch": 1.29, "grad_norm": 1.6693230867385864, "learning_rate": 0.00018256015876953608, "loss": 0.3088, "step": 3672 }, { "epoch": 1.29, "grad_norm": 2.734638214111328, "learning_rate": 0.00018252294715951376, "loss": 0.4733, "step": 3673 }, { "epoch": 1.29, "grad_norm": 3.7495346069335938, "learning_rate": 0.00018248573554949143, "loss": 0.6562, "step": 3674 }, { "epoch": 1.29, "grad_norm": 0.6632982492446899, "learning_rate": 0.0001824485239394691, "loss": 0.0649, "step": 3675 }, { "epoch": 1.29, "grad_norm": 2.975598096847534, "learning_rate": 0.00018241131232944676, "loss": 0.3478, "step": 3676 }, { "epoch": 1.29, "grad_norm": 1.9812325239181519, "learning_rate": 0.00018237410071942443, "loss": 0.3126, "step": 3677 }, { "epoch": 1.29, "grad_norm": 4.1589274406433105, "learning_rate": 0.00018233688910940213, "loss": 0.3591, "step": 3678 }, { "epoch": 1.29, "grad_norm": 1.129032015800476, "learning_rate": 0.00018229967749937978, "loss": 0.0812, "step": 3679 }, { "epoch": 1.29, "grad_norm": 1.9920796155929565, "learning_rate": 0.00018226246588935746, "loss": 1.0188, "step": 3680 }, { "epoch": 1.29, "grad_norm": 
2.480633020401001, "learning_rate": 0.0001822252542793351, "loss": 0.8319, "step": 3681 }, { "epoch": 1.29, "grad_norm": 1.5719470977783203, "learning_rate": 0.0001821880426693128, "loss": 0.446, "step": 3682 }, { "epoch": 1.29, "grad_norm": 2.4275617599487305, "learning_rate": 0.00018215083105929049, "loss": 0.601, "step": 3683 }, { "epoch": 1.29, "grad_norm": 1.5678902864456177, "learning_rate": 0.00018211361944926813, "loss": 0.326, "step": 3684 }, { "epoch": 1.29, "grad_norm": 1.8893762826919556, "learning_rate": 0.00018207640783924584, "loss": 0.3836, "step": 3685 }, { "epoch": 1.29, "grad_norm": 1.8527387380599976, "learning_rate": 0.0001820391962292235, "loss": 0.3873, "step": 3686 }, { "epoch": 1.29, "grad_norm": 2.947082757949829, "learning_rate": 0.00018200198461920116, "loss": 0.2627, "step": 3687 }, { "epoch": 1.29, "grad_norm": 1.0232598781585693, "learning_rate": 0.00018196477300917886, "loss": 0.1291, "step": 3688 }, { "epoch": 1.29, "grad_norm": 1.9064313173294067, "learning_rate": 0.00018192756139915651, "loss": 0.2368, "step": 3689 }, { "epoch": 1.29, "grad_norm": 1.997897982597351, "learning_rate": 0.0001818903497891342, "loss": 0.17, "step": 3690 }, { "epoch": 1.29, "grad_norm": 8.157362937927246, "learning_rate": 0.0001818531381791119, "loss": 1.9281, "step": 3691 }, { "epoch": 1.29, "grad_norm": 4.2168660163879395, "learning_rate": 0.00018181592656908954, "loss": 0.4399, "step": 3692 }, { "epoch": 1.29, "grad_norm": 2.208404064178467, "learning_rate": 0.00018177871495906722, "loss": 0.1804, "step": 3693 }, { "epoch": 1.29, "grad_norm": 2.2321760654449463, "learning_rate": 0.00018174150334904487, "loss": 0.2453, "step": 3694 }, { "epoch": 1.29, "grad_norm": 1.9782792329788208, "learning_rate": 0.00018170429173902257, "loss": 0.1628, "step": 3695 }, { "epoch": 1.3, "grad_norm": 2.479062795639038, "learning_rate": 0.00018166708012900024, "loss": 0.3915, "step": 3696 }, { "epoch": 1.3, "grad_norm": 1.956723928451538, "learning_rate": 
0.0001816298685189779, "loss": 0.2848, "step": 3697 }, { "epoch": 1.3, "grad_norm": 4.0798211097717285, "learning_rate": 0.00018159265690895557, "loss": 1.0078, "step": 3698 }, { "epoch": 1.3, "grad_norm": 2.6179561614990234, "learning_rate": 0.00018155544529893327, "loss": 0.3553, "step": 3699 }, { "epoch": 1.3, "grad_norm": 2.044512987136841, "learning_rate": 0.00018151823368891092, "loss": 0.1905, "step": 3700 }, { "epoch": 1.3, "eval_loss": 0.46060672402381897, "eval_runtime": 51.537, "eval_samples_per_second": 42.067, "eval_steps_per_second": 10.517, "eval_wer": 0.44300294066770457, "step": 3700 }, { "epoch": 1.3, "grad_norm": 3.8095672130584717, "learning_rate": 0.0001814810220788886, "loss": 1.5776, "step": 3701 }, { "epoch": 1.3, "grad_norm": 4.374634265899658, "learning_rate": 0.00018144381046886624, "loss": 1.4929, "step": 3702 }, { "epoch": 1.3, "grad_norm": 1.16568922996521, "learning_rate": 0.00018140659885884395, "loss": 0.1051, "step": 3703 }, { "epoch": 1.3, "grad_norm": 1.5767412185668945, "learning_rate": 0.00018136938724882162, "loss": 0.1765, "step": 3704 }, { "epoch": 1.3, "grad_norm": 4.894369125366211, "learning_rate": 0.00018133217563879927, "loss": 0.9859, "step": 3705 }, { "epoch": 1.3, "grad_norm": 3.5991573333740234, "learning_rate": 0.00018129496402877698, "loss": 0.5492, "step": 3706 }, { "epoch": 1.3, "grad_norm": 4.314338684082031, "learning_rate": 0.00018125775241875462, "loss": 0.9102, "step": 3707 }, { "epoch": 1.3, "grad_norm": 2.0932729244232178, "learning_rate": 0.0001812205408087323, "loss": 0.5376, "step": 3708 }, { "epoch": 1.3, "grad_norm": 1.41013765335083, "learning_rate": 0.00018118332919871, "loss": 0.2054, "step": 3709 }, { "epoch": 1.3, "grad_norm": 2.404813528060913, "learning_rate": 0.00018114611758868765, "loss": 0.2452, "step": 3710 }, { "epoch": 1.3, "grad_norm": 1.5884853601455688, "learning_rate": 0.00018110890597866533, "loss": 0.233, "step": 3711 }, { "epoch": 1.3, "grad_norm": 1.3731337785720825, 
"learning_rate": 0.00018107169436864303, "loss": 0.1386, "step": 3712 }, { "epoch": 1.3, "grad_norm": 3.145470142364502, "learning_rate": 0.00018103448275862068, "loss": 0.6139, "step": 3713 }, { "epoch": 1.3, "grad_norm": 1.8240619897842407, "learning_rate": 0.00018099727114859835, "loss": 0.4299, "step": 3714 }, { "epoch": 1.3, "grad_norm": 1.6262075901031494, "learning_rate": 0.000180960059538576, "loss": 0.2562, "step": 3715 }, { "epoch": 1.3, "grad_norm": 2.166367769241333, "learning_rate": 0.0001809228479285537, "loss": 0.3563, "step": 3716 }, { "epoch": 1.3, "grad_norm": 2.9389724731445312, "learning_rate": 0.00018088563631853138, "loss": 1.4989, "step": 3717 }, { "epoch": 1.3, "grad_norm": 1.0228006839752197, "learning_rate": 0.00018084842470850903, "loss": 0.0534, "step": 3718 }, { "epoch": 1.3, "grad_norm": 3.372701406478882, "learning_rate": 0.0001808112130984867, "loss": 0.4041, "step": 3719 }, { "epoch": 1.3, "grad_norm": 2.5104494094848633, "learning_rate": 0.00018077400148846438, "loss": 0.3649, "step": 3720 }, { "epoch": 1.3, "grad_norm": 3.1328682899475098, "learning_rate": 0.00018073678987844206, "loss": 0.5192, "step": 3721 }, { "epoch": 1.3, "grad_norm": 1.5079821348190308, "learning_rate": 0.00018069957826841973, "loss": 0.1187, "step": 3722 }, { "epoch": 1.3, "grad_norm": 2.341155529022217, "learning_rate": 0.00018066236665839738, "loss": 0.2129, "step": 3723 }, { "epoch": 1.3, "grad_norm": 3.211752414703369, "learning_rate": 0.00018062515504837509, "loss": 0.3416, "step": 3724 }, { "epoch": 1.31, "grad_norm": 1.5088387727737427, "learning_rate": 0.00018058794343835276, "loss": 0.2176, "step": 3725 }, { "epoch": 1.31, "grad_norm": 2.9890217781066895, "learning_rate": 0.0001805507318283304, "loss": 0.437, "step": 3726 }, { "epoch": 1.31, "grad_norm": 1.218353271484375, "learning_rate": 0.0001805135202183081, "loss": 0.1558, "step": 3727 }, { "epoch": 1.31, "grad_norm": 2.0937466621398926, "learning_rate": 0.00018047630860828576, "loss": 0.3612, 
"step": 3728 }, { "epoch": 1.31, "grad_norm": 2.505051612854004, "learning_rate": 0.00018043909699826344, "loss": 0.4007, "step": 3729 }, { "epoch": 1.31, "grad_norm": 2.501373529434204, "learning_rate": 0.00018040188538824114, "loss": 0.8542, "step": 3730 }, { "epoch": 1.31, "grad_norm": 1.9108080863952637, "learning_rate": 0.0001803646737782188, "loss": 0.6732, "step": 3731 }, { "epoch": 1.31, "grad_norm": 1.495678424835205, "learning_rate": 0.00018032746216819646, "loss": 0.2758, "step": 3732 }, { "epoch": 1.31, "grad_norm": 1.6871167421340942, "learning_rate": 0.0001802902505581741, "loss": 0.2974, "step": 3733 }, { "epoch": 1.31, "grad_norm": 1.7430815696716309, "learning_rate": 0.00018025303894815182, "loss": 0.3686, "step": 3734 }, { "epoch": 1.31, "grad_norm": 3.15686297416687, "learning_rate": 0.0001802158273381295, "loss": 0.4121, "step": 3735 }, { "epoch": 1.31, "grad_norm": 0.959276556968689, "learning_rate": 0.00018017861572810714, "loss": 0.0906, "step": 3736 }, { "epoch": 1.31, "grad_norm": 1.973922610282898, "learning_rate": 0.00018014140411808484, "loss": 0.3729, "step": 3737 }, { "epoch": 1.31, "grad_norm": 1.6108390092849731, "learning_rate": 0.0001801041925080625, "loss": 0.2135, "step": 3738 }, { "epoch": 1.31, "grad_norm": 1.7713210582733154, "learning_rate": 0.00018006698089804017, "loss": 0.447, "step": 3739 }, { "epoch": 1.31, "grad_norm": 2.696568727493286, "learning_rate": 0.00018002976928801784, "loss": 0.3003, "step": 3740 }, { "epoch": 1.31, "grad_norm": 1.4664280414581299, "learning_rate": 0.00017999255767799552, "loss": 0.174, "step": 3741 }, { "epoch": 1.31, "grad_norm": 3.218578815460205, "learning_rate": 0.0001799553460679732, "loss": 0.2536, "step": 3742 }, { "epoch": 1.31, "grad_norm": 3.3548967838287354, "learning_rate": 0.00017991813445795087, "loss": 0.3933, "step": 3743 }, { "epoch": 1.31, "grad_norm": 3.9141340255737305, "learning_rate": 0.00017988092284792852, "loss": 0.6092, "step": 3744 }, { "epoch": 1.31, "grad_norm": 
2.043869972229004, "learning_rate": 0.00017984371123790622, "loss": 0.3214, "step": 3745 }, { "epoch": 1.31, "grad_norm": 2.997690200805664, "learning_rate": 0.00017980649962788387, "loss": 1.3812, "step": 3746 }, { "epoch": 1.31, "grad_norm": 1.9347894191741943, "learning_rate": 0.00017976928801786155, "loss": 0.185, "step": 3747 }, { "epoch": 1.31, "grad_norm": 2.9342029094696045, "learning_rate": 0.00017973207640783925, "loss": 0.5739, "step": 3748 }, { "epoch": 1.31, "grad_norm": 2.7313919067382812, "learning_rate": 0.0001796948647978169, "loss": 0.3957, "step": 3749 }, { "epoch": 1.31, "grad_norm": 3.2632312774658203, "learning_rate": 0.00017965765318779457, "loss": 0.1384, "step": 3750 }, { "epoch": 1.31, "grad_norm": 2.596008539199829, "learning_rate": 0.00017962044157777222, "loss": 0.4888, "step": 3751 }, { "epoch": 1.31, "grad_norm": 2.564819574356079, "learning_rate": 0.00017958322996774993, "loss": 0.2528, "step": 3752 }, { "epoch": 1.31, "grad_norm": 2.4152514934539795, "learning_rate": 0.0001795460183577276, "loss": 0.2479, "step": 3753 }, { "epoch": 1.32, "grad_norm": 1.620201587677002, "learning_rate": 0.00017950880674770525, "loss": 0.1761, "step": 3754 }, { "epoch": 1.32, "grad_norm": 2.0693604946136475, "learning_rate": 0.00017947159513768295, "loss": 0.9116, "step": 3755 }, { "epoch": 1.32, "grad_norm": 1.889093041419983, "learning_rate": 0.00017943438352766063, "loss": 0.417, "step": 3756 }, { "epoch": 1.32, "grad_norm": 2.0303893089294434, "learning_rate": 0.00017939717191763828, "loss": 0.3572, "step": 3757 }, { "epoch": 1.32, "grad_norm": 1.2891769409179688, "learning_rate": 0.00017935996030761598, "loss": 0.1608, "step": 3758 }, { "epoch": 1.32, "grad_norm": 1.4122023582458496, "learning_rate": 0.00017932274869759363, "loss": 0.2832, "step": 3759 }, { "epoch": 1.32, "grad_norm": 2.285872459411621, "learning_rate": 0.0001792855370875713, "loss": 0.4338, "step": 3760 }, { "epoch": 1.32, "grad_norm": 1.5522655248641968, "learning_rate": 
0.00017924832547754898, "loss": 0.3917, "step": 3761 }, { "epoch": 1.32, "grad_norm": 5.722376823425293, "learning_rate": 0.00017921111386752666, "loss": 0.201, "step": 3762 }, { "epoch": 1.32, "grad_norm": 2.5768380165100098, "learning_rate": 0.00017917390225750433, "loss": 0.2258, "step": 3763 }, { "epoch": 1.32, "grad_norm": 3.3789002895355225, "learning_rate": 0.00017913669064748198, "loss": 0.6568, "step": 3764 }, { "epoch": 1.32, "grad_norm": 2.571653127670288, "learning_rate": 0.00017909947903745966, "loss": 0.3319, "step": 3765 }, { "epoch": 1.32, "grad_norm": 2.023444414138794, "learning_rate": 0.00017906226742743736, "loss": 0.2127, "step": 3766 }, { "epoch": 1.32, "grad_norm": 2.665778875350952, "learning_rate": 0.000179025055817415, "loss": 0.3803, "step": 3767 }, { "epoch": 1.32, "grad_norm": 2.8000216484069824, "learning_rate": 0.00017898784420739268, "loss": 0.3656, "step": 3768 }, { "epoch": 1.32, "grad_norm": 5.576625347137451, "learning_rate": 0.0001789506325973704, "loss": 2.0108, "step": 3769 }, { "epoch": 1.32, "grad_norm": 4.2713165283203125, "learning_rate": 0.00017891342098734804, "loss": 0.5785, "step": 3770 }, { "epoch": 1.32, "grad_norm": 2.837256908416748, "learning_rate": 0.0001788762093773257, "loss": 0.744, "step": 3771 }, { "epoch": 1.32, "grad_norm": 2.32977557182312, "learning_rate": 0.00017883899776730336, "loss": 0.1912, "step": 3772 }, { "epoch": 1.32, "grad_norm": 1.8392449617385864, "learning_rate": 0.00017880178615728106, "loss": 0.4553, "step": 3773 }, { "epoch": 1.32, "grad_norm": 2.949434757232666, "learning_rate": 0.00017876457454725874, "loss": 0.2178, "step": 3774 }, { "epoch": 1.32, "grad_norm": 1.4645886421203613, "learning_rate": 0.0001787273629372364, "loss": 0.1202, "step": 3775 }, { "epoch": 1.32, "grad_norm": 1.1854076385498047, "learning_rate": 0.0001786901513272141, "loss": 0.1576, "step": 3776 }, { "epoch": 1.32, "grad_norm": 1.7854406833648682, "learning_rate": 0.00017865293971719174, "loss": 0.2579, "step": 
3777 }, { "epoch": 1.32, "grad_norm": 3.848768472671509, "learning_rate": 0.00017861572810716942, "loss": 1.4119, "step": 3778 }, { "epoch": 1.32, "grad_norm": 2.103977918624878, "learning_rate": 0.00017857851649714712, "loss": 0.2911, "step": 3779 }, { "epoch": 1.32, "grad_norm": 2.866246461868286, "learning_rate": 0.00017854130488712477, "loss": 0.9159, "step": 3780 }, { "epoch": 1.32, "grad_norm": 1.2970139980316162, "learning_rate": 0.00017850409327710244, "loss": 0.4276, "step": 3781 }, { "epoch": 1.33, "grad_norm": 1.2081961631774902, "learning_rate": 0.0001784668816670801, "loss": 0.3139, "step": 3782 }, { "epoch": 1.33, "grad_norm": 2.1453487873077393, "learning_rate": 0.0001784296700570578, "loss": 0.7616, "step": 3783 }, { "epoch": 1.33, "grad_norm": 1.9024728536605835, "learning_rate": 0.00017839245844703547, "loss": 0.2492, "step": 3784 }, { "epoch": 1.33, "grad_norm": 1.2727264165878296, "learning_rate": 0.00017835524683701312, "loss": 0.2043, "step": 3785 }, { "epoch": 1.33, "grad_norm": 1.4936034679412842, "learning_rate": 0.0001783180352269908, "loss": 0.3883, "step": 3786 }, { "epoch": 1.33, "grad_norm": 1.377806544303894, "learning_rate": 0.0001782808236169685, "loss": 0.273, "step": 3787 }, { "epoch": 1.33, "grad_norm": 2.399864435195923, "learning_rate": 0.00017824361200694615, "loss": 0.3119, "step": 3788 }, { "epoch": 1.33, "grad_norm": 1.5656849145889282, "learning_rate": 0.00017820640039692382, "loss": 0.2332, "step": 3789 }, { "epoch": 1.33, "grad_norm": 15.384991645812988, "learning_rate": 0.0001781691887869015, "loss": 5.3366, "step": 3790 }, { "epoch": 1.33, "grad_norm": 3.5615835189819336, "learning_rate": 0.00017813197717687917, "loss": 0.8114, "step": 3791 }, { "epoch": 1.33, "grad_norm": 2.1784439086914062, "learning_rate": 0.00017809476556685685, "loss": 0.5916, "step": 3792 }, { "epoch": 1.33, "grad_norm": 1.5509488582611084, "learning_rate": 0.0001780575539568345, "loss": 0.2626, "step": 3793 }, { "epoch": 1.33, "grad_norm": 
0.8482944369316101, "learning_rate": 0.0001780203423468122, "loss": 0.066, "step": 3794 }, { "epoch": 1.33, "grad_norm": 1.3398417234420776, "learning_rate": 0.00017798313073678985, "loss": 0.1493, "step": 3795 }, { "epoch": 1.33, "grad_norm": 1.738661766052246, "learning_rate": 0.00017794591912676753, "loss": 0.198, "step": 3796 }, { "epoch": 1.33, "grad_norm": 4.550568580627441, "learning_rate": 0.00017790870751674523, "loss": 1.3028, "step": 3797 }, { "epoch": 1.33, "grad_norm": 2.300915002822876, "learning_rate": 0.00017787149590672288, "loss": 0.2393, "step": 3798 }, { "epoch": 1.33, "grad_norm": 1.3625463247299194, "learning_rate": 0.00017783428429670055, "loss": 0.1016, "step": 3799 }, { "epoch": 1.33, "grad_norm": 2.7032511234283447, "learning_rate": 0.00017779707268667826, "loss": 0.4724, "step": 3800 }, { "epoch": 1.33, "eval_loss": 0.49983254075050354, "eval_runtime": 51.5096, "eval_samples_per_second": 42.089, "eval_steps_per_second": 10.522, "eval_wer": 0.4850371907974399, "step": 3800 }, { "epoch": 1.33, "grad_norm": 3.71159029006958, "learning_rate": 0.0001777598610766559, "loss": 0.169, "step": 3801 }, { "epoch": 1.33, "grad_norm": 2.478241205215454, "learning_rate": 0.00017772264946663358, "loss": 0.2778, "step": 3802 }, { "epoch": 1.33, "grad_norm": 3.812366247177124, "learning_rate": 0.00017768543785661123, "loss": 0.4634, "step": 3803 }, { "epoch": 1.33, "grad_norm": 1.6730952262878418, "learning_rate": 0.00017764822624658893, "loss": 0.0989, "step": 3804 }, { "epoch": 1.33, "grad_norm": 3.086575508117676, "learning_rate": 0.0001776110146365666, "loss": 0.9503, "step": 3805 }, { "epoch": 1.33, "grad_norm": 1.9270522594451904, "learning_rate": 0.00017757380302654426, "loss": 0.7548, "step": 3806 }, { "epoch": 1.33, "grad_norm": 1.6181902885437012, "learning_rate": 0.00017753659141652193, "loss": 0.3449, "step": 3807 }, { "epoch": 1.33, "grad_norm": 2.112922430038452, "learning_rate": 0.0001774993798064996, "loss": 0.4004, "step": 3808 }, { 
"epoch": 1.33, "grad_norm": 2.618039846420288, "learning_rate": 0.00017746216819647728, "loss": 0.4503, "step": 3809 }, { "epoch": 1.33, "grad_norm": 2.6346006393432617, "learning_rate": 0.00017742495658645496, "loss": 0.6283, "step": 3810 }, { "epoch": 1.34, "grad_norm": 2.166271448135376, "learning_rate": 0.00017738774497643264, "loss": 0.21, "step": 3811 }, { "epoch": 1.34, "grad_norm": 1.9964041709899902, "learning_rate": 0.0001773505333664103, "loss": 0.1659, "step": 3812 }, { "epoch": 1.34, "grad_norm": 1.5653916597366333, "learning_rate": 0.000177313321756388, "loss": 0.2786, "step": 3813 }, { "epoch": 1.34, "grad_norm": 3.1856682300567627, "learning_rate": 0.00017727611014636564, "loss": 0.3468, "step": 3814 }, { "epoch": 1.34, "grad_norm": 1.068045973777771, "learning_rate": 0.00017723889853634334, "loss": 0.1729, "step": 3815 }, { "epoch": 1.34, "grad_norm": 2.467663526535034, "learning_rate": 0.000177201686926321, "loss": 0.3148, "step": 3816 }, { "epoch": 1.34, "grad_norm": 2.52803373336792, "learning_rate": 0.00017716447531629866, "loss": 0.6002, "step": 3817 }, { "epoch": 1.34, "grad_norm": 1.5784509181976318, "learning_rate": 0.00017712726370627637, "loss": 0.1783, "step": 3818 }, { "epoch": 1.34, "grad_norm": 5.923018455505371, "learning_rate": 0.00017709005209625401, "loss": 0.2747, "step": 3819 }, { "epoch": 1.34, "grad_norm": 1.4094946384429932, "learning_rate": 0.0001770528404862317, "loss": 0.0858, "step": 3820 }, { "epoch": 1.34, "grad_norm": 1.2953375577926636, "learning_rate": 0.00017701562887620934, "loss": 0.0638, "step": 3821 }, { "epoch": 1.34, "grad_norm": 4.134175777435303, "learning_rate": 0.00017697841726618704, "loss": 0.4833, "step": 3822 }, { "epoch": 1.34, "grad_norm": 4.836775302886963, "learning_rate": 0.00017694120565616472, "loss": 0.8205, "step": 3823 }, { "epoch": 1.34, "grad_norm": 3.950936794281006, "learning_rate": 0.00017690399404614237, "loss": 0.4628, "step": 3824 }, { "epoch": 1.34, "grad_norm": 2.5039966106414795, 
"learning_rate": 0.00017686678243612007, "loss": 0.2418, "step": 3825 }, { "epoch": 1.34, "grad_norm": 1.359667420387268, "learning_rate": 0.00017682957082609772, "loss": 0.1673, "step": 3826 }, { "epoch": 1.34, "grad_norm": 3.17059326171875, "learning_rate": 0.0001767923592160754, "loss": 0.2301, "step": 3827 }, { "epoch": 1.34, "grad_norm": 3.165222406387329, "learning_rate": 0.00017675514760605307, "loss": 0.2882, "step": 3828 }, { "epoch": 1.34, "grad_norm": 2.6665470600128174, "learning_rate": 0.00017671793599603075, "loss": 0.1528, "step": 3829 }, { "epoch": 1.34, "grad_norm": 3.1193861961364746, "learning_rate": 0.00017668072438600842, "loss": 0.8316, "step": 3830 }, { "epoch": 1.34, "grad_norm": 3.0916197299957275, "learning_rate": 0.0001766435127759861, "loss": 0.7043, "step": 3831 }, { "epoch": 1.34, "grad_norm": 2.5814852714538574, "learning_rate": 0.00017660630116596377, "loss": 0.8129, "step": 3832 }, { "epoch": 1.34, "grad_norm": 2.543747663497925, "learning_rate": 0.00017656908955594145, "loss": 0.3409, "step": 3833 }, { "epoch": 1.34, "grad_norm": 4.633474826812744, "learning_rate": 0.0001765318779459191, "loss": 1.6709, "step": 3834 }, { "epoch": 1.34, "grad_norm": 1.9206253290176392, "learning_rate": 0.00017649466633589677, "loss": 0.4245, "step": 3835 }, { "epoch": 1.34, "grad_norm": 2.735995054244995, "learning_rate": 0.00017645745472587448, "loss": 0.3843, "step": 3836 }, { "epoch": 1.34, "grad_norm": 1.561367392539978, "learning_rate": 0.00017642024311585212, "loss": 0.3513, "step": 3837 }, { "epoch": 1.34, "grad_norm": 2.5527138710021973, "learning_rate": 0.0001763830315058298, "loss": 0.2287, "step": 3838 }, { "epoch": 1.35, "grad_norm": 2.1322858333587646, "learning_rate": 0.00017634581989580745, "loss": 0.465, "step": 3839 }, { "epoch": 1.35, "grad_norm": 3.6257131099700928, "learning_rate": 0.00017630860828578515, "loss": 0.6081, "step": 3840 }, { "epoch": 1.35, "grad_norm": 2.3494338989257812, "learning_rate": 0.00017627139667576283, 
"loss": 0.5918, "step": 3841 }, { "epoch": 1.35, "grad_norm": 1.8620758056640625, "learning_rate": 0.00017623418506574048, "loss": 0.3641, "step": 3842 }, { "epoch": 1.35, "grad_norm": 1.5785250663757324, "learning_rate": 0.00017619697345571818, "loss": 0.1248, "step": 3843 }, { "epoch": 1.35, "grad_norm": 1.321878433227539, "learning_rate": 0.00017615976184569586, "loss": 0.1409, "step": 3844 }, { "epoch": 1.35, "grad_norm": 1.8503745794296265, "learning_rate": 0.0001761225502356735, "loss": 0.2243, "step": 3845 }, { "epoch": 1.35, "grad_norm": 0.8740498423576355, "learning_rate": 0.0001760853386256512, "loss": 0.0779, "step": 3846 }, { "epoch": 1.35, "grad_norm": 2.1911394596099854, "learning_rate": 0.00017604812701562886, "loss": 0.2616, "step": 3847 }, { "epoch": 1.35, "grad_norm": 1.2762500047683716, "learning_rate": 0.00017601091540560653, "loss": 0.0597, "step": 3848 }, { "epoch": 1.35, "grad_norm": 3.6884820461273193, "learning_rate": 0.0001759737037955842, "loss": 0.4311, "step": 3849 }, { "epoch": 1.35, "grad_norm": 1.9794673919677734, "learning_rate": 0.00017593649218556188, "loss": 0.1456, "step": 3850 }, { "epoch": 1.35, "grad_norm": 18.375457763671875, "learning_rate": 0.00017589928057553956, "loss": 0.7519, "step": 3851 }, { "epoch": 1.35, "grad_norm": 9.318000793457031, "learning_rate": 0.0001758620689655172, "loss": 1.2229, "step": 3852 }, { "epoch": 1.35, "grad_norm": 0.8230860233306885, "learning_rate": 0.0001758248573554949, "loss": 0.0594, "step": 3853 }, { "epoch": 1.35, "grad_norm": 3.9834702014923096, "learning_rate": 0.00017578764574547259, "loss": 1.0047, "step": 3854 }, { "epoch": 1.35, "grad_norm": 2.5470404624938965, "learning_rate": 0.00017575043413545023, "loss": 0.7612, "step": 3855 }, { "epoch": 1.35, "grad_norm": 2.304075241088867, "learning_rate": 0.0001757132225254279, "loss": 0.4069, "step": 3856 }, { "epoch": 1.35, "grad_norm": 2.3937854766845703, "learning_rate": 0.0001756760109154056, "loss": 0.4114, "step": 3857 }, { 
"epoch": 1.35, "grad_norm": 1.7787152528762817, "learning_rate": 0.00017563879930538326, "loss": 0.3035, "step": 3858 }, { "epoch": 1.35, "grad_norm": 1.8002721071243286, "learning_rate": 0.00017560158769536094, "loss": 0.6163, "step": 3859 }, { "epoch": 1.35, "grad_norm": 1.9406236410140991, "learning_rate": 0.0001755643760853386, "loss": 0.2896, "step": 3860 }, { "epoch": 1.35, "grad_norm": 2.4129421710968018, "learning_rate": 0.0001755271644753163, "loss": 0.6502, "step": 3861 }, { "epoch": 1.35, "grad_norm": 2.7634332180023193, "learning_rate": 0.00017548995286529397, "loss": 0.6027, "step": 3862 }, { "epoch": 1.35, "grad_norm": 1.0977592468261719, "learning_rate": 0.00017545274125527161, "loss": 0.1788, "step": 3863 }, { "epoch": 1.35, "grad_norm": 3.4504082202911377, "learning_rate": 0.00017541552964524932, "loss": 0.3887, "step": 3864 }, { "epoch": 1.35, "grad_norm": 1.857100009918213, "learning_rate": 0.00017537831803522697, "loss": 0.2107, "step": 3865 }, { "epoch": 1.35, "grad_norm": 4.293605327606201, "learning_rate": 0.00017534110642520464, "loss": 1.4237, "step": 3866 }, { "epoch": 1.35, "grad_norm": 2.1752376556396484, "learning_rate": 0.00017530389481518234, "loss": 0.3186, "step": 3867 }, { "epoch": 1.36, "grad_norm": 2.9012653827667236, "learning_rate": 0.00017526668320516, "loss": 0.3952, "step": 3868 }, { "epoch": 1.36, "grad_norm": 1.6721316576004028, "learning_rate": 0.00017522947159513767, "loss": 0.1556, "step": 3869 }, { "epoch": 1.36, "grad_norm": 3.3207669258117676, "learning_rate": 0.00017519225998511532, "loss": 0.1144, "step": 3870 }, { "epoch": 1.36, "grad_norm": 2.309567928314209, "learning_rate": 0.00017515504837509302, "loss": 0.2904, "step": 3871 }, { "epoch": 1.36, "grad_norm": 3.172945261001587, "learning_rate": 0.0001751178367650707, "loss": 0.4218, "step": 3872 }, { "epoch": 1.36, "grad_norm": 1.9167137145996094, "learning_rate": 0.00017508062515504834, "loss": 0.1754, "step": 3873 }, { "epoch": 1.36, "grad_norm": 
0.7879188656806946, "learning_rate": 0.00017504341354502605, "loss": 0.0516, "step": 3874 }, { "epoch": 1.36, "grad_norm": 10.358311653137207, "learning_rate": 0.00017500620193500372, "loss": 0.6033, "step": 3875 }, { "epoch": 1.36, "grad_norm": 3.172363758087158, "learning_rate": 0.00017496899032498137, "loss": 0.2935, "step": 3876 }, { "epoch": 1.36, "grad_norm": 3.7427735328674316, "learning_rate": 0.00017493177871495905, "loss": 0.2327, "step": 3877 }, { "epoch": 1.36, "grad_norm": 5.786536693572998, "learning_rate": 0.00017489456710493672, "loss": 1.0866, "step": 3878 }, { "epoch": 1.36, "grad_norm": 5.266422748565674, "learning_rate": 0.0001748573554949144, "loss": 1.0328, "step": 3879 }, { "epoch": 1.36, "grad_norm": 1.993481993675232, "learning_rate": 0.00017482014388489208, "loss": 0.6055, "step": 3880 }, { "epoch": 1.36, "grad_norm": 2.0876619815826416, "learning_rate": 0.00017478293227486972, "loss": 0.6218, "step": 3881 }, { "epoch": 1.36, "grad_norm": 2.1874656677246094, "learning_rate": 0.00017474572066484743, "loss": 0.3442, "step": 3882 }, { "epoch": 1.36, "grad_norm": 1.9037868976593018, "learning_rate": 0.00017470850905482508, "loss": 0.1977, "step": 3883 }, { "epoch": 1.36, "grad_norm": 2.1550540924072266, "learning_rate": 0.00017467129744480275, "loss": 0.3056, "step": 3884 }, { "epoch": 1.36, "grad_norm": 2.475119113922119, "learning_rate": 0.00017463408583478045, "loss": 0.4085, "step": 3885 }, { "epoch": 1.36, "grad_norm": 2.8540899753570557, "learning_rate": 0.0001745968742247581, "loss": 0.2703, "step": 3886 }, { "epoch": 1.36, "grad_norm": 1.23556649684906, "learning_rate": 0.00017455966261473578, "loss": 0.1271, "step": 3887 }, { "epoch": 1.36, "grad_norm": 3.1399030685424805, "learning_rate": 0.00017452245100471348, "loss": 0.3749, "step": 3888 }, { "epoch": 1.36, "grad_norm": 2.0435614585876465, "learning_rate": 0.00017448523939469113, "loss": 0.3553, "step": 3889 }, { "epoch": 1.36, "grad_norm": 2.5815916061401367, "learning_rate": 
0.0001744480277846688, "loss": 0.3135, "step": 3890 }, { "epoch": 1.36, "grad_norm": 2.8995308876037598, "learning_rate": 0.00017441081617464645, "loss": 0.3687, "step": 3891 }, { "epoch": 1.36, "grad_norm": 3.48101544380188, "learning_rate": 0.00017437360456462416, "loss": 0.5593, "step": 3892 }, { "epoch": 1.36, "grad_norm": 2.921421766281128, "learning_rate": 0.00017433639295460183, "loss": 0.3693, "step": 3893 }, { "epoch": 1.36, "grad_norm": 1.2923214435577393, "learning_rate": 0.00017429918134457948, "loss": 0.0802, "step": 3894 }, { "epoch": 1.36, "grad_norm": 2.212675094604492, "learning_rate": 0.00017426196973455719, "loss": 0.3009, "step": 3895 }, { "epoch": 1.37, "grad_norm": 4.238613128662109, "learning_rate": 0.00017422475812453483, "loss": 0.2686, "step": 3896 }, { "epoch": 1.37, "grad_norm": 2.2763991355895996, "learning_rate": 0.0001741875465145125, "loss": 0.234, "step": 3897 }, { "epoch": 1.37, "grad_norm": 2.234372615814209, "learning_rate": 0.00017415033490449019, "loss": 0.3564, "step": 3898 }, { "epoch": 1.37, "grad_norm": 4.635880470275879, "learning_rate": 0.00017411312329446786, "loss": 0.1816, "step": 3899 }, { "epoch": 1.37, "grad_norm": 2.752310037612915, "learning_rate": 0.00017407591168444554, "loss": 0.2585, "step": 3900 }, { "epoch": 1.37, "eval_loss": 0.5356029272079468, "eval_runtime": 51.7497, "eval_samples_per_second": 41.894, "eval_steps_per_second": 10.473, "eval_wer": 0.4876318975955717, "step": 3900 }, { "epoch": 1.37, "grad_norm": 2.9280731678009033, "learning_rate": 0.0001740387000744232, "loss": 0.4685, "step": 3901 }, { "epoch": 1.37, "grad_norm": 2.367094039916992, "learning_rate": 0.00017400148846440086, "loss": 0.1438, "step": 3902 }, { "epoch": 1.37, "grad_norm": 2.750795602798462, "learning_rate": 0.00017396427685437856, "loss": 0.3067, "step": 3903 }, { "epoch": 1.37, "grad_norm": 2.7507309913635254, "learning_rate": 0.0001739270652443562, "loss": 0.4385, "step": 3904 }, { "epoch": 1.37, "grad_norm": 
1.9318255186080933, "learning_rate": 0.0001738898536343339, "loss": 0.5685, "step": 3905 }, { "epoch": 1.37, "grad_norm": 1.8872981071472168, "learning_rate": 0.0001738526420243116, "loss": 0.5847, "step": 3906 }, { "epoch": 1.37, "grad_norm": 1.6942192316055298, "learning_rate": 0.00017381543041428924, "loss": 0.3749, "step": 3907 }, { "epoch": 1.37, "grad_norm": 1.610882043838501, "learning_rate": 0.00017377821880426692, "loss": 0.3559, "step": 3908 }, { "epoch": 1.37, "grad_norm": 1.635751485824585, "learning_rate": 0.00017374100719424456, "loss": 0.1369, "step": 3909 }, { "epoch": 1.37, "grad_norm": 2.099339723587036, "learning_rate": 0.00017370379558422227, "loss": 0.6477, "step": 3910 }, { "epoch": 1.37, "grad_norm": 1.2585214376449585, "learning_rate": 0.00017366658397419994, "loss": 0.272, "step": 3911 }, { "epoch": 1.37, "grad_norm": 3.2351205348968506, "learning_rate": 0.0001736293723641776, "loss": 0.5484, "step": 3912 }, { "epoch": 1.37, "grad_norm": 2.1207034587860107, "learning_rate": 0.0001735921607541553, "loss": 0.3723, "step": 3913 }, { "epoch": 1.37, "grad_norm": 2.07908034324646, "learning_rate": 0.00017355494914413297, "loss": 0.314, "step": 3914 }, { "epoch": 1.37, "grad_norm": 2.0071959495544434, "learning_rate": 0.00017351773753411062, "loss": 0.6595, "step": 3915 }, { "epoch": 1.37, "grad_norm": 2.149886131286621, "learning_rate": 0.00017348052592408832, "loss": 0.6534, "step": 3916 }, { "epoch": 1.37, "grad_norm": 1.7911567687988281, "learning_rate": 0.00017344331431406597, "loss": 0.2316, "step": 3917 }, { "epoch": 1.37, "grad_norm": 3.381746292114258, "learning_rate": 0.00017340610270404365, "loss": 1.408, "step": 3918 }, { "epoch": 1.37, "grad_norm": 4.066563606262207, "learning_rate": 0.00017336889109402132, "loss": 0.5341, "step": 3919 }, { "epoch": 1.37, "grad_norm": 2.2210659980773926, "learning_rate": 0.000173331679483999, "loss": 0.2397, "step": 3920 }, { "epoch": 1.37, "grad_norm": 2.28897762298584, "learning_rate": 
0.00017329446787397667, "loss": 0.277, "step": 3921 }, { "epoch": 1.37, "grad_norm": 2.897461175918579, "learning_rate": 0.00017325725626395432, "loss": 0.5583, "step": 3922 }, { "epoch": 1.37, "grad_norm": 2.140532970428467, "learning_rate": 0.000173220044653932, "loss": 0.4073, "step": 3923 }, { "epoch": 1.37, "grad_norm": 3.024677276611328, "learning_rate": 0.0001731828330439097, "loss": 0.5048, "step": 3924 }, { "epoch": 1.38, "grad_norm": 2.735630750656128, "learning_rate": 0.00017314562143388735, "loss": 0.374, "step": 3925 }, { "epoch": 1.38, "grad_norm": 5.665718078613281, "learning_rate": 0.00017310840982386503, "loss": 0.7075, "step": 3926 }, { "epoch": 1.38, "grad_norm": 4.23260498046875, "learning_rate": 0.00017307119821384268, "loss": 1.46, "step": 3927 }, { "epoch": 1.38, "grad_norm": 1.9788540601730347, "learning_rate": 0.00017303398660382038, "loss": 0.3469, "step": 3928 }, { "epoch": 1.38, "grad_norm": 1.4005393981933594, "learning_rate": 0.00017299677499379805, "loss": 0.1381, "step": 3929 }, { "epoch": 1.38, "grad_norm": 1.7616915702819824, "learning_rate": 0.0001729595633837757, "loss": 0.7645, "step": 3930 }, { "epoch": 1.38, "grad_norm": 2.4172654151916504, "learning_rate": 0.0001729223517737534, "loss": 0.5808, "step": 3931 }, { "epoch": 1.38, "grad_norm": 1.8812202215194702, "learning_rate": 0.00017288514016373108, "loss": 0.3962, "step": 3932 }, { "epoch": 1.38, "grad_norm": 2.0839879512786865, "learning_rate": 0.00017284792855370873, "loss": 0.3107, "step": 3933 }, { "epoch": 1.38, "grad_norm": 1.7547316551208496, "learning_rate": 0.00017281071694368643, "loss": 0.4774, "step": 3934 }, { "epoch": 1.38, "grad_norm": 1.7336246967315674, "learning_rate": 0.00017277350533366408, "loss": 0.297, "step": 3935 }, { "epoch": 1.38, "grad_norm": 1.464845895767212, "learning_rate": 0.00017273629372364176, "loss": 0.2388, "step": 3936 }, { "epoch": 1.38, "grad_norm": 1.2136765718460083, "learning_rate": 0.00017269908211361946, "loss": 0.0873, "step": 
3937 }, { "epoch": 1.38, "grad_norm": 1.4514065980911255, "learning_rate": 0.0001726618705035971, "loss": 0.1335, "step": 3938 }, { "epoch": 1.38, "grad_norm": 3.2778918743133545, "learning_rate": 0.00017262465889357478, "loss": 0.3828, "step": 3939 }, { "epoch": 1.38, "grad_norm": 1.513120174407959, "learning_rate": 0.00017258744728355243, "loss": 0.1283, "step": 3940 }, { "epoch": 1.38, "grad_norm": 1.7113932371139526, "learning_rate": 0.00017255023567353014, "loss": 0.1633, "step": 3941 }, { "epoch": 1.38, "grad_norm": 2.0901389122009277, "learning_rate": 0.0001725130240635078, "loss": 0.2138, "step": 3942 }, { "epoch": 1.38, "grad_norm": 4.5583720207214355, "learning_rate": 0.00017247581245348546, "loss": 0.7136, "step": 3943 }, { "epoch": 1.38, "grad_norm": 1.8743005990982056, "learning_rate": 0.00017243860084346314, "loss": 0.2668, "step": 3944 }, { "epoch": 1.38, "grad_norm": 3.3022148609161377, "learning_rate": 0.00017240138923344084, "loss": 1.001, "step": 3945 }, { "epoch": 1.38, "grad_norm": 3.1593058109283447, "learning_rate": 0.0001723641776234185, "loss": 0.358, "step": 3946 }, { "epoch": 1.38, "grad_norm": 4.342871189117432, "learning_rate": 0.00017232696601339616, "loss": 0.8455, "step": 3947 }, { "epoch": 1.38, "grad_norm": 4.121707439422607, "learning_rate": 0.0001722897544033738, "loss": 0.511, "step": 3948 }, { "epoch": 1.38, "grad_norm": 2.737363815307617, "learning_rate": 0.00017225254279335152, "loss": 0.2577, "step": 3949 }, { "epoch": 1.38, "grad_norm": 4.477611541748047, "learning_rate": 0.0001722153311833292, "loss": 1.0024, "step": 3950 }, { "epoch": 1.38, "grad_norm": 2.959623336791992, "learning_rate": 0.00017217811957330684, "loss": 0.4979, "step": 3951 }, { "epoch": 1.38, "grad_norm": 3.6896047592163086, "learning_rate": 0.00017214090796328454, "loss": 0.502, "step": 3952 }, { "epoch": 1.39, "grad_norm": 2.5401012897491455, "learning_rate": 0.0001721036963532622, "loss": 0.4048, "step": 3953 }, { "epoch": 1.39, "grad_norm": 
2.559720039367676, "learning_rate": 0.00017206648474323987, "loss": 0.201, "step": 3954 }, { "epoch": 1.39, "grad_norm": 1.9754453897476196, "learning_rate": 0.00017202927313321757, "loss": 0.6484, "step": 3955 }, { "epoch": 1.39, "grad_norm": 1.5338460206985474, "learning_rate": 0.00017199206152319522, "loss": 0.3546, "step": 3956 }, { "epoch": 1.39, "grad_norm": 1.5986213684082031, "learning_rate": 0.0001719548499131729, "loss": 0.3799, "step": 3957 }, { "epoch": 1.39, "grad_norm": 2.644622802734375, "learning_rate": 0.0001719176383031506, "loss": 0.5743, "step": 3958 }, { "epoch": 1.39, "grad_norm": 2.5025296211242676, "learning_rate": 0.00017188042669312825, "loss": 0.3493, "step": 3959 }, { "epoch": 1.39, "grad_norm": 1.100515604019165, "learning_rate": 0.00017184321508310592, "loss": 0.0687, "step": 3960 }, { "epoch": 1.39, "grad_norm": 1.8527333736419678, "learning_rate": 0.00017180600347308357, "loss": 0.172, "step": 3961 }, { "epoch": 1.39, "grad_norm": 2.1646475791931152, "learning_rate": 0.00017176879186306127, "loss": 0.3352, "step": 3962 }, { "epoch": 1.39, "grad_norm": 1.9406778812408447, "learning_rate": 0.00017173158025303895, "loss": 0.2927, "step": 3963 }, { "epoch": 1.39, "grad_norm": 1.5747565031051636, "learning_rate": 0.0001716943686430166, "loss": 0.2809, "step": 3964 }, { "epoch": 1.39, "grad_norm": 2.524975299835205, "learning_rate": 0.00017165715703299427, "loss": 0.3335, "step": 3965 }, { "epoch": 1.39, "grad_norm": 4.1112823486328125, "learning_rate": 0.00017161994542297195, "loss": 0.3868, "step": 3966 }, { "epoch": 1.39, "grad_norm": 2.326080799102783, "learning_rate": 0.00017158273381294963, "loss": 0.4065, "step": 3967 }, { "epoch": 1.39, "grad_norm": 1.6009414196014404, "learning_rate": 0.0001715455222029273, "loss": 0.1864, "step": 3968 }, { "epoch": 1.39, "grad_norm": 14.051066398620605, "learning_rate": 0.00017150831059290495, "loss": 2.8598, "step": 3969 }, { "epoch": 1.39, "grad_norm": 3.3767621517181396, "learning_rate": 
0.00017147109898288265, "loss": 0.192, "step": 3970 }, { "epoch": 1.39, "grad_norm": 4.84522008895874, "learning_rate": 0.0001714338873728603, "loss": 0.5361, "step": 3971 }, { "epoch": 1.39, "grad_norm": 3.2274253368377686, "learning_rate": 0.00017139667576283798, "loss": 0.5027, "step": 3972 }, { "epoch": 1.39, "grad_norm": 1.2869607210159302, "learning_rate": 0.00017135946415281568, "loss": 0.1095, "step": 3973 }, { "epoch": 1.39, "grad_norm": 2.0447194576263428, "learning_rate": 0.00017132225254279333, "loss": 0.2067, "step": 3974 }, { "epoch": 1.39, "grad_norm": 4.841538906097412, "learning_rate": 0.000171285040932771, "loss": 0.6107, "step": 3975 }, { "epoch": 1.39, "grad_norm": 1.8944703340530396, "learning_rate": 0.0001712478293227487, "loss": 0.1473, "step": 3976 }, { "epoch": 1.39, "grad_norm": 4.2880659103393555, "learning_rate": 0.00017121061771272636, "loss": 1.5288, "step": 3977 }, { "epoch": 1.39, "grad_norm": 3.7833964824676514, "learning_rate": 0.00017117340610270403, "loss": 0.4495, "step": 3978 }, { "epoch": 1.39, "grad_norm": 5.308502674102783, "learning_rate": 0.00017113619449268168, "loss": 1.1916, "step": 3979 }, { "epoch": 1.39, "grad_norm": 1.6576392650604248, "learning_rate": 0.00017109898288265938, "loss": 0.5451, "step": 3980 }, { "epoch": 1.39, "grad_norm": 1.7152270078659058, "learning_rate": 0.00017106177127263706, "loss": 0.4996, "step": 3981 }, { "epoch": 1.4, "grad_norm": 2.689882278442383, "learning_rate": 0.0001710245596626147, "loss": 0.8473, "step": 3982 }, { "epoch": 1.4, "grad_norm": 2.522495746612549, "learning_rate": 0.0001709873480525924, "loss": 0.6978, "step": 3983 }, { "epoch": 1.4, "grad_norm": 1.8638933897018433, "learning_rate": 0.00017095013644257006, "loss": 0.3669, "step": 3984 }, { "epoch": 1.4, "grad_norm": 1.6137725114822388, "learning_rate": 0.00017091292483254774, "loss": 0.2279, "step": 3985 }, { "epoch": 1.4, "grad_norm": 3.917705774307251, "learning_rate": 0.0001708757132225254, "loss": 1.6021, "step": 
3986 }, { "epoch": 1.4, "grad_norm": 3.742889165878296, "learning_rate": 0.0001708385016125031, "loss": 0.6855, "step": 3987 }, { "epoch": 1.4, "grad_norm": 2.922379493713379, "learning_rate": 0.00017080129000248076, "loss": 0.2816, "step": 3988 }, { "epoch": 1.4, "grad_norm": 1.8310586214065552, "learning_rate": 0.00017076407839245844, "loss": 0.3215, "step": 3989 }, { "epoch": 1.4, "grad_norm": 1.7077147960662842, "learning_rate": 0.0001707268667824361, "loss": 0.4342, "step": 3990 }, { "epoch": 1.4, "grad_norm": 1.127012014389038, "learning_rate": 0.0001706896551724138, "loss": 0.1672, "step": 3991 }, { "epoch": 1.4, "grad_norm": 2.1023213863372803, "learning_rate": 0.00017065244356239144, "loss": 0.4928, "step": 3992 }, { "epoch": 1.4, "grad_norm": 1.650435209274292, "learning_rate": 0.00017061523195236911, "loss": 0.3392, "step": 3993 }, { "epoch": 1.4, "grad_norm": 1.628867268562317, "learning_rate": 0.00017057802034234682, "loss": 0.1326, "step": 3994 }, { "epoch": 1.4, "grad_norm": 3.0410878658294678, "learning_rate": 0.00017054080873232447, "loss": 0.3564, "step": 3995 }, { "epoch": 1.4, "grad_norm": 4.636041164398193, "learning_rate": 0.00017050359712230214, "loss": 0.5428, "step": 3996 }, { "epoch": 1.4, "grad_norm": 2.0328681468963623, "learning_rate": 0.0001704663855122798, "loss": 0.1978, "step": 3997 }, { "epoch": 1.4, "grad_norm": 2.5405378341674805, "learning_rate": 0.0001704291739022575, "loss": 0.341, "step": 3998 }, { "epoch": 1.4, "grad_norm": 2.484320640563965, "learning_rate": 0.00017039196229223517, "loss": 0.3324, "step": 3999 }, { "epoch": 1.4, "grad_norm": 2.7092721462249756, "learning_rate": 0.00017035475068221282, "loss": 0.3742, "step": 4000 }, { "epoch": 1.4, "eval_loss": 0.4177427291870117, "eval_runtime": 50.8405, "eval_samples_per_second": 42.643, "eval_steps_per_second": 10.661, "eval_wer": 0.3851409790693652, "step": 4000 }, { "epoch": 1.4, "grad_norm": 1.0885396003723145, "learning_rate": 0.00017031753907219052, "loss": 0.0431, 
"step": 4001 }, { "epoch": 1.4, "grad_norm": 3.04752254486084, "learning_rate": 0.0001702803274621682, "loss": 0.2688, "step": 4002 }, { "epoch": 1.4, "grad_norm": 2.453718900680542, "learning_rate": 0.00017024311585214585, "loss": 0.1588, "step": 4003 }, { "epoch": 1.4, "grad_norm": 2.611881732940674, "learning_rate": 0.00017020590424212355, "loss": 0.1296, "step": 4004 }, { "epoch": 1.4, "grad_norm": 1.520546555519104, "learning_rate": 0.0001701686926321012, "loss": 0.4584, "step": 4005 }, { "epoch": 1.4, "grad_norm": 1.637783408164978, "learning_rate": 0.00017013148102207887, "loss": 0.4303, "step": 4006 }, { "epoch": 1.4, "grad_norm": 1.7446013689041138, "learning_rate": 0.00017009426941205655, "loss": 0.2818, "step": 4007 }, { "epoch": 1.4, "grad_norm": 1.9278056621551514, "learning_rate": 0.00017005705780203422, "loss": 0.1499, "step": 4008 }, { "epoch": 1.4, "grad_norm": 1.8733441829681396, "learning_rate": 0.0001700198461920119, "loss": 0.3069, "step": 4009 }, { "epoch": 1.41, "grad_norm": 1.9108270406723022, "learning_rate": 0.00016998263458198955, "loss": 0.3054, "step": 4010 }, { "epoch": 1.41, "grad_norm": 1.8484750986099243, "learning_rate": 0.00016994542297196722, "loss": 0.2867, "step": 4011 }, { "epoch": 1.41, "grad_norm": 0.9013972878456116, "learning_rate": 0.00016990821136194493, "loss": 0.129, "step": 4012 }, { "epoch": 1.41, "grad_norm": 1.9117753505706787, "learning_rate": 0.00016987099975192258, "loss": 0.2353, "step": 4013 }, { "epoch": 1.41, "grad_norm": 1.5956077575683594, "learning_rate": 0.00016983378814190025, "loss": 0.1636, "step": 4014 }, { "epoch": 1.41, "grad_norm": 3.0155153274536133, "learning_rate": 0.00016979657653187793, "loss": 1.5728, "step": 4015 }, { "epoch": 1.41, "grad_norm": 1.7413406372070312, "learning_rate": 0.0001697593649218556, "loss": 0.2977, "step": 4016 }, { "epoch": 1.41, "grad_norm": 3.3247792720794678, "learning_rate": 0.00016972215331183328, "loss": 0.3402, "step": 4017 }, { "epoch": 1.41, "grad_norm": 
1.7041586637496948, "learning_rate": 0.00016968494170181093, "loss": 0.1643, "step": 4018 }, { "epoch": 1.41, "grad_norm": 2.3175644874572754, "learning_rate": 0.00016964773009178863, "loss": 0.2752, "step": 4019 }, { "epoch": 1.41, "grad_norm": 2.561286211013794, "learning_rate": 0.0001696105184817663, "loss": 0.3803, "step": 4020 }, { "epoch": 1.41, "grad_norm": 1.3216911554336548, "learning_rate": 0.00016957330687174396, "loss": 0.1224, "step": 4021 }, { "epoch": 1.41, "grad_norm": 2.120414972305298, "learning_rate": 0.00016953609526172166, "loss": 0.2079, "step": 4022 }, { "epoch": 1.41, "grad_norm": 2.5523006916046143, "learning_rate": 0.0001694988836516993, "loss": 0.3881, "step": 4023 }, { "epoch": 1.41, "grad_norm": 2.9489386081695557, "learning_rate": 0.00016946167204167698, "loss": 0.3729, "step": 4024 }, { "epoch": 1.41, "grad_norm": 2.491358757019043, "learning_rate": 0.00016942446043165469, "loss": 0.4037, "step": 4025 }, { "epoch": 1.41, "grad_norm": 0.5692845582962036, "learning_rate": 0.00016938724882163233, "loss": 0.044, "step": 4026 }, { "epoch": 1.41, "grad_norm": 5.157808780670166, "learning_rate": 0.00016935003721161, "loss": 0.2437, "step": 4027 }, { "epoch": 1.41, "grad_norm": 3.8554892539978027, "learning_rate": 0.00016931282560158766, "loss": 1.2543, "step": 4028 }, { "epoch": 1.41, "grad_norm": 4.396365642547607, "learning_rate": 0.00016927561399156536, "loss": 0.8959, "step": 4029 }, { "epoch": 1.41, "grad_norm": 5.370511054992676, "learning_rate": 0.00016923840238154304, "loss": 1.3415, "step": 4030 }, { "epoch": 1.41, "grad_norm": 2.6273608207702637, "learning_rate": 0.0001692011907715207, "loss": 0.7875, "step": 4031 }, { "epoch": 1.41, "grad_norm": 1.6899088621139526, "learning_rate": 0.00016916397916149836, "loss": 0.4165, "step": 4032 }, { "epoch": 1.41, "grad_norm": 2.6857593059539795, "learning_rate": 0.00016912676755147607, "loss": 0.552, "step": 4033 }, { "epoch": 1.41, "grad_norm": 3.3171510696411133, "learning_rate": 
0.00016908955594145371, "loss": 0.2746, "step": 4034 }, { "epoch": 1.41, "grad_norm": 3.1243948936462402, "learning_rate": 0.0001690523443314314, "loss": 1.4537, "step": 4035 }, { "epoch": 1.41, "grad_norm": 2.551290273666382, "learning_rate": 0.00016901513272140907, "loss": 0.7787, "step": 4036 }, { "epoch": 1.41, "grad_norm": 1.3610817193984985, "learning_rate": 0.00016897792111138674, "loss": 0.2018, "step": 4037 }, { "epoch": 1.41, "grad_norm": 2.2377007007598877, "learning_rate": 0.00016894070950136442, "loss": 0.6747, "step": 4038 }, { "epoch": 1.42, "grad_norm": 3.3656833171844482, "learning_rate": 0.00016890349789134207, "loss": 1.3106, "step": 4039 }, { "epoch": 1.42, "grad_norm": 1.9152419567108154, "learning_rate": 0.00016886628628131977, "loss": 0.4056, "step": 4040 }, { "epoch": 1.42, "grad_norm": 2.9426541328430176, "learning_rate": 0.00016882907467129742, "loss": 0.7818, "step": 4041 }, { "epoch": 1.42, "grad_norm": 1.2641545534133911, "learning_rate": 0.0001687918630612751, "loss": 0.218, "step": 4042 }, { "epoch": 1.42, "grad_norm": 1.596787691116333, "learning_rate": 0.0001687546514512528, "loss": 0.2985, "step": 4043 }, { "epoch": 1.42, "grad_norm": 1.177599549293518, "learning_rate": 0.00016871743984123044, "loss": 0.1941, "step": 4044 }, { "epoch": 1.42, "grad_norm": 2.4323856830596924, "learning_rate": 0.00016868022823120812, "loss": 0.4303, "step": 4045 }, { "epoch": 1.42, "grad_norm": 1.3808494806289673, "learning_rate": 0.00016864301662118582, "loss": 0.1963, "step": 4046 }, { "epoch": 1.42, "grad_norm": 3.3002772331237793, "learning_rate": 0.00016860580501116347, "loss": 0.4449, "step": 4047 }, { "epoch": 1.42, "grad_norm": 3.7071423530578613, "learning_rate": 0.00016856859340114115, "loss": 1.0698, "step": 4048 }, { "epoch": 1.42, "grad_norm": 3.5901901721954346, "learning_rate": 0.0001685313817911188, "loss": 0.4192, "step": 4049 }, { "epoch": 1.42, "grad_norm": 1.7595155239105225, "learning_rate": 0.0001684941701810965, "loss": 0.1192, 
"step": 4050 }, { "epoch": 1.42, "grad_norm": 2.3339569568634033, "learning_rate": 0.00016845695857107418, "loss": 0.2746, "step": 4051 }, { "epoch": 1.42, "grad_norm": 2.930159568786621, "learning_rate": 0.00016841974696105182, "loss": 0.6843, "step": 4052 }, { "epoch": 1.42, "grad_norm": 0.9335921406745911, "learning_rate": 0.0001683825353510295, "loss": 0.0758, "step": 4053 }, { "epoch": 1.42, "grad_norm": 9.361223220825195, "learning_rate": 0.00016834532374100718, "loss": 0.8905, "step": 4054 }, { "epoch": 1.42, "grad_norm": 2.8475027084350586, "learning_rate": 0.00016830811213098485, "loss": 0.7053, "step": 4055 }, { "epoch": 1.42, "grad_norm": 3.661919593811035, "learning_rate": 0.00016827090052096253, "loss": 0.4094, "step": 4056 }, { "epoch": 1.42, "grad_norm": 1.8695873022079468, "learning_rate": 0.0001682336889109402, "loss": 0.1678, "step": 4057 }, { "epoch": 1.42, "grad_norm": 2.414778232574463, "learning_rate": 0.00016819647730091788, "loss": 0.5948, "step": 4058 }, { "epoch": 1.42, "grad_norm": 2.0762922763824463, "learning_rate": 0.00016815926569089553, "loss": 0.2875, "step": 4059 }, { "epoch": 1.42, "grad_norm": 2.1993393898010254, "learning_rate": 0.0001681220540808732, "loss": 0.5876, "step": 4060 }, { "epoch": 1.42, "grad_norm": 3.719446897506714, "learning_rate": 0.0001680848424708509, "loss": 0.5155, "step": 4061 }, { "epoch": 1.42, "grad_norm": 3.0953850746154785, "learning_rate": 0.00016804763086082855, "loss": 0.4639, "step": 4062 }, { "epoch": 1.42, "grad_norm": 2.066967487335205, "learning_rate": 0.00016801041925080623, "loss": 0.3151, "step": 4063 }, { "epoch": 1.42, "grad_norm": 2.6755409240722656, "learning_rate": 0.00016797320764078393, "loss": 0.4324, "step": 4064 }, { "epoch": 1.42, "grad_norm": 1.722860336303711, "learning_rate": 0.00016793599603076158, "loss": 0.3173, "step": 4065 }, { "epoch": 1.42, "grad_norm": 3.7338409423828125, "learning_rate": 0.00016789878442073926, "loss": 0.8751, "step": 4066 }, { "epoch": 1.43, 
"grad_norm": 1.5438848733901978, "learning_rate": 0.0001678615728107169, "loss": 0.164, "step": 4067 }, { "epoch": 1.43, "grad_norm": 2.704420328140259, "learning_rate": 0.0001678243612006946, "loss": 0.2626, "step": 4068 }, { "epoch": 1.43, "grad_norm": 3.400644063949585, "learning_rate": 0.00016778714959067229, "loss": 0.5246, "step": 4069 }, { "epoch": 1.43, "grad_norm": 2.035137414932251, "learning_rate": 0.00016774993798064993, "loss": 0.3621, "step": 4070 }, { "epoch": 1.43, "grad_norm": 3.7264976501464844, "learning_rate": 0.00016771272637062764, "loss": 0.6366, "step": 4071 }, { "epoch": 1.43, "grad_norm": 2.768700361251831, "learning_rate": 0.00016767551476060529, "loss": 0.8278, "step": 4072 }, { "epoch": 1.43, "grad_norm": 2.690241813659668, "learning_rate": 0.00016763830315058296, "loss": 0.3648, "step": 4073 }, { "epoch": 1.43, "grad_norm": 5.137421607971191, "learning_rate": 0.00016760109154056064, "loss": 0.3702, "step": 4074 }, { "epoch": 1.43, "grad_norm": 0.9374441504478455, "learning_rate": 0.0001675638799305383, "loss": 0.0988, "step": 4075 }, { "epoch": 1.43, "grad_norm": 1.983991265296936, "learning_rate": 0.000167526668320516, "loss": 0.1474, "step": 4076 }, { "epoch": 1.43, "grad_norm": 1.5250822305679321, "learning_rate": 0.00016748945671049366, "loss": 0.1217, "step": 4077 }, { "epoch": 1.43, "grad_norm": 1.7774677276611328, "learning_rate": 0.00016745224510047134, "loss": 0.1447, "step": 4078 }, { "epoch": 1.43, "grad_norm": 6.5276994705200195, "learning_rate": 0.00016741503349044902, "loss": 0.3185, "step": 4079 }, { "epoch": 1.43, "grad_norm": 2.6374850273132324, "learning_rate": 0.00016737782188042666, "loss": 1.071, "step": 4080 }, { "epoch": 1.43, "grad_norm": 1.5644630193710327, "learning_rate": 0.00016734061027040434, "loss": 0.4552, "step": 4081 }, { "epoch": 1.43, "grad_norm": 2.4700703620910645, "learning_rate": 0.00016730339866038204, "loss": 0.6185, "step": 4082 }, { "epoch": 1.43, "grad_norm": 1.9310435056686401, 
"learning_rate": 0.0001672661870503597, "loss": 0.3552, "step": 4083 }, { "epoch": 1.43, "grad_norm": 1.529681921005249, "learning_rate": 0.00016722897544033737, "loss": 0.2221, "step": 4084 }, { "epoch": 1.43, "grad_norm": 2.560516357421875, "learning_rate": 0.00016719176383031502, "loss": 0.2798, "step": 4085 }, { "epoch": 1.43, "grad_norm": 2.354257106781006, "learning_rate": 0.00016715455222029272, "loss": 0.2105, "step": 4086 }, { "epoch": 1.43, "grad_norm": 2.342054843902588, "learning_rate": 0.0001671173406102704, "loss": 0.2796, "step": 4087 }, { "epoch": 1.43, "grad_norm": 1.957754135131836, "learning_rate": 0.00016708012900024804, "loss": 0.2997, "step": 4088 }, { "epoch": 1.43, "grad_norm": 2.821141004562378, "learning_rate": 0.00016704291739022575, "loss": 0.7008, "step": 4089 }, { "epoch": 1.43, "grad_norm": 12.810015678405762, "learning_rate": 0.00016700570578020342, "loss": 3.4665, "step": 4090 }, { "epoch": 1.43, "grad_norm": 2.314800500869751, "learning_rate": 0.00016696849417018107, "loss": 0.689, "step": 4091 }, { "epoch": 1.43, "grad_norm": 3.096869468688965, "learning_rate": 0.00016693128256015877, "loss": 0.7017, "step": 4092 }, { "epoch": 1.43, "grad_norm": 3.2854676246643066, "learning_rate": 0.00016689407095013642, "loss": 0.5166, "step": 4093 }, { "epoch": 1.43, "grad_norm": 1.9120439291000366, "learning_rate": 0.0001668568593401141, "loss": 0.2128, "step": 4094 }, { "epoch": 1.43, "grad_norm": 2.307483434677124, "learning_rate": 0.00016681964773009177, "loss": 0.2255, "step": 4095 }, { "epoch": 1.44, "grad_norm": 3.0140573978424072, "learning_rate": 0.00016678243612006945, "loss": 0.3603, "step": 4096 }, { "epoch": 1.44, "grad_norm": 1.1581931114196777, "learning_rate": 0.00016674522451004713, "loss": 0.1719, "step": 4097 }, { "epoch": 1.44, "grad_norm": 1.792546033859253, "learning_rate": 0.00016670801290002478, "loss": 0.1403, "step": 4098 }, { "epoch": 1.44, "grad_norm": 1.0819088220596313, "learning_rate": 0.00016667080129000248, 
"loss": 0.0878, "step": 4099 }, { "epoch": 1.44, "grad_norm": 5.966635227203369, "learning_rate": 0.00016663358967998015, "loss": 0.6863, "step": 4100 }, { "epoch": 1.44, "eval_loss": 0.4655866026878357, "eval_runtime": 51.618, "eval_samples_per_second": 42.001, "eval_steps_per_second": 10.5, "eval_wer": 0.4545061408060889, "step": 4100 }, { "epoch": 1.44, "grad_norm": 1.8284623622894287, "learning_rate": 0.0001665963780699578, "loss": 0.0954, "step": 4101 }, { "epoch": 1.44, "grad_norm": 1.2109270095825195, "learning_rate": 0.00016655916645993548, "loss": 0.0594, "step": 4102 }, { "epoch": 1.44, "grad_norm": 2.24116587638855, "learning_rate": 0.00016652195484991315, "loss": 0.2425, "step": 4103 }, { "epoch": 1.44, "grad_norm": 6.5993475914001465, "learning_rate": 0.00016648474323989083, "loss": 1.3286, "step": 4104 }, { "epoch": 1.44, "grad_norm": 2.824436902999878, "learning_rate": 0.0001664475316298685, "loss": 0.8653, "step": 4105 }, { "epoch": 1.44, "grad_norm": 2.3655171394348145, "learning_rate": 0.00016641032001984615, "loss": 0.6443, "step": 4106 }, { "epoch": 1.44, "grad_norm": 2.7993006706237793, "learning_rate": 0.00016637310840982386, "loss": 0.4293, "step": 4107 }, { "epoch": 1.44, "grad_norm": 1.109576940536499, "learning_rate": 0.00016633589679980153, "loss": 0.3577, "step": 4108 }, { "epoch": 1.44, "grad_norm": 1.4774690866470337, "learning_rate": 0.00016629868518977918, "loss": 0.2693, "step": 4109 }, { "epoch": 1.44, "grad_norm": 1.442692518234253, "learning_rate": 0.00016626147357975688, "loss": 0.3693, "step": 4110 }, { "epoch": 1.44, "grad_norm": 1.9737093448638916, "learning_rate": 0.00016622426196973453, "loss": 0.2922, "step": 4111 }, { "epoch": 1.44, "grad_norm": 3.433197498321533, "learning_rate": 0.0001661870503597122, "loss": 0.2324, "step": 4112 }, { "epoch": 1.44, "grad_norm": 1.9645071029663086, "learning_rate": 0.0001661498387496899, "loss": 0.2494, "step": 4113 }, { "epoch": 1.44, "grad_norm": 2.4682822227478027, "learning_rate": 
0.00016611262713966756, "loss": 0.3718, "step": 4114 }, { "epoch": 1.44, "grad_norm": 1.6847212314605713, "learning_rate": 0.00016607541552964524, "loss": 0.315, "step": 4115 }, { "epoch": 1.44, "grad_norm": 1.7860573530197144, "learning_rate": 0.00016603820391962289, "loss": 0.4285, "step": 4116 }, { "epoch": 1.44, "grad_norm": 2.275067090988159, "learning_rate": 0.0001660009923096006, "loss": 0.2831, "step": 4117 }, { "epoch": 1.44, "grad_norm": 2.7724900245666504, "learning_rate": 0.00016596378069957826, "loss": 0.2465, "step": 4118 }, { "epoch": 1.44, "grad_norm": 2.4586076736450195, "learning_rate": 0.0001659265690895559, "loss": 0.2474, "step": 4119 }, { "epoch": 1.44, "grad_norm": 2.156881332397461, "learning_rate": 0.00016588935747953362, "loss": 0.1468, "step": 4120 }, { "epoch": 1.44, "grad_norm": 2.4893946647644043, "learning_rate": 0.0001658521458695113, "loss": 0.2606, "step": 4121 }, { "epoch": 1.44, "grad_norm": 1.4689332246780396, "learning_rate": 0.00016581493425948894, "loss": 0.0906, "step": 4122 }, { "epoch": 1.44, "grad_norm": 4.48578405380249, "learning_rate": 0.00016577772264946662, "loss": 1.7996, "step": 4123 }, { "epoch": 1.44, "grad_norm": 1.9185426235198975, "learning_rate": 0.0001657405110394443, "loss": 0.1204, "step": 4124 }, { "epoch": 1.45, "grad_norm": 3.5373518466949463, "learning_rate": 0.00016570329942942197, "loss": 0.6004, "step": 4125 }, { "epoch": 1.45, "grad_norm": 0.8519726991653442, "learning_rate": 0.00016566608781939964, "loss": 0.0576, "step": 4126 }, { "epoch": 1.45, "grad_norm": 1.851152777671814, "learning_rate": 0.0001656288762093773, "loss": 0.2595, "step": 4127 }, { "epoch": 1.45, "grad_norm": 1.1237139701843262, "learning_rate": 0.000165591664599355, "loss": 0.0662, "step": 4128 }, { "epoch": 1.45, "grad_norm": 1.562714695930481, "learning_rate": 0.00016555445298933264, "loss": 0.1051, "step": 4129 }, { "epoch": 1.45, "grad_norm": 1.522381067276001, "learning_rate": 0.00016551724137931032, "loss": 0.4156, 
"step": 4130 }, { "epoch": 1.45, "grad_norm": 1.2748173475265503, "learning_rate": 0.00016548002976928802, "loss": 0.2003, "step": 4131 }, { "epoch": 1.45, "grad_norm": 2.538360118865967, "learning_rate": 0.00016544281815926567, "loss": 0.963, "step": 4132 }, { "epoch": 1.45, "grad_norm": 2.971334934234619, "learning_rate": 0.00016540560654924335, "loss": 0.6725, "step": 4133 }, { "epoch": 1.45, "grad_norm": 2.554715633392334, "learning_rate": 0.00016536839493922105, "loss": 0.8091, "step": 4134 }, { "epoch": 1.45, "grad_norm": 1.7067142724990845, "learning_rate": 0.0001653311833291987, "loss": 0.4307, "step": 4135 }, { "epoch": 1.45, "grad_norm": 2.621626377105713, "learning_rate": 0.00016529397171917637, "loss": 0.2681, "step": 4136 }, { "epoch": 1.45, "grad_norm": 4.470788478851318, "learning_rate": 0.00016525676010915402, "loss": 0.9887, "step": 4137 }, { "epoch": 1.45, "grad_norm": 1.222482442855835, "learning_rate": 0.00016521954849913173, "loss": 0.1786, "step": 4138 }, { "epoch": 1.45, "grad_norm": 2.5366249084472656, "learning_rate": 0.0001651823368891094, "loss": 0.4021, "step": 4139 }, { "epoch": 1.45, "grad_norm": 1.220999836921692, "learning_rate": 0.00016514512527908705, "loss": 0.1173, "step": 4140 }, { "epoch": 1.45, "grad_norm": 1.6525065898895264, "learning_rate": 0.00016510791366906475, "loss": 0.249, "step": 4141 }, { "epoch": 1.45, "grad_norm": 3.0360629558563232, "learning_rate": 0.0001650707020590424, "loss": 1.3031, "step": 4142 }, { "epoch": 1.45, "grad_norm": 2.506528854370117, "learning_rate": 0.00016503349044902008, "loss": 0.3216, "step": 4143 }, { "epoch": 1.45, "grad_norm": 2.794973611831665, "learning_rate": 0.00016499627883899775, "loss": 0.3352, "step": 4144 }, { "epoch": 1.45, "grad_norm": 1.864272117614746, "learning_rate": 0.00016495906722897543, "loss": 0.1704, "step": 4145 }, { "epoch": 1.45, "grad_norm": 0.7695359587669373, "learning_rate": 0.0001649218556189531, "loss": 0.0561, "step": 4146 }, { "epoch": 1.45, "grad_norm": 
1.7436617612838745, "learning_rate": 0.00016488464400893078, "loss": 0.3065, "step": 4147 }, { "epoch": 1.45, "grad_norm": 1.8266197443008423, "learning_rate": 0.00016484743239890843, "loss": 0.1391, "step": 4148 }, { "epoch": 1.45, "grad_norm": 4.190832614898682, "learning_rate": 0.00016481022078888613, "loss": 0.4169, "step": 4149 }, { "epoch": 1.45, "grad_norm": 3.211941719055176, "learning_rate": 0.00016477300917886378, "loss": 0.2127, "step": 4150 }, { "epoch": 1.45, "grad_norm": 3.884977340698242, "learning_rate": 0.00016473579756884146, "loss": 1.0903, "step": 4151 }, { "epoch": 1.45, "grad_norm": 2.969237804412842, "learning_rate": 0.00016469858595881916, "loss": 1.1619, "step": 4152 }, { "epoch": 1.46, "grad_norm": 4.25085973739624, "learning_rate": 0.0001646613743487968, "loss": 0.3401, "step": 4153 }, { "epoch": 1.46, "grad_norm": 1.1101423501968384, "learning_rate": 0.00016462416273877448, "loss": 0.0716, "step": 4154 }, { "epoch": 1.46, "grad_norm": 1.9234617948532104, "learning_rate": 0.00016458695112875213, "loss": 0.8407, "step": 4155 }, { "epoch": 1.46, "grad_norm": 2.631096601486206, "learning_rate": 0.00016454973951872984, "loss": 0.7553, "step": 4156 }, { "epoch": 1.46, "grad_norm": 1.5557128190994263, "learning_rate": 0.0001645125279087075, "loss": 0.4707, "step": 4157 }, { "epoch": 1.46, "grad_norm": 1.641202449798584, "learning_rate": 0.00016447531629868516, "loss": 0.3575, "step": 4158 }, { "epoch": 1.46, "grad_norm": 2.3638205528259277, "learning_rate": 0.00016443810468866286, "loss": 0.3766, "step": 4159 }, { "epoch": 1.46, "grad_norm": 2.019489288330078, "learning_rate": 0.0001644008930786405, "loss": 0.412, "step": 4160 }, { "epoch": 1.46, "grad_norm": 1.5674275159835815, "learning_rate": 0.0001643636814686182, "loss": 0.1787, "step": 4161 }, { "epoch": 1.46, "grad_norm": 1.2668960094451904, "learning_rate": 0.0001643264698585959, "loss": 0.1301, "step": 4162 }, { "epoch": 1.46, "grad_norm": 1.8856401443481445, "learning_rate": 
0.00016428925824857354, "loss": 0.3409, "step": 4163 }, { "epoch": 1.46, "grad_norm": 2.119640350341797, "learning_rate": 0.00016425204663855121, "loss": 0.2735, "step": 4164 }, { "epoch": 1.46, "grad_norm": 3.6939704418182373, "learning_rate": 0.0001642148350285289, "loss": 0.4351, "step": 4165 }, { "epoch": 1.46, "grad_norm": 3.8741753101348877, "learning_rate": 0.00016417762341850657, "loss": 0.4203, "step": 4166 }, { "epoch": 1.46, "grad_norm": 2.0240747928619385, "learning_rate": 0.00016414041180848424, "loss": 0.2846, "step": 4167 }, { "epoch": 1.46, "grad_norm": 1.6168068647384644, "learning_rate": 0.0001641032001984619, "loss": 0.3109, "step": 4168 }, { "epoch": 1.46, "grad_norm": 0.7785304188728333, "learning_rate": 0.00016406598858843957, "loss": 0.0582, "step": 4169 }, { "epoch": 1.46, "grad_norm": 2.258286476135254, "learning_rate": 0.00016402877697841727, "loss": 0.1708, "step": 4170 }, { "epoch": 1.46, "grad_norm": 1.3507694005966187, "learning_rate": 0.00016399156536839492, "loss": 0.1883, "step": 4171 }, { "epoch": 1.46, "grad_norm": 3.4873201847076416, "learning_rate": 0.0001639543537583726, "loss": 0.3557, "step": 4172 }, { "epoch": 1.46, "grad_norm": 2.767658233642578, "learning_rate": 0.00016391714214835024, "loss": 0.3405, "step": 4173 }, { "epoch": 1.46, "grad_norm": 4.61103630065918, "learning_rate": 0.00016387993053832795, "loss": 0.5315, "step": 4174 }, { "epoch": 1.46, "grad_norm": 4.691863536834717, "learning_rate": 0.00016384271892830562, "loss": 0.2525, "step": 4175 }, { "epoch": 1.46, "grad_norm": 2.6262426376342773, "learning_rate": 0.00016380550731828327, "loss": 0.3521, "step": 4176 }, { "epoch": 1.46, "grad_norm": 4.043594837188721, "learning_rate": 0.00016376829570826097, "loss": 0.7037, "step": 4177 }, { "epoch": 1.46, "grad_norm": 3.2441458702087402, "learning_rate": 0.00016373108409823865, "loss": 0.4664, "step": 4178 }, { "epoch": 1.46, "grad_norm": 1.617117166519165, "learning_rate": 0.0001636938724882163, "loss": 0.0479, 
"step": 4179 }, { "epoch": 1.46, "grad_norm": 1.4325096607208252, "learning_rate": 0.000163656660878194, "loss": 0.3984, "step": 4180 }, { "epoch": 1.46, "grad_norm": 2.373211622238159, "learning_rate": 0.00016361944926817165, "loss": 0.5353, "step": 4181 }, { "epoch": 1.47, "grad_norm": 1.8118925094604492, "learning_rate": 0.00016358223765814932, "loss": 0.3167, "step": 4182 }, { "epoch": 1.47, "grad_norm": 2.375030279159546, "learning_rate": 0.00016354502604812703, "loss": 0.173, "step": 4183 }, { "epoch": 1.47, "grad_norm": 2.043342113494873, "learning_rate": 0.00016350781443810468, "loss": 0.5591, "step": 4184 }, { "epoch": 1.47, "grad_norm": 2.062807321548462, "learning_rate": 0.00016347060282808235, "loss": 0.3204, "step": 4185 }, { "epoch": 1.47, "grad_norm": 1.876054286956787, "learning_rate": 0.00016343339121806, "loss": 0.2098, "step": 4186 }, { "epoch": 1.47, "grad_norm": 2.224905014038086, "learning_rate": 0.0001633961796080377, "loss": 0.5262, "step": 4187 }, { "epoch": 1.47, "grad_norm": 2.370276689529419, "learning_rate": 0.00016335896799801538, "loss": 0.6072, "step": 4188 }, { "epoch": 1.47, "grad_norm": 3.29032301902771, "learning_rate": 0.00016332175638799303, "loss": 0.7222, "step": 4189 }, { "epoch": 1.47, "grad_norm": 2.3209869861602783, "learning_rate": 0.0001632845447779707, "loss": 0.4061, "step": 4190 }, { "epoch": 1.47, "grad_norm": 2.168501853942871, "learning_rate": 0.0001632473331679484, "loss": 0.2453, "step": 4191 }, { "epoch": 1.47, "grad_norm": 1.7172929048538208, "learning_rate": 0.00016321012155792606, "loss": 0.4018, "step": 4192 }, { "epoch": 1.47, "grad_norm": 3.087212562561035, "learning_rate": 0.00016317290994790373, "loss": 0.2529, "step": 4193 }, { "epoch": 1.47, "grad_norm": 2.2027931213378906, "learning_rate": 0.00016313569833788138, "loss": 0.3247, "step": 4194 }, { "epoch": 1.47, "grad_norm": 1.7877609729766846, "learning_rate": 0.00016309848672785908, "loss": 0.1612, "step": 4195 }, { "epoch": 1.47, "grad_norm": 
2.64384126663208, "learning_rate": 0.00016306127511783676, "loss": 0.4328, "step": 4196 }, { "epoch": 1.47, "grad_norm": 3.098459243774414, "learning_rate": 0.0001630240635078144, "loss": 0.4273, "step": 4197 }, { "epoch": 1.47, "grad_norm": 2.489880084991455, "learning_rate": 0.0001629868518977921, "loss": 0.3342, "step": 4198 }, { "epoch": 1.47, "grad_norm": 2.079407215118408, "learning_rate": 0.00016294964028776976, "loss": 0.1652, "step": 4199 }, { "epoch": 1.47, "grad_norm": 9.008259773254395, "learning_rate": 0.00016291242867774743, "loss": 0.281, "step": 4200 }, { "epoch": 1.47, "eval_loss": 0.4405984878540039, "eval_runtime": 51.6554, "eval_samples_per_second": 41.97, "eval_steps_per_second": 10.493, "eval_wer": 0.41567202906071615, "step": 4200 }, { "epoch": 1.47, "grad_norm": 2.8630118370056152, "learning_rate": 0.00016287521706772514, "loss": 0.3071, "step": 4201 }, { "epoch": 1.47, "grad_norm": 2.293118476867676, "learning_rate": 0.0001628380054577028, "loss": 0.2474, "step": 4202 }, { "epoch": 1.47, "grad_norm": 4.106233596801758, "learning_rate": 0.00016280079384768046, "loss": 0.094, "step": 4203 }, { "epoch": 1.47, "grad_norm": 1.9723445177078247, "learning_rate": 0.0001627635822376581, "loss": 0.1708, "step": 4204 }, { "epoch": 1.47, "grad_norm": 1.9736557006835938, "learning_rate": 0.00016272637062763581, "loss": 0.8252, "step": 4205 }, { "epoch": 1.47, "grad_norm": 1.89078688621521, "learning_rate": 0.0001626891590176135, "loss": 0.516, "step": 4206 }, { "epoch": 1.47, "grad_norm": 2.3267147541046143, "learning_rate": 0.00016265194740759114, "loss": 0.7264, "step": 4207 }, { "epoch": 1.47, "grad_norm": 1.9280672073364258, "learning_rate": 0.00016261473579756884, "loss": 0.462, "step": 4208 }, { "epoch": 1.47, "grad_norm": 1.9770915508270264, "learning_rate": 0.00016257752418754652, "loss": 0.2252, "step": 4209 }, { "epoch": 1.48, "grad_norm": 1.2686558961868286, "learning_rate": 0.00016254031257752417, "loss": 0.154, "step": 4210 }, { "epoch": 
1.48, "grad_norm": 2.1990997791290283, "learning_rate": 0.00016250310096750184, "loss": 0.1505, "step": 4211 }, { "epoch": 1.48, "grad_norm": 1.4449461698532104, "learning_rate": 0.00016246588935747952, "loss": 0.218, "step": 4212 }, { "epoch": 1.48, "grad_norm": 2.5558810234069824, "learning_rate": 0.0001624286777474572, "loss": 0.6625, "step": 4213 }, { "epoch": 1.48, "grad_norm": 2.122541666030884, "learning_rate": 0.00016239146613743487, "loss": 0.3919, "step": 4214 }, { "epoch": 1.48, "grad_norm": 2.2353134155273438, "learning_rate": 0.00016235425452741252, "loss": 0.4296, "step": 4215 }, { "epoch": 1.48, "grad_norm": 1.7417476177215576, "learning_rate": 0.00016231704291739022, "loss": 0.4243, "step": 4216 }, { "epoch": 1.48, "grad_norm": 1.6109976768493652, "learning_rate": 0.00016227983130736787, "loss": 0.169, "step": 4217 }, { "epoch": 1.48, "grad_norm": 1.284348487854004, "learning_rate": 0.00016224261969734555, "loss": 0.2186, "step": 4218 }, { "epoch": 1.48, "grad_norm": 1.6397535800933838, "learning_rate": 0.00016220540808732325, "loss": 0.3277, "step": 4219 }, { "epoch": 1.48, "grad_norm": 3.001840114593506, "learning_rate": 0.0001621681964773009, "loss": 0.158, "step": 4220 }, { "epoch": 1.48, "grad_norm": 1.503371000289917, "learning_rate": 0.00016213098486727857, "loss": 0.2988, "step": 4221 }, { "epoch": 1.48, "grad_norm": 1.4237070083618164, "learning_rate": 0.00016209377325725628, "loss": 0.0898, "step": 4222 }, { "epoch": 1.48, "grad_norm": 3.1129143238067627, "learning_rate": 0.00016205656164723392, "loss": 0.2174, "step": 4223 }, { "epoch": 1.48, "grad_norm": 0.9691246747970581, "learning_rate": 0.0001620193500372116, "loss": 0.1042, "step": 4224 }, { "epoch": 1.48, "grad_norm": 2.2304303646087646, "learning_rate": 0.00016198213842718925, "loss": 0.1921, "step": 4225 }, { "epoch": 1.48, "grad_norm": 2.0455522537231445, "learning_rate": 0.00016194492681716695, "loss": 0.1548, "step": 4226 }, { "epoch": 1.48, "grad_norm": 3.2553882598876953, 
"learning_rate": 0.00016190771520714463, "loss": 0.523, "step": 4227 }, { "epoch": 1.48, "grad_norm": 1.433052897453308, "learning_rate": 0.00016187050359712228, "loss": 0.1224, "step": 4228 }, { "epoch": 1.48, "grad_norm": 4.60850191116333, "learning_rate": 0.00016183329198709998, "loss": 0.7773, "step": 4229 }, { "epoch": 1.48, "grad_norm": 3.2903518676757812, "learning_rate": 0.00016179608037707763, "loss": 0.5722, "step": 4230 }, { "epoch": 1.48, "grad_norm": 3.1476831436157227, "learning_rate": 0.0001617588687670553, "loss": 0.5969, "step": 4231 }, { "epoch": 1.48, "grad_norm": 1.1613272428512573, "learning_rate": 0.00016172165715703298, "loss": 0.1067, "step": 4232 }, { "epoch": 1.48, "grad_norm": 1.8969131708145142, "learning_rate": 0.00016168444554701065, "loss": 0.4415, "step": 4233 }, { "epoch": 1.48, "grad_norm": 2.0695786476135254, "learning_rate": 0.00016164723393698833, "loss": 0.3461, "step": 4234 }, { "epoch": 1.48, "grad_norm": 4.001595973968506, "learning_rate": 0.000161610022326966, "loss": 0.687, "step": 4235 }, { "epoch": 1.48, "grad_norm": 2.7482621669769287, "learning_rate": 0.00016157281071694366, "loss": 0.4221, "step": 4236 }, { "epoch": 1.48, "grad_norm": 3.1002767086029053, "learning_rate": 0.00016153559910692136, "loss": 0.2666, "step": 4237 }, { "epoch": 1.48, "grad_norm": 1.9085137844085693, "learning_rate": 0.000161498387496899, "loss": 0.4164, "step": 4238 }, { "epoch": 1.49, "grad_norm": 2.563671112060547, "learning_rate": 0.00016146117588687668, "loss": 0.3175, "step": 4239 }, { "epoch": 1.49, "grad_norm": 3.7906312942504883, "learning_rate": 0.00016142396427685439, "loss": 1.8977, "step": 4240 }, { "epoch": 1.49, "grad_norm": 4.173704147338867, "learning_rate": 0.00016138675266683203, "loss": 0.4102, "step": 4241 }, { "epoch": 1.49, "grad_norm": 4.604090690612793, "learning_rate": 0.0001613495410568097, "loss": 1.782, "step": 4242 }, { "epoch": 1.49, "grad_norm": 2.0995211601257324, "learning_rate": 0.00016131232944678736, 
"loss": 0.2092, "step": 4243 }, { "epoch": 1.49, "grad_norm": 3.1292269229888916, "learning_rate": 0.00016127511783676506, "loss": 0.341, "step": 4244 }, { "epoch": 1.49, "grad_norm": 2.0066728591918945, "learning_rate": 0.00016123790622674274, "loss": 0.3798, "step": 4245 }, { "epoch": 1.49, "grad_norm": 2.5838475227355957, "learning_rate": 0.00016120069461672039, "loss": 0.3337, "step": 4246 }, { "epoch": 1.49, "grad_norm": 3.64768385887146, "learning_rate": 0.0001611634830066981, "loss": 0.5525, "step": 4247 }, { "epoch": 1.49, "grad_norm": 5.899421215057373, "learning_rate": 0.00016112627139667574, "loss": 0.2683, "step": 4248 }, { "epoch": 1.49, "grad_norm": 2.8585946559906006, "learning_rate": 0.0001610890597866534, "loss": 0.3204, "step": 4249 }, { "epoch": 1.49, "grad_norm": 1.4531258344650269, "learning_rate": 0.00016105184817663112, "loss": 0.1442, "step": 4250 }, { "epoch": 1.49, "grad_norm": 2.3642122745513916, "learning_rate": 0.00016101463656660876, "loss": 0.2424, "step": 4251 }, { "epoch": 1.49, "grad_norm": 2.2466611862182617, "learning_rate": 0.00016097742495658644, "loss": 0.1768, "step": 4252 }, { "epoch": 1.49, "grad_norm": 1.5451269149780273, "learning_rate": 0.00016094021334656412, "loss": 0.065, "step": 4253 }, { "epoch": 1.49, "grad_norm": 3.8907992839813232, "learning_rate": 0.0001609030017365418, "loss": 0.3097, "step": 4254 }, { "epoch": 1.49, "grad_norm": 2.250171422958374, "learning_rate": 0.00016086579012651947, "loss": 0.908, "step": 4255 }, { "epoch": 1.49, "grad_norm": 5.258478164672852, "learning_rate": 0.00016082857851649712, "loss": 1.5232, "step": 4256 }, { "epoch": 1.49, "grad_norm": 2.6386005878448486, "learning_rate": 0.0001607913669064748, "loss": 0.4984, "step": 4257 }, { "epoch": 1.49, "grad_norm": 1.1076130867004395, "learning_rate": 0.0001607541552964525, "loss": 0.242, "step": 4258 }, { "epoch": 1.49, "grad_norm": 1.5664863586425781, "learning_rate": 0.00016071694368643014, "loss": 0.1759, "step": 4259 }, { "epoch": 
1.49, "grad_norm": 2.852780342102051, "learning_rate": 0.00016067973207640782, "loss": 0.3636, "step": 4260 }, { "epoch": 1.49, "grad_norm": 1.7188012599945068, "learning_rate": 0.0001606425204663855, "loss": 0.2137, "step": 4261 }, { "epoch": 1.49, "grad_norm": 1.521380066871643, "learning_rate": 0.00016060530885636317, "loss": 0.2769, "step": 4262 }, { "epoch": 1.49, "grad_norm": 1.657301902770996, "learning_rate": 0.00016056809724634085, "loss": 0.3082, "step": 4263 }, { "epoch": 1.49, "grad_norm": 2.6540775299072266, "learning_rate": 0.0001605308856363185, "loss": 0.4584, "step": 4264 }, { "epoch": 1.49, "grad_norm": 1.5712755918502808, "learning_rate": 0.0001604936740262962, "loss": 0.1983, "step": 4265 }, { "epoch": 1.49, "grad_norm": 2.1187055110931396, "learning_rate": 0.00016045646241627387, "loss": 0.3515, "step": 4266 }, { "epoch": 1.5, "grad_norm": 4.760385036468506, "learning_rate": 0.00016041925080625152, "loss": 2.0062, "step": 4267 }, { "epoch": 1.5, "grad_norm": 12.318647384643555, "learning_rate": 0.00016038203919622923, "loss": 0.6059, "step": 4268 }, { "epoch": 1.5, "grad_norm": 1.8167134523391724, "learning_rate": 0.00016034482758620688, "loss": 0.1795, "step": 4269 }, { "epoch": 1.5, "grad_norm": 3.7228429317474365, "learning_rate": 0.00016030761597618455, "loss": 0.4189, "step": 4270 }, { "epoch": 1.5, "grad_norm": 2.039379596710205, "learning_rate": 0.00016027040436616225, "loss": 0.2122, "step": 4271 }, { "epoch": 1.5, "grad_norm": 3.154689073562622, "learning_rate": 0.0001602331927561399, "loss": 0.3161, "step": 4272 }, { "epoch": 1.5, "grad_norm": 2.978641986846924, "learning_rate": 0.00016019598114611758, "loss": 0.4632, "step": 4273 }, { "epoch": 1.5, "grad_norm": 1.5532993078231812, "learning_rate": 0.00016015876953609523, "loss": 0.2329, "step": 4274 }, { "epoch": 1.5, "grad_norm": 1.4337552785873413, "learning_rate": 0.00016012155792607293, "loss": 0.2356, "step": 4275 }, { "epoch": 1.5, "grad_norm": 2.861510992050171, 
"learning_rate": 0.0001600843463160506, "loss": 0.6937, "step": 4276 }, { "epoch": 1.5, "grad_norm": 2.7515218257904053, "learning_rate": 0.00016004713470602825, "loss": 0.2255, "step": 4277 }, { "epoch": 1.5, "grad_norm": 2.600706100463867, "learning_rate": 0.00016000992309600593, "loss": 0.2059, "step": 4278 }, { "epoch": 1.5, "grad_norm": 5.621941566467285, "learning_rate": 0.00015997271148598363, "loss": 0.5214, "step": 4279 }, { "epoch": 1.5, "grad_norm": 2.779236078262329, "learning_rate": 0.00015993549987596128, "loss": 0.9023, "step": 4280 }, { "epoch": 1.5, "grad_norm": 2.4109227657318115, "learning_rate": 0.00015989828826593896, "loss": 0.2555, "step": 4281 }, { "epoch": 1.5, "grad_norm": 1.9273884296417236, "learning_rate": 0.00015986107665591663, "loss": 0.3355, "step": 4282 }, { "epoch": 1.5, "grad_norm": 1.6707803010940552, "learning_rate": 0.0001598238650458943, "loss": 0.295, "step": 4283 }, { "epoch": 1.5, "grad_norm": 14.171672821044922, "learning_rate": 0.00015978665343587198, "loss": 4.537, "step": 4284 }, { "epoch": 1.5, "grad_norm": 2.2641332149505615, "learning_rate": 0.00015974944182584963, "loss": 0.502, "step": 4285 }, { "epoch": 1.5, "grad_norm": 1.5345269441604614, "learning_rate": 0.00015971223021582734, "loss": 0.4606, "step": 4286 }, { "epoch": 1.5, "grad_norm": 1.9413623809814453, "learning_rate": 0.00015967501860580499, "loss": 0.3375, "step": 4287 }, { "epoch": 1.5, "grad_norm": 1.7889741659164429, "learning_rate": 0.00015963780699578266, "loss": 0.2946, "step": 4288 }, { "epoch": 1.5, "grad_norm": 3.114777088165283, "learning_rate": 0.00015960059538576036, "loss": 0.5715, "step": 4289 }, { "epoch": 1.5, "grad_norm": 2.282841920852661, "learning_rate": 0.000159563383775738, "loss": 0.3345, "step": 4290 }, { "epoch": 1.5, "grad_norm": 1.8913458585739136, "learning_rate": 0.0001595261721657157, "loss": 0.3601, "step": 4291 }, { "epoch": 1.5, "grad_norm": 1.8621262311935425, "learning_rate": 0.00015948896055569334, "loss": 0.0981, 
"step": 4292 }, { "epoch": 1.5, "grad_norm": 3.931448221206665, "learning_rate": 0.00015945174894567104, "loss": 0.4209, "step": 4293 }, { "epoch": 1.5, "grad_norm": 1.0255603790283203, "learning_rate": 0.00015941453733564872, "loss": 0.1596, "step": 4294 }, { "epoch": 1.5, "grad_norm": 2.5541460514068604, "learning_rate": 0.00015937732572562636, "loss": 0.5015, "step": 4295 }, { "epoch": 1.51, "grad_norm": 1.8127018213272095, "learning_rate": 0.00015934011411560407, "loss": 0.1114, "step": 4296 }, { "epoch": 1.51, "grad_norm": 1.7823874950408936, "learning_rate": 0.00015930290250558174, "loss": 0.1683, "step": 4297 }, { "epoch": 1.51, "grad_norm": 3.2042083740234375, "learning_rate": 0.0001592656908955594, "loss": 0.4738, "step": 4298 }, { "epoch": 1.51, "grad_norm": 1.9896401166915894, "learning_rate": 0.00015922847928553707, "loss": 0.0851, "step": 4299 }, { "epoch": 1.51, "grad_norm": 1.8025238513946533, "learning_rate": 0.00015919126767551474, "loss": 0.733, "step": 4300 }, { "epoch": 1.51, "eval_loss": 0.47069793939590454, "eval_runtime": 51.3136, "eval_samples_per_second": 42.25, "eval_steps_per_second": 10.563, "eval_wer": 0.44680851063829785, "step": 4300 }, { "epoch": 1.51, "grad_norm": 2.523890495300293, "learning_rate": 0.00015915405606549242, "loss": 0.2174, "step": 4301 }, { "epoch": 1.51, "grad_norm": 7.219611644744873, "learning_rate": 0.0001591168444554701, "loss": 1.3755, "step": 4302 }, { "epoch": 1.51, "grad_norm": 2.2744252681732178, "learning_rate": 0.00015907963284544777, "loss": 0.2139, "step": 4303 }, { "epoch": 1.51, "grad_norm": 5.458730220794678, "learning_rate": 0.00015904242123542545, "loss": 1.3734, "step": 4304 }, { "epoch": 1.51, "grad_norm": 3.0790417194366455, "learning_rate": 0.0001590052096254031, "loss": 0.8824, "step": 4305 }, { "epoch": 1.51, "grad_norm": 2.6385726928710938, "learning_rate": 0.00015896799801538077, "loss": 0.629, "step": 4306 }, { "epoch": 1.51, "grad_norm": 3.4197399616241455, "learning_rate": 
0.00015893078640535847, "loss": 0.6337, "step": 4307 }, { "epoch": 1.51, "grad_norm": 1.8060104846954346, "learning_rate": 0.00015889357479533612, "loss": 0.2284, "step": 4308 }, { "epoch": 1.51, "grad_norm": 1.9204670190811157, "learning_rate": 0.0001588563631853138, "loss": 0.2744, "step": 4309 }, { "epoch": 1.51, "grad_norm": 1.3821399211883545, "learning_rate": 0.0001588191515752915, "loss": 0.2596, "step": 4310 }, { "epoch": 1.51, "grad_norm": 2.8792121410369873, "learning_rate": 0.00015878193996526915, "loss": 0.8357, "step": 4311 }, { "epoch": 1.51, "grad_norm": 1.4682904481887817, "learning_rate": 0.00015874472835524683, "loss": 0.3634, "step": 4312 }, { "epoch": 1.51, "grad_norm": 2.5377604961395264, "learning_rate": 0.00015870751674522447, "loss": 0.1408, "step": 4313 }, { "epoch": 1.51, "grad_norm": 2.061809539794922, "learning_rate": 0.00015867030513520218, "loss": 0.1885, "step": 4314 }, { "epoch": 1.51, "grad_norm": 3.3205690383911133, "learning_rate": 0.00015863309352517985, "loss": 0.3503, "step": 4315 }, { "epoch": 1.51, "grad_norm": 1.4646776914596558, "learning_rate": 0.0001585958819151575, "loss": 0.1579, "step": 4316 }, { "epoch": 1.51, "grad_norm": 4.019704818725586, "learning_rate": 0.0001585586703051352, "loss": 0.4413, "step": 4317 }, { "epoch": 1.51, "grad_norm": 2.0454628467559814, "learning_rate": 0.00015852145869511285, "loss": 0.179, "step": 4318 }, { "epoch": 1.51, "grad_norm": 3.3551511764526367, "learning_rate": 0.00015848424708509053, "loss": 0.3165, "step": 4319 }, { "epoch": 1.51, "grad_norm": 2.5540342330932617, "learning_rate": 0.0001584470354750682, "loss": 0.1543, "step": 4320 }, { "epoch": 1.51, "grad_norm": 3.365748643875122, "learning_rate": 0.00015840982386504588, "loss": 0.3139, "step": 4321 }, { "epoch": 1.51, "grad_norm": 1.9014160633087158, "learning_rate": 0.00015837261225502356, "loss": 0.3596, "step": 4322 }, { "epoch": 1.51, "grad_norm": 2.817349910736084, "learning_rate": 0.00015833540064500123, "loss": 0.4239, 
"step": 4323 }, { "epoch": 1.52, "grad_norm": 7.915454864501953, "learning_rate": 0.0001582981890349789, "loss": 0.356, "step": 4324 }, { "epoch": 1.52, "grad_norm": 4.057788372039795, "learning_rate": 0.00015826097742495658, "loss": 0.2209, "step": 4325 }, { "epoch": 1.52, "grad_norm": 3.6405909061431885, "learning_rate": 0.00015822376581493423, "loss": 0.1062, "step": 4326 }, { "epoch": 1.52, "grad_norm": 2.823045015335083, "learning_rate": 0.0001581865542049119, "loss": 0.4549, "step": 4327 }, { "epoch": 1.52, "grad_norm": 4.013254642486572, "learning_rate": 0.0001581493425948896, "loss": 0.5598, "step": 4328 }, { "epoch": 1.52, "grad_norm": 2.840559244155884, "learning_rate": 0.00015811213098486726, "loss": 0.2415, "step": 4329 }, { "epoch": 1.52, "grad_norm": 2.6936604976654053, "learning_rate": 0.00015807491937484494, "loss": 0.7174, "step": 4330 }, { "epoch": 1.52, "grad_norm": 2.872133493423462, "learning_rate": 0.00015803770776482258, "loss": 0.8422, "step": 4331 }, { "epoch": 1.52, "grad_norm": 1.364221453666687, "learning_rate": 0.0001580004961548003, "loss": 0.2798, "step": 4332 }, { "epoch": 1.52, "grad_norm": 1.7011353969573975, "learning_rate": 0.00015796328454477796, "loss": 0.3753, "step": 4333 }, { "epoch": 1.52, "grad_norm": 1.4171128273010254, "learning_rate": 0.0001579260729347556, "loss": 0.1875, "step": 4334 }, { "epoch": 1.52, "grad_norm": 3.3720600605010986, "learning_rate": 0.00015788886132473331, "loss": 0.8102, "step": 4335 }, { "epoch": 1.52, "grad_norm": 1.8144382238388062, "learning_rate": 0.000157851649714711, "loss": 0.2117, "step": 4336 }, { "epoch": 1.52, "grad_norm": 2.3069920539855957, "learning_rate": 0.00015781443810468864, "loss": 0.8746, "step": 4337 }, { "epoch": 1.52, "grad_norm": 1.95841646194458, "learning_rate": 0.00015777722649466634, "loss": 0.3763, "step": 4338 }, { "epoch": 1.52, "grad_norm": 1.462180495262146, "learning_rate": 0.000157740014884644, "loss": 0.1606, "step": 4339 }, { "epoch": 1.52, "grad_norm": 
1.9129983186721802, "learning_rate": 0.00015770280327462167, "loss": 0.2706, "step": 4340 }, { "epoch": 1.52, "grad_norm": 1.6017885208129883, "learning_rate": 0.00015766559166459934, "loss": 0.2079, "step": 4341 }, { "epoch": 1.52, "grad_norm": 2.809229612350464, "learning_rate": 0.00015762838005457702, "loss": 0.5534, "step": 4342 }, { "epoch": 1.52, "grad_norm": 1.4012563228607178, "learning_rate": 0.0001575911684445547, "loss": 0.1605, "step": 4343 }, { "epoch": 1.52, "grad_norm": 1.8997706174850464, "learning_rate": 0.00015755395683453234, "loss": 0.1523, "step": 4344 }, { "epoch": 1.52, "grad_norm": 5.734255313873291, "learning_rate": 0.00015751674522451005, "loss": 0.3817, "step": 4345 }, { "epoch": 1.52, "grad_norm": 2.4937775135040283, "learning_rate": 0.00015747953361448772, "loss": 0.3117, "step": 4346 }, { "epoch": 1.52, "grad_norm": NaN, "learning_rate": 0.00015747953361448772, "loss": 0.1006, "step": 4347 }, { "epoch": 1.52, "grad_norm": 1.9749798774719238, "learning_rate": 0.00015744232200446537, "loss": 0.4598, "step": 4348 }, { "epoch": 1.52, "grad_norm": 1.340526819229126, "learning_rate": 0.00015740511039444305, "loss": 0.0752, "step": 4349 }, { "epoch": 1.52, "grad_norm": 2.0161750316619873, "learning_rate": 0.00015736789878442072, "loss": 0.2629, "step": 4350 }, { "epoch": 1.52, "grad_norm": 2.0471723079681396, "learning_rate": 0.0001573306871743984, "loss": 0.3901, "step": 4351 }, { "epoch": 1.52, "grad_norm": 3.22011661529541, "learning_rate": 0.00015729347556437607, "loss": 0.2667, "step": 4352 }, { "epoch": 1.53, "grad_norm": 2.5319764614105225, "learning_rate": 0.00015725626395435372, "loss": 0.2311, "step": 4353 }, { "epoch": 1.53, "grad_norm": 3.327425718307495, "learning_rate": 0.00015721905234433142, "loss": 0.2222, "step": 4354 }, { "epoch": 1.53, "grad_norm": 1.8986114263534546, "learning_rate": 0.0001571818407343091, "loss": 0.5881, "step": 4355 }, { "epoch": 1.53, "grad_norm": 1.7233716249465942, "learning_rate": 
0.00015714462912428675, "loss": 0.6135, "step": 4356 }, { "epoch": 1.53, "grad_norm": 1.6733200550079346, "learning_rate": 0.00015710741751426445, "loss": 0.2479, "step": 4357 }, { "epoch": 1.53, "grad_norm": 0.7937635183334351, "learning_rate": 0.0001570702059042421, "loss": 0.0792, "step": 4358 }, { "epoch": 1.53, "grad_norm": 2.3683009147644043, "learning_rate": 0.00015703299429421978, "loss": 0.2897, "step": 4359 }, { "epoch": 1.53, "grad_norm": 3.6302194595336914, "learning_rate": 0.00015699578268419748, "loss": 0.5056, "step": 4360 }, { "epoch": 1.53, "grad_norm": 2.038682460784912, "learning_rate": 0.00015695857107417513, "loss": 0.339, "step": 4361 }, { "epoch": 1.53, "grad_norm": 2.3707022666931152, "learning_rate": 0.0001569213594641528, "loss": 0.5805, "step": 4362 }, { "epoch": 1.53, "grad_norm": 11.967528343200684, "learning_rate": 0.00015688414785413045, "loss": 3.6802, "step": 4363 }, { "epoch": 1.53, "grad_norm": 3.1059775352478027, "learning_rate": 0.00015684693624410816, "loss": 0.6931, "step": 4364 }, { "epoch": 1.53, "grad_norm": 1.6652494668960571, "learning_rate": 0.00015680972463408583, "loss": 0.3522, "step": 4365 }, { "epoch": 1.53, "grad_norm": 2.2658798694610596, "learning_rate": 0.00015677251302406348, "loss": 0.3224, "step": 4366 }, { "epoch": 1.53, "grad_norm": 14.272518157958984, "learning_rate": 0.00015673530141404118, "loss": 3.4446, "step": 4367 }, { "epoch": 1.53, "grad_norm": 1.9729360342025757, "learning_rate": 0.00015669808980401886, "loss": 0.5131, "step": 4368 }, { "epoch": 1.53, "grad_norm": 2.555955648422241, "learning_rate": 0.0001566608781939965, "loss": 0.4208, "step": 4369 }, { "epoch": 1.53, "grad_norm": 0.9813282489776611, "learning_rate": 0.00015662366658397418, "loss": 0.1237, "step": 4370 }, { "epoch": 1.53, "grad_norm": 2.0302422046661377, "learning_rate": 0.00015658645497395186, "loss": 0.3017, "step": 4371 }, { "epoch": 1.53, "grad_norm": 2.8068108558654785, "learning_rate": 0.00015654924336392953, "loss": 
0.3008, "step": 4372 }, { "epoch": 1.53, "grad_norm": 2.404326915740967, "learning_rate": 0.0001565120317539072, "loss": 0.4064, "step": 4373 }, { "epoch": 1.53, "grad_norm": 1.2311917543411255, "learning_rate": 0.00015647482014388486, "loss": 0.1547, "step": 4374 }, { "epoch": 1.53, "grad_norm": 2.6930012702941895, "learning_rate": 0.00015643760853386256, "loss": 0.3034, "step": 4375 }, { "epoch": 1.53, "grad_norm": 1.130953073501587, "learning_rate": 0.0001564003969238402, "loss": 0.0701, "step": 4376 }, { "epoch": 1.53, "grad_norm": 1.6812334060668945, "learning_rate": 0.0001563631853138179, "loss": 0.1034, "step": 4377 }, { "epoch": 1.53, "grad_norm": 3.887232780456543, "learning_rate": 0.0001563259737037956, "loss": 0.8612, "step": 4378 }, { "epoch": 1.53, "grad_norm": 4.974242687225342, "learning_rate": 0.00015628876209377324, "loss": 0.3702, "step": 4379 }, { "epoch": 1.53, "grad_norm": 2.9421982765197754, "learning_rate": 0.00015625155048375091, "loss": 0.6489, "step": 4380 }, { "epoch": 1.54, "grad_norm": 1.6786479949951172, "learning_rate": 0.00015621433887372862, "loss": 0.2916, "step": 4381 }, { "epoch": 1.54, "grad_norm": 2.749591112136841, "learning_rate": 0.00015617712726370627, "loss": 0.5676, "step": 4382 }, { "epoch": 1.54, "grad_norm": 1.2755649089813232, "learning_rate": 0.00015613991565368394, "loss": 0.1627, "step": 4383 }, { "epoch": 1.54, "grad_norm": 1.326809048652649, "learning_rate": 0.0001561027040436616, "loss": 0.1395, "step": 4384 }, { "epoch": 1.54, "grad_norm": 0.9218014478683472, "learning_rate": 0.0001560654924336393, "loss": 0.1224, "step": 4385 }, { "epoch": 1.54, "grad_norm": 2.6354215145111084, "learning_rate": 0.00015602828082361697, "loss": 0.5855, "step": 4386 }, { "epoch": 1.54, "grad_norm": 7.072910308837891, "learning_rate": 0.00015599106921359462, "loss": 1.493, "step": 4387 }, { "epoch": 1.54, "grad_norm": 1.5853562355041504, "learning_rate": 0.00015595385760357232, "loss": 0.2689, "step": 4388 }, { "epoch": 1.54, 
"grad_norm": 6.221762657165527, "learning_rate": 0.00015591664599354997, "loss": 0.4643, "step": 4389 }, { "epoch": 1.54, "grad_norm": 3.7703330516815186, "learning_rate": 0.00015587943438352765, "loss": 0.4222, "step": 4390 }, { "epoch": 1.54, "grad_norm": 2.02024245262146, "learning_rate": 0.00015584222277350532, "loss": 0.5177, "step": 4391 }, { "epoch": 1.54, "grad_norm": 2.0577681064605713, "learning_rate": 0.000155805011163483, "loss": 0.2802, "step": 4392 }, { "epoch": 1.54, "grad_norm": 2.6712489128112793, "learning_rate": 0.00015576779955346067, "loss": 0.1628, "step": 4393 }, { "epoch": 1.54, "grad_norm": 2.0091423988342285, "learning_rate": 0.00015573058794343832, "loss": 0.3688, "step": 4394 }, { "epoch": 1.54, "grad_norm": 5.743954181671143, "learning_rate": 0.000155693376333416, "loss": 0.5611, "step": 4395 }, { "epoch": 1.54, "grad_norm": 2.4916129112243652, "learning_rate": 0.0001556561647233937, "loss": 0.286, "step": 4396 }, { "epoch": 1.54, "grad_norm": 6.647517204284668, "learning_rate": 0.00015561895311337135, "loss": 0.7609, "step": 4397 }, { "epoch": 1.54, "grad_norm": 1.73363196849823, "learning_rate": 0.00015558174150334902, "loss": 0.1074, "step": 4398 }, { "epoch": 1.54, "grad_norm": 2.5416290760040283, "learning_rate": 0.00015554452989332673, "loss": 0.2281, "step": 4399 }, { "epoch": 1.54, "grad_norm": 1.1465420722961426, "learning_rate": 0.00015550731828330438, "loss": 0.123, "step": 4400 }, { "epoch": 1.54, "eval_loss": 0.3980734944343567, "eval_runtime": 51.2139, "eval_samples_per_second": 42.332, "eval_steps_per_second": 10.583, "eval_wer": 0.379173153433662, "step": 4400 }, { "epoch": 1.54, "grad_norm": 1.1583079099655151, "learning_rate": 0.00015547010667328205, "loss": 0.0627, "step": 4401 }, { "epoch": 1.54, "grad_norm": 1.6864182949066162, "learning_rate": 0.0001554328950632597, "loss": 0.1769, "step": 4402 }, { "epoch": 1.54, "grad_norm": 2.6987173557281494, "learning_rate": 0.0001553956834532374, "loss": 0.1046, "step": 4403 
}, { "epoch": 1.54, "grad_norm": 4.390762805938721, "learning_rate": 0.00015535847184321508, "loss": 0.0657, "step": 4404 }, { "epoch": 1.54, "grad_norm": 2.5673904418945312, "learning_rate": 0.00015532126023319273, "loss": 0.8568, "step": 4405 }, { "epoch": 1.54, "grad_norm": 1.5236790180206299, "learning_rate": 0.00015528404862317043, "loss": 0.1907, "step": 4406 }, { "epoch": 1.54, "grad_norm": 1.6562654972076416, "learning_rate": 0.00015524683701314808, "loss": 0.1583, "step": 4407 }, { "epoch": 1.54, "grad_norm": 2.632766008377075, "learning_rate": 0.00015520962540312576, "loss": 0.5559, "step": 4408 }, { "epoch": 1.54, "grad_norm": 2.8173253536224365, "learning_rate": 0.00015517241379310346, "loss": 0.3249, "step": 4409 }, { "epoch": 1.55, "grad_norm": 3.325995922088623, "learning_rate": 0.0001551352021830811, "loss": 0.4845, "step": 4410 }, { "epoch": 1.55, "grad_norm": 1.7911338806152344, "learning_rate": 0.00015509799057305878, "loss": 0.2434, "step": 4411 }, { "epoch": 1.55, "grad_norm": 2.012998342514038, "learning_rate": 0.00015506077896303646, "loss": 0.2877, "step": 4412 }, { "epoch": 1.55, "grad_norm": 1.2822898626327515, "learning_rate": 0.00015502356735301413, "loss": 0.2108, "step": 4413 }, { "epoch": 1.55, "grad_norm": 1.760624885559082, "learning_rate": 0.0001549863557429918, "loss": 0.1786, "step": 4414 }, { "epoch": 1.55, "grad_norm": 2.9734385013580322, "learning_rate": 0.00015494914413296946, "loss": 0.3499, "step": 4415 }, { "epoch": 1.55, "grad_norm": 1.6788557767868042, "learning_rate": 0.00015491193252294713, "loss": 0.2782, "step": 4416 }, { "epoch": 1.55, "grad_norm": 2.383653163909912, "learning_rate": 0.00015487472091292484, "loss": 0.4833, "step": 4417 }, { "epoch": 1.55, "grad_norm": 4.293880939483643, "learning_rate": 0.00015483750930290249, "loss": 0.5878, "step": 4418 }, { "epoch": 1.55, "grad_norm": 2.733428716659546, "learning_rate": 0.00015480029769288016, "loss": 0.2317, "step": 4419 }, { "epoch": 1.55, "grad_norm": 
1.5017122030258179, "learning_rate": 0.0001547630860828578, "loss": 0.0682, "step": 4420 }, { "epoch": 1.55, "grad_norm": 3.120365619659424, "learning_rate": 0.0001547258744728355, "loss": 0.3306, "step": 4421 }, { "epoch": 1.55, "grad_norm": 0.6654366254806519, "learning_rate": 0.0001546886628628132, "loss": 0.0215, "step": 4422 }, { "epoch": 1.55, "grad_norm": 2.2988080978393555, "learning_rate": 0.00015465145125279084, "loss": 0.282, "step": 4423 }, { "epoch": 1.55, "grad_norm": 3.34647536277771, "learning_rate": 0.00015461423964276854, "loss": 0.3574, "step": 4424 }, { "epoch": 1.55, "grad_norm": 1.9335713386535645, "learning_rate": 0.00015457702803274622, "loss": 0.1248, "step": 4425 }, { "epoch": 1.55, "grad_norm": 1.3534259796142578, "learning_rate": 0.00015453981642272387, "loss": 0.0529, "step": 4426 }, { "epoch": 1.55, "grad_norm": 8.45742130279541, "learning_rate": 0.00015450260481270157, "loss": 1.6093, "step": 4427 }, { "epoch": 1.55, "grad_norm": 0.5706729888916016, "learning_rate": 0.00015446539320267922, "loss": 0.0306, "step": 4428 }, { "epoch": 1.55, "grad_norm": 6.064884662628174, "learning_rate": 0.0001544281815926569, "loss": 0.898, "step": 4429 }, { "epoch": 1.55, "grad_norm": 2.807861328125, "learning_rate": 0.0001543909699826346, "loss": 1.1501, "step": 4430 }, { "epoch": 1.55, "grad_norm": 1.6200414896011353, "learning_rate": 0.00015435375837261224, "loss": 0.4798, "step": 4431 }, { "epoch": 1.55, "grad_norm": 2.61049485206604, "learning_rate": 0.00015431654676258992, "loss": 0.6317, "step": 4432 }, { "epoch": 1.55, "grad_norm": 1.9506374597549438, "learning_rate": 0.00015427933515256757, "loss": 0.2907, "step": 4433 }, { "epoch": 1.55, "grad_norm": 1.570434331893921, "learning_rate": 0.00015424212354254527, "loss": 0.2544, "step": 4434 }, { "epoch": 1.55, "grad_norm": 2.3015689849853516, "learning_rate": 0.00015420491193252295, "loss": 0.2989, "step": 4435 }, { "epoch": 1.55, "grad_norm": 1.8835831880569458, "learning_rate": 
0.0001541677003225006, "loss": 0.3247, "step": 4436 }, { "epoch": 1.55, "grad_norm": 2.0309693813323975, "learning_rate": 0.00015413048871247827, "loss": 0.3081, "step": 4437 }, { "epoch": 1.56, "grad_norm": 1.200584888458252, "learning_rate": 0.00015409327710245595, "loss": 0.1762, "step": 4438 }, { "epoch": 1.56, "grad_norm": 3.047585964202881, "learning_rate": 0.00015405606549243362, "loss": 0.3421, "step": 4439 }, { "epoch": 1.56, "grad_norm": 2.6859099864959717, "learning_rate": 0.0001540188538824113, "loss": 0.2895, "step": 4440 }, { "epoch": 1.56, "grad_norm": 3.2242605686187744, "learning_rate": 0.00015398164227238895, "loss": 0.5598, "step": 4441 }, { "epoch": 1.56, "grad_norm": 3.5308659076690674, "learning_rate": 0.00015394443066236665, "loss": 0.581, "step": 4442 }, { "epoch": 1.56, "grad_norm": 2.4729762077331543, "learning_rate": 0.00015390721905234433, "loss": 0.371, "step": 4443 }, { "epoch": 1.56, "grad_norm": 1.6830202341079712, "learning_rate": 0.00015387000744232198, "loss": 0.2435, "step": 4444 }, { "epoch": 1.56, "grad_norm": 2.3365495204925537, "learning_rate": 0.00015383279583229968, "loss": 0.3798, "step": 4445 }, { "epoch": 1.56, "grad_norm": 2.603057384490967, "learning_rate": 0.00015379558422227733, "loss": 0.2935, "step": 4446 }, { "epoch": 1.56, "grad_norm": 1.5318717956542969, "learning_rate": 0.000153758372612255, "loss": 0.1117, "step": 4447 }, { "epoch": 1.56, "grad_norm": 3.2214760780334473, "learning_rate": 0.0001537211610022327, "loss": 0.5403, "step": 4448 }, { "epoch": 1.56, "grad_norm": 1.5494588613510132, "learning_rate": 0.00015368394939221035, "loss": 0.1355, "step": 4449 }, { "epoch": 1.56, "grad_norm": 2.5164105892181396, "learning_rate": 0.00015364673778218803, "loss": 0.4476, "step": 4450 }, { "epoch": 1.56, "grad_norm": 2.3167803287506104, "learning_rate": 0.00015360952617216568, "loss": 0.1465, "step": 4451 }, { "epoch": 1.56, "grad_norm": 1.139276146888733, "learning_rate": 0.00015357231456214338, "loss": 0.0653, 
"step": 4452 }, { "epoch": 1.56, "grad_norm": 9.703216552734375, "learning_rate": 0.00015353510295212106, "loss": 0.5352, "step": 4453 }, { "epoch": 1.56, "grad_norm": 2.554642915725708, "learning_rate": 0.0001534978913420987, "loss": 0.0762, "step": 4454 }, { "epoch": 1.56, "grad_norm": 2.5939784049987793, "learning_rate": 0.0001534606797320764, "loss": 0.7377, "step": 4455 }, { "epoch": 1.56, "grad_norm": 2.136479139328003, "learning_rate": 0.00015342346812205408, "loss": 0.8079, "step": 4456 }, { "epoch": 1.56, "grad_norm": 1.0142302513122559, "learning_rate": 0.00015338625651203173, "loss": 0.2065, "step": 4457 }, { "epoch": 1.56, "grad_norm": 1.6144118309020996, "learning_rate": 0.0001533490449020094, "loss": 0.3742, "step": 4458 }, { "epoch": 1.56, "grad_norm": 1.8569650650024414, "learning_rate": 0.00015331183329198709, "loss": 0.2521, "step": 4459 }, { "epoch": 1.56, "grad_norm": 1.7510970830917358, "learning_rate": 0.00015327462168196476, "loss": 0.2786, "step": 4460 }, { "epoch": 1.56, "grad_norm": 2.130952835083008, "learning_rate": 0.00015323741007194244, "loss": 0.1905, "step": 4461 }, { "epoch": 1.56, "grad_norm": 2.0437982082366943, "learning_rate": 0.00015320019846192009, "loss": 0.367, "step": 4462 }, { "epoch": 1.56, "grad_norm": 2.792440891265869, "learning_rate": 0.0001531629868518978, "loss": 0.5759, "step": 4463 }, { "epoch": 1.56, "grad_norm": 1.3819493055343628, "learning_rate": 0.00015312577524187544, "loss": 0.2252, "step": 4464 }, { "epoch": 1.56, "grad_norm": 4.414833068847656, "learning_rate": 0.0001530885636318531, "loss": 0.4762, "step": 4465 }, { "epoch": 1.56, "grad_norm": 2.5327303409576416, "learning_rate": 0.00015305135202183082, "loss": 0.4483, "step": 4466 }, { "epoch": 1.57, "grad_norm": 2.1207656860351562, "learning_rate": 0.00015301414041180846, "loss": 0.2676, "step": 4467 }, { "epoch": 1.57, "grad_norm": 2.1019747257232666, "learning_rate": 0.00015297692880178614, "loss": 0.3797, "step": 4468 }, { "epoch": 1.57, 
"grad_norm": 2.4563004970550537, "learning_rate": 0.00015293971719176384, "loss": 0.2758, "step": 4469 }, { "epoch": 1.57, "grad_norm": 3.147643804550171, "learning_rate": 0.0001529025055817415, "loss": 0.9069, "step": 4470 }, { "epoch": 1.57, "grad_norm": 4.162635803222656, "learning_rate": 0.00015286529397171917, "loss": 0.6038, "step": 4471 }, { "epoch": 1.57, "grad_norm": 4.253012657165527, "learning_rate": 0.00015282808236169682, "loss": 0.8356, "step": 4472 }, { "epoch": 1.57, "grad_norm": 2.0794060230255127, "learning_rate": 0.00015279087075167452, "loss": 0.3112, "step": 4473 }, { "epoch": 1.57, "grad_norm": 3.6223506927490234, "learning_rate": 0.0001527536591416522, "loss": 0.3925, "step": 4474 }, { "epoch": 1.57, "grad_norm": 2.9207382202148438, "learning_rate": 0.00015271644753162984, "loss": 0.3373, "step": 4475 }, { "epoch": 1.57, "grad_norm": 2.2720437049865723, "learning_rate": 0.00015267923592160755, "loss": 0.2101, "step": 4476 }, { "epoch": 1.57, "grad_norm": 1.576826572418213, "learning_rate": 0.0001526420243115852, "loss": 0.2305, "step": 4477 }, { "epoch": 1.57, "grad_norm": 0.4385606646537781, "learning_rate": 0.00015260481270156287, "loss": 0.0335, "step": 4478 }, { "epoch": 1.57, "grad_norm": 2.5743887424468994, "learning_rate": 0.00015256760109154055, "loss": 0.8183, "step": 4479 }, { "epoch": 1.57, "grad_norm": 1.7291278839111328, "learning_rate": 0.00015253038948151822, "loss": 0.334, "step": 4480 }, { "epoch": 1.57, "grad_norm": 1.7327170372009277, "learning_rate": 0.0001524931778714959, "loss": 0.4015, "step": 4481 }, { "epoch": 1.57, "grad_norm": 1.8238621950149536, "learning_rate": 0.00015245596626147355, "loss": 0.4654, "step": 4482 }, { "epoch": 1.57, "grad_norm": 1.92159104347229, "learning_rate": 0.00015241875465145122, "loss": 0.4713, "step": 4483 }, { "epoch": 1.57, "grad_norm": 6.046019077301025, "learning_rate": 0.00015238154304142893, "loss": 0.5623, "step": 4484 }, { "epoch": 1.57, "grad_norm": 2.2049782276153564, 
"learning_rate": 0.00015234433143140657, "loss": 0.3545, "step": 4485 }, { "epoch": 1.57, "grad_norm": 1.759090542793274, "learning_rate": 0.00015230711982138425, "loss": 0.2411, "step": 4486 }, { "epoch": 1.57, "grad_norm": 2.1252946853637695, "learning_rate": 0.00015226990821136195, "loss": 0.2737, "step": 4487 }, { "epoch": 1.57, "grad_norm": 1.3396649360656738, "learning_rate": 0.0001522326966013396, "loss": 0.1346, "step": 4488 }, { "epoch": 1.57, "grad_norm": 1.8554527759552002, "learning_rate": 0.00015219548499131728, "loss": 0.3038, "step": 4489 }, { "epoch": 1.57, "grad_norm": 1.3641473054885864, "learning_rate": 0.00015215827338129493, "loss": 0.1102, "step": 4490 }, { "epoch": 1.57, "grad_norm": 3.5774457454681396, "learning_rate": 0.00015212106177127263, "loss": 0.4797, "step": 4491 }, { "epoch": 1.57, "grad_norm": 4.142420768737793, "learning_rate": 0.0001520838501612503, "loss": 0.3904, "step": 4492 }, { "epoch": 1.57, "grad_norm": 1.1774590015411377, "learning_rate": 0.00015204663855122795, "loss": 0.0748, "step": 4493 }, { "epoch": 1.57, "grad_norm": 3.950049638748169, "learning_rate": 0.00015200942694120566, "loss": 0.3689, "step": 4494 }, { "epoch": 1.57, "grad_norm": 0.5134997963905334, "learning_rate": 0.0001519722153311833, "loss": 0.0291, "step": 4495 }, { "epoch": 1.58, "grad_norm": 6.9983296394348145, "learning_rate": 0.00015193500372116098, "loss": 2.1653, "step": 4496 }, { "epoch": 1.58, "grad_norm": 4.325428485870361, "learning_rate": 0.00015189779211113868, "loss": 0.4339, "step": 4497 }, { "epoch": 1.58, "grad_norm": 5.906067371368408, "learning_rate": 0.00015186058050111633, "loss": 0.5815, "step": 4498 }, { "epoch": 1.58, "grad_norm": 6.516275882720947, "learning_rate": 0.000151823368891094, "loss": 0.2292, "step": 4499 }, { "epoch": 1.58, "grad_norm": 5.414793491363525, "learning_rate": 0.00015178615728107168, "loss": 0.3685, "step": 4500 }, { "epoch": 1.58, "eval_loss": 0.4531491696834564, "eval_runtime": 51.0994, 
"eval_samples_per_second": 42.427, "eval_steps_per_second": 10.607, "eval_wer": 0.3689673066943435, "step": 4500 }, { "epoch": 1.58, "grad_norm": 5.927810192108154, "learning_rate": 0.00015174894567104936, "loss": 0.4957, "step": 4501 }, { "epoch": 1.58, "grad_norm": 3.2968862056732178, "learning_rate": 0.00015171173406102704, "loss": 0.1763, "step": 4502 }, { "epoch": 1.58, "grad_norm": 18.59258270263672, "learning_rate": 0.00015167452245100468, "loss": 0.3655, "step": 4503 }, { "epoch": 1.58, "grad_norm": 9.49154281616211, "learning_rate": 0.00015163731084098236, "loss": 1.3948, "step": 4504 }, { "epoch": 1.58, "grad_norm": 1.5831422805786133, "learning_rate": 0.00015160009923096006, "loss": 0.5639, "step": 4505 }, { "epoch": 1.58, "grad_norm": 1.5149224996566772, "learning_rate": 0.0001515628876209377, "loss": 0.361, "step": 4506 }, { "epoch": 1.58, "grad_norm": 1.7891191244125366, "learning_rate": 0.0001515256760109154, "loss": 0.4626, "step": 4507 }, { "epoch": 1.58, "grad_norm": 1.3684577941894531, "learning_rate": 0.00015148846440089306, "loss": 0.2943, "step": 4508 }, { "epoch": 1.58, "grad_norm": 1.371821641921997, "learning_rate": 0.00015145125279087074, "loss": 0.1162, "step": 4509 }, { "epoch": 1.58, "grad_norm": 1.8998194932937622, "learning_rate": 0.00015141404118084842, "loss": 0.2725, "step": 4510 }, { "epoch": 1.58, "grad_norm": 3.163402557373047, "learning_rate": 0.00015137682957082606, "loss": 0.475, "step": 4511 }, { "epoch": 1.58, "grad_norm": 3.110438108444214, "learning_rate": 0.00015133961796080377, "loss": 0.4981, "step": 4512 }, { "epoch": 1.58, "grad_norm": 2.5122923851013184, "learning_rate": 0.00015130240635078144, "loss": 0.4744, "step": 4513 }, { "epoch": 1.58, "grad_norm": 5.760143280029297, "learning_rate": 0.0001512651947407591, "loss": 1.8897, "step": 4514 }, { "epoch": 1.58, "grad_norm": 1.6696605682373047, "learning_rate": 0.0001512279831307368, "loss": 0.2554, "step": 4515 }, { "epoch": 1.58, "grad_norm": 2.809109687805176, 
"learning_rate": 0.00015119077152071444, "loss": 0.406, "step": 4516 }, { "epoch": 1.58, "grad_norm": 1.2222541570663452, "learning_rate": 0.00015115355991069212, "loss": 0.0803, "step": 4517 }, { "epoch": 1.58, "grad_norm": 3.3390469551086426, "learning_rate": 0.00015111634830066982, "loss": 0.4206, "step": 4518 }, { "epoch": 1.58, "grad_norm": 1.309799075126648, "learning_rate": 0.00015107913669064747, "loss": 0.1442, "step": 4519 }, { "epoch": 1.58, "grad_norm": 3.854196310043335, "learning_rate": 0.00015104192508062515, "loss": 1.496, "step": 4520 }, { "epoch": 1.58, "grad_norm": 2.176786422729492, "learning_rate": 0.0001510047134706028, "loss": 0.2341, "step": 4521 }, { "epoch": 1.58, "grad_norm": 1.9741429090499878, "learning_rate": 0.0001509675018605805, "loss": 0.2108, "step": 4522 }, { "epoch": 1.58, "grad_norm": 5.400284290313721, "learning_rate": 0.00015093029025055817, "loss": 2.0911, "step": 4523 }, { "epoch": 1.59, "grad_norm": 3.421788215637207, "learning_rate": 0.00015089307864053582, "loss": 0.4519, "step": 4524 }, { "epoch": 1.59, "grad_norm": 4.890323162078857, "learning_rate": 0.0001508558670305135, "loss": 0.5325, "step": 4525 }, { "epoch": 1.59, "grad_norm": 3.7187812328338623, "learning_rate": 0.00015081865542049117, "loss": 0.392, "step": 4526 }, { "epoch": 1.59, "grad_norm": 2.6591031551361084, "learning_rate": 0.00015078144381046885, "loss": 0.3085, "step": 4527 }, { "epoch": 1.59, "grad_norm": 3.004119873046875, "learning_rate": 0.00015074423220044653, "loss": 0.3717, "step": 4528 }, { "epoch": 1.59, "grad_norm": 1.017032265663147, "learning_rate": 0.0001507070205904242, "loss": 0.0585, "step": 4529 }, { "epoch": 1.59, "grad_norm": 2.0729451179504395, "learning_rate": 0.00015066980898040188, "loss": 0.393, "step": 4530 }, { "epoch": 1.59, "grad_norm": 1.5264289379119873, "learning_rate": 0.00015063259737037955, "loss": 0.3011, "step": 4531 }, { "epoch": 1.59, "grad_norm": 1.535727858543396, "learning_rate": 0.0001505953857603572, "loss": 
0.3631, "step": 4532 }, { "epoch": 1.59, "grad_norm": 2.5578010082244873, "learning_rate": 0.0001505581741503349, "loss": 0.3524, "step": 4533 }, { "epoch": 1.59, "grad_norm": 2.180941104888916, "learning_rate": 0.00015052096254031255, "loss": 0.3525, "step": 4534 }, { "epoch": 1.59, "grad_norm": 1.2360469102859497, "learning_rate": 0.00015048375093029023, "loss": 0.1369, "step": 4535 }, { "epoch": 1.59, "grad_norm": 1.1341615915298462, "learning_rate": 0.00015044653932026793, "loss": 0.1489, "step": 4536 }, { "epoch": 1.59, "grad_norm": 1.9309412240982056, "learning_rate": 0.00015040932771024558, "loss": 0.2772, "step": 4537 }, { "epoch": 1.59, "grad_norm": 1.6193166971206665, "learning_rate": 0.00015037211610022326, "loss": 0.3045, "step": 4538 }, { "epoch": 1.59, "grad_norm": 1.2819677591323853, "learning_rate": 0.0001503349044902009, "loss": 0.2624, "step": 4539 }, { "epoch": 1.59, "grad_norm": 2.1933364868164062, "learning_rate": 0.0001502976928801786, "loss": 0.2117, "step": 4540 }, { "epoch": 1.59, "grad_norm": 2.030672073364258, "learning_rate": 0.00015026048127015628, "loss": 0.2636, "step": 4541 }, { "epoch": 1.59, "grad_norm": 3.077730894088745, "learning_rate": 0.00015022326966013393, "loss": 0.4608, "step": 4542 }, { "epoch": 1.59, "grad_norm": 1.8299646377563477, "learning_rate": 0.00015018605805011163, "loss": 0.1079, "step": 4543 }, { "epoch": 1.59, "grad_norm": 2.263606309890747, "learning_rate": 0.0001501488464400893, "loss": 0.2057, "step": 4544 }, { "epoch": 1.59, "grad_norm": 1.6388328075408936, "learning_rate": 0.00015011163483006696, "loss": 0.2149, "step": 4545 }, { "epoch": 1.59, "grad_norm": 1.8084484338760376, "learning_rate": 0.00015007442322004464, "loss": 0.2344, "step": 4546 }, { "epoch": 1.59, "grad_norm": 1.729175329208374, "learning_rate": 0.0001500372116100223, "loss": 0.1099, "step": 4547 }, { "epoch": 1.59, "grad_norm": 2.0611491203308105, "learning_rate": 0.00015, "loss": 0.3877, "step": 4548 }, { "epoch": 1.59, "grad_norm": 
4.244816303253174, "learning_rate": 0.00014996278838997766, "loss": 0.4705, "step": 4549 }, { "epoch": 1.59, "grad_norm": 3.561108112335205, "learning_rate": 0.00014992557677995534, "loss": 0.3884, "step": 4550 }, { "epoch": 1.59, "grad_norm": 3.055769205093384, "learning_rate": 0.00014988836516993301, "loss": 0.234, "step": 4551 }, { "epoch": 1.59, "grad_norm": 4.927545547485352, "learning_rate": 0.0001498511535599107, "loss": 1.9593, "step": 4552 }, { "epoch": 1.6, "grad_norm": 5.328706741333008, "learning_rate": 0.00014981394194988834, "loss": 1.9694, "step": 4553 }, { "epoch": 1.6, "grad_norm": 2.255901575088501, "learning_rate": 0.00014977673033986601, "loss": 0.1712, "step": 4554 }, { "epoch": 1.6, "grad_norm": 2.4930551052093506, "learning_rate": 0.00014973951872984372, "loss": 0.6505, "step": 4555 }, { "epoch": 1.6, "grad_norm": 2.2214691638946533, "learning_rate": 0.00014970230711982137, "loss": 0.6706, "step": 4556 }, { "epoch": 1.6, "grad_norm": 2.260343313217163, "learning_rate": 0.00014966509550979904, "loss": 0.4399, "step": 4557 }, { "epoch": 1.6, "grad_norm": 1.8949406147003174, "learning_rate": 0.00014962788389977672, "loss": 0.681, "step": 4558 }, { "epoch": 1.6, "grad_norm": 1.0256386995315552, "learning_rate": 0.0001495906722897544, "loss": 0.1752, "step": 4559 }, { "epoch": 1.6, "grad_norm": 2.9112942218780518, "learning_rate": 0.00014955346067973207, "loss": 0.2714, "step": 4560 }, { "epoch": 1.6, "grad_norm": 1.3259639739990234, "learning_rate": 0.00014951624906970975, "loss": 0.1771, "step": 4561 }, { "epoch": 1.6, "grad_norm": 2.3593876361846924, "learning_rate": 0.0001494790374596874, "loss": 0.51, "step": 4562 }, { "epoch": 1.6, "grad_norm": 2.028107166290283, "learning_rate": 0.00014944182584966507, "loss": 0.6044, "step": 4563 }, { "epoch": 1.6, "grad_norm": 2.4526591300964355, "learning_rate": 0.00014940461423964277, "loss": 0.3303, "step": 4564 }, { "epoch": 1.6, "grad_norm": 3.8882853984832764, "learning_rate": 
0.00014936740262962042, "loss": 0.7958, "step": 4565 }, { "epoch": 1.6, "grad_norm": 1.1775007247924805, "learning_rate": 0.0001493301910195981, "loss": 0.128, "step": 4566 }, { "epoch": 1.6, "grad_norm": 1.8732891082763672, "learning_rate": 0.00014929297940957577, "loss": 0.1814, "step": 4567 }, { "epoch": 1.6, "grad_norm": 2.4956729412078857, "learning_rate": 0.00014925576779955345, "loss": 0.2284, "step": 4568 }, { "epoch": 1.6, "grad_norm": 1.7072712182998657, "learning_rate": 0.00014921855618953112, "loss": 0.114, "step": 4569 }, { "epoch": 1.6, "grad_norm": 1.2081888914108276, "learning_rate": 0.0001491813445795088, "loss": 0.0593, "step": 4570 }, { "epoch": 1.6, "grad_norm": 3.359692335128784, "learning_rate": 0.00014914413296948648, "loss": 0.6738, "step": 4571 }, { "epoch": 1.6, "grad_norm": 2.427903652191162, "learning_rate": 0.00014910692135946412, "loss": 0.2915, "step": 4572 }, { "epoch": 1.6, "grad_norm": 1.3507580757141113, "learning_rate": 0.00014906970974944183, "loss": 0.2221, "step": 4573 }, { "epoch": 1.6, "grad_norm": 1.6972339153289795, "learning_rate": 0.00014903249813941948, "loss": 0.1641, "step": 4574 }, { "epoch": 1.6, "grad_norm": 4.0066328048706055, "learning_rate": 0.00014899528652939715, "loss": 0.6401, "step": 4575 }, { "epoch": 1.6, "grad_norm": 2.1944432258605957, "learning_rate": 0.00014895807491937483, "loss": 0.2104, "step": 4576 }, { "epoch": 1.6, "grad_norm": 3.7553157806396484, "learning_rate": 0.0001489208633093525, "loss": 0.3944, "step": 4577 }, { "epoch": 1.6, "grad_norm": null, "learning_rate": 0.0001489208633093525, "loss": 1.2432, "step": 4578 }, { "epoch": 1.6, "grad_norm": 1.1025826930999756, "learning_rate": 0.00014888365169933018, "loss": 0.0429, "step": 4579 }, { "epoch": 1.6, "grad_norm": 3.72552752494812, "learning_rate": 0.00014884644008930786, "loss": 0.7761, "step": 4580 }, { "epoch": 1.61, "grad_norm": 2.1023457050323486, "learning_rate": 0.00014880922847928553, "loss": 0.5812, "step": 4581 }, { "epoch": 
1.61, "grad_norm": 2.557485818862915, "learning_rate": 0.0001487720168692632, "loss": 0.7201, "step": 4582 }, { "epoch": 1.61, "grad_norm": 1.0474027395248413, "learning_rate": 0.00014873480525924088, "loss": 0.174, "step": 4583 }, { "epoch": 1.61, "grad_norm": 1.4817769527435303, "learning_rate": 0.00014869759364921853, "loss": 0.0965, "step": 4584 }, { "epoch": 1.61, "grad_norm": 3.8702480792999268, "learning_rate": 0.0001486603820391962, "loss": 0.9629, "step": 4585 }, { "epoch": 1.61, "grad_norm": 13.161911010742188, "learning_rate": 0.00014862317042917388, "loss": 2.8826, "step": 4586 }, { "epoch": 1.61, "grad_norm": 3.8920679092407227, "learning_rate": 0.00014858595881915156, "loss": 0.6731, "step": 4587 }, { "epoch": 1.61, "grad_norm": 1.4065778255462646, "learning_rate": 0.00014854874720912923, "loss": 0.2983, "step": 4588 }, { "epoch": 1.61, "grad_norm": 3.574580430984497, "learning_rate": 0.0001485115355991069, "loss": 0.6195, "step": 4589 }, { "epoch": 1.61, "grad_norm": 6.651939392089844, "learning_rate": 0.00014847432398908459, "loss": 0.2192, "step": 4590 }, { "epoch": 1.61, "grad_norm": 2.2979915142059326, "learning_rate": 0.00014843711237906226, "loss": 0.51, "step": 4591 }, { "epoch": 1.61, "grad_norm": 1.8823531866073608, "learning_rate": 0.00014839990076903994, "loss": 0.3496, "step": 4592 }, { "epoch": 1.61, "grad_norm": 3.272880792617798, "learning_rate": 0.0001483626891590176, "loss": 0.3933, "step": 4593 }, { "epoch": 1.61, "grad_norm": 2.0282230377197266, "learning_rate": 0.00014832547754899526, "loss": 0.5362, "step": 4594 }, { "epoch": 1.61, "grad_norm": 2.4727609157562256, "learning_rate": 0.00014828826593897294, "loss": 0.3841, "step": 4595 }, { "epoch": 1.61, "grad_norm": 1.0097224712371826, "learning_rate": 0.00014825105432895061, "loss": 0.1183, "step": 4596 }, { "epoch": 1.61, "grad_norm": 2.555164337158203, "learning_rate": 0.0001482138427189283, "loss": 0.2854, "step": 4597 }, { "epoch": 1.61, "grad_norm": 4.284493446350098, 
"learning_rate": 0.00014817663110890597, "loss": 0.3708, "step": 4598 }, { "epoch": 1.61, "grad_norm": 3.0818679332733154, "learning_rate": 0.00014813941949888364, "loss": 0.2883, "step": 4599 }, { "epoch": 1.61, "grad_norm": 5.769961833953857, "learning_rate": 0.00014810220788886132, "loss": 1.0105, "step": 4600 }, { "epoch": 1.61, "eval_loss": 0.43010884523391724, "eval_runtime": 51.2971, "eval_samples_per_second": 42.264, "eval_steps_per_second": 10.566, "eval_wer": 0.40140114167099117, "step": 4600 }, { "epoch": 1.61, "grad_norm": 3.1520931720733643, "learning_rate": 0.000148064996278839, "loss": 0.2645, "step": 4601 }, { "epoch": 1.61, "grad_norm": 1.3779430389404297, "learning_rate": 0.00014802778466881667, "loss": 0.0886, "step": 4602 }, { "epoch": 1.61, "grad_norm": 4.223806381225586, "learning_rate": 0.00014799057305879432, "loss": 0.5118, "step": 4603 }, { "epoch": 1.61, "grad_norm": 2.133859157562256, "learning_rate": 0.00014795336144877202, "loss": 0.1417, "step": 4604 }, { "epoch": 1.61, "grad_norm": 2.7136905193328857, "learning_rate": 0.00014791614983874967, "loss": 0.789, "step": 4605 }, { "epoch": 1.61, "grad_norm": 1.8167808055877686, "learning_rate": 0.00014787893822872734, "loss": 0.3996, "step": 4606 }, { "epoch": 1.61, "grad_norm": 1.4409061670303345, "learning_rate": 0.00014784172661870502, "loss": 0.3649, "step": 4607 }, { "epoch": 1.61, "grad_norm": 2.616767406463623, "learning_rate": 0.0001478045150086827, "loss": 0.4231, "step": 4608 }, { "epoch": 1.61, "grad_norm": 1.7175992727279663, "learning_rate": 0.00014776730339866037, "loss": 0.3993, "step": 4609 }, { "epoch": 1.62, "grad_norm": 1.3235002756118774, "learning_rate": 0.00014773009178863805, "loss": 0.1547, "step": 4610 }, { "epoch": 1.62, "grad_norm": 2.10302996635437, "learning_rate": 0.00014769288017861572, "loss": 0.3232, "step": 4611 }, { "epoch": 1.62, "grad_norm": 1.746387004852295, "learning_rate": 0.00014765566856859337, "loss": 0.333, "step": 4612 }, { "epoch": 1.62, 
"grad_norm": 2.292447805404663, "learning_rate": 0.00014761845695857108, "loss": 0.2756, "step": 4613 }, { "epoch": 1.62, "grad_norm": 1.5536998510360718, "learning_rate": 0.00014758124534854875, "loss": 0.2888, "step": 4614 }, { "epoch": 1.62, "grad_norm": 2.7711429595947266, "learning_rate": 0.0001475440337385264, "loss": 0.5742, "step": 4615 }, { "epoch": 1.62, "grad_norm": 1.2211471796035767, "learning_rate": 0.00014750682212850408, "loss": 0.1224, "step": 4616 }, { "epoch": 1.62, "grad_norm": 2.2878341674804688, "learning_rate": 0.00014746961051848175, "loss": 0.3532, "step": 4617 }, { "epoch": 1.62, "grad_norm": 1.6021337509155273, "learning_rate": 0.00014743239890845943, "loss": 0.2547, "step": 4618 }, { "epoch": 1.62, "grad_norm": 3.6017909049987793, "learning_rate": 0.0001473951872984371, "loss": 1.4935, "step": 4619 }, { "epoch": 1.62, "grad_norm": 2.1262946128845215, "learning_rate": 0.00014735797568841478, "loss": 0.2398, "step": 4620 }, { "epoch": 1.62, "grad_norm": 2.549982786178589, "learning_rate": 0.00014732076407839243, "loss": 0.7301, "step": 4621 }, { "epoch": 1.62, "grad_norm": 0.8897354602813721, "learning_rate": 0.00014728355246837013, "loss": 0.0852, "step": 4622 }, { "epoch": 1.62, "grad_norm": 2.3464910984039307, "learning_rate": 0.0001472463408583478, "loss": 0.3429, "step": 4623 }, { "epoch": 1.62, "grad_norm": 2.2914788722991943, "learning_rate": 0.00014720912924832545, "loss": 0.2171, "step": 4624 }, { "epoch": 1.62, "grad_norm": 5.431281566619873, "learning_rate": 0.00014717191763830313, "loss": 0.4327, "step": 4625 }, { "epoch": 1.62, "grad_norm": 2.59971022605896, "learning_rate": 0.0001471347060282808, "loss": 0.2839, "step": 4626 }, { "epoch": 1.62, "grad_norm": 2.3822665214538574, "learning_rate": 0.00014709749441825848, "loss": 0.1915, "step": 4627 }, { "epoch": 1.62, "grad_norm": 1.2179664373397827, "learning_rate": 0.00014706028280823616, "loss": 0.1606, "step": 4628 }, { "epoch": 1.62, "grad_norm": 3.38651180267334, 
"learning_rate": 0.00014702307119821383, "loss": 1.2565, "step": 4629 }, { "epoch": 1.62, "grad_norm": 1.9841926097869873, "learning_rate": 0.00014698585958819148, "loss": 0.7275, "step": 4630 }, { "epoch": 1.62, "grad_norm": 2.126986503601074, "learning_rate": 0.00014694864797816919, "loss": 0.8143, "step": 4631 }, { "epoch": 1.62, "grad_norm": 1.2944574356079102, "learning_rate": 0.00014691143636814686, "loss": 0.2415, "step": 4632 }, { "epoch": 1.62, "grad_norm": 1.2523384094238281, "learning_rate": 0.0001468742247581245, "loss": 0.3349, "step": 4633 }, { "epoch": 1.62, "grad_norm": 2.1736972332000732, "learning_rate": 0.00014683701314810219, "loss": 0.2556, "step": 4634 }, { "epoch": 1.62, "grad_norm": 2.1065425872802734, "learning_rate": 0.0001467998015380799, "loss": 0.2447, "step": 4635 }, { "epoch": 1.62, "grad_norm": 3.6076436042785645, "learning_rate": 0.00014676258992805754, "loss": 0.7707, "step": 4636 }, { "epoch": 1.62, "grad_norm": 2.045930862426758, "learning_rate": 0.0001467253783180352, "loss": 0.3137, "step": 4637 }, { "epoch": 1.63, "grad_norm": 1.0164872407913208, "learning_rate": 0.0001466881667080129, "loss": 0.1478, "step": 4638 }, { "epoch": 1.63, "grad_norm": 2.6273579597473145, "learning_rate": 0.00014665095509799056, "loss": 0.2894, "step": 4639 }, { "epoch": 1.63, "grad_norm": 2.061847686767578, "learning_rate": 0.00014661374348796824, "loss": 0.109, "step": 4640 }, { "epoch": 1.63, "grad_norm": 2.5759425163269043, "learning_rate": 0.00014657653187794592, "loss": 0.1666, "step": 4641 }, { "epoch": 1.63, "grad_norm": 2.0799243450164795, "learning_rate": 0.00014653932026792356, "loss": 0.2198, "step": 4642 }, { "epoch": 1.63, "grad_norm": 2.562469959259033, "learning_rate": 0.00014650210865790124, "loss": 0.3698, "step": 4643 }, { "epoch": 1.63, "grad_norm": 5.489830493927002, "learning_rate": 0.00014646489704787894, "loss": 0.3143, "step": 4644 }, { "epoch": 1.63, "grad_norm": 3.388197422027588, "learning_rate": 0.0001464276854378566, 
"loss": 0.2593, "step": 4645 }, { "epoch": 1.63, "grad_norm": 3.1358988285064697, "learning_rate": 0.00014639047382783427, "loss": 0.2429, "step": 4646 }, { "epoch": 1.63, "grad_norm": 2.042487859725952, "learning_rate": 0.00014635326221781194, "loss": 0.2667, "step": 4647 }, { "epoch": 1.63, "grad_norm": 3.1954731941223145, "learning_rate": 0.00014631605060778962, "loss": 0.2772, "step": 4648 }, { "epoch": 1.63, "grad_norm": 6.20961856842041, "learning_rate": 0.0001462788389977673, "loss": 0.8224, "step": 4649 }, { "epoch": 1.63, "grad_norm": 2.716186046600342, "learning_rate": 0.00014624162738774497, "loss": 0.3826, "step": 4650 }, { "epoch": 1.63, "grad_norm": 2.333148717880249, "learning_rate": 0.00014620441577772265, "loss": 0.2995, "step": 4651 }, { "epoch": 1.63, "grad_norm": 9.35218620300293, "learning_rate": 0.0001461672041677003, "loss": 1.6333, "step": 4652 }, { "epoch": 1.63, "grad_norm": 2.3607444763183594, "learning_rate": 0.000146129992557678, "loss": 0.213, "step": 4653 }, { "epoch": 1.63, "grad_norm": 3.0793917179107666, "learning_rate": 0.00014609278094765565, "loss": 0.1334, "step": 4654 }, { "epoch": 1.63, "grad_norm": 3.1427955627441406, "learning_rate": 0.00014605556933763332, "loss": 0.8882, "step": 4655 }, { "epoch": 1.63, "grad_norm": 3.179497718811035, "learning_rate": 0.000146018357727611, "loss": 1.0056, "step": 4656 }, { "epoch": 1.63, "grad_norm": 2.290648937225342, "learning_rate": 0.00014598114611758867, "loss": 0.439, "step": 4657 }, { "epoch": 1.63, "grad_norm": 2.732677698135376, "learning_rate": 0.00014594393450756635, "loss": 0.7399, "step": 4658 }, { "epoch": 1.63, "grad_norm": 1.9614787101745605, "learning_rate": 0.00014590672289754403, "loss": 0.3696, "step": 4659 }, { "epoch": 1.63, "grad_norm": 1.658251404762268, "learning_rate": 0.0001458695112875217, "loss": 0.3531, "step": 4660 }, { "epoch": 1.63, "grad_norm": 1.365149974822998, "learning_rate": 0.00014583229967749935, "loss": 0.1389, "step": 4661 }, { "epoch": 1.63, 
"grad_norm": 1.6162738800048828, "learning_rate": 0.00014579508806747705, "loss": 0.2063, "step": 4662 }, { "epoch": 1.63, "grad_norm": 1.5521570444107056, "learning_rate": 0.0001457578764574547, "loss": 0.1486, "step": 4663 }, { "epoch": 1.63, "grad_norm": 1.7068825960159302, "learning_rate": 0.00014572066484743238, "loss": 0.4079, "step": 4664 }, { "epoch": 1.63, "grad_norm": 3.820570230484009, "learning_rate": 0.00014568345323741005, "loss": 1.0572, "step": 4665 }, { "epoch": 1.63, "grad_norm": 1.7748942375183105, "learning_rate": 0.00014564624162738773, "loss": 0.2345, "step": 4666 }, { "epoch": 1.64, "grad_norm": 2.3004446029663086, "learning_rate": 0.0001456090300173654, "loss": 0.2676, "step": 4667 }, { "epoch": 1.64, "grad_norm": 1.6133116483688354, "learning_rate": 0.00014557181840734308, "loss": 0.1579, "step": 4668 }, { "epoch": 1.64, "grad_norm": 1.5437865257263184, "learning_rate": 0.00014553460679732076, "loss": 0.1428, "step": 4669 }, { "epoch": 1.64, "grad_norm": 3.339294910430908, "learning_rate": 0.00014549739518729843, "loss": 0.1919, "step": 4670 }, { "epoch": 1.64, "grad_norm": 2.862550973892212, "learning_rate": 0.0001454601835772761, "loss": 0.5847, "step": 4671 }, { "epoch": 1.64, "grad_norm": 2.6982133388519287, "learning_rate": 0.00014542297196725378, "loss": 0.0781, "step": 4672 }, { "epoch": 1.64, "grad_norm": 4.697220325469971, "learning_rate": 0.00014538576035723143, "loss": 0.4647, "step": 4673 }, { "epoch": 1.64, "grad_norm": 2.356121778488159, "learning_rate": 0.0001453485487472091, "loss": 0.253, "step": 4674 }, { "epoch": 1.64, "grad_norm": 3.0695745944976807, "learning_rate": 0.00014531133713718678, "loss": 0.1711, "step": 4675 }, { "epoch": 1.64, "grad_norm": 2.4559316635131836, "learning_rate": 0.00014527412552716446, "loss": 0.1617, "step": 4676 }, { "epoch": 1.64, "grad_norm": 5.379946708679199, "learning_rate": 0.00014523691391714214, "loss": 0.486, "step": 4677 }, { "epoch": 1.64, "grad_norm": 4.076511859893799, 
"learning_rate": 0.0001451997023071198, "loss": 1.2421, "step": 4678 }, { "epoch": 1.64, "grad_norm": 3.5741493701934814, "learning_rate": 0.0001451624906970975, "loss": 0.1734, "step": 4679 }, { "epoch": 1.64, "grad_norm": 2.880136728286743, "learning_rate": 0.00014512527908707516, "loss": 0.8671, "step": 4680 }, { "epoch": 1.64, "grad_norm": 2.4380300045013428, "learning_rate": 0.00014508806747705284, "loss": 0.3913, "step": 4681 }, { "epoch": 1.64, "grad_norm": 1.858528733253479, "learning_rate": 0.0001450508558670305, "loss": 0.4003, "step": 4682 }, { "epoch": 1.64, "grad_norm": 1.5311648845672607, "learning_rate": 0.00014501364425700816, "loss": 0.2132, "step": 4683 }, { "epoch": 1.64, "grad_norm": 1.997759222984314, "learning_rate": 0.00014497643264698584, "loss": 0.4408, "step": 4684 }, { "epoch": 1.64, "grad_norm": 2.679245710372925, "learning_rate": 0.00014493922103696352, "loss": 0.4171, "step": 4685 }, { "epoch": 1.64, "grad_norm": 3.2098543643951416, "learning_rate": 0.0001449020094269412, "loss": 0.3754, "step": 4686 }, { "epoch": 1.64, "grad_norm": 3.1209075450897217, "learning_rate": 0.00014486479781691887, "loss": 0.6859, "step": 4687 }, { "epoch": 1.64, "grad_norm": 1.5616499185562134, "learning_rate": 0.00014482758620689654, "loss": 0.1983, "step": 4688 }, { "epoch": 1.64, "grad_norm": 2.346381425857544, "learning_rate": 0.00014479037459687422, "loss": 0.4227, "step": 4689 }, { "epoch": 1.64, "grad_norm": 2.0079967975616455, "learning_rate": 0.0001447531629868519, "loss": 0.2501, "step": 4690 }, { "epoch": 1.64, "grad_norm": 1.4115216732025146, "learning_rate": 0.00014471595137682954, "loss": 0.1448, "step": 4691 }, { "epoch": 1.64, "grad_norm": 1.5479143857955933, "learning_rate": 0.00014467873976680725, "loss": 0.0889, "step": 4692 }, { "epoch": 1.64, "grad_norm": 2.8004372119903564, "learning_rate": 0.00014464152815678492, "loss": 0.2043, "step": 4693 }, { "epoch": 1.64, "grad_norm": 3.007761240005493, "learning_rate": 0.00014460431654676257, 
"loss": 0.0748, "step": 4694 }, { "epoch": 1.65, "grad_norm": 1.9818124771118164, "learning_rate": 0.00014456710493674025, "loss": 0.2978, "step": 4695 }, { "epoch": 1.65, "grad_norm": 2.8312606811523438, "learning_rate": 0.00014452989332671792, "loss": 0.2482, "step": 4696 }, { "epoch": 1.65, "grad_norm": 2.239459753036499, "learning_rate": 0.0001444926817166956, "loss": 0.1302, "step": 4697 }, { "epoch": 1.65, "grad_norm": 2.9169397354125977, "learning_rate": 0.00014445547010667327, "loss": 0.2396, "step": 4698 }, { "epoch": 1.65, "grad_norm": 1.8439327478408813, "learning_rate": 0.00014441825849665095, "loss": 0.1673, "step": 4699 }, { "epoch": 1.65, "grad_norm": 4.448665618896484, "learning_rate": 0.0001443810468866286, "loss": 1.1154, "step": 4700 }, { "epoch": 1.65, "eval_loss": 0.5081397891044617, "eval_runtime": 51.1352, "eval_samples_per_second": 42.397, "eval_steps_per_second": 10.599, "eval_wer": 0.4475869226777374, "step": 4700 }, { "epoch": 1.65, "grad_norm": 2.9089102745056152, "learning_rate": 0.0001443438352766063, "loss": 0.3261, "step": 4701 }, { "epoch": 1.65, "grad_norm": 2.1453356742858887, "learning_rate": 0.00014430662366658398, "loss": 0.2774, "step": 4702 }, { "epoch": 1.65, "grad_norm": 3.1812984943389893, "learning_rate": 0.00014426941205656163, "loss": 0.1525, "step": 4703 }, { "epoch": 1.65, "grad_norm": 1.623590350151062, "learning_rate": 0.0001442322004465393, "loss": 0.1587, "step": 4704 }, { "epoch": 1.65, "grad_norm": 2.106051445007324, "learning_rate": 0.00014419498883651698, "loss": 0.3934, "step": 4705 }, { "epoch": 1.65, "grad_norm": 2.303579807281494, "learning_rate": 0.00014415777722649465, "loss": 0.5234, "step": 4706 }, { "epoch": 1.65, "grad_norm": 2.132972240447998, "learning_rate": 0.00014412056561647233, "loss": 0.4725, "step": 4707 }, { "epoch": 1.65, "grad_norm": 1.8919379711151123, "learning_rate": 0.00014408335400645, "loss": 0.2028, "step": 4708 }, { "epoch": 1.65, "grad_norm": 1.383896827697754, "learning_rate": 
0.00014404614239642765, "loss": 0.1585, "step": 4709 }, { "epoch": 1.65, "grad_norm": 2.1082210540771484, "learning_rate": 0.00014400893078640536, "loss": 0.5981, "step": 4710 }, { "epoch": 1.65, "grad_norm": 1.662786841392517, "learning_rate": 0.00014397171917638303, "loss": 0.2439, "step": 4711 }, { "epoch": 1.65, "grad_norm": 2.6457130908966064, "learning_rate": 0.00014393450756636068, "loss": 0.43, "step": 4712 }, { "epoch": 1.65, "grad_norm": 2.411425828933716, "learning_rate": 0.00014389729595633836, "loss": 0.3565, "step": 4713 }, { "epoch": 1.65, "grad_norm": 2.6090455055236816, "learning_rate": 0.00014386008434631606, "loss": 0.1975, "step": 4714 }, { "epoch": 1.65, "grad_norm": 2.9632768630981445, "learning_rate": 0.0001438228727362937, "loss": 0.202, "step": 4715 }, { "epoch": 1.65, "grad_norm": 2.053697109222412, "learning_rate": 0.00014378566112627138, "loss": 0.3983, "step": 4716 }, { "epoch": 1.65, "grad_norm": 2.210340738296509, "learning_rate": 0.00014374844951624906, "loss": 0.3449, "step": 4717 }, { "epoch": 1.65, "grad_norm": 1.4516555070877075, "learning_rate": 0.00014371123790622674, "loss": 0.2329, "step": 4718 }, { "epoch": 1.65, "grad_norm": 1.6123175621032715, "learning_rate": 0.0001436740262962044, "loss": 0.2082, "step": 4719 }, { "epoch": 1.65, "grad_norm": 1.6988117694854736, "learning_rate": 0.0001436368146861821, "loss": 0.1245, "step": 4720 }, { "epoch": 1.65, "grad_norm": 1.9669654369354248, "learning_rate": 0.00014359960307615974, "loss": 0.2072, "step": 4721 }, { "epoch": 1.65, "grad_norm": 2.498318672180176, "learning_rate": 0.0001435623914661374, "loss": 0.1746, "step": 4722 }, { "epoch": 1.65, "grad_norm": 2.309772253036499, "learning_rate": 0.00014352517985611511, "loss": 0.3295, "step": 4723 }, { "epoch": 1.66, "grad_norm": 1.7596651315689087, "learning_rate": 0.00014348796824609276, "loss": 0.1938, "step": 4724 }, { "epoch": 1.66, "grad_norm": 2.1776697635650635, "learning_rate": 0.00014345075663607044, "loss": 0.124, 
"step": 4725 }, { "epoch": 1.66, "grad_norm": 3.659687042236328, "learning_rate": 0.00014341354502604811, "loss": 0.4348, "step": 4726 }, { "epoch": 1.66, "grad_norm": 2.932208776473999, "learning_rate": 0.0001433763334160258, "loss": 0.3146, "step": 4727 }, { "epoch": 1.66, "grad_norm": 2.094736099243164, "learning_rate": 0.00014333912180600347, "loss": 0.1576, "step": 4728 }, { "epoch": 1.66, "grad_norm": 1.049635887145996, "learning_rate": 0.00014330191019598114, "loss": 0.0841, "step": 4729 }, { "epoch": 1.66, "grad_norm": 1.7156835794448853, "learning_rate": 0.0001432646985859588, "loss": 0.7529, "step": 4730 }, { "epoch": 1.66, "grad_norm": 1.7752554416656494, "learning_rate": 0.00014322748697593647, "loss": 0.1068, "step": 4731 }, { "epoch": 1.66, "grad_norm": 1.4473437070846558, "learning_rate": 0.00014319027536591417, "loss": 0.2724, "step": 4732 }, { "epoch": 1.66, "grad_norm": 1.5380414724349976, "learning_rate": 0.00014315306375589182, "loss": 0.317, "step": 4733 }, { "epoch": 1.66, "grad_norm": 3.0182528495788574, "learning_rate": 0.0001431158521458695, "loss": 0.4587, "step": 4734 }, { "epoch": 1.66, "grad_norm": 1.3725048303604126, "learning_rate": 0.00014307864053584717, "loss": 0.2012, "step": 4735 }, { "epoch": 1.66, "grad_norm": 5.923343658447266, "learning_rate": 0.00014304142892582485, "loss": 0.4233, "step": 4736 }, { "epoch": 1.66, "grad_norm": 0.7124632596969604, "learning_rate": 0.00014300421731580252, "loss": 0.067, "step": 4737 }, { "epoch": 1.66, "grad_norm": 3.3575642108917236, "learning_rate": 0.0001429670057057802, "loss": 0.8093, "step": 4738 }, { "epoch": 1.66, "grad_norm": 2.0340492725372314, "learning_rate": 0.00014292979409575787, "loss": 0.3322, "step": 4739 }, { "epoch": 1.66, "grad_norm": 0.9786810874938965, "learning_rate": 0.00014289258248573552, "loss": 0.0871, "step": 4740 }, { "epoch": 1.66, "grad_norm": 1.9138987064361572, "learning_rate": 0.00014285537087571322, "loss": 0.3063, "step": 4741 }, { "epoch": 1.66, 
"grad_norm": 1.8535488843917847, "learning_rate": 0.00014281815926569087, "loss": 0.3178, "step": 4742 }, { "epoch": 1.66, "grad_norm": 1.4353394508361816, "learning_rate": 0.00014278094765566855, "loss": 0.1811, "step": 4743 }, { "epoch": 1.66, "grad_norm": 3.0626792907714844, "learning_rate": 0.00014274373604564622, "loss": 0.4848, "step": 4744 }, { "epoch": 1.66, "grad_norm": 2.1779870986938477, "learning_rate": 0.0001427065244356239, "loss": 0.2258, "step": 4745 }, { "epoch": 1.66, "grad_norm": 7.890713214874268, "learning_rate": 0.00014266931282560158, "loss": 0.9487, "step": 4746 }, { "epoch": 1.66, "grad_norm": 3.38159441947937, "learning_rate": 0.00014263210121557925, "loss": 0.2995, "step": 4747 }, { "epoch": 1.66, "grad_norm": 2.917301654815674, "learning_rate": 0.00014259488960555693, "loss": 0.5792, "step": 4748 }, { "epoch": 1.66, "grad_norm": 3.2175188064575195, "learning_rate": 0.00014255767799553458, "loss": 0.1773, "step": 4749 }, { "epoch": 1.66, "grad_norm": 2.1095893383026123, "learning_rate": 0.00014252046638551228, "loss": 0.2296, "step": 4750 }, { "epoch": 1.66, "grad_norm": 3.4463748931884766, "learning_rate": 0.00014248325477548993, "loss": 0.0579, "step": 4751 }, { "epoch": 1.67, "grad_norm": 3.1098155975341797, "learning_rate": 0.0001424460431654676, "loss": 0.6259, "step": 4752 }, { "epoch": 1.67, "grad_norm": 1.331878662109375, "learning_rate": 0.00014240883155544528, "loss": 0.0832, "step": 4753 }, { "epoch": 1.67, "grad_norm": 2.0971672534942627, "learning_rate": 0.00014237161994542296, "loss": 0.1894, "step": 4754 }, { "epoch": 1.67, "grad_norm": 9.917618751525879, "learning_rate": 0.00014233440833540063, "loss": 2.6677, "step": 4755 }, { "epoch": 1.67, "grad_norm": 1.685046672821045, "learning_rate": 0.0001422971967253783, "loss": 0.4792, "step": 4756 }, { "epoch": 1.67, "grad_norm": 3.0383598804473877, "learning_rate": 0.00014225998511535598, "loss": 0.6298, "step": 4757 }, { "epoch": 1.67, "grad_norm": 1.575229525566101, 
"learning_rate": 0.00014222277350533366, "loss": 0.1554, "step": 4758 }, { "epoch": 1.67, "grad_norm": 1.6466107368469238, "learning_rate": 0.00014218556189531133, "loss": 0.2602, "step": 4759 }, { "epoch": 1.67, "grad_norm": 1.8354624509811401, "learning_rate": 0.000142148350285289, "loss": 0.258, "step": 4760 }, { "epoch": 1.67, "grad_norm": 2.351383924484253, "learning_rate": 0.00014211113867526666, "loss": 0.3148, "step": 4761 }, { "epoch": 1.67, "grad_norm": 4.153576374053955, "learning_rate": 0.00014207392706524433, "loss": 0.4399, "step": 4762 }, { "epoch": 1.67, "grad_norm": 1.340376853942871, "learning_rate": 0.000142036715455222, "loss": 0.1725, "step": 4763 }, { "epoch": 1.67, "grad_norm": 2.0633182525634766, "learning_rate": 0.00014199950384519969, "loss": 0.3566, "step": 4764 }, { "epoch": 1.67, "grad_norm": 1.320162057876587, "learning_rate": 0.00014196229223517736, "loss": 0.2584, "step": 4765 }, { "epoch": 1.67, "grad_norm": 1.1114542484283447, "learning_rate": 0.00014192508062515504, "loss": 0.0903, "step": 4766 }, { "epoch": 1.67, "grad_norm": 3.655611276626587, "learning_rate": 0.00014188786901513271, "loss": 0.4427, "step": 4767 }, { "epoch": 1.67, "grad_norm": 5.079207897186279, "learning_rate": 0.0001418506574051104, "loss": 0.547, "step": 4768 }, { "epoch": 1.67, "grad_norm": 4.637149810791016, "learning_rate": 0.00014181344579508807, "loss": 0.2594, "step": 4769 }, { "epoch": 1.67, "grad_norm": 1.4649180173873901, "learning_rate": 0.00014177623418506571, "loss": 0.0639, "step": 4770 }, { "epoch": 1.67, "grad_norm": 2.595176935195923, "learning_rate": 0.00014173902257504342, "loss": 0.1618, "step": 4771 }, { "epoch": 1.67, "grad_norm": 3.4233720302581787, "learning_rate": 0.00014170181096502107, "loss": 0.3967, "step": 4772 }, { "epoch": 1.67, "grad_norm": 5.957653045654297, "learning_rate": 0.00014166459935499874, "loss": 1.8509, "step": 4773 }, { "epoch": 1.67, "grad_norm": 1.967976450920105, "learning_rate": 0.00014162738774497642, "loss": 
0.1728, "step": 4774 }, { "epoch": 1.67, "grad_norm": 3.4824140071868896, "learning_rate": 0.0001415901761349541, "loss": 0.2681, "step": 4775 }, { "epoch": 1.67, "grad_norm": 1.7179447412490845, "learning_rate": 0.00014155296452493177, "loss": 0.1949, "step": 4776 }, { "epoch": 1.67, "grad_norm": 2.076540946960449, "learning_rate": 0.00014151575291490944, "loss": 0.248, "step": 4777 }, { "epoch": 1.67, "grad_norm": 2.4143893718719482, "learning_rate": 0.00014147854130488712, "loss": 0.3014, "step": 4778 }, { "epoch": 1.67, "grad_norm": 4.271050930023193, "learning_rate": 0.00014144132969486477, "loss": 0.3233, "step": 4779 }, { "epoch": 1.67, "grad_norm": 4.913974761962891, "learning_rate": 0.00014140411808484247, "loss": 1.046, "step": 4780 }, { "epoch": 1.68, "grad_norm": 3.808880090713501, "learning_rate": 0.00014136690647482015, "loss": 0.5579, "step": 4781 }, { "epoch": 1.68, "grad_norm": 3.678056478500366, "learning_rate": 0.0001413296948647978, "loss": 0.5987, "step": 4782 }, { "epoch": 1.68, "grad_norm": 1.150814175605774, "learning_rate": 0.00014129248325477547, "loss": 0.1279, "step": 4783 }, { "epoch": 1.68, "grad_norm": 2.413187026977539, "learning_rate": 0.00014125527164475315, "loss": 0.4254, "step": 4784 }, { "epoch": 1.68, "grad_norm": 1.675049066543579, "learning_rate": 0.00014121806003473082, "loss": 0.1614, "step": 4785 }, { "epoch": 1.68, "grad_norm": 3.6439006328582764, "learning_rate": 0.0001411808484247085, "loss": 0.4455, "step": 4786 }, { "epoch": 1.68, "grad_norm": 3.2664308547973633, "learning_rate": 0.00014114363681468618, "loss": 0.757, "step": 4787 }, { "epoch": 1.68, "grad_norm": 2.9309678077697754, "learning_rate": 0.00014110642520466382, "loss": 0.3334, "step": 4788 }, { "epoch": 1.68, "grad_norm": 2.2318620681762695, "learning_rate": 0.00014106921359464153, "loss": 0.3742, "step": 4789 }, { "epoch": 1.68, "grad_norm": 2.364555597305298, "learning_rate": 0.0001410320019846192, "loss": 0.3565, "step": 4790 }, { "epoch": 1.68, 
"grad_norm": 2.487501859664917, "learning_rate": 0.00014099479037459685, "loss": 0.2205, "step": 4791 }, { "epoch": 1.68, "grad_norm": 1.8816657066345215, "learning_rate": 0.00014095757876457453, "loss": 0.1847, "step": 4792 }, { "epoch": 1.68, "grad_norm": 2.2951865196228027, "learning_rate": 0.0001409203671545522, "loss": 0.258, "step": 4793 }, { "epoch": 1.68, "grad_norm": 1.8484877347946167, "learning_rate": 0.00014088315554452988, "loss": 0.1441, "step": 4794 }, { "epoch": 1.68, "grad_norm": 3.879554033279419, "learning_rate": 0.00014084594393450755, "loss": 0.5913, "step": 4795 }, { "epoch": 1.68, "grad_norm": 8.260385513305664, "learning_rate": 0.00014080873232448523, "loss": 1.9975, "step": 4796 }, { "epoch": 1.68, "grad_norm": 2.673550605773926, "learning_rate": 0.0001407715207144629, "loss": 0.6173, "step": 4797 }, { "epoch": 1.68, "grad_norm": 2.3849501609802246, "learning_rate": 0.00014073430910444058, "loss": 0.2429, "step": 4798 }, { "epoch": 1.68, "grad_norm": 4.342643737792969, "learning_rate": 0.00014069709749441826, "loss": 0.6903, "step": 4799 }, { "epoch": 1.68, "grad_norm": 2.8125104904174805, "learning_rate": 0.0001406598858843959, "loss": 0.5278, "step": 4800 }, { "epoch": 1.68, "eval_loss": 0.4198063313961029, "eval_runtime": 51.395, "eval_samples_per_second": 42.183, "eval_steps_per_second": 10.546, "eval_wer": 0.3914547656114859, "step": 4800 }, { "epoch": 1.68, "grad_norm": 1.2338154315948486, "learning_rate": 0.00014062267427437358, "loss": 0.102, "step": 4801 }, { "epoch": 1.68, "grad_norm": 1.0008563995361328, "learning_rate": 0.00014058546266435129, "loss": 0.0983, "step": 4802 }, { "epoch": 1.68, "grad_norm": 1.7836005687713623, "learning_rate": 0.00014054825105432893, "loss": 0.0784, "step": 4803 }, { "epoch": 1.68, "grad_norm": 5.8645453453063965, "learning_rate": 0.0001405110394443066, "loss": 0.5595, "step": 4804 }, { "epoch": 1.68, "grad_norm": 6.060259819030762, "learning_rate": 0.00014047382783428429, "loss": 1.0248, "step": 
4805 }, { "epoch": 1.68, "grad_norm": 2.253270387649536, "learning_rate": 0.00014043661622426196, "loss": 0.417, "step": 4806 }, { "epoch": 1.68, "grad_norm": 2.2081809043884277, "learning_rate": 0.00014039940461423964, "loss": 0.4441, "step": 4807 }, { "epoch": 1.68, "grad_norm": 2.563767910003662, "learning_rate": 0.0001403621930042173, "loss": 0.5687, "step": 4808 }, { "epoch": 1.69, "grad_norm": 1.858834147453308, "learning_rate": 0.00014032498139419496, "loss": 0.491, "step": 4809 }, { "epoch": 1.69, "grad_norm": 2.5288004875183105, "learning_rate": 0.00014028776978417264, "loss": 0.4507, "step": 4810 }, { "epoch": 1.69, "grad_norm": 1.2425075769424438, "learning_rate": 0.00014025055817415034, "loss": 0.2789, "step": 4811 }, { "epoch": 1.69, "grad_norm": 3.400965929031372, "learning_rate": 0.000140213346564128, "loss": 0.785, "step": 4812 }, { "epoch": 1.69, "grad_norm": 2.7308123111724854, "learning_rate": 0.00014017613495410566, "loss": 0.4628, "step": 4813 }, { "epoch": 1.69, "grad_norm": 2.1761913299560547, "learning_rate": 0.00014013892334408334, "loss": 0.4261, "step": 4814 }, { "epoch": 1.69, "grad_norm": 3.010317325592041, "learning_rate": 0.00014010171173406102, "loss": 0.2918, "step": 4815 }, { "epoch": 1.69, "grad_norm": 2.6612460613250732, "learning_rate": 0.0001400645001240387, "loss": 0.4434, "step": 4816 }, { "epoch": 1.69, "grad_norm": 1.3962461948394775, "learning_rate": 0.00014002728851401637, "loss": 0.182, "step": 4817 }, { "epoch": 1.69, "grad_norm": 1.6107089519500732, "learning_rate": 0.00013999007690399404, "loss": 0.14, "step": 4818 }, { "epoch": 1.69, "grad_norm": 0.7715272903442383, "learning_rate": 0.0001399528652939717, "loss": 0.0546, "step": 4819 }, { "epoch": 1.69, "grad_norm": 1.2983365058898926, "learning_rate": 0.0001399156536839494, "loss": 0.0752, "step": 4820 }, { "epoch": 1.69, "grad_norm": 1.6679033041000366, "learning_rate": 0.00013987844207392704, "loss": 0.1966, "step": 4821 }, { "epoch": 1.69, "grad_norm": 
1.6639858484268188, "learning_rate": 0.00013984123046390472, "loss": 0.0978, "step": 4822 }, { "epoch": 1.69, "grad_norm": 0.8459872007369995, "learning_rate": 0.0001398040188538824, "loss": 0.0403, "step": 4823 }, { "epoch": 1.69, "grad_norm": 8.972551345825195, "learning_rate": 0.00013976680724386007, "loss": 1.5492, "step": 4824 }, { "epoch": 1.69, "grad_norm": 3.672152042388916, "learning_rate": 0.00013972959563383775, "loss": 0.5177, "step": 4825 }, { "epoch": 1.69, "grad_norm": 4.917130947113037, "learning_rate": 0.00013969238402381542, "loss": 0.5867, "step": 4826 }, { "epoch": 1.69, "grad_norm": 3.8166067600250244, "learning_rate": 0.0001396551724137931, "loss": 0.7695, "step": 4827 }, { "epoch": 1.69, "grad_norm": NaN, "learning_rate": 0.0001396551724137931, "loss": 0.0208, "step": 4828 }, { "epoch": 1.69, "grad_norm": 5.4278788566589355, "learning_rate": 0.00013961796080377075, "loss": 0.9888, "step": 4829 }, { "epoch": 1.69, "grad_norm": 2.555227041244507, "learning_rate": 0.00013958074919374845, "loss": 0.4968, "step": 4830 }, { "epoch": 1.69, "grad_norm": 1.814968228340149, "learning_rate": 0.0001395435375837261, "loss": 0.3631, "step": 4831 }, { "epoch": 1.69, "grad_norm": 1.8207460641860962, "learning_rate": 0.00013950632597370377, "loss": 0.4976, "step": 4832 }, { "epoch": 1.69, "grad_norm": 2.339345693588257, "learning_rate": 0.00013946911436368145, "loss": 1.0818, "step": 4833 }, { "epoch": 1.69, "grad_norm": 2.1169614791870117, "learning_rate": 0.00013943190275365913, "loss": 0.2953, "step": 4834 }, { "epoch": 1.69, "grad_norm": 1.9661637544631958, "learning_rate": 0.0001393946911436368, "loss": 0.4478, "step": 4835 }, { "epoch": 1.69, "grad_norm": 1.695245623588562, "learning_rate": 0.00013935747953361448, "loss": 0.3099, "step": 4836 }, { "epoch": 1.69, "grad_norm": 1.6964011192321777, "learning_rate": 0.00013932026792359215, "loss": 0.3012, "step": 4837 }, { "epoch": 1.7, "grad_norm": 0.6117279529571533, "learning_rate": 
0.00013928305631356983, "loss": 0.0769, "step": 4838 }, { "epoch": 1.7, "grad_norm": 4.413363933563232, "learning_rate": 0.0001392458447035475, "loss": 0.5109, "step": 4839 }, { "epoch": 1.7, "grad_norm": 5.733808994293213, "learning_rate": 0.00013920863309352518, "loss": 1.3301, "step": 4840 }, { "epoch": 1.7, "grad_norm": 2.3918161392211914, "learning_rate": 0.00013917142148350283, "loss": 0.716, "step": 4841 }, { "epoch": 1.7, "grad_norm": 1.3467110395431519, "learning_rate": 0.0001391342098734805, "loss": 0.186, "step": 4842 }, { "epoch": 1.7, "grad_norm": 1.767811894416809, "learning_rate": 0.00013909699826345818, "loss": 0.2146, "step": 4843 }, { "epoch": 1.7, "grad_norm": 2.754969835281372, "learning_rate": 0.00013905978665343586, "loss": 0.4884, "step": 4844 }, { "epoch": 1.7, "grad_norm": 1.2362042665481567, "learning_rate": 0.00013902257504341353, "loss": 0.2557, "step": 4845 }, { "epoch": 1.7, "grad_norm": 1.3661056756973267, "learning_rate": 0.0001389853634333912, "loss": 0.1672, "step": 4846 }, { "epoch": 1.7, "grad_norm": 0.7961978912353516, "learning_rate": 0.00013894815182336888, "loss": 0.0539, "step": 4847 }, { "epoch": 1.7, "grad_norm": 2.0398333072662354, "learning_rate": 0.00013891094021334656, "loss": 0.2883, "step": 4848 }, { "epoch": 1.7, "grad_norm": 1.8911408185958862, "learning_rate": 0.00013887372860332424, "loss": 0.2, "step": 4849 }, { "epoch": 1.7, "grad_norm": 3.2513821125030518, "learning_rate": 0.00013883651699330188, "loss": 0.5697, "step": 4850 }, { "epoch": 1.7, "grad_norm": 1.9013601541519165, "learning_rate": 0.00013879930538327956, "loss": 0.1448, "step": 4851 }, { "epoch": 1.7, "grad_norm": 3.6349408626556396, "learning_rate": 0.00013876209377325724, "loss": 0.4995, "step": 4852 }, { "epoch": 1.7, "grad_norm": 1.0852819681167603, "learning_rate": 0.0001387248821632349, "loss": 0.0309, "step": 4853 }, { "epoch": 1.7, "grad_norm": NaN, "learning_rate": 0.0001387248821632349, "loss": 0.0687, "step": 4854 }, { "epoch": 1.7, 
"grad_norm": 2.3139288425445557, "learning_rate": 0.0001386876705532126, "loss": 0.572, "step": 4855 }, { "epoch": 1.7, "grad_norm": 2.169691324234009, "learning_rate": 0.00013865045894319026, "loss": 0.265, "step": 4856 }, { "epoch": 1.7, "grad_norm": 2.2995455265045166, "learning_rate": 0.00013861324733316794, "loss": 0.4024, "step": 4857 }, { "epoch": 1.7, "grad_norm": 1.8771644830703735, "learning_rate": 0.00013857603572314562, "loss": 0.2272, "step": 4858 }, { "epoch": 1.7, "grad_norm": 1.6588895320892334, "learning_rate": 0.0001385388241131233, "loss": 0.2656, "step": 4859 }, { "epoch": 1.7, "grad_norm": 2.3973159790039062, "learning_rate": 0.00013850161250310094, "loss": 0.7651, "step": 4860 }, { "epoch": 1.7, "grad_norm": 1.8839689493179321, "learning_rate": 0.00013846440089307864, "loss": 0.4598, "step": 4861 }, { "epoch": 1.7, "grad_norm": 1.9465550184249878, "learning_rate": 0.00013842718928305632, "loss": 0.235, "step": 4862 }, { "epoch": 1.7, "grad_norm": 2.5686044692993164, "learning_rate": 0.00013838997767303397, "loss": 0.7046, "step": 4863 }, { "epoch": 1.7, "grad_norm": 2.570693254470825, "learning_rate": 0.00013835276606301164, "loss": 0.1989, "step": 4864 }, { "epoch": 1.7, "grad_norm": 2.393869638442993, "learning_rate": 0.00013831555445298932, "loss": 0.3419, "step": 4865 }, { "epoch": 1.7, "grad_norm": 2.3206074237823486, "learning_rate": 0.000138278342842967, "loss": 0.231, "step": 4866 }, { "epoch": 1.71, "grad_norm": 1.7298521995544434, "learning_rate": 0.00013824113123294467, "loss": 0.2686, "step": 4867 }, { "epoch": 1.71, "grad_norm": 1.511710286140442, "learning_rate": 0.00013820391962292235, "loss": 0.2347, "step": 4868 }, { "epoch": 1.71, "grad_norm": 2.156994104385376, "learning_rate": 0.0001381667080129, "loss": 0.2402, "step": 4869 }, { "epoch": 1.71, "grad_norm": 1.5252554416656494, "learning_rate": 0.0001381294964028777, "loss": 0.1506, "step": 4870 }, { "epoch": 1.71, "grad_norm": 3.855454683303833, "learning_rate": 
0.00013809228479285537, "loss": 0.3318, "step": 4871 }, { "epoch": 1.71, "grad_norm": 3.8235645294189453, "learning_rate": 0.00013805507318283302, "loss": 0.6935, "step": 4872 }, { "epoch": 1.71, "grad_norm": 3.7631542682647705, "learning_rate": 0.0001380178615728107, "loss": 0.6565, "step": 4873 }, { "epoch": 1.71, "grad_norm": 4.347606658935547, "learning_rate": 0.00013798064996278837, "loss": 0.6065, "step": 4874 }, { "epoch": 1.71, "grad_norm": 0.8318398594856262, "learning_rate": 0.00013794343835276605, "loss": 0.0503, "step": 4875 }, { "epoch": 1.71, "grad_norm": 3.258185386657715, "learning_rate": 0.00013790622674274373, "loss": 0.6948, "step": 4876 }, { "epoch": 1.71, "grad_norm": 1.6956919431686401, "learning_rate": 0.0001378690151327214, "loss": 0.1668, "step": 4877 }, { "epoch": 1.71, "grad_norm": 5.9375224113464355, "learning_rate": 0.00013783180352269908, "loss": 0.6585, "step": 4878 }, { "epoch": 1.71, "grad_norm": 4.867272853851318, "learning_rate": 0.00013779459191267675, "loss": 0.2866, "step": 4879 }, { "epoch": 1.71, "grad_norm": 1.3362858295440674, "learning_rate": 0.00013775738030265443, "loss": 0.246, "step": 4880 }, { "epoch": 1.71, "grad_norm": 2.0603818893432617, "learning_rate": 0.00013772016869263208, "loss": 0.4112, "step": 4881 }, { "epoch": 1.71, "grad_norm": 2.5215094089508057, "learning_rate": 0.00013768295708260975, "loss": 0.47, "step": 4882 }, { "epoch": 1.71, "grad_norm": 2.3979105949401855, "learning_rate": 0.00013764574547258746, "loss": 0.5573, "step": 4883 }, { "epoch": 1.71, "grad_norm": 4.085318088531494, "learning_rate": 0.0001376085338625651, "loss": 0.4865, "step": 4884 }, { "epoch": 1.71, "grad_norm": 1.9340821504592896, "learning_rate": 0.00013757132225254278, "loss": 0.222, "step": 4885 }, { "epoch": 1.71, "grad_norm": 26.611042022705078, "learning_rate": 0.00013753411064252046, "loss": 4.7412, "step": 4886 }, { "epoch": 1.71, "grad_norm": 2.0535519123077393, "learning_rate": 0.00013749689903249813, "loss": 0.2508, 
"step": 4887 }, { "epoch": 1.71, "grad_norm": 3.884704828262329, "learning_rate": 0.0001374596874224758, "loss": 0.8941, "step": 4888 }, { "epoch": 1.71, "grad_norm": 1.4614795446395874, "learning_rate": 0.00013742247581245348, "loss": 0.2868, "step": 4889 }, { "epoch": 1.71, "grad_norm": 1.9116908311843872, "learning_rate": 0.00013738526420243113, "loss": 0.2023, "step": 4890 }, { "epoch": 1.71, "grad_norm": 0.9487919807434082, "learning_rate": 0.0001373480525924088, "loss": 0.0826, "step": 4891 }, { "epoch": 1.71, "grad_norm": 1.6531901359558105, "learning_rate": 0.0001373108409823865, "loss": 0.2198, "step": 4892 }, { "epoch": 1.71, "grad_norm": 1.2997630834579468, "learning_rate": 0.00013727362937236416, "loss": 0.1561, "step": 4893 }, { "epoch": 1.71, "grad_norm": 4.334744453430176, "learning_rate": 0.00013723641776234184, "loss": 0.5747, "step": 4894 }, { "epoch": 1.72, "grad_norm": 6.257345676422119, "learning_rate": 0.0001371992061523195, "loss": 1.8452, "step": 4895 }, { "epoch": 1.72, "grad_norm": 6.1517133712768555, "learning_rate": 0.0001371619945422972, "loss": 1.6441, "step": 4896 }, { "epoch": 1.72, "grad_norm": 2.8486979007720947, "learning_rate": 0.00013712478293227486, "loss": 0.343, "step": 4897 }, { "epoch": 1.72, "grad_norm": 2.724308490753174, "learning_rate": 0.00013708757132225254, "loss": 0.7712, "step": 4898 }, { "epoch": 1.72, "grad_norm": 1.3378300666809082, "learning_rate": 0.00013705035971223021, "loss": 0.0844, "step": 4899 }, { "epoch": 1.72, "grad_norm": 4.833279609680176, "learning_rate": 0.00013701314810220786, "loss": 0.1357, "step": 4900 }, { "epoch": 1.72, "eval_loss": 0.3683335483074188, "eval_runtime": 51.5895, "eval_samples_per_second": 42.024, "eval_steps_per_second": 10.506, "eval_wer": 0.3474312402698495, "step": 4900 }, { "epoch": 1.72, "grad_norm": 3.9212887287139893, "learning_rate": 0.00013697593649218557, "loss": 0.5343, "step": 4901 }, { "epoch": 1.72, "grad_norm": 3.2143747806549072, "learning_rate": 
0.00013693872488216321, "loss": 0.4345, "step": 4902 }, { "epoch": 1.72, "grad_norm": 7.424620628356934, "learning_rate": 0.0001369015132721409, "loss": 0.5639, "step": 4903 }, { "epoch": 1.72, "grad_norm": 2.1714751720428467, "learning_rate": 0.00013686430166211857, "loss": 0.2382, "step": 4904 }, { "epoch": 1.72, "grad_norm": 2.139472007751465, "learning_rate": 0.00013682709005209624, "loss": 0.7981, "step": 4905 }, { "epoch": 1.72, "grad_norm": 1.1302719116210938, "learning_rate": 0.00013678987844207392, "loss": 0.242, "step": 4906 }, { "epoch": 1.72, "grad_norm": 1.639523983001709, "learning_rate": 0.0001367526668320516, "loss": 0.5281, "step": 4907 }, { "epoch": 1.72, "grad_norm": 1.3213592767715454, "learning_rate": 0.00013671545522202927, "loss": 0.2546, "step": 4908 }, { "epoch": 1.72, "grad_norm": 1.2504050731658936, "learning_rate": 0.00013667824361200692, "loss": 0.1545, "step": 4909 }, { "epoch": 1.72, "grad_norm": 1.2312544584274292, "learning_rate": 0.00013664103200198462, "loss": 0.1631, "step": 4910 }, { "epoch": 1.72, "grad_norm": 1.3905742168426514, "learning_rate": 0.00013660382039196227, "loss": 0.1149, "step": 4911 }, { "epoch": 1.72, "grad_norm": 2.0212652683258057, "learning_rate": 0.00013656660878193995, "loss": 0.4504, "step": 4912 }, { "epoch": 1.72, "grad_norm": 1.0436809062957764, "learning_rate": 0.00013652939717191762, "loss": 0.1652, "step": 4913 }, { "epoch": 1.72, "grad_norm": 3.0124869346618652, "learning_rate": 0.0001364921855618953, "loss": 0.354, "step": 4914 }, { "epoch": 1.72, "grad_norm": 1.9092917442321777, "learning_rate": 0.00013645497395187297, "loss": 0.2767, "step": 4915 }, { "epoch": 1.72, "grad_norm": 1.9692587852478027, "learning_rate": 0.00013641776234185065, "loss": 0.3325, "step": 4916 }, { "epoch": 1.72, "grad_norm": 2.2949063777923584, "learning_rate": 0.00013638055073182832, "loss": 0.3535, "step": 4917 }, { "epoch": 1.72, "grad_norm": 0.9984784722328186, "learning_rate": 0.00013634333912180597, "loss": 0.1386, 
"step": 4918 }, { "epoch": 1.72, "grad_norm": 2.6047825813293457, "learning_rate": 0.00013630612751178368, "loss": 0.2291, "step": 4919 }, { "epoch": 1.72, "grad_norm": 2.1782517433166504, "learning_rate": 0.00013626891590176135, "loss": 0.261, "step": 4920 }, { "epoch": 1.72, "grad_norm": 1.2911269664764404, "learning_rate": 0.000136231704291739, "loss": 0.0941, "step": 4921 }, { "epoch": 1.72, "grad_norm": 2.6118998527526855, "learning_rate": 0.00013619449268171668, "loss": 0.2503, "step": 4922 }, { "epoch": 1.72, "grad_norm": 1.126331090927124, "learning_rate": 0.00013615728107169435, "loss": 0.0635, "step": 4923 }, { "epoch": 1.73, "grad_norm": 4.782196044921875, "learning_rate": 0.00013612006946167203, "loss": 0.4418, "step": 4924 }, { "epoch": 1.73, "grad_norm": 1.8012721538543701, "learning_rate": 0.0001360828578516497, "loss": 0.1878, "step": 4925 }, { "epoch": 1.73, "grad_norm": 3.5132253170013428, "learning_rate": 0.00013604564624162738, "loss": 0.1484, "step": 4926 }, { "epoch": 1.73, "grad_norm": 3.3157382011413574, "learning_rate": 0.00013600843463160506, "loss": 0.2761, "step": 4927 }, { "epoch": 1.73, "grad_norm": 1.4232193231582642, "learning_rate": 0.00013597122302158273, "loss": 0.0699, "step": 4928 }, { "epoch": 1.73, "grad_norm": 1.8785415887832642, "learning_rate": 0.0001359340114115604, "loss": 0.0976, "step": 4929 }, { "epoch": 1.73, "grad_norm": 2.8369486331939697, "learning_rate": 0.00013589679980153806, "loss": 0.5884, "step": 4930 }, { "epoch": 1.73, "grad_norm": 1.4012984037399292, "learning_rate": 0.00013585958819151573, "loss": 0.3877, "step": 4931 }, { "epoch": 1.73, "grad_norm": 1.8025954961776733, "learning_rate": 0.0001358223765814934, "loss": 0.1753, "step": 4932 }, { "epoch": 1.73, "grad_norm": 1.4512267112731934, "learning_rate": 0.00013578516497147108, "loss": 0.379, "step": 4933 }, { "epoch": 1.73, "grad_norm": 2.033318281173706, "learning_rate": 0.00013574795336144876, "loss": 0.2617, "step": 4934 }, { "epoch": 1.73, 
"grad_norm": 3.8261144161224365, "learning_rate": 0.00013571074175142643, "loss": 0.4695, "step": 4935 }, { "epoch": 1.73, "grad_norm": 1.4579124450683594, "learning_rate": 0.0001356735301414041, "loss": 0.2473, "step": 4936 }, { "epoch": 1.73, "grad_norm": 1.6672676801681519, "learning_rate": 0.00013563631853138179, "loss": 0.2851, "step": 4937 }, { "epoch": 1.73, "grad_norm": 0.9775683283805847, "learning_rate": 0.00013559910692135946, "loss": 0.0493, "step": 4938 }, { "epoch": 1.73, "grad_norm": 3.237879514694214, "learning_rate": 0.0001355618953113371, "loss": 0.5164, "step": 4939 }, { "epoch": 1.73, "grad_norm": 3.030714273452759, "learning_rate": 0.0001355246837013148, "loss": 0.1808, "step": 4940 }, { "epoch": 1.73, "grad_norm": 2.291748285293579, "learning_rate": 0.0001354874720912925, "loss": 0.1526, "step": 4941 }, { "epoch": 1.73, "grad_norm": 2.577086925506592, "learning_rate": 0.00013545026048127014, "loss": 0.1565, "step": 4942 }, { "epoch": 1.73, "grad_norm": 3.0467169284820557, "learning_rate": 0.00013541304887124781, "loss": 0.3401, "step": 4943 }, { "epoch": 1.73, "grad_norm": 2.7685546875, "learning_rate": 0.0001353758372612255, "loss": 0.1665, "step": 4944 }, { "epoch": 1.73, "grad_norm": 2.6648311614990234, "learning_rate": 0.00013533862565120317, "loss": 0.1115, "step": 4945 }, { "epoch": 1.73, "grad_norm": 4.279283046722412, "learning_rate": 0.00013530141404118084, "loss": 0.59, "step": 4946 }, { "epoch": 1.73, "grad_norm": 3.774441719055176, "learning_rate": 0.00013526420243115852, "loss": 1.4274, "step": 4947 }, { "epoch": 1.73, "grad_norm": 3.0308432579040527, "learning_rate": 0.00013522699082113617, "loss": 0.2772, "step": 4948 }, { "epoch": 1.73, "grad_norm": 2.0633444786071777, "learning_rate": 0.00013518977921111387, "loss": 0.3336, "step": 4949 }, { "epoch": 1.73, "grad_norm": 3.2152934074401855, "learning_rate": 0.00013515256760109154, "loss": 0.3653, "step": 4950 }, { "epoch": 1.73, "grad_norm": 3.7646780014038086, "learning_rate": 
0.0001351153559910692, "loss": 0.3, "step": 4951 }, { "epoch": 1.74, "grad_norm": 0.37087032198905945, "learning_rate": 0.00013507814438104687, "loss": 0.0195, "step": 4952 }, { "epoch": 1.74, "grad_norm": 1.66860830783844, "learning_rate": 0.00013504093277102454, "loss": 0.1063, "step": 4953 }, { "epoch": 1.74, "grad_norm": 1.9260728359222412, "learning_rate": 0.00013500372116100222, "loss": 0.1283, "step": 4954 }, { "epoch": 1.74, "grad_norm": 1.4697273969650269, "learning_rate": 0.0001349665095509799, "loss": 0.6385, "step": 4955 }, { "epoch": 1.74, "grad_norm": 1.3390899896621704, "learning_rate": 0.00013492929794095757, "loss": 0.3823, "step": 4956 }, { "epoch": 1.74, "grad_norm": 1.3227888345718384, "learning_rate": 0.00013489208633093522, "loss": 0.3421, "step": 4957 }, { "epoch": 1.74, "grad_norm": 2.2112112045288086, "learning_rate": 0.00013485487472091292, "loss": 0.4267, "step": 4958 }, { "epoch": 1.74, "grad_norm": 1.0806066989898682, "learning_rate": 0.0001348176631108906, "loss": 0.1819, "step": 4959 }, { "epoch": 1.74, "grad_norm": 1.1106966733932495, "learning_rate": 0.00013478045150086825, "loss": 0.0838, "step": 4960 }, { "epoch": 1.74, "grad_norm": 1.180200219154358, "learning_rate": 0.00013474323989084592, "loss": 0.1393, "step": 4961 }, { "epoch": 1.74, "grad_norm": 1.237693190574646, "learning_rate": 0.0001347060282808236, "loss": 0.172, "step": 4962 }, { "epoch": 1.74, "grad_norm": 2.4684200286865234, "learning_rate": 0.00013466881667080128, "loss": 0.3471, "step": 4963 }, { "epoch": 1.74, "grad_norm": 1.569759488105774, "learning_rate": 0.00013463160506077895, "loss": 0.246, "step": 4964 }, { "epoch": 1.74, "grad_norm": 2.071803092956543, "learning_rate": 0.00013459439345075663, "loss": 0.2071, "step": 4965 }, { "epoch": 1.74, "grad_norm": 2.41078519821167, "learning_rate": 0.0001345571818407343, "loss": 0.7541, "step": 4966 }, { "epoch": 1.74, "grad_norm": 1.8611916303634644, "learning_rate": 0.00013451997023071198, "loss": 0.5563, "step": 
4967 }, { "epoch": 1.74, "grad_norm": 1.8303391933441162, "learning_rate": 0.00013448275862068965, "loss": 0.2874, "step": 4968 }, { "epoch": 1.74, "grad_norm": 1.8616209030151367, "learning_rate": 0.0001344455470106673, "loss": 0.1496, "step": 4969 }, { "epoch": 1.74, "grad_norm": 2.0775692462921143, "learning_rate": 0.00013440833540064498, "loss": 0.3153, "step": 4970 }, { "epoch": 1.74, "grad_norm": 2.2036242485046387, "learning_rate": 0.00013437112379062268, "loss": 0.3611, "step": 4971 }, { "epoch": 1.74, "grad_norm": 2.958390235900879, "learning_rate": 0.00013433391218060033, "loss": 0.3149, "step": 4972 }, { "epoch": 1.74, "grad_norm": 1.2696552276611328, "learning_rate": 0.000134296700570578, "loss": 0.0836, "step": 4973 }, { "epoch": 1.74, "grad_norm": 0.8660387992858887, "learning_rate": 0.00013425948896055568, "loss": 0.0415, "step": 4974 }, { "epoch": 1.74, "grad_norm": 3.5192763805389404, "learning_rate": 0.00013422227735053336, "loss": 0.3977, "step": 4975 }, { "epoch": 1.74, "grad_norm": 1.378849744796753, "learning_rate": 0.00013418506574051103, "loss": 0.0728, "step": 4976 }, { "epoch": 1.74, "grad_norm": 3.9449732303619385, "learning_rate": 0.0001341478541304887, "loss": 0.221, "step": 4977 }, { "epoch": 1.74, "grad_norm": 5.472590446472168, "learning_rate": 0.00013411064252046636, "loss": 1.7522, "step": 4978 }, { "epoch": 1.74, "grad_norm": NaN, "learning_rate": 0.00013411064252046636, "loss": 0.2462, "step": 4979 }, { "epoch": 1.74, "grad_norm": 4.078400611877441, "learning_rate": 0.00013407343091044403, "loss": 0.7826, "step": 4980 }, { "epoch": 1.75, "grad_norm": 1.8537403345108032, "learning_rate": 0.00013403621930042174, "loss": 0.4351, "step": 4981 }, { "epoch": 1.75, "grad_norm": 1.9395902156829834, "learning_rate": 0.00013399900769039939, "loss": 0.201, "step": 4982 }, { "epoch": 1.75, "grad_norm": 1.612997055053711, "learning_rate": 0.00013396179608037706, "loss": 0.2431, "step": 4983 }, { "epoch": 1.75, "grad_norm": 1.5586113929748535, 
"learning_rate": 0.00013392458447035474, "loss": 0.2738, "step": 4984 }, { "epoch": 1.75, "grad_norm": 1.7553973197937012, "learning_rate": 0.0001338873728603324, "loss": 0.3034, "step": 4985 }, { "epoch": 1.75, "grad_norm": 1.0098979473114014, "learning_rate": 0.0001338501612503101, "loss": 0.0768, "step": 4986 }, { "epoch": 1.75, "grad_norm": 3.2401552200317383, "learning_rate": 0.00013381294964028776, "loss": 1.3496, "step": 4987 }, { "epoch": 1.75, "grad_norm": 1.5994211435317993, "learning_rate": 0.00013377573803026544, "loss": 0.1578, "step": 4988 }, { "epoch": 1.75, "grad_norm": 2.8812813758850098, "learning_rate": 0.0001337385264202431, "loss": 0.6019, "step": 4989 }, { "epoch": 1.75, "grad_norm": 1.9770814180374146, "learning_rate": 0.0001337013148102208, "loss": 0.2927, "step": 4990 }, { "epoch": 1.75, "grad_norm": 3.3492937088012695, "learning_rate": 0.00013366410320019844, "loss": 0.8254, "step": 4991 }, { "epoch": 1.75, "grad_norm": 0.7024646997451782, "learning_rate": 0.00013362689159017612, "loss": 0.0549, "step": 4992 }, { "epoch": 1.75, "grad_norm": 1.3194102048873901, "learning_rate": 0.0001335896799801538, "loss": 0.094, "step": 4993 }, { "epoch": 1.75, "grad_norm": 1.7211250066757202, "learning_rate": 0.00013355246837013147, "loss": 0.1947, "step": 4994 }, { "epoch": 1.75, "grad_norm": 2.0496625900268555, "learning_rate": 0.00013351525676010914, "loss": 0.2378, "step": 4995 }, { "epoch": 1.75, "grad_norm": 1.6302729845046997, "learning_rate": 0.00013347804515008682, "loss": 0.2008, "step": 4996 }, { "epoch": 1.75, "grad_norm": 4.008142948150635, "learning_rate": 0.0001334408335400645, "loss": 0.5542, "step": 4997 }, { "epoch": 1.75, "grad_norm": 2.101016044616699, "learning_rate": 0.00013340362193004214, "loss": 0.1599, "step": 4998 }, { "epoch": 1.75, "grad_norm": 0.9622274041175842, "learning_rate": 0.00013336641032001985, "loss": 0.0923, "step": 4999 }, { "epoch": 1.75, "grad_norm": 4.595578670501709, "learning_rate": 0.0001333291987099975, 
"loss": 0.4642, "step": 5000 }, { "epoch": 1.75, "eval_loss": 0.3618814945220947, "eval_runtime": 51.7136, "eval_samples_per_second": 41.923, "eval_steps_per_second": 10.481, "eval_wer": 0.3420688462203771, "step": 5000 }, { "epoch": 1.75, "grad_norm": 2.2924017906188965, "learning_rate": 0.00013329198709997517, "loss": 0.3131, "step": 5001 }, { "epoch": 1.75, "grad_norm": 1.4063929319381714, "learning_rate": 0.00013325477548995285, "loss": 0.1032, "step": 5002 }, { "epoch": 1.75, "grad_norm": 1.765910267829895, "learning_rate": 0.00013321756387993052, "loss": 0.2847, "step": 5003 }, { "epoch": 1.75, "grad_norm": 2.635676383972168, "learning_rate": 0.0001331803522699082, "loss": 0.3925, "step": 5004 }, { "epoch": 1.75, "grad_norm": 2.2191758155822754, "learning_rate": 0.00013314314065988587, "loss": 0.5075, "step": 5005 }, { "epoch": 1.75, "grad_norm": 1.3144760131835938, "learning_rate": 0.00013310592904986355, "loss": 0.1606, "step": 5006 }, { "epoch": 1.75, "grad_norm": 2.2465732097625732, "learning_rate": 0.00013306871743984123, "loss": 0.3372, "step": 5007 }, { "epoch": 1.75, "grad_norm": 1.2884074449539185, "learning_rate": 0.0001330315058298189, "loss": 0.1578, "step": 5008 }, { "epoch": 1.76, "grad_norm": 1.7158373594284058, "learning_rate": 0.00013299429421979658, "loss": 0.2009, "step": 5009 }, { "epoch": 1.76, "grad_norm": 1.9673197269439697, "learning_rate": 0.00013295708260977423, "loss": 0.2124, "step": 5010 }, { "epoch": 1.76, "grad_norm": 2.0683186054229736, "learning_rate": 0.0001329198709997519, "loss": 0.445, "step": 5011 }, { "epoch": 1.76, "grad_norm": 2.1322107315063477, "learning_rate": 0.00013288265938972958, "loss": 0.1698, "step": 5012 }, { "epoch": 1.76, "grad_norm": 1.6621609926223755, "learning_rate": 0.00013284544777970725, "loss": 0.1697, "step": 5013 }, { "epoch": 1.76, "grad_norm": 2.388695240020752, "learning_rate": 0.00013280823616968493, "loss": 0.291, "step": 5014 }, { "epoch": 1.76, "grad_norm": 1.7287172079086304, 
"learning_rate": 0.0001327710245596626, "loss": 0.2866, "step": 5015 }, { "epoch": 1.76, "grad_norm": 2.0600686073303223, "learning_rate": 0.00013273381294964028, "loss": 0.2617, "step": 5016 }, { "epoch": 1.76, "grad_norm": 3.1544766426086426, "learning_rate": 0.00013269660133961796, "loss": 0.3467, "step": 5017 }, { "epoch": 1.76, "grad_norm": 3.4372010231018066, "learning_rate": 0.00013265938972959563, "loss": 0.3877, "step": 5018 }, { "epoch": 1.76, "grad_norm": 2.706695079803467, "learning_rate": 0.00013262217811957328, "loss": 0.3513, "step": 5019 }, { "epoch": 1.76, "grad_norm": 4.3857927322387695, "learning_rate": 0.00013258496650955096, "loss": 1.5684, "step": 5020 }, { "epoch": 1.76, "grad_norm": 2.2888591289520264, "learning_rate": 0.00013254775489952863, "loss": 0.2338, "step": 5021 }, { "epoch": 1.76, "grad_norm": 2.9611387252807617, "learning_rate": 0.0001325105432895063, "loss": 0.2578, "step": 5022 }, { "epoch": 1.76, "grad_norm": 2.1191227436065674, "learning_rate": 0.00013247333167948398, "loss": 0.1561, "step": 5023 }, { "epoch": 1.76, "grad_norm": 2.088348865509033, "learning_rate": 0.00013243612006946166, "loss": 0.1646, "step": 5024 }, { "epoch": 1.76, "grad_norm": 1.8998678922653198, "learning_rate": 0.00013239890845943934, "loss": 0.2291, "step": 5025 }, { "epoch": 1.76, "grad_norm": 2.2618954181671143, "learning_rate": 0.000132361696849417, "loss": 0.0624, "step": 5026 }, { "epoch": 1.76, "grad_norm": 6.207477569580078, "learning_rate": 0.0001323244852393947, "loss": 0.563, "step": 5027 }, { "epoch": 1.76, "grad_norm": 21.43695640563965, "learning_rate": 0.00013228727362937234, "loss": 2.0059, "step": 5028 }, { "epoch": 1.76, "grad_norm": NaN, "learning_rate": 0.00013228727362937234, "loss": 1.7438, "step": 5029 }, { "epoch": 1.76, "grad_norm": 5383.18994140625, "learning_rate": 0.00013225006201935004, "loss": 16.872, "step": 5030 }, { "epoch": 1.76, "grad_norm": 23.968223571777344, "learning_rate": 0.00013221285040932772, "loss": 4.1902, 
"step": 5031 }, { "epoch": 1.76, "grad_norm": 18.13584327697754, "learning_rate": 0.00013217563879930536, "loss": 3.597, "step": 5032 }, { "epoch": 1.76, "grad_norm": 27.954679489135742, "learning_rate": 0.00013213842718928304, "loss": 3.8674, "step": 5033 }, { "epoch": 1.76, "grad_norm": 12.132146835327148, "learning_rate": 0.00013210121557926072, "loss": 3.337, "step": 5034 }, { "epoch": 1.76, "grad_norm": 9.336516380310059, "learning_rate": 0.0001320640039692384, "loss": 3.0708, "step": 5035 }, { "epoch": 1.76, "grad_norm": 14.99378776550293, "learning_rate": 0.00013202679235921607, "loss": 3.4457, "step": 5036 }, { "epoch": 1.76, "grad_norm": 16.54006004333496, "learning_rate": 0.00013198958074919374, "loss": 3.4698, "step": 5037 }, { "epoch": 1.77, "grad_norm": 5.521266460418701, "learning_rate": 0.0001319523691391714, "loss": 3.0527, "step": 5038 }, { "epoch": 1.77, "grad_norm": 13.792803764343262, "learning_rate": 0.0001319151575291491, "loss": 3.1825, "step": 5039 }, { "epoch": 1.77, "grad_norm": 10.1255464553833, "learning_rate": 0.00013187794591912677, "loss": 3.3301, "step": 5040 }, { "epoch": 1.77, "grad_norm": 6.728694915771484, "learning_rate": 0.00013184073430910442, "loss": 3.4529, "step": 5041 }, { "epoch": 1.77, "grad_norm": 13.570353507995605, "learning_rate": 0.0001318035226990821, "loss": 3.1857, "step": 5042 }, { "epoch": 1.77, "grad_norm": 9.944263458251953, "learning_rate": 0.00013176631108905977, "loss": 3.0247, "step": 5043 }, { "epoch": 1.77, "grad_norm": 7.628637790679932, "learning_rate": 0.00013172909947903745, "loss": 3.0557, "step": 5044 }, { "epoch": 1.77, "grad_norm": 8.50871753692627, "learning_rate": 0.00013169188786901512, "loss": 2.9197, "step": 5045 }, { "epoch": 1.77, "grad_norm": 3.350416421890259, "learning_rate": 0.0001316546762589928, "loss": 2.7846, "step": 5046 }, { "epoch": 1.77, "grad_norm": 8.267583847045898, "learning_rate": 0.00013161746464897047, "loss": 2.9316, "step": 5047 }, { "epoch": 1.77, "grad_norm": 
10.566850662231445, "learning_rate": 0.00013158025303894815, "loss": 3.5342, "step": 5048 }, { "epoch": 1.77, "grad_norm": 7.871520042419434, "learning_rate": 0.00013154304142892583, "loss": 3.2533, "step": 5049 }, { "epoch": 1.77, "grad_norm": 14.895259857177734, "learning_rate": 0.00013150582981890347, "loss": 3.3095, "step": 5050 }, { "epoch": 1.77, "grad_norm": 6.724153995513916, "learning_rate": 0.00013146861820888115, "loss": 2.804, "step": 5051 }, { "epoch": 1.77, "grad_norm": 10.182531356811523, "learning_rate": 0.00013143140659885885, "loss": 2.9485, "step": 5052 }, { "epoch": 1.77, "grad_norm": 12.471945762634277, "learning_rate": 0.0001313941949888365, "loss": 3.0598, "step": 5053 }, { "epoch": 1.77, "grad_norm": 4.7943267822265625, "learning_rate": 0.00013135698337881418, "loss": 2.6582, "step": 5054 }, { "epoch": 1.77, "grad_norm": 3.8134336471557617, "learning_rate": 0.00013131977176879185, "loss": 3.2571, "step": 5055 }, { "epoch": 1.77, "grad_norm": 2.2540090084075928, "learning_rate": 0.00013128256015876953, "loss": 3.1262, "step": 5056 }, { "epoch": 1.77, "grad_norm": 3.4563486576080322, "learning_rate": 0.0001312453485487472, "loss": 2.8854, "step": 5057 }, { "epoch": 1.77, "grad_norm": 4.2413249015808105, "learning_rate": 0.00013120813693872488, "loss": 2.9584, "step": 5058 }, { "epoch": 1.77, "grad_norm": 12.829059600830078, "learning_rate": 0.00013117092532870253, "loss": 3.1434, "step": 5059 }, { "epoch": 1.77, "grad_norm": 2.502635955810547, "learning_rate": 0.0001311337137186802, "loss": 3.0336, "step": 5060 }, { "epoch": 1.77, "grad_norm": 2.319552421569824, "learning_rate": 0.0001310965021086579, "loss": 2.9348, "step": 5061 }, { "epoch": 1.77, "grad_norm": 6.4483866691589355, "learning_rate": 0.00013105929049863556, "loss": 2.7653, "step": 5062 }, { "epoch": 1.77, "grad_norm": 2.6617424488067627, "learning_rate": 0.00013102207888861323, "loss": 2.9234, "step": 5063 }, { "epoch": 1.77, "grad_norm": 4.615228652954102, "learning_rate": 
0.0001309848672785909, "loss": 2.7653, "step": 5064 }, { "epoch": 1.77, "grad_norm": 6.482029438018799, "learning_rate": 0.00013094765566856858, "loss": 2.7954, "step": 5065 }, { "epoch": 1.78, "grad_norm": 7.622623920440674, "learning_rate": 0.00013091044405854626, "loss": 3.0746, "step": 5066 }, { "epoch": 1.78, "grad_norm": 8.83546257019043, "learning_rate": 0.00013087323244852394, "loss": 2.9216, "step": 5067 }, { "epoch": 1.78, "grad_norm": 9.417671203613281, "learning_rate": 0.0001308360208385016, "loss": 2.9592, "step": 5068 }, { "epoch": 1.78, "grad_norm": 3.7668020725250244, "learning_rate": 0.00013079880922847926, "loss": 2.7894, "step": 5069 }, { "epoch": 1.78, "grad_norm": 8.39608097076416, "learning_rate": 0.00013076159761845696, "loss": 2.7767, "step": 5070 }, { "epoch": 1.78, "grad_norm": 3.686868190765381, "learning_rate": 0.0001307243860084346, "loss": 3.019, "step": 5071 }, { "epoch": 1.78, "grad_norm": 10.237363815307617, "learning_rate": 0.0001306871743984123, "loss": 3.0566, "step": 5072 }, { "epoch": 1.78, "grad_norm": 4.589514255523682, "learning_rate": 0.00013064996278838996, "loss": 3.2709, "step": 5073 }, { "epoch": 1.78, "grad_norm": 4.9313225746154785, "learning_rate": 0.00013061275117836764, "loss": 2.6611, "step": 5074 }, { "epoch": 1.78, "grad_norm": 4.68777322769165, "learning_rate": 0.00013057553956834531, "loss": 2.9001, "step": 5075 }, { "epoch": 1.78, "grad_norm": 3.2555599212646484, "learning_rate": 0.000130538327958323, "loss": 2.8576, "step": 5076 }, { "epoch": 1.78, "grad_norm": 3.0508415699005127, "learning_rate": 0.00013050111634830067, "loss": 2.7034, "step": 5077 }, { "epoch": 1.78, "grad_norm": 3.908602237701416, "learning_rate": 0.00013046390473827832, "loss": 2.8313, "step": 5078 }, { "epoch": 1.78, "grad_norm": 2.733027696609497, "learning_rate": 0.00013042669312825602, "loss": 2.3422, "step": 5079 }, { "epoch": 1.78, "grad_norm": 4.607684135437012, "learning_rate": 0.00013038948151823367, "loss": 3.3491, "step": 5080 
}, { "epoch": 1.78, "grad_norm": 3.3591644763946533, "learning_rate": 0.00013035226990821134, "loss": 2.9754, "step": 5081 }, { "epoch": 1.78, "grad_norm": 3.9644618034362793, "learning_rate": 0.00013031505829818902, "loss": 3.053, "step": 5082 }, { "epoch": 1.78, "grad_norm": 8.168998718261719, "learning_rate": 0.0001302778466881667, "loss": 2.9517, "step": 5083 }, { "epoch": 1.78, "grad_norm": 3.216783046722412, "learning_rate": 0.00013024063507814437, "loss": 3.0665, "step": 5084 }, { "epoch": 1.78, "grad_norm": 2.6006112098693848, "learning_rate": 0.00013020342346812205, "loss": 2.8623, "step": 5085 }, { "epoch": 1.78, "grad_norm": 2.3029751777648926, "learning_rate": 0.00013016621185809972, "loss": 2.8781, "step": 5086 }, { "epoch": 1.78, "grad_norm": 3.3542487621307373, "learning_rate": 0.00013012900024807737, "loss": 2.7165, "step": 5087 }, { "epoch": 1.78, "grad_norm": 3.487226724624634, "learning_rate": 0.00013009178863805507, "loss": 2.8395, "step": 5088 }, { "epoch": 1.78, "grad_norm": 10.391279220581055, "learning_rate": 0.00013005457702803275, "loss": 3.1138, "step": 5089 }, { "epoch": 1.78, "grad_norm": 9.242776870727539, "learning_rate": 0.0001300173654180104, "loss": 3.0096, "step": 5090 }, { "epoch": 1.78, "grad_norm": 4.211190223693848, "learning_rate": 0.00012998015380798807, "loss": 2.8559, "step": 5091 }, { "epoch": 1.78, "grad_norm": 2.6914520263671875, "learning_rate": 0.00012994294219796575, "loss": 2.9511, "step": 5092 }, { "epoch": 1.78, "grad_norm": 4.44136381149292, "learning_rate": 0.00012990573058794342, "loss": 2.7139, "step": 5093 }, { "epoch": 1.78, "grad_norm": 3.7667293548583984, "learning_rate": 0.0001298685189779211, "loss": 2.7297, "step": 5094 }, { "epoch": 1.79, "grad_norm": 3.589275360107422, "learning_rate": 0.00012983130736789878, "loss": 2.8284, "step": 5095 }, { "epoch": 1.79, "grad_norm": 4.216124534606934, "learning_rate": 0.00012979409575787645, "loss": 2.9066, "step": 5096 }, { "epoch": 1.79, "grad_norm": 
10.489272117614746, "learning_rate": 0.00012975688414785413, "loss": 2.9559, "step": 5097 }, { "epoch": 1.79, "grad_norm": 11.246708869934082, "learning_rate": 0.0001297196725378318, "loss": 2.9586, "step": 5098 }, { "epoch": 1.79, "grad_norm": 10.041492462158203, "learning_rate": 0.00012968246092780945, "loss": 2.736, "step": 5099 }, { "epoch": 1.79, "grad_norm": 5.19471549987793, "learning_rate": 0.00012964524931778713, "loss": 2.646, "step": 5100 }, { "epoch": 1.79, "eval_loss": 2.873821258544922, "eval_runtime": 51.453, "eval_samples_per_second": 42.136, "eval_steps_per_second": 10.534, "eval_wer": 1.0, "step": 5100 }, { "epoch": 1.79, "grad_norm": 9.86611557006836, "learning_rate": 0.0001296080377077648, "loss": 2.7976, "step": 5101 }, { "epoch": 1.79, "grad_norm": 10.880091667175293, "learning_rate": 0.00012957082609774248, "loss": 3.1771, "step": 5102 }, { "epoch": 1.79, "grad_norm": 6.943602085113525, "learning_rate": 0.00012953361448772016, "loss": 2.7736, "step": 5103 }, { "epoch": 1.79, "grad_norm": 7.127685070037842, "learning_rate": 0.00012949640287769783, "loss": 3.0676, "step": 5104 }, { "epoch": 1.79, "grad_norm": 18.559324264526367, "learning_rate": 0.0001294591912676755, "loss": 3.2199, "step": 5105 }, { "epoch": 1.79, "grad_norm": 2.158505916595459, "learning_rate": 0.00012942197965765318, "loss": 2.756, "step": 5106 }, { "epoch": 1.79, "grad_norm": 5.563910484313965, "learning_rate": 0.00012938476804763086, "loss": 2.8269, "step": 5107 }, { "epoch": 1.79, "grad_norm": 7.628482818603516, "learning_rate": 0.0001293475564376085, "loss": 2.9859, "step": 5108 }, { "epoch": 1.79, "grad_norm": 2.8963091373443604, "learning_rate": 0.00012931034482758618, "loss": 2.8255, "step": 5109 }, { "epoch": 1.79, "grad_norm": 3.270557165145874, "learning_rate": 0.00012927313321756389, "loss": 2.7845, "step": 5110 }, { "epoch": 1.79, "grad_norm": 6.280425548553467, "learning_rate": 0.00012923592160754154, "loss": 2.6916, "step": 5111 }, { "epoch": 1.79, 
"grad_norm": 6.498332977294922, "learning_rate": 0.0001291987099975192, "loss": 2.8811, "step": 5112 }, { "epoch": 1.79, "grad_norm": 5.853482723236084, "learning_rate": 0.0001291614983874969, "loss": 2.9358, "step": 5113 }, { "epoch": 1.79, "grad_norm": 6.113433361053467, "learning_rate": 0.00012912428677747456, "loss": 3.4643, "step": 5114 }, { "epoch": 1.79, "grad_norm": 4.190027236938477, "learning_rate": 0.00012908707516745224, "loss": 2.7947, "step": 5115 }, { "epoch": 1.79, "grad_norm": 7.508610248565674, "learning_rate": 0.00012904986355742991, "loss": 2.9305, "step": 5116 }, { "epoch": 1.79, "grad_norm": 7.606897830963135, "learning_rate": 0.00012901265194740756, "loss": 2.543, "step": 5117 }, { "epoch": 1.79, "grad_norm": 3.0015764236450195, "learning_rate": 0.00012897544033738527, "loss": 2.9905, "step": 5118 }, { "epoch": 1.79, "grad_norm": 3.0580461025238037, "learning_rate": 0.00012893822872736294, "loss": 3.1613, "step": 5119 }, { "epoch": 1.79, "grad_norm": 3.363950729370117, "learning_rate": 0.0001289010171173406, "loss": 2.8196, "step": 5120 }, { "epoch": 1.79, "grad_norm": 7.74603796005249, "learning_rate": 0.00012886380550731827, "loss": 2.9443, "step": 5121 }, { "epoch": 1.79, "grad_norm": 5.362524032592773, "learning_rate": 0.00012882659389729594, "loss": 2.7295, "step": 5122 }, { "epoch": 1.8, "grad_norm": 12.855659484863281, "learning_rate": 0.00012878938228727362, "loss": 3.0975, "step": 5123 }, { "epoch": 1.8, "grad_norm": 8.280109405517578, "learning_rate": 0.0001287521706772513, "loss": 3.0533, "step": 5124 }, { "epoch": 1.8, "grad_norm": 4.31693172454834, "learning_rate": 0.00012871495906722897, "loss": 2.5704, "step": 5125 }, { "epoch": 1.8, "grad_norm": 3.7444570064544678, "learning_rate": 0.00012867774745720664, "loss": 2.609, "step": 5126 }, { "epoch": 1.8, "grad_norm": 7.781627178192139, "learning_rate": 0.00012864053584718432, "loss": 2.6856, "step": 5127 }, { "epoch": 1.8, "grad_norm": 5.853876113891602, "learning_rate": 
0.000128603324237162, "loss": 2.6211, "step": 5128 }, { "epoch": 1.8, "grad_norm": 4.829381942749023, "learning_rate": 0.00012856611262713965, "loss": 2.6558, "step": 5129 }, { "epoch": 1.8, "grad_norm": 7.683784484863281, "learning_rate": 0.00012852890101711732, "loss": 3.1132, "step": 5130 }, { "epoch": 1.8, "grad_norm": 7.247925758361816, "learning_rate": 0.000128491689407095, "loss": 2.9654, "step": 5131 }, { "epoch": 1.8, "grad_norm": 7.606195449829102, "learning_rate": 0.00012845447779707267, "loss": 3.026, "step": 5132 }, { "epoch": 1.8, "grad_norm": 7.540022373199463, "learning_rate": 0.00012841726618705035, "loss": 2.9083, "step": 5133 }, { "epoch": 1.8, "grad_norm": 8.643896102905273, "learning_rate": 0.00012838005457702802, "loss": 3.2663, "step": 5134 }, { "epoch": 1.8, "grad_norm": 5.016622543334961, "learning_rate": 0.0001283428429670057, "loss": 3.1068, "step": 5135 }, { "epoch": 1.8, "grad_norm": 3.617004871368408, "learning_rate": 0.00012830563135698338, "loss": 2.6929, "step": 5136 }, { "epoch": 1.8, "grad_norm": 2.4919373989105225, "learning_rate": 0.00012826841974696105, "loss": 2.7526, "step": 5137 }, { "epoch": 1.8, "grad_norm": 2.7996411323547363, "learning_rate": 0.0001282312081369387, "loss": 3.0863, "step": 5138 }, { "epoch": 1.8, "grad_norm": 14.32989501953125, "learning_rate": 0.00012819399652691638, "loss": 3.3443, "step": 5139 }, { "epoch": 1.8, "grad_norm": 13.708571434020996, "learning_rate": 0.00012815678491689408, "loss": 3.1483, "step": 5140 }, { "epoch": 1.8, "grad_norm": 10.349092483520508, "learning_rate": 0.00012811957330687173, "loss": 3.1138, "step": 5141 }, { "epoch": 1.8, "grad_norm": 2.638105630874634, "learning_rate": 0.0001280823616968494, "loss": 2.9172, "step": 5142 }, { "epoch": 1.8, "grad_norm": 3.8446462154388428, "learning_rate": 0.00012804515008682708, "loss": 3.0105, "step": 5143 }, { "epoch": 1.8, "grad_norm": 2.970163106918335, "learning_rate": 0.00012800793847680475, "loss": 2.6563, "step": 5144 }, { "epoch": 
1.8, "grad_norm": 3.778756618499756, "learning_rate": 0.00012797072686678243, "loss": 2.5726, "step": 5145 }, { "epoch": 1.8, "grad_norm": 4.075989723205566, "learning_rate": 0.0001279335152567601, "loss": 2.6662, "step": 5146 }, { "epoch": 1.8, "grad_norm": 5.896014213562012, "learning_rate": 0.00012789630364673778, "loss": 2.9875, "step": 5147 }, { "epoch": 1.8, "grad_norm": 4.679042816162109, "learning_rate": 0.00012785909203671543, "loss": 2.9295, "step": 5148 }, { "epoch": 1.8, "grad_norm": 4.0041422843933105, "learning_rate": 0.00012782188042669313, "loss": 2.8009, "step": 5149 }, { "epoch": 1.8, "grad_norm": 5.066998481750488, "learning_rate": 0.00012778466881667078, "loss": 2.7084, "step": 5150 }, { "epoch": 1.8, "grad_norm": 4.032702445983887, "learning_rate": 0.00012774745720664846, "loss": 2.4773, "step": 5151 }, { "epoch": 1.81, "grad_norm": 5.38712215423584, "learning_rate": 0.00012771024559662613, "loss": 2.7513, "step": 5152 }, { "epoch": 1.81, "grad_norm": 9.350571632385254, "learning_rate": 0.0001276730339866038, "loss": 2.756, "step": 5153 }, { "epoch": 1.81, "grad_norm": 3.612746238708496, "learning_rate": 0.00012763582237658149, "loss": 3.0026, "step": 5154 }, { "epoch": 1.81, "grad_norm": 5.044943809509277, "learning_rate": 0.00012759861076655916, "loss": 3.0165, "step": 5155 }, { "epoch": 1.81, "grad_norm": 5.973738193511963, "learning_rate": 0.00012756139915653684, "loss": 3.0884, "step": 5156 }, { "epoch": 1.81, "grad_norm": 5.725467681884766, "learning_rate": 0.00012752418754651449, "loss": 2.9013, "step": 5157 }, { "epoch": 1.81, "grad_norm": 2.828871488571167, "learning_rate": 0.0001274869759364922, "loss": 2.7721, "step": 5158 }, { "epoch": 1.81, "grad_norm": 4.192399978637695, "learning_rate": 0.00012744976432646984, "loss": 2.9885, "step": 5159 }, { "epoch": 1.81, "grad_norm": 2.731215476989746, "learning_rate": 0.0001274125527164475, "loss": 2.9879, "step": 5160 }, { "epoch": 1.81, "grad_norm": 6.002933025360107, "learning_rate": 
0.0001273753411064252, "loss": 2.9442, "step": 5161 }, { "epoch": 1.81, "grad_norm": 2.4598872661590576, "learning_rate": 0.00012733812949640286, "loss": 2.7012, "step": 5162 }, { "epoch": 1.81, "grad_norm": 6.382826328277588, "learning_rate": 0.00012730091788638054, "loss": 2.9316, "step": 5163 }, { "epoch": 1.81, "grad_norm": 3.4656591415405273, "learning_rate": 0.00012726370627635822, "loss": 2.6503, "step": 5164 }, { "epoch": 1.81, "grad_norm": 9.048042297363281, "learning_rate": 0.0001272264946663359, "loss": 2.9199, "step": 5165 }, { "epoch": 1.81, "grad_norm": 4.229365825653076, "learning_rate": 0.00012718928305631354, "loss": 2.8474, "step": 5166 }, { "epoch": 1.81, "grad_norm": 4.915844440460205, "learning_rate": 0.00012715207144629124, "loss": 2.8055, "step": 5167 }, { "epoch": 1.81, "grad_norm": 4.766310214996338, "learning_rate": 0.00012711485983626892, "loss": 2.7214, "step": 5168 }, { "epoch": 1.81, "grad_norm": 3.777912139892578, "learning_rate": 0.00012707764822624657, "loss": 2.8381, "step": 5169 }, { "epoch": 1.81, "grad_norm": 3.8435094356536865, "learning_rate": 0.00012704043661622424, "loss": 2.7613, "step": 5170 }, { "epoch": 1.81, "grad_norm": 3.1860551834106445, "learning_rate": 0.00012700322500620192, "loss": 2.8166, "step": 5171 }, { "epoch": 1.81, "grad_norm": 3.1261537075042725, "learning_rate": 0.0001269660133961796, "loss": 2.7175, "step": 5172 }, { "epoch": 1.81, "grad_norm": 13.234203338623047, "learning_rate": 0.00012692880178615727, "loss": 2.6832, "step": 5173 }, { "epoch": 1.81, "grad_norm": 4.1125264167785645, "learning_rate": 0.00012689159017613495, "loss": 3.0747, "step": 5174 }, { "epoch": 1.81, "grad_norm": 6.321409225463867, "learning_rate": 0.0001268543785661126, "loss": 2.6672, "step": 5175 }, { "epoch": 1.81, "grad_norm": 5.741054058074951, "learning_rate": 0.0001268171669560903, "loss": 2.5869, "step": 5176 }, { "epoch": 1.81, "grad_norm": 7.433377265930176, "learning_rate": 0.00012677995534606797, "loss": 2.5097, 
"step": 5177 }, { "epoch": 1.81, "grad_norm": 10.795042991638184, "learning_rate": 0.00012674274373604562, "loss": 2.767, "step": 5178 }, { "epoch": 1.81, "grad_norm": 3.698216199874878, "learning_rate": 0.0001267055321260233, "loss": 2.578, "step": 5179 }, { "epoch": 1.81, "grad_norm": 5.1897149085998535, "learning_rate": 0.00012666832051600098, "loss": 3.1661, "step": 5180 }, { "epoch": 1.82, "grad_norm": 13.520951271057129, "learning_rate": 0.00012663110890597865, "loss": 3.0335, "step": 5181 }, { "epoch": 1.82, "grad_norm": 3.7349276542663574, "learning_rate": 0.00012659389729595633, "loss": 2.9884, "step": 5182 }, { "epoch": 1.82, "grad_norm": 5.528043746948242, "learning_rate": 0.000126556685685934, "loss": 2.857, "step": 5183 }, { "epoch": 1.82, "grad_norm": 12.178468704223633, "learning_rate": 0.00012651947407591168, "loss": 2.8438, "step": 5184 }, { "epoch": 1.82, "grad_norm": 12.532594680786133, "learning_rate": 0.00012648226246588935, "loss": 2.8363, "step": 5185 }, { "epoch": 1.82, "grad_norm": 12.38633918762207, "learning_rate": 0.00012644505085586703, "loss": 3.0286, "step": 5186 }, { "epoch": 1.82, "grad_norm": 6.513081073760986, "learning_rate": 0.00012640783924584468, "loss": 2.9366, "step": 5187 }, { "epoch": 1.82, "grad_norm": 6.644528865814209, "learning_rate": 0.00012637062763582235, "loss": 2.8487, "step": 5188 }, { "epoch": 1.82, "grad_norm": 3.157069683074951, "learning_rate": 0.00012633341602580006, "loss": 2.9607, "step": 5189 }, { "epoch": 1.82, "grad_norm": 4.591786861419678, "learning_rate": 0.0001262962044157777, "loss": 2.6186, "step": 5190 }, { "epoch": 1.82, "grad_norm": 3.7421250343322754, "learning_rate": 0.00012625899280575538, "loss": 2.8948, "step": 5191 }, { "epoch": 1.82, "grad_norm": 6.926886558532715, "learning_rate": 0.00012622178119573306, "loss": 2.989, "step": 5192 }, { "epoch": 1.82, "grad_norm": 18.810298919677734, "learning_rate": 0.00012618456958571073, "loss": 3.0692, "step": 5193 }, { "epoch": 1.82, "grad_norm": 
6.420024871826172, "learning_rate": 0.0001261473579756884, "loss": 2.7154, "step": 5194 }, { "epoch": 1.82, "grad_norm": 17.48590087890625, "learning_rate": 0.00012611014636566608, "loss": 2.7142, "step": 5195 }, { "epoch": 1.82, "grad_norm": 5.477769374847412, "learning_rate": 0.00012607293475564373, "loss": 2.8943, "step": 5196 }, { "epoch": 1.82, "grad_norm": 3.2958991527557373, "learning_rate": 0.00012603572314562144, "loss": 3.0727, "step": 5197 }, { "epoch": 1.82, "grad_norm": 2.588256359100342, "learning_rate": 0.0001259985115355991, "loss": 2.6309, "step": 5198 }, { "epoch": 1.82, "grad_norm": 4.680807113647461, "learning_rate": 0.00012596129992557676, "loss": 2.4823, "step": 5199 }, { "epoch": 1.82, "grad_norm": 8.458367347717285, "learning_rate": 0.00012592408831555444, "loss": 2.6377, "step": 5200 }, { "epoch": 1.82, "eval_loss": 2.794273853302002, "eval_runtime": 51.4561, "eval_samples_per_second": 42.133, "eval_steps_per_second": 10.533, "eval_wer": 1.0, "step": 5200 } ], "logging_steps": 1.0, "max_steps": 8562, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 400, "total_flos": 2.1187209341970678e+18, "train_batch_size": 2, "trial_name": null, "trial_params": null }