diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,7285 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 30.0, + "global_step": 1200, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.03, + "learning_rate": 0.0, + "loss": 16.7868, + "step": 1 + }, + { + "epoch": 0.05, + "learning_rate": 0.0, + "loss": 14.8595, + "step": 2 + }, + { + "epoch": 0.07, + "learning_rate": 1.4999999999999998e-06, + "loss": 13.3845, + "step": 3 + }, + { + "epoch": 0.1, + "learning_rate": 2.9999999999999997e-06, + "loss": 13.1354, + "step": 4 + }, + { + "epoch": 0.12, + "learning_rate": 4.499999999999999e-06, + "loss": 13.751, + "step": 5 + }, + { + "epoch": 0.15, + "learning_rate": 5.999999999999999e-06, + "loss": 12.7844, + "step": 6 + }, + { + "epoch": 0.17, + "learning_rate": 7.499999999999999e-06, + "loss": 16.4185, + "step": 7 + }, + { + "epoch": 0.2, + "learning_rate": 8.999999999999999e-06, + "loss": 14.9368, + "step": 8 + }, + { + "epoch": 0.23, + "learning_rate": 1.05e-05, + "loss": 13.8337, + "step": 9 + }, + { + "epoch": 0.25, + "learning_rate": 1.1999999999999999e-05, + "loss": 13.0948, + "step": 10 + }, + { + "epoch": 0.28, + "learning_rate": 1.3499999999999998e-05, + "loss": 13.1814, + "step": 11 + }, + { + "epoch": 0.3, + "learning_rate": 1.4999999999999999e-05, + "loss": 12.3933, + "step": 12 + }, + { + "epoch": 0.33, + "learning_rate": 1.6499999999999998e-05, + "loss": 14.1081, + "step": 13 + }, + { + "epoch": 0.35, + "learning_rate": 1.7999999999999997e-05, + "loss": 14.3403, + "step": 14 + }, + { + "epoch": 0.38, + "learning_rate": 1.95e-05, + "loss": 14.1041, + "step": 15 + }, + { + "epoch": 0.4, + "learning_rate": 2.1e-05, + "loss": 12.2322, + "step": 16 + }, + { + "epoch": 0.42, + "learning_rate": 2.2499999999999998e-05, + "loss": 12.5879, + "step": 17 + }, + { + "epoch": 0.45, + "learning_rate": 2.3999999999999997e-05, + "loss": 12.9614, + "step": 18 + }, + { + "epoch": 0.47, + "learning_rate": 2.55e-05, + "loss": 12.602, + "step": 19 + }, + { + "epoch": 0.5, + "learning_rate": 2.6999999999999996e-05, + "loss": 13.9366, + "step": 20 + }, + { + "epoch": 0.53, + "learning_rate": 2.8499999999999998e-05, + "loss": 13.3282, + "step": 21 + }, + { + "epoch": 0.55, + "learning_rate": 2.9999999999999997e-05, + "loss": 12.1615, + "step": 22 + }, + { + "epoch": 0.57, + "learning_rate": 3.149999999999999e-05, + "loss": 11.4266, + "step": 23 + }, + { + "epoch": 0.6, + "learning_rate": 3.2999999999999996e-05, + "loss": 10.6892, + "step": 24 + }, + { + "epoch": 0.62, + "learning_rate": 3.45e-05, + "loss": 10.1604, + "step": 25 + }, + { + "epoch": 0.65, + "learning_rate": 3.45e-05, + "loss": 12.1081, + "step": 26 + }, + { + "epoch": 0.68, + "learning_rate": 3.5999999999999994e-05, + "loss": 11.3549, + "step": 27 + }, + { + "epoch": 0.7, + "learning_rate": 3.75e-05, + "loss": 10.1111, + "step": 28 + }, + { + "epoch": 0.72, + "learning_rate": 3.9e-05, + "loss": 9.0024, + "step": 29 + }, + { + "epoch": 0.75, + "learning_rate": 4.05e-05, + "loss": 8.6496, + "step": 30 + }, + { + "epoch": 0.78, + "learning_rate": 4.2e-05, + "loss": 8.0396, + "step": 31 + }, + { + "epoch": 0.8, + "learning_rate": 4.3499999999999993e-05, + "loss": 9.5749, + "step": 32 + }, + { + "epoch": 0.82, + "learning_rate": 4.4999999999999996e-05, + "loss": 8.4065, + "step": 33 + }, + { + "epoch": 0.85, + "learning_rate": 4.65e-05, + "loss": 7.891, + "step": 34 + }, + { + "epoch": 0.88, + "learning_rate": 4.7999999999999994e-05, + "loss": 7.0403, + "step": 35 + }, + { + "epoch": 0.9, + "learning_rate": 4.95e-05, + "loss": 7.1381, + "step": 36 + }, + { + "epoch": 0.93, + "learning_rate": 5.1e-05, + "loss": 6.6468, + "step": 37 + }, + { + "epoch": 0.95, + "learning_rate": 5.2499999999999995e-05, + "loss": 6.8392, + "step": 38 + }, + { + "epoch": 0.97, + "learning_rate": 5.399999999999999e-05, + "loss": 6.2868, + "step": 39 + }, + { + "epoch": 1.0, + "learning_rate": 5.5499999999999994e-05, + "loss": 5.9633, + "step": 40 + }, + { + "epoch": 1.02, + "learning_rate": 5.6999999999999996e-05, + "loss": 6.5684, + "step": 41 + }, + { + "epoch": 1.05, + "learning_rate": 5.85e-05, + "loss": 6.2034, + "step": 42 + }, + { + "epoch": 1.07, + "learning_rate": 5.9999999999999995e-05, + "loss": 5.5007, + "step": 43 + }, + { + "epoch": 1.1, + "learning_rate": 6.149999999999999e-05, + "loss": 5.3496, + "step": 44 + }, + { + "epoch": 1.12, + "learning_rate": 6.299999999999999e-05, + "loss": 5.4191, + "step": 45 + }, + { + "epoch": 1.15, + "learning_rate": 6.45e-05, + "loss": 5.135, + "step": 46 + }, + { + "epoch": 1.18, + "learning_rate": 6.599999999999999e-05, + "loss": 5.743, + "step": 47 + }, + { + "epoch": 1.2, + "learning_rate": 6.75e-05, + "loss": 5.3021, + "step": 48 + }, + { + "epoch": 1.23, + "learning_rate": 6.9e-05, + "loss": 5.2977, + "step": 49 + }, + { + "epoch": 1.25, + "learning_rate": 7.049999999999999e-05, + "loss": 4.901, + "step": 50 + }, + { + "epoch": 1.27, + "learning_rate": 7.199999999999999e-05, + "loss": 4.7971, + "step": 51 + }, + { + "epoch": 1.3, + "learning_rate": 7.35e-05, + "loss": 4.8289, + "step": 52 + }, + { + "epoch": 1.32, + "learning_rate": 7.5e-05, + "loss": 5.1127, + "step": 53 + }, + { + "epoch": 1.35, + "learning_rate": 7.649999999999999e-05, + "loss": 4.9781, + "step": 54 + }, + { + "epoch": 1.38, + "learning_rate": 7.8e-05, + "loss": 4.7937, + "step": 55 + }, + { + "epoch": 1.4, + "learning_rate": 7.95e-05, + "loss": 4.6379, + "step": 56 + }, + { + "epoch": 1.43, + "learning_rate": 8.1e-05, + "loss": 4.4712, + "step": 57 + }, + { + "epoch": 1.45, + "learning_rate": 8.25e-05, + "loss": 4.4839, + "step": 58 + }, + { + "epoch": 1.48, + "learning_rate": 8.4e-05, + "loss": 4.5183, + "step": 59 + }, + { + "epoch": 1.5, + "learning_rate": 8.549999999999999e-05, + "loss": 4.4879, + "step": 60 + }, + { + "epoch": 1.52, + "learning_rate": 8.699999999999999e-05, + "loss": 4.4971, + "step": 61 + }, + { + "epoch": 1.55, + "learning_rate": 8.849999999999998e-05, + "loss": 4.3831, + "step": 62 + }, + { + "epoch": 1.57, + "learning_rate": 8.999999999999999e-05, + "loss": 4.2891, + "step": 63 + }, + { + "epoch": 1.6, + "learning_rate": 9.149999999999999e-05, + "loss": 4.1934, + "step": 64 + }, + { + "epoch": 1.62, + "learning_rate": 9.3e-05, + "loss": 4.1251, + "step": 65 + }, + { + "epoch": 1.65, + "learning_rate": 9.449999999999999e-05, + "loss": 4.635, + "step": 66 + }, + { + "epoch": 1.68, + "learning_rate": 9.599999999999999e-05, + "loss": 4.165, + "step": 67 + }, + { + "epoch": 1.7, + "learning_rate": 9.75e-05, + "loss": 4.154, + "step": 68 + }, + { + "epoch": 1.73, + "learning_rate": 9.9e-05, + "loss": 4.0427, + "step": 69 + }, + { + "epoch": 1.75, + "learning_rate": 0.0001005, + "loss": 4.0911, + "step": 70 + }, + { + "epoch": 1.77, + "learning_rate": 0.000102, + "loss": 3.969, + "step": 71 + }, + { + "epoch": 1.8, + "learning_rate": 0.00010349999999999998, + "loss": 4.1288, + "step": 72 + }, + { + "epoch": 1.82, + "learning_rate": 0.00010499999999999999, + "loss": 4.091, + "step": 73 + }, + { + "epoch": 1.85, + "learning_rate": 0.00010649999999999999, + "loss": 3.9443, + "step": 74 + }, + { + "epoch": 1.88, + "learning_rate": 0.00010799999999999998, + "loss": 3.7792, + "step": 75 + }, + { + "epoch": 1.9, + "learning_rate": 0.00010949999999999999, + "loss": 3.8244, + "step": 76 + }, + { + "epoch": 1.93, + "learning_rate": 0.00011099999999999999, + "loss": 3.7284, + "step": 77 + }, + { + "epoch": 1.95, + "learning_rate": 0.0001125, + "loss": 3.8774, + "step": 78 + }, + { + "epoch": 1.98, + "learning_rate": 0.00011399999999999999, + "loss": 3.7435, + "step": 79 + }, + { + "epoch": 2.0, + "learning_rate": 0.00011549999999999999, + "loss": 3.6561, + "step": 80 + }, + { + "epoch": 2.02, + "learning_rate": 0.000117, + "loss": 3.9569, + "step": 81 + }, + { + "epoch": 2.05, + "learning_rate": 0.0001185, + "loss": 3.6893, + "step": 82 + }, + { + "epoch": 2.08, + "learning_rate": 0.00011999999999999999, + "loss": 3.6249, + "step": 83 + }, + { + "epoch": 2.1, + "learning_rate": 0.0001215, + "loss": 3.5364, + "step": 84 + }, + { + "epoch": 2.12, + "learning_rate": 0.00012299999999999998, + "loss": 3.5281, + "step": 85 + }, + { + "epoch": 2.15, + "learning_rate": 0.0001245, + "loss": 3.485, + "step": 86 + }, + { + "epoch": 2.17, + "learning_rate": 0.00012599999999999997, + "loss": 3.5419, + "step": 87 + }, + { + "epoch": 2.2, + "learning_rate": 0.00012749999999999998, + "loss": 3.4307, + "step": 88 + }, + { + "epoch": 2.23, + "learning_rate": 0.000129, + "loss": 3.4118, + "step": 89 + }, + { + "epoch": 2.25, + "learning_rate": 0.0001305, + "loss": 3.3756, + "step": 90 + }, + { + "epoch": 2.27, + "learning_rate": 0.00013199999999999998, + "loss": 3.3473, + "step": 91 + }, + { + "epoch": 2.3, + "learning_rate": 0.0001335, + "loss": 3.3208, + "step": 92 + }, + { + "epoch": 2.33, + "learning_rate": 0.000135, + "loss": 3.4177, + "step": 93 + }, + { + "epoch": 2.35, + "learning_rate": 0.00013649999999999998, + "loss": 3.3614, + "step": 94 + }, + { + "epoch": 2.38, + "learning_rate": 0.000138, + "loss": 3.3018, + "step": 95 + }, + { + "epoch": 2.4, + "learning_rate": 0.0001395, + "loss": 3.2517, + "step": 96 + }, + { + "epoch": 2.42, + "learning_rate": 0.00014099999999999998, + "loss": 3.2269, + "step": 97 + }, + { + "epoch": 2.45, + "learning_rate": 0.0001425, + "loss": 3.2305, + "step": 98 + }, + { + "epoch": 2.48, + "learning_rate": 0.00014399999999999998, + "loss": 3.2049, + "step": 99 + }, + { + "epoch": 2.5, + "learning_rate": 0.00014549999999999999, + "loss": 3.2521, + "step": 100 + }, + { + "epoch": 2.52, + "learning_rate": 0.000147, + "loss": 3.1998, + "step": 101 + }, + { + "epoch": 2.55, + "learning_rate": 0.00014849999999999998, + "loss": 3.1652, + "step": 102 + }, + { + "epoch": 2.58, + "learning_rate": 0.00015, + "loss": 3.1294, + "step": 103 + }, + { + "epoch": 2.6, + "learning_rate": 0.0001515, + "loss": 3.1336, + "step": 104 + }, + { + "epoch": 2.62, + "learning_rate": 0.00015299999999999998, + "loss": 3.1014, + "step": 105 + }, + { + "epoch": 2.65, + "learning_rate": 0.0001545, + "loss": 3.1631, + "step": 106 + }, + { + "epoch": 2.67, + "learning_rate": 0.000156, + "loss": 3.1484, + "step": 107 + }, + { + "epoch": 2.7, + "learning_rate": 0.00015749999999999998, + "loss": 3.0577, + "step": 108 + }, + { + "epoch": 2.73, + "learning_rate": 0.000159, + "loss": 3.0468, + "step": 109 + }, + { + "epoch": 2.75, + "learning_rate": 0.0001605, + "loss": 3.0804, + "step": 110 + }, + { + "epoch": 2.77, + "learning_rate": 0.000162, + "loss": 3.0399, + "step": 111 + }, + { + "epoch": 2.8, + "learning_rate": 0.0001635, + "loss": 3.0878, + "step": 112 + }, + { + "epoch": 2.83, + "learning_rate": 0.000165, + "loss": 3.0164, + "step": 113 + }, + { + "epoch": 2.85, + "learning_rate": 0.0001665, + "loss": 3.0297, + "step": 114 + }, + { + "epoch": 2.88, + "learning_rate": 0.000168, + "loss": 3.0057, + "step": 115 + }, + { + "epoch": 2.9, + "learning_rate": 0.00016949999999999997, + "loss": 2.9835, + "step": 116 + }, + { + "epoch": 2.92, + "learning_rate": 0.00017099999999999998, + "loss": 2.9727, + "step": 117 + }, + { + "epoch": 2.95, + "learning_rate": 0.00017249999999999996, + "loss": 3.007, + "step": 118 + }, + { + "epoch": 2.98, + "learning_rate": 0.00017399999999999997, + "loss": 2.9819, + "step": 119 + }, + { + "epoch": 3.0, + "learning_rate": 0.00017549999999999998, + "loss": 2.9878, + "step": 120 + }, + { + "epoch": 3.02, + "learning_rate": 0.00017699999999999997, + "loss": 3.0223, + "step": 121 + }, + { + "epoch": 3.05, + "learning_rate": 0.00017849999999999997, + "loss": 3.0296, + "step": 122 + }, + { + "epoch": 3.08, + "learning_rate": 0.00017999999999999998, + "loss": 2.9798, + "step": 123 + }, + { + "epoch": 3.1, + "learning_rate": 0.00018149999999999997, + "loss": 2.9757, + "step": 124 + }, + { + "epoch": 3.12, + "learning_rate": 0.00018299999999999998, + "loss": 2.9642, + "step": 125 + }, + { + "epoch": 3.15, + "learning_rate": 0.00018449999999999999, + "loss": 2.9522, + "step": 126 + }, + { + "epoch": 3.17, + "learning_rate": 0.000186, + "loss": 3.0148, + "step": 127 + }, + { + "epoch": 3.2, + "learning_rate": 0.00018749999999999998, + "loss": 2.9753, + "step": 128 + }, + { + "epoch": 3.23, + "learning_rate": 0.00018899999999999999, + "loss": 2.9676, + "step": 129 + }, + { + "epoch": 3.25, + "learning_rate": 0.0001905, + "loss": 2.9477, + "step": 130 + }, + { + "epoch": 3.27, + "learning_rate": 0.00019199999999999998, + "loss": 2.9437, + "step": 131 + }, + { + "epoch": 3.3, + "learning_rate": 0.0001935, + "loss": 2.9342, + "step": 132 + }, + { + "epoch": 3.33, + "learning_rate": 0.000195, + "loss": 2.9855, + "step": 133 + }, + { + "epoch": 3.35, + "learning_rate": 0.00019649999999999998, + "loss": 2.9948, + "step": 134 + }, + { + "epoch": 3.38, + "learning_rate": 0.000198, + "loss": 2.9306, + "step": 135 + }, + { + "epoch": 3.4, + "learning_rate": 0.0001995, + "loss": 2.9184, + "step": 136 + }, + { + "epoch": 3.42, + "learning_rate": 0.000201, + "loss": 2.9272, + "step": 137 + }, + { + "epoch": 3.45, + "learning_rate": 0.0002025, + "loss": 2.9779, + "step": 138 + }, + { + "epoch": 3.48, + "learning_rate": 0.000204, + "loss": 2.9396, + "step": 139 + }, + { + "epoch": 3.5, + "learning_rate": 0.0002055, + "loss": 2.9318, + "step": 140 + }, + { + "epoch": 3.52, + "learning_rate": 0.00020699999999999996, + "loss": 2.9382, + "step": 141 + }, + { + "epoch": 3.55, + "learning_rate": 0.00020849999999999997, + "loss": 2.9197, + "step": 142 + }, + { + "epoch": 3.58, + "learning_rate": 0.00020999999999999998, + "loss": 2.9319, + "step": 143 + }, + { + "epoch": 3.6, + "learning_rate": 0.00021149999999999996, + "loss": 2.9453, + "step": 144 + }, + { + "epoch": 3.62, + "learning_rate": 0.00021299999999999997, + "loss": 2.9234, + "step": 145 + }, + { + "epoch": 3.65, + "learning_rate": 0.00021449999999999998, + "loss": 2.93, + "step": 146 + }, + { + "epoch": 3.67, + "learning_rate": 0.00021599999999999996, + "loss": 2.9188, + "step": 147 + }, + { + "epoch": 3.7, + "learning_rate": 0.00021749999999999997, + "loss": 2.9095, + "step": 148 + }, + { + "epoch": 3.73, + "learning_rate": 0.00021899999999999998, + "loss": 2.9112, + "step": 149 + }, + { + "epoch": 3.75, + "learning_rate": 0.00022049999999999997, + "loss": 2.9158, + "step": 150 + }, + { + "epoch": 3.77, + "learning_rate": 0.00022199999999999998, + "loss": 2.9139, + "step": 151 + }, + { + "epoch": 3.8, + "learning_rate": 0.00022349999999999998, + "loss": 2.93, + "step": 152 + }, + { + "epoch": 3.83, + "learning_rate": 0.000225, + "loss": 2.9289, + "step": 153 + }, + { + "epoch": 3.85, + "learning_rate": 0.00022649999999999998, + "loss": 2.9068, + "step": 154 + }, + { + "epoch": 3.88, + "learning_rate": 0.00022799999999999999, + "loss": 2.9078, + "step": 155 + }, + { + "epoch": 3.9, + "learning_rate": 0.0002295, + "loss": 2.8934, + "step": 156 + }, + { + "epoch": 3.92, + "learning_rate": 0.00023099999999999998, + "loss": 2.9074, + "step": 157 + }, + { + "epoch": 3.95, + "learning_rate": 0.00023249999999999999, + "loss": 2.9112, + "step": 158 + }, + { + "epoch": 3.98, + "learning_rate": 0.000234, + "loss": 2.9088, + "step": 159 + }, + { + "epoch": 4.0, + "learning_rate": 0.00023549999999999998, + "loss": 2.9322, + "step": 160 + }, + { + "epoch": 4.03, + "learning_rate": 0.000237, + "loss": 2.9264, + "step": 161 + }, + { + "epoch": 4.05, + "learning_rate": 0.0002385, + "loss": 2.9052, + "step": 162 + }, + { + "epoch": 4.08, + "learning_rate": 0.00023999999999999998, + "loss": 2.8861, + "step": 163 + }, + { + "epoch": 4.1, + "learning_rate": 0.0002415, + "loss": 2.8886, + "step": 164 + }, + { + "epoch": 4.12, + "learning_rate": 0.000243, + "loss": 2.9083, + "step": 165 + }, + { + "epoch": 4.15, + "learning_rate": 0.0002445, + "loss": 2.9099, + "step": 166 + }, + { + "epoch": 4.17, + "learning_rate": 0.00024599999999999996, + "loss": 2.907, + "step": 167 + }, + { + "epoch": 4.2, + "learning_rate": 0.00024749999999999994, + "loss": 2.9047, + "step": 168 + }, + { + "epoch": 4.22, + "learning_rate": 0.000249, + "loss": 2.9063, + "step": 169 + }, + { + "epoch": 4.25, + "learning_rate": 0.00025049999999999996, + "loss": 2.8841, + "step": 170 + }, + { + "epoch": 4.28, + "learning_rate": 0.00025199999999999995, + "loss": 2.8924, + "step": 171 + }, + { + "epoch": 4.3, + "learning_rate": 0.0002535, + "loss": 2.8818, + "step": 172 + }, + { + "epoch": 4.33, + "learning_rate": 0.00025499999999999996, + "loss": 2.898, + "step": 173 + }, + { + "epoch": 4.35, + "learning_rate": 0.00025649999999999995, + "loss": 2.9063, + "step": 174 + }, + { + "epoch": 4.38, + "learning_rate": 0.000258, + "loss": 2.9082, + "step": 175 + }, + { + "epoch": 4.4, + "learning_rate": 0.00025949999999999997, + "loss": 2.8992, + "step": 176 + }, + { + "epoch": 4.42, + "learning_rate": 0.000261, + "loss": 2.9227, + "step": 177 + }, + { + "epoch": 4.45, + "learning_rate": 0.0002625, + "loss": 2.9063, + "step": 178 + }, + { + "epoch": 4.47, + "learning_rate": 0.00026399999999999997, + "loss": 2.8768, + "step": 179 + }, + { + "epoch": 4.5, + "learning_rate": 0.0002655, + "loss": 2.8997, + "step": 180 + }, + { + "epoch": 4.53, + "learning_rate": 0.000267, + "loss": 2.9128, + "step": 181 + }, + { + "epoch": 4.55, + "learning_rate": 0.00026849999999999997, + "loss": 2.871, + "step": 182 + }, + { + "epoch": 4.58, + "learning_rate": 0.00027, + "loss": 2.8746, + "step": 183 + }, + { + "epoch": 4.6, + "learning_rate": 0.0002715, + "loss": 2.8864, + "step": 184 + }, + { + "epoch": 4.62, + "learning_rate": 0.00027299999999999997, + "loss": 2.8741, + "step": 185 + }, + { + "epoch": 4.65, + "learning_rate": 0.0002745, + "loss": 2.9214, + "step": 186 + }, + { + "epoch": 4.67, + "learning_rate": 0.000276, + "loss": 2.9235, + "step": 187 + }, + { + "epoch": 4.7, + "learning_rate": 0.00027749999999999997, + "loss": 2.8978, + "step": 188 + }, + { + "epoch": 4.72, + "learning_rate": 0.000279, + "loss": 2.889, + "step": 189 + }, + { + "epoch": 4.75, + "learning_rate": 0.0002805, + "loss": 2.8871, + "step": 190 + }, + { + "epoch": 4.78, + "learning_rate": 0.00028199999999999997, + "loss": 2.8746, + "step": 191 + }, + { + "epoch": 4.8, + "learning_rate": 0.00028349999999999995, + "loss": 2.9017, + "step": 192 + }, + { + "epoch": 4.83, + "learning_rate": 0.000285, + "loss": 2.8951, + "step": 193 + }, + { + "epoch": 4.85, + "learning_rate": 0.00028649999999999997, + "loss": 2.8905, + "step": 194 + }, + { + "epoch": 4.88, + "learning_rate": 0.00028799999999999995, + "loss": 2.8908, + "step": 195 + }, + { + "epoch": 4.9, + "learning_rate": 0.0002895, + "loss": 2.8951, + "step": 196 + }, + { + "epoch": 4.92, + "learning_rate": 0.00029099999999999997, + "loss": 2.8982, + "step": 197 + }, + { + "epoch": 4.95, + "learning_rate": 0.00029249999999999995, + "loss": 2.8693, + "step": 198 + }, + { + "epoch": 4.97, + "learning_rate": 0.000294, + "loss": 2.8947, + "step": 199 + }, + { + "epoch": 5.0, + "learning_rate": 0.00029549999999999997, + "loss": 2.8769, + "step": 200 + }, + { + "epoch": 5.0, + "eval_cer": 0.9877725059047301, + "eval_loss": 2.887089252471924, + "eval_runtime": 7.9078, + "eval_samples_per_second": 49.824, + "eval_steps_per_second": 6.323, + "eval_wer": 1.0, + "step": 200 + }, + { + "epoch": 5.03, + "learning_rate": 0.00029699999999999996, + "loss": 2.9024, + "step": 201 + }, + { + "epoch": 5.05, + "learning_rate": 0.0002985, + "loss": 2.8893, + "step": 202 + }, + { + "epoch": 5.08, + "learning_rate": 0.0003, + "loss": 2.8794, + "step": 203 + }, + { + "epoch": 5.1, + "learning_rate": 0.00029969999999999997, + "loss": 2.8636, + "step": 204 + }, + { + "epoch": 5.12, + "learning_rate": 0.00029939999999999996, + "loss": 2.8694, + "step": 205 + }, + { + "epoch": 5.15, + "learning_rate": 0.00029909999999999995, + "loss": 2.856, + "step": 206 + }, + { + "epoch": 5.17, + "learning_rate": 0.0002988, + "loss": 2.8687, + "step": 207 + }, + { + "epoch": 5.2, + "learning_rate": 0.0002985, + "loss": 2.888, + "step": 208 + }, + { + "epoch": 5.22, + "learning_rate": 0.0002982, + "loss": 2.8859, + "step": 209 + }, + { + "epoch": 5.25, + "learning_rate": 0.0002979, + "loss": 2.8724, + "step": 210 + }, + { + "epoch": 5.28, + "learning_rate": 0.00029759999999999997, + "loss": 2.8656, + "step": 211 + }, + { + "epoch": 5.3, + "learning_rate": 0.00029729999999999996, + "loss": 2.8576, + "step": 212 + }, + { + "epoch": 5.33, + "learning_rate": 0.00029699999999999996, + "loss": 2.8593, + "step": 213 + }, + { + "epoch": 5.35, + "learning_rate": 0.00029669999999999995, + "loss": 2.8652, + "step": 214 + }, + { + "epoch": 5.38, + "learning_rate": 0.0002964, + "loss": 2.8831, + "step": 215 + }, + { + "epoch": 5.4, + "learning_rate": 0.0002961, + "loss": 2.8704, + "step": 216 + }, + { + "epoch": 5.42, + "learning_rate": 0.0002958, + "loss": 2.8628, + "step": 217 + }, + { + "epoch": 5.45, + "learning_rate": 0.00029549999999999997, + "loss": 2.878, + "step": 218 + }, + { + "epoch": 5.47, + "learning_rate": 0.00029519999999999997, + "loss": 2.857, + "step": 219 + }, + { + "epoch": 5.5, + "learning_rate": 0.00029489999999999996, + "loss": 2.8974, + "step": 220 + }, + { + "epoch": 5.53, + "learning_rate": 0.00029459999999999995, + "loss": 2.8828, + "step": 221 + }, + { + "epoch": 5.55, + "learning_rate": 0.00029429999999999994, + "loss": 2.8615, + "step": 222 + }, + { + "epoch": 5.58, + "learning_rate": 0.000294, + "loss": 2.8606, + "step": 223 + }, + { + "epoch": 5.6, + "learning_rate": 0.0002937, + "loss": 2.8739, + "step": 224 + }, + { + "epoch": 5.62, + "learning_rate": 0.0002934, + "loss": 2.8498, + "step": 225 + }, + { + "epoch": 5.65, + "learning_rate": 0.00029309999999999997, + "loss": 2.887, + "step": 226 + }, + { + "epoch": 5.67, + "learning_rate": 0.00029279999999999996, + "loss": 2.8769, + "step": 227 + }, + { + "epoch": 5.7, + "learning_rate": 0.00029249999999999995, + "loss": 2.8585, + "step": 228 + }, + { + "epoch": 5.72, + "learning_rate": 0.00029219999999999995, + "loss": 2.8652, + "step": 229 + }, + { + "epoch": 5.75, + "learning_rate": 0.0002919, + "loss": 2.8788, + "step": 230 + }, + { + "epoch": 5.78, + "learning_rate": 0.0002916, + "loss": 2.8695, + "step": 231 + }, + { + "epoch": 5.8, + "learning_rate": 0.0002913, + "loss": 2.8745, + "step": 232 + }, + { + "epoch": 5.83, + "learning_rate": 0.00029099999999999997, + "loss": 2.8641, + "step": 233 + }, + { + "epoch": 5.85, + "learning_rate": 0.00029069999999999996, + "loss": 2.8573, + "step": 234 + }, + { + "epoch": 5.88, + "learning_rate": 0.00029039999999999996, + "loss": 2.8544, + "step": 235 + }, + { + "epoch": 5.9, + "learning_rate": 0.00029009999999999995, + "loss": 2.8627, + "step": 236 + }, + { + "epoch": 5.92, + "learning_rate": 0.00028979999999999994, + "loss": 2.8689, + "step": 237 + }, + { + "epoch": 5.95, + "learning_rate": 0.0002895, + "loss": 2.8487, + "step": 238 + }, + { + "epoch": 5.97, + "learning_rate": 0.0002892, + "loss": 2.8712, + "step": 239 + }, + { + "epoch": 6.0, + "learning_rate": 0.0002889, + "loss": 2.8417, + "step": 240 + }, + { + "epoch": 6.03, + "learning_rate": 0.00028859999999999997, + "loss": 2.8779, + "step": 241 + }, + { + "epoch": 6.05, + "learning_rate": 0.00028829999999999996, + "loss": 2.856, + "step": 242 + }, + { + "epoch": 6.08, + "learning_rate": 0.00028799999999999995, + "loss": 2.8544, + "step": 243 + }, + { + "epoch": 6.1, + "learning_rate": 0.00028769999999999995, + "loss": 2.8747, + "step": 244 + }, + { + "epoch": 6.12, + "learning_rate": 0.00028739999999999994, + "loss": 2.8706, + "step": 245 + }, + { + "epoch": 6.15, + "learning_rate": 0.0002871, + "loss": 2.8637, + "step": 246 + }, + { + "epoch": 6.17, + "learning_rate": 0.0002868, + "loss": 2.8778, + "step": 247 + }, + { + "epoch": 6.2, + "learning_rate": 0.00028649999999999997, + "loss": 2.864, + "step": 248 + }, + { + "epoch": 6.22, + "learning_rate": 0.00028619999999999996, + "loss": 2.8515, + "step": 249 + }, + { + "epoch": 6.25, + "learning_rate": 0.00028589999999999996, + "loss": 2.8488, + "step": 250 + }, + { + "epoch": 6.28, + "learning_rate": 0.00028559999999999995, + "loss": 2.8588, + "step": 251 + }, + { + "epoch": 6.3, + "learning_rate": 0.00028529999999999994, + "loss": 2.8256, + "step": 252 + }, + { + "epoch": 6.33, + "learning_rate": 0.000285, + "loss": 2.8418, + "step": 253 + }, + { + "epoch": 6.35, + "learning_rate": 0.0002847, + "loss": 2.8455, + "step": 254 + }, + { + "epoch": 6.38, + "learning_rate": 0.0002844, + "loss": 2.8611, + "step": 255 + }, + { + "epoch": 6.4, + "learning_rate": 0.00028409999999999997, + "loss": 2.8389, + "step": 256 + }, + { + "epoch": 6.42, + "learning_rate": 0.00028379999999999996, + "loss": 2.8246, + "step": 257 + }, + { + "epoch": 6.45, + "learning_rate": 0.00028349999999999995, + "loss": 2.8091, + "step": 258 + }, + { + "epoch": 6.47, + "learning_rate": 0.00028319999999999994, + "loss": 2.8201, + "step": 259 + }, + { + "epoch": 6.5, + "learning_rate": 0.00028289999999999994, + "loss": 2.8465, + "step": 260 + }, + { + "epoch": 6.53, + "learning_rate": 0.0002826, + "loss": 2.8379, + "step": 261 + }, + { + "epoch": 6.55, + "learning_rate": 0.0002823, + "loss": 2.9108, + "step": 262 + }, + { + "epoch": 6.58, + "learning_rate": 0.00028199999999999997, + "loss": 2.8535, + "step": 263 + }, + { + "epoch": 6.6, + "learning_rate": 0.00028169999999999996, + "loss": 2.8134, + "step": 264 + }, + { + "epoch": 6.62, + "learning_rate": 0.00028139999999999996, + "loss": 2.7941, + "step": 265 + }, + { + "epoch": 6.65, + "learning_rate": 0.0002811, + "loss": 2.8202, + "step": 266 + }, + { + "epoch": 6.67, + "learning_rate": 0.0002808, + "loss": 2.7988, + "step": 267 + }, + { + "epoch": 6.7, + "learning_rate": 0.0002805, + "loss": 2.7547, + "step": 268 + }, + { + "epoch": 6.72, + "learning_rate": 0.0002802, + "loss": 2.7081, + "step": 269 + }, + { + "epoch": 6.75, + "learning_rate": 0.0002799, + "loss": 2.7188, + "step": 270 + }, + { + "epoch": 6.78, + "learning_rate": 0.00027959999999999997, + "loss": 2.6883, + "step": 271 + }, + { + "epoch": 6.8, + "learning_rate": 0.0002793, + "loss": 2.7156, + "step": 272 + }, + { + "epoch": 6.83, + "learning_rate": 0.000279, + "loss": 2.6894, + "step": 273 + }, + { + "epoch": 6.85, + "learning_rate": 0.0002787, + "loss": 2.6191, + "step": 274 + }, + { + "epoch": 6.88, + "learning_rate": 0.0002784, + "loss": 2.6832, + "step": 275 + }, + { + "epoch": 6.9, + "learning_rate": 0.0002781, + "loss": 2.5653, + "step": 276 + }, + { + "epoch": 6.92, + "learning_rate": 0.0002778, + "loss": 2.5862, + "step": 277 + }, + { + "epoch": 6.95, + "learning_rate": 0.00027749999999999997, + "loss": 2.4953, + "step": 278 + }, + { + "epoch": 6.97, + "learning_rate": 0.0002772, + "loss": 2.4946, + "step": 279 + }, + { + "epoch": 7.0, + "learning_rate": 0.0002769, + "loss": 2.4524, + "step": 280 + }, + { + "epoch": 7.03, + "learning_rate": 0.0002766, + "loss": 2.4326, + "step": 281 + }, + { + "epoch": 7.05, + "learning_rate": 0.0002763, + "loss": 2.3657, + "step": 282 + }, + { + "epoch": 7.08, + "learning_rate": 0.000276, + "loss": 2.2954, + "step": 283 + }, + { + "epoch": 7.1, + "learning_rate": 0.0002757, + "loss": 2.2269, + "step": 284 + }, + { + "epoch": 7.12, + "learning_rate": 0.00027539999999999997, + "loss": 2.2098, + "step": 285 + }, + { + "epoch": 7.15, + "learning_rate": 0.00027509999999999996, + "loss": 2.1506, + "step": 286 + }, + { + "epoch": 7.17, + "learning_rate": 0.0002748, + "loss": 2.2161, + "step": 287 + }, + { + "epoch": 7.2, + "learning_rate": 0.0002745, + "loss": 2.1576, + "step": 288 + }, + { + "epoch": 7.22, + "learning_rate": 0.0002742, + "loss": 2.0598, + "step": 289 + }, + { + "epoch": 7.25, + "learning_rate": 0.0002739, + "loss": 2.0789, + "step": 290 + }, + { + "epoch": 7.28, + "learning_rate": 0.0002736, + "loss": 1.9738, + "step": 291 + }, + { + "epoch": 7.3, + "learning_rate": 0.0002733, + "loss": 1.9215, + "step": 292 + }, + { + "epoch": 7.33, + "learning_rate": 0.00027299999999999997, + "loss": 1.8849, + "step": 293 + }, + { + "epoch": 7.35, + "learning_rate": 0.00027269999999999996, + "loss": 1.8478, + "step": 294 + }, + { + "epoch": 7.38, + "learning_rate": 0.0002724, + "loss": 1.795, + "step": 295 + }, + { + "epoch": 7.4, + "learning_rate": 0.0002721, + "loss": 1.7426, + "step": 296 + }, + { + "epoch": 7.42, + "learning_rate": 0.0002718, + "loss": 1.6475, + "step": 297 + }, + { + "epoch": 7.45, + "learning_rate": 0.0002715, + "loss": 1.5878, + "step": 298 + }, + { + "epoch": 7.47, + "learning_rate": 0.0002712, + "loss": 1.6167, + "step": 299 + }, + { + "epoch": 7.5, + "learning_rate": 0.00027089999999999997, + "loss": 1.6752, + "step": 300 + }, + { + "epoch": 7.53, + "learning_rate": 0.00027059999999999996, + "loss": 1.4976, + "step": 301 + }, + { + "epoch": 7.55, + "learning_rate": 0.00027029999999999996, + "loss": 1.4719, + "step": 302 + }, + { + "epoch": 7.58, + "learning_rate": 0.00027, + "loss": 1.4345, + "step": 303 + }, + { + "epoch": 7.6, + "learning_rate": 0.0002697, + "loss": 1.4122, + "step": 304 + }, + { + "epoch": 7.62, + "learning_rate": 0.0002694, + "loss": 1.2736, + "step": 305 + }, + { + "epoch": 7.65, + "learning_rate": 0.0002691, + "loss": 1.408, + "step": 306 + }, + { + "epoch": 7.67, + "learning_rate": 0.0002688, + "loss": 1.3904, + "step": 307 + }, + { + "epoch": 7.7, + "learning_rate": 0.00026849999999999997, + "loss": 1.3304, + "step": 308 + }, + { + "epoch": 7.72, + "learning_rate": 0.00026819999999999996, + "loss": 1.1912, + "step": 309 + }, + { + "epoch": 7.75, + "learning_rate": 0.0002679, + "loss": 1.1753, + "step": 310 + }, + { + "epoch": 7.78, + "learning_rate": 0.0002676, + "loss": 1.1732, + "step": 311 + }, + { + "epoch": 7.8, + "learning_rate": 0.0002673, + "loss": 1.2074, + "step": 312 + }, + { + "epoch": 7.83, + "learning_rate": 0.000267, + "loss": 1.1108, + "step": 313 + }, + { + "epoch": 7.85, + "learning_rate": 0.0002667, + "loss": 1.1075, + "step": 314 + }, + { + "epoch": 7.88, + "learning_rate": 0.00026639999999999997, + "loss": 1.0365, + "step": 315 + }, + { + "epoch": 7.9, + "learning_rate": 0.00026609999999999996, + "loss": 0.972, + "step": 316 + }, + { + "epoch": 7.92, + "learning_rate": 0.00026579999999999996, + "loss": 0.9697, + "step": 317 + }, + { + "epoch": 7.95, + "learning_rate": 0.0002655, + "loss": 0.9896, + "step": 318 + }, + { + "epoch": 7.97, + "learning_rate": 0.0002652, + "loss": 0.9626, + "step": 319 + }, + { + "epoch": 8.0, + "learning_rate": 0.0002649, + "loss": 0.985, + "step": 320 + }, + { + "epoch": 8.03, + "learning_rate": 0.0002646, + "loss": 0.9751, + "step": 321 + }, + { + "epoch": 8.05, + "learning_rate": 0.0002643, + "loss": 0.8559, + "step": 322 + }, + { + "epoch": 8.07, + "learning_rate": 0.00026399999999999997, + "loss": 0.8064, + "step": 323 + }, + { + "epoch": 8.1, + "learning_rate": 0.00026369999999999996, + "loss": 0.7447, + "step": 324 + }, + { + "epoch": 8.12, + "learning_rate": 0.00026339999999999995, + "loss": 0.7405, + "step": 325 + }, + { + "epoch": 8.15, + "learning_rate": 0.0002631, + "loss": 0.6602, + "step": 326 + }, + { + "epoch": 8.18, + "learning_rate": 0.0002628, + "loss": 0.7567, + "step": 327 + }, + { + "epoch": 8.2, + "learning_rate": 0.0002625, + "loss": 0.6912, + "step": 328 + }, + { + "epoch": 8.22, + "learning_rate": 0.0002622, + "loss": 0.7406, + "step": 329 + }, + { + "epoch": 8.25, + "learning_rate": 0.00026189999999999997, + "loss": 0.7396, + "step": 330 + }, + { + "epoch": 8.28, + "learning_rate": 0.00026159999999999996, + "loss": 0.6791, + "step": 331 + }, + { + "epoch": 8.3, + "learning_rate": 0.00026129999999999995, + "loss": 0.5948, + "step": 332 + }, + { + "epoch": 8.32, + "learning_rate": 0.000261, + "loss": 0.6729, + "step": 333 + }, + { + "epoch": 8.35, + "learning_rate": 0.0002607, + "loss": 0.6698, + "step": 334 + }, + { + "epoch": 8.38, + "learning_rate": 0.0002604, + "loss": 0.6305, + "step": 335 + }, + { + "epoch": 8.4, + "learning_rate": 0.0002601, + "loss": 0.6358, + "step": 336 + }, + { + "epoch": 8.43, + "learning_rate": 0.00025979999999999997, + "loss": 0.5804, + "step": 337 + }, + { + "epoch": 8.45, + "learning_rate": 0.00025949999999999997, + "loss": 0.5345, + "step": 338 + }, + { + "epoch": 8.47, + "learning_rate": 0.00025919999999999996, + "loss": 0.6047, + "step": 339 + }, + { + "epoch": 8.5, + "learning_rate": 0.00025889999999999995, + "loss": 0.656, + "step": 340 + }, + { + "epoch": 8.53, + "learning_rate": 0.0002586, + "loss": 0.557, + "step": 341 + }, + { + "epoch": 8.55, + "learning_rate": 0.0002583, + "loss": 0.5856, + "step": 342 + }, + { + "epoch": 8.57, + "learning_rate": 0.000258, + "loss": 0.5583, + "step": 343 + }, + { + "epoch": 8.6, + "learning_rate": 0.0002577, + "loss": 0.5335, + "step": 344 + }, + { + "epoch": 8.62, + "learning_rate": 0.00025739999999999997, + "loss": 0.4959, + "step": 345 + }, + { + "epoch": 8.65, + "learning_rate": 0.00025709999999999996, + "loss": 0.6238, + "step": 346 + }, + { + "epoch": 8.68, + "learning_rate": 0.00025679999999999995, + "loss": 0.6008, + "step": 347 + }, + { + "epoch": 8.7, + "learning_rate": 0.00025649999999999995, + "loss": 0.515, + "step": 348 + }, + { + "epoch": 8.72, + "learning_rate": 0.0002562, + "loss": 0.5541, + "step": 349 + }, + { + "epoch": 8.75, + "learning_rate": 0.0002559, + "loss": 0.5705, + "step": 350 + }, + { + "epoch": 8.78, + "learning_rate": 0.0002556, + "loss": 0.4859, + "step": 351 + }, + { + "epoch": 8.8, + "learning_rate": 0.00025529999999999997, + "loss": 0.5304, + "step": 352 + }, + { + "epoch": 8.82, + "learning_rate": 0.00025499999999999996, + "loss": 0.4834, + "step": 353 + }, + { + "epoch": 8.85, + "learning_rate": 0.00025469999999999996, + "loss": 0.5305, + "step": 354 + }, + { + "epoch": 8.88, + "learning_rate": 0.00025439999999999995, + "loss": 0.4857, + "step": 355 + }, + { + "epoch": 8.9, + "learning_rate": 0.0002541, + "loss": 0.4303, + "step": 356 + }, + { + "epoch": 8.93, + "learning_rate": 0.0002538, + "loss": 0.4325, + "step": 357 + }, + { + "epoch": 8.95, + "learning_rate": 0.0002535, + "loss": 0.545, + "step": 358 + }, + { + "epoch": 8.97, + "learning_rate": 0.0002532, + "loss": 0.4842, + "step": 359 + }, + { + "epoch": 9.0, + "learning_rate": 0.00025289999999999997, + "loss": 0.4255, + "step": 360 + }, + { + "epoch": 9.03, + "learning_rate": 0.00025259999999999996, + "loss": 0.4677, + "step": 361 + }, + { + "epoch": 9.05, + "learning_rate": 0.00025229999999999995, + "loss": 0.409, + "step": 362 + }, + { + "epoch": 9.07, + "learning_rate": 0.00025199999999999995, + "loss": 0.4293, + "step": 363 + }, + { + "epoch": 9.1, + "learning_rate": 0.0002517, + "loss": 0.3146, + "step": 364 + }, + { + "epoch": 9.12, + "learning_rate": 0.0002514, + "loss": 0.3329, + "step": 365 + }, + { + "epoch": 9.15, + "learning_rate": 0.0002511, + "loss": 0.3332, + "step": 366 + }, + { + "epoch": 9.18, + "learning_rate": 0.00025079999999999997, + "loss": 0.4191, + "step": 367 + }, + { + "epoch": 9.2, + "learning_rate": 0.00025049999999999996, + "loss": 0.4182, + "step": 368 + }, + { + "epoch": 9.22, + "learning_rate": 0.00025019999999999996, + "loss": 0.3925, + "step": 369 + }, + { + "epoch": 9.25, + "learning_rate": 0.00024989999999999995, + "loss": 0.3117, + "step": 370 + }, + { + "epoch": 9.28, + "learning_rate": 0.00024959999999999994, + "loss": 0.3054, + "step": 371 + }, + { + "epoch": 9.3, + "learning_rate": 0.0002493, + "loss": 0.3136, + "step": 372 + }, + { + "epoch": 9.32, + "learning_rate": 0.000249, + "loss": 0.3699, + "step": 373 + }, + { + "epoch": 9.35, + "learning_rate": 0.0002487, + "loss": 0.395, + "step": 374 + }, + { + "epoch": 9.38, + "learning_rate": 0.00024839999999999997, + "loss": 0.3453, + "step": 375 + }, + { + "epoch": 9.4, + "learning_rate": 0.00024809999999999996, + "loss": 0.3583, + "step": 376 + }, + { + "epoch": 9.43, + "learning_rate": 0.00024779999999999995, + "loss": 0.3036, + "step": 377 + }, + { + "epoch": 9.45, + "learning_rate": 0.00024749999999999994, + "loss": 0.286, + "step": 378 + }, + { + "epoch": 9.47, + "learning_rate": 0.0002472, + "loss": 0.2739, + "step": 379 + }, + { + "epoch": 9.5, + "learning_rate": 0.0002469, + "loss": 0.3236, + "step": 380 + }, + { + "epoch": 9.53, + "learning_rate": 0.0002466, + "loss": 0.3389, + "step": 381 + }, + { + "epoch": 9.55, + "learning_rate": 0.00024629999999999997, + "loss": 0.2755, + "step": 382 + }, + { + "epoch": 9.57, + "learning_rate": 0.00024599999999999996, + "loss": 0.3561, + "step": 383 + }, + { + "epoch": 9.6, + "learning_rate": 0.00024569999999999995, + "loss": 0.2761, + "step": 384 + }, + { + "epoch": 9.62, + "learning_rate": 0.00024539999999999995, + "loss": 0.3082, + "step": 385 + }, + { + "epoch": 9.65, + "learning_rate": 0.00024509999999999994, + "loss": 0.3891, + "step": 386 + }, + { + "epoch": 9.68, + "learning_rate": 0.0002448, + "loss": 0.3114, + "step": 387 + }, + { + "epoch": 9.7, + "learning_rate": 0.0002445, + "loss": 0.3427, + "step": 388 + }, + { + "epoch": 9.72, + "learning_rate": 0.00024419999999999997, + "loss": 0.3163, + "step": 389 + }, + { + "epoch": 9.75, + "learning_rate": 0.00024389999999999997, + "loss": 0.299, + "step": 390 + }, + { + "epoch": 9.78, + "learning_rate": 0.00024359999999999999, + "loss": 0.2514, + "step": 391 + }, + { + "epoch": 9.8, + "learning_rate": 0.0002433, + "loss": 0.3046, + "step": 392 + }, + { + "epoch": 9.82, + "learning_rate": 0.000243, + "loss": 0.313, + "step": 393 + }, + { + "epoch": 9.85, + "learning_rate": 0.0002427, + "loss": 0.3057, + "step": 394 + }, + { + "epoch": 9.88, + "learning_rate": 0.00024239999999999998, + "loss": 0.3002, + "step": 395 + }, + { + "epoch": 9.9, + "learning_rate": 0.0002421, + "loss": 0.2726, + "step": 396 + }, + { + "epoch": 9.93, + "learning_rate": 0.0002418, + "loss": 0.2354, + "step": 397 + }, + { + "epoch": 9.95, + "learning_rate": 0.0002415, + "loss": 0.3022, + "step": 398 + }, + { + "epoch": 9.97, + "learning_rate": 0.00024119999999999998, + "loss": 0.3126, + "step": 399 + }, + { + "epoch": 10.0, + "learning_rate": 0.0002409, + "loss": 0.2458, + "step": 400 + }, + { + "epoch": 10.0, + "eval_cer": 0.19513826474092344, + "eval_loss": 0.557033121585846, + "eval_runtime": 8.3651, + "eval_samples_per_second": 47.101, + "eval_steps_per_second": 5.977, + "eval_wer": 0.4898920997147464, + "step": 400 + }, + { + "epoch": 10.03, + "learning_rate": 0.0002406, + "loss": 0.3041, + "step": 401 + }, + { + "epoch": 10.05, + "learning_rate": 0.00024029999999999999, + "loss": 0.2244, + "step": 402 + }, + { + "epoch": 10.07, + "learning_rate": 0.00023999999999999998, + "loss": 0.2343, + "step": 403 + }, + { + "epoch": 10.1, + "learning_rate": 0.0002397, + "loss": 0.216, + "step": 404 + }, + { + "epoch": 10.12, + "learning_rate": 0.0002394, + "loss": 0.194, + "step": 405 + }, + { + "epoch": 10.15, + "learning_rate": 0.00023909999999999998, + "loss": 0.193, + "step": 406 + }, + { + "epoch": 10.18, + "learning_rate": 0.0002388, + "loss": 0.2353, + "step": 407 + }, + { + "epoch": 10.2, + "learning_rate": 0.0002385, + "loss": 0.2292, + "step": 408 + }, + { + "epoch": 10.22, + "learning_rate": 0.0002382, + "loss": 0.2217, + "step": 409 + }, + { + "epoch": 10.25, + "learning_rate": 0.00023789999999999998, + "loss": 0.2134, + "step": 410 + }, + { + "epoch": 10.28, + "learning_rate": 0.0002376, + "loss": 0.2036, + "step": 411 + }, + { + "epoch": 10.3, + "learning_rate": 0.0002373, + "loss": 0.2028, + "step": 412 + }, + { + "epoch": 10.32, + "learning_rate": 0.000237, + "loss": 0.2363, + "step": 413 + }, + { + "epoch": 10.35, + "learning_rate": 0.00023669999999999998, + "loss": 0.2264, + "step": 414 + }, + { + "epoch": 10.38, + "learning_rate": 0.0002364, + "loss": 0.2188, + "step": 415 + }, + { + "epoch": 10.4, + "learning_rate": 0.0002361, + "loss": 0.2323, + "step": 416 + }, + { + "epoch": 10.43, + "learning_rate": 0.00023579999999999999, + "loss": 0.2174, + "step": 417 + }, + { + "epoch": 10.45, + "learning_rate": 0.00023549999999999998, + "loss": 0.208, + "step": 418 + }, + { + "epoch": 10.47, + "learning_rate": 0.0002352, + "loss": 0.2065, + "step": 419 + }, + { + "epoch": 10.5, + "learning_rate": 0.0002349, + "loss": 0.2343, + "step": 420 + }, + { + "epoch": 10.53, + "learning_rate": 0.00023459999999999998, + "loss": 0.2301, + "step": 421 + }, + { + "epoch": 10.55, + "learning_rate": 0.00023429999999999998, + "loss": 0.2212, + "step": 422 + }, + { + "epoch": 10.57, + "learning_rate": 0.000234, + "loss": 0.2378, + "step": 423 + }, + { + "epoch": 10.6, + "learning_rate": 0.0002337, + "loss": 0.1698, + "step": 424 + }, + { + "epoch": 10.62, + "learning_rate": 0.00023339999999999998, + "loss": 0.1647, + "step": 425 + }, + { + "epoch": 10.65, + "learning_rate": 0.00023309999999999997, + "loss": 0.3171, + "step": 426 + }, + { + "epoch": 10.68, + "learning_rate": 0.0002328, + "loss": 0.2252, + "step": 427 + }, + { + "epoch": 10.7, + "learning_rate": 0.00023249999999999999, + "loss": 0.2558, + "step": 428 + }, + { + "epoch": 10.72, + "learning_rate": 0.00023219999999999998, + "loss": 0.2052, + "step": 429 + }, + { + "epoch": 10.75, + "learning_rate": 0.0002319, + "loss": 0.2069, + "step": 430 + }, + { + "epoch": 10.78, + "learning_rate": 0.0002316, + "loss": 0.1526, + "step": 431 + }, + { + "epoch": 10.8, + "learning_rate": 0.00023129999999999998, + "loss": 0.2558, + "step": 432 + }, + { + "epoch": 10.82, + "learning_rate": 0.00023099999999999998, + "loss": 0.2245, + "step": 433 + }, + { + "epoch": 10.85, + "learning_rate": 0.0002307, + "loss": 0.2275, + "step": 434 + }, + { + "epoch": 10.88, + "learning_rate": 0.0002304, + "loss": 0.1748, + "step": 435 + }, + { + "epoch": 10.9, + "learning_rate": 0.00023009999999999998, + "loss": 0.2545, + "step": 436 + }, + { + "epoch": 10.93, + "learning_rate": 0.00022979999999999997, + "loss": 0.1942, + "step": 437 + }, + { + "epoch": 10.95, + "learning_rate": 0.0002295, + "loss": 0.1936, + "step": 438 + }, + { + "epoch": 10.97, + "learning_rate": 0.0002292, + "loss": 0.1847, + "step": 439 + }, + { + "epoch": 11.0, + "learning_rate": 0.00022889999999999998, + "loss": 0.1901, + "step": 440 + }, + { + "epoch": 11.03, + "learning_rate": 0.00022859999999999997, + "loss": 0.2054, + "step": 441 + }, + { + "epoch": 11.05, + "learning_rate": 0.0002283, + "loss": 0.1832, + "step": 442 + }, + { + "epoch": 11.07, + "learning_rate": 0.00022799999999999999, + "loss": 0.1488, + "step": 443 + }, + { + "epoch": 11.1, + "learning_rate": 0.00022769999999999998, + "loss": 0.1574, + "step": 444 + }, + { + "epoch": 11.12, + "learning_rate": 0.00022739999999999997, + "loss": 0.1485, + "step": 445 + }, + { + "epoch": 11.15, + "learning_rate": 0.0002271, + "loss": 0.1413, + "step": 446 + }, + { + "epoch": 11.18, + "learning_rate": 0.00022679999999999998, + "loss": 0.1872, + "step": 447 + }, + { + "epoch": 11.2, + "learning_rate": 0.00022649999999999998, + "loss": 0.1605, + "step": 448 + }, + { + "epoch": 11.22, + "learning_rate": 0.00022619999999999997, + "loss": 0.1715, + "step": 449 + }, + { + "epoch": 11.25, + "learning_rate": 0.0002259, + "loss": 0.1592, + "step": 450 + }, + { + "epoch": 11.28, + "learning_rate": 0.00022559999999999998, + "loss": 0.1803, + "step": 451 + }, + { + "epoch": 11.3, + "learning_rate": 0.00022529999999999997, + "loss": 0.1331, + "step": 452 + }, + { + "epoch": 11.32, + "learning_rate": 0.000225, + "loss": 0.192, + "step": 453 + }, + { + "epoch": 11.35, + "learning_rate": 0.0002247, + "loss": 0.1627, + "step": 454 + }, + { + "epoch": 11.38, + "learning_rate": 0.00022439999999999998, + "loss": 0.1595, + "step": 455 + }, + { + "epoch": 11.4, + "learning_rate": 0.00022409999999999997, + "loss": 0.1496, + "step": 456 + }, + { + "epoch": 11.43, + "learning_rate": 0.0002238, + "loss": 0.1311, + "step": 457 + }, + { + "epoch": 11.45, + "learning_rate": 0.00022349999999999998, + "loss": 0.1518, + "step": 458 + }, + { + "epoch": 11.47, + "learning_rate": 0.00022319999999999998, + "loss": 0.1271, + "step": 459 + }, + { + "epoch": 11.5, + "learning_rate": 0.00022289999999999997, + "loss": 0.2051, + "step": 460 + }, + { + "epoch": 11.53, + "learning_rate": 0.0002226, + "loss": 0.1601, + "step": 461 + }, + { + "epoch": 11.55, + "learning_rate": 0.00022229999999999998, + "loss": 0.1355, + "step": 462 + }, + { + "epoch": 11.57, + "learning_rate": 0.00022199999999999998, + "loss": 0.1558, + "step": 463 + }, + { + "epoch": 11.6, + "learning_rate": 0.00022169999999999997, + "loss": 0.1173, + "step": 464 + }, + { + "epoch": 11.62, + "learning_rate": 0.0002214, + "loss": 0.1286, + "step": 465 + }, + { + "epoch": 11.65, + "learning_rate": 0.00022109999999999998, + "loss": 0.1992, + "step": 466 + }, + { + "epoch": 11.68, + "learning_rate": 0.00022079999999999997, + "loss": 0.1626, + "step": 467 + }, + { + "epoch": 11.7, + "learning_rate": 0.00022049999999999997, + "loss": 0.1253, + "step": 468 + }, + { + "epoch": 11.72, + "learning_rate": 0.00022019999999999999, + "loss": 0.1341, + "step": 469 + }, + { + "epoch": 11.75, + "learning_rate": 0.00021989999999999998, + "loss": 0.1199, + "step": 470 + }, + { + "epoch": 11.78, + "learning_rate": 0.00021959999999999997, + "loss": 0.1205, + "step": 471 + }, + { + "epoch": 11.8, + "learning_rate": 0.00021929999999999996, + "loss": 0.1673, + "step": 472 + }, + { + "epoch": 11.82, + "learning_rate": 0.00021899999999999998, + "loss": 0.171, + "step": 473 + }, + { + "epoch": 11.85, + "learning_rate": 0.00021869999999999998, + "loss": 0.1802, + "step": 474 + }, + { + "epoch": 11.88, + "learning_rate": 0.00021839999999999997, + "loss": 0.1373, + "step": 475 + }, + { + "epoch": 11.9, + "learning_rate": 0.00021809999999999996, + "loss": 0.1699, + "step": 476 + }, + { + "epoch": 11.93, + "learning_rate": 0.00021779999999999998, + "loss": 0.1432, + "step": 477 + }, + { + "epoch": 11.95, + "learning_rate": 0.00021749999999999997, + "loss": 0.154, + "step": 478 + }, + { + "epoch": 11.97, + "learning_rate": 0.00021719999999999997, + "loss": 0.1703, + "step": 479 + }, + { + "epoch": 12.0, + "learning_rate": 0.0002169, + "loss": 0.1231, + "step": 480 + }, + { + "epoch": 12.03, + "learning_rate": 0.00021659999999999998, + "loss": 0.1541, + "step": 481 + }, + { + "epoch": 12.05, + "learning_rate": 0.00021629999999999997, + "loss": 0.134, + "step": 482 + }, + { + "epoch": 12.07, + "learning_rate": 0.00021599999999999996, + "loss": 0.1277, + "step": 483 + }, + { + "epoch": 12.1, + "learning_rate": 0.00021569999999999998, + "loss": 0.1138, + "step": 484 + }, + { + "epoch": 12.12, + "learning_rate": 0.00021539999999999998, + "loss": 0.0952, + "step": 485 + }, + { + "epoch": 12.15, + "learning_rate": 0.00021509999999999997, + "loss": 0.0954, + "step": 486 + }, + { + "epoch": 12.18, + "learning_rate": 0.00021479999999999996, + "loss": 0.1573, + "step": 487 + }, + { + "epoch": 12.2, + "learning_rate": 0.00021449999999999998, + "loss": 0.1164, + "step": 488 + }, + { + "epoch": 12.22, + "learning_rate": 0.00021419999999999998, + "loss": 0.1351, + "step": 489 + }, + { + "epoch": 12.25, + "learning_rate": 0.00021389999999999997, + "loss": 0.121, + "step": 490 + }, + { + "epoch": 12.28, + "learning_rate": 0.00021359999999999996, + "loss": 0.0994, + "step": 491 + }, + { + "epoch": 12.3, + "learning_rate": 0.00021329999999999998, + "loss": 0.1037, + "step": 492 + }, + { + "epoch": 12.32, + "learning_rate": 0.00021299999999999997, + "loss": 0.1373, + "step": 493 + }, + { + "epoch": 12.35, + "learning_rate": 0.00021269999999999997, + "loss": 0.1446, + "step": 494 + }, + { + "epoch": 12.38, + "learning_rate": 0.00021239999999999996, + "loss": 0.1347, + "step": 495 + }, + { + "epoch": 12.4, + "learning_rate": 0.00021209999999999998, + "loss": 0.1213, + "step": 496 + }, + { + "epoch": 12.43, + "learning_rate": 0.00021179999999999997, + "loss": 0.1511, + "step": 497 + }, + { + "epoch": 12.45, + "learning_rate": 0.00021149999999999996, + "loss": 0.1132, + "step": 498 + }, + { + "epoch": 12.47, + "learning_rate": 0.00021119999999999996, + "loss": 0.096, + "step": 499 + }, + { + "epoch": 12.5, + "learning_rate": 0.00021089999999999998, + "loss": 0.1601, + "step": 500 + }, + { + "epoch": 12.53, + "learning_rate": 0.00021059999999999997, + "loss": 0.1313, + "step": 501 + }, + { + "epoch": 12.55, + "learning_rate": 0.00021029999999999996, + "loss": 0.1197, + "step": 502 + }, + { + "epoch": 12.57, + "learning_rate": 0.00020999999999999998, + "loss": 0.1094, + "step": 503 + }, + { + "epoch": 12.6, + "learning_rate": 0.00020969999999999997, + "loss": 0.1092, + "step": 504 + }, + { + "epoch": 12.62, + "learning_rate": 0.00020939999999999997, + "loss": 0.0848, + "step": 505 + }, + { + "epoch": 12.65, + "learning_rate": 0.00020909999999999996, + "loss": 0.1633, + "step": 506 + }, + { + "epoch": 12.68, + "learning_rate": 0.00020879999999999998, + "loss": 0.104, + "step": 507 + }, + { + "epoch": 12.7, + "learning_rate": 0.00020849999999999997, + "loss": 0.1511, + "step": 508 + }, + { + "epoch": 12.72, + "learning_rate": 0.00020819999999999996, + "loss": 0.1033, + "step": 509 + }, + { + "epoch": 12.75, + "learning_rate": 0.00020789999999999996, + "loss": 0.1007, + "step": 510 + }, + { + "epoch": 12.78, + "learning_rate": 0.00020759999999999998, + "loss": 0.1196, + "step": 511 + }, + { + "epoch": 12.8, + "learning_rate": 0.00020729999999999997, + "loss": 0.1394, + "step": 512 + }, + { + "epoch": 12.82, + "learning_rate": 0.00020699999999999996, + "loss": 0.1388, + "step": 513 + }, + { + "epoch": 12.85, + "learning_rate": 0.00020669999999999996, + "loss": 0.1097, + "step": 514 + }, + { + "epoch": 12.88, + "learning_rate": 0.00020639999999999998, + "loss": 0.1076, + "step": 515 + }, + { + "epoch": 12.9, + "learning_rate": 0.0002061, + "loss": 0.0821, + "step": 516 + }, + { + "epoch": 12.93, + "learning_rate": 0.0002058, + "loss": 0.1071, + "step": 517 + }, + { + "epoch": 12.95, + "learning_rate": 0.0002055, + "loss": 0.1106, + "step": 518 + }, + { + "epoch": 12.97, + "learning_rate": 0.0002052, + "loss": 0.1335, + "step": 519 + }, + { + "epoch": 13.0, + "learning_rate": 0.0002049, + "loss": 0.1112, + "step": 520 + }, + { + "epoch": 13.03, + "learning_rate": 0.00020459999999999999, + "loss": 0.1057, + "step": 521 + }, + { + "epoch": 13.05, + "learning_rate": 0.0002043, + "loss": 0.1022, + "step": 522 + }, + { + "epoch": 13.07, + "learning_rate": 0.000204, + "loss": 0.1005, + "step": 523 + }, + { + "epoch": 13.1, + "learning_rate": 0.0002037, + "loss": 0.0933, + "step": 524 + }, + { + "epoch": 13.12, + "learning_rate": 0.00020339999999999998, + "loss": 0.1216, + "step": 525 + }, + { + "epoch": 13.15, + "learning_rate": 0.0002031, + "loss": 0.0897, + "step": 526 + }, + { + "epoch": 13.18, + "learning_rate": 0.0002028, + "loss": 0.1077, + "step": 527 + }, + { + "epoch": 13.2, + "learning_rate": 0.0002025, + "loss": 0.1295, + "step": 528 + }, + { + "epoch": 13.22, + "learning_rate": 0.0002022, + "loss": 0.1115, + "step": 529 + }, + { + "epoch": 13.25, + "learning_rate": 0.0002019, + "loss": 0.0805, + "step": 530 + }, + { + "epoch": 13.28, + "learning_rate": 0.0002016, + "loss": 0.0825, + "step": 531 + }, + { + "epoch": 13.3, + "learning_rate": 0.0002013, + "loss": 0.077, + "step": 532 + }, + { + "epoch": 13.32, + "learning_rate": 0.000201, + "loss": 0.1096, + "step": 533 + }, + { + "epoch": 13.35, + "learning_rate": 0.0002007, + "loss": 0.1062, + "step": 534 + }, + { + "epoch": 13.38, + "learning_rate": 0.0002004, + "loss": 0.118, + "step": 535 + }, + { + "epoch": 13.4, + "learning_rate": 0.00020009999999999998, + "loss": 0.0983, + "step": 536 + }, + { + "epoch": 13.43, + "learning_rate": 0.0001998, + "loss": 0.0855, + "step": 537 + }, + { + "epoch": 13.45, + "learning_rate": 0.0001995, + "loss": 0.0937, + "step": 538 + }, + { + "epoch": 13.47, + "learning_rate": 0.0001992, + "loss": 0.106, + "step": 539 + }, + { + "epoch": 13.5, + "learning_rate": 0.00019889999999999998, + "loss": 0.1077, + "step": 540 + }, + { + "epoch": 13.53, + "learning_rate": 0.0001986, + "loss": 0.0825, + "step": 541 + }, + { + "epoch": 13.55, + "learning_rate": 0.0001983, + "loss": 0.1028, + "step": 542 + }, + { + "epoch": 13.57, + "learning_rate": 0.000198, + "loss": 0.0723, + "step": 543 + }, + { + "epoch": 13.6, + "learning_rate": 0.00019769999999999998, + "loss": 0.0702, + "step": 544 + }, + { + "epoch": 13.62, + "learning_rate": 0.0001974, + "loss": 0.0939, + "step": 545 + }, + { + "epoch": 13.65, + "learning_rate": 0.0001971, + "loss": 0.1375, + "step": 546 + }, + { + "epoch": 13.68, + "learning_rate": 0.00019679999999999999, + "loss": 0.1154, + "step": 547 + }, + { + "epoch": 13.7, + "learning_rate": 0.00019649999999999998, + "loss": 0.067, + "step": 548 + }, + { + "epoch": 13.72, + "learning_rate": 0.0001962, + "loss": 0.0911, + "step": 549 + }, + { + "epoch": 13.75, + "learning_rate": 0.0001959, + "loss": 0.0685, + "step": 550 + }, + { + "epoch": 13.78, + "learning_rate": 0.00019559999999999998, + "loss": 0.0861, + "step": 551 + }, + { + "epoch": 13.8, + "learning_rate": 0.00019529999999999998, + "loss": 0.1077, + "step": 552 + }, + { + "epoch": 13.82, + "learning_rate": 0.000195, + "loss": 0.102, + "step": 553 + }, + { + "epoch": 13.85, + "learning_rate": 0.0001947, + "loss": 0.0851, + "step": 554 + }, + { + "epoch": 13.88, + "learning_rate": 0.00019439999999999998, + "loss": 0.0936, + "step": 555 + }, + { + "epoch": 13.9, + "learning_rate": 0.0001941, + "loss": 0.0976, + "step": 556 + }, + { + "epoch": 13.93, + "learning_rate": 0.0001938, + "loss": 0.0886, + "step": 557 + }, + { + "epoch": 13.95, + "learning_rate": 0.0001935, + "loss": 0.0929, + "step": 558 + }, + { + "epoch": 13.97, + "learning_rate": 0.00019319999999999998, + "loss": 0.1077, + "step": 559 + }, + { + "epoch": 14.0, + "learning_rate": 0.0001929, + "loss": 0.0969, + "step": 560 + }, + { + "epoch": 14.03, + "learning_rate": 0.0001926, + "loss": 0.1275, + "step": 561 + }, + { + "epoch": 14.05, + "learning_rate": 0.00019229999999999999, + "loss": 0.0881, + "step": 562 + }, + { + "epoch": 14.07, + "learning_rate": 0.00019199999999999998, + "loss": 0.0876, + "step": 563 + }, + { + "epoch": 14.1, + "learning_rate": 0.0001917, + "loss": 0.0816, + "step": 564 + }, + { + "epoch": 14.12, + "learning_rate": 0.0001914, + "loss": 0.0606, + "step": 565 + }, + { + "epoch": 14.15, + "learning_rate": 0.00019109999999999998, + "loss": 0.0819, + "step": 566 + }, + { + "epoch": 14.18, + "learning_rate": 0.00019079999999999998, + "loss": 0.0884, + "step": 567 + }, + { + "epoch": 14.2, + "learning_rate": 0.0001905, + "loss": 0.0923, + "step": 568 + }, + { + "epoch": 14.22, + "learning_rate": 0.0001902, + "loss": 0.0595, + "step": 569 + }, + { + "epoch": 14.25, + "learning_rate": 0.00018989999999999998, + "loss": 0.0796, + "step": 570 + }, + { + "epoch": 14.28, + "learning_rate": 0.00018959999999999997, + "loss": 0.0635, + "step": 571 + }, + { + "epoch": 14.3, + "learning_rate": 0.0001893, + "loss": 0.0728, + "step": 572 + }, + { + "epoch": 14.32, + "learning_rate": 0.00018899999999999999, + "loss": 0.1051, + "step": 573 + }, + { + "epoch": 14.35, + "learning_rate": 0.00018869999999999998, + "loss": 0.0853, + "step": 574 + }, + { + "epoch": 14.38, + "learning_rate": 0.00018839999999999997, + "loss": 0.0846, + "step": 575 + }, + { + "epoch": 14.4, + "learning_rate": 0.0001881, + "loss": 0.0869, + "step": 576 + }, + { + "epoch": 14.43, + "learning_rate": 0.00018779999999999998, + "loss": 0.064, + "step": 577 + }, + { + "epoch": 14.45, + "learning_rate": 0.00018749999999999998, + "loss": 0.0668, + "step": 578 + }, + { + "epoch": 14.47, + "learning_rate": 0.0001872, + "loss": 0.0847, + "step": 579 + }, + { + "epoch": 14.5, + "learning_rate": 0.0001869, + "loss": 0.1166, + "step": 580 + }, + { + "epoch": 14.53, + "learning_rate": 0.00018659999999999998, + "loss": 0.0903, + "step": 581 + }, + { + "epoch": 14.55, + "learning_rate": 0.00018629999999999997, + "loss": 0.1085, + "step": 582 + }, + { + "epoch": 14.57, + "learning_rate": 0.000186, + "loss": 0.1275, + "step": 583 + }, + { + "epoch": 14.6, + "learning_rate": 0.0001857, + "loss": 0.0655, + "step": 584 + }, + { + "epoch": 14.62, + "learning_rate": 0.00018539999999999998, + "loss": 0.0636, + "step": 585 + }, + { + "epoch": 14.65, + "learning_rate": 0.00018509999999999997, + "loss": 0.1008, + "step": 586 + }, + { + "epoch": 14.68, + "learning_rate": 0.0001848, + "loss": 0.1095, + "step": 587 + }, + { + "epoch": 14.7, + "learning_rate": 0.00018449999999999999, + "loss": 0.0729, + "step": 588 + }, + { + "epoch": 14.72, + "learning_rate": 0.00018419999999999998, + "loss": 0.0819, + "step": 589 + }, + { + "epoch": 14.75, + "learning_rate": 0.00018389999999999997, + "loss": 0.0807, + "step": 590 + }, + { + "epoch": 14.78, + "learning_rate": 0.0001836, + "loss": 0.0503, + "step": 591 + }, + { + "epoch": 14.8, + "learning_rate": 0.00018329999999999998, + "loss": 0.1053, + "step": 592 + }, + { + "epoch": 14.82, + "learning_rate": 0.00018299999999999998, + "loss": 0.0826, + "step": 593 + }, + { + "epoch": 14.85, + "learning_rate": 0.00018269999999999997, + "loss": 0.0898, + "step": 594 + }, + { + "epoch": 14.88, + "learning_rate": 0.0001824, + "loss": 0.0703, + "step": 595 + }, + { + "epoch": 14.9, + "learning_rate": 0.00018209999999999998, + "loss": 0.0703, + "step": 596 + }, + { + "epoch": 14.93, + "learning_rate": 0.00018179999999999997, + "loss": 0.0759, + "step": 597 + }, + { + "epoch": 14.95, + "learning_rate": 0.00018149999999999997, + "loss": 0.0918, + "step": 598 + }, + { + "epoch": 14.97, + "learning_rate": 0.00018119999999999999, + "loss": 0.0818, + "step": 599 + }, + { + "epoch": 15.0, + "learning_rate": 0.00018089999999999998, + "loss": 0.0762, + "step": 600 + }, + { + "epoch": 15.0, + "eval_cer": 0.15617749723052485, + "eval_loss": 0.5212965607643127, + "eval_runtime": 8.4311, + "eval_samples_per_second": 46.732, + "eval_steps_per_second": 5.93, + "eval_wer": 0.3726900657323577, + "step": 600 + }, + { + "epoch": 15.03, + "learning_rate": 0.00018059999999999997, + "loss": 0.1135, + "step": 601 + }, + { + "epoch": 15.05, + "learning_rate": 0.00018029999999999996, + "loss": 0.0773, + "step": 602 + }, + { + "epoch": 15.07, + "learning_rate": 0.00017999999999999998, + "loss": 0.0751, + "step": 603 + }, + { + "epoch": 15.1, + "learning_rate": 0.00017969999999999998, + "loss": 0.064, + "step": 604 + }, + { + "epoch": 15.12, + "learning_rate": 0.00017939999999999997, + "loss": 0.0756, + "step": 605 + }, + { + "epoch": 15.15, + "learning_rate": 0.0001791, + "loss": 0.0821, + "step": 606 + }, + { + "epoch": 15.18, + "learning_rate": 0.00017879999999999998, + "loss": 0.0922, + "step": 607 + }, + { + "epoch": 15.2, + "learning_rate": 0.00017849999999999997, + "loss": 0.0719, + "step": 608 + }, + { + "epoch": 15.22, + "learning_rate": 0.00017819999999999997, + "loss": 0.0729, + "step": 609 + }, + { + "epoch": 15.25, + "learning_rate": 0.0001779, + "loss": 0.1057, + "step": 610 + }, + { + "epoch": 15.28, + "learning_rate": 0.00017759999999999998, + "loss": 0.1273, + "step": 611 + }, + { + "epoch": 15.3, + "learning_rate": 0.00017729999999999997, + "loss": 0.0513, + "step": 612 + }, + { + "epoch": 15.32, + "learning_rate": 0.00017699999999999997, + "loss": 0.0757, + "step": 613 + }, + { + "epoch": 15.35, + "learning_rate": 0.00017669999999999999, + "loss": 0.0734, + "step": 614 + }, + { + "epoch": 15.38, + "learning_rate": 0.00017639999999999998, + "loss": 0.0682, + "step": 615 + }, + { + "epoch": 15.4, + "learning_rate": 0.00017609999999999997, + "loss": 0.0755, + "step": 616 + }, + { + "epoch": 15.43, + "learning_rate": 0.00017579999999999996, + "loss": 0.0568, + "step": 617 + }, + { + "epoch": 15.45, + "learning_rate": 0.00017549999999999998, + "loss": 0.0719, + "step": 618 + }, + { + "epoch": 15.47, + "learning_rate": 0.00017519999999999998, + "loss": 0.068, + "step": 619 + }, + { + "epoch": 15.5, + "learning_rate": 0.00017489999999999997, + "loss": 0.0963, + "step": 620 + }, + { + "epoch": 15.53, + "learning_rate": 0.00017459999999999996, + "loss": 0.0916, + "step": 621 + }, + { + "epoch": 15.55, + "learning_rate": 0.00017429999999999998, + "loss": 0.0672, + "step": 622 + }, + { + "epoch": 15.57, + "learning_rate": 0.00017399999999999997, + "loss": 0.0784, + "step": 623 + }, + { + "epoch": 15.6, + "learning_rate": 0.00017369999999999997, + "loss": 0.0672, + "step": 624 + }, + { + "epoch": 15.62, + "learning_rate": 0.00017339999999999996, + "loss": 0.0729, + "step": 625 + }, + { + "epoch": 15.65, + "learning_rate": 0.00017309999999999998, + "loss": 0.0833, + "step": 626 + }, + { + "epoch": 15.68, + "learning_rate": 0.00017279999999999997, + "loss": 0.0799, + "step": 627 + }, + { + "epoch": 15.7, + "learning_rate": 0.00017249999999999996, + "loss": 0.0448, + "step": 628 + }, + { + "epoch": 15.72, + "learning_rate": 0.00017219999999999998, + "loss": 0.0702, + "step": 629 + }, + { + "epoch": 15.75, + "learning_rate": 0.00017189999999999998, + "loss": 0.0668, + "step": 630 + }, + { + "epoch": 15.78, + "learning_rate": 0.00017159999999999997, + "loss": 0.0719, + "step": 631 + }, + { + "epoch": 15.8, + "learning_rate": 0.00017129999999999996, + "loss": 0.0799, + "step": 632 + }, + { + "epoch": 15.82, + "learning_rate": 0.00017099999999999998, + "loss": 0.0898, + "step": 633 + }, + { + "epoch": 15.85, + "learning_rate": 0.00017069999999999998, + "loss": 0.0656, + "step": 634 + }, + { + "epoch": 15.88, + "learning_rate": 0.00017039999999999997, + "loss": 0.063, + "step": 635 + }, + { + "epoch": 15.9, + "learning_rate": 0.00017009999999999996, + "loss": 0.0493, + "step": 636 + }, + { + "epoch": 15.93, + "learning_rate": 0.00016979999999999998, + "loss": 0.0872, + "step": 637 + }, + { + "epoch": 15.95, + "learning_rate": 0.00016949999999999997, + "loss": 0.0663, + "step": 638 + }, + { + "epoch": 15.97, + "learning_rate": 0.00016919999999999997, + "loss": 0.0779, + "step": 639 + }, + { + "epoch": 16.0, + "learning_rate": 0.00016889999999999996, + "loss": 0.059, + "step": 640 + }, + { + "epoch": 16.02, + "learning_rate": 0.0001686, + "loss": 0.0903, + "step": 641 + }, + { + "epoch": 16.05, + "learning_rate": 0.0001683, + "loss": 0.0713, + "step": 642 + }, + { + "epoch": 16.07, + "learning_rate": 0.000168, + "loss": 0.0696, + "step": 643 + }, + { + "epoch": 16.1, + "learning_rate": 0.0001677, + "loss": 0.0527, + "step": 644 + }, + { + "epoch": 16.12, + "learning_rate": 0.0001674, + "loss": 0.0756, + "step": 645 + }, + { + "epoch": 16.15, + "learning_rate": 0.0001671, + "loss": 0.051, + "step": 646 + }, + { + "epoch": 16.18, + "learning_rate": 0.0001668, + "loss": 0.0809, + "step": 647 + }, + { + "epoch": 16.2, + "learning_rate": 0.0001665, + "loss": 0.0698, + "step": 648 + }, + { + "epoch": 16.23, + "learning_rate": 0.0001662, + "loss": 0.0686, + "step": 649 + }, + { + "epoch": 16.25, + "learning_rate": 0.0001659, + "loss": 0.0572, + "step": 650 + }, + { + "epoch": 16.27, + "learning_rate": 0.0001656, + "loss": 0.0551, + "step": 651 + }, + { + "epoch": 16.3, + "learning_rate": 0.0001653, + "loss": 0.065, + "step": 652 + }, + { + "epoch": 16.32, + "learning_rate": 0.000165, + "loss": 0.0693, + "step": 653 + }, + { + "epoch": 16.35, + "learning_rate": 0.0001647, + "loss": 0.0908, + "step": 654 + }, + { + "epoch": 16.38, + "learning_rate": 0.0001644, + "loss": 0.098, + "step": 655 + }, + { + "epoch": 16.4, + "learning_rate": 0.0001641, + "loss": 0.0605, + "step": 656 + }, + { + "epoch": 16.43, + "learning_rate": 0.0001638, + "loss": 0.0866, + "step": 657 + }, + { + "epoch": 16.45, + "learning_rate": 0.0001635, + "loss": 0.0523, + "step": 658 + }, + { + "epoch": 16.48, + "learning_rate": 0.0001632, + "loss": 0.077, + "step": 659 + }, + { + "epoch": 16.5, + "learning_rate": 0.0001629, + "loss": 0.0685, + "step": 660 + }, + { + "epoch": 16.52, + "learning_rate": 0.0001626, + "loss": 0.0561, + "step": 661 + }, + { + "epoch": 16.55, + "learning_rate": 0.0001623, + "loss": 0.0857, + "step": 662 + }, + { + "epoch": 16.57, + "learning_rate": 0.000162, + "loss": 0.0643, + "step": 663 + }, + { + "epoch": 16.6, + "learning_rate": 0.0001617, + "loss": 0.0518, + "step": 664 + }, + { + "epoch": 16.62, + "learning_rate": 0.0001614, + "loss": 0.0349, + "step": 665 + }, + { + "epoch": 16.65, + "learning_rate": 0.00016109999999999999, + "loss": 0.0626, + "step": 666 + }, + { + "epoch": 16.68, + "learning_rate": 0.0001608, + "loss": 0.0781, + "step": 667 + }, + { + "epoch": 16.7, + "learning_rate": 0.0001605, + "loss": 0.0667, + "step": 668 + }, + { + "epoch": 16.73, + "learning_rate": 0.0001602, + "loss": 0.0616, + "step": 669 + }, + { + "epoch": 16.75, + "learning_rate": 0.00015989999999999998, + "loss": 0.0693, + "step": 670 + }, + { + "epoch": 16.77, + "learning_rate": 0.0001596, + "loss": 0.0614, + "step": 671 + }, + { + "epoch": 16.8, + "learning_rate": 0.0001593, + "loss": 0.0895, + "step": 672 + }, + { + "epoch": 16.82, + "learning_rate": 0.000159, + "loss": 0.0742, + "step": 673 + }, + { + "epoch": 16.85, + "learning_rate": 0.00015869999999999998, + "loss": 0.0619, + "step": 674 + }, + { + "epoch": 16.88, + "learning_rate": 0.0001584, + "loss": 0.0557, + "step": 675 + }, + { + "epoch": 16.9, + "learning_rate": 0.0001581, + "loss": 0.0538, + "step": 676 + }, + { + "epoch": 16.93, + "learning_rate": 0.0001578, + "loss": 0.0737, + "step": 677 + }, + { + "epoch": 16.95, + "learning_rate": 0.00015749999999999998, + "loss": 0.072, + "step": 678 + }, + { + "epoch": 16.98, + "learning_rate": 0.0001572, + "loss": 0.08, + "step": 679 + }, + { + "epoch": 17.0, + "learning_rate": 0.0001569, + "loss": 0.0508, + "step": 680 + }, + { + "epoch": 17.02, + "learning_rate": 0.00015659999999999998, + "loss": 0.0787, + "step": 681 + }, + { + "epoch": 17.05, + "learning_rate": 0.0001563, + "loss": 0.0546, + "step": 682 + }, + { + "epoch": 17.07, + "learning_rate": 0.000156, + "loss": 0.0689, + "step": 683 + }, + { + "epoch": 17.1, + "learning_rate": 0.0001557, + "loss": 0.0617, + "step": 684 + }, + { + "epoch": 17.12, + "learning_rate": 0.00015539999999999998, + "loss": 0.0422, + "step": 685 + }, + { + "epoch": 17.15, + "learning_rate": 0.0001551, + "loss": 0.067, + "step": 686 + }, + { + "epoch": 17.18, + "learning_rate": 0.0001548, + "loss": 0.071, + "step": 687 + }, + { + "epoch": 17.2, + "learning_rate": 0.0001545, + "loss": 0.0647, + "step": 688 + }, + { + "epoch": 17.23, + "learning_rate": 0.00015419999999999998, + "loss": 0.0582, + "step": 689 + }, + { + "epoch": 17.25, + "learning_rate": 0.0001539, + "loss": 0.0481, + "step": 690 + }, + { + "epoch": 17.27, + "learning_rate": 0.0001536, + "loss": 0.0874, + "step": 691 + }, + { + "epoch": 17.3, + "learning_rate": 0.00015329999999999999, + "loss": 0.0575, + "step": 692 + }, + { + "epoch": 17.32, + "learning_rate": 0.00015299999999999998, + "loss": 0.0634, + "step": 693 + }, + { + "epoch": 17.35, + "learning_rate": 0.0001527, + "loss": 0.0515, + "step": 694 + }, + { + "epoch": 17.38, + "learning_rate": 0.0001524, + "loss": 0.0585, + "step": 695 + }, + { + "epoch": 17.4, + "learning_rate": 0.00015209999999999998, + "loss": 0.0551, + "step": 696 + }, + { + "epoch": 17.43, + "learning_rate": 0.00015179999999999998, + "loss": 0.0392, + "step": 697 + }, + { + "epoch": 17.45, + "learning_rate": 0.0001515, + "loss": 0.0543, + "step": 698 + }, + { + "epoch": 17.48, + "learning_rate": 0.0001512, + "loss": 0.0577, + "step": 699 + }, + { + "epoch": 17.5, + "learning_rate": 0.00015089999999999998, + "loss": 0.0713, + "step": 700 + }, + { + "epoch": 17.52, + "learning_rate": 0.00015059999999999997, + "loss": 0.0585, + "step": 701 + }, + { + "epoch": 17.55, + "learning_rate": 0.0001503, + "loss": 0.0689, + "step": 702 + }, + { + "epoch": 17.57, + "learning_rate": 0.00015, + "loss": 0.0691, + "step": 703 + }, + { + "epoch": 17.6, + "learning_rate": 0.00014969999999999998, + "loss": 0.0418, + "step": 704 + }, + { + "epoch": 17.62, + "learning_rate": 0.0001494, + "loss": 0.0588, + "step": 705 + }, + { + "epoch": 17.65, + "learning_rate": 0.0001491, + "loss": 0.0716, + "step": 706 + }, + { + "epoch": 17.68, + "learning_rate": 0.00014879999999999998, + "loss": 0.0642, + "step": 707 + }, + { + "epoch": 17.7, + "learning_rate": 0.00014849999999999998, + "loss": 0.0507, + "step": 708 + }, + { + "epoch": 17.73, + "learning_rate": 0.0001482, + "loss": 0.0484, + "step": 709 + }, + { + "epoch": 17.75, + "learning_rate": 0.0001479, + "loss": 0.0524, + "step": 710 + }, + { + "epoch": 17.77, + "learning_rate": 0.00014759999999999998, + "loss": 0.0416, + "step": 711 + }, + { + "epoch": 17.8, + "learning_rate": 0.00014729999999999998, + "loss": 0.0626, + "step": 712 + }, + { + "epoch": 17.82, + "learning_rate": 0.000147, + "loss": 0.0815, + "step": 713 + }, + { + "epoch": 17.85, + "learning_rate": 0.0001467, + "loss": 0.0653, + "step": 714 + }, + { + "epoch": 17.88, + "learning_rate": 0.00014639999999999998, + "loss": 0.0433, + "step": 715 + }, + { + "epoch": 17.9, + "learning_rate": 0.00014609999999999997, + "loss": 0.0512, + "step": 716 + }, + { + "epoch": 17.93, + "learning_rate": 0.0001458, + "loss": 0.051, + "step": 717 + }, + { + "epoch": 17.95, + "learning_rate": 0.00014549999999999999, + "loss": 0.0628, + "step": 718 + }, + { + "epoch": 17.98, + "learning_rate": 0.00014519999999999998, + "loss": 0.0643, + "step": 719 + }, + { + "epoch": 18.0, + "learning_rate": 0.00014489999999999997, + "loss": 0.0533, + "step": 720 + }, + { + "epoch": 18.02, + "learning_rate": 0.0001446, + "loss": 0.0536, + "step": 721 + }, + { + "epoch": 18.05, + "learning_rate": 0.00014429999999999998, + "loss": 0.0526, + "step": 722 + }, + { + "epoch": 18.07, + "learning_rate": 0.00014399999999999998, + "loss": 0.0506, + "step": 723 + }, + { + "epoch": 18.1, + "learning_rate": 0.00014369999999999997, + "loss": 0.0556, + "step": 724 + }, + { + "epoch": 18.12, + "learning_rate": 0.0001434, + "loss": 0.059, + "step": 725 + }, + { + "epoch": 18.15, + "learning_rate": 0.00014309999999999998, + "loss": 0.0447, + "step": 726 + }, + { + "epoch": 18.18, + "learning_rate": 0.00014279999999999997, + "loss": 0.0572, + "step": 727 + }, + { + "epoch": 18.2, + "learning_rate": 0.0001425, + "loss": 0.0517, + "step": 728 + }, + { + "epoch": 18.23, + "learning_rate": 0.0001422, + "loss": 0.072, + "step": 729 + }, + { + "epoch": 18.25, + "learning_rate": 0.00014189999999999998, + "loss": 0.0518, + "step": 730 + }, + { + "epoch": 18.27, + "learning_rate": 0.00014159999999999997, + "loss": 0.0853, + "step": 731 + }, + { + "epoch": 18.3, + "learning_rate": 0.0001413, + "loss": 0.0657, + "step": 732 + }, + { + "epoch": 18.32, + "learning_rate": 0.00014099999999999998, + "loss": 0.0694, + "step": 733 + }, + { + "epoch": 18.35, + "learning_rate": 0.00014069999999999998, + "loss": 0.0775, + "step": 734 + }, + { + "epoch": 18.38, + "learning_rate": 0.0001404, + "loss": 0.0633, + "step": 735 + }, + { + "epoch": 18.4, + "learning_rate": 0.0001401, + "loss": 0.0446, + "step": 736 + }, + { + "epoch": 18.43, + "learning_rate": 0.00013979999999999998, + "loss": 0.051, + "step": 737 + }, + { + "epoch": 18.45, + "learning_rate": 0.0001395, + "loss": 0.0421, + "step": 738 + }, + { + "epoch": 18.48, + "learning_rate": 0.0001392, + "loss": 0.0387, + "step": 739 + }, + { + "epoch": 18.5, + "learning_rate": 0.0001389, + "loss": 0.0541, + "step": 740 + }, + { + "epoch": 18.52, + "learning_rate": 0.0001386, + "loss": 0.0609, + "step": 741 + }, + { + "epoch": 18.55, + "learning_rate": 0.0001383, + "loss": 0.0627, + "step": 742 + }, + { + "epoch": 18.57, + "learning_rate": 0.000138, + "loss": 0.0384, + "step": 743 + }, + { + "epoch": 18.6, + "learning_rate": 0.00013769999999999999, + "loss": 0.0392, + "step": 744 + }, + { + "epoch": 18.62, + "learning_rate": 0.0001374, + "loss": 0.0487, + "step": 745 + }, + { + "epoch": 18.65, + "learning_rate": 0.0001371, + "loss": 0.0811, + "step": 746 + }, + { + "epoch": 18.68, + "learning_rate": 0.0001368, + "loss": 0.0603, + "step": 747 + }, + { + "epoch": 18.7, + "learning_rate": 0.00013649999999999998, + "loss": 0.0502, + "step": 748 + }, + { + "epoch": 18.73, + "learning_rate": 0.0001362, + "loss": 0.0501, + "step": 749 + }, + { + "epoch": 18.75, + "learning_rate": 0.0001359, + "loss": 0.0344, + "step": 750 + }, + { + "epoch": 18.77, + "learning_rate": 0.0001356, + "loss": 0.053, + "step": 751 + }, + { + "epoch": 18.8, + "learning_rate": 0.00013529999999999998, + "loss": 0.0727, + "step": 752 + }, + { + "epoch": 18.82, + "learning_rate": 0.000135, + "loss": 0.0696, + "step": 753 + }, + { + "epoch": 18.85, + "learning_rate": 0.0001347, + "loss": 0.052, + "step": 754 + }, + { + "epoch": 18.88, + "learning_rate": 0.0001344, + "loss": 0.0613, + "step": 755 + }, + { + "epoch": 18.9, + "learning_rate": 0.00013409999999999998, + "loss": 0.0455, + "step": 756 + }, + { + "epoch": 18.93, + "learning_rate": 0.0001338, + "loss": 0.0411, + "step": 757 + }, + { + "epoch": 18.95, + "learning_rate": 0.0001335, + "loss": 0.0692, + "step": 758 + }, + { + "epoch": 18.98, + "learning_rate": 0.00013319999999999999, + "loss": 0.0478, + "step": 759 + }, + { + "epoch": 19.0, + "learning_rate": 0.00013289999999999998, + "loss": 0.0381, + "step": 760 + }, + { + "epoch": 19.02, + "learning_rate": 0.0001326, + "loss": 0.0501, + "step": 761 + }, + { + "epoch": 19.05, + "learning_rate": 0.0001323, + "loss": 0.0418, + "step": 762 + }, + { + "epoch": 19.07, + "learning_rate": 0.00013199999999999998, + "loss": 0.0448, + "step": 763 + }, + { + "epoch": 19.1, + "learning_rate": 0.00013169999999999998, + "loss": 0.0611, + "step": 764 + }, + { + "epoch": 19.12, + "learning_rate": 0.0001314, + "loss": 0.0431, + "step": 765 + }, + { + "epoch": 19.15, + "learning_rate": 0.0001311, + "loss": 0.0507, + "step": 766 + }, + { + "epoch": 19.18, + "learning_rate": 0.00013079999999999998, + "loss": 0.0578, + "step": 767 + }, + { + "epoch": 19.2, + "learning_rate": 0.0001305, + "loss": 0.0432, + "step": 768 + }, + { + "epoch": 19.23, + "learning_rate": 0.0001302, + "loss": 0.0516, + "step": 769 + }, + { + "epoch": 19.25, + "learning_rate": 0.00012989999999999999, + "loss": 0.0392, + "step": 770 + }, + { + "epoch": 19.27, + "learning_rate": 0.00012959999999999998, + "loss": 0.031, + "step": 771 + }, + { + "epoch": 19.3, + "learning_rate": 0.0001293, + "loss": 0.0646, + "step": 772 + }, + { + "epoch": 19.32, + "learning_rate": 0.000129, + "loss": 0.047, + "step": 773 + }, + { + "epoch": 19.35, + "learning_rate": 0.00012869999999999998, + "loss": 0.0586, + "step": 774 + }, + { + "epoch": 19.38, + "learning_rate": 0.00012839999999999998, + "loss": 0.0526, + "step": 775 + }, + { + "epoch": 19.4, + "learning_rate": 0.0001281, + "loss": 0.0422, + "step": 776 + }, + { + "epoch": 19.43, + "learning_rate": 0.0001278, + "loss": 0.0336, + "step": 777 + }, + { + "epoch": 19.45, + "learning_rate": 0.00012749999999999998, + "loss": 0.033, + "step": 778 + }, + { + "epoch": 19.48, + "learning_rate": 0.00012719999999999997, + "loss": 0.0593, + "step": 779 + }, + { + "epoch": 19.5, + "learning_rate": 0.0001269, + "loss": 0.0578, + "step": 780 + }, + { + "epoch": 19.52, + "learning_rate": 0.0001266, + "loss": 0.0588, + "step": 781 + }, + { + "epoch": 19.55, + "learning_rate": 0.00012629999999999998, + "loss": 0.0542, + "step": 782 + }, + { + "epoch": 19.57, + "learning_rate": 0.00012599999999999997, + "loss": 0.0485, + "step": 783 + }, + { + "epoch": 19.6, + "learning_rate": 0.0001257, + "loss": 0.0444, + "step": 784 + }, + { + "epoch": 19.62, + "learning_rate": 0.00012539999999999999, + "loss": 0.0407, + "step": 785 + }, + { + "epoch": 19.65, + "learning_rate": 0.00012509999999999998, + "loss": 0.0772, + "step": 786 + }, + { + "epoch": 19.68, + "learning_rate": 0.00012479999999999997, + "loss": 0.0577, + "step": 787 + }, + { + "epoch": 19.7, + "learning_rate": 0.0001245, + "loss": 0.0495, + "step": 788 + }, + { + "epoch": 19.73, + "learning_rate": 0.00012419999999999998, + "loss": 0.058, + "step": 789 + }, + { + "epoch": 19.75, + "learning_rate": 0.00012389999999999998, + "loss": 0.0481, + "step": 790 + }, + { + "epoch": 19.77, + "learning_rate": 0.0001236, + "loss": 0.0411, + "step": 791 + }, + { + "epoch": 19.8, + "learning_rate": 0.0001233, + "loss": 0.0591, + "step": 792 + }, + { + "epoch": 19.82, + "learning_rate": 0.00012299999999999998, + "loss": 0.0516, + "step": 793 + }, + { + "epoch": 19.85, + "learning_rate": 0.00012269999999999997, + "loss": 0.0542, + "step": 794 + }, + { + "epoch": 19.88, + "learning_rate": 0.0001224, + "loss": 0.0429, + "step": 795 + }, + { + "epoch": 19.9, + "learning_rate": 0.00012209999999999999, + "loss": 0.0949, + "step": 796 + }, + { + "epoch": 19.93, + "learning_rate": 0.00012179999999999999, + "loss": 0.0474, + "step": 797 + }, + { + "epoch": 19.95, + "learning_rate": 0.0001215, + "loss": 0.0506, + "step": 798 + }, + { + "epoch": 19.98, + "learning_rate": 0.00012119999999999999, + "loss": 0.0486, + "step": 799 + }, + { + "epoch": 20.0, + "learning_rate": 0.0001209, + "loss": 0.0334, + "step": 800 + }, + { + "epoch": 20.0, + "eval_cer": 0.1542545408941747, + "eval_loss": 0.5741926431655884, + "eval_runtime": 8.0485, + "eval_samples_per_second": 48.953, + "eval_steps_per_second": 6.212, + "eval_wer": 0.3666129232295672, + "step": 800 + }, + { + "epoch": 20.02, + "learning_rate": 0.00012059999999999999, + "loss": 0.0611, + "step": 801 + }, + { + "epoch": 20.05, + "learning_rate": 0.0001203, + "loss": 0.0542, + "step": 802 + }, + { + "epoch": 20.07, + "learning_rate": 0.00011999999999999999, + "loss": 0.0351, + "step": 803 + }, + { + "epoch": 20.1, + "learning_rate": 0.0001197, + "loss": 0.038, + "step": 804 + }, + { + "epoch": 20.12, + "learning_rate": 0.0001194, + "loss": 0.0368, + "step": 805 + }, + { + "epoch": 20.15, + "learning_rate": 0.0001191, + "loss": 0.0383, + "step": 806 + }, + { + "epoch": 20.18, + "learning_rate": 0.0001188, + "loss": 0.0456, + "step": 807 + }, + { + "epoch": 20.2, + "learning_rate": 0.0001185, + "loss": 0.0572, + "step": 808 + }, + { + "epoch": 20.23, + "learning_rate": 0.0001182, + "loss": 0.0488, + "step": 809 + }, + { + "epoch": 20.25, + "learning_rate": 0.00011789999999999999, + "loss": 0.0364, + "step": 810 + }, + { + "epoch": 20.27, + "learning_rate": 0.0001176, + "loss": 0.0516, + "step": 811 + }, + { + "epoch": 20.3, + "learning_rate": 0.00011729999999999999, + "loss": 0.0321, + "step": 812 + }, + { + "epoch": 20.32, + "learning_rate": 0.000117, + "loss": 0.0685, + "step": 813 + }, + { + "epoch": 20.35, + "learning_rate": 0.00011669999999999999, + "loss": 0.0497, + "step": 814 + }, + { + "epoch": 20.38, + "learning_rate": 0.0001164, + "loss": 0.0524, + "step": 815 + }, + { + "epoch": 20.4, + "learning_rate": 0.00011609999999999999, + "loss": 0.051, + "step": 816 + }, + { + "epoch": 20.43, + "learning_rate": 0.0001158, + "loss": 0.0449, + "step": 817 + }, + { + "epoch": 20.45, + "learning_rate": 0.00011549999999999999, + "loss": 0.0357, + "step": 818 + }, + { + "epoch": 20.48, + "learning_rate": 0.0001152, + "loss": 0.0388, + "step": 819 + }, + { + "epoch": 20.5, + "learning_rate": 0.00011489999999999999, + "loss": 0.0591, + "step": 820 + }, + { + "epoch": 20.52, + "learning_rate": 0.0001146, + "loss": 0.0579, + "step": 821 + }, + { + "epoch": 20.55, + "learning_rate": 0.00011429999999999999, + "loss": 0.0501, + "step": 822 + }, + { + "epoch": 20.57, + "learning_rate": 0.00011399999999999999, + "loss": 0.0425, + "step": 823 + }, + { + "epoch": 20.6, + "learning_rate": 0.00011369999999999999, + "loss": 0.0877, + "step": 824 + }, + { + "epoch": 20.62, + "learning_rate": 0.00011339999999999999, + "loss": 0.0333, + "step": 825 + }, + { + "epoch": 20.65, + "learning_rate": 0.00011309999999999998, + "loss": 0.0639, + "step": 826 + }, + { + "epoch": 20.68, + "learning_rate": 0.00011279999999999999, + "loss": 0.0441, + "step": 827 + }, + { + "epoch": 20.7, + "learning_rate": 0.0001125, + "loss": 0.0525, + "step": 828 + }, + { + "epoch": 20.73, + "learning_rate": 0.00011219999999999999, + "loss": 0.027, + "step": 829 + }, + { + "epoch": 20.75, + "learning_rate": 0.0001119, + "loss": 0.0337, + "step": 830 + }, + { + "epoch": 20.77, + "learning_rate": 0.00011159999999999999, + "loss": 0.0463, + "step": 831 + }, + { + "epoch": 20.8, + "learning_rate": 0.0001113, + "loss": 0.0633, + "step": 832 + }, + { + "epoch": 20.82, + "learning_rate": 0.00011099999999999999, + "loss": 0.0476, + "step": 833 + }, + { + "epoch": 20.85, + "learning_rate": 0.0001107, + "loss": 0.0453, + "step": 834 + }, + { + "epoch": 20.88, + "learning_rate": 0.00011039999999999999, + "loss": 0.0341, + "step": 835 + }, + { + "epoch": 20.9, + "learning_rate": 0.00011009999999999999, + "loss": 0.0324, + "step": 836 + }, + { + "epoch": 20.93, + "learning_rate": 0.00010979999999999999, + "loss": 0.0343, + "step": 837 + }, + { + "epoch": 20.95, + "learning_rate": 0.00010949999999999999, + "loss": 0.0429, + "step": 838 + }, + { + "epoch": 20.98, + "learning_rate": 0.00010919999999999998, + "loss": 0.0424, + "step": 839 + }, + { + "epoch": 21.0, + "learning_rate": 0.00010889999999999999, + "loss": 0.0349, + "step": 840 + }, + { + "epoch": 21.02, + "learning_rate": 0.00010859999999999998, + "loss": 0.0502, + "step": 841 + }, + { + "epoch": 21.05, + "learning_rate": 0.00010829999999999999, + "loss": 0.0455, + "step": 842 + }, + { + "epoch": 21.07, + "learning_rate": 0.00010799999999999998, + "loss": 0.0431, + "step": 843 + }, + { + "epoch": 21.1, + "learning_rate": 0.00010769999999999999, + "loss": 0.0334, + "step": 844 + }, + { + "epoch": 21.12, + "learning_rate": 0.00010739999999999998, + "loss": 0.0349, + "step": 845 + }, + { + "epoch": 21.15, + "learning_rate": 0.00010709999999999999, + "loss": 0.0338, + "step": 846 + }, + { + "epoch": 21.18, + "learning_rate": 0.00010679999999999998, + "loss": 0.0405, + "step": 847 + }, + { + "epoch": 21.2, + "learning_rate": 0.00010649999999999999, + "loss": 0.0435, + "step": 848 + }, + { + "epoch": 21.23, + "learning_rate": 0.00010619999999999998, + "loss": 0.0363, + "step": 849 + }, + { + "epoch": 21.25, + "learning_rate": 0.00010589999999999999, + "loss": 0.044, + "step": 850 + }, + { + "epoch": 21.27, + "learning_rate": 0.00010559999999999998, + "loss": 0.0276, + "step": 851 + }, + { + "epoch": 21.3, + "learning_rate": 0.00010529999999999998, + "loss": 0.0397, + "step": 852 + }, + { + "epoch": 21.32, + "learning_rate": 0.00010499999999999999, + "loss": 0.0479, + "step": 853 + }, + { + "epoch": 21.35, + "learning_rate": 0.00010469999999999998, + "loss": 0.052, + "step": 854 + }, + { + "epoch": 21.38, + "learning_rate": 0.00010439999999999999, + "loss": 0.0471, + "step": 855 + }, + { + "epoch": 21.4, + "learning_rate": 0.00010409999999999998, + "loss": 0.042, + "step": 856 + }, + { + "epoch": 21.43, + "learning_rate": 0.00010379999999999999, + "loss": 0.0446, + "step": 857 + }, + { + "epoch": 21.45, + "learning_rate": 0.00010349999999999998, + "loss": 0.0357, + "step": 858 + }, + { + "epoch": 21.48, + "learning_rate": 0.00010319999999999999, + "loss": 0.0435, + "step": 859 + }, + { + "epoch": 21.5, + "learning_rate": 0.0001029, + "loss": 0.0347, + "step": 860 + }, + { + "epoch": 21.52, + "learning_rate": 0.0001026, + "loss": 0.0483, + "step": 861 + }, + { + "epoch": 21.55, + "learning_rate": 0.00010229999999999999, + "loss": 0.04, + "step": 862 + }, + { + "epoch": 21.57, + "learning_rate": 0.000102, + "loss": 0.0388, + "step": 863 + }, + { + "epoch": 21.6, + "learning_rate": 0.00010169999999999999, + "loss": 0.0761, + "step": 864 + }, + { + "epoch": 21.62, + "learning_rate": 0.0001014, + "loss": 0.0354, + "step": 865 + }, + { + "epoch": 21.65, + "learning_rate": 0.0001011, + "loss": 0.0637, + "step": 866 + }, + { + "epoch": 21.68, + "learning_rate": 0.0001008, + "loss": 0.0539, + "step": 867 + }, + { + "epoch": 21.7, + "learning_rate": 0.0001005, + "loss": 0.0405, + "step": 868 + }, + { + "epoch": 21.73, + "learning_rate": 0.0001002, + "loss": 0.0382, + "step": 869 + }, + { + "epoch": 21.75, + "learning_rate": 9.99e-05, + "loss": 0.0454, + "step": 870 + }, + { + "epoch": 21.77, + "learning_rate": 9.96e-05, + "loss": 0.0416, + "step": 871 + }, + { + "epoch": 21.8, + "learning_rate": 9.93e-05, + "loss": 0.0465, + "step": 872 + }, + { + "epoch": 21.82, + "learning_rate": 9.9e-05, + "loss": 0.0528, + "step": 873 + }, + { + "epoch": 21.85, + "learning_rate": 9.87e-05, + "loss": 0.0373, + "step": 874 + }, + { + "epoch": 21.88, + "learning_rate": 9.839999999999999e-05, + "loss": 0.0313, + "step": 875 + }, + { + "epoch": 21.9, + "learning_rate": 9.81e-05, + "loss": 0.0317, + "step": 876 + }, + { + "epoch": 21.93, + "learning_rate": 9.779999999999999e-05, + "loss": 0.0425, + "step": 877 + }, + { + "epoch": 21.95, + "learning_rate": 9.75e-05, + "loss": 0.0425, + "step": 878 + }, + { + "epoch": 21.98, + "learning_rate": 9.719999999999999e-05, + "loss": 0.039, + "step": 879 + }, + { + "epoch": 22.0, + "learning_rate": 9.69e-05, + "loss": 0.0412, + "step": 880 + }, + { + "epoch": 22.02, + "learning_rate": 9.659999999999999e-05, + "loss": 0.0442, + "step": 881 + }, + { + "epoch": 22.05, + "learning_rate": 9.63e-05, + "loss": 0.0302, + "step": 882 + }, + { + "epoch": 22.07, + "learning_rate": 9.599999999999999e-05, + "loss": 0.0439, + "step": 883 + }, + { + "epoch": 22.1, + "learning_rate": 9.57e-05, + "loss": 0.0418, + "step": 884 + }, + { + "epoch": 22.12, + "learning_rate": 9.539999999999999e-05, + "loss": 0.0347, + "step": 885 + }, + { + "epoch": 22.15, + "learning_rate": 9.51e-05, + "loss": 0.0296, + "step": 886 + }, + { + "epoch": 22.18, + "learning_rate": 9.479999999999999e-05, + "loss": 0.0289, + "step": 887 + }, + { + "epoch": 22.2, + "learning_rate": 9.449999999999999e-05, + "loss": 0.0416, + "step": 888 + }, + { + "epoch": 22.23, + "learning_rate": 9.419999999999999e-05, + "loss": 0.042, + "step": 889 + }, + { + "epoch": 22.25, + "learning_rate": 9.389999999999999e-05, + "loss": 0.0307, + "step": 890 + }, + { + "epoch": 22.27, + "learning_rate": 9.36e-05, + "loss": 0.0284, + "step": 891 + }, + { + "epoch": 22.3, + "learning_rate": 9.329999999999999e-05, + "loss": 0.0357, + "step": 892 + }, + { + "epoch": 22.32, + "learning_rate": 9.3e-05, + "loss": 0.0571, + "step": 893 + }, + { + "epoch": 22.35, + "learning_rate": 9.269999999999999e-05, + "loss": 0.0481, + "step": 894 + }, + { + "epoch": 22.38, + "learning_rate": 9.24e-05, + "loss": 0.0464, + "step": 895 + }, + { + "epoch": 22.4, + "learning_rate": 9.209999999999999e-05, + "loss": 0.048, + "step": 896 + }, + { + "epoch": 22.43, + "learning_rate": 9.18e-05, + "loss": 0.0243, + "step": 897 + }, + { + "epoch": 22.45, + "learning_rate": 9.149999999999999e-05, + "loss": 0.0352, + "step": 898 + }, + { + "epoch": 22.48, + "learning_rate": 9.12e-05, + "loss": 0.0281, + "step": 899 + }, + { + "epoch": 22.5, + "learning_rate": 9.089999999999999e-05, + "loss": 0.0448, + "step": 900 + }, + { + "epoch": 22.52, + "learning_rate": 9.059999999999999e-05, + "loss": 0.0517, + "step": 901 + }, + { + "epoch": 22.55, + "learning_rate": 9.029999999999999e-05, + "loss": 0.0309, + "step": 902 + }, + { + "epoch": 22.57, + "learning_rate": 8.999999999999999e-05, + "loss": 0.0331, + "step": 903 + }, + { + "epoch": 22.6, + "learning_rate": 8.969999999999998e-05, + "loss": 0.0236, + "step": 904 + }, + { + "epoch": 22.62, + "learning_rate": 8.939999999999999e-05, + "loss": 0.0319, + "step": 905 + }, + { + "epoch": 22.65, + "learning_rate": 8.909999999999998e-05, + "loss": 0.0605, + "step": 906 + }, + { + "epoch": 22.68, + "learning_rate": 8.879999999999999e-05, + "loss": 0.0381, + "step": 907 + }, + { + "epoch": 22.7, + "learning_rate": 8.849999999999998e-05, + "loss": 0.029, + "step": 908 + }, + { + "epoch": 22.73, + "learning_rate": 8.819999999999999e-05, + "loss": 0.0186, + "step": 909 + }, + { + "epoch": 22.75, + "learning_rate": 8.789999999999998e-05, + "loss": 0.0836, + "step": 910 + }, + { + "epoch": 22.77, + "learning_rate": 8.759999999999999e-05, + "loss": 0.0171, + "step": 911 + }, + { + "epoch": 22.8, + "learning_rate": 8.729999999999998e-05, + "loss": 0.0451, + "step": 912 + }, + { + "epoch": 22.82, + "learning_rate": 8.699999999999999e-05, + "loss": 0.0371, + "step": 913 + }, + { + "epoch": 22.85, + "learning_rate": 8.669999999999998e-05, + "loss": 0.0378, + "step": 914 + }, + { + "epoch": 22.88, + "learning_rate": 8.639999999999999e-05, + "loss": 0.0365, + "step": 915 + }, + { + "epoch": 22.9, + "learning_rate": 8.609999999999999e-05, + "loss": 0.0401, + "step": 916 + }, + { + "epoch": 22.93, + "learning_rate": 8.579999999999998e-05, + "loss": 0.0341, + "step": 917 + }, + { + "epoch": 22.95, + "learning_rate": 8.549999999999999e-05, + "loss": 0.0496, + "step": 918 + }, + { + "epoch": 22.98, + "learning_rate": 8.519999999999998e-05, + "loss": 0.0266, + "step": 919 + }, + { + "epoch": 23.0, + "learning_rate": 8.489999999999999e-05, + "loss": 0.0281, + "step": 920 + }, + { + "epoch": 23.02, + "learning_rate": 8.459999999999998e-05, + "loss": 0.0471, + "step": 921 + }, + { + "epoch": 23.05, + "learning_rate": 8.43e-05, + "loss": 0.0524, + "step": 922 + }, + { + "epoch": 23.07, + "learning_rate": 8.4e-05, + "loss": 0.0387, + "step": 923 + }, + { + "epoch": 23.1, + "learning_rate": 8.37e-05, + "loss": 0.025, + "step": 924 + }, + { + "epoch": 23.12, + "learning_rate": 8.34e-05, + "loss": 0.0727, + "step": 925 + }, + { + "epoch": 23.15, + "learning_rate": 8.31e-05, + "loss": 0.0305, + "step": 926 + }, + { + "epoch": 23.18, + "learning_rate": 8.28e-05, + "loss": 0.0447, + "step": 927 + }, + { + "epoch": 23.2, + "learning_rate": 8.25e-05, + "loss": 0.0288, + "step": 928 + }, + { + "epoch": 23.23, + "learning_rate": 8.22e-05, + "loss": 0.0482, + "step": 929 + }, + { + "epoch": 23.25, + "learning_rate": 8.19e-05, + "loss": 0.0275, + "step": 930 + }, + { + "epoch": 23.27, + "learning_rate": 8.16e-05, + "loss": 0.0318, + "step": 931 + }, + { + "epoch": 23.3, + "learning_rate": 8.13e-05, + "loss": 0.0379, + "step": 932 + }, + { + "epoch": 23.32, + "learning_rate": 8.1e-05, + "loss": 0.0359, + "step": 933 + }, + { + "epoch": 23.35, + "learning_rate": 8.07e-05, + "loss": 0.0482, + "step": 934 + }, + { + "epoch": 23.38, + "learning_rate": 8.04e-05, + "loss": 0.0353, + "step": 935 + }, + { + "epoch": 23.4, + "learning_rate": 8.01e-05, + "loss": 0.0247, + "step": 936 + }, + { + "epoch": 23.43, + "learning_rate": 7.98e-05, + "loss": 0.0307, + "step": 937 + }, + { + "epoch": 23.45, + "learning_rate": 7.95e-05, + "loss": 0.0249, + "step": 938 + }, + { + "epoch": 23.48, + "learning_rate": 7.92e-05, + "loss": 0.04, + "step": 939 + }, + { + "epoch": 23.5, + "learning_rate": 7.89e-05, + "loss": 0.0457, + "step": 940 + }, + { + "epoch": 23.52, + "learning_rate": 7.86e-05, + "loss": 0.0376, + "step": 941 + }, + { + "epoch": 23.55, + "learning_rate": 7.829999999999999e-05, + "loss": 0.0432, + "step": 942 + }, + { + "epoch": 23.57, + "learning_rate": 7.8e-05, + "loss": 0.0257, + "step": 943 + }, + { + "epoch": 23.6, + "learning_rate": 7.769999999999999e-05, + "loss": 0.033, + "step": 944 + }, + { + "epoch": 23.62, + "learning_rate": 7.74e-05, + "loss": 0.0274, + "step": 945 + }, + { + "epoch": 23.65, + "learning_rate": 7.709999999999999e-05, + "loss": 0.0402, + "step": 946 + }, + { + "epoch": 23.68, + "learning_rate": 7.68e-05, + "loss": 0.0332, + "step": 947 + }, + { + "epoch": 23.7, + "learning_rate": 7.649999999999999e-05, + "loss": 0.0505, + "step": 948 + }, + { + "epoch": 23.73, + "learning_rate": 7.62e-05, + "loss": 0.0271, + "step": 949 + }, + { + "epoch": 23.75, + "learning_rate": 7.589999999999999e-05, + "loss": 0.0333, + "step": 950 + }, + { + "epoch": 23.77, + "learning_rate": 7.56e-05, + "loss": 0.0571, + "step": 951 + }, + { + "epoch": 23.8, + "learning_rate": 7.529999999999999e-05, + "loss": 0.0302, + "step": 952 + }, + { + "epoch": 23.82, + "learning_rate": 7.5e-05, + "loss": 0.0364, + "step": 953 + }, + { + "epoch": 23.85, + "learning_rate": 7.47e-05, + "loss": 0.032, + "step": 954 + }, + { + "epoch": 23.88, + "learning_rate": 7.439999999999999e-05, + "loss": 0.0374, + "step": 955 + }, + { + "epoch": 23.9, + "learning_rate": 7.41e-05, + "loss": 0.0474, + "step": 956 + }, + { + "epoch": 23.93, + "learning_rate": 7.379999999999999e-05, + "loss": 0.023, + "step": 957 + }, + { + "epoch": 23.95, + "learning_rate": 7.35e-05, + "loss": 0.0455, + "step": 958 + }, + { + "epoch": 23.98, + "learning_rate": 7.319999999999999e-05, + "loss": 0.0369, + "step": 959 + }, + { + "epoch": 24.0, + "learning_rate": 7.29e-05, + "loss": 0.0272, + "step": 960 + }, + { + "epoch": 24.02, + "learning_rate": 7.259999999999999e-05, + "loss": 0.0546, + "step": 961 + }, + { + "epoch": 24.05, + "learning_rate": 7.23e-05, + "loss": 0.0427, + "step": 962 + }, + { + "epoch": 24.07, + "learning_rate": 7.199999999999999e-05, + "loss": 0.0256, + "step": 963 + }, + { + "epoch": 24.1, + "learning_rate": 7.17e-05, + "loss": 0.0342, + "step": 964 + }, + { + "epoch": 24.12, + "learning_rate": 7.139999999999999e-05, + "loss": 0.073, + "step": 965 + }, + { + "epoch": 24.15, + "learning_rate": 7.11e-05, + "loss": 0.0234, + "step": 966 + }, + { + "epoch": 24.18, + "learning_rate": 7.079999999999999e-05, + "loss": 0.022, + "step": 967 + }, + { + "epoch": 24.2, + "learning_rate": 7.049999999999999e-05, + "loss": 0.0254, + "step": 968 + }, + { + "epoch": 24.23, + "learning_rate": 7.02e-05, + "loss": 0.0418, + "step": 969 + }, + { + "epoch": 24.25, + "learning_rate": 6.989999999999999e-05, + "loss": 0.0323, + "step": 970 + }, + { + "epoch": 24.27, + "learning_rate": 6.96e-05, + "loss": 0.0475, + "step": 971 + }, + { + "epoch": 24.3, + "learning_rate": 6.93e-05, + "loss": 0.0288, + "step": 972 + }, + { + "epoch": 24.32, + "learning_rate": 6.9e-05, + "loss": 0.0438, + "step": 973 + }, + { + "epoch": 24.35, + "learning_rate": 6.87e-05, + "loss": 0.0412, + "step": 974 + }, + { + "epoch": 24.38, + "learning_rate": 6.84e-05, + "loss": 0.024, + "step": 975 + }, + { + "epoch": 24.4, + "learning_rate": 6.81e-05, + "loss": 0.0327, + "step": 976 + }, + { + "epoch": 24.43, + "learning_rate": 6.78e-05, + "loss": 0.0334, + "step": 977 + }, + { + "epoch": 24.45, + "learning_rate": 6.75e-05, + "loss": 0.0245, + "step": 978 + }, + { + "epoch": 24.48, + "learning_rate": 6.72e-05, + "loss": 0.0361, + "step": 979 + }, + { + "epoch": 24.5, + "learning_rate": 6.69e-05, + "loss": 0.0471, + "step": 980 + }, + { + "epoch": 24.52, + "learning_rate": 6.659999999999999e-05, + "loss": 0.0484, + "step": 981 + }, + { + "epoch": 24.55, + "learning_rate": 6.63e-05, + "loss": 0.0324, + "step": 982 + }, + { + "epoch": 24.57, + "learning_rate": 6.599999999999999e-05, + "loss": 0.0244, + "step": 983 + }, + { + "epoch": 24.6, + "learning_rate": 6.57e-05, + "loss": 0.0205, + "step": 984 + }, + { + "epoch": 24.62, + "learning_rate": 6.539999999999999e-05, + "loss": 0.0252, + "step": 985 + }, + { + "epoch": 24.65, + "learning_rate": 6.51e-05, + "loss": 0.0381, + "step": 986 + }, + { + "epoch": 24.68, + "learning_rate": 6.479999999999999e-05, + "loss": 0.0411, + "step": 987 + }, + { + "epoch": 24.7, + "learning_rate": 6.45e-05, + "loss": 0.0284, + "step": 988 + }, + { + "epoch": 24.73, + "learning_rate": 6.419999999999999e-05, + "loss": 0.0319, + "step": 989 + }, + { + "epoch": 24.75, + "learning_rate": 6.39e-05, + "loss": 0.0287, + "step": 990 + }, + { + "epoch": 24.77, + "learning_rate": 6.359999999999999e-05, + "loss": 0.0266, + "step": 991 + }, + { + "epoch": 24.8, + "learning_rate": 6.33e-05, + "loss": 0.0569, + "step": 992 + }, + { + "epoch": 24.82, + "learning_rate": 6.299999999999999e-05, + "loss": 0.0373, + "step": 993 + }, + { + "epoch": 24.85, + "learning_rate": 6.269999999999999e-05, + "loss": 0.0425, + "step": 994 + }, + { + "epoch": 24.88, + "learning_rate": 6.239999999999999e-05, + "loss": 0.0283, + "step": 995 + }, + { + "epoch": 24.9, + "learning_rate": 6.209999999999999e-05, + "loss": 0.0271, + "step": 996 + }, + { + "epoch": 24.93, + "learning_rate": 6.18e-05, + "loss": 0.0285, + "step": 997 + }, + { + "epoch": 24.95, + "learning_rate": 6.149999999999999e-05, + "loss": 0.0252, + "step": 998 + }, + { + "epoch": 24.98, + "learning_rate": 6.12e-05, + "loss": 0.03, + "step": 999 + }, + { + "epoch": 25.0, + "learning_rate": 6.0899999999999996e-05, + "loss": 0.0244, + "step": 1000 + }, + { + "epoch": 25.0, + "eval_cer": 0.14988608573877057, + "eval_loss": 0.5907031893730164, + "eval_runtime": 8.0782, + "eval_samples_per_second": 48.773, + "eval_steps_per_second": 6.189, + "eval_wer": 0.3545826615403696, + "step": 1000 + }, + { + "epoch": 25.02, + "learning_rate": 6.0599999999999996e-05, + "loss": 0.0456, + "step": 1001 + }, + { + "epoch": 25.05, + "learning_rate": 6.0299999999999995e-05, + "loss": 0.0379, + "step": 1002 + }, + { + "epoch": 25.07, + "learning_rate": 5.9999999999999995e-05, + "loss": 0.027, + "step": 1003 + }, + { + "epoch": 25.1, + "learning_rate": 5.97e-05, + "loss": 0.0222, + "step": 1004 + }, + { + "epoch": 25.12, + "learning_rate": 5.94e-05, + "loss": 0.0291, + "step": 1005 + }, + { + "epoch": 25.15, + "learning_rate": 5.91e-05, + "loss": 0.0478, + "step": 1006 + }, + { + "epoch": 25.18, + "learning_rate": 5.88e-05, + "loss": 0.0294, + "step": 1007 + }, + { + "epoch": 25.2, + "learning_rate": 5.85e-05, + "loss": 0.0354, + "step": 1008 + }, + { + "epoch": 25.23, + "learning_rate": 5.82e-05, + "loss": 0.032, + "step": 1009 + }, + { + "epoch": 25.25, + "learning_rate": 5.79e-05, + "loss": 0.026, + "step": 1010 + }, + { + "epoch": 25.27, + "learning_rate": 5.76e-05, + "loss": 0.0228, + "step": 1011 + }, + { + "epoch": 25.3, + "learning_rate": 5.73e-05, + "loss": 0.02, + "step": 1012 + }, + { + "epoch": 25.32, + "learning_rate": 5.6999999999999996e-05, + "loss": 0.0354, + "step": 1013 + }, + { + "epoch": 25.35, + "learning_rate": 5.6699999999999996e-05, + "loss": 0.0352, + "step": 1014 + }, + { + "epoch": 25.38, + "learning_rate": 5.6399999999999995e-05, + "loss": 0.03, + "step": 1015 + }, + { + "epoch": 25.4, + "learning_rate": 5.6099999999999995e-05, + "loss": 0.0297, + "step": 1016 + }, + { + "epoch": 25.43, + "learning_rate": 5.5799999999999994e-05, + "loss": 0.0663, + "step": 1017 + }, + { + "epoch": 25.45, + "learning_rate": 5.5499999999999994e-05, + "loss": 0.0171, + "step": 1018 + }, + { + "epoch": 25.48, + "learning_rate": 5.519999999999999e-05, + "loss": 0.0242, + "step": 1019 + }, + { + "epoch": 25.5, + "learning_rate": 5.489999999999999e-05, + "loss": 0.0336, + "step": 1020 + }, + { + "epoch": 25.52, + "learning_rate": 5.459999999999999e-05, + "loss": 0.0447, + "step": 1021 + }, + { + "epoch": 25.55, + "learning_rate": 5.429999999999999e-05, + "loss": 0.0291, + "step": 1022 + }, + { + "epoch": 25.57, + "learning_rate": 5.399999999999999e-05, + "loss": 0.0274, + "step": 1023 + }, + { + "epoch": 25.6, + "learning_rate": 5.369999999999999e-05, + "loss": 0.0158, + "step": 1024 + }, + { + "epoch": 25.62, + "learning_rate": 5.339999999999999e-05, + "loss": 0.0287, + "step": 1025 + }, + { + "epoch": 25.65, + "learning_rate": 5.309999999999999e-05, + "loss": 0.0417, + "step": 1026 + }, + { + "epoch": 25.68, + "learning_rate": 5.279999999999999e-05, + "loss": 0.0328, + "step": 1027 + }, + { + "epoch": 25.7, + "learning_rate": 5.2499999999999995e-05, + "loss": 0.0432, + "step": 1028 + }, + { + "epoch": 25.73, + "learning_rate": 5.2199999999999995e-05, + "loss": 0.0387, + "step": 1029 + }, + { + "epoch": 25.75, + "learning_rate": 5.1899999999999994e-05, + "loss": 0.0274, + "step": 1030 + }, + { + "epoch": 25.77, + "learning_rate": 5.1599999999999994e-05, + "loss": 0.0265, + "step": 1031 + }, + { + "epoch": 25.8, + "learning_rate": 5.13e-05, + "loss": 0.0396, + "step": 1032 + }, + { + "epoch": 25.82, + "learning_rate": 5.1e-05, + "loss": 0.0324, + "step": 1033 + }, + { + "epoch": 25.85, + "learning_rate": 5.07e-05, + "loss": 0.0518, + "step": 1034 + }, + { + "epoch": 25.88, + "learning_rate": 5.04e-05, + "loss": 0.0379, + "step": 1035 + }, + { + "epoch": 25.9, + "learning_rate": 5.01e-05, + "loss": 0.0213, + "step": 1036 + }, + { + "epoch": 25.93, + "learning_rate": 4.98e-05, + "loss": 0.0329, + "step": 1037 + }, + { + "epoch": 25.95, + "learning_rate": 4.95e-05, + "loss": 0.0301, + "step": 1038 + }, + { + "epoch": 25.98, + "learning_rate": 4.9199999999999997e-05, + "loss": 0.0327, + "step": 1039 + }, + { + "epoch": 26.0, + "learning_rate": 4.8899999999999996e-05, + "loss": 0.0362, + "step": 1040 + }, + { + "epoch": 26.02, + "learning_rate": 4.8599999999999995e-05, + "loss": 0.0377, + "step": 1041 + }, + { + "epoch": 26.05, + "learning_rate": 4.8299999999999995e-05, + "loss": 0.0348, + "step": 1042 + }, + { + "epoch": 26.07, + "learning_rate": 4.7999999999999994e-05, + "loss": 0.024, + "step": 1043 + }, + { + "epoch": 26.1, + "learning_rate": 4.7699999999999994e-05, + "loss": 0.029, + "step": 1044 + }, + { + "epoch": 26.12, + "learning_rate": 4.7399999999999993e-05, + "loss": 0.0321, + "step": 1045 + }, + { + "epoch": 26.15, + "learning_rate": 4.709999999999999e-05, + "loss": 0.0317, + "step": 1046 + }, + { + "epoch": 26.18, + "learning_rate": 4.68e-05, + "loss": 0.034, + "step": 1047 + }, + { + "epoch": 26.2, + "learning_rate": 4.65e-05, + "loss": 0.0317, + "step": 1048 + }, + { + "epoch": 26.23, + "learning_rate": 4.62e-05, + "loss": 0.0316, + "step": 1049 + }, + { + "epoch": 26.25, + "learning_rate": 4.59e-05, + "loss": 0.0328, + "step": 1050 + }, + { + "epoch": 26.27, + "learning_rate": 4.56e-05, + "loss": 0.022, + "step": 1051 + }, + { + "epoch": 26.3, + "learning_rate": 4.5299999999999997e-05, + "loss": 0.029, + "step": 1052 + }, + { + "epoch": 26.32, + "learning_rate": 4.4999999999999996e-05, + "loss": 0.0247, + "step": 1053 + }, + { + "epoch": 26.35, + "learning_rate": 4.4699999999999996e-05, + "loss": 0.034, + "step": 1054 + }, + { + "epoch": 26.38, + "learning_rate": 4.4399999999999995e-05, + "loss": 0.0331, + "step": 1055 + }, + { + "epoch": 26.4, + "learning_rate": 4.4099999999999995e-05, + "loss": 0.0244, + "step": 1056 + }, + { + "epoch": 26.43, + "learning_rate": 4.3799999999999994e-05, + "loss": 0.0177, + "step": 1057 + }, + { + "epoch": 26.45, + "learning_rate": 4.3499999999999993e-05, + "loss": 0.0274, + "step": 1058 + }, + { + "epoch": 26.48, + "learning_rate": 4.319999999999999e-05, + "loss": 0.0283, + "step": 1059 + }, + { + "epoch": 26.5, + "learning_rate": 4.289999999999999e-05, + "loss": 0.0251, + "step": 1060 + }, + { + "epoch": 26.52, + "learning_rate": 4.259999999999999e-05, + "loss": 0.04, + "step": 1061 + }, + { + "epoch": 26.55, + "learning_rate": 4.229999999999999e-05, + "loss": 0.0316, + "step": 1062 + }, + { + "epoch": 26.57, + "learning_rate": 4.2e-05, + "loss": 0.0664, + "step": 1063 + }, + { + "epoch": 26.6, + "learning_rate": 4.17e-05, + "loss": 0.0174, + "step": 1064 + }, + { + "epoch": 26.62, + "learning_rate": 4.14e-05, + "loss": 0.0242, + "step": 1065 + }, + { + "epoch": 26.65, + "learning_rate": 4.11e-05, + "loss": 0.0354, + "step": 1066 + }, + { + "epoch": 26.68, + "learning_rate": 4.08e-05, + "loss": 0.0393, + "step": 1067 + }, + { + "epoch": 26.7, + "learning_rate": 4.05e-05, + "loss": 0.034, + "step": 1068 + }, + { + "epoch": 26.73, + "learning_rate": 4.02e-05, + "loss": 0.0311, + "step": 1069 + }, + { + "epoch": 26.75, + "learning_rate": 3.99e-05, + "loss": 0.0143, + "step": 1070 + }, + { + "epoch": 26.77, + "learning_rate": 3.96e-05, + "loss": 0.0208, + "step": 1071 + }, + { + "epoch": 26.8, + "learning_rate": 3.93e-05, + "loss": 0.0365, + "step": 1072 + }, + { + "epoch": 26.82, + "learning_rate": 3.9e-05, + "loss": 0.0241, + "step": 1073 + }, + { + "epoch": 26.85, + "learning_rate": 3.87e-05, + "loss": 0.0295, + "step": 1074 + }, + { + "epoch": 26.88, + "learning_rate": 3.84e-05, + "loss": 0.0221, + "step": 1075 + }, + { + "epoch": 26.9, + "learning_rate": 3.81e-05, + "loss": 0.0187, + "step": 1076 + }, + { + "epoch": 26.93, + "learning_rate": 3.78e-05, + "loss": 0.0197, + "step": 1077 + }, + { + "epoch": 26.95, + "learning_rate": 3.75e-05, + "loss": 0.0349, + "step": 1078 + }, + { + "epoch": 26.98, + "learning_rate": 3.7199999999999996e-05, + "loss": 0.0292, + "step": 1079 + }, + { + "epoch": 27.0, + "learning_rate": 3.6899999999999996e-05, + "loss": 0.0175, + "step": 1080 + }, + { + "epoch": 27.02, + "learning_rate": 3.6599999999999995e-05, + "loss": 0.029, + "step": 1081 + }, + { + "epoch": 27.05, + "learning_rate": 3.6299999999999995e-05, + "loss": 0.027, + "step": 1082 + }, + { + "epoch": 27.07, + "learning_rate": 3.5999999999999994e-05, + "loss": 0.0308, + "step": 1083 + }, + { + "epoch": 27.1, + "learning_rate": 3.5699999999999994e-05, + "loss": 0.0293, + "step": 1084 + }, + { + "epoch": 27.12, + "learning_rate": 3.539999999999999e-05, + "loss": 0.0198, + "step": 1085 + }, + { + "epoch": 27.15, + "learning_rate": 3.51e-05, + "loss": 0.0229, + "step": 1086 + }, + { + "epoch": 27.18, + "learning_rate": 3.48e-05, + "loss": 0.0391, + "step": 1087 + }, + { + "epoch": 27.2, + "learning_rate": 3.45e-05, + "loss": 0.0461, + "step": 1088 + }, + { + "epoch": 27.23, + "learning_rate": 3.42e-05, + "loss": 0.0265, + "step": 1089 + }, + { + "epoch": 27.25, + "learning_rate": 3.39e-05, + "loss": 0.0203, + "step": 1090 + }, + { + "epoch": 27.27, + "learning_rate": 3.36e-05, + "loss": 0.0225, + "step": 1091 + }, + { + "epoch": 27.3, + "learning_rate": 3.3299999999999996e-05, + "loss": 0.016, + "step": 1092 + }, + { + "epoch": 27.32, + "learning_rate": 3.2999999999999996e-05, + "loss": 0.0387, + "step": 1093 + }, + { + "epoch": 27.35, + "learning_rate": 3.2699999999999995e-05, + "loss": 0.0278, + "step": 1094 + }, + { + "epoch": 27.38, + "learning_rate": 3.2399999999999995e-05, + "loss": 0.0277, + "step": 1095 + }, + { + "epoch": 27.4, + "learning_rate": 3.2099999999999994e-05, + "loss": 0.0286, + "step": 1096 + }, + { + "epoch": 27.43, + "learning_rate": 3.1799999999999994e-05, + "loss": 0.023, + "step": 1097 + }, + { + "epoch": 27.45, + "learning_rate": 3.149999999999999e-05, + "loss": 0.0175, + "step": 1098 + }, + { + "epoch": 27.48, + "learning_rate": 3.119999999999999e-05, + "loss": 0.0193, + "step": 1099 + }, + { + "epoch": 27.5, + "learning_rate": 3.09e-05, + "loss": 0.0243, + "step": 1100 + }, + { + "epoch": 27.52, + "learning_rate": 3.06e-05, + "loss": 0.0251, + "step": 1101 + }, + { + "epoch": 27.55, + "learning_rate": 3.0299999999999998e-05, + "loss": 0.0207, + "step": 1102 + }, + { + "epoch": 27.57, + "learning_rate": 2.9999999999999997e-05, + "loss": 0.0227, + "step": 1103 + }, + { + "epoch": 27.6, + "learning_rate": 2.97e-05, + "loss": 0.0694, + "step": 1104 + }, + { + "epoch": 27.62, + "learning_rate": 2.94e-05, + "loss": 0.0346, + "step": 1105 + }, + { + "epoch": 27.65, + "learning_rate": 2.91e-05, + "loss": 0.0373, + "step": 1106 + }, + { + "epoch": 27.68, + "learning_rate": 2.88e-05, + "loss": 0.0259, + "step": 1107 + }, + { + "epoch": 27.7, + "learning_rate": 2.8499999999999998e-05, + "loss": 0.0307, + "step": 1108 + }, + { + "epoch": 27.73, + "learning_rate": 2.8199999999999998e-05, + "loss": 0.0229, + "step": 1109 + }, + { + "epoch": 27.75, + "learning_rate": 2.7899999999999997e-05, + "loss": 0.0202, + "step": 1110 + }, + { + "epoch": 27.77, + "learning_rate": 2.7599999999999997e-05, + "loss": 0.0231, + "step": 1111 + }, + { + "epoch": 27.8, + "learning_rate": 2.7299999999999996e-05, + "loss": 0.0244, + "step": 1112 + }, + { + "epoch": 27.82, + "learning_rate": 2.6999999999999996e-05, + "loss": 0.0311, + "step": 1113 + }, + { + "epoch": 27.85, + "learning_rate": 2.6699999999999995e-05, + "loss": 0.0447, + "step": 1114 + }, + { + "epoch": 27.88, + "learning_rate": 2.6399999999999995e-05, + "loss": 0.0293, + "step": 1115 + }, + { + "epoch": 27.9, + "learning_rate": 2.6099999999999997e-05, + "loss": 0.0255, + "step": 1116 + }, + { + "epoch": 27.93, + "learning_rate": 2.5799999999999997e-05, + "loss": 0.021, + "step": 1117 + }, + { + "epoch": 27.95, + "learning_rate": 2.55e-05, + "loss": 0.0276, + "step": 1118 + }, + { + "epoch": 27.98, + "learning_rate": 2.52e-05, + "loss": 0.0358, + "step": 1119 + }, + { + "epoch": 28.0, + "learning_rate": 2.49e-05, + "loss": 0.0136, + "step": 1120 + }, + { + "epoch": 28.02, + "learning_rate": 2.4599999999999998e-05, + "loss": 0.0417, + "step": 1121 + }, + { + "epoch": 28.05, + "learning_rate": 2.4299999999999998e-05, + "loss": 0.0293, + "step": 1122 + }, + { + "epoch": 28.07, + "learning_rate": 2.3999999999999997e-05, + "loss": 0.0401, + "step": 1123 + }, + { + "epoch": 28.1, + "learning_rate": 2.3699999999999997e-05, + "loss": 0.0195, + "step": 1124 + }, + { + "epoch": 28.12, + "learning_rate": 2.34e-05, + "loss": 0.0179, + "step": 1125 + }, + { + "epoch": 28.15, + "learning_rate": 2.31e-05, + "loss": 0.0201, + "step": 1126 + }, + { + "epoch": 28.18, + "learning_rate": 2.28e-05, + "loss": 0.0269, + "step": 1127 + }, + { + "epoch": 28.2, + "learning_rate": 2.2499999999999998e-05, + "loss": 0.0343, + "step": 1128 + }, + { + "epoch": 28.23, + "learning_rate": 2.2199999999999998e-05, + "loss": 0.0269, + "step": 1129 + }, + { + "epoch": 28.25, + "learning_rate": 2.1899999999999997e-05, + "loss": 0.0197, + "step": 1130 + }, + { + "epoch": 28.27, + "learning_rate": 2.1599999999999996e-05, + "loss": 0.029, + "step": 1131 + }, + { + "epoch": 28.3, + "learning_rate": 2.1299999999999996e-05, + "loss": 0.027, + "step": 1132 + }, + { + "epoch": 28.32, + "learning_rate": 2.1e-05, + "loss": 0.028, + "step": 1133 + }, + { + "epoch": 28.35, + "learning_rate": 2.07e-05, + "loss": 0.0298, + "step": 1134 + }, + { + "epoch": 28.38, + "learning_rate": 2.04e-05, + "loss": 0.0328, + "step": 1135 + }, + { + "epoch": 28.4, + "learning_rate": 2.01e-05, + "loss": 0.0235, + "step": 1136 + }, + { + "epoch": 28.43, + "learning_rate": 1.98e-05, + "loss": 0.0159, + "step": 1137 + }, + { + "epoch": 28.45, + "learning_rate": 1.95e-05, + "loss": 0.0151, + "step": 1138 + }, + { + "epoch": 28.48, + "learning_rate": 1.92e-05, + "loss": 0.0227, + "step": 1139 + }, + { + "epoch": 28.5, + "learning_rate": 1.89e-05, + "loss": 0.0283, + "step": 1140 + }, + { + "epoch": 28.52, + "learning_rate": 1.8599999999999998e-05, + "loss": 0.0229, + "step": 1141 + }, + { + "epoch": 28.55, + "learning_rate": 1.8299999999999998e-05, + "loss": 0.0259, + "step": 1142 + }, + { + "epoch": 28.57, + "learning_rate": 1.7999999999999997e-05, + "loss": 0.0298, + "step": 1143 + }, + { + "epoch": 28.6, + "learning_rate": 1.7699999999999997e-05, + "loss": 0.0709, + "step": 1144 + }, + { + "epoch": 28.62, + "learning_rate": 1.74e-05, + "loss": 0.0279, + "step": 1145 + }, + { + "epoch": 28.65, + "learning_rate": 1.71e-05, + "loss": 0.0321, + "step": 1146 + }, + { + "epoch": 28.68, + "learning_rate": 1.68e-05, + "loss": 0.0268, + "step": 1147 + }, + { + "epoch": 28.7, + "learning_rate": 1.6499999999999998e-05, + "loss": 0.0341, + "step": 1148 + }, + { + "epoch": 28.73, + "learning_rate": 1.6199999999999997e-05, + "loss": 0.0224, + "step": 1149 + }, + { + "epoch": 28.75, + "learning_rate": 1.5899999999999997e-05, + "loss": 0.0206, + "step": 1150 + }, + { + "epoch": 28.77, + "learning_rate": 1.5599999999999996e-05, + "loss": 0.0299, + "step": 1151 + }, + { + "epoch": 28.8, + "learning_rate": 1.53e-05, + "loss": 0.0375, + "step": 1152 + }, + { + "epoch": 28.82, + "learning_rate": 1.4999999999999999e-05, + "loss": 0.0376, + "step": 1153 + }, + { + "epoch": 28.85, + "learning_rate": 1.47e-05, + "loss": 0.0286, + "step": 1154 + }, + { + "epoch": 28.88, + "learning_rate": 1.44e-05, + "loss": 0.0235, + "step": 1155 + }, + { + "epoch": 28.9, + "learning_rate": 1.4099999999999999e-05, + "loss": 0.021, + "step": 1156 + }, + { + "epoch": 28.93, + "learning_rate": 1.3799999999999998e-05, + "loss": 0.0225, + "step": 1157 + }, + { + "epoch": 28.95, + "learning_rate": 1.3499999999999998e-05, + "loss": 0.03, + "step": 1158 + }, + { + "epoch": 28.98, + "learning_rate": 1.3199999999999997e-05, + "loss": 0.0395, + "step": 1159 + }, + { + "epoch": 29.0, + "learning_rate": 1.2899999999999998e-05, + "loss": 0.0231, + "step": 1160 + }, + { + "epoch": 29.02, + "learning_rate": 1.26e-05, + "loss": 0.04, + "step": 1161 + }, + { + "epoch": 29.05, + "learning_rate": 1.2299999999999999e-05, + "loss": 0.0264, + "step": 1162 + }, + { + "epoch": 29.07, + "learning_rate": 1.1999999999999999e-05, + "loss": 0.0284, + "step": 1163 + }, + { + "epoch": 29.1, + "learning_rate": 1.17e-05, + "loss": 0.0262, + "step": 1164 + }, + { + "epoch": 29.12, + "learning_rate": 1.14e-05, + "loss": 0.0165, + "step": 1165 + }, + { + "epoch": 29.15, + "learning_rate": 1.1099999999999999e-05, + "loss": 0.016, + "step": 1166 + }, + { + "epoch": 29.18, + "learning_rate": 1.0799999999999998e-05, + "loss": 0.0307, + "step": 1167 + }, + { + "epoch": 29.2, + "learning_rate": 1.05e-05, + "loss": 0.0387, + "step": 1168 + }, + { + "epoch": 29.23, + "learning_rate": 1.02e-05, + "loss": 0.0272, + "step": 1169 + }, + { + "epoch": 29.25, + "learning_rate": 9.9e-06, + "loss": 0.0201, + "step": 1170 + }, + { + "epoch": 29.27, + "learning_rate": 9.6e-06, + "loss": 0.0246, + "step": 1171 + }, + { + "epoch": 29.3, + "learning_rate": 9.299999999999999e-06, + "loss": 0.0309, + "step": 1172 + }, + { + "epoch": 29.32, + "learning_rate": 8.999999999999999e-06, + "loss": 0.033, + "step": 1173 + }, + { + "epoch": 29.35, + "learning_rate": 8.7e-06, + "loss": 0.0267, + "step": 1174 + }, + { + "epoch": 29.38, + "learning_rate": 8.4e-06, + "loss": 0.0267, + "step": 1175 + }, + { + "epoch": 29.4, + "learning_rate": 8.099999999999999e-06, + "loss": 0.0255, + "step": 1176 + }, + { + "epoch": 29.43, + "learning_rate": 7.799999999999998e-06, + "loss": 0.0326, + "step": 1177 + }, + { + "epoch": 29.45, + "learning_rate": 7.499999999999999e-06, + "loss": 0.0309, + "step": 1178 + }, + { + "epoch": 29.48, + "learning_rate": 7.2e-06, + "loss": 0.0234, + "step": 1179 + }, + { + "epoch": 29.5, + "learning_rate": 6.899999999999999e-06, + "loss": 0.0288, + "step": 1180 + }, + { + "epoch": 29.52, + "learning_rate": 6.599999999999999e-06, + "loss": 0.0244, + "step": 1181 + }, + { + "epoch": 29.55, + "learning_rate": 6.3e-06, + "loss": 0.0282, + "step": 1182 + }, + { + "epoch": 29.57, + "learning_rate": 5.999999999999999e-06, + "loss": 0.0202, + "step": 1183 + }, + { + "epoch": 29.6, + "learning_rate": 5.7e-06, + "loss": 0.0657, + "step": 1184 + }, + { + "epoch": 29.62, + "learning_rate": 5.399999999999999e-06, + "loss": 0.0205, + "step": 1185 + }, + { + "epoch": 29.65, + "learning_rate": 5.1e-06, + "loss": 0.0422, + "step": 1186 + }, + { + "epoch": 29.68, + "learning_rate": 4.8e-06, + "loss": 0.0203, + "step": 1187 + }, + { + "epoch": 29.7, + "learning_rate": 4.499999999999999e-06, + "loss": 0.0197, + "step": 1188 + }, + { + "epoch": 29.73, + "learning_rate": 4.2e-06, + "loss": 0.032, + "step": 1189 + }, + { + "epoch": 29.75, + "learning_rate": 3.899999999999999e-06, + "loss": 0.0243, + "step": 1190 + }, + { + "epoch": 29.77, + "learning_rate": 3.6e-06, + "loss": 0.0162, + "step": 1191 + }, + { + "epoch": 29.8, + "learning_rate": 3.2999999999999993e-06, + "loss": 0.0353, + "step": 1192 + }, + { + "epoch": 29.82, + "learning_rate": 2.9999999999999997e-06, + "loss": 0.0329, + "step": 1193 + }, + { + "epoch": 29.85, + "learning_rate": 2.6999999999999996e-06, + "loss": 0.0159, + "step": 1194 + }, + { + "epoch": 29.88, + "learning_rate": 2.4e-06, + "loss": 0.0263, + "step": 1195 + }, + { + "epoch": 29.9, + "learning_rate": 2.1e-06, + "loss": 0.0211, + "step": 1196 + }, + { + "epoch": 29.93, + "learning_rate": 1.8e-06, + "loss": 0.0178, + "step": 1197 + }, + { + "epoch": 29.95, + "learning_rate": 1.4999999999999998e-06, + "loss": 0.0278, + "step": 1198 + }, + { + "epoch": 29.98, + "learning_rate": 1.2e-06, + "loss": 0.0283, + "step": 1199 + }, + { + "epoch": 30.0, + "learning_rate": 9e-07, + "loss": 0.0143, + "step": 1200 + }, + { + "epoch": 30.0, + "eval_cer": 0.14687624103839642, + "eval_loss": 0.5961307287216187, + "eval_runtime": 8.1271, + "eval_samples_per_second": 48.48, + "eval_steps_per_second": 6.152, + "eval_wer": 0.34602505270990946, + "step": 1200 + }, + { + "epoch": 30.0, + "step": 1200, + "total_flos": 2.747721152315628e+19, + "train_loss": 1.155013433122852, + "train_runtime": 1766.4943, + "train_samples_per_second": 43.306, + "train_steps_per_second": 0.679 + } + ], + "max_steps": 1200, + "num_train_epochs": 30, + "total_flos": 2.747721152315628e+19, + "trial_name": null, + "trial_params": null +}