Automatic Speech Recognition
Transformers
Safetensors
Welsh
English
wav2vec2
Inference Endpoints
DewiBrynJones committed on
Commit
a4d3644
1 Parent(s): 124ec62

End of training

Browse files
README.md CHANGED
@@ -2,6 +2,8 @@
2
  license: apache-2.0
3
  base_model: facebook/wav2vec2-large-xlsr-53
4
  tags:
 
 
5
  - generated_from_trainer
6
  metrics:
7
  - wer
@@ -15,7 +17,7 @@ should probably proofread and complete it, then remove this comment. -->
15
 
16
  # wav2vec2-xlsr-53-ft-ccv-en-cy
17
 
18
- This model is a fine-tuned version of [facebook/wav2vec2-large-xlsr-53](https://huggingface.co/facebook/wav2vec2-large-xlsr-53) on an unknown dataset.
19
  It achieves the following results on the evaluation set:
20
  - Loss: 0.2754
21
  - Wer: 0.2115
 
2
  license: apache-2.0
3
  base_model: facebook/wav2vec2-large-xlsr-53
4
  tags:
5
+ - automatic-speech-recognition
6
+ - techiaith/commonvoice_16_1_en_cy
7
  - generated_from_trainer
8
  metrics:
9
  - wer
 
17
 
18
  # wav2vec2-xlsr-53-ft-ccv-en-cy
19
 
20
+ This model is a fine-tuned version of [facebook/wav2vec2-large-xlsr-53](https://huggingface.co/facebook/wav2vec2-large-xlsr-53) on the `techiaith/commonvoice_16_1_en_cy` dataset (default configuration).
21
  It achieves the following results on the evaluation set:
22
  - Loss: 0.2754
23
  - Wer: 0.2115
all_results.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
  "epoch": 4.52,
3
- "eval_loss": 0.2764733135700226,
4
- "eval_runtime": 1091.0858,
5
  "eval_samples": 26228,
6
- "eval_samples_per_second": 24.038,
7
- "eval_steps_per_second": 0.752,
8
- "eval_wer": 0.21145248459613483,
9
- "train_loss": 0.49683192168341744,
10
- "train_runtime": 52483.3074,
11
  "train_samples": 127287,
12
- "train_samples_per_second": 10.975,
13
- "train_steps_per_second": 0.171
14
  }
 
1
  {
2
  "epoch": 4.52,
3
+ "eval_loss": 0.2754187285900116,
4
+ "eval_runtime": 1104.7482,
5
  "eval_samples": 26228,
6
+ "eval_samples_per_second": 23.741,
7
+ "eval_steps_per_second": 0.742,
8
+ "eval_wer": 0.2115155720985006,
9
+ "train_loss": 0.5072881503634983,
10
+ "train_runtime": 52839.7574,
11
  "train_samples": 127287,
12
+ "train_samples_per_second": 10.901,
13
+ "train_steps_per_second": 0.17
14
  }
eval_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 4.52,
3
- "eval_loss": 0.2764733135700226,
4
- "eval_runtime": 1091.0858,
5
  "eval_samples": 26228,
6
- "eval_samples_per_second": 24.038,
7
- "eval_steps_per_second": 0.752,
8
- "eval_wer": 0.21145248459613483
9
  }
 
1
  {
2
  "epoch": 4.52,
3
+ "eval_loss": 0.2754187285900116,
4
+ "eval_runtime": 1104.7482,
5
  "eval_samples": 26228,
6
+ "eval_samples_per_second": 23.741,
7
+ "eval_steps_per_second": 0.742,
8
+ "eval_wer": 0.2115155720985006
9
  }
runs/Mar06_22-19-49_b210676ecd60/events.out.tfevents.1709821970.b210676ecd60.30.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:90be2fc1ee0800238cbdd863f6400d5f4d18c343ba2b7fad0e1ac0cc490e4da2
3
+ size 406
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 4.52,
3
- "train_loss": 0.49683192168341744,
4
- "train_runtime": 52483.3074,
5
  "train_samples": 127287,
6
- "train_samples_per_second": 10.975,
7
- "train_steps_per_second": 0.171
8
  }
 
1
  {
2
  "epoch": 4.52,
3
+ "train_loss": 0.5072881503634983,
4
+ "train_runtime": 52839.7574,
5
  "train_samples": 127287,
6
+ "train_samples_per_second": 10.901,
7
+ "train_steps_per_second": 0.17
8
  }
trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 0.2764733135700226,
3
  "best_model_checkpoint": "/models/hfhub/DewiBrynJones/wav2vec2-xlsr-53-ft-ccv-en-cy/checkpoint-9000",
4
  "epoch": 4.524886877828054,
5
  "eval_steps": 500,
@@ -10,328 +10,328 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.2,
13
- "grad_norm": 1.6815159320831299,
14
- "learning_rate": 0.00014774999999999999,
15
- "loss": 5.9898,
16
  "step": 400
17
  },
18
  {
19
  "epoch": 0.25,
20
- "eval_loss": 1.3093085289001465,
21
- "eval_runtime": 3176.8808,
22
- "eval_samples_per_second": 8.256,
23
- "eval_steps_per_second": 0.258,
24
- "eval_wer": 0.7970769457237188,
25
  "step": 500
26
  },
27
  {
28
  "epoch": 0.4,
29
- "grad_norm": 2.3292043209075928,
30
- "learning_rate": 0.00029775,
31
- "loss": 1.0749,
32
  "step": 800
33
  },
34
  {
35
  "epoch": 0.5,
36
- "eval_loss": 0.5815957188606262,
37
- "eval_runtime": 1123.0217,
38
- "eval_samples_per_second": 23.355,
39
- "eval_steps_per_second": 0.73,
40
- "eval_wer": 0.4617458414821357,
41
  "step": 1000
42
  },
43
  {
44
  "epoch": 0.6,
45
- "grad_norm": 2.9652466773986816,
46
- "learning_rate": 0.0002855853658536585,
47
- "loss": 0.4332,
48
  "step": 1200
49
  },
50
  {
51
  "epoch": 0.75,
52
- "eval_loss": 0.48338082432746887,
53
- "eval_runtime": 1101.5414,
54
- "eval_samples_per_second": 23.81,
55
- "eval_steps_per_second": 0.744,
56
- "eval_wer": 0.4091476878430383,
57
  "step": 1500
58
  },
59
  {
60
  "epoch": 0.8,
61
- "grad_norm": 3.57124924659729,
62
- "learning_rate": 0.0002709512195121951,
63
- "loss": 0.3655,
64
  "step": 1600
65
  },
66
  {
67
  "epoch": 1.01,
68
- "grad_norm": 1.1706229448318481,
69
- "learning_rate": 0.0002563170731707317,
70
- "loss": 0.3303,
71
  "step": 2000
72
  },
73
  {
74
  "epoch": 1.01,
75
- "eval_loss": 0.42033129930496216,
76
- "eval_runtime": 1101.1368,
77
- "eval_samples_per_second": 23.819,
78
- "eval_steps_per_second": 0.745,
79
- "eval_wer": 0.3419174394885707,
80
  "step": 2000
81
  },
82
  {
83
  "epoch": 1.21,
84
- "grad_norm": 0.8928599953651428,
85
- "learning_rate": 0.0002416829268292683,
86
- "loss": 0.276,
87
  "step": 2400
88
  },
89
  {
90
  "epoch": 1.26,
91
- "eval_loss": 0.3909631669521332,
92
- "eval_runtime": 1098.4606,
93
- "eval_samples_per_second": 23.877,
94
- "eval_steps_per_second": 0.746,
95
- "eval_wer": 0.3186423569490884,
96
  "step": 2500
97
  },
98
  {
99
  "epoch": 1.41,
100
- "grad_norm": 0.6678148508071899,
101
- "learning_rate": 0.00022704878048780485,
102
- "loss": 0.2591,
103
  "step": 2800
104
  },
105
  {
106
  "epoch": 1.51,
107
- "eval_loss": 0.39008986949920654,
108
- "eval_runtime": 1093.6554,
109
- "eval_samples_per_second": 23.982,
110
- "eval_steps_per_second": 0.75,
111
- "eval_wer": 0.3067188190019557,
112
  "step": 3000
113
  },
114
  {
115
  "epoch": 1.61,
116
- "grad_norm": 0.7449674606323242,
117
- "learning_rate": 0.00021241463414634144,
118
- "loss": 0.2501,
119
  "step": 3200
120
  },
121
  {
122
  "epoch": 1.76,
123
- "eval_loss": 0.3645510971546173,
124
- "eval_runtime": 1101.42,
125
- "eval_samples_per_second": 23.813,
126
- "eval_steps_per_second": 0.744,
127
- "eval_wer": 0.2895379891910079,
128
  "step": 3500
129
  },
130
  {
131
  "epoch": 1.81,
132
- "grad_norm": 0.994420051574707,
133
- "learning_rate": 0.00019778048780487803,
134
- "loss": 0.2332,
135
  "step": 3600
136
  },
137
  {
138
  "epoch": 2.01,
139
- "grad_norm": 0.632382333278656,
140
- "learning_rate": 0.00018314634146341462,
141
- "loss": 0.224,
142
  "step": 4000
143
  },
144
  {
145
  "epoch": 2.01,
146
- "eval_loss": 0.35174447298049927,
147
- "eval_runtime": 1113.1837,
148
- "eval_samples_per_second": 23.561,
149
- "eval_steps_per_second": 0.737,
150
- "eval_wer": 0.2805501230206296,
151
  "step": 4000
152
  },
153
  {
154
  "epoch": 2.21,
155
- "grad_norm": 0.5861485600471497,
156
- "learning_rate": 0.00016851219512195123,
157
- "loss": 0.182,
158
  "step": 4400
159
  },
160
  {
161
  "epoch": 2.26,
162
- "eval_loss": 0.33475443720817566,
163
- "eval_runtime": 1111.5845,
164
- "eval_samples_per_second": 23.595,
165
- "eval_steps_per_second": 0.738,
166
- "eval_wer": 0.2655689441255021,
167
  "step": 4500
168
  },
169
  {
170
  "epoch": 2.41,
171
- "grad_norm": 0.585738480091095,
172
- "learning_rate": 0.0001538780487804878,
173
- "loss": 0.1777,
174
  "step": 4800
175
  },
176
  {
177
  "epoch": 2.51,
178
- "eval_loss": 0.32769647240638733,
179
- "eval_runtime": 1109.5932,
180
- "eval_samples_per_second": 23.637,
181
- "eval_steps_per_second": 0.739,
182
- "eval_wer": 0.2611948772948079,
183
  "step": 5000
184
  },
185
  {
186
  "epoch": 2.61,
187
- "grad_norm": 0.6404664516448975,
188
- "learning_rate": 0.00013924390243902438,
189
- "loss": 0.1734,
190
  "step": 5200
191
  },
192
  {
193
  "epoch": 2.77,
194
- "eval_loss": 0.33233708143234253,
195
- "eval_runtime": 1114.1252,
196
- "eval_samples_per_second": 23.541,
197
- "eval_steps_per_second": 0.736,
198
- "eval_wer": 0.2643113999116775,
199
  "step": 5500
200
  },
201
  {
202
  "epoch": 2.82,
203
- "grad_norm": 1.567084550857544,
204
- "learning_rate": 0.00012460975609756097,
205
- "loss": 0.1704,
206
  "step": 5600
207
  },
208
  {
209
  "epoch": 3.02,
210
- "grad_norm": 1.35818612575531,
211
- "learning_rate": 0.00010997560975609755,
212
- "loss": 0.1629,
213
  "step": 6000
214
  },
215
  {
216
  "epoch": 3.02,
217
- "eval_loss": 0.31713536381721497,
218
- "eval_runtime": 1084.7842,
219
- "eval_samples_per_second": 24.178,
220
- "eval_steps_per_second": 0.756,
221
- "eval_wer": 0.24851428931928585,
222
  "step": 6000
223
  },
224
  {
225
  "epoch": 3.22,
226
- "grad_norm": 1.0975894927978516,
227
- "learning_rate": 9.534146341463413e-05,
228
- "loss": 0.1338,
229
  "step": 6400
230
  },
231
  {
232
  "epoch": 3.27,
233
- "eval_loss": 0.310283362865448,
234
- "eval_runtime": 1090.9879,
235
- "eval_samples_per_second": 24.041,
236
- "eval_steps_per_second": 0.752,
237
- "eval_wer": 0.23984186066073643,
238
  "step": 6500
239
  },
240
  {
241
  "epoch": 3.42,
242
- "grad_norm": 1.2747470140457153,
243
- "learning_rate": 8.070731707317072e-05,
244
- "loss": 0.1292,
245
  "step": 6800
246
  },
247
  {
248
  "epoch": 3.52,
249
- "eval_loss": 0.2933865785598755,
250
- "eval_runtime": 1076.7354,
251
- "eval_samples_per_second": 24.359,
252
- "eval_steps_per_second": 0.762,
253
- "eval_wer": 0.22680798267196603,
254
  "step": 7000
255
  },
256
  {
257
  "epoch": 3.62,
258
- "grad_norm": 0.5606548190116882,
259
- "learning_rate": 6.607317073170731e-05,
260
- "loss": 0.1264,
261
  "step": 7200
262
  },
263
  {
264
  "epoch": 3.77,
265
- "eval_loss": 0.29226595163345337,
266
- "eval_runtime": 1074.899,
267
- "eval_samples_per_second": 24.4,
268
- "eval_steps_per_second": 0.763,
269
- "eval_wer": 0.22483965259815364,
270
  "step": 7500
271
  },
272
  {
273
  "epoch": 3.82,
274
- "grad_norm": 1.5185168981552124,
275
- "learning_rate": 5.14390243902439e-05,
276
- "loss": 0.1241,
277
  "step": 7600
278
  },
279
  {
280
  "epoch": 4.02,
281
- "grad_norm": 0.7815582156181335,
282
- "learning_rate": 3.680487804878048e-05,
283
- "loss": 0.118,
284
  "step": 8000
285
  },
286
  {
287
  "epoch": 4.02,
288
- "eval_loss": 0.28800907731056213,
289
- "eval_runtime": 1083.1518,
290
- "eval_samples_per_second": 24.215,
291
- "eval_steps_per_second": 0.757,
292
- "eval_wer": 0.21931739322440225,
293
  "step": 8000
294
  },
295
  {
296
  "epoch": 4.22,
297
- "grad_norm": 1.339690089225769,
298
- "learning_rate": 2.217073170731707e-05,
299
- "loss": 0.0996,
300
  "step": 8400
301
  },
302
  {
303
  "epoch": 4.27,
304
- "eval_loss": 0.2792861759662628,
305
- "eval_runtime": 1078.7477,
306
- "eval_samples_per_second": 24.313,
307
- "eval_steps_per_second": 0.76,
308
- "eval_wer": 0.21242403213256786,
309
  "step": 8500
310
  },
311
  {
312
  "epoch": 4.42,
313
- "grad_norm": 1.0311238765716553,
314
- "learning_rate": 7.536585365853659e-06,
315
- "loss": 0.0969,
316
  "step": 8800
317
  },
318
  {
319
  "epoch": 4.52,
320
- "eval_loss": 0.2764733135700226,
321
- "eval_runtime": 1074.1619,
322
- "eval_samples_per_second": 24.417,
323
- "eval_steps_per_second": 0.763,
324
- "eval_wer": 0.21145248459613483,
325
  "step": 9000
326
  },
327
  {
328
  "epoch": 4.52,
329
  "step": 9000,
330
  "total_flos": 8.933861078537978e+19,
331
- "train_loss": 0.49683192168341744,
332
- "train_runtime": 52483.3074,
333
- "train_samples_per_second": 10.975,
334
- "train_steps_per_second": 0.171
335
  }
336
  ],
337
  "logging_steps": 400,
 
1
  {
2
+ "best_metric": 0.2754187285900116,
3
  "best_model_checkpoint": "/models/hfhub/DewiBrynJones/wav2vec2-xlsr-53-ft-ccv-en-cy/checkpoint-9000",
4
  "epoch": 4.524886877828054,
5
  "eval_steps": 500,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.2,
13
+ "grad_norm": 0.8404507637023926,
14
+ "learning_rate": 0.000147375,
15
+ "loss": 6.0574,
16
  "step": 400
17
  },
18
  {
19
  "epoch": 0.25,
20
+ "eval_loss": 2.0296876430511475,
21
+ "eval_runtime": 1138.2592,
22
+ "eval_samples_per_second": 23.042,
23
+ "eval_steps_per_second": 0.72,
24
+ "eval_wer": 0.9990915399659327,
25
  "step": 500
26
  },
27
  {
28
  "epoch": 0.4,
29
+ "grad_norm": 3.7007296085357666,
30
+ "learning_rate": 0.00029737499999999995,
31
+ "loss": 1.224,
32
  "step": 800
33
  },
34
  {
35
  "epoch": 0.5,
36
+ "eval_loss": 0.5368403792381287,
37
+ "eval_runtime": 1086.8898,
38
+ "eval_samples_per_second": 24.131,
39
+ "eval_steps_per_second": 0.754,
40
+ "eval_wer": 0.4342018379492356,
41
  "step": 1000
42
  },
43
  {
44
  "epoch": 0.6,
45
+ "grad_norm": 3.096679449081421,
46
+ "learning_rate": 0.0002856219512195122,
47
+ "loss": 0.434,
48
  "step": 1200
49
  },
50
  {
51
  "epoch": 0.75,
52
+ "eval_loss": 0.48607054352760315,
53
+ "eval_runtime": 1099.4965,
54
+ "eval_samples_per_second": 23.855,
55
+ "eval_steps_per_second": 0.746,
56
+ "eval_wer": 0.3890522154227914,
57
  "step": 1500
58
  },
59
  {
60
  "epoch": 0.8,
61
+ "grad_norm": 3.0235676765441895,
62
+ "learning_rate": 0.00027098780487804874,
63
+ "loss": 0.3643,
64
  "step": 1600
65
  },
66
  {
67
  "epoch": 1.01,
68
+ "grad_norm": 0.7186101078987122,
69
+ "learning_rate": 0.00025635365853658536,
70
+ "loss": 0.3295,
71
  "step": 2000
72
  },
73
  {
74
  "epoch": 1.01,
75
+ "eval_loss": 0.4301180839538574,
76
+ "eval_runtime": 1082.408,
77
+ "eval_samples_per_second": 24.231,
78
+ "eval_steps_per_second": 0.758,
79
+ "eval_wer": 0.34114777195970813,
80
  "step": 2000
81
  },
82
  {
83
  "epoch": 1.21,
84
+ "grad_norm": 1.1324162483215332,
85
+ "learning_rate": 0.00024171951219512195,
86
+ "loss": 0.2739,
87
  "step": 2400
88
  },
89
  {
90
  "epoch": 1.26,
91
+ "eval_loss": 0.381789892911911,
92
+ "eval_runtime": 1080.4732,
93
+ "eval_samples_per_second": 24.275,
94
+ "eval_steps_per_second": 0.759,
95
+ "eval_wer": 0.30533089394990853,
96
  "step": 2500
97
  },
98
  {
99
  "epoch": 1.41,
100
+ "grad_norm": 0.6504969596862793,
101
+ "learning_rate": 0.0002270853658536585,
102
+ "loss": 0.2619,
103
  "step": 2800
104
  },
105
  {
106
  "epoch": 1.51,
107
+ "eval_loss": 0.38941365480422974,
108
+ "eval_runtime": 1085.3029,
109
+ "eval_samples_per_second": 24.167,
110
+ "eval_steps_per_second": 0.756,
111
+ "eval_wer": 0.30601644480895,
112
  "step": 3000
113
  },
114
  {
115
  "epoch": 1.61,
116
+ "grad_norm": 0.8715857863426208,
117
+ "learning_rate": 0.0002124512195121951,
118
+ "loss": 0.2517,
119
  "step": 3200
120
  },
121
  {
122
  "epoch": 1.76,
123
+ "eval_loss": 0.3497116267681122,
124
+ "eval_runtime": 1075.6705,
125
+ "eval_samples_per_second": 24.383,
126
+ "eval_steps_per_second": 0.762,
127
+ "eval_wer": 0.2801926271738902,
128
  "step": 3500
129
  },
130
  {
131
  "epoch": 1.81,
132
+ "grad_norm": 0.5574731826782227,
133
+ "learning_rate": 0.0001978170731707317,
134
+ "loss": 0.2346,
135
  "step": 3600
136
  },
137
  {
138
  "epoch": 2.01,
139
+ "grad_norm": 1.0655726194381714,
140
+ "learning_rate": 0.00018318292682926828,
141
+ "loss": 0.2244,
142
  "step": 4000
143
  },
144
  {
145
  "epoch": 2.01,
146
+ "eval_loss": 0.35193705558776855,
147
+ "eval_runtime": 1078.0396,
148
+ "eval_samples_per_second": 24.329,
149
+ "eval_steps_per_second": 0.761,
150
+ "eval_wer": 0.2792000504700019,
151
  "step": 4000
152
  },
153
  {
154
  "epoch": 2.21,
155
+ "grad_norm": 0.5482127070426941,
156
+ "learning_rate": 0.0001685487804878049,
157
+ "loss": 0.1854,
158
  "step": 4400
159
  },
160
  {
161
  "epoch": 2.26,
162
+ "eval_loss": 0.33764052391052246,
163
+ "eval_runtime": 1076.667,
164
+ "eval_samples_per_second": 24.36,
165
+ "eval_steps_per_second": 0.762,
166
+ "eval_wer": 0.2718104010262234,
167
  "step": 4500
168
  },
169
  {
170
  "epoch": 2.41,
171
+ "grad_norm": 0.6764945387840271,
172
+ "learning_rate": 0.00015391463414634145,
173
+ "loss": 0.1779,
174
  "step": 4800
175
  },
176
  {
177
  "epoch": 2.51,
178
+ "eval_loss": 0.32059213519096375,
179
+ "eval_runtime": 1088.6325,
180
+ "eval_samples_per_second": 24.093,
181
+ "eval_steps_per_second": 0.753,
182
+ "eval_wer": 0.25195466111496645,
183
  "step": 5000
184
  },
185
  {
186
  "epoch": 2.61,
187
+ "grad_norm": 1.0232322216033936,
188
+ "learning_rate": 0.00013928048780487804,
189
+ "loss": 0.1749,
190
  "step": 5200
191
  },
192
  {
193
  "epoch": 2.77,
194
+ "eval_loss": 0.31690067052841187,
195
+ "eval_runtime": 1101.7225,
196
+ "eval_samples_per_second": 23.806,
197
+ "eval_steps_per_second": 0.744,
198
+ "eval_wer": 0.2534687611717452,
199
  "step": 5500
200
  },
201
  {
202
  "epoch": 2.82,
203
+ "grad_norm": 0.6185225248336792,
204
+ "learning_rate": 0.00012464634146341463,
205
+ "loss": 0.172,
206
  "step": 5600
207
  },
208
  {
209
  "epoch": 3.02,
210
+ "grad_norm": 0.9978949427604675,
211
+ "learning_rate": 0.0001100121951219512,
212
+ "loss": 0.1636,
213
  "step": 6000
214
  },
215
  {
216
  "epoch": 3.02,
217
+ "eval_loss": 0.3122297525405884,
218
+ "eval_runtime": 1102.4428,
219
+ "eval_samples_per_second": 23.791,
220
+ "eval_steps_per_second": 0.744,
221
+ "eval_wer": 0.24648707757659874,
222
  "step": 6000
223
  },
224
  {
225
  "epoch": 3.22,
226
+ "grad_norm": 0.9038313031196594,
227
+ "learning_rate": 9.53780487804878e-05,
228
+ "loss": 0.137,
229
  "step": 6400
230
  },
231
  {
232
  "epoch": 3.27,
233
+ "eval_loss": 0.30537155270576477,
234
+ "eval_runtime": 1108.9259,
235
+ "eval_samples_per_second": 23.652,
236
+ "eval_steps_per_second": 0.739,
237
+ "eval_wer": 0.23823523226715454,
238
  "step": 6500
239
  },
240
  {
241
  "epoch": 3.42,
242
+ "grad_norm": 0.9038735032081604,
243
+ "learning_rate": 8.074390243902438e-05,
244
+ "loss": 0.1311,
245
  "step": 6800
246
  },
247
  {
248
  "epoch": 3.52,
249
+ "eval_loss": 0.29557299613952637,
250
+ "eval_runtime": 1090.6163,
251
+ "eval_samples_per_second": 24.049,
252
+ "eval_steps_per_second": 0.752,
253
+ "eval_wer": 0.22798141021596954,
254
  "step": 7000
255
  },
256
  {
257
  "epoch": 3.62,
258
+ "grad_norm": 0.523389995098114,
259
+ "learning_rate": 6.610975609756097e-05,
260
+ "loss": 0.1261,
261
  "step": 7200
262
  },
263
  {
264
  "epoch": 3.77,
265
+ "eval_loss": 0.28975382447242737,
266
+ "eval_runtime": 1108.1062,
267
+ "eval_samples_per_second": 23.669,
268
+ "eval_steps_per_second": 0.74,
269
+ "eval_wer": 0.22361575505225748,
270
  "step": 7500
271
  },
272
  {
273
  "epoch": 3.82,
274
+ "grad_norm": 0.8056386709213257,
275
+ "learning_rate": 5.147560975609756e-05,
276
+ "loss": 0.1242,
277
  "step": 7600
278
  },
279
  {
280
  "epoch": 4.02,
281
+ "grad_norm": 1.5024261474609375,
282
+ "learning_rate": 3.684146341463414e-05,
283
+ "loss": 0.1187,
284
  "step": 8000
285
  },
286
  {
287
  "epoch": 4.02,
288
+ "eval_loss": 0.28465163707733154,
289
+ "eval_runtime": 1092.1505,
290
+ "eval_samples_per_second": 24.015,
291
+ "eval_steps_per_second": 0.751,
292
+ "eval_wer": 0.21763085399449036,
293
  "step": 8000
294
  },
295
  {
296
  "epoch": 4.22,
297
+ "grad_norm": 1.5841491222381592,
298
+ "learning_rate": 2.2207317073170727e-05,
299
+ "loss": 0.1011,
300
  "step": 8400
301
  },
302
  {
303
  "epoch": 4.27,
304
+ "eval_loss": 0.2762569189071655,
305
+ "eval_runtime": 1102.2478,
306
+ "eval_samples_per_second": 23.795,
307
+ "eval_steps_per_second": 0.744,
308
+ "eval_wer": 0.21240720879860367,
309
  "step": 8500
310
  },
311
  {
312
  "epoch": 4.42,
313
+ "grad_norm": 0.9246074557304382,
314
+ "learning_rate": 7.573170731707317e-06,
315
+ "loss": 0.0981,
316
  "step": 8800
317
  },
318
  {
319
  "epoch": 4.52,
320
+ "eval_loss": 0.2754187285900116,
321
+ "eval_runtime": 1092.5524,
322
+ "eval_samples_per_second": 24.006,
323
+ "eval_steps_per_second": 0.751,
324
+ "eval_wer": 0.2115155720985006,
325
  "step": 9000
326
  },
327
  {
328
  "epoch": 4.52,
329
  "step": 9000,
330
  "total_flos": 8.933861078537978e+19,
331
+ "train_loss": 0.5072881503634983,
332
+ "train_runtime": 52839.7574,
333
+ "train_samples_per_second": 10.901,
334
+ "train_steps_per_second": 0.17
335
  }
336
  ],
337
  "logging_steps": 400,