DewiBrynJones commited on
Commit
022365e
1 Parent(s): 9b09bbe

End of training

Browse files
README.md CHANGED
@@ -2,6 +2,8 @@
2
  license: apache-2.0
3
  base_model: facebook/wav2vec2-large-xlsr-53
4
  tags:
 
 
5
  - generated_from_trainer
6
  metrics:
7
  - wer
@@ -15,7 +17,7 @@ should probably proofread and complete it, then remove this comment. -->
15
 
16
  # wav2vec2-xlsr-53-ft-btb-ccv-cy
17
 
18
- This model is a fine-tuned version of [facebook/wav2vec2-large-xlsr-53](https://huggingface.co/facebook/wav2vec2-large-xlsr-53) on an unknown dataset.
19
  It achieves the following results on the evaluation set:
20
  - Loss: nan
21
  - Wer: 1.0
 
2
  license: apache-2.0
3
  base_model: facebook/wav2vec2-large-xlsr-53
4
  tags:
5
+ - automatic-speech-recognition
6
+ - DewiBrynJones/banc-trawsgrifiadau-bangor-clean-with-ccv
7
  - generated_from_trainer
8
  metrics:
9
  - wer
 
17
 
18
  # wav2vec2-xlsr-53-ft-btb-ccv-cy
19
 
20
+ This model is a fine-tuned version of [facebook/wav2vec2-large-xlsr-53](https://huggingface.co/facebook/wav2vec2-large-xlsr-53) on the DEWIBRYNJONES/BANC-TRAWSGRIFIADAU-BANGOR-CLEAN-WITH-CCV - DEFAULT dataset.
21
  It achieves the following results on the evaluation set:
22
  - Loss: nan
23
  - Wer: 1.0
all_results.json CHANGED
@@ -1,15 +1,15 @@
1
  {
2
  "epoch": 0.9619084263178146,
3
  "eval_loss": NaN,
4
- "eval_runtime": 185.8154,
5
  "eval_samples": 7022,
6
- "eval_samples_per_second": 37.79,
7
- "eval_steps_per_second": 0.592,
8
  "eval_wer": 1.0,
9
  "total_flos": 1.7109669148845115e+19,
10
- "train_loss": 0.5128920831044514,
11
- "train_runtime": 11433.8652,
12
  "train_samples": 124748,
13
- "train_samples_per_second": 10.495,
14
- "train_steps_per_second": 1.312
15
  }
 
1
  {
2
  "epoch": 0.9619084263178146,
3
  "eval_loss": NaN,
4
+ "eval_runtime": 185.5249,
5
  "eval_samples": 7022,
6
+ "eval_samples_per_second": 37.849,
7
+ "eval_steps_per_second": 0.593,
8
  "eval_wer": 1.0,
9
  "total_flos": 1.7109669148845115e+19,
10
+ "train_loss": 0.6328924499511719,
11
+ "train_runtime": 11517.021,
12
  "train_samples": 124748,
13
+ "train_samples_per_second": 10.419,
14
+ "train_steps_per_second": 1.302
15
  }
eval_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 0.9619084263178146,
3
  "eval_loss": NaN,
4
- "eval_runtime": 185.8154,
5
  "eval_samples": 7022,
6
- "eval_samples_per_second": 37.79,
7
- "eval_steps_per_second": 0.592,
8
  "eval_wer": 1.0
9
  }
 
1
  {
2
  "epoch": 0.9619084263178146,
3
  "eval_loss": NaN,
4
+ "eval_runtime": 185.5249,
5
  "eval_samples": 7022,
6
+ "eval_samples_per_second": 37.849,
7
+ "eval_steps_per_second": 0.593,
8
  "eval_wer": 1.0
9
  }
runs/Sep01_12-20-45_c461da42ad14/events.out.tfevents.1725203622.c461da42ad14.30.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5c2819053c7f08c0ba4802f854bc9f4102a8fef2cadf07fe1f2e665b91ec4ee
3
+ size 406
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 0.9619084263178146,
3
  "total_flos": 1.7109669148845115e+19,
4
- "train_loss": 0.5128920831044514,
5
- "train_runtime": 11433.8652,
6
  "train_samples": 124748,
7
- "train_samples_per_second": 10.495,
8
- "train_steps_per_second": 1.312
9
  }
 
1
  {
2
  "epoch": 0.9619084263178146,
3
  "total_flos": 1.7109669148845115e+19,
4
+ "train_loss": 0.6328924499511719,
5
+ "train_runtime": 11517.021,
6
  "train_samples": 124748,
7
+ "train_samples_per_second": 10.419,
8
+ "train_steps_per_second": 1.302
9
  }
trainer_state.json CHANGED
@@ -10,384 +10,384 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.03206361421059382,
13
- "grad_norm": 9.123078346252441,
14
- "learning_rate": 0.0002465,
15
- "loss": 4.6156,
16
  "step": 500
17
  },
18
  {
19
  "epoch": 0.03206361421059382,
20
- "eval_loss": 1.5867419242858887,
21
- "eval_runtime": 188.7881,
22
- "eval_samples_per_second": 37.195,
23
- "eval_steps_per_second": 0.583,
24
- "eval_wer": 0.9176576887814082,
25
  "step": 500
26
  },
27
  {
28
  "epoch": 0.06412722842118763,
29
- "grad_norm": 6.654547691345215,
30
- "learning_rate": 0.00029181249999999997,
31
- "loss": 1.0315,
32
  "step": 1000
33
  },
34
  {
35
  "epoch": 0.06412722842118763,
36
- "eval_loss": 1.1748294830322266,
37
- "eval_runtime": 190.3404,
38
- "eval_samples_per_second": 36.892,
39
- "eval_steps_per_second": 0.578,
40
- "eval_wer": 0.7888358867377988,
41
  "step": 1000
42
  },
43
  {
44
  "epoch": 0.09619084263178146,
45
- "grad_norm": 6.171149253845215,
46
- "learning_rate": 0.0002813958333333333,
47
- "loss": 0.834,
48
  "step": 1500
49
  },
50
  {
51
  "epoch": 0.09619084263178146,
52
- "eval_loss": 1.0392996072769165,
53
- "eval_runtime": 189.2832,
54
- "eval_samples_per_second": 37.098,
55
- "eval_steps_per_second": 0.581,
56
- "eval_wer": 0.7219867391275462,
57
  "step": 1500
58
  },
59
  {
60
  "epoch": 0.12825445684237527,
61
- "grad_norm": 6.896900177001953,
62
- "learning_rate": 0.00027097916666666666,
63
- "loss": 0.7184,
64
  "step": 2000
65
  },
66
  {
67
  "epoch": 0.12825445684237527,
68
- "eval_loss": 0.9616143703460693,
69
- "eval_runtime": 190.9944,
70
- "eval_samples_per_second": 36.765,
71
- "eval_steps_per_second": 0.576,
72
- "eval_wer": 0.663747857399115,
73
  "step": 2000
74
  },
75
  {
76
  "epoch": 0.16031807105296908,
77
- "grad_norm": 9.408955574035645,
78
- "learning_rate": 0.0002605625,
79
- "loss": 0.6655,
80
  "step": 2500
81
  },
82
  {
83
  "epoch": 0.16031807105296908,
84
- "eval_loss": 0.9033711552619934,
85
- "eval_runtime": 190.9851,
86
- "eval_samples_per_second": 36.767,
87
- "eval_steps_per_second": 0.576,
88
- "eval_wer": 0.6331335787081944,
89
  "step": 2500
90
  },
91
  {
92
  "epoch": 0.19238168526356292,
93
- "grad_norm": 6.4334211349487305,
94
- "learning_rate": 0.0002501458333333333,
95
- "loss": 0.6193,
96
  "step": 3000
97
  },
98
  {
99
  "epoch": 0.19238168526356292,
100
- "eval_loss": 0.8614802956581116,
101
- "eval_runtime": 191.2463,
102
- "eval_samples_per_second": 36.717,
103
- "eval_steps_per_second": 0.575,
104
- "eval_wer": 0.6238988028009939,
105
  "step": 3000
106
  },
107
  {
108
  "epoch": 0.22444529947415673,
109
- "grad_norm": 3.711681365966797,
110
- "learning_rate": 0.00023972916666666665,
111
- "loss": 0.5952,
112
  "step": 3500
113
  },
114
  {
115
  "epoch": 0.22444529947415673,
116
- "eval_loss": 0.8161324858665466,
117
- "eval_runtime": 191.2031,
118
- "eval_samples_per_second": 36.725,
119
- "eval_steps_per_second": 0.575,
120
- "eval_wer": 0.5866275129884798,
121
  "step": 3500
122
  },
123
  {
124
  "epoch": 0.25650891368475054,
125
- "grad_norm": 7.527787208557129,
126
- "learning_rate": 0.00022933333333333332,
127
- "loss": 0.5622,
128
  "step": 4000
129
  },
130
  {
131
  "epoch": 0.25650891368475054,
132
- "eval_loss": 0.811023473739624,
133
- "eval_runtime": 190.6985,
134
- "eval_samples_per_second": 36.823,
135
- "eval_steps_per_second": 0.577,
136
- "eval_wer": 0.5850728816487065,
137
  "step": 4000
138
  },
139
  {
140
  "epoch": 0.2885725278953444,
141
- "grad_norm": 11.801218032836914,
142
- "learning_rate": 0.0002189583333333333,
143
- "loss": 0.5341,
144
  "step": 4500
145
  },
146
  {
147
  "epoch": 0.2885725278953444,
148
- "eval_loss": 0.757978618144989,
149
- "eval_runtime": 192.268,
150
- "eval_samples_per_second": 36.522,
151
- "eval_steps_per_second": 0.572,
152
- "eval_wer": 0.5546579146680132,
153
  "step": 4500
154
  },
155
  {
156
  "epoch": 0.32063614210593816,
157
- "grad_norm": 9.381750106811523,
158
- "learning_rate": 0.00020854166666666664,
159
- "loss": 0.522,
160
  "step": 5000
161
  },
162
  {
163
  "epoch": 0.32063614210593816,
164
- "eval_loss": 0.7397128343582153,
165
- "eval_runtime": 191.0373,
166
- "eval_samples_per_second": 36.757,
167
- "eval_steps_per_second": 0.576,
168
- "eval_wer": 0.5411711556092959,
169
  "step": 5000
170
  },
171
  {
172
  "epoch": 0.352699756316532,
173
- "grad_norm": 6.341240882873535,
174
- "learning_rate": 0.00019812499999999998,
175
- "loss": 0.5123,
176
  "step": 5500
177
  },
178
  {
179
  "epoch": 0.352699756316532,
180
- "eval_loss": 0.7228623628616333,
181
- "eval_runtime": 191.6536,
182
- "eval_samples_per_second": 36.639,
183
- "eval_steps_per_second": 0.574,
184
- "eval_wer": 0.531737067991868,
185
  "step": 5500
186
  },
187
  {
188
  "epoch": 0.38476337052712584,
189
- "grad_norm": 6.53903341293335,
190
- "learning_rate": 0.00018772916666666666,
191
- "loss": 0.4884,
192
  "step": 6000
193
  },
194
  {
195
  "epoch": 0.38476337052712584,
196
- "eval_loss": 0.72346431016922,
197
- "eval_runtime": 191.4082,
198
- "eval_samples_per_second": 36.686,
199
- "eval_steps_per_second": 0.575,
200
- "eval_wer": 0.5164830784358017,
201
  "step": 6000
202
  },
203
  {
204
  "epoch": 0.4168269847377196,
205
- "grad_norm": 10.402660369873047,
206
- "learning_rate": 0.00017731249999999998,
207
- "loss": 0.4658,
208
  "step": 6500
209
  },
210
  {
211
  "epoch": 0.4168269847377196,
212
- "eval_loss": 0.681357204914093,
213
- "eval_runtime": 191.0697,
214
- "eval_samples_per_second": 36.751,
215
- "eval_steps_per_second": 0.576,
216
- "eval_wer": 0.5116995973903453,
217
  "step": 6500
218
  },
219
  {
220
  "epoch": 0.44889059894831346,
221
- "grad_norm": 11.663326263427734,
222
- "learning_rate": 0.00016691666666666667,
223
- "loss": 0.4471,
224
  "step": 7000
225
  },
226
  {
227
  "epoch": 0.44889059894831346,
228
- "eval_loss": 0.662290632724762,
229
- "eval_runtime": 191.4867,
230
- "eval_samples_per_second": 36.671,
231
- "eval_steps_per_second": 0.574,
232
- "eval_wer": 0.4890577871085186,
233
  "step": 7000
234
  },
235
  {
236
  "epoch": 0.4809542131589073,
237
- "grad_norm": 7.363061428070068,
238
- "learning_rate": 0.00015649999999999998,
239
- "loss": 0.4338,
240
  "step": 7500
241
  },
242
  {
243
  "epoch": 0.4809542131589073,
244
- "eval_loss": 0.6449915170669556,
245
- "eval_runtime": 190.9868,
246
- "eval_samples_per_second": 36.767,
247
- "eval_steps_per_second": 0.576,
248
- "eval_wer": 0.4913830903945043,
249
  "step": 7500
250
  },
251
  {
252
  "epoch": 0.5130178273695011,
253
- "grad_norm": 14.478469848632812,
254
- "learning_rate": 0.00014610416666666667,
255
- "loss": 0.4267,
256
  "step": 8000
257
  },
258
  {
259
  "epoch": 0.5130178273695011,
260
- "eval_loss": 0.6256160736083984,
261
- "eval_runtime": 190.8261,
262
- "eval_samples_per_second": 36.798,
263
- "eval_steps_per_second": 0.576,
264
- "eval_wer": 0.4685419684024502,
265
  "step": 8000
266
  },
267
  {
268
  "epoch": 0.5450814415800949,
269
- "grad_norm": 10.456161499023438,
270
- "learning_rate": 0.00013568749999999998,
271
- "loss": 0.4283,
272
  "step": 8500
273
  },
274
  {
275
  "epoch": 0.5450814415800949,
276
- "eval_loss": 0.6342806816101074,
277
- "eval_runtime": 190.609,
278
- "eval_samples_per_second": 36.84,
279
- "eval_steps_per_second": 0.577,
280
- "eval_wer": 0.4710665833986633,
281
  "step": 8500
282
  },
283
  {
284
  "epoch": 0.5771450557906888,
285
- "grad_norm": 9.847672462463379,
286
- "learning_rate": 0.00012527083333333333,
287
- "loss": 0.4131,
288
  "step": 9000
289
  },
290
  {
291
  "epoch": 0.5771450557906888,
292
- "eval_loss": 0.5988845229148865,
293
- "eval_runtime": 189.2404,
294
- "eval_samples_per_second": 37.106,
295
- "eval_steps_per_second": 0.581,
296
- "eval_wer": 0.4486506597217608,
297
  "step": 9000
298
  },
299
  {
300
  "epoch": 0.6092086700012825,
301
- "grad_norm": 7.610143661499023,
302
- "learning_rate": 0.00011485416666666666,
303
- "loss": 0.4317,
304
  "step": 9500
305
  },
306
  {
307
  "epoch": 0.6092086700012825,
308
- "eval_loss": 0.7167520523071289,
309
- "eval_runtime": 189.8256,
310
- "eval_samples_per_second": 36.992,
311
- "eval_steps_per_second": 0.579,
312
- "eval_wer": 0.4919677380778379,
313
  "step": 9500
314
  },
315
  {
316
  "epoch": 0.6412722842118763,
317
- "grad_norm": NaN,
318
- "learning_rate": 0.00010691666666666665,
319
- "loss": 0.5904,
320
  "step": 10000
321
  },
322
  {
323
  "epoch": 0.6412722842118763,
324
- "eval_loss": NaN,
325
- "eval_runtime": 190.1563,
326
- "eval_samples_per_second": 36.928,
327
- "eval_steps_per_second": 0.578,
328
- "eval_wer": 0.7309956284298224,
329
  "step": 10000
330
  },
331
  {
332
  "epoch": 0.6733358984224702,
333
- "grad_norm": NaN,
334
- "learning_rate": 0.000106875,
335
- "loss": 0.0513,
336
  "step": 10500
337
  },
338
  {
339
  "epoch": 0.6733358984224702,
340
- "eval_loss": NaN,
341
- "eval_runtime": 185.5416,
342
- "eval_samples_per_second": 37.846,
343
- "eval_steps_per_second": 0.593,
344
- "eval_wer": 1.0,
345
  "step": 10500
346
  },
347
  {
348
  "epoch": 0.705399512633064,
349
- "grad_norm": NaN,
350
- "learning_rate": 0.000106875,
351
- "loss": 0.0,
352
  "step": 11000
353
  },
354
  {
355
  "epoch": 0.705399512633064,
356
- "eval_loss": NaN,
357
- "eval_runtime": 185.2695,
358
- "eval_samples_per_second": 37.902,
359
- "eval_steps_per_second": 0.594,
360
- "eval_wer": 1.0,
361
  "step": 11000
362
  },
363
  {
364
  "epoch": 0.7374631268436578,
365
- "grad_norm": NaN,
366
- "learning_rate": 0.000106875,
367
- "loss": 0.0,
368
  "step": 11500
369
  },
370
  {
371
  "epoch": 0.7374631268436578,
372
- "eval_loss": NaN,
373
- "eval_runtime": 185.1794,
374
- "eval_samples_per_second": 37.92,
375
- "eval_steps_per_second": 0.594,
376
- "eval_wer": 1.0,
377
  "step": 11500
378
  },
379
  {
380
  "epoch": 0.7695267410542517,
381
  "grad_norm": NaN,
382
- "learning_rate": 0.000106875,
383
- "loss": 0.0,
384
  "step": 12000
385
  },
386
  {
387
  "epoch": 0.7695267410542517,
388
  "eval_loss": NaN,
389
- "eval_runtime": 184.8489,
390
- "eval_samples_per_second": 37.988,
391
  "eval_steps_per_second": 0.595,
392
  "eval_wer": 1.0,
393
  "step": 12000
@@ -395,15 +395,15 @@
395
  {
396
  "epoch": 0.8015903552648455,
397
  "grad_norm": NaN,
398
- "learning_rate": 0.000106875,
399
  "loss": 0.0,
400
  "step": 12500
401
  },
402
  {
403
  "epoch": 0.8015903552648455,
404
  "eval_loss": NaN,
405
- "eval_runtime": 184.8249,
406
- "eval_samples_per_second": 37.993,
407
  "eval_steps_per_second": 0.595,
408
  "eval_wer": 1.0,
409
  "step": 12500
@@ -411,80 +411,80 @@
411
  {
412
  "epoch": 0.8336539694754392,
413
  "grad_norm": NaN,
414
- "learning_rate": 0.000106875,
415
  "loss": 0.0,
416
  "step": 13000
417
  },
418
  {
419
  "epoch": 0.8336539694754392,
420
  "eval_loss": NaN,
421
- "eval_runtime": 185.2964,
422
- "eval_samples_per_second": 37.896,
423
- "eval_steps_per_second": 0.594,
424
  "eval_wer": 1.0,
425
  "step": 13000
426
  },
427
  {
428
  "epoch": 0.8657175836860331,
429
  "grad_norm": NaN,
430
- "learning_rate": 0.000106875,
431
  "loss": 0.0,
432
  "step": 13500
433
  },
434
  {
435
  "epoch": 0.8657175836860331,
436
  "eval_loss": NaN,
437
- "eval_runtime": 184.7613,
438
- "eval_samples_per_second": 38.006,
439
- "eval_steps_per_second": 0.595,
440
  "eval_wer": 1.0,
441
  "step": 13500
442
  },
443
  {
444
  "epoch": 0.8977811978966269,
445
  "grad_norm": NaN,
446
- "learning_rate": 0.000106875,
447
  "loss": 0.0,
448
  "step": 14000
449
  },
450
  {
451
  "epoch": 0.8977811978966269,
452
  "eval_loss": NaN,
453
- "eval_runtime": 184.7837,
454
- "eval_samples_per_second": 38.001,
455
- "eval_steps_per_second": 0.595,
456
  "eval_wer": 1.0,
457
  "step": 14000
458
  },
459
  {
460
  "epoch": 0.9298448121072207,
461
  "grad_norm": NaN,
462
- "learning_rate": 0.000106875,
463
  "loss": 0.0,
464
  "step": 14500
465
  },
466
  {
467
  "epoch": 0.9298448121072207,
468
  "eval_loss": NaN,
469
- "eval_runtime": 184.6054,
470
- "eval_samples_per_second": 38.038,
471
- "eval_steps_per_second": 0.596,
472
  "eval_wer": 1.0,
473
  "step": 14500
474
  },
475
  {
476
  "epoch": 0.9619084263178146,
477
  "grad_norm": NaN,
478
- "learning_rate": 0.000106875,
479
  "loss": 0.0,
480
  "step": 15000
481
  },
482
  {
483
  "epoch": 0.9619084263178146,
484
  "eval_loss": NaN,
485
- "eval_runtime": 184.8182,
486
- "eval_samples_per_second": 37.994,
487
- "eval_steps_per_second": 0.595,
488
  "eval_wer": 1.0,
489
  "step": 15000
490
  },
@@ -492,10 +492,10 @@
492
  "epoch": 0.9619084263178146,
493
  "step": 15000,
494
  "total_flos": 1.7109669148845115e+19,
495
- "train_loss": 0.5128920831044514,
496
- "train_runtime": 11433.8652,
497
- "train_samples_per_second": 10.495,
498
- "train_steps_per_second": 1.312
499
  }
500
  ],
501
  "logging_steps": 500,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.03206361421059382,
13
+ "grad_norm": 4.760500431060791,
14
+ "learning_rate": 9.859999999999998e-05,
15
+ "loss": 5.7778,
16
  "step": 500
17
  },
18
  {
19
  "epoch": 0.03206361421059382,
20
+ "eval_loss": 2.885216236114502,
21
+ "eval_runtime": 184.1165,
22
+ "eval_samples_per_second": 38.139,
23
+ "eval_steps_per_second": 0.597,
24
+ "eval_wer": 1.0,
25
  "step": 500
26
  },
27
  {
28
  "epoch": 0.06412722842118763,
29
+ "grad_norm": 5.567607402801514,
30
+ "learning_rate": 0.0001986,
31
+ "loss": 1.4914,
32
  "step": 1000
33
  },
34
  {
35
  "epoch": 0.06412722842118763,
36
+ "eval_loss": 1.2011666297912598,
37
+ "eval_runtime": 186.6136,
38
+ "eval_samples_per_second": 37.629,
39
+ "eval_steps_per_second": 0.589,
40
+ "eval_wer": 0.7805710944870381,
41
  "step": 1000
42
  },
43
  {
44
  "epoch": 0.09619084263178146,
45
+ "grad_norm": 5.768416881561279,
46
+ "learning_rate": 0.00029859999999999994,
47
+ "loss": 0.8803,
48
  "step": 1500
49
  },
50
  {
51
  "epoch": 0.09619084263178146,
52
+ "eval_loss": 1.1211999654769897,
53
+ "eval_runtime": 186.8607,
54
+ "eval_samples_per_second": 37.579,
55
+ "eval_steps_per_second": 0.589,
56
+ "eval_wer": 0.7589657050983936,
57
  "step": 1500
58
  },
59
  {
60
  "epoch": 0.12825445684237527,
61
+ "grad_norm": 9.690132141113281,
62
+ "learning_rate": 0.00028904444444444443,
63
+ "loss": 0.7723,
64
  "step": 2000
65
  },
66
  {
67
  "epoch": 0.12825445684237527,
68
+ "eval_loss": 0.9680694937705994,
69
+ "eval_runtime": 188.1934,
70
+ "eval_samples_per_second": 37.313,
71
+ "eval_steps_per_second": 0.585,
72
+ "eval_wer": 0.6770220173002565,
73
  "step": 2000
74
  },
75
  {
76
  "epoch": 0.16031807105296908,
77
+ "grad_norm": 5.588994979858398,
78
+ "learning_rate": 0.0002779333333333333,
79
+ "loss": 0.6988,
80
  "step": 2500
81
  },
82
  {
83
  "epoch": 0.16031807105296908,
84
+ "eval_loss": 0.9452723860740662,
85
+ "eval_runtime": 187.5323,
86
+ "eval_samples_per_second": 37.444,
87
+ "eval_steps_per_second": 0.587,
88
+ "eval_wer": 0.6598812102207045,
89
  "step": 2500
90
  },
91
  {
92
  "epoch": 0.19238168526356292,
93
+ "grad_norm": 5.276751518249512,
94
+ "learning_rate": 0.0002668222222222222,
95
+ "loss": 0.6392,
96
  "step": 3000
97
  },
98
  {
99
  "epoch": 0.19238168526356292,
100
+ "eval_loss": 0.8690649271011353,
101
+ "eval_runtime": 187.6799,
102
+ "eval_samples_per_second": 37.415,
103
+ "eval_steps_per_second": 0.586,
104
+ "eval_wer": 0.6200055807278864,
105
  "step": 3000
106
  },
107
  {
108
  "epoch": 0.22444529947415673,
109
+ "grad_norm": 6.366265296936035,
110
+ "learning_rate": 0.0002557111111111111,
111
+ "loss": 0.6114,
112
  "step": 3500
113
  },
114
  {
115
  "epoch": 0.22444529947415673,
116
+ "eval_loss": 0.8661066293716431,
117
+ "eval_runtime": 188.4956,
118
+ "eval_samples_per_second": 37.253,
119
+ "eval_steps_per_second": 0.584,
120
+ "eval_wer": 0.619155184097583,
121
  "step": 3500
122
  },
123
  {
124
  "epoch": 0.25650891368475054,
125
+ "grad_norm": 11.82204818725586,
126
+ "learning_rate": 0.0002446222222222222,
127
+ "loss": 0.5807,
128
  "step": 4000
129
  },
130
  {
131
  "epoch": 0.25650891368475054,
132
+ "eval_loss": 0.7884626984596252,
133
+ "eval_runtime": 188.2678,
134
+ "eval_samples_per_second": 37.298,
135
+ "eval_steps_per_second": 0.584,
136
+ "eval_wer": 0.5793592792888558,
137
  "step": 4000
138
  },
139
  {
140
  "epoch": 0.2885725278953444,
141
+ "grad_norm": 14.343709945678711,
142
+ "learning_rate": 0.00023355555555555553,
143
+ "loss": 0.5534,
144
  "step": 4500
145
  },
146
  {
147
  "epoch": 0.2885725278953444,
148
+ "eval_loss": 0.7738627791404724,
149
+ "eval_runtime": 188.8344,
150
+ "eval_samples_per_second": 37.186,
151
+ "eval_steps_per_second": 0.583,
152
+ "eval_wer": 0.549037324439602,
153
  "step": 4500
154
  },
155
  {
156
  "epoch": 0.32063614210593816,
157
+ "grad_norm": 8.92720890045166,
158
+ "learning_rate": 0.00022244444444444444,
159
+ "loss": 0.5358,
160
  "step": 5000
161
  },
162
  {
163
  "epoch": 0.32063614210593816,
164
+ "eval_loss": 0.7416187524795532,
165
+ "eval_runtime": 189.1576,
166
+ "eval_samples_per_second": 37.122,
167
+ "eval_steps_per_second": 0.582,
168
+ "eval_wer": 0.5414767668983111,
169
  "step": 5000
170
  },
171
  {
172
  "epoch": 0.352699756316532,
173
+ "grad_norm": 7.230262279510498,
174
+ "learning_rate": 0.0002113333333333333,
175
+ "loss": 0.5189,
176
  "step": 5500
177
  },
178
  {
179
  "epoch": 0.352699756316532,
180
+ "eval_loss": 0.7361556887626648,
181
+ "eval_runtime": 188.5731,
182
+ "eval_samples_per_second": 37.238,
183
+ "eval_steps_per_second": 0.583,
184
+ "eval_wer": 0.5303285985729281,
185
  "step": 5500
186
  },
187
  {
188
  "epoch": 0.38476337052712584,
189
+ "grad_norm": 7.765280246734619,
190
+ "learning_rate": 0.0002002444444444444,
191
+ "loss": 0.4991,
192
  "step": 6000
193
  },
194
  {
195
  "epoch": 0.38476337052712584,
196
+ "eval_loss": 0.7187824845314026,
197
+ "eval_runtime": 188.8496,
198
+ "eval_samples_per_second": 37.183,
199
+ "eval_steps_per_second": 0.582,
200
+ "eval_wer": 0.5065839301611768,
201
  "step": 6000
202
  },
203
  {
204
  "epoch": 0.4168269847377196,
205
+ "grad_norm": 5.903895854949951,
206
+ "learning_rate": 0.00018913333333333331,
207
+ "loss": 0.48,
208
  "step": 6500
209
  },
210
  {
211
  "epoch": 0.4168269847377196,
212
+ "eval_loss": 0.6984608173370361,
213
+ "eval_runtime": 189.9958,
214
+ "eval_samples_per_second": 36.959,
215
+ "eval_steps_per_second": 0.579,
216
+ "eval_wer": 0.5177719608286052,
217
  "step": 6500
218
  },
219
  {
220
  "epoch": 0.44889059894831346,
221
+ "grad_norm": 3.641240358352661,
222
+ "learning_rate": 0.00017804444444444444,
223
+ "loss": 0.463,
224
  "step": 7000
225
  },
226
  {
227
  "epoch": 0.44889059894831346,
228
+ "eval_loss": 0.6681538820266724,
229
+ "eval_runtime": 189.2879,
230
+ "eval_samples_per_second": 37.097,
231
+ "eval_steps_per_second": 0.581,
232
+ "eval_wer": 0.49330977026003536,
233
  "step": 7000
234
  },
235
  {
236
  "epoch": 0.4809542131589073,
237
+ "grad_norm": 6.733245849609375,
238
+ "learning_rate": 0.00016693333333333332,
239
+ "loss": 0.4477,
240
  "step": 7500
241
  },
242
  {
243
  "epoch": 0.4809542131589073,
244
+ "eval_loss": 0.6624513268470764,
245
+ "eval_runtime": 189.8301,
246
+ "eval_samples_per_second": 36.991,
247
+ "eval_steps_per_second": 0.579,
248
+ "eval_wer": 0.48671919637518435,
249
  "step": 7500
250
  },
251
  {
252
  "epoch": 0.5130178273695011,
253
+ "grad_norm": 7.44530725479126,
254
+ "learning_rate": 0.00015584444444444442,
255
+ "loss": 0.4431,
256
  "step": 8000
257
  },
258
  {
259
  "epoch": 0.5130178273695011,
260
+ "eval_loss": 0.6373856663703918,
261
+ "eval_runtime": 189.4419,
262
+ "eval_samples_per_second": 37.067,
263
+ "eval_steps_per_second": 0.581,
264
+ "eval_wer": 0.47356462350017936,
265
  "step": 8000
266
  },
267
  {
268
  "epoch": 0.5450814415800949,
269
+ "grad_norm": 3.664278745651245,
270
+ "learning_rate": 0.00014473333333333332,
271
+ "loss": 0.4392,
272
  "step": 8500
273
  },
274
  {
275
  "epoch": 0.5450814415800949,
276
+ "eval_loss": 0.6391619443893433,
277
+ "eval_runtime": 189.4208,
278
+ "eval_samples_per_second": 37.071,
279
+ "eval_steps_per_second": 0.581,
280
+ "eval_wer": 0.4772452464157111,
281
  "step": 8500
282
  },
283
  {
284
  "epoch": 0.5771450557906888,
285
+ "grad_norm": 11.637319564819336,
286
+ "learning_rate": 0.00013362222222222222,
287
+ "loss": 0.4197,
288
  "step": 9000
289
  },
290
  {
291
  "epoch": 0.5771450557906888,
292
+ "eval_loss": 0.6158761978149414,
293
+ "eval_runtime": 188.8242,
294
+ "eval_samples_per_second": 37.188,
295
+ "eval_steps_per_second": 0.583,
296
+ "eval_wer": 0.45473631060736924,
297
  "step": 9000
298
  },
299
  {
300
  "epoch": 0.6092086700012825,
301
+ "grad_norm": 7.102973461151123,
302
+ "learning_rate": 0.0001225111111111111,
303
+ "loss": 0.4147,
304
  "step": 9500
305
  },
306
  {
307
  "epoch": 0.6092086700012825,
308
+ "eval_loss": 0.5994922518730164,
309
+ "eval_runtime": 188.4237,
310
+ "eval_samples_per_second": 37.267,
311
+ "eval_steps_per_second": 0.584,
312
+ "eval_wer": 0.45217183326911065,
313
  "step": 9500
314
  },
315
  {
316
  "epoch": 0.6412722842118763,
317
+ "grad_norm": 6.166309833526611,
318
+ "learning_rate": 0.0001114,
319
+ "loss": 0.3912,
320
  "step": 10000
321
  },
322
  {
323
  "epoch": 0.6412722842118763,
324
+ "eval_loss": 0.5847700834274292,
325
+ "eval_runtime": 188.8879,
326
+ "eval_samples_per_second": 37.175,
327
+ "eval_steps_per_second": 0.582,
328
+ "eval_wer": 0.4285866142255411,
329
  "step": 10000
330
  },
331
  {
332
  "epoch": 0.6733358984224702,
333
+ "grad_norm": 8.538312911987305,
334
+ "learning_rate": 0.00010028888888888889,
335
+ "loss": 0.3742,
336
  "step": 10500
337
  },
338
  {
339
  "epoch": 0.6733358984224702,
340
+ "eval_loss": 0.585001528263092,
341
+ "eval_runtime": 189.3338,
342
+ "eval_samples_per_second": 37.088,
343
+ "eval_steps_per_second": 0.581,
344
+ "eval_wer": 0.4259025498611462,
345
  "step": 10500
346
  },
347
  {
348
  "epoch": 0.705399512633064,
349
+ "grad_norm": 16.837343215942383,
350
+ "learning_rate": 8.917777777777777e-05,
351
+ "loss": 0.402,
352
  "step": 11000
353
  },
354
  {
355
  "epoch": 0.705399512633064,
356
+ "eval_loss": 0.6351918578147888,
357
+ "eval_runtime": 188.7345,
358
+ "eval_samples_per_second": 37.206,
359
+ "eval_steps_per_second": 0.583,
360
+ "eval_wer": 0.44894298356342766,
361
  "step": 11000
362
  },
363
  {
364
  "epoch": 0.7374631268436578,
365
+ "grad_norm": 4.569055557250977,
366
+ "learning_rate": 7.806666666666666e-05,
367
+ "loss": 0.5746,
368
  "step": 11500
369
  },
370
  {
371
  "epoch": 0.7374631268436578,
372
+ "eval_loss": 0.7711716294288635,
373
+ "eval_runtime": 188.4281,
374
+ "eval_samples_per_second": 37.266,
375
+ "eval_steps_per_second": 0.584,
376
+ "eval_wer": 0.5170810135664837,
377
  "step": 11500
378
  },
379
  {
380
  "epoch": 0.7695267410542517,
381
  "grad_norm": NaN,
382
+ "learning_rate": 7.275555555555556e-05,
383
+ "loss": 0.5783,
384
  "step": 12000
385
  },
386
  {
387
  "epoch": 0.7695267410542517,
388
  "eval_loss": NaN,
389
+ "eval_runtime": 184.9484,
390
+ "eval_samples_per_second": 37.967,
391
  "eval_steps_per_second": 0.595,
392
  "eval_wer": 1.0,
393
  "step": 12000
 
395
  {
396
  "epoch": 0.8015903552648455,
397
  "grad_norm": NaN,
398
+ "learning_rate": 7.275555555555556e-05,
399
  "loss": 0.0,
400
  "step": 12500
401
  },
402
  {
403
  "epoch": 0.8015903552648455,
404
  "eval_loss": NaN,
405
+ "eval_runtime": 184.8998,
406
+ "eval_samples_per_second": 37.977,
407
  "eval_steps_per_second": 0.595,
408
  "eval_wer": 1.0,
409
  "step": 12500
 
411
  {
412
  "epoch": 0.8336539694754392,
413
  "grad_norm": NaN,
414
+ "learning_rate": 7.275555555555556e-05,
415
  "loss": 0.0,
416
  "step": 13000
417
  },
418
  {
419
  "epoch": 0.8336539694754392,
420
  "eval_loss": NaN,
421
+ "eval_runtime": 184.8775,
422
+ "eval_samples_per_second": 37.982,
423
+ "eval_steps_per_second": 0.595,
424
  "eval_wer": 1.0,
425
  "step": 13000
426
  },
427
  {
428
  "epoch": 0.8657175836860331,
429
  "grad_norm": NaN,
430
+ "learning_rate": 7.275555555555556e-05,
431
  "loss": 0.0,
432
  "step": 13500
433
  },
434
  {
435
  "epoch": 0.8657175836860331,
436
  "eval_loss": NaN,
437
+ "eval_runtime": 185.2742,
438
+ "eval_samples_per_second": 37.901,
439
+ "eval_steps_per_second": 0.594,
440
  "eval_wer": 1.0,
441
  "step": 13500
442
  },
443
  {
444
  "epoch": 0.8977811978966269,
445
  "grad_norm": NaN,
446
+ "learning_rate": 7.275555555555556e-05,
447
  "loss": 0.0,
448
  "step": 14000
449
  },
450
  {
451
  "epoch": 0.8977811978966269,
452
  "eval_loss": NaN,
453
+ "eval_runtime": 185.0536,
454
+ "eval_samples_per_second": 37.946,
455
+ "eval_steps_per_second": 0.594,
456
  "eval_wer": 1.0,
457
  "step": 14000
458
  },
459
  {
460
  "epoch": 0.9298448121072207,
461
  "grad_norm": NaN,
462
+ "learning_rate": 7.275555555555556e-05,
463
  "loss": 0.0,
464
  "step": 14500
465
  },
466
  {
467
  "epoch": 0.9298448121072207,
468
  "eval_loss": NaN,
469
+ "eval_runtime": 184.7301,
470
+ "eval_samples_per_second": 38.012,
471
+ "eval_steps_per_second": 0.595,
472
  "eval_wer": 1.0,
473
  "step": 14500
474
  },
475
  {
476
  "epoch": 0.9619084263178146,
477
  "grad_norm": NaN,
478
+ "learning_rate": 7.275555555555556e-05,
479
  "loss": 0.0,
480
  "step": 15000
481
  },
482
  {
483
  "epoch": 0.9619084263178146,
484
  "eval_loss": NaN,
485
+ "eval_runtime": 185.3079,
486
+ "eval_samples_per_second": 37.894,
487
+ "eval_steps_per_second": 0.594,
488
  "eval_wer": 1.0,
489
  "step": 15000
490
  },
 
492
  "epoch": 0.9619084263178146,
493
  "step": 15000,
494
  "total_flos": 1.7109669148845115e+19,
495
+ "train_loss": 0.6328924499511719,
496
+ "train_runtime": 11517.021,
497
+ "train_samples_per_second": 10.419,
498
+ "train_steps_per_second": 1.302
499
  }
500
  ],
501
  "logging_steps": 500,