DewiBrynJones commited on
Commit
00168d0
1 Parent(s): ce389ad

End of training

Browse files
README.md CHANGED
@@ -2,6 +2,8 @@
2
  license: apache-2.0
3
  base_model: facebook/wav2vec2-large-xlsr-53
4
  tags:
 
 
5
  - generated_from_trainer
6
  metrics:
7
  - wer
@@ -15,7 +17,7 @@ should probably proofread and complete it, then remove this comment. -->
15
 
16
  # wav2vec2-xlsr-53-ft-btb-ccv-cy
17
 
18
- This model is a fine-tuned version of [facebook/wav2vec2-large-xlsr-53](https://huggingface.co/facebook/wav2vec2-large-xlsr-53) on an unknown dataset.
19
  It achieves the following results on the evaluation set:
20
  - Loss: 0.5324
21
  - Wer: 0.4014
 
2
  license: apache-2.0
3
  base_model: facebook/wav2vec2-large-xlsr-53
4
  tags:
5
+ - automatic-speech-recognition
6
+ - DewiBrynJones/banc-trawsgrifiadau-bangor-clean-with-ccv
7
  - generated_from_trainer
8
  metrics:
9
  - wer
 
17
 
18
  # wav2vec2-xlsr-53-ft-btb-ccv-cy
19
 
20
+ This model is a fine-tuned version of [facebook/wav2vec2-large-xlsr-53](https://huggingface.co/facebook/wav2vec2-large-xlsr-53) on the DEWIBRYNJONES/BANC-TRAWSGRIFIADAU-BANGOR-CLEAN-WITH-CCV - DEFAULT dataset.
21
  It achieves the following results on the evaluation set:
22
  - Loss: 0.5324
23
  - Wer: 0.4014
all_results.json CHANGED
@@ -1,15 +1,15 @@
1
  {
2
- "epoch": 1.2825445684237526,
3
- "eval_loss": NaN,
4
- "eval_runtime": 187.7207,
5
  "eval_samples": 7022,
6
- "eval_samples_per_second": 37.407,
7
- "eval_steps_per_second": 0.586,
8
- "eval_wer": 1.0,
9
- "total_flos": 2.2824984432894013e+19,
10
- "train_loss": 0.38660173568725587,
11
- "train_runtime": 15166.2226,
12
  "train_samples": 124748,
13
- "train_samples_per_second": 10.55,
14
- "train_steps_per_second": 1.319
15
  }
 
1
  {
2
+ "epoch": 0.6412722842118763,
3
+ "eval_loss": 0.5324302911758423,
4
+ "eval_runtime": 187.9214,
5
  "eval_samples": 7022,
6
+ "eval_samples_per_second": 37.367,
7
+ "eval_steps_per_second": 0.585,
8
+ "eval_wer": 0.40138720950318235,
9
+ "total_flos": 1.1393778193380235e+19,
10
+ "train_loss": 0.73015986328125,
11
+ "train_runtime": 7697.7754,
12
  "train_samples": 124748,
13
+ "train_samples_per_second": 10.393,
14
+ "train_steps_per_second": 1.299
15
  }
eval_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 1.2825445684237526,
3
- "eval_loss": NaN,
4
- "eval_runtime": 187.7207,
5
  "eval_samples": 7022,
6
- "eval_samples_per_second": 37.407,
7
- "eval_steps_per_second": 0.586,
8
- "eval_wer": 1.0
9
  }
 
1
  {
2
+ "epoch": 0.6412722842118763,
3
+ "eval_loss": 0.5324302911758423,
4
+ "eval_runtime": 187.9214,
5
  "eval_samples": 7022,
6
+ "eval_samples_per_second": 37.367,
7
+ "eval_steps_per_second": 0.585,
8
+ "eval_wer": 0.40138720950318235
9
  }
runs/Aug31_12-05-35_4b35055fdbcb/events.out.tfevents.1725112512.4b35055fdbcb.1183.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2a367da909b477d2cc3d53417f6b581ff1af39454fd1f3910864130a0a5f3545
3
+ size 406
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 1.2825445684237526,
3
- "total_flos": 2.2824984432894013e+19,
4
- "train_loss": 0.38660173568725587,
5
- "train_runtime": 15166.2226,
6
  "train_samples": 124748,
7
- "train_samples_per_second": 10.55,
8
- "train_steps_per_second": 1.319
9
  }
 
1
  {
2
+ "epoch": 0.6412722842118763,
3
+ "total_flos": 1.1393778193380235e+19,
4
+ "train_loss": 0.73015986328125,
5
+ "train_runtime": 7697.7754,
6
  "train_samples": 124748,
7
+ "train_samples_per_second": 10.393,
8
+ "train_steps_per_second": 1.299
9
  }
trainer_state.json CHANGED
@@ -1,667 +1,347 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.2825445684237526,
5
  "eval_steps": 500,
6
- "global_step": 20000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.03206361421059382,
13
- "grad_norm": 9.577472686767578,
14
  "learning_rate": 0.0002465,
15
- "loss": 4.7126,
16
  "step": 500
17
  },
18
  {
19
  "epoch": 0.03206361421059382,
20
- "eval_loss": 1.7046922445297241,
21
- "eval_runtime": 189.0412,
22
- "eval_samples_per_second": 37.145,
23
- "eval_steps_per_second": 0.582,
24
- "eval_wer": 0.9345593218086873,
25
  "step": 500
26
  },
27
  {
28
  "epoch": 0.06412722842118763,
29
- "grad_norm": 6.515851020812988,
30
- "learning_rate": 0.0002939226804123711,
31
- "loss": 1.0533,
32
  "step": 1000
33
  },
34
  {
35
  "epoch": 0.06412722842118763,
36
- "eval_loss": 1.1487088203430176,
37
- "eval_runtime": 189.0728,
38
- "eval_samples_per_second": 37.139,
39
- "eval_steps_per_second": 0.582,
40
- "eval_wer": 0.7906695544718904,
41
  "step": 1000
42
  },
43
  {
44
  "epoch": 0.09619084263178146,
45
- "grad_norm": 3.676572799682617,
46
- "learning_rate": 0.0002861907216494845,
47
- "loss": 0.8268,
48
  "step": 1500
49
  },
50
  {
51
  "epoch": 0.09619084263178146,
52
- "eval_loss": 1.060188889503479,
53
- "eval_runtime": 190.9733,
54
- "eval_samples_per_second": 36.77,
55
- "eval_steps_per_second": 0.576,
56
- "eval_wer": 0.7815012158014324,
57
  "step": 1500
58
  },
59
  {
60
  "epoch": 0.12825445684237527,
61
- "grad_norm": 9.430739402770996,
62
- "learning_rate": 0.00027845876288659795,
63
- "loss": 0.7188,
64
  "step": 2000
65
  },
66
  {
67
  "epoch": 0.12825445684237527,
68
- "eval_loss": 0.9336337447166443,
69
- "eval_runtime": 191.0896,
70
- "eval_samples_per_second": 36.747,
71
- "eval_steps_per_second": 0.576,
72
- "eval_wer": 0.671746900702906,
73
  "step": 2000
74
  },
75
  {
76
  "epoch": 0.16031807105296908,
77
- "grad_norm": 2.9828200340270996,
78
- "learning_rate": 0.00027072680412371135,
79
- "loss": 0.6725,
80
  "step": 2500
81
  },
82
  {
83
  "epoch": 0.16031807105296908,
84
- "eval_loss": 0.9303568005561829,
85
- "eval_runtime": 191.2157,
86
- "eval_samples_per_second": 36.723,
87
- "eval_steps_per_second": 0.575,
88
- "eval_wer": 0.6560677128316879,
89
  "step": 2500
90
  },
91
  {
92
  "epoch": 0.19238168526356292,
93
- "grad_norm": 4.710850238800049,
94
- "learning_rate": 0.0002629948453608247,
95
- "loss": 0.6295,
96
  "step": 3000
97
  },
98
  {
99
  "epoch": 0.19238168526356292,
100
- "eval_loss": 0.8600214719772339,
101
- "eval_runtime": 191.6797,
102
- "eval_samples_per_second": 36.634,
103
- "eval_steps_per_second": 0.574,
104
- "eval_wer": 0.6257324705350855,
105
  "step": 3000
106
  },
107
  {
108
  "epoch": 0.22444529947415673,
109
- "grad_norm": 4.912868976593018,
110
- "learning_rate": 0.0002552628865979381,
111
- "loss": 0.6003,
112
  "step": 3500
113
  },
114
  {
115
  "epoch": 0.22444529947415673,
116
- "eval_loss": 0.8395254611968994,
117
- "eval_runtime": 191.3108,
118
- "eval_samples_per_second": 36.705,
119
- "eval_steps_per_second": 0.575,
120
- "eval_wer": 0.6113288776093224,
121
  "step": 3500
122
  },
123
  {
124
  "epoch": 0.25650891368475054,
125
- "grad_norm": 4.513955116271973,
126
- "learning_rate": 0.00024754639175257734,
127
- "loss": 0.5847,
128
  "step": 4000
129
  },
130
  {
131
  "epoch": 0.25650891368475054,
132
- "eval_loss": 0.7883865833282471,
133
- "eval_runtime": 192.8783,
134
- "eval_samples_per_second": 36.406,
135
- "eval_steps_per_second": 0.57,
136
- "eval_wer": 0.5861491648839341,
137
  "step": 4000
138
  },
139
  {
140
  "epoch": 0.2885725278953444,
141
- "grad_norm": 16.630624771118164,
142
- "learning_rate": 0.00023984536082474227,
143
- "loss": 0.5521,
144
  "step": 4500
145
  },
146
  {
147
  "epoch": 0.2885725278953444,
148
- "eval_loss": 0.7741186618804932,
149
- "eval_runtime": 189.6516,
150
- "eval_samples_per_second": 37.026,
151
- "eval_steps_per_second": 0.58,
152
- "eval_wer": 0.5686628841733214,
153
  "step": 4500
154
  },
155
  {
156
  "epoch": 0.32063614210593816,
157
- "grad_norm": 7.58245325088501,
158
- "learning_rate": 0.00023211340206185567,
159
- "loss": 0.5477,
160
  "step": 5000
161
  },
162
  {
163
  "epoch": 0.32063614210593816,
164
- "eval_loss": 0.7594121098518372,
165
- "eval_runtime": 190.5466,
166
- "eval_samples_per_second": 36.852,
167
- "eval_steps_per_second": 0.577,
168
- "eval_wer": 0.5535550565380885,
169
  "step": 5000
170
  },
171
  {
172
  "epoch": 0.352699756316532,
173
- "grad_norm": 5.051167011260986,
174
- "learning_rate": 0.00022438144329896904,
175
- "loss": 0.5346,
176
  "step": 5500
177
  },
178
  {
179
  "epoch": 0.352699756316532,
180
- "eval_loss": 0.7481973767280579,
181
- "eval_runtime": 190.5699,
182
- "eval_samples_per_second": 36.847,
183
- "eval_steps_per_second": 0.577,
184
- "eval_wer": 0.5394039251119468,
185
  "step": 5500
186
  },
187
  {
188
  "epoch": 0.38476337052712584,
189
- "grad_norm": 4.212076187133789,
190
- "learning_rate": 0.00021666494845360825,
191
- "loss": 0.5154,
192
  "step": 6000
193
  },
194
  {
195
  "epoch": 0.38476337052712584,
196
- "eval_loss": 0.7294158935546875,
197
- "eval_runtime": 189.7232,
198
- "eval_samples_per_second": 37.012,
199
- "eval_steps_per_second": 0.58,
200
- "eval_wer": 0.53515194196043,
201
  "step": 6000
202
  },
203
  {
204
  "epoch": 0.4168269847377196,
205
- "grad_norm": 5.682095527648926,
206
- "learning_rate": 0.00020893298969072165,
207
- "loss": 0.492,
208
  "step": 6500
209
  },
210
  {
211
  "epoch": 0.4168269847377196,
212
- "eval_loss": 0.7247592806816101,
213
- "eval_runtime": 190.6553,
214
- "eval_samples_per_second": 36.831,
215
- "eval_steps_per_second": 0.577,
216
- "eval_wer": 0.5492632110445262,
217
  "step": 6500
218
  },
219
  {
220
  "epoch": 0.44889059894831346,
221
- "grad_norm": 8.364203453063965,
222
- "learning_rate": 0.0002012164948453608,
223
- "loss": 0.4759,
224
  "step": 7000
225
  },
226
  {
227
  "epoch": 0.44889059894831346,
228
- "eval_loss": 0.7076719403266907,
229
- "eval_runtime": 189.5572,
230
- "eval_samples_per_second": 37.044,
231
- "eval_steps_per_second": 0.58,
232
- "eval_wer": 0.5134402529929976,
233
  "step": 7000
234
  },
235
  {
236
  "epoch": 0.4809542131589073,
237
- "grad_norm": 4.447290897369385,
238
- "learning_rate": 0.0001934845360824742,
239
- "loss": 0.4655,
240
  "step": 7500
241
  },
242
  {
243
  "epoch": 0.4809542131589073,
244
- "eval_loss": 0.673875629901886,
245
- "eval_runtime": 190.3324,
246
- "eval_samples_per_second": 36.893,
247
- "eval_steps_per_second": 0.578,
248
- "eval_wer": 0.5063979058982979,
249
  "step": 7500
250
  },
251
  {
252
  "epoch": 0.5130178273695011,
253
- "grad_norm": 12.618865013122559,
254
- "learning_rate": 0.0001857680412371134,
255
- "loss": 0.4594,
256
  "step": 8000
257
  },
258
  {
259
  "epoch": 0.5130178273695011,
260
- "eval_loss": 0.6574720144271851,
261
- "eval_runtime": 190.8303,
262
- "eval_samples_per_second": 36.797,
263
- "eval_steps_per_second": 0.576,
264
- "eval_wer": 0.5067300920820101,
265
  "step": 8000
266
  },
267
  {
268
  "epoch": 0.5450814415800949,
269
- "grad_norm": 2.756011962890625,
270
- "learning_rate": 0.0001780360824742268,
271
- "loss": 0.4538,
272
  "step": 8500
273
  },
274
  {
275
  "epoch": 0.5450814415800949,
276
- "eval_loss": 0.6492609977722168,
277
- "eval_runtime": 189.5472,
278
- "eval_samples_per_second": 37.046,
279
- "eval_steps_per_second": 0.58,
280
- "eval_wer": 0.500325542460038,
281
  "step": 8500
282
  },
283
  {
284
  "epoch": 0.5771450557906888,
285
- "grad_norm": 7.6861491203308105,
286
- "learning_rate": 0.0001703041237113402,
287
- "loss": 0.4739,
288
  "step": 9000
289
  },
290
  {
291
  "epoch": 0.5771450557906888,
292
- "eval_loss": 0.7676782608032227,
293
- "eval_runtime": 189.9033,
294
- "eval_samples_per_second": 36.977,
295
- "eval_steps_per_second": 0.579,
296
- "eval_wer": 0.5238576117142136,
297
  "step": 9000
298
  },
299
  {
300
  "epoch": 0.6092086700012825,
301
- "grad_norm": NaN,
302
- "learning_rate": 0.00016787628865979378,
303
- "loss": 0.695,
304
  "step": 9500
305
  },
306
  {
307
  "epoch": 0.6092086700012825,
308
- "eval_loss": NaN,
309
- "eval_runtime": 186.9158,
310
- "eval_samples_per_second": 37.568,
311
- "eval_steps_per_second": 0.589,
312
- "eval_wer": 1.0,
313
  "step": 9500
314
  },
315
  {
316
  "epoch": 0.6412722842118763,
317
- "grad_norm": NaN,
318
- "learning_rate": 0.00016787628865979378,
319
- "loss": 0.0,
320
  "step": 10000
321
  },
322
  {
323
  "epoch": 0.6412722842118763,
324
- "eval_loss": NaN,
325
- "eval_runtime": 186.7613,
326
- "eval_samples_per_second": 37.599,
327
- "eval_steps_per_second": 0.589,
328
- "eval_wer": 1.0,
329
- "step": 10000
330
- },
331
- {
332
- "epoch": 0.6733358984224702,
333
- "grad_norm": NaN,
334
- "learning_rate": 0.00016787628865979378,
335
- "loss": 0.0,
336
- "step": 10500
337
- },
338
- {
339
- "epoch": 0.6733358984224702,
340
- "eval_loss": NaN,
341
- "eval_runtime": 185.9746,
342
- "eval_samples_per_second": 37.758,
343
- "eval_steps_per_second": 0.591,
344
- "eval_wer": 1.0,
345
- "step": 10500
346
- },
347
- {
348
- "epoch": 0.705399512633064,
349
- "grad_norm": NaN,
350
- "learning_rate": 0.00016787628865979378,
351
- "loss": 0.0,
352
- "step": 11000
353
- },
354
- {
355
- "epoch": 0.705399512633064,
356
- "eval_loss": NaN,
357
- "eval_runtime": 186.0588,
358
- "eval_samples_per_second": 37.741,
359
- "eval_steps_per_second": 0.591,
360
- "eval_wer": 1.0,
361
- "step": 11000
362
- },
363
- {
364
- "epoch": 0.7374631268436578,
365
- "grad_norm": NaN,
366
- "learning_rate": 0.00016787628865979378,
367
- "loss": 0.0,
368
- "step": 11500
369
- },
370
- {
371
- "epoch": 0.7374631268436578,
372
- "eval_loss": NaN,
373
- "eval_runtime": 187.0825,
374
- "eval_samples_per_second": 37.534,
375
- "eval_steps_per_second": 0.588,
376
- "eval_wer": 1.0,
377
- "step": 11500
378
- },
379
- {
380
- "epoch": 0.7695267410542517,
381
- "grad_norm": NaN,
382
- "learning_rate": 0.00016787628865979378,
383
- "loss": 0.0,
384
- "step": 12000
385
- },
386
- {
387
- "epoch": 0.7695267410542517,
388
- "eval_loss": NaN,
389
- "eval_runtime": 186.5183,
390
- "eval_samples_per_second": 37.648,
391
- "eval_steps_per_second": 0.59,
392
- "eval_wer": 1.0,
393
- "step": 12000
394
- },
395
- {
396
- "epoch": 0.8015903552648455,
397
- "grad_norm": NaN,
398
- "learning_rate": 0.00016787628865979378,
399
- "loss": 0.0,
400
- "step": 12500
401
- },
402
- {
403
- "epoch": 0.8015903552648455,
404
- "eval_loss": NaN,
405
- "eval_runtime": 186.3281,
406
- "eval_samples_per_second": 37.686,
407
- "eval_steps_per_second": 0.59,
408
- "eval_wer": 1.0,
409
- "step": 12500
410
- },
411
- {
412
- "epoch": 0.8336539694754392,
413
- "grad_norm": NaN,
414
- "learning_rate": 0.00016787628865979378,
415
- "loss": 0.0,
416
- "step": 13000
417
- },
418
- {
419
- "epoch": 0.8336539694754392,
420
- "eval_loss": NaN,
421
- "eval_runtime": 185.5922,
422
- "eval_samples_per_second": 37.836,
423
- "eval_steps_per_second": 0.593,
424
- "eval_wer": 1.0,
425
- "step": 13000
426
- },
427
- {
428
- "epoch": 0.8657175836860331,
429
- "grad_norm": NaN,
430
- "learning_rate": 0.00016787628865979378,
431
- "loss": 0.0,
432
- "step": 13500
433
- },
434
- {
435
- "epoch": 0.8657175836860331,
436
- "eval_loss": NaN,
437
- "eval_runtime": 185.7237,
438
- "eval_samples_per_second": 37.809,
439
- "eval_steps_per_second": 0.592,
440
- "eval_wer": 1.0,
441
- "step": 13500
442
- },
443
- {
444
- "epoch": 0.8977811978966269,
445
- "grad_norm": NaN,
446
- "learning_rate": 0.00016787628865979378,
447
- "loss": 0.0,
448
- "step": 14000
449
- },
450
- {
451
- "epoch": 0.8977811978966269,
452
- "eval_loss": NaN,
453
- "eval_runtime": 186.6259,
454
- "eval_samples_per_second": 37.626,
455
- "eval_steps_per_second": 0.589,
456
- "eval_wer": 1.0,
457
- "step": 14000
458
- },
459
- {
460
- "epoch": 0.9298448121072207,
461
- "grad_norm": NaN,
462
- "learning_rate": 0.00016787628865979378,
463
- "loss": 0.0,
464
- "step": 14500
465
- },
466
- {
467
- "epoch": 0.9298448121072207,
468
- "eval_loss": NaN,
469
- "eval_runtime": 186.1517,
470
- "eval_samples_per_second": 37.722,
471
  "eval_steps_per_second": 0.591,
472
- "eval_wer": 1.0,
473
- "step": 14500
474
- },
475
- {
476
- "epoch": 0.9619084263178146,
477
- "grad_norm": NaN,
478
- "learning_rate": 0.00016787628865979378,
479
- "loss": 0.0,
480
- "step": 15000
481
- },
482
- {
483
- "epoch": 0.9619084263178146,
484
- "eval_loss": NaN,
485
- "eval_runtime": 186.7927,
486
- "eval_samples_per_second": 37.592,
487
- "eval_steps_per_second": 0.589,
488
- "eval_wer": 1.0,
489
- "step": 15000
490
- },
491
- {
492
- "epoch": 0.9939720405284084,
493
- "grad_norm": NaN,
494
- "learning_rate": 0.00016787628865979378,
495
- "loss": 0.0,
496
- "step": 15500
497
- },
498
- {
499
- "epoch": 0.9939720405284084,
500
- "eval_loss": NaN,
501
- "eval_runtime": 186.1708,
502
- "eval_samples_per_second": 37.718,
503
- "eval_steps_per_second": 0.591,
504
- "eval_wer": 1.0,
505
- "step": 15500
506
- },
507
- {
508
- "epoch": 1.0260356547390022,
509
- "grad_norm": NaN,
510
- "learning_rate": 0.00016787628865979378,
511
- "loss": 0.0,
512
- "step": 16000
513
- },
514
- {
515
- "epoch": 1.0260356547390022,
516
- "eval_loss": NaN,
517
- "eval_runtime": 186.1341,
518
- "eval_samples_per_second": 37.725,
519
- "eval_steps_per_second": 0.591,
520
- "eval_wer": 1.0,
521
- "step": 16000
522
- },
523
- {
524
- "epoch": 1.058099268949596,
525
- "grad_norm": NaN,
526
- "learning_rate": 0.00016787628865979378,
527
- "loss": 0.0,
528
- "step": 16500
529
- },
530
- {
531
- "epoch": 1.058099268949596,
532
- "eval_loss": NaN,
533
- "eval_runtime": 186.4575,
534
- "eval_samples_per_second": 37.66,
535
- "eval_steps_per_second": 0.59,
536
- "eval_wer": 1.0,
537
- "step": 16500
538
- },
539
- {
540
- "epoch": 1.0901628831601897,
541
- "grad_norm": NaN,
542
- "learning_rate": 0.00016787628865979378,
543
- "loss": 0.0,
544
- "step": 17000
545
- },
546
- {
547
- "epoch": 1.0901628831601897,
548
- "eval_loss": NaN,
549
- "eval_runtime": 185.4444,
550
- "eval_samples_per_second": 37.866,
551
- "eval_steps_per_second": 0.593,
552
- "eval_wer": 1.0,
553
- "step": 17000
554
- },
555
- {
556
- "epoch": 1.1222264973707836,
557
- "grad_norm": NaN,
558
- "learning_rate": 0.00016787628865979378,
559
- "loss": 0.0,
560
- "step": 17500
561
- },
562
- {
563
- "epoch": 1.1222264973707836,
564
- "eval_loss": NaN,
565
- "eval_runtime": 186.15,
566
- "eval_samples_per_second": 37.722,
567
- "eval_steps_per_second": 0.591,
568
- "eval_wer": 1.0,
569
- "step": 17500
570
- },
571
- {
572
- "epoch": 1.1542901115813775,
573
- "grad_norm": NaN,
574
- "learning_rate": 0.00016787628865979378,
575
- "loss": 0.0,
576
- "step": 18000
577
- },
578
- {
579
- "epoch": 1.1542901115813775,
580
- "eval_loss": NaN,
581
- "eval_runtime": 186.0027,
582
- "eval_samples_per_second": 37.752,
583
- "eval_steps_per_second": 0.591,
584
- "eval_wer": 1.0,
585
- "step": 18000
586
- },
587
- {
588
- "epoch": 1.1863537257919712,
589
- "grad_norm": NaN,
590
- "learning_rate": 0.00016787628865979378,
591
- "loss": 0.0,
592
- "step": 18500
593
- },
594
- {
595
- "epoch": 1.1863537257919712,
596
- "eval_loss": NaN,
597
- "eval_runtime": 185.6149,
598
- "eval_samples_per_second": 37.831,
599
- "eval_steps_per_second": 0.593,
600
- "eval_wer": 1.0,
601
- "step": 18500
602
- },
603
- {
604
- "epoch": 1.218417340002565,
605
- "grad_norm": NaN,
606
- "learning_rate": 0.00016787628865979378,
607
- "loss": 0.0,
608
- "step": 19000
609
- },
610
- {
611
- "epoch": 1.218417340002565,
612
- "eval_loss": NaN,
613
- "eval_runtime": 186.7557,
614
- "eval_samples_per_second": 37.6,
615
- "eval_steps_per_second": 0.589,
616
- "eval_wer": 1.0,
617
- "step": 19000
618
- },
619
- {
620
- "epoch": 1.250480954213159,
621
- "grad_norm": NaN,
622
- "learning_rate": 0.00016787628865979378,
623
- "loss": 0.0,
624
- "step": 19500
625
- },
626
- {
627
- "epoch": 1.250480954213159,
628
- "eval_loss": NaN,
629
- "eval_runtime": 186.7166,
630
- "eval_samples_per_second": 37.608,
631
- "eval_steps_per_second": 0.589,
632
- "eval_wer": 1.0,
633
- "step": 19500
634
- },
635
- {
636
- "epoch": 1.2825445684237526,
637
- "grad_norm": NaN,
638
- "learning_rate": 0.00016787628865979378,
639
- "loss": 0.0,
640
- "step": 20000
641
- },
642
- {
643
- "epoch": 1.2825445684237526,
644
- "eval_loss": NaN,
645
- "eval_runtime": 186.0546,
646
- "eval_samples_per_second": 37.742,
647
- "eval_steps_per_second": 0.591,
648
- "eval_wer": 1.0,
649
- "step": 20000
650
  },
651
  {
652
- "epoch": 1.2825445684237526,
653
- "step": 20000,
654
- "total_flos": 2.2824984432894013e+19,
655
- "train_loss": 0.38660173568725587,
656
- "train_runtime": 15166.2226,
657
- "train_samples_per_second": 10.55,
658
- "train_steps_per_second": 1.319
659
  }
660
  ],
661
  "logging_steps": 500,
662
- "max_steps": 20000,
663
  "num_input_tokens_seen": 0,
664
- "num_train_epochs": 2,
665
  "save_steps": 500,
666
  "stateful_callbacks": {
667
  "TrainerControl": {
@@ -675,7 +355,7 @@
675
  "attributes": {}
676
  }
677
  },
678
- "total_flos": 2.2824984432894013e+19,
679
  "train_batch_size": 8,
680
  "trial_name": null,
681
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.6412722842118763,
5
  "eval_steps": 500,
6
+ "global_step": 10000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.03206361421059382,
13
+ "grad_norm": 8.037480354309082,
14
  "learning_rate": 0.0002465,
15
+ "loss": 4.7051,
16
  "step": 500
17
  },
18
  {
19
  "epoch": 0.03206361421059382,
20
+ "eval_loss": 1.7504417896270752,
21
+ "eval_runtime": 184.6482,
22
+ "eval_samples_per_second": 38.029,
23
+ "eval_steps_per_second": 0.596,
24
+ "eval_wer": 0.9570416827223323,
25
  "step": 500
26
  },
27
  {
28
  "epoch": 0.06412722842118763,
29
+ "grad_norm": 7.763198375701904,
30
+ "learning_rate": 0.0002874574468085106,
31
+ "loss": 1.0409,
32
  "step": 1000
33
  },
34
  {
35
  "epoch": 0.06412722842118763,
36
+ "eval_loss": 1.1511156558990479,
37
+ "eval_runtime": 184.729,
38
+ "eval_samples_per_second": 38.012,
39
+ "eval_steps_per_second": 0.595,
40
+ "eval_wer": 0.776093224730597,
41
  "step": 1000
42
  },
43
  {
44
  "epoch": 0.09619084263178146,
45
+ "grad_norm": 3.2267072200775146,
46
+ "learning_rate": 0.0002715,
47
+ "loss": 0.8183,
48
  "step": 1500
49
  },
50
  {
51
  "epoch": 0.09619084263178146,
52
+ "eval_loss": 1.0506497621536255,
53
+ "eval_runtime": 186.2316,
54
+ "eval_samples_per_second": 37.706,
55
+ "eval_steps_per_second": 0.591,
56
+ "eval_wer": 0.70972242522489,
57
  "step": 1500
58
  },
59
  {
60
  "epoch": 0.12825445684237527,
61
+ "grad_norm": 6.372620582580566,
62
+ "learning_rate": 0.00025554255319148935,
63
+ "loss": 0.7091,
64
  "step": 2000
65
  },
66
  {
67
  "epoch": 0.12825445684237527,
68
+ "eval_loss": 0.9421387314796448,
69
+ "eval_runtime": 186.5946,
70
+ "eval_samples_per_second": 37.632,
71
+ "eval_steps_per_second": 0.59,
72
+ "eval_wer": 0.6609707809032807,
73
  "step": 2000
74
  },
75
  {
76
  "epoch": 0.16031807105296908,
77
+ "grad_norm": 5.675894260406494,
78
+ "learning_rate": 0.0002395851063829787,
79
+ "loss": 0.6547,
80
  "step": 2500
81
  },
82
  {
83
  "epoch": 0.16031807105296908,
84
+ "eval_loss": 0.8725515007972717,
85
+ "eval_runtime": 187.2013,
86
+ "eval_samples_per_second": 37.51,
87
+ "eval_steps_per_second": 0.588,
88
+ "eval_wer": 0.6127639219229594,
89
  "step": 2500
90
  },
91
  {
92
  "epoch": 0.19238168526356292,
93
+ "grad_norm": 6.913870334625244,
94
+ "learning_rate": 0.00022362765957446805,
95
+ "loss": 0.6088,
96
  "step": 3000
97
  },
98
  {
99
  "epoch": 0.19238168526356292,
100
+ "eval_loss": 0.8246235847473145,
101
+ "eval_runtime": 188.3497,
102
+ "eval_samples_per_second": 37.282,
103
+ "eval_steps_per_second": 0.584,
104
+ "eval_wer": 0.5989582641278784,
105
  "step": 3000
106
  },
107
  {
108
  "epoch": 0.22444529947415673,
109
+ "grad_norm": 4.30249547958374,
110
+ "learning_rate": 0.00020767021276595744,
111
+ "loss": 0.5781,
112
  "step": 3500
113
  },
114
  {
115
  "epoch": 0.22444529947415673,
116
+ "eval_loss": 0.802536129951477,
117
+ "eval_runtime": 187.0791,
118
+ "eval_samples_per_second": 37.535,
119
+ "eval_steps_per_second": 0.588,
120
+ "eval_wer": 0.5747352476115813,
121
  "step": 3500
122
  },
123
  {
124
  "epoch": 0.25650891368475054,
125
+ "grad_norm": 3.4820008277893066,
126
+ "learning_rate": 0.0001917446808510638,
127
+ "loss": 0.5429,
128
  "step": 4000
129
  },
130
  {
131
  "epoch": 0.25650891368475054,
132
+ "eval_loss": 0.7359501123428345,
133
+ "eval_runtime": 186.7747,
134
+ "eval_samples_per_second": 37.596,
135
+ "eval_steps_per_second": 0.589,
136
+ "eval_wer": 0.53048804794111,
137
  "step": 4000
138
  },
139
  {
140
  "epoch": 0.2885725278953444,
141
+ "grad_norm": 11.696717262268066,
142
+ "learning_rate": 0.00017585106382978722,
143
+ "loss": 0.5104,
144
  "step": 4500
145
  },
146
  {
147
  "epoch": 0.2885725278953444,
148
+ "eval_loss": 0.7335178852081299,
149
+ "eval_runtime": 187.3685,
150
+ "eval_samples_per_second": 37.477,
151
+ "eval_steps_per_second": 0.587,
152
+ "eval_wer": 0.5394039251119468,
153
  "step": 4500
154
  },
155
  {
156
  "epoch": 0.32063614210593816,
157
+ "grad_norm": 7.053103446960449,
158
+ "learning_rate": 0.00015989361702127658,
159
+ "loss": 0.501,
160
  "step": 5000
161
  },
162
  {
163
  "epoch": 0.32063614210593816,
164
+ "eval_loss": 0.6932825446128845,
165
+ "eval_runtime": 186.2726,
166
+ "eval_samples_per_second": 37.697,
167
+ "eval_steps_per_second": 0.591,
168
+ "eval_wer": 0.5087763589736776,
169
  "step": 5000
170
  },
171
  {
172
  "epoch": 0.352699756316532,
173
+ "grad_norm": 6.128586769104004,
174
+ "learning_rate": 0.00014393617021276595,
175
+ "loss": 0.4708,
176
  "step": 5500
177
  },
178
  {
179
  "epoch": 0.352699756316532,
180
+ "eval_loss": 0.6770374774932861,
181
+ "eval_runtime": 188.2655,
182
+ "eval_samples_per_second": 37.298,
183
+ "eval_steps_per_second": 0.584,
184
+ "eval_wer": 0.5112743990751937,
185
  "step": 5500
186
  },
187
  {
188
  "epoch": 0.38476337052712584,
189
+ "grad_norm": 7.154539108276367,
190
+ "learning_rate": 0.00012801063829787234,
191
+ "loss": 0.4526,
192
  "step": 6000
193
  },
194
  {
195
  "epoch": 0.38476337052712584,
196
+ "eval_loss": 0.6608560681343079,
197
+ "eval_runtime": 187.3283,
198
+ "eval_samples_per_second": 37.485,
199
+ "eval_steps_per_second": 0.587,
200
+ "eval_wer": 0.48059368314753054,
201
  "step": 6000
202
  },
203
  {
204
  "epoch": 0.4168269847377196,
205
+ "grad_norm": 5.313536643981934,
206
+ "learning_rate": 0.0001120531914893617,
207
+ "loss": 0.4235,
208
  "step": 6500
209
  },
210
  {
211
  "epoch": 0.4168269847377196,
212
+ "eval_loss": 0.637322187423706,
213
+ "eval_runtime": 186.315,
214
+ "eval_samples_per_second": 37.689,
215
+ "eval_steps_per_second": 0.59,
216
+ "eval_wer": 0.485842224850184,
217
  "step": 6500
218
  },
219
  {
220
  "epoch": 0.44889059894831346,
221
+ "grad_norm": 6.399425983428955,
222
+ "learning_rate": 9.612765957446806e-05,
223
+ "loss": 0.4032,
224
  "step": 7000
225
  },
226
  {
227
  "epoch": 0.44889059894831346,
228
+ "eval_loss": 0.6047533750534058,
229
+ "eval_runtime": 186.8155,
230
+ "eval_samples_per_second": 37.588,
231
+ "eval_steps_per_second": 0.589,
232
+ "eval_wer": 0.4466176802774419,
233
  "step": 7000
234
  },
235
  {
236
  "epoch": 0.4809542131589073,
237
+ "grad_norm": 11.48141098022461,
238
+ "learning_rate": 8.017021276595744e-05,
239
+ "loss": 0.3863,
240
  "step": 7500
241
  },
242
  {
243
  "epoch": 0.4809542131589073,
244
+ "eval_loss": 0.5946004390716553,
245
+ "eval_runtime": 186.2938,
246
+ "eval_samples_per_second": 37.693,
247
+ "eval_steps_per_second": 0.59,
248
+ "eval_wer": 0.4432160937562285,
249
  "step": 7500
250
  },
251
  {
252
  "epoch": 0.5130178273695011,
253
+ "grad_norm": 32.89252471923828,
254
+ "learning_rate": 6.424468085106383e-05,
255
+ "loss": 0.3766,
256
  "step": 8000
257
  },
258
  {
259
  "epoch": 0.5130178273695011,
260
+ "eval_loss": 0.5737225413322449,
261
+ "eval_runtime": 186.9085,
262
+ "eval_samples_per_second": 37.569,
263
+ "eval_steps_per_second": 0.589,
264
+ "eval_wer": 0.4298489217236477,
265
  "step": 8000
266
  },
267
  {
268
  "epoch": 0.5450814415800949,
269
+ "grad_norm": 4.741519451141357,
270
+ "learning_rate": 4.8287234042553194e-05,
271
+ "loss": 0.3746,
272
  "step": 8500
273
  },
274
  {
275
  "epoch": 0.5450814415800949,
276
+ "eval_loss": 0.5668203234672546,
277
+ "eval_runtime": 186.8619,
278
+ "eval_samples_per_second": 37.579,
279
+ "eval_steps_per_second": 0.589,
280
+ "eval_wer": 0.4247731168365245,
281
  "step": 8500
282
  },
283
  {
284
  "epoch": 0.5771450557906888,
285
+ "grad_norm": 13.890504837036133,
286
+ "learning_rate": 3.232978723404255e-05,
287
+ "loss": 0.3586,
288
  "step": 9000
289
  },
290
  {
291
  "epoch": 0.5771450557906888,
292
+ "eval_loss": 0.5485312342643738,
293
+ "eval_runtime": 187.9252,
294
+ "eval_samples_per_second": 37.366,
295
+ "eval_steps_per_second": 0.585,
296
+ "eval_wer": 0.4100772000690947,
297
  "step": 9000
298
  },
299
  {
300
  "epoch": 0.6092086700012825,
301
+ "grad_norm": 8.073569297790527,
302
+ "learning_rate": 1.6372340425531912e-05,
303
+ "loss": 0.3552,
304
  "step": 9500
305
  },
306
  {
307
  "epoch": 0.6092086700012825,
308
+ "eval_loss": 0.5377594828605652,
309
+ "eval_runtime": 187.0305,
310
+ "eval_samples_per_second": 37.545,
311
+ "eval_steps_per_second": 0.588,
312
+ "eval_wer": 0.40320758978992544,
313
  "step": 9500
314
  },
315
  {
316
  "epoch": 0.6412722842118763,
317
+ "grad_norm": 6.519000053405762,
318
+ "learning_rate": 4.1489361702127654e-07,
319
+ "loss": 0.3326,
320
  "step": 10000
321
  },
322
  {
323
  "epoch": 0.6412722842118763,
324
+ "eval_loss": 0.5324302911758423,
325
+ "eval_runtime": 186.1276,
326
+ "eval_samples_per_second": 37.727,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
327
  "eval_steps_per_second": 0.591,
328
+ "eval_wer": 0.40138720950318235,
329
+ "step": 10000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
330
  },
331
  {
332
+ "epoch": 0.6412722842118763,
333
+ "step": 10000,
334
+ "total_flos": 1.1393778193380235e+19,
335
+ "train_loss": 0.73015986328125,
336
+ "train_runtime": 7697.7754,
337
+ "train_samples_per_second": 10.393,
338
+ "train_steps_per_second": 1.299
339
  }
340
  ],
341
  "logging_steps": 500,
342
+ "max_steps": 10000,
343
  "num_input_tokens_seen": 0,
344
+ "num_train_epochs": 1,
345
  "save_steps": 500,
346
  "stateful_callbacks": {
347
  "TrainerControl": {
 
355
  "attributes": {}
356
  }
357
  },
358
+ "total_flos": 1.1393778193380235e+19,
359
  "train_batch_size": 8,
360
  "trial_name": null,
361
  "trial_params": null