DewiBrynJones commited on
Commit
b4c629e
1 Parent(s): 66985d8

End of training

Browse files
README.md CHANGED
@@ -2,6 +2,8 @@
2
  license: apache-2.0
3
  base_model: facebook/wav2vec2-large-xlsr-53
4
  tags:
 
 
5
  - generated_from_trainer
6
  metrics:
7
  - wer
@@ -15,7 +17,7 @@ should probably proofread and complete it, then remove this comment. -->
15
 
16
  # wav2vec2-xlsr-53-ft-btb-ccv-cy
17
 
18
- This model is a fine-tuned version of [facebook/wav2vec2-large-xlsr-53](https://huggingface.co/facebook/wav2vec2-large-xlsr-53) on an unknown dataset.
19
  It achieves the following results on the evaluation set:
20
  - Loss: inf
21
  - Wer: 0.3264
 
2
  license: apache-2.0
3
  base_model: facebook/wav2vec2-large-xlsr-53
4
  tags:
5
+ - automatic-speech-recognition
6
+ - DewiBrynJones/banc-trawsgrifiadau-bangor-clean-with-ccv
7
  - generated_from_trainer
8
  metrics:
9
  - wer
 
17
 
18
  # wav2vec2-xlsr-53-ft-btb-ccv-cy
19
 
20
+ This model is a fine-tuned version of [facebook/wav2vec2-large-xlsr-53](https://huggingface.co/facebook/wav2vec2-large-xlsr-53) on the DEWIBRYNJONES/BANC-TRAWSGRIFIADAU-BANGOR-CLEAN-WITH-CCV - DEFAULT dataset.
21
  It achieves the following results on the evaluation set:
22
  - Loss: inf
23
  - Wer: 0.3264
all_results.json CHANGED
@@ -1,15 +1,15 @@
1
  {
2
- "epoch": 7.722007722007722,
3
  "eval_loss": Infinity,
4
- "eval_runtime": 195.5606,
5
  "eval_samples": 7022,
6
- "eval_samples_per_second": 35.907,
7
- "eval_steps_per_second": 4.49,
8
- "eval_wer": 0.9913261397426408,
9
- "total_flos": 4.216639119976582e+19,
10
- "train_loss": 2.028042752075195,
11
- "train_runtime": 21383.256,
12
  "train_samples": 41435,
13
- "train_samples_per_second": 14.965,
14
- "train_steps_per_second": 0.468
15
  }
 
1
  {
2
+ "epoch": 2.3166023166023164,
3
  "eval_loss": Infinity,
4
+ "eval_runtime": 194.093,
5
  "eval_samples": 7022,
6
+ "eval_samples_per_second": 36.179,
7
+ "eval_steps_per_second": 0.567,
8
+ "eval_wer": 0.3263604845935102,
9
+ "total_flos": 1.2607274134194512e+19,
10
+ "train_loss": 0.8722912038167318,
11
+ "train_runtime": 9671.7075,
12
  "train_samples": 41435,
13
+ "train_samples_per_second": 9.926,
14
+ "train_steps_per_second": 0.62
15
  }
eval_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 7.722007722007722,
3
  "eval_loss": Infinity,
4
- "eval_runtime": 195.5606,
5
  "eval_samples": 7022,
6
- "eval_samples_per_second": 35.907,
7
- "eval_steps_per_second": 4.49,
8
- "eval_wer": 0.9913261397426408
9
  }
 
1
  {
2
+ "epoch": 2.3166023166023164,
3
  "eval_loss": Infinity,
4
+ "eval_runtime": 194.093,
5
  "eval_samples": 7022,
6
+ "eval_samples_per_second": 36.179,
7
+ "eval_steps_per_second": 0.567,
8
+ "eval_wer": 0.3263604845935102
9
  }
runs/Aug16_11-07-52_a142c9bbb9ba/events.out.tfevents.1723813892.a142c9bbb9ba.3157.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a9c5c7f97526d1aeefc4f1d1676844ef4f934ddd7534642bc615c47352742675
3
+ size 406
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 7.722007722007722,
3
- "total_flos": 4.216639119976582e+19,
4
- "train_loss": 2.028042752075195,
5
- "train_runtime": 21383.256,
6
  "train_samples": 41435,
7
- "train_samples_per_second": 14.965,
8
- "train_steps_per_second": 0.468
9
  }
 
1
  {
2
+ "epoch": 2.3166023166023164,
3
+ "total_flos": 1.2607274134194512e+19,
4
+ "train_loss": 0.8722912038167318,
5
+ "train_runtime": 9671.7075,
6
  "train_samples": 41435,
7
+ "train_samples_per_second": 9.926,
8
+ "train_steps_per_second": 0.62
9
  }
trainer_state.json CHANGED
@@ -1,617 +1,381 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 7.722007722007722,
5
  "eval_steps": 200,
6
- "global_step": 10000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.15444015444015444,
13
  "eval_loss": Infinity,
14
- "eval_runtime": 214.0401,
15
- "eval_samples_per_second": 32.807,
16
- "eval_steps_per_second": 4.102,
17
  "eval_wer": 1.0,
18
  "step": 200
19
  },
20
  {
21
- "epoch": 0.3088803088803089,
22
  "eval_loss": Infinity,
23
- "eval_runtime": 192.4639,
24
- "eval_samples_per_second": 36.485,
25
- "eval_steps_per_second": 4.562,
26
- "eval_wer": 0.8748003583810525,
27
  "step": 400
28
  },
29
  {
30
- "epoch": 0.3861003861003861,
31
- "grad_norm": 3.6337695121765137,
32
- "learning_rate": 0.00029699999999999996,
33
- "loss": 3.7622,
34
  "step": 500
35
  },
36
  {
37
- "epoch": 0.46332046332046334,
38
  "eval_loss": Infinity,
39
- "eval_runtime": 193.7023,
40
- "eval_samples_per_second": 36.251,
41
- "eval_steps_per_second": 4.533,
42
- "eval_wer": 0.6793917909963253,
43
  "step": 600
44
  },
45
  {
46
- "epoch": 0.6177606177606177,
47
  "eval_loss": Infinity,
48
- "eval_runtime": 193.7574,
49
- "eval_samples_per_second": 36.241,
50
- "eval_steps_per_second": 4.531,
51
- "eval_wer": 0.5749289081064236,
52
  "step": 800
53
  },
54
  {
55
- "epoch": 0.7722007722007722,
56
- "grad_norm": 2.0483286380767822,
57
- "learning_rate": 0.00028443157894736843,
58
- "loss": 0.8615,
59
  "step": 1000
60
  },
61
  {
62
- "epoch": 0.7722007722007722,
63
  "eval_loss": Infinity,
64
- "eval_runtime": 193.3223,
65
- "eval_samples_per_second": 36.323,
66
- "eval_steps_per_second": 4.542,
67
- "eval_wer": 0.5193149208575175,
68
  "step": 1000
69
  },
70
  {
71
- "epoch": 0.9266409266409267,
72
  "eval_loss": Infinity,
73
- "eval_runtime": 193.499,
74
- "eval_samples_per_second": 36.29,
75
- "eval_steps_per_second": 4.537,
76
- "eval_wer": 0.5194317842442185,
77
  "step": 1200
78
  },
79
  {
80
- "epoch": 1.0810810810810811,
81
  "eval_loss": Infinity,
82
- "eval_runtime": 194.7994,
83
- "eval_samples_per_second": 36.047,
84
- "eval_steps_per_second": 4.507,
85
- "eval_wer": 0.4780491605313389,
86
  "step": 1400
87
  },
88
  {
89
- "epoch": 1.1583011583011582,
90
- "grad_norm": 0.5093265175819397,
91
- "learning_rate": 0.00026864210526315787,
92
- "loss": 0.6742,
93
  "step": 1500
94
  },
95
  {
96
- "epoch": 1.2355212355212355,
97
  "eval_loss": Infinity,
98
- "eval_runtime": 193.9458,
99
- "eval_samples_per_second": 36.206,
100
- "eval_steps_per_second": 4.527,
101
- "eval_wer": 0.4446911560385909,
102
  "step": 1600
103
  },
104
  {
105
- "epoch": 1.3899613899613898,
106
  "eval_loss": Infinity,
107
- "eval_runtime": 194.4956,
108
- "eval_samples_per_second": 36.104,
109
- "eval_steps_per_second": 4.514,
110
- "eval_wer": 0.42839520600418113,
111
  "step": 1800
112
  },
113
  {
114
- "epoch": 1.5444015444015444,
115
- "grad_norm": 0.8163366317749023,
116
- "learning_rate": 0.00025288421052631577,
117
- "loss": 0.5813,
118
  "step": 2000
119
  },
120
  {
121
- "epoch": 1.5444015444015444,
122
  "eval_loss": Infinity,
123
- "eval_runtime": 196.3165,
124
- "eval_samples_per_second": 35.769,
125
- "eval_steps_per_second": 4.472,
126
- "eval_wer": 0.4189552413228935,
127
  "step": 2000
128
  },
129
  {
130
- "epoch": 1.698841698841699,
131
  "eval_loss": Infinity,
132
- "eval_runtime": 194.273,
133
- "eval_samples_per_second": 36.145,
134
- "eval_steps_per_second": 4.519,
135
- "eval_wer": 0.4159687325516471,
136
  "step": 2200
137
  },
138
  {
139
- "epoch": 1.8532818532818531,
140
  "eval_loss": Infinity,
141
- "eval_runtime": 194.413,
142
- "eval_samples_per_second": 36.119,
143
- "eval_steps_per_second": 4.516,
144
- "eval_wer": 0.41259267915806425,
145
  "step": 2400
146
  },
147
  {
148
- "epoch": 1.9305019305019306,
149
- "grad_norm": 0.7404251098632812,
150
- "learning_rate": 0.00023718947368421052,
151
- "loss": 0.568,
152
  "step": 2500
153
  },
154
  {
155
- "epoch": 2.0077220077220077,
156
  "eval_loss": Infinity,
157
- "eval_runtime": 194.5283,
158
- "eval_samples_per_second": 36.098,
159
- "eval_steps_per_second": 4.513,
160
- "eval_wer": 0.3879085349226754,
161
  "step": 2600
162
  },
163
  {
164
- "epoch": 2.1621621621621623,
165
  "eval_loss": Infinity,
166
- "eval_runtime": 194.5649,
167
- "eval_samples_per_second": 36.091,
168
- "eval_steps_per_second": 4.513,
169
- "eval_wer": 0.3813382156259333,
170
  "step": 2800
171
  },
172
  {
173
- "epoch": 2.3166023166023164,
174
- "grad_norm": 0.8203113079071045,
175
- "learning_rate": 0.0002214315789473684,
176
- "loss": 0.4909,
177
  "step": 3000
178
  },
179
  {
180
- "epoch": 2.3166023166023164,
181
  "eval_loss": Infinity,
182
- "eval_runtime": 195.9563,
183
- "eval_samples_per_second": 35.835,
184
- "eval_steps_per_second": 4.481,
185
- "eval_wer": 0.38915507771415214,
186
  "step": 3000
187
  },
188
  {
189
- "epoch": 2.471042471042471,
190
  "eval_loss": Infinity,
191
- "eval_runtime": 195.0882,
192
- "eval_samples_per_second": 35.994,
193
- "eval_steps_per_second": 4.501,
194
- "eval_wer": 0.4131899809123135,
195
  "step": 3200
196
  },
197
  {
198
- "epoch": 2.6254826254826256,
199
  "eval_loss": Infinity,
200
- "eval_runtime": 195.2925,
201
- "eval_samples_per_second": 35.956,
202
- "eval_steps_per_second": 4.496,
203
- "eval_wer": 0.5341825406100269,
204
  "step": 3400
205
  },
206
  {
207
- "epoch": 2.7027027027027026,
208
- "grad_norm": 13.376907348632812,
209
- "learning_rate": 0.0002057052631578947,
210
- "loss": 0.5703,
211
  "step": 3500
212
  },
213
  {
214
- "epoch": 2.7799227799227797,
215
  "eval_loss": Infinity,
216
- "eval_runtime": 196.3521,
217
- "eval_samples_per_second": 35.762,
218
- "eval_steps_per_second": 4.472,
219
- "eval_wer": 0.5748769688234454,
220
  "step": 3600
221
  },
222
  {
223
- "epoch": 2.9343629343629343,
224
  "eval_loss": Infinity,
225
- "eval_runtime": 195.7385,
226
- "eval_samples_per_second": 35.874,
227
- "eval_steps_per_second": 4.486,
228
- "eval_wer": 0.7368366379702128,
229
  "step": 3800
230
  },
231
  {
232
- "epoch": 3.088803088803089,
233
- "grad_norm": 6.712296485900879,
234
- "learning_rate": 0.00018994736842105263,
235
- "loss": 1.1938,
236
  "step": 4000
237
  },
238
  {
239
- "epoch": 3.088803088803089,
240
  "eval_loss": Infinity,
241
- "eval_runtime": 195.4685,
242
- "eval_samples_per_second": 35.924,
243
- "eval_steps_per_second": 4.492,
244
- "eval_wer": 0.83566410865698,
245
  "step": 4000
246
  },
247
  {
248
- "epoch": 3.2432432432432434,
249
  "eval_loss": Infinity,
250
- "eval_runtime": 196.3276,
251
- "eval_samples_per_second": 35.767,
252
- "eval_steps_per_second": 4.472,
253
- "eval_wer": 0.81303156609923,
254
  "step": 4200
255
  },
256
  {
257
- "epoch": 3.3976833976833976,
258
  "eval_loss": Infinity,
259
- "eval_runtime": 196.3772,
260
- "eval_samples_per_second": 35.758,
261
- "eval_steps_per_second": 4.471,
262
- "eval_wer": 0.6570319296742109,
263
  "step": 4400
264
  },
265
  {
266
- "epoch": 3.474903474903475,
267
- "grad_norm": 1.9403835535049438,
268
- "learning_rate": 0.0001741894736842105,
269
- "loss": 1.1654,
270
  "step": 4500
271
  },
272
  {
273
- "epoch": 3.552123552123552,
274
  "eval_loss": Infinity,
275
- "eval_runtime": 195.2624,
276
- "eval_samples_per_second": 35.962,
277
- "eval_steps_per_second": 4.497,
278
- "eval_wer": 0.5719294145144326,
279
  "step": 4600
280
  },
281
  {
282
- "epoch": 3.7065637065637067,
283
  "eval_loss": Infinity,
284
- "eval_runtime": 195.7006,
285
- "eval_samples_per_second": 35.881,
286
- "eval_steps_per_second": 4.486,
287
- "eval_wer": 0.6034176048199654,
288
  "step": 4800
289
  },
290
  {
291
- "epoch": 3.861003861003861,
292
- "grad_norm": 5.047078609466553,
293
- "learning_rate": 0.0001584315789473684,
294
- "loss": 1.0408,
295
  "step": 5000
296
  },
297
  {
298
- "epoch": 3.861003861003861,
299
  "eval_loss": Infinity,
300
- "eval_runtime": 196.2029,
301
- "eval_samples_per_second": 35.789,
302
- "eval_steps_per_second": 4.475,
303
- "eval_wer": 0.6953371508706322,
304
  "step": 5000
305
  },
306
  {
307
- "epoch": 4.015444015444015,
308
  "eval_loss": Infinity,
309
- "eval_runtime": 196.4203,
310
- "eval_samples_per_second": 35.75,
311
- "eval_steps_per_second": 4.47,
312
- "eval_wer": 0.998039292067573,
313
  "step": 5200
314
  },
315
  {
316
- "epoch": 4.1698841698841695,
317
  "eval_loss": Infinity,
318
- "eval_runtime": 197.5397,
319
- "eval_samples_per_second": 35.547,
320
- "eval_steps_per_second": 4.445,
321
- "eval_wer": 0.9477101268616986,
322
  "step": 5400
323
  },
324
  {
325
- "epoch": 4.2471042471042475,
326
- "grad_norm": 0.5360209941864014,
327
- "learning_rate": 0.00014270526315789472,
328
- "loss": 1.7263,
329
  "step": 5500
330
  },
331
  {
332
- "epoch": 4.324324324324325,
333
  "eval_loss": Infinity,
334
- "eval_runtime": 196.1748,
335
- "eval_samples_per_second": 35.795,
336
- "eval_steps_per_second": 4.476,
337
- "eval_wer": 0.9963123109085479,
338
  "step": 5600
339
  },
340
  {
341
- "epoch": 4.478764478764479,
342
  "eval_loss": Infinity,
343
- "eval_runtime": 197.0555,
344
- "eval_samples_per_second": 35.635,
345
- "eval_steps_per_second": 4.456,
346
- "eval_wer": 0.9998311973303209,
347
  "step": 5800
348
  },
349
  {
350
- "epoch": 4.633204633204633,
351
- "grad_norm": 0.9715490341186523,
352
- "learning_rate": 0.0001269157894736842,
353
- "loss": 2.8212,
354
  "step": 6000
355
  },
356
  {
357
- "epoch": 4.633204633204633,
358
  "eval_loss": Infinity,
359
- "eval_runtime": 195.6507,
360
- "eval_samples_per_second": 35.89,
361
- "eval_steps_per_second": 4.488,
362
- "eval_wer": 0.9975848233415138,
363
  "step": 6000
364
  },
365
  {
366
- "epoch": 4.787644787644788,
367
- "eval_loss": Infinity,
368
- "eval_runtime": 194.5522,
369
- "eval_samples_per_second": 36.093,
370
- "eval_steps_per_second": 4.513,
371
- "eval_wer": 0.9958578421824886,
372
- "step": 6200
373
- },
374
- {
375
- "epoch": 4.942084942084942,
376
- "eval_loss": Infinity,
377
- "eval_runtime": 195.551,
378
- "eval_samples_per_second": 35.909,
379
- "eval_steps_per_second": 4.49,
380
- "eval_wer": 0.9917416540064664,
381
- "step": 6400
382
- },
383
- {
384
- "epoch": 5.019305019305019,
385
- "grad_norm": 1.0234254598617554,
386
- "learning_rate": 0.00011125263157894737,
387
- "loss": 2.7652,
388
- "step": 6500
389
- },
390
- {
391
- "epoch": 5.096525096525096,
392
- "eval_loss": Infinity,
393
- "eval_runtime": 196.4433,
394
- "eval_samples_per_second": 35.746,
395
- "eval_steps_per_second": 4.469,
396
- "eval_wer": 0.9897290067910612,
397
- "step": 6600
398
- },
399
- {
400
- "epoch": 5.250965250965251,
401
- "eval_loss": Infinity,
402
- "eval_runtime": 195.3652,
403
- "eval_samples_per_second": 35.943,
404
- "eval_steps_per_second": 4.494,
405
- "eval_wer": 0.9902094451586095,
406
- "step": 6800
407
- },
408
- {
409
- "epoch": 5.405405405405405,
410
- "grad_norm": 0.9109746217727661,
411
- "learning_rate": 9.549473684210525e-05,
412
- "loss": 2.7358,
413
- "step": 7000
414
- },
415
- {
416
- "epoch": 5.405405405405405,
417
- "eval_loss": Infinity,
418
- "eval_runtime": 194.6043,
419
- "eval_samples_per_second": 36.083,
420
- "eval_steps_per_second": 4.512,
421
- "eval_wer": 0.9889499175463883,
422
- "step": 7000
423
- },
424
- {
425
- "epoch": 5.559845559845559,
426
- "eval_loss": Infinity,
427
- "eval_runtime": 193.7693,
428
- "eval_samples_per_second": 36.239,
429
- "eval_steps_per_second": 4.531,
430
- "eval_wer": 0.990456156752756,
431
- "step": 7200
432
- },
433
- {
434
- "epoch": 5.714285714285714,
435
- "eval_loss": Infinity,
436
- "eval_runtime": 195.6666,
437
- "eval_samples_per_second": 35.888,
438
- "eval_steps_per_second": 4.487,
439
- "eval_wer": 0.9887161907729863,
440
- "step": 7400
441
- },
442
- {
443
- "epoch": 5.7915057915057915,
444
- "grad_norm": 1.3490198850631714,
445
- "learning_rate": 7.973684210526315e-05,
446
- "loss": 2.7122,
447
- "step": 7500
448
- },
449
- {
450
- "epoch": 5.8687258687258685,
451
- "eval_loss": Infinity,
452
- "eval_runtime": 194.1989,
453
- "eval_samples_per_second": 36.159,
454
- "eval_steps_per_second": 4.521,
455
- "eval_wer": 0.9878202381416125,
456
- "step": 7600
457
- },
458
- {
459
- "epoch": 6.023166023166024,
460
- "eval_loss": Infinity,
461
- "eval_runtime": 194.0207,
462
- "eval_samples_per_second": 36.192,
463
- "eval_steps_per_second": 4.525,
464
- "eval_wer": 0.9847298508044097,
465
- "step": 7800
466
- },
467
- {
468
- "epoch": 6.177606177606178,
469
- "grad_norm": 2.4652857780456543,
470
- "learning_rate": 6.401052631578946e-05,
471
- "loss": 2.7345,
472
- "step": 8000
473
- },
474
- {
475
- "epoch": 6.177606177606178,
476
- "eval_loss": Infinity,
477
- "eval_runtime": 193.3926,
478
- "eval_samples_per_second": 36.31,
479
- "eval_steps_per_second": 4.54,
480
- "eval_wer": 0.9842494124368613,
481
- "step": 8000
482
- },
483
- {
484
- "epoch": 6.332046332046332,
485
- "eval_loss": Infinity,
486
- "eval_runtime": 195.2901,
487
- "eval_samples_per_second": 35.957,
488
- "eval_steps_per_second": 4.496,
489
- "eval_wer": 0.9882227675846935,
490
- "step": 8200
491
- },
492
- {
493
- "epoch": 6.486486486486487,
494
- "eval_loss": Infinity,
495
- "eval_runtime": 195.6895,
496
- "eval_samples_per_second": 35.883,
497
- "eval_steps_per_second": 4.487,
498
- "eval_wer": 0.9871580122836404,
499
- "step": 8400
500
- },
501
- {
502
- "epoch": 6.563706563706564,
503
- "grad_norm": 2.1805906295776367,
504
- "learning_rate": 4.828421052631579e-05,
505
- "loss": 3.035,
506
- "step": 8500
507
- },
508
- {
509
- "epoch": 6.640926640926641,
510
- "eval_loss": Infinity,
511
- "eval_runtime": 196.9379,
512
- "eval_samples_per_second": 35.656,
513
- "eval_steps_per_second": 4.458,
514
- "eval_wer": 0.9920532897043356,
515
- "step": 8600
516
- },
517
- {
518
- "epoch": 6.795366795366795,
519
- "eval_loss": Infinity,
520
- "eval_runtime": 196.3085,
521
- "eval_samples_per_second": 35.77,
522
- "eval_steps_per_second": 4.473,
523
- "eval_wer": 0.990573020139457,
524
- "step": 8800
525
- },
526
- {
527
- "epoch": 6.94980694980695,
528
- "grad_norm": 0.7723463177680969,
529
- "learning_rate": 3.25578947368421e-05,
530
- "loss": 3.688,
531
- "step": 9000
532
- },
533
- {
534
- "epoch": 6.94980694980695,
535
- "eval_loss": Infinity,
536
- "eval_runtime": 196.6042,
537
- "eval_samples_per_second": 35.716,
538
- "eval_steps_per_second": 4.466,
539
- "eval_wer": 0.9915858361575318,
540
- "step": 9000
541
- },
542
- {
543
- "epoch": 7.104247104247104,
544
- "eval_loss": Infinity,
545
- "eval_runtime": 195.2386,
546
- "eval_samples_per_second": 35.966,
547
- "eval_steps_per_second": 4.497,
548
- "eval_wer": 0.990573020139457,
549
- "step": 9200
550
- },
551
- {
552
- "epoch": 7.258687258687258,
553
- "eval_loss": Infinity,
554
- "eval_runtime": 196.3464,
555
- "eval_samples_per_second": 35.763,
556
- "eval_steps_per_second": 4.472,
557
- "eval_wer": 0.9908067469128589,
558
- "step": 9400
559
- },
560
- {
561
- "epoch": 7.335907335907336,
562
- "grad_norm": 0.0,
563
- "learning_rate": 1.6831578947368418e-05,
564
- "loss": 3.7017,
565
- "step": 9500
566
- },
567
- {
568
- "epoch": 7.413127413127413,
569
- "eval_loss": Infinity,
570
- "eval_runtime": 196.9124,
571
- "eval_samples_per_second": 35.661,
572
- "eval_steps_per_second": 4.459,
573
- "eval_wer": 0.9911962915351954,
574
- "step": 9600
575
- },
576
- {
577
- "epoch": 7.5675675675675675,
578
- "eval_loss": Infinity,
579
- "eval_runtime": 195.3324,
580
- "eval_samples_per_second": 35.949,
581
- "eval_steps_per_second": 4.495,
582
- "eval_wer": 0.9913001701011518,
583
- "step": 9800
584
- },
585
- {
586
- "epoch": 7.722007722007722,
587
- "grad_norm": 0.0,
588
- "learning_rate": 1.1052631578947367e-06,
589
- "loss": 3.7327,
590
- "step": 10000
591
- },
592
- {
593
- "epoch": 7.722007722007722,
594
- "eval_loss": Infinity,
595
- "eval_runtime": 198.1495,
596
- "eval_samples_per_second": 35.438,
597
- "eval_steps_per_second": 4.431,
598
- "eval_wer": 0.9913261397426408,
599
- "step": 10000
600
- },
601
- {
602
- "epoch": 7.722007722007722,
603
- "step": 10000,
604
- "total_flos": 4.216639119976582e+19,
605
- "train_loss": 2.028042752075195,
606
- "train_runtime": 21383.256,
607
- "train_samples_per_second": 14.965,
608
- "train_steps_per_second": 0.468
609
  }
610
  ],
611
  "logging_steps": 500,
612
- "max_steps": 10000,
613
  "num_input_tokens_seen": 0,
614
- "num_train_epochs": 8,
615
  "save_steps": 500,
616
  "stateful_callbacks": {
617
  "TrainerControl": {
@@ -625,8 +389,8 @@
625
  "attributes": {}
626
  }
627
  },
628
- "total_flos": 4.216639119976582e+19,
629
- "train_batch_size": 32,
630
  "trial_name": null,
631
  "trial_params": null
632
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 2.3166023166023164,
5
  "eval_steps": 200,
6
+ "global_step": 6000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.07722007722007722,
13
  "eval_loss": Infinity,
14
+ "eval_runtime": 223.773,
15
+ "eval_samples_per_second": 31.38,
16
+ "eval_steps_per_second": 0.492,
17
  "eval_wer": 1.0,
18
  "step": 200
19
  },
20
  {
21
+ "epoch": 0.15444015444015444,
22
  "eval_loss": Infinity,
23
+ "eval_runtime": 201.0458,
24
+ "eval_samples_per_second": 34.927,
25
+ "eval_steps_per_second": 0.547,
26
+ "eval_wer": 0.8963291911755158,
27
  "step": 400
28
  },
29
  {
30
+ "epoch": 0.19305019305019305,
31
+ "grad_norm": 4.3846588134765625,
32
+ "learning_rate": 0.00024799999999999996,
33
+ "loss": 3.9177,
34
  "step": 500
35
  },
36
  {
37
+ "epoch": 0.23166023166023167,
38
  "eval_loss": Infinity,
39
+ "eval_runtime": 194.7647,
40
+ "eval_samples_per_second": 36.054,
41
+ "eval_steps_per_second": 0.565,
42
+ "eval_wer": 0.7594821653487074,
43
  "step": 600
44
  },
45
  {
46
+ "epoch": 0.3088803088803089,
47
  "eval_loss": Infinity,
48
+ "eval_runtime": 194.859,
49
+ "eval_samples_per_second": 36.036,
50
+ "eval_steps_per_second": 0.565,
51
+ "eval_wer": 0.7512108345344293,
52
  "step": 800
53
  },
54
  {
55
+ "epoch": 0.3861003861003861,
56
+ "grad_norm": 3.36423921585083,
57
+ "learning_rate": 0.00027805555555555553,
58
+ "loss": 0.9791,
59
  "step": 1000
60
  },
61
  {
62
+ "epoch": 0.3861003861003861,
63
  "eval_loss": Infinity,
64
+ "eval_runtime": 195.6019,
65
+ "eval_samples_per_second": 35.899,
66
+ "eval_steps_per_second": 0.562,
67
+ "eval_wer": 0.598444418474803,
68
  "step": 1000
69
  },
70
  {
71
+ "epoch": 0.46332046332046334,
72
  "eval_loss": Infinity,
73
+ "eval_runtime": 197.8836,
74
+ "eval_samples_per_second": 35.486,
75
+ "eval_steps_per_second": 0.556,
76
+ "eval_wer": 0.5867710646254528,
77
  "step": 1200
78
  },
79
  {
80
+ "epoch": 0.5405405405405406,
81
  "eval_loss": Infinity,
82
+ "eval_runtime": 203.8782,
83
+ "eval_samples_per_second": 34.442,
84
+ "eval_steps_per_second": 0.54,
85
+ "eval_wer": 0.5255476348149014,
86
  "step": 1400
87
  },
88
  {
89
+ "epoch": 0.5791505791505791,
90
+ "grad_norm": 2.6850786209106445,
91
+ "learning_rate": 0.0002503333333333333,
92
+ "loss": 0.805,
93
  "step": 1500
94
  },
95
  {
96
+ "epoch": 0.6177606177606177,
97
  "eval_loss": Infinity,
98
+ "eval_runtime": 199.2388,
99
+ "eval_samples_per_second": 35.244,
100
+ "eval_steps_per_second": 0.552,
101
+ "eval_wer": 0.5281575837845559,
102
  "step": 1600
103
  },
104
  {
105
+ "epoch": 0.694980694980695,
106
  "eval_loss": Infinity,
107
+ "eval_runtime": 194.9574,
108
+ "eval_samples_per_second": 36.018,
109
+ "eval_steps_per_second": 0.564,
110
+ "eval_wer": 0.4768805266643294,
111
  "step": 1800
112
  },
113
  {
114
+ "epoch": 0.7722007722007722,
115
+ "grad_norm": 2.9242658615112305,
116
+ "learning_rate": 0.0002226111111111111,
117
+ "loss": 0.7184,
118
  "step": 2000
119
  },
120
  {
121
+ "epoch": 0.7722007722007722,
122
  "eval_loss": Infinity,
123
+ "eval_runtime": 198.941,
124
+ "eval_samples_per_second": 35.297,
125
+ "eval_steps_per_second": 0.553,
126
+ "eval_wer": 0.4743095321569086,
127
  "step": 2000
128
  },
129
  {
130
+ "epoch": 0.8494208494208494,
131
  "eval_loss": Infinity,
132
+ "eval_runtime": 207.8762,
133
+ "eval_samples_per_second": 33.78,
134
+ "eval_steps_per_second": 0.529,
135
+ "eval_wer": 0.46802487891654654,
136
  "step": 2200
137
  },
138
  {
139
+ "epoch": 0.9266409266409267,
140
  "eval_loss": Infinity,
141
+ "eval_runtime": 215.534,
142
+ "eval_samples_per_second": 32.58,
143
+ "eval_steps_per_second": 0.51,
144
+ "eval_wer": 0.457026735745913,
145
  "step": 2400
146
  },
147
  {
148
+ "epoch": 0.9652509652509652,
149
+ "grad_norm": 3.865280866622925,
150
+ "learning_rate": 0.00019483333333333332,
151
+ "loss": 0.6704,
152
  "step": 2500
153
  },
154
  {
155
+ "epoch": 1.0038610038610039,
156
  "eval_loss": Infinity,
157
+ "eval_runtime": 212.4212,
158
+ "eval_samples_per_second": 33.057,
159
+ "eval_steps_per_second": 0.518,
160
+ "eval_wer": 0.4252528793840001,
161
  "step": 2600
162
  },
163
  {
164
+ "epoch": 1.0810810810810811,
165
  "eval_loss": Infinity,
166
+ "eval_runtime": 211.2545,
167
+ "eval_samples_per_second": 33.24,
168
+ "eval_steps_per_second": 0.521,
169
+ "eval_wer": 0.4163972316362173,
170
  "step": 2800
171
  },
172
  {
173
+ "epoch": 1.1583011583011582,
174
+ "grad_norm": 0.6646206974983215,
175
+ "learning_rate": 0.00016716666666666665,
176
+ "loss": 0.5664,
177
  "step": 3000
178
  },
179
  {
180
+ "epoch": 1.1583011583011582,
181
  "eval_loss": Infinity,
182
+ "eval_runtime": 202.1583,
183
+ "eval_samples_per_second": 34.735,
184
+ "eval_steps_per_second": 0.544,
185
+ "eval_wer": 0.41592977808941345,
186
  "step": 3000
187
  },
188
  {
189
+ "epoch": 1.2355212355212355,
190
  "eval_loss": Infinity,
191
+ "eval_runtime": 198.8952,
192
+ "eval_samples_per_second": 35.305,
193
+ "eval_steps_per_second": 0.553,
194
+ "eval_wer": 0.3995039798475582,
195
  "step": 3200
196
  },
197
  {
198
+ "epoch": 1.3127413127413128,
199
  "eval_loss": Infinity,
200
+ "eval_runtime": 197.172,
201
+ "eval_samples_per_second": 35.614,
202
+ "eval_steps_per_second": 0.558,
203
+ "eval_wer": 0.3940633399555919,
204
  "step": 3400
205
  },
206
  {
207
+ "epoch": 1.3513513513513513,
208
+ "grad_norm": 1.06748366355896,
209
+ "learning_rate": 0.00013944444444444442,
210
+ "loss": 0.5359,
211
  "step": 3500
212
  },
213
  {
214
+ "epoch": 1.3899613899613898,
215
  "eval_loss": Infinity,
216
+ "eval_runtime": 194.4658,
217
+ "eval_samples_per_second": 36.109,
218
+ "eval_steps_per_second": 0.566,
219
+ "eval_wer": 0.38185760845571526,
220
  "step": 3600
221
  },
222
  {
223
+ "epoch": 1.4671814671814671,
224
  "eval_loss": Infinity,
225
+ "eval_runtime": 201.7117,
226
+ "eval_samples_per_second": 34.812,
227
+ "eval_steps_per_second": 0.545,
228
+ "eval_wer": 0.3810785192110423,
229
  "step": 3800
230
  },
231
  {
232
+ "epoch": 1.5444015444015444,
233
+ "grad_norm": 0.8601678013801575,
234
+ "learning_rate": 0.00011166666666666667,
235
+ "loss": 0.5172,
236
  "step": 4000
237
  },
238
  {
239
+ "epoch": 1.5444015444015444,
240
  "eval_loss": Infinity,
241
+ "eval_runtime": 196.5164,
242
+ "eval_samples_per_second": 35.732,
243
+ "eval_steps_per_second": 0.56,
244
+ "eval_wer": 0.36905457520158935,
245
  "step": 4000
246
  },
247
  {
248
+ "epoch": 1.6216216216216215,
249
  "eval_loss": Infinity,
250
+ "eval_runtime": 193.7969,
251
+ "eval_samples_per_second": 36.234,
252
+ "eval_steps_per_second": 0.568,
253
+ "eval_wer": 0.36086115331177854,
254
  "step": 4200
255
  },
256
  {
257
+ "epoch": 1.698841698841699,
258
  "eval_loss": Infinity,
259
+ "eval_runtime": 196.3411,
260
+ "eval_samples_per_second": 35.764,
261
+ "eval_steps_per_second": 0.56,
262
+ "eval_wer": 0.3599652006804046,
263
  "step": 4400
264
  },
265
  {
266
+ "epoch": 1.7374517374517375,
267
+ "grad_norm": 0.6527121663093567,
268
+ "learning_rate": 8.394444444444443e-05,
269
+ "loss": 0.4817,
270
  "step": 4500
271
  },
272
  {
273
+ "epoch": 1.776061776061776,
274
  "eval_loss": Infinity,
275
+ "eval_runtime": 194.384,
276
+ "eval_samples_per_second": 36.124,
277
+ "eval_steps_per_second": 0.566,
278
+ "eval_wer": 0.35086284133847534,
279
  "step": 4600
280
  },
281
  {
282
+ "epoch": 1.8532818532818531,
283
  "eval_loss": Infinity,
284
+ "eval_runtime": 196.7828,
285
+ "eval_samples_per_second": 35.684,
286
+ "eval_steps_per_second": 0.559,
287
+ "eval_wer": 0.3529663822990924,
288
  "step": 4800
289
  },
290
  {
291
+ "epoch": 1.9305019305019306,
292
+ "grad_norm": 0.7631692886352539,
293
+ "learning_rate": 5.6166666666666665e-05,
294
+ "loss": 0.4818,
295
  "step": 5000
296
  },
297
  {
298
+ "epoch": 1.9305019305019306,
299
  "eval_loss": Infinity,
300
+ "eval_runtime": 194.6791,
301
+ "eval_samples_per_second": 36.07,
302
+ "eval_steps_per_second": 0.565,
303
+ "eval_wer": 0.34340955423110386,
304
  "step": 5000
305
  },
306
  {
307
+ "epoch": 2.0077220077220077,
308
  "eval_loss": Infinity,
309
+ "eval_runtime": 200.454,
310
+ "eval_samples_per_second": 35.03,
311
+ "eval_steps_per_second": 0.549,
312
+ "eval_wer": 0.336254918000857,
313
  "step": 5200
314
  },
315
  {
316
+ "epoch": 2.0849420849420848,
317
  "eval_loss": Infinity,
318
+ "eval_runtime": 193.7233,
319
+ "eval_samples_per_second": 36.248,
320
+ "eval_steps_per_second": 0.568,
321
+ "eval_wer": 0.33718982509446455,
322
  "step": 5400
323
  },
324
  {
325
+ "epoch": 2.1235521235521237,
326
+ "grad_norm": 1.005771279335022,
327
+ "learning_rate": 2.844444444444444e-05,
328
+ "loss": 0.4196,
329
  "step": 5500
330
  },
331
  {
332
+ "epoch": 2.1621621621621623,
333
  "eval_loss": Infinity,
334
+ "eval_runtime": 198.8274,
335
+ "eval_samples_per_second": 35.317,
336
+ "eval_steps_per_second": 0.553,
337
+ "eval_wer": 0.3320348512588784,
338
  "step": 5600
339
  },
340
  {
341
+ "epoch": 2.2393822393822393,
342
  "eval_loss": Infinity,
343
+ "eval_runtime": 197.3434,
344
+ "eval_samples_per_second": 35.583,
345
+ "eval_steps_per_second": 0.557,
346
+ "eval_wer": 0.3292690844402893,
347
  "step": 5800
348
  },
349
  {
350
+ "epoch": 2.3166023166023164,
351
+ "grad_norm": 1.702697515487671,
352
+ "learning_rate": 7.222222222222222e-07,
353
+ "loss": 0.3743,
354
  "step": 6000
355
  },
356
  {
357
+ "epoch": 2.3166023166023164,
358
  "eval_loss": Infinity,
359
+ "eval_runtime": 192.6595,
360
+ "eval_samples_per_second": 36.448,
361
+ "eval_steps_per_second": 0.571,
362
+ "eval_wer": 0.3263604845935102,
363
  "step": 6000
364
  },
365
  {
366
+ "epoch": 2.3166023166023164,
367
+ "step": 6000,
368
+ "total_flos": 1.2607274134194512e+19,
369
+ "train_loss": 0.8722912038167318,
370
+ "train_runtime": 9671.7075,
371
+ "train_samples_per_second": 9.926,
372
+ "train_steps_per_second": 0.62
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
373
  }
374
  ],
375
  "logging_steps": 500,
376
+ "max_steps": 6000,
377
  "num_input_tokens_seen": 0,
378
+ "num_train_epochs": 3,
379
  "save_steps": 500,
380
  "stateful_callbacks": {
381
  "TrainerControl": {
 
389
  "attributes": {}
390
  }
391
  },
392
+ "total_flos": 1.2607274134194512e+19,
393
+ "train_batch_size": 16,
394
  "trial_name": null,
395
  "trial_params": null
396
  }