DewiBrynJones committed on
Commit
d59d9a7
1 Parent(s): 21a680a

End of training

Browse files
README.md CHANGED
@@ -2,6 +2,8 @@
2
  license: apache-2.0
3
  base_model: facebook/wav2vec2-large-xlsr-53
4
  tags:
 
 
5
  - generated_from_trainer
6
  metrics:
7
  - wer
@@ -15,7 +17,7 @@ should probably proofread and complete it, then remove this comment. -->
15
 
16
  # wav2vec2-xlsr-53-ft-btb-ccv-cy
17
 
18
- This model is a fine-tuned version of [facebook/wav2vec2-large-xlsr-53](https://huggingface.co/facebook/wav2vec2-large-xlsr-53) on an unknown dataset.
19
  It achieves the following results on the evaluation set:
20
  - Loss: 0.4122
21
  - Wer: 0.3223
 
2
  license: apache-2.0
3
  base_model: facebook/wav2vec2-large-xlsr-53
4
  tags:
5
+ - automatic-speech-recognition
6
+ - DewiBrynJones/banc-trawsgrifiadau-bangor-clean-with-ccv
7
  - generated_from_trainer
8
  metrics:
9
  - wer
 
17
 
18
  # wav2vec2-xlsr-53-ft-btb-ccv-cy
19
 
20
+ This model is a fine-tuned version of [facebook/wav2vec2-large-xlsr-53](https://huggingface.co/facebook/wav2vec2-large-xlsr-53) on the DEWIBRYNJONES/BANC-TRAWSGRIFIADAU-BANGOR-CLEAN-WITH-CCV - DEFAULT dataset.
21
  It achieves the following results on the evaluation set:
22
  - Loss: 0.4122
23
  - Wer: 0.3223
all_results.json CHANGED
@@ -1,15 +1,15 @@
1
  {
2
  "epoch": 1.9357336430507162,
3
- "eval_loss": 0.41177985072135925,
4
- "eval_runtime": 150.8127,
5
  "eval_samples": 5656,
6
- "eval_samples_per_second": 37.503,
7
- "eval_steps_per_second": 4.688,
8
- "eval_wer": 0.32191747845484747,
9
  "total_flos": 1.1255918428180738e+19,
10
- "train_loss": 0.7339932418823242,
11
- "train_runtime": 18725.1494,
12
  "train_samples": 41326,
13
- "train_samples_per_second": 4.272,
14
- "train_steps_per_second": 0.534
15
  }
 
1
  {
2
  "epoch": 1.9357336430507162,
3
+ "eval_loss": 0.4121682941913605,
4
+ "eval_runtime": 148.2246,
5
  "eval_samples": 5656,
6
+ "eval_samples_per_second": 38.158,
7
+ "eval_steps_per_second": 4.77,
8
+ "eval_wer": 0.3222865946622587,
9
  "total_flos": 1.1255918428180738e+19,
10
+ "train_loss": 0.7340839981079101,
11
+ "train_runtime": 18566.2158,
12
  "train_samples": 41326,
13
+ "train_samples_per_second": 4.309,
14
+ "train_steps_per_second": 0.539
15
  }
eval_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 1.9357336430507162,
3
- "eval_loss": 0.41177985072135925,
4
- "eval_runtime": 150.8127,
5
  "eval_samples": 5656,
6
- "eval_samples_per_second": 37.503,
7
- "eval_steps_per_second": 4.688,
8
- "eval_wer": 0.32191747845484747
9
  }
 
1
  {
2
  "epoch": 1.9357336430507162,
3
+ "eval_loss": 0.4121682941913605,
4
+ "eval_runtime": 148.2246,
5
  "eval_samples": 5656,
6
+ "eval_samples_per_second": 38.158,
7
+ "eval_steps_per_second": 4.77,
8
+ "eval_wer": 0.3222865946622587
9
  }
runs/Jun27_19-17-27_1a548bea00a4/events.out.tfevents.1719531034.1a548bea00a4.30.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f8e98e1b76ea283ce2766410db9e3a516095ddf9df759a51ef455d31f605235b
3
+ size 406
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 1.9357336430507162,
3
  "total_flos": 1.1255918428180738e+19,
4
- "train_loss": 0.7339932418823242,
5
- "train_runtime": 18725.1494,
6
  "train_samples": 41326,
7
- "train_samples_per_second": 4.272,
8
- "train_steps_per_second": 0.534
9
  }
 
1
  {
2
  "epoch": 1.9357336430507162,
3
  "total_flos": 1.1255918428180738e+19,
4
+ "train_loss": 0.7340839981079101,
5
+ "train_runtime": 18566.2158,
6
  "train_samples": 41326,
7
+ "train_samples_per_second": 4.309,
8
+ "train_steps_per_second": 0.539
9
  }
trainer_state.json CHANGED
@@ -10,1052 +10,1052 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.019357336430507164,
13
- "eval_loss": 3.5727736949920654,
14
- "eval_runtime": 146.249,
15
- "eval_samples_per_second": 38.674,
16
- "eval_steps_per_second": 4.834,
17
  "eval_wer": 1.0,
18
  "step": 100
19
  },
20
  {
21
  "epoch": 0.03871467286101433,
22
- "eval_loss": 3.076800584793091,
23
- "eval_runtime": 143.9591,
24
- "eval_samples_per_second": 39.289,
25
- "eval_steps_per_second": 4.911,
26
  "eval_wer": 1.0,
27
  "step": 200
28
  },
29
  {
30
  "epoch": 0.05807200929152149,
31
- "eval_loss": 3.500979423522949,
32
- "eval_runtime": 144.2352,
33
- "eval_samples_per_second": 39.214,
34
- "eval_steps_per_second": 4.902,
35
  "eval_wer": 1.0,
36
  "step": 300
37
  },
38
  {
39
  "epoch": 0.07742934572202866,
40
- "eval_loss": 2.0594074726104736,
41
- "eval_runtime": 144.7975,
42
- "eval_samples_per_second": 39.061,
43
- "eval_steps_per_second": 4.883,
44
- "eval_wer": 0.9899857168076263,
45
  "step": 400
46
  },
47
  {
48
  "epoch": 0.09678668215253582,
49
- "grad_norm": 3.567307710647583,
50
- "learning_rate": 0.00029759999999999997,
51
- "loss": 4.06,
52
  "step": 500
53
  },
54
  {
55
  "epoch": 0.09678668215253582,
56
- "eval_loss": 1.4703481197357178,
57
- "eval_runtime": 145.0673,
58
- "eval_samples_per_second": 38.989,
59
- "eval_steps_per_second": 4.874,
60
- "eval_wer": 0.8800211840605993,
61
  "step": 500
62
  },
63
  {
64
  "epoch": 0.11614401858304298,
65
- "eval_loss": 1.2463648319244385,
66
- "eval_runtime": 146.5204,
67
- "eval_samples_per_second": 38.602,
68
- "eval_steps_per_second": 4.825,
69
- "eval_wer": 0.8296608945451044,
70
  "step": 600
71
  },
72
  {
73
  "epoch": 0.13550135501355012,
74
- "eval_loss": 1.0686180591583252,
75
- "eval_runtime": 145.9363,
76
- "eval_samples_per_second": 38.757,
77
- "eval_steps_per_second": 4.845,
78
- "eval_wer": 0.7492738039832453,
79
  "step": 700
80
  },
81
  {
82
  "epoch": 0.1548586914440573,
83
- "eval_loss": 1.006879448890686,
84
- "eval_runtime": 146.414,
85
- "eval_samples_per_second": 38.63,
86
- "eval_steps_per_second": 4.829,
87
- "eval_wer": 0.7116239508273018,
88
  "step": 800
89
  },
90
  {
91
  "epoch": 0.17421602787456447,
92
- "eval_loss": 0.936712920665741,
93
- "eval_runtime": 146.8805,
94
- "eval_samples_per_second": 38.507,
95
- "eval_steps_per_second": 4.813,
96
- "eval_wer": 0.6887868915600777,
97
  "step": 900
98
  },
99
  {
100
  "epoch": 0.19357336430507163,
101
- "grad_norm": 2.5543222427368164,
102
- "learning_rate": 0.0002844,
103
- "loss": 1.0399,
104
  "step": 1000
105
  },
106
  {
107
  "epoch": 0.19357336430507163,
108
- "eval_loss": 0.8960636854171753,
109
- "eval_runtime": 146.9745,
110
- "eval_samples_per_second": 38.483,
111
- "eval_steps_per_second": 4.81,
112
- "eval_wer": 0.6741987771019563,
113
  "step": 1000
114
  },
115
  {
116
  "epoch": 0.2129307007355788,
117
- "eval_loss": 0.896744430065155,
118
- "eval_runtime": 146.7401,
119
- "eval_samples_per_second": 38.544,
120
  "eval_steps_per_second": 4.818,
121
- "eval_wer": 0.6412671919885734,
122
  "step": 1100
123
  },
124
  {
125
  "epoch": 0.23228803716608595,
126
- "eval_loss": 0.8311247825622559,
127
- "eval_runtime": 145.8768,
128
- "eval_samples_per_second": 38.772,
129
- "eval_steps_per_second": 4.847,
130
- "eval_wer": 0.6152685721622185,
131
  "step": 1200
132
  },
133
  {
134
  "epoch": 0.2516453735965931,
135
- "eval_loss": 0.8018700480461121,
136
- "eval_runtime": 146.5709,
137
- "eval_samples_per_second": 38.589,
138
- "eval_steps_per_second": 4.824,
139
- "eval_wer": 0.5965238882380318,
140
  "step": 1300
141
  },
142
  {
143
  "epoch": 0.27100271002710025,
144
- "eval_loss": 0.7925447225570679,
145
- "eval_runtime": 146.4405,
146
- "eval_samples_per_second": 38.623,
147
- "eval_steps_per_second": 4.828,
148
- "eval_wer": 0.5926561923255926,
149
  "step": 1400
150
  },
151
  {
152
  "epoch": 0.29036004645760743,
153
- "grad_norm": 2.373326539993286,
154
- "learning_rate": 0.00026861052631578947,
155
- "loss": 0.8395,
156
  "step": 1500
157
  },
158
  {
159
  "epoch": 0.29036004645760743,
160
- "eval_loss": 0.8164969086647034,
161
- "eval_runtime": 151.4209,
162
- "eval_samples_per_second": 37.353,
163
- "eval_steps_per_second": 4.669,
164
- "eval_wer": 0.5986743913594711,
165
  "step": 1500
166
  },
167
  {
168
  "epoch": 0.3097173828881146,
169
- "eval_loss": 0.7696186304092407,
170
- "eval_runtime": 147.0556,
171
- "eval_samples_per_second": 38.462,
172
- "eval_steps_per_second": 4.808,
173
- "eval_wer": 0.6150278442008634,
174
  "step": 1600
175
  },
176
  {
177
  "epoch": 0.32907471931862176,
178
- "eval_loss": 0.7454735636711121,
179
- "eval_runtime": 148.1475,
180
- "eval_samples_per_second": 38.178,
181
- "eval_steps_per_second": 4.772,
182
- "eval_wer": 0.5624207603793873,
183
  "step": 1700
184
  },
185
  {
186
  "epoch": 0.34843205574912894,
187
- "eval_loss": 0.7681124806404114,
188
- "eval_runtime": 147.9355,
189
- "eval_samples_per_second": 38.233,
190
- "eval_steps_per_second": 4.779,
191
- "eval_wer": 0.5684068623517518,
192
  "step": 1800
193
  },
194
  {
195
  "epoch": 0.3677893921796361,
196
- "eval_loss": 0.7292491793632507,
197
- "eval_runtime": 148.2347,
198
- "eval_samples_per_second": 38.156,
199
- "eval_steps_per_second": 4.769,
200
- "eval_wer": 0.5609282470189854,
201
  "step": 1900
202
  },
203
  {
204
  "epoch": 0.38714672861014326,
205
- "grad_norm": 2.988316059112549,
206
- "learning_rate": 0.0002528210526315789,
207
- "loss": 0.7574,
208
  "step": 2000
209
  },
210
  {
211
  "epoch": 0.38714672861014326,
212
- "eval_loss": 0.7304644584655762,
213
- "eval_runtime": 148.3775,
214
- "eval_samples_per_second": 38.119,
215
- "eval_steps_per_second": 4.765,
216
- "eval_wer": 0.5534014860939481,
217
  "step": 2000
218
  },
219
  {
220
  "epoch": 0.4065040650406504,
221
- "eval_loss": 0.7095713019371033,
222
- "eval_runtime": 148.1439,
223
- "eval_samples_per_second": 38.179,
224
- "eval_steps_per_second": 4.772,
225
- "eval_wer": 0.5363418978992474,
226
  "step": 2100
227
  },
228
  {
229
  "epoch": 0.4258614014711576,
230
- "eval_loss": 0.7107743620872498,
231
- "eval_runtime": 147.443,
232
- "eval_samples_per_second": 38.361,
233
- "eval_steps_per_second": 4.795,
234
- "eval_wer": 0.5572370849448733,
235
  "step": 2200
236
  },
237
  {
238
  "epoch": 0.4452187379016647,
239
- "eval_loss": 0.6702781319618225,
240
- "eval_runtime": 147.3568,
241
- "eval_samples_per_second": 38.383,
242
- "eval_steps_per_second": 4.798,
243
- "eval_wer": 0.5175330198520326,
244
  "step": 2300
245
  },
246
  {
247
  "epoch": 0.4645760743321719,
248
- "eval_loss": 0.6596451997756958,
249
- "eval_runtime": 148.5753,
250
- "eval_samples_per_second": 38.068,
251
- "eval_steps_per_second": 4.759,
252
- "eval_wer": 0.514885012277126,
253
  "step": 2400
254
  },
255
  {
256
  "epoch": 0.48393341076267904,
257
- "grad_norm": 3.3213086128234863,
258
- "learning_rate": 0.0002370315789473684,
259
- "loss": 0.6864,
260
  "step": 2500
261
  },
262
  {
263
  "epoch": 0.48393341076267904,
264
- "eval_loss": 0.6845841407775879,
265
- "eval_runtime": 149.4982,
266
- "eval_samples_per_second": 37.833,
267
- "eval_steps_per_second": 4.729,
268
- "eval_wer": 0.5336457447320698,
269
  "step": 2500
270
  },
271
  {
272
  "epoch": 0.5032907471931862,
273
- "eval_loss": 0.6666129231452942,
274
- "eval_runtime": 148.0482,
275
- "eval_samples_per_second": 38.204,
276
- "eval_steps_per_second": 4.775,
277
- "eval_wer": 0.5285744090128549,
278
  "step": 2600
279
  },
280
  {
281
  "epoch": 0.5226480836236934,
282
- "eval_loss": 0.6390946507453918,
283
- "eval_runtime": 148.4402,
284
- "eval_samples_per_second": 38.103,
285
- "eval_steps_per_second": 4.763,
286
- "eval_wer": 0.4949366885461636,
287
  "step": 2700
288
  },
289
  {
290
  "epoch": 0.5420054200542005,
291
- "eval_loss": 0.6295592188835144,
292
- "eval_runtime": 147.8141,
293
- "eval_samples_per_second": 38.264,
294
- "eval_steps_per_second": 4.783,
295
- "eval_wer": 0.4989648697661729,
296
  "step": 2800
297
  },
298
  {
299
  "epoch": 0.5613627564847077,
300
- "eval_loss": 0.6291782855987549,
301
- "eval_runtime": 148.1212,
302
- "eval_samples_per_second": 38.185,
303
- "eval_steps_per_second": 4.773,
304
- "eval_wer": 0.4957391150840141,
305
  "step": 2900
306
  },
307
  {
308
  "epoch": 0.5807200929152149,
309
- "grad_norm": 5.012236595153809,
310
- "learning_rate": 0.00022124210526315786,
311
- "loss": 0.6734,
312
  "step": 3000
313
  },
314
  {
315
  "epoch": 0.5807200929152149,
316
- "eval_loss": 0.6164219975471497,
317
- "eval_runtime": 148.0479,
318
- "eval_samples_per_second": 38.204,
319
- "eval_steps_per_second": 4.775,
320
- "eval_wer": 0.47652902376787404,
321
  "step": 3000
322
  },
323
  {
324
  "epoch": 0.6000774293457221,
325
- "eval_loss": 0.6179572343826294,
326
- "eval_runtime": 148.2452,
327
- "eval_samples_per_second": 38.153,
328
- "eval_steps_per_second": 4.769,
329
- "eval_wer": 0.4777808091669208,
330
  "step": 3100
331
  },
332
  {
333
  "epoch": 0.6194347657762292,
334
- "eval_loss": 0.6132367849349976,
335
- "eval_runtime": 148.4317,
336
- "eval_samples_per_second": 38.105,
337
- "eval_steps_per_second": 4.763,
338
- "eval_wer": 0.49086036173388325,
339
  "step": 3200
340
  },
341
  {
342
  "epoch": 0.6387921022067363,
343
- "eval_loss": 0.6107444763183594,
344
- "eval_runtime": 148.2189,
345
- "eval_samples_per_second": 38.16,
346
- "eval_steps_per_second": 4.77,
347
- "eval_wer": 0.4683442730817994,
348
  "step": 3300
349
  },
350
  {
351
  "epoch": 0.6581494386372435,
352
- "eval_loss": 0.6068131327629089,
353
- "eval_runtime": 147.7251,
354
- "eval_samples_per_second": 38.287,
355
- "eval_steps_per_second": 4.786,
356
- "eval_wer": 0.4748760250999021,
357
  "step": 3400
358
  },
359
  {
360
  "epoch": 0.6775067750677507,
361
- "grad_norm": 3.184985399246216,
362
- "learning_rate": 0.00020545263157894736,
363
- "loss": 0.6433,
364
  "step": 3500
365
  },
366
  {
367
  "epoch": 0.6775067750677507,
368
- "eval_loss": 0.6008120775222778,
369
- "eval_runtime": 147.947,
370
- "eval_samples_per_second": 38.23,
371
- "eval_steps_per_second": 4.779,
372
- "eval_wer": 0.47725120765193946,
373
  "step": 3500
374
  },
375
  {
376
  "epoch": 0.6968641114982579,
377
- "eval_loss": 0.5916668772697449,
378
- "eval_runtime": 147.0363,
379
- "eval_samples_per_second": 38.467,
380
- "eval_steps_per_second": 4.808,
381
- "eval_wer": 0.4656320713838648,
382
  "step": 3600
383
  },
384
  {
385
  "epoch": 0.716221447928765,
386
- "eval_loss": 0.5885007381439209,
387
- "eval_runtime": 148.9484,
388
- "eval_samples_per_second": 37.973,
389
- "eval_steps_per_second": 4.747,
390
- "eval_wer": 0.4600953282726966,
391
  "step": 3700
392
  },
393
  {
394
  "epoch": 0.7355787843592722,
395
- "eval_loss": 0.5848101377487183,
396
- "eval_runtime": 148.7388,
397
- "eval_samples_per_second": 38.026,
398
- "eval_steps_per_second": 4.753,
399
- "eval_wer": 0.44823546404326686,
400
  "step": 3800
401
  },
402
  {
403
  "epoch": 0.7549361207897793,
404
- "eval_loss": 0.5852195620536804,
405
- "eval_runtime": 148.3227,
406
- "eval_samples_per_second": 38.133,
407
- "eval_steps_per_second": 4.767,
408
- "eval_wer": 0.44963168621912664,
409
  "step": 3900
410
  },
411
  {
412
  "epoch": 0.7742934572202865,
413
- "grad_norm": 4.9515814781188965,
414
- "learning_rate": 0.00018966315789473683,
415
- "loss": 0.6217,
416
  "step": 4000
417
  },
418
  {
419
  "epoch": 0.7742934572202865,
420
- "eval_loss": 0.577220618724823,
421
- "eval_runtime": 147.6504,
422
- "eval_samples_per_second": 38.307,
423
- "eval_steps_per_second": 4.788,
424
- "eval_wer": 0.44163951790213607,
425
  "step": 4000
426
  },
427
  {
428
  "epoch": 0.7936507936507936,
429
- "eval_loss": 0.56705242395401,
430
- "eval_runtime": 152.2357,
431
- "eval_samples_per_second": 37.153,
432
- "eval_steps_per_second": 4.644,
433
- "eval_wer": 0.44691948452119207,
434
  "step": 4100
435
  },
436
  {
437
  "epoch": 0.8130081300813008,
438
- "eval_loss": 0.5668296813964844,
439
- "eval_runtime": 148.0111,
440
- "eval_samples_per_second": 38.213,
441
- "eval_steps_per_second": 4.777,
442
- "eval_wer": 0.4462614947601547,
443
  "step": 4200
444
  },
445
  {
446
  "epoch": 0.832365466511808,
447
- "eval_loss": 0.5557947754859924,
448
- "eval_runtime": 149.4281,
449
- "eval_samples_per_second": 37.851,
450
- "eval_steps_per_second": 4.731,
451
- "eval_wer": 0.44006676188794913,
452
  "step": 4300
453
  },
454
  {
455
  "epoch": 0.8517228029423152,
456
- "eval_loss": 0.5651959776878357,
457
- "eval_runtime": 149.3956,
458
- "eval_samples_per_second": 37.859,
459
- "eval_steps_per_second": 4.732,
460
- "eval_wer": 0.4306783713950988,
461
  "step": 4400
462
  },
463
  {
464
  "epoch": 0.8710801393728222,
465
- "grad_norm": 3.5483193397521973,
466
- "learning_rate": 0.0001738736842105263,
467
- "loss": 0.5954,
468
  "step": 4500
469
  },
470
  {
471
  "epoch": 0.8710801393728222,
472
- "eval_loss": 0.5561267733573914,
473
- "eval_runtime": 149.9212,
474
- "eval_samples_per_second": 37.726,
475
- "eval_steps_per_second": 4.716,
476
- "eval_wer": 0.4307265169873698,
477
  "step": 4500
478
  },
479
  {
480
  "epoch": 0.8904374758033294,
481
- "eval_loss": 0.5431749820709229,
482
- "eval_runtime": 149.9454,
483
- "eval_samples_per_second": 37.72,
484
- "eval_steps_per_second": 4.715,
485
- "eval_wer": 0.420648039671968,
486
  "step": 4600
487
  },
488
  {
489
  "epoch": 0.9097948122338366,
490
- "eval_loss": 0.5294374823570251,
491
- "eval_runtime": 148.9794,
492
- "eval_samples_per_second": 37.965,
493
- "eval_steps_per_second": 4.746,
494
- "eval_wer": 0.41371507438494004,
495
  "step": 4700
496
  },
497
  {
498
  "epoch": 0.9291521486643438,
499
- "eval_loss": 0.5444126725196838,
500
- "eval_runtime": 148.5962,
501
- "eval_samples_per_second": 38.063,
502
- "eval_steps_per_second": 4.758,
503
- "eval_wer": 0.4209529617563512,
504
  "step": 4800
505
  },
506
  {
507
  "epoch": 0.948509485094851,
508
- "eval_loss": 0.5291473269462585,
509
- "eval_runtime": 150.1832,
510
- "eval_samples_per_second": 37.661,
511
- "eval_steps_per_second": 4.708,
512
- "eval_wer": 0.4156569466065382,
513
  "step": 4900
514
  },
515
  {
516
  "epoch": 0.9678668215253581,
517
- "grad_norm": 3.1595053672790527,
518
- "learning_rate": 0.0001581157894736842,
519
- "loss": 0.5663,
520
  "step": 5000
521
  },
522
  {
523
  "epoch": 0.9678668215253581,
524
- "eval_loss": 0.5428867340087891,
525
- "eval_runtime": 149.6435,
526
- "eval_samples_per_second": 37.797,
527
- "eval_steps_per_second": 4.725,
528
- "eval_wer": 0.4139558023462952,
529
  "step": 5000
530
  },
531
  {
532
  "epoch": 0.9872241579558653,
533
- "eval_loss": 0.5208781361579895,
534
- "eval_runtime": 149.0703,
535
- "eval_samples_per_second": 37.942,
536
- "eval_steps_per_second": 4.743,
537
- "eval_wer": 0.41159666832501485,
538
  "step": 5100
539
  },
540
  {
541
  "epoch": 1.0065814943863725,
542
- "eval_loss": 0.5281690359115601,
543
- "eval_runtime": 148.6703,
544
- "eval_samples_per_second": 38.044,
545
- "eval_steps_per_second": 4.755,
546
- "eval_wer": 0.40421434417679064,
547
  "step": 5200
548
  },
549
  {
550
  "epoch": 1.0259388308168795,
551
- "eval_loss": 0.5118032693862915,
552
- "eval_runtime": 148.0473,
553
- "eval_samples_per_second": 38.204,
554
- "eval_steps_per_second": 4.776,
555
- "eval_wer": 0.39184092696313655,
556
  "step": 5300
557
  },
558
  {
559
  "epoch": 1.0452961672473868,
560
- "eval_loss": 0.5089045166969299,
561
- "eval_runtime": 147.9634,
562
- "eval_samples_per_second": 38.226,
563
- "eval_steps_per_second": 4.778,
564
- "eval_wer": 0.39927139670363176,
565
  "step": 5400
566
  },
567
  {
568
  "epoch": 1.064653503677894,
569
- "grad_norm": 2.1315221786499023,
570
- "learning_rate": 0.0001423578947368421,
571
- "loss": 0.4941,
572
  "step": 5500
573
  },
574
  {
575
  "epoch": 1.064653503677894,
576
- "eval_loss": 0.5010989308357239,
577
- "eval_runtime": 147.8753,
578
- "eval_samples_per_second": 38.248,
579
- "eval_steps_per_second": 4.781,
580
- "eval_wer": 0.3921458490475197,
581
  "step": 5500
582
  },
583
  {
584
  "epoch": 1.084010840108401,
585
- "eval_loss": 0.5022321343421936,
586
- "eval_runtime": 148.3164,
587
- "eval_samples_per_second": 38.135,
588
- "eval_steps_per_second": 4.767,
589
- "eval_wer": 0.38869541493476273,
590
  "step": 5600
591
  },
592
  {
593
  "epoch": 1.1033681765389083,
594
- "eval_loss": 0.5066320896148682,
595
- "eval_runtime": 148.554,
596
- "eval_samples_per_second": 38.074,
597
- "eval_steps_per_second": 4.759,
598
- "eval_wer": 0.38526102935276274,
599
  "step": 5700
600
  },
601
  {
602
  "epoch": 1.1227255129694154,
603
- "eval_loss": 0.49068546295166016,
604
- "eval_runtime": 148.2455,
605
- "eval_samples_per_second": 38.153,
606
- "eval_steps_per_second": 4.769,
607
- "eval_wer": 0.3815217216863796,
608
  "step": 5800
609
  },
610
  {
611
  "epoch": 1.1420828493999227,
612
- "eval_loss": 0.4982084035873413,
613
- "eval_runtime": 148.9817,
614
- "eval_samples_per_second": 37.964,
615
- "eval_steps_per_second": 4.746,
616
- "eval_wer": 0.38086373192534223,
617
  "step": 5900
618
  },
619
  {
620
  "epoch": 1.1614401858304297,
621
- "grad_norm": 0.8627763390541077,
622
- "learning_rate": 0.00012656842105263156,
623
- "loss": 0.4628,
624
  "step": 6000
625
  },
626
  {
627
  "epoch": 1.1614401858304297,
628
- "eval_loss": 0.49128398299217224,
629
- "eval_runtime": 149.7714,
630
- "eval_samples_per_second": 37.764,
631
- "eval_steps_per_second": 4.721,
632
- "eval_wer": 0.38956203559564123,
633
  "step": 6000
634
  },
635
  {
636
  "epoch": 1.1807975222609368,
637
- "eval_loss": 0.48260679841041565,
638
- "eval_runtime": 149.8626,
639
- "eval_samples_per_second": 37.741,
640
- "eval_steps_per_second": 4.718,
641
- "eval_wer": 0.373449310715604,
642
  "step": 6100
643
  },
644
  {
645
  "epoch": 1.2001548586914441,
646
- "eval_loss": 0.4883708655834198,
647
- "eval_runtime": 149.0462,
648
- "eval_samples_per_second": 37.948,
649
- "eval_steps_per_second": 4.743,
650
- "eval_wer": 0.3739949607613423,
651
  "step": 6200
652
  },
653
  {
654
  "epoch": 1.2195121951219512,
655
- "eval_loss": 0.4841243028640747,
656
- "eval_runtime": 148.8948,
657
- "eval_samples_per_second": 37.987,
658
- "eval_steps_per_second": 4.748,
659
- "eval_wer": 0.37004702219511804,
660
  "step": 6300
661
  },
662
  {
663
  "epoch": 1.2388695315524583,
664
- "eval_loss": 0.4828014671802521,
665
- "eval_runtime": 149.5102,
666
- "eval_samples_per_second": 37.83,
667
- "eval_steps_per_second": 4.729,
668
- "eval_wer": 0.36971000304922086,
669
  "step": 6400
670
  },
671
  {
672
  "epoch": 1.2582268679829656,
673
- "grad_norm": 1.5625278949737549,
674
- "learning_rate": 0.00011077894736842105,
675
- "loss": 0.4435,
676
  "step": 6500
677
  },
678
  {
679
  "epoch": 1.2582268679829656,
680
- "eval_loss": 0.48161521553993225,
681
- "eval_runtime": 148.9005,
682
- "eval_samples_per_second": 37.985,
683
- "eval_steps_per_second": 4.748,
684
- "eval_wer": 0.37389866957680024,
685
  "step": 6500
686
  },
687
  {
688
  "epoch": 1.2775842044134726,
689
- "eval_loss": 0.47928386926651,
690
- "eval_runtime": 149.5106,
691
- "eval_samples_per_second": 37.83,
692
- "eval_steps_per_second": 4.729,
693
- "eval_wer": 0.3673990146202115,
694
  "step": 6600
695
  },
696
  {
697
  "epoch": 1.29694154084398,
698
- "eval_loss": 0.4744218587875366,
699
- "eval_runtime": 148.9048,
700
- "eval_samples_per_second": 37.984,
701
- "eval_steps_per_second": 4.748,
702
- "eval_wer": 0.36688546163598723,
703
  "step": 6700
704
  },
705
  {
706
  "epoch": 1.316298877274487,
707
- "eval_loss": 0.46821942925453186,
708
- "eval_runtime": 148.7411,
709
- "eval_samples_per_second": 38.026,
710
- "eval_steps_per_second": 4.753,
711
- "eval_wer": 0.3608672626021088,
712
  "step": 6800
713
  },
714
  {
715
  "epoch": 1.3356562137049943,
716
- "eval_loss": 0.46276068687438965,
717
- "eval_runtime": 150.3036,
718
- "eval_samples_per_second": 37.63,
719
- "eval_steps_per_second": 4.704,
720
- "eval_wer": 0.359438943364735,
721
  "step": 6900
722
  },
723
  {
724
  "epoch": 1.3550135501355014,
725
- "grad_norm": 0.7794021964073181,
726
- "learning_rate": 9.498947368421052e-05,
727
- "loss": 0.4298,
728
  "step": 7000
729
  },
730
  {
731
  "epoch": 1.3550135501355014,
732
- "eval_loss": 0.4662827253341675,
733
- "eval_runtime": 149.5174,
734
- "eval_samples_per_second": 37.828,
735
- "eval_steps_per_second": 4.729,
736
- "eval_wer": 0.3554428592062397,
737
  "step": 7000
738
  },
739
  {
740
  "epoch": 1.3743708865660085,
741
- "eval_loss": 0.4656233489513397,
742
- "eval_runtime": 148.8165,
743
- "eval_samples_per_second": 38.007,
744
- "eval_steps_per_second": 4.751,
745
- "eval_wer": 0.3583797403347724,
746
  "step": 7100
747
  },
748
  {
749
  "epoch": 1.3937282229965158,
750
- "eval_loss": 0.45931774377822876,
751
- "eval_runtime": 150.2338,
752
- "eval_samples_per_second": 37.648,
753
- "eval_steps_per_second": 4.706,
754
- "eval_wer": 0.35648601370544525,
755
  "step": 7200
756
  },
757
  {
758
  "epoch": 1.4130855594270229,
759
- "eval_loss": 0.45989105105400085,
760
- "eval_runtime": 150.9977,
761
- "eval_samples_per_second": 37.458,
762
- "eval_steps_per_second": 4.682,
763
- "eval_wer": 0.3565823048899873,
764
  "step": 7300
765
  },
766
  {
767
  "epoch": 1.43244289585753,
768
- "eval_loss": 0.46128061413764954,
769
- "eval_runtime": 150.0246,
770
- "eval_samples_per_second": 37.7,
771
- "eval_steps_per_second": 4.713,
772
- "eval_wer": 0.35208871627802474,
773
  "step": 7400
774
  },
775
  {
776
  "epoch": 1.4518002322880372,
777
- "grad_norm": 0.7098228931427002,
778
- "learning_rate": 7.92e-05,
779
- "loss": 0.4292,
780
  "step": 7500
781
  },
782
  {
783
  "epoch": 1.4518002322880372,
784
- "eval_loss": 0.4520701467990875,
785
- "eval_runtime": 149.5493,
786
- "eval_samples_per_second": 37.82,
787
- "eval_steps_per_second": 4.728,
788
- "eval_wer": 0.34745069088924907,
789
  "step": 7500
790
  },
791
  {
792
  "epoch": 1.4711575687185443,
793
- "eval_loss": 0.4512416422367096,
794
- "eval_runtime": 149.5055,
795
- "eval_samples_per_second": 37.831,
796
- "eval_steps_per_second": 4.729,
797
- "eval_wer": 0.349071592495707,
798
  "step": 7600
799
  },
800
  {
801
  "epoch": 1.4905149051490514,
802
- "eval_loss": 0.4478435218334198,
803
- "eval_runtime": 149.0622,
804
- "eval_samples_per_second": 37.944,
805
- "eval_steps_per_second": 4.743,
806
- "eval_wer": 0.35175169713212756,
807
  "step": 7700
808
  },
809
  {
810
  "epoch": 1.5098722415795587,
811
- "eval_loss": 0.4415859878063202,
812
- "eval_runtime": 148.899,
813
- "eval_samples_per_second": 37.985,
814
- "eval_steps_per_second": 4.748,
815
- "eval_wer": 0.34213862720867905,
816
  "step": 7800
817
  },
818
  {
819
  "epoch": 1.5292295780100658,
820
- "eval_loss": 0.4426974952220917,
821
- "eval_runtime": 149.2815,
822
- "eval_samples_per_second": 37.888,
823
- "eval_steps_per_second": 4.736,
824
- "eval_wer": 0.3458779348750622,
825
  "step": 7900
826
  },
827
  {
828
  "epoch": 1.5485869144405728,
829
- "grad_norm": 1.0578420162200928,
830
- "learning_rate": 6.344210526315788e-05,
831
- "loss": 0.4072,
832
  "step": 8000
833
  },
834
  {
835
  "epoch": 1.5485869144405728,
836
- "eval_loss": 0.43879374861717224,
837
- "eval_runtime": 148.7049,
838
- "eval_samples_per_second": 38.035,
839
- "eval_steps_per_second": 4.754,
840
- "eval_wer": 0.34565325544446407,
841
  "step": 8000
842
  },
843
  {
844
  "epoch": 1.5679442508710801,
845
- "eval_loss": 0.44011563062667847,
846
- "eval_runtime": 150.4046,
847
- "eval_samples_per_second": 37.605,
848
- "eval_steps_per_second": 4.701,
849
- "eval_wer": 0.3453162362985669,
850
  "step": 8100
851
  },
852
  {
853
  "epoch": 1.5873015873015874,
854
- "eval_loss": 0.43649429082870483,
855
- "eval_runtime": 148.8759,
856
- "eval_samples_per_second": 37.991,
857
- "eval_steps_per_second": 4.749,
858
- "eval_wer": 0.3434385581999968,
859
  "step": 8200
860
  },
861
  {
862
  "epoch": 1.6066589237320945,
863
- "eval_loss": 0.4346481263637543,
864
- "eval_runtime": 149.1351,
865
- "eval_samples_per_second": 37.925,
866
- "eval_steps_per_second": 4.741,
867
- "eval_wer": 0.33974739612588467,
868
  "step": 8300
869
  },
870
  {
871
  "epoch": 1.6260162601626016,
872
- "eval_loss": 0.43247029185295105,
873
- "eval_runtime": 149.5691,
874
- "eval_samples_per_second": 37.815,
875
- "eval_steps_per_second": 4.727,
876
- "eval_wer": 0.33604018552101556,
877
  "step": 8400
878
  },
879
  {
880
  "epoch": 1.645373596593109,
881
- "grad_norm": 1.7964462041854858,
882
- "learning_rate": 4.765263157894736e-05,
883
- "loss": 0.3991,
884
  "step": 8500
885
  },
886
  {
887
  "epoch": 1.645373596593109,
888
- "eval_loss": 0.43196219205856323,
889
- "eval_runtime": 150.109,
890
- "eval_samples_per_second": 37.679,
891
- "eval_steps_per_second": 4.71,
892
- "eval_wer": 0.3357834090289034,
893
  "step": 8500
894
  },
895
  {
896
  "epoch": 1.664730933023616,
897
- "eval_loss": 0.42872872948646545,
898
- "eval_runtime": 150.0401,
899
- "eval_samples_per_second": 37.697,
900
- "eval_steps_per_second": 4.712,
901
- "eval_wer": 0.3354624384137632,
902
  "step": 8600
903
  },
904
  {
905
  "epoch": 1.684088269454123,
906
- "eval_loss": 0.42928823828697205,
907
- "eval_runtime": 149.2284,
908
- "eval_samples_per_second": 37.902,
909
- "eval_steps_per_second": 4.738,
910
- "eval_wer": 0.33342427500762306,
911
  "step": 8700
912
  },
913
  {
914
  "epoch": 1.7034456058846303,
915
- "eval_loss": 0.4271656274795532,
916
- "eval_runtime": 149.5274,
917
- "eval_samples_per_second": 37.826,
918
- "eval_steps_per_second": 4.728,
919
- "eval_wer": 0.333327983823081,
920
  "step": 8800
921
  },
922
  {
923
  "epoch": 1.7228029423151374,
924
- "eval_loss": 0.4219857156276703,
925
- "eval_runtime": 149.1865,
926
- "eval_samples_per_second": 37.912,
927
- "eval_steps_per_second": 4.739,
928
- "eval_wer": 0.3302948115100063,
929
  "step": 8900
930
  },
931
  {
932
  "epoch": 1.7421602787456445,
933
- "grad_norm": 1.7460029125213623,
934
- "learning_rate": 3.189473684210526e-05,
935
- "loss": 0.3916,
936
  "step": 9000
937
  },
938
  {
939
  "epoch": 1.7421602787456445,
940
- "eval_loss": 0.4238153398036957,
941
- "eval_runtime": 149.4733,
942
- "eval_samples_per_second": 37.84,
943
- "eval_steps_per_second": 4.73,
944
- "eval_wer": 0.3291874628877726,
945
  "step": 9000
946
  },
947
  {
948
  "epoch": 1.7615176151761518,
949
- "eval_loss": 0.42150619626045227,
950
- "eval_runtime": 148.8948,
951
- "eval_samples_per_second": 37.987,
952
- "eval_steps_per_second": 4.748,
953
- "eval_wer": 0.32812825985781,
954
  "step": 9100
955
  },
956
  {
957
  "epoch": 1.7808749516066589,
958
- "eval_loss": 0.4176540672779083,
959
- "eval_runtime": 150.0504,
960
- "eval_samples_per_second": 37.694,
961
- "eval_steps_per_second": 4.712,
962
- "eval_wer": 0.3265876009051371,
963
  "step": 9200
964
  },
965
  {
966
  "epoch": 1.800232288037166,
967
- "eval_loss": 0.41875413060188293,
968
- "eval_runtime": 150.5043,
969
- "eval_samples_per_second": 37.58,
970
- "eval_steps_per_second": 4.698,
971
- "eval_wer": 0.32573702877501565,
972
  "step": 9300
973
  },
974
  {
975
  "epoch": 1.8195896244676733,
976
- "eval_loss": 0.41637665033340454,
977
- "eval_runtime": 150.1757,
978
- "eval_samples_per_second": 37.663,
979
- "eval_steps_per_second": 4.708,
980
- "eval_wer": 0.32469387427581003,
981
  "step": 9400
982
  },
983
  {
984
  "epoch": 1.8389469608981805,
985
- "grad_norm": 0.8558129668235779,
986
- "learning_rate": 1.6105263157894736e-05,
987
- "loss": 0.3687,
988
  "step": 9500
989
  },
990
  {
991
  "epoch": 1.8389469608981805,
992
- "eval_loss": 0.41629916429519653,
993
- "eval_runtime": 149.3775,
994
- "eval_samples_per_second": 37.864,
995
- "eval_steps_per_second": 4.733,
996
- "eval_wer": 0.3242766124761278,
997
  "step": 9500
998
  },
999
  {
1000
  "epoch": 1.8583042973286876,
1001
- "eval_loss": 0.4140332341194153,
1002
- "eval_runtime": 149.5915,
1003
- "eval_samples_per_second": 37.81,
1004
- "eval_steps_per_second": 4.726,
1005
- "eval_wer": 0.3238914477379596,
1006
  "step": 9600
1007
  },
1008
  {
1009
  "epoch": 1.8776616337591947,
1010
- "eval_loss": 0.4132048189640045,
1011
- "eval_runtime": 150.4642,
1012
- "eval_samples_per_second": 37.59,
1013
- "eval_steps_per_second": 4.699,
1014
- "eval_wer": 0.324661777214296,
1015
  "step": 9700
1016
  },
1017
  {
1018
  "epoch": 1.897018970189702,
1019
- "eval_loss": 0.4122065007686615,
1020
- "eval_runtime": 150.0219,
1021
- "eval_samples_per_second": 37.701,
1022
- "eval_steps_per_second": 4.713,
1023
- "eval_wer": 0.3223668373160437,
1024
  "step": 9800
1025
  },
1026
  {
1027
  "epoch": 1.916376306620209,
1028
- "eval_loss": 0.41170838475227356,
1029
- "eval_runtime": 149.8162,
1030
- "eval_samples_per_second": 37.753,
1031
- "eval_steps_per_second": 4.719,
1032
- "eval_wer": 0.3218532843318194,
1033
  "step": 9900
1034
  },
1035
  {
1036
  "epoch": 1.9357336430507162,
1037
- "grad_norm": 2.01002836227417,
1038
- "learning_rate": 3.157894736842105e-07,
1039
- "loss": 0.3707,
1040
  "step": 10000
1041
  },
1042
  {
1043
  "epoch": 1.9357336430507162,
1044
- "eval_loss": 0.41177985072135925,
1045
- "eval_runtime": 148.9604,
1046
- "eval_samples_per_second": 37.97,
1047
- "eval_steps_per_second": 4.746,
1048
- "eval_wer": 0.32191747845484747,
1049
  "step": 10000
1050
  },
1051
  {
1052
  "epoch": 1.9357336430507162,
1053
  "step": 10000,
1054
  "total_flos": 1.1255918428180738e+19,
1055
- "train_loss": 0.7339932418823242,
1056
- "train_runtime": 18725.1494,
1057
- "train_samples_per_second": 4.272,
1058
- "train_steps_per_second": 0.534
1059
  }
1060
  ],
1061
  "logging_steps": 500,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.019357336430507164,
13
+ "eval_loss": 3.554515838623047,
14
+ "eval_runtime": 148.6327,
15
+ "eval_samples_per_second": 38.054,
16
+ "eval_steps_per_second": 4.757,
17
  "eval_wer": 1.0,
18
  "step": 100
19
  },
20
  {
21
  "epoch": 0.03871467286101433,
22
+ "eval_loss": 3.02604603767395,
23
+ "eval_runtime": 146.1639,
24
+ "eval_samples_per_second": 38.696,
25
+ "eval_steps_per_second": 4.837,
26
  "eval_wer": 1.0,
27
  "step": 200
28
  },
29
  {
30
  "epoch": 0.05807200929152149,
31
+ "eval_loss": 2.906620979309082,
32
+ "eval_runtime": 146.0171,
33
+ "eval_samples_per_second": 38.735,
34
+ "eval_steps_per_second": 4.842,
35
  "eval_wer": 1.0,
36
  "step": 300
37
  },
38
  {
39
  "epoch": 0.07742934572202866,
40
+ "eval_loss": 2.013317823410034,
41
+ "eval_runtime": 145.6964,
42
+ "eval_samples_per_second": 38.82,
43
+ "eval_steps_per_second": 4.853,
44
+ "eval_wer": 0.9847217987193272,
45
  "step": 400
46
  },
47
  {
48
  "epoch": 0.09678668215253582,
49
+ "grad_norm": 4.750732421875,
50
+ "learning_rate": 0.0002982,
51
+ "loss": 4.0489,
52
  "step": 500
53
  },
54
  {
55
  "epoch": 0.09678668215253582,
56
+ "eval_loss": 1.459844708442688,
57
+ "eval_runtime": 145.9454,
58
+ "eval_samples_per_second": 38.754,
59
+ "eval_steps_per_second": 4.844,
60
+ "eval_wer": 0.9003546725297299,
61
  "step": 500
62
  },
63
  {
64
  "epoch": 0.11614401858304298,
65
+ "eval_loss": 1.1772356033325195,
66
+ "eval_runtime": 145.9941,
67
+ "eval_samples_per_second": 38.741,
68
+ "eval_steps_per_second": 4.843,
69
+ "eval_wer": 0.8042079247644878,
70
  "step": 600
71
  },
72
  {
73
  "epoch": 0.13550135501355012,
74
+ "eval_loss": 1.07865571975708,
75
+ "eval_runtime": 146.4442,
76
+ "eval_samples_per_second": 38.622,
77
+ "eval_steps_per_second": 4.828,
78
+ "eval_wer": 0.7589510680297219,
79
  "step": 700
80
  },
81
  {
82
  "epoch": 0.1548586914440573,
83
+ "eval_loss": 1.0144398212432861,
84
+ "eval_runtime": 146.6444,
85
+ "eval_samples_per_second": 38.569,
86
+ "eval_steps_per_second": 4.821,
87
+ "eval_wer": 0.7211567780969652,
88
  "step": 800
89
  },
90
  {
91
  "epoch": 0.17421602787456447,
92
+ "eval_loss": 0.9338767528533936,
93
+ "eval_runtime": 147.2439,
94
+ "eval_samples_per_second": 38.412,
95
+ "eval_steps_per_second": 4.802,
96
+ "eval_wer": 0.6931520919259841,
97
  "step": 900
98
  },
99
  {
100
  "epoch": 0.19357336430507163,
101
+ "grad_norm": 3.122023105621338,
102
+ "learning_rate": 0.0002843684210526315,
103
+ "loss": 1.0454,
104
  "step": 1000
105
  },
106
  {
107
  "epoch": 0.19357336430507163,
108
+ "eval_loss": 0.8805950284004211,
109
+ "eval_runtime": 146.7917,
110
+ "eval_samples_per_second": 38.531,
111
+ "eval_steps_per_second": 4.816,
112
+ "eval_wer": 0.659739050889891,
113
  "step": 1000
114
  },
115
  {
116
  "epoch": 0.2129307007355788,
117
+ "eval_loss": 0.864371657371521,
118
+ "eval_runtime": 146.75,
119
+ "eval_samples_per_second": 38.542,
120
  "eval_steps_per_second": 4.818,
121
+ "eval_wer": 0.6553738505239846,
122
  "step": 1100
123
  },
124
  {
125
  "epoch": 0.23228803716608595,
126
+ "eval_loss": 0.845355749130249,
127
+ "eval_runtime": 147.3826,
128
+ "eval_samples_per_second": 38.376,
129
+ "eval_steps_per_second": 4.797,
130
+ "eval_wer": 0.6313652485114988,
131
  "step": 1200
132
  },
133
  {
134
  "epoch": 0.2516453735965931,
135
+ "eval_loss": 0.8092750906944275,
136
+ "eval_runtime": 147.8588,
137
+ "eval_samples_per_second": 38.253,
138
+ "eval_steps_per_second": 4.782,
139
+ "eval_wer": 0.5919179599107701,
140
  "step": 1300
141
  },
142
  {
143
  "epoch": 0.27100271002710025,
144
+ "eval_loss": 0.8075847029685974,
145
+ "eval_runtime": 147.3238,
146
+ "eval_samples_per_second": 38.392,
147
+ "eval_steps_per_second": 4.799,
148
+ "eval_wer": 0.6072282582529569,
149
  "step": 1400
150
  },
151
  {
152
  "epoch": 0.29036004645760743,
153
+ "grad_norm": 3.26877498626709,
154
+ "learning_rate": 0.000268578947368421,
155
+ "loss": 0.842,
156
  "step": 1500
157
  },
158
  {
159
  "epoch": 0.29036004645760743,
160
+ "eval_loss": 0.7783301472663879,
161
+ "eval_runtime": 151.3584,
162
+ "eval_samples_per_second": 37.368,
163
+ "eval_steps_per_second": 4.671,
164
+ "eval_wer": 0.5857232270385646,
165
  "step": 1500
166
  },
167
  {
168
  "epoch": 0.3097173828881146,
169
+ "eval_loss": 0.7964575290679932,
170
+ "eval_runtime": 147.3418,
171
+ "eval_samples_per_second": 38.387,
172
+ "eval_steps_per_second": 4.798,
173
+ "eval_wer": 0.5940524145014524,
174
  "step": 1600
175
  },
176
  {
177
  "epoch": 0.32907471931862176,
178
+ "eval_loss": 0.7414730191230774,
179
+ "eval_runtime": 145.4997,
180
+ "eval_samples_per_second": 38.873,
181
+ "eval_steps_per_second": 4.859,
182
+ "eval_wer": 0.5505127505576864,
183
  "step": 1700
184
  },
185
  {
186
  "epoch": 0.34843205574912894,
187
+ "eval_loss": 0.7440277934074402,
188
+ "eval_runtime": 144.5466,
189
+ "eval_samples_per_second": 39.129,
190
+ "eval_steps_per_second": 4.891,
191
+ "eval_wer": 0.563656497247677,
192
  "step": 1800
193
  },
194
  {
195
  "epoch": 0.3677893921796361,
196
+ "eval_loss": 0.7360929846763611,
197
+ "eval_runtime": 144.6106,
198
+ "eval_samples_per_second": 39.112,
199
+ "eval_steps_per_second": 4.889,
200
+ "eval_wer": 0.5865256535764151,
201
  "step": 1900
202
  },
203
  {
204
  "epoch": 0.38714672861014326,
205
+ "grad_norm": 1.8907363414764404,
206
+ "learning_rate": 0.0002527894736842105,
207
+ "loss": 0.755,
208
  "step": 2000
209
  },
210
  {
211
  "epoch": 0.38714672861014326,
212
+ "eval_loss": 0.7313781380653381,
213
+ "eval_runtime": 144.7461,
214
+ "eval_samples_per_second": 39.075,
215
+ "eval_steps_per_second": 4.884,
216
+ "eval_wer": 0.542729213140537,
217
  "step": 2000
218
  },
219
  {
220
  "epoch": 0.4065040650406504,
221
+ "eval_loss": 0.6866404414176941,
222
+ "eval_runtime": 144.8178,
223
+ "eval_samples_per_second": 39.056,
224
+ "eval_steps_per_second": 4.882,
225
+ "eval_wer": 0.5181428640207989,
226
  "step": 2100
227
  },
228
  {
229
  "epoch": 0.4258614014711576,
230
+ "eval_loss": 0.6947888731956482,
231
+ "eval_runtime": 144.3916,
232
+ "eval_samples_per_second": 39.171,
233
+ "eval_steps_per_second": 4.896,
234
+ "eval_wer": 0.5425847763637239,
235
  "step": 2200
236
  },
237
  {
238
  "epoch": 0.4452187379016647,
239
+ "eval_loss": 0.6796152591705322,
240
+ "eval_runtime": 144.4575,
241
+ "eval_samples_per_second": 39.153,
242
+ "eval_steps_per_second": 4.894,
243
+ "eval_wer": 0.5158960697148176,
244
  "step": 2300
245
  },
246
  {
247
  "epoch": 0.4645760743321719,
248
+ "eval_loss": 0.6898629069328308,
249
+ "eval_runtime": 144.5493,
250
+ "eval_samples_per_second": 39.129,
251
+ "eval_steps_per_second": 4.891,
252
+ "eval_wer": 0.5305162812344529,
253
  "step": 2400
254
  },
255
  {
256
  "epoch": 0.48393341076267904,
257
+ "grad_norm": 2.9580163955688477,
258
+ "learning_rate": 0.000237,
259
+ "loss": 0.6884,
260
  "step": 2500
261
  },
262
  {
263
  "epoch": 0.48393341076267904,
264
+ "eval_loss": 0.6736070513725281,
265
+ "eval_runtime": 144.9197,
266
+ "eval_samples_per_second": 39.029,
267
+ "eval_steps_per_second": 4.879,
268
+ "eval_wer": 0.5103272295421354,
269
  "step": 2500
270
  },
271
  {
272
  "epoch": 0.5032907471931862,
273
+ "eval_loss": 0.6728157997131348,
274
+ "eval_runtime": 145.4712,
275
+ "eval_samples_per_second": 38.881,
276
+ "eval_steps_per_second": 4.86,
277
+ "eval_wer": 0.5257338190688643,
278
  "step": 2600
279
  },
280
  {
281
  "epoch": 0.5226480836236934,
282
+ "eval_loss": 0.6537250876426697,
283
+ "eval_runtime": 145.152,
284
+ "eval_samples_per_second": 38.966,
285
+ "eval_steps_per_second": 4.871,
286
+ "eval_wer": 0.5026560318402851,
287
  "step": 2700
288
  },
289
  {
290
  "epoch": 0.5420054200542005,
291
+ "eval_loss": 0.631415843963623,
292
+ "eval_runtime": 145.09,
293
+ "eval_samples_per_second": 38.983,
294
+ "eval_steps_per_second": 4.873,
295
+ "eval_wer": 0.48227439777888337,
296
  "step": 2800
297
  },
298
  {
299
  "epoch": 0.5613627564847077,
300
+ "eval_loss": 0.6316519379615784,
301
+ "eval_runtime": 144.7345,
302
+ "eval_samples_per_second": 39.078,
303
+ "eval_steps_per_second": 4.885,
304
+ "eval_wer": 0.48296448460143476,
305
  "step": 2900
306
  },
307
  {
308
  "epoch": 0.5807200929152149,
309
+ "grad_norm": 8.703415870666504,
310
+ "learning_rate": 0.00022121052631578946,
311
+ "loss": 0.6756,
312
  "step": 3000
313
  },
314
  {
315
  "epoch": 0.5807200929152149,
316
+ "eval_loss": 0.6204401254653931,
317
+ "eval_runtime": 144.9451,
318
+ "eval_samples_per_second": 39.022,
319
+ "eval_steps_per_second": 4.878,
320
+ "eval_wer": 0.4761117619681918,
321
  "step": 3000
322
  },
323
  {
324
  "epoch": 0.6000774293457221,
325
+ "eval_loss": 0.6310548782348633,
326
+ "eval_runtime": 144.6818,
327
+ "eval_samples_per_second": 39.093,
328
+ "eval_steps_per_second": 4.887,
329
+ "eval_wer": 0.4810868065028647,
330
  "step": 3100
331
  },
332
  {
333
  "epoch": 0.6194347657762292,
334
+ "eval_loss": 0.623622477054596,
335
+ "eval_runtime": 145.2962,
336
+ "eval_samples_per_second": 38.927,
337
+ "eval_steps_per_second": 4.866,
338
+ "eval_wer": 0.48631862752964966,
339
  "step": 3200
340
  },
341
  {
342
  "epoch": 0.6387921022067363,
343
+ "eval_loss": 0.6224333047866821,
344
+ "eval_runtime": 144.731,
345
+ "eval_samples_per_second": 39.079,
346
+ "eval_steps_per_second": 4.885,
347
+ "eval_wer": 0.46287172409365923,
348
  "step": 3300
349
  },
350
  {
351
  "epoch": 0.6581494386372435,
352
+ "eval_loss": 0.597332239151001,
353
+ "eval_runtime": 145.3577,
354
+ "eval_samples_per_second": 38.911,
355
+ "eval_steps_per_second": 4.864,
356
+ "eval_wer": 0.4622939769864069,
357
  "step": 3400
358
  },
359
  {
360
  "epoch": 0.6775067750677507,
361
+ "grad_norm": 4.671802997589111,
362
+ "learning_rate": 0.00020542105263157893,
363
+ "loss": 0.6435,
364
  "step": 3500
365
  },
366
  {
367
  "epoch": 0.6775067750677507,
368
+ "eval_loss": 0.5913041830062866,
369
+ "eval_runtime": 145.2068,
370
+ "eval_samples_per_second": 38.951,
371
+ "eval_steps_per_second": 4.869,
372
+ "eval_wer": 0.47079969828762175,
373
  "step": 3500
374
  },
375
  {
376
  "epoch": 0.6968641114982579,
377
+ "eval_loss": 0.6086587905883789,
378
+ "eval_runtime": 145.0548,
379
+ "eval_samples_per_second": 38.992,
380
+ "eval_steps_per_second": 4.874,
381
+ "eval_wer": 0.4743945691771918,
382
  "step": 3600
383
  },
384
  {
385
  "epoch": 0.716221447928765,
386
+ "eval_loss": 0.5827310681343079,
387
+ "eval_runtime": 145.6494,
388
+ "eval_samples_per_second": 38.833,
389
+ "eval_steps_per_second": 4.854,
390
+ "eval_wer": 0.45213525701722007,
391
  "step": 3700
392
  },
393
  {
394
  "epoch": 0.7355787843592722,
395
+ "eval_loss": 0.5875205397605896,
396
+ "eval_runtime": 145.7866,
397
+ "eval_samples_per_second": 38.796,
398
+ "eval_steps_per_second": 4.85,
399
+ "eval_wer": 0.460785415095248,
400
  "step": 3800
401
  },
402
  {
403
  "epoch": 0.7549361207897793,
404
+ "eval_loss": 0.592467725276947,
405
+ "eval_runtime": 145.4885,
406
+ "eval_samples_per_second": 38.876,
407
+ "eval_steps_per_second": 4.859,
408
+ "eval_wer": 0.45573012790679013,
409
  "step": 3900
410
  },
411
  {
412
  "epoch": 0.7742934572202865,
413
+ "grad_norm": 3.854473114013672,
414
+ "learning_rate": 0.0001896315789473684,
415
+ "loss": 0.6282,
416
  "step": 4000
417
  },
418
  {
419
  "epoch": 0.7742934572202865,
420
+ "eval_loss": 0.5799296498298645,
421
+ "eval_runtime": 145.6866,
422
+ "eval_samples_per_second": 38.823,
423
+ "eval_steps_per_second": 4.853,
424
+ "eval_wer": 0.4494230553192855,
425
  "step": 4000
426
  },
427
  {
428
  "epoch": 0.7936507936507936,
429
+ "eval_loss": 0.567882239818573,
430
+ "eval_runtime": 146.7561,
431
+ "eval_samples_per_second": 38.54,
432
+ "eval_steps_per_second": 4.818,
433
+ "eval_wer": 0.45260066440917335,
434
  "step": 4100
435
  },
436
  {
437
  "epoch": 0.8130081300813008,
438
+ "eval_loss": 0.5699547529220581,
439
+ "eval_runtime": 145.6353,
440
+ "eval_samples_per_second": 38.837,
441
+ "eval_steps_per_second": 4.855,
442
+ "eval_wer": 0.4549597984304537,
443
  "step": 4200
444
  },
445
  {
446
  "epoch": 0.832365466511808,
447
+ "eval_loss": 0.5610417127609253,
448
+ "eval_runtime": 145.0921,
449
+ "eval_samples_per_second": 38.982,
450
+ "eval_steps_per_second": 4.873,
451
+ "eval_wer": 0.43432138787693986,
452
  "step": 4300
453
  },
454
  {
455
  "epoch": 0.8517228029423152,
456
+ "eval_loss": 0.5616321563720703,
457
+ "eval_runtime": 145.7565,
458
+ "eval_samples_per_second": 38.804,
459
+ "eval_steps_per_second": 4.851,
460
+ "eval_wer": 0.42727608287461283,
461
  "step": 4400
462
  },
463
  {
464
  "epoch": 0.8710801393728222,
465
+ "grad_norm": 3.4542479515075684,
466
+ "learning_rate": 0.0001738421052631579,
467
+ "loss": 0.5937,
468
  "step": 4500
469
  },
470
  {
471
  "epoch": 0.8710801393728222,
472
+ "eval_loss": 0.5463823080062866,
473
+ "eval_runtime": 145.3415,
474
+ "eval_samples_per_second": 38.915,
475
+ "eval_steps_per_second": 4.864,
476
+ "eval_wer": 0.42207635890934186,
477
  "step": 4500
478
  },
479
  {
480
  "epoch": 0.8904374758033294,
481
+ "eval_loss": 0.5485692620277405,
482
+ "eval_runtime": 145.5247,
483
+ "eval_samples_per_second": 38.866,
484
+ "eval_steps_per_second": 4.858,
485
+ "eval_wer": 0.4287685962350147,
486
  "step": 4600
487
  },
488
  {
489
  "epoch": 0.9097948122338366,
490
+ "eval_loss": 0.5307685136795044,
491
+ "eval_runtime": 145.423,
492
+ "eval_samples_per_second": 38.893,
493
+ "eval_steps_per_second": 4.862,
494
+ "eval_wer": 0.41673219816725776,
495
  "step": 4700
496
  },
497
  {
498
  "epoch": 0.9291521486643438,
499
+ "eval_loss": 0.5520421862602234,
500
+ "eval_runtime": 145.5542,
501
+ "eval_samples_per_second": 38.858,
502
+ "eval_steps_per_second": 4.857,
503
+ "eval_wer": 0.41997400138017366,
504
  "step": 4800
505
  },
506
  {
507
  "epoch": 0.948509485094851,
508
+ "eval_loss": 0.5321463346481323,
509
+ "eval_runtime": 146.3191,
510
+ "eval_samples_per_second": 38.655,
511
+ "eval_steps_per_second": 4.832,
512
+ "eval_wer": 0.41804817768933256,
513
  "step": 4900
514
  },
515
  {
516
  "epoch": 0.9678668215253581,
517
+ "grad_norm": 5.257072925567627,
518
+ "learning_rate": 0.00015808421052631577,
519
+ "loss": 0.5659,
520
  "step": 5000
521
  },
522
  {
523
  "epoch": 0.9678668215253581,
524
+ "eval_loss": 0.5333205461502075,
525
+ "eval_runtime": 145.4188,
526
+ "eval_samples_per_second": 38.895,
527
+ "eval_steps_per_second": 4.862,
528
+ "eval_wer": 0.4176148673588933,
529
  "step": 5000
530
  },
531
  {
532
  "epoch": 0.9872241579558653,
533
+ "eval_loss": 0.5260410904884338,
534
+ "eval_runtime": 146.9543,
535
+ "eval_samples_per_second": 38.488,
536
+ "eval_steps_per_second": 4.811,
537
+ "eval_wer": 0.41113126093306157,
538
  "step": 5100
539
  },
540
  {
541
  "epoch": 1.0065814943863725,
542
+ "eval_loss": 0.5185408592224121,
543
+ "eval_runtime": 145.8818,
544
+ "eval_samples_per_second": 38.771,
545
+ "eval_steps_per_second": 4.846,
546
+ "eval_wer": 0.3973616215435477,
547
  "step": 5200
548
  },
549
  {
550
  "epoch": 1.0259388308168795,
551
+ "eval_loss": 0.5147408843040466,
552
+ "eval_runtime": 150.3669,
553
+ "eval_samples_per_second": 37.615,
554
+ "eval_steps_per_second": 4.702,
555
+ "eval_wer": 0.3917927813708655,
556
  "step": 5300
557
  },
558
  {
559
  "epoch": 1.0452961672473868,
560
+ "eval_loss": 0.5154542326927185,
561
+ "eval_runtime": 145.571,
562
+ "eval_samples_per_second": 38.854,
563
+ "eval_steps_per_second": 4.857,
564
+ "eval_wer": 0.3975863009741458,
565
  "step": 5400
566
  },
567
  {
568
  "epoch": 1.064653503677894,
569
+ "grad_norm": 1.1321543455123901,
570
+ "learning_rate": 0.00014232631578947366,
571
+ "loss": 0.4928,
572
  "step": 5500
573
  },
574
  {
575
  "epoch": 1.064653503677894,
576
+ "eval_loss": 0.5057936906814575,
577
+ "eval_runtime": 144.9813,
578
+ "eval_samples_per_second": 39.012,
579
+ "eval_steps_per_second": 4.876,
580
+ "eval_wer": 0.3936062653464075,
581
  "step": 5500
582
  },
583
  {
584
  "epoch": 1.084010840108401,
585
+ "eval_loss": 0.504751443862915,
586
+ "eval_runtime": 145.1866,
587
+ "eval_samples_per_second": 38.957,
588
+ "eval_steps_per_second": 4.87,
589
+ "eval_wer": 0.3964629038211552,
590
  "step": 5600
591
  },
592
  {
593
  "epoch": 1.1033681765389083,
594
+ "eval_loss": 0.5011361241340637,
595
+ "eval_runtime": 146.2522,
596
+ "eval_samples_per_second": 38.673,
597
+ "eval_steps_per_second": 4.834,
598
+ "eval_wer": 0.3818266437707628,
599
  "step": 5700
600
  },
601
  {
602
  "epoch": 1.1227255129694154,
603
+ "eval_loss": 0.4964805543422699,
604
+ "eval_runtime": 146.0597,
605
+ "eval_samples_per_second": 38.724,
606
+ "eval_steps_per_second": 4.84,
607
+ "eval_wer": 0.3830463321082955,
608
  "step": 5800
609
  },
610
  {
611
  "epoch": 1.1420828493999227,
612
+ "eval_loss": 0.4969277083873749,
613
+ "eval_runtime": 145.1404,
614
+ "eval_samples_per_second": 38.969,
615
+ "eval_steps_per_second": 4.871,
616
+ "eval_wer": 0.383993195422959,
617
  "step": 5900
618
  },
619
  {
620
  "epoch": 1.1614401858304297,
621
+ "grad_norm": 1.1488393545150757,
622
+ "learning_rate": 0.00012653684210526316,
623
+ "loss": 0.4619,
624
  "step": 6000
625
  },
626
  {
627
  "epoch": 1.1614401858304297,
628
+ "eval_loss": 0.4863020181655884,
629
+ "eval_runtime": 146.181,
630
+ "eval_samples_per_second": 38.692,
631
+ "eval_steps_per_second": 4.836,
632
+ "eval_wer": 0.3799650142029497,
633
  "step": 6000
634
  },
635
  {
636
  "epoch": 1.1807975222609368,
637
+ "eval_loss": 0.49075642228126526,
638
+ "eval_runtime": 145.2768,
639
+ "eval_samples_per_second": 38.933,
640
+ "eval_steps_per_second": 4.867,
641
+ "eval_wer": 0.37998106273370674,
642
  "step": 6100
643
  },
644
  {
645
  "epoch": 1.2001548586914441,
646
+ "eval_loss": 0.4835449457168579,
647
+ "eval_runtime": 145.0604,
648
+ "eval_samples_per_second": 38.991,
649
+ "eval_steps_per_second": 4.874,
650
+ "eval_wer": 0.3712185649403797,
651
  "step": 6200
652
  },
653
  {
654
  "epoch": 1.2195121951219512,
655
+ "eval_loss": 0.4926937520503998,
656
+ "eval_runtime": 145.5282,
657
+ "eval_samples_per_second": 38.865,
658
+ "eval_steps_per_second": 4.858,
659
+ "eval_wer": 0.3766590168670058,
660
  "step": 6300
661
  },
662
  {
663
  "epoch": 1.2388695315524583,
664
+ "eval_loss": 0.4942048490047455,
665
+ "eval_runtime": 146.7525,
666
+ "eval_samples_per_second": 38.541,
667
+ "eval_steps_per_second": 4.818,
668
+ "eval_wer": 0.368281683811847,
669
  "step": 6400
670
  },
671
  {
672
  "epoch": 1.2582268679829656,
673
+ "grad_norm": 1.8319953680038452,
674
+ "learning_rate": 0.00011074736842105263,
675
+ "loss": 0.4421,
676
  "step": 6500
677
  },
678
  {
679
  "epoch": 1.2582268679829656,
680
+ "eval_loss": 0.4833586513996124,
681
+ "eval_runtime": 147.4272,
682
+ "eval_samples_per_second": 38.365,
683
+ "eval_steps_per_second": 4.796,
684
+ "eval_wer": 0.37393076663831426,
685
  "step": 6500
686
  },
687
  {
688
  "epoch": 1.2775842044134726,
689
+ "eval_loss": 0.47513511776924133,
690
+ "eval_runtime": 146.0617,
691
+ "eval_samples_per_second": 38.723,
692
+ "eval_steps_per_second": 4.84,
693
+ "eval_wer": 0.3633868819309592,
694
  "step": 6600
695
  },
696
  {
697
  "epoch": 1.29694154084398,
698
+ "eval_loss": 0.4733775854110718,
699
+ "eval_runtime": 145.526,
700
+ "eval_samples_per_second": 38.866,
701
+ "eval_steps_per_second": 4.858,
702
+ "eval_wer": 0.36327454221566013,
703
  "step": 6700
704
  },
705
  {
706
  "epoch": 1.316298877274487,
707
+ "eval_loss": 0.4685443937778473,
708
+ "eval_runtime": 145.3437,
709
+ "eval_samples_per_second": 38.915,
710
+ "eval_steps_per_second": 4.864,
711
+ "eval_wer": 0.36447818202243587,
712
  "step": 6800
713
  },
714
  {
715
  "epoch": 1.3356562137049943,
716
+ "eval_loss": 0.4654460847377777,
717
+ "eval_runtime": 145.3646,
718
+ "eval_samples_per_second": 38.909,
719
+ "eval_steps_per_second": 4.864,
720
+ "eval_wer": 0.3624560671470527,
721
  "step": 6900
722
  },
723
  {
724
  "epoch": 1.3550135501355014,
725
+ "grad_norm": 0.9300881624221802,
726
+ "learning_rate": 9.49578947368421e-05,
727
+ "loss": 0.4304,
728
  "step": 7000
729
  },
730
  {
731
  "epoch": 1.3550135501355014,
732
+ "eval_loss": 0.47420966625213623,
733
+ "eval_runtime": 145.5058,
734
+ "eval_samples_per_second": 38.871,
735
+ "eval_steps_per_second": 4.859,
736
+ "eval_wer": 0.3615413008939032,
737
  "step": 7000
738
  },
739
  {
740
  "epoch": 1.3743708865660085,
741
+ "eval_loss": 0.46446511149406433,
742
+ "eval_runtime": 145.4472,
743
+ "eval_samples_per_second": 38.887,
744
+ "eval_steps_per_second": 4.861,
745
+ "eval_wer": 0.359567331610791,
746
  "step": 7100
747
  },
748
  {
749
  "epoch": 1.3937282229965158,
750
+ "eval_loss": 0.45991310477256775,
751
+ "eval_runtime": 145.7318,
752
+ "eval_samples_per_second": 38.811,
753
+ "eval_steps_per_second": 4.851,
754
+ "eval_wer": 0.3593587007109499,
755
  "step": 7200
756
  },
757
  {
758
  "epoch": 1.4130855594270229,
759
+ "eval_loss": 0.4554171562194824,
760
+ "eval_runtime": 145.4195,
761
+ "eval_samples_per_second": 38.894,
762
+ "eval_steps_per_second": 4.862,
763
+ "eval_wer": 0.3555391503907817,
764
  "step": 7300
765
  },
766
  {
767
  "epoch": 1.43244289585753,
768
+ "eval_loss": 0.457757830619812,
769
+ "eval_runtime": 145.3536,
770
+ "eval_samples_per_second": 38.912,
771
+ "eval_steps_per_second": 4.864,
772
+ "eval_wer": 0.35780199322752004,
773
  "step": 7400
774
  },
775
  {
776
  "epoch": 1.4518002322880372,
777
+ "grad_norm": 1.012635588645935,
778
+ "learning_rate": 7.916842105263156e-05,
779
+ "loss": 0.4275,
780
  "step": 7500
781
  },
782
  {
783
  "epoch": 1.4518002322880372,
784
+ "eval_loss": 0.45184171199798584,
785
+ "eval_runtime": 149.8003,
786
+ "eval_samples_per_second": 37.757,
787
+ "eval_steps_per_second": 4.72,
788
+ "eval_wer": 0.3521850074625668,
789
  "step": 7500
790
  },
791
  {
792
  "epoch": 1.4711575687185443,
793
+ "eval_loss": 0.44799017906188965,
794
+ "eval_runtime": 145.3878,
795
+ "eval_samples_per_second": 38.903,
796
+ "eval_steps_per_second": 4.863,
797
+ "eval_wer": 0.35107765884033315,
798
  "step": 7600
799
  },
800
  {
801
  "epoch": 1.4905149051490514,
802
+ "eval_loss": 0.4465474486351013,
803
+ "eval_runtime": 145.7643,
804
+ "eval_samples_per_second": 38.802,
805
+ "eval_steps_per_second": 4.85,
806
+ "eval_wer": 0.3500986984641556,
807
  "step": 7700
808
  },
809
  {
810
  "epoch": 1.5098722415795587,
811
+ "eval_loss": 0.44539782404899597,
812
+ "eval_runtime": 145.5321,
813
+ "eval_samples_per_second": 38.864,
814
+ "eval_steps_per_second": 4.858,
815
+ "eval_wer": 0.34281266550047346,
816
  "step": 7800
817
  },
818
  {
819
  "epoch": 1.5292295780100658,
820
+ "eval_loss": 0.44271060824394226,
821
+ "eval_runtime": 145.0706,
822
+ "eval_samples_per_second": 38.988,
823
+ "eval_steps_per_second": 4.873,
824
+ "eval_wer": 0.34387186853043605,
825
  "step": 7900
826
  },
827
  {
828
  "epoch": 1.5485869144405728,
829
+ "grad_norm": 1.030893087387085,
830
+ "learning_rate": 6.341052631578946e-05,
831
+ "loss": 0.4089,
832
  "step": 8000
833
  },
834
  {
835
  "epoch": 1.5485869144405728,
836
+ "eval_loss": 0.437569797039032,
837
+ "eval_runtime": 144.8474,
838
+ "eval_samples_per_second": 39.048,
839
+ "eval_steps_per_second": 4.881,
840
+ "eval_wer": 0.3406942594405482,
841
  "step": 8000
842
  },
843
  {
844
  "epoch": 1.5679442508710801,
845
+ "eval_loss": 0.4396042823791504,
846
+ "eval_runtime": 145.4051,
847
+ "eval_samples_per_second": 38.898,
848
+ "eval_steps_per_second": 4.862,
849
+ "eval_wer": 0.3415127345091557,
850
  "step": 8100
851
  },
852
  {
853
  "epoch": 1.5873015873015874,
854
+ "eval_loss": 0.4342670738697052,
855
+ "eval_runtime": 144.5678,
856
+ "eval_samples_per_second": 39.124,
857
+ "eval_steps_per_second": 4.89,
858
+ "eval_wer": 0.34217072427019307,
859
  "step": 8200
860
  },
861
  {
862
  "epoch": 1.6066589237320945,
863
+ "eval_loss": 0.4358905851840973,
864
+ "eval_runtime": 144.9749,
865
+ "eval_samples_per_second": 39.014,
866
+ "eval_steps_per_second": 4.877,
867
+ "eval_wer": 0.3406461138482772,
868
  "step": 8300
869
  },
870
  {
871
  "epoch": 1.6260162601626016,
872
+ "eval_loss": 0.43577995896339417,
873
+ "eval_runtime": 144.8242,
874
+ "eval_samples_per_second": 39.054,
875
+ "eval_steps_per_second": 4.882,
876
+ "eval_wer": 0.3373401165123333,
877
  "step": 8400
878
  },
879
  {
880
  "epoch": 1.645373596593109,
881
+ "grad_norm": 0.9704302549362183,
882
+ "learning_rate": 4.762105263157894e-05,
883
+ "loss": 0.4005,
884
  "step": 8500
885
  },
886
  {
887
  "epoch": 1.645373596593109,
888
+ "eval_loss": 0.4330734312534332,
889
+ "eval_runtime": 145.4279,
890
+ "eval_samples_per_second": 38.892,
891
+ "eval_steps_per_second": 4.862,
892
+ "eval_wer": 0.3364895443822118,
893
  "step": 8500
894
  },
895
  {
896
  "epoch": 1.664730933023616,
897
+ "eval_loss": 0.4302414059638977,
898
+ "eval_runtime": 144.5881,
899
+ "eval_samples_per_second": 39.118,
900
+ "eval_steps_per_second": 4.89,
901
+ "eval_wer": 0.3352859045754361,
902
  "step": 8600
903
  },
904
  {
905
  "epoch": 1.684088269454123,
906
+ "eval_loss": 0.43084925413131714,
907
+ "eval_runtime": 145.3335,
908
+ "eval_samples_per_second": 38.917,
909
+ "eval_steps_per_second": 4.865,
910
+ "eval_wer": 0.33549453547527724,
911
  "step": 8700
912
  },
913
  {
914
  "epoch": 1.7034456058846303,
915
+ "eval_loss": 0.4258446991443634,
916
+ "eval_runtime": 145.2324,
917
+ "eval_samples_per_second": 38.944,
918
+ "eval_steps_per_second": 4.868,
919
+ "eval_wer": 0.335109370737109,
920
  "step": 8800
921
  },
922
  {
923
  "epoch": 1.7228029423151374,
924
+ "eval_loss": 0.42223912477493286,
925
+ "eval_runtime": 145.1268,
926
+ "eval_samples_per_second": 38.973,
927
+ "eval_steps_per_second": 4.872,
928
+ "eval_wer": 0.33530195310619315,
929
  "step": 8900
930
  },
931
  {
932
  "epoch": 1.7421602787456445,
933
+ "grad_norm": 1.2587796449661255,
934
+ "learning_rate": 3.186315789473684e-05,
935
+ "loss": 0.3879,
936
  "step": 9000
937
  },
938
  {
939
  "epoch": 1.7421602787456445,
940
+ "eval_loss": 0.4238055944442749,
941
+ "eval_runtime": 145.2053,
942
+ "eval_samples_per_second": 38.952,
943
+ "eval_steps_per_second": 4.869,
944
+ "eval_wer": 0.33119352923239875,
945
  "step": 9000
946
  },
947
  {
948
  "epoch": 1.7615176151761518,
949
+ "eval_loss": 0.4244863986968994,
950
+ "eval_runtime": 145.3408,
951
+ "eval_samples_per_second": 38.915,
952
+ "eval_steps_per_second": 4.864,
953
+ "eval_wer": 0.3288022981496044,
954
  "step": 9100
955
  },
956
  {
957
  "epoch": 1.7808749516066589,
958
+ "eval_loss": 0.42055588960647583,
959
+ "eval_runtime": 145.738,
960
+ "eval_samples_per_second": 38.809,
961
+ "eval_steps_per_second": 4.851,
962
+ "eval_wer": 0.32641106706681006,
963
  "step": 9200
964
  },
965
  {
966
  "epoch": 1.800232288037166,
967
+ "eval_loss": 0.4201093018054962,
968
+ "eval_runtime": 145.1603,
969
+ "eval_samples_per_second": 38.964,
970
+ "eval_steps_per_second": 4.87,
971
+ "eval_wer": 0.3284492304729502,
972
  "step": 9300
973
  },
974
  {
975
  "epoch": 1.8195896244676733,
976
+ "eval_loss": 0.41892749071121216,
977
+ "eval_runtime": 150.4269,
978
+ "eval_samples_per_second": 37.6,
979
+ "eval_steps_per_second": 4.7,
980
+ "eval_wer": 0.324597583091268,
981
  "step": 9400
982
  },
983
  {
984
  "epoch": 1.8389469608981805,
985
+ "grad_norm": 1.2362818717956543,
986
+ "learning_rate": 1.6073684210526313e-05,
987
+ "loss": 0.369,
988
  "step": 9500
989
  },
990
  {
991
  "epoch": 1.8389469608981805,
992
+ "eval_loss": 0.4160093665122986,
993
+ "eval_runtime": 145.5064,
994
+ "eval_samples_per_second": 38.871,
995
+ "eval_steps_per_second": 4.859,
996
+ "eval_wer": 0.32576912583652967,
997
  "step": 9500
998
  },
999
  {
1000
  "epoch": 1.8583042973286876,
1001
+ "eval_loss": 0.4141674041748047,
1002
+ "eval_runtime": 145.8993,
1003
+ "eval_samples_per_second": 38.766,
1004
+ "eval_steps_per_second": 4.846,
1005
+ "eval_wer": 0.3248062139911091,
1006
  "step": 9600
1007
  },
1008
  {
1009
  "epoch": 1.8776616337591947,
1010
+ "eval_loss": 0.41305750608444214,
1011
+ "eval_runtime": 146.4678,
1012
+ "eval_samples_per_second": 38.616,
1013
+ "eval_steps_per_second": 4.827,
1014
+ "eval_wer": 0.32517533019852035,
1015
  "step": 9700
1016
  },
1017
  {
1018
  "epoch": 1.897018970189702,
1019
+ "eval_loss": 0.4127916693687439,
1020
+ "eval_runtime": 145.2897,
1021
+ "eval_samples_per_second": 38.929,
1022
+ "eval_steps_per_second": 4.866,
1023
+ "eval_wer": 0.32276805058496894,
1024
  "step": 9800
1025
  },
1026
  {
1027
  "epoch": 1.916376306620209,
1028
+ "eval_loss": 0.41222211718559265,
1029
+ "eval_runtime": 145.7491,
1030
+ "eval_samples_per_second": 38.806,
1031
+ "eval_steps_per_second": 4.851,
1032
+ "eval_wer": 0.3221421578854456,
1033
  "step": 9900
1034
  },
1035
  {
1036
  "epoch": 1.9357336430507162,
1037
+ "grad_norm": 1.1011325120925903,
1038
+ "learning_rate": 2.842105263157894e-07,
1039
+ "loss": 0.3738,
1040
  "step": 10000
1041
  },
1042
  {
1043
  "epoch": 1.9357336430507162,
1044
+ "eval_loss": 0.4121682941913605,
1045
+ "eval_runtime": 145.6809,
1046
+ "eval_samples_per_second": 38.825,
1047
+ "eval_steps_per_second": 4.853,
1048
+ "eval_wer": 0.3222865946622587,
1049
  "step": 10000
1050
  },
1051
  {
1052
  "epoch": 1.9357336430507162,
1053
  "step": 10000,
1054
  "total_flos": 1.1255918428180738e+19,
1055
+ "train_loss": 0.7340839981079101,
1056
+ "train_runtime": 18566.2158,
1057
+ "train_samples_per_second": 4.309,
1058
+ "train_steps_per_second": 0.539
1059
  }
1060
  ],
1061
  "logging_steps": 500,