ylacombe HF staff commited on
Commit
239bc0b
1 Parent(s): 221ab03

End of training

Browse files
README.md CHANGED
@@ -1,6 +1,10 @@
1
  ---
 
 
2
  base_model: ylacombe/w2v-bert-2.0
3
  tags:
 
 
4
  - generated_from_trainer
5
  datasets:
6
  - common_voice_16_0
@@ -13,11 +17,11 @@ model-index:
13
  name: Automatic Speech Recognition
14
  type: automatic-speech-recognition
15
  dataset:
16
- name: common_voice_16_0
17
  type: common_voice_16_0
18
  config: tr
19
  split: test
20
- args: tr
21
  metrics:
22
  - name: Wer
23
  type: wer
@@ -29,7 +33,7 @@ should probably proofread and complete it, then remove this comment. -->
29
 
30
  # wav2vec2-common_voice-tr-demo
31
 
32
- This model is a fine-tuned version of [ylacombe/w2v-bert-2.0](https://huggingface.co/ylacombe/w2v-bert-2.0) on the common_voice_16_0 dataset.
33
  It achieves the following results on the evaluation set:
34
  - Loss: nan
35
  - Wer: 1.0
 
1
  ---
2
+ language:
3
+ - tr
4
  base_model: ylacombe/w2v-bert-2.0
5
  tags:
6
+ - automatic-speech-recognition
7
+ - mozilla-foundation/common_voice_16_0
8
  - generated_from_trainer
9
  datasets:
10
  - common_voice_16_0
 
17
  name: Automatic Speech Recognition
18
  type: automatic-speech-recognition
19
  dataset:
20
+ name: MOZILLA-FOUNDATION/COMMON_VOICE_16_0 - TR
21
  type: common_voice_16_0
22
  config: tr
23
  split: test
24
+ args: 'Config: tr, Training split: train+validation, Eval split: test'
25
  metrics:
26
  - name: Wer
27
  type: wer
 
33
 
34
  # wav2vec2-common_voice-tr-demo
35
 
36
+ This model is a fine-tuned version of [ylacombe/w2v-bert-2.0](https://huggingface.co/ylacombe/w2v-bert-2.0) on the MOZILLA-FOUNDATION/COMMON_VOICE_16_0 - TR dataset.
37
  It achieves the following results on the evaluation set:
38
  - Loss: nan
39
  - Wer: 1.0
all_results.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
  "epoch": 15.0,
3
- "eval_loss": 3.0581979751586914,
4
- "eval_runtime": 309.3284,
5
  "eval_samples": 11035,
6
- "eval_samples_per_second": 35.674,
7
- "eval_steps_per_second": 1.115,
8
  "eval_wer": 1.0,
9
- "train_loss": 3.3253096312547563,
10
- "train_runtime": 44962.3834,
11
  "train_samples": 43798,
12
- "train_samples_per_second": 14.612,
13
- "train_steps_per_second": 0.365
14
  }
 
1
  {
2
  "epoch": 15.0,
3
+ "eval_loss": NaN,
4
+ "eval_runtime": 324.5516,
5
  "eval_samples": 11035,
6
+ "eval_samples_per_second": 34.001,
7
+ "eval_steps_per_second": 1.063,
8
  "eval_wer": 1.0,
9
+ "train_loss": 1.129231213434646,
10
+ "train_runtime": 45647.7738,
11
  "train_samples": 43798,
12
+ "train_samples_per_second": 14.392,
13
+ "train_steps_per_second": 0.36
14
  }
eval_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 15.0,
3
- "eval_loss": 3.0581979751586914,
4
- "eval_runtime": 309.3284,
5
  "eval_samples": 11035,
6
- "eval_samples_per_second": 35.674,
7
- "eval_steps_per_second": 1.115,
8
  "eval_wer": 1.0
9
  }
 
1
  {
2
  "epoch": 15.0,
3
+ "eval_loss": NaN,
4
+ "eval_runtime": 324.5516,
5
  "eval_samples": 11035,
6
+ "eval_samples_per_second": 34.001,
7
+ "eval_steps_per_second": 1.063,
8
  "eval_wer": 1.0
9
  }
runs/Jan04_20-37-14_vorace/events.out.tfevents.1704447903.vorace.1537045.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09560f79aeeebf2523077eeee9b0ff225d0f827645df60dfe224ba16ac54d122
3
+ size 364
runs/Jan05_08-39-35_vorace/events.out.tfevents.1704445083.vorace.1722965.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d83369742a1f74133bc19aca815f10b018697dbe547b6f55b85b1910cbc89544
3
- size 6761
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6c84137a9499be9a4be711454736f6dd77efee3a4093c4f6c586261b7c1810bf
3
+ size 6918
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 15.0,
3
- "train_loss": 3.3253096312547563,
4
- "train_runtime": 44962.3834,
5
  "train_samples": 43798,
6
- "train_samples_per_second": 14.612,
7
- "train_steps_per_second": 0.365
8
  }
 
1
  {
2
  "epoch": 15.0,
3
+ "train_loss": 1.129231213434646,
4
+ "train_runtime": 45647.7738,
5
  "train_samples": 43798,
6
+ "train_samples_per_second": 14.392,
7
+ "train_steps_per_second": 0.36
8
  }
trainer_state.json CHANGED
@@ -10,679 +10,679 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.27,
13
- "eval_loss": 7.266334533691406,
14
- "eval_runtime": 325.0132,
15
- "eval_samples_per_second": 33.952,
16
- "eval_steps_per_second": 1.061,
17
  "eval_wer": 1.0,
18
  "step": 300
19
  },
20
  {
21
  "epoch": 0.46,
22
- "learning_rate": 7.14420628442998e-05,
23
- "loss": 10.5256,
24
  "step": 500
25
  },
26
  {
27
  "epoch": 0.55,
28
- "eval_loss": 3.0892837047576904,
29
- "eval_runtime": 316.7833,
30
- "eval_samples_per_second": 34.835,
31
- "eval_steps_per_second": 1.089,
32
  "eval_wer": 1.0,
33
  "step": 600
34
  },
35
  {
36
  "epoch": 0.82,
37
- "eval_loss": 3.061225652694702,
38
- "eval_runtime": 483.1327,
39
- "eval_samples_per_second": 22.841,
40
- "eval_steps_per_second": 0.714,
41
  "eval_wer": 1.0,
42
  "step": 900
43
  },
44
  {
45
  "epoch": 0.91,
46
- "learning_rate": 0.0001434602713566988,
47
- "loss": 2.9795,
48
  "step": 1000
49
  },
50
  {
51
  "epoch": 1.1,
52
- "eval_loss": 2.9936766624450684,
53
- "eval_runtime": 320.3828,
54
- "eval_samples_per_second": 34.443,
55
- "eval_steps_per_second": 1.077,
56
  "eval_wer": 1.0,
57
  "step": 1200
58
  },
59
  {
60
  "epoch": 1.37,
61
- "learning_rate": 0.0002154784798690978,
62
- "loss": 2.9564,
63
  "step": 1500
64
  },
65
  {
66
  "epoch": 1.37,
67
- "eval_loss": 3.242413282394409,
68
- "eval_runtime": 328.1378,
69
- "eval_samples_per_second": 33.629,
70
- "eval_steps_per_second": 1.051,
71
  "eval_wer": 1.0,
72
  "step": 1500
73
  },
74
  {
75
  "epoch": 1.64,
76
- "eval_loss": 3.2866387367248535,
77
- "eval_runtime": 325.8189,
78
- "eval_samples_per_second": 33.869,
79
- "eval_steps_per_second": 1.059,
80
  "eval_wer": 1.0,
81
  "step": 1800
82
  },
83
  {
84
  "epoch": 1.83,
85
- "learning_rate": 0.0002874966883814968,
86
- "loss": 3.1552,
87
  "step": 2000
88
  },
89
  {
90
  "epoch": 1.92,
91
- "eval_loss": 3.6338589191436768,
92
- "eval_runtime": 322.0052,
93
- "eval_samples_per_second": 34.27,
94
- "eval_steps_per_second": 1.071,
95
- "eval_wer": 1.0,
96
  "step": 2100
97
  },
98
  {
99
  "epoch": 2.19,
100
- "eval_loss": 3.1184866428375244,
101
- "eval_runtime": 316.2742,
102
- "eval_samples_per_second": 34.891,
103
- "eval_steps_per_second": 1.091,
104
  "eval_wer": 1.0,
105
  "step": 2400
106
  },
107
  {
108
  "epoch": 2.28,
109
- "learning_rate": 0.00035951489689389575,
110
- "loss": 3.2079,
111
  "step": 2500
112
  },
113
  {
114
  "epoch": 2.47,
115
- "eval_loss": 3.183176040649414,
116
- "eval_runtime": 316.7437,
117
- "eval_samples_per_second": 34.839,
118
- "eval_steps_per_second": 1.089,
119
  "eval_wer": 1.0,
120
  "step": 2700
121
  },
122
  {
123
  "epoch": 2.74,
124
- "learning_rate": 0.00043153310540629475,
125
- "loss": 3.1275,
126
  "step": 3000
127
  },
128
  {
129
  "epoch": 2.74,
130
- "eval_loss": 3.3952367305755615,
131
- "eval_runtime": 319.4373,
132
- "eval_samples_per_second": 34.545,
133
- "eval_steps_per_second": 1.08,
134
  "eval_wer": 1.0,
135
  "step": 3000
136
  },
137
  {
138
  "epoch": 3.01,
139
- "eval_loss": 3.2981579303741455,
140
- "eval_runtime": 327.568,
141
- "eval_samples_per_second": 33.688,
142
- "eval_steps_per_second": 1.053,
143
  "eval_wer": 1.0,
144
  "step": 3300
145
  },
146
  {
147
  "epoch": 3.2,
148
- "learning_rate": 0.0005035513139186939,
149
- "loss": 3.0987,
150
  "step": 3500
151
  },
152
  {
153
  "epoch": 3.29,
154
- "eval_loss": 3.103595733642578,
155
- "eval_runtime": 327.3342,
156
- "eval_samples_per_second": 33.712,
157
- "eval_steps_per_second": 1.054,
158
  "eval_wer": 1.0,
159
  "step": 3600
160
  },
161
  {
162
  "epoch": 3.56,
163
- "eval_loss": 3.1222941875457764,
164
- "eval_runtime": 312.8357,
165
- "eval_samples_per_second": 35.274,
166
- "eval_steps_per_second": 1.103,
167
  "eval_wer": 1.0,
168
  "step": 3900
169
  },
170
  {
171
  "epoch": 3.65,
172
- "learning_rate": 0.0005755695224310928,
173
- "loss": 2.9301,
174
  "step": 4000
175
  },
176
  {
177
  "epoch": 3.84,
178
- "eval_loss": 3.114525556564331,
179
- "eval_runtime": 308.5965,
180
- "eval_samples_per_second": 35.759,
181
- "eval_steps_per_second": 1.118,
182
  "eval_wer": 1.0,
183
  "step": 4200
184
  },
185
  {
186
  "epoch": 4.11,
187
- "learning_rate": 0.0006475877309434917,
188
- "loss": 2.9197,
189
  "step": 4500
190
  },
191
  {
192
  "epoch": 4.11,
193
- "eval_loss": 3.0324432849884033,
194
- "eval_runtime": 308.9122,
195
- "eval_samples_per_second": 35.722,
196
- "eval_steps_per_second": 1.117,
197
  "eval_wer": 1.0,
198
  "step": 4500
199
  },
200
  {
201
  "epoch": 4.38,
202
- "eval_loss": 2.999401807785034,
203
- "eval_runtime": 308.612,
204
- "eval_samples_per_second": 35.757,
205
- "eval_steps_per_second": 1.118,
206
- "eval_wer": 1.9598550067965563,
207
  "step": 4800
208
  },
209
  {
210
  "epoch": 4.57,
211
- "learning_rate": 0.0007196059394558908,
212
- "loss": 2.9023,
213
  "step": 5000
214
  },
215
  {
216
  "epoch": 4.66,
217
- "eval_loss": 2.991722822189331,
218
- "eval_runtime": 309.103,
219
- "eval_samples_per_second": 35.7,
220
- "eval_steps_per_second": 1.116,
221
- "eval_wer": 1.8240144993203444,
222
  "step": 5100
223
  },
224
  {
225
  "epoch": 4.93,
226
- "eval_loss": 2.9946165084838867,
227
- "eval_runtime": 308.4148,
228
- "eval_samples_per_second": 35.78,
229
- "eval_steps_per_second": 1.119,
230
- "eval_wer": 1.958948799275034,
231
  "step": 5400
232
  },
233
  {
234
  "epoch": 5.02,
235
- "learning_rate": 0.0006889164121067909,
236
- "loss": 2.9007,
237
  "step": 5500
238
  },
239
  {
240
  "epoch": 5.21,
241
- "eval_loss": 3.195502519607544,
242
- "eval_runtime": 308.3861,
243
- "eval_samples_per_second": 35.783,
244
- "eval_steps_per_second": 1.119,
245
  "eval_wer": 1.0,
246
  "step": 5700
247
  },
248
  {
249
  "epoch": 5.48,
250
- "learning_rate": 0.0006574616321963427,
251
- "loss": 3.1887,
252
  "step": 6000
253
  },
254
  {
255
  "epoch": 5.48,
256
- "eval_loss": 3.1901698112487793,
257
- "eval_runtime": 308.4713,
258
- "eval_samples_per_second": 35.773,
259
- "eval_steps_per_second": 1.118,
260
  "eval_wer": 1.0,
261
  "step": 6000
262
  },
263
  {
264
  "epoch": 5.75,
265
- "eval_loss": 3.167245864868164,
266
- "eval_runtime": 308.5576,
267
- "eval_samples_per_second": 35.763,
268
- "eval_steps_per_second": 1.118,
269
  "eval_wer": 1.0,
270
  "step": 6300
271
  },
272
  {
273
  "epoch": 5.94,
274
- "learning_rate": 0.0006259438166548115,
275
- "loss": 3.135,
276
  "step": 6500
277
  },
278
  {
279
  "epoch": 6.03,
280
- "eval_loss": 3.2076234817504883,
281
- "eval_runtime": 308.784,
282
- "eval_samples_per_second": 35.737,
283
- "eval_steps_per_second": 1.117,
284
  "eval_wer": 1.0,
285
  "step": 6600
286
  },
287
  {
288
  "epoch": 6.3,
289
- "eval_loss": 3.212040424346924,
290
- "eval_runtime": 309.3257,
291
- "eval_samples_per_second": 35.674,
292
- "eval_steps_per_second": 1.115,
293
  "eval_wer": 1.0,
294
  "step": 6900
295
  },
296
  {
297
  "epoch": 6.39,
298
- "learning_rate": 0.0005944260011132802,
299
- "loss": 3.1482,
300
  "step": 7000
301
  },
302
  {
303
  "epoch": 6.58,
304
- "eval_loss": 3.1832025051116943,
305
- "eval_runtime": 308.2925,
306
- "eval_samples_per_second": 35.794,
307
- "eval_steps_per_second": 1.119,
308
  "eval_wer": 1.0,
309
  "step": 7200
310
  },
311
  {
312
  "epoch": 6.85,
313
- "learning_rate": 0.0005629081855717488,
314
- "loss": 3.1546,
315
  "step": 7500
316
  },
317
  {
318
  "epoch": 6.85,
319
- "eval_loss": 3.1799111366271973,
320
- "eval_runtime": 308.7059,
321
- "eval_samples_per_second": 35.746,
322
- "eval_steps_per_second": 1.118,
323
  "eval_wer": 1.0,
324
  "step": 7500
325
  },
326
  {
327
  "epoch": 7.12,
328
- "eval_loss": 3.2451581954956055,
329
- "eval_runtime": 307.3881,
330
- "eval_samples_per_second": 35.899,
331
- "eval_steps_per_second": 1.122,
332
  "eval_wer": 1.0,
333
  "step": 7800
334
  },
335
  {
336
  "epoch": 7.31,
337
- "learning_rate": 0.0005313903700302176,
338
- "loss": 3.1567,
339
  "step": 8000
340
  },
341
  {
342
  "epoch": 7.4,
343
- "eval_loss": 3.2318718433380127,
344
- "eval_runtime": 308.2983,
345
- "eval_samples_per_second": 35.793,
346
- "eval_steps_per_second": 1.119,
347
  "eval_wer": 1.0,
348
  "step": 8100
349
  },
350
  {
351
  "epoch": 7.67,
352
- "eval_loss": 3.222830057144165,
353
- "eval_runtime": 308.3145,
354
- "eval_samples_per_second": 35.791,
355
- "eval_steps_per_second": 1.119,
356
  "eval_wer": 1.0,
357
  "step": 8400
358
  },
359
  {
360
  "epoch": 7.76,
361
- "learning_rate": 0.0004998725544886862,
362
- "loss": 3.1719,
363
  "step": 8500
364
  },
365
  {
366
  "epoch": 7.95,
367
- "eval_loss": 3.2054970264434814,
368
- "eval_runtime": 308.0159,
369
- "eval_samples_per_second": 35.826,
370
- "eval_steps_per_second": 1.12,
371
  "eval_wer": 1.0,
372
  "step": 8700
373
  },
374
  {
375
  "epoch": 8.22,
376
- "learning_rate": 0.00046835473894715497,
377
- "loss": 3.168,
378
  "step": 9000
379
  },
380
  {
381
  "epoch": 8.22,
382
- "eval_loss": 3.2552778720855713,
383
- "eval_runtime": 303.2884,
384
- "eval_samples_per_second": 36.385,
385
- "eval_steps_per_second": 1.138,
386
  "eval_wer": 1.0,
387
  "step": 9000
388
  },
389
  {
390
  "epoch": 8.49,
391
- "eval_loss": 3.197523593902588,
392
- "eval_runtime": 305.1147,
393
- "eval_samples_per_second": 36.167,
394
- "eval_steps_per_second": 1.131,
395
  "eval_wer": 1.0,
396
  "step": 9300
397
  },
398
  {
399
  "epoch": 8.68,
400
- "learning_rate": 0.0004368369234056237,
401
- "loss": 3.1643,
402
  "step": 9500
403
  },
404
  {
405
  "epoch": 8.77,
406
- "eval_loss": 3.2445874214172363,
407
- "eval_runtime": 303.4296,
408
- "eval_samples_per_second": 36.368,
409
- "eval_steps_per_second": 1.137,
410
  "eval_wer": 1.0,
411
  "step": 9600
412
  },
413
  {
414
  "epoch": 9.04,
415
- "eval_loss": 3.2781076431274414,
416
- "eval_runtime": 305.7237,
417
- "eval_samples_per_second": 36.095,
418
- "eval_steps_per_second": 1.128,
419
  "eval_wer": 1.0,
420
  "step": 9900
421
  },
422
  {
423
  "epoch": 9.13,
424
- "learning_rate": 0.0004053191078640924,
425
- "loss": 3.169,
426
  "step": 10000
427
  },
428
  {
429
  "epoch": 9.32,
430
- "eval_loss": 3.2596964836120605,
431
- "eval_runtime": 306.6385,
432
- "eval_samples_per_second": 35.987,
433
- "eval_steps_per_second": 1.125,
434
  "eval_wer": 1.0,
435
  "step": 10200
436
  },
437
  {
438
  "epoch": 9.59,
439
- "learning_rate": 0.00037380129232256106,
440
- "loss": 3.1789,
441
  "step": 10500
442
  },
443
  {
444
  "epoch": 9.59,
445
- "eval_loss": 3.2585501670837402,
446
- "eval_runtime": 307.6539,
447
- "eval_samples_per_second": 35.868,
448
- "eval_steps_per_second": 1.121,
449
  "eval_wer": 1.0,
450
  "step": 10500
451
  },
452
  {
453
  "epoch": 9.86,
454
- "eval_loss": 3.2689764499664307,
455
- "eval_runtime": 307.738,
456
- "eval_samples_per_second": 35.858,
457
- "eval_steps_per_second": 1.121,
458
  "eval_wer": 1.0,
459
  "step": 10800
460
  },
461
  {
462
  "epoch": 10.05,
463
- "learning_rate": 0.0003422834767810298,
464
- "loss": 3.1701,
465
  "step": 11000
466
  },
467
  {
468
  "epoch": 10.14,
469
- "eval_loss": 3.273723602294922,
470
- "eval_runtime": 308.5346,
471
- "eval_samples_per_second": 35.766,
472
- "eval_steps_per_second": 1.118,
473
  "eval_wer": 1.0,
474
  "step": 11100
475
  },
476
  {
477
  "epoch": 10.41,
478
- "eval_loss": 3.273848533630371,
479
- "eval_runtime": 308.8614,
480
- "eval_samples_per_second": 35.728,
481
- "eval_steps_per_second": 1.117,
482
  "eval_wer": 1.0,
483
  "step": 11400
484
  },
485
  {
486
  "epoch": 10.5,
487
- "learning_rate": 0.00031076566123949855,
488
- "loss": 3.1698,
489
  "step": 11500
490
  },
491
  {
492
  "epoch": 10.68,
493
- "eval_loss": 3.2595293521881104,
494
- "eval_runtime": 308.7628,
495
- "eval_samples_per_second": 35.739,
496
- "eval_steps_per_second": 1.117,
497
  "eval_wer": 1.0,
498
  "step": 11700
499
  },
500
  {
501
  "epoch": 10.96,
502
- "learning_rate": 0.00027924784569796727,
503
- "loss": 3.1595,
504
  "step": 12000
505
  },
506
  {
507
  "epoch": 10.96,
508
- "eval_loss": 3.2467362880706787,
509
- "eval_runtime": 308.3094,
510
- "eval_samples_per_second": 35.792,
511
- "eval_steps_per_second": 1.119,
512
  "eval_wer": 1.0,
513
  "step": 12000
514
  },
515
  {
516
  "epoch": 11.23,
517
- "eval_loss": 3.252420663833618,
518
- "eval_runtime": 309.0904,
519
- "eval_samples_per_second": 35.702,
520
- "eval_steps_per_second": 1.116,
521
  "eval_wer": 1.0,
522
  "step": 12300
523
  },
524
  {
525
  "epoch": 11.42,
526
- "learning_rate": 0.00024773003015643593,
527
- "loss": 3.15,
528
  "step": 12500
529
  },
530
  {
531
  "epoch": 11.51,
532
- "eval_loss": 3.2327377796173096,
533
- "eval_runtime": 308.9397,
534
- "eval_samples_per_second": 35.719,
535
- "eval_steps_per_second": 1.117,
536
  "eval_wer": 1.0,
537
  "step": 12600
538
  },
539
  {
540
  "epoch": 11.78,
541
- "eval_loss": 3.219557046890259,
542
- "eval_runtime": 309.2594,
543
- "eval_samples_per_second": 35.682,
544
- "eval_steps_per_second": 1.116,
545
  "eval_wer": 1.0,
546
  "step": 12900
547
  },
548
  {
549
  "epoch": 11.87,
550
- "learning_rate": 0.00021621221461490465,
551
- "loss": 3.1444,
552
  "step": 13000
553
  },
554
  {
555
  "epoch": 12.05,
556
- "eval_loss": 3.1942968368530273,
557
- "eval_runtime": 309.871,
558
- "eval_samples_per_second": 35.612,
559
- "eval_steps_per_second": 1.113,
560
  "eval_wer": 1.0,
561
  "step": 13200
562
  },
563
  {
564
  "epoch": 12.33,
565
- "learning_rate": 0.00018469439907337336,
566
- "loss": 3.132,
567
  "step": 13500
568
  },
569
  {
570
  "epoch": 12.33,
571
- "eval_loss": 3.191138744354248,
572
- "eval_runtime": 309.3206,
573
- "eval_samples_per_second": 35.675,
574
- "eval_steps_per_second": 1.115,
575
  "eval_wer": 1.0,
576
  "step": 13500
577
  },
578
  {
579
  "epoch": 12.6,
580
- "eval_loss": 3.207465648651123,
581
- "eval_runtime": 309.5517,
582
- "eval_samples_per_second": 35.648,
583
- "eval_steps_per_second": 1.115,
584
  "eval_wer": 1.0,
585
  "step": 13800
586
  },
587
  {
588
  "epoch": 12.79,
589
- "learning_rate": 0.00015323961916292511,
590
- "loss": 3.1153,
591
  "step": 14000
592
  },
593
  {
594
  "epoch": 12.88,
595
- "eval_loss": 3.1938300132751465,
596
- "eval_runtime": 310.0376,
597
- "eval_samples_per_second": 35.592,
598
- "eval_steps_per_second": 1.113,
599
  "eval_wer": 1.0,
600
  "step": 14100
601
  },
602
  {
603
  "epoch": 13.15,
604
- "eval_loss": 3.1638731956481934,
605
- "eval_runtime": 308.9592,
606
- "eval_samples_per_second": 35.717,
607
- "eval_steps_per_second": 1.117,
608
  "eval_wer": 1.0,
609
  "step": 14400
610
  },
611
  {
612
  "epoch": 13.24,
613
- "learning_rate": 0.00012172180362139385,
614
- "loss": 3.1039,
615
  "step": 14500
616
  },
617
  {
618
  "epoch": 13.42,
619
- "eval_loss": 3.15146803855896,
620
- "eval_runtime": 308.6922,
621
- "eval_samples_per_second": 35.748,
622
- "eval_steps_per_second": 1.118,
623
  "eval_wer": 1.0,
624
  "step": 14700
625
  },
626
  {
627
  "epoch": 13.7,
628
- "learning_rate": 9.020398807986256e-05,
629
- "loss": 3.0839,
630
  "step": 15000
631
  },
632
  {
633
  "epoch": 13.7,
634
- "eval_loss": 3.153453826904297,
635
- "eval_runtime": 309.1197,
636
- "eval_samples_per_second": 35.698,
637
- "eval_steps_per_second": 1.116,
638
  "eval_wer": 1.0,
639
  "step": 15000
640
  },
641
  {
642
  "epoch": 13.97,
643
- "eval_loss": 3.130723237991333,
644
- "eval_runtime": 309.6167,
645
- "eval_samples_per_second": 35.641,
646
- "eval_steps_per_second": 1.114,
647
  "eval_wer": 1.0,
648
  "step": 15300
649
  },
650
  {
651
  "epoch": 14.16,
652
- "learning_rate": 5.8686172538331265e-05,
653
- "loss": 3.0632,
654
  "step": 15500
655
  },
656
  {
657
  "epoch": 14.25,
658
- "eval_loss": 3.1138317584991455,
659
- "eval_runtime": 309.4562,
660
- "eval_samples_per_second": 35.659,
661
- "eval_steps_per_second": 1.115,
662
  "eval_wer": 1.0,
663
  "step": 15600
664
  },
665
  {
666
  "epoch": 14.52,
667
- "eval_loss": 3.128912925720215,
668
- "eval_runtime": 309.4874,
669
- "eval_samples_per_second": 35.656,
670
- "eval_steps_per_second": 1.115,
671
  "eval_wer": 1.0,
672
  "step": 15900
673
  },
674
  {
675
  "epoch": 14.61,
676
- "learning_rate": 2.7168356996799972e-05,
677
- "loss": 3.0518,
678
  "step": 16000
679
  },
680
  {
681
  "epoch": 14.79,
682
- "eval_loss": 3.081491708755493,
683
- "eval_runtime": 308.8218,
684
- "eval_samples_per_second": 35.733,
685
- "eval_steps_per_second": 1.117,
686
  "eval_wer": 1.0,
687
  "step": 16200
688
  },
@@ -690,10 +690,10 @@
690
  "epoch": 15.0,
691
  "step": 16425,
692
  "total_flos": 6.442470243808035e+19,
693
- "train_loss": 3.3253096312547563,
694
- "train_runtime": 44962.3834,
695
- "train_samples_per_second": 14.612,
696
- "train_steps_per_second": 0.365
697
  }
698
  ],
699
  "logging_steps": 500,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.27,
13
+ "eval_loss": 3.293024778366089,
14
+ "eval_runtime": 329.2436,
15
+ "eval_samples_per_second": 33.516,
16
+ "eval_steps_per_second": 1.048,
17
  "eval_wer": 1.0,
18
  "step": 300
19
  },
20
  {
21
  "epoch": 0.46,
22
+ "learning_rate": 0.0007389237223065434,
23
+ "loss": 5.6462,
24
  "step": 500
25
  },
26
  {
27
  "epoch": 0.55,
28
+ "eval_loss": 3.415907859802246,
29
+ "eval_runtime": 328.5335,
30
+ "eval_samples_per_second": 33.589,
31
+ "eval_steps_per_second": 1.05,
32
  "eval_wer": 1.0,
33
  "step": 600
34
  },
35
  {
36
  "epoch": 0.82,
37
+ "eval_loss": 3.4422173500061035,
38
+ "eval_runtime": 338.1217,
39
+ "eval_samples_per_second": 32.636,
40
+ "eval_steps_per_second": 1.02,
41
  "eval_wer": 1.0,
42
  "step": 900
43
  },
44
  {
45
  "epoch": 0.91,
46
+ "learning_rate": 0.0014838065068897525,
47
+ "loss": 3.3522,
48
  "step": 1000
49
  },
50
  {
51
  "epoch": 1.1,
52
+ "eval_loss": 3.3719358444213867,
53
+ "eval_runtime": 340.7912,
54
+ "eval_samples_per_second": 32.381,
55
+ "eval_steps_per_second": 1.012,
56
  "eval_wer": 1.0,
57
  "step": 1200
58
  },
59
  {
60
  "epoch": 1.37,
61
+ "learning_rate": 0.002228689291472962,
62
+ "loss": 3.2605,
63
  "step": 1500
64
  },
65
  {
66
  "epoch": 1.37,
67
+ "eval_loss": 3.4025769233703613,
68
+ "eval_runtime": 335.1987,
69
+ "eval_samples_per_second": 32.921,
70
+ "eval_steps_per_second": 1.029,
71
  "eval_wer": 1.0,
72
  "step": 1500
73
  },
74
  {
75
  "epoch": 1.64,
76
+ "eval_loss": 3.444835662841797,
77
+ "eval_runtime": 332.4199,
78
+ "eval_samples_per_second": 33.196,
79
+ "eval_steps_per_second": 1.038,
80
  "eval_wer": 1.0,
81
  "step": 1800
82
  },
83
  {
84
  "epoch": 1.83,
85
+ "learning_rate": 0.0029735720760561708,
86
+ "loss": 3.2766,
87
  "step": 2000
88
  },
89
  {
90
  "epoch": 1.92,
91
+ "eval_loss": 3.473637104034424,
92
+ "eval_runtime": 334.5697,
93
+ "eval_samples_per_second": 32.983,
94
+ "eval_steps_per_second": 1.031,
95
+ "eval_wer": 0.9999093792478477,
96
  "step": 2100
97
  },
98
  {
99
  "epoch": 2.19,
100
+ "eval_loss": 3.982806444168091,
101
+ "eval_runtime": 342.0917,
102
+ "eval_samples_per_second": 32.257,
103
+ "eval_steps_per_second": 1.009,
104
  "eval_wer": 1.0,
105
  "step": 2400
106
  },
107
  {
108
  "epoch": 2.28,
109
+ "learning_rate": 0.0037184548606393796,
110
+ "loss": 3.2853,
111
  "step": 2500
112
  },
113
  {
114
  "epoch": 2.47,
115
+ "eval_loss": 3.553187370300293,
116
+ "eval_runtime": 329.7168,
117
+ "eval_samples_per_second": 33.468,
118
+ "eval_steps_per_second": 1.046,
119
  "eval_wer": 1.0,
120
  "step": 2700
121
  },
122
  {
123
  "epoch": 2.74,
124
+ "learning_rate": 0.004460358114084256,
125
+ "loss": 3.3389,
126
  "step": 3000
127
  },
128
  {
129
  "epoch": 2.74,
130
+ "eval_loss": 3.781858444213867,
131
+ "eval_runtime": 323.3731,
132
+ "eval_samples_per_second": 34.125,
133
+ "eval_steps_per_second": 1.067,
134
  "eval_wer": 1.0,
135
  "step": 3000
136
  },
137
  {
138
  "epoch": 3.01,
139
+ "eval_loss": 3.2249505519866943,
140
+ "eval_runtime": 323.988,
141
+ "eval_samples_per_second": 34.06,
142
+ "eval_steps_per_second": 1.065,
143
  "eval_wer": 1.0,
144
  "step": 3300
145
  },
146
  {
147
  "epoch": 3.2,
148
+ "learning_rate": 0.005205240898667465,
149
+ "loss": 3.2186,
150
  "step": 3500
151
  },
152
  {
153
  "epoch": 3.29,
154
+ "eval_loss": 3.2372846603393555,
155
+ "eval_runtime": 324.034,
156
+ "eval_samples_per_second": 34.055,
157
+ "eval_steps_per_second": 1.065,
158
  "eval_wer": 1.0,
159
  "step": 3600
160
  },
161
  {
162
  "epoch": 3.56,
163
+ "eval_loss": 3.2161905765533447,
164
+ "eval_runtime": 323.7754,
165
+ "eval_samples_per_second": 34.082,
166
+ "eval_steps_per_second": 1.066,
167
  "eval_wer": 1.0,
168
  "step": 3900
169
  },
170
  {
171
  "epoch": 3.65,
172
+ "learning_rate": 0.005950123683250674,
173
+ "loss": 3.1916,
174
  "step": 4000
175
  },
176
  {
177
  "epoch": 3.84,
178
+ "eval_loss": 3.2367777824401855,
179
+ "eval_runtime": 323.3817,
180
+ "eval_samples_per_second": 34.124,
181
+ "eval_steps_per_second": 1.067,
182
  "eval_wer": 1.0,
183
  "step": 4200
184
  },
185
  {
186
  "epoch": 4.11,
187
+ "learning_rate": 0.0066950064678338835,
188
+ "loss": 3.2188,
189
  "step": 4500
190
  },
191
  {
192
  "epoch": 4.11,
193
+ "eval_loss": 3.2376551628112793,
194
+ "eval_runtime": 323.9231,
195
+ "eval_samples_per_second": 34.067,
196
+ "eval_steps_per_second": 1.065,
197
  "eval_wer": 1.0,
198
  "step": 4500
199
  },
200
  {
201
  "epoch": 4.38,
202
+ "eval_loss": 3.4206786155700684,
203
+ "eval_runtime": 324.1643,
204
+ "eval_samples_per_second": 34.041,
205
+ "eval_steps_per_second": 1.064,
206
+ "eval_wer": 1.0,
207
  "step": 4800
208
  },
209
  {
210
  "epoch": 4.57,
211
+ "learning_rate": 0.007152364497567974,
212
+ "loss": 5.3067,
213
  "step": 5000
214
  },
215
  {
216
  "epoch": 4.66,
217
+ "eval_loss": NaN,
218
+ "eval_runtime": 324.7836,
219
+ "eval_samples_per_second": 33.976,
220
+ "eval_steps_per_second": 1.062,
221
+ "eval_wer": 1.0,
222
  "step": 5100
223
  },
224
  {
225
  "epoch": 4.93,
226
+ "eval_loss": NaN,
227
+ "eval_runtime": 323.844,
228
+ "eval_samples_per_second": 34.075,
229
+ "eval_steps_per_second": 1.065,
230
+ "eval_wer": 1.0,
231
  "step": 5400
232
  },
233
  {
234
  "epoch": 5.02,
235
+ "learning_rate": 0.007152364497567974,
236
+ "loss": 0.0,
237
  "step": 5500
238
  },
239
  {
240
  "epoch": 5.21,
241
+ "eval_loss": NaN,
242
+ "eval_runtime": 323.0895,
243
+ "eval_samples_per_second": 34.155,
244
+ "eval_steps_per_second": 1.068,
245
  "eval_wer": 1.0,
246
  "step": 5700
247
  },
248
  {
249
  "epoch": 5.48,
250
+ "learning_rate": 0.007152364497567974,
251
+ "loss": 0.0,
252
  "step": 6000
253
  },
254
  {
255
  "epoch": 5.48,
256
+ "eval_loss": NaN,
257
+ "eval_runtime": 323.9944,
258
+ "eval_samples_per_second": 34.059,
259
+ "eval_steps_per_second": 1.065,
260
  "eval_wer": 1.0,
261
  "step": 6000
262
  },
263
  {
264
  "epoch": 5.75,
265
+ "eval_loss": NaN,
266
+ "eval_runtime": 324.3089,
267
+ "eval_samples_per_second": 34.026,
268
+ "eval_steps_per_second": 1.064,
269
  "eval_wer": 1.0,
270
  "step": 6300
271
  },
272
  {
273
  "epoch": 5.94,
274
+ "learning_rate": 0.007152364497567974,
275
+ "loss": 0.0,
276
  "step": 6500
277
  },
278
  {
279
  "epoch": 6.03,
280
+ "eval_loss": NaN,
281
+ "eval_runtime": 323.6949,
282
+ "eval_samples_per_second": 34.091,
283
+ "eval_steps_per_second": 1.066,
284
  "eval_wer": 1.0,
285
  "step": 6600
286
  },
287
  {
288
  "epoch": 6.3,
289
+ "eval_loss": NaN,
290
+ "eval_runtime": 323.4351,
291
+ "eval_samples_per_second": 34.118,
292
+ "eval_steps_per_second": 1.067,
293
  "eval_wer": 1.0,
294
  "step": 6900
295
  },
296
  {
297
  "epoch": 6.39,
298
+ "learning_rate": 0.007152364497567974,
299
+ "loss": 0.0,
300
  "step": 7000
301
  },
302
  {
303
  "epoch": 6.58,
304
+ "eval_loss": NaN,
305
+ "eval_runtime": 323.8786,
306
+ "eval_samples_per_second": 34.071,
307
+ "eval_steps_per_second": 1.065,
308
  "eval_wer": 1.0,
309
  "step": 7200
310
  },
311
  {
312
  "epoch": 6.85,
313
+ "learning_rate": 0.007152364497567974,
314
+ "loss": 0.0,
315
  "step": 7500
316
  },
317
  {
318
  "epoch": 6.85,
319
+ "eval_loss": NaN,
320
+ "eval_runtime": 323.8341,
321
+ "eval_samples_per_second": 34.076,
322
+ "eval_steps_per_second": 1.065,
323
  "eval_wer": 1.0,
324
  "step": 7500
325
  },
326
  {
327
  "epoch": 7.12,
328
+ "eval_loss": NaN,
329
+ "eval_runtime": 323.3556,
330
+ "eval_samples_per_second": 34.127,
331
+ "eval_steps_per_second": 1.067,
332
  "eval_wer": 1.0,
333
  "step": 7800
334
  },
335
  {
336
  "epoch": 7.31,
337
+ "learning_rate": 0.007152364497567974,
338
+ "loss": 0.0,
339
  "step": 8000
340
  },
341
  {
342
  "epoch": 7.4,
343
+ "eval_loss": NaN,
344
+ "eval_runtime": 323.4333,
345
+ "eval_samples_per_second": 34.118,
346
+ "eval_steps_per_second": 1.067,
347
  "eval_wer": 1.0,
348
  "step": 8100
349
  },
350
  {
351
  "epoch": 7.67,
352
+ "eval_loss": NaN,
353
+ "eval_runtime": 323.7546,
354
+ "eval_samples_per_second": 34.084,
355
+ "eval_steps_per_second": 1.066,
356
  "eval_wer": 1.0,
357
  "step": 8400
358
  },
359
  {
360
  "epoch": 7.76,
361
+ "learning_rate": 0.007152364497567974,
362
+ "loss": 0.0,
363
  "step": 8500
364
  },
365
  {
366
  "epoch": 7.95,
367
+ "eval_loss": NaN,
368
+ "eval_runtime": 323.2121,
369
+ "eval_samples_per_second": 34.142,
370
+ "eval_steps_per_second": 1.067,
371
  "eval_wer": 1.0,
372
  "step": 8700
373
  },
374
  {
375
  "epoch": 8.22,
376
+ "learning_rate": 0.007152364497567974,
377
+ "loss": 0.0,
378
  "step": 9000
379
  },
380
  {
381
  "epoch": 8.22,
382
+ "eval_loss": NaN,
383
+ "eval_runtime": 322.4572,
384
+ "eval_samples_per_second": 34.222,
385
+ "eval_steps_per_second": 1.07,
386
  "eval_wer": 1.0,
387
  "step": 9000
388
  },
389
  {
390
  "epoch": 8.49,
391
+ "eval_loss": NaN,
392
+ "eval_runtime": 318.5677,
393
+ "eval_samples_per_second": 34.639,
394
+ "eval_steps_per_second": 1.083,
395
  "eval_wer": 1.0,
396
  "step": 9300
397
  },
398
  {
399
  "epoch": 8.68,
400
+ "learning_rate": 0.007152364497567974,
401
+ "loss": 0.0,
402
  "step": 9500
403
  },
404
  {
405
  "epoch": 8.77,
406
+ "eval_loss": NaN,
407
+ "eval_runtime": 319.654,
408
+ "eval_samples_per_second": 34.522,
409
+ "eval_steps_per_second": 1.079,
410
  "eval_wer": 1.0,
411
  "step": 9600
412
  },
413
  {
414
  "epoch": 9.04,
415
+ "eval_loss": NaN,
416
+ "eval_runtime": 320.9193,
417
+ "eval_samples_per_second": 34.386,
418
+ "eval_steps_per_second": 1.075,
419
  "eval_wer": 1.0,
420
  "step": 9900
421
  },
422
  {
423
  "epoch": 9.13,
424
+ "learning_rate": 0.007152364497567974,
425
+ "loss": 0.0,
426
  "step": 10000
427
  },
428
  {
429
  "epoch": 9.32,
430
+ "eval_loss": NaN,
431
+ "eval_runtime": 322.8537,
432
+ "eval_samples_per_second": 34.18,
433
+ "eval_steps_per_second": 1.069,
434
  "eval_wer": 1.0,
435
  "step": 10200
436
  },
437
  {
438
  "epoch": 9.59,
439
+ "learning_rate": 0.007152364497567974,
440
+ "loss": 0.0,
441
  "step": 10500
442
  },
443
  {
444
  "epoch": 9.59,
445
+ "eval_loss": NaN,
446
+ "eval_runtime": 321.9757,
447
+ "eval_samples_per_second": 34.273,
448
+ "eval_steps_per_second": 1.072,
449
  "eval_wer": 1.0,
450
  "step": 10500
451
  },
452
  {
453
  "epoch": 9.86,
454
+ "eval_loss": NaN,
455
+ "eval_runtime": 323.2439,
456
+ "eval_samples_per_second": 34.138,
457
+ "eval_steps_per_second": 1.067,
458
  "eval_wer": 1.0,
459
  "step": 10800
460
  },
461
  {
462
  "epoch": 10.05,
463
+ "learning_rate": 0.007152364497567974,
464
+ "loss": 0.0,
465
  "step": 11000
466
  },
467
  {
468
  "epoch": 10.14,
469
+ "eval_loss": NaN,
470
+ "eval_runtime": 323.9537,
471
+ "eval_samples_per_second": 34.064,
472
+ "eval_steps_per_second": 1.065,
473
  "eval_wer": 1.0,
474
  "step": 11100
475
  },
476
  {
477
  "epoch": 10.41,
478
+ "eval_loss": NaN,
479
+ "eval_runtime": 323.8438,
480
+ "eval_samples_per_second": 34.075,
481
+ "eval_steps_per_second": 1.065,
482
  "eval_wer": 1.0,
483
  "step": 11400
484
  },
485
  {
486
  "epoch": 10.5,
487
+ "learning_rate": 0.007152364497567974,
488
+ "loss": 0.0,
489
  "step": 11500
490
  },
491
  {
492
  "epoch": 10.68,
493
+ "eval_loss": NaN,
494
+ "eval_runtime": 323.9188,
495
+ "eval_samples_per_second": 34.067,
496
+ "eval_steps_per_second": 1.065,
497
  "eval_wer": 1.0,
498
  "step": 11700
499
  },
500
  {
501
  "epoch": 10.96,
502
+ "learning_rate": 0.007152364497567974,
503
+ "loss": 0.0,
504
  "step": 12000
505
  },
506
  {
507
  "epoch": 10.96,
508
+ "eval_loss": NaN,
509
+ "eval_runtime": 323.8826,
510
+ "eval_samples_per_second": 34.071,
511
+ "eval_steps_per_second": 1.065,
512
  "eval_wer": 1.0,
513
  "step": 12000
514
  },
515
  {
516
  "epoch": 11.23,
517
+ "eval_loss": NaN,
518
+ "eval_runtime": 325.2969,
519
+ "eval_samples_per_second": 33.923,
520
+ "eval_steps_per_second": 1.061,
521
  "eval_wer": 1.0,
522
  "step": 12300
523
  },
524
  {
525
  "epoch": 11.42,
526
+ "learning_rate": 0.007152364497567974,
527
+ "loss": 0.0,
528
  "step": 12500
529
  },
530
  {
531
  "epoch": 11.51,
532
+ "eval_loss": NaN,
533
+ "eval_runtime": 324.7333,
534
+ "eval_samples_per_second": 33.982,
535
+ "eval_steps_per_second": 1.062,
536
  "eval_wer": 1.0,
537
  "step": 12600
538
  },
539
  {
540
  "epoch": 11.78,
541
+ "eval_loss": NaN,
542
+ "eval_runtime": 324.3204,
543
+ "eval_samples_per_second": 34.025,
544
+ "eval_steps_per_second": 1.064,
545
  "eval_wer": 1.0,
546
  "step": 12900
547
  },
548
  {
549
  "epoch": 11.87,
550
+ "learning_rate": 0.007152364497567974,
551
+ "loss": 0.0,
552
  "step": 13000
553
  },
554
  {
555
  "epoch": 12.05,
556
+ "eval_loss": NaN,
557
+ "eval_runtime": 324.8823,
558
+ "eval_samples_per_second": 33.966,
559
+ "eval_steps_per_second": 1.062,
560
  "eval_wer": 1.0,
561
  "step": 13200
562
  },
563
  {
564
  "epoch": 12.33,
565
+ "learning_rate": 0.007152364497567974,
566
+ "loss": 0.0,
567
  "step": 13500
568
  },
569
  {
570
  "epoch": 12.33,
571
+ "eval_loss": NaN,
572
+ "eval_runtime": 323.9396,
573
+ "eval_samples_per_second": 34.065,
574
+ "eval_steps_per_second": 1.065,
575
  "eval_wer": 1.0,
576
  "step": 13500
577
  },
578
  {
579
  "epoch": 12.6,
580
+ "eval_loss": NaN,
581
+ "eval_runtime": 326.2533,
582
+ "eval_samples_per_second": 33.823,
583
+ "eval_steps_per_second": 1.057,
584
  "eval_wer": 1.0,
585
  "step": 13800
586
  },
587
  {
588
  "epoch": 12.79,
589
+ "learning_rate": 0.007152364497567974,
590
+ "loss": 0.0,
591
  "step": 14000
592
  },
593
  {
594
  "epoch": 12.88,
595
+ "eval_loss": NaN,
596
+ "eval_runtime": 323.7094,
597
+ "eval_samples_per_second": 34.089,
598
+ "eval_steps_per_second": 1.066,
599
  "eval_wer": 1.0,
600
  "step": 14100
601
  },
602
  {
603
  "epoch": 13.15,
604
+ "eval_loss": NaN,
605
+ "eval_runtime": 325.3499,
606
+ "eval_samples_per_second": 33.917,
607
+ "eval_steps_per_second": 1.06,
608
  "eval_wer": 1.0,
609
  "step": 14400
610
  },
611
  {
612
  "epoch": 13.24,
613
+ "learning_rate": 0.007152364497567974,
614
+ "loss": 0.0,
615
  "step": 14500
616
  },
617
  {
618
  "epoch": 13.42,
619
+ "eval_loss": NaN,
620
+ "eval_runtime": 325.7039,
621
+ "eval_samples_per_second": 33.88,
622
+ "eval_steps_per_second": 1.059,
623
  "eval_wer": 1.0,
624
  "step": 14700
625
  },
626
  {
627
  "epoch": 13.7,
628
+ "learning_rate": 0.007152364497567974,
629
+ "loss": 0.0,
630
  "step": 15000
631
  },
632
  {
633
  "epoch": 13.7,
634
+ "eval_loss": NaN,
635
+ "eval_runtime": 323.516,
636
+ "eval_samples_per_second": 34.11,
637
+ "eval_steps_per_second": 1.066,
638
  "eval_wer": 1.0,
639
  "step": 15000
640
  },
641
  {
642
  "epoch": 13.97,
643
+ "eval_loss": NaN,
644
+ "eval_runtime": 471.9655,
645
+ "eval_samples_per_second": 23.381,
646
+ "eval_steps_per_second": 0.731,
647
  "eval_wer": 1.0,
648
  "step": 15300
649
  },
650
  {
651
  "epoch": 14.16,
652
+ "learning_rate": 0.007152364497567974,
653
+ "loss": 0.0,
654
  "step": 15500
655
  },
656
  {
657
  "epoch": 14.25,
658
+ "eval_loss": NaN,
659
+ "eval_runtime": 324.2368,
660
+ "eval_samples_per_second": 34.034,
661
+ "eval_steps_per_second": 1.064,
662
  "eval_wer": 1.0,
663
  "step": 15600
664
  },
665
  {
666
  "epoch": 14.52,
667
+ "eval_loss": NaN,
668
+ "eval_runtime": 325.6971,
669
+ "eval_samples_per_second": 33.881,
670
+ "eval_steps_per_second": 1.059,
671
  "eval_wer": 1.0,
672
  "step": 15900
673
  },
674
  {
675
  "epoch": 14.61,
676
+ "learning_rate": 0.007152364497567974,
677
+ "loss": 0.0,
678
  "step": 16000
679
  },
680
  {
681
  "epoch": 14.79,
682
+ "eval_loss": NaN,
683
+ "eval_runtime": 325.4219,
684
+ "eval_samples_per_second": 33.91,
685
+ "eval_steps_per_second": 1.06,
686
  "eval_wer": 1.0,
687
  "step": 16200
688
  },
 
690
  "epoch": 15.0,
691
  "step": 16425,
692
  "total_flos": 6.442470243808035e+19,
693
+ "train_loss": 1.129231213434646,
694
+ "train_runtime": 45647.7738,
695
+ "train_samples_per_second": 14.392,
696
+ "train_steps_per_second": 0.36
697
  }
698
  ],
699
  "logging_steps": 500,