patrickvonplaten commited on
Commit
53c1de1
1 Parent(s): 053e866

End of training

Browse files
all_results.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
  "epoch": 20.0,
3
- "eval_loss": 1.3688374757766724,
4
- "eval_runtime": 49.4666,
5
  "eval_samples": 1680,
6
- "eval_samples_per_second": 33.962,
7
- "eval_steps_per_second": 33.962,
8
- "eval_wer": 0.6817586658397078,
9
- "train_loss": 0.938711966728342,
10
- "train_runtime": 4007.4157,
11
  "train_samples": 4620,
12
- "train_samples_per_second": 23.057,
13
- "train_steps_per_second": 0.724
14
  }
1
  {
2
  "epoch": 20.0,
3
+ "eval_loss": 1.3600618839263916,
4
+ "eval_runtime": 50.525,
5
  "eval_samples": 1680,
6
+ "eval_samples_per_second": 33.251,
7
+ "eval_steps_per_second": 33.251,
8
+ "eval_wer": 0.6776238715457239,
9
+ "train_loss": 0.9487250096427984,
10
+ "train_runtime": 4020.0745,
11
  "train_samples": 4620,
12
+ "train_samples_per_second": 22.985,
13
+ "train_steps_per_second": 0.721
14
  }
emissions.csv CHANGED
@@ -1,2 +1,3 @@
1
  timestamp,experiment_id,project_name,duration,emissions,energy_consumed,country_name,country_iso_code,region,on_cloud,cloud_provider,cloud_region
2
  2021-10-27T10:35:59,1041b9e5-8f0f-43d4-9424-cc7eff53732e,codecarbon,4004.539319753647,0.009920645469192451,0.04713434641766139,United States,USA,new york,N,,
 
1
  timestamp,experiment_id,project_name,duration,emissions,energy_consumed,country_name,country_iso_code,region,on_cloud,cloud_provider,cloud_region
2
  2021-10-27T10:35:59,1041b9e5-8f0f-43d4-9424-cc7eff53732e,codecarbon,4004.539319753647,0.009920645469192451,0.04713434641766139,United States,USA,new york,N,,
3
+ 2021-10-28T00:30:31,912953b8-a980-4cab-8157-37666b2dbc9d,codecarbon,4017.2396454811096,0.00995231552467445,0.04728481519217719,United States,USA,new york,N,,
eval_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 20.0,
3
- "eval_loss": 1.3688374757766724,
4
- "eval_runtime": 49.4666,
5
  "eval_samples": 1680,
6
- "eval_samples_per_second": 33.962,
7
- "eval_steps_per_second": 33.962,
8
- "eval_wer": 0.6817586658397078
9
  }
1
  {
2
  "epoch": 20.0,
3
+ "eval_loss": 1.3600618839263916,
4
+ "eval_runtime": 50.525,
5
  "eval_samples": 1680,
6
+ "eval_samples_per_second": 33.251,
7
+ "eval_steps_per_second": 33.251,
8
+ "eval_wer": 0.6776238715457239
9
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9a64bd40598afe47c4bfb209c83bbb9f3ee40d64fd5fc5a0107161c71a11af0d
3
  size 94084922
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c7441e7588c191aa050f6e17e77c7bb12b02d18099677a48d44f1a501f35d21a
3
  size 94084922
runs/Oct27_23-22-06_brutasse/events.out.tfevents.1635377008.brutasse.8534.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f3bf58c7a1823434d50f8eabd0e7d5cdef53fd54ffd603c5a3cc418ed9e5c775
3
- size 56769
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43b9385ab7e8da796d15482d78b20a5b4bee103a2a0226a4a9d1968fc8b9f866
3
+ size 59011
runs/Oct27_23-22-06_brutasse/events.out.tfevents.1635381083.brutasse.8534.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed901ec653638b6ed5f1d206f6d8f48ba63ac816478bd6ede38c4ba6212555cd
3
+ size 358
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 20.0,
3
- "train_loss": 0.938711966728342,
4
- "train_runtime": 4007.4157,
5
  "train_samples": 4620,
6
- "train_samples_per_second": 23.057,
7
- "train_steps_per_second": 0.724
8
  }
1
  {
2
  "epoch": 20.0,
3
+ "train_loss": 0.9487250096427984,
4
+ "train_runtime": 4020.0745,
5
  "train_samples": 4620,
6
+ "train_samples_per_second": 22.985,
7
+ "train_steps_per_second": 0.721
8
  }
trainer_state.json CHANGED
@@ -10,2017 +10,2017 @@
10
  {
11
  "epoch": 0.07,
12
  "learning_rate": 8.000000000000001e-07,
13
- "loss": 9.653,
14
  "step": 10
15
  },
16
  {
17
  "epoch": 0.14,
18
  "learning_rate": 1.8e-06,
19
- "loss": 8.4639,
20
  "step": 20
21
  },
22
  {
23
  "epoch": 0.21,
24
  "learning_rate": 2.8000000000000003e-06,
25
- "loss": 7.9446,
26
  "step": 30
27
  },
28
  {
29
  "epoch": 0.28,
30
  "learning_rate": 3.7e-06,
31
- "loss": 8.6256,
32
  "step": 40
33
  },
34
  {
35
  "epoch": 0.34,
36
  "learning_rate": 4.7e-06,
37
- "loss": 8.0756,
38
  "step": 50
39
  },
40
  {
41
  "epoch": 0.41,
42
  "learning_rate": 5.7000000000000005e-06,
43
- "loss": 7.4432,
44
  "step": 60
45
  },
46
  {
47
  "epoch": 0.48,
48
  "learning_rate": 6.700000000000001e-06,
49
- "loss": 6.5524,
50
  "step": 70
51
  },
52
  {
53
  "epoch": 0.55,
54
  "learning_rate": 7.7e-06,
55
- "loss": 6.5869,
56
  "step": 80
57
  },
58
  {
59
  "epoch": 0.62,
60
  "learning_rate": 8.7e-06,
61
- "loss": 5.2277,
62
  "step": 90
63
  },
64
  {
65
  "epoch": 0.69,
66
  "learning_rate": 9.7e-06,
67
- "loss": 4.2247,
68
  "step": 100
69
  },
70
  {
71
  "epoch": 0.69,
72
- "eval_loss": 3.8606975078582764,
73
- "eval_runtime": 48.7411,
74
- "eval_samples_per_second": 34.468,
75
- "eval_steps_per_second": 34.468,
76
  "eval_wer": 1.0,
77
  "step": 100
78
  },
79
  {
80
  "epoch": 0.76,
81
  "learning_rate": 1.0700000000000001e-05,
82
- "loss": 3.6454,
83
  "step": 110
84
  },
85
  {
86
  "epoch": 0.83,
87
  "learning_rate": 1.1700000000000001e-05,
88
- "loss": 3.3306,
89
  "step": 120
90
  },
91
  {
92
  "epoch": 0.9,
93
  "learning_rate": 1.27e-05,
94
- "loss": 3.1763,
95
  "step": 130
96
  },
97
  {
98
  "epoch": 0.97,
99
  "learning_rate": 1.3700000000000001e-05,
100
- "loss": 3.0708,
101
  "step": 140
102
  },
103
  {
104
  "epoch": 1.03,
105
  "learning_rate": 1.47e-05,
106
- "loss": 3.0836,
107
  "step": 150
108
  },
109
  {
110
  "epoch": 1.1,
111
  "learning_rate": 1.5700000000000002e-05,
112
- "loss": 2.999,
113
  "step": 160
114
  },
115
  {
116
  "epoch": 1.17,
117
  "learning_rate": 1.6700000000000003e-05,
118
- "loss": 2.9717,
119
  "step": 170
120
  },
121
  {
122
  "epoch": 1.24,
123
  "learning_rate": 1.77e-05,
124
- "loss": 2.9846,
125
  "step": 180
126
  },
127
  {
128
  "epoch": 1.31,
129
  "learning_rate": 1.87e-05,
130
- "loss": 2.9912,
131
  "step": 190
132
  },
133
  {
134
  "epoch": 1.38,
135
  "learning_rate": 1.97e-05,
136
- "loss": 2.9444,
137
  "step": 200
138
  },
139
  {
140
  "epoch": 1.38,
141
- "eval_loss": 2.950913667678833,
142
- "eval_runtime": 49.1765,
143
- "eval_samples_per_second": 34.163,
144
- "eval_steps_per_second": 34.163,
145
  "eval_wer": 1.0,
146
  "step": 200
147
  },
148
  {
149
  "epoch": 1.45,
150
  "learning_rate": 2.07e-05,
151
- "loss": 2.9386,
152
  "step": 210
153
  },
154
  {
155
  "epoch": 1.52,
156
  "learning_rate": 2.1700000000000002e-05,
157
- "loss": 2.9601,
158
  "step": 220
159
  },
160
  {
161
  "epoch": 1.59,
162
  "learning_rate": 2.2700000000000003e-05,
163
- "loss": 2.9395,
164
  "step": 230
165
  },
166
  {
167
  "epoch": 1.66,
168
  "learning_rate": 2.37e-05,
169
- "loss": 2.9159,
170
  "step": 240
171
  },
172
  {
173
  "epoch": 1.72,
174
  "learning_rate": 2.47e-05,
175
- "loss": 2.9124,
176
  "step": 250
177
  },
178
  {
179
  "epoch": 1.79,
180
  "learning_rate": 2.57e-05,
181
- "loss": 2.9412,
182
  "step": 260
183
  },
184
  {
185
  "epoch": 1.86,
186
  "learning_rate": 2.6700000000000002e-05,
187
- "loss": 2.8826,
188
  "step": 270
189
  },
190
  {
191
  "epoch": 1.93,
192
  "learning_rate": 2.7700000000000002e-05,
193
- "loss": 2.8915,
194
  "step": 280
195
  },
196
  {
197
  "epoch": 2.0,
198
  "learning_rate": 2.87e-05,
199
- "loss": 2.9096,
200
  "step": 290
201
  },
202
  {
203
  "epoch": 2.07,
204
  "learning_rate": 2.97e-05,
205
- "loss": 2.8858,
206
  "step": 300
207
  },
208
  {
209
  "epoch": 2.07,
210
- "eval_loss": 2.844599723815918,
211
- "eval_runtime": 49.0987,
212
- "eval_samples_per_second": 34.217,
213
- "eval_steps_per_second": 34.217,
214
  "eval_wer": 1.0,
215
  "step": 300
216
  },
217
  {
218
  "epoch": 2.14,
219
  "learning_rate": 3.07e-05,
220
- "loss": 2.8025,
221
  "step": 310
222
  },
223
  {
224
  "epoch": 2.21,
225
  "learning_rate": 3.1700000000000005e-05,
226
- "loss": 2.7678,
227
  "step": 320
228
  },
229
  {
230
  "epoch": 2.28,
231
  "learning_rate": 3.27e-05,
232
- "loss": 2.7655,
233
  "step": 330
234
  },
235
  {
236
  "epoch": 2.34,
237
  "learning_rate": 3.3700000000000006e-05,
238
- "loss": 2.6538,
239
  "step": 340
240
  },
241
  {
242
  "epoch": 2.41,
243
  "learning_rate": 3.4699999999999996e-05,
244
- "loss": 2.5774,
245
  "step": 350
246
  },
247
  {
248
  "epoch": 2.48,
249
  "learning_rate": 3.57e-05,
250
- "loss": 2.5132,
251
  "step": 360
252
  },
253
  {
254
  "epoch": 2.55,
255
  "learning_rate": 3.6700000000000004e-05,
256
- "loss": 2.4977,
257
  "step": 370
258
  },
259
  {
260
  "epoch": 2.62,
261
  "learning_rate": 3.77e-05,
262
- "loss": 2.3494,
263
  "step": 380
264
  },
265
  {
266
  "epoch": 2.69,
267
  "learning_rate": 3.8700000000000006e-05,
268
- "loss": 2.2912,
269
  "step": 390
270
  },
271
  {
272
  "epoch": 2.76,
273
  "learning_rate": 3.97e-05,
274
- "loss": 2.2804,
275
  "step": 400
276
  },
277
  {
278
  "epoch": 2.76,
279
- "eval_loss": 2.1984939575195312,
280
- "eval_runtime": 49.2048,
281
- "eval_samples_per_second": 34.143,
282
- "eval_steps_per_second": 34.143,
283
- "eval_wer": 1.0013782647646612,
284
  "step": 400
285
  },
286
  {
287
  "epoch": 2.83,
288
  "learning_rate": 4.07e-05,
289
- "loss": 2.1559,
290
  "step": 410
291
  },
292
  {
293
  "epoch": 2.9,
294
  "learning_rate": 4.17e-05,
295
- "loss": 2.013,
296
  "step": 420
297
  },
298
  {
299
  "epoch": 2.97,
300
  "learning_rate": 4.27e-05,
301
- "loss": 1.9527,
302
  "step": 430
303
  },
304
  {
305
  "epoch": 3.03,
306
  "learning_rate": 4.3700000000000005e-05,
307
- "loss": 1.9805,
308
  "step": 440
309
  },
310
  {
311
  "epoch": 3.1,
312
  "learning_rate": 4.47e-05,
313
- "loss": 1.7592,
314
  "step": 450
315
  },
316
  {
317
  "epoch": 3.17,
318
  "learning_rate": 4.5700000000000006e-05,
319
- "loss": 1.6803,
320
  "step": 460
321
  },
322
  {
323
  "epoch": 3.24,
324
  "learning_rate": 4.6700000000000003e-05,
325
- "loss": 1.674,
326
  "step": 470
327
  },
328
  {
329
  "epoch": 3.31,
330
  "learning_rate": 4.77e-05,
331
- "loss": 1.6908,
332
  "step": 480
333
  },
334
  {
335
  "epoch": 3.38,
336
  "learning_rate": 4.87e-05,
337
- "loss": 1.4964,
338
  "step": 490
339
  },
340
  {
341
  "epoch": 3.45,
342
  "learning_rate": 4.97e-05,
343
- "loss": 1.505,
344
  "step": 500
345
  },
346
  {
347
  "epoch": 3.45,
348
- "eval_loss": 1.4971957206726074,
349
- "eval_runtime": 49.3444,
350
- "eval_samples_per_second": 34.046,
351
- "eval_steps_per_second": 34.046,
352
- "eval_wer": 0.9609261939218524,
353
  "step": 500
354
  },
355
  {
356
  "epoch": 3.52,
357
  "learning_rate": 5.0700000000000006e-05,
358
- "loss": 1.5943,
359
  "step": 510
360
  },
361
  {
362
  "epoch": 3.59,
363
  "learning_rate": 5.17e-05,
364
- "loss": 1.3954,
365
  "step": 520
366
  },
367
  {
368
  "epoch": 3.66,
369
  "learning_rate": 5.270000000000001e-05,
370
- "loss": 1.3033,
371
  "step": 530
372
  },
373
  {
374
  "epoch": 3.72,
375
  "learning_rate": 5.3700000000000004e-05,
376
- "loss": 1.4023,
377
  "step": 540
378
  },
379
  {
380
  "epoch": 3.79,
381
  "learning_rate": 5.470000000000001e-05,
382
- "loss": 1.4554,
383
  "step": 550
384
  },
385
  {
386
  "epoch": 3.86,
387
  "learning_rate": 5.5700000000000005e-05,
388
- "loss": 1.2369,
389
  "step": 560
390
  },
391
  {
392
  "epoch": 3.93,
393
  "learning_rate": 5.6699999999999996e-05,
394
- "loss": 1.2277,
395
  "step": 570
396
  },
397
  {
398
  "epoch": 4.0,
399
  "learning_rate": 5.77e-05,
400
- "loss": 1.3168,
401
  "step": 580
402
  },
403
  {
404
  "epoch": 4.07,
405
  "learning_rate": 5.87e-05,
406
- "loss": 1.2854,
407
  "step": 590
408
  },
409
  {
410
  "epoch": 4.14,
411
  "learning_rate": 5.97e-05,
412
- "loss": 1.06,
413
  "step": 600
414
  },
415
  {
416
  "epoch": 4.14,
417
- "eval_loss": 1.2013553380966187,
418
- "eval_runtime": 49.3577,
419
- "eval_samples_per_second": 34.037,
420
- "eval_steps_per_second": 34.037,
421
- "eval_wer": 0.805802494659224,
422
  "step": 600
423
  },
424
  {
425
  "epoch": 4.21,
426
  "learning_rate": 6.07e-05,
427
- "loss": 1.1026,
428
  "step": 610
429
  },
430
  {
431
  "epoch": 4.28,
432
  "learning_rate": 6.170000000000001e-05,
433
- "loss": 1.2475,
434
  "step": 620
435
  },
436
  {
437
  "epoch": 4.34,
438
  "learning_rate": 6.27e-05,
439
- "loss": 1.0605,
440
  "step": 630
441
  },
442
  {
443
  "epoch": 4.41,
444
  "learning_rate": 6.37e-05,
445
- "loss": 1.0361,
446
  "step": 640
447
  },
448
  {
449
  "epoch": 4.48,
450
  "learning_rate": 6.47e-05,
451
- "loss": 1.1079,
452
  "step": 650
453
  },
454
  {
455
  "epoch": 4.55,
456
  "learning_rate": 6.570000000000001e-05,
457
- "loss": 1.2017,
458
  "step": 660
459
  },
460
  {
461
  "epoch": 4.62,
462
  "learning_rate": 6.670000000000001e-05,
463
- "loss": 0.9803,
464
  "step": 670
465
  },
466
  {
467
  "epoch": 4.69,
468
  "learning_rate": 6.77e-05,
469
- "loss": 1.0294,
470
  "step": 680
471
  },
472
  {
473
  "epoch": 4.76,
474
  "learning_rate": 6.87e-05,
475
- "loss": 1.1393,
476
  "step": 690
477
  },
478
  {
479
  "epoch": 4.83,
480
  "learning_rate": 6.97e-05,
481
- "loss": 1.0166,
482
  "step": 700
483
  },
484
  {
485
  "epoch": 4.83,
486
- "eval_loss": 1.0605226755142212,
487
- "eval_runtime": 49.5346,
488
- "eval_samples_per_second": 33.916,
489
- "eval_steps_per_second": 33.916,
490
- "eval_wer": 0.7535662600785611,
491
  "step": 700
492
  },
493
  {
494
  "epoch": 4.9,
495
  "learning_rate": 7.07e-05,
496
- "loss": 0.8619,
497
  "step": 710
498
  },
499
  {
500
  "epoch": 4.97,
501
  "learning_rate": 7.17e-05,
502
- "loss": 0.9549,
503
  "step": 720
504
  },
505
  {
506
  "epoch": 5.03,
507
  "learning_rate": 7.27e-05,
508
- "loss": 1.1162,
509
  "step": 730
510
  },
511
  {
512
  "epoch": 5.1,
513
  "learning_rate": 7.37e-05,
514
- "loss": 0.8634,
515
  "step": 740
516
  },
517
  {
518
  "epoch": 5.17,
519
  "learning_rate": 7.47e-05,
520
- "loss": 0.8374,
521
  "step": 750
522
  },
523
  {
524
  "epoch": 5.24,
525
  "learning_rate": 7.570000000000001e-05,
526
- "loss": 0.9316,
527
  "step": 760
528
  },
529
  {
530
  "epoch": 5.31,
531
  "learning_rate": 7.670000000000001e-05,
532
- "loss": 0.9827,
533
  "step": 770
534
  },
535
  {
536
  "epoch": 5.38,
537
  "learning_rate": 7.77e-05,
538
- "loss": 0.7866,
539
  "step": 780
540
  },
541
  {
542
  "epoch": 5.45,
543
  "learning_rate": 7.87e-05,
544
- "loss": 0.816,
545
  "step": 790
546
  },
547
  {
548
  "epoch": 5.52,
549
  "learning_rate": 7.970000000000001e-05,
550
- "loss": 0.966,
551
  "step": 800
552
  },
553
  {
554
  "epoch": 5.52,
555
- "eval_loss": 0.9963059425354004,
556
- "eval_runtime": 49.4475,
557
- "eval_samples_per_second": 33.975,
558
- "eval_steps_per_second": 33.975,
559
- "eval_wer": 0.7100820067534973,
560
  "step": 800
561
  },
562
  {
563
  "epoch": 5.59,
564
  "learning_rate": 8.070000000000001e-05,
565
- "loss": 0.8691,
566
  "step": 810
567
  },
568
  {
569
  "epoch": 5.66,
570
  "learning_rate": 8.17e-05,
571
- "loss": 0.748,
572
  "step": 820
573
  },
574
  {
575
  "epoch": 5.72,
576
  "learning_rate": 8.27e-05,
577
- "loss": 0.8441,
578
  "step": 830
579
  },
580
  {
581
  "epoch": 5.79,
582
  "learning_rate": 8.37e-05,
583
- "loss": 1.0217,
584
  "step": 840
585
  },
586
  {
587
  "epoch": 5.86,
588
  "learning_rate": 8.47e-05,
589
- "loss": 0.7328,
590
  "step": 850
591
  },
592
  {
593
  "epoch": 5.93,
594
  "learning_rate": 8.57e-05,
595
- "loss": 0.7625,
596
  "step": 860
597
  },
598
  {
599
  "epoch": 6.0,
600
  "learning_rate": 8.67e-05,
601
- "loss": 0.883,
602
  "step": 870
603
  },
604
  {
605
  "epoch": 6.07,
606
  "learning_rate": 8.77e-05,
607
- "loss": 0.8376,
608
  "step": 880
609
  },
610
  {
611
  "epoch": 6.14,
612
  "learning_rate": 8.87e-05,
613
- "loss": 0.6518,
614
  "step": 890
615
  },
616
  {
617
  "epoch": 6.21,
618
  "learning_rate": 8.970000000000001e-05,
619
- "loss": 0.6857,
620
  "step": 900
621
  },
622
  {
623
  "epoch": 6.21,
624
- "eval_loss": 0.9442995190620422,
625
- "eval_runtime": 49.4918,
626
- "eval_samples_per_second": 33.945,
627
- "eval_steps_per_second": 33.945,
628
- "eval_wer": 0.6898215147129764,
629
  "step": 900
630
  },
631
  {
632
  "epoch": 6.28,
633
  "learning_rate": 9.070000000000001e-05,
634
- "loss": 0.8764,
635
  "step": 910
636
  },
637
  {
638
  "epoch": 6.34,
639
  "learning_rate": 9.17e-05,
640
- "loss": 0.7431,
641
  "step": 920
642
  },
643
  {
644
  "epoch": 6.41,
645
  "learning_rate": 9.27e-05,
646
- "loss": 0.6688,
647
  "step": 930
648
  },
649
  {
650
  "epoch": 6.48,
651
  "learning_rate": 9.370000000000001e-05,
652
- "loss": 0.7162,
653
  "step": 940
654
  },
655
  {
656
  "epoch": 6.55,
657
  "learning_rate": 9.47e-05,
658
- "loss": 0.8465,
659
  "step": 950
660
  },
661
  {
662
  "epoch": 6.62,
663
  "learning_rate": 9.57e-05,
664
- "loss": 0.6191,
665
  "step": 960
666
  },
667
  {
668
  "epoch": 6.69,
669
  "learning_rate": 9.67e-05,
670
- "loss": 0.6149,
671
  "step": 970
672
  },
673
  {
674
  "epoch": 6.76,
675
  "learning_rate": 9.77e-05,
676
- "loss": 0.8114,
677
  "step": 980
678
  },
679
  {
680
  "epoch": 6.83,
681
  "learning_rate": 9.87e-05,
682
- "loss": 0.7832,
683
  "step": 990
684
  },
685
  {
686
  "epoch": 6.9,
687
  "learning_rate": 9.970000000000001e-05,
688
- "loss": 0.5859,
689
  "step": 1000
690
  },
691
  {
692
  "epoch": 6.9,
693
- "eval_loss": 0.9042919278144836,
694
- "eval_runtime": 49.5927,
695
- "eval_samples_per_second": 33.876,
696
- "eval_steps_per_second": 33.876,
697
- "eval_wer": 0.6795534422162497,
698
  "step": 1000
699
  },
700
  {
701
  "epoch": 6.97,
702
  "learning_rate": 9.963157894736843e-05,
703
- "loss": 0.6366,
704
  "step": 1010
705
  },
706
  {
707
  "epoch": 7.03,
708
  "learning_rate": 9.910526315789475e-05,
709
- "loss": 0.8343,
710
  "step": 1020
711
  },
712
  {
713
  "epoch": 7.1,
714
  "learning_rate": 9.857894736842106e-05,
715
- "loss": 0.5977,
716
  "step": 1030
717
  },
718
  {
719
  "epoch": 7.17,
720
  "learning_rate": 9.805263157894737e-05,
721
- "loss": 0.5143,
722
  "step": 1040
723
  },
724
  {
725
  "epoch": 7.24,
726
  "learning_rate": 9.752631578947369e-05,
727
- "loss": 0.5725,
728
  "step": 1050
729
  },
730
  {
731
  "epoch": 7.31,
732
  "learning_rate": 9.7e-05,
733
- "loss": 0.7447,
734
  "step": 1060
735
  },
736
  {
737
  "epoch": 7.38,
738
  "learning_rate": 9.647368421052631e-05,
739
- "loss": 0.4958,
740
  "step": 1070
741
  },
742
  {
743
  "epoch": 7.45,
744
  "learning_rate": 9.594736842105264e-05,
745
- "loss": 0.5129,
746
  "step": 1080
747
  },
748
  {
749
  "epoch": 7.52,
750
  "learning_rate": 9.542105263157895e-05,
751
- "loss": 0.6971,
752
  "step": 1090
753
  },
754
  {
755
  "epoch": 7.59,
756
  "learning_rate": 9.489473684210527e-05,
757
- "loss": 0.6812,
758
  "step": 1100
759
  },
760
  {
761
  "epoch": 7.59,
762
- "eval_loss": 0.9094838500022888,
763
- "eval_runtime": 49.4365,
764
- "eval_samples_per_second": 33.983,
765
- "eval_steps_per_second": 33.983,
766
- "eval_wer": 0.6715595065812142,
767
  "step": 1100
768
  },
769
  {
770
  "epoch": 7.66,
771
  "learning_rate": 9.436842105263158e-05,
772
- "loss": 0.4968,
773
  "step": 1110
774
  },
775
  {
776
  "epoch": 7.72,
777
  "learning_rate": 9.384210526315789e-05,
778
- "loss": 0.5241,
779
  "step": 1120
780
  },
781
  {
782
  "epoch": 7.79,
783
  "learning_rate": 9.331578947368422e-05,
784
- "loss": 0.7348,
785
  "step": 1130
786
  },
787
  {
788
  "epoch": 7.86,
789
  "learning_rate": 9.278947368421053e-05,
790
- "loss": 0.5148,
791
  "step": 1140
792
  },
793
  {
794
  "epoch": 7.93,
795
  "learning_rate": 9.226315789473686e-05,
796
- "loss": 0.518,
797
  "step": 1150
798
  },
799
  {
800
  "epoch": 8.0,
801
  "learning_rate": 9.173684210526317e-05,
802
- "loss": 0.6009,
803
  "step": 1160
804
  },
805
  {
806
  "epoch": 8.07,
807
  "learning_rate": 9.121052631578948e-05,
808
- "loss": 0.6677,
809
  "step": 1170
810
  },
811
  {
812
  "epoch": 8.14,
813
  "learning_rate": 9.06842105263158e-05,
814
- "loss": 0.4171,
815
  "step": 1180
816
  },
817
  {
818
  "epoch": 8.21,
819
  "learning_rate": 9.015789473684211e-05,
820
- "loss": 0.4129,
821
  "step": 1190
822
  },
823
  {
824
  "epoch": 8.28,
825
  "learning_rate": 8.963157894736842e-05,
826
- "loss": 0.6088,
827
  "step": 1200
828
  },
829
  {
830
  "epoch": 8.28,
831
- "eval_loss": 0.9421910047531128,
832
- "eval_runtime": 50.1077,
833
- "eval_samples_per_second": 33.528,
834
- "eval_steps_per_second": 33.528,
835
- "eval_wer": 0.6677003652401626,
836
  "step": 1200
837
  },
838
  {
839
  "epoch": 8.34,
840
  "learning_rate": 8.910526315789474e-05,
841
- "loss": 0.4936,
842
  "step": 1210
843
  },
844
  {
845
  "epoch": 8.41,
846
  "learning_rate": 8.857894736842106e-05,
847
- "loss": 0.4166,
848
  "step": 1220
849
  },
850
  {
851
  "epoch": 8.48,
852
  "learning_rate": 8.805263157894737e-05,
853
- "loss": 0.4619,
854
  "step": 1230
855
  },
856
  {
857
  "epoch": 8.55,
858
  "learning_rate": 8.752631578947369e-05,
859
- "loss": 0.6438,
860
  "step": 1240
861
  },
862
  {
863
  "epoch": 8.62,
864
  "learning_rate": 8.7e-05,
865
- "loss": 0.4375,
866
  "step": 1250
867
  },
868
  {
869
  "epoch": 8.69,
870
  "learning_rate": 8.647368421052631e-05,
871
- "loss": 0.3933,
872
  "step": 1260
873
  },
874
  {
875
  "epoch": 8.76,
876
  "learning_rate": 8.594736842105263e-05,
877
- "loss": 0.5451,
878
  "step": 1270
879
  },
880
  {
881
  "epoch": 8.83,
882
  "learning_rate": 8.542105263157894e-05,
883
- "loss": 0.5696,
884
  "step": 1280
885
  },
886
  {
887
  "epoch": 8.9,
888
  "learning_rate": 8.489473684210527e-05,
889
- "loss": 0.3988,
890
  "step": 1290
891
  },
892
  {
893
  "epoch": 8.97,
894
  "learning_rate": 8.436842105263158e-05,
895
- "loss": 0.4162,
896
  "step": 1300
897
  },
898
  {
899
  "epoch": 8.97,
900
- "eval_loss": 0.9547951817512512,
901
- "eval_runtime": 49.7551,
902
- "eval_samples_per_second": 33.765,
903
- "eval_steps_per_second": 33.765,
904
- "eval_wer": 0.6657018813314037,
905
  "step": 1300
906
  },
907
  {
908
  "epoch": 9.03,
909
  "learning_rate": 8.38421052631579e-05,
910
- "loss": 0.581,
911
  "step": 1310
912
  },
913
  {
914
  "epoch": 9.1,
915
  "learning_rate": 8.331578947368422e-05,
916
- "loss": 0.3721,
917
  "step": 1320
918
  },
919
  {
920
  "epoch": 9.17,
921
  "learning_rate": 8.278947368421053e-05,
922
- "loss": 0.3251,
923
  "step": 1330
924
  },
925
  {
926
  "epoch": 9.24,
927
  "learning_rate": 8.226315789473684e-05,
928
- "loss": 0.3486,
929
  "step": 1340
930
  },
931
  {
932
  "epoch": 9.31,
933
  "learning_rate": 8.173684210526317e-05,
934
- "loss": 0.5894,
935
  "step": 1350
936
  },
937
  {
938
  "epoch": 9.38,
939
  "learning_rate": 8.121052631578948e-05,
940
- "loss": 0.3616,
941
  "step": 1360
942
  },
943
  {
944
  "epoch": 9.45,
945
  "learning_rate": 8.06842105263158e-05,
946
- "loss": 0.3189,
947
  "step": 1370
948
  },
949
  {
950
  "epoch": 9.52,
951
  "learning_rate": 8.015789473684211e-05,
952
- "loss": 0.4903,
953
  "step": 1380
954
  },
955
  {
956
  "epoch": 9.59,
957
  "learning_rate": 7.963157894736842e-05,
958
- "loss": 0.4564,
959
  "step": 1390
960
  },
961
  {
962
  "epoch": 9.66,
963
  "learning_rate": 7.910526315789474e-05,
964
- "loss": 0.3411,
965
  "step": 1400
966
  },
967
  {
968
  "epoch": 9.66,
969
- "eval_loss": 0.9900805950164795,
970
- "eval_runtime": 49.6853,
971
- "eval_samples_per_second": 33.813,
972
- "eval_steps_per_second": 33.813,
973
- "eval_wer": 0.6688718902901247,
974
  "step": 1400
975
  },
976
  {
977
  "epoch": 9.72,
978
  "learning_rate": 7.857894736842105e-05,
979
- "loss": 0.3415,
980
  "step": 1410
981
  },
982
  {
983
  "epoch": 9.79,
984
  "learning_rate": 7.805263157894738e-05,
985
- "loss": 0.5735,
986
  "step": 1420
987
  },
988
  {
989
  "epoch": 9.86,
990
  "learning_rate": 7.752631578947369e-05,
991
- "loss": 0.3583,
992
  "step": 1430
993
  },
994
  {
995
  "epoch": 9.93,
996
  "learning_rate": 7.7e-05,
997
- "loss": 0.2923,
998
  "step": 1440
999
  },
1000
  {
1001
  "epoch": 10.0,
1002
  "learning_rate": 7.647368421052631e-05,
1003
- "loss": 0.3396,
1004
  "step": 1450
1005
  },
1006
  {
1007
  "epoch": 10.07,
1008
  "learning_rate": 7.594736842105263e-05,
1009
- "loss": 0.493,
1010
  "step": 1460
1011
  },
1012
  {
1013
  "epoch": 10.14,
1014
  "learning_rate": 7.542105263157895e-05,
1015
- "loss": 0.2637,
1016
  "step": 1470
1017
  },
1018
  {
1019
  "epoch": 10.21,
1020
  "learning_rate": 7.489473684210527e-05,
1021
- "loss": 0.2342,
1022
  "step": 1480
1023
  },
1024
  {
1025
  "epoch": 10.28,
1026
  "learning_rate": 7.43684210526316e-05,
1027
- "loss": 0.4601,
1028
  "step": 1490
1029
  },
1030
  {
1031
  "epoch": 10.34,
1032
  "learning_rate": 7.38421052631579e-05,
1033
- "loss": 0.3323,
1034
  "step": 1500
1035
  },
1036
  {
1037
  "epoch": 10.34,
1038
- "eval_loss": 0.9995749592781067,
1039
- "eval_runtime": 49.7001,
1040
- "eval_samples_per_second": 33.803,
1041
- "eval_steps_per_second": 33.803,
1042
- "eval_wer": 0.6637723106608779,
1043
  "step": 1500
1044
  },
1045
  {
1046
  "epoch": 10.41,
1047
  "learning_rate": 7.331578947368422e-05,
1048
- "loss": 0.2467,
1049
  "step": 1510
1050
  },
1051
  {
1052
  "epoch": 10.48,
1053
  "learning_rate": 7.278947368421053e-05,
1054
- "loss": 0.2583,
1055
  "step": 1520
1056
  },
1057
  {
1058
  "epoch": 10.55,
1059
  "learning_rate": 7.226315789473685e-05,
1060
- "loss": 0.5144,
1061
  "step": 1530
1062
  },
1063
  {
1064
  "epoch": 10.62,
1065
  "learning_rate": 7.173684210526316e-05,
1066
- "loss": 0.2957,
1067
  "step": 1540
1068
  },
1069
  {
1070
  "epoch": 10.69,
1071
  "learning_rate": 7.121052631578947e-05,
1072
- "loss": 0.2613,
1073
  "step": 1550
1074
  },
1075
  {
1076
  "epoch": 10.76,
1077
  "learning_rate": 7.06842105263158e-05,
1078
- "loss": 0.3751,
1079
  "step": 1560
1080
  },
1081
  {
1082
  "epoch": 10.83,
1083
  "learning_rate": 7.015789473684211e-05,
1084
- "loss": 0.4468,
1085
  "step": 1570
1086
  },
1087
  {
1088
  "epoch": 10.9,
1089
  "learning_rate": 6.963157894736842e-05,
1090
- "loss": 0.2497,
1091
  "step": 1580
1092
  },
1093
  {
1094
  "epoch": 10.97,
1095
  "learning_rate": 6.910526315789474e-05,
1096
- "loss": 0.2478,
1097
  "step": 1590
1098
  },
1099
  {
1100
  "epoch": 11.03,
1101
  "learning_rate": 6.857894736842105e-05,
1102
- "loss": 0.431,
1103
  "step": 1600
1104
  },
1105
  {
1106
  "epoch": 11.03,
1107
- "eval_loss": 1.0520888566970825,
1108
- "eval_runtime": 49.5966,
1109
- "eval_samples_per_second": 33.873,
1110
- "eval_steps_per_second": 33.873,
1111
- "eval_wer": 0.6708014609606505,
1112
  "step": 1600
1113
  },
1114
  {
1115
  "epoch": 11.1,
1116
  "learning_rate": 6.805263157894736e-05,
1117
- "loss": 0.2673,
1118
  "step": 1610
1119
  },
1120
  {
1121
  "epoch": 11.17,
1122
  "learning_rate": 6.752631578947368e-05,
1123
- "loss": 0.1873,
1124
  "step": 1620
1125
  },
1126
  {
1127
  "epoch": 11.24,
1128
  "learning_rate": 6.7e-05,
1129
- "loss": 0.1937,
1130
  "step": 1630
1131
  },
1132
  {
1133
  "epoch": 11.31,
1134
  "learning_rate": 6.647368421052632e-05,
1135
- "loss": 0.48,
1136
  "step": 1640
1137
  },
1138
  {
1139
  "epoch": 11.38,
1140
  "learning_rate": 6.594736842105264e-05,
1141
- "loss": 0.2313,
1142
  "step": 1650
1143
  },
1144
  {
1145
  "epoch": 11.45,
1146
  "learning_rate": 6.542105263157895e-05,
1147
- "loss": 0.1876,
1148
  "step": 1660
1149
  },
1150
  {
1151
  "epoch": 11.52,
1152
  "learning_rate": 6.489473684210527e-05,
1153
- "loss": 0.3463,
1154
  "step": 1670
1155
  },
1156
  {
1157
  "epoch": 11.59,
1158
  "learning_rate": 6.436842105263158e-05,
1159
- "loss": 0.3318,
1160
  "step": 1680
1161
  },
1162
  {
1163
  "epoch": 11.66,
1164
  "learning_rate": 6.384210526315791e-05,
1165
- "loss": 0.1915,
1166
  "step": 1690
1167
  },
1168
  {
1169
  "epoch": 11.72,
1170
  "learning_rate": 6.331578947368422e-05,
1171
- "loss": 0.2029,
1172
  "step": 1700
1173
  },
1174
  {
1175
  "epoch": 11.72,
1176
- "eval_loss": 1.0945649147033691,
1177
- "eval_runtime": 49.6143,
1178
- "eval_samples_per_second": 33.861,
1179
- "eval_steps_per_second": 33.861,
1180
- "eval_wer": 0.6792777892633175,
1181
  "step": 1700
1182
  },
1183
  {
1184
  "epoch": 11.79,
1185
  "learning_rate": 6.278947368421053e-05,
1186
- "loss": 0.4609,
1187
  "step": 1710
1188
  },
1189
  {
1190
  "epoch": 11.86,
1191
  "learning_rate": 6.226315789473685e-05,
1192
- "loss": 0.2301,
1193
  "step": 1720
1194
  },
1195
  {
1196
  "epoch": 11.93,
1197
  "learning_rate": 6.173684210526316e-05,
1198
- "loss": 0.2117,
1199
  "step": 1730
1200
  },
1201
  {
1202
  "epoch": 12.0,
1203
  "learning_rate": 6.121052631578947e-05,
1204
- "loss": 0.2205,
1205
  "step": 1740
1206
  },
1207
  {
1208
  "epoch": 12.07,
1209
  "learning_rate": 6.0684210526315785e-05,
1210
- "loss": 0.396,
1211
  "step": 1750
1212
  },
1213
  {
1214
  "epoch": 12.14,
1215
  "learning_rate": 6.015789473684211e-05,
1216
- "loss": 0.1706,
1217
  "step": 1760
1218
  },
1219
  {
1220
  "epoch": 12.21,
1221
  "learning_rate": 5.9631578947368425e-05,
1222
- "loss": 0.1486,
1223
  "step": 1770
1224
  },
1225
  {
1226
  "epoch": 12.28,
1227
  "learning_rate": 5.9105263157894744e-05,
1228
- "loss": 0.3378,
1229
  "step": 1780
1230
  },
1231
  {
1232
  "epoch": 12.34,
1233
  "learning_rate": 5.857894736842106e-05,
1234
- "loss": 0.2552,
1235
  "step": 1790
1236
  },
1237
  {
1238
  "epoch": 12.41,
1239
  "learning_rate": 5.805263157894737e-05,
1240
- "loss": 0.1424,
1241
  "step": 1800
1242
  },
1243
  {
1244
  "epoch": 12.41,
1245
- "eval_loss": 1.1287890672683716,
1246
- "eval_runtime": 49.6023,
1247
- "eval_samples_per_second": 33.869,
1248
- "eval_steps_per_second": 33.869,
1249
- "eval_wer": 0.6712149403900489,
1250
  "step": 1800
1251
  },
1252
  {
1253
  "epoch": 12.48,
1254
  "learning_rate": 5.752631578947368e-05,
1255
- "loss": 0.1614,
1256
  "step": 1810
1257
  },
1258
  {
1259
  "epoch": 12.55,
1260
  "learning_rate": 5.6999999999999996e-05,
1261
- "loss": 0.3994,
1262
  "step": 1820
1263
  },
1264
  {
1265
  "epoch": 12.62,
1266
  "learning_rate": 5.647368421052632e-05,
1267
- "loss": 0.1802,
1268
  "step": 1830
1269
  },
1270
  {
1271
  "epoch": 12.69,
1272
  "learning_rate": 5.5947368421052636e-05,
1273
- "loss": 0.1493,
1274
  "step": 1840
1275
  },
1276
  {
1277
  "epoch": 12.76,
1278
  "learning_rate": 5.542105263157895e-05,
1279
- "loss": 0.2477,
1280
  "step": 1850
1281
  },
1282
  {
1283
  "epoch": 12.83,
1284
  "learning_rate": 5.489473684210527e-05,
1285
- "loss": 0.3343,
1286
  "step": 1860
1287
  },
1288
  {
1289
  "epoch": 12.9,
1290
  "learning_rate": 5.436842105263158e-05,
1291
- "loss": 0.1604,
1292
  "step": 1870
1293
  },
1294
  {
1295
  "epoch": 12.97,
1296
  "learning_rate": 5.3842105263157895e-05,
1297
- "loss": 0.1511,
1298
  "step": 1880
1299
  },
1300
  {
1301
  "epoch": 13.03,
1302
  "learning_rate": 5.331578947368421e-05,
1303
- "loss": 0.3277,
1304
  "step": 1890
1305
  },
1306
  {
1307
  "epoch": 13.1,
1308
  "learning_rate": 5.2789473684210534e-05,
1309
- "loss": 0.1922,
1310
  "step": 1900
1311
  },
1312
  {
1313
  "epoch": 13.1,
1314
- "eval_loss": 1.1456407308578491,
1315
- "eval_runtime": 49.7045,
1316
- "eval_samples_per_second": 33.8,
1317
- "eval_steps_per_second": 33.8,
1318
- "eval_wer": 0.6739714699193715,
1319
  "step": 1900
1320
  },
1321
  {
1322
  "epoch": 13.17,
1323
  "learning_rate": 5.226315789473685e-05,
1324
- "loss": 0.1204,
1325
  "step": 1910
1326
  },
1327
  {
1328
  "epoch": 13.24,
1329
  "learning_rate": 5.173684210526316e-05,
1330
- "loss": 0.1144,
1331
  "step": 1920
1332
  },
1333
  {
1334
  "epoch": 13.31,
1335
  "learning_rate": 5.121052631578947e-05,
1336
- "loss": 0.373,
1337
  "step": 1930
1338
  },
1339
  {
1340
  "epoch": 13.38,
1341
  "learning_rate": 5.068421052631579e-05,
1342
- "loss": 0.1471,
1343
  "step": 1940
1344
  },
1345
  {
1346
  "epoch": 13.45,
1347
  "learning_rate": 5.0157894736842106e-05,
1348
- "loss": 0.1108,
1349
  "step": 1950
1350
  },
1351
  {
1352
  "epoch": 13.52,
1353
  "learning_rate": 4.9631578947368426e-05,
1354
- "loss": 0.2611,
1355
  "step": 1960
1356
  },
1357
  {
1358
  "epoch": 13.59,
1359
  "learning_rate": 4.910526315789474e-05,
1360
- "loss": 0.2552,
1361
  "step": 1970
1362
  },
1363
  {
1364
  "epoch": 13.66,
1365
  "learning_rate": 4.857894736842106e-05,
1366
- "loss": 0.1166,
1367
  "step": 1980
1368
  },
1369
  {
1370
  "epoch": 13.72,
1371
  "learning_rate": 4.805263157894737e-05,
1372
- "loss": 0.124,
1373
  "step": 1990
1374
  },
1375
  {
1376
  "epoch": 13.79,
1377
  "learning_rate": 4.7526315789473684e-05,
1378
- "loss": 0.326,
1379
  "step": 2000
1380
  },
1381
  {
1382
  "epoch": 13.79,
1383
- "eval_loss": 1.2077220678329468,
1384
- "eval_runtime": 49.5752,
1385
- "eval_samples_per_second": 33.888,
1386
- "eval_steps_per_second": 33.888,
1387
- "eval_wer": 0.6914754324305699,
1388
  "step": 2000
1389
  },
1390
  {
1391
  "epoch": 13.86,
1392
  "learning_rate": 4.7e-05,
1393
- "loss": 0.1467,
1394
  "step": 2010
1395
  },
1396
  {
1397
  "epoch": 13.93,
1398
  "learning_rate": 4.647368421052632e-05,
1399
- "loss": 0.1127,
1400
  "step": 2020
1401
  },
1402
  {
1403
  "epoch": 14.0,
1404
  "learning_rate": 4.594736842105264e-05,
1405
- "loss": 0.1298,
1406
  "step": 2030
1407
  },
1408
  {
1409
  "epoch": 14.07,
1410
  "learning_rate": 4.542105263157895e-05,
1411
- "loss": 0.3033,
1412
  "step": 2040
1413
  },
1414
  {
1415
  "epoch": 14.14,
1416
  "learning_rate": 4.489473684210527e-05,
1417
- "loss": 0.0986,
1418
  "step": 2050
1419
  },
1420
  {
1421
  "epoch": 14.21,
1422
  "learning_rate": 4.436842105263158e-05,
1423
- "loss": 0.0835,
1424
  "step": 2060
1425
  },
1426
  {
1427
  "epoch": 14.28,
1428
  "learning_rate": 4.3842105263157895e-05,
1429
- "loss": 0.2537,
1430
  "step": 2070
1431
  },
1432
  {
1433
  "epoch": 14.34,
1434
  "learning_rate": 4.3315789473684215e-05,
1435
- "loss": 0.1715,
1436
  "step": 2080
1437
  },
1438
  {
1439
  "epoch": 14.41,
1440
  "learning_rate": 4.278947368421053e-05,
1441
- "loss": 0.1022,
1442
  "step": 2090
1443
  },
1444
  {
1445
  "epoch": 14.48,
1446
  "learning_rate": 4.226315789473684e-05,
1447
- "loss": 0.0892,
1448
  "step": 2100
1449
  },
1450
  {
1451
  "epoch": 14.48,
1452
- "eval_loss": 1.2525088787078857,
1453
- "eval_runtime": 49.6787,
1454
- "eval_samples_per_second": 33.817,
1455
- "eval_steps_per_second": 33.817,
1456
- "eval_wer": 0.6795534422162497,
1457
  "step": 2100
1458
  },
1459
  {
1460
  "epoch": 14.55,
1461
  "learning_rate": 4.1736842105263154e-05,
1462
- "loss": 0.2858,
1463
  "step": 2110
1464
  },
1465
  {
1466
  "epoch": 14.62,
1467
  "learning_rate": 4.1210526315789474e-05,
1468
- "loss": 0.1159,
1469
  "step": 2120
1470
  },
1471
  {
1472
  "epoch": 14.69,
1473
  "learning_rate": 4.0684210526315794e-05,
1474
- "loss": 0.0943,
1475
  "step": 2130
1476
  },
1477
  {
1478
  "epoch": 14.76,
1479
  "learning_rate": 4.015789473684211e-05,
1480
- "loss": 0.1748,
1481
  "step": 2140
1482
  },
1483
  {
1484
  "epoch": 14.83,
1485
  "learning_rate": 3.9631578947368426e-05,
1486
- "loss": 0.2513,
1487
  "step": 2150
1488
  },
1489
  {
1490
  "epoch": 14.9,
1491
  "learning_rate": 3.910526315789474e-05,
1492
- "loss": 0.0915,
1493
  "step": 2160
1494
  },
1495
  {
1496
  "epoch": 14.97,
1497
  "learning_rate": 3.857894736842105e-05,
1498
- "loss": 0.0812,
1499
  "step": 2170
1500
  },
1501
  {
1502
  "epoch": 15.03,
1503
  "learning_rate": 3.8052631578947365e-05,
1504
- "loss": 0.2623,
1505
  "step": 2180
1506
  },
1507
  {
1508
  "epoch": 15.1,
1509
  "learning_rate": 3.7526315789473685e-05,
1510
- "loss": 0.1312,
1511
  "step": 2190
1512
  },
1513
  {
1514
  "epoch": 15.17,
1515
  "learning_rate": 3.7e-05,
1516
- "loss": 0.0769,
1517
  "step": 2200
1518
  },
1519
  {
1520
  "epoch": 15.17,
1521
- "eval_loss": 1.2313077449798584,
1522
- "eval_runtime": 49.7254,
1523
- "eval_samples_per_second": 33.786,
1524
- "eval_steps_per_second": 33.786,
1525
- "eval_wer": 0.6736269037282062,
1526
  "step": 2200
1527
  },
1528
  {
1529
  "epoch": 15.24,
1530
  "learning_rate": 3.647368421052632e-05,
1531
- "loss": 0.0657,
1532
  "step": 2210
1533
  },
1534
  {
1535
  "epoch": 15.31,
1536
  "learning_rate": 3.594736842105264e-05,
1537
- "loss": 0.2479,
1538
  "step": 2220
1539
  },
1540
  {
1541
  "epoch": 15.38,
1542
  "learning_rate": 3.542105263157895e-05,
1543
- "loss": 0.1003,
1544
  "step": 2230
1545
  },
1546
  {
1547
  "epoch": 15.45,
1548
  "learning_rate": 3.4894736842105264e-05,
1549
- "loss": 0.0655,
1550
  "step": 2240
1551
  },
1552
  {
1553
  "epoch": 15.52,
1554
  "learning_rate": 3.436842105263158e-05,
1555
- "loss": 0.1793,
1556
  "step": 2250
1557
  },
1558
  {
1559
  "epoch": 15.59,
1560
  "learning_rate": 3.3842105263157896e-05,
1561
- "loss": 0.162,
1562
  "step": 2260
1563
  },
1564
  {
1565
  "epoch": 15.66,
1566
  "learning_rate": 3.331578947368421e-05,
1567
- "loss": 0.0824,
1568
  "step": 2270
1569
  },
1570
  {
1571
  "epoch": 15.72,
1572
  "learning_rate": 3.278947368421052e-05,
1573
- "loss": 0.0805,
1574
  "step": 2280
1575
  },
1576
  {
1577
  "epoch": 15.79,
1578
  "learning_rate": 3.226315789473684e-05,
1579
- "loss": 0.2688,
1580
  "step": 2290
1581
  },
1582
  {
1583
  "epoch": 15.86,
1584
  "learning_rate": 3.173684210526316e-05,
1585
- "loss": 0.0927,
1586
  "step": 2300
1587
  },
1588
  {
1589
  "epoch": 15.86,
1590
- "eval_loss": 1.300140619277954,
1591
- "eval_runtime": 49.6316,
1592
- "eval_samples_per_second": 33.849,
1593
- "eval_steps_per_second": 33.849,
1594
- "eval_wer": 0.6863758528013232,
1595
  "step": 2300
1596
  },
1597
  {
1598
  "epoch": 15.93,
1599
  "learning_rate": 3.1210526315789475e-05,
1600
- "loss": 0.0762,
1601
  "step": 2310
1602
  },
1603
  {
1604
  "epoch": 16.0,
1605
  "learning_rate": 3.0684210526315795e-05,
1606
- "loss": 0.0758,
1607
  "step": 2320
1608
  },
1609
  {
1610
  "epoch": 16.07,
1611
  "learning_rate": 3.0157894736842108e-05,
1612
- "loss": 0.2356,
1613
  "step": 2330
1614
  },
1615
  {
1616
  "epoch": 16.14,
1617
  "learning_rate": 2.963157894736842e-05,
1618
- "loss": 0.0667,
1619
  "step": 2340
1620
  },
1621
  {
1622
  "epoch": 16.21,
1623
  "learning_rate": 2.910526315789474e-05,
1624
- "loss": 0.0607,
1625
  "step": 2350
1626
  },
1627
  {
1628
  "epoch": 16.28,
1629
  "learning_rate": 2.8578947368421057e-05,
1630
- "loss": 0.1887,
1631
  "step": 2360
1632
  },
1633
  {
1634
  "epoch": 16.34,
1635
  "learning_rate": 2.805263157894737e-05,
1636
- "loss": 0.1209,
1637
  "step": 2370
1638
  },
1639
  {
1640
  "epoch": 16.41,
1641
  "learning_rate": 2.7526315789473683e-05,
1642
- "loss": 0.0688,
1643
  "step": 2380
1644
  },
1645
  {
1646
  "epoch": 16.48,
1647
  "learning_rate": 2.7000000000000002e-05,
1648
- "loss": 0.0572,
1649
  "step": 2390
1650
  },
1651
  {
1652
  "epoch": 16.55,
1653
  "learning_rate": 2.647368421052632e-05,
1654
- "loss": 0.232,
1655
  "step": 2400
1656
  },
1657
  {
1658
  "epoch": 16.55,
1659
- "eval_loss": 1.349038004875183,
1660
- "eval_runtime": 49.7195,
1661
- "eval_samples_per_second": 33.79,
1662
- "eval_steps_per_second": 33.79,
1663
- "eval_wer": 0.6962993591068845,
1664
  "step": 2400
1665
  },
1666
  {
1667
  "epoch": 16.62,
1668
  "learning_rate": 2.5947368421052632e-05,
1669
- "loss": 0.0775,
1670
  "step": 2410
1671
  },
1672
  {
1673
  "epoch": 16.69,
1674
  "learning_rate": 2.542105263157895e-05,
1675
- "loss": 0.0604,
1676
  "step": 2420
1677
  },
1678
  {
1679
  "epoch": 16.76,
1680
  "learning_rate": 2.4894736842105264e-05,
1681
- "loss": 0.1308,
1682
  "step": 2430
1683
  },
1684
  {
1685
  "epoch": 16.83,
1686
  "learning_rate": 2.436842105263158e-05,
1687
- "loss": 0.1677,
1688
  "step": 2440
1689
  },
1690
  {
1691
  "epoch": 16.9,
1692
  "learning_rate": 2.3842105263157897e-05,
1693
- "loss": 0.0647,
1694
  "step": 2450
1695
  },
1696
  {
1697
  "epoch": 16.97,
1698
  "learning_rate": 2.331578947368421e-05,
1699
- "loss": 0.0554,
1700
  "step": 2460
1701
  },
1702
  {
1703
  "epoch": 17.03,
1704
  "learning_rate": 2.2789473684210527e-05,
1705
- "loss": 0.1842,
1706
  "step": 2470
1707
  },
1708
  {
1709
  "epoch": 17.1,
1710
  "learning_rate": 2.2263157894736843e-05,
1711
- "loss": 0.0917,
1712
  "step": 2480
1713
  },
1714
  {
1715
  "epoch": 17.17,
1716
  "learning_rate": 2.173684210526316e-05,
1717
- "loss": 0.0469,
1718
  "step": 2490
1719
  },
1720
  {
1721
  "epoch": 17.24,
1722
  "learning_rate": 2.1210526315789476e-05,
1723
- "loss": 0.0485,
1724
  "step": 2500
1725
  },
1726
  {
1727
  "epoch": 17.24,
1728
- "eval_loss": 1.3268318176269531,
1729
- "eval_runtime": 49.6454,
1730
- "eval_samples_per_second": 33.84,
1731
- "eval_steps_per_second": 33.84,
1732
- "eval_wer": 0.6763145200192957,
1733
  "step": 2500
1734
  },
1735
  {
1736
  "epoch": 17.31,
1737
  "learning_rate": 2.068421052631579e-05,
1738
- "loss": 0.2096,
1739
  "step": 2510
1740
  },
1741
  {
1742
  "epoch": 17.38,
1743
  "learning_rate": 2.0157894736842105e-05,
1744
- "loss": 0.0561,
1745
  "step": 2520
1746
  },
1747
  {
1748
  "epoch": 17.45,
1749
  "learning_rate": 1.963157894736842e-05,
1750
- "loss": 0.0476,
1751
  "step": 2530
1752
  },
1753
  {
1754
  "epoch": 17.52,
1755
  "learning_rate": 1.9105263157894738e-05,
1756
- "loss": 0.1363,
1757
  "step": 2540
1758
  },
1759
  {
1760
  "epoch": 17.59,
1761
  "learning_rate": 1.8578947368421054e-05,
1762
- "loss": 0.1129,
1763
  "step": 2550
1764
  },
1765
  {
1766
  "epoch": 17.66,
1767
  "learning_rate": 1.8052631578947367e-05,
1768
- "loss": 0.0508,
1769
  "step": 2560
1770
  },
1771
  {
1772
  "epoch": 17.72,
1773
  "learning_rate": 1.7526315789473683e-05,
1774
- "loss": 0.0426,
1775
  "step": 2570
1776
  },
1777
  {
1778
  "epoch": 17.79,
1779
  "learning_rate": 1.7000000000000003e-05,
1780
- "loss": 0.1895,
1781
  "step": 2580
1782
  },
1783
  {
1784
  "epoch": 17.86,
1785
  "learning_rate": 1.6473684210526316e-05,
1786
- "loss": 0.0756,
1787
  "step": 2590
1788
  },
1789
  {
1790
  "epoch": 17.93,
1791
  "learning_rate": 1.5947368421052633e-05,
1792
- "loss": 0.0487,
1793
  "step": 2600
1794
  },
1795
  {
1796
  "epoch": 17.93,
1797
- "eval_loss": 1.3375909328460693,
1798
- "eval_runtime": 49.815,
1799
- "eval_samples_per_second": 33.725,
1800
- "eval_steps_per_second": 33.725,
1801
- "eval_wer": 0.6779684377368893,
1802
  "step": 2600
1803
  },
1804
  {
1805
  "epoch": 18.0,
1806
  "learning_rate": 1.5421052631578946e-05,
1807
- "loss": 0.0497,
1808
  "step": 2610
1809
  },
1810
  {
1811
  "epoch": 18.07,
1812
  "learning_rate": 1.4894736842105264e-05,
1813
- "loss": 0.1801,
1814
  "step": 2620
1815
  },
1816
  {
1817
  "epoch": 18.14,
1818
  "learning_rate": 1.4368421052631582e-05,
1819
- "loss": 0.047,
1820
  "step": 2630
1821
  },
1822
  {
1823
  "epoch": 18.21,
1824
  "learning_rate": 1.3842105263157895e-05,
1825
- "loss": 0.044,
1826
  "step": 2640
1827
  },
1828
  {
1829
  "epoch": 18.28,
1830
  "learning_rate": 1.3315789473684213e-05,
1831
- "loss": 0.1409,
1832
  "step": 2650
1833
  },
1834
  {
1835
  "epoch": 18.34,
1836
  "learning_rate": 1.2789473684210526e-05,
1837
- "loss": 0.0887,
1838
  "step": 2660
1839
  },
1840
  {
1841
  "epoch": 18.41,
1842
  "learning_rate": 1.2263157894736844e-05,
1843
- "loss": 0.0456,
1844
  "step": 2670
1845
  },
1846
  {
1847
  "epoch": 18.48,
1848
  "learning_rate": 1.1736842105263158e-05,
1849
- "loss": 0.0403,
1850
  "step": 2680
1851
  },
1852
  {
1853
  "epoch": 18.55,
1854
  "learning_rate": 1.1210526315789475e-05,
1855
- "loss": 0.161,
1856
  "step": 2690
1857
  },
1858
  {
1859
  "epoch": 18.62,
1860
  "learning_rate": 1.068421052631579e-05,
1861
- "loss": 0.0607,
1862
  "step": 2700
1863
  },
1864
  {
1865
  "epoch": 18.62,
1866
- "eval_loss": 1.3700547218322754,
1867
- "eval_runtime": 49.648,
1868
- "eval_samples_per_second": 33.838,
1869
- "eval_steps_per_second": 33.838,
1870
- "eval_wer": 0.6894769485218111,
1871
  "step": 2700
1872
  },
1873
  {
1874
  "epoch": 18.69,
1875
  "learning_rate": 1.0157894736842106e-05,
1876
- "loss": 0.0388,
1877
  "step": 2710
1878
  },
1879
  {
1880
  "epoch": 18.76,
1881
  "learning_rate": 9.631578947368422e-06,
1882
- "loss": 0.1065,
1883
  "step": 2720
1884
  },
1885
  {
1886
  "epoch": 18.83,
1887
  "learning_rate": 9.105263157894737e-06,
1888
- "loss": 0.1294,
1889
  "step": 2730
1890
  },
1891
  {
1892
  "epoch": 18.9,
1893
  "learning_rate": 8.578947368421053e-06,
1894
- "loss": 0.0507,
1895
  "step": 2740
1896
  },
1897
  {
1898
  "epoch": 18.97,
1899
  "learning_rate": 8.052631578947368e-06,
1900
- "loss": 0.0382,
1901
  "step": 2750
1902
  },
1903
  {
1904
  "epoch": 19.03,
1905
  "learning_rate": 7.526315789473684e-06,
1906
- "loss": 0.1584,
1907
  "step": 2760
1908
  },
1909
  {
1910
  "epoch": 19.1,
1911
  "learning_rate": 7.000000000000001e-06,
1912
- "loss": 0.0715,
1913
  "step": 2770
1914
  },
1915
  {
1916
  "epoch": 19.17,
1917
  "learning_rate": 6.473684210526316e-06,
1918
- "loss": 0.0443,
1919
  "step": 2780
1920
  },
1921
  {
1922
  "epoch": 19.24,
1923
  "learning_rate": 5.947368421052632e-06,
1924
- "loss": 0.041,
1925
  "step": 2790
1926
  },
1927
  {
1928
  "epoch": 19.31,
1929
  "learning_rate": 5.421052631578947e-06,
1930
- "loss": 0.1618,
1931
  "step": 2800
1932
  },
1933
  {
1934
  "epoch": 19.31,
1935
- "eval_loss": 1.3657355308532715,
1936
- "eval_runtime": 49.611,
1937
- "eval_samples_per_second": 33.863,
1938
- "eval_steps_per_second": 33.863,
1939
- "eval_wer": 0.6795534422162497,
1940
  "step": 2800
1941
  },
1942
  {
1943
  "epoch": 19.38,
1944
  "learning_rate": 4.894736842105263e-06,
1945
- "loss": 0.0475,
1946
  "step": 2810
1947
  },
1948
  {
1949
  "epoch": 19.45,
1950
  "learning_rate": 4.368421052631579e-06,
1951
- "loss": 0.0417,
1952
  "step": 2820
1953
  },
1954
  {
1955
  "epoch": 19.52,
1956
  "learning_rate": 3.842105263157895e-06,
1957
- "loss": 0.0996,
1958
  "step": 2830
1959
  },
1960
  {
1961
  "epoch": 19.59,
1962
  "learning_rate": 3.315789473684211e-06,
1963
- "loss": 0.0923,
1964
  "step": 2840
1965
  },
1966
  {
1967
  "epoch": 19.66,
1968
  "learning_rate": 2.7894736842105266e-06,
1969
- "loss": 0.0363,
1970
  "step": 2850
1971
  },
1972
  {
1973
  "epoch": 19.72,
1974
  "learning_rate": 2.2631578947368426e-06,
1975
- "loss": 0.0363,
1976
  "step": 2860
1977
  },
1978
  {
1979
  "epoch": 19.79,
1980
  "learning_rate": 1.7368421052631579e-06,
1981
- "loss": 0.1545,
1982
  "step": 2870
1983
  },
1984
  {
1985
  "epoch": 19.86,
1986
  "learning_rate": 1.2105263157894738e-06,
1987
- "loss": 0.0575,
1988
  "step": 2880
1989
  },
1990
  {
1991
  "epoch": 19.93,
1992
  "learning_rate": 6.842105263157896e-07,
1993
- "loss": 0.0372,
1994
  "step": 2890
1995
  },
1996
  {
1997
  "epoch": 20.0,
1998
  "learning_rate": 1.5789473684210527e-07,
1999
- "loss": 0.0415,
2000
  "step": 2900
2001
  },
2002
  {
2003
  "epoch": 20.0,
2004
- "eval_loss": 1.3688374757766724,
2005
- "eval_runtime": 49.8972,
2006
- "eval_samples_per_second": 33.669,
2007
- "eval_steps_per_second": 33.669,
2008
- "eval_wer": 0.6817586658397078,
2009
  "step": 2900
2010
  },
2011
  {
2012
  "epoch": 20.0,
2013
  "step": 2900,
2014
- "total_flos": 6.619750382049396e+17,
2015
- "train_loss": 0.938711966728342,
2016
- "train_runtime": 4007.4157,
2017
- "train_samples_per_second": 23.057,
2018
- "train_steps_per_second": 0.724
2019
  }
2020
  ],
2021
  "max_steps": 2900,
2022
  "num_train_epochs": 20,
2023
- "total_flos": 6.619750382049396e+17,
2024
  "trial_name": null,
2025
  "trial_params": null
2026
  }
10
  {
11
  "epoch": 0.07,
12
  "learning_rate": 8.000000000000001e-07,
13
+ "loss": 9.5936,
14
  "step": 10
15
  },
16
  {
17
  "epoch": 0.14,
18
  "learning_rate": 1.8e-06,
19
+ "loss": 8.4469,
20
  "step": 20
21
  },
22
  {
23
  "epoch": 0.21,
24
  "learning_rate": 2.8000000000000003e-06,
25
+ "loss": 7.9584,
26
  "step": 30
27
  },
28
  {
29
  "epoch": 0.28,
30
  "learning_rate": 3.7e-06,
31
+ "loss": 8.7061,
32
  "step": 40
33
  },
34
  {
35
  "epoch": 0.34,
36
  "learning_rate": 4.7e-06,
37
+ "loss": 8.267,
38
  "step": 50
39
  },
40
  {
41
  "epoch": 0.41,
42
  "learning_rate": 5.7000000000000005e-06,
43
+ "loss": 7.7959,
44
  "step": 60
45
  },
46
  {
47
  "epoch": 0.48,
48
  "learning_rate": 6.700000000000001e-06,
49
+ "loss": 7.1516,
50
  "step": 70
51
  },
52
  {
53
  "epoch": 0.55,
54
  "learning_rate": 7.7e-06,
55
+ "loss": 7.8574,
56
  "step": 80
57
  },
58
  {
59
  "epoch": 0.62,
60
  "learning_rate": 8.7e-06,
61
+ "loss": 6.5821,
62
  "step": 90
63
  },
64
  {
65
  "epoch": 0.69,
66
  "learning_rate": 9.7e-06,
67
+ "loss": 5.4447,
68
  "step": 100
69
  },
70
  {
71
  "epoch": 0.69,
72
+ "eval_loss": 4.9545793533325195,
73
+ "eval_runtime": 50.1326,
74
+ "eval_samples_per_second": 33.511,
75
+ "eval_steps_per_second": 33.511,
76
  "eval_wer": 1.0,
77
  "step": 100
78
  },
79
  {
80
  "epoch": 0.76,
81
  "learning_rate": 1.0700000000000001e-05,
82
+ "loss": 4.5955,
83
  "step": 110
84
  },
85
  {
86
  "epoch": 0.83,
87
  "learning_rate": 1.1700000000000001e-05,
88
+ "loss": 3.8067,
89
  "step": 120
90
  },
91
  {
92
  "epoch": 0.9,
93
  "learning_rate": 1.27e-05,
94
+ "loss": 3.3454,
95
  "step": 130
96
  },
97
  {
98
  "epoch": 0.97,
99
  "learning_rate": 1.3700000000000001e-05,
100
+ "loss": 3.1785,
101
  "step": 140
102
  },
103
  {
104
  "epoch": 1.03,
105
  "learning_rate": 1.47e-05,
106
+ "loss": 3.1751,
107
  "step": 150
108
  },
109
  {
110
  "epoch": 1.1,
111
  "learning_rate": 1.5700000000000002e-05,
112
+ "loss": 3.0387,
113
  "step": 160
114
  },
115
  {
116
  "epoch": 1.17,
117
  "learning_rate": 1.6700000000000003e-05,
118
+ "loss": 2.9955,
119
  "step": 170
120
  },
121
  {
122
  "epoch": 1.24,
123
  "learning_rate": 1.77e-05,
124
+ "loss": 2.9967,
125
  "step": 180
126
  },
127
  {
128
  "epoch": 1.31,
129
  "learning_rate": 1.87e-05,
130
+ "loss": 2.992,
131
  "step": 190
132
  },
133
  {
134
  "epoch": 1.38,
135
  "learning_rate": 1.97e-05,
136
+ "loss": 2.9499,
137
  "step": 200
138
  },
139
  {
140
  "epoch": 1.38,
141
+ "eval_loss": 2.951915979385376,
142
+ "eval_runtime": 50.1317,
143
+ "eval_samples_per_second": 33.512,
144
+ "eval_steps_per_second": 33.512,
145
  "eval_wer": 1.0,
146
  "step": 200
147
  },
148
  {
149
  "epoch": 1.45,
150
  "learning_rate": 2.07e-05,
151
+ "loss": 2.9419,
152
  "step": 210
153
  },
154
  {
155
  "epoch": 1.52,
156
  "learning_rate": 2.1700000000000002e-05,
157
+ "loss": 2.9594,
158
  "step": 220
159
  },
160
  {
161
  "epoch": 1.59,
162
  "learning_rate": 2.2700000000000003e-05,
163
+ "loss": 2.942,
164
  "step": 230
165
  },
166
  {
167
  "epoch": 1.66,
168
  "learning_rate": 2.37e-05,
169
+ "loss": 2.9188,
170
  "step": 240
171
  },
172
  {
173
  "epoch": 1.72,
174
  "learning_rate": 2.47e-05,
175
+ "loss": 2.9152,
176
  "step": 250
177
  },
178
  {
179
  "epoch": 1.79,
180
  "learning_rate": 2.57e-05,
181
+ "loss": 2.9417,
182
  "step": 260
183
  },
184
  {
185
  "epoch": 1.86,
186
  "learning_rate": 2.6700000000000002e-05,
187
+ "loss": 2.8942,
188
  "step": 270
189
  },
190
  {
191
  "epoch": 1.93,
192
  "learning_rate": 2.7700000000000002e-05,
193
+ "loss": 2.9023,
194
  "step": 280
195
  },
196
  {
197
  "epoch": 2.0,
198
  "learning_rate": 2.87e-05,
199
+ "loss": 2.9168,
200
  "step": 290
201
  },
202
  {
203
  "epoch": 2.07,
204
  "learning_rate": 2.97e-05,
205
+ "loss": 2.8989,
206
  "step": 300
207
  },
208
  {
209
  "epoch": 2.07,
210
+ "eval_loss": 2.862382650375366,
211
+ "eval_runtime": 50.3009,
212
+ "eval_samples_per_second": 33.399,
213
+ "eval_steps_per_second": 33.399,
214
  "eval_wer": 1.0,
215
  "step": 300
216
  },
217
  {
218
  "epoch": 2.14,
219
  "learning_rate": 3.07e-05,
220
+ "loss": 2.8309,
221
  "step": 310
222
  },
223
  {
224
  "epoch": 2.21,
225
  "learning_rate": 3.1700000000000005e-05,
226
+ "loss": 2.8001,
227
  "step": 320
228
  },
229
  {
230
  "epoch": 2.28,
231
  "learning_rate": 3.27e-05,
232
+ "loss": 2.7949,
233
  "step": 330
234
  },
235
  {
236
  "epoch": 2.34,
237
  "learning_rate": 3.3700000000000006e-05,
238
+ "loss": 2.6851,
239
  "step": 340
240
  },
241
  {
242
  "epoch": 2.41,
243
  "learning_rate": 3.4699999999999996e-05,
244
+ "loss": 2.591,
245
  "step": 350
246
  },
247
  {
248
  "epoch": 2.48,
249
  "learning_rate": 3.57e-05,
250
+ "loss": 2.5043,
251
  "step": 360
252
  },
253
  {
254
  "epoch": 2.55,
255
  "learning_rate": 3.6700000000000004e-05,
256
+ "loss": 2.4854,
257
  "step": 370
258
  },
259
  {
260
  "epoch": 2.62,
261
  "learning_rate": 3.77e-05,
262
+ "loss": 2.3164,
263
  "step": 380
264
  },
265
  {
266
  "epoch": 2.69,
267
  "learning_rate": 3.8700000000000006e-05,
268
+ "loss": 2.245,
269
  "step": 390
270
  },
271
  {
272
  "epoch": 2.76,
273
  "learning_rate": 3.97e-05,
274
+ "loss": 2.2076,
275
  "step": 400
276
  },
277
  {
278
  "epoch": 2.76,
279
+ "eval_loss": 2.108926296234131,
280
+ "eval_runtime": 50.3626,
281
+ "eval_samples_per_second": 33.358,
282
+ "eval_steps_per_second": 33.358,
283
+ "eval_wer": 1.0008269588587968,
284
  "step": 400
285
  },
286
  {
287
  "epoch": 2.83,
288
  "learning_rate": 4.07e-05,
289
+ "loss": 2.0686,
290
  "step": 410
291
  },
292
  {
293
  "epoch": 2.9,
294
  "learning_rate": 4.17e-05,
295
+ "loss": 1.9182,
296
  "step": 420
297
  },
298
  {
299
  "epoch": 2.97,
300
  "learning_rate": 4.27e-05,
301
+ "loss": 1.8595,
302
  "step": 430
303
  },
304
  {
305
  "epoch": 3.03,
306
  "learning_rate": 4.3700000000000005e-05,
307
+ "loss": 1.881,
308
  "step": 440
309
  },
310
  {
311
  "epoch": 3.1,
312
  "learning_rate": 4.47e-05,
313
+ "loss": 1.6557,
314
  "step": 450
315
  },
316
  {
317
  "epoch": 3.17,
318
  "learning_rate": 4.5700000000000006e-05,
319
+ "loss": 1.5894,
320
  "step": 460
321
  },
322
  {
323
  "epoch": 3.24,
324
  "learning_rate": 4.6700000000000003e-05,
325
+ "loss": 1.5914,
326
  "step": 470
327
  },
328
  {
329
  "epoch": 3.31,
330
  "learning_rate": 4.77e-05,
331
+ "loss": 1.6008,
332
  "step": 480
333
  },
334
  {
335
  "epoch": 3.38,
336
  "learning_rate": 4.87e-05,
337
+ "loss": 1.4103,
338
  "step": 490
339
  },
340
  {
341
  "epoch": 3.45,
342
  "learning_rate": 4.97e-05,
343
+ "loss": 1.4186,
344
  "step": 500
345
  },
346
  {
347
  "epoch": 3.45,
348
+ "eval_loss": 1.4111816883087158,
349
+ "eval_runtime": 50.441,
350
+ "eval_samples_per_second": 33.306,
351
+ "eval_steps_per_second": 33.306,
352
+ "eval_wer": 0.9164771552615257,
353
  "step": 500
354
  },
355
  {
356
  "epoch": 3.52,
357
  "learning_rate": 5.0700000000000006e-05,
358
+ "loss": 1.5086,
359
  "step": 510
360
  },
361
  {
362
  "epoch": 3.59,
363
  "learning_rate": 5.17e-05,
364
+ "loss": 1.3112,
365
  "step": 520
366
  },
367
  {
368
  "epoch": 3.66,
369
  "learning_rate": 5.270000000000001e-05,
370
+ "loss": 1.2336,
371
  "step": 530
372
  },
373
  {
374
  "epoch": 3.72,
375
  "learning_rate": 5.3700000000000004e-05,
376
+ "loss": 1.3263,
377
  "step": 540
378
  },
379
  {
380
  "epoch": 3.79,
381
  "learning_rate": 5.470000000000001e-05,
382
+ "loss": 1.3748,
383
  "step": 550
384
  },
385
  {
386
  "epoch": 3.86,
387
  "learning_rate": 5.5700000000000005e-05,
388
+ "loss": 1.1614,
389
  "step": 560
390
  },
391
  {
392
  "epoch": 3.93,
393
  "learning_rate": 5.6699999999999996e-05,
394
+ "loss": 1.1547,
395
  "step": 570
396
  },
397
  {
398
  "epoch": 4.0,
399
  "learning_rate": 5.77e-05,
400
+ "loss": 1.2548,
401
  "step": 580
402
  },
403
  {
404
  "epoch": 4.07,
405
  "learning_rate": 5.87e-05,
406
+ "loss": 1.2191,
407
  "step": 590
408
  },
409
  {
410
  "epoch": 4.14,
411
  "learning_rate": 5.97e-05,
412
+ "loss": 0.9951,
413
  "step": 600
414
  },
415
  {
416
  "epoch": 4.14,
417
+ "eval_loss": 1.1378357410430908,
418
+ "eval_runtime": 50.3227,
419
+ "eval_samples_per_second": 33.385,
420
+ "eval_steps_per_second": 33.385,
421
+ "eval_wer": 0.7701054372544965,
422
  "step": 600
423
  },
424
  {
425
  "epoch": 4.21,
426
  "learning_rate": 6.07e-05,
427
+ "loss": 1.0463,
428
  "step": 610
429
  },
430
  {
431
  "epoch": 4.28,
432
  "learning_rate": 6.170000000000001e-05,
433
+ "loss": 1.1918,
434
  "step": 620
435
  },
436
  {
437
  "epoch": 4.34,
438
  "learning_rate": 6.27e-05,
439
+ "loss": 0.9968,
440
  "step": 630
441
  },
442
  {
443
  "epoch": 4.41,
444
  "learning_rate": 6.37e-05,
445
+ "loss": 0.9801,
446
  "step": 640
447
  },
448
  {
449
  "epoch": 4.48,
450
  "learning_rate": 6.47e-05,
451
+ "loss": 1.0596,
452
  "step": 650
453
  },
454
  {
455
  "epoch": 4.55,
456
  "learning_rate": 6.570000000000001e-05,
457
+ "loss": 1.1484,
458
  "step": 660
459
  },
460
  {
461
  "epoch": 4.62,
462
  "learning_rate": 6.670000000000001e-05,
463
+ "loss": 0.9331,
464
  "step": 670
465
  },
466
  {
467
  "epoch": 4.69,
468
  "learning_rate": 6.77e-05,
469
+ "loss": 0.9861,
470
  "step": 680
471
  },
472
  {
473
  "epoch": 4.76,
474
  "learning_rate": 6.87e-05,
475
+ "loss": 1.0923,
476
  "step": 690
477
  },
478
  {
479
  "epoch": 4.83,
480
  "learning_rate": 6.97e-05,
481
+ "loss": 0.9754,
482
  "step": 700
483
  },
484
  {
485
  "epoch": 4.83,
486
+ "eval_loss": 1.0151933431625366,
487
+ "eval_runtime": 50.6525,
488
+ "eval_samples_per_second": 33.167,
489
+ "eval_steps_per_second": 33.167,
490
+ "eval_wer": 0.7273792295499966,
491
  "step": 700
492
  },
493
  {
494
  "epoch": 4.9,
495
  "learning_rate": 7.07e-05,
496
+ "loss": 0.8267,
497
  "step": 710
498
  },
499
  {
500
  "epoch": 4.97,
501
  "learning_rate": 7.17e-05,
502
+ "loss": 0.9163,
503
  "step": 720
504
  },
505
  {
506
  "epoch": 5.03,
507
  "learning_rate": 7.27e-05,
508
+ "loss": 1.0682,
509
  "step": 730
510
  },
511
  {
512
  "epoch": 5.1,
513
  "learning_rate": 7.37e-05,
514
+ "loss": 0.8203,
515
  "step": 740
516
  },
517
  {
518
  "epoch": 5.17,
519
  "learning_rate": 7.47e-05,
520
+ "loss": 0.8,
521
  "step": 750
522
  },
523
  {
524
  "epoch": 5.24,
525
  "learning_rate": 7.570000000000001e-05,
526
+ "loss": 0.8987,
527
  "step": 760
528
  },
529
  {
530
  "epoch": 5.31,
531
  "learning_rate": 7.670000000000001e-05,
532
+ "loss": 0.9465,
533
  "step": 770
534
  },
535
  {
536
  "epoch": 5.38,
537
  "learning_rate": 7.77e-05,
538
+ "loss": 0.7583,
539
  "step": 780
540
  },
541
  {
542
  "epoch": 5.45,
543
  "learning_rate": 7.87e-05,
544
+ "loss": 0.7862,
545
  "step": 790
546
  },
547
  {
548
  "epoch": 5.52,
549
  "learning_rate": 7.970000000000001e-05,
550
+ "loss": 0.9364,
551
  "step": 800
552
  },
553
  {
554
  "epoch": 5.52,
555
+ "eval_loss": 0.9619123339653015,
556
+ "eval_runtime": 50.5375,
557
+ "eval_samples_per_second": 33.243,
558
+ "eval_steps_per_second": 33.243,
559
+ "eval_wer": 0.7011232857831989,
560
  "step": 800
561
  },
562
  {
563
  "epoch": 5.59,
564
  "learning_rate": 8.070000000000001e-05,
565
+ "loss": 0.8322,
566
  "step": 810
567
  },
568
  {
569
  "epoch": 5.66,
570
  "learning_rate": 8.17e-05,
571
+ "loss": 0.724,
572
  "step": 820
573
  },
574
  {
575
  "epoch": 5.72,
576
  "learning_rate": 8.27e-05,
577
+ "loss": 0.8198,
578
  "step": 830
579
  },
580
  {
581
  "epoch": 5.79,
582
  "learning_rate": 8.37e-05,
583
+ "loss": 0.9825,
584
  "step": 840
585
  },
586
  {
587
  "epoch": 5.86,
588
  "learning_rate": 8.47e-05,
589
+ "loss": 0.7021,
590
  "step": 850
591
  },
592
  {
593
  "epoch": 5.93,
594
  "learning_rate": 8.57e-05,
595
+ "loss": 0.7367,
596
  "step": 860
597
  },
598
  {
599
  "epoch": 6.0,
600
  "learning_rate": 8.67e-05,
601
+ "loss": 0.8597,
602
  "step": 870
603
  },
604
  {
605
  "epoch": 6.07,
606
  "learning_rate": 8.77e-05,
607
+ "loss": 0.791,
608
  "step": 880
609
  },
610
  {
611
  "epoch": 6.14,
612
  "learning_rate": 8.87e-05,
613
+ "loss": 0.6308,
614
  "step": 890
615
  },
616
  {
617
  "epoch": 6.21,
618
  "learning_rate": 8.970000000000001e-05,
619
+ "loss": 0.6557,
620
  "step": 900
621
  },
622
  {
623
  "epoch": 6.21,
624
+ "eval_loss": 0.9144111275672913,
625
+ "eval_runtime": 50.5004,
626
+ "eval_samples_per_second": 33.267,
627
+ "eval_steps_per_second": 33.267,
628
+ "eval_wer": 0.6867893322307215,
629
  "step": 900
630
  },
631
  {
632
  "epoch": 6.28,
633
  "learning_rate": 9.070000000000001e-05,
634
+ "loss": 0.8438,
635
  "step": 910
636
  },
637
  {
638
  "epoch": 6.34,
639
  "learning_rate": 9.17e-05,
640
+ "loss": 0.7106,
641
  "step": 920
642
  },
643
  {
644
  "epoch": 6.41,
645
  "learning_rate": 9.27e-05,
646
+ "loss": 0.6506,
647
  "step": 930
648
  },
649
  {
650
  "epoch": 6.48,
651
  "learning_rate": 9.370000000000001e-05,
652
+ "loss": 0.7062,
653
  "step": 940
654
  },
655
  {
656
  "epoch": 6.55,
657
  "learning_rate": 9.47e-05,
658
+ "loss": 0.8188,
659
  "step": 950
660
  },
661
  {
662
  "epoch": 6.62,
663
  "learning_rate": 9.57e-05,
664
+ "loss": 0.6014,
665
  "step": 960
666
  },
667
  {
668
  "epoch": 6.69,
669
  "learning_rate": 9.67e-05,
670
+ "loss": 0.5934,
671
  "step": 970
672
  },
673
  {
674
  "epoch": 6.76,
675
  "learning_rate": 9.77e-05,
676
+ "loss": 0.7971,
677
  "step": 980
678
  },
679
  {
680
  "epoch": 6.83,
681
  "learning_rate": 9.87e-05,
682
+ "loss": 0.7547,
683
  "step": 990
684
  },
685
  {
686
  "epoch": 6.9,
687
  "learning_rate": 9.970000000000001e-05,
688
+ "loss": 0.5681,
689
  "step": 1000
690
  },
691
  {
692
  "epoch": 6.9,
693
+ "eval_loss": 0.889886736869812,
694
+ "eval_runtime": 50.3615,
695
+ "eval_samples_per_second": 33.359,
696
+ "eval_steps_per_second": 33.359,
697
+ "eval_wer": 0.6682516711460271,
698
  "step": 1000
699
  },
700
  {
701
  "epoch": 6.97,
702
  "learning_rate": 9.963157894736843e-05,
703
+ "loss": 0.6196,
704
  "step": 1010
705
  },
706
  {
707
  "epoch": 7.03,
708
  "learning_rate": 9.910526315789475e-05,
709
+ "loss": 0.819,
710
  "step": 1020
711
  },
712
  {
713
  "epoch": 7.1,
714
  "learning_rate": 9.857894736842106e-05,
715
+ "loss": 0.5811,
716
  "step": 1030
717
  },
718
  {
719
  "epoch": 7.17,
720
  "learning_rate": 9.805263157894737e-05,
721
+ "loss": 0.5121,
722
  "step": 1040
723
  },
724
  {
725
  "epoch": 7.24,
726
  "learning_rate": 9.752631578947369e-05,
727
+ "loss": 0.5588,
728
  "step": 1050
729
  },
730
  {
731
  "epoch": 7.31,
732
  "learning_rate": 9.7e-05,
733
+ "loss": 0.7227,
734
  "step": 1060
735
  },
736
  {
737
  "epoch": 7.38,
738
  "learning_rate": 9.647368421052631e-05,
739
+ "loss": 0.4808,
740
  "step": 1070
741
  },
742
  {
743
  "epoch": 7.45,
744
  "learning_rate": 9.594736842105264e-05,
745
+ "loss": 0.5031,
746
  "step": 1080
747
  },
748
  {
749
  "epoch": 7.52,
750
  "learning_rate": 9.542105263157895e-05,
751
+ "loss": 0.6767,
752
  "step": 1090
753
  },
754
  {
755
  "epoch": 7.59,
756
  "learning_rate": 9.489473684210527e-05,
757
+ "loss": 0.66,
758
  "step": 1100
759
  },
760
  {
761
  "epoch": 7.59,
762
+ "eval_loss": 0.8991804122924805,
763
+ "eval_runtime": 50.3017,
764
+ "eval_samples_per_second": 33.399,
765
+ "eval_steps_per_second": 33.399,
766
+ "eval_wer": 0.6653573151402384,
767
  "step": 1100
768
  },
769
  {
770
  "epoch": 7.66,
771
  "learning_rate": 9.436842105263158e-05,
772
+ "loss": 0.487,
773
  "step": 1110
774
  },
775
  {
776
  "epoch": 7.72,
777
  "learning_rate": 9.384210526315789e-05,
778
+ "loss": 0.5132,
779
  "step": 1120
780
  },
781
  {
782
  "epoch": 7.79,
783
  "learning_rate": 9.331578947368422e-05,
784
+ "loss": 0.704,
785
  "step": 1130
786
  },
787
  {
788
  "epoch": 7.86,
789
  "learning_rate": 9.278947368421053e-05,
790
+ "loss": 0.5007,
791
  "step": 1140
792
  },
793
  {
794
  "epoch": 7.93,
795
  "learning_rate": 9.226315789473686e-05,
796
+ "loss": 0.513,
797
  "step": 1150
798
  },
799
  {
800
  "epoch": 8.0,
801
  "learning_rate": 9.173684210526317e-05,
802
+ "loss": 0.5883,
803
  "step": 1160
804
  },
805
  {
806
  "epoch": 8.07,
807
  "learning_rate": 9.121052631578948e-05,
808
+ "loss": 0.6605,
809
  "step": 1170
810
  },
811
  {
812
  "epoch": 8.14,
813
  "learning_rate": 9.06842105263158e-05,
814
+ "loss": 0.4155,
815
  "step": 1180
816
  },
817
  {
818
  "epoch": 8.21,
819
  "learning_rate": 9.015789473684211e-05,
820
+ "loss": 0.414,
821
  "step": 1190
822
  },
823
  {
824
  "epoch": 8.28,
825
  "learning_rate": 8.963157894736842e-05,
826
+ "loss": 0.6144,
827
  "step": 1200
828
  },
829
  {
830
  "epoch": 8.28,
831
+ "eval_loss": 0.9299214482307434,
832
+ "eval_runtime": 50.3304,
833
+ "eval_samples_per_second": 33.379,
834
+ "eval_steps_per_second": 33.379,
835
+ "eval_wer": 0.6898215147129764,
836
  "step": 1200
837
  },
838
  {
839
  "epoch": 8.34,
840
  "learning_rate": 8.910526315789474e-05,
841
+ "loss": 0.4794,
842
  "step": 1210
843
  },
844
  {
845
  "epoch": 8.41,
846
  "learning_rate": 8.857894736842106e-05,
847
+ "loss": 0.4113,
848
  "step": 1220
849
  },
850
  {
851
  "epoch": 8.48,
852
  "learning_rate": 8.805263157894737e-05,
853
+ "loss": 0.4479,
854
  "step": 1230
855
  },
856
  {
857
  "epoch": 8.55,
858
  "learning_rate": 8.752631578947369e-05,
859
+ "loss": 0.6263,
860
  "step": 1240
861
  },
862
  {
863
  "epoch": 8.62,
864
  "learning_rate": 8.7e-05,
865
+ "loss": 0.4205,
866
  "step": 1250
867
  },
868
  {
869
  "epoch": 8.69,
870
  "learning_rate": 8.647368421052631e-05,
871
+ "loss": 0.4036,
872
  "step": 1260
873
  },
874
  {
875
  "epoch": 8.76,
876
  "learning_rate": 8.594736842105263e-05,
877
+ "loss": 0.5458,
878
  "step": 1270
879
  },
880
  {
881
  "epoch": 8.83,
882
  "learning_rate": 8.542105263157894e-05,
883
+ "loss": 0.5463,
884
  "step": 1280
885
  },
886
  {
887
  "epoch": 8.9,
888
  "learning_rate": 8.489473684210527e-05,
889
+ "loss": 0.3955,
890
  "step": 1290
891
  },
892
  {
893
  "epoch": 8.97,
894
  "learning_rate": 8.436842105263158e-05,
895
+ "loss": 0.4099,
896
  "step": 1300
897
  },
898
  {
899
  "epoch": 8.97,
900
+ "eval_loss": 0.9509925246238708,
901
+ "eval_runtime": 50.5682,
902
+ "eval_samples_per_second": 33.222,
903
+ "eval_steps_per_second": 33.222,
904
+ "eval_wer": 0.6673557990489973,
905
  "step": 1300
906
  },
907
  {
908
  "epoch": 9.03,
909
  "learning_rate": 8.38421052631579e-05,
910
+ "loss": 0.5702,
911
  "step": 1310
912
  },
913
  {
914
  "epoch": 9.1,
915
  "learning_rate": 8.331578947368422e-05,
916
+ "loss": 0.3673,
917
  "step": 1320
918
  },
919
  {
920
  "epoch": 9.17,
921
  "learning_rate": 8.278947368421053e-05,
922
+ "loss": 0.314,
923
  "step": 1330
924
  },
925
  {
926
  "epoch": 9.24,
927
  "learning_rate": 8.226315789473684e-05,
928
+ "loss": 0.341,
929
  "step": 1340
930
  },
931
  {
932
  "epoch": 9.31,
933
  "learning_rate": 8.173684210526317e-05,
934
+ "loss": 0.5948,
935
  "step": 1350
936
  },
937
  {
938
  "epoch": 9.38,
939
  "learning_rate": 8.121052631578948e-05,
940
+ "loss": 0.3596,
941
  "step": 1360
942
  },
943
  {
944
  "epoch": 9.45,
945
  "learning_rate": 8.06842105263158e-05,
946
+ "loss": 0.3284,
947
  "step": 1370
948
  },
949
  {
950
  "epoch": 9.52,
951
  "learning_rate": 8.015789473684211e-05,
952
+ "loss": 0.4988,
953
  "step": 1380
954
  },
955
  {
956
  "epoch": 9.59,
957
  "learning_rate": 7.963157894736842e-05,
958
+ "loss": 0.4438,
959
  "step": 1390
960
  },
961
  {
962
  "epoch": 9.66,
963
  "learning_rate": 7.910526315789474e-05,
964
+ "loss": 0.3384,
965
  "step": 1400
966
  },
967
  {
968
  "epoch": 9.66,
969
+ "eval_loss": 0.9598137736320496,
970
+ "eval_runtime": 50.3681,
971
+ "eval_samples_per_second": 33.354,
972
+ "eval_steps_per_second": 33.354,
973
+ "eval_wer": 0.6611536076080216,
974
  "step": 1400
975
  },
976
  {
977
  "epoch": 9.72,
978
  "learning_rate": 7.857894736842105e-05,
979
+ "loss": 0.3422,
980
  "step": 1410
981
  },
982
  {
983
  "epoch": 9.79,
984
  "learning_rate": 7.805263157894738e-05,
985
+ "loss": 0.5592,
986
  "step": 1420
987
  },
988
  {
989
  "epoch": 9.86,
990
  "learning_rate": 7.752631578947369e-05,
991
+ "loss": 0.3536,
992
  "step": 1430
993
  },
994
  {
995
  "epoch": 9.93,
996
  "learning_rate": 7.7e-05,
997
+ "loss": 0.3111,
998
  "step": 1440
999
  },
1000
  {
1001
  "epoch": 10.0,
1002
  "learning_rate": 7.647368421052631e-05,
1003
+ "loss": 0.3469,
1004
  "step": 1450
1005
  },
1006
  {
1007
  "epoch": 10.07,
1008
  "learning_rate": 7.594736842105263e-05,
1009
+ "loss": 0.4812,
1010
  "step": 1460
1011
  },
1012
  {
1013
  "epoch": 10.14,
1014
  "learning_rate": 7.542105263157895e-05,
1015
+ "loss": 0.2573,
1016
  "step": 1470
1017
  },
1018
  {
1019
  "epoch": 10.21,
1020
  "learning_rate": 7.489473684210527e-05,
1021
+ "loss": 0.2372,
1022
  "step": 1480
1023
  },
1024
  {
1025
  "epoch": 10.28,
1026
  "learning_rate": 7.43684210526316e-05,
1027
+ "loss": 0.4399,
1028
  "step": 1490
1029
  },
1030
  {
1031
  "epoch": 10.34,
1032
  "learning_rate": 7.38421052631579e-05,
1033
+ "loss": 0.3163,
1034
  "step": 1500
1035
  },
1036
  {
1037
  "epoch": 10.34,
1038
+ "eval_loss": 0.9953927993774414,
1039
+ "eval_runtime": 50.4333,
1040
+ "eval_samples_per_second": 33.311,
1041
+ "eval_steps_per_second": 33.311,
1042
+ "eval_wer": 0.6612225208462545,
1043
  "step": 1500
1044
  },
1045
  {
1046
  "epoch": 10.41,
1047
  "learning_rate": 7.331578947368422e-05,
1048
+ "loss": 0.2451,
1049
  "step": 1510
1050
  },
1051
  {
1052
  "epoch": 10.48,
1053
  "learning_rate": 7.278947368421053e-05,
1054
+ "loss": 0.2571,
1055
  "step": 1520
1056
  },
1057
  {
1058
  "epoch": 10.55,
1059
  "learning_rate": 7.226315789473685e-05,
1060
+ "loss": 0.5127,
1061
  "step": 1530
1062
  },
1063
  {
1064
  "epoch": 10.62,
1065
  "learning_rate": 7.173684210526316e-05,
1066
+ "loss": 0.2847,
1067
  "step": 1540
1068
  },
1069
  {
1070
  "epoch": 10.69,
1071
  "learning_rate": 7.121052631578947e-05,
1072
+ "loss": 0.2619,
1073
  "step": 1550
1074
  },
1075
  {
1076
  "epoch": 10.76,
1077
  "learning_rate": 7.06842105263158e-05,
1078
+ "loss": 0.3631,
1079
  "step": 1560
1080
  },
1081
  {
1082
  "epoch": 10.83,
1083
  "learning_rate": 7.015789473684211e-05,
1084
+ "loss": 0.4317,
1085
  "step": 1570
1086
  },
1087
  {
1088
  "epoch": 10.9,
1089
  "learning_rate": 6.963157894736842e-05,
1090
+ "loss": 0.2507,
1091
  "step": 1580
1092
  },
1093
  {
1094
  "epoch": 10.97,
1095
  "learning_rate": 6.910526315789474e-05,
1096
+ "loss": 0.2485,
1097
  "step": 1590
1098
  },
1099
  {
1100
  "epoch": 11.03,
1101
  "learning_rate": 6.857894736842105e-05,
1102
+ "loss": 0.4204,
1103
  "step": 1600
1104
  },
1105
  {
1106
  "epoch": 11.03,
1107
+ "eval_loss": 1.0164344310760498,
1108
+ "eval_runtime": 50.3517,
1109
+ "eval_samples_per_second": 33.365,
1110
+ "eval_steps_per_second": 33.365,
1111
+ "eval_wer": 0.66067121494039,
1112
  "step": 1600
1113
  },
1114
  {
1115
  "epoch": 11.1,
1116
  "learning_rate": 6.805263157894736e-05,
1117
+ "loss": 0.265,
1118
  "step": 1610
1119
  },
1120
  {
1121
  "epoch": 11.17,
1122
  "learning_rate": 6.752631578947368e-05,
1123
+ "loss": 0.1877,
1124
  "step": 1620
1125
  },
1126
  {
1127
  "epoch": 11.24,
1128
  "learning_rate": 6.7e-05,
1129
+ "loss": 0.1922,
1130
  "step": 1630
1131
  },
1132
  {
1133
  "epoch": 11.31,
1134
  "learning_rate": 6.647368421052632e-05,
1135
+ "loss": 0.4621,
1136
  "step": 1640
1137
  },
1138
  {
1139
  "epoch": 11.38,
1140
  "learning_rate": 6.594736842105264e-05,
1141
+ "loss": 0.2252,
1142
  "step": 1650
1143
  },
1144
  {
1145
  "epoch": 11.45,
1146
  "learning_rate": 6.542105263157895e-05,
1147
+ "loss": 0.1885,
1148
  "step": 1660
1149
  },
1150
  {
1151
  "epoch": 11.52,
1152
  "learning_rate": 6.489473684210527e-05,
1153
+ "loss": 0.3207,
1154
  "step": 1670
1155
  },
1156
  {
1157
  "epoch": 11.59,
1158
  "learning_rate": 6.436842105263158e-05,
1159
+ "loss": 0.3149,
1160
  "step": 1680
1161
  },
1162
  {
1163
  "epoch": 11.66,
1164
  "learning_rate": 6.384210526315791e-05,
1165
+ "loss": 0.1912,
1166
  "step": 1690
1167
  },
1168
  {
1169
  "epoch": 11.72,
1170
  "learning_rate": 6.331578947368422e-05,
1171
+ "loss": 0.1932,
1172
  "step": 1700
1173
  },
1174
  {
1175
  "epoch": 11.72,
1176
+ "eval_loss": 1.0637414455413818,
1177
+ "eval_runtime": 50.3761,
1178
+ "eval_samples_per_second": 33.349,
1179
+ "eval_steps_per_second": 33.349,
1180
+ "eval_wer": 0.6657707945696368,
1181
  "step": 1700
1182
  },
1183
  {
1184
  "epoch": 11.79,
1185
  "learning_rate": 6.278947368421053e-05,
1186
+ "loss": 0.4378,
1187
  "step": 1710
1188
  },
1189
  {
1190
  "epoch": 11.86,
1191
  "learning_rate": 6.226315789473685e-05,
1192
+ "loss": 0.2184,
1193
  "step": 1720
1194
  },
1195
  {
1196
  "epoch": 11.93,
1197
  "learning_rate": 6.173684210526316e-05,
1198
+ "loss": 0.2063,
1199
  "step": 1730
1200
  },
1201
  {
1202
  "epoch": 12.0,
1203
  "learning_rate": 6.121052631578947e-05,
1204
+ "loss": 0.2094,
1205
  "step": 1740
1206
  },
1207
  {
1208
  "epoch": 12.07,
1209
  "learning_rate": 6.0684210526315785e-05,
1210
+ "loss": 0.3799,
1211
  "step": 1750
1212
  },
1213
  {
1214
  "epoch": 12.14,
1215
  "learning_rate": 6.015789473684211e-05,
1216
+ "loss": 0.1585,
1217
  "step": 1760
1218
  },
1219
  {
1220
  "epoch": 12.21,
1221
  "learning_rate": 5.9631578947368425e-05,
1222
+ "loss": 0.1441,
1223
  "step": 1770
1224
  },
1225
  {
1226
  "epoch": 12.28,
1227
  "learning_rate": 5.9105263157894744e-05,
1228
+ "loss": 0.3064,
1229
  "step": 1780
1230
  },
1231
  {
1232
  "epoch": 12.34,
1233
  "learning_rate": 5.857894736842106e-05,
1234
+ "loss": 0.2439,
1235
  "step": 1790
1236
  },
1237
  {
1238
  "epoch": 12.41,
1239
  "learning_rate": 5.805263157894737e-05,
1240
+ "loss": 0.1449,
1241
  "step": 1800
1242
  },
1243
  {
1244
  "epoch": 12.41,
1245
+ "eval_loss": 1.1190370321273804,
1246
+ "eval_runtime": 50.5074,
1247
+ "eval_samples_per_second": 33.262,
1248
+ "eval_steps_per_second": 33.262,
1249
+ "eval_wer": 0.6652194886637723,
1250
  "step": 1800
1251
  },
1252
  {
1253
  "epoch": 12.48,
1254
  "learning_rate": 5.752631578947368e-05,
1255
+ "loss": 0.1513,
1256
  "step": 1810
1257
  },
1258
  {
1259
  "epoch": 12.55,
1260
  "learning_rate": 5.6999999999999996e-05,
1261
+ "loss": 0.3656,
1262
  "step": 1820
1263
  },
1264
  {
1265
  "epoch": 12.62,
1266
  "learning_rate": 5.647368421052632e-05,
1267
+ "loss": 0.171,
1268
  "step": 1830
1269
  },
1270
  {
1271
  "epoch": 12.69,
1272
  "learning_rate": 5.5947368421052636e-05,
1273
+ "loss": 0.1463,
1274
  "step": 1840
1275
  },
1276
  {
1277
  "epoch": 12.76,
1278
  "learning_rate": 5.542105263157895e-05,
1279
+ "loss": 0.2383,
1280
  "step": 1850
1281
  },
1282
  {
1283
  "epoch": 12.83,
1284
  "learning_rate": 5.489473684210527e-05,
1285
+ "loss": 0.3031,
1286
  "step": 1860
1287
  },
1288
  {
1289
  "epoch": 12.9,
1290
  "learning_rate": 5.436842105263158e-05,
1291
+ "loss": 0.1513,
1292
  "step": 1870
1293
  },
1294
  {
1295
  "epoch": 12.97,
1296
  "learning_rate": 5.3842105263157895e-05,
1297
+ "loss": 0.1484,
1298
  "step": 1880
1299
  },
1300
  {
1301
  "epoch": 13.03,
1302
  "learning_rate": 5.331578947368421e-05,
1303
+ "loss": 0.3051,
1304
  "step": 1890
1305
  },
1306
  {
1307
  "epoch": 13.1,
1308
  "learning_rate": 5.2789473684210534e-05,
1309
+ "loss": 0.1803,
1310
  "step": 1900
1311
  },
1312
  {
1313
  "epoch": 13.1,
1314
+ "eval_loss": 1.1260483264923096,
1315
+ "eval_runtime": 50.4412,
1316
+ "eval_samples_per_second": 33.306,
1317
+ "eval_steps_per_second": 33.306,
1318
+ "eval_wer": 0.6689408035283578,
1319
  "step": 1900
1320
  },
1321
  {
1322
  "epoch": 13.17,
1323
  "learning_rate": 5.226315789473685e-05,
1324
+ "loss": 0.1158,
1325
  "step": 1910
1326
  },
1327
  {
1328
  "epoch": 13.24,
1329
  "learning_rate": 5.173684210526316e-05,
1330
+ "loss": 0.1038,
1331
  "step": 1920
1332
  },
1333
  {
1334
  "epoch": 13.31,
1335
  "learning_rate": 5.121052631578947e-05,
1336
+ "loss": 0.3329,
1337
  "step": 1930
1338
  },
1339
  {
1340
  "epoch": 13.38,
1341
  "learning_rate": 5.068421052631579e-05,
1342
+ "loss": 0.1411,
1343
  "step": 1940
1344
  },
1345
  {
1346
  "epoch": 13.45,
1347
  "learning_rate": 5.0157894736842106e-05,
1348
+ "loss": 0.1082,
1349
  "step": 1950
1350
  },
1351
  {
1352
  "epoch": 13.52,
1353
  "learning_rate": 4.9631578947368426e-05,
1354
+ "loss": 0.2466,
1355
  "step": 1960
1356
  },
1357
  {
1358
  "epoch": 13.59,
1359
  "learning_rate": 4.910526315789474e-05,
1360
+ "loss": 0.2415,
1361
  "step": 1970
1362
  },
1363
  {
1364
  "epoch": 13.66,
1365
  "learning_rate": 4.857894736842106e-05,
1366
+ "loss": 0.1175,
1367
  "step": 1980
1368
  },
1369
  {
1370
  "epoch": 13.72,
1371
  "learning_rate": 4.805263157894737e-05,
1372
+ "loss": 0.121,
1373
  "step": 1990
1374
  },
1375
  {
1376
  "epoch": 13.79,
1377
  "learning_rate": 4.7526315789473684e-05,
1378
+ "loss": 0.328,
1379
  "step": 2000
1380
  },
1381
  {
1382
  "epoch": 13.79,
1383
+ "eval_loss": 1.218603253364563,
1384
+ "eval_runtime": 50.4012,
1385
+ "eval_samples_per_second": 33.333,
1386
+ "eval_steps_per_second": 33.333,
1387
+ "eval_wer": 0.6751429949693336,
1388
  "step": 2000
1389
  },
1390
  {
1391
  "epoch": 13.86,
1392
  "learning_rate": 4.7e-05,
1393
+ "loss": 0.1445,
1394
  "step": 2010
1395
  },
1396
  {
1397
  "epoch": 13.93,
1398
  "learning_rate": 4.647368421052632e-05,
1399
+ "loss": 0.1145,
1400
  "step": 2020
1401
  },
1402
  {
1403
  "epoch": 14.0,
1404
  "learning_rate": 4.594736842105264e-05,
1405
+ "loss": 0.122,
1406
  "step": 2030
1407
  },
1408
  {
1409
  "epoch": 14.07,
1410
  "learning_rate": 4.542105263157895e-05,
1411
+ "loss": 0.2931,
1412
  "step": 2040
1413
  },
1414
  {
1415
  "epoch": 14.14,
1416
  "learning_rate": 4.489473684210527e-05,
1417
+ "loss": 0.0965,
1418
  "step": 2050
1419
  },
1420
  {
1421
  "epoch": 14.21,
1422
  "learning_rate": 4.436842105263158e-05,
1423
+ "loss": 0.084,
1424
  "step": 2060
1425
  },
1426
  {
1427
  "epoch": 14.28,
1428
  "learning_rate": 4.3842105263157895e-05,
1429
+ "loss": 0.2362,
1430
  "step": 2070
1431
  },
1432
  {
1433
  "epoch": 14.34,
1434
  "learning_rate": 4.3315789473684215e-05,
1435
+ "loss": 0.16,
1436
  "step": 2080
1437
  },
1438
  {
1439
  "epoch": 14.41,
1440
  "learning_rate": 4.278947368421053e-05,
1441
+ "loss": 0.1034,
1442
  "step": 2090
1443
  },
1444
  {
1445
  "epoch": 14.48,
1446
  "learning_rate": 4.226315789473684e-05,
1447
+ "loss": 0.0838,
1448
  "step": 2100
1449
  },
1450
  {
1451
  "epoch": 14.48,
1452
+ "eval_loss": 1.2591233253479004,
1453
+ "eval_runtime": 50.4613,
1454
+ "eval_samples_per_second": 33.293,
1455
+ "eval_steps_per_second": 33.293,
1456
+ "eval_wer": 0.6908552132864724,
1457
  "step": 2100
1458
  },
1459
  {
1460
  "epoch": 14.55,
1461
  "learning_rate": 4.1736842105263154e-05,
1462
+ "loss": 0.2733,
1463
  "step": 2110
1464
  },
1465
  {
1466
  "epoch": 14.62,
1467
  "learning_rate": 4.1210526315789474e-05,
1468
+ "loss": 0.1126,
1469
  "step": 2120
1470
  },
1471
  {
1472
  "epoch": 14.69,
1473
  "learning_rate": 4.0684210526315794e-05,
1474
+ "loss": 0.0986,
1475
  "step": 2130
1476
  },
1477
  {
1478
  "epoch": 14.76,
1479
  "learning_rate": 4.015789473684211e-05,
1480
+ "loss": 0.1728,
1481
  "step": 2140
1482
  },
1483
  {
1484
  "epoch": 14.83,
1485
  "learning_rate": 3.9631578947368426e-05,
1486
+ "loss": 0.2317,
1487
  "step": 2150
1488
  },
1489
  {
1490
  "epoch": 14.9,
1491
  "learning_rate": 3.910526315789474e-05,
1492
+ "loss": 0.0932,
1493
  "step": 2160
1494
  },
1495
  {
1496
  "epoch": 14.97,
1497
  "learning_rate": 3.857894736842105e-05,
1498
+ "loss": 0.0831,
1499
  "step": 2170
1500
  },
1501
  {
1502
  "epoch": 15.03,
1503
  "learning_rate": 3.8052631578947365e-05,
1504
+ "loss": 0.2388,
1505
  "step": 2180
1506
  },
1507
  {
1508
  "epoch": 15.1,
1509
  "learning_rate": 3.7526315789473685e-05,
1510
+ "loss": 0.129,
1511
  "step": 2190
1512
  },
1513
  {
1514
  "epoch": 15.17,
1515
  "learning_rate": 3.7e-05,
1516
+ "loss": 0.0766,
1517
  "step": 2200
1518
  },
1519
  {
1520
  "epoch": 15.17,
1521
+ "eval_loss": 1.252910852432251,
1522
+ "eval_runtime": 50.3936,
1523
+ "eval_samples_per_second": 33.338,
1524
+ "eval_steps_per_second": 33.338,
1525
+ "eval_wer": 0.6779684377368893,
1526
  "step": 2200
1527
  },
1528
  {
1529
  "epoch": 15.24,
1530
  "learning_rate": 3.647368421052632e-05,
1531
+ "loss": 0.0677,
1532
  "step": 2210
1533
  },
1534
  {
1535
  "epoch": 15.31,
1536
  "learning_rate": 3.594736842105264e-05,
1537
+ "loss": 0.2474,
1538
  "step": 2220
1539
  },
1540
  {
1541
  "epoch": 15.38,
1542
  "learning_rate": 3.542105263157895e-05,
1543
+ "loss": 0.0998,
1544
  "step": 2230
1545
  },
1546
  {
1547
  "epoch": 15.45,
1548
  "learning_rate": 3.4894736842105264e-05,
1549
+ "loss": 0.0651,
1550
  "step": 2240
1551
  },
1552
  {
1553
  "epoch": 15.52,
1554
  "learning_rate": 3.436842105263158e-05,
1555
+ "loss": 0.1772,
1556
  "step": 2250
1557
  },
1558
  {
1559
  "epoch": 15.59,
1560
  "learning_rate": 3.3842105263157896e-05,
1561
+ "loss": 0.1637,
1562
  "step": 2260
1563
  },
1564
  {
1565
  "epoch": 15.66,
1566
  "learning_rate": 3.331578947368421e-05,
1567
+ "loss": 0.0886,
1568
  "step": 2270
1569
  },
1570
  {
1571
  "epoch": 15.72,
1572
  "learning_rate": 3.278947368421052e-05,
1573
+ "loss": 0.0785,
1574
  "step": 2280
1575
  },
1576
  {
1577
  "epoch": 15.79,
1578
  "learning_rate": 3.226315789473684e-05,
1579
+ "loss": 0.2612,
1580
  "step": 2290
1581
  },
1582
  {
1583
  "epoch": 15.86,
1584
  "learning_rate": 3.173684210526316e-05,
1585
+ "loss": 0.0956,
1586
  "step": 2300
1587
  },
1588
  {
1589
  "epoch": 15.86,
1590
+ "eval_loss": 1.253722906112671,
1591
+ "eval_runtime": 50.2864,
1592
+ "eval_samples_per_second": 33.409,
1593
+ "eval_steps_per_second": 33.409,
1594
+ "eval_wer": 0.6668044931431328,
1595
  "step": 2300
1596
  },
1597
  {
1598
  "epoch": 15.93,
1599
  "learning_rate": 3.1210526315789475e-05,
1600
+ "loss": 0.0747,
1601
  "step": 2310
1602
  },
1603
  {
1604
  "epoch": 16.0,
1605
  "learning_rate": 3.0684210526315795e-05,
1606
+ "loss": 0.0816,
1607
  "step": 2320
1608
  },
1609
  {
1610
  "epoch": 16.07,
1611
  "learning_rate": 3.0157894736842108e-05,
1612
+ "loss": 0.2428,
1613
  "step": 2330
1614
  },
1615
  {
1616
  "epoch": 16.14,
1617
  "learning_rate": 2.963157894736842e-05,
1618
+ "loss": 0.0704,
1619
  "step": 2340
1620
  },
1621
  {
1622
  "epoch": 16.21,
1623
  "learning_rate": 2.910526315789474e-05,
1624
+ "loss": 0.0637,
1625
  "step": 2350
1626
  },
1627
  {
1628
  "epoch": 16.28,
1629
  "learning_rate": 2.8578947368421057e-05,
1630
+ "loss": 0.2066,
1631
  "step": 2360
1632
  },
1633
  {
1634
  "epoch": 16.34,
1635
  "learning_rate": 2.805263157894737e-05,
1636
+ "loss": 0.1234,
1637
  "step": 2370
1638
  },
1639
  {
1640
  "epoch": 16.41,
1641
  "learning_rate": 2.7526315789473683e-05,
1642
+ "loss": 0.0793,
1643
  "step": 2380
1644
  },
1645
  {
1646
  "epoch": 16.48,
1647
  "learning_rate": 2.7000000000000002e-05,
1648
+ "loss": 0.0579,
1649
  "step": 2390
1650
  },
1651
  {
1652
  "epoch": 16.55,
1653
  "learning_rate": 2.647368421052632e-05,
1654
+ "loss": 0.2339,
1655
  "step": 2400
1656
  },
1657
  {
1658
  "epoch": 16.55,
1659
+ "eval_loss": 1.3210315704345703,
1660
+ "eval_runtime": 50.3899,
1661
+ "eval_samples_per_second": 33.34,
1662
+ "eval_steps_per_second": 33.34,
1663
+ "eval_wer": 0.6796912686927159,
1664
  "step": 2400
1665
  },
1666
  {
1667
  "epoch": 16.62,
1668
  "learning_rate": 2.5947368421052632e-05,
1669
+ "loss": 0.0771,
1670
  "step": 2410
1671
  },
1672
  {
1673
  "epoch": 16.69,
1674
  "learning_rate": 2.542105263157895e-05,
1675
+ "loss": 0.0636,
1676
  "step": 2420
1677
  },
1678
  {
1679
  "epoch": 16.76,
1680
  "learning_rate": 2.4894736842105264e-05,
1681
+ "loss": 0.1349,
1682
  "step": 2430
1683
  },
1684
  {
1685
  "epoch": 16.83,
1686
  "learning_rate": 2.436842105263158e-05,
1687
+ "loss": 0.1704,
1688
  "step": 2440
1689
  },
1690
  {
1691
  "epoch": 16.9,
1692
  "learning_rate": 2.3842105263157897e-05,
1693
+ "loss": 0.0735,
1694
  "step": 2450
1695
  },
1696
  {
1697
  "epoch": 16.97,
1698
  "learning_rate": 2.331578947368421e-05,
1699
+ "loss": 0.0561,
1700
  "step": 2460
1701
  },
1702
  {
1703
  "epoch": 17.03,
1704
  "learning_rate": 2.2789473684210527e-05,
1705
+ "loss": 0.1833,
1706
  "step": 2470
1707
  },
1708
  {
1709
  "epoch": 17.1,
1710
  "learning_rate": 2.2263157894736843e-05,
1711
+ "loss": 0.0899,
1712
  "step": 2480
1713
  },
1714
  {
1715
  "epoch": 17.17,
1716
  "learning_rate": 2.173684210526316e-05,
1717
+ "loss": 0.0514,
1718
  "step": 2490
1719
  },
1720
  {
1721
  "epoch": 17.24,
1722
  "learning_rate": 2.1210526315789476e-05,
1723
+ "loss": 0.0431,
1724
  "step": 2500
1725
  },
1726
  {
1727
  "epoch": 17.24,
1728
+ "eval_loss": 1.3241108655929565,
1729
+ "eval_runtime": 50.2912,
1730
+ "eval_samples_per_second": 33.405,
1731
+ "eval_steps_per_second": 33.405,
1732
+ "eval_wer": 0.6781062642133554,
1733
  "step": 2500
1734
  },
1735
  {
1736
  "epoch": 17.31,
1737
  "learning_rate": 2.068421052631579e-05,
1738
+ "loss": 0.2135,
1739
  "step": 2510
1740
  },
1741
  {
1742
  "epoch": 17.38,
1743
  "learning_rate": 2.0157894736842105e-05,
1744
+ "loss": 0.0591,
1745
  "step": 2520
1746
  },
1747
  {
1748
  "epoch": 17.45,
1749
  "learning_rate": 1.963157894736842e-05,
1750
+ "loss": 0.053,
1751
  "step": 2530
1752
  },
1753
  {
1754
  "epoch": 17.52,
1755
  "learning_rate": 1.9105263157894738e-05,
1756
+ "loss": 0.1288,
1757
  "step": 2540
1758
  },
1759
  {
1760
  "epoch": 17.59,
1761
  "learning_rate": 1.8578947368421054e-05,
1762
+ "loss": 0.1119,
1763
  "step": 2550
1764
  },
1765
  {
1766
  "epoch": 17.66,
1767
  "learning_rate": 1.8052631578947367e-05,
1768
+ "loss": 0.0529,
1769
  "step": 2560
1770
  },
1771
  {
1772
  "epoch": 17.72,
1773
  "learning_rate": 1.7526315789473683e-05,
1774
+ "loss": 0.0454,
1775
  "step": 2570
1776
  },
1777
  {
1778
  "epoch": 17.79,
1779
  "learning_rate": 1.7000000000000003e-05,
1780
+ "loss": 0.1928,
1781
  "step": 2580
1782
  },
1783
  {
1784
  "epoch": 17.86,
1785
  "learning_rate": 1.6473684210526316e-05,
1786
+ "loss": 0.074,
1787
  "step": 2590
1788
  },
1789
  {
1790
  "epoch": 17.93,
1791
  "learning_rate": 1.5947368421052633e-05,
1792
+ "loss": 0.0508,
1793
  "step": 2600
1794
  },
1795
  {
1796
  "epoch": 17.93,
1797
+ "eval_loss": 1.3183706998825073,
1798
+ "eval_runtime": 50.4573,
1799
+ "eval_samples_per_second": 33.295,
1800
+ "eval_steps_per_second": 33.295,
1801
+ "eval_wer": 0.6683205843842602,
1802
  "step": 2600
1803
  },
1804
  {
1805
  "epoch": 18.0,
1806
  "learning_rate": 1.5421052631578946e-05,
1807
+ "loss": 0.053,
1808
  "step": 2610
1809
  },
1810
  {
1811
  "epoch": 18.07,
1812
  "learning_rate": 1.4894736842105264e-05,
1813
+ "loss": 0.1823,
1814
  "step": 2620
1815
  },
1816
  {
1817
  "epoch": 18.14,
1818
  "learning_rate": 1.4368421052631582e-05,
1819
+ "loss": 0.0447,
1820
  "step": 2630
1821
  },
1822
  {
1823
  "epoch": 18.21,
1824
  "learning_rate": 1.3842105263157895e-05,
1825
+ "loss": 0.0422,
1826
  "step": 2640
1827
  },
1828
  {
1829
  "epoch": 18.28,
1830
  "learning_rate": 1.3315789473684213e-05,
1831
+ "loss": 0.1376,
1832
  "step": 2650
1833
  },
1834
  {
1835
  "epoch": 18.34,
1836
  "learning_rate": 1.2789473684210526e-05,
1837
+ "loss": 0.0875,
1838
  "step": 2660
1839
  },
1840
  {
1841
  "epoch": 18.41,
1842
  "learning_rate": 1.2263157894736844e-05,
1843
+ "loss": 0.0485,
1844
  "step": 2670
1845
  },
1846
  {
1847
  "epoch": 18.48,
1848
  "learning_rate": 1.1736842105263158e-05,
1849
+ "loss": 0.0435,
1850
  "step": 2680
1851
  },
1852
  {
1853
  "epoch": 18.55,
1854
  "learning_rate": 1.1210526315789475e-05,
1855
+ "loss": 0.1602,
1856
  "step": 2690
1857
  },
1858
  {
1859
  "epoch": 18.62,
1860
  "learning_rate": 1.068421052631579e-05,
1861
+ "loss": 0.0616,
1862
  "step": 2700
1863
  },
1864
  {
1865
  "epoch": 18.62,
1866
+ "eval_loss": 1.372841477394104,
1867
+ "eval_runtime": 50.3934,
1868
+ "eval_samples_per_second": 33.338,
1869
+ "eval_steps_per_second": 33.338,
1870
+ "eval_wer": 0.6888567293777135,
1871
  "step": 2700
1872
  },
1873
  {
1874
  "epoch": 18.69,
1875
  "learning_rate": 1.0157894736842106e-05,
1876
+ "loss": 0.0399,
1877
  "step": 2710
1878
  },
1879
  {
1880
  "epoch": 18.76,
1881
  "learning_rate": 9.631578947368422e-06,
1882
+ "loss": 0.1019,
1883
  "step": 2720
1884
  },
1885
  {
1886
  "epoch": 18.83,
1887
  "learning_rate": 9.105263157894737e-06,
1888
+ "loss": 0.1264,
1889
  "step": 2730
1890
  },
1891
  {
1892
  "epoch": 18.9,
1893
  "learning_rate": 8.578947368421053e-06,
1894
+ "loss": 0.0499,
1895
  "step": 2740
1896
  },
1897
  {
1898
  "epoch": 18.97,
1899
  "learning_rate": 8.052631578947368e-06,
1900
+ "loss": 0.0399,
1901
  "step": 2750
1902
  },
1903
  {
1904
  "epoch": 19.03,
1905
  "learning_rate": 7.526315789473684e-06,
1906
+ "loss": 0.1536,
1907
  "step": 2760
1908
  },
1909
  {
1910
  "epoch": 19.1,
1911
  "learning_rate": 7.000000000000001e-06,
1912
+ "loss": 0.0706,
1913
  "step": 2770
1914
  },
1915
  {
1916
  "epoch": 19.17,
1917
  "learning_rate": 6.473684210526316e-06,
1918
+ "loss": 0.0439,
1919
  "step": 2780
1920
  },
1921
  {
1922
  "epoch": 19.24,
1923
  "learning_rate": 5.947368421052632e-06,
1924
+ "loss": 0.0418,
1925
  "step": 2790
1926
  },
1927
  {
1928
  "epoch": 19.31,
1929
  "learning_rate": 5.421052631578947e-06,
1930
+ "loss": 0.1608,
1931
  "step": 2800
1932
  },
1933
  {
1934
  "epoch": 19.31,
1935
+ "eval_loss": 1.3572485446929932,
1936
+ "eval_runtime": 50.6618,
1937
+ "eval_samples_per_second": 33.161,
1938
+ "eval_steps_per_second": 33.161,
1939
+ "eval_wer": 0.6770725656398594,
1940
  "step": 2800
1941
  },
1942
  {
1943
  "epoch": 19.38,
1944
  "learning_rate": 4.894736842105263e-06,
1945
+ "loss": 0.0482,
1946
  "step": 2810
1947
  },
1948
  {
1949
  "epoch": 19.45,
1950
  "learning_rate": 4.368421052631579e-06,
1951
+ "loss": 0.0388,
1952
  "step": 2820
1953
  },
1954
  {
1955
  "epoch": 19.52,
1956
  "learning_rate": 3.842105263157895e-06,
1957
+ "loss": 0.0956,
1958
  "step": 2830
1959
  },
1960
  {
1961
  "epoch": 19.59,
1962
  "learning_rate": 3.315789473684211e-06,
1963
+ "loss": 0.087,
1964
  "step": 2840
1965
  },
1966
  {
1967
  "epoch": 19.66,
1968
  "learning_rate": 2.7894736842105266e-06,
1969
+ "loss": 0.0349,
1970
  "step": 2850
1971
  },
1972
  {
1973
  "epoch": 19.72,
1974
  "learning_rate": 2.2631578947368426e-06,
1975
+ "loss": 0.0349,
1976
  "step": 2860
1977
  },
1978
  {
1979
  "epoch": 19.79,
1980
  "learning_rate": 1.7368421052631579e-06,
1981
+ "loss": 0.1533,
1982
  "step": 2870
1983
  },
1984
  {
1985
  "epoch": 19.86,
1986
  "learning_rate": 1.2105263157894738e-06,
1987
+ "loss": 0.0562,
1988
  "step": 2880
1989
  },
1990
  {
1991
  "epoch": 19.93,
1992
  "learning_rate": 6.842105263157896e-07,
1993
+ "loss": 0.0375,
1994
  "step": 2890
1995
  },
1996
  {
1997
  "epoch": 20.0,
1998
  "learning_rate": 1.5789473684210527e-07,
1999
+ "loss": 0.0378,
2000
  "step": 2900
2001
  },
2002
  {
2003
  "epoch": 20.0,
2004
+ "eval_loss": 1.3600618839263916,
2005
+ "eval_runtime": 50.6265,
2006
+ "eval_samples_per_second": 33.184,
2007
+ "eval_steps_per_second": 33.184,
2008
+ "eval_wer": 0.6776238715457239,
2009
  "step": 2900
2010
  },
2011
  {
2012
  "epoch": 20.0,
2013
  "step": 2900,
2014
+ "total_flos": 6.619462136392861e+17,
2015
+ "train_loss": 0.9487250096427984,
2016
+ "train_runtime": 4020.0745,
2017
+ "train_samples_per_second": 22.985,
2018
+ "train_steps_per_second": 0.721
2019
  }
2020
  ],
2021
  "max_steps": 2900,
2022
  "num_train_epochs": 20,
2023
+ "total_flos": 6.619462136392861e+17,
2024
  "trial_name": null,
2025
  "trial_params": null
2026
  }