warmestman commited on
Commit
5df743c
1 Parent(s): 7d002ce

Training in progress, step 7000, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b0dc78eb3736d5a634642f33b04366ae8bc82d44f79e35262912a8f79a297da9
3
  size 4993448880
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6cdd883a5460a3d6fef3ba86dc48a5c7818f0b59f546569efce6e0c6c05644fb
3
  size 4993448880
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8dd0b804f4d8175687f3626e929c7766181c67e2d08bc33bc20168248f4a9ba9
3
  size 1180663192
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c3361154434454a2c82e489b524924f18144e43f09f9bc443707201e953d6cb
3
  size 1180663192
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1b9a68cbcd5de279cabaa6560612707ffd9ca1a64ece09207391eab36fc73a58
3
  size 3095446256
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8225f7d2b350449309cf6cb95eacece99044be12a3a7aa483bdf0783e22ec40f
3
  size 3095446256
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4b7b6ce3ff2951e767c721be76510187471b655709a639fc1d52c719fdc5af80
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da289394bd41e2cdd34e1fa9b7ba6ae9221802035398fe0e37a4ddc28b5db9f6
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:34d49fca63da670319ba76874c77a485016cecfb604305dc12d82741a53f00d7
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:380aebeeba070e703dcd469862cff7b4aace5b115dadb0d7385847d018530611
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,1016 +1,1763 @@
1
  {
2
- "best_metric": 38.127715747208626,
3
- "best_model_checkpoint": "warmestman/whisper-large-v3-mn-cv-fleurs/checkpoint-4000",
4
- "epoch": 23.952095808383234,
5
  "eval_steps": 1000,
6
- "global_step": 4000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.15,
13
- "learning_rate": 5e-06,
14
- "loss": 1.1872,
15
  "step": 25
16
  },
17
  {
18
  "epoch": 0.3,
19
- "learning_rate": 1e-05,
20
- "loss": 0.7033,
21
  "step": 50
22
  },
23
  {
24
  "epoch": 0.45,
25
- "learning_rate": 1.5e-05,
26
- "loss": 0.4953,
27
  "step": 75
28
  },
29
  {
30
  "epoch": 0.6,
31
- "learning_rate": 2e-05,
32
- "loss": 0.3952,
33
  "step": 100
34
  },
35
  {
36
  "epoch": 0.75,
37
- "learning_rate": 2.5e-05,
38
- "loss": 0.3674,
39
  "step": 125
40
  },
41
  {
42
  "epoch": 0.9,
43
- "learning_rate": 3e-05,
44
- "loss": 0.3351,
45
  "step": 150
46
  },
47
  {
48
  "epoch": 1.05,
49
- "learning_rate": 3.5e-05,
50
- "loss": 0.2835,
51
  "step": 175
52
  },
53
  {
54
  "epoch": 1.2,
55
- "learning_rate": 4e-05,
56
- "loss": 0.2188,
57
  "step": 200
58
  },
59
  {
60
  "epoch": 1.35,
61
- "learning_rate": 4.5e-05,
62
- "loss": 0.2255,
63
  "step": 225
64
  },
65
  {
66
  "epoch": 1.5,
67
- "learning_rate": 5e-05,
68
- "loss": 0.22,
69
  "step": 250
70
  },
71
  {
72
  "epoch": 1.65,
73
- "learning_rate": 5.500000000000001e-05,
74
- "loss": 0.236,
75
  "step": 275
76
  },
77
  {
78
  "epoch": 1.8,
79
- "learning_rate": 6e-05,
80
- "loss": 0.2212,
81
  "step": 300
82
  },
83
  {
84
  "epoch": 1.95,
85
- "learning_rate": 6.500000000000001e-05,
86
- "loss": 0.2182,
87
  "step": 325
88
  },
89
  {
90
  "epoch": 2.1,
91
- "learning_rate": 7e-05,
92
- "loss": 0.1616,
93
  "step": 350
94
  },
95
  {
96
  "epoch": 2.25,
97
- "learning_rate": 7.500000000000001e-05,
98
- "loss": 0.1583,
99
  "step": 375
100
  },
101
  {
102
  "epoch": 2.4,
103
- "learning_rate": 8e-05,
104
- "loss": 0.1514,
105
  "step": 400
106
  },
107
  {
108
  "epoch": 2.54,
109
- "learning_rate": 8.5e-05,
110
- "loss": 0.1729,
111
  "step": 425
112
  },
113
  {
114
  "epoch": 2.69,
115
- "learning_rate": 9e-05,
116
- "loss": 0.1626,
117
  "step": 450
118
  },
119
  {
120
  "epoch": 2.84,
121
- "learning_rate": 9.5e-05,
122
- "loss": 0.1807,
123
  "step": 475
124
  },
125
  {
126
  "epoch": 2.99,
127
- "learning_rate": 0.0001,
128
- "loss": 0.1821,
129
  "step": 500
130
  },
131
  {
132
  "epoch": 3.14,
133
- "learning_rate": 9.973684210526316e-05,
134
- "loss": 0.1437,
135
  "step": 525
136
  },
137
  {
138
  "epoch": 3.29,
139
- "learning_rate": 9.947368421052632e-05,
140
- "loss": 0.1333,
141
  "step": 550
142
  },
143
  {
144
  "epoch": 3.44,
145
- "learning_rate": 9.921052631578947e-05,
146
- "loss": 0.1411,
147
  "step": 575
148
  },
149
  {
150
  "epoch": 3.59,
151
- "learning_rate": 9.894736842105263e-05,
152
- "loss": 0.1341,
153
  "step": 600
154
  },
155
  {
156
  "epoch": 3.74,
157
- "learning_rate": 9.868421052631579e-05,
158
- "loss": 0.1371,
159
  "step": 625
160
  },
161
  {
162
  "epoch": 3.89,
163
- "learning_rate": 9.842105263157894e-05,
164
- "loss": 0.1365,
165
  "step": 650
166
  },
167
  {
168
  "epoch": 4.04,
169
- "learning_rate": 9.815789473684211e-05,
170
- "loss": 0.118,
171
  "step": 675
172
  },
173
  {
174
  "epoch": 4.19,
175
- "learning_rate": 9.789473684210527e-05,
176
- "loss": 0.0827,
177
  "step": 700
178
  },
179
  {
180
  "epoch": 4.34,
181
- "learning_rate": 9.763157894736843e-05,
182
- "loss": 0.0879,
183
  "step": 725
184
  },
185
  {
186
  "epoch": 4.49,
187
- "learning_rate": 9.736842105263158e-05,
188
- "loss": 0.0937,
189
  "step": 750
190
  },
191
  {
192
  "epoch": 4.64,
193
- "learning_rate": 9.710526315789474e-05,
194
- "loss": 0.0887,
195
  "step": 775
196
  },
197
  {
198
  "epoch": 4.79,
199
- "learning_rate": 9.68421052631579e-05,
200
- "loss": 0.0946,
201
  "step": 800
202
  },
203
  {
204
  "epoch": 4.94,
205
- "learning_rate": 9.657894736842105e-05,
206
- "loss": 0.1032,
207
  "step": 825
208
  },
209
  {
210
  "epoch": 5.09,
211
- "learning_rate": 9.631578947368421e-05,
212
- "loss": 0.077,
213
  "step": 850
214
  },
215
  {
216
  "epoch": 5.24,
217
- "learning_rate": 9.605263157894737e-05,
218
- "loss": 0.0592,
219
  "step": 875
220
  },
221
  {
222
  "epoch": 5.39,
223
- "learning_rate": 9.578947368421052e-05,
224
- "loss": 0.0664,
225
  "step": 900
226
  },
227
  {
228
  "epoch": 5.54,
229
- "learning_rate": 9.552631578947369e-05,
230
- "loss": 0.066,
231
  "step": 925
232
  },
233
  {
234
  "epoch": 5.69,
235
- "learning_rate": 9.526315789473685e-05,
236
- "loss": 0.0641,
237
  "step": 950
238
  },
239
  {
240
  "epoch": 5.84,
241
- "learning_rate": 9.5e-05,
242
- "loss": 0.0662,
243
  "step": 975
244
  },
245
  {
246
  "epoch": 5.99,
247
- "learning_rate": 9.473684210526316e-05,
248
- "loss": 0.0691,
249
  "step": 1000
250
  },
251
  {
252
  "epoch": 5.99,
253
- "eval_loss": 0.45974645018577576,
254
- "eval_runtime": 585.199,
255
- "eval_samples_per_second": 0.716,
256
  "eval_steps_per_second": 0.091,
257
- "eval_wer": 41.504867719047354,
258
  "step": 1000
259
  },
260
  {
261
  "epoch": 6.14,
262
- "learning_rate": 9.447368421052633e-05,
263
- "loss": 0.0455,
264
  "step": 1025
265
  },
266
  {
267
  "epoch": 6.29,
268
- "learning_rate": 9.421052631578949e-05,
269
- "loss": 0.0471,
270
  "step": 1050
271
  },
272
  {
273
  "epoch": 6.44,
274
- "learning_rate": 9.394736842105264e-05,
275
- "loss": 0.0471,
276
  "step": 1075
277
  },
278
  {
279
  "epoch": 6.59,
280
- "learning_rate": 9.36842105263158e-05,
281
- "loss": 0.0465,
282
  "step": 1100
283
  },
284
  {
285
  "epoch": 6.74,
286
- "learning_rate": 9.342105263157896e-05,
287
- "loss": 0.0486,
288
  "step": 1125
289
  },
290
  {
291
  "epoch": 6.89,
292
- "learning_rate": 9.315789473684211e-05,
293
- "loss": 0.044,
294
  "step": 1150
295
  },
296
  {
297
  "epoch": 7.04,
298
- "learning_rate": 9.289473684210527e-05,
299
- "loss": 0.0421,
300
  "step": 1175
301
  },
302
  {
303
  "epoch": 7.19,
304
- "learning_rate": 9.263157894736843e-05,
305
- "loss": 0.0333,
306
  "step": 1200
307
  },
308
  {
309
  "epoch": 7.34,
310
- "learning_rate": 9.236842105263158e-05,
311
- "loss": 0.0323,
312
  "step": 1225
313
  },
314
  {
315
  "epoch": 7.49,
316
- "learning_rate": 9.210526315789474e-05,
317
- "loss": 0.0289,
318
  "step": 1250
319
  },
320
  {
321
  "epoch": 7.63,
322
- "learning_rate": 9.18421052631579e-05,
323
- "loss": 0.0339,
324
  "step": 1275
325
  },
326
  {
327
  "epoch": 7.78,
328
- "learning_rate": 9.157894736842105e-05,
329
- "loss": 0.0332,
330
  "step": 1300
331
  },
332
  {
333
  "epoch": 7.93,
334
- "learning_rate": 9.131578947368421e-05,
335
- "loss": 0.0701,
336
  "step": 1325
337
  },
338
  {
339
  "epoch": 8.08,
340
- "learning_rate": 9.105263157894738e-05,
341
- "loss": 0.0601,
342
  "step": 1350
343
  },
344
  {
345
  "epoch": 8.23,
346
- "learning_rate": 9.078947368421054e-05,
347
- "loss": 0.0285,
348
  "step": 1375
349
  },
350
  {
351
  "epoch": 8.38,
352
- "learning_rate": 9.052631578947369e-05,
353
- "loss": 0.0258,
354
  "step": 1400
355
  },
356
  {
357
  "epoch": 8.53,
358
- "learning_rate": 9.026315789473685e-05,
359
- "loss": 0.0262,
360
  "step": 1425
361
  },
362
  {
363
  "epoch": 8.68,
364
- "learning_rate": 9e-05,
365
- "loss": 0.0256,
366
  "step": 1450
367
  },
368
  {
369
  "epoch": 8.83,
370
- "learning_rate": 8.973684210526316e-05,
371
- "loss": 0.0286,
372
  "step": 1475
373
  },
374
  {
375
  "epoch": 8.98,
376
- "learning_rate": 8.947368421052632e-05,
377
- "loss": 0.0265,
378
  "step": 1500
379
  },
380
  {
381
  "epoch": 9.13,
382
- "learning_rate": 8.921052631578948e-05,
383
- "loss": 0.0208,
384
  "step": 1525
385
  },
386
  {
387
  "epoch": 9.28,
388
- "learning_rate": 8.894736842105263e-05,
389
- "loss": 0.0237,
390
  "step": 1550
391
  },
392
  {
393
  "epoch": 9.43,
394
- "learning_rate": 8.868421052631579e-05,
395
- "loss": 0.0235,
396
  "step": 1575
397
  },
398
  {
399
  "epoch": 9.58,
400
- "learning_rate": 8.842105263157894e-05,
401
- "loss": 0.0224,
402
  "step": 1600
403
  },
404
  {
405
  "epoch": 9.73,
406
- "learning_rate": 8.81578947368421e-05,
407
- "loss": 0.024,
408
  "step": 1625
409
  },
410
  {
411
  "epoch": 9.88,
412
- "learning_rate": 8.789473684210526e-05,
413
- "loss": 0.0246,
414
  "step": 1650
415
  },
416
  {
417
  "epoch": 10.03,
418
- "learning_rate": 8.763157894736841e-05,
419
- "loss": 0.0248,
420
  "step": 1675
421
  },
422
  {
423
  "epoch": 10.18,
424
- "learning_rate": 8.736842105263158e-05,
425
- "loss": 0.0173,
426
  "step": 1700
427
  },
428
  {
429
  "epoch": 10.33,
430
- "learning_rate": 8.710526315789474e-05,
431
- "loss": 0.0174,
432
  "step": 1725
433
  },
434
  {
435
  "epoch": 10.48,
436
- "learning_rate": 8.68421052631579e-05,
437
- "loss": 0.0215,
438
  "step": 1750
439
  },
440
  {
441
  "epoch": 10.63,
442
- "learning_rate": 8.657894736842105e-05,
443
- "loss": 0.0236,
444
  "step": 1775
445
  },
446
  {
447
  "epoch": 10.78,
448
- "learning_rate": 8.631578947368421e-05,
449
- "loss": 0.0203,
450
  "step": 1800
451
  },
452
  {
453
  "epoch": 10.93,
454
- "learning_rate": 8.605263157894738e-05,
455
- "loss": 0.0192,
456
  "step": 1825
457
  },
458
  {
459
  "epoch": 11.08,
460
- "learning_rate": 8.578947368421054e-05,
461
- "loss": 0.0172,
462
  "step": 1850
463
  },
464
  {
465
  "epoch": 11.23,
466
- "learning_rate": 8.552631578947369e-05,
467
- "loss": 0.0197,
468
  "step": 1875
469
  },
470
  {
471
  "epoch": 11.38,
472
- "learning_rate": 8.526315789473685e-05,
473
- "loss": 0.0188,
474
  "step": 1900
475
  },
476
  {
477
  "epoch": 11.53,
478
- "learning_rate": 8.5e-05,
479
- "loss": 0.0197,
480
  "step": 1925
481
  },
482
  {
483
  "epoch": 11.68,
484
- "learning_rate": 8.473684210526316e-05,
485
- "loss": 0.0197,
486
  "step": 1950
487
  },
488
  {
489
  "epoch": 11.83,
490
- "learning_rate": 8.447368421052632e-05,
491
- "loss": 0.0212,
492
  "step": 1975
493
  },
494
  {
495
  "epoch": 11.98,
496
- "learning_rate": 8.421052631578948e-05,
497
- "loss": 0.0183,
498
  "step": 2000
499
  },
500
  {
501
  "epoch": 11.98,
502
- "eval_loss": 0.49957072734832764,
503
- "eval_runtime": 578.898,
504
- "eval_samples_per_second": 0.724,
505
- "eval_steps_per_second": 0.092,
506
- "eval_wer": 38.2982234200539,
507
  "step": 2000
508
  },
509
  {
510
  "epoch": 12.13,
511
- "learning_rate": 8.394736842105263e-05,
512
- "loss": 0.0152,
513
  "step": 2025
514
  },
515
  {
516
  "epoch": 12.28,
517
- "learning_rate": 8.36842105263158e-05,
518
- "loss": 0.0127,
519
  "step": 2050
520
  },
521
  {
522
  "epoch": 12.43,
523
- "learning_rate": 8.342105263157896e-05,
524
- "loss": 0.0162,
525
  "step": 2075
526
  },
527
  {
528
  "epoch": 12.57,
529
- "learning_rate": 8.315789473684212e-05,
530
- "loss": 0.016,
531
  "step": 2100
532
  },
533
  {
534
  "epoch": 12.72,
535
- "learning_rate": 8.289473684210527e-05,
536
- "loss": 0.0146,
537
  "step": 2125
538
  },
539
  {
540
  "epoch": 12.87,
541
- "learning_rate": 8.263157894736843e-05,
542
- "loss": 0.0202,
543
  "step": 2150
544
  },
545
  {
546
  "epoch": 13.02,
547
- "learning_rate": 8.236842105263158e-05,
548
- "loss": 0.0208,
549
  "step": 2175
550
  },
551
  {
552
  "epoch": 13.17,
553
- "learning_rate": 8.210526315789474e-05,
554
- "loss": 0.0218,
555
  "step": 2200
556
  },
557
  {
558
  "epoch": 13.32,
559
- "learning_rate": 8.18421052631579e-05,
560
- "loss": 0.0191,
561
  "step": 2225
562
  },
563
  {
564
  "epoch": 13.47,
565
- "learning_rate": 8.157894736842105e-05,
566
- "loss": 0.0182,
567
  "step": 2250
568
  },
569
  {
570
  "epoch": 13.62,
571
- "learning_rate": 8.131578947368421e-05,
572
- "loss": 0.02,
573
  "step": 2275
574
  },
575
  {
576
  "epoch": 13.77,
577
- "learning_rate": 8.105263157894737e-05,
578
- "loss": 0.0227,
579
  "step": 2300
580
  },
581
  {
582
  "epoch": 13.92,
583
- "learning_rate": 8.078947368421052e-05,
584
- "loss": 0.0215,
585
  "step": 2325
586
  },
587
  {
588
  "epoch": 14.07,
589
- "learning_rate": 8.052631578947368e-05,
590
- "loss": 0.0207,
591
  "step": 2350
592
  },
593
  {
594
  "epoch": 14.22,
595
- "learning_rate": 8.026315789473685e-05,
596
- "loss": 0.0174,
597
  "step": 2375
598
  },
599
  {
600
  "epoch": 14.37,
601
- "learning_rate": 8e-05,
602
- "loss": 0.0163,
603
  "step": 2400
604
  },
605
  {
606
  "epoch": 14.52,
607
- "learning_rate": 7.973684210526316e-05,
608
- "loss": 0.0161,
609
  "step": 2425
610
  },
611
  {
612
  "epoch": 14.67,
613
- "learning_rate": 7.947368421052632e-05,
614
- "loss": 0.014,
615
  "step": 2450
616
  },
617
  {
618
  "epoch": 14.82,
619
- "learning_rate": 7.921052631578948e-05,
620
- "loss": 0.0157,
621
  "step": 2475
622
  },
623
  {
624
  "epoch": 14.97,
625
- "learning_rate": 7.894736842105263e-05,
626
- "loss": 0.0171,
627
  "step": 2500
628
  },
629
  {
630
  "epoch": 15.12,
631
- "learning_rate": 7.868421052631579e-05,
632
- "loss": 0.0111,
633
  "step": 2525
634
  },
635
  {
636
  "epoch": 15.27,
637
- "learning_rate": 7.842105263157895e-05,
638
- "loss": 0.0119,
639
  "step": 2550
640
  },
641
  {
642
  "epoch": 15.42,
643
- "learning_rate": 7.81578947368421e-05,
644
- "loss": 0.0136,
645
  "step": 2575
646
  },
647
  {
648
  "epoch": 15.57,
649
- "learning_rate": 7.789473684210526e-05,
650
- "loss": 0.0148,
651
  "step": 2600
652
  },
653
  {
654
  "epoch": 15.72,
655
- "learning_rate": 7.763157894736843e-05,
656
- "loss": 0.0123,
657
  "step": 2625
658
  },
659
  {
660
  "epoch": 15.87,
661
- "learning_rate": 7.736842105263159e-05,
662
- "loss": 0.0108,
663
  "step": 2650
664
  },
665
  {
666
  "epoch": 16.02,
667
- "learning_rate": 7.710526315789474e-05,
668
- "loss": 0.013,
669
  "step": 2675
670
  },
671
  {
672
  "epoch": 16.17,
673
- "learning_rate": 7.68421052631579e-05,
674
- "loss": 0.0106,
675
  "step": 2700
676
  },
677
  {
678
  "epoch": 16.32,
679
- "learning_rate": 7.657894736842105e-05,
680
- "loss": 0.0123,
681
  "step": 2725
682
  },
683
  {
684
  "epoch": 16.47,
685
- "learning_rate": 7.631578947368422e-05,
686
- "loss": 0.0152,
687
  "step": 2750
688
  },
689
  {
690
  "epoch": 16.62,
691
- "learning_rate": 7.605263157894738e-05,
692
- "loss": 0.0126,
693
  "step": 2775
694
  },
695
  {
696
  "epoch": 16.77,
697
- "learning_rate": 7.578947368421054e-05,
698
- "loss": 0.0137,
699
  "step": 2800
700
  },
701
  {
702
  "epoch": 16.92,
703
- "learning_rate": 7.55263157894737e-05,
704
- "loss": 0.0163,
705
  "step": 2825
706
  },
707
  {
708
  "epoch": 17.07,
709
- "learning_rate": 7.526315789473685e-05,
710
- "loss": 0.0132,
711
  "step": 2850
712
  },
713
  {
714
  "epoch": 17.22,
715
- "learning_rate": 7.500000000000001e-05,
716
- "loss": 0.011,
717
  "step": 2875
718
  },
719
  {
720
  "epoch": 17.37,
721
- "learning_rate": 7.473684210526316e-05,
722
- "loss": 0.0094,
723
  "step": 2900
724
  },
725
  {
726
  "epoch": 17.51,
727
- "learning_rate": 7.447368421052632e-05,
728
- "loss": 0.0094,
729
  "step": 2925
730
  },
731
  {
732
  "epoch": 17.66,
733
- "learning_rate": 7.421052631578948e-05,
734
- "loss": 0.0107,
735
  "step": 2950
736
  },
737
  {
738
  "epoch": 17.81,
739
- "learning_rate": 7.394736842105263e-05,
740
- "loss": 0.011,
741
  "step": 2975
742
  },
743
  {
744
  "epoch": 17.96,
745
- "learning_rate": 7.368421052631579e-05,
746
- "loss": 0.012,
747
  "step": 3000
748
  },
749
  {
750
  "epoch": 17.96,
751
- "eval_loss": 0.5328223705291748,
752
- "eval_runtime": 578.3593,
753
- "eval_samples_per_second": 0.724,
754
- "eval_steps_per_second": 0.092,
755
- "eval_wer": 38.54023431054397,
756
  "step": 3000
757
  },
758
  {
759
  "epoch": 18.11,
760
- "learning_rate": 7.342105263157895e-05,
761
- "loss": 0.0094,
762
  "step": 3025
763
  },
764
  {
765
  "epoch": 18.26,
766
- "learning_rate": 7.315789473684212e-05,
767
- "loss": 0.0085,
768
  "step": 3050
769
  },
770
  {
771
  "epoch": 18.41,
772
- "learning_rate": 7.289473684210527e-05,
773
- "loss": 0.0089,
774
  "step": 3075
775
  },
776
  {
777
  "epoch": 18.56,
778
- "learning_rate": 7.263157894736843e-05,
779
- "loss": 0.0101,
780
  "step": 3100
781
  },
782
  {
783
  "epoch": 18.71,
784
- "learning_rate": 7.236842105263159e-05,
785
- "loss": 0.0091,
786
  "step": 3125
787
  },
788
  {
789
  "epoch": 18.86,
790
- "learning_rate": 7.210526315789474e-05,
791
- "loss": 0.0106,
792
  "step": 3150
793
  },
794
  {
795
  "epoch": 19.01,
796
- "learning_rate": 7.18421052631579e-05,
797
- "loss": 0.01,
798
  "step": 3175
799
  },
800
  {
801
  "epoch": 19.16,
802
- "learning_rate": 7.157894736842105e-05,
803
- "loss": 0.0067,
804
  "step": 3200
805
  },
806
  {
807
  "epoch": 19.31,
808
- "learning_rate": 7.131578947368421e-05,
809
- "loss": 0.0077,
810
  "step": 3225
811
  },
812
  {
813
  "epoch": 19.46,
814
- "learning_rate": 7.105263157894737e-05,
815
- "loss": 0.0072,
816
  "step": 3250
817
  },
818
  {
819
  "epoch": 19.61,
820
- "learning_rate": 7.078947368421052e-05,
821
- "loss": 0.0066,
822
  "step": 3275
823
  },
824
  {
825
  "epoch": 19.76,
826
- "learning_rate": 7.052631578947368e-05,
827
- "loss": 0.0073,
828
  "step": 3300
829
  },
830
  {
831
  "epoch": 19.91,
832
- "learning_rate": 7.026315789473684e-05,
833
- "loss": 0.0079,
834
  "step": 3325
835
  },
836
  {
837
  "epoch": 20.06,
838
- "learning_rate": 7e-05,
839
- "loss": 0.0101,
840
  "step": 3350
841
  },
842
  {
843
  "epoch": 20.21,
844
- "learning_rate": 6.973684210526315e-05,
845
  "loss": 0.0075,
846
  "step": 3375
847
  },
848
  {
849
  "epoch": 20.36,
850
- "learning_rate": 6.947368421052632e-05,
851
- "loss": 0.0096,
852
  "step": 3400
853
  },
854
  {
855
  "epoch": 20.51,
856
- "learning_rate": 6.921052631578948e-05,
857
- "loss": 0.0095,
858
  "step": 3425
859
  },
860
  {
861
  "epoch": 20.66,
862
- "learning_rate": 6.894736842105263e-05,
863
- "loss": 0.0105,
864
  "step": 3450
865
  },
866
  {
867
  "epoch": 20.81,
868
- "learning_rate": 6.868421052631579e-05,
869
- "loss": 0.0112,
870
  "step": 3475
871
  },
872
  {
873
  "epoch": 20.96,
874
- "learning_rate": 6.842105263157895e-05,
875
- "loss": 0.0116,
876
  "step": 3500
877
  },
878
  {
879
  "epoch": 21.11,
880
- "learning_rate": 6.81578947368421e-05,
881
- "loss": 0.0088,
882
  "step": 3525
883
  },
884
  {
885
  "epoch": 21.26,
886
- "learning_rate": 6.789473684210527e-05,
887
- "loss": 0.01,
888
  "step": 3550
889
  },
890
  {
891
  "epoch": 21.41,
892
- "learning_rate": 6.763157894736843e-05,
893
- "loss": 0.01,
894
  "step": 3575
895
  },
896
  {
897
  "epoch": 21.56,
898
- "learning_rate": 6.736842105263159e-05,
899
- "loss": 0.0083,
900
  "step": 3600
901
  },
902
  {
903
  "epoch": 21.71,
904
- "learning_rate": 6.710526315789474e-05,
905
- "loss": 0.0088,
906
  "step": 3625
907
  },
908
  {
909
  "epoch": 21.86,
910
- "learning_rate": 6.68421052631579e-05,
911
- "loss": 0.0086,
912
  "step": 3650
913
  },
914
  {
915
  "epoch": 22.01,
916
- "learning_rate": 6.657894736842106e-05,
917
- "loss": 0.0078,
918
  "step": 3675
919
  },
920
  {
921
  "epoch": 22.16,
922
- "learning_rate": 6.631578947368421e-05,
923
- "loss": 0.0061,
924
  "step": 3700
925
  },
926
  {
927
  "epoch": 22.31,
928
- "learning_rate": 6.605263157894738e-05,
929
- "loss": 0.0073,
930
  "step": 3725
931
  },
932
  {
933
  "epoch": 22.46,
934
- "learning_rate": 6.578947368421054e-05,
935
- "loss": 0.0071,
936
  "step": 3750
937
  },
938
  {
939
  "epoch": 22.6,
940
- "learning_rate": 6.55263157894737e-05,
941
- "loss": 0.0085,
942
  "step": 3775
943
  },
944
  {
945
  "epoch": 22.75,
946
- "learning_rate": 6.526315789473685e-05,
947
- "loss": 0.0082,
948
  "step": 3800
949
  },
950
  {
951
  "epoch": 22.9,
952
- "learning_rate": 6.500000000000001e-05,
953
- "loss": 0.0098,
954
  "step": 3825
955
  },
956
  {
957
  "epoch": 23.05,
958
- "learning_rate": 6.473684210526316e-05,
959
- "loss": 0.0087,
960
  "step": 3850
961
  },
962
  {
963
  "epoch": 23.2,
964
- "learning_rate": 6.447368421052632e-05,
965
- "loss": 0.0087,
966
  "step": 3875
967
  },
968
  {
969
  "epoch": 23.35,
970
- "learning_rate": 6.421052631578948e-05,
971
- "loss": 0.0076,
972
  "step": 3900
973
  },
974
  {
975
  "epoch": 23.5,
976
- "learning_rate": 6.394736842105263e-05,
977
- "loss": 0.0077,
978
  "step": 3925
979
  },
980
  {
981
  "epoch": 23.65,
982
- "learning_rate": 6.368421052631579e-05,
983
- "loss": 0.0081,
984
  "step": 3950
985
  },
986
  {
987
  "epoch": 23.8,
988
- "learning_rate": 6.342105263157895e-05,
989
- "loss": 0.0083,
990
  "step": 3975
991
  },
992
  {
993
  "epoch": 23.95,
994
- "learning_rate": 6.31578947368421e-05,
995
- "loss": 0.0091,
996
  "step": 4000
997
  },
998
  {
999
  "epoch": 23.95,
1000
- "eval_loss": 0.5618667602539062,
1001
- "eval_runtime": 582.8261,
1002
- "eval_samples_per_second": 0.719,
1003
- "eval_steps_per_second": 0.091,
1004
- "eval_wer": 38.127715747208626,
1005
  "step": 4000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1006
  }
1007
  ],
1008
  "logging_steps": 25,
1009
- "max_steps": 10000,
1010
  "num_input_tokens_seen": 0,
1011
- "num_train_epochs": 60,
1012
  "save_steps": 1000,
1013
- "total_flos": 2.1720546423668736e+20,
1014
  "train_batch_size": 16,
1015
  "trial_name": null,
1016
  "trial_params": null
 
1
  {
2
+ "best_metric": 41.91738628238271,
3
+ "best_model_checkpoint": "warmestman/whisper-large-v3-mn-cv-fleurs/checkpoint-6000",
4
+ "epoch": 41.91616766467066,
5
  "eval_steps": 1000,
6
+ "global_step": 7000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.15,
13
+ "learning_rate": 5e-08,
14
+ "loss": 1.4341,
15
  "step": 25
16
  },
17
  {
18
  "epoch": 0.3,
19
+ "learning_rate": 1e-07,
20
+ "loss": 1.3997,
21
  "step": 50
22
  },
23
  {
24
  "epoch": 0.45,
25
+ "learning_rate": 1.5e-07,
26
+ "loss": 1.33,
27
  "step": 75
28
  },
29
  {
30
  "epoch": 0.6,
31
+ "learning_rate": 2e-07,
32
+ "loss": 1.2112,
33
  "step": 100
34
  },
35
  {
36
  "epoch": 0.75,
37
+ "learning_rate": 2.5e-07,
38
+ "loss": 1.1128,
39
  "step": 125
40
  },
41
  {
42
  "epoch": 0.9,
43
+ "learning_rate": 3e-07,
44
+ "loss": 1.0554,
45
  "step": 150
46
  },
47
  {
48
  "epoch": 1.05,
49
+ "learning_rate": 3.5e-07,
50
+ "loss": 0.9914,
51
  "step": 175
52
  },
53
  {
54
  "epoch": 1.2,
55
+ "learning_rate": 4e-07,
56
+ "loss": 0.9151,
57
  "step": 200
58
  },
59
  {
60
  "epoch": 1.35,
61
+ "learning_rate": 4.5e-07,
62
+ "loss": 0.8636,
63
  "step": 225
64
  },
65
  {
66
  "epoch": 1.5,
67
+ "learning_rate": 5e-07,
68
+ "loss": 0.7988,
69
  "step": 250
70
  },
71
  {
72
  "epoch": 1.65,
73
+ "learning_rate": 5.5e-07,
74
+ "loss": 0.755,
75
  "step": 275
76
  },
77
  {
78
  "epoch": 1.8,
79
+ "learning_rate": 6e-07,
80
+ "loss": 0.7075,
81
  "step": 300
82
  },
83
  {
84
  "epoch": 1.95,
85
+ "learning_rate": 6.5e-07,
86
+ "loss": 0.6772,
87
  "step": 325
88
  },
89
  {
90
  "epoch": 2.1,
91
+ "learning_rate": 7e-07,
92
+ "loss": 0.6182,
93
  "step": 350
94
  },
95
  {
96
  "epoch": 2.25,
97
+ "learning_rate": 7.5e-07,
98
+ "loss": 0.5912,
99
  "step": 375
100
  },
101
  {
102
  "epoch": 2.4,
103
+ "learning_rate": 8e-07,
104
+ "loss": 0.5273,
105
  "step": 400
106
  },
107
  {
108
  "epoch": 2.54,
109
+ "learning_rate": 8.499999999999999e-07,
110
+ "loss": 0.5237,
111
  "step": 425
112
  },
113
  {
114
  "epoch": 2.69,
115
+ "learning_rate": 9e-07,
116
+ "loss": 0.4872,
117
  "step": 450
118
  },
119
  {
120
  "epoch": 2.84,
121
+ "learning_rate": 9.499999999999999e-07,
122
+ "loss": 0.4826,
123
  "step": 475
124
  },
125
  {
126
  "epoch": 2.99,
127
+ "learning_rate": 1e-06,
128
+ "loss": 0.4555,
129
  "step": 500
130
  },
131
  {
132
  "epoch": 3.14,
133
+ "learning_rate": 9.987179487179487e-07,
134
+ "loss": 0.4109,
135
  "step": 525
136
  },
137
  {
138
  "epoch": 3.29,
139
+ "learning_rate": 9.974358974358974e-07,
140
+ "loss": 0.3968,
141
  "step": 550
142
  },
143
  {
144
  "epoch": 3.44,
145
+ "learning_rate": 9.961538461538461e-07,
146
+ "loss": 0.3976,
147
  "step": 575
148
  },
149
  {
150
  "epoch": 3.59,
151
+ "learning_rate": 9.948717948717949e-07,
152
+ "loss": 0.3741,
153
  "step": 600
154
  },
155
  {
156
  "epoch": 3.74,
157
+ "learning_rate": 9.935897435897436e-07,
158
+ "loss": 0.3633,
159
  "step": 625
160
  },
161
  {
162
  "epoch": 3.89,
163
+ "learning_rate": 9.923076923076923e-07,
164
+ "loss": 0.3697,
165
  "step": 650
166
  },
167
  {
168
  "epoch": 4.04,
169
+ "learning_rate": 9.91025641025641e-07,
170
+ "loss": 0.3382,
171
  "step": 675
172
  },
173
  {
174
  "epoch": 4.19,
175
+ "learning_rate": 9.897435897435898e-07,
176
+ "loss": 0.3004,
177
  "step": 700
178
  },
179
  {
180
  "epoch": 4.34,
181
+ "learning_rate": 9.884615384615385e-07,
182
+ "loss": 0.3067,
183
  "step": 725
184
  },
185
  {
186
  "epoch": 4.49,
187
+ "learning_rate": 9.871794871794872e-07,
188
+ "loss": 0.3106,
189
  "step": 750
190
  },
191
  {
192
  "epoch": 4.64,
193
+ "learning_rate": 9.85897435897436e-07,
194
+ "loss": 0.286,
195
  "step": 775
196
  },
197
  {
198
  "epoch": 4.79,
199
+ "learning_rate": 9.846153846153847e-07,
200
+ "loss": 0.2978,
201
  "step": 800
202
  },
203
  {
204
  "epoch": 4.94,
205
+ "learning_rate": 9.833333333333332e-07,
206
+ "loss": 0.2964,
207
  "step": 825
208
  },
209
  {
210
  "epoch": 5.09,
211
+ "learning_rate": 9.820512820512819e-07,
212
+ "loss": 0.2618,
213
  "step": 850
214
  },
215
  {
216
  "epoch": 5.24,
217
+ "learning_rate": 9.807692307692306e-07,
218
+ "loss": 0.2552,
219
  "step": 875
220
  },
221
  {
222
  "epoch": 5.39,
223
+ "learning_rate": 9.794871794871793e-07,
224
+ "loss": 0.2487,
225
  "step": 900
226
  },
227
  {
228
  "epoch": 5.54,
229
+ "learning_rate": 9.78205128205128e-07,
230
+ "loss": 0.2412,
231
  "step": 925
232
  },
233
  {
234
  "epoch": 5.69,
235
+ "learning_rate": 9.769230769230768e-07,
236
+ "loss": 0.2331,
237
  "step": 950
238
  },
239
  {
240
  "epoch": 5.84,
241
+ "learning_rate": 9.756410256410255e-07,
242
+ "loss": 0.2418,
243
  "step": 975
244
  },
245
  {
246
  "epoch": 5.99,
247
+ "learning_rate": 9.743589743589742e-07,
248
+ "loss": 0.24,
249
  "step": 1000
250
  },
251
  {
252
  "epoch": 5.99,
253
+ "eval_loss": 0.4358440637588501,
254
+ "eval_runtime": 584.1166,
255
+ "eval_samples_per_second": 0.717,
256
  "eval_steps_per_second": 0.091,
257
+ "eval_wer": 47.676145426544196,
258
  "step": 1000
259
  },
260
  {
261
  "epoch": 6.14,
262
+ "learning_rate": 9.73076923076923e-07,
263
+ "loss": 0.2033,
264
  "step": 1025
265
  },
266
  {
267
  "epoch": 6.29,
268
+ "learning_rate": 9.717948717948717e-07,
269
+ "loss": 0.2085,
270
  "step": 1050
271
  },
272
  {
273
  "epoch": 6.44,
274
+ "learning_rate": 9.705128205128204e-07,
275
+ "loss": 0.2081,
276
  "step": 1075
277
  },
278
  {
279
  "epoch": 6.59,
280
+ "learning_rate": 9.692307692307691e-07,
281
+ "loss": 0.1931,
282
  "step": 1100
283
  },
284
  {
285
  "epoch": 6.74,
286
+ "learning_rate": 9.679487179487179e-07,
287
+ "loss": 0.2043,
288
  "step": 1125
289
  },
290
  {
291
  "epoch": 6.89,
292
+ "learning_rate": 9.666666666666666e-07,
293
+ "loss": 0.1946,
294
  "step": 1150
295
  },
296
  {
297
  "epoch": 7.04,
298
+ "learning_rate": 9.653846153846153e-07,
299
+ "loss": 0.1871,
300
  "step": 1175
301
  },
302
  {
303
  "epoch": 7.19,
304
+ "learning_rate": 9.64102564102564e-07,
305
+ "loss": 0.1628,
306
  "step": 1200
307
  },
308
  {
309
  "epoch": 7.34,
310
+ "learning_rate": 9.628205128205128e-07,
311
+ "loss": 0.1626,
312
  "step": 1225
313
  },
314
  {
315
  "epoch": 7.49,
316
+ "learning_rate": 9.615384615384615e-07,
317
+ "loss": 0.1659,
318
  "step": 1250
319
  },
320
  {
321
  "epoch": 7.63,
322
+ "learning_rate": 9.602564102564102e-07,
323
+ "loss": 0.1692,
324
  "step": 1275
325
  },
326
  {
327
  "epoch": 7.78,
328
+ "learning_rate": 9.58974358974359e-07,
329
+ "loss": 0.1708,
330
  "step": 1300
331
  },
332
  {
333
  "epoch": 7.93,
334
+ "learning_rate": 9.576923076923077e-07,
335
+ "loss": 0.1643,
336
  "step": 1325
337
  },
338
  {
339
  "epoch": 8.08,
340
+ "learning_rate": 9.564102564102564e-07,
341
+ "loss": 0.1476,
342
  "step": 1350
343
  },
344
  {
345
  "epoch": 8.23,
346
+ "learning_rate": 9.551282051282051e-07,
347
+ "loss": 0.1328,
348
  "step": 1375
349
  },
350
  {
351
  "epoch": 8.38,
352
+ "learning_rate": 9.538461538461538e-07,
353
+ "loss": 0.1337,
354
  "step": 1400
355
  },
356
  {
357
  "epoch": 8.53,
358
+ "learning_rate": 9.525641025641025e-07,
359
+ "loss": 0.1484,
360
  "step": 1425
361
  },
362
  {
363
  "epoch": 8.68,
364
+ "learning_rate": 9.512820512820512e-07,
365
+ "loss": 0.1384,
366
  "step": 1450
367
  },
368
  {
369
  "epoch": 8.83,
370
+ "learning_rate": 9.499999999999999e-07,
371
+ "loss": 0.1323,
372
  "step": 1475
373
  },
374
  {
375
  "epoch": 8.98,
376
+ "learning_rate": 9.487179487179486e-07,
377
+ "loss": 0.1402,
378
  "step": 1500
379
  },
380
  {
381
  "epoch": 9.13,
382
+ "learning_rate": 9.474358974358974e-07,
383
+ "loss": 0.1109,
384
  "step": 1525
385
  },
386
  {
387
  "epoch": 9.28,
388
+ "learning_rate": 9.461538461538461e-07,
389
+ "loss": 0.1134,
390
  "step": 1550
391
  },
392
  {
393
  "epoch": 9.43,
394
+ "learning_rate": 9.448717948717948e-07,
395
+ "loss": 0.1103,
396
  "step": 1575
397
  },
398
  {
399
  "epoch": 9.58,
400
+ "learning_rate": 9.435897435897435e-07,
401
+ "loss": 0.1112,
402
  "step": 1600
403
  },
404
  {
405
  "epoch": 9.73,
406
+ "learning_rate": 9.423076923076923e-07,
407
+ "loss": 0.1114,
408
  "step": 1625
409
  },
410
  {
411
  "epoch": 9.88,
412
+ "learning_rate": 9.41025641025641e-07,
413
+ "loss": 0.1199,
414
  "step": 1650
415
  },
416
  {
417
  "epoch": 10.03,
418
+ "learning_rate": 9.397435897435897e-07,
419
+ "loss": 0.1094,
420
  "step": 1675
421
  },
422
  {
423
  "epoch": 10.18,
424
+ "learning_rate": 9.384615384615384e-07,
425
+ "loss": 0.0867,
426
  "step": 1700
427
  },
428
  {
429
  "epoch": 10.33,
430
+ "learning_rate": 9.371794871794872e-07,
431
+ "loss": 0.0892,
432
  "step": 1725
433
  },
434
  {
435
  "epoch": 10.48,
436
+ "learning_rate": 9.358974358974359e-07,
437
+ "loss": 0.0911,
438
  "step": 1750
439
  },
440
  {
441
  "epoch": 10.63,
442
+ "learning_rate": 9.346153846153846e-07,
443
+ "loss": 0.0944,
444
  "step": 1775
445
  },
446
  {
447
  "epoch": 10.78,
448
+ "learning_rate": 9.333333333333333e-07,
449
+ "loss": 0.091,
450
  "step": 1800
451
  },
452
  {
453
  "epoch": 10.93,
454
+ "learning_rate": 9.32051282051282e-07,
455
+ "loss": 0.0931,
456
  "step": 1825
457
  },
458
  {
459
  "epoch": 11.08,
460
+ "learning_rate": 9.307692307692308e-07,
461
+ "loss": 0.0798,
462
  "step": 1850
463
  },
464
  {
465
  "epoch": 11.23,
466
+ "learning_rate": 9.294871794871795e-07,
467
+ "loss": 0.071,
468
  "step": 1875
469
  },
470
  {
471
  "epoch": 11.38,
472
+ "learning_rate": 9.282051282051282e-07,
473
+ "loss": 0.0722,
474
  "step": 1900
475
  },
476
  {
477
  "epoch": 11.53,
478
+ "learning_rate": 9.26923076923077e-07,
479
+ "loss": 0.0762,
480
  "step": 1925
481
  },
482
  {
483
  "epoch": 11.68,
484
+ "learning_rate": 9.256410256410257e-07,
485
+ "loss": 0.0734,
486
  "step": 1950
487
  },
488
  {
489
  "epoch": 11.83,
490
+ "learning_rate": 9.243589743589744e-07,
491
+ "loss": 0.0738,
492
  "step": 1975
493
  },
494
  {
495
  "epoch": 11.98,
496
+ "learning_rate": 9.230769230769231e-07,
497
+ "loss": 0.077,
498
  "step": 2000
499
  },
500
  {
501
  "epoch": 11.98,
502
+ "eval_loss": 0.4446885585784912,
503
+ "eval_runtime": 586.2687,
504
+ "eval_samples_per_second": 0.715,
505
+ "eval_steps_per_second": 0.09,
506
+ "eval_wer": 43.46845608052362,
507
  "step": 2000
508
  },
509
  {
510
  "epoch": 12.13,
511
+ "learning_rate": 9.217948717948717e-07,
512
+ "loss": 0.0593,
513
  "step": 2025
514
  },
515
  {
516
  "epoch": 12.28,
517
+ "learning_rate": 9.205641025641026e-07,
518
+ "loss": 0.0547,
519
  "step": 2050
520
  },
521
  {
522
  "epoch": 12.43,
523
+ "learning_rate": 9.192820512820513e-07,
524
+ "loss": 0.0598,
525
  "step": 2075
526
  },
527
  {
528
  "epoch": 12.57,
529
+ "learning_rate": 9.18e-07,
530
+ "loss": 0.0564,
531
  "step": 2100
532
  },
533
  {
534
  "epoch": 12.72,
535
+ "learning_rate": 9.167179487179488e-07,
536
+ "loss": 0.0574,
537
  "step": 2125
538
  },
539
  {
540
  "epoch": 12.87,
541
+ "learning_rate": 9.154358974358975e-07,
542
+ "loss": 0.0614,
543
  "step": 2150
544
  },
545
  {
546
  "epoch": 13.02,
547
+ "learning_rate": 9.141538461538462e-07,
548
+ "loss": 0.0575,
549
  "step": 2175
550
  },
551
  {
552
  "epoch": 13.17,
553
+ "learning_rate": 9.128717948717947e-07,
554
+ "loss": 0.0474,
555
  "step": 2200
556
  },
557
  {
558
  "epoch": 13.32,
559
+ "learning_rate": 9.115897435897435e-07,
560
+ "loss": 0.0434,
561
  "step": 2225
562
  },
563
  {
564
  "epoch": 13.47,
565
+ "learning_rate": 9.103076923076922e-07,
566
+ "loss": 0.0464,
567
  "step": 2250
568
  },
569
  {
570
  "epoch": 13.62,
571
+ "learning_rate": 9.090256410256409e-07,
572
+ "loss": 0.0437,
573
  "step": 2275
574
  },
575
  {
576
  "epoch": 13.77,
577
+ "learning_rate": 9.077435897435896e-07,
578
+ "loss": 0.0437,
579
  "step": 2300
580
  },
581
  {
582
  "epoch": 13.92,
583
+ "learning_rate": 9.064615384615384e-07,
584
+ "loss": 0.0457,
585
  "step": 2325
586
  },
587
  {
588
  "epoch": 14.07,
589
+ "learning_rate": 9.051794871794871e-07,
590
+ "loss": 0.042,
591
  "step": 2350
592
  },
593
  {
594
  "epoch": 14.22,
595
+ "learning_rate": 9.038974358974358e-07,
596
+ "loss": 0.0339,
597
  "step": 2375
598
  },
599
  {
600
  "epoch": 14.37,
601
+ "learning_rate": 9.026153846153845e-07,
602
+ "loss": 0.0342,
603
  "step": 2400
604
  },
605
  {
606
  "epoch": 14.52,
607
+ "learning_rate": 9.013333333333333e-07,
608
+ "loss": 0.0323,
609
  "step": 2425
610
  },
611
  {
612
  "epoch": 14.67,
613
+ "learning_rate": 9.00051282051282e-07,
614
+ "loss": 0.0348,
615
  "step": 2450
616
  },
617
  {
618
  "epoch": 14.82,
619
+ "learning_rate": 8.987692307692307e-07,
620
+ "loss": 0.0368,
621
  "step": 2475
622
  },
623
  {
624
  "epoch": 14.97,
625
+ "learning_rate": 8.974871794871794e-07,
626
+ "loss": 0.036,
627
  "step": 2500
628
  },
629
  {
630
  "epoch": 15.12,
631
+ "learning_rate": 8.962051282051282e-07,
632
+ "loss": 0.0252,
633
  "step": 2525
634
  },
635
  {
636
  "epoch": 15.27,
637
+ "learning_rate": 8.949230769230769e-07,
638
+ "loss": 0.0287,
639
  "step": 2550
640
  },
641
  {
642
  "epoch": 15.42,
643
+ "learning_rate": 8.936410256410256e-07,
644
+ "loss": 0.0275,
645
  "step": 2575
646
  },
647
  {
648
  "epoch": 15.57,
649
+ "learning_rate": 8.923589743589743e-07,
650
+ "loss": 0.0263,
651
  "step": 2600
652
  },
653
  {
654
  "epoch": 15.72,
655
+ "learning_rate": 8.91076923076923e-07,
656
+ "loss": 0.0268,
657
  "step": 2625
658
  },
659
  {
660
  "epoch": 15.87,
661
+ "learning_rate": 8.897948717948718e-07,
662
+ "loss": 0.0277,
663
  "step": 2650
664
  },
665
  {
666
  "epoch": 16.02,
667
+ "learning_rate": 8.885128205128205e-07,
668
+ "loss": 0.0263,
669
  "step": 2675
670
  },
671
  {
672
  "epoch": 16.17,
673
+ "learning_rate": 8.872307692307692e-07,
674
+ "loss": 0.0182,
675
  "step": 2700
676
  },
677
  {
678
  "epoch": 16.32,
679
+ "learning_rate": 8.85948717948718e-07,
680
+ "loss": 0.0222,
681
  "step": 2725
682
  },
683
  {
684
  "epoch": 16.47,
685
+ "learning_rate": 8.846666666666667e-07,
686
+ "loss": 0.0205,
687
  "step": 2750
688
  },
689
  {
690
  "epoch": 16.62,
691
+ "learning_rate": 8.833846153846154e-07,
692
+ "loss": 0.0216,
693
  "step": 2775
694
  },
695
  {
696
  "epoch": 16.77,
697
+ "learning_rate": 8.821025641025641e-07,
698
+ "loss": 0.0207,
699
  "step": 2800
700
  },
701
  {
702
  "epoch": 16.92,
703
+ "learning_rate": 8.808205128205127e-07,
704
+ "loss": 0.0223,
705
  "step": 2825
706
  },
707
  {
708
  "epoch": 17.07,
709
+ "learning_rate": 8.795384615384615e-07,
710
+ "loss": 0.0173,
711
  "step": 2850
712
  },
713
  {
714
  "epoch": 17.22,
715
+ "learning_rate": 8.782564102564102e-07,
716
+ "loss": 0.0144,
717
  "step": 2875
718
  },
719
  {
720
  "epoch": 17.37,
721
+ "learning_rate": 8.769743589743589e-07,
722
+ "loss": 0.0159,
723
  "step": 2900
724
  },
725
  {
726
  "epoch": 17.51,
727
+ "learning_rate": 8.756923076923076e-07,
728
+ "loss": 0.0146,
729
  "step": 2925
730
  },
731
  {
732
  "epoch": 17.66,
733
+ "learning_rate": 8.744102564102564e-07,
734
+ "loss": 0.0154,
735
  "step": 2950
736
  },
737
  {
738
  "epoch": 17.81,
739
+ "learning_rate": 8.731282051282051e-07,
740
+ "loss": 0.0152,
741
  "step": 2975
742
  },
743
  {
744
  "epoch": 17.96,
745
+ "learning_rate": 8.718461538461538e-07,
746
+ "loss": 0.0188,
747
  "step": 3000
748
  },
749
  {
750
  "epoch": 17.96,
751
+ "eval_loss": 0.5422356724739075,
752
+ "eval_runtime": 586.4584,
753
+ "eval_samples_per_second": 0.714,
754
+ "eval_steps_per_second": 0.09,
755
+ "eval_wer": 43.32544964523404,
756
  "step": 3000
757
  },
758
  {
759
  "epoch": 18.11,
760
+ "learning_rate": 8.705641025641025e-07,
761
+ "loss": 0.0133,
762
  "step": 3025
763
  },
764
  {
765
  "epoch": 18.26,
766
+ "learning_rate": 8.692820512820513e-07,
767
+ "loss": 0.0108,
768
  "step": 3050
769
  },
770
  {
771
  "epoch": 18.41,
772
+ "learning_rate": 8.68e-07,
773
+ "loss": 0.0131,
774
  "step": 3075
775
  },
776
  {
777
  "epoch": 18.56,
778
+ "learning_rate": 8.667179487179487e-07,
779
+ "loss": 0.0126,
780
  "step": 3100
781
  },
782
  {
783
  "epoch": 18.71,
784
+ "learning_rate": 8.654358974358974e-07,
785
+ "loss": 0.0128,
786
  "step": 3125
787
  },
788
  {
789
  "epoch": 18.86,
790
+ "learning_rate": 8.641538461538462e-07,
791
+ "loss": 0.012,
792
  "step": 3150
793
  },
794
  {
795
  "epoch": 19.01,
796
+ "learning_rate": 8.628717948717949e-07,
797
+ "loss": 0.0128,
798
  "step": 3175
799
  },
800
  {
801
  "epoch": 19.16,
802
+ "learning_rate": 8.615897435897436e-07,
803
+ "loss": 0.0098,
804
  "step": 3200
805
  },
806
  {
807
  "epoch": 19.31,
808
+ "learning_rate": 8.603076923076923e-07,
809
+ "loss": 0.0092,
810
  "step": 3225
811
  },
812
  {
813
  "epoch": 19.46,
814
+ "learning_rate": 8.590256410256411e-07,
815
+ "loss": 0.0092,
816
  "step": 3250
817
  },
818
  {
819
  "epoch": 19.61,
820
+ "learning_rate": 8.577435897435897e-07,
821
+ "loss": 0.009,
822
  "step": 3275
823
  },
824
  {
825
  "epoch": 19.76,
826
+ "learning_rate": 8.564615384615384e-07,
827
+ "loss": 0.0092,
828
  "step": 3300
829
  },
830
  {
831
  "epoch": 19.91,
832
+ "learning_rate": 8.551794871794871e-07,
833
+ "loss": 0.0091,
834
  "step": 3325
835
  },
836
  {
837
  "epoch": 20.06,
838
+ "learning_rate": 8.538974358974359e-07,
839
+ "loss": 0.0097,
840
  "step": 3350
841
  },
842
  {
843
  "epoch": 20.21,
844
+ "learning_rate": 8.526153846153846e-07,
845
  "loss": 0.0075,
846
  "step": 3375
847
  },
848
  {
849
  "epoch": 20.36,
850
+ "learning_rate": 8.513333333333333e-07,
851
+ "loss": 0.0084,
852
  "step": 3400
853
  },
854
  {
855
  "epoch": 20.51,
856
+ "learning_rate": 8.500512820512819e-07,
857
+ "loss": 0.0069,
858
  "step": 3425
859
  },
860
  {
861
  "epoch": 20.66,
862
+ "learning_rate": 8.487692307692307e-07,
863
+ "loss": 0.0071,
864
  "step": 3450
865
  },
866
  {
867
  "epoch": 20.81,
868
+ "learning_rate": 8.474871794871794e-07,
869
+ "loss": 0.0077,
870
  "step": 3475
871
  },
872
  {
873
  "epoch": 20.96,
874
+ "learning_rate": 8.462051282051281e-07,
875
+ "loss": 0.0081,
876
  "step": 3500
877
  },
878
  {
879
  "epoch": 21.11,
880
+ "learning_rate": 8.449230769230768e-07,
881
+ "loss": 0.006,
882
  "step": 3525
883
  },
884
  {
885
  "epoch": 21.26,
886
+ "learning_rate": 8.436410256410255e-07,
887
+ "loss": 0.0058,
888
  "step": 3550
889
  },
890
  {
891
  "epoch": 21.41,
892
+ "learning_rate": 8.423589743589743e-07,
893
+ "loss": 0.0059,
894
  "step": 3575
895
  },
896
  {
897
  "epoch": 21.56,
898
+ "learning_rate": 8.41076923076923e-07,
899
+ "loss": 0.0053,
900
  "step": 3600
901
  },
902
  {
903
  "epoch": 21.71,
904
+ "learning_rate": 8.397948717948717e-07,
905
+ "loss": 0.0057,
906
  "step": 3625
907
  },
908
  {
909
  "epoch": 21.86,
910
+ "learning_rate": 8.385128205128204e-07,
911
+ "loss": 0.0053,
912
  "step": 3650
913
  },
914
  {
915
  "epoch": 22.01,
916
+ "learning_rate": 8.372307692307692e-07,
917
+ "loss": 0.0063,
918
  "step": 3675
919
  },
920
  {
921
  "epoch": 22.16,
922
+ "learning_rate": 8.359487179487179e-07,
923
+ "loss": 0.0045,
924
  "step": 3700
925
  },
926
  {
927
  "epoch": 22.31,
928
+ "learning_rate": 8.346666666666666e-07,
929
+ "loss": 0.0043,
930
  "step": 3725
931
  },
932
  {
933
  "epoch": 22.46,
934
+ "learning_rate": 8.333846153846153e-07,
935
+ "loss": 0.0041,
936
  "step": 3750
937
  },
938
  {
939
  "epoch": 22.6,
940
+ "learning_rate": 8.321025641025641e-07,
941
+ "loss": 0.0042,
942
  "step": 3775
943
  },
944
  {
945
  "epoch": 22.75,
946
+ "learning_rate": 8.308205128205128e-07,
947
+ "loss": 0.0044,
948
  "step": 3800
949
  },
950
  {
951
  "epoch": 22.9,
952
+ "learning_rate": 8.295384615384615e-07,
953
+ "loss": 0.0046,
954
  "step": 3825
955
  },
956
  {
957
  "epoch": 23.05,
958
+ "learning_rate": 8.282564102564102e-07,
959
+ "loss": 0.005,
960
  "step": 3850
961
  },
962
  {
963
  "epoch": 23.2,
964
+ "learning_rate": 8.26974358974359e-07,
965
+ "loss": 0.0036,
966
  "step": 3875
967
  },
968
  {
969
  "epoch": 23.35,
970
+ "learning_rate": 8.256923076923077e-07,
971
+ "loss": 0.0039,
972
  "step": 3900
973
  },
974
  {
975
  "epoch": 23.5,
976
+ "learning_rate": 8.244102564102564e-07,
977
+ "loss": 0.0033,
978
  "step": 3925
979
  },
980
  {
981
  "epoch": 23.65,
982
+ "learning_rate": 8.231282051282051e-07,
983
+ "loss": 0.0035,
984
  "step": 3950
985
  },
986
  {
987
  "epoch": 23.8,
988
+ "learning_rate": 8.218461538461539e-07,
989
+ "loss": 0.0041,
990
  "step": 3975
991
  },
992
  {
993
  "epoch": 23.95,
994
+ "learning_rate": 8.205641025641026e-07,
995
+ "loss": 0.004,
996
  "step": 4000
997
  },
998
  {
999
  "epoch": 23.95,
1000
+ "eval_loss": 0.6144538521766663,
1001
+ "eval_runtime": 590.5826,
1002
+ "eval_samples_per_second": 0.709,
1003
+ "eval_steps_per_second": 0.09,
1004
+ "eval_wer": 42.95693306198779,
1005
  "step": 4000
1006
+ },
1007
+ {
1008
+ "epoch": 24.1,
1009
+ "learning_rate": 8.192820512820512e-07,
1010
+ "loss": 0.0037,
1011
+ "step": 4025
1012
+ },
1013
+ {
1014
+ "epoch": 24.25,
1015
+ "learning_rate": 8.179999999999999e-07,
1016
+ "loss": 0.0031,
1017
+ "step": 4050
1018
+ },
1019
+ {
1020
+ "epoch": 24.4,
1021
+ "learning_rate": 8.167179487179487e-07,
1022
+ "loss": 0.0028,
1023
+ "step": 4075
1024
+ },
1025
+ {
1026
+ "epoch": 24.55,
1027
+ "learning_rate": 8.154358974358974e-07,
1028
+ "loss": 0.0034,
1029
+ "step": 4100
1030
+ },
1031
+ {
1032
+ "epoch": 24.7,
1033
+ "learning_rate": 8.141538461538461e-07,
1034
+ "loss": 0.0032,
1035
+ "step": 4125
1036
+ },
1037
+ {
1038
+ "epoch": 24.85,
1039
+ "learning_rate": 8.128717948717948e-07,
1040
+ "loss": 0.0031,
1041
+ "step": 4150
1042
+ },
1043
+ {
1044
+ "epoch": 25.0,
1045
+ "learning_rate": 8.115897435897436e-07,
1046
+ "loss": 0.0031,
1047
+ "step": 4175
1048
+ },
1049
+ {
1050
+ "epoch": 25.15,
1051
+ "learning_rate": 8.103076923076923e-07,
1052
+ "loss": 0.0023,
1053
+ "step": 4200
1054
+ },
1055
+ {
1056
+ "epoch": 25.3,
1057
+ "learning_rate": 8.09025641025641e-07,
1058
+ "loss": 0.0024,
1059
+ "step": 4225
1060
+ },
1061
+ {
1062
+ "epoch": 25.45,
1063
+ "learning_rate": 8.077435897435897e-07,
1064
+ "loss": 0.0025,
1065
+ "step": 4250
1066
+ },
1067
+ {
1068
+ "epoch": 25.6,
1069
+ "learning_rate": 8.064615384615385e-07,
1070
+ "loss": 0.0021,
1071
+ "step": 4275
1072
+ },
1073
+ {
1074
+ "epoch": 25.75,
1075
+ "learning_rate": 8.051794871794872e-07,
1076
+ "loss": 0.0022,
1077
+ "step": 4300
1078
+ },
1079
+ {
1080
+ "epoch": 25.9,
1081
+ "learning_rate": 8.038974358974359e-07,
1082
+ "loss": 0.0022,
1083
+ "step": 4325
1084
+ },
1085
+ {
1086
+ "epoch": 26.05,
1087
+ "learning_rate": 8.026153846153845e-07,
1088
+ "loss": 0.0024,
1089
+ "step": 4350
1090
+ },
1091
+ {
1092
+ "epoch": 26.2,
1093
+ "learning_rate": 8.013333333333333e-07,
1094
+ "loss": 0.0018,
1095
+ "step": 4375
1096
+ },
1097
+ {
1098
+ "epoch": 26.35,
1099
+ "learning_rate": 8.00051282051282e-07,
1100
+ "loss": 0.0016,
1101
+ "step": 4400
1102
+ },
1103
+ {
1104
+ "epoch": 26.5,
1105
+ "learning_rate": 7.987692307692307e-07,
1106
+ "loss": 0.0019,
1107
+ "step": 4425
1108
+ },
1109
+ {
1110
+ "epoch": 26.65,
1111
+ "learning_rate": 7.974871794871794e-07,
1112
+ "loss": 0.0021,
1113
+ "step": 4450
1114
+ },
1115
+ {
1116
+ "epoch": 26.8,
1117
+ "learning_rate": 7.962051282051282e-07,
1118
+ "loss": 0.0019,
1119
+ "step": 4475
1120
+ },
1121
+ {
1122
+ "epoch": 26.95,
1123
+ "learning_rate": 7.949230769230769e-07,
1124
+ "loss": 0.0019,
1125
+ "step": 4500
1126
+ },
1127
+ {
1128
+ "epoch": 27.1,
1129
+ "learning_rate": 7.936410256410256e-07,
1130
+ "loss": 0.0021,
1131
+ "step": 4525
1132
+ },
1133
+ {
1134
+ "epoch": 27.25,
1135
+ "learning_rate": 7.923589743589743e-07,
1136
+ "loss": 0.0019,
1137
+ "step": 4550
1138
+ },
1139
+ {
1140
+ "epoch": 27.4,
1141
+ "learning_rate": 7.910769230769231e-07,
1142
+ "loss": 0.002,
1143
+ "step": 4575
1144
+ },
1145
+ {
1146
+ "epoch": 27.54,
1147
+ "learning_rate": 7.897948717948718e-07,
1148
+ "loss": 0.0024,
1149
+ "step": 4600
1150
+ },
1151
+ {
1152
+ "epoch": 27.69,
1153
+ "learning_rate": 7.885128205128205e-07,
1154
+ "loss": 0.0028,
1155
+ "step": 4625
1156
+ },
1157
+ {
1158
+ "epoch": 27.84,
1159
+ "learning_rate": 7.872307692307691e-07,
1160
+ "loss": 0.0033,
1161
+ "step": 4650
1162
+ },
1163
+ {
1164
+ "epoch": 27.99,
1165
+ "learning_rate": 7.859487179487178e-07,
1166
+ "loss": 0.0036,
1167
+ "step": 4675
1168
+ },
1169
+ {
1170
+ "epoch": 28.14,
1171
+ "learning_rate": 7.846666666666666e-07,
1172
+ "loss": 0.0035,
1173
+ "step": 4700
1174
+ },
1175
+ {
1176
+ "epoch": 28.29,
1177
+ "learning_rate": 7.833846153846153e-07,
1178
+ "loss": 0.0031,
1179
+ "step": 4725
1180
+ },
1181
+ {
1182
+ "epoch": 28.44,
1183
+ "learning_rate": 7.82102564102564e-07,
1184
+ "loss": 0.0032,
1185
+ "step": 4750
1186
+ },
1187
+ {
1188
+ "epoch": 28.59,
1189
+ "learning_rate": 7.808205128205127e-07,
1190
+ "loss": 0.0037,
1191
+ "step": 4775
1192
+ },
1193
+ {
1194
+ "epoch": 28.74,
1195
+ "learning_rate": 7.795384615384615e-07,
1196
+ "loss": 0.0036,
1197
+ "step": 4800
1198
+ },
1199
+ {
1200
+ "epoch": 28.89,
1201
+ "learning_rate": 7.782564102564102e-07,
1202
+ "loss": 0.0046,
1203
+ "step": 4825
1204
+ },
1205
+ {
1206
+ "epoch": 29.04,
1207
+ "learning_rate": 7.769743589743589e-07,
1208
+ "loss": 0.0042,
1209
+ "step": 4850
1210
+ },
1211
+ {
1212
+ "epoch": 29.19,
1213
+ "learning_rate": 7.756923076923076e-07,
1214
+ "loss": 0.0032,
1215
+ "step": 4875
1216
+ },
1217
+ {
1218
+ "epoch": 29.34,
1219
+ "learning_rate": 7.744102564102564e-07,
1220
+ "loss": 0.0037,
1221
+ "step": 4900
1222
+ },
1223
+ {
1224
+ "epoch": 29.49,
1225
+ "learning_rate": 7.731282051282051e-07,
1226
+ "loss": 0.0039,
1227
+ "step": 4925
1228
+ },
1229
+ {
1230
+ "epoch": 29.64,
1231
+ "learning_rate": 7.718461538461538e-07,
1232
+ "loss": 0.0034,
1233
+ "step": 4950
1234
+ },
1235
+ {
1236
+ "epoch": 29.79,
1237
+ "learning_rate": 7.705641025641025e-07,
1238
+ "loss": 0.0034,
1239
+ "step": 4975
1240
+ },
1241
+ {
1242
+ "epoch": 29.94,
1243
+ "learning_rate": 7.692820512820513e-07,
1244
+ "loss": 0.0036,
1245
+ "step": 5000
1246
+ },
1247
+ {
1248
+ "epoch": 29.94,
1249
+ "eval_loss": 0.6325389742851257,
1250
+ "eval_runtime": 588.3841,
1251
+ "eval_samples_per_second": 0.712,
1252
+ "eval_steps_per_second": 0.09,
1253
+ "eval_wer": 42.08789395522798,
1254
+ "step": 5000
1255
+ },
1256
+ {
1257
+ "epoch": 30.09,
1258
+ "learning_rate": 7.68e-07,
1259
+ "loss": 0.0032,
1260
+ "step": 5025
1261
+ },
1262
+ {
1263
+ "epoch": 30.24,
1264
+ "learning_rate": 7.667179487179487e-07,
1265
+ "loss": 0.003,
1266
+ "step": 5050
1267
+ },
1268
+ {
1269
+ "epoch": 30.39,
1270
+ "learning_rate": 7.654358974358974e-07,
1271
+ "loss": 0.0026,
1272
+ "step": 5075
1273
+ },
1274
+ {
1275
+ "epoch": 30.54,
1276
+ "learning_rate": 7.641538461538462e-07,
1277
+ "loss": 0.0026,
1278
+ "step": 5100
1279
+ },
1280
+ {
1281
+ "epoch": 30.69,
1282
+ "learning_rate": 7.628717948717949e-07,
1283
+ "loss": 0.0026,
1284
+ "step": 5125
1285
+ },
1286
+ {
1287
+ "epoch": 30.84,
1288
+ "learning_rate": 7.615897435897436e-07,
1289
+ "loss": 0.0027,
1290
+ "step": 5150
1291
+ },
1292
+ {
1293
+ "epoch": 30.99,
1294
+ "learning_rate": 7.603076923076923e-07,
1295
+ "loss": 0.0029,
1296
+ "step": 5175
1297
+ },
1298
+ {
1299
+ "epoch": 31.14,
1300
+ "learning_rate": 7.590256410256411e-07,
1301
+ "loss": 0.0024,
1302
+ "step": 5200
1303
+ },
1304
+ {
1305
+ "epoch": 31.29,
1306
+ "learning_rate": 7.577435897435898e-07,
1307
+ "loss": 0.0019,
1308
+ "step": 5225
1309
+ },
1310
+ {
1311
+ "epoch": 31.44,
1312
+ "learning_rate": 7.564615384615384e-07,
1313
+ "loss": 0.0023,
1314
+ "step": 5250
1315
+ },
1316
+ {
1317
+ "epoch": 31.59,
1318
+ "learning_rate": 7.551794871794871e-07,
1319
+ "loss": 0.0021,
1320
+ "step": 5275
1321
+ },
1322
+ {
1323
+ "epoch": 31.74,
1324
+ "learning_rate": 7.538974358974359e-07,
1325
+ "loss": 0.002,
1326
+ "step": 5300
1327
+ },
1328
+ {
1329
+ "epoch": 31.89,
1330
+ "learning_rate": 7.526153846153846e-07,
1331
+ "loss": 0.0021,
1332
+ "step": 5325
1333
+ },
1334
+ {
1335
+ "epoch": 32.04,
1336
+ "learning_rate": 7.513333333333333e-07,
1337
+ "loss": 0.0024,
1338
+ "step": 5350
1339
+ },
1340
+ {
1341
+ "epoch": 32.19,
1342
+ "learning_rate": 7.50051282051282e-07,
1343
+ "loss": 0.002,
1344
+ "step": 5375
1345
+ },
1346
+ {
1347
+ "epoch": 32.34,
1348
+ "learning_rate": 7.487692307692308e-07,
1349
+ "loss": 0.0021,
1350
+ "step": 5400
1351
+ },
1352
+ {
1353
+ "epoch": 32.49,
1354
+ "learning_rate": 7.475384615384615e-07,
1355
+ "loss": 0.0019,
1356
+ "step": 5425
1357
+ },
1358
+ {
1359
+ "epoch": 32.63,
1360
+ "learning_rate": 7.462564102564101e-07,
1361
+ "loss": 0.0019,
1362
+ "step": 5450
1363
+ },
1364
+ {
1365
+ "epoch": 32.78,
1366
+ "learning_rate": 7.449743589743588e-07,
1367
+ "loss": 0.0018,
1368
+ "step": 5475
1369
+ },
1370
+ {
1371
+ "epoch": 32.93,
1372
+ "learning_rate": 7.436923076923076e-07,
1373
+ "loss": 0.0018,
1374
+ "step": 5500
1375
+ },
1376
+ {
1377
+ "epoch": 33.08,
1378
+ "learning_rate": 7.424102564102563e-07,
1379
+ "loss": 0.0016,
1380
+ "step": 5525
1381
+ },
1382
+ {
1383
+ "epoch": 33.23,
1384
+ "learning_rate": 7.41128205128205e-07,
1385
+ "loss": 0.0019,
1386
+ "step": 5550
1387
+ },
1388
+ {
1389
+ "epoch": 33.38,
1390
+ "learning_rate": 7.398461538461537e-07,
1391
+ "loss": 0.0017,
1392
+ "step": 5575
1393
+ },
1394
+ {
1395
+ "epoch": 33.53,
1396
+ "learning_rate": 7.385641025641025e-07,
1397
+ "loss": 0.0019,
1398
+ "step": 5600
1399
+ },
1400
+ {
1401
+ "epoch": 33.68,
1402
+ "learning_rate": 7.372820512820512e-07,
1403
+ "loss": 0.0015,
1404
+ "step": 5625
1405
+ },
1406
+ {
1407
+ "epoch": 33.83,
1408
+ "learning_rate": 7.359999999999999e-07,
1409
+ "loss": 0.0016,
1410
+ "step": 5650
1411
+ },
1412
+ {
1413
+ "epoch": 33.98,
1414
+ "learning_rate": 7.347179487179486e-07,
1415
+ "loss": 0.0016,
1416
+ "step": 5675
1417
+ },
1418
+ {
1419
+ "epoch": 34.13,
1420
+ "learning_rate": 7.334358974358974e-07,
1421
+ "loss": 0.0013,
1422
+ "step": 5700
1423
+ },
1424
+ {
1425
+ "epoch": 34.28,
1426
+ "learning_rate": 7.321538461538461e-07,
1427
+ "loss": 0.0018,
1428
+ "step": 5725
1429
+ },
1430
+ {
1431
+ "epoch": 34.43,
1432
+ "learning_rate": 7.308717948717948e-07,
1433
+ "loss": 0.0015,
1434
+ "step": 5750
1435
+ },
1436
+ {
1437
+ "epoch": 34.58,
1438
+ "learning_rate": 7.295897435897435e-07,
1439
+ "loss": 0.0017,
1440
+ "step": 5775
1441
+ },
1442
+ {
1443
+ "epoch": 34.73,
1444
+ "learning_rate": 7.283076923076923e-07,
1445
+ "loss": 0.0017,
1446
+ "step": 5800
1447
+ },
1448
+ {
1449
+ "epoch": 34.88,
1450
+ "learning_rate": 7.27025641025641e-07,
1451
+ "loss": 0.0018,
1452
+ "step": 5825
1453
+ },
1454
+ {
1455
+ "epoch": 35.03,
1456
+ "learning_rate": 7.257435897435897e-07,
1457
+ "loss": 0.002,
1458
+ "step": 5850
1459
+ },
1460
+ {
1461
+ "epoch": 35.18,
1462
+ "learning_rate": 7.244615384615384e-07,
1463
+ "loss": 0.0016,
1464
+ "step": 5875
1465
+ },
1466
+ {
1467
+ "epoch": 35.33,
1468
+ "learning_rate": 7.231794871794872e-07,
1469
+ "loss": 0.0018,
1470
+ "step": 5900
1471
+ },
1472
+ {
1473
+ "epoch": 35.48,
1474
+ "learning_rate": 7.218974358974359e-07,
1475
+ "loss": 0.0016,
1476
+ "step": 5925
1477
+ },
1478
+ {
1479
+ "epoch": 35.63,
1480
+ "learning_rate": 7.206153846153846e-07,
1481
+ "loss": 0.0015,
1482
+ "step": 5950
1483
+ },
1484
+ {
1485
+ "epoch": 35.78,
1486
+ "learning_rate": 7.193333333333333e-07,
1487
+ "loss": 0.0014,
1488
+ "step": 5975
1489
+ },
1490
+ {
1491
+ "epoch": 35.93,
1492
+ "learning_rate": 7.180512820512821e-07,
1493
+ "loss": 0.0018,
1494
+ "step": 6000
1495
+ },
1496
+ {
1497
+ "epoch": 35.93,
1498
+ "eval_loss": 0.6662390232086182,
1499
+ "eval_runtime": 594.4694,
1500
+ "eval_samples_per_second": 0.705,
1501
+ "eval_steps_per_second": 0.089,
1502
+ "eval_wer": 41.91738628238271,
1503
+ "step": 6000
1504
+ },
1505
+ {
1506
+ "epoch": 36.08,
1507
+ "learning_rate": 7.167692307692307e-07,
1508
+ "loss": 0.0017,
1509
+ "step": 6025
1510
+ },
1511
+ {
1512
+ "epoch": 36.23,
1513
+ "learning_rate": 7.154871794871794e-07,
1514
+ "loss": 0.0019,
1515
+ "step": 6050
1516
+ },
1517
+ {
1518
+ "epoch": 36.38,
1519
+ "learning_rate": 7.142051282051281e-07,
1520
+ "loss": 0.0015,
1521
+ "step": 6075
1522
+ },
1523
+ {
1524
+ "epoch": 36.53,
1525
+ "learning_rate": 7.129230769230769e-07,
1526
+ "loss": 0.0025,
1527
+ "step": 6100
1528
+ },
1529
+ {
1530
+ "epoch": 36.68,
1531
+ "learning_rate": 7.116410256410256e-07,
1532
+ "loss": 0.0017,
1533
+ "step": 6125
1534
+ },
1535
+ {
1536
+ "epoch": 36.83,
1537
+ "learning_rate": 7.103589743589743e-07,
1538
+ "loss": 0.0014,
1539
+ "step": 6150
1540
+ },
1541
+ {
1542
+ "epoch": 36.98,
1543
+ "learning_rate": 7.09076923076923e-07,
1544
+ "loss": 0.0015,
1545
+ "step": 6175
1546
+ },
1547
+ {
1548
+ "epoch": 37.13,
1549
+ "learning_rate": 7.077948717948718e-07,
1550
+ "loss": 0.0011,
1551
+ "step": 6200
1552
+ },
1553
+ {
1554
+ "epoch": 37.28,
1555
+ "learning_rate": 7.065128205128205e-07,
1556
+ "loss": 0.0012,
1557
+ "step": 6225
1558
+ },
1559
+ {
1560
+ "epoch": 37.43,
1561
+ "learning_rate": 7.052307692307692e-07,
1562
+ "loss": 0.0017,
1563
+ "step": 6250
1564
+ },
1565
+ {
1566
+ "epoch": 37.57,
1567
+ "learning_rate": 7.039487179487179e-07,
1568
+ "loss": 0.0015,
1569
+ "step": 6275
1570
+ },
1571
+ {
1572
+ "epoch": 37.72,
1573
+ "learning_rate": 7.026666666666667e-07,
1574
+ "loss": 0.0016,
1575
+ "step": 6300
1576
+ },
1577
+ {
1578
+ "epoch": 37.87,
1579
+ "learning_rate": 7.013846153846154e-07,
1580
+ "loss": 0.0018,
1581
+ "step": 6325
1582
+ },
1583
+ {
1584
+ "epoch": 38.02,
1585
+ "learning_rate": 7.001025641025641e-07,
1586
+ "loss": 0.0013,
1587
+ "step": 6350
1588
+ },
1589
+ {
1590
+ "epoch": 38.17,
1591
+ "learning_rate": 6.988205128205128e-07,
1592
+ "loss": 0.0012,
1593
+ "step": 6375
1594
+ },
1595
+ {
1596
+ "epoch": 38.32,
1597
+ "learning_rate": 6.975384615384616e-07,
1598
+ "loss": 0.0013,
1599
+ "step": 6400
1600
+ },
1601
+ {
1602
+ "epoch": 38.47,
1603
+ "learning_rate": 6.962564102564103e-07,
1604
+ "loss": 0.0009,
1605
+ "step": 6425
1606
+ },
1607
+ {
1608
+ "epoch": 38.62,
1609
+ "learning_rate": 6.94974358974359e-07,
1610
+ "loss": 0.0012,
1611
+ "step": 6450
1612
+ },
1613
+ {
1614
+ "epoch": 38.77,
1615
+ "learning_rate": 6.936923076923077e-07,
1616
+ "loss": 0.001,
1617
+ "step": 6475
1618
+ },
1619
+ {
1620
+ "epoch": 38.92,
1621
+ "learning_rate": 6.924102564102565e-07,
1622
+ "loss": 0.001,
1623
+ "step": 6500
1624
+ },
1625
+ {
1626
+ "epoch": 39.07,
1627
+ "learning_rate": 6.911282051282052e-07,
1628
+ "loss": 0.001,
1629
+ "step": 6525
1630
+ },
1631
+ {
1632
+ "epoch": 39.22,
1633
+ "learning_rate": 6.898461538461539e-07,
1634
+ "loss": 0.0012,
1635
+ "step": 6550
1636
+ },
1637
+ {
1638
+ "epoch": 39.37,
1639
+ "learning_rate": 6.885641025641025e-07,
1640
+ "loss": 0.0012,
1641
+ "step": 6575
1642
+ },
1643
+ {
1644
+ "epoch": 39.52,
1645
+ "learning_rate": 6.872820512820513e-07,
1646
+ "loss": 0.0014,
1647
+ "step": 6600
1648
+ },
1649
+ {
1650
+ "epoch": 39.67,
1651
+ "learning_rate": 6.86e-07,
1652
+ "loss": 0.0018,
1653
+ "step": 6625
1654
+ },
1655
+ {
1656
+ "epoch": 39.82,
1657
+ "learning_rate": 6.847179487179486e-07,
1658
+ "loss": 0.0014,
1659
+ "step": 6650
1660
+ },
1661
+ {
1662
+ "epoch": 39.97,
1663
+ "learning_rate": 6.834358974358973e-07,
1664
+ "loss": 0.0017,
1665
+ "step": 6675
1666
+ },
1667
+ {
1668
+ "epoch": 40.12,
1669
+ "learning_rate": 6.82153846153846e-07,
1670
+ "loss": 0.0015,
1671
+ "step": 6700
1672
+ },
1673
+ {
1674
+ "epoch": 40.27,
1675
+ "learning_rate": 6.808717948717948e-07,
1676
+ "loss": 0.0019,
1677
+ "step": 6725
1678
+ },
1679
+ {
1680
+ "epoch": 40.42,
1681
+ "learning_rate": 6.795897435897435e-07,
1682
+ "loss": 0.0014,
1683
+ "step": 6750
1684
+ },
1685
+ {
1686
+ "epoch": 40.57,
1687
+ "learning_rate": 6.783076923076922e-07,
1688
+ "loss": 0.0019,
1689
+ "step": 6775
1690
+ },
1691
+ {
1692
+ "epoch": 40.72,
1693
+ "learning_rate": 6.770256410256409e-07,
1694
+ "loss": 0.0016,
1695
+ "step": 6800
1696
+ },
1697
+ {
1698
+ "epoch": 40.87,
1699
+ "learning_rate": 6.757435897435897e-07,
1700
+ "loss": 0.0015,
1701
+ "step": 6825
1702
+ },
1703
+ {
1704
+ "epoch": 41.02,
1705
+ "learning_rate": 6.744615384615384e-07,
1706
+ "loss": 0.0014,
1707
+ "step": 6850
1708
+ },
1709
+ {
1710
+ "epoch": 41.17,
1711
+ "learning_rate": 6.731794871794871e-07,
1712
+ "loss": 0.0012,
1713
+ "step": 6875
1714
+ },
1715
+ {
1716
+ "epoch": 41.32,
1717
+ "learning_rate": 6.718974358974358e-07,
1718
+ "loss": 0.0013,
1719
+ "step": 6900
1720
+ },
1721
+ {
1722
+ "epoch": 41.47,
1723
+ "learning_rate": 6.706153846153846e-07,
1724
+ "loss": 0.0013,
1725
+ "step": 6925
1726
+ },
1727
+ {
1728
+ "epoch": 41.62,
1729
+ "learning_rate": 6.693333333333333e-07,
1730
+ "loss": 0.0012,
1731
+ "step": 6950
1732
+ },
1733
+ {
1734
+ "epoch": 41.77,
1735
+ "learning_rate": 6.68051282051282e-07,
1736
+ "loss": 0.0012,
1737
+ "step": 6975
1738
+ },
1739
+ {
1740
+ "epoch": 41.92,
1741
+ "learning_rate": 6.667692307692307e-07,
1742
+ "loss": 0.0013,
1743
+ "step": 7000
1744
+ },
1745
+ {
1746
+ "epoch": 41.92,
1747
+ "eval_loss": 0.6843737363815308,
1748
+ "eval_runtime": 592.3336,
1749
+ "eval_samples_per_second": 0.707,
1750
+ "eval_steps_per_second": 0.089,
1751
+ "eval_wer": 42.35190583576261,
1752
+ "step": 7000
1753
  }
1754
  ],
1755
  "logging_steps": 25,
1756
+ "max_steps": 20000,
1757
  "num_input_tokens_seen": 0,
1758
+ "num_train_epochs": 120,
1759
  "save_steps": 1000,
1760
+ "total_flos": 3.801019180430131e+20,
1761
  "train_batch_size": 16,
1762
  "trial_name": null,
1763
  "trial_params": null
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1972f394bef5f2dfba82d719e660c110d5e82dd9389cd87393cdd1f35c3201be
3
  size 5112
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b0359fd8c0bca8ce29466944ad8b07dcfba0ff1a4788df0f13fa23689a41235a
3
  size 5112