patrickvonplaten commited on
Commit
9bbf813
1 Parent(s): 10d8be9

End of training

Browse files
all_results.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
  "epoch": 20.0,
3
- "eval_loss": 1.7561472654342651,
4
- "eval_runtime": 63.6624,
5
  "eval_samples": 1680,
6
- "eval_samples_per_second": 26.389,
7
- "eval_steps_per_second": 26.389,
8
- "eval_wer": 0.7971194266418579,
9
- "train_loss": 2.3102814756590746,
10
- "train_runtime": 4074.7961,
11
  "train_samples": 4620,
12
- "train_samples_per_second": 22.676,
13
- "train_steps_per_second": 0.712
14
  }
1
  {
2
  "epoch": 20.0,
3
+ "eval_loss": 1.754138469696045,
4
+ "eval_runtime": 63.5156,
5
  "eval_samples": 1680,
6
+ "eval_samples_per_second": 26.45,
7
+ "eval_steps_per_second": 26.45,
8
+ "eval_wer": 0.8060781476121563,
9
+ "train_loss": 2.314238773214406,
10
+ "train_runtime": 4067.0318,
11
  "train_samples": 4620,
12
+ "train_samples_per_second": 22.719,
13
+ "train_steps_per_second": 0.713
14
  }
emissions.csv CHANGED
@@ -1,2 +1,3 @@
1
  timestamp,experiment_id,project_name,duration,emissions,energy_consumed,country_name,country_iso_code,region,on_cloud,cloud_provider,cloud_region
2
  2021-10-27T14:36:34,4a240274-9a5c-450e-8973-cba63eaca4a6,codecarbon,4071.9189558029175,0.01008591898710101,0.04791958355479716,United States,USA,new york,N,,
 
1
  timestamp,experiment_id,project_name,duration,emissions,energy_consumed,country_name,country_iso_code,region,on_cloud,cloud_provider,cloud_region
2
  2021-10-27T14:36:34,4a240274-9a5c-450e-8973-cba63eaca4a6,codecarbon,4071.9189558029175,0.01008591898710101,0.04791958355479716,United States,USA,new york,N,,
3
+ 2021-10-27T17:13:43,f2a4ada6-7daa-4826-8b1b-8690dde510d9,codecarbon,4064.1793417930603,0.010069911942805982,0.04784353189331327,United States,USA,new york,N,,
eval_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 20.0,
3
- "eval_loss": 1.7561472654342651,
4
- "eval_runtime": 63.6624,
5
  "eval_samples": 1680,
6
- "eval_samples_per_second": 26.389,
7
- "eval_steps_per_second": 26.389,
8
- "eval_wer": 0.7971194266418579
9
  }
1
  {
2
  "epoch": 20.0,
3
+ "eval_loss": 1.754138469696045,
4
+ "eval_runtime": 63.5156,
5
  "eval_samples": 1680,
6
+ "eval_samples_per_second": 26.45,
7
+ "eval_steps_per_second": 26.45,
8
+ "eval_wer": 0.8060781476121563
9
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9070fd8b49d5d6c5f2707c22213ba0d06e76a4228ad327299c6dd3c7b09f56a5
3
  size 164046635
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4024a23c429f41f5ad32173a51c26d61d24d79cd90beb516baaa28df171f942f
3
  size 164046635
runs/Oct27_16-04-41_brutasse/events.out.tfevents.1635350753.brutasse.21935.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cb2a528c85d0f2af4277e3ed6f7a1f5bd8a20c2f845d73ba9fd0efd0980e2d98
3
- size 57201
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e621c09fa484301709f06d1e9032e6cfa30cb2a906fcb556e8abf82c42b4b605
3
+ size 59443
runs/Oct27_16-04-41_brutasse/events.out.tfevents.1635354889.brutasse.21935.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:478071f6a026d89dec737724c8e0dc46a385e1b4d3892b736ef8f7395ceba675
3
+ size 358
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 20.0,
3
- "train_loss": 2.3102814756590746,
4
- "train_runtime": 4074.7961,
5
  "train_samples": 4620,
6
- "train_samples_per_second": 22.676,
7
- "train_steps_per_second": 0.712
8
  }
1
  {
2
  "epoch": 20.0,
3
+ "train_loss": 2.314238773214406,
4
+ "train_runtime": 4067.0318,
5
  "train_samples": 4620,
6
+ "train_samples_per_second": 22.719,
7
+ "train_steps_per_second": 0.713
8
  }
trainer_state.json CHANGED
@@ -69,10 +69,10 @@
69
  },
70
  {
71
  "epoch": 0.69,
72
- "eval_loss": 4.080197811126709,
73
- "eval_runtime": 62.227,
74
- "eval_samples_per_second": 26.998,
75
- "eval_steps_per_second": 26.998,
76
  "eval_wer": 1.0,
77
  "step": 100
78
  },
@@ -133,15 +133,15 @@
133
  {
134
  "epoch": 1.38,
135
  "learning_rate": 1.97e-05,
136
- "loss": 2.9806,
137
  "step": 200
138
  },
139
  {
140
  "epoch": 1.38,
141
- "eval_loss": 2.979236125946045,
142
- "eval_runtime": 62.498,
143
- "eval_samples_per_second": 26.881,
144
- "eval_steps_per_second": 26.881,
145
  "eval_wer": 1.0,
146
  "step": 200
147
  },
@@ -207,10 +207,10 @@
207
  },
208
  {
209
  "epoch": 2.07,
210
- "eval_loss": 2.9407615661621094,
211
- "eval_runtime": 62.3432,
212
- "eval_samples_per_second": 26.948,
213
- "eval_steps_per_second": 26.948,
214
  "eval_wer": 1.0,
215
  "step": 300
216
  },
@@ -276,10 +276,10 @@
276
  },
277
  {
278
  "epoch": 2.76,
279
- "eval_loss": 2.914332151412964,
280
- "eval_runtime": 62.583,
281
- "eval_samples_per_second": 26.844,
282
- "eval_steps_per_second": 26.844,
283
  "eval_wer": 1.0,
284
  "step": 400
285
  },
@@ -316,19 +316,19 @@
316
  {
317
  "epoch": 3.17,
318
  "learning_rate": 4.5700000000000006e-05,
319
- "loss": 2.8883,
320
  "step": 460
321
  },
322
  {
323
  "epoch": 3.24,
324
  "learning_rate": 4.6700000000000003e-05,
325
- "loss": 2.8962,
326
  "step": 470
327
  },
328
  {
329
  "epoch": 3.31,
330
  "learning_rate": 4.77e-05,
331
- "loss": 2.9122,
332
  "step": 480
333
  },
334
  {
@@ -345,1677 +345,1677 @@
345
  },
346
  {
347
  "epoch": 3.45,
348
- "eval_loss": 2.877377986907959,
349
- "eval_runtime": 62.5594,
350
- "eval_samples_per_second": 26.854,
351
- "eval_steps_per_second": 26.854,
352
  "eval_wer": 1.0,
353
  "step": 500
354
  },
355
  {
356
  "epoch": 3.52,
357
  "learning_rate": 5.0700000000000006e-05,
358
- "loss": 2.9153,
359
  "step": 510
360
  },
361
  {
362
  "epoch": 3.59,
363
  "learning_rate": 5.17e-05,
364
- "loss": 2.8849,
365
  "step": 520
366
  },
367
  {
368
  "epoch": 3.66,
369
  "learning_rate": 5.270000000000001e-05,
370
- "loss": 2.8636,
371
  "step": 530
372
  },
373
  {
374
  "epoch": 3.72,
375
  "learning_rate": 5.3700000000000004e-05,
376
- "loss": 2.8881,
377
  "step": 540
378
  },
379
  {
380
  "epoch": 3.79,
381
  "learning_rate": 5.470000000000001e-05,
382
- "loss": 2.9056,
383
  "step": 550
384
  },
385
  {
386
  "epoch": 3.86,
387
  "learning_rate": 5.5700000000000005e-05,
388
- "loss": 2.8631,
389
  "step": 560
390
  },
391
  {
392
  "epoch": 3.93,
393
  "learning_rate": 5.6699999999999996e-05,
394
- "loss": 2.844,
395
  "step": 570
396
  },
397
  {
398
  "epoch": 4.0,
399
  "learning_rate": 5.77e-05,
400
- "loss": 2.8526,
401
  "step": 580
402
  },
403
  {
404
  "epoch": 4.07,
405
  "learning_rate": 5.87e-05,
406
- "loss": 2.8439,
407
  "step": 590
408
  },
409
  {
410
  "epoch": 4.14,
411
  "learning_rate": 5.97e-05,
412
- "loss": 2.7712,
413
  "step": 600
414
  },
415
  {
416
  "epoch": 4.14,
417
- "eval_loss": 2.7769241333007812,
418
- "eval_runtime": 62.7853,
419
- "eval_samples_per_second": 26.758,
420
- "eval_steps_per_second": 26.758,
421
- "eval_wer": 0.9999310867617669,
422
  "step": 600
423
  },
424
  {
425
  "epoch": 4.21,
426
  "learning_rate": 6.07e-05,
427
- "loss": 2.7714,
428
  "step": 610
429
  },
430
  {
431
  "epoch": 4.28,
432
  "learning_rate": 6.170000000000001e-05,
433
- "loss": 2.8165,
434
  "step": 620
435
  },
436
  {
437
  "epoch": 4.34,
438
  "learning_rate": 6.27e-05,
439
- "loss": 2.7616,
440
  "step": 630
441
  },
442
  {
443
  "epoch": 4.41,
444
  "learning_rate": 6.37e-05,
445
- "loss": 2.7563,
446
  "step": 640
447
  },
448
  {
449
  "epoch": 4.48,
450
  "learning_rate": 6.47e-05,
451
- "loss": 2.7404,
452
  "step": 650
453
  },
454
  {
455
  "epoch": 4.55,
456
  "learning_rate": 6.570000000000001e-05,
457
- "loss": 2.7597,
458
  "step": 660
459
  },
460
  {
461
  "epoch": 4.62,
462
  "learning_rate": 6.670000000000001e-05,
463
- "loss": 2.69,
464
  "step": 670
465
  },
466
  {
467
  "epoch": 4.69,
468
  "learning_rate": 6.77e-05,
469
- "loss": 2.6965,
470
  "step": 680
471
  },
472
  {
473
  "epoch": 4.76,
474
  "learning_rate": 6.87e-05,
475
- "loss": 2.7619,
476
  "step": 690
477
  },
478
  {
479
  "epoch": 4.83,
480
  "learning_rate": 6.97e-05,
481
- "loss": 2.6662,
482
  "step": 700
483
  },
484
  {
485
  "epoch": 4.83,
486
- "eval_loss": 2.642500162124634,
487
- "eval_runtime": 62.8037,
488
- "eval_samples_per_second": 26.75,
489
- "eval_steps_per_second": 26.75,
490
- "eval_wer": 0.9789125491006823,
491
  "step": 700
492
  },
493
  {
494
  "epoch": 4.9,
495
  "learning_rate": 7.07e-05,
496
- "loss": 2.6053,
497
  "step": 710
498
  },
499
  {
500
  "epoch": 4.97,
501
  "learning_rate": 7.17e-05,
502
- "loss": 2.6388,
503
  "step": 720
504
  },
505
  {
506
  "epoch": 5.03,
507
  "learning_rate": 7.27e-05,
508
- "loss": 2.7087,
509
  "step": 730
510
  },
511
  {
512
  "epoch": 5.1,
513
  "learning_rate": 7.37e-05,
514
- "loss": 2.5896,
515
  "step": 740
516
  },
517
  {
518
  "epoch": 5.17,
519
  "learning_rate": 7.47e-05,
520
- "loss": 2.5602,
521
  "step": 750
522
  },
523
  {
524
  "epoch": 5.24,
525
  "learning_rate": 7.570000000000001e-05,
526
- "loss": 2.6433,
527
  "step": 760
528
  },
529
  {
530
  "epoch": 5.31,
531
  "learning_rate": 7.670000000000001e-05,
532
- "loss": 2.6369,
533
  "step": 770
534
  },
535
  {
536
  "epoch": 5.38,
537
  "learning_rate": 7.77e-05,
538
- "loss": 2.5393,
539
  "step": 780
540
  },
541
  {
542
  "epoch": 5.45,
543
  "learning_rate": 7.87e-05,
544
- "loss": 2.5482,
545
  "step": 790
546
  },
547
  {
548
  "epoch": 5.52,
549
  "learning_rate": 7.970000000000001e-05,
550
- "loss": 2.632,
551
  "step": 800
552
  },
553
  {
554
  "epoch": 5.52,
555
- "eval_loss": 2.51424503326416,
556
- "eval_runtime": 62.9233,
557
- "eval_samples_per_second": 26.699,
558
- "eval_steps_per_second": 26.699,
559
- "eval_wer": 1.0317690028254427,
560
  "step": 800
561
  },
562
  {
563
  "epoch": 5.59,
564
  "learning_rate": 8.070000000000001e-05,
565
- "loss": 2.5368,
566
  "step": 810
567
  },
568
  {
569
  "epoch": 5.66,
570
  "learning_rate": 8.17e-05,
571
- "loss": 2.4602,
572
  "step": 820
573
  },
574
  {
575
  "epoch": 5.72,
576
  "learning_rate": 8.27e-05,
577
- "loss": 2.5246,
578
  "step": 830
579
  },
580
  {
581
  "epoch": 5.79,
582
  "learning_rate": 8.37e-05,
583
- "loss": 2.6324,
584
  "step": 840
585
  },
586
  {
587
  "epoch": 5.86,
588
  "learning_rate": 8.47e-05,
589
- "loss": 2.4211,
590
  "step": 850
591
  },
592
  {
593
  "epoch": 5.93,
594
  "learning_rate": 8.57e-05,
595
- "loss": 2.4506,
596
  "step": 860
597
  },
598
  {
599
  "epoch": 6.0,
600
  "learning_rate": 8.67e-05,
601
- "loss": 2.5508,
602
  "step": 870
603
  },
604
  {
605
  "epoch": 6.07,
606
  "learning_rate": 8.77e-05,
607
- "loss": 2.4846,
608
  "step": 880
609
  },
610
  {
611
  "epoch": 6.14,
612
  "learning_rate": 8.87e-05,
613
- "loss": 2.3699,
614
  "step": 890
615
  },
616
  {
617
  "epoch": 6.21,
618
  "learning_rate": 8.970000000000001e-05,
619
- "loss": 2.3794,
620
  "step": 900
621
  },
622
  {
623
  "epoch": 6.21,
624
- "eval_loss": 2.436049222946167,
625
- "eval_runtime": 62.8458,
626
- "eval_samples_per_second": 26.732,
627
- "eval_steps_per_second": 26.732,
628
- "eval_wer": 1.1474743298187582,
629
  "step": 900
630
  },
631
  {
632
  "epoch": 6.28,
633
  "learning_rate": 9.070000000000001e-05,
634
- "loss": 2.565,
635
  "step": 910
636
  },
637
  {
638
  "epoch": 6.34,
639
  "learning_rate": 9.17e-05,
640
- "loss": 2.4147,
641
  "step": 920
642
  },
643
  {
644
  "epoch": 6.41,
645
  "learning_rate": 9.27e-05,
646
- "loss": 2.3703,
647
  "step": 930
648
  },
649
  {
650
  "epoch": 6.48,
651
  "learning_rate": 9.370000000000001e-05,
652
- "loss": 2.4564,
653
  "step": 940
654
  },
655
  {
656
  "epoch": 6.55,
657
  "learning_rate": 9.47e-05,
658
- "loss": 2.5176,
659
  "step": 950
660
  },
661
  {
662
  "epoch": 6.62,
663
  "learning_rate": 9.57e-05,
664
- "loss": 2.2307,
665
  "step": 960
666
  },
667
  {
668
  "epoch": 6.69,
669
  "learning_rate": 9.67e-05,
670
- "loss": 2.2363,
671
  "step": 970
672
  },
673
  {
674
  "epoch": 6.76,
675
  "learning_rate": 9.77e-05,
676
- "loss": 2.5199,
677
  "step": 980
678
  },
679
  {
680
  "epoch": 6.83,
681
  "learning_rate": 9.87e-05,
682
- "loss": 2.3646,
683
  "step": 990
684
  },
685
  {
686
  "epoch": 6.9,
687
  "learning_rate": 9.970000000000001e-05,
688
- "loss": 2.1406,
689
  "step": 1000
690
  },
691
  {
692
  "epoch": 6.9,
693
- "eval_loss": 2.293168783187866,
694
- "eval_runtime": 62.6106,
695
- "eval_samples_per_second": 26.833,
696
- "eval_steps_per_second": 26.833,
697
- "eval_wer": 0.9962097718971814,
698
  "step": 1000
699
  },
700
  {
701
  "epoch": 6.97,
702
  "learning_rate": 9.963157894736843e-05,
703
- "loss": 2.3291,
704
  "step": 1010
705
  },
706
  {
707
  "epoch": 7.03,
708
  "learning_rate": 9.910526315789475e-05,
709
- "loss": 2.5112,
710
  "step": 1020
711
  },
712
  {
713
  "epoch": 7.1,
714
  "learning_rate": 9.857894736842106e-05,
715
- "loss": 2.1736,
716
  "step": 1030
717
  },
718
  {
719
  "epoch": 7.17,
720
  "learning_rate": 9.805263157894737e-05,
721
- "loss": 2.1442,
722
  "step": 1040
723
  },
724
  {
725
  "epoch": 7.24,
726
  "learning_rate": 9.752631578947369e-05,
727
- "loss": 2.3392,
728
  "step": 1050
729
  },
730
  {
731
  "epoch": 7.31,
732
  "learning_rate": 9.7e-05,
733
- "loss": 2.3714,
734
  "step": 1060
735
  },
736
  {
737
  "epoch": 7.38,
738
  "learning_rate": 9.647368421052631e-05,
739
- "loss": 1.978,
740
  "step": 1070
741
  },
742
  {
743
  "epoch": 7.45,
744
  "learning_rate": 9.594736842105264e-05,
745
- "loss": 2.1326,
746
  "step": 1080
747
  },
748
  {
749
  "epoch": 7.52,
750
  "learning_rate": 9.542105263157895e-05,
751
- "loss": 2.445,
752
  "step": 1090
753
  },
754
  {
755
  "epoch": 7.59,
756
  "learning_rate": 9.489473684210527e-05,
757
- "loss": 2.223,
758
  "step": 1100
759
  },
760
  {
761
  "epoch": 7.59,
762
- "eval_loss": 2.159000873565674,
763
- "eval_runtime": 63.0762,
764
- "eval_samples_per_second": 26.634,
765
- "eval_steps_per_second": 26.634,
766
- "eval_wer": 0.9280545792846806,
767
  "step": 1100
768
  },
769
  {
770
  "epoch": 7.66,
771
  "learning_rate": 9.436842105263158e-05,
772
- "loss": 1.969,
773
  "step": 1110
774
  },
775
  {
776
  "epoch": 7.72,
777
  "learning_rate": 9.384210526315789e-05,
778
- "loss": 2.1386,
779
  "step": 1120
780
  },
781
  {
782
  "epoch": 7.79,
783
  "learning_rate": 9.331578947368422e-05,
784
- "loss": 2.3359,
785
  "step": 1130
786
  },
787
  {
788
  "epoch": 7.86,
789
  "learning_rate": 9.278947368421053e-05,
790
- "loss": 1.9546,
791
  "step": 1140
792
  },
793
  {
794
  "epoch": 7.93,
795
  "learning_rate": 9.226315789473686e-05,
796
- "loss": 2.0345,
797
  "step": 1150
798
  },
799
  {
800
  "epoch": 8.0,
801
  "learning_rate": 9.173684210526317e-05,
802
- "loss": 2.3499,
803
  "step": 1160
804
  },
805
  {
806
  "epoch": 8.07,
807
  "learning_rate": 9.121052631578948e-05,
808
- "loss": 2.2055,
809
  "step": 1170
810
  },
811
  {
812
  "epoch": 8.14,
813
  "learning_rate": 9.06842105263158e-05,
814
- "loss": 1.9114,
815
  "step": 1180
816
  },
817
  {
818
  "epoch": 8.21,
819
  "learning_rate": 9.015789473684211e-05,
820
- "loss": 2.0313,
821
  "step": 1190
822
  },
823
  {
824
  "epoch": 8.28,
825
  "learning_rate": 8.963157894736842e-05,
826
- "loss": 2.3607,
827
  "step": 1200
828
  },
829
  {
830
  "epoch": 8.28,
831
- "eval_loss": 2.0553243160247803,
832
- "eval_runtime": 62.862,
833
- "eval_samples_per_second": 26.725,
834
- "eval_steps_per_second": 26.725,
835
- "eval_wer": 0.8681689752601475,
836
  "step": 1200
837
  },
838
  {
839
  "epoch": 8.34,
840
  "learning_rate": 8.910526315789474e-05,
841
- "loss": 1.8724,
842
  "step": 1210
843
  },
844
  {
845
  "epoch": 8.41,
846
  "learning_rate": 8.857894736842106e-05,
847
- "loss": 1.875,
848
  "step": 1220
849
  },
850
  {
851
  "epoch": 8.48,
852
  "learning_rate": 8.805263157894737e-05,
853
- "loss": 2.187,
854
  "step": 1230
855
  },
856
  {
857
  "epoch": 8.55,
858
  "learning_rate": 8.752631578947369e-05,
859
- "loss": 2.2535,
860
  "step": 1240
861
  },
862
  {
863
  "epoch": 8.62,
864
  "learning_rate": 8.7e-05,
865
- "loss": 1.8278,
866
  "step": 1250
867
  },
868
  {
869
  "epoch": 8.69,
870
  "learning_rate": 8.647368421052631e-05,
871
- "loss": 1.9544,
872
  "step": 1260
873
  },
874
  {
875
  "epoch": 8.76,
876
  "learning_rate": 8.594736842105263e-05,
877
- "loss": 2.3521,
878
  "step": 1270
879
  },
880
  {
881
  "epoch": 8.83,
882
  "learning_rate": 8.542105263157894e-05,
883
- "loss": 2.0843,
884
  "step": 1280
885
  },
886
  {
887
  "epoch": 8.9,
888
  "learning_rate": 8.489473684210527e-05,
889
- "loss": 1.8205,
890
  "step": 1290
891
  },
892
  {
893
  "epoch": 8.97,
894
  "learning_rate": 8.436842105263158e-05,
895
- "loss": 2.1058,
896
  "step": 1300
897
  },
898
  {
899
  "epoch": 8.97,
900
- "eval_loss": 2.0442521572113037,
901
- "eval_runtime": 62.644,
902
- "eval_samples_per_second": 26.818,
903
- "eval_steps_per_second": 26.818,
904
- "eval_wer": 0.8901522982564951,
905
  "step": 1300
906
  },
907
  {
908
  "epoch": 9.03,
909
  "learning_rate": 8.38421052631579e-05,
910
- "loss": 2.3144,
911
  "step": 1310
912
  },
913
  {
914
  "epoch": 9.1,
915
  "learning_rate": 8.331578947368422e-05,
916
- "loss": 1.7695,
917
  "step": 1320
918
  },
919
  {
920
  "epoch": 9.17,
921
  "learning_rate": 8.278947368421053e-05,
922
- "loss": 1.8144,
923
  "step": 1330
924
  },
925
  {
926
  "epoch": 9.24,
927
  "learning_rate": 8.226315789473684e-05,
928
- "loss": 2.174,
929
  "step": 1340
930
  },
931
  {
932
  "epoch": 9.31,
933
  "learning_rate": 8.173684210526317e-05,
934
- "loss": 2.1989,
935
  "step": 1350
936
  },
937
  {
938
  "epoch": 9.38,
939
  "learning_rate": 8.121052631578948e-05,
940
- "loss": 1.8499,
941
  "step": 1360
942
  },
943
  {
944
  "epoch": 9.45,
945
  "learning_rate": 8.06842105263158e-05,
946
- "loss": 1.9672,
947
  "step": 1370
948
  },
949
  {
950
  "epoch": 9.52,
951
  "learning_rate": 8.015789473684211e-05,
952
- "loss": 2.2523,
953
  "step": 1380
954
  },
955
  {
956
  "epoch": 9.59,
957
  "learning_rate": 7.963157894736842e-05,
958
- "loss": 1.8925,
959
  "step": 1390
960
  },
961
  {
962
  "epoch": 9.66,
963
  "learning_rate": 7.910526315789474e-05,
964
- "loss": 1.8191,
965
  "step": 1400
966
  },
967
  {
968
  "epoch": 9.66,
969
- "eval_loss": 1.9585875272750854,
970
- "eval_runtime": 62.9578,
971
- "eval_samples_per_second": 26.685,
972
- "eval_steps_per_second": 26.685,
973
- "eval_wer": 0.8237199365998208,
974
  "step": 1400
975
  },
976
  {
977
  "epoch": 9.72,
978
  "learning_rate": 7.857894736842105e-05,
979
- "loss": 2.1013,
980
  "step": 1410
981
  },
982
  {
983
  "epoch": 9.79,
984
  "learning_rate": 7.805263157894738e-05,
985
- "loss": 2.2354,
986
  "step": 1420
987
  },
988
  {
989
  "epoch": 9.86,
990
  "learning_rate": 7.752631578947369e-05,
991
- "loss": 1.7551,
992
  "step": 1430
993
  },
994
  {
995
  "epoch": 9.93,
996
  "learning_rate": 7.7e-05,
997
- "loss": 1.8477,
998
  "step": 1440
999
  },
1000
  {
1001
  "epoch": 10.0,
1002
  "learning_rate": 7.647368421052631e-05,
1003
- "loss": 2.1596,
1004
  "step": 1450
1005
  },
1006
  {
1007
  "epoch": 10.07,
1008
  "learning_rate": 7.594736842105263e-05,
1009
- "loss": 2.0151,
1010
  "step": 1460
1011
  },
1012
  {
1013
  "epoch": 10.14,
1014
  "learning_rate": 7.542105263157895e-05,
1015
- "loss": 1.7181,
1016
  "step": 1470
1017
  },
1018
  {
1019
  "epoch": 10.21,
1020
  "learning_rate": 7.489473684210527e-05,
1021
- "loss": 1.8951,
1022
  "step": 1480
1023
  },
1024
  {
1025
  "epoch": 10.28,
1026
  "learning_rate": 7.43684210526316e-05,
1027
- "loss": 2.3172,
1028
  "step": 1490
1029
  },
1030
  {
1031
  "epoch": 10.34,
1032
  "learning_rate": 7.38421052631579e-05,
1033
- "loss": 1.7013,
1034
  "step": 1500
1035
  },
1036
  {
1037
  "epoch": 10.34,
1038
- "eval_loss": 1.9585844278335571,
1039
- "eval_runtime": 62.8863,
1040
- "eval_samples_per_second": 26.715,
1041
- "eval_steps_per_second": 26.715,
1042
- "eval_wer": 0.8689270208807112,
1043
  "step": 1500
1044
  },
1045
  {
1046
  "epoch": 10.41,
1047
  "learning_rate": 7.331578947368422e-05,
1048
- "loss": 1.7497,
1049
  "step": 1510
1050
  },
1051
  {
1052
  "epoch": 10.48,
1053
  "learning_rate": 7.278947368421053e-05,
1054
- "loss": 2.0471,
1055
  "step": 1520
1056
  },
1057
  {
1058
  "epoch": 10.55,
1059
  "learning_rate": 7.226315789473685e-05,
1060
- "loss": 2.2056,
1061
  "step": 1530
1062
  },
1063
  {
1064
  "epoch": 10.62,
1065
  "learning_rate": 7.173684210526316e-05,
1066
- "loss": 1.7579,
1067
  "step": 1540
1068
  },
1069
  {
1070
  "epoch": 10.69,
1071
  "learning_rate": 7.121052631578947e-05,
1072
- "loss": 1.9813,
1073
  "step": 1550
1074
  },
1075
  {
1076
  "epoch": 10.76,
1077
  "learning_rate": 7.06842105263158e-05,
1078
- "loss": 2.2292,
1079
  "step": 1560
1080
  },
1081
  {
1082
  "epoch": 10.83,
1083
  "learning_rate": 7.015789473684211e-05,
1084
- "loss": 1.9037,
1085
  "step": 1570
1086
  },
1087
  {
1088
  "epoch": 10.9,
1089
  "learning_rate": 6.963157894736842e-05,
1090
- "loss": 1.736,
1091
  "step": 1580
1092
  },
1093
  {
1094
  "epoch": 10.97,
1095
  "learning_rate": 6.910526315789474e-05,
1096
- "loss": 1.8723,
1097
  "step": 1590
1098
  },
1099
  {
1100
  "epoch": 11.03,
1101
  "learning_rate": 6.857894736842105e-05,
1102
- "loss": 2.2289,
1103
  "step": 1600
1104
  },
1105
  {
1106
  "epoch": 11.03,
1107
- "eval_loss": 1.908236026763916,
1108
- "eval_runtime": 62.9372,
1109
- "eval_samples_per_second": 26.693,
1110
- "eval_steps_per_second": 26.693,
1111
- "eval_wer": 0.8610709117221418,
1112
  "step": 1600
1113
  },
1114
  {
1115
  "epoch": 11.1,
1116
  "learning_rate": 6.805263157894736e-05,
1117
- "loss": 1.7338,
1118
  "step": 1610
1119
  },
1120
  {
1121
  "epoch": 11.17,
1122
  "learning_rate": 6.752631578947368e-05,
1123
- "loss": 1.7262,
1124
  "step": 1620
1125
  },
1126
  {
1127
  "epoch": 11.24,
1128
  "learning_rate": 6.7e-05,
1129
- "loss": 2.0395,
1130
  "step": 1630
1131
  },
1132
  {
1133
  "epoch": 11.31,
1134
  "learning_rate": 6.647368421052632e-05,
1135
- "loss": 2.0661,
1136
  "step": 1640
1137
  },
1138
  {
1139
  "epoch": 11.38,
1140
  "learning_rate": 6.594736842105264e-05,
1141
- "loss": 1.7218,
1142
  "step": 1650
1143
  },
1144
  {
1145
  "epoch": 11.45,
1146
  "learning_rate": 6.542105263157895e-05,
1147
- "loss": 1.8517,
1148
  "step": 1660
1149
  },
1150
  {
1151
  "epoch": 11.52,
1152
  "learning_rate": 6.489473684210527e-05,
1153
- "loss": 2.2154,
1154
  "step": 1670
1155
  },
1156
  {
1157
  "epoch": 11.59,
1158
  "learning_rate": 6.436842105263158e-05,
1159
- "loss": 1.7295,
1160
  "step": 1680
1161
  },
1162
  {
1163
  "epoch": 11.66,
1164
  "learning_rate": 6.384210526315791e-05,
1165
- "loss": 1.6468,
1166
  "step": 1690
1167
  },
1168
  {
1169
  "epoch": 11.72,
1170
  "learning_rate": 6.331578947368422e-05,
1171
- "loss": 1.9125,
1172
  "step": 1700
1173
  },
1174
  {
1175
  "epoch": 11.72,
1176
- "eval_loss": 1.8771533966064453,
1177
- "eval_runtime": 62.6761,
1178
- "eval_samples_per_second": 26.804,
1179
- "eval_steps_per_second": 26.804,
1180
- "eval_wer": 0.8149679553442216,
1181
  "step": 1700
1182
  },
1183
  {
1184
  "epoch": 11.79,
1185
  "learning_rate": 6.278947368421053e-05,
1186
- "loss": 2.1522,
1187
  "step": 1710
1188
  },
1189
  {
1190
  "epoch": 11.86,
1191
  "learning_rate": 6.226315789473685e-05,
1192
- "loss": 1.6395,
1193
  "step": 1720
1194
  },
1195
  {
1196
  "epoch": 11.93,
1197
  "learning_rate": 6.173684210526316e-05,
1198
- "loss": 1.7794,
1199
  "step": 1730
1200
  },
1201
  {
1202
  "epoch": 12.0,
1203
  "learning_rate": 6.121052631578947e-05,
1204
- "loss": 2.1512,
1205
  "step": 1740
1206
  },
1207
  {
1208
  "epoch": 12.07,
1209
  "learning_rate": 6.0684210526315785e-05,
1210
- "loss": 1.9098,
1211
  "step": 1750
1212
  },
1213
  {
1214
  "epoch": 12.14,
1215
  "learning_rate": 6.015789473684211e-05,
1216
- "loss": 1.6335,
1217
  "step": 1760
1218
  },
1219
  {
1220
  "epoch": 12.21,
1221
  "learning_rate": 5.9631578947368425e-05,
1222
- "loss": 1.8464,
1223
  "step": 1770
1224
  },
1225
  {
1226
  "epoch": 12.28,
1227
  "learning_rate": 5.9105263157894744e-05,
1228
- "loss": 2.15,
1229
  "step": 1780
1230
  },
1231
  {
1232
  "epoch": 12.34,
1233
  "learning_rate": 5.857894736842106e-05,
1234
- "loss": 1.7132,
1235
  "step": 1790
1236
  },
1237
  {
1238
  "epoch": 12.41,
1239
  "learning_rate": 5.805263157894737e-05,
1240
- "loss": 1.6424,
1241
  "step": 1800
1242
  },
1243
  {
1244
  "epoch": 12.41,
1245
- "eval_loss": 1.8670605421066284,
1246
- "eval_runtime": 62.8262,
1247
- "eval_samples_per_second": 26.74,
1248
- "eval_steps_per_second": 26.74,
1249
- "eval_wer": 0.7870580938598305,
1250
  "step": 1800
1251
  },
1252
  {
1253
  "epoch": 12.48,
1254
  "learning_rate": 5.752631578947368e-05,
1255
- "loss": 2.0029,
1256
  "step": 1810
1257
  },
1258
  {
1259
  "epoch": 12.55,
1260
  "learning_rate": 5.6999999999999996e-05,
1261
- "loss": 2.0917,
1262
  "step": 1820
1263
  },
1264
  {
1265
  "epoch": 12.62,
1266
  "learning_rate": 5.647368421052632e-05,
1267
- "loss": 1.6332,
1268
  "step": 1830
1269
  },
1270
  {
1271
  "epoch": 12.69,
1272
  "learning_rate": 5.5947368421052636e-05,
1273
- "loss": 1.7216,
1274
  "step": 1840
1275
  },
1276
  {
1277
  "epoch": 12.76,
1278
  "learning_rate": 5.542105263157895e-05,
1279
- "loss": 2.0909,
1280
  "step": 1850
1281
  },
1282
  {
1283
  "epoch": 12.83,
1284
  "learning_rate": 5.489473684210527e-05,
1285
- "loss": 1.8317,
1286
  "step": 1860
1287
  },
1288
  {
1289
  "epoch": 12.9,
1290
  "learning_rate": 5.436842105263158e-05,
1291
- "loss": 1.6383,
1292
  "step": 1870
1293
  },
1294
  {
1295
  "epoch": 12.97,
1296
  "learning_rate": 5.3842105263157895e-05,
1297
- "loss": 1.8577,
1298
  "step": 1880
1299
  },
1300
  {
1301
  "epoch": 13.03,
1302
  "learning_rate": 5.331578947368421e-05,
1303
- "loss": 2.1335,
1304
  "step": 1890
1305
  },
1306
  {
1307
  "epoch": 13.1,
1308
  "learning_rate": 5.2789473684210534e-05,
1309
- "loss": 1.6553,
1310
  "step": 1900
1311
  },
1312
  {
1313
  "epoch": 13.1,
1314
- "eval_loss": 1.819168210029602,
1315
- "eval_runtime": 62.7381,
1316
- "eval_samples_per_second": 26.778,
1317
- "eval_steps_per_second": 26.778,
1318
- "eval_wer": 0.8120735993384329,
1319
  "step": 1900
1320
  },
1321
  {
1322
  "epoch": 13.17,
1323
  "learning_rate": 5.226315789473685e-05,
1324
- "loss": 1.7138,
1325
  "step": 1910
1326
  },
1327
  {
1328
  "epoch": 13.24,
1329
  "learning_rate": 5.173684210526316e-05,
1330
- "loss": 1.9229,
1331
  "step": 1920
1332
  },
1333
  {
1334
  "epoch": 13.31,
1335
  "learning_rate": 5.121052631578947e-05,
1336
- "loss": 1.9276,
1337
  "step": 1930
1338
  },
1339
  {
1340
  "epoch": 13.38,
1341
  "learning_rate": 5.068421052631579e-05,
1342
- "loss": 1.6501,
1343
  "step": 1940
1344
  },
1345
  {
1346
  "epoch": 13.45,
1347
  "learning_rate": 5.0157894736842106e-05,
1348
- "loss": 1.7051,
1349
  "step": 1950
1350
  },
1351
  {
1352
  "epoch": 13.52,
1353
  "learning_rate": 4.9631578947368426e-05,
1354
- "loss": 2.1649,
1355
  "step": 1960
1356
  },
1357
  {
1358
  "epoch": 13.59,
1359
  "learning_rate": 4.910526315789474e-05,
1360
- "loss": 1.7654,
1361
  "step": 1970
1362
  },
1363
  {
1364
  "epoch": 13.66,
1365
  "learning_rate": 4.857894736842106e-05,
1366
- "loss": 1.6051,
1367
  "step": 1980
1368
  },
1369
  {
1370
  "epoch": 13.72,
1371
  "learning_rate": 4.805263157894737e-05,
1372
- "loss": 1.9606,
1373
  "step": 1990
1374
  },
1375
  {
1376
  "epoch": 13.79,
1377
  "learning_rate": 4.7526315789473684e-05,
1378
- "loss": 2.0382,
1379
  "step": 2000
1380
  },
1381
  {
1382
  "epoch": 13.79,
1383
- "eval_loss": 1.814583659172058,
1384
- "eval_runtime": 62.7874,
1385
- "eval_samples_per_second": 26.757,
1386
- "eval_steps_per_second": 26.757,
1387
- "eval_wer": 0.8440493418785748,
1388
  "step": 2000
1389
  },
1390
  {
1391
  "epoch": 13.86,
1392
  "learning_rate": 4.7e-05,
1393
- "loss": 1.5853,
1394
  "step": 2010
1395
  },
1396
  {
1397
  "epoch": 13.93,
1398
  "learning_rate": 4.647368421052632e-05,
1399
- "loss": 1.6782,
1400
  "step": 2020
1401
  },
1402
  {
1403
  "epoch": 14.0,
1404
  "learning_rate": 4.594736842105264e-05,
1405
- "loss": 1.9267,
1406
  "step": 2030
1407
  },
1408
  {
1409
  "epoch": 14.07,
1410
  "learning_rate": 4.542105263157895e-05,
1411
- "loss": 1.854,
1412
  "step": 2040
1413
  },
1414
  {
1415
  "epoch": 14.14,
1416
  "learning_rate": 4.489473684210527e-05,
1417
- "loss": 1.5179,
1418
  "step": 2050
1419
  },
1420
  {
1421
  "epoch": 14.21,
1422
  "learning_rate": 4.436842105263158e-05,
1423
- "loss": 1.776,
1424
  "step": 2060
1425
  },
1426
  {
1427
  "epoch": 14.28,
1428
  "learning_rate": 4.3842105263157895e-05,
1429
- "loss": 2.1206,
1430
  "step": 2070
1431
  },
1432
  {
1433
  "epoch": 14.34,
1434
  "learning_rate": 4.3315789473684215e-05,
1435
- "loss": 1.679,
1436
  "step": 2080
1437
  },
1438
  {
1439
  "epoch": 14.41,
1440
  "learning_rate": 4.278947368421053e-05,
1441
- "loss": 1.6648,
1442
  "step": 2090
1443
  },
1444
  {
1445
  "epoch": 14.48,
1446
  "learning_rate": 4.226315789473684e-05,
1447
- "loss": 1.8785,
1448
  "step": 2100
1449
  },
1450
  {
1451
  "epoch": 14.48,
1452
- "eval_loss": 1.8094158172607422,
1453
- "eval_runtime": 62.7821,
1454
- "eval_samples_per_second": 26.759,
1455
- "eval_steps_per_second": 26.759,
1456
- "eval_wer": 0.8202053614499345,
1457
  "step": 2100
1458
  },
1459
  {
1460
  "epoch": 14.55,
1461
  "learning_rate": 4.1736842105263154e-05,
1462
- "loss": 1.9647,
1463
  "step": 2110
1464
  },
1465
  {
1466
  "epoch": 14.62,
1467
  "learning_rate": 4.1210526315789474e-05,
1468
- "loss": 1.5669,
1469
  "step": 2120
1470
  },
1471
  {
1472
  "epoch": 14.69,
1473
  "learning_rate": 4.0684210526315794e-05,
1474
- "loss": 1.7738,
1475
  "step": 2130
1476
  },
1477
  {
1478
  "epoch": 14.76,
1479
  "learning_rate": 4.015789473684211e-05,
1480
- "loss": 2.025,
1481
  "step": 2140
1482
  },
1483
  {
1484
  "epoch": 14.83,
1485
  "learning_rate": 3.9631578947368426e-05,
1486
- "loss": 1.7119,
1487
  "step": 2150
1488
  },
1489
  {
1490
  "epoch": 14.9,
1491
  "learning_rate": 3.910526315789474e-05,
1492
- "loss": 1.5006,
1493
  "step": 2160
1494
  },
1495
  {
1496
  "epoch": 14.97,
1497
  "learning_rate": 3.857894736842105e-05,
1498
- "loss": 1.7046,
1499
  "step": 2170
1500
  },
1501
  {
1502
  "epoch": 15.03,
1503
  "learning_rate": 3.8052631578947365e-05,
1504
- "loss": 2.0324,
1505
  "step": 2180
1506
  },
1507
  {
1508
  "epoch": 15.1,
1509
  "learning_rate": 3.7526315789473685e-05,
1510
- "loss": 1.574,
1511
  "step": 2190
1512
  },
1513
  {
1514
  "epoch": 15.17,
1515
  "learning_rate": 3.7e-05,
1516
- "loss": 1.6148,
1517
  "step": 2200
1518
  },
1519
  {
1520
  "epoch": 15.17,
1521
- "eval_loss": 1.8131372928619385,
1522
- "eval_runtime": 63.0605,
1523
- "eval_samples_per_second": 26.641,
1524
- "eval_steps_per_second": 26.641,
1525
- "eval_wer": 0.8233753704086555,
1526
  "step": 2200
1527
  },
1528
  {
1529
  "epoch": 15.24,
1530
  "learning_rate": 3.647368421052632e-05,
1531
- "loss": 1.8876,
1532
  "step": 2210
1533
  },
1534
  {
1535
  "epoch": 15.31,
1536
  "learning_rate": 3.594736842105264e-05,
1537
- "loss": 1.8269,
1538
  "step": 2220
1539
  },
1540
  {
1541
  "epoch": 15.38,
1542
  "learning_rate": 3.542105263157895e-05,
1543
- "loss": 1.5885,
1544
  "step": 2230
1545
  },
1546
  {
1547
  "epoch": 15.45,
1548
  "learning_rate": 3.4894736842105264e-05,
1549
- "loss": 1.6395,
1550
  "step": 2240
1551
  },
1552
  {
1553
  "epoch": 15.52,
1554
  "learning_rate": 3.436842105263158e-05,
1555
- "loss": 2.0412,
1556
  "step": 2250
1557
  },
1558
  {
1559
  "epoch": 15.59,
1560
  "learning_rate": 3.3842105263157896e-05,
1561
- "loss": 1.5959,
1562
  "step": 2260
1563
  },
1564
  {
1565
  "epoch": 15.66,
1566
  "learning_rate": 3.331578947368421e-05,
1567
- "loss": 1.5854,
1568
  "step": 2270
1569
  },
1570
  {
1571
  "epoch": 15.72,
1572
  "learning_rate": 3.278947368421052e-05,
1573
- "loss": 1.8598,
1574
  "step": 2280
1575
  },
1576
  {
1577
  "epoch": 15.79,
1578
  "learning_rate": 3.226315789473684e-05,
1579
- "loss": 2.0513,
1580
  "step": 2290
1581
  },
1582
  {
1583
  "epoch": 15.86,
1584
  "learning_rate": 3.173684210526316e-05,
1585
- "loss": 1.4948,
1586
  "step": 2300
1587
  },
1588
  {
1589
  "epoch": 15.86,
1590
- "eval_loss": 1.7968575954437256,
1591
- "eval_runtime": 62.8797,
1592
- "eval_samples_per_second": 26.718,
1593
- "eval_steps_per_second": 26.718,
1594
- "eval_wer": 0.8255805940321136,
1595
  "step": 2300
1596
  },
1597
  {
1598
  "epoch": 15.93,
1599
  "learning_rate": 3.1210526315789475e-05,
1600
- "loss": 1.609,
1601
  "step": 2310
1602
  },
1603
  {
1604
  "epoch": 16.0,
1605
  "learning_rate": 3.0684210526315795e-05,
1606
- "loss": 1.9741,
1607
  "step": 2320
1608
  },
1609
  {
1610
  "epoch": 16.07,
1611
  "learning_rate": 3.0157894736842108e-05,
1612
- "loss": 1.805,
1613
  "step": 2330
1614
  },
1615
  {
1616
  "epoch": 16.14,
1617
  "learning_rate": 2.963157894736842e-05,
1618
- "loss": 1.489,
1619
  "step": 2340
1620
  },
1621
  {
1622
  "epoch": 16.21,
1623
  "learning_rate": 2.910526315789474e-05,
1624
- "loss": 1.6549,
1625
  "step": 2350
1626
  },
1627
  {
1628
  "epoch": 16.28,
1629
  "learning_rate": 2.8578947368421057e-05,
1630
- "loss": 2.0304,
1631
  "step": 2360
1632
  },
1633
  {
1634
  "epoch": 16.34,
1635
  "learning_rate": 2.805263157894737e-05,
1636
- "loss": 1.5466,
1637
  "step": 2370
1638
  },
1639
  {
1640
  "epoch": 16.41,
1641
  "learning_rate": 2.7526315789473683e-05,
1642
- "loss": 1.5875,
1643
  "step": 2380
1644
  },
1645
  {
1646
  "epoch": 16.48,
1647
  "learning_rate": 2.7000000000000002e-05,
1648
- "loss": 1.7809,
1649
  "step": 2390
1650
  },
1651
  {
1652
  "epoch": 16.55,
1653
  "learning_rate": 2.647368421052632e-05,
1654
- "loss": 1.8844,
1655
  "step": 2400
1656
  },
1657
  {
1658
  "epoch": 16.55,
1659
- "eval_loss": 1.7790474891662598,
1660
- "eval_runtime": 62.917,
1661
- "eval_samples_per_second": 26.702,
1662
- "eval_steps_per_second": 26.702,
1663
- "eval_wer": 0.8066983667562538,
1664
  "step": 2400
1665
  },
1666
  {
1667
  "epoch": 16.62,
1668
  "learning_rate": 2.5947368421052632e-05,
1669
- "loss": 1.4775,
1670
  "step": 2410
1671
  },
1672
  {
1673
  "epoch": 16.69,
1674
  "learning_rate": 2.542105263157895e-05,
1675
- "loss": 1.6398,
1676
  "step": 2420
1677
  },
1678
  {
1679
  "epoch": 16.76,
1680
  "learning_rate": 2.4894736842105264e-05,
1681
- "loss": 1.9707,
1682
  "step": 2430
1683
  },
1684
  {
1685
  "epoch": 16.83,
1686
  "learning_rate": 2.436842105263158e-05,
1687
- "loss": 1.6675,
1688
  "step": 2440
1689
  },
1690
  {
1691
  "epoch": 16.9,
1692
  "learning_rate": 2.3842105263157897e-05,
1693
- "loss": 1.5405,
1694
  "step": 2450
1695
  },
1696
  {
1697
  "epoch": 16.97,
1698
  "learning_rate": 2.331578947368421e-05,
1699
- "loss": 1.6781,
1700
  "step": 2460
1701
  },
1702
  {
1703
  "epoch": 17.03,
1704
  "learning_rate": 2.2789473684210527e-05,
1705
- "loss": 1.993,
1706
  "step": 2470
1707
  },
1708
  {
1709
  "epoch": 17.1,
1710
  "learning_rate": 2.2263157894736843e-05,
1711
- "loss": 1.4809,
1712
  "step": 2480
1713
  },
1714
  {
1715
  "epoch": 17.17,
1716
  "learning_rate": 2.173684210526316e-05,
1717
- "loss": 1.5379,
1718
  "step": 2490
1719
  },
1720
  {
1721
  "epoch": 17.24,
1722
  "learning_rate": 2.1210526315789476e-05,
1723
- "loss": 1.8099,
1724
  "step": 2500
1725
  },
1726
  {
1727
  "epoch": 17.24,
1728
- "eval_loss": 1.7782752513885498,
1729
- "eval_runtime": 62.8161,
1730
- "eval_samples_per_second": 26.745,
1731
- "eval_steps_per_second": 26.745,
1732
- "eval_wer": 0.8073185859003514,
1733
  "step": 2500
1734
  },
1735
  {
1736
  "epoch": 17.31,
1737
  "learning_rate": 2.068421052631579e-05,
1738
- "loss": 1.8612,
1739
  "step": 2510
1740
  },
1741
  {
1742
  "epoch": 17.38,
1743
  "learning_rate": 2.0157894736842105e-05,
1744
- "loss": 1.4152,
1745
  "step": 2520
1746
  },
1747
  {
1748
  "epoch": 17.45,
1749
  "learning_rate": 1.963157894736842e-05,
1750
- "loss": 1.6037,
1751
  "step": 2530
1752
  },
1753
  {
1754
  "epoch": 17.52,
1755
  "learning_rate": 1.9105263157894738e-05,
1756
- "loss": 1.9064,
1757
  "step": 2540
1758
  },
1759
  {
1760
  "epoch": 17.59,
1761
  "learning_rate": 1.8578947368421054e-05,
1762
- "loss": 1.6072,
1763
  "step": 2550
1764
  },
1765
  {
1766
  "epoch": 17.66,
1767
  "learning_rate": 1.8052631578947367e-05,
1768
- "loss": 1.5197,
1769
  "step": 2560
1770
  },
1771
  {
1772
  "epoch": 17.72,
1773
  "learning_rate": 1.7526315789473683e-05,
1774
- "loss": 1.745,
1775
  "step": 2570
1776
  },
1777
  {
1778
  "epoch": 17.79,
1779
  "learning_rate": 1.7000000000000003e-05,
1780
- "loss": 1.89,
1781
  "step": 2580
1782
  },
1783
  {
1784
  "epoch": 17.86,
1785
  "learning_rate": 1.6473684210526316e-05,
1786
- "loss": 1.4768,
1787
  "step": 2590
1788
  },
1789
  {
1790
  "epoch": 17.93,
1791
  "learning_rate": 1.5947368421052633e-05,
1792
- "loss": 1.5488,
1793
  "step": 2600
1794
  },
1795
  {
1796
  "epoch": 17.93,
1797
- "eval_loss": 1.766753911972046,
1798
- "eval_runtime": 62.8076,
1799
- "eval_samples_per_second": 26.748,
1800
- "eval_steps_per_second": 26.748,
1801
- "eval_wer": 0.7971194266418579,
1802
  "step": 2600
1803
  },
1804
  {
1805
  "epoch": 18.0,
1806
  "learning_rate": 1.5421052631578946e-05,
1807
- "loss": 1.9043,
1808
  "step": 2610
1809
  },
1810
  {
1811
  "epoch": 18.07,
1812
  "learning_rate": 1.4894736842105264e-05,
1813
- "loss": 1.6967,
1814
  "step": 2620
1815
  },
1816
  {
1817
  "epoch": 18.14,
1818
  "learning_rate": 1.4368421052631582e-05,
1819
- "loss": 1.4113,
1820
  "step": 2630
1821
  },
1822
  {
1823
  "epoch": 18.21,
1824
  "learning_rate": 1.3842105263157895e-05,
1825
- "loss": 1.616,
1826
  "step": 2640
1827
  },
1828
  {
1829
  "epoch": 18.28,
1830
  "learning_rate": 1.3315789473684213e-05,
1831
- "loss": 1.9563,
1832
  "step": 2650
1833
  },
1834
  {
1835
  "epoch": 18.34,
1836
  "learning_rate": 1.2789473684210526e-05,
1837
- "loss": 1.4982,
1838
  "step": 2660
1839
  },
1840
  {
1841
  "epoch": 18.41,
1842
  "learning_rate": 1.2263157894736844e-05,
1843
- "loss": 1.4753,
1844
  "step": 2670
1845
  },
1846
  {
1847
  "epoch": 18.48,
1848
  "learning_rate": 1.1736842105263158e-05,
1849
- "loss": 1.777,
1850
  "step": 2680
1851
  },
1852
  {
1853
  "epoch": 18.55,
1854
  "learning_rate": 1.1210526315789475e-05,
1855
- "loss": 1.801,
1856
  "step": 2690
1857
  },
1858
  {
1859
  "epoch": 18.62,
1860
  "learning_rate": 1.068421052631579e-05,
1861
- "loss": 1.458,
1862
  "step": 2700
1863
  },
1864
  {
1865
  "epoch": 18.62,
1866
- "eval_loss": 1.7622854709625244,
1867
- "eval_runtime": 63.2501,
1868
- "eval_samples_per_second": 26.561,
1869
- "eval_steps_per_second": 26.561,
1870
- "eval_wer": 0.7973261663565571,
1871
  "step": 2700
1872
  },
1873
  {
1874
  "epoch": 18.69,
1875
  "learning_rate": 1.0157894736842106e-05,
1876
- "loss": 1.5744,
1877
  "step": 2710
1878
  },
1879
  {
1880
  "epoch": 18.76,
1881
  "learning_rate": 9.631578947368422e-06,
1882
- "loss": 1.9091,
1883
  "step": 2720
1884
  },
1885
  {
1886
  "epoch": 18.83,
1887
  "learning_rate": 9.105263157894737e-06,
1888
- "loss": 1.5814,
1889
  "step": 2730
1890
  },
1891
  {
1892
  "epoch": 18.9,
1893
  "learning_rate": 8.578947368421053e-06,
1894
- "loss": 1.5344,
1895
  "step": 2740
1896
  },
1897
  {
1898
  "epoch": 18.97,
1899
  "learning_rate": 8.052631578947368e-06,
1900
- "loss": 1.6318,
1901
  "step": 2750
1902
  },
1903
  {
1904
  "epoch": 19.03,
1905
  "learning_rate": 7.526315789473684e-06,
1906
- "loss": 1.8497,
1907
  "step": 2760
1908
  },
1909
  {
1910
  "epoch": 19.1,
1911
  "learning_rate": 7.000000000000001e-06,
1912
- "loss": 1.4525,
1913
  "step": 2770
1914
  },
1915
  {
1916
  "epoch": 19.17,
1917
  "learning_rate": 6.473684210526316e-06,
1918
- "loss": 1.467,
1919
  "step": 2780
1920
  },
1921
  {
1922
  "epoch": 19.24,
1923
  "learning_rate": 5.947368421052632e-06,
1924
- "loss": 1.8153,
1925
  "step": 2790
1926
  },
1927
  {
1928
  "epoch": 19.31,
1929
  "learning_rate": 5.421052631578947e-06,
1930
- "loss": 1.7656,
1931
  "step": 2800
1932
  },
1933
  {
1934
  "epoch": 19.31,
1935
- "eval_loss": 1.7574056386947632,
1936
- "eval_runtime": 62.7899,
1937
- "eval_samples_per_second": 26.756,
1938
- "eval_steps_per_second": 26.756,
1939
- "eval_wer": 0.8012542209358418,
1940
  "step": 2800
1941
  },
1942
  {
1943
  "epoch": 19.38,
1944
  "learning_rate": 4.894736842105263e-06,
1945
- "loss": 1.4587,
1946
  "step": 2810
1947
  },
1948
  {
1949
  "epoch": 19.45,
1950
  "learning_rate": 4.368421052631579e-06,
1951
- "loss": 1.5571,
1952
  "step": 2820
1953
  },
1954
  {
1955
  "epoch": 19.52,
1956
  "learning_rate": 3.842105263157895e-06,
1957
- "loss": 1.9397,
1958
  "step": 2830
1959
  },
1960
  {
1961
  "epoch": 19.59,
1962
  "learning_rate": 3.315789473684211e-06,
1963
- "loss": 1.498,
1964
  "step": 2840
1965
  },
1966
  {
1967
  "epoch": 19.66,
1968
  "learning_rate": 2.7894736842105266e-06,
1969
- "loss": 1.4464,
1970
  "step": 2850
1971
  },
1972
  {
1973
  "epoch": 19.72,
1974
  "learning_rate": 2.2631578947368426e-06,
1975
- "loss": 1.6899,
1976
  "step": 2860
1977
  },
1978
  {
1979
  "epoch": 19.79,
1980
  "learning_rate": 1.7368421052631579e-06,
1981
- "loss": 1.8521,
1982
  "step": 2870
1983
  },
1984
  {
1985
  "epoch": 19.86,
1986
  "learning_rate": 1.2105263157894738e-06,
1987
- "loss": 1.4235,
1988
  "step": 2880
1989
  },
1990
  {
1991
  "epoch": 19.93,
1992
  "learning_rate": 6.842105263157896e-07,
1993
- "loss": 1.5813,
1994
  "step": 2890
1995
  },
1996
  {
1997
  "epoch": 20.0,
1998
  "learning_rate": 1.5789473684210527e-07,
1999
- "loss": 1.7583,
2000
  "step": 2900
2001
  },
2002
  {
2003
  "epoch": 20.0,
2004
- "eval_loss": 1.7561472654342651,
2005
- "eval_runtime": 63.1867,
2006
- "eval_samples_per_second": 26.588,
2007
- "eval_steps_per_second": 26.588,
2008
- "eval_wer": 0.7971194266418579,
2009
  "step": 2900
2010
  },
2011
  {
2012
  "epoch": 20.0,
2013
  "step": 2900,
2014
  "total_flos": 1.1463619274186412e+18,
2015
- "train_loss": 2.3102814756590746,
2016
- "train_runtime": 4074.7961,
2017
- "train_samples_per_second": 22.676,
2018
- "train_steps_per_second": 0.712
2019
  }
2020
  ],
2021
  "max_steps": 2900,
69
  },
70
  {
71
  "epoch": 0.69,
72
+ "eval_loss": 4.080199241638184,
73
+ "eval_runtime": 62.2219,
74
+ "eval_samples_per_second": 27.0,
75
+ "eval_steps_per_second": 27.0,
76
  "eval_wer": 1.0,
77
  "step": 100
78
  },
133
  {
134
  "epoch": 1.38,
135
  "learning_rate": 1.97e-05,
136
+ "loss": 2.9805,
137
  "step": 200
138
  },
139
  {
140
  "epoch": 1.38,
141
+ "eval_loss": 2.9792351722717285,
142
+ "eval_runtime": 62.532,
143
+ "eval_samples_per_second": 26.866,
144
+ "eval_steps_per_second": 26.866,
145
  "eval_wer": 1.0,
146
  "step": 200
147
  },
207
  },
208
  {
209
  "epoch": 2.07,
210
+ "eval_loss": 2.940765857696533,
211
+ "eval_runtime": 62.8838,
212
+ "eval_samples_per_second": 26.716,
213
+ "eval_steps_per_second": 26.716,
214
  "eval_wer": 1.0,
215
  "step": 300
216
  },
276
  },
277
  {
278
  "epoch": 2.76,
279
+ "eval_loss": 2.9143340587615967,
280
+ "eval_runtime": 63.136,
281
+ "eval_samples_per_second": 26.609,
282
+ "eval_steps_per_second": 26.609,
283
  "eval_wer": 1.0,
284
  "step": 400
285
  },
316
  {
317
  "epoch": 3.17,
318
  "learning_rate": 4.5700000000000006e-05,
319
+ "loss": 2.8882,
320
  "step": 460
321
  },
322
  {
323
  "epoch": 3.24,
324
  "learning_rate": 4.6700000000000003e-05,
325
+ "loss": 2.8961,
326
  "step": 470
327
  },
328
  {
329
  "epoch": 3.31,
330
  "learning_rate": 4.77e-05,
331
+ "loss": 2.9121,
332
  "step": 480
333
  },
334
  {
345
  },
346
  {
347
  "epoch": 3.45,
348
+ "eval_loss": 2.8774502277374268,
349
+ "eval_runtime": 63.3794,
350
+ "eval_samples_per_second": 26.507,
351
+ "eval_steps_per_second": 26.507,
352
  "eval_wer": 1.0,
353
  "step": 500
354
  },
355
  {
356
  "epoch": 3.52,
357
  "learning_rate": 5.0700000000000006e-05,
358
+ "loss": 2.9154,
359
  "step": 510
360
  },
361
  {
362
  "epoch": 3.59,
363
  "learning_rate": 5.17e-05,
364
+ "loss": 2.885,
365
  "step": 520
366
  },
367
  {
368
  "epoch": 3.66,
369
  "learning_rate": 5.270000000000001e-05,
370
+ "loss": 2.8637,
371
  "step": 530
372
  },
373
  {
374
  "epoch": 3.72,
375
  "learning_rate": 5.3700000000000004e-05,
376
+ "loss": 2.888,
377
  "step": 540
378
  },
379
  {
380
  "epoch": 3.79,
381
  "learning_rate": 5.470000000000001e-05,
382
+ "loss": 2.9059,
383
  "step": 550
384
  },
385
  {
386
  "epoch": 3.86,
387
  "learning_rate": 5.5700000000000005e-05,
388
+ "loss": 2.863,
389
  "step": 560
390
  },
391
  {
392
  "epoch": 3.93,
393
  "learning_rate": 5.6699999999999996e-05,
394
+ "loss": 2.8439,
395
  "step": 570
396
  },
397
  {
398
  "epoch": 4.0,
399
  "learning_rate": 5.77e-05,
400
+ "loss": 2.8525,
401
  "step": 580
402
  },
403
  {
404
  "epoch": 4.07,
405
  "learning_rate": 5.87e-05,
406
+ "loss": 2.8441,
407
  "step": 590
408
  },
409
  {
410
  "epoch": 4.14,
411
  "learning_rate": 5.97e-05,
412
+ "loss": 2.7718,
413
  "step": 600
414
  },
415
  {
416
  "epoch": 4.14,
417
+ "eval_loss": 2.7786543369293213,
418
+ "eval_runtime": 63.096,
419
+ "eval_samples_per_second": 26.626,
420
+ "eval_steps_per_second": 26.626,
421
+ "eval_wer": 1.0,
422
  "step": 600
423
  },
424
  {
425
  "epoch": 4.21,
426
  "learning_rate": 6.07e-05,
427
+ "loss": 2.772,
428
  "step": 610
429
  },
430
  {
431
  "epoch": 4.28,
432
  "learning_rate": 6.170000000000001e-05,
433
+ "loss": 2.8153,
434
  "step": 620
435
  },
436
  {
437
  "epoch": 4.34,
438
  "learning_rate": 6.27e-05,
439
+ "loss": 2.7641,
440
  "step": 630
441
  },
442
  {
443
  "epoch": 4.41,
444
  "learning_rate": 6.37e-05,
445
+ "loss": 2.7527,
446
  "step": 640
447
  },
448
  {
449
  "epoch": 4.48,
450
  "learning_rate": 6.47e-05,
451
+ "loss": 2.746,
452
  "step": 650
453
  },
454
  {
455
  "epoch": 4.55,
456
  "learning_rate": 6.570000000000001e-05,
457
+ "loss": 2.7824,
458
  "step": 660
459
  },
460
  {
461
  "epoch": 4.62,
462
  "learning_rate": 6.670000000000001e-05,
463
+ "loss": 2.6965,
464
  "step": 670
465
  },
466
  {
467
  "epoch": 4.69,
468
  "learning_rate": 6.77e-05,
469
+ "loss": 2.701,
470
  "step": 680
471
  },
472
  {
473
  "epoch": 4.76,
474
  "learning_rate": 6.87e-05,
475
+ "loss": 2.7603,
476
  "step": 690
477
  },
478
  {
479
  "epoch": 4.83,
480
  "learning_rate": 6.97e-05,
481
+ "loss": 2.6711,
482
  "step": 700
483
  },
484
  {
485
  "epoch": 4.83,
486
+ "eval_loss": 2.640113115310669,
487
+ "eval_runtime": 63.3201,
488
+ "eval_samples_per_second": 26.532,
489
+ "eval_steps_per_second": 26.532,
490
+ "eval_wer": 0.978567982909517,
491
  "step": 700
492
  },
493
  {
494
  "epoch": 4.9,
495
  "learning_rate": 7.07e-05,
496
+ "loss": 2.6076,
497
  "step": 710
498
  },
499
  {
500
  "epoch": 4.97,
501
  "learning_rate": 7.17e-05,
502
+ "loss": 2.6269,
503
  "step": 720
504
  },
505
  {
506
  "epoch": 5.03,
507
  "learning_rate": 7.27e-05,
508
+ "loss": 2.7102,
509
  "step": 730
510
  },
511
  {
512
  "epoch": 5.1,
513
  "learning_rate": 7.37e-05,
514
+ "loss": 2.6061,
515
  "step": 740
516
  },
517
  {
518
  "epoch": 5.17,
519
  "learning_rate": 7.47e-05,
520
+ "loss": 2.5717,
521
  "step": 750
522
  },
523
  {
524
  "epoch": 5.24,
525
  "learning_rate": 7.570000000000001e-05,
526
+ "loss": 2.6457,
527
  "step": 760
528
  },
529
  {
530
  "epoch": 5.31,
531
  "learning_rate": 7.670000000000001e-05,
532
+ "loss": 2.6255,
533
  "step": 770
534
  },
535
  {
536
  "epoch": 5.38,
537
  "learning_rate": 7.77e-05,
538
+ "loss": 2.5464,
539
  "step": 780
540
  },
541
  {
542
  "epoch": 5.45,
543
  "learning_rate": 7.87e-05,
544
+ "loss": 2.5584,
545
  "step": 790
546
  },
547
  {
548
  "epoch": 5.52,
549
  "learning_rate": 7.970000000000001e-05,
550
+ "loss": 2.6403,
551
  "step": 800
552
  },
553
  {
554
  "epoch": 5.52,
555
+ "eval_loss": 2.5434978008270264,
556
+ "eval_runtime": 63.2107,
557
+ "eval_samples_per_second": 26.578,
558
+ "eval_steps_per_second": 26.578,
559
+ "eval_wer": 1.0392116325546137,
560
  "step": 800
561
  },
562
  {
563
  "epoch": 5.59,
564
  "learning_rate": 8.070000000000001e-05,
565
+ "loss": 2.5656,
566
  "step": 810
567
  },
568
  {
569
  "epoch": 5.66,
570
  "learning_rate": 8.17e-05,
571
+ "loss": 2.4717,
572
  "step": 820
573
  },
574
  {
575
  "epoch": 5.72,
576
  "learning_rate": 8.27e-05,
577
+ "loss": 2.533,
578
  "step": 830
579
  },
580
  {
581
  "epoch": 5.79,
582
  "learning_rate": 8.37e-05,
583
+ "loss": 2.6475,
584
  "step": 840
585
  },
586
  {
587
  "epoch": 5.86,
588
  "learning_rate": 8.47e-05,
589
+ "loss": 2.4591,
590
  "step": 850
591
  },
592
  {
593
  "epoch": 5.93,
594
  "learning_rate": 8.57e-05,
595
+ "loss": 2.4935,
596
  "step": 860
597
  },
598
  {
599
  "epoch": 6.0,
600
  "learning_rate": 8.67e-05,
601
+ "loss": 2.5621,
602
  "step": 870
603
  },
604
  {
605
  "epoch": 6.07,
606
  "learning_rate": 8.77e-05,
607
+ "loss": 2.5034,
608
  "step": 880
609
  },
610
  {
611
  "epoch": 6.14,
612
  "learning_rate": 8.87e-05,
613
+ "loss": 2.4035,
614
  "step": 890
615
  },
616
  {
617
  "epoch": 6.21,
618
  "learning_rate": 8.970000000000001e-05,
619
+ "loss": 2.4052,
620
  "step": 900
621
  },
622
  {
623
  "epoch": 6.21,
624
+ "eval_loss": 2.4579782485961914,
625
+ "eval_runtime": 63.5786,
626
+ "eval_samples_per_second": 26.424,
627
+ "eval_steps_per_second": 26.424,
628
+ "eval_wer": 1.0706360691888912,
629
  "step": 900
630
  },
631
  {
632
  "epoch": 6.28,
633
  "learning_rate": 9.070000000000001e-05,
634
+ "loss": 2.5836,
635
  "step": 910
636
  },
637
  {
638
  "epoch": 6.34,
639
  "learning_rate": 9.17e-05,
640
+ "loss": 2.419,
641
  "step": 920
642
  },
643
  {
644
  "epoch": 6.41,
645
  "learning_rate": 9.27e-05,
646
+ "loss": 2.391,
647
  "step": 930
648
  },
649
  {
650
  "epoch": 6.48,
651
  "learning_rate": 9.370000000000001e-05,
652
+ "loss": 2.4647,
653
  "step": 940
654
  },
655
  {
656
  "epoch": 6.55,
657
  "learning_rate": 9.47e-05,
658
+ "loss": 2.5118,
659
  "step": 950
660
  },
661
  {
662
  "epoch": 6.62,
663
  "learning_rate": 9.57e-05,
664
+ "loss": 2.2727,
665
  "step": 960
666
  },
667
  {
668
  "epoch": 6.69,
669
  "learning_rate": 9.67e-05,
670
+ "loss": 2.267,
671
  "step": 970
672
  },
673
  {
674
  "epoch": 6.76,
675
  "learning_rate": 9.77e-05,
676
+ "loss": 2.5176,
677
  "step": 980
678
  },
679
  {
680
  "epoch": 6.83,
681
  "learning_rate": 9.87e-05,
682
+ "loss": 2.4048,
683
  "step": 990
684
  },
685
  {
686
  "epoch": 6.9,
687
  "learning_rate": 9.970000000000001e-05,
688
+ "loss": 2.1708,
689
  "step": 1000
690
  },
691
  {
692
  "epoch": 6.9,
693
+ "eval_loss": 2.2800345420837402,
694
+ "eval_runtime": 62.9699,
695
+ "eval_samples_per_second": 26.679,
696
+ "eval_steps_per_second": 26.679,
697
+ "eval_wer": 1.0089587209702984,
698
  "step": 1000
699
  },
700
  {
701
  "epoch": 6.97,
702
  "learning_rate": 9.963157894736843e-05,
703
+ "loss": 2.3227,
704
  "step": 1010
705
  },
706
  {
707
  "epoch": 7.03,
708
  "learning_rate": 9.910526315789475e-05,
709
+ "loss": 2.5246,
710
  "step": 1020
711
  },
712
  {
713
  "epoch": 7.1,
714
  "learning_rate": 9.857894736842106e-05,
715
+ "loss": 2.2273,
716
  "step": 1030
717
  },
718
  {
719
  "epoch": 7.17,
720
  "learning_rate": 9.805263157894737e-05,
721
+ "loss": 2.181,
722
  "step": 1040
723
  },
724
  {
725
  "epoch": 7.24,
726
  "learning_rate": 9.752631578947369e-05,
727
+ "loss": 2.3324,
728
  "step": 1050
729
  },
730
  {
731
  "epoch": 7.31,
732
  "learning_rate": 9.7e-05,
733
+ "loss": 2.3922,
734
  "step": 1060
735
  },
736
  {
737
  "epoch": 7.38,
738
  "learning_rate": 9.647368421052631e-05,
739
+ "loss": 2.0177,
740
  "step": 1070
741
  },
742
  {
743
  "epoch": 7.45,
744
  "learning_rate": 9.594736842105264e-05,
745
+ "loss": 2.1559,
746
  "step": 1080
747
  },
748
  {
749
  "epoch": 7.52,
750
  "learning_rate": 9.542105263157895e-05,
751
+ "loss": 2.458,
752
  "step": 1090
753
  },
754
  {
755
  "epoch": 7.59,
756
  "learning_rate": 9.489473684210527e-05,
757
+ "loss": 2.2555,
758
  "step": 1100
759
  },
760
  {
761
  "epoch": 7.59,
762
+ "eval_loss": 2.1493031978607178,
763
+ "eval_runtime": 63.4,
764
+ "eval_samples_per_second": 26.498,
765
+ "eval_steps_per_second": 26.498,
766
+ "eval_wer": 0.9578940114395975,
767
  "step": 1100
768
  },
769
  {
770
  "epoch": 7.66,
771
  "learning_rate": 9.436842105263158e-05,
772
+ "loss": 1.9823,
773
  "step": 1110
774
  },
775
  {
776
  "epoch": 7.72,
777
  "learning_rate": 9.384210526315789e-05,
778
+ "loss": 2.1439,
779
  "step": 1120
780
  },
781
  {
782
  "epoch": 7.79,
783
  "learning_rate": 9.331578947368422e-05,
784
+ "loss": 2.4031,
785
  "step": 1130
786
  },
787
  {
788
  "epoch": 7.86,
789
  "learning_rate": 9.278947368421053e-05,
790
+ "loss": 2.0473,
791
  "step": 1140
792
  },
793
  {
794
  "epoch": 7.93,
795
  "learning_rate": 9.226315789473686e-05,
796
+ "loss": 2.0511,
797
  "step": 1150
798
  },
799
  {
800
  "epoch": 8.0,
801
  "learning_rate": 9.173684210526317e-05,
802
+ "loss": 2.379,
803
  "step": 1160
804
  },
805
  {
806
  "epoch": 8.07,
807
  "learning_rate": 9.121052631578948e-05,
808
+ "loss": 2.2715,
809
  "step": 1170
810
  },
811
  {
812
  "epoch": 8.14,
813
  "learning_rate": 9.06842105263158e-05,
814
+ "loss": 1.9654,
815
  "step": 1180
816
  },
817
  {
818
  "epoch": 8.21,
819
  "learning_rate": 9.015789473684211e-05,
820
+ "loss": 2.0454,
821
  "step": 1190
822
  },
823
  {
824
  "epoch": 8.28,
825
  "learning_rate": 8.963157894736842e-05,
826
+ "loss": 2.3673,
827
  "step": 1200
828
  },
829
  {
830
  "epoch": 8.28,
831
+ "eval_loss": 2.0709428787231445,
832
+ "eval_runtime": 63.9178,
833
+ "eval_samples_per_second": 26.284,
834
+ "eval_steps_per_second": 26.284,
835
+ "eval_wer": 0.9051064709530701,
836
  "step": 1200
837
  },
838
  {
839
  "epoch": 8.34,
840
  "learning_rate": 8.910526315789474e-05,
841
+ "loss": 1.9115,
842
  "step": 1210
843
  },
844
  {
845
  "epoch": 8.41,
846
  "learning_rate": 8.857894736842106e-05,
847
+ "loss": 1.9225,
848
  "step": 1220
849
  },
850
  {
851
  "epoch": 8.48,
852
  "learning_rate": 8.805263157894737e-05,
853
+ "loss": 2.2256,
854
  "step": 1230
855
  },
856
  {
857
  "epoch": 8.55,
858
  "learning_rate": 8.752631578947369e-05,
859
+ "loss": 2.3292,
860
  "step": 1240
861
  },
862
  {
863
  "epoch": 8.62,
864
  "learning_rate": 8.7e-05,
865
+ "loss": 1.8711,
866
  "step": 1250
867
  },
868
  {
869
  "epoch": 8.69,
870
  "learning_rate": 8.647368421052631e-05,
871
+ "loss": 1.9655,
872
  "step": 1260
873
  },
874
  {
875
  "epoch": 8.76,
876
  "learning_rate": 8.594736842105263e-05,
877
+ "loss": 2.3567,
878
  "step": 1270
879
  },
880
  {
881
  "epoch": 8.83,
882
  "learning_rate": 8.542105263157894e-05,
883
+ "loss": 2.0859,
884
  "step": 1280
885
  },
886
  {
887
  "epoch": 8.9,
888
  "learning_rate": 8.489473684210527e-05,
889
+ "loss": 1.8144,
890
  "step": 1290
891
  },
892
  {
893
  "epoch": 8.97,
894
  "learning_rate": 8.436842105263158e-05,
895
+ "loss": 2.091,
896
  "step": 1300
897
  },
898
  {
899
  "epoch": 8.97,
900
+ "eval_loss": 2.025775909423828,
901
+ "eval_runtime": 63.4247,
902
+ "eval_samples_per_second": 26.488,
903
+ "eval_steps_per_second": 26.488,
904
+ "eval_wer": 0.8926331748328854,
905
  "step": 1300
906
  },
907
  {
908
  "epoch": 9.03,
909
  "learning_rate": 8.38421052631579e-05,
910
+ "loss": 2.3111,
911
  "step": 1310
912
  },
913
  {
914
  "epoch": 9.1,
915
  "learning_rate": 8.331578947368422e-05,
916
+ "loss": 1.7863,
917
  "step": 1320
918
  },
919
  {
920
  "epoch": 9.17,
921
  "learning_rate": 8.278947368421053e-05,
922
+ "loss": 1.8556,
923
  "step": 1330
924
  },
925
  {
926
  "epoch": 9.24,
927
  "learning_rate": 8.226315789473684e-05,
928
+ "loss": 2.217,
929
  "step": 1340
930
  },
931
  {
932
  "epoch": 9.31,
933
  "learning_rate": 8.173684210526317e-05,
934
+ "loss": 2.2324,
935
  "step": 1350
936
  },
937
  {
938
  "epoch": 9.38,
939
  "learning_rate": 8.121052631578948e-05,
940
+ "loss": 1.8721,
941
  "step": 1360
942
  },
943
  {
944
  "epoch": 9.45,
945
  "learning_rate": 8.06842105263158e-05,
946
+ "loss": 1.9822,
947
  "step": 1370
948
  },
949
  {
950
  "epoch": 9.52,
951
  "learning_rate": 8.015789473684211e-05,
952
+ "loss": 2.2722,
953
  "step": 1380
954
  },
955
  {
956
  "epoch": 9.59,
957
  "learning_rate": 7.963157894736842e-05,
958
+ "loss": 1.8985,
959
  "step": 1390
960
  },
961
  {
962
  "epoch": 9.66,
963
  "learning_rate": 7.910526315789474e-05,
964
+ "loss": 1.8433,
965
  "step": 1400
966
  },
967
  {
968
  "epoch": 9.66,
969
+ "eval_loss": 1.9644908905029297,
970
+ "eval_runtime": 63.8196,
971
+ "eval_samples_per_second": 26.324,
972
+ "eval_steps_per_second": 26.324,
973
+ "eval_wer": 0.8242712425056854,
974
  "step": 1400
975
  },
976
  {
977
  "epoch": 9.72,
978
  "learning_rate": 7.857894736842105e-05,
979
+ "loss": 2.1261,
980
  "step": 1410
981
  },
982
  {
983
  "epoch": 9.79,
984
  "learning_rate": 7.805263157894738e-05,
985
+ "loss": 2.2486,
986
  "step": 1420
987
  },
988
  {
989
  "epoch": 9.86,
990
  "learning_rate": 7.752631578947369e-05,
991
+ "loss": 1.7644,
992
  "step": 1430
993
  },
994
  {
995
  "epoch": 9.93,
996
  "learning_rate": 7.7e-05,
997
+ "loss": 1.845,
998
  "step": 1440
999
  },
1000
  {
1001
  "epoch": 10.0,
1002
  "learning_rate": 7.647368421052631e-05,
1003
+ "loss": 2.1699,
1004
  "step": 1450
1005
  },
1006
  {
1007
  "epoch": 10.07,
1008
  "learning_rate": 7.594736842105263e-05,
1009
+ "loss": 2.0384,
1010
  "step": 1460
1011
  },
1012
  {
1013
  "epoch": 10.14,
1014
  "learning_rate": 7.542105263157895e-05,
1015
+ "loss": 1.7081,
1016
  "step": 1470
1017
  },
1018
  {
1019
  "epoch": 10.21,
1020
  "learning_rate": 7.489473684210527e-05,
1021
+ "loss": 1.8563,
1022
  "step": 1480
1023
  },
1024
  {
1025
  "epoch": 10.28,
1026
  "learning_rate": 7.43684210526316e-05,
1027
+ "loss": 2.2783,
1028
  "step": 1490
1029
  },
1030
  {
1031
  "epoch": 10.34,
1032
  "learning_rate": 7.38421052631579e-05,
1033
+ "loss": 1.6824,
1034
  "step": 1500
1035
  },
1036
  {
1037
  "epoch": 10.34,
1038
+ "eval_loss": 1.9211441278457642,
1039
+ "eval_runtime": 63.3051,
1040
+ "eval_samples_per_second": 26.538,
1041
+ "eval_steps_per_second": 26.538,
1042
+ "eval_wer": 0.8707187650747709,
1043
  "step": 1500
1044
  },
1045
  {
1046
  "epoch": 10.41,
1047
  "learning_rate": 7.331578947368422e-05,
1048
+ "loss": 1.7436,
1049
  "step": 1510
1050
  },
1051
  {
1052
  "epoch": 10.48,
1053
  "learning_rate": 7.278947368421053e-05,
1054
+ "loss": 2.0382,
1055
  "step": 1520
1056
  },
1057
  {
1058
  "epoch": 10.55,
1059
  "learning_rate": 7.226315789473685e-05,
1060
+ "loss": 2.2084,
1061
  "step": 1530
1062
  },
1063
  {
1064
  "epoch": 10.62,
1065
  "learning_rate": 7.173684210526316e-05,
1066
+ "loss": 1.7541,
1067
  "step": 1540
1068
  },
1069
  {
1070
  "epoch": 10.69,
1071
  "learning_rate": 7.121052631578947e-05,
1072
+ "loss": 1.9633,
1073
  "step": 1550
1074
  },
1075
  {
1076
  "epoch": 10.76,
1077
  "learning_rate": 7.06842105263158e-05,
1078
+ "loss": 2.2264,
1079
  "step": 1560
1080
  },
1081
  {
1082
  "epoch": 10.83,
1083
  "learning_rate": 7.015789473684211e-05,
1084
+ "loss": 1.8931,
1085
  "step": 1570
1086
  },
1087
  {
1088
  "epoch": 10.9,
1089
  "learning_rate": 6.963157894736842e-05,
1090
+ "loss": 1.7337,
1091
  "step": 1580
1092
  },
1093
  {
1094
  "epoch": 10.97,
1095
  "learning_rate": 6.910526315789474e-05,
1096
+ "loss": 1.8795,
1097
  "step": 1590
1098
  },
1099
  {
1100
  "epoch": 11.03,
1101
  "learning_rate": 6.857894736842105e-05,
1102
+ "loss": 2.2282,
1103
  "step": 1600
1104
  },
1105
  {
1106
  "epoch": 11.03,
1107
+ "eval_loss": 1.8913612365722656,
1108
+ "eval_runtime": 63.4568,
1109
+ "eval_samples_per_second": 26.475,
1110
+ "eval_steps_per_second": 26.475,
1111
+ "eval_wer": 0.8695472400248088,
1112
  "step": 1600
1113
  },
1114
  {
1115
  "epoch": 11.1,
1116
  "learning_rate": 6.805263157894736e-05,
1117
+ "loss": 1.7207,
1118
  "step": 1610
1119
  },
1120
  {
1121
  "epoch": 11.17,
1122
  "learning_rate": 6.752631578947368e-05,
1123
+ "loss": 1.714,
1124
  "step": 1620
1125
  },
1126
  {
1127
  "epoch": 11.24,
1128
  "learning_rate": 6.7e-05,
1129
+ "loss": 2.034,
1130
  "step": 1630
1131
  },
1132
  {
1133
  "epoch": 11.31,
1134
  "learning_rate": 6.647368421052632e-05,
1135
+ "loss": 2.0639,
1136
  "step": 1640
1137
  },
1138
  {
1139
  "epoch": 11.38,
1140
  "learning_rate": 6.594736842105264e-05,
1141
+ "loss": 1.7275,
1142
  "step": 1650
1143
  },
1144
  {
1145
  "epoch": 11.45,
1146
  "learning_rate": 6.542105263157895e-05,
1147
+ "loss": 1.8704,
1148
  "step": 1660
1149
  },
1150
  {
1151
  "epoch": 11.52,
1152
  "learning_rate": 6.489473684210527e-05,
1153
+ "loss": 2.2262,
1154
  "step": 1670
1155
  },
1156
  {
1157
  "epoch": 11.59,
1158
  "learning_rate": 6.436842105263158e-05,
1159
+ "loss": 1.7371,
1160
  "step": 1680
1161
  },
1162
  {
1163
  "epoch": 11.66,
1164
  "learning_rate": 6.384210526315791e-05,
1165
+ "loss": 1.6343,
1166
  "step": 1690
1167
  },
1168
  {
1169
  "epoch": 11.72,
1170
  "learning_rate": 6.331578947368422e-05,
1171
+ "loss": 1.9027,
1172
  "step": 1700
1173
  },
1174
  {
1175
  "epoch": 11.72,
1176
+ "eval_loss": 1.871816873550415,
1177
+ "eval_runtime": 63.6857,
1178
+ "eval_samples_per_second": 26.38,
1179
+ "eval_steps_per_second": 26.38,
1180
+ "eval_wer": 0.8342636620494797,
1181
  "step": 1700
1182
  },
1183
  {
1184
  "epoch": 11.79,
1185
  "learning_rate": 6.278947368421053e-05,
1186
+ "loss": 2.1587,
1187
  "step": 1710
1188
  },
1189
  {
1190
  "epoch": 11.86,
1191
  "learning_rate": 6.226315789473685e-05,
1192
+ "loss": 1.643,
1193
  "step": 1720
1194
  },
1195
  {
1196
  "epoch": 11.93,
1197
  "learning_rate": 6.173684210526316e-05,
1198
+ "loss": 1.7811,
1199
  "step": 1730
1200
  },
1201
  {
1202
  "epoch": 12.0,
1203
  "learning_rate": 6.121052631578947e-05,
1204
+ "loss": 2.1443,
1205
  "step": 1740
1206
  },
1207
  {
1208
  "epoch": 12.07,
1209
  "learning_rate": 6.0684210526315785e-05,
1210
+ "loss": 1.9129,
1211
  "step": 1750
1212
  },
1213
  {
1214
  "epoch": 12.14,
1215
  "learning_rate": 6.015789473684211e-05,
1216
+ "loss": 1.629,
1217
  "step": 1760
1218
  },
1219
  {
1220
  "epoch": 12.21,
1221
  "learning_rate": 5.9631578947368425e-05,
1222
+ "loss": 1.8447,
1223
  "step": 1770
1224
  },
1225
  {
1226
  "epoch": 12.28,
1227
  "learning_rate": 5.9105263157894744e-05,
1228
+ "loss": 2.1536,
1229
  "step": 1780
1230
  },
1231
  {
1232
  "epoch": 12.34,
1233
  "learning_rate": 5.857894736842106e-05,
1234
+ "loss": 1.7021,
1235
  "step": 1790
1236
  },
1237
  {
1238
  "epoch": 12.41,
1239
  "learning_rate": 5.805263157894737e-05,
1240
+ "loss": 1.6303,
1241
  "step": 1800
1242
  },
1243
  {
1244
  "epoch": 12.41,
1245
+ "eval_loss": 1.86460280418396,
1246
+ "eval_runtime": 63.8627,
1247
+ "eval_samples_per_second": 26.306,
1248
+ "eval_steps_per_second": 26.306,
1249
+ "eval_wer": 0.8231686306939563,
1250
  "step": 1800
1251
  },
1252
  {
1253
  "epoch": 12.48,
1254
  "learning_rate": 5.752631578947368e-05,
1255
+ "loss": 1.9938,
1256
  "step": 1810
1257
  },
1258
  {
1259
  "epoch": 12.55,
1260
  "learning_rate": 5.6999999999999996e-05,
1261
+ "loss": 2.0685,
1262
  "step": 1820
1263
  },
1264
  {
1265
  "epoch": 12.62,
1266
  "learning_rate": 5.647368421052632e-05,
1267
+ "loss": 1.624,
1268
  "step": 1830
1269
  },
1270
  {
1271
  "epoch": 12.69,
1272
  "learning_rate": 5.5947368421052636e-05,
1273
+ "loss": 1.7289,
1274
  "step": 1840
1275
  },
1276
  {
1277
  "epoch": 12.76,
1278
  "learning_rate": 5.542105263157895e-05,
1279
+ "loss": 2.1195,
1280
  "step": 1850
1281
  },
1282
  {
1283
  "epoch": 12.83,
1284
  "learning_rate": 5.489473684210527e-05,
1285
+ "loss": 1.8338,
1286
  "step": 1860
1287
  },
1288
  {
1289
  "epoch": 12.9,
1290
  "learning_rate": 5.436842105263158e-05,
1291
+ "loss": 1.6352,
1292
  "step": 1870
1293
  },
1294
  {
1295
  "epoch": 12.97,
1296
  "learning_rate": 5.3842105263157895e-05,
1297
+ "loss": 1.8484,
1298
  "step": 1880
1299
  },
1300
  {
1301
  "epoch": 13.03,
1302
  "learning_rate": 5.331578947368421e-05,
1303
+ "loss": 2.1297,
1304
  "step": 1890
1305
  },
1306
  {
1307
  "epoch": 13.1,
1308
  "learning_rate": 5.2789473684210534e-05,
1309
+ "loss": 1.648,
1310
  "step": 1900
1311
  },
1312
  {
1313
  "epoch": 13.1,
1314
+ "eval_loss": 1.8297438621520996,
1315
+ "eval_runtime": 63.5984,
1316
+ "eval_samples_per_second": 26.416,
1317
+ "eval_steps_per_second": 26.416,
1318
+ "eval_wer": 0.8176555716353111,
1319
  "step": 1900
1320
  },
1321
  {
1322
  "epoch": 13.17,
1323
  "learning_rate": 5.226315789473685e-05,
1324
+ "loss": 1.7123,
1325
  "step": 1910
1326
  },
1327
  {
1328
  "epoch": 13.24,
1329
  "learning_rate": 5.173684210526316e-05,
1330
+ "loss": 1.9177,
1331
  "step": 1920
1332
  },
1333
  {
1334
  "epoch": 13.31,
1335
  "learning_rate": 5.121052631578947e-05,
1336
+ "loss": 1.9108,
1337
  "step": 1930
1338
  },
1339
  {
1340
  "epoch": 13.38,
1341
  "learning_rate": 5.068421052631579e-05,
1342
+ "loss": 1.6338,
1343
  "step": 1940
1344
  },
1345
  {
1346
  "epoch": 13.45,
1347
  "learning_rate": 5.0157894736842106e-05,
1348
+ "loss": 1.6991,
1349
  "step": 1950
1350
  },
1351
  {
1352
  "epoch": 13.52,
1353
  "learning_rate": 4.9631578947368426e-05,
1354
+ "loss": 2.1586,
1355
  "step": 1960
1356
  },
1357
  {
1358
  "epoch": 13.59,
1359
  "learning_rate": 4.910526315789474e-05,
1360
+ "loss": 1.747,
1361
  "step": 1970
1362
  },
1363
  {
1364
  "epoch": 13.66,
1365
  "learning_rate": 4.857894736842106e-05,
1366
+ "loss": 1.5962,
1367
  "step": 1980
1368
  },
1369
  {
1370
  "epoch": 13.72,
1371
  "learning_rate": 4.805263157894737e-05,
1372
+ "loss": 1.9579,
1373
  "step": 1990
1374
  },
1375
  {
1376
  "epoch": 13.79,
1377
  "learning_rate": 4.7526315789473684e-05,
1378
+ "loss": 2.0429,
1379
  "step": 2000
1380
  },
1381
  {
1382
  "epoch": 13.79,
1383
+ "eval_loss": 1.812680721282959,
1384
+ "eval_runtime": 63.4889,
1385
+ "eval_samples_per_second": 26.461,
1386
+ "eval_steps_per_second": 26.461,
1387
+ "eval_wer": 0.8641720074426297,
1388
  "step": 2000
1389
  },
1390
  {
1391
  "epoch": 13.86,
1392
  "learning_rate": 4.7e-05,
1393
+ "loss": 1.5885,
1394
  "step": 2010
1395
  },
1396
  {
1397
  "epoch": 13.93,
1398
  "learning_rate": 4.647368421052632e-05,
1399
+ "loss": 1.6708,
1400
  "step": 2020
1401
  },
1402
  {
1403
  "epoch": 14.0,
1404
  "learning_rate": 4.594736842105264e-05,
1405
+ "loss": 1.9297,
1406
  "step": 2030
1407
  },
1408
  {
1409
  "epoch": 14.07,
1410
  "learning_rate": 4.542105263157895e-05,
1411
+ "loss": 1.8333,
1412
  "step": 2040
1413
  },
1414
  {
1415
  "epoch": 14.14,
1416
  "learning_rate": 4.489473684210527e-05,
1417
+ "loss": 1.5051,
1418
  "step": 2050
1419
  },
1420
  {
1421
  "epoch": 14.21,
1422
  "learning_rate": 4.436842105263158e-05,
1423
+ "loss": 1.7685,
1424
  "step": 2060
1425
  },
1426
  {
1427
  "epoch": 14.28,
1428
  "learning_rate": 4.3842105263157895e-05,
1429
+ "loss": 2.1308,
1430
  "step": 2070
1431
  },
1432
  {
1433
  "epoch": 14.34,
1434
  "learning_rate": 4.3315789473684215e-05,
1435
+ "loss": 1.671,
1436
  "step": 2080
1437
  },
1438
  {
1439
  "epoch": 14.41,
1440
  "learning_rate": 4.278947368421053e-05,
1441
+ "loss": 1.6557,
1442
  "step": 2090
1443
  },
1444
  {
1445
  "epoch": 14.48,
1446
  "learning_rate": 4.226315789473684e-05,
1447
+ "loss": 1.8833,
1448
  "step": 2100
1449
  },
1450
  {
1451
  "epoch": 14.48,
1452
+ "eval_loss": 1.80045747756958,
1453
+ "eval_runtime": 63.6753,
1454
+ "eval_samples_per_second": 26.384,
1455
+ "eval_steps_per_second": 26.384,
1456
+ "eval_wer": 0.8307490868995934,
1457
  "step": 2100
1458
  },
1459
  {
1460
  "epoch": 14.55,
1461
  "learning_rate": 4.1736842105263154e-05,
1462
+ "loss": 1.9599,
1463
  "step": 2110
1464
  },
1465
  {
1466
  "epoch": 14.62,
1467
  "learning_rate": 4.1210526315789474e-05,
1468
+ "loss": 1.5676,
1469
  "step": 2120
1470
  },
1471
  {
1472
  "epoch": 14.69,
1473
  "learning_rate": 4.0684210526315794e-05,
1474
+ "loss": 1.7685,
1475
  "step": 2130
1476
  },
1477
  {
1478
  "epoch": 14.76,
1479
  "learning_rate": 4.015789473684211e-05,
1480
+ "loss": 2.0334,
1481
  "step": 2140
1482
  },
1483
  {
1484
  "epoch": 14.83,
1485
  "learning_rate": 3.9631578947368426e-05,
1486
+ "loss": 1.7082,
1487
  "step": 2150
1488
  },
1489
  {
1490
  "epoch": 14.9,
1491
  "learning_rate": 3.910526315789474e-05,
1492
+ "loss": 1.4858,
1493
  "step": 2160
1494
  },
1495
  {
1496
  "epoch": 14.97,
1497
  "learning_rate": 3.857894736842105e-05,
1498
+ "loss": 1.6838,
1499
  "step": 2170
1500
  },
1501
  {
1502
  "epoch": 15.03,
1503
  "learning_rate": 3.8052631578947365e-05,
1504
+ "loss": 2.0267,
1505
  "step": 2180
1506
  },
1507
  {
1508
  "epoch": 15.1,
1509
  "learning_rate": 3.7526315789473685e-05,
1510
+ "loss": 1.5655,
1511
  "step": 2190
1512
  },
1513
  {
1514
  "epoch": 15.17,
1515
  "learning_rate": 3.7e-05,
1516
+ "loss": 1.5996,
1517
  "step": 2200
1518
  },
1519
  {
1520
  "epoch": 15.17,
1521
+ "eval_loss": 1.7925808429718018,
1522
+ "eval_runtime": 63.6696,
1523
+ "eval_samples_per_second": 26.386,
1524
+ "eval_steps_per_second": 26.386,
1525
+ "eval_wer": 0.8467369581696644,
1526
  "step": 2200
1527
  },
1528
  {
1529
  "epoch": 15.24,
1530
  "learning_rate": 3.647368421052632e-05,
1531
+ "loss": 1.8772,
1532
  "step": 2210
1533
  },
1534
  {
1535
  "epoch": 15.31,
1536
  "learning_rate": 3.594736842105264e-05,
1537
+ "loss": 1.803,
1538
  "step": 2220
1539
  },
1540
  {
1541
  "epoch": 15.38,
1542
  "learning_rate": 3.542105263157895e-05,
1543
+ "loss": 1.5784,
1544
  "step": 2230
1545
  },
1546
  {
1547
  "epoch": 15.45,
1548
  "learning_rate": 3.4894736842105264e-05,
1549
+ "loss": 1.6237,
1550
  "step": 2240
1551
  },
1552
  {
1553
  "epoch": 15.52,
1554
  "learning_rate": 3.436842105263158e-05,
1555
+ "loss": 2.0538,
1556
  "step": 2250
1557
  },
1558
  {
1559
  "epoch": 15.59,
1560
  "learning_rate": 3.3842105263157896e-05,
1561
+ "loss": 1.5915,
1562
  "step": 2260
1563
  },
1564
  {
1565
  "epoch": 15.66,
1566
  "learning_rate": 3.331578947368421e-05,
1567
+ "loss": 1.5799,
1568
  "step": 2270
1569
  },
1570
  {
1571
  "epoch": 15.72,
1572
  "learning_rate": 3.278947368421052e-05,
1573
+ "loss": 1.8569,
1574
  "step": 2280
1575
  },
1576
  {
1577
  "epoch": 15.79,
1578
  "learning_rate": 3.226315789473684e-05,
1579
+ "loss": 2.054,
1580
  "step": 2290
1581
  },
1582
  {
1583
  "epoch": 15.86,
1584
  "learning_rate": 3.173684210526316e-05,
1585
+ "loss": 1.4876,
1586
  "step": 2300
1587
  },
1588
  {
1589
  "epoch": 15.86,
1590
+ "eval_loss": 1.779452919960022,
1591
+ "eval_runtime": 63.3412,
1592
+ "eval_samples_per_second": 26.523,
1593
+ "eval_steps_per_second": 26.523,
1594
+ "eval_wer": 0.8341258355730136,
1595
  "step": 2300
1596
  },
1597
  {
1598
  "epoch": 15.93,
1599
  "learning_rate": 3.1210526315789475e-05,
1600
+ "loss": 1.602,
1601
  "step": 2310
1602
  },
1603
  {
1604
  "epoch": 16.0,
1605
  "learning_rate": 3.0684210526315795e-05,
1606
+ "loss": 1.989,
1607
  "step": 2320
1608
  },
1609
  {
1610
  "epoch": 16.07,
1611
  "learning_rate": 3.0157894736842108e-05,
1612
+ "loss": 1.8007,
1613
  "step": 2330
1614
  },
1615
  {
1616
  "epoch": 16.14,
1617
  "learning_rate": 2.963157894736842e-05,
1618
+ "loss": 1.4876,
1619
  "step": 2340
1620
  },
1621
  {
1622
  "epoch": 16.21,
1623
  "learning_rate": 2.910526315789474e-05,
1624
+ "loss": 1.666,
1625
  "step": 2350
1626
  },
1627
  {
1628
  "epoch": 16.28,
1629
  "learning_rate": 2.8578947368421057e-05,
1630
+ "loss": 2.0479,
1631
  "step": 2360
1632
  },
1633
  {
1634
  "epoch": 16.34,
1635
  "learning_rate": 2.805263157894737e-05,
1636
+ "loss": 1.5446,
1637
  "step": 2370
1638
  },
1639
  {
1640
  "epoch": 16.41,
1641
  "learning_rate": 2.7526315789473683e-05,
1642
+ "loss": 1.5795,
1643
  "step": 2380
1644
  },
1645
  {
1646
  "epoch": 16.48,
1647
  "learning_rate": 2.7000000000000002e-05,
1648
+ "loss": 1.7922,
1649
  "step": 2390
1650
  },
1651
  {
1652
  "epoch": 16.55,
1653
  "learning_rate": 2.647368421052632e-05,
1654
+ "loss": 1.8925,
1655
  "step": 2400
1656
  },
1657
  {
1658
  "epoch": 16.55,
1659
+ "eval_loss": 1.7715719938278198,
1660
+ "eval_runtime": 63.5709,
1661
+ "eval_samples_per_second": 26.427,
1662
+ "eval_steps_per_second": 26.427,
1663
+ "eval_wer": 0.8198607952587692,
1664
  "step": 2400
1665
  },
1666
  {
1667
  "epoch": 16.62,
1668
  "learning_rate": 2.5947368421052632e-05,
1669
+ "loss": 1.4745,
1670
  "step": 2410
1671
  },
1672
  {
1673
  "epoch": 16.69,
1674
  "learning_rate": 2.542105263157895e-05,
1675
+ "loss": 1.6365,
1676
  "step": 2420
1677
  },
1678
  {
1679
  "epoch": 16.76,
1680
  "learning_rate": 2.4894736842105264e-05,
1681
+ "loss": 1.9693,
1682
  "step": 2430
1683
  },
1684
  {
1685
  "epoch": 16.83,
1686
  "learning_rate": 2.436842105263158e-05,
1687
+ "loss": 1.6706,
1688
  "step": 2440
1689
  },
1690
  {
1691
  "epoch": 16.9,
1692
  "learning_rate": 2.3842105263157897e-05,
1693
+ "loss": 1.5347,
1694
  "step": 2450
1695
  },
1696
  {
1697
  "epoch": 16.97,
1698
  "learning_rate": 2.331578947368421e-05,
1699
+ "loss": 1.68,
1700
  "step": 2460
1701
  },
1702
  {
1703
  "epoch": 17.03,
1704
  "learning_rate": 2.2789473684210527e-05,
1705
+ "loss": 1.9789,
1706
  "step": 2470
1707
  },
1708
  {
1709
  "epoch": 17.1,
1710
  "learning_rate": 2.2263157894736843e-05,
1711
+ "loss": 1.4876,
1712
  "step": 2480
1713
  },
1714
  {
1715
  "epoch": 17.17,
1716
  "learning_rate": 2.173684210526316e-05,
1717
+ "loss": 1.5282,
1718
  "step": 2490
1719
  },
1720
  {
1721
  "epoch": 17.24,
1722
  "learning_rate": 2.1210526315789476e-05,
1723
+ "loss": 1.814,
1724
  "step": 2500
1725
  },
1726
  {
1727
  "epoch": 17.24,
1728
+ "eval_loss": 1.7846348285675049,
1729
+ "eval_runtime": 63.8548,
1730
+ "eval_samples_per_second": 26.31,
1731
+ "eval_steps_per_second": 26.31,
1732
+ "eval_wer": 0.8085590241885466,
1733
  "step": 2500
1734
  },
1735
  {
1736
  "epoch": 17.31,
1737
  "learning_rate": 2.068421052631579e-05,
1738
+ "loss": 1.8587,
1739
  "step": 2510
1740
  },
1741
  {
1742
  "epoch": 17.38,
1743
  "learning_rate": 2.0157894736842105e-05,
1744
+ "loss": 1.4217,
1745
  "step": 2520
1746
  },
1747
  {
1748
  "epoch": 17.45,
1749
  "learning_rate": 1.963157894736842e-05,
1750
+ "loss": 1.6074,
1751
  "step": 2530
1752
  },
1753
  {
1754
  "epoch": 17.52,
1755
  "learning_rate": 1.9105263157894738e-05,
1756
+ "loss": 1.9219,
1757
  "step": 2540
1758
  },
1759
  {
1760
  "epoch": 17.59,
1761
  "learning_rate": 1.8578947368421054e-05,
1762
+ "loss": 1.6039,
1763
  "step": 2550
1764
  },
1765
  {
1766
  "epoch": 17.66,
1767
  "learning_rate": 1.8052631578947367e-05,
1768
+ "loss": 1.506,
1769
  "step": 2560
1770
  },
1771
  {
1772
  "epoch": 17.72,
1773
  "learning_rate": 1.7526315789473683e-05,
1774
+ "loss": 1.7399,
1775
  "step": 2570
1776
  },
1777
  {
1778
  "epoch": 17.79,
1779
  "learning_rate": 1.7000000000000003e-05,
1780
+ "loss": 1.8959,
1781
  "step": 2580
1782
  },
1783
  {
1784
  "epoch": 17.86,
1785
  "learning_rate": 1.6473684210526316e-05,
1786
+ "loss": 1.4699,
1787
  "step": 2590
1788
  },
1789
  {
1790
  "epoch": 17.93,
1791
  "learning_rate": 1.5947368421052633e-05,
1792
+ "loss": 1.536,
1793
  "step": 2600
1794
  },
1795
  {
1796
  "epoch": 17.93,
1797
+ "eval_loss": 1.7655223608016968,
1798
+ "eval_runtime": 63.3968,
1799
+ "eval_samples_per_second": 26.5,
1800
+ "eval_steps_per_second": 26.5,
1801
+ "eval_wer": 0.8018744400799394,
1802
  "step": 2600
1803
  },
1804
  {
1805
  "epoch": 18.0,
1806
  "learning_rate": 1.5421052631578946e-05,
1807
+ "loss": 1.9096,
1808
  "step": 2610
1809
  },
1810
  {
1811
  "epoch": 18.07,
1812
  "learning_rate": 1.4894736842105264e-05,
1813
+ "loss": 1.6969,
1814
  "step": 2620
1815
  },
1816
  {
1817
  "epoch": 18.14,
1818
  "learning_rate": 1.4368421052631582e-05,
1819
+ "loss": 1.4026,
1820
  "step": 2630
1821
  },
1822
  {
1823
  "epoch": 18.21,
1824
  "learning_rate": 1.3842105263157895e-05,
1825
+ "loss": 1.611,
1826
  "step": 2640
1827
  },
1828
  {
1829
  "epoch": 18.28,
1830
  "learning_rate": 1.3315789473684213e-05,
1831
+ "loss": 1.947,
1832
  "step": 2650
1833
  },
1834
  {
1835
  "epoch": 18.34,
1836
  "learning_rate": 1.2789473684210526e-05,
1837
+ "loss": 1.5006,
1838
  "step": 2660
1839
  },
1840
  {
1841
  "epoch": 18.41,
1842
  "learning_rate": 1.2263157894736844e-05,
1843
+ "loss": 1.4805,
1844
  "step": 2670
1845
  },
1846
  {
1847
  "epoch": 18.48,
1848
  "learning_rate": 1.1736842105263158e-05,
1849
+ "loss": 1.7787,
1850
  "step": 2680
1851
  },
1852
  {
1853
  "epoch": 18.55,
1854
  "learning_rate": 1.1210526315789475e-05,
1855
+ "loss": 1.8082,
1856
  "step": 2690
1857
  },
1858
  {
1859
  "epoch": 18.62,
1860
  "learning_rate": 1.068421052631579e-05,
1861
+ "loss": 1.4476,
1862
  "step": 2700
1863
  },
1864
  {
1865
  "epoch": 18.62,
1866
+ "eval_loss": 1.7598618268966675,
1867
+ "eval_runtime": 63.6208,
1868
+ "eval_samples_per_second": 26.406,
1869
+ "eval_steps_per_second": 26.406,
1870
+ "eval_wer": 0.8070429329474192,
1871
  "step": 2700
1872
  },
1873
  {
1874
  "epoch": 18.69,
1875
  "learning_rate": 1.0157894736842106e-05,
1876
+ "loss": 1.5683,
1877
  "step": 2710
1878
  },
1879
  {
1880
  "epoch": 18.76,
1881
  "learning_rate": 9.631578947368422e-06,
1882
+ "loss": 1.9225,
1883
  "step": 2720
1884
  },
1885
  {
1886
  "epoch": 18.83,
1887
  "learning_rate": 9.105263157894737e-06,
1888
+ "loss": 1.5732,
1889
  "step": 2730
1890
  },
1891
  {
1892
  "epoch": 18.9,
1893
  "learning_rate": 8.578947368421053e-06,
1894
+ "loss": 1.5404,
1895
  "step": 2740
1896
  },
1897
  {
1898
  "epoch": 18.97,
1899
  "learning_rate": 8.052631578947368e-06,
1900
+ "loss": 1.6251,
1901
  "step": 2750
1902
  },
1903
  {
1904
  "epoch": 19.03,
1905
  "learning_rate": 7.526315789473684e-06,
1906
+ "loss": 1.8527,
1907
  "step": 2760
1908
  },
1909
  {
1910
  "epoch": 19.1,
1911
  "learning_rate": 7.000000000000001e-06,
1912
+ "loss": 1.4474,
1913
  "step": 2770
1914
  },
1915
  {
1916
  "epoch": 19.17,
1917
  "learning_rate": 6.473684210526316e-06,
1918
+ "loss": 1.475,
1919
  "step": 2780
1920
  },
1921
  {
1922
  "epoch": 19.24,
1923
  "learning_rate": 5.947368421052632e-06,
1924
+ "loss": 1.8146,
1925
  "step": 2790
1926
  },
1927
  {
1928
  "epoch": 19.31,
1929
  "learning_rate": 5.421052631578947e-06,
1930
+ "loss": 1.7629,
1931
  "step": 2800
1932
  },
1933
  {
1934
  "epoch": 19.31,
1935
+ "eval_loss": 1.7588989734649658,
1936
+ "eval_runtime": 63.459,
1937
+ "eval_samples_per_second": 26.474,
1938
+ "eval_steps_per_second": 26.474,
1939
+ "eval_wer": 0.8119357728619668,
1940
  "step": 2800
1941
  },
1942
  {
1943
  "epoch": 19.38,
1944
  "learning_rate": 4.894736842105263e-06,
1945
+ "loss": 1.4493,
1946
  "step": 2810
1947
  },
1948
  {
1949
  "epoch": 19.45,
1950
  "learning_rate": 4.368421052631579e-06,
1951
+ "loss": 1.5521,
1952
  "step": 2820
1953
  },
1954
  {
1955
  "epoch": 19.52,
1956
  "learning_rate": 3.842105263157895e-06,
1957
+ "loss": 1.9465,
1958
  "step": 2830
1959
  },
1960
  {
1961
  "epoch": 19.59,
1962
  "learning_rate": 3.315789473684211e-06,
1963
+ "loss": 1.4912,
1964
  "step": 2840
1965
  },
1966
  {
1967
  "epoch": 19.66,
1968
  "learning_rate": 2.7894736842105266e-06,
1969
+ "loss": 1.4484,
1970
  "step": 2850
1971
  },
1972
  {
1973
  "epoch": 19.72,
1974
  "learning_rate": 2.2631578947368426e-06,
1975
+ "loss": 1.6934,
1976
  "step": 2860
1977
  },
1978
  {
1979
  "epoch": 19.79,
1980
  "learning_rate": 1.7368421052631579e-06,
1981
+ "loss": 1.8646,
1982
  "step": 2870
1983
  },
1984
  {
1985
  "epoch": 19.86,
1986
  "learning_rate": 1.2105263157894738e-06,
1987
+ "loss": 1.4162,
1988
  "step": 2880
1989
  },
1990
  {
1991
  "epoch": 19.93,
1992
  "learning_rate": 6.842105263157896e-07,
1993
+ "loss": 1.5734,
1994
  "step": 2890
1995
  },
1996
  {
1997
  "epoch": 20.0,
1998
  "learning_rate": 1.5789473684210527e-07,
1999
+ "loss": 1.7646,
2000
  "step": 2900
2001
  },
2002
  {
2003
  "epoch": 20.0,
2004
+ "eval_loss": 1.754138469696045,
2005
+ "eval_runtime": 63.415,
2006
+ "eval_samples_per_second": 26.492,
2007
+ "eval_steps_per_second": 26.492,
2008
+ "eval_wer": 0.8060781476121563,
2009
  "step": 2900
2010
  },
2011
  {
2012
  "epoch": 20.0,
2013
  "step": 2900,
2014
  "total_flos": 1.1463619274186412e+18,
2015
+ "train_loss": 2.314238773214406,
2016
+ "train_runtime": 4067.0318,
2017
+ "train_samples_per_second": 22.719,
2018
+ "train_steps_per_second": 0.713
2019
  }
2020
  ],
2021
  "max_steps": 2900,