AndrewMcDowell committed on
Commit
69b978b
1 Parent(s): d448748

End of training

Browse files
Files changed (4) hide show
  1. all_results.json +10 -10
  2. eval_results.json +6 -6
  3. train_results.json +5 -5
  4. trainer_state.json +956 -20
all_results.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
- "epoch": 10.0,
3
- "eval_loss": 1.3699203729629517,
4
- "eval_runtime": 441.7782,
5
  "eval_samples": 10388,
6
- "eval_samples_per_second": 23.514,
7
- "eval_steps_per_second": 2.94,
8
- "eval_wer": 0.928593891632906,
9
- "train_loss": 2.4458747799871756,
10
- "train_runtime": 25997.2696,
11
  "train_samples": 38209,
12
- "train_samples_per_second": 14.697,
13
- "train_steps_per_second": 0.23
14
  }
 
1
  {
2
+ "epoch": 30.0,
3
+ "eval_loss": 1.137310266494751,
4
+ "eval_runtime": 417.5245,
5
  "eval_samples": 10388,
6
+ "eval_samples_per_second": 24.88,
7
+ "eval_steps_per_second": 3.111,
8
+ "eval_wer": 0.860665593725142,
9
+ "train_loss": 1.4937853462266097,
10
+ "train_runtime": 52137.1608,
11
  "train_samples": 38209,
12
+ "train_samples_per_second": 21.986,
13
+ "train_steps_per_second": 0.344
14
  }
eval_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 10.0,
3
- "eval_loss": 1.3699203729629517,
4
- "eval_runtime": 441.7782,
5
  "eval_samples": 10388,
6
- "eval_samples_per_second": 23.514,
7
- "eval_steps_per_second": 2.94,
8
- "eval_wer": 0.928593891632906
9
  }
 
1
  {
2
+ "epoch": 30.0,
3
+ "eval_loss": 1.137310266494751,
4
+ "eval_runtime": 417.5245,
5
  "eval_samples": 10388,
6
+ "eval_samples_per_second": 24.88,
7
+ "eval_steps_per_second": 3.111,
8
+ "eval_wer": 0.860665593725142
9
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 10.0,
3
- "train_loss": 2.4458747799871756,
4
- "train_runtime": 25997.2696,
5
  "train_samples": 38209,
6
- "train_samples_per_second": 14.697,
7
- "train_steps_per_second": 0.23
8
  }
 
1
  {
2
+ "epoch": 30.0,
3
+ "train_loss": 1.4937853462266097,
4
+ "train_runtime": 52137.1608,
5
  "train_samples": 38209,
6
+ "train_samples_per_second": 21.986,
7
+ "train_steps_per_second": 0.344
8
  }
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 9.999581414817916,
5
- "global_step": 5970,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -438,41 +438,977 @@
438
  },
439
  {
440
  "epoch": 9.38,
441
- "learning_rate": 9.370277078085642e-05,
442
- "loss": 2.1936,
443
  "step": 5600
444
  },
445
  {
446
  "epoch": 9.55,
447
- "learning_rate": 6.851385390428212e-05,
448
- "loss": 2.1796,
449
  "step": 5700
450
  },
451
  {
452
  "epoch": 9.71,
453
- "learning_rate": 4.332493702770781e-05,
454
- "loss": 2.1748,
455
  "step": 5800
456
  },
457
  {
458
  "epoch": 9.88,
459
- "learning_rate": 1.8136020151133502e-05,
460
- "loss": 2.1846,
461
  "step": 5900
462
  },
463
  {
464
- "epoch": 10.0,
465
- "step": 5970,
466
- "total_flos": 1.0051413716540667e+20,
467
- "train_loss": 2.4458747799871756,
468
- "train_runtime": 25997.2696,
469
- "train_samples_per_second": 14.697,
470
- "train_steps_per_second": 0.23
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
471
  }
472
  ],
473
- "max_steps": 5970,
474
- "num_train_epochs": 10,
475
- "total_flos": 1.0051413716540667e+20,
476
  "trial_name": null,
477
  "trial_params": null
478
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 29.999581414817914,
5
+ "global_step": 17910,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
438
  },
439
  {
440
  "epoch": 9.38,
441
+ "learning_rate": 0.000773852922690132,
442
+ "loss": 2.3213,
443
  "step": 5600
444
  },
445
  {
446
  "epoch": 9.55,
447
+ "learning_rate": 0.0007675675675675676,
448
+ "loss": 2.3412,
449
  "step": 5700
450
  },
451
  {
452
  "epoch": 9.71,
453
+ "learning_rate": 0.0007612822124450032,
454
+ "loss": 2.3508,
455
  "step": 5800
456
  },
457
  {
458
  "epoch": 9.88,
459
+ "learning_rate": 0.0007549968573224387,
460
+ "loss": 2.3767,
461
  "step": 5900
462
  },
463
  {
464
+ "epoch": 10.05,
465
+ "learning_rate": 0.0007487115021998742,
466
+ "loss": 2.3768,
467
+ "step": 6000
468
+ },
469
+ {
470
+ "epoch": 10.05,
471
+ "eval_loss": 1.6662662029266357,
472
+ "eval_runtime": 424.8561,
473
+ "eval_samples_per_second": 24.451,
474
+ "eval_steps_per_second": 3.058,
475
+ "eval_wer": 0.9664798268494367,
476
+ "step": 6000
477
+ },
478
+ {
479
+ "epoch": 10.22,
480
+ "learning_rate": 0.0007424261470773099,
481
+ "loss": 2.3681,
482
+ "step": 6100
483
+ },
484
+ {
485
+ "epoch": 10.39,
486
+ "learning_rate": 0.0007361407919547455,
487
+ "loss": 2.389,
488
+ "step": 6200
489
+ },
490
+ {
491
+ "epoch": 10.55,
492
+ "learning_rate": 0.0007299182903834066,
493
+ "loss": 2.3671,
494
+ "step": 6300
495
+ },
496
+ {
497
+ "epoch": 10.72,
498
+ "learning_rate": 0.0007236329352608423,
499
+ "loss": 2.3784,
500
+ "step": 6400
501
+ },
502
+ {
503
+ "epoch": 10.89,
504
+ "learning_rate": 0.0007173475801382778,
505
+ "loss": 2.3804,
506
+ "step": 6500
507
+ },
508
+ {
509
+ "epoch": 10.89,
510
+ "eval_loss": 1.6570764780044556,
511
+ "eval_runtime": 413.7975,
512
+ "eval_samples_per_second": 25.104,
513
+ "eval_steps_per_second": 3.139,
514
+ "eval_wer": 0.9719740274155059,
515
+ "step": 6500
516
+ },
517
+ {
518
+ "epoch": 11.06,
519
+ "learning_rate": 0.0007110622250157134,
520
+ "loss": 2.3757,
521
+ "step": 6600
522
+ },
523
+ {
524
+ "epoch": 11.22,
525
+ "learning_rate": 0.0007047768698931489,
526
+ "loss": 2.3487,
527
+ "step": 6700
528
+ },
529
+ {
530
+ "epoch": 11.39,
531
+ "learning_rate": 0.0006984915147705846,
532
+ "loss": 2.3473,
533
+ "step": 6800
534
+ },
535
+ {
536
+ "epoch": 11.56,
537
+ "learning_rate": 0.0006922061596480202,
538
+ "loss": 2.3481,
539
+ "step": 6900
540
+ },
541
+ {
542
+ "epoch": 11.72,
543
+ "learning_rate": 0.0006859208045254557,
544
+ "loss": 2.3237,
545
+ "step": 7000
546
+ },
547
+ {
548
+ "epoch": 11.72,
549
+ "eval_loss": 1.604884147644043,
550
+ "eval_runtime": 412.0866,
551
+ "eval_samples_per_second": 25.208,
552
+ "eval_steps_per_second": 3.152,
553
+ "eval_wer": 0.9637049780786947,
554
+ "step": 7000
555
+ },
556
+ {
557
+ "epoch": 11.89,
558
+ "learning_rate": 0.0006796354494028913,
559
+ "loss": 2.3379,
560
+ "step": 7100
561
+ },
562
+ {
563
+ "epoch": 12.06,
564
+ "learning_rate": 0.0006733500942803269,
565
+ "loss": 2.3362,
566
+ "step": 7200
567
+ },
568
+ {
569
+ "epoch": 12.23,
570
+ "learning_rate": 0.0006670647391577624,
571
+ "loss": 2.3148,
572
+ "step": 7300
573
+ },
574
+ {
575
+ "epoch": 12.4,
576
+ "learning_rate": 0.000660779384035198,
577
+ "loss": 2.3242,
578
+ "step": 7400
579
+ },
580
+ {
581
+ "epoch": 12.56,
582
+ "learning_rate": 0.0006544940289126335,
583
+ "loss": 2.317,
584
+ "step": 7500
585
+ },
586
+ {
587
+ "epoch": 12.56,
588
+ "eval_loss": 1.5874534845352173,
589
+ "eval_runtime": 412.3759,
590
+ "eval_samples_per_second": 25.191,
591
+ "eval_steps_per_second": 3.15,
592
+ "eval_wer": 0.9655363782673845,
593
+ "step": 7500
594
+ },
595
+ {
596
+ "epoch": 12.73,
597
+ "learning_rate": 0.0006482086737900693,
598
+ "loss": 2.3342,
599
+ "step": 7600
600
+ },
601
+ {
602
+ "epoch": 12.9,
603
+ "learning_rate": 0.0006419233186675048,
604
+ "loss": 2.3229,
605
+ "step": 7700
606
+ },
607
+ {
608
+ "epoch": 13.07,
609
+ "learning_rate": 0.0006356379635449403,
610
+ "loss": 2.3128,
611
+ "step": 7800
612
+ },
613
+ {
614
+ "epoch": 13.23,
615
+ "learning_rate": 0.0006293526084223759,
616
+ "loss": 2.2992,
617
+ "step": 7900
618
+ },
619
+ {
620
+ "epoch": 13.4,
621
+ "learning_rate": 0.0006230672532998114,
622
+ "loss": 2.2988,
623
+ "step": 8000
624
+ },
625
+ {
626
+ "epoch": 13.4,
627
+ "eval_loss": 1.5357071161270142,
628
+ "eval_runtime": 411.1707,
629
+ "eval_samples_per_second": 25.264,
630
+ "eval_steps_per_second": 3.159,
631
+ "eval_wer": 0.9603381615701945,
632
+ "step": 8000
633
+ },
634
+ {
635
+ "epoch": 13.57,
636
+ "learning_rate": 0.000616781898177247,
637
+ "loss": 2.3041,
638
+ "step": 8100
639
+ },
640
+ {
641
+ "epoch": 13.74,
642
+ "learning_rate": 0.0006104965430546826,
643
+ "loss": 2.2905,
644
+ "step": 8200
645
+ },
646
+ {
647
+ "epoch": 13.9,
648
+ "learning_rate": 0.0006043368950345695,
649
+ "loss": 2.2946,
650
+ "step": 8300
651
+ },
652
+ {
653
+ "epoch": 14.07,
654
+ "learning_rate": 0.000598051539912005,
655
+ "loss": 2.3022,
656
+ "step": 8400
657
+ },
658
+ {
659
+ "epoch": 14.24,
660
+ "learning_rate": 0.0005917661847894407,
661
+ "loss": 2.2906,
662
+ "step": 8500
663
+ },
664
+ {
665
+ "epoch": 14.24,
666
+ "eval_loss": 1.5637153387069702,
667
+ "eval_runtime": 411.7098,
668
+ "eval_samples_per_second": 25.231,
669
+ "eval_steps_per_second": 3.155,
670
+ "eval_wer": 0.9592097230700927,
671
+ "step": 8500
672
+ },
673
+ {
674
+ "epoch": 14.41,
675
+ "learning_rate": 0.0005854808296668762,
676
+ "loss": 2.2918,
677
+ "step": 8600
678
+ },
679
+ {
680
+ "epoch": 14.57,
681
+ "learning_rate": 0.0005791954745443117,
682
+ "loss": 2.2805,
683
+ "step": 8700
684
+ },
685
+ {
686
+ "epoch": 14.74,
687
+ "learning_rate": 0.0005729101194217473,
688
+ "loss": 2.2951,
689
+ "step": 8800
690
+ },
691
+ {
692
+ "epoch": 14.91,
693
+ "learning_rate": 0.0005666247642991829,
694
+ "loss": 2.2876,
695
+ "step": 8900
696
+ },
697
+ {
698
+ "epoch": 15.08,
699
+ "learning_rate": 0.0005603394091766186,
700
+ "loss": 2.2848,
701
+ "step": 9000
702
+ },
703
+ {
704
+ "epoch": 15.08,
705
+ "eval_loss": 1.5325744152069092,
706
+ "eval_runtime": 411.6076,
707
+ "eval_samples_per_second": 25.238,
708
+ "eval_steps_per_second": 3.156,
709
+ "eval_wer": 0.9537340214958285,
710
+ "step": 9000
711
+ },
712
+ {
713
+ "epoch": 15.24,
714
+ "learning_rate": 0.0005540540540540541,
715
+ "loss": 2.2537,
716
+ "step": 9100
717
+ },
718
+ {
719
+ "epoch": 15.41,
720
+ "learning_rate": 0.0005477686989314896,
721
+ "loss": 2.2504,
722
+ "step": 9200
723
+ },
724
+ {
725
+ "epoch": 15.58,
726
+ "learning_rate": 0.0005414833438089252,
727
+ "loss": 2.2542,
728
+ "step": 9300
729
+ },
730
+ {
731
+ "epoch": 15.75,
732
+ "learning_rate": 0.0005351979886863608,
733
+ "loss": 2.2455,
734
+ "step": 9400
735
+ },
736
+ {
737
+ "epoch": 15.91,
738
+ "learning_rate": 0.0005289126335637963,
739
+ "loss": 2.2381,
740
+ "step": 9500
741
+ },
742
+ {
743
+ "epoch": 15.91,
744
+ "eval_loss": 1.563069462776184,
745
+ "eval_runtime": 410.1326,
746
+ "eval_samples_per_second": 25.328,
747
+ "eval_steps_per_second": 3.167,
748
+ "eval_wer": 0.9508296797824518,
749
+ "step": 9500
750
+ },
751
+ {
752
+ "epoch": 16.08,
753
+ "learning_rate": 0.0005226272784412319,
754
+ "loss": 2.2406,
755
+ "step": 9600
756
+ },
757
+ {
758
+ "epoch": 16.25,
759
+ "learning_rate": 0.0005163419233186674,
760
+ "loss": 2.2265,
761
+ "step": 9700
762
+ },
763
+ {
764
+ "epoch": 16.42,
765
+ "learning_rate": 0.0005100565681961032,
766
+ "loss": 2.2221,
767
+ "step": 9800
768
+ },
769
+ {
770
+ "epoch": 16.58,
771
+ "learning_rate": 0.0005037712130735387,
772
+ "loss": 2.2122,
773
+ "step": 9900
774
+ },
775
+ {
776
+ "epoch": 16.75,
777
+ "learning_rate": 0.0004974858579509742,
778
+ "loss": 2.2072,
779
+ "step": 10000
780
+ },
781
+ {
782
+ "epoch": 16.75,
783
+ "eval_loss": 1.4565062522888184,
784
+ "eval_runtime": 409.2254,
785
+ "eval_samples_per_second": 25.385,
786
+ "eval_steps_per_second": 3.174,
787
+ "eval_wer": 0.9395452947814344,
788
+ "step": 10000
789
+ },
790
+ {
791
+ "epoch": 16.92,
792
+ "learning_rate": 0.0004912005028284098,
793
+ "loss": 2.1876,
794
+ "step": 10100
795
+ },
796
+ {
797
+ "epoch": 17.09,
798
+ "learning_rate": 0.00048491514770584537,
799
+ "loss": 2.2144,
800
+ "step": 10200
801
+ },
802
+ {
803
+ "epoch": 17.25,
804
+ "learning_rate": 0.00047862979258328096,
805
+ "loss": 2.1943,
806
+ "step": 10300
807
+ },
808
+ {
809
+ "epoch": 17.42,
810
+ "learning_rate": 0.00047234443746071655,
811
+ "loss": 2.1901,
812
+ "step": 10400
813
+ },
814
+ {
815
+ "epoch": 17.59,
816
+ "learning_rate": 0.00046605908233815214,
817
+ "loss": 2.197,
818
+ "step": 10500
819
+ },
820
+ {
821
+ "epoch": 17.59,
822
+ "eval_loss": 1.430406093597412,
823
+ "eval_runtime": 410.1605,
824
+ "eval_samples_per_second": 25.327,
825
+ "eval_steps_per_second": 3.167,
826
+ "eval_wer": 0.9405997373143163,
827
+ "step": 10500
828
+ },
829
+ {
830
+ "epoch": 17.76,
831
+ "learning_rate": 0.0004597737272155877,
832
+ "loss": 2.1872,
833
+ "step": 10600
834
+ },
835
+ {
836
+ "epoch": 17.92,
837
+ "learning_rate": 0.0004534883720930232,
838
+ "loss": 2.2033,
839
+ "step": 10700
840
+ },
841
+ {
842
+ "epoch": 18.09,
843
+ "learning_rate": 0.00044720301697045886,
844
+ "loss": 2.1865,
845
+ "step": 10800
846
+ },
847
+ {
848
+ "epoch": 18.26,
849
+ "learning_rate": 0.0004409176618478944,
850
+ "loss": 2.194,
851
+ "step": 10900
852
+ },
853
+ {
854
+ "epoch": 18.43,
855
+ "learning_rate": 0.00043463230672533,
856
+ "loss": 2.198,
857
+ "step": 11000
858
+ },
859
+ {
860
+ "epoch": 18.43,
861
+ "eval_loss": 1.423040747642517,
862
+ "eval_runtime": 411.9246,
863
+ "eval_samples_per_second": 25.218,
864
+ "eval_steps_per_second": 3.153,
865
+ "eval_wer": 0.9382318663632832,
866
+ "step": 11000
867
+ },
868
+ {
869
+ "epoch": 18.59,
870
+ "learning_rate": 0.0004283469516027655,
871
+ "loss": 2.1784,
872
+ "step": 11100
873
+ },
874
+ {
875
+ "epoch": 18.76,
876
+ "learning_rate": 0.00042206159648020117,
877
+ "loss": 2.1739,
878
+ "step": 11200
879
+ },
880
+ {
881
+ "epoch": 18.93,
882
+ "learning_rate": 0.0004157762413576367,
883
+ "loss": 2.1686,
884
+ "step": 11300
885
+ },
886
+ {
887
+ "epoch": 19.1,
888
+ "learning_rate": 0.0004094908862350723,
889
+ "loss": 2.1639,
890
+ "step": 11400
891
+ },
892
+ {
893
+ "epoch": 19.26,
894
+ "learning_rate": 0.00040320553111250783,
895
+ "loss": 2.1668,
896
+ "step": 11500
897
+ },
898
+ {
899
+ "epoch": 19.26,
900
+ "eval_loss": 1.3998422622680664,
901
+ "eval_runtime": 412.8679,
902
+ "eval_samples_per_second": 25.161,
903
+ "eval_steps_per_second": 3.146,
904
+ "eval_wer": 0.9314982333462827,
905
+ "step": 11500
906
+ },
907
+ {
908
+ "epoch": 19.43,
909
+ "learning_rate": 0.00039692017598994347,
910
+ "loss": 2.1694,
911
+ "step": 11600
912
+ },
913
+ {
914
+ "epoch": 19.6,
915
+ "learning_rate": 0.000390634820867379,
916
+ "loss": 2.1492,
917
+ "step": 11700
918
+ },
919
+ {
920
+ "epoch": 19.77,
921
+ "learning_rate": 0.0003843494657448146,
922
+ "loss": 2.1465,
923
+ "step": 11800
924
+ },
925
+ {
926
+ "epoch": 19.93,
927
+ "learning_rate": 0.00037806411062225013,
928
+ "loss": 2.1484,
929
+ "step": 11900
930
+ },
931
+ {
932
+ "epoch": 20.1,
933
+ "learning_rate": 0.0003718416090509114,
934
+ "loss": 2.1498,
935
+ "step": 12000
936
+ },
937
+ {
938
+ "epoch": 20.1,
939
+ "eval_loss": 1.3919602632522583,
940
+ "eval_runtime": 412.6773,
941
+ "eval_samples_per_second": 25.172,
942
+ "eval_steps_per_second": 3.148,
943
+ "eval_wer": 0.9257635458867491,
944
+ "step": 12000
945
+ },
946
+ {
947
+ "epoch": 20.27,
948
+ "learning_rate": 0.00036555625392834694,
949
+ "loss": 2.1295,
950
+ "step": 12100
951
+ },
952
+ {
953
+ "epoch": 20.44,
954
+ "learning_rate": 0.00035927089880578253,
955
+ "loss": 2.1346,
956
+ "step": 12200
957
+ },
958
+ {
959
+ "epoch": 20.6,
960
+ "learning_rate": 0.0003529855436832181,
961
+ "loss": 2.1227,
962
+ "step": 12300
963
+ },
964
+ {
965
+ "epoch": 20.77,
966
+ "learning_rate": 0.00034670018856065366,
967
+ "loss": 2.1205,
968
+ "step": 12400
969
+ },
970
+ {
971
+ "epoch": 20.94,
972
+ "learning_rate": 0.00034041483343808925,
973
+ "loss": 2.1244,
974
+ "step": 12500
975
+ },
976
+ {
977
+ "epoch": 20.94,
978
+ "eval_loss": 1.3584457635879517,
979
+ "eval_runtime": 410.923,
980
+ "eval_samples_per_second": 25.28,
981
+ "eval_steps_per_second": 3.161,
982
+ "eval_wer": 0.9152561185415394,
983
+ "step": 12500
984
+ },
985
+ {
986
+ "epoch": 21.11,
987
+ "learning_rate": 0.00033412947831552484,
988
+ "loss": 2.1163,
989
+ "step": 12600
990
+ },
991
+ {
992
+ "epoch": 21.27,
993
+ "learning_rate": 0.00032784412319296043,
994
+ "loss": 2.1141,
995
+ "step": 12700
996
+ },
997
+ {
998
+ "epoch": 21.44,
999
+ "learning_rate": 0.00032155876807039597,
1000
+ "loss": 2.1122,
1001
+ "step": 12800
1002
+ },
1003
+ {
1004
+ "epoch": 21.61,
1005
+ "learning_rate": 0.00031527341294783156,
1006
+ "loss": 2.0937,
1007
+ "step": 12900
1008
+ },
1009
+ {
1010
+ "epoch": 21.78,
1011
+ "learning_rate": 0.00030898805782526715,
1012
+ "loss": 2.0953,
1013
+ "step": 13000
1014
+ },
1015
+ {
1016
+ "epoch": 21.78,
1017
+ "eval_loss": 1.327351450920105,
1018
+ "eval_runtime": 411.8656,
1019
+ "eval_samples_per_second": 25.222,
1020
+ "eval_steps_per_second": 3.154,
1021
+ "eval_wer": 0.905377656917698,
1022
+ "step": 13000
1023
+ },
1024
+ {
1025
+ "epoch": 21.94,
1026
+ "learning_rate": 0.00030270270270270274,
1027
+ "loss": 2.096,
1028
+ "step": 13100
1029
+ },
1030
+ {
1031
+ "epoch": 22.11,
1032
+ "learning_rate": 0.0002964173475801383,
1033
+ "loss": 2.1102,
1034
+ "step": 13200
1035
+ },
1036
+ {
1037
+ "epoch": 22.28,
1038
+ "learning_rate": 0.00029013199245757386,
1039
+ "loss": 2.0892,
1040
+ "step": 13300
1041
+ },
1042
+ {
1043
+ "epoch": 22.45,
1044
+ "learning_rate": 0.00028384663733500945,
1045
+ "loss": 2.0805,
1046
+ "step": 13400
1047
+ },
1048
+ {
1049
+ "epoch": 22.61,
1050
+ "learning_rate": 0.00027756128221244504,
1051
+ "loss": 2.0762,
1052
+ "step": 13500
1053
+ },
1054
+ {
1055
+ "epoch": 22.61,
1056
+ "eval_loss": 1.2932939529418945,
1057
+ "eval_runtime": 410.2802,
1058
+ "eval_samples_per_second": 25.319,
1059
+ "eval_steps_per_second": 3.166,
1060
+ "eval_wer": 0.9073015520654124,
1061
+ "step": 13500
1062
+ },
1063
+ {
1064
+ "epoch": 22.78,
1065
+ "learning_rate": 0.0002712759270898806,
1066
+ "loss": 2.0867,
1067
+ "step": 13600
1068
+ },
1069
+ {
1070
+ "epoch": 22.95,
1071
+ "learning_rate": 0.00026499057196731617,
1072
+ "loss": 2.0757,
1073
+ "step": 13700
1074
+ },
1075
+ {
1076
+ "epoch": 23.12,
1077
+ "learning_rate": 0.00025870521684475176,
1078
+ "loss": 2.0883,
1079
+ "step": 13800
1080
+ },
1081
+ {
1082
+ "epoch": 23.28,
1083
+ "learning_rate": 0.0002524198617221873,
1084
+ "loss": 2.0696,
1085
+ "step": 13900
1086
+ },
1087
+ {
1088
+ "epoch": 23.45,
1089
+ "learning_rate": 0.0002461345065996229,
1090
+ "loss": 2.0587,
1091
+ "step": 14000
1092
+ },
1093
+ {
1094
+ "epoch": 23.45,
1095
+ "eval_loss": 1.2515921592712402,
1096
+ "eval_runtime": 410.7551,
1097
+ "eval_samples_per_second": 25.29,
1098
+ "eval_steps_per_second": 3.162,
1099
+ "eval_wer": 0.8944447527609746,
1100
+ "step": 14000
1101
+ },
1102
+ {
1103
+ "epoch": 23.62,
1104
+ "learning_rate": 0.00023984915147705848,
1105
+ "loss": 2.0661,
1106
+ "step": 14100
1107
+ },
1108
+ {
1109
+ "epoch": 23.79,
1110
+ "learning_rate": 0.00023356379635449404,
1111
+ "loss": 2.0529,
1112
+ "step": 14200
1113
+ },
1114
+ {
1115
+ "epoch": 23.95,
1116
+ "learning_rate": 0.00022727844123192963,
1117
+ "loss": 2.0509,
1118
+ "step": 14300
1119
+ },
1120
+ {
1121
+ "epoch": 24.12,
1122
+ "learning_rate": 0.0002209930861093652,
1123
+ "loss": 2.0481,
1124
+ "step": 14400
1125
+ },
1126
+ {
1127
+ "epoch": 24.29,
1128
+ "learning_rate": 0.00021470773098680078,
1129
+ "loss": 2.0363,
1130
+ "step": 14500
1131
+ },
1132
+ {
1133
+ "epoch": 24.29,
1134
+ "eval_loss": 1.2214268445968628,
1135
+ "eval_runtime": 412.9112,
1136
+ "eval_samples_per_second": 25.158,
1137
+ "eval_steps_per_second": 3.146,
1138
+ "eval_wer": 0.8901529866622269,
1139
+ "step": 14500
1140
+ },
1141
+ {
1142
+ "epoch": 24.46,
1143
+ "learning_rate": 0.00020842237586423635,
1144
+ "loss": 2.0412,
1145
+ "step": 14600
1146
+ },
1147
+ {
1148
+ "epoch": 24.62,
1149
+ "learning_rate": 0.00020213702074167188,
1150
+ "loss": 2.0264,
1151
+ "step": 14700
1152
+ },
1153
+ {
1154
+ "epoch": 24.79,
1155
+ "learning_rate": 0.00019585166561910747,
1156
+ "loss": 2.0373,
1157
+ "step": 14800
1158
+ },
1159
+ {
1160
+ "epoch": 24.96,
1161
+ "learning_rate": 0.00018956631049654304,
1162
+ "loss": 2.0373,
1163
+ "step": 14900
1164
+ },
1165
+ {
1166
+ "epoch": 25.13,
1167
+ "learning_rate": 0.00018328095537397863,
1168
+ "loss": 2.0302,
1169
+ "step": 15000
1170
+ },
1171
+ {
1172
+ "epoch": 25.13,
1173
+ "eval_loss": 1.2087428569793701,
1174
+ "eval_runtime": 412.179,
1175
+ "eval_samples_per_second": 25.203,
1176
+ "eval_steps_per_second": 3.152,
1177
+ "eval_wer": 0.8871191520062157,
1178
+ "step": 15000
1179
+ },
1180
+ {
1181
+ "epoch": 25.29,
1182
+ "learning_rate": 0.0001769956002514142,
1183
+ "loss": 2.0109,
1184
+ "step": 15100
1185
+ },
1186
+ {
1187
+ "epoch": 25.46,
1188
+ "learning_rate": 0.00017071024512884978,
1189
+ "loss": 2.0215,
1190
+ "step": 15200
1191
+ },
1192
+ {
1193
+ "epoch": 25.63,
1194
+ "learning_rate": 0.00016442489000628534,
1195
+ "loss": 2.0137,
1196
+ "step": 15300
1197
+ },
1198
+ {
1199
+ "epoch": 25.8,
1200
+ "learning_rate": 0.00015813953488372093,
1201
+ "loss": 2.0084,
1202
+ "step": 15400
1203
+ },
1204
+ {
1205
+ "epoch": 25.96,
1206
+ "learning_rate": 0.0001518541797611565,
1207
+ "loss": 2.0071,
1208
+ "step": 15500
1209
+ },
1210
+ {
1211
+ "epoch": 25.96,
1212
+ "eval_loss": 1.1953096389770508,
1213
+ "eval_runtime": 413.1745,
1214
+ "eval_samples_per_second": 25.142,
1215
+ "eval_steps_per_second": 3.144,
1216
+ "eval_wer": 0.8785726177923303,
1217
+ "step": 15500
1218
+ },
1219
+ {
1220
+ "epoch": 26.13,
1221
+ "learning_rate": 0.00014556882463859208,
1222
+ "loss": 2.0112,
1223
+ "step": 15600
1224
+ },
1225
+ {
1226
+ "epoch": 26.3,
1227
+ "learning_rate": 0.00013928346951602765,
1228
+ "loss": 2.0077,
1229
+ "step": 15700
1230
+ },
1231
+ {
1232
+ "epoch": 26.47,
1233
+ "learning_rate": 0.00013299811439346324,
1234
+ "loss": 2.0052,
1235
+ "step": 15800
1236
+ },
1237
+ {
1238
+ "epoch": 26.63,
1239
+ "learning_rate": 0.0001267127592708988,
1240
+ "loss": 1.9947,
1241
+ "step": 15900
1242
+ },
1243
+ {
1244
+ "epoch": 26.8,
1245
+ "learning_rate": 0.00012042740414833438,
1246
+ "loss": 1.9882,
1247
+ "step": 16000
1248
+ },
1249
+ {
1250
+ "epoch": 26.8,
1251
+ "eval_loss": 1.1737616062164307,
1252
+ "eval_runtime": 414.4584,
1253
+ "eval_samples_per_second": 25.064,
1254
+ "eval_steps_per_second": 3.134,
1255
+ "eval_wer": 0.8711730210703517,
1256
+ "step": 16000
1257
+ },
1258
+ {
1259
+ "epoch": 26.97,
1260
+ "learning_rate": 0.0001142049025769956,
1261
+ "loss": 1.9926,
1262
+ "step": 16100
1263
+ },
1264
+ {
1265
+ "epoch": 27.14,
1266
+ "learning_rate": 0.00010791954745443117,
1267
+ "loss": 2.0032,
1268
+ "step": 16200
1269
+ },
1270
+ {
1271
+ "epoch": 27.3,
1272
+ "learning_rate": 0.00010163419233186675,
1273
+ "loss": 1.996,
1274
+ "step": 16300
1275
+ },
1276
+ {
1277
+ "epoch": 27.47,
1278
+ "learning_rate": 9.534883720930233e-05,
1279
+ "loss": 1.9746,
1280
+ "step": 16400
1281
+ },
1282
+ {
1283
+ "epoch": 27.64,
1284
+ "learning_rate": 8.906348208673789e-05,
1285
+ "loss": 1.9772,
1286
+ "step": 16500
1287
+ },
1288
+ {
1289
+ "epoch": 27.64,
1290
+ "eval_loss": 1.164720892906189,
1291
+ "eval_runtime": 425.7197,
1292
+ "eval_samples_per_second": 24.401,
1293
+ "eval_steps_per_second": 3.051,
1294
+ "eval_wer": 0.867214236824093,
1295
+ "step": 16500
1296
+ },
1297
+ {
1298
+ "epoch": 27.81,
1299
+ "learning_rate": 8.277812696417347e-05,
1300
+ "loss": 1.9759,
1301
+ "step": 16600
1302
+ },
1303
+ {
1304
+ "epoch": 27.97,
1305
+ "learning_rate": 7.649277184160904e-05,
1306
+ "loss": 1.9657,
1307
+ "step": 16700
1308
+ },
1309
+ {
1310
+ "epoch": 28.14,
1311
+ "learning_rate": 7.020741671904462e-05,
1312
+ "loss": 1.9806,
1313
+ "step": 16800
1314
+ },
1315
+ {
1316
+ "epoch": 28.31,
1317
+ "learning_rate": 6.39220615964802e-05,
1318
+ "loss": 1.9802,
1319
+ "step": 16900
1320
+ },
1321
+ {
1322
+ "epoch": 28.48,
1323
+ "learning_rate": 5.763670647391578e-05,
1324
+ "loss": 1.9585,
1325
+ "step": 17000
1326
+ },
1327
+ {
1328
+ "epoch": 28.48,
1329
+ "eval_loss": 1.1459153890609741,
1330
+ "eval_runtime": 417.3472,
1331
+ "eval_samples_per_second": 24.891,
1332
+ "eval_steps_per_second": 3.113,
1333
+ "eval_wer": 0.8634774404794938,
1334
+ "step": 17000
1335
+ },
1336
+ {
1337
+ "epoch": 28.64,
1338
+ "learning_rate": 5.135135135135136e-05,
1339
+ "loss": 1.9573,
1340
+ "step": 17100
1341
+ },
1342
+ {
1343
+ "epoch": 28.81,
1344
+ "learning_rate": 4.506599622878693e-05,
1345
+ "loss": 1.962,
1346
+ "step": 17200
1347
+ },
1348
+ {
1349
+ "epoch": 28.98,
1350
+ "learning_rate": 3.8780641106222504e-05,
1351
+ "loss": 1.9653,
1352
+ "step": 17300
1353
+ },
1354
+ {
1355
+ "epoch": 29.15,
1356
+ "learning_rate": 3.249528598365808e-05,
1357
+ "loss": 1.9631,
1358
+ "step": 17400
1359
+ },
1360
+ {
1361
+ "epoch": 29.31,
1362
+ "learning_rate": 2.620993086109365e-05,
1363
+ "loss": 1.944,
1364
+ "step": 17500
1365
+ },
1366
+ {
1367
+ "epoch": 29.31,
1368
+ "eval_loss": 1.1414194107055664,
1369
+ "eval_runtime": 414.6007,
1370
+ "eval_samples_per_second": 25.055,
1371
+ "eval_steps_per_second": 3.133,
1372
+ "eval_wer": 0.8616275412989992,
1373
+ "step": 17500
1374
+ },
1375
+ {
1376
+ "epoch": 29.48,
1377
+ "learning_rate": 1.9924575738529227e-05,
1378
+ "loss": 1.9547,
1379
+ "step": 17600
1380
+ },
1381
+ {
1382
+ "epoch": 29.65,
1383
+ "learning_rate": 1.3639220615964803e-05,
1384
+ "loss": 1.9557,
1385
+ "step": 17700
1386
+ },
1387
+ {
1388
+ "epoch": 29.82,
1389
+ "learning_rate": 7.353865493400377e-06,
1390
+ "loss": 1.9464,
1391
+ "step": 17800
1392
+ },
1393
+ {
1394
+ "epoch": 29.98,
1395
+ "learning_rate": 1.0685103708359522e-06,
1396
+ "loss": 1.9654,
1397
+ "step": 17900
1398
+ },
1399
+ {
1400
+ "epoch": 30.0,
1401
+ "step": 17910,
1402
+ "total_flos": 3.015307099908152e+20,
1403
+ "train_loss": 1.4937853462266097,
1404
+ "train_runtime": 52137.1608,
1405
+ "train_samples_per_second": 21.986,
1406
+ "train_steps_per_second": 0.344
1407
  }
1408
  ],
1409
+ "max_steps": 17910,
1410
+ "num_train_epochs": 30,
1411
+ "total_flos": 3.015307099908152e+20,
1412
  "trial_name": null,
1413
  "trial_params": null
1414
  }