marinone94 commited on
Commit
fc2f4cd
β€’
1 Parent(s): 6012db9

Training in progress, step 2700

Browse files
{checkpoint-1500 β†’ checkpoint-2600}/config.json RENAMED
File without changes
{checkpoint-1500 β†’ checkpoint-2600}/optimizer.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:59d9e961a47ab1f00dd325d343d6b3a2c56a477a890edfff182fab52ad7d1961
3
  size 2490337809
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:446a2b6be86378473522da6f8340e8995c1f93f317dbf12dd45b200fe2a7cc10
3
  size 2490337809
{checkpoint-1500 β†’ checkpoint-2600}/preprocessor_config.json RENAMED
File without changes
{checkpoint-1600 β†’ checkpoint-2600}/pytorch_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:577857a0d5063da507a1ec462e3b3478b15131b86a5760953cbe69453453fe49
3
  size 1262063089
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91a8acf504932720b905a27cdf2f6f7fe41c15519686ef918a30514dacbc0cee
3
  size 1262063089
{checkpoint-1600 β†’ checkpoint-2600}/rng_state.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:32079a7ecfdaedcafafe628415e8789ccbf50b12b21038d2ca0bc5ad993d93dd
3
  size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c9452e603549f04f4903c5fb527b7bfc83e1fbaca4162bb7fdd612e22dd24235
3
  size 14567
{checkpoint-1600 β†’ checkpoint-2600}/scaler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:45cc99d310862a4180e2b863b5866b132e05e88ff0bb7c56d68724e754f0549e
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:981227375b8c6d2439b9fd2664e9cd784500649faa2e607c97ed38e07fc17be3
3
  size 559
{checkpoint-1500 β†’ checkpoint-2600}/scheduler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a6d0ac6fcb7f6a54e2016654eacc790bd572cb9149b97999e246f807a8519d7d
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7e6505acc346b949554e5cbd939e1a9cf4a757843e611b8b1dc1e36690134723
3
  size 623
{checkpoint-1600 β†’ checkpoint-2600}/trainer_state.json RENAMED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 17.579234972677597,
5
- "global_step": 1600,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -630,11 +630,401 @@
630
  "eval_steps_per_second": 0.795,
631
  "eval_wer": 0.17885325007096226,
632
  "step": 1600
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
633
  }
634
  ],
635
  "max_steps": 4550,
636
  "num_train_epochs": 50,
637
- "total_flos": 2.4776551891108737e+19,
638
  "trial_name": null,
639
  "trial_params": null
640
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 28.56830601092896,
5
+ "global_step": 2600,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
630
  "eval_steps_per_second": 0.795,
631
  "eval_wer": 0.17885325007096226,
632
  "step": 1600
633
+ },
634
+ {
635
+ "epoch": 17.8,
636
+ "learning_rate": 0.00020250000000000002,
637
+ "loss": 1.0322,
638
+ "step": 1620
639
+ },
640
+ {
641
+ "epoch": 18.02,
642
+ "learning_rate": 0.000205,
643
+ "loss": 1.0176,
644
+ "step": 1640
645
+ },
646
+ {
647
+ "epoch": 18.24,
648
+ "learning_rate": 0.0002075,
649
+ "loss": 1.0272,
650
+ "step": 1660
651
+ },
652
+ {
653
+ "epoch": 18.46,
654
+ "learning_rate": 0.00021,
655
+ "loss": 0.9675,
656
+ "step": 1680
657
+ },
658
+ {
659
+ "epoch": 18.68,
660
+ "learning_rate": 0.0002125,
661
+ "loss": 0.9816,
662
+ "step": 1700
663
+ },
664
+ {
665
+ "epoch": 18.68,
666
+ "eval_loss": 0.19402356445789337,
667
+ "eval_runtime": 194.75,
668
+ "eval_samples_per_second": 24.868,
669
+ "eval_steps_per_second": 0.78,
670
+ "eval_wer": 0.18010218563724098,
671
+ "step": 1700
672
+ },
673
+ {
674
+ "epoch": 18.9,
675
+ "learning_rate": 0.000215,
676
+ "loss": 1.0023,
677
+ "step": 1720
678
+ },
679
+ {
680
+ "epoch": 19.12,
681
+ "learning_rate": 0.0002175,
682
+ "loss": 1.013,
683
+ "step": 1740
684
+ },
685
+ {
686
+ "epoch": 19.34,
687
+ "learning_rate": 0.00022,
688
+ "loss": 0.9664,
689
+ "step": 1760
690
+ },
691
+ {
692
+ "epoch": 19.56,
693
+ "learning_rate": 0.00022250000000000001,
694
+ "loss": 0.9736,
695
+ "step": 1780
696
+ },
697
+ {
698
+ "epoch": 19.78,
699
+ "learning_rate": 0.00022500000000000002,
700
+ "loss": 0.9814,
701
+ "step": 1800
702
+ },
703
+ {
704
+ "epoch": 19.78,
705
+ "eval_loss": 0.18596723675727844,
706
+ "eval_runtime": 188.4639,
707
+ "eval_samples_per_second": 25.697,
708
+ "eval_steps_per_second": 0.807,
709
+ "eval_wer": 0.16667612829974454,
710
+ "step": 1800
711
+ },
712
+ {
713
+ "epoch": 19.99,
714
+ "learning_rate": 0.0002275,
715
+ "loss": 1.0064,
716
+ "step": 1820
717
+ },
718
+ {
719
+ "epoch": 20.22,
720
+ "learning_rate": 0.00023,
721
+ "loss": 0.9583,
722
+ "step": 1840
723
+ },
724
+ {
725
+ "epoch": 20.44,
726
+ "learning_rate": 0.0002325,
727
+ "loss": 0.9646,
728
+ "step": 1860
729
+ },
730
+ {
731
+ "epoch": 20.66,
732
+ "learning_rate": 0.000235,
733
+ "loss": 0.9762,
734
+ "step": 1880
735
+ },
736
+ {
737
+ "epoch": 20.87,
738
+ "learning_rate": 0.0002375,
739
+ "loss": 0.9787,
740
+ "step": 1900
741
+ },
742
+ {
743
+ "epoch": 20.87,
744
+ "eval_loss": 0.18878202140331268,
745
+ "eval_runtime": 188.244,
746
+ "eval_samples_per_second": 25.727,
747
+ "eval_steps_per_second": 0.807,
748
+ "eval_wer": 0.16420664206642066,
749
+ "step": 1900
750
+ },
751
+ {
752
+ "epoch": 21.1,
753
+ "learning_rate": 0.00024,
754
+ "loss": 1.0218,
755
+ "step": 1920
756
+ },
757
+ {
758
+ "epoch": 21.32,
759
+ "learning_rate": 0.00024249999999999999,
760
+ "loss": 0.9505,
761
+ "step": 1940
762
+ },
763
+ {
764
+ "epoch": 21.54,
765
+ "learning_rate": 0.000245,
766
+ "loss": 0.9554,
767
+ "step": 1960
768
+ },
769
+ {
770
+ "epoch": 21.75,
771
+ "learning_rate": 0.0002475,
772
+ "loss": 0.9728,
773
+ "step": 1980
774
+ },
775
+ {
776
+ "epoch": 21.97,
777
+ "learning_rate": 0.00025,
778
+ "loss": 0.9699,
779
+ "step": 2000
780
+ },
781
+ {
782
+ "epoch": 21.97,
783
+ "eval_loss": 0.18748582899570465,
784
+ "eval_runtime": 190.7875,
785
+ "eval_samples_per_second": 25.384,
786
+ "eval_steps_per_second": 0.797,
787
+ "eval_wer": 0.17042293499858074,
788
+ "step": 2000
789
+ },
790
+ {
791
+ "epoch": 22.2,
792
+ "learning_rate": 0.00024803921568627453,
793
+ "loss": 0.9624,
794
+ "step": 2020
795
+ },
796
+ {
797
+ "epoch": 22.42,
798
+ "learning_rate": 0.000246078431372549,
799
+ "loss": 0.9419,
800
+ "step": 2040
801
+ },
802
+ {
803
+ "epoch": 22.63,
804
+ "learning_rate": 0.00024411764705882354,
805
+ "loss": 0.9563,
806
+ "step": 2060
807
+ },
808
+ {
809
+ "epoch": 22.85,
810
+ "learning_rate": 0.00024215686274509804,
811
+ "loss": 0.9643,
812
+ "step": 2080
813
+ },
814
+ {
815
+ "epoch": 23.08,
816
+ "learning_rate": 0.00024019607843137256,
817
+ "loss": 0.9616,
818
+ "step": 2100
819
+ },
820
+ {
821
+ "epoch": 23.08,
822
+ "eval_loss": 0.18017500638961792,
823
+ "eval_runtime": 191.3932,
824
+ "eval_samples_per_second": 25.304,
825
+ "eval_steps_per_second": 0.794,
826
+ "eval_wer": 0.16173715583309678,
827
+ "step": 2100
828
+ },
829
+ {
830
+ "epoch": 23.3,
831
+ "learning_rate": 0.00023823529411764704,
832
+ "loss": 0.917,
833
+ "step": 2120
834
+ },
835
+ {
836
+ "epoch": 23.51,
837
+ "learning_rate": 0.00023627450980392157,
838
+ "loss": 0.945,
839
+ "step": 2140
840
+ },
841
+ {
842
+ "epoch": 23.73,
843
+ "learning_rate": 0.0002343137254901961,
844
+ "loss": 0.9243,
845
+ "step": 2160
846
+ },
847
+ {
848
+ "epoch": 23.95,
849
+ "learning_rate": 0.0002323529411764706,
850
+ "loss": 0.9288,
851
+ "step": 2180
852
+ },
853
+ {
854
+ "epoch": 24.17,
855
+ "learning_rate": 0.0002303921568627451,
856
+ "loss": 0.9378,
857
+ "step": 2200
858
+ },
859
+ {
860
+ "epoch": 24.17,
861
+ "eval_loss": 0.17928896844387054,
862
+ "eval_runtime": 189.7619,
863
+ "eval_samples_per_second": 25.521,
864
+ "eval_steps_per_second": 0.801,
865
+ "eval_wer": 0.1577348850411581,
866
+ "step": 2200
867
+ },
868
+ {
869
+ "epoch": 24.39,
870
+ "learning_rate": 0.0002284313725490196,
871
+ "loss": 0.9071,
872
+ "step": 2220
873
+ },
874
+ {
875
+ "epoch": 24.61,
876
+ "learning_rate": 0.00022647058823529412,
877
+ "loss": 0.9054,
878
+ "step": 2240
879
+ },
880
+ {
881
+ "epoch": 24.83,
882
+ "learning_rate": 0.0002246078431372549,
883
+ "loss": 0.9303,
884
+ "step": 2260
885
+ },
886
+ {
887
+ "epoch": 25.05,
888
+ "learning_rate": 0.00022264705882352943,
889
+ "loss": 0.9376,
890
+ "step": 2280
891
+ },
892
+ {
893
+ "epoch": 25.27,
894
+ "learning_rate": 0.0002206862745098039,
895
+ "loss": 0.888,
896
+ "step": 2300
897
+ },
898
+ {
899
+ "epoch": 25.27,
900
+ "eval_loss": 0.17642559111118317,
901
+ "eval_runtime": 187.0437,
902
+ "eval_samples_per_second": 25.892,
903
+ "eval_steps_per_second": 0.813,
904
+ "eval_wer": 0.15452739142776042,
905
+ "step": 2300
906
+ },
907
+ {
908
+ "epoch": 25.49,
909
+ "learning_rate": 0.00021872549019607843,
910
+ "loss": 0.9135,
911
+ "step": 2320
912
+ },
913
+ {
914
+ "epoch": 25.71,
915
+ "learning_rate": 0.00021676470588235294,
916
+ "loss": 0.9094,
917
+ "step": 2340
918
+ },
919
+ {
920
+ "epoch": 25.93,
921
+ "learning_rate": 0.00021480392156862746,
922
+ "loss": 0.8879,
923
+ "step": 2360
924
+ },
925
+ {
926
+ "epoch": 26.15,
927
+ "learning_rate": 0.00021284313725490196,
928
+ "loss": 0.929,
929
+ "step": 2380
930
+ },
931
+ {
932
+ "epoch": 26.37,
933
+ "learning_rate": 0.00021088235294117647,
934
+ "loss": 0.8942,
935
+ "step": 2400
936
+ },
937
+ {
938
+ "epoch": 26.37,
939
+ "eval_loss": 0.16744859516620636,
940
+ "eval_runtime": 190.6796,
941
+ "eval_samples_per_second": 25.399,
942
+ "eval_steps_per_second": 0.797,
943
+ "eval_wer": 0.14916264547260857,
944
+ "step": 2400
945
+ },
946
+ {
947
+ "epoch": 26.59,
948
+ "learning_rate": 0.000208921568627451,
949
+ "loss": 0.8717,
950
+ "step": 2420
951
+ },
952
+ {
953
+ "epoch": 26.81,
954
+ "learning_rate": 0.0002069607843137255,
955
+ "loss": 0.8952,
956
+ "step": 2440
957
+ },
958
+ {
959
+ "epoch": 27.03,
960
+ "learning_rate": 0.000205,
961
+ "loss": 0.8849,
962
+ "step": 2460
963
+ },
964
+ {
965
+ "epoch": 27.25,
966
+ "learning_rate": 0.00020313725490196078,
967
+ "loss": 0.8781,
968
+ "step": 2480
969
+ },
970
+ {
971
+ "epoch": 27.47,
972
+ "learning_rate": 0.0002011764705882353,
973
+ "loss": 0.8701,
974
+ "step": 2500
975
+ },
976
+ {
977
+ "epoch": 27.47,
978
+ "eval_loss": 0.1738910973072052,
979
+ "eval_runtime": 187.7205,
980
+ "eval_samples_per_second": 25.799,
981
+ "eval_steps_per_second": 0.81,
982
+ "eval_wer": 0.1511779733181947,
983
+ "step": 2500
984
+ },
985
+ {
986
+ "epoch": 27.69,
987
+ "learning_rate": 0.0001992156862745098,
988
+ "loss": 0.8674,
989
+ "step": 2520
990
+ },
991
+ {
992
+ "epoch": 27.91,
993
+ "learning_rate": 0.00019725490196078433,
994
+ "loss": 0.8752,
995
+ "step": 2540
996
+ },
997
+ {
998
+ "epoch": 28.13,
999
+ "learning_rate": 0.0001952941176470588,
1000
+ "loss": 0.91,
1001
+ "step": 2560
1002
+ },
1003
+ {
1004
+ "epoch": 28.35,
1005
+ "learning_rate": 0.00019333333333333333,
1006
+ "loss": 0.8693,
1007
+ "step": 2580
1008
+ },
1009
+ {
1010
+ "epoch": 28.57,
1011
+ "learning_rate": 0.00019137254901960786,
1012
+ "loss": 0.8555,
1013
+ "step": 2600
1014
+ },
1015
+ {
1016
+ "epoch": 28.57,
1017
+ "eval_loss": 0.1689654141664505,
1018
+ "eval_runtime": 196.2665,
1019
+ "eval_samples_per_second": 24.676,
1020
+ "eval_steps_per_second": 0.774,
1021
+ "eval_wer": 0.14459267669599773,
1022
+ "step": 2600
1023
  }
1024
  ],
1025
  "max_steps": 4550,
1026
  "num_train_epochs": 50,
1027
+ "total_flos": 4.023333138533005e+19,
1028
  "trial_name": null,
1029
  "trial_params": null
1030
  }
{checkpoint-1500 β†’ checkpoint-2600}/training_args.bin RENAMED
File without changes
{checkpoint-1600 β†’ checkpoint-2700}/config.json RENAMED
File without changes
{checkpoint-1600 β†’ checkpoint-2700}/optimizer.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c01538eb4bd0c6517e58a239a0edad2ba490de930aaa3809478517bc233bd339
3
  size 2490337809
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:865f44abdd5045a8acf626b3b992e8fdc983a2d4a3ba5dd6901b27ee83a91484
3
  size 2490337809
{checkpoint-1600 β†’ checkpoint-2700}/preprocessor_config.json RENAMED
File without changes
{checkpoint-1500 β†’ checkpoint-2700}/pytorch_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:22c881807ec99ef3592933c11ac25ff336dab472ee8669960c08f6bb055cb11c
3
  size 1262063089
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7dbbb4678046f14503aa95af2d02c0cc6a2fb7071e468967b463cf71594e2914
3
  size 1262063089
{checkpoint-1500 β†’ checkpoint-2700}/rng_state.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:26a16fb06bd95550585922193ec31f3a9a5878ee3e77efc87c1b8bdb4a21d142
3
- size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9213f1eed5d75326eef26e352d35aa78d567c2885250ab304bce7d59c834157a
3
+ size 14631
{checkpoint-1500 β†’ checkpoint-2700}/scaler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:18b77bf231929dcf1b0885a05986daead51666518206e8005c99cd124832d9f9
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7970626d52baf6e9c965f8503c1a6318b3c55b1f5ba73fdd8b8209e28d5d529e
3
  size 559
{checkpoint-1600 β†’ checkpoint-2700}/scheduler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8a5627b7bd7a23e3bd273e778f370a9ce7118842c3616c776314b677675af7a5
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:89b50e7b743fe5caf734a580430810fca62d132f602e2a17e8e075ce20b82717
3
  size 623
{checkpoint-1500 β†’ checkpoint-2700}/trainer_state.json RENAMED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 16.48087431693989,
5
- "global_step": 1500,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -591,11 +591,479 @@
591
  "eval_steps_per_second": 0.804,
592
  "eval_wer": 0.18944081748509792,
593
  "step": 1500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
594
  }
595
  ],
596
  "max_steps": 4550,
597
  "num_train_epochs": 50,
598
- "total_flos": 2.31847014520748e+19,
599
  "trial_name": null,
600
  "trial_params": null
601
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 29.666666666666668,
5
+ "global_step": 2700,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
591
  "eval_steps_per_second": 0.804,
592
  "eval_wer": 0.18944081748509792,
593
  "step": 1500
594
+ },
595
+ {
596
+ "epoch": 16.7,
597
+ "learning_rate": 0.00019,
598
+ "loss": 1.0204,
599
+ "step": 1520
600
+ },
601
+ {
602
+ "epoch": 16.92,
603
+ "learning_rate": 0.00019250000000000002,
604
+ "loss": 1.0356,
605
+ "step": 1540
606
+ },
607
+ {
608
+ "epoch": 17.14,
609
+ "learning_rate": 0.00019500000000000002,
610
+ "loss": 1.0538,
611
+ "step": 1560
612
+ },
613
+ {
614
+ "epoch": 17.36,
615
+ "learning_rate": 0.0001975,
616
+ "loss": 1.0228,
617
+ "step": 1580
618
+ },
619
+ {
620
+ "epoch": 17.58,
621
+ "learning_rate": 0.0002,
622
+ "loss": 1.0193,
623
+ "step": 1600
624
+ },
625
+ {
626
+ "epoch": 17.58,
627
+ "eval_loss": 0.1991206556558609,
628
+ "eval_runtime": 191.1717,
629
+ "eval_samples_per_second": 25.333,
630
+ "eval_steps_per_second": 0.795,
631
+ "eval_wer": 0.17885325007096226,
632
+ "step": 1600
633
+ },
634
+ {
635
+ "epoch": 17.8,
636
+ "learning_rate": 0.00020250000000000002,
637
+ "loss": 1.0322,
638
+ "step": 1620
639
+ },
640
+ {
641
+ "epoch": 18.02,
642
+ "learning_rate": 0.000205,
643
+ "loss": 1.0176,
644
+ "step": 1640
645
+ },
646
+ {
647
+ "epoch": 18.24,
648
+ "learning_rate": 0.0002075,
649
+ "loss": 1.0272,
650
+ "step": 1660
651
+ },
652
+ {
653
+ "epoch": 18.46,
654
+ "learning_rate": 0.00021,
655
+ "loss": 0.9675,
656
+ "step": 1680
657
+ },
658
+ {
659
+ "epoch": 18.68,
660
+ "learning_rate": 0.0002125,
661
+ "loss": 0.9816,
662
+ "step": 1700
663
+ },
664
+ {
665
+ "epoch": 18.68,
666
+ "eval_loss": 0.19402356445789337,
667
+ "eval_runtime": 194.75,
668
+ "eval_samples_per_second": 24.868,
669
+ "eval_steps_per_second": 0.78,
670
+ "eval_wer": 0.18010218563724098,
671
+ "step": 1700
672
+ },
673
+ {
674
+ "epoch": 18.9,
675
+ "learning_rate": 0.000215,
676
+ "loss": 1.0023,
677
+ "step": 1720
678
+ },
679
+ {
680
+ "epoch": 19.12,
681
+ "learning_rate": 0.0002175,
682
+ "loss": 1.013,
683
+ "step": 1740
684
+ },
685
+ {
686
+ "epoch": 19.34,
687
+ "learning_rate": 0.00022,
688
+ "loss": 0.9664,
689
+ "step": 1760
690
+ },
691
+ {
692
+ "epoch": 19.56,
693
+ "learning_rate": 0.00022250000000000001,
694
+ "loss": 0.9736,
695
+ "step": 1780
696
+ },
697
+ {
698
+ "epoch": 19.78,
699
+ "learning_rate": 0.00022500000000000002,
700
+ "loss": 0.9814,
701
+ "step": 1800
702
+ },
703
+ {
704
+ "epoch": 19.78,
705
+ "eval_loss": 0.18596723675727844,
706
+ "eval_runtime": 188.4639,
707
+ "eval_samples_per_second": 25.697,
708
+ "eval_steps_per_second": 0.807,
709
+ "eval_wer": 0.16667612829974454,
710
+ "step": 1800
711
+ },
712
+ {
713
+ "epoch": 19.99,
714
+ "learning_rate": 0.0002275,
715
+ "loss": 1.0064,
716
+ "step": 1820
717
+ },
718
+ {
719
+ "epoch": 20.22,
720
+ "learning_rate": 0.00023,
721
+ "loss": 0.9583,
722
+ "step": 1840
723
+ },
724
+ {
725
+ "epoch": 20.44,
726
+ "learning_rate": 0.0002325,
727
+ "loss": 0.9646,
728
+ "step": 1860
729
+ },
730
+ {
731
+ "epoch": 20.66,
732
+ "learning_rate": 0.000235,
733
+ "loss": 0.9762,
734
+ "step": 1880
735
+ },
736
+ {
737
+ "epoch": 20.87,
738
+ "learning_rate": 0.0002375,
739
+ "loss": 0.9787,
740
+ "step": 1900
741
+ },
742
+ {
743
+ "epoch": 20.87,
744
+ "eval_loss": 0.18878202140331268,
745
+ "eval_runtime": 188.244,
746
+ "eval_samples_per_second": 25.727,
747
+ "eval_steps_per_second": 0.807,
748
+ "eval_wer": 0.16420664206642066,
749
+ "step": 1900
750
+ },
751
+ {
752
+ "epoch": 21.1,
753
+ "learning_rate": 0.00024,
754
+ "loss": 1.0218,
755
+ "step": 1920
756
+ },
757
+ {
758
+ "epoch": 21.32,
759
+ "learning_rate": 0.00024249999999999999,
760
+ "loss": 0.9505,
761
+ "step": 1940
762
+ },
763
+ {
764
+ "epoch": 21.54,
765
+ "learning_rate": 0.000245,
766
+ "loss": 0.9554,
767
+ "step": 1960
768
+ },
769
+ {
770
+ "epoch": 21.75,
771
+ "learning_rate": 0.0002475,
772
+ "loss": 0.9728,
773
+ "step": 1980
774
+ },
775
+ {
776
+ "epoch": 21.97,
777
+ "learning_rate": 0.00025,
778
+ "loss": 0.9699,
779
+ "step": 2000
780
+ },
781
+ {
782
+ "epoch": 21.97,
783
+ "eval_loss": 0.18748582899570465,
784
+ "eval_runtime": 190.7875,
785
+ "eval_samples_per_second": 25.384,
786
+ "eval_steps_per_second": 0.797,
787
+ "eval_wer": 0.17042293499858074,
788
+ "step": 2000
789
+ },
790
+ {
791
+ "epoch": 22.2,
792
+ "learning_rate": 0.00024803921568627453,
793
+ "loss": 0.9624,
794
+ "step": 2020
795
+ },
796
+ {
797
+ "epoch": 22.42,
798
+ "learning_rate": 0.000246078431372549,
799
+ "loss": 0.9419,
800
+ "step": 2040
801
+ },
802
+ {
803
+ "epoch": 22.63,
804
+ "learning_rate": 0.00024411764705882354,
805
+ "loss": 0.9563,
806
+ "step": 2060
807
+ },
808
+ {
809
+ "epoch": 22.85,
810
+ "learning_rate": 0.00024215686274509804,
811
+ "loss": 0.9643,
812
+ "step": 2080
813
+ },
814
+ {
815
+ "epoch": 23.08,
816
+ "learning_rate": 0.00024019607843137256,
817
+ "loss": 0.9616,
818
+ "step": 2100
819
+ },
820
+ {
821
+ "epoch": 23.08,
822
+ "eval_loss": 0.18017500638961792,
823
+ "eval_runtime": 191.3932,
824
+ "eval_samples_per_second": 25.304,
825
+ "eval_steps_per_second": 0.794,
826
+ "eval_wer": 0.16173715583309678,
827
+ "step": 2100
828
+ },
829
+ {
830
+ "epoch": 23.3,
831
+ "learning_rate": 0.00023823529411764704,
832
+ "loss": 0.917,
833
+ "step": 2120
834
+ },
835
+ {
836
+ "epoch": 23.51,
837
+ "learning_rate": 0.00023627450980392157,
838
+ "loss": 0.945,
839
+ "step": 2140
840
+ },
841
+ {
842
+ "epoch": 23.73,
843
+ "learning_rate": 0.0002343137254901961,
844
+ "loss": 0.9243,
845
+ "step": 2160
846
+ },
847
+ {
848
+ "epoch": 23.95,
849
+ "learning_rate": 0.0002323529411764706,
850
+ "loss": 0.9288,
851
+ "step": 2180
852
+ },
853
+ {
854
+ "epoch": 24.17,
855
+ "learning_rate": 0.0002303921568627451,
856
+ "loss": 0.9378,
857
+ "step": 2200
858
+ },
859
+ {
860
+ "epoch": 24.17,
861
+ "eval_loss": 0.17928896844387054,
862
+ "eval_runtime": 189.7619,
863
+ "eval_samples_per_second": 25.521,
864
+ "eval_steps_per_second": 0.801,
865
+ "eval_wer": 0.1577348850411581,
866
+ "step": 2200
867
+ },
868
+ {
869
+ "epoch": 24.39,
870
+ "learning_rate": 0.0002284313725490196,
871
+ "loss": 0.9071,
872
+ "step": 2220
873
+ },
874
+ {
875
+ "epoch": 24.61,
876
+ "learning_rate": 0.00022647058823529412,
877
+ "loss": 0.9054,
878
+ "step": 2240
879
+ },
880
+ {
881
+ "epoch": 24.83,
882
+ "learning_rate": 0.0002246078431372549,
883
+ "loss": 0.9303,
884
+ "step": 2260
885
+ },
886
+ {
887
+ "epoch": 25.05,
888
+ "learning_rate": 0.00022264705882352943,
889
+ "loss": 0.9376,
890
+ "step": 2280
891
+ },
892
+ {
893
+ "epoch": 25.27,
894
+ "learning_rate": 0.0002206862745098039,
895
+ "loss": 0.888,
896
+ "step": 2300
897
+ },
898
+ {
899
+ "epoch": 25.27,
900
+ "eval_loss": 0.17642559111118317,
901
+ "eval_runtime": 187.0437,
902
+ "eval_samples_per_second": 25.892,
903
+ "eval_steps_per_second": 0.813,
904
+ "eval_wer": 0.15452739142776042,
905
+ "step": 2300
906
+ },
907
+ {
908
+ "epoch": 25.49,
909
+ "learning_rate": 0.00021872549019607843,
910
+ "loss": 0.9135,
911
+ "step": 2320
912
+ },
913
+ {
914
+ "epoch": 25.71,
915
+ "learning_rate": 0.00021676470588235294,
916
+ "loss": 0.9094,
917
+ "step": 2340
918
+ },
919
+ {
920
+ "epoch": 25.93,
921
+ "learning_rate": 0.00021480392156862746,
922
+ "loss": 0.8879,
923
+ "step": 2360
924
+ },
925
+ {
926
+ "epoch": 26.15,
927
+ "learning_rate": 0.00021284313725490196,
928
+ "loss": 0.929,
929
+ "step": 2380
930
+ },
931
+ {
932
+ "epoch": 26.37,
933
+ "learning_rate": 0.00021088235294117647,
934
+ "loss": 0.8942,
935
+ "step": 2400
936
+ },
937
+ {
938
+ "epoch": 26.37,
939
+ "eval_loss": 0.16744859516620636,
940
+ "eval_runtime": 190.6796,
941
+ "eval_samples_per_second": 25.399,
942
+ "eval_steps_per_second": 0.797,
943
+ "eval_wer": 0.14916264547260857,
944
+ "step": 2400
945
+ },
946
+ {
947
+ "epoch": 26.59,
948
+ "learning_rate": 0.000208921568627451,
949
+ "loss": 0.8717,
950
+ "step": 2420
951
+ },
952
+ {
953
+ "epoch": 26.81,
954
+ "learning_rate": 0.0002069607843137255,
955
+ "loss": 0.8952,
956
+ "step": 2440
957
+ },
958
+ {
959
+ "epoch": 27.03,
960
+ "learning_rate": 0.000205,
961
+ "loss": 0.8849,
962
+ "step": 2460
963
+ },
964
+ {
965
+ "epoch": 27.25,
966
+ "learning_rate": 0.00020313725490196078,
967
+ "loss": 0.8781,
968
+ "step": 2480
969
+ },
970
+ {
971
+ "epoch": 27.47,
972
+ "learning_rate": 0.0002011764705882353,
973
+ "loss": 0.8701,
974
+ "step": 2500
975
+ },
976
+ {
977
+ "epoch": 27.47,
978
+ "eval_loss": 0.1738910973072052,
979
+ "eval_runtime": 187.7205,
980
+ "eval_samples_per_second": 25.799,
981
+ "eval_steps_per_second": 0.81,
982
+ "eval_wer": 0.1511779733181947,
983
+ "step": 2500
984
+ },
985
+ {
986
+ "epoch": 27.69,
987
+ "learning_rate": 0.0001992156862745098,
988
+ "loss": 0.8674,
989
+ "step": 2520
990
+ },
991
+ {
992
+ "epoch": 27.91,
993
+ "learning_rate": 0.00019725490196078433,
994
+ "loss": 0.8752,
995
+ "step": 2540
996
+ },
997
+ {
998
+ "epoch": 28.13,
999
+ "learning_rate": 0.0001952941176470588,
1000
+ "loss": 0.91,
1001
+ "step": 2560
1002
+ },
1003
+ {
1004
+ "epoch": 28.35,
1005
+ "learning_rate": 0.00019333333333333333,
1006
+ "loss": 0.8693,
1007
+ "step": 2580
1008
+ },
1009
+ {
1010
+ "epoch": 28.57,
1011
+ "learning_rate": 0.00019137254901960786,
1012
+ "loss": 0.8555,
1013
+ "step": 2600
1014
+ },
1015
+ {
1016
+ "epoch": 28.57,
1017
+ "eval_loss": 0.1689654141664505,
1018
+ "eval_runtime": 196.2665,
1019
+ "eval_samples_per_second": 24.676,
1020
+ "eval_steps_per_second": 0.774,
1021
+ "eval_wer": 0.14459267669599773,
1022
+ "step": 2600
1023
+ },
1024
+ {
1025
+ "epoch": 28.79,
1026
+ "learning_rate": 0.00018941176470588236,
1027
+ "loss": 0.8796,
1028
+ "step": 2620
1029
+ },
1030
+ {
1031
+ "epoch": 29.01,
1032
+ "learning_rate": 0.00018745098039215686,
1033
+ "loss": 0.9045,
1034
+ "step": 2640
1035
+ },
1036
+ {
1037
+ "epoch": 29.23,
1038
+ "learning_rate": 0.00018549019607843137,
1039
+ "loss": 0.8515,
1040
+ "step": 2660
1041
+ },
1042
+ {
1043
+ "epoch": 29.45,
1044
+ "learning_rate": 0.0001835294117647059,
1045
+ "loss": 0.861,
1046
+ "step": 2680
1047
+ },
1048
+ {
1049
+ "epoch": 29.67,
1050
+ "learning_rate": 0.0001815686274509804,
1051
+ "loss": 0.8513,
1052
+ "step": 2700
1053
+ },
1054
+ {
1055
+ "epoch": 29.67,
1056
+ "eval_loss": 0.16488835215568542,
1057
+ "eval_runtime": 189.1938,
1058
+ "eval_samples_per_second": 25.598,
1059
+ "eval_steps_per_second": 0.803,
1060
+ "eval_wer": 0.14774340051092819,
1061
+ "step": 2700
1062
  }
1063
  ],
1064
  "max_steps": 4550,
1065
  "num_train_epochs": 50,
1066
+ "total_flos": 4.176183820424104e+19,
1067
  "trial_name": null,
1068
  "trial_params": null
1069
  }
{checkpoint-1600 β†’ checkpoint-2700}/training_args.bin RENAMED
File without changes