marcel committed on
Commit
0daa644
1 Parent(s): 74d5185
Files changed (4) hide show
  1. optimizer.pt +1 -1
  2. pytorch_model.bin +1 -1
  3. scheduler.pt +1 -1
  4. trainer_state.json +227 -3
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2097409e22b1fab03277ae466406bfb2f48efaa2987c211dc571d6aeb8354e54
3
  size 2490339591
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b0cbfe228f29e909f61ba70ddc910e6e4fa3b3e4fd586609bf658a29a5c3b944
3
  size 2490339591
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:11e8cefb76774bf50b86c1bc9486e5ec95567c8f849a411556992285820275c2
3
  size 1262065048
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d94dfee73ef732a05a4ecf0c7d027d89711722bce6f408c40b54fd07a5866fa9
3
  size 1262065048
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e58b5205a60751338b8db83fc32e97ce49b5dbb4327655a27ca0de316c1b3f76
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:01f8d3a80f7b9dd9e7c50e7b2de049b01c32890f1166f11413fd758673a399cb
3
  size 623
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 21.9735503560529,
5
- "global_step": 21600,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -762,11 +762,235 @@
762
  "eval_samples_per_second": 7.866,
763
  "eval_wer": 0.3369164619164619,
764
  "step": 21600
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
765
  }
766
  ],
767
  "max_steps": 29490,
768
  "num_train_epochs": 30,
769
- "total_flos": 1.1986301431319814e+20,
770
  "trial_name": null,
771
  "trial_params": null
772
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 28.484231943031535,
5
+ "global_step": 28000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
762
  "eval_samples_per_second": 7.866,
763
  "eval_wer": 0.3369164619164619,
764
  "step": 21600
765
+ },
766
+ {
767
+ "epoch": 22.38,
768
+ "learning_rate": 7.750948602966539e-05,
769
+ "loss": 0.0466,
770
+ "step": 22000
771
+ },
772
+ {
773
+ "epoch": 22.38,
774
+ "eval_loss": 0.5171410441398621,
775
+ "eval_runtime": 254.3593,
776
+ "eval_samples_per_second": 7.356,
777
+ "eval_wer": 0.34121621621621623,
778
+ "step": 22000
779
+ },
780
+ {
781
+ "epoch": 22.79,
782
+ "learning_rate": 7.33701276302173e-05,
783
+ "loss": 0.0456,
784
+ "step": 22400
785
+ },
786
+ {
787
+ "epoch": 22.79,
788
+ "eval_loss": 0.5072354674339294,
789
+ "eval_runtime": 240.0638,
790
+ "eval_samples_per_second": 7.794,
791
+ "eval_wer": 0.33507371007371006,
792
+ "step": 22400
793
+ },
794
+ {
795
+ "epoch": 23.19,
796
+ "learning_rate": 6.923076923076922e-05,
797
+ "loss": 0.0459,
798
+ "step": 22800
799
+ },
800
+ {
801
+ "epoch": 23.19,
802
+ "eval_loss": 0.521458625793457,
803
+ "eval_runtime": 457.6179,
804
+ "eval_samples_per_second": 4.089,
805
+ "eval_wer": 0.34170761670761673,
806
+ "step": 22800
807
+ },
808
+ {
809
+ "epoch": 23.6,
810
+ "learning_rate": 6.509141083132114e-05,
811
+ "loss": 0.0404,
812
+ "step": 23200
813
+ },
814
+ {
815
+ "epoch": 23.6,
816
+ "eval_loss": 0.5035232901573181,
817
+ "eval_runtime": 991.4067,
818
+ "eval_samples_per_second": 1.887,
819
+ "eval_wer": 0.33746928746928745,
820
+ "step": 23200
821
+ },
822
+ {
823
+ "epoch": 24.01,
824
+ "learning_rate": 6.095205243187306e-05,
825
+ "loss": 0.0426,
826
+ "step": 23600
827
+ },
828
+ {
829
+ "epoch": 24.01,
830
+ "eval_loss": 0.5209127068519592,
831
+ "eval_runtime": 809.3899,
832
+ "eval_samples_per_second": 2.312,
833
+ "eval_wer": 0.3356879606879607,
834
+ "step": 23600
835
+ },
836
+ {
837
+ "epoch": 24.42,
838
+ "learning_rate": 5.6812694032424966e-05,
839
+ "loss": 0.0407,
840
+ "step": 24000
841
+ },
842
+ {
843
+ "epoch": 24.42,
844
+ "eval_loss": 0.521318256855011,
845
+ "eval_runtime": 685.3958,
846
+ "eval_samples_per_second": 2.73,
847
+ "eval_wer": 0.3367936117936118,
848
+ "step": 24000
849
+ },
850
+ {
851
+ "epoch": 24.82,
852
+ "learning_rate": 5.2673335632976885e-05,
853
+ "loss": 0.0384,
854
+ "step": 24400
855
+ },
856
+ {
857
+ "epoch": 24.82,
858
+ "eval_loss": 0.533201277256012,
859
+ "eval_runtime": 995.2009,
860
+ "eval_samples_per_second": 1.88,
861
+ "eval_wer": 0.3352579852579853,
862
+ "step": 24400
863
+ },
864
+ {
865
+ "epoch": 25.23,
866
+ "learning_rate": 4.8533977233528796e-05,
867
+ "loss": 0.0381,
868
+ "step": 24800
869
+ },
870
+ {
871
+ "epoch": 25.23,
872
+ "eval_loss": 0.5235264897346497,
873
+ "eval_runtime": 297.7118,
874
+ "eval_samples_per_second": 6.285,
875
+ "eval_wer": 0.33175675675675675,
876
+ "step": 24800
877
+ },
878
+ {
879
+ "epoch": 25.64,
880
+ "learning_rate": 4.4394618834080715e-05,
881
+ "loss": 0.0392,
882
+ "step": 25200
883
+ },
884
+ {
885
+ "epoch": 25.64,
886
+ "eval_loss": 0.5155506134033203,
887
+ "eval_runtime": 236.6244,
888
+ "eval_samples_per_second": 7.907,
889
+ "eval_wer": 0.3305896805896806,
890
+ "step": 25200
891
+ },
892
+ {
893
+ "epoch": 26.04,
894
+ "learning_rate": 4.025526043463263e-05,
895
+ "loss": 0.0374,
896
+ "step": 25600
897
+ },
898
+ {
899
+ "epoch": 26.04,
900
+ "eval_loss": 0.52613765001297,
901
+ "eval_runtime": 238.4752,
902
+ "eval_samples_per_second": 7.846,
903
+ "eval_wer": 0.3332309582309582,
904
+ "step": 25600
905
+ },
906
+ {
907
+ "epoch": 26.45,
908
+ "learning_rate": 3.611590203518454e-05,
909
+ "loss": 0.0352,
910
+ "step": 26000
911
+ },
912
+ {
913
+ "epoch": 26.45,
914
+ "eval_loss": 0.5194851756095886,
915
+ "eval_runtime": 237.5911,
916
+ "eval_samples_per_second": 7.875,
917
+ "eval_wer": 0.33132678132678134,
918
+ "step": 26000
919
+ },
920
+ {
921
+ "epoch": 26.86,
922
+ "learning_rate": 3.197654363573646e-05,
923
+ "loss": 0.0357,
924
+ "step": 26400
925
+ },
926
+ {
927
+ "epoch": 26.86,
928
+ "eval_loss": 0.5337009429931641,
929
+ "eval_runtime": 236.9359,
930
+ "eval_samples_per_second": 7.897,
931
+ "eval_wer": 0.33218673218673217,
932
+ "step": 26400
933
+ },
934
+ {
935
+ "epoch": 27.26,
936
+ "learning_rate": 2.7837185236288373e-05,
937
+ "loss": 0.0342,
938
+ "step": 26800
939
+ },
940
+ {
941
+ "epoch": 27.26,
942
+ "eval_loss": 0.5221392512321472,
943
+ "eval_runtime": 237.1237,
944
+ "eval_samples_per_second": 7.89,
945
+ "eval_wer": 0.3324324324324324,
946
+ "step": 26800
947
+ },
948
+ {
949
+ "epoch": 27.67,
950
+ "learning_rate": 2.3697826836840288e-05,
951
+ "loss": 0.0346,
952
+ "step": 27200
953
+ },
954
+ {
955
+ "epoch": 27.67,
956
+ "eval_loss": 0.5248429775238037,
957
+ "eval_runtime": 238.7028,
958
+ "eval_samples_per_second": 7.838,
959
+ "eval_wer": 0.32843980343980345,
960
+ "step": 27200
961
+ },
962
+ {
963
+ "epoch": 28.08,
964
+ "learning_rate": 1.95584684373922e-05,
965
+ "loss": 0.0343,
966
+ "step": 27600
967
+ },
968
+ {
969
+ "epoch": 28.08,
970
+ "eval_loss": 0.5455237030982971,
971
+ "eval_runtime": 238.0797,
972
+ "eval_samples_per_second": 7.859,
973
+ "eval_wer": 0.3278869778869779,
974
+ "step": 27600
975
+ },
976
+ {
977
+ "epoch": 28.48,
978
+ "learning_rate": 1.541911003794412e-05,
979
+ "loss": 0.034,
980
+ "step": 28000
981
+ },
982
+ {
983
+ "epoch": 28.48,
984
+ "eval_loss": 0.5291683077812195,
985
+ "eval_runtime": 300.3419,
986
+ "eval_samples_per_second": 6.23,
987
+ "eval_wer": 0.3269041769041769,
988
+ "step": 28000
989
  }
990
  ],
991
  "max_steps": 29490,
992
  "num_train_epochs": 30,
993
+ "total_flos": 1.5535560057099262e+20,
994
  "trial_name": null,
995
  "trial_params": null
996
  }