fsicoli committed on
Commit
5554b37
1 Parent(s): f2d7bc7

Upload 11 files

Browse files
config.json CHANGED
@@ -43,7 +43,7 @@
43
  "num_mel_bins": 128,
44
  "pad_token_id": 50256,
45
  "scale_embedding": false,
46
- "torch_dtype": "float16",
47
  "transformers_version": "4.37.0.dev0",
48
  "use_cache": true,
49
  "use_weighted_layer_sum": false,
 
43
  "num_mel_bins": 128,
44
  "pad_token_id": 50256,
45
  "scale_embedding": false,
46
+ "torch_dtype": "float32",
47
  "transformers_version": "4.37.0.dev0",
48
  "use_cache": true,
49
  "use_weighted_layer_sum": false,
generation_config.json CHANGED
@@ -55,7 +55,7 @@
55
  ],
56
  [
57
  2,
58
- 50359
59
  ]
60
  ],
61
  "is_multilingual": true,
@@ -161,11 +161,10 @@
161
  "<|yue|>": 50358,
162
  "<|zh|>": 50260
163
  },
164
- "max_initial_timestamp_index": 50,
165
  "max_length": 448,
166
  "no_timestamps_token_id": 50364,
167
  "pad_token_id": 50257,
168
- "prev_sot_token_id": 50362,
169
  "return_timestamps": false,
170
  "suppress_tokens": [
171
  1,
 
55
  ],
56
  [
57
  2,
58
+ 50360
59
  ]
60
  ],
61
  "is_multilingual": true,
 
161
  "<|yue|>": 50358,
162
  "<|zh|>": 50260
163
  },
164
+ "max_initial_timestamp_index": 1,
165
  "max_length": 448,
166
  "no_timestamps_token_id": 50364,
167
  "pad_token_id": 50257,
 
168
  "return_timestamps": false,
169
  "suppress_tokens": [
170
  1,
model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fedb9cc896a6388bb4f5cddb373b7392782ec965512769af969c60e1af1a4e14
3
  size 4993448880
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f878789c48bcae8bdb738a21db184b61ea25d207190b7a28b1886fd661820964
3
  size 4993448880
model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5c6b0edf383dba5a20a86c58366da587af2b11f57f3238809e22174428275ba2
3
  size 1180663192
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cebf488a1c5179e1630e18b09c98e0fc49f633401cd2860996acc46e8ce31123
3
  size 1180663192
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:80e4f108eb5557c44d32434b4917e37c96c5a7f16fb94640266fc8260e5fd15e
3
  size 12333660476
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b881cb7e883293384811c41e69a3af5ab73194ee3fd9c7fc959db40efbb8dce
3
  size 12333660476
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:49966cb04e594c410fd0e18084a8363564e6ac46de2a7e2d0b5f4cc3add8b713
3
- size 14308
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:473904b1f2366db08d341e1d5587ca85e45deb227516e6d83bdef880288fcb69
3
+ size 14244
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:063989716508e91f1978729efb82b7e19b19c1473686b3b12f8b0931dcd025b2
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a7fed81325cf3d8746c8c8e29722d1ee71a0fee7a7fbd34629d55e97abe774b
3
  size 1064
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.032258064516129,
5
  "eval_steps": 1000,
6
- "global_step": 4000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -754,255 +754,6 @@
754
  "eval_steps_per_second": 0.006,
755
  "eval_wer": 0.09970911441499677,
756
  "step": 3000
757
- },
758
- {
759
- "epoch": 0.78,
760
- "learning_rate": 6.59e-07,
761
- "loss": 0.1334,
762
- "step": 3025
763
- },
764
- {
765
- "epoch": 0.79,
766
- "learning_rate": 6.506666666666666e-07,
767
- "loss": 0.1584,
768
- "step": 3050
769
- },
770
- {
771
- "epoch": 0.79,
772
- "learning_rate": 6.423333333333333e-07,
773
- "loss": 0.1331,
774
- "step": 3075
775
- },
776
- {
777
- "epoch": 0.8,
778
- "learning_rate": 6.346666666666666e-07,
779
- "loss": 0.1226,
780
- "step": 3100
781
- },
782
- {
783
- "epoch": 0.81,
784
- "learning_rate": 6.263333333333332e-07,
785
- "loss": 0.135,
786
- "step": 3125
787
- },
788
- {
789
- "epoch": 0.81,
790
- "learning_rate": 6.18e-07,
791
- "loss": 0.1538,
792
- "step": 3150
793
- },
794
- {
795
- "epoch": 0.82,
796
- "learning_rate": 6.096666666666667e-07,
797
- "loss": 0.1405,
798
- "step": 3175
799
- },
800
- {
801
- "epoch": 0.83,
802
- "learning_rate": 6.013333333333334e-07,
803
- "loss": 0.1534,
804
- "step": 3200
805
- },
806
- {
807
- "epoch": 0.83,
808
- "learning_rate": 5.93e-07,
809
- "loss": 0.1628,
810
- "step": 3225
811
- },
812
- {
813
- "epoch": 0.84,
814
- "learning_rate": 5.846666666666667e-07,
815
- "loss": 0.2002,
816
- "step": 3250
817
- },
818
- {
819
- "epoch": 0.85,
820
- "learning_rate": 5.763333333333333e-07,
821
- "loss": 0.1155,
822
- "step": 3275
823
- },
824
- {
825
- "epoch": 0.85,
826
- "learning_rate": 5.679999999999999e-07,
827
- "loss": 0.179,
828
- "step": 3300
829
- },
830
- {
831
- "epoch": 0.86,
832
- "learning_rate": 5.596666666666666e-07,
833
- "loss": 0.1471,
834
- "step": 3325
835
- },
836
- {
837
- "epoch": 0.86,
838
- "learning_rate": 5.513333333333333e-07,
839
- "loss": 0.1386,
840
- "step": 3350
841
- },
842
- {
843
- "epoch": 0.87,
844
- "learning_rate": 5.43e-07,
845
- "loss": 0.1185,
846
- "step": 3375
847
- },
848
- {
849
- "epoch": 0.88,
850
- "learning_rate": 5.346666666666666e-07,
851
- "loss": 0.1418,
852
- "step": 3400
853
- },
854
- {
855
- "epoch": 0.88,
856
- "learning_rate": 5.263333333333333e-07,
857
- "loss": 0.1015,
858
- "step": 3425
859
- },
860
- {
861
- "epoch": 0.89,
862
- "learning_rate": 5.18e-07,
863
- "loss": 0.1525,
864
- "step": 3450
865
- },
866
- {
867
- "epoch": 0.9,
868
- "learning_rate": 5.096666666666667e-07,
869
- "loss": 0.1212,
870
- "step": 3475
871
- },
872
- {
873
- "epoch": 0.9,
874
- "learning_rate": 5.013333333333333e-07,
875
- "loss": 0.1623,
876
- "step": 3500
877
- },
878
- {
879
- "epoch": 0.91,
880
- "learning_rate": 4.93e-07,
881
- "loss": 0.1178,
882
- "step": 3525
883
- },
884
- {
885
- "epoch": 0.92,
886
- "learning_rate": 4.846666666666667e-07,
887
- "loss": 0.1618,
888
- "step": 3550
889
- },
890
- {
891
- "epoch": 0.92,
892
- "learning_rate": 4.763333333333333e-07,
893
- "loss": 0.1187,
894
- "step": 3575
895
- },
896
- {
897
- "epoch": 0.93,
898
- "learning_rate": 4.68e-07,
899
- "loss": 0.1382,
900
- "step": 3600
901
- },
902
- {
903
- "epoch": 0.94,
904
- "learning_rate": 4.5966666666666667e-07,
905
- "loss": 0.1211,
906
- "step": 3625
907
- },
908
- {
909
- "epoch": 0.94,
910
- "learning_rate": 4.5133333333333327e-07,
911
- "loss": 0.1582,
912
- "step": 3650
913
- },
914
- {
915
- "epoch": 0.95,
916
- "learning_rate": 4.43e-07,
917
- "loss": 0.1201,
918
- "step": 3675
919
- },
920
- {
921
- "epoch": 0.95,
922
- "learning_rate": 4.3466666666666664e-07,
923
- "loss": 0.1571,
924
- "step": 3700
925
- },
926
- {
927
- "epoch": 0.96,
928
- "learning_rate": 4.263333333333333e-07,
929
- "loss": 0.1247,
930
- "step": 3725
931
- },
932
- {
933
- "epoch": 0.97,
934
- "learning_rate": 4.1799999999999996e-07,
935
- "loss": 0.1648,
936
- "step": 3750
937
- },
938
- {
939
- "epoch": 0.97,
940
- "learning_rate": 4.0966666666666667e-07,
941
- "loss": 0.1313,
942
- "step": 3775
943
- },
944
- {
945
- "epoch": 0.98,
946
- "learning_rate": 4.0133333333333333e-07,
947
- "loss": 0.1528,
948
- "step": 3800
949
- },
950
- {
951
- "epoch": 0.99,
952
- "learning_rate": 3.93e-07,
953
- "loss": 0.1252,
954
- "step": 3825
955
- },
956
- {
957
- "epoch": 0.99,
958
- "learning_rate": 3.8466666666666664e-07,
959
- "loss": 0.1674,
960
- "step": 3850
961
- },
962
- {
963
- "epoch": 1.0,
964
- "learning_rate": 3.7633333333333335e-07,
965
- "loss": 0.1192,
966
- "step": 3875
967
- },
968
- {
969
- "epoch": 1.01,
970
- "learning_rate": 3.6799999999999996e-07,
971
- "loss": 0.1054,
972
- "step": 3900
973
- },
974
- {
975
- "epoch": 1.01,
976
- "learning_rate": 3.5966666666666667e-07,
977
- "loss": 0.1353,
978
- "step": 3925
979
- },
980
- {
981
- "epoch": 1.02,
982
- "learning_rate": 3.5133333333333333e-07,
983
- "loss": 0.1004,
984
- "step": 3950
985
- },
986
- {
987
- "epoch": 1.03,
988
- "learning_rate": 3.43e-07,
989
- "loss": 0.1382,
990
- "step": 3975
991
- },
992
- {
993
- "epoch": 1.03,
994
- "learning_rate": 3.3466666666666665e-07,
995
- "loss": 0.0821,
996
- "step": 4000
997
- },
998
- {
999
- "epoch": 1.03,
1000
- "eval_loss": 0.13208560645580292,
1001
- "eval_runtime": 194476.4747,
1002
- "eval_samples_per_second": 0.048,
1003
- "eval_steps_per_second": 0.006,
1004
- "eval_wer": 1.000743374272786,
1005
- "step": 4000
1006
  }
1007
  ],
1008
  "logging_steps": 25,
@@ -1010,7 +761,7 @@
1010
  "num_input_tokens_seen": 0,
1011
  "num_train_epochs": 2,
1012
  "save_steps": 1000,
1013
- "total_flos": 1.0871315081330688e+20,
1014
  "train_batch_size": 8,
1015
  "trial_name": null,
1016
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.7741935483870968,
5
  "eval_steps": 1000,
6
+ "global_step": 3000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
754
  "eval_steps_per_second": 0.006,
755
  "eval_wer": 0.09970911441499677,
756
  "step": 3000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
757
  }
758
  ],
759
  "logging_steps": 25,
 
761
  "num_input_tokens_seen": 0,
762
  "num_train_epochs": 2,
763
  "save_steps": 1000,
764
+ "total_flos": 8.153995935744e+19,
765
  "train_batch_size": 8,
766
  "trial_name": null,
767
  "trial_params": null
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8f665d772c299f2cf7e58571e6ed463fc30fe6fa27220a8ca365ab05354ee77f
3
  size 4856
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7333482a7c3f07f0c77f8cb008cb1f8732d2c0821c26cc6972eb4ef6e08368a7
3
  size 4856