jiangdongwei committed on
Commit
7564d88
·
1 Parent(s): 0b63d0f

Add model files

Browse files
Files changed (29) hide show
  1. RESULTS.md +48 -0
  2. asr_train_asr_e_branchformer_size256_mlp1024_e12_mactrue_raw_en_bpe500_sp/RESULTS.md +48 -0
  3. README.md → asr_train_asr_e_branchformer_size256_mlp1024_e12_mactrue_raw_en_bpe500_sp/config.yaml +0 -113
  4. asr_train_asr_e_branchformer_size256_mlp1024_e12_mactrue_raw_en_bpe500_sp/images/acc.png +0 -0
  5. asr_train_asr_e_branchformer_size256_mlp1024_e12_mactrue_raw_en_bpe500_sp/images/backward_time.png +0 -0
  6. asr_train_asr_e_branchformer_size256_mlp1024_e12_mactrue_raw_en_bpe500_sp/images/cer.png +0 -0
  7. asr_train_asr_e_branchformer_size256_mlp1024_e12_mactrue_raw_en_bpe500_sp/images/cer_ctc.png +0 -0
  8. asr_train_asr_e_branchformer_size256_mlp1024_e12_mactrue_raw_en_bpe500_sp/images/forward_time.png +0 -0
  9. asr_train_asr_e_branchformer_size256_mlp1024_e12_mactrue_raw_en_bpe500_sp/images/gpu_max_cached_mem_GB.png +0 -0
  10. asr_train_asr_e_branchformer_size256_mlp1024_e12_mactrue_raw_en_bpe500_sp/images/iter_time.png +0 -0
  11. asr_train_asr_e_branchformer_size256_mlp1024_e12_mactrue_raw_en_bpe500_sp/images/loss.png +0 -0
  12. asr_train_asr_e_branchformer_size256_mlp1024_e12_mactrue_raw_en_bpe500_sp/images/loss_att.png +0 -0
  13. asr_train_asr_e_branchformer_size256_mlp1024_e12_mactrue_raw_en_bpe500_sp/images/loss_ctc.png +0 -0
  14. asr_train_asr_e_branchformer_size256_mlp1024_e12_mactrue_raw_en_bpe500_sp/images/optim0_lr0.png +0 -0
  15. asr_train_asr_e_branchformer_size256_mlp1024_e12_mactrue_raw_en_bpe500_sp/images/optim_step_time.png +0 -0
  16. asr_train_asr_e_branchformer_size256_mlp1024_e12_mactrue_raw_en_bpe500_sp/images/train_time.png +0 -0
  17. asr_train_asr_e_branchformer_size256_mlp1024_e12_mactrue_raw_en_bpe500_sp/images/wer.png +0 -0
  18. asr_train_asr_e_branchformer_size256_mlp1024_e12_mactrue_raw_en_bpe500_sp/valid.acc.ave_10best.pth +3 -0
  19. lm_train_lm_en_bpe500/config.yaml +627 -0
  20. lm_train_lm_en_bpe500/images/backward_time.png +0 -0
  21. lm_train_lm_en_bpe500/images/forward_time.png +0 -0
  22. lm_train_lm_en_bpe500/images/gpu_max_cached_mem_GB.png +0 -0
  23. lm_train_lm_en_bpe500/images/iter_time.png +0 -0
  24. lm_train_lm_en_bpe500/images/loss.png +0 -0
  25. lm_train_lm_en_bpe500/images/optim0_lr0.png +0 -0
  26. lm_train_lm_en_bpe500/images/optim_step_time.png +0 -0
  27. lm_train_lm_en_bpe500/images/train_time.png +0 -0
  28. lm_train_lm_en_bpe500/perplexity_test/ppl +1 -0
  29. lm_train_lm_en_bpe500/valid.loss.ave_10best.pth +3 -0
RESULTS.md ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!-- Generated by scripts/utils/show_asr_result.sh -->
2
+ # RESULTS
3
+ ## Environments
4
+ - date: `Tue Apr 11 01:15:36 EDT 2023`
5
+ - python version: `3.8.16 (default, Mar 2 2023, 03:21:46) [GCC 11.2.0]`
6
+ - espnet version: `espnet 202301`
7
+ - pytorch version: `pytorch 1.8.1`
8
+ - Git hash: `b0cceeac2ecd330e8270789cef945e49058858fa`
9
+ - Commit date: `Thu Mar 30 08:26:54 2023 -0400`
10
+
11
+ ## exp/asr_train_asr_e_branchformer_size256_mlp1024_e12_mactrue_raw_en_bpe500_sp
12
+ ### WER
13
+
14
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
15
+ |---|---|---|---|---|---|---|---|---|
16
+ |decode_lm_lm_train_lm_en_bpe500_valid.loss.ave_asr_model_valid.acc.ave/test|1155|27500|94.2|2.5|3.3|0.6|6.4|59.2|
17
+
18
+ ### CER
19
+
20
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
21
+ |---|---|---|---|---|---|---|---|---|
22
+ |decode_lm_lm_train_lm_en_bpe500_valid.loss.ave_asr_model_valid.acc.ave/test|1155|145066|96.8|0.5|2.7|0.6|3.8|59.2|
23
+
24
+ ### TER
25
+
26
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
27
+ |---|---|---|---|---|---|---|---|---|
28
+ |decode_lm_lm_train_lm_en_bpe500_valid.loss.ave_asr_model_valid.acc.ave/test|1155|54206|95.8|1.6|2.6|0.5|4.7|59.2|
29
+
30
+ ## exp/asr_train_asr_e_branchformer_size256_mlp1024_e12_mactrue_raw_en_bpe500_sp/decode_lm_lm_train_lm_en_bpe500_valid.loss.ave_asr_model_valid.acc.ave
31
+ ### WER
32
+
33
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
34
+ |---|---|---|---|---|---|---|---|---|
35
+ |org/dev|507|17783|93.6|3.1|3.3|0.9|7.3|69.0|
36
+
37
+ ### CER
38
+
39
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
40
+ |---|---|---|---|---|---|---|---|---|
41
+ |org/dev|507|95429|96.5|0.7|2.8|0.8|4.4|69.0|
42
+
43
+ ### TER
44
+
45
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
46
+ |---|---|---|---|---|---|---|---|---|
47
+ |org/dev|507|36002|95.4|2.0|2.6|0.8|5.5|69.0|
48
+
asr_train_asr_e_branchformer_size256_mlp1024_e12_mactrue_raw_en_bpe500_sp/RESULTS.md ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!-- Generated by scripts/utils/show_asr_result.sh -->
2
+ # RESULTS
3
+ ## Environments
4
+ - date: `Tue Apr 11 01:15:36 EDT 2023`
5
+ - python version: `3.8.16 (default, Mar 2 2023, 03:21:46) [GCC 11.2.0]`
6
+ - espnet version: `espnet 202301`
7
+ - pytorch version: `pytorch 1.8.1`
8
+ - Git hash: `b0cceeac2ecd330e8270789cef945e49058858fa`
9
+ - Commit date: `Thu Mar 30 08:26:54 2023 -0400`
10
+
11
+ ## exp/asr_train_asr_e_branchformer_size256_mlp1024_e12_mactrue_raw_en_bpe500_sp
12
+ ### WER
13
+
14
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
15
+ |---|---|---|---|---|---|---|---|---|
16
+ |decode_lm_lm_train_lm_en_bpe500_valid.loss.ave_asr_model_valid.acc.ave/test|1155|27500|94.2|2.5|3.3|0.6|6.4|59.2|
17
+
18
+ ### CER
19
+
20
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
21
+ |---|---|---|---|---|---|---|---|---|
22
+ |decode_lm_lm_train_lm_en_bpe500_valid.loss.ave_asr_model_valid.acc.ave/test|1155|145066|96.8|0.5|2.7|0.6|3.8|59.2|
23
+
24
+ ### TER
25
+
26
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
27
+ |---|---|---|---|---|---|---|---|---|
28
+ |decode_lm_lm_train_lm_en_bpe500_valid.loss.ave_asr_model_valid.acc.ave/test|1155|54206|95.8|1.6|2.6|0.5|4.7|59.2|
29
+
30
+ ## exp/asr_train_asr_e_branchformer_size256_mlp1024_e12_mactrue_raw_en_bpe500_sp/decode_lm_lm_train_lm_en_bpe500_valid.loss.ave_asr_model_valid.acc.ave
31
+ ### WER
32
+
33
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
34
+ |---|---|---|---|---|---|---|---|---|
35
+ |org/dev|507|17783|93.6|3.1|3.3|0.9|7.3|69.0|
36
+
37
+ ### CER
38
+
39
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
40
+ |---|---|---|---|---|---|---|---|---|
41
+ |org/dev|507|95429|96.5|0.7|2.8|0.8|4.4|69.0|
42
+
43
+ ### TER
44
+
45
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
46
+ |---|---|---|---|---|---|---|---|---|
47
+ |org/dev|507|36002|95.4|2.0|2.6|0.8|5.5|69.0|
48
+
README.md → asr_train_asr_e_branchformer_size256_mlp1024_e12_mactrue_raw_en_bpe500_sp/config.yaml RENAMED
@@ -1,80 +1,3 @@
1
- ---
2
- datasets:
3
- - LIUM/tedlium
4
- language:
5
- - en
6
- metrics:
7
- - wer
8
- library_name: espnet
9
- pipeline_tag: automatic-speech-recognition
10
- ---
11
-
12
- ## ESPnet2 ASR model
13
-
14
- ### `espnet/tedlium3`
15
-
16
- This model was trained by Dongwei Jiang using tedlium3 recipe in [espnet](https://github.com/espnet/espnet/).
17
-
18
- ### Demo: How to use in ESPnet2
19
-
20
- ```bash
21
- cd espnet
22
- git checkout ff841366229d539eb74d23ac999cae7c0cc62cad
23
- pip install -e .
24
- cd egs2/tedlium3/asr1
25
- ./run.sh --skip_data_prep false --skip_train true --download_model espnet/dongwei_tedlium3_asr_e-branchformer_external_lm
26
- ```
27
-
28
- <!-- Generated by scripts/utils/show_asr_result.sh -->
29
- # RESULTS
30
- ## Environments
31
- - date: `Tue Apr 11 01:15:36 EDT 2023`
32
- - python version: `3.8.16 (default, Mar 2 2023, 03:21:46) [GCC 11.2.0]`
33
- - espnet version: `espnet 202301`
34
- - pytorch version: `pytorch 1.8.1`
35
- - Git hash: `b0cceeac2ecd330e8270789cef945e49058858fa`
36
- - Commit date: `Thu Mar 30 08:26:54 2023 -0400`
37
-
38
-
39
- ## exp/asr_train_asr_e_branchformer_size256_mlp1024_e12_mactrue_raw_en_bpe500_sp
40
- ### WER
41
- |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
42
- |---|---|---|---|---|---|---|---|---|
43
- |decode_lm_lm_train_lm_en_bpe500_valid.loss.ave_asr_model_valid.acc.ave/test|1155|27500|94.2|2.5|3.3|0.6|6.4|59.2|
44
-
45
- ### CER
46
- |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
47
- |---|---|---|---|---|---|---|---|---|
48
- |decode_lm_lm_train_lm_en_bpe500_valid.loss.ave_asr_model_valid.acc.ave/test|1155|145066|96.8|0.5|2.7|0.6|3.8|59.2|
49
-
50
- ### TER
51
- |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
52
- |---|---|---|---|---|---|---|---|---|
53
- |decode_lm_lm_train_lm_en_bpe500_valid.loss.ave_asr_model_valid.acc.ave/test|1155|54206|95.8|1.6|2.6|0.5|4.7|59.2|
54
-
55
- ## exp/asr_train_asr_e_branchformer_size256_mlp1024_e12_mactrue_raw_en_bpe500_sp/decode_lm_lm_train_lm_en_bpe500_valid.loss.ave_asr_model_valid.acc.ave
56
- ### WER
57
- |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
58
- |---|---|---|---|---|---|---|---|---|
59
- |org/dev|507|17783|93.6|3.1|3.3|0.9|7.3|69.0|
60
-
61
- ### CER
62
- |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
63
- |---|---|---|---|---|---|---|---|---|
64
- |org/dev|507|95429|96.5|0.7|2.8|0.8|4.4|69.0|
65
-
66
- ### TER
67
- |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
68
- |---|---|---|---|---|---|---|---|---|
69
- |org/dev|507|36002|95.4|2.0|2.6|0.8|5.5|69.0|
70
-
71
-
72
-
73
- ## ASR config
74
-
75
- <details><summary>expand</summary>
76
-
77
- ```
78
  config: conf/tuning/train_asr_e_branchformer_size256_mlp1024_e12_mactrue.yaml
79
  print_config: false
80
  log_level: INFO
@@ -784,39 +707,3 @@ required:
784
  - token_list
785
  version: '202301'
786
  distributed: true
787
- ```
788
-
789
- </details>
790
-
791
-
792
-
793
- ### Citing ESPnet
794
-
795
- ```BibTex
796
- @inproceedings{watanabe2018espnet,
797
- author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
798
- title={{ESPnet}: End-to-End Speech Processing Toolkit},
799
- year={2018},
800
- booktitle={Proceedings of Interspeech},
801
- pages={2207--2211},
802
- doi={10.21437/Interspeech.2018-1456},
803
- url={http://dx.doi.org/10.21437/Interspeech.2018-1456}
804
- }
805
-
806
-
807
-
808
-
809
- ```
810
-
811
- or arXiv:
812
-
813
- ```bibtex
814
- @misc{watanabe2018espnet,
815
- title={ESPnet: End-to-End Speech Processing Toolkit},
816
- author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
817
- year={2018},
818
- eprint={1804.00015},
819
- archivePrefix={arXiv},
820
- primaryClass={cs.CL}
821
- }
822
- ```
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  config: conf/tuning/train_asr_e_branchformer_size256_mlp1024_e12_mactrue.yaml
2
  print_config: false
3
  log_level: INFO
 
707
  - token_list
708
  version: '202301'
709
  distributed: true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
asr_train_asr_e_branchformer_size256_mlp1024_e12_mactrue_raw_en_bpe500_sp/images/acc.png ADDED
asr_train_asr_e_branchformer_size256_mlp1024_e12_mactrue_raw_en_bpe500_sp/images/backward_time.png ADDED
asr_train_asr_e_branchformer_size256_mlp1024_e12_mactrue_raw_en_bpe500_sp/images/cer.png ADDED
asr_train_asr_e_branchformer_size256_mlp1024_e12_mactrue_raw_en_bpe500_sp/images/cer_ctc.png ADDED
asr_train_asr_e_branchformer_size256_mlp1024_e12_mactrue_raw_en_bpe500_sp/images/forward_time.png ADDED
asr_train_asr_e_branchformer_size256_mlp1024_e12_mactrue_raw_en_bpe500_sp/images/gpu_max_cached_mem_GB.png ADDED
asr_train_asr_e_branchformer_size256_mlp1024_e12_mactrue_raw_en_bpe500_sp/images/iter_time.png ADDED
asr_train_asr_e_branchformer_size256_mlp1024_e12_mactrue_raw_en_bpe500_sp/images/loss.png ADDED
asr_train_asr_e_branchformer_size256_mlp1024_e12_mactrue_raw_en_bpe500_sp/images/loss_att.png ADDED
asr_train_asr_e_branchformer_size256_mlp1024_e12_mactrue_raw_en_bpe500_sp/images/loss_ctc.png ADDED
asr_train_asr_e_branchformer_size256_mlp1024_e12_mactrue_raw_en_bpe500_sp/images/optim0_lr0.png ADDED
asr_train_asr_e_branchformer_size256_mlp1024_e12_mactrue_raw_en_bpe500_sp/images/optim_step_time.png ADDED
asr_train_asr_e_branchformer_size256_mlp1024_e12_mactrue_raw_en_bpe500_sp/images/train_time.png ADDED
asr_train_asr_e_branchformer_size256_mlp1024_e12_mactrue_raw_en_bpe500_sp/images/wer.png ADDED
asr_train_asr_e_branchformer_size256_mlp1024_e12_mactrue_raw_en_bpe500_sp/valid.acc.ave_10best.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:82fbcf415b6d04085f3a40cfc2cd5ed2434f0a77aea8b782be77287424f9008c
3
+ size 140364237
lm_train_lm_en_bpe500/config.yaml ADDED
@@ -0,0 +1,627 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ config: conf/train_lm.yaml
2
+ print_config: false
3
+ log_level: INFO
4
+ dry_run: false
5
+ iterator_type: sequence
6
+ output_dir: exp/lm_train_lm_en_bpe500
7
+ ngpu: 1
8
+ seed: 0
9
+ num_workers: 1
10
+ num_att_plot: 3
11
+ dist_backend: nccl
12
+ dist_init_method: env://
13
+ dist_world_size: 4
14
+ dist_rank: 0
15
+ local_rank: 0
16
+ dist_master_addr: localhost
17
+ dist_master_port: 60077
18
+ dist_launcher: null
19
+ multiprocessing_distributed: true
20
+ unused_parameters: false
21
+ sharded_ddp: false
22
+ cudnn_enabled: true
23
+ cudnn_benchmark: false
24
+ cudnn_deterministic: true
25
+ collect_stats: false
26
+ write_collected_feats: false
27
+ max_epoch: 50
28
+ patience: null
29
+ val_scheduler_criterion:
30
+ - valid
31
+ - loss
32
+ early_stopping_criterion:
33
+ - valid
34
+ - loss
35
+ - min
36
+ best_model_criterion:
37
+ - - valid
38
+ - loss
39
+ - min
40
+ keep_nbest_models: 10
41
+ nbest_averaging_interval: 0
42
+ grad_clip: 5.0
43
+ grad_clip_type: 2.0
44
+ grad_noise: false
45
+ accum_grad: 1
46
+ no_forward_run: false
47
+ resume: true
48
+ train_dtype: float32
49
+ use_amp: false
50
+ log_interval: null
51
+ use_matplotlib: true
52
+ use_tensorboard: true
53
+ create_graph_in_tensorboard: false
54
+ use_wandb: false
55
+ wandb_project: null
56
+ wandb_id: null
57
+ wandb_entity: null
58
+ wandb_name: null
59
+ wandb_model_log_interval: -1
60
+ detect_anomaly: false
61
+ pretrain_path: null
62
+ init_param: []
63
+ ignore_init_mismatch: false
64
+ freeze_param: []
65
+ num_iters_per_epoch: null
66
+ batch_size: 20
67
+ valid_batch_size: null
68
+ batch_bins: 20000000
69
+ valid_batch_bins: null
70
+ train_shape_file:
71
+ - exp/lm_stats_en_bpe500/train/text_shape.bpe
72
+ valid_shape_file:
73
+ - exp/lm_stats_en_bpe500/valid/text_shape.bpe
74
+ batch_type: numel
75
+ valid_batch_type: null
76
+ fold_length:
77
+ - 150
78
+ sort_in_batch: descending
79
+ sort_batch: descending
80
+ multiple_iterator: false
81
+ chunk_length: 500
82
+ chunk_shift_ratio: 0.5
83
+ num_cache_chunks: 1024
84
+ train_data_path_and_name_and_type:
85
+ - - dump/raw/lm_train.txt
86
+ - text
87
+ - text
88
+ valid_data_path_and_name_and_type:
89
+ - - dump/raw/dev/text
90
+ - text
91
+ - text
92
+ allow_variable_data_keys: false
93
+ max_cache_size: 0.0
94
+ max_cache_fd: 32
95
+ valid_max_cache_size: null
96
+ exclude_weight_decay: false
97
+ exclude_weight_decay_conf: {}
98
+ optim: adam
99
+ optim_conf:
100
+ lr: 0.001
101
+ scheduler: warmuplr
102
+ scheduler_conf:
103
+ warmup_steps: 25000
104
+ token_list:
105
+ - <blank>
106
+ - <unk>
107
+ - '[unk]'
108
+ - ▁
109
+ - s
110
+ - ▁the
111
+ - t
112
+ - ▁and
113
+ - e
114
+ - ▁a
115
+ - ▁to
116
+ - d
117
+ - ▁of
118
+ - ''''
119
+ - n
120
+ - ing
121
+ - ▁in
122
+ - ▁that
123
+ - re
124
+ - ▁i
125
+ - c
126
+ - o
127
+ - u
128
+ - ▁we
129
+ - y
130
+ - a
131
+ - ed
132
+ - ▁it
133
+ - ▁you
134
+ - i
135
+ - m
136
+ - ▁is
137
+ - er
138
+ - p
139
+ - g
140
+ - w
141
+ - al
142
+ - ▁this
143
+ - ▁so
144
+ - f
145
+ - le
146
+ - b
147
+ - ar
148
+ - ▁f
149
+ - k
150
+ - ▁c
151
+ - r
152
+ - in
153
+ - or
154
+ - ▁for
155
+ - ▁be
156
+ - ve
157
+ - ▁was
158
+ - te
159
+ - th
160
+ - ▁do
161
+ - es
162
+ - ly
163
+ - ▁they
164
+ - ro
165
+ - ▁are
166
+ - ▁with
167
+ - ▁have
168
+ - an
169
+ - v
170
+ - ch
171
+ - ▁on
172
+ - se
173
+ - lo
174
+ - ▁but
175
+ - en
176
+ - ri
177
+ - li
178
+ - ▁what
179
+ - it
180
+ - ic
181
+ - ▁can
182
+ - l
183
+ - ur
184
+ - ce
185
+ - ent
186
+ - ▁me
187
+ - ▁b
188
+ - ▁ma
189
+ - ▁he
190
+ - ra
191
+ - ▁de
192
+ - ll
193
+ - at
194
+ - ▁about
195
+ - ▁one
196
+ - ▁not
197
+ - ne
198
+ - ▁all
199
+ - ▁my
200
+ - ter
201
+ - el
202
+ - il
203
+ - ▁there
204
+ - 'on'
205
+ - ad
206
+ - ▁mo
207
+ - ol
208
+ - ation
209
+ - nd
210
+ - ▁like
211
+ - ▁people
212
+ - po
213
+ - ▁at
214
+ - ▁us
215
+ - us
216
+ - ▁g
217
+ - ci
218
+ - ▁our
219
+ - h
220
+ - pe
221
+ - ▁as
222
+ - ▁from
223
+ - vi
224
+ - ▁if
225
+ - as
226
+ - ▁ex
227
+ - ▁con
228
+ - ▁an
229
+ - ver
230
+ - ▁out
231
+ - ▁just
232
+ - un
233
+ - ▁see
234
+ - la
235
+ - ▁di
236
+ - ▁when
237
+ - ▁now
238
+ - ▁p
239
+ - ha
240
+ - ▁who
241
+ - ck
242
+ - ▁these
243
+ - ▁because
244
+ - ▁or
245
+ - ▁know
246
+ - ion
247
+ - ir
248
+ - ▁co
249
+ - ▁up
250
+ - ▁pa
251
+ - ment
252
+ - ▁think
253
+ - ge
254
+ - ▁how
255
+ - ide
256
+ - ▁by
257
+ - ul
258
+ - ity
259
+ - ▁go
260
+ - ▁get
261
+ - ▁ho
262
+ - ive
263
+ - ▁very
264
+ - ate
265
+ - ng
266
+ - ▁no
267
+ - ▁had
268
+ - ac
269
+ - ▁bo
270
+ - ry
271
+ - ▁more
272
+ - ▁them
273
+ - ▁some
274
+ - mi
275
+ - ▁time
276
+ - ▁your
277
+ - me
278
+ - ▁going
279
+ - op
280
+ - am
281
+ - per
282
+ - et
283
+ - ▁would
284
+ - ru
285
+ - ure
286
+ - ti
287
+ - ist
288
+ - ▁their
289
+ - x
290
+ - ▁were
291
+ - ▁look
292
+ - ▁pro
293
+ - ▁which
294
+ - ▁work
295
+ - tion
296
+ - est
297
+ - ty
298
+ - im
299
+ - z
300
+ - ta
301
+ - ▁want
302
+ - ▁two
303
+ - age
304
+ - ▁really
305
+ - om
306
+ - ma
307
+ - ers
308
+ - ting
309
+ - ▁world
310
+ - co
311
+ - ▁way
312
+ - ▁don
313
+ - wa
314
+ - hi
315
+ - tra
316
+ - ▁la
317
+ - ▁here
318
+ - able
319
+ - lu
320
+ - ▁other
321
+ - mo
322
+ - ies
323
+ - ▁has
324
+ - ▁could
325
+ - j
326
+ - ▁make
327
+ - ally
328
+ - ▁sta
329
+ - ten
330
+ - ▁will
331
+ - ▁un
332
+ - ig
333
+ - ▁where
334
+ - ▁into
335
+ - ke
336
+ - ▁than
337
+ - ▁comp
338
+ - ▁actually
339
+ - tic
340
+ - sh
341
+ - ▁did
342
+ - tor
343
+ - fa
344
+ - ical
345
+ - ▁she
346
+ - ▁years
347
+ - ▁say
348
+ - one
349
+ - ted
350
+ - ▁things
351
+ - ph
352
+ - ▁new
353
+ - ▁pre
354
+ - ▁any
355
+ - ▁thousand
356
+ - ▁been
357
+ - ▁inter
358
+ - ▁his
359
+ - ▁com
360
+ - ▁need
361
+ - nce
362
+ - ▁right
363
+ - ▁take
364
+ - ▁even
365
+ - ▁over
366
+ - ▁start
367
+ - ▁hundred
368
+ - min
369
+ - ▁sp
370
+ - ▁those
371
+ - ▁car
372
+ - ▁then
373
+ - mp
374
+ - ap
375
+ - ▁first
376
+ - les
377
+ - ize
378
+ - ▁every
379
+ - ba
380
+ - ▁something
381
+ - ▁well
382
+ - ard
383
+ - ▁str
384
+ - ▁back
385
+ - und
386
+ - ia
387
+ - pl
388
+ - ki
389
+ - ho
390
+ - ▁call
391
+ - ▁most
392
+ - ▁also
393
+ - bi
394
+ - ▁thing
395
+ - ▁life
396
+ - um
397
+ - ▁said
398
+ - ▁kind
399
+ - ▁lot
400
+ - ▁much
401
+ - va
402
+ - ▁ra
403
+ - ▁little
404
+ - ▁dr
405
+ - ▁got
406
+ - ▁come
407
+ - ful
408
+ - ▁talk
409
+ - ▁part
410
+ - ▁day
411
+ - ant
412
+ - ction
413
+ - ▁happen
414
+ - ▁only
415
+ - ▁many
416
+ - ▁wo
417
+ - pri
418
+ - ▁her
419
+ - ▁br
420
+ - qui
421
+ - ▁mean
422
+ - ▁three
423
+ - iv
424
+ - ▁different
425
+ - ugh
426
+ - ain
427
+ - ▁human
428
+ - ance
429
+ - ▁change
430
+ - ▁let
431
+ - ▁real
432
+ - ▁show
433
+ - ▁good
434
+ - ▁around
435
+ - ▁through
436
+ - ▁jo
437
+ - bu
438
+ - ▁down
439
+ - ight
440
+ - ga
441
+ - ▁why
442
+ - ▁live
443
+ - ff
444
+ - ▁tell
445
+ - ▁put
446
+ - ▁idea
447
+ - port
448
+ - ▁same
449
+ - ▁give
450
+ - ated
451
+ - ish
452
+ - ible
453
+ - ▁though
454
+ - ious
455
+ - ▁problem
456
+ - ▁five
457
+ - par
458
+ - ▁fact
459
+ - ▁cha
460
+ - ition
461
+ - ▁year
462
+ - ▁big
463
+ - ▁plan
464
+ - ▁great
465
+ - ▁find
466
+ - ▁four
467
+ - ▁app
468
+ - ▁after
469
+ - ▁system
470
+ - ▁place
471
+ - ▁em
472
+ - ▁build
473
+ - ▁percent
474
+ - ▁again
475
+ - ▁point
476
+ - ▁learn
477
+ - ▁own
478
+ - ▁long
479
+ - ▁made
480
+ - ▁today
481
+ - ▁nine
482
+ - ities
483
+ - ▁gene
484
+ - ▁six
485
+ - ▁question
486
+ - light
487
+ - ▁should
488
+ - ▁came
489
+ - ▁feel
490
+ - ▁turn
491
+ - ▁person
492
+ - ▁end
493
+ - ▁hu
494
+ - ▁design
495
+ - ▁help
496
+ - ▁brain
497
+ - ▁last
498
+ - ▁create
499
+ - ▁important
500
+ - ▁before
501
+ - ▁high
502
+ - ▁never
503
+ - ▁trans
504
+ - ▁another
505
+ - ▁him
506
+ - ▁eight
507
+ - ▁might
508
+ - ▁understand
509
+ - ▁power
510
+ - ▁better
511
+ - q
512
+ - ▁found
513
+ - ▁play
514
+ - ▁twenty
515
+ - ▁still
516
+ - ▁school
517
+ - ▁each
518
+ - ▁seven
519
+ - ▁together
520
+ - ▁few
521
+ - ▁hand
522
+ - ▁example
523
+ - que
524
+ - ▁next
525
+ - ▁million
526
+ - ▁story
527
+ - ▁women
528
+ - ▁under
529
+ - ▁number
530
+ - ▁course
531
+ - ▁water
532
+ - ▁ago
533
+ - ▁grow
534
+ - ▁between
535
+ - ▁develop
536
+ - ▁america
537
+ - ▁sort
538
+ - ▁technology
539
+ - ▁believe
540
+ - ▁second
541
+ - ▁small
542
+ - ▁maybe
543
+ - ▁become
544
+ - press
545
+ - ▁health
546
+ - ▁space
547
+ - ▁word
548
+ - ▁hard
549
+ - ▁children
550
+ - ▁organ
551
+ - ▁always
552
+ - ▁country
553
+ - ▁reason
554
+ - ▁experience
555
+ - ▁large
556
+ - ▁everything
557
+ - ▁friend
558
+ - ▁project
559
+ - ▁computer
560
+ - ▁fifty
561
+ - ▁money
562
+ - ▁information
563
+ - graph
564
+ - ▁walk
565
+ - ization
566
+ - ▁africa
567
+ - ▁picture
568
+ - ▁process
569
+ - ▁teach
570
+ - ▁enough
571
+ - ▁elect
572
+ - ▁thirty
573
+ - '0'
574
+ - '1'
575
+ - '2'
576
+ - '9'
577
+ - '3'
578
+ - '5'
579
+ - '8'
580
+ - '4'
581
+ - '7'
582
+ - '6'
583
+ - '&'
584
+ - +
585
+ - '#'
586
+ - '@'
587
+ - '*'
588
+ - \
589
+ - ^
590
+ - R
591
+ - _
592
+ - '-'
593
+ - '%'
594
+ - '='
595
+ - $
596
+ - M
597
+ - ā
598
+ - ']'
599
+ - E
600
+ - U
601
+ - A
602
+ - G
603
+ - '['
604
+ - <sos/eos>
605
+ init: null
606
+ model_conf:
607
+ ignore_id: 0
608
+ use_preprocessor: true
609
+ token_type: bpe
610
+ bpemodel: data/en_token_list/bpe_unigram500/bpe.model
611
+ non_linguistic_symbols: null
612
+ cleaner: null
613
+ g2p: null
614
+ lm: transformer
615
+ lm_conf:
616
+ pos_enc: null
617
+ embed_unit: 128
618
+ att_unit: 512
619
+ head: 8
620
+ unit: 2048
621
+ layer: 15
622
+ dropout_rate: 0.1
623
+ required:
624
+ - output_dir
625
+ - token_list
626
+ version: '202301'
627
+ distributed: true
lm_train_lm_en_bpe500/images/backward_time.png ADDED
lm_train_lm_en_bpe500/images/forward_time.png ADDED
lm_train_lm_en_bpe500/images/gpu_max_cached_mem_GB.png ADDED
lm_train_lm_en_bpe500/images/iter_time.png ADDED
lm_train_lm_en_bpe500/images/loss.png ADDED
lm_train_lm_en_bpe500/images/optim0_lr0.png ADDED
lm_train_lm_en_bpe500/images/optim_step_time.png ADDED
lm_train_lm_en_bpe500/images/train_time.png ADDED
lm_train_lm_en_bpe500/perplexity_test/ppl ADDED
@@ -0,0 +1 @@
 
 
1
+ 10.155182315138273
lm_train_lm_en_bpe500/valid.loss.ave_10best.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ec899e08f7764595b4bf1d45cd88d21156c92c7cb9885071e53d118232970df0
3
+ size 190793638