julien-c HF staff committed on
Commit
73e9b11
1 Parent(s): e86b2a3

initial import from https://zenodo.org/record/3957940

Browse files
.gitattributes CHANGED
@@ -6,3 +6,5 @@
6
  *.tar.gz filter=lfs diff=lfs merge=lfs -text
7
  *.ot filter=lfs diff=lfs merge=lfs -text
8
  *.onnx filter=lfs diff=lfs merge=lfs -text
 
 
 
6
  *.tar.gz filter=lfs diff=lfs merge=lfs -text
7
  *.ot filter=lfs diff=lfs merge=lfs -text
8
  *.onnx filter=lfs diff=lfs merge=lfs -text
9
+ *.pth filter=lfs diff=lfs merge=lfs -text
10
+ *.model filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - espnet
4
+ - audio
5
+ - asr
6
+ language: en
7
+ datasets:
8
+ - an4
9
+ license: cc-by-4.0
10
+ inference: false
11
+ ---
12
+
13
+ ## Example ESPnet2 ASR model
14
+
15
+ ### `kamo-naoyuki/mini_an4_asr_train_raw_bpe_valid.acc.best`
16
+
17
+ ♻️ Imported from https://zenodo.org/record/3957940#.X90XNelKjkM
18
+
19
+ This model was trained by kamo-naoyuki using the mini_an4 recipe in [espnet](https://github.com/espnet/espnet/).
20
+
21
+
22
+
23
+ ### Demo: How to use in ESPnet2
24
+
25
+ ```python
26
+ # coming soon
27
+ ```
28
+
29
+ ### Citing ESPnet
30
+
31
+ ```bibtex
32
+ @inproceedings{watanabe2018espnet,
33
+ author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson {Enrique Yalta Soplin} and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
34
+ title={{ESPnet}: End-to-End Speech Processing Toolkit},
35
+ year={2018},
36
+ booktitle={Proceedings of Interspeech},
37
+ pages={2207--2211},
38
+ doi={10.21437/Interspeech.2018-1456},
39
+ url={http://dx.doi.org/10.21437/Interspeech.2018-1456}
40
+ }
41
+ @inproceedings{hayashi2020espnet,
42
+ title={{ESPnet-TTS}: Unified, reproducible, and integratable open source end-to-end text-to-speech toolkit},
43
+ author={Hayashi, Tomoki and Yamamoto, Ryuichi and Inoue, Katsuki and Yoshimura, Takenori and Watanabe, Shinji and Toda, Tomoki and Takeda, Kazuya and Zhang, Yu and Tan, Xu},
44
+ booktitle={Proceedings of IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
45
+ pages={7654--7658},
46
+ year={2020},
47
+ organization={IEEE}
48
+ }
49
+ ```
50
+
51
+ or arXiv:
52
+
53
+ ```bibtex
54
+ @misc{watanabe2018espnet,
55
+ title={ESPnet: End-to-End Speech Processing Toolkit},
56
+ author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Enrique Yalta Soplin and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
57
+ year={2018},
58
+ eprint={1804.00015},
59
+ archivePrefix={arXiv},
60
+ primaryClass={cs.CL}
61
+ }
62
+ ```
63
+
data/token_list/bpe_unigram30/bpe.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c1850425c02da50ac9c2d6efbbc1ecca8ee734cc2b45cee03e761f1b8aa1d40b
3
+ size 237983
exp/asr_stats_raw/train/feats_stats.npz ADDED
Binary file (1.4 kB). View file
 
exp/asr_train_raw_bpe/1epoch.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:34822f1cb8d9f4d25a4006e0f22013b8f5789ef46b7583c5c40a0f7a7ebf8d9b
3
+ size 33086007
exp/asr_train_raw_bpe/RESULTS.md ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!-- Generated by scripts/utils/show_asr_result.sh -->
2
+ # RESULTS
3
+ ## Environments
4
+ - date: `Thu Jul 23 03:18:58 JST 2020`
5
+ - python version: `3.7.3 (default, Mar 27 2019, 22:11:17) [GCC 7.3.0]`
6
+ - espnet version: `espnet 0.8.0`
7
+ - pytorch version: `pytorch 1.0.1`
8
+ - Git hash: `31794e2aec89e6159c1ac32643d4c09989200a30`
9
+ - Commit date: `Thu Jul 23 03:17:45 2020 +0900`
10
+
11
+ ## asr_train_raw_bpe
12
+ ### WER
13
+
14
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
15
+ |---|---|---|---|---|---|---|---|---|
16
+ |decode_test_decode_lm_train_bpe_valid.loss.best_asr_model_valid.acc.best|2|10|0.0|20.0|80.0|0.0|100.0|100.0|
17
+ |decode_test_seg_decode_lm_train_bpe_valid.loss.best_asr_model_valid.acc.best|2|10|0.0|20.0|80.0|0.0|100.0|100.0|
18
+ |decode_train_dev_decode_lm_train_bpe_valid.loss.best_asr_model_valid.acc.best|1|1|0.0|100.0|0.0|0.0|100.0|100.0|
19
+
20
+ ### CER
21
+
22
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
23
+ |---|---|---|---|---|---|---|---|---|
24
+ |decode_test_decode_lm_train_bpe_valid.loss.best_asr_model_valid.acc.best|2|67|53.7|44.8|1.5|206.0|252.2|100.0|
25
+ |decode_test_seg_decode_lm_train_bpe_valid.loss.best_asr_model_valid.acc.best|2|67|53.7|44.8|1.5|206.0|252.2|100.0|
26
+ |decode_train_dev_decode_lm_train_bpe_valid.loss.best_asr_model_valid.acc.best|1|3|33.3|66.7|0.0|1200.0|1266.7|100.0|
27
+
28
+ ### TER
29
+
30
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
31
+ |---|---|---|---|---|---|---|---|---|
32
+ |decode_test_decode_lm_train_bpe_valid.loss.best_asr_model_valid.acc.best|2|45|15.6|82.2|2.2|208.9|293.3|100.0|
33
+ |decode_test_seg_decode_lm_train_bpe_valid.loss.best_asr_model_valid.acc.best|2|45|15.6|82.2|2.2|208.9|293.3|100.0|
34
+ |decode_train_dev_decode_lm_train_bpe_valid.loss.best_asr_model_valid.acc.best|1|4|25.0|75.0|0.0|550.0|625.0|100.0|
35
+
exp/asr_train_raw_bpe/config.yaml ADDED
@@ -0,0 +1,164 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ accum_grad: 1
2
+ allow_variable_data_keys: false
3
+ batch_bins: 1000000
4
+ batch_size: 20
5
+ batch_type: folded
6
+ best_model_criterion:
7
+ - - train
8
+ - loss
9
+ - min
10
+ - - valid
11
+ - loss
12
+ - min
13
+ - - train
14
+ - acc
15
+ - max
16
+ - - valid
17
+ - acc
18
+ - max
19
+ bpemodel: /Users/gibbon/Desktop/espnet_model_zoo/espnet_model_zoo/3962e0807e68f78b712263dc013036d3/data/token_list/bpe_unigram30/bpe.model
20
+ chunk_length: 500
21
+ chunk_shift_ratio: 0.5
22
+ cleaner: null
23
+ collect_stats: false
24
+ config: null
25
+ ctc_conf:
26
+ ctc_type: builtin
27
+ dropout_rate: 0.0
28
+ reduce: true
29
+ cudnn_benchmark: false
30
+ cudnn_deterministic: true
31
+ cudnn_enabled: true
32
+ decoder: rnn
33
+ decoder_conf: {}
34
+ dist_backend: nccl
35
+ dist_init_method: env://
36
+ dist_launcher: null
37
+ dist_master_addr: null
38
+ dist_master_port: null
39
+ dist_rank: null
40
+ dist_world_size: null
41
+ distributed: false
42
+ dry_run: false
43
+ early_stopping_criterion:
44
+ - valid
45
+ - loss
46
+ - min
47
+ encoder: rnn
48
+ encoder_conf: {}
49
+ fold_length:
50
+ - 80000
51
+ - 150
52
+ frontend: default
53
+ frontend_conf:
54
+ fs: 16k
55
+ g2p: null
56
+ grad_clip: 5.0
57
+ grad_noise: false
58
+ init: null
59
+ input_size: null
60
+ iterator_type: sequence
61
+ keep_nbest_models: 10
62
+ local_rank: 0
63
+ log_interval: null
64
+ log_level: INFO
65
+ max_cache_size: 0.0
66
+ max_epoch: 40
67
+ model_conf:
68
+ ctc_weight: 0.5
69
+ ignore_id: -1
70
+ length_normalized_loss: false
71
+ lsm_weight: 0.0
72
+ report_cer: true
73
+ report_wer: true
74
+ sym_blank: <blank>
75
+ sym_space: <space>
76
+ multiple_iterator: false
77
+ multiprocessing_distributed: false
78
+ ngpu: 1
79
+ no_forward_run: false
80
+ non_linguistic_symbols: null
81
+ normalize: global_mvn
82
+ normalize_conf:
83
+ stats_file: /Users/gibbon/Desktop/espnet_model_zoo/espnet_model_zoo/3962e0807e68f78b712263dc013036d3/exp/asr_stats_raw/train/feats_stats.npz
84
+ num_att_plot: 3
85
+ num_cache_chunks: 1024
86
+ num_iters_per_epoch: null
87
+ num_workers: 1
88
+ optim: adadelta
89
+ optim_conf: {}
90
+ output_dir: exp/asr_train_raw_bpe
91
+ patience: null
92
+ pretrain_key: []
93
+ pretrain_path: []
94
+ print_config: false
95
+ required:
96
+ - output_dir
97
+ - token_list
98
+ resume: true
99
+ scheduler: null
100
+ scheduler_conf: {}
101
+ seed: 0
102
+ sort_batch: descending
103
+ sort_in_batch: descending
104
+ specaug: null
105
+ specaug_conf: {}
106
+ token_list:
107
+ - <blank>
108
+ - <unk>
109
+ - T
110
+ - "\u2581"
111
+ - I
112
+ - H
113
+ - G
114
+ - O
115
+ - AR
116
+ - "\u2581T"
117
+ - NE
118
+ - E
119
+ - EN
120
+ - Y
121
+ - "\u2581E"
122
+ - "\u2581S"
123
+ - EVEN
124
+ - F
125
+ - M
126
+ - C
127
+ - R
128
+ - D
129
+ - N
130
+ - W
131
+ - ENT
132
+ - L
133
+ - <sos/eos>
134
+ token_type: bpe
135
+ train_data_path_and_name_and_type:
136
+ - - dump/raw/train_nodev/wav.scp
137
+ - speech
138
+ - sound
139
+ - - dump/raw/train_nodev/text
140
+ - text
141
+ - text
142
+ train_dtype: float32
143
+ train_shape_file:
144
+ - exp/asr_stats_raw/train/speech_shape
145
+ - exp/asr_stats_raw/train/text_shape.bpe
146
+ use_preprocessor: true
147
+ val_scheduler_criterion:
148
+ - valid
149
+ - loss
150
+ valid_batch_bins: null
151
+ valid_batch_size: null
152
+ valid_batch_type: null
153
+ valid_data_path_and_name_and_type:
154
+ - - dump/raw/train_dev/wav.scp
155
+ - speech
156
+ - sound
157
+ - - dump/raw/train_dev/text
158
+ - text
159
+ - text
160
+ valid_max_cache_size: null
161
+ valid_shape_file:
162
+ - exp/asr_stats_raw/valid/speech_shape
163
+ - exp/asr_stats_raw/valid/text_shape.bpe
164
+ write_collected_feats: false
exp/lm_train_bpe/8epoch.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:88bb70eeac7e10877ba53a0c87185d347d6f163cf1a21340d0a66466f0664e6a
3
+ size 27223811
exp/lm_train_bpe/config.yaml ADDED
@@ -0,0 +1,133 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ accum_grad: 1
2
+ allow_variable_data_keys: false
3
+ batch_bins: 1000000
4
+ batch_size: 20
5
+ batch_type: folded
6
+ best_model_criterion:
7
+ - - train
8
+ - loss
9
+ - min
10
+ - - valid
11
+ - loss
12
+ - min
13
+ - - train
14
+ - acc
15
+ - max
16
+ - - valid
17
+ - acc
18
+ - max
19
+ bpemodel: /Users/gibbon/Desktop/espnet_model_zoo/espnet_model_zoo/3962e0807e68f78b712263dc013036d3/data/token_list/bpe_unigram30/bpe.model
20
+ chunk_length: 500
21
+ chunk_shift_ratio: 0.5
22
+ cleaner: null
23
+ collect_stats: false
24
+ config: null
25
+ cudnn_benchmark: false
26
+ cudnn_deterministic: true
27
+ cudnn_enabled: true
28
+ dist_backend: nccl
29
+ dist_init_method: env://
30
+ dist_launcher: null
31
+ dist_master_addr: null
32
+ dist_master_port: null
33
+ dist_rank: null
34
+ dist_world_size: null
35
+ distributed: false
36
+ dry_run: false
37
+ early_stopping_criterion:
38
+ - valid
39
+ - loss
40
+ - min
41
+ fold_length:
42
+ - 150
43
+ g2p: null
44
+ grad_clip: 5.0
45
+ grad_noise: false
46
+ init: null
47
+ iterator_type: sequence
48
+ keep_nbest_models: 10
49
+ lm: seq_rnn
50
+ lm_conf: {}
51
+ local_rank: 0
52
+ log_interval: null
53
+ log_level: INFO
54
+ max_cache_size: 0.0
55
+ max_epoch: 40
56
+ model_conf:
57
+ ignore_id: 0
58
+ multiple_iterator: false
59
+ multiprocessing_distributed: false
60
+ ngpu: 1
61
+ no_forward_run: false
62
+ non_linguistic_symbols: null
63
+ num_att_plot: 3
64
+ num_cache_chunks: 1024
65
+ num_iters_per_epoch: null
66
+ num_workers: 1
67
+ optim: adadelta
68
+ optim_conf: {}
69
+ output_dir: exp/lm_train_bpe
70
+ patience: null
71
+ pretrain_key: []
72
+ pretrain_path: []
73
+ print_config: false
74
+ required:
75
+ - output_dir
76
+ - token_list
77
+ resume: true
78
+ scheduler: null
79
+ scheduler_conf: {}
80
+ seed: 0
81
+ sort_batch: descending
82
+ sort_in_batch: descending
83
+ token_list:
84
+ - <blank>
85
+ - <unk>
86
+ - T
87
+ - "\u2581"
88
+ - I
89
+ - H
90
+ - G
91
+ - O
92
+ - AR
93
+ - "\u2581T"
94
+ - NE
95
+ - E
96
+ - EN
97
+ - Y
98
+ - "\u2581E"
99
+ - "\u2581S"
100
+ - EVEN
101
+ - F
102
+ - M
103
+ - C
104
+ - R
105
+ - D
106
+ - N
107
+ - W
108
+ - ENT
109
+ - L
110
+ - <sos/eos>
111
+ token_type: bpe
112
+ train_data_path_and_name_and_type:
113
+ - - dump/raw/srctexts
114
+ - text
115
+ - text
116
+ train_dtype: float32
117
+ train_shape_file:
118
+ - exp/lm_stats/train/text_shape.bpe
119
+ use_preprocessor: true
120
+ val_scheduler_criterion:
121
+ - valid
122
+ - loss
123
+ valid_batch_bins: null
124
+ valid_batch_size: null
125
+ valid_batch_type: null
126
+ valid_data_path_and_name_and_type:
127
+ - - dump/raw/train_dev/text
128
+ - text
129
+ - text
130
+ valid_max_cache_size: null
131
+ valid_shape_file:
132
+ - exp/lm_stats/valid/text_shape.bpe
133
+ write_collected_feats: false
meta.yaml ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ espnet: 0.8.0
2
+ files:
3
+ asr_model_file: exp/asr_train_raw_bpe/1epoch.pth
4
+ lm_file: exp/lm_train_bpe/8epoch.pth
5
+ python: "3.7.3 (default, Mar 27 2019, 22:11:17) \n[GCC 7.3.0]"
6
+ timestamp: 1595523607.756054
7
+ torch: 1.5.1
8
+ yaml_files:
9
+ asr_train_config: exp/asr_train_raw_bpe/config.yaml
10
+ lm_train_config: exp/lm_train_bpe/config.yaml