Siddhant committed
Commit 8cd462e
1 Parent(s): 5f1d35a

import from zenodo

README.md ADDED
@@ -0,0 +1,50 @@
+ ---
+ tags:
+ - espnet
+ - audio
+ - automatic-speech-recognition
+ language: en
+ datasets:
+ - mini_an4
+ license: cc-by-4.0
+ ---
+ ## Example ESPnet2 ASR model
+ ### `kamo-naoyuki/mini_an4_asr_train_raw_bpe_valid.acc.best`
+ ♻️ Imported from https://zenodo.org/record/3957940/
+
+ This model was trained by kamo-naoyuki using the mini_an4/asr1 recipe in [espnet](https://github.com/espnet/espnet/).
+ ### Demo: How to use in ESPnet2
+ ```python
+ # coming soon
+ ```
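A minimal inference sketch (not verified against this exact checkpoint): it assumes a local clone of this repository with the LFS files pulled, an ESPnet2 installation, and `soundfile`; the file paths follow `meta.yaml` below, and `sample.wav` is a placeholder 16 kHz mono recording.

```python
# A rough sketch of decoding with this model using ESPnet2's inference helper.
# Paths follow meta.yaml in this repository; "sample.wav" is a hypothetical input.
import soundfile
from espnet2.bin.asr_inference import Speech2Text

speech2text = Speech2Text(
    asr_train_config="exp/asr_train_raw_bpe/config.yaml",
    asr_model_file="exp/asr_train_raw_bpe/1epoch.pth",
    lm_train_config="exp/lm_train_bpe/config.yaml",
    lm_file="exp/lm_train_bpe/8epoch.pth",
    device="cpu",
)

speech, rate = soundfile.read("sample.wav")  # expects 16 kHz audio (fs: 16k in config.yaml)
nbest = speech2text(speech)                  # n-best list of (text, tokens, token_ids, hypothesis)
text, *_ = nbest[0]
print(text)
```

With `lm_train_config`/`lm_file` supplied, beam search also applies the packaged RNN language model during decoding.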
+ ### Citing ESPnet
+ ```bibtex
+ @inproceedings{watanabe2018espnet,
+ author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson {Enrique Yalta Soplin} and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
+ title={{ESPnet}: End-to-End Speech Processing Toolkit},
+ year={2018},
+ booktitle={Proceedings of Interspeech},
+ pages={2207--2211},
+ doi={10.21437/Interspeech.2018-1456},
+ url={http://dx.doi.org/10.21437/Interspeech.2018-1456}
+ }
+ @inproceedings{hayashi2020espnet,
+ title={{ESPnet-TTS}: Unified, reproducible, and integratable open source end-to-end text-to-speech toolkit},
+ author={Hayashi, Tomoki and Yamamoto, Ryuichi and Inoue, Katsuki and Yoshimura, Takenori and Watanabe, Shinji and Toda, Tomoki and Takeda, Kazuya and Zhang, Yu and Tan, Xu},
+ booktitle={Proceedings of IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
+ pages={7654--7658},
+ year={2020},
+ organization={IEEE}
+ }
+ ```
+ or arXiv:
+ ```bibtex
+ @misc{watanabe2018espnet,
+ title={ESPnet: End-to-End Speech Processing Toolkit},
+ author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Enrique Yalta Soplin and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
+ year={2018},
+ eprint={1804.00015},
+ archivePrefix={arXiv},
+ primaryClass={cs.CL}
+ }
+ ```
data/token_list/bpe_unigram30/bpe.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c1850425c02da50ac9c2d6efbbc1ecca8ee734cc2b45cee03e761f1b8aa1d40b
+ size 237983
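`bpe.model` is the SentencePiece model referenced by the `bpemodel:` entry in both config files (the directory name suggests a 30-piece unigram model). A small sketch of inspecting it, assuming the `sentencepiece` package and that the LFS object has been pulled (otherwise the path holds only the pointer text above):

```python
# Sketch: load and poke at the BPE model with sentencepiece.
import sentencepiece as spm

sp = spm.SentencePieceProcessor(model_file="data/token_list/bpe_unigram30/bpe.model")
print(sp.get_piece_size())                      # vocabulary size of the model
print(sp.encode("SEVEN EIGHT", out_type=str))   # example text -> subword pieces
```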
exp/asr_stats_raw/train/feats_stats.npz ADDED
Binary file (1.4 kB)
exp/asr_train_raw_bpe/1epoch.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:34822f1cb8d9f4d25a4006e0f22013b8f5789ef46b7583c5c40a0f7a7ebf8d9b
+ size 33086007
exp/asr_train_raw_bpe/RESULTS.md ADDED
@@ -0,0 +1,35 @@
+ <!-- Generated by scripts/utils/show_asr_result.sh -->
+ # RESULTS
+ ## Environments
+ - date: `Thu Jul 23 03:18:58 JST 2020`
+ - python version: `3.7.3 (default, Mar 27 2019, 22:11:17) [GCC 7.3.0]`
+ - espnet version: `espnet 0.8.0`
+ - pytorch version: `pytorch 1.0.1`
+ - Git hash: `31794e2aec89e6159c1ac32643d4c09989200a30`
+ - Commit date: `Thu Jul 23 03:17:45 2020 +0900`
+
+ ## asr_train_raw_bpe
+ ### WER
+
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
+ |---|---|---|---|---|---|---|---|---|
+ |decode_test_decode_lm_train_bpe_valid.loss.best_asr_model_valid.acc.best|2|10|0.0|20.0|80.0|0.0|100.0|100.0|
+ |decode_test_seg_decode_lm_train_bpe_valid.loss.best_asr_model_valid.acc.best|2|10|0.0|20.0|80.0|0.0|100.0|100.0|
+ |decode_train_dev_decode_lm_train_bpe_valid.loss.best_asr_model_valid.acc.best|1|1|0.0|100.0|0.0|0.0|100.0|100.0|
+
+ ### CER
+
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
+ |---|---|---|---|---|---|---|---|---|
+ |decode_test_decode_lm_train_bpe_valid.loss.best_asr_model_valid.acc.best|2|67|53.7|44.8|1.5|206.0|252.2|100.0|
+ |decode_test_seg_decode_lm_train_bpe_valid.loss.best_asr_model_valid.acc.best|2|67|53.7|44.8|1.5|206.0|252.2|100.0|
+ |decode_train_dev_decode_lm_train_bpe_valid.loss.best_asr_model_valid.acc.best|1|3|33.3|66.7|0.0|1200.0|1266.7|100.0|
+
+ ### TER
+
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
+ |---|---|---|---|---|---|---|---|---|
+ |decode_test_decode_lm_train_bpe_valid.loss.best_asr_model_valid.acc.best|2|45|15.6|82.2|2.2|208.9|293.3|100.0|
+ |decode_test_seg_decode_lm_train_bpe_valid.loss.best_asr_model_valid.acc.best|2|45|15.6|82.2|2.2|208.9|293.3|100.0|
+ |decode_train_dev_decode_lm_train_bpe_valid.loss.best_asr_model_valid.acc.best|1|4|25.0|75.0|0.0|550.0|625.0|100.0|
+
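Reading these tables: `Snt` and `Wrd` are the reference sentence and token counts (words, characters, or BPE tokens for WER, CER, and TER respectively), and the remaining columns are percentages of that reference count, so `Err = Sub + Del + Ins` can exceed 100% when insertions are frequent, as with this 1-epoch checkpoint. A small illustrative check against the first CER row; the raw counts here are reverse-engineered from the percentages, not taken from the decoding logs:

```python
# Illustrative arithmetic for the first CER row: 67 reference characters,
# hypothetical raw counts chosen to reproduce the reported percentages.
ref = 67
sub, dels, ins = 30, 1, 138
corr = ref - sub - dels

print(round(100 * corr / ref, 1))                # 53.7  -> Corr
print(round(100 * sub / ref, 1))                 # 44.8  -> Sub
print(round(100 * (sub + dels + ins) / ref, 1))  # 252.2 -> Err
```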
exp/asr_train_raw_bpe/config.yaml ADDED
@@ -0,0 +1,164 @@
+ config: null
+ print_config: false
+ log_level: INFO
+ dry_run: false
+ iterator_type: sequence
+ output_dir: exp/asr_train_raw_bpe
+ ngpu: 1
+ seed: 0
+ num_workers: 1
+ num_att_plot: 3
+ dist_backend: nccl
+ dist_init_method: env://
+ dist_world_size: null
+ dist_rank: null
+ local_rank: 0
+ dist_master_addr: null
+ dist_master_port: null
+ dist_launcher: null
+ multiprocessing_distributed: false
+ cudnn_enabled: true
+ cudnn_benchmark: false
+ cudnn_deterministic: true
+ collect_stats: false
+ write_collected_feats: false
+ max_epoch: 40
+ patience: null
+ val_scheduler_criterion:
+ - valid
+ - loss
+ early_stopping_criterion:
+ - valid
+ - loss
+ - min
+ best_model_criterion:
+ - - train
+   - loss
+   - min
+ - - valid
+   - loss
+   - min
+ - - train
+   - acc
+   - max
+ - - valid
+   - acc
+   - max
+ keep_nbest_models: 10
+ grad_clip: 5.0
+ grad_noise: false
+ accum_grad: 1
+ no_forward_run: false
+ resume: true
+ train_dtype: float32
+ log_interval: null
+ pretrain_path: []
+ pretrain_key: []
+ num_iters_per_epoch: null
+ batch_size: 20
+ valid_batch_size: null
+ batch_bins: 1000000
+ valid_batch_bins: null
+ train_shape_file:
+ - exp/asr_stats_raw/train/speech_shape
+ - exp/asr_stats_raw/train/text_shape.bpe
+ valid_shape_file:
+ - exp/asr_stats_raw/valid/speech_shape
+ - exp/asr_stats_raw/valid/text_shape.bpe
+ batch_type: folded
+ valid_batch_type: null
+ fold_length:
+ - 80000
+ - 150
+ sort_in_batch: descending
+ sort_batch: descending
+ multiple_iterator: false
+ chunk_length: 500
+ chunk_shift_ratio: 0.5
+ num_cache_chunks: 1024
+ train_data_path_and_name_and_type:
+ - - dump/raw/train_nodev/wav.scp
+   - speech
+   - sound
+ - - dump/raw/train_nodev/text
+   - text
+   - text
+ valid_data_path_and_name_and_type:
+ - - dump/raw/train_dev/wav.scp
+   - speech
+   - sound
+ - - dump/raw/train_dev/text
+   - text
+   - text
+ allow_variable_data_keys: false
+ max_cache_size: 0.0
+ valid_max_cache_size: null
+ optim: adadelta
+ optim_conf: {}
+ scheduler: null
+ scheduler_conf: {}
+ token_list:
+ - <blank>
+ - <unk>
+ - T
+ - "\u2581"
+ - I
+ - H
+ - G
+ - O
+ - AR
+ - "\u2581T"
+ - NE
+ - E
+ - EN
+ - Y
+ - "\u2581E"
+ - "\u2581S"
+ - EVEN
+ - F
+ - M
+ - C
+ - R
+ - D
+ - N
+ - W
+ - ENT
+ - L
+ - <sos/eos>
+ init: null
+ input_size: null
+ ctc_conf:
+   dropout_rate: 0.0
+   ctc_type: builtin
+   reduce: true
+ model_conf:
+   ctc_weight: 0.5
+   ignore_id: -1
+   lsm_weight: 0.0
+   length_normalized_loss: false
+   report_cer: true
+   report_wer: true
+   sym_space: <space>
+   sym_blank: <blank>
+ use_preprocessor: true
+ token_type: bpe
+ bpemodel: data/token_list/bpe_unigram30/bpe.model
+ non_linguistic_symbols: null
+ cleaner: null
+ g2p: null
+ frontend: default
+ frontend_conf:
+   fs: 16k
+ specaug: null
+ specaug_conf: {}
+ normalize: global_mvn
+ normalize_conf:
+   stats_file: exp/asr_stats_raw/train/feats_stats.npz
+ encoder: rnn
+ encoder_conf: {}
+ decoder: rnn
+ decoder_conf: {}
+ required:
+ - output_dir
+ - token_list
+ distributed: false
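This `config.yaml` stores the fully resolved training arguments, and together with `1epoch.pth` it is enough to rebuild the model object outside the recipe. A hedged sketch, assuming an ESPnet2 installation of a compatible version (the helper below exists in espnet2, though its exact behaviour may vary between releases):

```python
# Sketch: rebuild the trained ASR model from the dumped config and checkpoint.
from espnet2.tasks.asr import ASRTask

model, train_args = ASRTask.build_model_from_file(
    config_file="exp/asr_train_raw_bpe/config.yaml",
    model_file="exp/asr_train_raw_bpe/1epoch.pth",
    device="cpu",
)
print(type(model).__name__)                    # e.g. ESPnetASRModel
print(train_args.encoder, train_args.decoder)  # rnn rnn, matching the YAML above
```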
exp/lm_train_bpe/8epoch.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:88bb70eeac7e10877ba53a0c87185d347d6f163cf1a21340d0a66466f0664e6a
+ size 27223811
exp/lm_train_bpe/config.yaml ADDED
@@ -0,0 +1,133 @@
+ config: null
+ print_config: false
+ log_level: INFO
+ dry_run: false
+ iterator_type: sequence
+ output_dir: exp/lm_train_bpe
+ ngpu: 1
+ seed: 0
+ num_workers: 1
+ num_att_plot: 3
+ dist_backend: nccl
+ dist_init_method: env://
+ dist_world_size: null
+ dist_rank: null
+ local_rank: 0
+ dist_master_addr: null
+ dist_master_port: null
+ dist_launcher: null
+ multiprocessing_distributed: false
+ cudnn_enabled: true
+ cudnn_benchmark: false
+ cudnn_deterministic: true
+ collect_stats: false
+ write_collected_feats: false
+ max_epoch: 40
+ patience: null
+ val_scheduler_criterion:
+ - valid
+ - loss
+ early_stopping_criterion:
+ - valid
+ - loss
+ - min
+ best_model_criterion:
+ - - train
+   - loss
+   - min
+ - - valid
+   - loss
+   - min
+ - - train
+   - acc
+   - max
+ - - valid
+   - acc
+   - max
+ keep_nbest_models: 10
+ grad_clip: 5.0
+ grad_noise: false
+ accum_grad: 1
+ no_forward_run: false
+ resume: true
+ train_dtype: float32
+ log_interval: null
+ pretrain_path: []
+ pretrain_key: []
+ num_iters_per_epoch: null
+ batch_size: 20
+ valid_batch_size: null
+ batch_bins: 1000000
+ valid_batch_bins: null
+ train_shape_file:
+ - exp/lm_stats/train/text_shape.bpe
+ valid_shape_file:
+ - exp/lm_stats/valid/text_shape.bpe
+ batch_type: folded
+ valid_batch_type: null
+ fold_length:
+ - 150
+ sort_in_batch: descending
+ sort_batch: descending
+ multiple_iterator: false
+ chunk_length: 500
+ chunk_shift_ratio: 0.5
+ num_cache_chunks: 1024
+ train_data_path_and_name_and_type:
+ - - dump/raw/srctexts
+   - text
+   - text
+ valid_data_path_and_name_and_type:
+ - - dump/raw/train_dev/text
+   - text
+   - text
+ allow_variable_data_keys: false
+ max_cache_size: 0.0
+ valid_max_cache_size: null
+ optim: adadelta
+ optim_conf: {}
+ scheduler: null
+ scheduler_conf: {}
+ token_list:
+ - <blank>
+ - <unk>
+ - T
+ - "\u2581"
+ - I
+ - H
+ - G
+ - O
+ - AR
+ - "\u2581T"
+ - NE
+ - E
+ - EN
+ - Y
+ - "\u2581E"
+ - "\u2581S"
+ - EVEN
+ - F
+ - M
+ - C
+ - R
+ - D
+ - N
+ - W
+ - ENT
+ - L
+ - <sos/eos>
+ init: null
+ model_conf:
+   ignore_id: 0
+ use_preprocessor: true
+ token_type: bpe
+ bpemodel: data/token_list/bpe_unigram30/bpe.model
+ non_linguistic_symbols: null
+ cleaner: null
+ g2p: null
+ lm: seq_rnn
+ lm_conf: {}
+ required:
+ - output_dir
+ - token_list
+ distributed: false
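The language model is normally consumed through `lm_train_config`/`lm_file` during beam search (see the decoding sketch under the README demo), but it can be rebuilt on its own in the same way as the ASR model; again a hedged sketch under the same assumptions:

```python
# Sketch: rebuild the seq_rnn LM from its config and checkpoint.
from espnet2.tasks.lm import LMTask

lm, lm_train_args = LMTask.build_model_from_file(
    config_file="exp/lm_train_bpe/config.yaml",
    model_file="exp/lm_train_bpe/8epoch.pth",
    device="cpu",
)
print(type(lm).__name__)   # e.g. ESPnetLanguageModel
print(lm_train_args.lm)    # seq_rnn
```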
meta.yaml ADDED
@@ -0,0 +1,10 @@
+ espnet: 0.8.0
+ files:
+   asr_model_file: exp/asr_train_raw_bpe/1epoch.pth
+   lm_file: exp/lm_train_bpe/8epoch.pth
+ python: "3.7.3 (default, Mar 27 2019, 22:11:17) \n[GCC 7.3.0]"
+ timestamp: 1595523607.756054
+ torch: 1.5.1
+ yaml_files:
+   asr_train_config: exp/asr_train_raw_bpe/config.yaml
+   lm_train_config: exp/lm_train_bpe/config.yaml
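`meta.yaml` is the manifest that `espnet_model_zoo` reads when unpacking an archived model: the paths under `files:` and `yaml_files:` map directly onto the keyword arguments of `Speech2Text`. A possible downloader-based variant of the earlier decoding sketch, assuming `espnet_model_zoo` is installed and that this model name resolves in its registry (if not, `download_and_unpack()` also accepts a URL or a local zip path):

```python
# Sketch: fetch the packaged model via espnet_model_zoo and decode a file with it.
import soundfile
from espnet_model_zoo.downloader import ModelDownloader
from espnet2.bin.asr_inference import Speech2Text

d = ModelDownloader()
# Returns a dict keyed like meta.yaml: asr_train_config, asr_model_file,
# lm_train_config, lm_file -- ready to unpack into Speech2Text.
kwargs = d.download_and_unpack("kamo-naoyuki/mini_an4_asr_train_raw_bpe_valid.acc.best")
speech2text = Speech2Text(**kwargs, device="cpu")

speech, rate = soundfile.read("sample.wav")  # placeholder 16 kHz input
text, *_ = speech2text(speech)[0]
print(text)
```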