Automatic Speech Recognition
ESPnet
Spanish
Dallyana committed on
Commit
66240c9
1 Parent(s): 82c8888

Upload 6 files

Browse files
exp/asr_configuracion.yml_raw_es_bpe32_sp/config.yaml ADDED
@@ -0,0 +1,226 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ config: conf/configuracion.yml
2
+ print_config: false
3
+ log_level: INFO
4
+ drop_last_iter: false
5
+ dry_run: false
6
+ iterator_type: sequence
7
+ valid_iterator_type: null
8
+ output_dir: exp/asr_configuracion.yml_raw_es_bpe32_sp
9
+ ngpu: 0
10
+ seed: 0
11
+ num_workers: 1
12
+ num_att_plot: 3
13
+ dist_backend: nccl
14
+ dist_init_method: env://
15
+ dist_world_size: null
16
+ dist_rank: null
17
+ local_rank: null
18
+ dist_master_addr: null
19
+ dist_master_port: null
20
+ dist_launcher: null
21
+ multiprocessing_distributed: false
22
+ unused_parameters: false
23
+ sharded_ddp: false
24
+ cudnn_enabled: true
25
+ cudnn_benchmark: false
26
+ cudnn_deterministic: true
27
+ collect_stats: false
28
+ write_collected_feats: false
29
+ max_epoch: 100
30
+ patience: 10
31
+ val_scheduler_criterion:
32
+ - valid
33
+ - loss
34
+ early_stopping_criterion:
35
+ - valid
36
+ - loss
37
+ - min
38
+ best_model_criterion:
39
+ - - valid
40
+ - acc
41
+ - max
42
+ keep_nbest_models: 2
43
+ nbest_averaging_interval: 0
44
+ grad_clip: 5.0
45
+ grad_clip_type: 2.0
46
+ grad_noise: false
47
+ accum_grad: 1
48
+ no_forward_run: false
49
+ resume: true
50
+ train_dtype: float32
51
+ use_amp: false
52
+ log_interval: null
53
+ use_matplotlib: true
54
+ use_tensorboard: true
55
+ create_graph_in_tensorboard: false
56
+ use_wandb: false
57
+ wandb_project: null
58
+ wandb_id: null
59
+ wandb_entity: null
60
+ wandb_name: null
61
+ wandb_model_log_interval: -1
62
+ detect_anomaly: false
63
+ use_lora: false
64
+ save_lora_only: true
65
+ lora_conf: {}
66
+ pretrain_path: null
67
+ init_param: []
68
+ ignore_init_mismatch: false
69
+ freeze_param: []
70
+ num_iters_per_epoch: null
71
+ batch_size: 32
72
+ valid_batch_size: null
73
+ batch_bins: 1000000
74
+ valid_batch_bins: null
75
+ train_shape_file:
76
+ - exp/asr_stats_raw_es_bpe32_sp/train/speech_shape
77
+ - exp/asr_stats_raw_es_bpe32_sp/train/text_shape.bpe
78
+ valid_shape_file:
79
+ - exp/asr_stats_raw_es_bpe32_sp/valid/speech_shape
80
+ - exp/asr_stats_raw_es_bpe32_sp/valid/text_shape.bpe
81
+ batch_type: folded
82
+ valid_batch_type: null
83
+ fold_length:
84
+ - 80000
85
+ - 150
86
+ sort_in_batch: descending
87
+ shuffle_within_batch: false
88
+ sort_batch: descending
89
+ multiple_iterator: false
90
+ chunk_length: 500
91
+ chunk_shift_ratio: 0.5
92
+ num_cache_chunks: 1024
93
+ chunk_excluded_key_prefixes: []
94
+ chunk_default_fs: null
95
+ train_data_path_and_name_and_type:
96
+ - - dump/raw/train_nodev_sp/wav.scp
97
+ - speech
98
+ - sound
99
+ - - dump/raw/train_nodev_sp/text
100
+ - text
101
+ - text
102
+ valid_data_path_and_name_and_type:
103
+ - - dump/raw/train_dev/wav.scp
104
+ - speech
105
+ - sound
106
+ - - dump/raw/train_dev/text
107
+ - text
108
+ - text
109
+ allow_variable_data_keys: false
110
+ max_cache_size: 0.0
111
+ max_cache_fd: 32
112
+ allow_multi_rates: false
113
+ valid_max_cache_size: null
114
+ exclude_weight_decay: false
115
+ exclude_weight_decay_conf: {}
116
+ optim: adam
117
+ optim_conf:
118
+ lr: 0.001
119
+ scheduler: warmuplr
120
+ scheduler_conf:
121
+ warmup_steps: 2500
122
+ token_list:
123
+ - <blank>
124
+ - <unk>
125
+ - ▁
126
+ - a
127
+ - r
128
+ - e
129
+ - o
130
+ - n
131
+ - l
132
+ - t
133
+ - d
134
+ - m
135
+ - i
136
+ - '2'
137
+ - s
138
+ - '0'
139
+ - '9'
140
+ - y
141
+ - ñ
142
+ - '1'
143
+ - u
144
+ - h
145
+ - p
146
+ - c
147
+ - '3'
148
+ - '4'
149
+ - '7'
150
+ - '8'
151
+ - z
152
+ - b
153
+ - f
154
+ - <sos/eos>
155
+ init: xavier_uniform
156
+ input_size: null
157
+ ctc_conf:
158
+ dropout_rate: 0.0
159
+ ctc_type: builtin
160
+ reduce: true
161
+ ignore_nan_grad: null
162
+ zero_infinity: true
163
+ brctc_risk_strategy: exp
164
+ brctc_group_strategy: end
165
+ brctc_risk_factor: 0.0
166
+ joint_net_conf: null
167
+ use_preprocessor: true
168
+ use_lang_prompt: false
169
+ use_nlp_prompt: false
170
+ token_type: bpe
171
+ bpemodel: data/es_token_list/bpe_unigram32/bpe.model
172
+ non_linguistic_symbols: null
173
+ cleaner: null
174
+ g2p: null
175
+ speech_volume_normalize: null
176
+ rir_scp: null
177
+ rir_apply_prob: 1.0
178
+ noise_scp: null
179
+ noise_apply_prob: 1.0
180
+ noise_db_range: '13_15'
181
+ short_noise_thres: 0.5
182
+ aux_ctc_tasks: []
183
+ frontend: default
184
+ frontend_conf:
185
+ fs: 16k
186
+ specaug: null
187
+ specaug_conf: {}
188
+ normalize: global_mvn
189
+ normalize_conf:
190
+ stats_file: exp/asr_stats_raw_es_bpe32_sp/train/feats_stats.npz
191
+ model: espnet
192
+ model_conf:
193
+ ctc_weight: 0.3
194
+ lsm_weight: 0.1
195
+ length_normalized_loss: false
196
+ preencoder: null
197
+ preencoder_conf: {}
198
+ encoder: transformer
199
+ encoder_conf:
200
+ output_size: 256
201
+ attention_heads: 4
202
+ linear_units: 2048
203
+ num_blocks: 12
204
+ dropout_rate: 0.1
205
+ positional_dropout_rate: 0.1
206
+ attention_dropout_rate: 0.0
207
+ input_layer: conv2d
208
+ normalize_before: true
209
+ postencoder: null
210
+ postencoder_conf: {}
211
+ decoder: transformer
212
+ decoder_conf:
213
+ attention_heads: 4
214
+ linear_units: 2048
215
+ num_blocks: 6
216
+ dropout_rate: 0.1
217
+ positional_dropout_rate: 0.1
218
+ self_attention_dropout_rate: 0.0
219
+ src_attention_dropout_rate: 0.0
220
+ preprocessor: default
221
+ preprocessor_conf: {}
222
+ required:
223
+ - output_dir
224
+ - token_list
225
+ version: '202310'
226
+ distributed: false
exp/asr_configuracion.yml_raw_es_bpe32_sp/valid.acc.ave_2best.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8da61a5516f2a8735cbd3256d312ac038907ec3e01953cc41b4e52f15227c07d
3
+ size 108684901
exp/asr_stats_raw_es_bpe32_sp/train/feats_stats.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e6391295cbc172f928c0a50b30ef5cb0f99702fb2ae1bf6370eacefe49b3ae6d
3
+ size 1402
exp/lm_train_lm_es_bpe32/40epoch.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43fc94e130ebca10007dec3c07cd7a0445727244fdea9262a0ba42340971d853
3
+ size 27251268
exp/lm_train_lm_es_bpe32/config.yaml ADDED
@@ -0,0 +1,162 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ config: conf/train_lm.yaml
2
+ print_config: false
3
+ log_level: INFO
4
+ drop_last_iter: false
5
+ dry_run: false
6
+ iterator_type: sequence
7
+ valid_iterator_type: null
8
+ output_dir: exp/lm_train_lm_es_bpe32
9
+ ngpu: 0
10
+ seed: 0
11
+ num_workers: 1
12
+ num_att_plot: 3
13
+ dist_backend: nccl
14
+ dist_init_method: env://
15
+ dist_world_size: null
16
+ dist_rank: null
17
+ local_rank: null
18
+ dist_master_addr: null
19
+ dist_master_port: null
20
+ dist_launcher: null
21
+ multiprocessing_distributed: false
22
+ unused_parameters: false
23
+ sharded_ddp: false
24
+ cudnn_enabled: true
25
+ cudnn_benchmark: false
26
+ cudnn_deterministic: true
27
+ collect_stats: false
28
+ write_collected_feats: false
29
+ max_epoch: 40
30
+ patience: null
31
+ val_scheduler_criterion:
32
+ - valid
33
+ - loss
34
+ early_stopping_criterion:
35
+ - valid
36
+ - loss
37
+ - min
38
+ best_model_criterion:
39
+ - - valid
40
+ - loss
41
+ - min
42
+ keep_nbest_models: 1
43
+ nbest_averaging_interval: 0
44
+ grad_clip: 5.0
45
+ grad_clip_type: 2.0
46
+ grad_noise: false
47
+ accum_grad: 1
48
+ no_forward_run: false
49
+ resume: true
50
+ train_dtype: float32
51
+ use_amp: false
52
+ log_interval: null
53
+ use_matplotlib: true
54
+ use_tensorboard: true
55
+ create_graph_in_tensorboard: false
56
+ use_wandb: false
57
+ wandb_project: null
58
+ wandb_id: null
59
+ wandb_entity: null
60
+ wandb_name: null
61
+ wandb_model_log_interval: -1
62
+ detect_anomaly: false
63
+ use_lora: false
64
+ save_lora_only: true
65
+ lora_conf: {}
66
+ pretrain_path: null
67
+ init_param: []
68
+ ignore_init_mismatch: false
69
+ freeze_param: []
70
+ num_iters_per_epoch: null
71
+ batch_size: 256
72
+ valid_batch_size: null
73
+ batch_bins: 1000000
74
+ valid_batch_bins: null
75
+ train_shape_file:
76
+ - exp/lm_stats_es_bpe32/train/text_shape.bpe
77
+ valid_shape_file:
78
+ - exp/lm_stats_es_bpe32/valid/text_shape.bpe
79
+ batch_type: folded
80
+ valid_batch_type: null
81
+ fold_length:
82
+ - 150
83
+ sort_in_batch: descending
84
+ shuffle_within_batch: false
85
+ sort_batch: descending
86
+ multiple_iterator: false
87
+ chunk_length: 500
88
+ chunk_shift_ratio: 0.5
89
+ num_cache_chunks: 1024
90
+ chunk_excluded_key_prefixes: []
91
+ chunk_default_fs: null
92
+ train_data_path_and_name_and_type:
93
+ - - dump/raw/lm_train.txt
94
+ - text
95
+ - text
96
+ valid_data_path_and_name_and_type:
97
+ - - dump/raw/org/train_dev/text
98
+ - text
99
+ - text
100
+ allow_variable_data_keys: false
101
+ max_cache_size: 0.0
102
+ max_cache_fd: 32
103
+ allow_multi_rates: false
104
+ valid_max_cache_size: null
105
+ exclude_weight_decay: false
106
+ exclude_weight_decay_conf: {}
107
+ optim: adam
108
+ optim_conf:
109
+ lr: 0.1
110
+ scheduler: null
111
+ scheduler_conf: {}
112
+ token_list:
113
+ - <blank>
114
+ - <unk>
115
+ - ▁
116
+ - a
117
+ - r
118
+ - e
119
+ - o
120
+ - n
121
+ - l
122
+ - t
123
+ - d
124
+ - m
125
+ - i
126
+ - '2'
127
+ - s
128
+ - '0'
129
+ - '9'
130
+ - y
131
+ - ñ
132
+ - '1'
133
+ - u
134
+ - h
135
+ - p
136
+ - c
137
+ - '3'
138
+ - '4'
139
+ - '7'
140
+ - '8'
141
+ - z
142
+ - b
143
+ - f
144
+ - <sos/eos>
145
+ init: null
146
+ model_conf:
147
+ ignore_id: 0
148
+ use_preprocessor: true
149
+ token_type: bpe
150
+ bpemodel: data/es_token_list/bpe_unigram32/bpe.model
151
+ non_linguistic_symbols: null
152
+ cleaner: null
153
+ g2p: null
154
+ lm: seq_rnn
155
+ lm_conf:
156
+ unit: 650
157
+ nlayers: 2
158
+ required:
159
+ - output_dir
160
+ - token_list
161
+ version: '202310'
162
+ distributed: false
meta.yaml ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ espnet: '202209'
2
+ files:
3
+ asr_model_file: exp/asr_configuracion.yml_raw_es_bpe32_sp/valid.acc.ave_2best.pth
4
+ lm_file: exp/lm_train_lm_es_bpe32/40epoch.pth
5
+ python: "3.8.15 (default, Nov 24 2022, 15:19:38) \n[GCC 11.2.0]"
6
+ timestamp: 1673331209.522897
7
+ torch: 1.12.1
8
+ yaml_files:
9
+ asr_train_config: exp/asr_configuracion.yml_raw_es_bpe32_sp/config.yaml
10
+ lm_train_config: exp/lm_train_lm_es_bpe32/config.yaml