neillu committed on
Commit
3b413a4
1 Parent(s): 6f8734a

Update model

Browse files
README.md CHANGED
@@ -1,3 +1,258 @@
1
  ---
 
 
 
 
 
 
 
2
  license: cc-by-4.0
3
  ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ tags:
3
+ - espnet
4
+ - audio
5
+ - audio-to-audio
6
+ language: noinfo
7
+ datasets:
8
+ - l3das22
9
  license: cc-by-4.0
10
  ---
11
+
12
+ ## ESPnet2 ENH model
13
+
14
+ ### `espnet/Yen-Ju_Lu_l3das22_enh_train_enh_ineube_valid.loss.ave`
15
+
16
+ This model was trained by neillu23 using the l3das22 recipe in [espnet](https://github.com/espnet/espnet/).
17
+
18
+ ### Demo: How to use in ESPnet2
19
+
20
+ ```bash
21
+ cd espnet
22
+ git checkout 11d687844a544fcce6f6d0ce7a0a302e0e47d442
23
+ pip install -e .
24
+ cd egs2/l3das22/enh1
25
+ ./run.sh --skip_data_prep false --skip_train true --download_model espnet/Yen-Ju_Lu_l3das22_enh_train_enh_ineube_valid.loss.ave
26
+ ```
27
+
28
+ <!-- Generated by ./scripts/utils/show_enh_score.sh -->
29
+ # RESULTS
30
+ ## Environments
31
+ - date: `Wed Jul 6 20:46:10 UTC 2022`
32
+ - python version: `3.8.13 (default, Mar 28 2022, 11:38:47) [GCC 7.5.0]`
33
+ - espnet version: `espnet 202205`
34
+ - pytorch version: `pytorch 1.8.1`
35
+ - Git hash: `77e36afdd3f069567dd33d4b5b997a26b634772b`
36
+ - Commit date: `Fri Jun 17 18:32:56 2022 -0400`
37
+
38
+
39
+ ## enh_train_enh_ineube_raw
40
+
41
+ config: conf/tuning/train_enh_ineube.yaml
42
+
43
+ |dataset|STOI (%)|SAR|SDR|SIR|SI_SNR|WER|STOI (0-1)|TASK 1 METRIC|
44
+ |---|---|---|---|---|---|---|---|---|
45
+ |enhanced_dev_multich|95.62|15.00|15.00|0.00|13.64|5.93|0.956|0.948|
46
+ |enhanced_test_multich|95.70|14.59|14.59|0.00|13.34|4.85|0.957|0.954|
47
+
48
+ ## ENH config
49
+
50
+ <details><summary>expand</summary>
51
+
52
+ ```
53
+ config: conf/tuning/train_enh_ineube.yaml
54
+ print_config: false
55
+ log_level: INFO
56
+ dry_run: false
57
+ iterator_type: chunk
58
+ output_dir: exp/enh_train_enh_ineube_raw
59
+ ngpu: 1
60
+ seed: 0
61
+ num_workers: 4
62
+ num_att_plot: 3
63
+ dist_backend: nccl
64
+ dist_init_method: env://
65
+ dist_world_size: 3
66
+ dist_rank: 0
67
+ local_rank: 0
68
+ dist_master_addr: localhost
69
+ dist_master_port: 50409
70
+ dist_launcher: null
71
+ multiprocessing_distributed: true
72
+ unused_parameters: true
73
+ sharded_ddp: false
74
+ cudnn_enabled: true
75
+ cudnn_benchmark: false
76
+ cudnn_deterministic: true
77
+ collect_stats: false
78
+ write_collected_feats: false
79
+ max_epoch: 100
80
+ patience: 20
81
+ val_scheduler_criterion:
82
+ - valid
83
+ - loss
84
+ early_stopping_criterion:
85
+ - valid
86
+ - loss
87
+ - min
88
+ best_model_criterion:
89
+ - - valid
90
+ - si_snr
91
+ - max
92
+ - - valid
93
+ - loss
94
+ - min
95
+ keep_nbest_models: 1
96
+ nbest_averaging_interval: 0
97
+ grad_clip: 5
98
+ grad_clip_type: 2.0
99
+ grad_noise: false
100
+ accum_grad: 1
101
+ no_forward_run: false
102
+ resume: true
103
+ train_dtype: float32
104
+ use_amp: false
105
+ log_interval: null
106
+ use_matplotlib: true
107
+ use_tensorboard: true
108
+ use_wandb: false
109
+ wandb_project: null
110
+ wandb_id: null
111
+ wandb_entity: null
112
+ wandb_name: null
113
+ wandb_model_log_interval: -1
114
+ detect_anomaly: false
115
+ pretrain_path: null
116
+ init_param: []
117
+ ignore_init_mismatch: false
118
+ freeze_param: []
119
+ num_iters_per_epoch: null
120
+ batch_size: 15
121
+ valid_batch_size: null
122
+ batch_bins: 1000000
123
+ valid_batch_bins: null
124
+ train_shape_file:
125
+ - exp/enh_stats_16k/train/speech_mix_shape
126
+ - exp/enh_stats_16k/train/speech_ref1_shape
127
+ valid_shape_file:
128
+ - exp/enh_stats_16k/valid/speech_mix_shape
129
+ - exp/enh_stats_16k/valid/speech_ref1_shape
130
+ batch_type: folded
131
+ valid_batch_type: null
132
+ fold_length:
133
+ - 80000
134
+ - 80000
135
+ sort_in_batch: descending
136
+ sort_batch: descending
137
+ multiple_iterator: false
138
+ chunk_length: 32000
139
+ chunk_shift_ratio: 0.5
140
+ num_cache_chunks: 1024
141
+ train_data_path_and_name_and_type:
142
+ - - dump/raw/train_multich/wav.scp
143
+ - speech_mix
144
+ - sound
145
+ - - dump/raw/train_multich/spk1.scp
146
+ - speech_ref1
147
+ - sound
148
+ valid_data_path_and_name_and_type:
149
+ - - dump/raw/dev_multich/wav.scp
150
+ - speech_mix
151
+ - sound
152
+ - - dump/raw/dev_multich/spk1.scp
153
+ - speech_ref1
154
+ - sound
155
+ allow_variable_data_keys: false
156
+ max_cache_size: 0.0
157
+ max_cache_fd: 32
158
+ valid_max_cache_size: null
159
+ optim: adam
160
+ optim_conf:
161
+ lr: 0.001
162
+ eps: 1.0e-08
163
+ weight_decay: 1.0e-07
164
+ scheduler: reducelronplateau
165
+ scheduler_conf:
166
+ mode: min
167
+ factor: 0.5
168
+ patience: 20
169
+ init: xavier_uniform
170
+ model_conf:
171
+ stft_consistency: false
172
+ loss_type: mask_mse
173
+ mask_type: null
174
+ criterions:
175
+ - name: snr
176
+ conf: {}
177
+ wrapper: fixed_order
178
+ wrapper_conf:
179
+ weight: 1.0
180
+ use_preprocessor: false
181
+ speech_volume_normalize: null
182
+ rir_scp: null
183
+ rir_apply_prob: 1.0
184
+ noise_scp: null
185
+ noise_apply_prob: 1.0
186
+ noise_db_range: '13_15'
187
+ short_noise_thres: 0.5
188
+ use_reverberant_ref: false
189
+ num_spk: 1
190
+ num_noise_type: 1
191
+ sample_rate: 8000
192
+ force_single_channel: false
193
+ encoder: same
194
+ encoder_conf: {}
195
+ separator: ineube
196
+ separator_conf:
197
+ n_fft: 512
198
+ stride: 128
199
+ window: hann
200
+ mic_channels: 8
201
+ decoder: same
202
+ decoder_conf: {}
203
+ mask_module: multi_mask
204
+ mask_module_conf: {}
205
+ required:
206
+ - output_dir
207
+ version: '202205'
208
+ distributed: true
209
+ ```
210
+
211
+ </details>
212
+
213
+
214
+
215
+ ### Citing ESPnet
216
+
217
+ ```bibtex
218
+ @inproceedings{watanabe2018espnet,
219
+ author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
220
+ title={{ESPnet}: End-to-End Speech Processing Toolkit},
221
+ year={2018},
222
+ booktitle={Proceedings of Interspeech},
223
+ pages={2207--2211},
224
+ doi={10.21437/Interspeech.2018-1456},
225
+ url={http://dx.doi.org/10.21437/Interspeech.2018-1456}
226
+ }
227
+
228
+
229
+ @inproceedings{ESPnet-SE,
230
+ author = {Chenda Li and Jing Shi and Wangyou Zhang and Aswin Shanmugam Subramanian and Xuankai Chang and
231
+ Naoyuki Kamo and Moto Hira and Tomoki Hayashi and Christoph B{\"{o}}ddeker and Zhuo Chen and Shinji Watanabe},
232
+ title = {ESPnet-SE: End-To-End Speech Enhancement and Separation Toolkit Designed for {ASR} Integration},
233
+ booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
234
+ pages = {785--792},
235
+ publisher = {{IEEE}},
236
+ year = {2021},
237
+ url = {https://doi.org/10.1109/SLT48900.2021.9383615},
238
+ doi = {10.1109/SLT48900.2021.9383615},
239
+ timestamp = {Mon, 12 Apr 2021 17:08:59 +0200},
240
+ biburl = {https://dblp.org/rec/conf/slt/Li0ZSCKHHBC021.bib},
241
+ bibsource = {dblp computer science bibliography, https://dblp.org}
242
+ }
243
+
244
+
245
+ ```
246
+
247
+ or arXiv:
248
+
249
+ ```bibtex
250
+ @misc{watanabe2018espnet,
251
+ title={ESPnet: End-to-End Speech Processing Toolkit},
252
+ author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
253
+ year={2018},
254
+ eprint={1804.00015},
255
+ archivePrefix={arXiv},
256
+ primaryClass={cs.CL}
257
+ }
258
+ ```
exp/enh_stats_16k/train/feats_stats.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fd36206f9254d1adfffa0dd03e2a53b6212bc6637a69a7451e651214a07bf414
3
+ size 826
exp/enh_train_enh_ineube_raw/99epoch.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa75dbe6a394a608fc9a2c514baba2169b345789696d9a19c9a7340d435901b9
3
+ size 62103895
exp/enh_train_enh_ineube_raw/RESULTS.md ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!-- Generated by ./scripts/utils/show_enh_score.sh -->
2
+ # RESULTS
3
+ ## Environments
4
+ - date: `Wed Jul 6 20:46:10 UTC 2022`
5
+ - python version: `3.8.13 (default, Mar 28 2022, 11:38:47) [GCC 7.5.0]`
6
+ - espnet version: `espnet 202205`
7
+ - pytorch version: `pytorch 1.8.1`
8
+ - Git hash: `77e36afdd3f069567dd33d4b5b997a26b634772b`
9
+ - Commit date: `Fri Jun 17 18:32:56 2022 -0400`
10
+
11
+
12
+ ## enh_train_enh_ineube_raw
13
+
14
+ config: conf/tuning/train_enh_ineube.yaml
15
+
16
+ |dataset|STOI|SAR|SDR|SIR|SI_SNR|
17
+ |---|---|---|---|---|---|
18
+ |enhanced_dev_multich|95.62|15.00|15.00|0.00|13.64|
19
+ |enhanced_test_multich|95.70|14.59|14.59|0.00|13.34|
20
+
exp/enh_train_enh_ineube_raw/config.yaml ADDED
@@ -0,0 +1,156 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ config: conf/tuning/train_enh_ineube.yaml
2
+ print_config: false
3
+ log_level: INFO
4
+ dry_run: false
5
+ iterator_type: chunk
6
+ output_dir: exp/enh_train_enh_ineube_raw
7
+ ngpu: 1
8
+ seed: 0
9
+ num_workers: 4
10
+ num_att_plot: 3
11
+ dist_backend: nccl
12
+ dist_init_method: env://
13
+ dist_world_size: 3
14
+ dist_rank: 0
15
+ local_rank: 0
16
+ dist_master_addr: localhost
17
+ dist_master_port: 50409
18
+ dist_launcher: null
19
+ multiprocessing_distributed: true
20
+ unused_parameters: true
21
+ sharded_ddp: false
22
+ cudnn_enabled: true
23
+ cudnn_benchmark: false
24
+ cudnn_deterministic: true
25
+ collect_stats: false
26
+ write_collected_feats: false
27
+ max_epoch: 100
28
+ patience: 20
29
+ val_scheduler_criterion:
30
+ - valid
31
+ - loss
32
+ early_stopping_criterion:
33
+ - valid
34
+ - loss
35
+ - min
36
+ best_model_criterion:
37
+ - - valid
38
+ - si_snr
39
+ - max
40
+ - - valid
41
+ - loss
42
+ - min
43
+ keep_nbest_models: 1
44
+ nbest_averaging_interval: 0
45
+ grad_clip: 5
46
+ grad_clip_type: 2.0
47
+ grad_noise: false
48
+ accum_grad: 1
49
+ no_forward_run: false
50
+ resume: true
51
+ train_dtype: float32
52
+ use_amp: false
53
+ log_interval: null
54
+ use_matplotlib: true
55
+ use_tensorboard: true
56
+ use_wandb: false
57
+ wandb_project: null
58
+ wandb_id: null
59
+ wandb_entity: null
60
+ wandb_name: null
61
+ wandb_model_log_interval: -1
62
+ detect_anomaly: false
63
+ pretrain_path: null
64
+ init_param: []
65
+ ignore_init_mismatch: false
66
+ freeze_param: []
67
+ num_iters_per_epoch: null
68
+ batch_size: 15
69
+ valid_batch_size: null
70
+ batch_bins: 1000000
71
+ valid_batch_bins: null
72
+ train_shape_file:
73
+ - exp/enh_stats_16k/train/speech_mix_shape
74
+ - exp/enh_stats_16k/train/speech_ref1_shape
75
+ valid_shape_file:
76
+ - exp/enh_stats_16k/valid/speech_mix_shape
77
+ - exp/enh_stats_16k/valid/speech_ref1_shape
78
+ batch_type: folded
79
+ valid_batch_type: null
80
+ fold_length:
81
+ - 80000
82
+ - 80000
83
+ sort_in_batch: descending
84
+ sort_batch: descending
85
+ multiple_iterator: false
86
+ chunk_length: 32000
87
+ chunk_shift_ratio: 0.5
88
+ num_cache_chunks: 1024
89
+ train_data_path_and_name_and_type:
90
+ - - dump/raw/train_multich/wav.scp
91
+ - speech_mix
92
+ - sound
93
+ - - dump/raw/train_multich/spk1.scp
94
+ - speech_ref1
95
+ - sound
96
+ valid_data_path_and_name_and_type:
97
+ - - dump/raw/dev_multich/wav.scp
98
+ - speech_mix
99
+ - sound
100
+ - - dump/raw/dev_multich/spk1.scp
101
+ - speech_ref1
102
+ - sound
103
+ allow_variable_data_keys: false
104
+ max_cache_size: 0.0
105
+ max_cache_fd: 32
106
+ valid_max_cache_size: null
107
+ optim: adam
108
+ optim_conf:
109
+ lr: 0.001
110
+ eps: 1.0e-08
111
+ weight_decay: 1.0e-07
112
+ scheduler: reducelronplateau
113
+ scheduler_conf:
114
+ mode: min
115
+ factor: 0.5
116
+ patience: 20
117
+ init: xavier_uniform
118
+ model_conf:
119
+ stft_consistency: false
120
+ loss_type: mask_mse
121
+ mask_type: null
122
+ criterions:
123
+ - name: snr
124
+ conf: {}
125
+ wrapper: fixed_order
126
+ wrapper_conf:
127
+ weight: 1.0
128
+ use_preprocessor: false
129
+ speech_volume_normalize: null
130
+ rir_scp: null
131
+ rir_apply_prob: 1.0
132
+ noise_scp: null
133
+ noise_apply_prob: 1.0
134
+ noise_db_range: '13_15'
135
+ short_noise_thres: 0.5
136
+ use_reverberant_ref: false
137
+ num_spk: 1
138
+ num_noise_type: 1
139
+ sample_rate: 8000
140
+ force_single_channel: false
141
+ encoder: same
142
+ encoder_conf: {}
143
+ separator: ineube
144
+ separator_conf:
145
+ n_fft: 512
146
+ stride: 128
147
+ window: hann
148
+ mic_channels: 8
149
+ decoder: same
150
+ decoder_conf: {}
151
+ mask_module: multi_mask
152
+ mask_module_conf: {}
153
+ required:
154
+ - output_dir
155
+ version: '202205'
156
+ distributed: true
exp/enh_train_enh_ineube_raw/images/backward_time.png ADDED
exp/enh_train_enh_ineube_raw/images/forward_time.png ADDED
exp/enh_train_enh_ineube_raw/images/gpu_max_cached_mem_GB.png ADDED
exp/enh_train_enh_ineube_raw/images/iter_time.png ADDED
exp/enh_train_enh_ineube_raw/images/loss.png ADDED
exp/enh_train_enh_ineube_raw/images/optim0_lr0.png ADDED
exp/enh_train_enh_ineube_raw/images/optim_step_time.png ADDED
exp/enh_train_enh_ineube_raw/images/snr_loss.png ADDED
exp/enh_train_enh_ineube_raw/images/train_time.png ADDED
meta.yaml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ espnet: '202205'
2
+ files:
3
+ model_file: exp/enh_train_enh_ineube_raw/99epoch.pth
4
+ python: "3.8.13 (default, Mar 28 2022, 11:38:47) \n[GCC 7.5.0]"
5
+ timestamp: 1657140371.541695
6
+ torch: 1.8.1
7
+ yaml_files:
8
+ train_config: exp/enh_train_enh_ineube_raw/config.yaml