Fhrozen committed on
Commit
ef80619
1 Parent(s): c2daec0
README.md CHANGED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - espnet
4
+ - audio
5
+ - audio-source-separation
6
+ language: en
7
+ datasets:
8
+ - wsj0_2mix
9
+ license: cc-by-4.0
10
+ inference: false
11
+ ---
12
+
13
+ # ESPnet2 ENH pretrained model
14
+
15
+ ## `Chenda Li/wsj0_2mix_enh_train_enh_rnn_tf_raw_valid.si_snr.ave, fs=8k, lang=en`
16
+
17
+ ♻️ Imported from <https://zenodo.org/record/4498554#.YOAOEpozZH4>.
18
+
19
+ This model was trained by Chenda Li using the wsj0_2mix recipe in [espnet](https://github.com/espnet/espnet/).
20
+
21
+ ### Python API
22
+
23
+ ```text
24
+ See https://github.com/espnet/espnet_model_zoo
25
+ ```
26
+
27
+ ### Evaluate in the recipe
28
+
29
+ ```python
30
+ # coming soon
31
+ ```
32
+
33
+ ### Results
34
+
35
+ ```markdown
36
+ # RESULTS
37
+ ## Environments
38
+ - date: `Thu Feb 4 01:08:19 CST 2021`
39
+ - python version: `3.7.6 (default, Jan 8 2020, 19:59:22) [GCC 7.3.0]`
40
+ - espnet version: `espnet 0.9.7`
41
+ - pytorch version: `pytorch 1.5.0`
42
+ - Git hash: `a3334220b0352931677946d178fade3313cf82bb`
43
+ - Commit date: `Fri Jan 29 23:35:47 2021 +0800`
44
+
45
+
46
+ ## enh_train_enh_rnn_tf_raw
47
+
48
+ config: conf/tuning/train_enh_rnn_tf.yaml
49
+
50
+ |dataset|STOI|SAR|SDR|SIR|
51
+ |---|---|---|---|---|
52
+ |enhanced_cv_min_8k|0.891065|11.556|10.3982|18.0655|
53
+ |enhanced_tt_min_8k|0.896373|11.4086|10.2433|18.0496|
54
+ ```
55
+
56
+ ### Training config
57
+
58
+ See the full config in [`config.yaml`](./exp/enh_train_enh_rnn_tf_raw/config.yaml)
59
+
60
+ ```yaml
61
+ config: conf/tuning/train_enh_rnn_tf.yaml
62
+ print_config: false
63
+ log_level: INFO
64
+ dry_run: false
65
+ iterator_type: sequence
66
+ output_dir: exp/enh_train_enh_rnn_tf_raw
67
+ ngpu: 1
68
+ seed: 0
69
+ num_workers: 4
70
+ num_att_plot: 3
71
+ dist_backend: nccl
72
+ dist_init_method: env://
73
+ dist_world_size: null
74
+ dist_rank: null
75
+ local_rank: 0
76
+ dist_master_addr: null
77
+ dist_master_port: null
78
+ dist_launcher: null
79
+ multiprocessing_distributed: false
80
+ cudnn_enabled: true
81
+ cudnn_benchmark: false
82
+ cudnn_deterministic: true
83
+ ```
exp/enh_stats_8k/train/feats_stats.npz ADDED
Binary file (778 Bytes). View file
exp/enh_train_enh_rnn_tf_raw/99epoch.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f80783695f2a688a01ba2175483d2a945200117e3d07bcbdb564a3da50207f0f
3
+ size 191058039
exp/enh_train_enh_rnn_tf_raw/RESULTS.md ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!-- Generated by ./scripts/utils/show_enh_score.sh -->
2
+ # RESULTS
3
+ ## Environments
4
+ - date: `Thu Feb 4 01:08:19 CST 2021`
5
+ - python version: `3.7.6 (default, Jan 8 2020, 19:59:22) [GCC 7.3.0]`
6
+ - espnet version: `espnet 0.9.7`
7
+ - pytorch version: `pytorch 1.5.0`
8
+ - Git hash: `a3334220b0352931677946d178fade3313cf82bb`
9
+ - Commit date: `Fri Jan 29 23:35:47 2021 +0800`
10
+
11
+
12
+ ## enh_train_enh_rnn_tf_raw
13
+
14
+ config: conf/tuning/train_enh_rnn_tf.yaml
15
+
16
+ |dataset|STOI|SAR|SDR|SIR|
17
+ |---|---|---|---|---|
18
+ |enhanced_cv_min_8k|0.891065|11.556|10.3982|18.0655|
19
+ |enhanced_tt_min_8k|0.896373|11.4086|10.2433|18.0496|
20
+
exp/enh_train_enh_rnn_tf_raw/config.yaml ADDED
@@ -0,0 +1,142 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ config: conf/tuning/train_enh_rnn_tf.yaml
2
+ print_config: false
3
+ log_level: INFO
4
+ dry_run: false
5
+ iterator_type: sequence
6
+ output_dir: exp/enh_train_enh_rnn_tf_raw
7
+ ngpu: 1
8
+ seed: 0
9
+ num_workers: 4
10
+ num_att_plot: 3
11
+ dist_backend: nccl
12
+ dist_init_method: env://
13
+ dist_world_size: null
14
+ dist_rank: null
15
+ local_rank: 0
16
+ dist_master_addr: null
17
+ dist_master_port: null
18
+ dist_launcher: null
19
+ multiprocessing_distributed: false
20
+ cudnn_enabled: true
21
+ cudnn_benchmark: false
22
+ cudnn_deterministic: true
23
+ collect_stats: false
24
+ write_collected_feats: false
25
+ max_epoch: 100
26
+ patience: 10
27
+ val_scheduler_criterion:
28
+ - valid
29
+ - loss
30
+ early_stopping_criterion:
31
+ - valid
32
+ - loss
33
+ - min
34
+ best_model_criterion:
35
+ - - valid
36
+ - si_snr
37
+ - max
38
+ - - valid
39
+ - loss
40
+ - min
41
+ keep_nbest_models: 1
42
+ grad_clip: 5.0
43
+ grad_clip_type: 2.0
44
+ grad_noise: false
45
+ accum_grad: 1
46
+ no_forward_run: false
47
+ resume: true
48
+ train_dtype: float32
49
+ use_amp: false
50
+ log_interval: null
51
+ unused_parameters: false
52
+ use_tensorboard: true
53
+ use_wandb: false
54
+ wandb_project: null
55
+ wandb_id: null
56
+ pretrain_path: null
57
+ init_param: []
58
+ freeze_param: []
59
+ num_iters_per_epoch: null
60
+ batch_size: 8
61
+ valid_batch_size: null
62
+ batch_bins: 1000000
63
+ valid_batch_bins: null
64
+ train_shape_file:
65
+ - exp/enh_stats_8k/train/speech_mix_shape
66
+ - exp/enh_stats_8k/train/speech_ref1_shape
67
+ - exp/enh_stats_8k/train/speech_ref2_shape
68
+ valid_shape_file:
69
+ - exp/enh_stats_8k/valid/speech_mix_shape
70
+ - exp/enh_stats_8k/valid/speech_ref1_shape
71
+ - exp/enh_stats_8k/valid/speech_ref2_shape
72
+ batch_type: folded
73
+ valid_batch_type: null
74
+ fold_length:
75
+ - 80000
76
+ - 80000
77
+ - 80000
78
+ sort_in_batch: descending
79
+ sort_batch: descending
80
+ multiple_iterator: false
81
+ chunk_length: 500
82
+ chunk_shift_ratio: 0.5
83
+ num_cache_chunks: 1024
84
+ train_data_path_and_name_and_type:
85
+ - - dump/raw/tr_min_8k/wav.scp
86
+ - speech_mix
87
+ - sound
88
+ - - dump/raw/tr_min_8k/spk1.scp
89
+ - speech_ref1
90
+ - sound
91
+ - - dump/raw/tr_min_8k/spk2.scp
92
+ - speech_ref2
93
+ - sound
94
+ valid_data_path_and_name_and_type:
95
+ - - dump/raw/cv_min_8k/wav.scp
96
+ - speech_mix
97
+ - sound
98
+ - - dump/raw/cv_min_8k/spk1.scp
99
+ - speech_ref1
100
+ - sound
101
+ - - dump/raw/cv_min_8k/spk2.scp
102
+ - speech_ref2
103
+ - sound
104
+ allow_variable_data_keys: false
105
+ max_cache_size: 0.0
106
+ max_cache_fd: 32
107
+ valid_max_cache_size: null
108
+ optim: adam
109
+ optim_conf:
110
+ lr: 0.001
111
+ eps: 1.0e-08
112
+ weight_decay: 1.0e-07
113
+ scheduler: reducelronplateau
114
+ scheduler_conf:
115
+ mode: min
116
+ factor: 0.7
117
+ patience: 1
118
+ init: xavier_uniform
119
+ model_conf:
120
+ loss_type: mask_mse
121
+ mask_type: psm
122
+ use_preprocessor: false
123
+ encoder: stft
124
+ encoder_conf:
125
+ n_fft: 256
126
+ hop_length: 128
127
+ separator: rnn
128
+ separator_conf:
129
+ rnn_type: blstm
130
+ num_spk: 2
131
+ nonlinear: relu
132
+ layer: 3
133
+ unit: 896
134
+ dropout: 0.5
135
+ decoder: stft
136
+ decoder_conf:
137
+ n_fft: 256
138
+ hop_length: 128
139
+ required:
140
+ - output_dir
141
+ version: 0.9.7
142
+ distributed: false
exp/enh_train_enh_rnn_tf_raw/images/backward_time.png ADDED
exp/enh_train_enh_rnn_tf_raw/images/forward_time.png ADDED
exp/enh_train_enh_rnn_tf_raw/images/iter_time.png ADDED
exp/enh_train_enh_rnn_tf_raw/images/loss.png ADDED
exp/enh_train_enh_rnn_tf_raw/images/lr_0.png ADDED
exp/enh_train_enh_rnn_tf_raw/images/optim_step_time.png ADDED
exp/enh_train_enh_rnn_tf_raw/images/si_snr.png ADDED
exp/enh_train_enh_rnn_tf_raw/images/train_time.png ADDED
meta.yaml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
1
+ espnet: 0.9.7
2
+ files:
3
+ model_file: exp/enh_train_enh_rnn_tf_raw/99epoch.pth
4
+ python: "3.7.6 (default, Jan 8 2020, 19:59:22) \n[GCC 7.3.0]"
5
+ timestamp: 1612372229.396369
6
+ torch: 1.5.0
7
+ yaml_files:
8
+ train_config: exp/enh_train_enh_rnn_tf_raw/config.yaml