Zhaoheng Ni committed on
Commit
667c73d
1 Parent(s): e384c0a
exp/enh_train_enh_tfgrid_raw/33epoch.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:613db9fb4dafa7860d9e1390ec8f6bb61bba62cc0a67482bebcfbd0221d865c3
3
+ size 10332558
exp/enh_train_enh_tfgrid_raw/RESULTS.md ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!-- Generated by ./scripts/utils/show_enh_score.sh -->
2
+ # RESULTS
3
+ ## Environments
4
+ - date: `Wed Mar 6 01:29:02 UTC 2024`
5
+ - python version: `3.10.10 (main, Mar 21 2023, 18:45:11) [GCC 11.2.0]`
6
+ - espnet version: `espnet 202308`
7
+ - pytorch version: `pytorch 2.1.0+cu118`
8
+ - Git hash: `60ce18efa06ca5a5922534682f47e2107ef88b13`
9
+ - Commit date: `Wed Sep 6 10:17:57 2023 -0700`
10
+
11
+
12
+ ## enh_train_enh_tfgrid_raw
13
+
14
+ config: ./conf/tuning/train_enh_tfgrid.yaml
15
+
16
+ |dataset|PESQ_WB|STOI|SAR|SDR|SIR|SI_SNR|
17
+ |---|---|---|---|---|---|---|
18
+ |enhanced_cv_synthetic|3.61|99.06|26.04|26.04|0.00|26.44|
19
+ |enhanced_tt_synthetic_no_reverb|3.32|97.88|20.18|20.18|0.00|20.17|
20
+ |enhanced_tt_synthetic_with_reverb|2.79|91.75|15.54|15.54|0.00|15.06|
21
+
exp/enh_train_enh_tfgrid_raw/config.yaml ADDED
@@ -0,0 +1,203 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ config: ./conf/tuning/train_enh_tfgrid.yaml
2
+ print_config: false
3
+ log_level: INFO
4
+ drop_last_iter: false
5
+ dry_run: false
6
+ iterator_type: chunk
7
+ valid_iterator_type: null
8
+ output_dir: exp/enh_train_enh_tfgrid_raw
9
+ ngpu: 1
10
+ seed: 0
11
+ num_workers: 8
12
+ num_att_plot: 3
13
+ dist_backend: nccl
14
+ dist_init_method: env://
15
+ dist_world_size: null
16
+ dist_rank: null
17
+ local_rank: 0
18
+ dist_master_addr: null
19
+ dist_master_port: null
20
+ dist_launcher: null
21
+ multiprocessing_distributed: false
22
+ unused_parameters: false
23
+ sharded_ddp: false
24
+ cudnn_enabled: true
25
+ cudnn_benchmark: false
26
+ cudnn_deterministic: true
27
+ collect_stats: false
28
+ write_collected_feats: false
29
+ max_epoch: 200
30
+ patience: 5
31
+ val_scheduler_criterion:
32
+ - valid
33
+ - loss
34
+ early_stopping_criterion:
35
+ - valid
36
+ - loss
37
+ - min
38
+ best_model_criterion:
39
+ - - valid
40
+ - si_snr
41
+ - max
42
+ - - valid
43
+ - loss
44
+ - min
45
+ keep_nbest_models: 5
46
+ nbest_averaging_interval: 0
47
+ grad_clip: 5.0
48
+ grad_clip_type: 2.0
49
+ grad_noise: false
50
+ accum_grad: 1
51
+ no_forward_run: false
52
+ resume: true
53
+ train_dtype: float32
54
+ use_amp: false
55
+ log_interval: null
56
+ use_matplotlib: true
57
+ use_tensorboard: true
58
+ create_graph_in_tensorboard: false
59
+ use_wandb: false
60
+ wandb_project: null
61
+ wandb_id: null
62
+ wandb_entity: null
63
+ wandb_name: null
64
+ wandb_model_log_interval: -1
65
+ detect_anomaly: false
66
+ pretrain_path: null
67
+ init_param: []
68
+ ignore_init_mismatch: false
69
+ freeze_param: []
70
+ num_iters_per_epoch: 5000
71
+ batch_size: 2
72
+ valid_batch_size: null
73
+ batch_bins: 1000000
74
+ valid_batch_bins: null
75
+ train_shape_file:
76
+ - exp/enh_stats_16k/train/speech_mix_shape
77
+ - exp/enh_stats_16k/train/speech_ref1_shape
78
+ - exp/enh_stats_16k/train/noise_ref1_shape
79
+ valid_shape_file:
80
+ - exp/enh_stats_16k/valid/speech_mix_shape
81
+ - exp/enh_stats_16k/valid/speech_ref1_shape
82
+ - exp/enh_stats_16k/valid/noise_ref1_shape
83
+ batch_type: folded
84
+ valid_batch_type: null
85
+ fold_length:
86
+ - 80000
87
+ - 80000
88
+ - 80000
89
+ sort_in_batch: descending
90
+ shuffle_within_batch: false
91
+ sort_batch: descending
92
+ multiple_iterator: false
93
+ chunk_length: 48000
94
+ chunk_shift_ratio: 0.5
95
+ num_cache_chunks: 1024
96
+ chunk_excluded_key_prefixes: []
97
+ train_data_path_and_name_and_type:
98
+ - - dump/raw/tr_synthetic/wav.scp
99
+ - speech_mix
100
+ - sound
101
+ - - dump/raw/tr_synthetic/spk1.scp
102
+ - speech_ref1
103
+ - sound
104
+ - - dump/raw/tr_synthetic/noise1.scp
105
+ - noise_ref1
106
+ - sound
107
+ valid_data_path_and_name_and_type:
108
+ - - dump/raw/cv_synthetic/wav.scp
109
+ - speech_mix
110
+ - sound
111
+ - - dump/raw/cv_synthetic/spk1.scp
112
+ - speech_ref1
113
+ - sound
114
+ - - dump/raw/cv_synthetic/noise1.scp
115
+ - noise_ref1
116
+ - sound
117
+ allow_variable_data_keys: false
118
+ max_cache_size: 0.0
119
+ max_cache_fd: 32
120
+ valid_max_cache_size: null
121
+ exclude_weight_decay: false
122
+ exclude_weight_decay_conf: {}
123
+ optim: adam
124
+ optim_conf:
125
+ lr: 0.001
126
+ eps: 1.0e-08
127
+ weight_decay: 0
128
+ scheduler: reducelronplateau
129
+ scheduler_conf:
130
+ mode: min
131
+ factor: 0.7
132
+ patience: 1
133
+ init: xavier_uniform
134
+ model_conf:
135
+ stft_consistency: false
136
+ loss_type: mask_mse
137
+ mask_type: null
138
+ extract_feats_in_collect_stats: false
139
+ criterions:
140
+ - name: mr_l1_tfd
141
+ conf:
142
+ window_sz:
143
+ - 256
144
+ - 512
145
+ - 768
146
+ - 1024
147
+ hop_sz: null
148
+ eps: 1.0e-08
149
+ time_domain_weight: 0.5
150
+ wrapper: fixed_order
151
+ wrapper_conf:
152
+ weight: 1.0
153
+ - name: si_snr
154
+ conf:
155
+ eps: 1.0e-07
156
+ wrapper: fixed_order
157
+ wrapper_conf:
158
+ weight: 0.0
159
+ speech_volume_normalize: null
160
+ rir_scp: null
161
+ rir_apply_prob: 1.0
162
+ noise_scp: null
163
+ noise_apply_prob: 1.0
164
+ noise_db_range: '13_15'
165
+ short_noise_thres: 0.5
166
+ use_reverberant_ref: false
167
+ num_spk: 1
168
+ num_noise_type: 1
169
+ sample_rate: 8000
170
+ force_single_channel: false
171
+ channel_reordering: false
172
+ categories: []
173
+ dynamic_mixing: false
174
+ utt2spk: null
175
+ dynamic_mixing_gain_db: 0.0
176
+ encoder: same
177
+ encoder_conf: {}
178
+ separator: tfgridnet
179
+ separator_conf:
180
+ n_srcs: 1
181
+ n_fft: 512
182
+ stride: 256
183
+ window: hann
184
+ n_imics: 1
185
+ n_layers: 4
186
+ lstm_hidden_units: 128
187
+ attn_n_head: 4
188
+ attn_approx_qk_dim: 512
189
+ emb_dim: 32
190
+ emb_ks: 4
191
+ emb_hs: 4
192
+ activation: prelu
193
+ eps: 1.0e-05
194
+ decoder: same
195
+ decoder_conf: {}
196
+ mask_module: multi_mask
197
+ mask_module_conf: {}
198
+ preprocessor: null
199
+ preprocessor_conf: {}
200
+ required:
201
+ - output_dir
202
+ version: '202308'
203
+ distributed: false
exp/enh_train_enh_tfgrid_raw/images/backward_time.png ADDED
exp/enh_train_enh_tfgrid_raw/images/clip.png ADDED
exp/enh_train_enh_tfgrid_raw/images/forward_time.png ADDED
exp/enh_train_enh_tfgrid_raw/images/gpu_max_cached_mem_GB.png ADDED
exp/enh_train_enh_tfgrid_raw/images/grad_norm.png ADDED
exp/enh_train_enh_tfgrid_raw/images/iter_time.png ADDED
exp/enh_train_enh_tfgrid_raw/images/l1_timedomain+magspec_loss.png ADDED
exp/enh_train_enh_tfgrid_raw/images/loss.png ADDED
exp/enh_train_enh_tfgrid_raw/images/loss_scale.png ADDED
exp/enh_train_enh_tfgrid_raw/images/optim0_lr0.png ADDED
exp/enh_train_enh_tfgrid_raw/images/optim_step_time.png ADDED
exp/enh_train_enh_tfgrid_raw/images/si_snr_loss.png ADDED
exp/enh_train_enh_tfgrid_raw/images/train_time.png ADDED
meta.yaml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ espnet: '202308'
2
+ files:
3
+ model_file: exp/enh_train_enh_tfgrid_raw/33epoch.pth
4
+ python: 3.10.10 (main, Mar 21 2023, 18:45:11) [GCC 11.2.0]
5
+ timestamp: 1711033647.373215
6
+ torch: 2.1.0+cu118
7
+ yaml_files:
8
+ train_config: exp/enh_train_enh_tfgrid_raw/config.yaml