soumi-maiti committed on
Commit
df94b2b
1 Parent(s): cc96e65

Update model to train_diar_enh_convtasnet_2 (62epoch checkpoint)

Browse files
README.md CHANGED
@@ -35,23 +35,23 @@ cd egs2/librimix/enh_diar1
35
  <details><summary>expand</summary>
36
 
37
  ```
38
- config: conf/tuning/train_diar_enh_convtasnet_concat_feats.yaml
39
  print_config: false
40
  log_level: INFO
41
  dry_run: false
42
  iterator_type: chunk
43
- output_dir: exp/diar_enh_train_diar_enh_convtasnet_concat_feats_raw
44
  ngpu: 1
45
  seed: 0
46
  num_workers: 4
47
  num_att_plot: 3
48
  dist_backend: nccl
49
  dist_init_method: env://
50
- dist_world_size: 2
51
  dist_rank: 0
52
  local_rank: 0
53
  dist_master_addr: localhost
54
- dist_master_port: 54493
55
  dist_launcher: null
56
  multiprocessing_distributed: true
57
  unused_parameters: false
@@ -62,7 +62,7 @@ cudnn_deterministic: true
62
  collect_stats: false
63
  write_collected_feats: false
64
  max_epoch: 100
65
- patience: 50
66
  val_scheduler_criterion:
67
  - valid
68
  - loss
@@ -79,7 +79,7 @@ nbest_averaging_interval: 0
79
  grad_clip: 5.0
80
  grad_clip_type: 2.0
81
  grad_noise: false
82
- accum_grad: 2
83
  no_forward_run: false
84
  resume: true
85
  train_dtype: float32
@@ -99,7 +99,7 @@ init_param: []
99
  ignore_init_mismatch: false
100
  freeze_param: []
101
  num_iters_per_epoch: null
102
- batch_size: 8
103
  valid_batch_size: null
104
  batch_bins: 1000000
105
  valid_batch_bins: null
@@ -174,7 +174,7 @@ scheduler: reducelronplateau
174
  scheduler_conf:
175
  mode: min
176
  factor: 0.5
177
- patience: 50
178
  token_list: null
179
  src_token_list: null
180
  init: xavier_uniform
@@ -253,7 +253,7 @@ enh_mask_module_conf:
253
  max_num_spk: 3
254
  mask_nonlinear: relu
255
  bottleneck_dim: 128
256
- frontend: default
257
  frontend_conf: {}
258
  specaug: null
259
  specaug_conf: {}
@@ -279,10 +279,8 @@ st_extra_asr_decoder: rnn
279
  st_extra_asr_decoder_conf: {}
280
  st_extra_mt_decoder: rnn
281
  st_extra_mt_decoder_conf: {}
282
- diar_frontend: default
283
- diar_frontend_conf:
284
- hop_length: 64
285
- fs: 8000
286
  diar_specaug: null
287
  diar_specaug_conf: {}
288
  diar_normalize: utterance_mvn
 
35
  <details><summary>expand</summary>
36
 
37
  ```
38
+ config: conf/tuning/train_diar_enh_convtasnet_2.yaml
39
  print_config: false
40
  log_level: INFO
41
  dry_run: false
42
  iterator_type: chunk
43
+ output_dir: exp/diar_enh_train_diar_enh_convtasnet_2_raw
44
  ngpu: 1
45
  seed: 0
46
  num_workers: 4
47
  num_att_plot: 3
48
  dist_backend: nccl
49
  dist_init_method: env://
50
+ dist_world_size: 4
51
  dist_rank: 0
52
  local_rank: 0
53
  dist_master_addr: localhost
54
+ dist_master_port: 55259
55
  dist_launcher: null
56
  multiprocessing_distributed: true
57
  unused_parameters: false
 
62
  collect_stats: false
63
  write_collected_feats: false
64
  max_epoch: 100
65
+ patience: 4
66
  val_scheduler_criterion:
67
  - valid
68
  - loss
 
79
  grad_clip: 5.0
80
  grad_clip_type: 2.0
81
  grad_noise: false
82
+ accum_grad: 1
83
  no_forward_run: false
84
  resume: true
85
  train_dtype: float32
 
99
  ignore_init_mismatch: false
100
  freeze_param: []
101
  num_iters_per_epoch: null
102
+ batch_size: 16
103
  valid_batch_size: null
104
  batch_bins: 1000000
105
  valid_batch_bins: null
 
174
  scheduler_conf:
175
  mode: min
176
  factor: 0.5
177
+ patience: 1
178
  token_list: null
179
  src_token_list: null
180
  init: xavier_uniform
 
253
  max_num_spk: 3
254
  mask_nonlinear: relu
255
  bottleneck_dim: 128
256
+ frontend: null
257
  frontend_conf: {}
258
  specaug: null
259
  specaug_conf: {}
 
279
  st_extra_asr_decoder_conf: {}
280
  st_extra_mt_decoder: rnn
281
  st_extra_mt_decoder_conf: {}
282
+ diar_frontend: null
283
+ diar_frontend_conf: {}
 
 
284
  diar_specaug: null
285
  diar_specaug_conf: {}
286
  diar_normalize: utterance_mvn
exp/diar_enh_train_diar_enh_convtasnet_2_raw/62epoch.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:941381356eb87a76ba1870bf347937653638947289f275c9d321a1cdcc17d66a
3
+ size 36279117
exp/diar_enh_train_diar_enh_convtasnet_2_raw/DIAR_RESULTS.md ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!-- Generated by scripts/utils/show_diar_result.sh -->
2
+ # RESULTS
3
+ ## Environments
4
+ - date: `Thu Sep 1 06:43:21 EDT 2022`
5
+ - python version: `3.9.7 (default, Sep 16 2021, 13:09:58) [GCC 7.5.0]`
6
+ - espnet version: `espnet 202205`
7
+ - pytorch version: `pytorch 1.8.1+cu102`
8
+ - Git hash: `d837c97c88f13ffe655a30bcff93d814f212b225`
9
+ - Commit date: `Wed Jun 29 12:04:57 2022 -0700`
10
+
11
+ ## diar_enh_train_diar_enh_convtasnet_2_raw
12
+ ### DER
13
+ diarized_enhanced_test_wo_diar
14
+ |threshold_median_collar|DER|
15
+ |---|---|
exp/diar_enh_train_diar_enh_convtasnet_2_raw/ENH_RESULTS.md ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!-- Generated by scripts/utils/show_enh_score.sh -->
2
+ # RESULTS
3
+ ## Environments
4
+ - date: `Thu Sep 1 06:48:15 EDT 2022`
5
+ - python version: `3.9.7 (default, Sep 16 2021, 13:09:58) [GCC 7.5.0]`
6
+ - espnet version: `espnet 202205`
7
+ - pytorch version: `pytorch 1.8.1+cu102`
8
+ - Git hash: `d837c97c88f13ffe655a30bcff93d814f212b225`
9
+ - Commit date: `Wed Jun 29 12:04:57 2022 -0700`
10
+
11
+
12
+ ## diar_enh_train_diar_enh_convtasnet_2_raw
13
+
14
+ config: conf/tuning/train_diar_enh_convtasnet_2.yaml
15
+
16
+ |dataset|STOI|SAR|SDR|SIR|SI_SNR|
17
+ |---|---|---|---|---|---|
18
+ |diarized_enhanced_test_decode_diar_enh|83.8701|10.5786|10.0060|21.5454|9.2157|
19
+ |diarized_enhanced_test_decode_diar_enh2|83.8847|10.5814|10.0024|21.5087|9.2171|
20
+ |diarized_enhanced_test_wo_diar|83.8789|10.5800|9.9919|21.3513|9.2125|
21
+
exp/diar_enh_train_diar_enh_convtasnet_2_raw/config.yaml ADDED
@@ -0,0 +1,275 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ config: conf/tuning/train_diar_enh_convtasnet_2.yaml
2
+ print_config: false
3
+ log_level: INFO
4
+ dry_run: false
5
+ iterator_type: chunk
6
+ output_dir: exp/diar_enh_train_diar_enh_convtasnet_2_raw
7
+ ngpu: 1
8
+ seed: 0
9
+ num_workers: 4
10
+ num_att_plot: 3
11
+ dist_backend: nccl
12
+ dist_init_method: env://
13
+ dist_world_size: 4
14
+ dist_rank: 0
15
+ local_rank: 0
16
+ dist_master_addr: localhost
17
+ dist_master_port: 55259
18
+ dist_launcher: null
19
+ multiprocessing_distributed: true
20
+ unused_parameters: false
21
+ sharded_ddp: false
22
+ cudnn_enabled: true
23
+ cudnn_benchmark: false
24
+ cudnn_deterministic: true
25
+ collect_stats: false
26
+ write_collected_feats: false
27
+ max_epoch: 100
28
+ patience: 4
29
+ val_scheduler_criterion:
30
+ - valid
31
+ - loss
32
+ early_stopping_criterion:
33
+ - valid
34
+ - loss
35
+ - min
36
+ best_model_criterion:
37
+ - - valid
38
+ - loss_enh
39
+ - min
40
+ keep_nbest_models: 1
41
+ nbest_averaging_interval: 0
42
+ grad_clip: 5.0
43
+ grad_clip_type: 2.0
44
+ grad_noise: false
45
+ accum_grad: 1
46
+ no_forward_run: false
47
+ resume: true
48
+ train_dtype: float32
49
+ use_amp: false
50
+ log_interval: null
51
+ use_matplotlib: true
52
+ use_tensorboard: true
53
+ use_wandb: false
54
+ wandb_project: null
55
+ wandb_id: null
56
+ wandb_entity: null
57
+ wandb_name: null
58
+ wandb_model_log_interval: -1
59
+ detect_anomaly: false
60
+ pretrain_path: null
61
+ init_param: []
62
+ ignore_init_mismatch: false
63
+ freeze_param: []
64
+ num_iters_per_epoch: null
65
+ batch_size: 16
66
+ valid_batch_size: null
67
+ batch_bins: 1000000
68
+ valid_batch_bins: null
69
+ train_shape_file:
70
+ - exp/diar_enh_stats_8k/train/speech_shape
71
+ - exp/diar_enh_stats_8k/train/text_shape
72
+ - exp/diar_enh_stats_8k/train/speech_ref1_shape
73
+ - exp/diar_enh_stats_8k/train/speech_ref2_shape
74
+ - exp/diar_enh_stats_8k/train/noise_ref1_shape
75
+ valid_shape_file:
76
+ - exp/diar_enh_stats_8k/valid/speech_shape
77
+ - exp/diar_enh_stats_8k/valid/text_shape
78
+ - exp/diar_enh_stats_8k/valid/speech_ref1_shape
79
+ - exp/diar_enh_stats_8k/valid/speech_ref2_shape
80
+ - exp/diar_enh_stats_8k/valid/noise_ref1_shape
81
+ batch_type: folded
82
+ valid_batch_type: null
83
+ fold_length:
84
+ - 800
85
+ - 80000
86
+ - 80000
87
+ - 80000
88
+ - 80000
89
+ sort_in_batch: descending
90
+ sort_batch: descending
91
+ multiple_iterator: false
92
+ chunk_length: 24000
93
+ chunk_shift_ratio: 0.5
94
+ num_cache_chunks: 1024
95
+ train_data_path_and_name_and_type:
96
+ - - dump/raw/train/wav.scp
97
+ - speech
98
+ - sound
99
+ - - dump/raw/train/espnet_rttm
100
+ - text
101
+ - rttm
102
+ - - dump/raw/train/spk1.scp
103
+ - speech_ref1
104
+ - sound
105
+ - - dump/raw/train/spk2.scp
106
+ - speech_ref2
107
+ - sound
108
+ - - dump/raw/train/noise1.scp
109
+ - noise_ref1
110
+ - sound
111
+ valid_data_path_and_name_and_type:
112
+ - - dump/raw/dev/wav.scp
113
+ - speech
114
+ - sound
115
+ - - dump/raw/dev/espnet_rttm
116
+ - text
117
+ - rttm
118
+ - - dump/raw/dev/spk1.scp
119
+ - speech_ref1
120
+ - sound
121
+ - - dump/raw/dev/spk2.scp
122
+ - speech_ref2
123
+ - sound
124
+ - - dump/raw/dev/noise1.scp
125
+ - noise_ref1
126
+ - sound
127
+ allow_variable_data_keys: false
128
+ max_cache_size: 0.0
129
+ max_cache_fd: 32
130
+ valid_max_cache_size: null
131
+ optim: adam
132
+ optim_conf:
133
+ lr: 0.001
134
+ eps: 1.0e-07
135
+ weight_decay: 0
136
+ scheduler: reducelronplateau
137
+ scheduler_conf:
138
+ mode: min
139
+ factor: 0.5
140
+ patience: 1
141
+ token_list: null
142
+ src_token_list: null
143
+ init: xavier_uniform
144
+ input_size: null
145
+ ctc_conf:
146
+ dropout_rate: 0.0
147
+ ctc_type: builtin
148
+ reduce: true
149
+ ignore_nan_grad: null
150
+ zero_infinity: true
151
+ enh_criterions:
152
+ - name: si_snr
153
+ conf:
154
+ eps: 1.0e-07
155
+ wrapper: pit
156
+ wrapper_conf:
157
+ weight: 1.0
158
+ independent_perm: true
159
+ diar_num_spk: 2
160
+ diar_input_size: 128
161
+ enh_model_conf:
162
+ loss_type: si_snr
163
+ asr_model_conf:
164
+ ctc_weight: 0.5
165
+ interctc_weight: 0.0
166
+ ignore_id: -1
167
+ lsm_weight: 0.0
168
+ length_normalized_loss: false
169
+ report_cer: true
170
+ report_wer: true
171
+ sym_space: <space>
172
+ sym_blank: <blank>
173
+ extract_feats_in_collect_stats: true
174
+ st_model_conf:
175
+ stft_consistency: false
176
+ loss_type: mask_mse
177
+ mask_type: null
178
+ diar_model_conf:
179
+ diar_weight: 0.2
180
+ attractor_weight: 0.2
181
+ subtask_series:
182
+ - enh
183
+ - diar
184
+ model_conf:
185
+ calc_enh_loss: true
186
+ bypass_enh_prob: 0
187
+ use_preprocessor: true
188
+ token_type: bpe
189
+ bpemodel: null
190
+ src_token_type: bpe
191
+ src_bpemodel: null
192
+ non_linguistic_symbols: null
193
+ cleaner: null
194
+ g2p: null
195
+ enh_encoder: conv
196
+ enh_encoder_conf:
197
+ channel: 512
198
+ kernel_size: 16
199
+ stride: 8
200
+ enh_separator: tcn_nomask
201
+ enh_separator_conf:
202
+ layer: 8
203
+ stack: 3
204
+ bottleneck_dim: 128
205
+ hidden_dim: 512
206
+ kernel: 3
207
+ causal: false
208
+ norm_type: gLN
209
+ enh_decoder: conv
210
+ enh_decoder_conf:
211
+ channel: 512
212
+ kernel_size: 16
213
+ stride: 8
214
+ enh_mask_module: multi_mask
215
+ enh_mask_module_conf:
216
+ max_num_spk: 3
217
+ mask_nonlinear: relu
218
+ bottleneck_dim: 128
219
+ frontend: null
220
+ frontend_conf: {}
221
+ specaug: null
222
+ specaug_conf: {}
223
+ normalize: utterance_mvn
224
+ normalize_conf: {}
225
+ asr_preencoder: null
226
+ asr_preencoder_conf: {}
227
+ asr_encoder: rnn
228
+ asr_encoder_conf: {}
229
+ asr_postencoder: null
230
+ asr_postencoder_conf: {}
231
+ asr_decoder: rnn
232
+ asr_decoder_conf: {}
233
+ st_preencoder: null
234
+ st_preencoder_conf: {}
235
+ st_encoder: rnn
236
+ st_encoder_conf: {}
237
+ st_postencoder: null
238
+ st_postencoder_conf: {}
239
+ st_decoder: rnn
240
+ st_decoder_conf: {}
241
+ st_extra_asr_decoder: rnn
242
+ st_extra_asr_decoder_conf: {}
243
+ st_extra_mt_decoder: rnn
244
+ st_extra_mt_decoder_conf: {}
245
+ diar_frontend: null
246
+ diar_frontend_conf: {}
247
+ diar_specaug: null
248
+ diar_specaug_conf: {}
249
+ diar_normalize: utterance_mvn
250
+ diar_normalize_conf: {}
251
+ diar_encoder: transformer
252
+ diar_encoder_conf:
253
+ input_layer: conv2d8
254
+ num_blocks: 4
255
+ linear_units: 512
256
+ dropout_rate: 0.1
257
+ output_size: 256
258
+ attention_heads: 4
259
+ attention_dropout_rate: 0.1
260
+ diar_decoder: linear
261
+ diar_decoder_conf: {}
262
+ label_aggregator: label_aggregator
263
+ label_aggregator_conf:
264
+ win_length: 256
265
+ hop_length: 64
266
+ diar_attractor: rnn
267
+ diar_attractor_conf:
268
+ unit: 256
269
+ layer: 1
270
+ dropout: 0.0
271
+ attractor_grad: true
272
+ required:
273
+ - output_dir
274
+ version: '202205'
275
+ distributed: true
meta.yaml CHANGED
@@ -1,8 +1,8 @@
1
  espnet: '202205'
2
  files:
3
- model_file: exp/diar_enh_train_diar_enh_convtasnet_concat_feats_raw/57epoch.pth
4
  python: "3.9.7 (default, Sep 16 2021, 13:09:58) \n[GCC 7.5.0]"
5
- timestamp: 1676313760.888564
6
  torch: 1.8.1+cu102
7
  yaml_files:
8
- train_config: exp/diar_enh_train_diar_enh_convtasnet_concat_feats_raw/config.yaml
 
1
  espnet: '202205'
2
  files:
3
+ model_file: exp/diar_enh_train_diar_enh_convtasnet_2_raw/62epoch.pth
4
  python: "3.9.7 (default, Sep 16 2021, 13:09:58) \n[GCC 7.5.0]"
5
+ timestamp: 1676313901.897267
6
  torch: 1.8.1+cu102
7
  yaml_files:
8
+ train_config: exp/diar_enh_train_diar_enh_convtasnet_2_raw/config.yaml