YushiUeda committed on
Commit
f23c40b
1 Parent(s): 75772f6

Update model

Browse files
README.md ADDED
@@ -0,0 +1,233 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - espnet
4
+ - audio
5
+ - diarization
6
+ language: noinfo
7
+ datasets:
8
+ - callhome
9
+ license: cc-by-4.0
10
+ ---
11
+
12
+ ## ESPnet2 DIAR model
13
+
14
+ ### `YushiUeda/callhome_adapt_simu`
15
+
16
+ This model was trained by YushiUeda using the callhome recipe in [ESPnet](https://github.com/espnet/espnet/).
17
+
18
+ ### Demo: How to use in ESPnet2
19
+
20
+ ```bash
21
+ cd espnet
22
+ git checkout 0cabe65afd362122e77b04e2e967986a91de0fd8
23
+ pip install -e .
24
+ cd egs2/callhome/diar1
25
+ ./run.sh --skip_data_prep false --skip_train true --download_model YushiUeda/callhome_adapt_simu
26
+ ```
27
+
28
+
29
+
30
+ ## DIAR config
31
+
32
+ <details><summary>expand</summary>
33
+
34
+ ```yaml
35
+ config: conf/tuning/train_diar_eda_adapt.yaml
36
+ print_config: false
37
+ log_level: INFO
38
+ dry_run: false
39
+ iterator_type: sequence
40
+ output_dir: exp/diar_train_diar_eda_adapt_simu
41
+ ngpu: 1
42
+ seed: 0
43
+ num_workers: 1
44
+ num_att_plot: 3
45
+ dist_backend: nccl
46
+ dist_init_method: env://
47
+ dist_world_size: 4
48
+ dist_rank: 0
49
+ local_rank: 0
50
+ dist_master_addr: localhost
51
+ dist_master_port: 43777
52
+ dist_launcher: null
53
+ multiprocessing_distributed: true
54
+ unused_parameters: false
55
+ sharded_ddp: false
56
+ cudnn_enabled: true
57
+ cudnn_benchmark: false
58
+ cudnn_deterministic: true
59
+ collect_stats: false
60
+ write_collected_feats: false
61
+ max_epoch: 50
62
+ patience: null
63
+ val_scheduler_criterion:
64
+ - valid
65
+ - loss
66
+ early_stopping_criterion:
67
+ - valid
68
+ - loss
69
+ - min
70
+ best_model_criterion:
71
+ - - valid
72
+ - acc
73
+ - max
74
+ - - train
75
+ - acc
76
+ - max
77
+ keep_nbest_models: 10
78
+ nbest_averaging_interval: 0
79
+ grad_clip: 5
80
+ grad_clip_type: 2.0
81
+ grad_noise: false
82
+ accum_grad: 4
83
+ no_forward_run: false
84
+ resume: true
85
+ train_dtype: float32
86
+ use_amp: false
87
+ log_interval: null
88
+ use_matplotlib: true
89
+ use_tensorboard: true
90
+ use_wandb: false
91
+ wandb_project: null
92
+ wandb_id: null
93
+ wandb_entity: null
94
+ wandb_name: null
95
+ wandb_model_log_interval: -1
96
+ detect_anomaly: false
97
+ pretrain_path: null
98
+ init_param:
99
+ - exp/diar_train_diar_eda_5_raw/latest.pth
100
+ ignore_init_mismatch: false
101
+ freeze_param: []
102
+ num_iters_per_epoch: null
103
+ batch_size: 16
104
+ valid_batch_size: null
105
+ batch_bins: 1000000
106
+ valid_batch_bins: null
107
+ train_shape_file:
108
+ - exp/diar_stats_8k/train/speech_shape
109
+ - exp/diar_stats_8k/train/spk_labels_shape
110
+ valid_shape_file:
111
+ - exp/diar_stats_8k/valid/speech_shape
112
+ - exp/diar_stats_8k/valid/spk_labels_shape
113
+ batch_type: folded
114
+ valid_batch_type: null
115
+ fold_length:
116
+ - 80000
117
+ - 800
118
+ sort_in_batch: descending
119
+ sort_batch: descending
120
+ multiple_iterator: false
121
+ chunk_length: 500
122
+ chunk_shift_ratio: 0.5
123
+ num_cache_chunks: 1024
124
+ train_data_path_and_name_and_type:
125
+ - - dump/raw/simu/data/swb_sre_tr_ns1n2n3n4_beta2n2n5n9_100000/wav.scp
126
+ - speech
127
+ - sound
128
+ - - dump/raw/simu/data/swb_sre_tr_ns1n2n3n4_beta2n2n5n9_100000/espnet_rttm
129
+ - spk_labels
130
+ - rttm
131
+ valid_data_path_and_name_and_type:
132
+ - - dump/raw/simu/data/swb_sre_cv_ns1n2n3n4_beta2n2n5n9_500/wav.scp
133
+ - speech
134
+ - sound
135
+ - - dump/raw/simu/data/swb_sre_cv_ns1n2n3n4_beta2n2n5n9_500/espnet_rttm
136
+ - spk_labels
137
+ - rttm
138
+ allow_variable_data_keys: false
139
+ max_cache_size: 0.0
140
+ max_cache_fd: 32
141
+ valid_max_cache_size: null
142
+ optim: adam
143
+ optim_conf:
144
+ lr: 0.0001
145
+ scheduler: null
146
+ scheduler_conf: {}
147
+ num_spk: 4
148
+ init: null
149
+ input_size: null
150
+ model_conf:
151
+ attractor_weight: 1.0
152
+ use_preprocessor: true
153
+ frontend: default
154
+ frontend_conf:
155
+ fs: 8k
156
+ hop_length: 128
157
+ specaug: specaug
158
+ specaug_conf:
159
+ apply_time_warp: false
160
+ apply_freq_mask: true
161
+ freq_mask_width_range:
162
+ - 0
163
+ - 30
164
+ num_freq_mask: 2
165
+ apply_time_mask: true
166
+ time_mask_width_range:
167
+ - 0
168
+ - 40
169
+ num_time_mask: 2
170
+ normalize: global_mvn
171
+ normalize_conf:
172
+ stats_file: exp/diar_stats_8k/train/feats_stats.npz
173
+ encoder: transformer
174
+ encoder_conf:
175
+ input_layer: conv2d
176
+ num_blocks: 4
177
+ linear_units: 512
178
+ dropout_rate: 0.1
179
+ output_size: 256
180
+ attention_heads: 4
181
+ attention_dropout_rate: 0.1
182
+ decoder: linear
183
+ decoder_conf: {}
184
+ label_aggregator: label_aggregator
185
+ label_aggregator_conf:
186
+ win_length: 1024
187
+ hop_length: 512
188
+ attractor: rnn
189
+ attractor_conf:
190
+ unit: 256
191
+ layer: 1
192
+ dropout: 0.0
193
+ attractor_grad: true
194
+ required:
195
+ - output_dir
196
+ version: '202204'
197
+ distributed: true
198
+ ```
199
+
200
+ </details>
201
+
202
+
203
+
204
+ ### Citing ESPnet
205
+
206
+ ```bibtex
207
+ @inproceedings{watanabe2018espnet,
208
+ author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
209
+ title={{ESPnet}: End-to-End Speech Processing Toolkit},
210
+ year={2018},
211
+ booktitle={Proceedings of Interspeech},
212
+ pages={2207--2211},
213
+ doi={10.21437/Interspeech.2018-1456},
214
+ url={https://doi.org/10.21437/Interspeech.2018-1456}
215
+ }
216
+
217
+
218
+
219
+
220
+ ```
221
+
222
+ or arXiv:
223
+
224
+ ```bibtex
225
+ @misc{watanabe2018espnet,
226
+ title={ESPnet: End-to-End Speech Processing Toolkit},
227
+ author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
228
+ year={2018},
229
+ eprint={1804.00015},
230
+ archivePrefix={arXiv},
231
+ primaryClass={cs.CL}
232
+ }
233
+ ```
exp/diar_stats_8k/train/feats_stats.npz ADDED
Binary file (1.4 kB). View file
exp/diar_train_diar_eda_adapt_simu/13epoch.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:006735290e86e1ee4bb46dc5abaa7f62ffe4c86da385fc3560407ee45dda4e04
3
+ size 20113016
exp/diar_train_diar_eda_adapt_simu/RESULTS.md ADDED
File without changes
exp/diar_train_diar_eda_adapt_simu/config.yaml ADDED
@@ -0,0 +1,163 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ config: conf/tuning/train_diar_eda_adapt.yaml
2
+ print_config: false
3
+ log_level: INFO
4
+ dry_run: false
5
+ iterator_type: sequence
6
+ output_dir: exp/diar_train_diar_eda_adapt_simu
7
+ ngpu: 1
8
+ seed: 0
9
+ num_workers: 1
10
+ num_att_plot: 3
11
+ dist_backend: nccl
12
+ dist_init_method: env://
13
+ dist_world_size: 4
14
+ dist_rank: 0
15
+ local_rank: 0
16
+ dist_master_addr: localhost
17
+ dist_master_port: 43777
18
+ dist_launcher: null
19
+ multiprocessing_distributed: true
20
+ unused_parameters: false
21
+ sharded_ddp: false
22
+ cudnn_enabled: true
23
+ cudnn_benchmark: false
24
+ cudnn_deterministic: true
25
+ collect_stats: false
26
+ write_collected_feats: false
27
+ max_epoch: 50
28
+ patience: null
29
+ val_scheduler_criterion:
30
+ - valid
31
+ - loss
32
+ early_stopping_criterion:
33
+ - valid
34
+ - loss
35
+ - min
36
+ best_model_criterion:
37
+ - - valid
38
+ - acc
39
+ - max
40
+ - - train
41
+ - acc
42
+ - max
43
+ keep_nbest_models: 10
44
+ nbest_averaging_interval: 0
45
+ grad_clip: 5
46
+ grad_clip_type: 2.0
47
+ grad_noise: false
48
+ accum_grad: 4
49
+ no_forward_run: false
50
+ resume: true
51
+ train_dtype: float32
52
+ use_amp: false
53
+ log_interval: null
54
+ use_matplotlib: true
55
+ use_tensorboard: true
56
+ use_wandb: false
57
+ wandb_project: null
58
+ wandb_id: null
59
+ wandb_entity: null
60
+ wandb_name: null
61
+ wandb_model_log_interval: -1
62
+ detect_anomaly: false
63
+ pretrain_path: null
64
+ init_param:
65
+ - exp/diar_train_diar_eda_5_raw/latest.pth
66
+ ignore_init_mismatch: false
67
+ freeze_param: []
68
+ num_iters_per_epoch: null
69
+ batch_size: 16
70
+ valid_batch_size: null
71
+ batch_bins: 1000000
72
+ valid_batch_bins: null
73
+ train_shape_file:
74
+ - exp/diar_stats_8k/train/speech_shape
75
+ - exp/diar_stats_8k/train/spk_labels_shape
76
+ valid_shape_file:
77
+ - exp/diar_stats_8k/valid/speech_shape
78
+ - exp/diar_stats_8k/valid/spk_labels_shape
79
+ batch_type: folded
80
+ valid_batch_type: null
81
+ fold_length:
82
+ - 80000
83
+ - 800
84
+ sort_in_batch: descending
85
+ sort_batch: descending
86
+ multiple_iterator: false
87
+ chunk_length: 500
88
+ chunk_shift_ratio: 0.5
89
+ num_cache_chunks: 1024
90
+ train_data_path_and_name_and_type:
91
+ - - dump/raw/simu/data/swb_sre_tr_ns1n2n3n4_beta2n2n5n9_100000/wav.scp
92
+ - speech
93
+ - sound
94
+ - - dump/raw/simu/data/swb_sre_tr_ns1n2n3n4_beta2n2n5n9_100000/espnet_rttm
95
+ - spk_labels
96
+ - rttm
97
+ valid_data_path_and_name_and_type:
98
+ - - dump/raw/simu/data/swb_sre_cv_ns1n2n3n4_beta2n2n5n9_500/wav.scp
99
+ - speech
100
+ - sound
101
+ - - dump/raw/simu/data/swb_sre_cv_ns1n2n3n4_beta2n2n5n9_500/espnet_rttm
102
+ - spk_labels
103
+ - rttm
104
+ allow_variable_data_keys: false
105
+ max_cache_size: 0.0
106
+ max_cache_fd: 32
107
+ valid_max_cache_size: null
108
+ optim: adam
109
+ optim_conf:
110
+ lr: 0.0001
111
+ scheduler: null
112
+ scheduler_conf: {}
113
+ num_spk: 4
114
+ init: null
115
+ input_size: null
116
+ model_conf:
117
+ attractor_weight: 1.0
118
+ use_preprocessor: true
119
+ frontend: default
120
+ frontend_conf:
121
+ fs: 8k
122
+ hop_length: 128
123
+ specaug: specaug
124
+ specaug_conf:
125
+ apply_time_warp: false
126
+ apply_freq_mask: true
127
+ freq_mask_width_range:
128
+ - 0
129
+ - 30
130
+ num_freq_mask: 2
131
+ apply_time_mask: true
132
+ time_mask_width_range:
133
+ - 0
134
+ - 40
135
+ num_time_mask: 2
136
+ normalize: global_mvn
137
+ normalize_conf:
138
+ stats_file: exp/diar_stats_8k/train/feats_stats.npz
139
+ encoder: transformer
140
+ encoder_conf:
141
+ input_layer: conv2d
142
+ num_blocks: 4
143
+ linear_units: 512
144
+ dropout_rate: 0.1
145
+ output_size: 256
146
+ attention_heads: 4
147
+ attention_dropout_rate: 0.1
148
+ decoder: linear
149
+ decoder_conf: {}
150
+ label_aggregator: label_aggregator
151
+ label_aggregator_conf:
152
+ win_length: 1024
153
+ hop_length: 512
154
+ attractor: rnn
155
+ attractor_conf:
156
+ unit: 256
157
+ layer: 1
158
+ dropout: 0.0
159
+ attractor_grad: true
160
+ required:
161
+ - output_dir
162
+ version: '202204'
163
+ distributed: true
exp/diar_train_diar_eda_adapt_simu/images/acc.png ADDED
exp/diar_train_diar_eda_adapt_simu/images/backward_time.png ADDED
exp/diar_train_diar_eda_adapt_simu/images/cf.png ADDED
exp/diar_train_diar_eda_adapt_simu/images/der.png ADDED
exp/diar_train_diar_eda_adapt_simu/images/fa.png ADDED
exp/diar_train_diar_eda_adapt_simu/images/forward_time.png ADDED
exp/diar_train_diar_eda_adapt_simu/images/gpu_max_cached_mem_GB.png ADDED
exp/diar_train_diar_eda_adapt_simu/images/iter_time.png ADDED
exp/diar_train_diar_eda_adapt_simu/images/loss.png ADDED
exp/diar_train_diar_eda_adapt_simu/images/loss_att.png ADDED
exp/diar_train_diar_eda_adapt_simu/images/loss_pit.png ADDED
exp/diar_train_diar_eda_adapt_simu/images/mi.png ADDED
exp/diar_train_diar_eda_adapt_simu/images/optim0_lr0.png ADDED
exp/diar_train_diar_eda_adapt_simu/images/optim_step_time.png ADDED
exp/diar_train_diar_eda_adapt_simu/images/sad_fr.png ADDED
exp/diar_train_diar_eda_adapt_simu/images/sad_mr.png ADDED
exp/diar_train_diar_eda_adapt_simu/images/train_time.png ADDED
meta.yaml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
1
+ espnet: '202205'
2
+ files:
3
+ model_file: exp/diar_train_diar_eda_adapt_simu/13epoch.pth
4
+ python: "3.7.11 (default, Jul 27 2021, 14:32:16) \n[GCC 7.5.0]"
5
+ timestamp: 1656444101.362689
6
+ torch: 1.9.1+cu102
7
+ yaml_files:
8
+ train_config: exp/diar_train_diar_eda_adapt_simu/config.yaml