Siddhant committed on
Commit
14e2bb5
1 Parent(s): 1b92274

import from zenodo

Browse files
README.md ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - espnet
4
+ - audio
5
+ - speaker-diarization
6
+ language: en
7
+ datasets:
8
+ - mini_librispeech
9
+ license: cc-by-4.0
10
+ ---
11
+ ## ESPnet2 DIAR pretrained model
12
+ ### `Yushi Ueda/mini_librispeech_diar_train_diar_raw_max_epoch20_valid.acc.best`
13
+ ♻️ Imported from https://zenodo.org/record/5264020/
14
+
15
+ This model was trained by Yushi Ueda using the mini_librispeech/diar1 recipe in [espnet](https://github.com/espnet/espnet/).
16
+ ### Demo: How to use in ESPnet2
17
+ ```python
18
+ # coming soon
19
+ ```
20
+ ### Citing ESPnet
21
+ ```bibtex
22
+ @inproceedings{watanabe2018espnet,
23
+ author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson {Enrique Yalta Soplin} and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
24
+ title={{ESPnet}: End-to-End Speech Processing Toolkit},
25
+ year={2018},
26
+ booktitle={Proceedings of Interspeech},
27
+ pages={2207--2211},
28
+ doi={10.21437/Interspeech.2018-1456},
29
+ url={http://dx.doi.org/10.21437/Interspeech.2018-1456}
30
+ }
31
+
32
+ ```
33
+ or arXiv:
34
+ ```bibtex
35
+ @misc{watanabe2018espnet,
36
+ title={ESPnet: End-to-End Speech Processing Toolkit},
37
+ author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Enrique Yalta Soplin and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
38
+ year={2018},
39
+ eprint={1804.00015},
40
+ archivePrefix={arXiv},
41
+ primaryClass={cs.CL}
42
+ }
43
+ ```
exp/diar_stats_8k/train/feats_stats.npz ADDED
Binary file (1.4 kB). View file
exp/diar_train_diar_raw_max_epoch20/19epoch.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c133eb6c494ec800b8be6b6c14aa91e890af7b19488f36423a7d56105987f493
3
+ size 4404388
exp/diar_train_diar_raw_max_epoch20/RESULTS.md ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!-- Generated by scripts/utils/show_diar_result.sh -->
2
+ # RESULTS
3
+ ## Environments
4
+ - date: `Wed Aug 25 23:29:07 EDT 2021`
5
+ - python version: `3.7.11 (default, Jul 27 2021, 14:32:16) [GCC 7.5.0]`
6
+ - espnet version: `espnet 0.10.2a1`
7
+ - pytorch version: `pytorch 1.9.0+cu102`
8
+ - Git hash: `19bcd34f9395e01e54a97c4db5ecbcedb429dd92`
9
+ - Commit date: `Tue Aug 24 19:50:44 2021 -0400`
10
+
11
+ ## `diar_train_diar_raw_max_epoch20`
12
+ ### DER
13
+ `dev_clean_2_ns2_beta2_500`
14
+
15
+ |threshold_median_collar|DER|
16
+ |---|---|
17
+ |result_th0.3_med1_collar0.0|32.42|
18
+ |result_th0.3_med11_collar0.0|32.03|
19
+ |result_th0.4_med1_collar0.0|30.96|
20
+ |result_th0.4_med11_collar0.0|30.26|
21
+ |result_th0.5_med1_collar0.0|30.35|
22
+ |result_th0.5_med11_collar0.0|29.37|
23
+ |result_th0.6_med1_collar0.0|30.77|
24
+ |result_th0.6_med11_collar0.0|29.52|
25
+ |result_th0.7_med1_collar0.0|32.60|
26
+ |result_th0.7_med11_collar0.0|31.03|
exp/diar_train_diar_raw_max_epoch20/config.yaml ADDED
@@ -0,0 +1,137 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ config: conf/train_diar.yaml
2
+ print_config: false
3
+ log_level: INFO
4
+ dry_run: false
5
+ iterator_type: chunk
6
+ output_dir: exp/diar_train_diar_raw_max_epoch20
7
+ ngpu: 1
8
+ seed: 0
9
+ num_workers: 1
10
+ num_att_plot: 3
11
+ dist_backend: nccl
12
+ dist_init_method: env://
13
+ dist_world_size: null
14
+ dist_rank: null
15
+ local_rank: 0
16
+ dist_master_addr: null
17
+ dist_master_port: null
18
+ dist_launcher: null
19
+ multiprocessing_distributed: false
20
+ unused_parameters: false
21
+ sharded_ddp: false
22
+ cudnn_enabled: true
23
+ cudnn_benchmark: false
24
+ cudnn_deterministic: true
25
+ collect_stats: false
26
+ write_collected_feats: false
27
+ max_epoch: 20
28
+ patience: 3
29
+ val_scheduler_criterion:
30
+ - valid
31
+ - loss
32
+ early_stopping_criterion:
33
+ - valid
34
+ - loss
35
+ - min
36
+ best_model_criterion:
37
+ - - valid
38
+ - acc
39
+ - max
40
+ keep_nbest_models: 3
41
+ grad_clip: 5
42
+ grad_clip_type: 2.0
43
+ grad_noise: false
44
+ accum_grad: 2
45
+ no_forward_run: false
46
+ resume: true
47
+ train_dtype: float32
48
+ use_amp: false
49
+ log_interval: null
50
+ use_tensorboard: true
51
+ use_wandb: false
52
+ wandb_project: null
53
+ wandb_id: null
54
+ wandb_entity: null
55
+ wandb_name: null
56
+ wandb_model_log_interval: -1
57
+ detect_anomaly: false
58
+ pretrain_path: null
59
+ init_param: []
60
+ ignore_init_mismatch: false
61
+ freeze_param: []
62
+ num_iters_per_epoch: null
63
+ batch_size: 16
64
+ valid_batch_size: null
65
+ batch_bins: 1000000
66
+ valid_batch_bins: null
67
+ train_shape_file:
68
+ - exp/diar_stats_8k/train/speech_shape
69
+ - exp/diar_stats_8k/train/spk_labels_shape
70
+ valid_shape_file:
71
+ - exp/diar_stats_8k/valid/speech_shape
72
+ - exp/diar_stats_8k/valid/spk_labels_shape
73
+ batch_type: folded
74
+ valid_batch_type: null
75
+ fold_length:
76
+ - 80000
77
+ - 800
78
+ sort_in_batch: descending
79
+ sort_batch: descending
80
+ multiple_iterator: false
81
+ chunk_length: 200000
82
+ chunk_shift_ratio: 0.5
83
+ num_cache_chunks: 64
84
+ train_data_path_and_name_and_type:
85
+ - - dump/raw/simu/data/train_clean_5_ns2_beta2_500/wav.scp
86
+ - speech
87
+ - sound
88
+ - - dump/raw/simu/data/train_clean_5_ns2_beta2_500/espnet_rttm
89
+ - spk_labels
90
+ - rttm
91
+ valid_data_path_and_name_and_type:
92
+ - - dump/raw/simu/data/dev_clean_2_ns2_beta2_500/wav.scp
93
+ - speech
94
+ - sound
95
+ - - dump/raw/simu/data/dev_clean_2_ns2_beta2_500/espnet_rttm
96
+ - spk_labels
97
+ - rttm
98
+ allow_variable_data_keys: false
99
+ max_cache_size: 0.0
100
+ max_cache_fd: 32
101
+ valid_max_cache_size: null
102
+ optim: adam
103
+ optim_conf:
104
+ lr: 0.01
105
+ scheduler: noamlr
106
+ scheduler_conf:
107
+ warmup_steps: 1000
108
+ num_spk: 2
109
+ init: xavier_uniform
110
+ input_size: null
111
+ model_conf:
112
+ loss_type: pit
113
+ use_preprocessor: true
114
+ frontend: default
115
+ frontend_conf:
116
+ fs: 8k
117
+ hop_length: 128
118
+ normalize: global_mvn
119
+ normalize_conf:
120
+ stats_file: exp/diar_stats_8k/train/feats_stats.npz
121
+ encoder: transformer
122
+ encoder_conf:
123
+ input_layer: linear
124
+ num_blocks: 2
125
+ linear_units: 512
126
+ dropout_rate: 0.1
127
+ output_size: 256
128
+ attention_heads: 4
129
+ attention_dropout_rate: 0.0
130
+ decoder: linear
131
+ decoder_conf: {}
132
+ label_aggregator: label_aggregator
133
+ label_aggregator_conf: {}
134
+ required:
135
+ - output_dir
136
+ version: 0.10.2a1
137
+ distributed: false
exp/diar_train_diar_raw_max_epoch20/images/acc.png ADDED
exp/diar_train_diar_raw_max_epoch20/images/backward_time.png ADDED
exp/diar_train_diar_raw_max_epoch20/images/cf.png ADDED
exp/diar_train_diar_raw_max_epoch20/images/der.png ADDED
exp/diar_train_diar_raw_max_epoch20/images/fa.png ADDED
exp/diar_train_diar_raw_max_epoch20/images/forward_time.png ADDED
exp/diar_train_diar_raw_max_epoch20/images/gpu_max_cached_mem_GB.png ADDED
exp/diar_train_diar_raw_max_epoch20/images/iter_time.png ADDED
exp/diar_train_diar_raw_max_epoch20/images/loss.png ADDED
exp/diar_train_diar_raw_max_epoch20/images/mi.png ADDED
exp/diar_train_diar_raw_max_epoch20/images/optim0_lr0.png ADDED
exp/diar_train_diar_raw_max_epoch20/images/optim_step_time.png ADDED
exp/diar_train_diar_raw_max_epoch20/images/sad_fr.png ADDED
exp/diar_train_diar_raw_max_epoch20/images/sad_mr.png ADDED
exp/diar_train_diar_raw_max_epoch20/images/train_time.png ADDED
meta.yaml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
1
+ espnet: 0.10.2a1
2
+ files:
3
+ model_file: exp/diar_train_diar_raw_max_epoch20/19epoch.pth
4
+ python: "3.7.11 (default, Jul 27 2021, 14:32:16) \n[GCC 7.5.0]"
5
+ timestamp: 1629948586.911716
6
+ torch: 1.9.0+cu102
7
+ yaml_files:
8
+ train_config: exp/diar_train_diar_raw_max_epoch20/config.yaml