add files
- epoch-50-avg-10.pt +3 -0
- inference_speaker_verification/log-decode-epoch-50-avg-10-2023-12-20-09-55-30 +26 -0
- inference_speaker_verification/log-decode-epoch-50-avg-2-2023-12-20-09-59-38 +28 -0
- inference_speaker_verification/log-decode-epoch-50-avg-4-2023-12-20-09-58-45 +25 -0
- inference_speaker_verification/log-decode-epoch-50-avg-6-2023-12-20-09-57-50 +25 -0
- inference_speaker_verification/log-decode-epoch-50-avg-8-2023-12-20-09-56-51 +26 -0
- log.tar.gz +3 -0
epoch-50-avg-10.pt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:fe3bd0ef78ce65d6d04953fbd321731fd22e14969aca2e26bb115c64f39bdce8
size 258240373
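The three lines above form a standard Git LFS pointer: the checkpoint itself lives in LFS storage and is identified only by its SHA-256 digest and byte size. As a side note, a downloaded `epoch-50-avg-10.pt` can be checked against such a pointer roughly as follows (a generic sketch, not code from this repository; the pointer file name is hypothetical):

```python
import hashlib
from pathlib import Path

def verify_lfs_pointer(pointer_path: str, blob_path: str) -> bool:
    """Check a downloaded blob against the oid/size recorded in a Git LFS pointer."""
    # Pointer lines look like "oid sha256:<hex>" and "size <bytes>".
    fields = dict(
        line.split(" ", 1)
        for line in Path(pointer_path).read_text().splitlines()
        if line.strip()
    )
    expected_oid = fields["oid"].removeprefix("sha256:").strip()
    expected_size = int(fields["size"])

    blob = Path(blob_path)
    if blob.stat().st_size != expected_size:
        return False
    return hashlib.sha256(blob.read_bytes()).hexdigest() == expected_oid

# e.g. verify_lfs_pointer("epoch-50-avg-10.pt.pointer", "epoch-50-avg-10.pt")
```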
inference_speaker_verification/log-decode-epoch-50-avg-10-2023-12-20-09-55-30
ADDED
@@ -0,0 +1,26 @@
2023-12-20 09:55:30,067 INFO [inference_speaker.py:247] Evaluation started
2023-12-20 09:55:30,067 INFO [inference_speaker.py:249] {'best_train_loss': inf, 'best_valid_loss': inf, 'best_train_epoch': -1, 'best_valid_epoch': -1, 'batch_idx_train': 0, 'log_interval': 50, 'reset_interval': 200, 'valid_interval': 3000, 'feature_dim': 80, 'subsampling_factor': 4, 'warm_step': 2000, 'env_info': {'k2-version': '1.24.3', 'k2-build-type': 'Release', 'k2-with-cuda': True, 'k2-git-sha1': '2b2ac14b326d61d79d04e53fbd69b1ff6d630411', 'k2-git-date': 'Thu Aug 24 05:58:26 2023', 'lhotse-version': '1.16.0', 'torch-version': '2.0.1+cu117', 'torch-cuda-available': True, 'torch-cuda-version': '11.7', 'python-version': '3.1', 'icefall-git-branch': 'multi_KD', 'icefall-git-sha1': 'df56b261-dirty', 'icefall-git-date': 'Fri Nov 10 10:29:38 2023', 'icefall-path': '/star-xy/softwares/icefall_development/icefall_multi_KD', 'k2-path': '/star-xy/softwares/k2_development/k2/k2/python/k2/__init__.py', 'lhotse-path': '/star-xy/softwares/anaconda3/envs/multi_KD/lib/python3.10/site-packages/lhotse/__init__.py', 'hostname': 'de-74279-k2-train-2-1207150844-f49d8c4f4-c49d5', 'IP address': '10.177.22.19'}, 'epoch': 50, 'iter': 0, 'avg': 10, 'use_averaged_model': False, 'exp_dir': PosixPath('multi_KD/exp_vox2_base_lr_0.045_use_beats_0_use_ecapa_1_use_whisper_0_scale_1.0_rerun'), 'trained_with_distillation': True, 'freeze_encoder': False, 'num_encoder_layers': '2,2,3,4,3,2', 'downsampling_factor': '1,2,4,8,4,2', 'feedforward_dim': '512,768,1024,1536,1024,768', 'num_heads': '4,4,4,8,4,4', 'encoder_dim': '192,256,384,512,384,256', 'query_head_dim': '32', 'value_head_dim': '12', 'pos_head_dim': '4', 'pos_dim': 48, 'encoder_unmasked_dim': '192,192,256,256,256,192', 'cnn_module_kernel': '31,31,15,15,15,31', 'decoder_dim': 512, 'joiner_dim': 512, 'causal': False, 'chunk_size': '16,32,64,-1', 'left_context_frames': '64,128,256,-1', 'use_transducer': True, 'use_ctc': False, 'speaker_input_idx': -1, 'whisper_dim': 768, 'num_codebooks': 32, 'mvq_kd_layer_idx': -1, 'use_subsampled_output': True, 'full_libri': True, 'mini_libri': False, 'use_vox2': False, 'manifest_dir': PosixPath('data/fbank'), 'max_duration': 1000, 'bucketing_sampler': True, 'num_buckets': 30, 'concatenate_cuts': False, 'duration_factor': 1.0, 'gap': 1.0, 'on_the_fly_feats': False, 'shuffle': True, 'drop_last': True, 'return_cuts': True, 'num_workers': 2, 'enable_spec_aug': True, 'spec_aug_time_warp_factor': 80, 'enable_musan': True, 'enable_audioset': False, 'audioset_kd': False, 'use_musan_separately': False, 'input_strategy': 'PrecomputedFeatures', 'drop_features': False, 'return_audio': False, 'use_beats': True, 'use_ecapa': True, 'use_whisper': True, 'whisper_mvq': False, 'beats_ckpt': 'data/models/BEATs/BEATs_iter3_plus_AS2M_finetuned_on_AS2M_cpt2.pt', 'whisper_version': 'small.en', 'lm_vocab_size': 500, 'lm_epoch': 7, 'lm_avg': 1, 'lm_exp_dir': None, 'rnn_lm_embedding_dim': 2048, 'rnn_lm_hidden_dim': 2048, 'rnn_lm_num_layers': 3, 'rnn_lm_tie_weights': True, 'transformer_lm_exp_dir': None, 'transformer_lm_dim_feedforward': 2048, 'transformer_lm_encoder_dim': 768, 'transformer_lm_embedding_dim': 768, 'transformer_lm_nhead': 8, 'transformer_lm_num_layers': 16, 'transformer_lm_tie_weights': True, 'res_dir': PosixPath('multi_KD/exp_vox2_base_lr_0.045_use_beats_0_use_ecapa_1_use_whisper_0_scale_1.0_rerun/inference_speaker_verification'), 'suffix': 'epoch-50-avg-10'}
2023-12-20 09:55:30,068 INFO [inference_speaker.py:255] About to create model
2023-12-20 09:55:30,838 INFO [inference_speaker.py:301] averaging ['multi_KD/exp_vox2_base_lr_0.045_use_beats_0_use_ecapa_1_use_whisper_0_scale_1.0_rerun/epoch-41.pt', 'multi_KD/exp_vox2_base_lr_0.045_use_beats_0_use_ecapa_1_use_whisper_0_scale_1.0_rerun/epoch-42.pt', 'multi_KD/exp_vox2_base_lr_0.045_use_beats_0_use_ecapa_1_use_whisper_0_scale_1.0_rerun/epoch-43.pt', 'multi_KD/exp_vox2_base_lr_0.045_use_beats_0_use_ecapa_1_use_whisper_0_scale_1.0_rerun/epoch-44.pt', 'multi_KD/exp_vox2_base_lr_0.045_use_beats_0_use_ecapa_1_use_whisper_0_scale_1.0_rerun/epoch-45.pt', 'multi_KD/exp_vox2_base_lr_0.045_use_beats_0_use_ecapa_1_use_whisper_0_scale_1.0_rerun/epoch-46.pt', 'multi_KD/exp_vox2_base_lr_0.045_use_beats_0_use_ecapa_1_use_whisper_0_scale_1.0_rerun/epoch-47.pt', 'multi_KD/exp_vox2_base_lr_0.045_use_beats_0_use_ecapa_1_use_whisper_0_scale_1.0_rerun/epoch-48.pt', 'multi_KD/exp_vox2_base_lr_0.045_use_beats_0_use_ecapa_1_use_whisper_0_scale_1.0_rerun/epoch-49.pt', 'multi_KD/exp_vox2_base_lr_0.045_use_beats_0_use_ecapa_1_use_whisper_0_scale_1.0_rerun/epoch-50.pt']
2023-12-20 09:56:07,742 INFO [inference_speaker.py:357] Number of model parameters: 65512262
2023-12-20 09:56:07,742 INFO [kd_datamodule.py:715] About to get the test set of voxceleb1 set.
2023-12-20 09:56:07,756 INFO [fetching.py:138] Fetch hyperparams.yaml: Using existing file/symlink in pretrained_models/EncoderClassifier-8f6f7fdaa9628acf73e21ad1f99d5f83/hyperparams.yaml.
2023-12-20 09:56:07,757 INFO [fetching.py:159] Fetch custom.py: Delegating to Huggingface hub, source speechbrain/spkrec-ecapa-voxceleb.
2023-12-20 09:56:17,851 WARNING [_http.py:271] '(MaxRetryError("HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /speechbrain/spkrec-ecapa-voxceleb/resolve/main/custom.py (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x7fecb1176aa0>, 'Connection to huggingface.co timed out. (connect timeout=10)'))"), '(Request ID: 8273ca1d-728d-4d6a-aa0e-c9b21b06c9bc)')' thrown while requesting HEAD https://huggingface.co/speechbrain/spkrec-ecapa-voxceleb/resolve/main/custom.py
2023-12-20 09:56:18,098 INFO [fetching.py:138] Fetch embedding_model.ckpt: Using existing file/symlink in pretrained_models/EncoderClassifier-8f6f7fdaa9628acf73e21ad1f99d5f83/embedding_model.ckpt.
2023-12-20 09:56:18,099 INFO [fetching.py:138] Fetch mean_var_norm_emb.ckpt: Using existing file/symlink in pretrained_models/EncoderClassifier-8f6f7fdaa9628acf73e21ad1f99d5f83/mean_var_norm_emb.ckpt.
2023-12-20 09:56:18,099 INFO [fetching.py:138] Fetch classifier.ckpt: Using existing file/symlink in pretrained_models/EncoderClassifier-8f6f7fdaa9628acf73e21ad1f99d5f83/classifier.ckpt.
2023-12-20 09:56:18,100 INFO [fetching.py:138] Fetch label_encoder.txt: Using existing file/symlink in pretrained_models/EncoderClassifier-8f6f7fdaa9628acf73e21ad1f99d5f83/label_encoder.ckpt.
2023-12-20 09:56:18,100 INFO [parameter_transfer.py:299] Loading pretrained files for: embedding_model, mean_var_norm_emb, classifier, label_encoder
2023-12-20 09:56:18,325 INFO [kd_datamodule.py:119] Successfully load ecapa-tdnn model.
2023-12-20 09:56:18,325 INFO [teachers.py:17] The teacher model is on device: cuda:0
2023-12-20 09:56:22,155 INFO [inference_speaker.py:187] Processed 152 cuts already.
2023-12-20 09:56:30,381 INFO [inference_speaker.py:187] Processed 2118 cuts already.
2023-12-20 09:56:38,198 INFO [inference_speaker.py:187] Processed 4392 cuts already.
2023-12-20 09:56:39,746 INFO [zipformer.py:1873] name=None, attn_weights_entropy = tensor([4.5454, 3.9730, 4.3014, 3.9227], device='cuda:0')
2023-12-20 09:56:40,965 INFO [inference_speaker.py:188] Finish collecting speaker embeddings
2023-12-20 09:56:40,970 INFO [inference_speaker.py:195] -----------For testing set: VoxCeleb1-cleaned------------
2023-12-20 09:56:41,000 INFO [inference_speaker.py:199] A total of 37611 pairs.
2023-12-20 09:56:42,734 INFO [inference_speaker.py:222] Operating threshold for VoxCeleb1-cleaned: 0.2879, FAR: 0.0110, FRR: 0.0110, EER: 0.0110
2023-12-20 09:56:42,735 INFO [inference_speaker.py:223] Finished testing for VoxCeleb1-cleaned
2023-12-20 09:56:42,740 INFO [inference_speaker.py:389] Done!
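The EER reported above (0.0110 at operating threshold 0.2879 for epoch-50-avg-10) is the point where the false-acceptance and false-rejection rates coincide over the 37611 VoxCeleb1-cleaned trial pairs. A minimal sketch of how such a value is typically derived from pairwise similarity scores and same-speaker labels (a generic illustration, not the exact routine in inference_speaker.py):

```python
import numpy as np

def compute_eer(scores: np.ndarray, labels: np.ndarray):
    """Return (eer, threshold) for similarity scores and 0/1 same-speaker labels."""
    order = np.argsort(scores)[::-1]        # candidate thresholds, highest score first
    thresholds = scores[order]
    labels = labels[order]

    tp = np.cumsum(labels)                  # genuine pairs accepted at each threshold
    fp = np.cumsum(1 - labels)              # impostor pairs accepted at each threshold
    far = fp / max((1 - labels).sum(), 1)   # false acceptance rate
    frr = 1 - tp / max(labels.sum(), 1)     # false rejection rate

    idx = np.argmin(np.abs(far - frr))      # operating point where FAR ~= FRR
    eer = (far[idx] + frr[idx]) / 2
    return float(eer), float(thresholds[idx])

# e.g. eer, thr = compute_eer(pair_scores, pair_labels) over the 37611 trial pairs
```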
inference_speaker_verification/log-decode-epoch-50-avg-2-2023-12-20-09-59-38
ADDED
@@ -0,0 +1,28 @@
2023-12-20 09:59:38,138 INFO [inference_speaker.py:247] Evaluation started
2023-12-20 09:59:38,138 INFO [inference_speaker.py:249] {'best_train_loss': inf, 'best_valid_loss': inf, 'best_train_epoch': -1, 'best_valid_epoch': -1, 'batch_idx_train': 0, 'log_interval': 50, 'reset_interval': 200, 'valid_interval': 3000, 'feature_dim': 80, 'subsampling_factor': 4, 'warm_step': 2000, 'env_info': {'k2-version': '1.24.3', 'k2-build-type': 'Release', 'k2-with-cuda': True, 'k2-git-sha1': '2b2ac14b326d61d79d04e53fbd69b1ff6d630411', 'k2-git-date': 'Thu Aug 24 05:58:26 2023', 'lhotse-version': '1.16.0', 'torch-version': '2.0.1+cu117', 'torch-cuda-available': True, 'torch-cuda-version': '11.7', 'python-version': '3.1', 'icefall-git-branch': 'multi_KD', 'icefall-git-sha1': 'df56b261-dirty', 'icefall-git-date': 'Fri Nov 10 10:29:38 2023', 'icefall-path': '/star-xy/softwares/icefall_development/icefall_multi_KD', 'k2-path': '/star-xy/softwares/k2_development/k2/k2/python/k2/__init__.py', 'lhotse-path': '/star-xy/softwares/anaconda3/envs/multi_KD/lib/python3.10/site-packages/lhotse/__init__.py', 'hostname': 'de-74279-k2-train-2-1207150844-f49d8c4f4-c49d5', 'IP address': '10.177.22.19'}, 'epoch': 50, 'iter': 0, 'avg': 2, 'use_averaged_model': False, 'exp_dir': PosixPath('multi_KD/exp_vox2_base_lr_0.045_use_beats_0_use_ecapa_1_use_whisper_0_scale_1.0_rerun'), 'trained_with_distillation': True, 'freeze_encoder': False, 'num_encoder_layers': '2,2,3,4,3,2', 'downsampling_factor': '1,2,4,8,4,2', 'feedforward_dim': '512,768,1024,1536,1024,768', 'num_heads': '4,4,4,8,4,4', 'encoder_dim': '192,256,384,512,384,256', 'query_head_dim': '32', 'value_head_dim': '12', 'pos_head_dim': '4', 'pos_dim': 48, 'encoder_unmasked_dim': '192,192,256,256,256,192', 'cnn_module_kernel': '31,31,15,15,15,31', 'decoder_dim': 512, 'joiner_dim': 512, 'causal': False, 'chunk_size': '16,32,64,-1', 'left_context_frames': '64,128,256,-1', 'use_transducer': True, 'use_ctc': False, 'speaker_input_idx': -1, 'whisper_dim': 768, 'num_codebooks': 32, 'mvq_kd_layer_idx': -1, 'use_subsampled_output': True, 'full_libri': True, 'mini_libri': False, 'use_vox2': False, 'manifest_dir': PosixPath('data/fbank'), 'max_duration': 1000, 'bucketing_sampler': True, 'num_buckets': 30, 'concatenate_cuts': False, 'duration_factor': 1.0, 'gap': 1.0, 'on_the_fly_feats': False, 'shuffle': True, 'drop_last': True, 'return_cuts': True, 'num_workers': 2, 'enable_spec_aug': True, 'spec_aug_time_warp_factor': 80, 'enable_musan': True, 'enable_audioset': False, 'audioset_kd': False, 'use_musan_separately': False, 'input_strategy': 'PrecomputedFeatures', 'drop_features': False, 'return_audio': False, 'use_beats': True, 'use_ecapa': True, 'use_whisper': True, 'whisper_mvq': False, 'beats_ckpt': 'data/models/BEATs/BEATs_iter3_plus_AS2M_finetuned_on_AS2M_cpt2.pt', 'whisper_version': 'small.en', 'lm_vocab_size': 500, 'lm_epoch': 7, 'lm_avg': 1, 'lm_exp_dir': None, 'rnn_lm_embedding_dim': 2048, 'rnn_lm_hidden_dim': 2048, 'rnn_lm_num_layers': 3, 'rnn_lm_tie_weights': True, 'transformer_lm_exp_dir': None, 'transformer_lm_dim_feedforward': 2048, 'transformer_lm_encoder_dim': 768, 'transformer_lm_embedding_dim': 768, 'transformer_lm_nhead': 8, 'transformer_lm_num_layers': 16, 'transformer_lm_tie_weights': True, 'res_dir': PosixPath('multi_KD/exp_vox2_base_lr_0.045_use_beats_0_use_ecapa_1_use_whisper_0_scale_1.0_rerun/inference_speaker_verification'), 'suffix': 'epoch-50-avg-2'}
2023-12-20 09:59:38,138 INFO [inference_speaker.py:255] About to create model
2023-12-20 09:59:38,815 INFO [inference_speaker.py:301] averaging ['multi_KD/exp_vox2_base_lr_0.045_use_beats_0_use_ecapa_1_use_whisper_0_scale_1.0_rerun/epoch-49.pt', 'multi_KD/exp_vox2_base_lr_0.045_use_beats_0_use_ecapa_1_use_whisper_0_scale_1.0_rerun/epoch-50.pt']
2023-12-20 09:59:43,674 INFO [inference_speaker.py:357] Number of model parameters: 65512262
2023-12-20 09:59:43,674 INFO [kd_datamodule.py:715] About to get the test set of voxceleb1 set.
2023-12-20 09:59:43,685 INFO [fetching.py:138] Fetch hyperparams.yaml: Using existing file/symlink in pretrained_models/EncoderClassifier-8f6f7fdaa9628acf73e21ad1f99d5f83/hyperparams.yaml.
2023-12-20 09:59:43,686 INFO [fetching.py:159] Fetch custom.py: Delegating to Huggingface hub, source speechbrain/spkrec-ecapa-voxceleb.
2023-12-20 09:59:53,764 WARNING [_http.py:271] '(MaxRetryError("HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /speechbrain/spkrec-ecapa-voxceleb/resolve/main/custom.py (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x7f0f6878c610>, 'Connection to huggingface.co timed out. (connect timeout=10)'))"), '(Request ID: ee7f9c99-a1e1-4652-9703-db6a5bcb9c8e)')' thrown while requesting HEAD https://huggingface.co/speechbrain/spkrec-ecapa-voxceleb/resolve/main/custom.py
2023-12-20 09:59:54,032 INFO [fetching.py:138] Fetch embedding_model.ckpt: Using existing file/symlink in pretrained_models/EncoderClassifier-8f6f7fdaa9628acf73e21ad1f99d5f83/embedding_model.ckpt.
2023-12-20 09:59:54,033 INFO [fetching.py:138] Fetch mean_var_norm_emb.ckpt: Using existing file/symlink in pretrained_models/EncoderClassifier-8f6f7fdaa9628acf73e21ad1f99d5f83/mean_var_norm_emb.ckpt.
2023-12-20 09:59:54,034 INFO [fetching.py:138] Fetch classifier.ckpt: Using existing file/symlink in pretrained_models/EncoderClassifier-8f6f7fdaa9628acf73e21ad1f99d5f83/classifier.ckpt.
2023-12-20 09:59:54,034 INFO [fetching.py:138] Fetch label_encoder.txt: Using existing file/symlink in pretrained_models/EncoderClassifier-8f6f7fdaa9628acf73e21ad1f99d5f83/label_encoder.ckpt.
2023-12-20 09:59:54,035 INFO [parameter_transfer.py:299] Loading pretrained files for: embedding_model, mean_var_norm_emb, classifier, label_encoder
2023-12-20 09:59:54,249 INFO [kd_datamodule.py:119] Successfully load ecapa-tdnn model.
2023-12-20 09:59:54,250 INFO [teachers.py:17] The teacher model is on device: cuda:0
2023-12-20 09:59:57,713 INFO [inference_speaker.py:187] Processed 152 cuts already.
2023-12-20 09:59:58,835 INFO [zipformer.py:1873] name=None, attn_weights_entropy = tensor([5.6481, 4.8028, 5.3316, 4.8006], device='cuda:0')
2023-12-20 10:00:06,125 INFO [inference_speaker.py:187] Processed 2118 cuts already.
2023-12-20 10:00:06,691 INFO [zipformer.py:1873] name=None, attn_weights_entropy = tensor([3.2800, 1.9757, 1.8407, 2.1556, 2.4188, 1.6292, 2.3401, 1.9971],
device='cuda:0')
2023-12-20 10:00:13,664 INFO [inference_speaker.py:187] Processed 4392 cuts already.
2023-12-20 10:00:16,345 INFO [inference_speaker.py:188] Finish collecting speaker embeddings
2023-12-20 10:00:16,350 INFO [inference_speaker.py:195] -----------For testing set: VoxCeleb1-cleaned------------
2023-12-20 10:00:16,375 INFO [inference_speaker.py:199] A total of 37611 pairs.
2023-12-20 10:00:18,055 INFO [inference_speaker.py:222] Operating threshold for VoxCeleb1-cleaned: 0.2873, FAR: 0.0106, FRR: 0.0106, EER: 0.0106
2023-12-20 10:00:18,055 INFO [inference_speaker.py:223] Finished testing for VoxCeleb1-cleaned
2023-12-20 10:00:18,060 INFO [inference_speaker.py:389] Done!
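The "averaging [...]" lines in these logs indicate that each decoding checkpoint is a uniform average of the last N epoch checkpoints (here epoch-49.pt and epoch-50.pt for avg=2). A minimal sketch of that kind of parameter averaging in plain PyTorch, assuming each checkpoint stores its weights under a 'model' key as icefall checkpoints typically do (a simplified illustration, not icefall's actual average_checkpoints helper):

```python
import torch

def average_checkpoints(paths):
    """Uniformly average the 'model' state_dicts stored in a list of checkpoint files."""
    avg = None
    for path in paths:
        # assumption: each checkpoint is a dict with a 'model' entry
        state = torch.load(path, map_location="cpu")["model"]
        if avg is None:
            avg = {k: v.clone().float() for k, v in state.items()}
        else:
            for k in avg:
                avg[k] += state[k].float()
    n = len(paths)
    return {k: (v / n).to(state[k].dtype) for k, v in avg.items()}

# e.g. averaged = average_checkpoints([f"exp/epoch-{e}.pt" for e in (49, 50)])
```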
inference_speaker_verification/log-decode-epoch-50-avg-4-2023-12-20-09-58-45
ADDED
@@ -0,0 +1,25 @@
2023-12-20 09:58:45,697 INFO [inference_speaker.py:247] Evaluation started
2023-12-20 09:58:45,697 INFO [inference_speaker.py:249] {'best_train_loss': inf, 'best_valid_loss': inf, 'best_train_epoch': -1, 'best_valid_epoch': -1, 'batch_idx_train': 0, 'log_interval': 50, 'reset_interval': 200, 'valid_interval': 3000, 'feature_dim': 80, 'subsampling_factor': 4, 'warm_step': 2000, 'env_info': {'k2-version': '1.24.3', 'k2-build-type': 'Release', 'k2-with-cuda': True, 'k2-git-sha1': '2b2ac14b326d61d79d04e53fbd69b1ff6d630411', 'k2-git-date': 'Thu Aug 24 05:58:26 2023', 'lhotse-version': '1.16.0', 'torch-version': '2.0.1+cu117', 'torch-cuda-available': True, 'torch-cuda-version': '11.7', 'python-version': '3.1', 'icefall-git-branch': 'multi_KD', 'icefall-git-sha1': 'df56b261-dirty', 'icefall-git-date': 'Fri Nov 10 10:29:38 2023', 'icefall-path': '/star-xy/softwares/icefall_development/icefall_multi_KD', 'k2-path': '/star-xy/softwares/k2_development/k2/k2/python/k2/__init__.py', 'lhotse-path': '/star-xy/softwares/anaconda3/envs/multi_KD/lib/python3.10/site-packages/lhotse/__init__.py', 'hostname': 'de-74279-k2-train-2-1207150844-f49d8c4f4-c49d5', 'IP address': '10.177.22.19'}, 'epoch': 50, 'iter': 0, 'avg': 4, 'use_averaged_model': False, 'exp_dir': PosixPath('multi_KD/exp_vox2_base_lr_0.045_use_beats_0_use_ecapa_1_use_whisper_0_scale_1.0_rerun'), 'trained_with_distillation': True, 'freeze_encoder': False, 'num_encoder_layers': '2,2,3,4,3,2', 'downsampling_factor': '1,2,4,8,4,2', 'feedforward_dim': '512,768,1024,1536,1024,768', 'num_heads': '4,4,4,8,4,4', 'encoder_dim': '192,256,384,512,384,256', 'query_head_dim': '32', 'value_head_dim': '12', 'pos_head_dim': '4', 'pos_dim': 48, 'encoder_unmasked_dim': '192,192,256,256,256,192', 'cnn_module_kernel': '31,31,15,15,15,31', 'decoder_dim': 512, 'joiner_dim': 512, 'causal': False, 'chunk_size': '16,32,64,-1', 'left_context_frames': '64,128,256,-1', 'use_transducer': True, 'use_ctc': False, 'speaker_input_idx': -1, 'whisper_dim': 768, 'num_codebooks': 32, 'mvq_kd_layer_idx': -1, 'use_subsampled_output': True, 'full_libri': True, 'mini_libri': False, 'use_vox2': False, 'manifest_dir': PosixPath('data/fbank'), 'max_duration': 1000, 'bucketing_sampler': True, 'num_buckets': 30, 'concatenate_cuts': False, 'duration_factor': 1.0, 'gap': 1.0, 'on_the_fly_feats': False, 'shuffle': True, 'drop_last': True, 'return_cuts': True, 'num_workers': 2, 'enable_spec_aug': True, 'spec_aug_time_warp_factor': 80, 'enable_musan': True, 'enable_audioset': False, 'audioset_kd': False, 'use_musan_separately': False, 'input_strategy': 'PrecomputedFeatures', 'drop_features': False, 'return_audio': False, 'use_beats': True, 'use_ecapa': True, 'use_whisper': True, 'whisper_mvq': False, 'beats_ckpt': 'data/models/BEATs/BEATs_iter3_plus_AS2M_finetuned_on_AS2M_cpt2.pt', 'whisper_version': 'small.en', 'lm_vocab_size': 500, 'lm_epoch': 7, 'lm_avg': 1, 'lm_exp_dir': None, 'rnn_lm_embedding_dim': 2048, 'rnn_lm_hidden_dim': 2048, 'rnn_lm_num_layers': 3, 'rnn_lm_tie_weights': True, 'transformer_lm_exp_dir': None, 'transformer_lm_dim_feedforward': 2048, 'transformer_lm_encoder_dim': 768, 'transformer_lm_embedding_dim': 768, 'transformer_lm_nhead': 8, 'transformer_lm_num_layers': 16, 'transformer_lm_tie_weights': True, 'res_dir': PosixPath('multi_KD/exp_vox2_base_lr_0.045_use_beats_0_use_ecapa_1_use_whisper_0_scale_1.0_rerun/inference_speaker_verification'), 'suffix': 'epoch-50-avg-4'}
2023-12-20 09:58:45,697 INFO [inference_speaker.py:255] About to create model
2023-12-20 09:58:46,412 INFO [inference_speaker.py:301] averaging ['multi_KD/exp_vox2_base_lr_0.045_use_beats_0_use_ecapa_1_use_whisper_0_scale_1.0_rerun/epoch-47.pt', 'multi_KD/exp_vox2_base_lr_0.045_use_beats_0_use_ecapa_1_use_whisper_0_scale_1.0_rerun/epoch-48.pt', 'multi_KD/exp_vox2_base_lr_0.045_use_beats_0_use_ecapa_1_use_whisper_0_scale_1.0_rerun/epoch-49.pt', 'multi_KD/exp_vox2_base_lr_0.045_use_beats_0_use_ecapa_1_use_whisper_0_scale_1.0_rerun/epoch-50.pt']
2023-12-20 09:58:54,001 INFO [inference_speaker.py:357] Number of model parameters: 65512262
2023-12-20 09:58:54,001 INFO [kd_datamodule.py:715] About to get the test set of voxceleb1 set.
2023-12-20 09:58:54,015 INFO [fetching.py:138] Fetch hyperparams.yaml: Using existing file/symlink in pretrained_models/EncoderClassifier-8f6f7fdaa9628acf73e21ad1f99d5f83/hyperparams.yaml.
2023-12-20 09:58:54,015 INFO [fetching.py:159] Fetch custom.py: Delegating to Huggingface hub, source speechbrain/spkrec-ecapa-voxceleb.
2023-12-20 09:59:04,110 WARNING [_http.py:271] '(MaxRetryError("HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /speechbrain/spkrec-ecapa-voxceleb/resolve/main/custom.py (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x7fd5dfa81330>, 'Connection to huggingface.co timed out. (connect timeout=10)'))"), '(Request ID: 8232cde0-5adc-446c-8eba-6a952f59c1d9)')' thrown while requesting HEAD https://huggingface.co/speechbrain/spkrec-ecapa-voxceleb/resolve/main/custom.py
2023-12-20 09:59:04,372 INFO [fetching.py:138] Fetch embedding_model.ckpt: Using existing file/symlink in pretrained_models/EncoderClassifier-8f6f7fdaa9628acf73e21ad1f99d5f83/embedding_model.ckpt.
2023-12-20 09:59:04,373 INFO [fetching.py:138] Fetch mean_var_norm_emb.ckpt: Using existing file/symlink in pretrained_models/EncoderClassifier-8f6f7fdaa9628acf73e21ad1f99d5f83/mean_var_norm_emb.ckpt.
2023-12-20 09:59:04,374 INFO [fetching.py:138] Fetch classifier.ckpt: Using existing file/symlink in pretrained_models/EncoderClassifier-8f6f7fdaa9628acf73e21ad1f99d5f83/classifier.ckpt.
2023-12-20 09:59:04,375 INFO [fetching.py:138] Fetch label_encoder.txt: Using existing file/symlink in pretrained_models/EncoderClassifier-8f6f7fdaa9628acf73e21ad1f99d5f83/label_encoder.ckpt.
2023-12-20 09:59:04,375 INFO [parameter_transfer.py:299] Loading pretrained files for: embedding_model, mean_var_norm_emb, classifier, label_encoder
2023-12-20 09:59:04,595 INFO [kd_datamodule.py:119] Successfully load ecapa-tdnn model.
2023-12-20 09:59:04,595 INFO [teachers.py:17] The teacher model is on device: cuda:0
2023-12-20 09:59:08,379 INFO [inference_speaker.py:187] Processed 152 cuts already.
2023-12-20 09:59:16,870 INFO [inference_speaker.py:187] Processed 2118 cuts already.
2023-12-20 09:59:24,408 INFO [inference_speaker.py:187] Processed 4392 cuts already.
2023-12-20 09:59:27,092 INFO [inference_speaker.py:188] Finish collecting speaker embeddings
2023-12-20 09:59:27,097 INFO [inference_speaker.py:195] -----------For testing set: VoxCeleb1-cleaned------------
2023-12-20 09:59:27,122 INFO [inference_speaker.py:199] A total of 37611 pairs.
2023-12-20 09:59:28,770 INFO [inference_speaker.py:222] Operating threshold for VoxCeleb1-cleaned: 0.2884, FAR: 0.0107, FRR: 0.0107, EER: 0.0107
2023-12-20 09:59:28,771 INFO [inference_speaker.py:223] Finished testing for VoxCeleb1-cleaned
2023-12-20 09:59:28,775 INFO [inference_speaker.py:389] Done!
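Each run collects one embedding per cut ("Finish collecting speaker embeddings") and then scores the 37611 VoxCeleb1-cleaned trial pairs. Verification scoring of this kind generally reduces to cosine similarity between the two embeddings of a trial; a small illustrative sketch with hypothetical helper names, not code from this repository:

```python
import torch
import torch.nn.functional as F

def score_trials(embeddings: dict, trials: list):
    """Cosine-score (utt1, utt2) trial pairs given a dict of utterance id -> 1-D embedding."""
    scores = []
    for utt1, utt2 in trials:
        e1 = F.normalize(embeddings[utt1], dim=-1)
        e2 = F.normalize(embeddings[utt2], dim=-1)
        scores.append(torch.dot(e1, e2).item())  # cosine similarity in [-1, 1]
    return scores
```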
inference_speaker_verification/log-decode-epoch-50-avg-6-2023-12-20-09-57-50
ADDED
@@ -0,0 +1,25 @@
2023-12-20 09:57:50,871 INFO [inference_speaker.py:247] Evaluation started
2023-12-20 09:57:50,871 INFO [inference_speaker.py:249] {'best_train_loss': inf, 'best_valid_loss': inf, 'best_train_epoch': -1, 'best_valid_epoch': -1, 'batch_idx_train': 0, 'log_interval': 50, 'reset_interval': 200, 'valid_interval': 3000, 'feature_dim': 80, 'subsampling_factor': 4, 'warm_step': 2000, 'env_info': {'k2-version': '1.24.3', 'k2-build-type': 'Release', 'k2-with-cuda': True, 'k2-git-sha1': '2b2ac14b326d61d79d04e53fbd69b1ff6d630411', 'k2-git-date': 'Thu Aug 24 05:58:26 2023', 'lhotse-version': '1.16.0', 'torch-version': '2.0.1+cu117', 'torch-cuda-available': True, 'torch-cuda-version': '11.7', 'python-version': '3.1', 'icefall-git-branch': 'multi_KD', 'icefall-git-sha1': 'df56b261-dirty', 'icefall-git-date': 'Fri Nov 10 10:29:38 2023', 'icefall-path': '/star-xy/softwares/icefall_development/icefall_multi_KD', 'k2-path': '/star-xy/softwares/k2_development/k2/k2/python/k2/__init__.py', 'lhotse-path': '/star-xy/softwares/anaconda3/envs/multi_KD/lib/python3.10/site-packages/lhotse/__init__.py', 'hostname': 'de-74279-k2-train-2-1207150844-f49d8c4f4-c49d5', 'IP address': '10.177.22.19'}, 'epoch': 50, 'iter': 0, 'avg': 6, 'use_averaged_model': False, 'exp_dir': PosixPath('multi_KD/exp_vox2_base_lr_0.045_use_beats_0_use_ecapa_1_use_whisper_0_scale_1.0_rerun'), 'trained_with_distillation': True, 'freeze_encoder': False, 'num_encoder_layers': '2,2,3,4,3,2', 'downsampling_factor': '1,2,4,8,4,2', 'feedforward_dim': '512,768,1024,1536,1024,768', 'num_heads': '4,4,4,8,4,4', 'encoder_dim': '192,256,384,512,384,256', 'query_head_dim': '32', 'value_head_dim': '12', 'pos_head_dim': '4', 'pos_dim': 48, 'encoder_unmasked_dim': '192,192,256,256,256,192', 'cnn_module_kernel': '31,31,15,15,15,31', 'decoder_dim': 512, 'joiner_dim': 512, 'causal': False, 'chunk_size': '16,32,64,-1', 'left_context_frames': '64,128,256,-1', 'use_transducer': True, 'use_ctc': False, 'speaker_input_idx': -1, 'whisper_dim': 768, 'num_codebooks': 32, 'mvq_kd_layer_idx': -1, 'use_subsampled_output': True, 'full_libri': True, 'mini_libri': False, 'use_vox2': False, 'manifest_dir': PosixPath('data/fbank'), 'max_duration': 1000, 'bucketing_sampler': True, 'num_buckets': 30, 'concatenate_cuts': False, 'duration_factor': 1.0, 'gap': 1.0, 'on_the_fly_feats': False, 'shuffle': True, 'drop_last': True, 'return_cuts': True, 'num_workers': 2, 'enable_spec_aug': True, 'spec_aug_time_warp_factor': 80, 'enable_musan': True, 'enable_audioset': False, 'audioset_kd': False, 'use_musan_separately': False, 'input_strategy': 'PrecomputedFeatures', 'drop_features': False, 'return_audio': False, 'use_beats': True, 'use_ecapa': True, 'use_whisper': True, 'whisper_mvq': False, 'beats_ckpt': 'data/models/BEATs/BEATs_iter3_plus_AS2M_finetuned_on_AS2M_cpt2.pt', 'whisper_version': 'small.en', 'lm_vocab_size': 500, 'lm_epoch': 7, 'lm_avg': 1, 'lm_exp_dir': None, 'rnn_lm_embedding_dim': 2048, 'rnn_lm_hidden_dim': 2048, 'rnn_lm_num_layers': 3, 'rnn_lm_tie_weights': True, 'transformer_lm_exp_dir': None, 'transformer_lm_dim_feedforward': 2048, 'transformer_lm_encoder_dim': 768, 'transformer_lm_embedding_dim': 768, 'transformer_lm_nhead': 8, 'transformer_lm_num_layers': 16, 'transformer_lm_tie_weights': True, 'res_dir': PosixPath('multi_KD/exp_vox2_base_lr_0.045_use_beats_0_use_ecapa_1_use_whisper_0_scale_1.0_rerun/inference_speaker_verification'), 'suffix': 'epoch-50-avg-6'}
2023-12-20 09:57:50,871 INFO [inference_speaker.py:255] About to create model
2023-12-20 09:57:51,597 INFO [inference_speaker.py:301] averaging ['multi_KD/exp_vox2_base_lr_0.045_use_beats_0_use_ecapa_1_use_whisper_0_scale_1.0_rerun/epoch-45.pt', 'multi_KD/exp_vox2_base_lr_0.045_use_beats_0_use_ecapa_1_use_whisper_0_scale_1.0_rerun/epoch-46.pt', 'multi_KD/exp_vox2_base_lr_0.045_use_beats_0_use_ecapa_1_use_whisper_0_scale_1.0_rerun/epoch-47.pt', 'multi_KD/exp_vox2_base_lr_0.045_use_beats_0_use_ecapa_1_use_whisper_0_scale_1.0_rerun/epoch-48.pt', 'multi_KD/exp_vox2_base_lr_0.045_use_beats_0_use_ecapa_1_use_whisper_0_scale_1.0_rerun/epoch-49.pt', 'multi_KD/exp_vox2_base_lr_0.045_use_beats_0_use_ecapa_1_use_whisper_0_scale_1.0_rerun/epoch-50.pt']
2023-12-20 09:58:01,997 INFO [inference_speaker.py:357] Number of model parameters: 65512262
2023-12-20 09:58:01,998 INFO [kd_datamodule.py:715] About to get the test set of voxceleb1 set.
2023-12-20 09:58:02,010 INFO [fetching.py:138] Fetch hyperparams.yaml: Using existing file/symlink in pretrained_models/EncoderClassifier-8f6f7fdaa9628acf73e21ad1f99d5f83/hyperparams.yaml.
2023-12-20 09:58:02,011 INFO [fetching.py:159] Fetch custom.py: Delegating to Huggingface hub, source speechbrain/spkrec-ecapa-voxceleb.
2023-12-20 09:58:12,109 WARNING [_http.py:271] '(MaxRetryError("HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /speechbrain/spkrec-ecapa-voxceleb/resolve/main/custom.py (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x7ff023441150>, 'Connection to huggingface.co timed out. (connect timeout=10)'))"), '(Request ID: e8bc049a-8b3a-4a5d-be11-927b9c60c2e8)')' thrown while requesting HEAD https://huggingface.co/speechbrain/spkrec-ecapa-voxceleb/resolve/main/custom.py
2023-12-20 09:58:12,372 INFO [fetching.py:138] Fetch embedding_model.ckpt: Using existing file/symlink in pretrained_models/EncoderClassifier-8f6f7fdaa9628acf73e21ad1f99d5f83/embedding_model.ckpt.
2023-12-20 09:58:12,373 INFO [fetching.py:138] Fetch mean_var_norm_emb.ckpt: Using existing file/symlink in pretrained_models/EncoderClassifier-8f6f7fdaa9628acf73e21ad1f99d5f83/mean_var_norm_emb.ckpt.
2023-12-20 09:58:12,374 INFO [fetching.py:138] Fetch classifier.ckpt: Using existing file/symlink in pretrained_models/EncoderClassifier-8f6f7fdaa9628acf73e21ad1f99d5f83/classifier.ckpt.
2023-12-20 09:58:12,374 INFO [fetching.py:138] Fetch label_encoder.txt: Using existing file/symlink in pretrained_models/EncoderClassifier-8f6f7fdaa9628acf73e21ad1f99d5f83/label_encoder.ckpt.
2023-12-20 09:58:12,374 INFO [parameter_transfer.py:299] Loading pretrained files for: embedding_model, mean_var_norm_emb, classifier, label_encoder
2023-12-20 09:58:12,577 INFO [kd_datamodule.py:119] Successfully load ecapa-tdnn model.
2023-12-20 09:58:12,577 INFO [teachers.py:17] The teacher model is on device: cuda:0
2023-12-20 09:58:16,161 INFO [inference_speaker.py:187] Processed 152 cuts already.
2023-12-20 09:58:24,606 INFO [inference_speaker.py:187] Processed 2118 cuts already.
2023-12-20 09:58:32,117 INFO [inference_speaker.py:187] Processed 4392 cuts already.
2023-12-20 09:58:34,814 INFO [inference_speaker.py:188] Finish collecting speaker embeddings
2023-12-20 09:58:34,818 INFO [inference_speaker.py:195] -----------For testing set: VoxCeleb1-cleaned------------
2023-12-20 09:58:35,016 INFO [inference_speaker.py:199] A total of 37611 pairs.
2023-12-20 09:58:36,721 INFO [inference_speaker.py:222] Operating threshold for VoxCeleb1-cleaned: 0.2887, FAR: 0.0107, FRR: 0.0107, EER: 0.0107
2023-12-20 09:58:36,721 INFO [inference_speaker.py:223] Finished testing for VoxCeleb1-cleaned
2023-12-20 09:58:36,726 INFO [inference_speaker.py:389] Done!
inference_speaker_verification/log-decode-epoch-50-avg-8-2023-12-20-09-56-51
ADDED
@@ -0,0 +1,26 @@
2023-12-20 09:56:51,875 INFO [inference_speaker.py:247] Evaluation started
2023-12-20 09:56:51,875 INFO [inference_speaker.py:249] {'best_train_loss': inf, 'best_valid_loss': inf, 'best_train_epoch': -1, 'best_valid_epoch': -1, 'batch_idx_train': 0, 'log_interval': 50, 'reset_interval': 200, 'valid_interval': 3000, 'feature_dim': 80, 'subsampling_factor': 4, 'warm_step': 2000, 'env_info': {'k2-version': '1.24.3', 'k2-build-type': 'Release', 'k2-with-cuda': True, 'k2-git-sha1': '2b2ac14b326d61d79d04e53fbd69b1ff6d630411', 'k2-git-date': 'Thu Aug 24 05:58:26 2023', 'lhotse-version': '1.16.0', 'torch-version': '2.0.1+cu117', 'torch-cuda-available': True, 'torch-cuda-version': '11.7', 'python-version': '3.1', 'icefall-git-branch': 'multi_KD', 'icefall-git-sha1': 'df56b261-dirty', 'icefall-git-date': 'Fri Nov 10 10:29:38 2023', 'icefall-path': '/star-xy/softwares/icefall_development/icefall_multi_KD', 'k2-path': '/star-xy/softwares/k2_development/k2/k2/python/k2/__init__.py', 'lhotse-path': '/star-xy/softwares/anaconda3/envs/multi_KD/lib/python3.10/site-packages/lhotse/__init__.py', 'hostname': 'de-74279-k2-train-2-1207150844-f49d8c4f4-c49d5', 'IP address': '10.177.22.19'}, 'epoch': 50, 'iter': 0, 'avg': 8, 'use_averaged_model': False, 'exp_dir': PosixPath('multi_KD/exp_vox2_base_lr_0.045_use_beats_0_use_ecapa_1_use_whisper_0_scale_1.0_rerun'), 'trained_with_distillation': True, 'freeze_encoder': False, 'num_encoder_layers': '2,2,3,4,3,2', 'downsampling_factor': '1,2,4,8,4,2', 'feedforward_dim': '512,768,1024,1536,1024,768', 'num_heads': '4,4,4,8,4,4', 'encoder_dim': '192,256,384,512,384,256', 'query_head_dim': '32', 'value_head_dim': '12', 'pos_head_dim': '4', 'pos_dim': 48, 'encoder_unmasked_dim': '192,192,256,256,256,192', 'cnn_module_kernel': '31,31,15,15,15,31', 'decoder_dim': 512, 'joiner_dim': 512, 'causal': False, 'chunk_size': '16,32,64,-1', 'left_context_frames': '64,128,256,-1', 'use_transducer': True, 'use_ctc': False, 'speaker_input_idx': -1, 'whisper_dim': 768, 'num_codebooks': 32, 'mvq_kd_layer_idx': -1, 'use_subsampled_output': True, 'full_libri': True, 'mini_libri': False, 'use_vox2': False, 'manifest_dir': PosixPath('data/fbank'), 'max_duration': 1000, 'bucketing_sampler': True, 'num_buckets': 30, 'concatenate_cuts': False, 'duration_factor': 1.0, 'gap': 1.0, 'on_the_fly_feats': False, 'shuffle': True, 'drop_last': True, 'return_cuts': True, 'num_workers': 2, 'enable_spec_aug': True, 'spec_aug_time_warp_factor': 80, 'enable_musan': True, 'enable_audioset': False, 'audioset_kd': False, 'use_musan_separately': False, 'input_strategy': 'PrecomputedFeatures', 'drop_features': False, 'return_audio': False, 'use_beats': True, 'use_ecapa': True, 'use_whisper': True, 'whisper_mvq': False, 'beats_ckpt': 'data/models/BEATs/BEATs_iter3_plus_AS2M_finetuned_on_AS2M_cpt2.pt', 'whisper_version': 'small.en', 'lm_vocab_size': 500, 'lm_epoch': 7, 'lm_avg': 1, 'lm_exp_dir': None, 'rnn_lm_embedding_dim': 2048, 'rnn_lm_hidden_dim': 2048, 'rnn_lm_num_layers': 3, 'rnn_lm_tie_weights': True, 'transformer_lm_exp_dir': None, 'transformer_lm_dim_feedforward': 2048, 'transformer_lm_encoder_dim': 768, 'transformer_lm_embedding_dim': 768, 'transformer_lm_nhead': 8, 'transformer_lm_num_layers': 16, 'transformer_lm_tie_weights': True, 'res_dir': PosixPath('multi_KD/exp_vox2_base_lr_0.045_use_beats_0_use_ecapa_1_use_whisper_0_scale_1.0_rerun/inference_speaker_verification'), 'suffix': 'epoch-50-avg-8'}
2023-12-20 09:56:51,876 INFO [inference_speaker.py:255] About to create model
2023-12-20 09:56:52,595 INFO [inference_speaker.py:301] averaging ['multi_KD/exp_vox2_base_lr_0.045_use_beats_0_use_ecapa_1_use_whisper_0_scale_1.0_rerun/epoch-43.pt', 'multi_KD/exp_vox2_base_lr_0.045_use_beats_0_use_ecapa_1_use_whisper_0_scale_1.0_rerun/epoch-44.pt', 'multi_KD/exp_vox2_base_lr_0.045_use_beats_0_use_ecapa_1_use_whisper_0_scale_1.0_rerun/epoch-45.pt', 'multi_KD/exp_vox2_base_lr_0.045_use_beats_0_use_ecapa_1_use_whisper_0_scale_1.0_rerun/epoch-46.pt', 'multi_KD/exp_vox2_base_lr_0.045_use_beats_0_use_ecapa_1_use_whisper_0_scale_1.0_rerun/epoch-47.pt', 'multi_KD/exp_vox2_base_lr_0.045_use_beats_0_use_ecapa_1_use_whisper_0_scale_1.0_rerun/epoch-48.pt', 'multi_KD/exp_vox2_base_lr_0.045_use_beats_0_use_ecapa_1_use_whisper_0_scale_1.0_rerun/epoch-49.pt', 'multi_KD/exp_vox2_base_lr_0.045_use_beats_0_use_ecapa_1_use_whisper_0_scale_1.0_rerun/epoch-50.pt']
2023-12-20 09:57:06,481 INFO [inference_speaker.py:357] Number of model parameters: 65512262
2023-12-20 09:57:06,482 INFO [kd_datamodule.py:715] About to get the test set of voxceleb1 set.
2023-12-20 09:57:06,496 INFO [fetching.py:138] Fetch hyperparams.yaml: Using existing file/symlink in pretrained_models/EncoderClassifier-8f6f7fdaa9628acf73e21ad1f99d5f83/hyperparams.yaml.
2023-12-20 09:57:06,496 INFO [fetching.py:159] Fetch custom.py: Delegating to Huggingface hub, source speechbrain/spkrec-ecapa-voxceleb.
2023-12-20 09:57:16,587 WARNING [_http.py:271] '(MaxRetryError("HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /speechbrain/spkrec-ecapa-voxceleb/resolve/main/custom.py (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x7f02f81271f0>, 'Connection to huggingface.co timed out. (connect timeout=10)'))"), '(Request ID: cd06d389-471c-47b6-8e70-aeeb5e77fc19)')' thrown while requesting HEAD https://huggingface.co/speechbrain/spkrec-ecapa-voxceleb/resolve/main/custom.py
2023-12-20 09:57:16,840 INFO [fetching.py:138] Fetch embedding_model.ckpt: Using existing file/symlink in pretrained_models/EncoderClassifier-8f6f7fdaa9628acf73e21ad1f99d5f83/embedding_model.ckpt.
2023-12-20 09:57:16,841 INFO [fetching.py:138] Fetch mean_var_norm_emb.ckpt: Using existing file/symlink in pretrained_models/EncoderClassifier-8f6f7fdaa9628acf73e21ad1f99d5f83/mean_var_norm_emb.ckpt.
2023-12-20 09:57:16,842 INFO [fetching.py:138] Fetch classifier.ckpt: Using existing file/symlink in pretrained_models/EncoderClassifier-8f6f7fdaa9628acf73e21ad1f99d5f83/classifier.ckpt.
2023-12-20 09:57:16,843 INFO [fetching.py:138] Fetch label_encoder.txt: Using existing file/symlink in pretrained_models/EncoderClassifier-8f6f7fdaa9628acf73e21ad1f99d5f83/label_encoder.ckpt.
2023-12-20 09:57:16,843 INFO [parameter_transfer.py:299] Loading pretrained files for: embedding_model, mean_var_norm_emb, classifier, label_encoder
2023-12-20 09:57:17,033 INFO [kd_datamodule.py:119] Successfully load ecapa-tdnn model.
2023-12-20 09:57:17,034 INFO [teachers.py:17] The teacher model is on device: cuda:0
2023-12-20 09:57:20,828 INFO [inference_speaker.py:187] Processed 152 cuts already.
2023-12-20 09:57:27,410 INFO [zipformer.py:1873] name=None, attn_weights_entropy = tensor([6.1591, 5.2558, 5.7965, 5.2403], device='cuda:0')
2023-12-20 09:57:29,281 INFO [inference_speaker.py:187] Processed 2118 cuts already.
2023-12-20 09:57:36,850 INFO [inference_speaker.py:187] Processed 4392 cuts already.
2023-12-20 09:57:39,788 INFO [inference_speaker.py:188] Finish collecting speaker embeddings
2023-12-20 09:57:39,792 INFO [inference_speaker.py:195] -----------For testing set: VoxCeleb1-cleaned------------
2023-12-20 09:57:39,816 INFO [inference_speaker.py:199] A total of 37611 pairs.
2023-12-20 09:57:41,565 INFO [inference_speaker.py:222] Operating threshold for VoxCeleb1-cleaned: 0.2882, FAR: 0.0110, FRR: 0.0109, EER: 0.0109
2023-12-20 09:57:41,565 INFO [inference_speaker.py:223] Finished testing for VoxCeleb1-cleaned
2023-12-20 09:57:41,571 INFO [inference_speaker.py:389] Done!
log.tar.gz
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:35c05cd86679c4cda80faaddd99f6a9ea0b4d4b8fe42f1f834ec8e4cd59c9091
size 24584315