---
# Speaker-embedding training configuration.
# NOTE(review): this document was reconstructed from a whitespace-flattened
# copy; the nesting of the *_conf mappings was inferred from key order, so
# verify it against the consuming tool's schema before relying on it.

# --- Run identity and logging ---
config: conf/tuning/train_ska_Vox12_emb192_torchmelspec_subcentertopk_wavlm_jt.yaml
print_config: false
log_level: INFO
drop_last_iter: true
dry_run: false
iterator_type: category
valid_iterator_type: sequence
output_dir: exp/spk_train_ska_Vox12_emb192_torchmelspec_subcentertopk_wavlm_jt_raw_sp
ngpu: 1
seed: 0
num_workers: 8
num_att_plot: 0

# --- Distributed training ---
dist_backend: nccl
dist_init_method: env://
dist_world_size: 4
dist_rank: 0
local_rank: 0
dist_master_addr: localhost
dist_master_port: 52613
dist_launcher: null
multiprocessing_distributed: true
unused_parameters: true
sharded_ddp: false

# --- cuDNN ---
cudnn_enabled: true
cudnn_benchmark: true
cudnn_deterministic: false

# --- Statistics collection ---
collect_stats: false
write_collected_feats: false

# --- Training loop, stopping and checkpoint selection ---
max_epoch: 20
patience: null
val_scheduler_criterion:
  - valid
  - loss
early_stopping_criterion:
  - valid
  - loss
  - min
best_model_criterion:
  - - valid
    - eer
    - min
keep_nbest_models: 2
nbest_averaging_interval: 0
grad_clip: 9999
grad_clip_type: 2.0
grad_noise: false
accum_grad: 64
no_forward_run: false
resume: true
train_dtype: float32
use_amp: true
log_interval: 100

# --- Reporting back-ends ---
use_matplotlib: true
use_tensorboard: true
create_graph_in_tensorboard: false
use_wandb: false
wandb_project: null
wandb_id: null
wandb_entity: null
wandb_name: null
wandb_model_log_interval: -1
detect_anomaly: false

# --- LoRA ---
use_lora: false
save_lora_only: true
lora_conf: {}

# --- Parameter initialisation ---
pretrain_path: null
init_param:
  - exp/spk_train_ska_Vox12_emb192_torchmelspec_subcentertopk_wavlm_nodownsample_raw_sp/valid.eer.best.pth
ignore_init_mismatch: false
freeze_param: []

# --- Batching ---
num_iters_per_epoch: 32000
batch_size: 16
valid_batch_size: 5
batch_bins: 1000000
valid_batch_bins: null
train_shape_file:
  - exp/spk_stats_16k_sp/train/speech_shape
valid_shape_file:
  - exp/spk_stats_16k_sp/valid/speech_shape
batch_type: folded
valid_batch_type: null
fold_length:
  - 120000
sort_in_batch: descending
shuffle_within_batch: false
sort_batch: descending
multiple_iterator: false
chunk_length: 500
chunk_shift_ratio: 0.5
num_cache_chunks: 1024
chunk_excluded_key_prefixes: []
chunk_default_fs: null

# --- Data sources: each entry is [path, name, type] ---
train_data_path_and_name_and_type:
  - - dump/raw/voxceleb12_devs_sp/wav.scp
    - speech
    - sound
  - - dump/raw/voxceleb12_devs_sp/utt2spk
    - spk_labels
    - text
valid_data_path_and_name_and_type:
  - - dump/raw/voxceleb1_test/trial.scp
    - speech
    - sound
  - - dump/raw/voxceleb1_test/trial2.scp
    - speech2
    - sound
  - - dump/raw/voxceleb1_test/trial_label
    - spk_labels
    - text
allow_variable_data_keys: false
max_cache_size: 0.0
max_cache_fd: 32
allow_multi_rates: false
valid_max_cache_size: null

# --- Optimizer and learning-rate schedule ---
exclude_weight_decay: false
exclude_weight_decay_conf: {}
optim: adam
optim_conf:
  lr: 0.0001
  weight_decay: 1.0e-05
  amsgrad: false
scheduler: cosineannealingwarmuprestarts
# NOTE(review): max_lr (5.0e-05) is lower than optim_conf.lr (0.0001);
# presumably the scheduler's values take over once it steps — confirm which
# learning rate is intended.
scheduler_conf:
  first_cycle_steps: 10000
  cycle_mult: 1.0
  max_lr: 5.0e-05
  min_lr: 5.0e-06
  warmup_steps: 1000
  gamma: 0.75
init: null

# --- Task-level data settings ---
use_preprocessor: true
input_size: null
target_duration: 3.0
spk2utt: dump/raw/voxceleb12_devs_sp/spk2utt
spk_num: 21615
sample_rate: 16000
num_eval: 10
rir_scp: ''

# --- Model components ---
model_conf:
  extract_feats_in_collect_stats: false
frontend: s3prl
frontend_conf:
  frontend_conf:
    upstream: wavlm_large
  download_dir: ./hub
  multilayer_feature: true
specaug: null
specaug_conf: {}
normalize: utterance_mvn
normalize_conf:
  norm_vars: false
encoder: ska_tdnn
encoder_conf:
  model_scale: 8
  ndim: 1024
  ska_dim: 128
  output_size: 1536
pooling: chn_attn_stat
pooling_conf: {}
projector: ska_tdnn
projector_conf:
  output_size: 192

# --- Preprocessing / augmentation ---
preprocessor: spk
# NOTE(review): target_duration (6.0), num_eval (3) and rir_scp below differ
# from the top-level keys of the same names (3.0 / 10 / '') — confirm which
# set the consumer actually reads.
# NOTE(review): noise_apply_prob and rir_apply_prob are both 0.0, so
# presumably noise_info and rir_scp are not applied in this run — confirm.
preprocessor_conf:
  target_duration: 6.0
  sample_rate: 16000
  num_eval: 3
  noise_apply_prob: 0.0
  noise_info:
    - - 1.0
      - dump/raw/musan_speech.scp
      - [4, 7]
      - [13, 20]
    - - 1.0
      - dump/raw/musan_noise.scp
      - [1, 1]
      - [0, 15]
    - - 1.0
      - dump/raw/musan_music.scp
      - [1, 1]
      - [5, 15]
  rir_apply_prob: 0.0
  rir_scp: dump/raw/rirs.scp

# --- Loss ---
loss: aamsoftmax_sc_topk
loss_conf:
  margin: 0.5
  scale: 30
  K: 3
  mp: 0.06
  k_top: 5

# --- Bookkeeping ---
required:
  - output_dir
# Quoted so the all-digit version stays a string rather than an integer.
version: '202310'
distributed: true