---
# Speaker-model training configuration; results go to output_dir below.
# NOTE(review): this looks like a fully expanded dump of
# conf/train_quadms_ska.yaml plus defaults - confirm before hand-editing.

# Run setup
config: conf/train_quadms_ska.yaml
print_config: false
log_level: INFO
drop_last_iter: true
dry_run: false
iterator_type: category
valid_iterator_type: sequence
output_dir: exp/spk_train_quadms_ska_raw
ngpu: 1
seed: 0
num_workers: 4
num_att_plot: 0

# Distributed training (single process here: every dist_* target is null)
dist_backend: nccl
dist_init_method: env://
dist_world_size: null
dist_rank: null
local_rank: 0
dist_master_addr: null
dist_master_port: null
dist_launcher: null
multiprocessing_distributed: false
unused_parameters: false
sharded_ddp: false

# cuDNN switches
# NOTE(review): benchmark on + deterministic off presumably means runs are
# not bit-reproducible despite the fixed seed - confirm this is acceptable.
cudnn_enabled: true
cudnn_benchmark: true
cudnn_deterministic: false

# Statistics collection
collect_stats: false
write_collected_feats: false

# Epoch budget and model selection
max_epoch: 40
patience: null
val_scheduler_criterion:
  - valid
  - loss
early_stopping_criterion:
  - valid
  - loss
  - min
# List of criteria; each entry presumably reads (phase, metric, mode) -
# verify against the consumer.
best_model_criterion:
  - [valid, eer, min]
keep_nbest_models: 3
nbest_averaging_interval: 0

# Gradient handling and numeric precision
# NOTE(review): 9999 is a very large threshold; presumably chosen so that
# clipping never triggers - confirm intent.
grad_clip: 9999
grad_clip_type: 2.0
grad_noise: false
accum_grad: 1
no_forward_run: false
resume: true
train_dtype: float32
use_amp: true

# Logging and experiment tracking
log_interval: 100
use_matplotlib: true
use_tensorboard: true
create_graph_in_tensorboard: false
use_wandb: false
wandb_project: null
wandb_id: null
wandb_entity: null
wandb_name: null
wandb_model_log_interval: -1
detect_anomaly: false

# Adapters and parameter initialisation
# use_adapter is false, so the adapter/save_strategy values below are
# presumably unused - verify against the consumer.
use_adapter: false
adapter: lora
save_strategy: all
adapter_conf: {}
pretrain_path: null
init_param: []
ignore_init_mismatch: false
freeze_param: []

# Batching
num_iters_per_epoch: 2239
batch_size: 64
valid_batch_size: 32
batch_bins: 1000000
valid_batch_bins: null
train_shape_file:
  - exp/spk_stats_16k/train/speech_shape
valid_shape_file:
  - exp/spk_stats_16k/valid/speech_shape
batch_type: folded
valid_batch_type: null
fold_length:
  - 120000
sort_in_batch: descending
shuffle_within_batch: false
sort_batch: descending
multiple_iterator: false

# Chunk iterator settings
chunk_length: 500
chunk_shift_ratio: 0.5
num_cache_chunks: 1024
chunk_excluded_key_prefixes: []
chunk_default_fs: null

# Training data; each entry is a (path, name, type) triple, per the key name.
train_data_path_and_name_and_type:
  - [dump/raw/dev_vox1/wav.scp, speech, sound]
  - [dump/raw/dev_vox1/utt2spk, spk_labels, text]
# Validation data; each entry is a (path, name, type) triple, per the key
# name. Two audio inputs (speech, speech2) plus a label file are listed.
valid_data_path_and_name_and_type:
  - [dump/raw/test_vox1/trial.scp, speech, sound]
  - [dump/raw/test_vox1/trial2.scp, speech2, sound]
  - [dump/raw/test_vox1/trial_label, spk_labels, text]
allow_variable_data_keys: false
max_cache_size: 0.0
max_cache_fd: 32
allow_multi_rates: false
valid_max_cache_size: null

# Optimizer
exclude_weight_decay: false
exclude_weight_decay_conf: {}
optim: adam
optim_conf:
  lr: 0.001
  weight_decay: 5.0e-05
  amsgrad: false

# Learning-rate schedule
# first_cycle_steps 11195 = 5 x 2239, the num_iters_per_epoch value set
# earlier in this file; presumably one cycle spans 5 epochs - confirm.
scheduler: cosineannealingwarmuprestarts
scheduler_conf:
  first_cycle_steps: 11195
  cycle_mult: 1.0
  max_lr: 0.001
  min_lr: 5.0e-06
  warmup_steps: 1000
  gamma: 0.75

# Top-level task options
# NOTE(review): num_eval (10) and rir_scp ('') here differ from the values
# under preprocessor_conf (5 and dump/raw/rirs.scp). Which set the consumer
# reads cannot be told from this file - verify.
init: null
use_preprocessor: true
input_size: null
target_duration: 3.0
spk2utt: dump/raw/dev_vox1/spk2utt
spk_num: 1211
sample_rate: 16000
num_eval: 10
rir_scp: ''

# Model
model_conf:
  extract_feats_in_collect_stats: false

# Feature extraction front end
frontend: melspec_torch
frontend_conf:
  preemp: true
  n_fft: 512
  log: true
  win_length: 400
  hop_length: 160
  n_mels: 80
  # This normalize belongs to the front end; the top-level normalize key
  # further down is a separate setting.
  normalize: mn

# Augmentation and normalisation stages (both null here)
specaug: null
specaug_conf: {}
normalize: null
normalize_conf: {}

# Encoder -> pooling -> projector
encoder: quadms_ska_tdnn
encoder_conf:
  model_scale: 8
  ndim: 512
  ska_dim: 128
  output_size: 1536
pooling: chn_attn_stat
pooling_conf: {}
projector: ska_tdnn
projector_conf:
  output_size: 192

# Preprocessing and augmentation sources
preprocessor: spk
preprocessor_conf:
  target_duration: 3.0
  sample_rate: 16000
  num_eval: 5
  noise_apply_prob: 0.5
  # Each entry: a float, an scp path, then two integer pairs.
  # NOTE(review): presumably (weight, scp, [min, max] number of noises,
  # [min, max] SNR) - confirm against the preprocessor implementation.
  noise_info:
    - [1.0, dump/raw/musan_speech.scp, [4, 7], [13, 20]]
    - [1.0, dump/raw/musan_noise.scp, [1, 1], [0, 15]]
    - [1.0, dump/raw/musan_music.scp, [1, 1], [5, 15]]
  rir_apply_prob: 0.5
  rir_scp: dump/raw/rirs.scp

# Loss
loss: aamsoftmax_sc_topk
loss_conf:
  margin: 0.3
  scale: 30
  K: 3
  mp: 0.06
  k_top: 5

# Bookkeeping
required:
  - output_dir
# Quoted so the value stays a string rather than the integer 202402.
version: '202402'
distributed: false