---
# Training configuration (appears to be a resolved/dumped run config).
# NOTE(review): the block structure below was reconstructed from a
# whitespace-collapsed copy; keys, values, and key order are unchanged.
# Nesting of the *_conf mappings was inferred from repeated key names
# (e.g. target_duration, num_eval, rir_scp, output_size) — confirm against
# the tool that consumes this file.

# --- Run / logging ---------------------------------------------------------
config: conf/train_rawnet3.yaml
print_config: false
log_level: INFO
drop_last_iter: true
dry_run: false
iterator_type: category
valid_iterator_type: sequence
output_dir: exp/spk_train_rawnet3_raw_sp
ngpu: 1
seed: 0
num_workers: 2
num_att_plot: 0

# --- Distributed settings --------------------------------------------------
dist_backend: nccl
dist_init_method: env://
dist_world_size: 4
dist_rank: 0
local_rank: 0
dist_master_addr: localhost
dist_master_port: 37073
dist_launcher: null
multiprocessing_distributed: true
unused_parameters: false
sharded_ddp: false

# --- cuDNN flags -----------------------------------------------------------
cudnn_enabled: true
cudnn_benchmark: true
cudnn_deterministic: false

# --- Training loop / checkpoint selection ----------------------------------
collect_stats: false
write_collected_feats: false
max_epoch: 40
patience: null
val_scheduler_criterion:
  - valid
  - loss
early_stopping_criterion:
  - valid
  - loss
  - min
# List of criteria; each entry is a 3-item list.
best_model_criterion:
  - - valid
    - eer
    - min
keep_nbest_models: 3
nbest_averaging_interval: 0
grad_clip: 9999
grad_clip_type: 2.0
grad_noise: false
accum_grad: 1
no_forward_run: false
resume: true
train_dtype: float32
use_amp: false
log_interval: 100
use_matplotlib: true
use_tensorboard: true
create_graph_in_tensorboard: false
use_wandb: false
wandb_project: null
wandb_id: null
wandb_entity: null
wandb_name: null
wandb_model_log_interval: -1
detect_anomaly: false

# --- Parameter initialisation / adaptation ---------------------------------
use_lora: false
save_lora_only: true
lora_conf: {}
pretrain_path: null
init_param: []
ignore_init_mismatch: false
freeze_param: []

# --- Batching --------------------------------------------------------------
num_iters_per_epoch: null
batch_size: 128
valid_batch_size: 40
batch_bins: 1000000
valid_batch_bins: null
train_shape_file:
  - exp/spk_stats_16k_sp/train/speech_shape
valid_shape_file:
  - exp/spk_stats_16k_sp/valid/speech_shape
batch_type: folded
valid_batch_type: null
fold_length:
  - 120000
sort_in_batch: descending
shuffle_within_batch: false
sort_batch: descending
multiple_iterator: false
chunk_length: 500
chunk_shift_ratio: 0.5
num_cache_chunks: 1024
chunk_excluded_key_prefixes: []
chunk_default_fs: null

# --- Data ------------------------------------------------------------------
# Each entry is a 3-item list; judging by the key name the items are
# (path, name, type) — TODO confirm.
train_data_path_and_name_and_type:
  - - dump/raw/combined_train_set_sp/wav.scp
    - speech
    - sound
  - - dump/raw/combined_train_set_sp/utt2spk
    - spk_labels
    - text
valid_data_path_and_name_and_type:
  - - dump/raw/voxceleb1_test/trial.scp
    - speech
    - sound
  - - dump/raw/voxceleb1_test/trial2.scp
    - speech2
    - sound
  - - dump/raw/voxceleb1_test/trial_label
    - spk_labels
    - text
allow_variable_data_keys: false
max_cache_size: 0.0
max_cache_fd: 32
allow_multi_rates: false
valid_max_cache_size: null

# --- Optimiser / scheduler -------------------------------------------------
exclude_weight_decay: false
exclude_weight_decay_conf: {}
optim: adam
optim_conf:
  lr: 0.001
  weight_decay: 5.0e-05
  amsgrad: false
scheduler: cosineannealingwarmuprestarts
scheduler_conf:
  first_cycle_steps: 158760
  cycle_mult: 1.0
  # max_lr equals optim_conf.lr (0.001) in this file.
  max_lr: 0.001
  min_lr: 5.0e-06
  warmup_steps: 1000
  gamma: 0.75

# --- Task-level settings ---------------------------------------------------
init: null
use_preprocessor: true
input_size: null
target_duration: 3.0
spk2utt: dump/raw/combined_train_set_sp/spk2utt
spk_num: 37485
sample_rate: 16000
# NOTE(review): top-level num_eval is 10 while preprocessor_conf.num_eval
# is 5 — confirm which one the consumer actually uses.
num_eval: 10
# NOTE(review): empty here, but preprocessor_conf.rir_scp is set — confirm
# that the top-level value is intentionally blank.
rir_scp: ''

# --- Model -----------------------------------------------------------------
model_conf:
  extract_feats_in_collect_stats: false
frontend: asteroid_frontend
frontend_conf:
  sinc_stride: 16
  sinc_kernel_size: 251
  sinc_filters: 256
  preemph_coef: 0.97
  log_term: 1.0e-06
specaug: null
specaug_conf: {}
normalize: null
normalize_conf: {}
encoder: rawnet3
encoder_conf:
  model_scale: 8
  ndim: 1024
  output_size: 1536
pooling: chn_attn_stat
pooling_conf: {}
projector: rawnet3
projector_conf:
  output_size: 192

# --- Preprocessing / augmentation ------------------------------------------
preprocessor: spk
preprocessor_conf:
  target_duration: 3.0
  sample_rate: 16000
  num_eval: 5
  noise_apply_prob: 0.5
  # Each entry: a float, an scp path, then two 2-item integer lists.
  # NOTE(review): the meaning of the float and of the two integer pairs is
  # not visible in this file — check the preprocessor's documentation.
  noise_info:
    - - 1.0
      - dump/raw/musan_speech.scp
      - [4, 7]
      - [13, 20]
    - - 1.0
      - dump/raw/musan_noise.scp
      - [1, 1]
      - [0, 15]
    - - 1.0
      - dump/raw/musan_music.scp
      - [1, 1]
      - [5, 15]
  rir_apply_prob: 0.5
  rir_scp: dump/raw/rirs.scp

# --- Loss ------------------------------------------------------------------
loss: aamsoftmax_sc_topk
loss_conf:
  margin: 0.3
  scale: 30
  K: 3
  mp: 0.06
  k_top: 5

# --- Bookkeeping -----------------------------------------------------------
required:
  - output_dir
# Quoted so the value stays a string rather than the integer 202310.
version: '202310'
distributed: true