# HyperPyYAML configuration for a spectral-mask speech enhancement model.
# Tags used below: `!new:` instantiates a class, `!name:` binds keyword
# arguments to a callable without calling it, and `!ref <key>` substitutes
# the value of another key in this file (simple arithmetic is allowed).
#
# NOTE(review): the `!ref` targets were missing from the text this file was
# recovered from; they were restored by matching key names. Entries where the
# target had to be inferred carry their own NOTE(review) -- confirm those.

# STFT arguments
sample_rate: 16000
n_fft: 512
win_length: 32
hop_length: 16

# Enhancement model args
emb_channels: 1024
# NOTE(review): emb_kernel_size and emb_padding are not referenced anywhere in
# the visible part of this file; the conv layers below hard-code
# `kernel_size: 3`. Kept for compatibility with any external consumer.
emb_kernel_size: 3
emb_padding: same
enhancer_size: 512
enhancer_layers: 8
enhancer_heads: 8
enhancer_causal: False
enhancer_drop_rate: 0.1

# Forward and inverse STFT share the same analysis parameters so that
# `resynth` can pair them.
compute_stft: !new:speechbrain.processing.features.STFT
    sample_rate: !ref <sample_rate>
    n_fft: !ref <n_fft>
    win_length: !ref <win_length>
    hop_length: !ref <hop_length>

compute_istft: !new:speechbrain.processing.features.ISTFT
    sample_rate: !ref <sample_rate>
    n_fft: !ref <n_fft>
    win_length: !ref <win_length>
    hop_length: !ref <hop_length>

# Callable with `power` pre-bound to 0.5.
spectral_magnitude: !name:speechbrain.processing.features.spectral_magnitude
    power: 0.5

# Callable with the STFT / ISTFT objects defined above pre-bound.
resynth: !name:speechbrain.processing.signal_processing.resynthesize
    stft: !ref <compute_stft>
    istft: !ref <compute_istft>

enhance_model: !new:speechbrain.lobes.models.transformer.TransformerSE.CNNTransformerSE
    # n_fft // 2 + 1 = 257 with the n_fft above; the same expression is used
    # for the last dimension of the embedding module's `input_shape`.
    output_size: !ref <n_fft> // 2 + 1
    # NOTE(review): reference target inferred. <enhancer_size> // 2 = 256
    # equals the conv4 output width (<emb_channels> // 4 = 256), which is
    # presumably what the transformer consumes -- confirm against the
    # upstream recipe.
    d_model: !ref <enhancer_size> // 2
    output_activation: !name:torch.nn.ReLU
    activation: !name:torch.nn.LeakyReLU
    dropout: !ref <enhancer_drop_rate>
    num_layers: !ref <enhancer_layers>
    d_ffn: !ref <enhancer_size>
    nhead: !ref <enhancer_heads>
    causal: !ref <enhancer_causal>
    # Four conv -> layer-norm -> LeakyReLU stages; channel widths go
    # 1024 -> 512 -> 256 -> 256 with the emb_channels value above.
    # NOTE(review): `<emb_channels>` as the out_channels base is inferred
    # from the key name -- confirm.
    custom_emb_module: !new:speechbrain.nnet.containers.Sequential
        input_shape: [null, null, !ref <n_fft> // 2 + 1]
        conv1: !name:speechbrain.nnet.CNN.Conv1d
            out_channels: !ref <emb_channels>
            kernel_size: 3
        norm1: !name:speechbrain.nnet.normalization.LayerNorm
        act1: !new:torch.nn.LeakyReLU
        conv2: !name:speechbrain.nnet.CNN.Conv1d
            out_channels: !ref <emb_channels> // 2
            kernel_size: 3
        norm2: !name:speechbrain.nnet.normalization.LayerNorm
        act2: !new:torch.nn.LeakyReLU
        conv3: !name:speechbrain.nnet.CNN.Conv1d
            out_channels: !ref <emb_channels> // 4
            kernel_size: 3
        norm3: !name:speechbrain.nnet.normalization.LayerNorm
        act3: !new:torch.nn.LeakyReLU
        conv4: !name:speechbrain.nnet.CNN.Conv1d
            out_channels: !ref <emb_channels> // 4
            kernel_size: 3
        norm4: !name:speechbrain.nnet.normalization.LayerNorm
        act4: !new:torch.nn.LeakyReLU

# Both entries point at the single `enhance_model` instance defined above.
modules:
    enhance_model: !ref <enhance_model>

pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
    loadables:
        enhance_model: !ref <enhance_model>