# STFT arguments (referenced by compute_stft and compute_istft).
sample_rate: 16000
n_fft: 512
# NOTE(review): win_length / hop_length are presumably in milliseconds, as
# speechbrain's STFT expects -- confirm against the STFT documentation.
win_length: 32
hop_length: 16

# Enhancement model arguments (referenced by enhance_model).
emb_channels: 1024
# NOTE(review): emb_kernel_size and emb_padding are not referenced by any
# !ref in the visible part of this file; the Conv1d entries under
# enhance_model hard-code kernel_size: 3 instead.
emb_kernel_size: 3
emb_padding: same
enhancer_size: 512
enhancer_layers: 8
enhancer_heads: 8
enhancer_causal: False
enhancer_drop_rate: 0.1

# Forward STFT object, instantiated via !new with the shared STFT arguments
# defined at the top of this file.
compute_stft: !new:speechbrain.processing.features.STFT
    sample_rate: !ref <sample_rate>
    n_fft: !ref <n_fft>
    win_length: !ref <win_length>
    hop_length: !ref <hop_length>

# Inverse STFT object; takes the same four arguments as compute_stft so the
# two transforms stay configured identically.
compute_istft: !new:speechbrain.processing.features.ISTFT
    sample_rate: !ref <sample_rate>
    n_fft: !ref <n_fft>
    win_length: !ref <win_length>
    hop_length: !ref <hop_length>

# !name yields the function with `power` pre-bound rather than calling it.
# NOTE(review): power: 0.5 presumably selects the magnitude (not power)
# spectrum -- confirm against spectral_magnitude's documentation.
spectral_magnitude: !name:speechbrain.processing.features.spectral_magnitude
    power: 0.5

# Resynthesis function with the STFT / ISTFT objects defined in this file
# pre-bound as its `stft` and `istft` keyword arguments.
resynth: !name:speechbrain.processing.signal_processing.resynthesize
    stft: !ref <compute_stft>
    istft: !ref <compute_istft>

# Enhancement model: a CNN embedding front-end (custom_emb_module) passed to
# CNNTransformerSE. With n_fft = 512, output_size evaluates to 257 and
# d_model to 256.
enhance_model: !new:speechbrain.lobes.models.transformer.TransformerSE.CNNTransformerSE
    output_size: !ref <n_fft> // 2 + 1
    d_model: !ref <n_fft> // 2
    output_activation: !name:torch.nn.ReLU
    activation: !name:torch.nn.LeakyReLU
    dropout: !ref <enhancer_drop_rate>
    num_layers: !ref <enhancer_layers>
    d_ffn: !ref <enhancer_size>
    nhead: !ref <enhancer_heads>
    causal: !ref <enhancer_causal>
    # Four Conv1d -> LayerNorm -> LeakyReLU stages. With emb_channels = 1024
    # the channel widths are 1024 -> 512 -> 256 -> 256.
    # NOTE(review): the final width equals d_model (256) for the current
    # values; presumably this match is required -- confirm before changing
    # emb_channels or n_fft independently.
    custom_emb_module: !new:speechbrain.nnet.containers.Sequential
        input_shape: [null, null, !ref <n_fft> // 2 + 1]
        conv1: !name:speechbrain.nnet.CNN.Conv1d
            out_channels: !ref <emb_channels>
            kernel_size: 3
        norm1: !name:speechbrain.nnet.normalization.LayerNorm
        act1: !new:torch.nn.LeakyReLU
        conv2: !name:speechbrain.nnet.CNN.Conv1d
            out_channels: !ref <emb_channels> // 2
            kernel_size: 3
        norm2: !name:speechbrain.nnet.normalization.LayerNorm
        act2: !new:torch.nn.LeakyReLU
        conv3: !name:speechbrain.nnet.CNN.Conv1d
            out_channels: !ref <emb_channels> // 4
            kernel_size: 3
        norm3: !name:speechbrain.nnet.normalization.LayerNorm
        act3: !new:torch.nn.LeakyReLU
        conv4: !name:speechbrain.nnet.CNN.Conv1d
            out_channels: !ref <emb_channels> // 4
            kernel_size: 3
        norm4: !name:speechbrain.nnet.normalization.LayerNorm
        act4: !new:torch.nn.LeakyReLU

# Mapping of module names to the objects defined in this file.
modules:
    enhance_model: !ref <enhance_model>

# Pretrainer whose `loadables` mapping registers enhance_model under the
# name "enhance_model".
pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
    loadables:
        enhance_model: !ref <enhance_model>