# STFT arguments
sample_rate: 16000
n_fft: 512
win_length: 32
hop_length: 16

# Enhancement model args
emb_channels: 1024
emb_kernel_size: 3
emb_padding: same
enhancer_size: 512
enhancer_layers: 8
enhancer_heads: 8
enhancer_causal: False
enhancer_drop_rate: 0.1
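
# Analysis/resynthesis transforms: the STFT turns waveforms into complex
# spectra for the enhancer; the ISTFT (same settings) converts enhanced
# spectra back to waveforms.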
compute_stft: !new:speechbrain.processing.features.STFT
    sample_rate: !ref <sample_rate>
    n_fft: !ref <n_fft>
    win_length: !ref <win_length>
    hop_length: !ref <hop_length>

compute_istft: !new:speechbrain.processing.features.ISTFT
    sample_rate: !ref <sample_rate>
    n_fft: !ref <n_fft>
    win_length: !ref <win_length>
    hop_length: !ref <hop_length>
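
# Spectral-domain helpers: spectral_magnitude with power: 0.5 returns the
# magnitude spectrogram, and resynthesize rebuilds a waveform from enhanced
# magnitudes using the STFT/ISTFT defined above.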
spectral_magnitude: !name:speechbrain.processing.features.spectral_magnitude
    power: 0.5

resynth: !name:speechbrain.processing.signal_processing.resynthesize
    stft: !ref <compute_stft>
    istft: !ref <compute_istft>
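
# Enhancement network: a CNN embedding front-end (custom_emb_module below)
# followed by a Transformer encoder producing one output per frequency bin
# (<n_fft> // 2 + 1 = 257 for n_fft: 512).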
enhance_model: !new:speechbrain.lobes.models.transformer.TransformerSE.CNNTransformerSE
    output_size: !ref <n_fft> // 2 + 1
    d_model: !ref <n_fft> // 2
    output_activation: !name:torch.nn.ReLU
    activation: !name:torch.nn.LeakyReLU
    dropout: !ref <enhancer_drop_rate>
    num_layers: !ref <enhancer_layers>
    d_ffn: !ref <enhancer_size>
    nhead: !ref <enhancer_heads>
    causal: !ref <enhancer_causal>
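    # CNN front-end applied to the 257-bin input before the Transformer
    # layers; channels taper 1024 -> 512 -> 256 -> 256 with LayerNorm and
    # LeakyReLU after each convolution.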
    custom_emb_module: !new:speechbrain.nnet.containers.Sequential
        input_shape: [null, null, !ref <n_fft> // 2 + 1]
        conv1: !name:speechbrain.nnet.CNN.Conv1d
            out_channels: !ref <emb_channels>
            kernel_size: !ref <emb_kernel_size>
            padding: !ref <emb_padding>
        norm1: !name:speechbrain.nnet.normalization.LayerNorm
        act1: !new:torch.nn.LeakyReLU
        conv2: !name:speechbrain.nnet.CNN.Conv1d
            out_channels: !ref <emb_channels> // 2
            kernel_size: !ref <emb_kernel_size>
            padding: !ref <emb_padding>
        norm2: !name:speechbrain.nnet.normalization.LayerNorm
        act2: !new:torch.nn.LeakyReLU
        conv3: !name:speechbrain.nnet.CNN.Conv1d
            out_channels: !ref <emb_channels> // 4
            kernel_size: !ref <emb_kernel_size>
            padding: !ref <emb_padding>
        norm3: !name:speechbrain.nnet.normalization.LayerNorm
        act3: !new:torch.nn.LeakyReLU
        conv4: !name:speechbrain.nnet.CNN.Conv1d
            out_channels: !ref <emb_channels> // 4
            kernel_size: !ref <emb_kernel_size>
            padding: !ref <emb_padding>
        norm4: !name:speechbrain.nnet.normalization.LayerNorm
        act4: !new:torch.nn.LeakyReLU
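
# Modules exposed to the inference / fine-tuning wrapper.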
modules:
    enhance_model: !ref <enhance_model>
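
# Pretrainer maps checkpoint files onto the objects that should receive
# their parameters (here, only the enhancement model).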
pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
    loadables:
        enhance_model: !ref <enhance_model>
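
# Usage sketch (illustrative; the file name, checkpoint source, and variable
# names below are assumptions, not part of this recipe): load the YAML with
# hyperpyyaml, fetch parameters through the pretrainer, then run the model.
#   from hyperpyyaml import load_hyperpyyaml
#   with open("enhance_model.yaml") as fin:
#       hparams = load_hyperpyyaml(fin)
#   hparams["pretrainer"].collect_files(default_source="<path_or_hf_repo>")
#   hparams["pretrainer"].load_collected()
#   enhancer = hparams["enhance_model"].eval()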