speechbrain
/

mtl-mimic-voicebank

speech-enhancement

Model card Files Files and versions Community

mtl-mimic-voicebank / hyperparams.yaml

pplantinga's picture

Revert "Update model to latest version"

2a2af5c almost 3 years ago

raw history blame

No virus

2.48 kB

	# STFT arguments
	# Shared analysis/synthesis settings. The same four values are handed to both
	# `compute_stft` and `compute_istft` via `!ref`, so the two transforms stay paired.
	# `n_fft` also sizes the enhancement model: `n_fft // 2 + 1` is used for its
	# output_size and embedding input_shape, and `n_fft // 2` for d_model.
	# NOTE(review): win_length / hop_length are presumably in milliseconds
	# (32 ms at 16 kHz = 512 samples = n_fft) — confirm against the STFT class docs.
	sample_rate: 16000
	n_fft: 512
	win_length: 32
	hop_length: 16

	# Enhancement model args
	# Scalars consumed by `enhance_model` through `!ref`:
	#   emb_channels       -> conv1..conv4 out_channels (full, // 2, // 4, // 4)
	#   enhancer_size      -> d_ffn
	#   enhancer_layers    -> num_layers
	#   enhancer_heads     -> nhead
	#   enhancer_causal    -> causal
	#   enhancer_drop_rate -> dropout
	# emb_kernel_size matches the kernel size (3) used by conv1..conv4 in
	# custom_emb_module.
	# NOTE(review): no `!ref <emb_padding>` is visible in this file — confirm
	# whether it is consumed elsewhere or is a leftover.
	emb_channels: 1024
	emb_kernel_size: 3
	emb_padding: same
	enhancer_size: 512
	enhancer_layers: 8
	enhancer_heads: 8
	enhancer_causal: False
	enhancer_drop_rate: 0.1

	# Forward STFT object, configured from the shared STFT arguments.
	# The four settings are constructor arguments and must be nested under the
	# `!new:` tag; at top level they would collide with the scalar definitions
	# of the same names and reference themselves.
	compute_stft: !new:speechbrain.processing.features.STFT
	    sample_rate: !ref <sample_rate>
	    n_fft: !ref <n_fft>
	    win_length: !ref <win_length>
	    hop_length: !ref <hop_length>

	# Inverse STFT object. It takes the same sample_rate / n_fft / win_length /
	# hop_length references as `compute_stft`, nested as constructor arguments.
	compute_istft: !new:speechbrain.processing.features.ISTFT
	    sample_rate: !ref <sample_rate>
	    n_fft: !ref <n_fft>
	    win_length: !ref <win_length>
	    hop_length: !ref <hop_length>

	# Reference to the spectral_magnitude function with `power` pre-bound to 0.5.
	# `power` is an argument of this entry, so it is nested under the `!name:` tag.
	spectral_magnitude: !name:speechbrain.processing.features.spectral_magnitude
	    power: 0.5

	# Reference to the resynthesize function with the STFT / ISTFT objects defined
	# in this file pre-bound as its `stft` and `istft` arguments.
	resynth: !name:speechbrain.processing.signal_processing.resynthesize
	    stft: !ref <compute_stft>
	    istft: !ref <compute_istft>

	# Enhancement network: a convolutional embedding front-end (custom_emb_module)
	# followed by the transformer configured by the enhancer_* scalars.
	# Nesting levels:
	#   1. arguments of CNNTransformerSE
	#   2. arguments / named layers of the Sequential embedding module
	#   3. arguments of each Conv1d layer
	# With the values declared in this file, d_model (n_fft // 2) and conv4's
	# out_channels (emb_channels // 4) both evaluate to 256.
	# NOTE(review): presumably these two must stay equal — confirm before changing
	# n_fft or emb_channels independently.
	enhance_model: !new:speechbrain.lobes.models.transformer.TransformerSE.CNNTransformerSE
	    output_size: !ref <n_fft> // 2 + 1
	    d_model: !ref <n_fft> // 2
	    output_activation: !name:torch.nn.ReLU
	    activation: !name:torch.nn.LeakyReLU
	    dropout: !ref <enhancer_drop_rate>
	    num_layers: !ref <enhancer_layers>
	    d_ffn: !ref <enhancer_size>
	    nhead: !ref <enhancer_heads>
	    causal: !ref <enhancer_causal>
	    custom_emb_module: !new:speechbrain.nnet.containers.Sequential
	        # Last input dimension is the same n_fft // 2 + 1 expression used for
	        # output_size above.
	        input_shape: [null, null, !ref <n_fft> // 2 + 1]
	        # Four conv -> norm -> activation stages; channel width goes
	        # emb_channels -> // 2 -> // 4 -> // 4. Kernel size comes from the
	        # declared emb_kernel_size (3) instead of a repeated literal.
	        conv1: !name:speechbrain.nnet.CNN.Conv1d
	            out_channels: !ref <emb_channels>
	            kernel_size: !ref <emb_kernel_size>
	        norm1: !name:speechbrain.nnet.normalization.LayerNorm
	        act1: !new:torch.nn.LeakyReLU
	        conv2: !name:speechbrain.nnet.CNN.Conv1d
	            out_channels: !ref <emb_channels> // 2
	            kernel_size: !ref <emb_kernel_size>
	        norm2: !name:speechbrain.nnet.normalization.LayerNorm
	        act2: !new:torch.nn.LeakyReLU
	        conv3: !name:speechbrain.nnet.CNN.Conv1d
	            out_channels: !ref <emb_channels> // 4
	            kernel_size: !ref <emb_kernel_size>
	        norm3: !name:speechbrain.nnet.normalization.LayerNorm
	        act3: !new:torch.nn.LeakyReLU
	        conv4: !name:speechbrain.nnet.CNN.Conv1d
	            out_channels: !ref <emb_channels> // 4
	            kernel_size: !ref <emb_kernel_size>
	        norm4: !name:speechbrain.nnet.normalization.LayerNorm
	        act4: !new:torch.nn.LeakyReLU

	# Mapping of module names to the objects defined in this file.
	# The entry is nested so that `modules` is a mapping rather than an empty key
	# followed by a second top-level `enhance_model`.
	modules:
	    enhance_model: !ref <enhance_model>

	# Pretrainer object; `loadables` maps the name `enhance_model` to the model
	# object defined in this file.
	# NOTE(review): presumably this is what loads the stored checkpoint into
	# enhance_model — confirm against the Pretrainer documentation.
	pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
	    loadables:
	        enhance_model: !ref <enhance_model>