speechbrain
/

vad-crdnn-libriparty

Voice Activity Detection

Speech Activity Detection

Speaker Diarization

Model card Files Files and versions Community

vad-crdnn-libriparty / hyperparams.yaml

Adel-Moumen's picture

Update hyperparams.yaml

6492318 9 months ago

raw history blame contribute delete

No virus

2.29 kB

	# ############################################################################
	# Model: Small CRDNN model for Voice Activity Detection
	# Author: Mirco Ravanelli, 2021
	# ############################################################################

	# Feature parameters
	# These scalars are consumed through !ref by the compute_features entry
	# (sample_rate, n_fft, n_mels, and time_resolution for hop_length);
	# n_mels also sets the last dimension of the cnn input_shape.
	sample_rate: 16000
	time_resolution: 0.01 # in seconds (e.g., 0.01 = 10 ms); multiplied by 1000 to give hop_length in ms
	n_fft: 400
	n_mels: 40

	# Model parameters
	# Referenced through !ref further down: rnn_layers, rnn_neurons, dnn_neurons
	# and output_neurons.
	# NOTE(review): activation, dropout, cnn_blocks, cnn_channels, cnn_kernelsize,
	# dnn_blocks and device are not referenced via !ref in the visible part of this
	# file; the cnn section hard-codes its channels and kernel sizes instead.
	# They may still be read by code that loads this file -- confirm before removing.
	activation: !name:torch.nn.LeakyReLU
	dropout: 0.15
	cnn_blocks: 2
	cnn_channels: (16, 32)
	cnn_kernelsize: (3, 3)
	rnn_layers: 2
	rnn_neurons: 32
	rnn_bidirectional: True
	# NOTE(review): the dnn section below defines two DNN blocks (dnn1, dnn2)
	# while this value says 1 -- confirm which one is intended.
	dnn_blocks: 1
	dnn_neurons: 16
	output_neurons: 1
	device: 'cpu' # or 'cuda'

	# Feature/Model objects
	# Fbank feature extractor configured from the feature parameters.
	# The arguments must be nested under the key: left at top level they would
	# redefine sample_rate / n_fft / n_mels as references to themselves and
	# Fbank would be constructed with no arguments.
	compute_features: !new:speechbrain.lobes.features.Fbank
	    sample_rate: !ref <sample_rate>
	    n_fft: !ref <n_fft>
	    n_mels: !ref <n_mels>
	    hop_length: !ref <time_resolution> * 1000 # in ms

	# Input normalization; norm_type is a constructor argument and therefore
	# has to be nested under the key rather than sit at top level.
	mean_var_norm: !new:speechbrain.processing.features.InputNormalization
	    norm_type: sentence

	# Convolutional stage: a LayerNorm followed by two CNN blocks, all passed
	# as keyword arguments to the Sequential container.
	# channels / kernel_size belong to the CNN_Block directly above them; kept
	# at a single indentation level they become duplicate top-level keys.
	# NOTE(review): the values mirror cnn_channels (16, 32) and cnn_kernelsize
	# (3, 3) declared in the model parameters but are hard-coded here.
	cnn: !new:speechbrain.nnet.containers.Sequential
	    input_shape: [null, null, !ref <n_mels>]
	    norm1: !name:speechbrain.nnet.normalization.LayerNorm
	    cnn1: !name:speechbrain.lobes.models.CRDNN.CNN_Block
	        channels: 16
	        kernel_size: (3, 3)
	    cnn2: !name:speechbrain.lobes.models.CRDNN.CNN_Block
	        channels: 32
	        kernel_size: (3, 3)

	# Recurrent stage (GRU) sized from the rnn_* model parameters.
	rnn: !new:speechbrain.nnet.RNN.GRU
	    # NOTE(review): 320 is presumably the flattened CNN output size
	    # (channels x remaining mel bins) -- confirm against CNN_Block pooling
	    # before changing n_mels or the CNN channel counts.
	    input_shape: [null, null, 320]
	    hidden_size: !ref <rnn_neurons>
	    num_layers: !ref <rnn_layers>
	    # Uses the declared flag instead of a second hard-coded True.
	    # The dnn input_shape is written as rnn_neurons * 2, so it only matches
	    # when this flag is True.
	    bidirectional: !ref <rnn_bidirectional>

	# Feed-forward stage: two DNN blocks followed by a bias-free linear layer
	# with output_neurons outputs, all passed as keyword arguments to Sequential.
	# Each neurons / n_neurons / bias key is an argument of the layer directly
	# above it; kept at one indentation level the two neurons keys collide.
	dnn: !new:speechbrain.nnet.containers.Sequential
	    # rnn_neurons * 2: matches the RNN output only when it is bidirectional.
	    input_shape: [null, null, !ref <rnn_neurons> * 2]
	    dnn1: !name:speechbrain.lobes.models.CRDNN.DNN_Block
	        neurons: !ref <dnn_neurons>
	    dnn2: !name:speechbrain.lobes.models.CRDNN.DNN_Block
	        neurons: !ref <dnn_neurons>
	    lin: !name:speechbrain.nnet.linear.Linear
	        n_neurons: !ref <output_neurons>
	        bias: False


	# Bundles the three stages, in processing order, into one ModuleList.
	# The sequence below is the positional-argument list for ModuleList; its
	# single argument is the list [cnn, rnn, dnn].
	model: !new:torch.nn.ModuleList
	    - [!ref <cnn>, !ref <rnn>, !ref <dnn>]

	# Name -> object mapping of the feature extractor, the normalizer, the
	# combined model and its individual stages.
	# The entries must be nested: at top level they would overwrite the objects
	# defined in this file with references to themselves and leave modules empty.
	modules:
	    compute_features: !ref <compute_features>
	    model: !ref <model>
	    cnn: !ref <cnn>
	    rnn: !ref <rnn>
	    dnn: !ref <dnn>
	    mean_var_norm: !ref <mean_var_norm>

	# Pretrainer whose loadables are the combined model and the input
	# normalizer. loadables is a constructor argument and its entries are the
	# name -> object pairs, hence two levels of nesting.
	pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
	    loadables:
	        model: !ref <model>
	        mean_var_norm: !ref <mean_var_norm>