Spaces:

metricspace
/

OcTra

Running

OcTra / speaker_encoder /hparams.py

adde revision

fc5ed00 about 1 year ago

910 Bytes

	## Mel-filterbank
	# The two window values below are durations in milliseconds, not sample counts.
	# With sampling_rate = 16000 (assuming that value is in Hz) they work out to
	# 400 samples per window and 160 samples per step.
	mel_window_length = 25 # In milliseconds
	mel_window_step = 10 # In milliseconds
	# Number of mel channels.
	# NOTE(review): presumably the per-frame feature dimension fed to the model -- confirm against the consumer.
	mel_n_channels = 40


	## Audio
	# Audio sampling rate. The unit is not stated in this file; presumably Hz (16 kHz) -- confirm.
	sampling_rate = 16000
	# Number of spectrogram frames in a partial utterance
	# 160 frames at the 10 ms mel_window_step matches the 1600 ms noted below.
	partials_n_frames = 160 # 1600 ms


	## Voice Activation Detection
	# Window size of the VAD. Must be either 10, 20 or 30 milliseconds.
	# This sets the granularity of the VAD. Should not need to be changed.
	# NOTE(review): the 10/20/30 ms restriction is not enforced in this file; it
	# presumably comes from the VAD backend that consumes this value -- confirm.
	vad_window_length = 30 # In milliseconds
	# Number of frames to average together when performing the moving average smoothing.
	# The larger this value, the larger the VAD variations must be to not get smoothed out.
	# NOTE(review): "frames" here presumably means VAD windows of vad_window_length ms
	# (8 windows -> 240 ms) rather than mel frames -- confirm against the caller.
	vad_moving_average_width = 8
	# Maximum number of consecutive silent frames a segment can have.
	# NOTE(review): presumably also counted in VAD windows (6 windows -> 180 ms) -- confirm.
	vad_max_silence_length = 6


	## Audio volume normalization
	# Target level for volume normalization; per the name, expressed in dBFS
	# (decibels relative to full scale), so the value is negative.
	# NOTE(review): how the level is measured (RMS vs. peak) is decided by the consumer -- confirm there.
	audio_norm_target_dBFS = -30


	## Model parameters
	# Size settings for the speaker-encoder network. The architecture itself is
	# defined elsewhere; these are only the dimensions it is built with.
	# Width of the hidden layers.
	model_hidden_size = 256
	# Dimensionality of the embedding.
	# NOTE(review): presumably the size of the final speaker embedding vector -- confirm.
	model_embedding_size = 256
	# Number of stacked layers in the model.
	model_num_layers = 3