Spaces:

alibabasglab
/

SpeechScore

Running

SpeechScore / scores /srmr /vad.py

Upload 73 files

936f6fa verified 9 months ago

1.52 kB

	import numpy as np
	from srmrpy.segmentaxis import segment_axis

	def simple_energy_vad(x, fs, framelen=0.02, theta_main=30, theta_min=-55):
	    '''Simple energy voice activity detection algorithm based on energy
	    thresholds as described in Tomi Kinnunen and Padmanabhan Rajan, "A
	    practical, self-adaptive voice activity detector for speaker verification
	    with noisy telephone and microphone data", ICASSP 2013, Vancouver (NOTE:
	    this is the benchmark method, not the method proposed by the authors).

	    Parameters:
	        x: signal to analyse; it is indexed with a boolean mask, so it must
	            be a NumPy array (presumably 1-D mono audio -- TODO confirm).
	        fs: sampling rate; ``framelen * fs`` gives the frame length in
	            samples.
	        framelen: frame length, converted to samples as ``int(framelen * fs)``.
	        theta_main: a frame counts as speech only if its energy (dB) is
	            within ``theta_main`` of the most energetic frame.
	        theta_min: absolute energy floor (dB) a frame must also exceed.

	    Returns:
	        A tuple ``(x[x_vad], x_vad)``: the samples flagged as speech and the
	        boolean per-sample mask (same shape as ``x``).

	    Raises:
	        ValueError: if the frame length is shorter than 2 samples, since the
	            ``1 / (framelen - 1)`` normalisation would be undefined.
	    '''
	    # Split signal in non-overlapping frames (last frame is padded)
	    framelen = int(framelen * fs)
	    if framelen < 2:
	        raise ValueError(
	            'frame length must be at least 2 samples, got %d' % framelen)
	    frames = segment_axis(x, length=framelen, overlap=0, end='pad')

	    # NOTE(review): the mean is taken across frames (axis=0), not within each
	    # frame; a per-frame variance would use axis=1. Kept as-is so results do
	    # not change -- confirm against the reference implementation.
	    frames_zero_mean = frames - frames.mean(axis=0)

	    # Frame energy in dB; the 1e-6 offset keeps log10 finite on silent frames.
	    # 1.0 forces true division on interpreters with integer '/'.
	    frame_energy = 10 * np.log10(
	        1.0 / (framelen - 1) * (frames_zero_mean ** 2).sum(axis=1) + 1e-6)
	    max_energy = np.max(frame_energy)
	    speech_presence = ((frame_energy > max_energy - theta_main)
	                       & (frame_energy > theta_min))

	    # Expand the per-frame decision to a per-sample mask. The mask starts out
	    # all False, so only speech frames need to be written; slices running
	    # past the end of x (padded last frame) are clipped by NumPy.
	    x_vad = np.zeros_like(x, dtype=bool)
	    for idx in np.flatnonzero(speech_presence):
	        x_vad[idx * framelen:(idx + 1) * framelen] = True
	    return x[x_vad], x_vad

	if __name__ == '__main__':
	    # Demo: run the VAD on the WAV file named on the command line and plot
	    # the input signal together with the samples kept as speech.
	    import sys
	    from scipy.io.wavfile import read as readwav
	    from matplotlib import pyplot as plt

	    if len(sys.argv) < 2:
	        sys.exit('usage: %s <wavfile>' % sys.argv[0])

	    fs, s = readwav(sys.argv[1])
	    if np.issubdtype(s.dtype, np.integer):
	        # Integer PCM: scale by the largest value of the sample type.
	        s = s.astype('float') / np.iinfo(s.dtype).max
	    else:
	        # Floating-point WAV data: np.iinfo would raise on a float dtype,
	        # so only convert the type.
	        s = s.astype('float')
	    s_vad, speech_presence = simple_energy_vad(s, fs)

	    plt.plot(s)
	    # Speech-only samples, shifted down by 1 so they do not overlap the input.
	    plt.plot(s_vad - 1, 'g')
	    plt.show()