Spaces:
Running
Running
| import numpy as np | |
| from srmrpy.segmentaxis import segment_axis | |
def simple_energy_vad(x, fs, framelen=0.02, theta_main=30, theta_min=-55):
    '''Simple energy-based voice activity detection (VAD).

    Benchmark energy-threshold method described in Tomi Kinnunen and
    Padmanabhan Rajan, "A practical, self-adaptive voice activity detector
    for speaker verification with noisy telephone and microphone data",
    ICASSP 2013, Vancouver (NOTE: this is the benchmark method, not the
    method proposed by the authors).

    The signal is cut into non-overlapping frames; a frame is marked as
    speech when its log-energy is both within ``theta_main`` of the most
    energetic frame and above the absolute floor ``theta_min``.

    Parameters
    ----------
    x : array_like
        Input signal (presumably 1-D; verify against callers).
    fs : number
        Sampling rate. ``framelen * fs`` is used as the frame length in
        samples, so ``fs`` is in Hz when ``framelen`` is in seconds.
    framelen : float, optional
        Frame duration, multiplied by ``fs`` to get samples (default 0.02).
    theta_main : float, optional
        Maximum allowed distance below the loudest frame's energy, in the
        same log (dB) units as the frame energy (default 30).
    theta_min : float, optional
        Absolute lower bound on frame energy, in dB (default -55).

    Returns
    -------
    x_speech : ndarray
        The samples of ``x`` selected by the mask (``x[x_vad]``).
    x_vad : ndarray of bool
        Mask with the same shape as ``x``; True where speech was detected.

    Raises
    ------
    ValueError
        If the frame length is shorter than 2 samples (the unbiased
        variance estimate divides by ``framelen - 1``).
    '''
    # Frame length in samples.
    framelen = int(framelen * fs)
    if framelen < 2:
        raise ValueError(
            'frame length must be at least 2 samples, got %d' % framelen)

    # Split signal in non-overlapping frames; the trailing partial frame is
    # padded by segment_axis (end='pad').
    frames = segment_axis(x, length=framelen, overlap=0, end='pad')

    # Remove the mean of EACH frame (axis=1) so the sum of squares below is
    # the per-frame sample variance. Averaging over axis=0 would instead
    # subtract a mean taken across frames for every sample position.
    frames_zero_mean = frames - frames.mean(axis=1, keepdims=True)

    # Log-energy per frame; the float divisor avoids integer division and
    # the 1e-6 offset keeps log10 finite for silent frames.
    frame_energy = 10 * np.log10(
        (frames_zero_mean ** 2).sum(axis=1) / (framelen - 1.0) + 1e-6)
    max_energy = frame_energy.max()

    # Speech = close enough to the loudest frame AND above the noise floor.
    speech_presence = ((frame_energy > max_energy - theta_main)
                       & (frame_energy > theta_min))

    # Expand the per-frame decision to a per-sample mask. The mask starts
    # all-False, so only speech frames need to be written; slices that run
    # past the end of x (padded last frame) are truncated by numpy.
    x_vad = np.zeros_like(x, dtype=bool)
    for idx in np.flatnonzero(speech_presence):
        x_vad[idx * framelen:(idx + 1) * framelen] = True

    return x[x_vad], x_vad
if __name__ == '__main__':
    # Demo: run the VAD on a WAV file given on the command line and plot
    # the original signal together with the retained (speech) samples.
    import sys
    from scipy.io.wavfile import read as readwav
    from matplotlib import pyplot as plt

    if len(sys.argv) < 2:
        sys.exit('usage: %s <file.wav>' % sys.argv[0])

    fs, s = readwav(sys.argv[1])
    if np.issubdtype(s.dtype, np.integer):
        # Integer PCM: scale by the dtype's maximum value.
        s = s.astype('float') / np.iinfo(s.dtype).max
    else:
        # Floating-point WAV data: np.iinfo would raise on a float dtype,
        # so only convert the type and leave the scale as read.
        s = s.astype('float')

    s_vad, speech_presence = simple_energy_vad(s, fs)
    plt.plot(s)
    # Offset the speech-only signal by -1 so it is drawn below the input.
    plt.plot(s_vad - 1, 'g')
    plt.show()