File size: 581 Bytes
98115b8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 |
import os, sys
import glob
from resemblyzer import preprocess_wav, VoiceEncoder
import numpy as np
import glob, tqdm
# Module-level VoiceEncoder shared by extract_speaker_embedding(); it is
# constructed once, at import time, and explicitly configured for the CPU.
encoder = VoiceEncoder(device='cpu')
def extract_speaker_embedding(wav_dir="DATA/wavs", out_dir="DATA/embedding"):
    """Save one speaker embedding per ``*.wav`` file found in *wav_dir*.

    Each file is loaded with ``preprocess_wav``, embedded with the
    module-level ``encoder`` and written with ``np.save`` to
    ``<out_dir>/<file stem>.npy``.

    Args:
        wav_dir: Directory searched (non-recursively) for ``*.wav`` files.
        out_dir: Directory receiving the ``.npy`` files; created if missing.
    """
    os.makedirs(out_dir, exist_ok=True)

    # glob.escape keeps characters such as "[" in wav_dir from being read as
    # wildcards; sorting makes the processing order deterministic.
    pattern = os.path.join(glob.escape(wav_dir), "*.wav")
    wav_paths = sorted(glob.glob(pattern))

    for wav_path in tqdm.tqdm(wav_paths):
        wav = preprocess_wav(wav_path)
        # Per the original author's note the embedding has shape (256,).
        embed = encoder.embed_utterance(wav)

        # Build the output name from the file stem only. The previous
        # str.replace("wavs", ...) / replace(".wav", ...) approach rewrote
        # every occurrence in the path, so a file such as "wavs_01.wav" or
        # "a.wav.bak.wav" was saved under a mangled name.
        stem = os.path.splitext(os.path.basename(wav_path))[0]
        np.save(os.path.join(out_dir, stem + ".npy"), embed)
# Run the extraction only when this file is executed as a script, so that
# importing the module does not start processing.
if __name__ == '__main__':
    extract_speaker_embedding()