Vietnamese_VITS / Step2_extract_feature.py
fhieni's picture
Upload 7 files
98115b8
raw
history blame contribute delete
No virus
581 Bytes
import os, sys
import glob
from resemblyzer import preprocess_wav, VoiceEncoder
import numpy as np
import glob, tqdm
# Shared Resemblyzer voice encoder, constructed once at import time with
# device='cpu'; extract_speaker_embedding() reuses it for every file.
encoder = VoiceEncoder(device='cpu')
def extract_speaker_embedding(wav_dir="DATA/wavs", embedding_dir="DATA/embedding"):
    """Compute a speaker embedding for each WAV file and save it as ``.npy``.

    Every ``*.wav`` file directly inside ``wav_dir`` is loaded with
    ``preprocess_wav``, embedded with the module-level ``encoder`` and written
    to ``embedding_dir`` under the same base name with a ``.npy`` extension.

    Args:
        wav_dir: Directory searched (non-recursively) for ``*.wav`` files.
        embedding_dir: Directory the ``.npy`` files are written to. It is
            created if it does not exist.
    """
    os.makedirs(embedding_dir, exist_ok=True)
    wav_paths = glob.glob(os.path.join(wav_dir, "*.wav"))
    for wav_path in tqdm.tqdm(wav_paths):
        wav = preprocess_wav(wav_path)
        # Original author's note: embed.shape is (256,).
        embed = encoder.embed_utterance(wav)
        # Derive the output name from the base name only. A blanket
        # str.replace("wavs", ...) / replace(".wav", ...) on the full path
        # would also rewrite those substrings inside the file name itself
        # (e.g. "wavs_01.wav" -> "embedding_01.npy").
        stem = os.path.splitext(os.path.basename(wav_path))[0]
        np.save(os.path.join(embedding_dir, stem + ".npy"), embed)
# Script entry point: run the extraction only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    extract_speaker_embedding()