import os

import torch
from speechbrain.pretrained import EncoderClassifier

# Model source handed to SpeechBrain's loader (pretrained speaker-recognition model).
spk_model_name = "speechbrain/spkrec-xvect-voxceleb"

# Prefer the GPU when CUDA is usable; otherwise run on the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Built once at import time so every call to create_speaker_embedding()
# reuses the same model. Its files are kept under /tmp/<spk_model_name>.
speaker_model = EncoderClassifier.from_hparams(
    source=spk_model_name,
    run_opts={"device": device},
    savedir=os.path.join("/tmp", spk_model_name),
)


def create_speaker_embedding(waveform):
    """Compute a normalized speaker embedding for ``waveform``.

    Args:
        waveform: Audio samples as a ``torch.Tensor`` or anything
            ``torch.as_tensor`` can convert (NumPy array, list of floats).
            Presumably a 1-D signal or a (batch, time) batch -- confirm
            against the expectations of ``speaker_model.encode_batch``.

    Returns:
        torch.Tensor: The output of ``speaker_model.encode_batch`` with every
        vector along the last dimension scaled to unit L2 norm. The tensor is
        produced under ``torch.no_grad()`` and is returned as a tensor (it is
        not moved to the CPU or converted to NumPy here).
    """
    # as_tensor hands an existing tensor back untouched, so tensor callers see
    # no change; array-likes are converted instead of failing inside
    # encode_batch, which calls tensor-only methods on its input.
    waveform = torch.as_tensor(waveform)

    # Inference only: skip autograd bookkeeping for the forward pass.
    with torch.no_grad():
        speaker_embeddings = speaker_model.encode_batch(waveform)
        speaker_embeddings = torch.nn.functional.normalize(speaker_embeddings, dim=-1)
    return speaker_embeddings