# Example script: prompt-driven voice conversion with DreamVoice + FreeVC.
#
# Steps, in order:
#   1. load the FreeVC models and the DreamVoice plugin,
#   2. ask DreamVoice for a target speaker (`gen_spk`) from a text prompt,
#   3. run the content model on a source recording,
#   4. convert and write the result to 'output.wav'.
import torch
import librosa
import soundfile as sf

from dreamvoice import DreamVoice_Plugin
from dreamvoice.freevc_wrapper import get_freevc_models, convert

# Prefer the GPU, but fall back to CPU so the script still starts on machines
# without CUDA instead of failing on the first `.to('cuda')`.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
freevc, cmodel, hps = get_freevc_models('ckpts_freevc/', 'dreamvoice/', device)

# init dreamvoice
dreamvoice = DreamVoice_Plugin(config='plugin_freevc.yaml', device=device)

# generate speaker
prompt = "old female's voice, deep and dark"
target_se = dreamvoice.gen_spk(prompt)

# content source
source_path = 'examples/test1.wav'
# librosa.load returns (waveform, sample_rate); keep only the waveform,
# loaded at a 16 kHz sampling rate.
audio_clip = librosa.load(source_path, sr=16000)[0]
# Add a leading dimension before moving the clip to the target device.
# NOTE(review): presumably the batch dimension expected by `cmodel` - confirm.
audio_clip = torch.tensor(audio_clip).unsqueeze(0).to(device)

# Inference only: skip autograd bookkeeping for the content-model forward pass.
with torch.no_grad():
    # Swap dims 1 and 2 of the hidden states before handing them to `convert`.
    content = cmodel(audio_clip).last_hidden_state.transpose(1, 2).to(device)

output, out_sr = convert(freevc, content, target_se)
sf.write('output.wav', output, out_sr)