from TTS.tts.models import setup_model as setup_tts_model
from TTS.config import load_config
import soundfile as sf
import torch
import os


class TTS_object(object):
    """Thin wrapper around a Coqui-TTS (XTTS-style) checkpoint.

    Loads a model from a directory containing ``config.json`` plus checkpoint
    files, and exposes voice-cloned synthesis from a reference audio file.
    """

    def __init__(self, model_path, device):
        """Load the model and move it to ``device``.

        Args:
            model_path: Directory holding ``config.json`` and the checkpoint.
            device: Torch device spec (e.g. ``"cuda"`` or ``"cpu"``).
        """
        config = load_config(os.path.join(model_path, "config.json"))
        # Keep the config around so save_inference can read the output
        # sample rate instead of guessing.
        self.config = config
        self.model = setup_tts_model(config)
        self.model.load_checkpoint(
            config,
            checkpoint_dir=model_path,
            eval=True,
            use_deepspeed=False,
            strict=False,
        )
        # BUG FIX: the original hard-coded "cuda" and ignored the `device`
        # argument, making the parameter dead and breaking CPU-only hosts.
        self.model.to(device)

    def inference(self, text, reference_file_path, language="ar"):
        """Synthesize ``text`` in the voice of the reference audio.

        Args:
            text: Text to synthesize.
            reference_file_path: Path to a reference audio file used to
                compute the speaker conditioning latents.
            language: Language code passed to the model. Defaults to "ar"
                (the original hard-coded value) for backward compatibility.

        Returns:
            The raw waveform from the model's ``inference`` output
            (the ``'wav'`` entry of the returned dict).
        """
        gpt_cond_latent, speaker_embedding = self.model.get_conditioning_latents(
            audio_path=[reference_file_path]
        )
        # Low temperature keeps the output close to deterministic.
        return self.model.inference(
            text,
            language,
            gpt_cond_latent,
            speaker_embedding,
            temperature=0.01,
        )["wav"]

    def save_inference(self, text, speaker, filepath):
        """Synthesize ``text`` with ``speaker`` reference audio and write a WAV.

        Args:
            text: Text to synthesize.
            speaker: Path to the reference audio file (same role as
                ``reference_file_path`` in :meth:`inference`).
            filepath: Destination audio file path.
        """
        wav = self.inference(text, speaker)
        # The model may return a torch tensor; soundfile needs a numpy array.
        if torch.is_tensor(wav):
            wav = wav.detach().cpu().numpy()
        # NOTE(review): XTTS-style models typically emit 24 kHz audio; read
        # the rate from the config when available and fall back to 24000.
        # Confirm against the actual model config.
        audio_cfg = getattr(self.config, "audio", None)
        sample_rate = getattr(audio_cfg, "output_sample_rate", None) or 24000
        sf.write(filepath, wav, sample_rate)