import logging
import os
import shutil
import subprocess
from typing import Tuple

import numpy as np
from torch import no_grad, package

logger = logging.getLogger(__name__)


class PreTrainedPipeline():
    """Text-to-speech pipeline backed by a ``torch.package`` archive.

    The archive is expected at ``<path>/model.pt`` and must expose a pickled
    object under package ``"tts_models"``, resource ``"model"``. That object
    is used through its ``tts(text, **kwargs)`` method and its
    ``output_sample_rate`` attribute.
    """

    def __init__(self, path: str):
        """Prepare the runtime and load the synthesizer.

        Args:
            path (:obj:`str`):
                Directory that contains ``model.pt``.
        """
        # espeak-ng is a system package, so it has to be present on the host
        # before the synthesizer is used.
        self._ensure_espeak()

        # Init model
        model_path = os.path.join(path, "model.pt")
        importer = package.PackageImporter(model_path)
        # NOTE: load_pickle unpickles data from the archive; only point this
        # at model files from a trusted source.
        self.synt = importer.load_pickle("tts_models", "model")

        # Keyword arguments forwarded unchanged to every ``tts`` call.
        self.tts_kwargs = {
            "speaker_name": "uk",
            "language_name": "uk",
        }
        self.sampling_rate = self.synt.output_sample_rate

    @staticmethod
    def _ensure_espeak() -> None:
        """Install espeak-ng through apt-get unless it is already on PATH.

        Installation is best-effort: a failure is logged as a warning and
        never raised, so construction proceeds exactly as it would have
        when the install command failed silently.
        """
        if shutil.which("espeak-ng") is not None:
            # Already available; skip the slow package index refresh.
            return

        commands = (
            ["apt-get", "update", "-y"],
            ["apt-get", "install", "espeak-ng", "-y"],
        )
        for command in commands:
            try:
                completed = subprocess.run(
                    command,
                    universal_newlines=True,
                    start_new_session=True,
                )
            except OSError as err:
                # e.g. apt-get does not exist on this system.
                logger.warning("Could not run %s: %s", " ".join(command), err)
                return
            if completed.returncode != 0:
                # Same short-circuit as ``update && install``: do not attempt
                # the install when the update step failed.
                logger.warning(
                    "%s exited with status %d; espeak-ng may be unavailable",
                    " ".join(command),
                    completed.returncode,
                )
                return

    def __call__(self, inputs: str) -> Tuple[np.ndarray, int]:
        """
        Args:
            inputs (:obj:`str`):
                The text to generate audio from
        Return:
            A :obj:`np.ndarray` and a :obj:`int`: The raw waveform as a
            float32 numpy array, and the sampling rate as an int.
        """
        # Inference only: no autograd bookkeeping is needed.
        with no_grad():
            waveforms = self.synt.tts(inputs, **self.tts_kwargs)
            waveforms = np.array(waveforms, dtype=np.float32)
        return waveforms, self.sampling_rate