|
import os
import subprocess
import warnings
from typing import Tuple

import numpy as np
from torch import no_grad, package
|
|
|
|
|
|
|
|
|
|
|
class PreTrainedPipeline:
    """Text-to-speech pipeline backed by a model stored as a ``torch.package`` archive.

    Constructing the pipeline attempts to install ``espeak-ng`` through
    ``apt-get`` and then loads the packaged synthesizer from ``model.pt``
    inside the given directory. Calling the instance with a text string
    returns the synthesized waveform together with its sampling rate.
    """

    # Fixed shell command (no user input is interpolated into it); the ``&&``
    # makes the install step run only when ``apt-get update`` succeeded.
    _ESPEAK_INSTALL_CMD = "apt-get update -y && apt-get install espeak-ng -y"

    # Name of the torch.package archive expected inside the ``path`` directory.
    _MODEL_FILENAME = "model.pt"

    def __init__(self, path: str):
        """Install the system dependency and load the packaged synthesizer.

        Args:
            path (:obj:`str`):
                Directory that contains the ``model.pt`` archive.
        """
        install = subprocess.run(
            self._ESPEAK_INSTALL_CMD,
            shell=True,
            universal_newlines=True,
            start_new_session=True,
        )
        if install.returncode != 0:
            # Best effort only: the package may already be present, or the
            # process may lack the privileges to run apt-get. Surface the
            # failure instead of silently ignoring it, but do not abort.
            warnings.warn(
                "Installing espeak-ng failed with exit code "
                f"{install.returncode}; speech synthesis may not work.",
                RuntimeWarning,
            )

        model_path = os.path.join(path, self._MODEL_FILENAME)

        # NOTE: load_pickle unpickles objects from the archive, which can
        # execute arbitrary code -- only point ``path`` at trusted models.
        importer = package.PackageImporter(model_path)
        self.synt = importer.load_pickle("tts_models", "model")

        # Keyword arguments forwarded to every ``synt.tts`` call
        # ("uk" is presumably the Ukrainian voice/language -- confirm
        # against the packaged model).
        self.tts_kwargs = {
            "speaker_name": "uk",
            "language_name": "uk",
        }

        # Sampling rate reported by the loaded synthesizer.
        self.sampling_rate = self.synt.output_sample_rate

    def __call__(self, inputs: str) -> Tuple[np.ndarray, int]:
        """
        Args:
            inputs (:obj:`str`):
                The text to generate audio from
        Return:
            A :obj:`np.ndarray` and a :obj:`int`: The raw waveform as a float32
            numpy array, and the sampling rate as an int.
        """
        # Inference only: run the synthesizer with gradient tracking disabled.
        with no_grad():
            waveforms = self.synt.tts(inputs, **self.tts_kwargs)

        waveforms = np.array(waveforms, dtype=np.float32)
        return waveforms, self.sampling_rate