File size: 1,290 Bytes
b546670
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import os
import pickle
import timeit

import numpy as np
import onnxruntime as ort

from nix.tokenizers.tokenizer_en import NixTokenizerEN

class NixTTSInference:
    """
    Text-to-speech inference wrapper around a tokenizer and two ONNX sessions.

    The model directory passed to the constructor must contain
    ``tokenizer_state.pkl``, ``encoder.onnx`` and ``decoder.onnx``.
    """

    def __init__(
        self,
        model_dir,
    ):
        """
        Load the tokenizer state and the encoder/decoder ONNX sessions.

        Args:
            model_dir: Path of the directory holding ``tokenizer_state.pkl``,
                ``encoder.onnx`` and ``decoder.onnx``.
        """
        # Load tokenizer
        # NOTE(security): unpickling can execute arbitrary code, so model_dir
        # must only ever point at a trusted model directory.
        tokenizer_state_path = os.path.join(model_dir, "tokenizer_state.pkl")
        # Use a context manager so the file handle is closed deterministically
        # instead of being left to the garbage collector.
        with open(tokenizer_state_path, "rb") as state_file:
            tokenizer_state = pickle.load(state_file)
        self.tokenizer = NixTokenizerEN(tokenizer_state)
        # Load TTS model
        self.encoder = ort.InferenceSession(os.path.join(model_dir, "encoder.onnx"))
        self.decoder = ort.InferenceSession(os.path.join(model_dir, "decoder.onnx"))

    def tokenize(
        self,
        text,
    ):
        """
        Tokenize a single input text.

        The text is wrapped in a one-element list before being handed to the
        tokenizer, so the tokenizer always sees a batch of size one.

        Args:
            text: The text to tokenize.

        Returns:
            A tuple ``(c, c_lengths, phonemes)`` where ``c`` and ``c_lengths``
            are the first two tokenizer outputs converted to ``np.int64``
            arrays, and ``phonemes`` is the third tokenizer output returned
            unchanged.
        """
        # Tokenize input text
        c, c_lengths, phonemes = self.tokenizer([text])

        return (
            np.array(c, dtype=np.int64),
            np.array(c_lengths, dtype=np.int64),
            phonemes,
        )

    def vocalize(
        self,
        c,
        c_lengths,
    ):
        """
        Single-batch TTS inference

        Args:
            c: Encoder input fed under the name ``"c"`` (presumably the token
                array returned by ``tokenize`` -- confirm against the model).
            c_lengths: Encoder input fed under the name ``"c_lengths"``
                (presumably the lengths array returned by ``tokenize``).

        Returns:
            The first output of the decoder session.
        """
        # Infer latent samples from encoder; the third encoder output is the
        # one passed on to the decoder.
        encoder_outputs = self.encoder.run(
            None,
            {
                "c": c,
                "c_lengths": c_lengths,
            },
        )
        z = encoder_outputs[2]
        # Decode raw audio with decoder
        decoder_outputs = self.decoder.run(
            None,
            {
                "z": z,
            },
        )
        xw = decoder_outputs[0]

        return xw