File size: 1,364 Bytes
998b155
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
# coding: utf-8
import sys
from os.path import dirname, join
tacotron_lib_dir = join(dirname(__file__), "..", "lib", "tacotron")
sys.path.append(tacotron_lib_dir)
from text import text_to_sequence, symbols
import torch
from torch.autograd import Variable
from tacotron_pytorch import Tacotron
import numpy as np


def _pad(seq, max_len):
    return np.pad(seq, (0, max_len - len(seq)),
                  mode='constant', constant_values=0)


def test_taco():
    """Smoke-test the Tacotron forward pass on a tiny two-sentence batch.

    Converts two texts to integer sequences with ``text_to_sequence``,
    zero-pads them to a common length, and builds a random target tensor
    of shape ``(B, T_out, D_out)``. The model is first called with the
    targets, and the third returned value (printed as the attention) is
    asserted to have shape ``(B, T_out // r, max_len)``. The model is then
    called again without targets to check that this path runs as well.
    """
    B, T_out, D_out = 2, 400, 80
    r = 5  # passed to Tacotron as ``r``; presumably the reduction factor
    T_encoder = T_out // r

    texts = ["Thank you very much.", "Hello"]
    # ``np.int`` was a plain alias of the builtin ``int``; it was deprecated
    # in NumPy 1.20 and removed in 1.24. An explicit 64-bit dtype works on
    # every NumPy version and matches the ``torch.LongTensor`` built below.
    seqs = [np.array(text_to_sequence(t, ["english_cleaners"]),
                     dtype=np.int64) for t in texts]
    input_lengths = np.array([len(s) for s in seqs])
    max_len = np.max(input_lengths)
    # Stack the variable-length sequences into one (B, max_len) array.
    seqs = np.array([_pad(s, max_len) for s in seqs])

    x = torch.LongTensor(seqs)
    y = torch.rand(B, T_out, D_out)
    x = Variable(x)
    y = Variable(y)

    model = Tacotron(n_vocab=len(symbols), r=r)

    print("Encoder input shape: ", x.size())
    print("Decoder input shape: ", y.size())
    a, b, c = model(x, y, input_lengths=input_lengths)
    print("Mel shape:", a.size())
    print("Linear shape:", b.size())
    print("Attention shape:", c.size())

    assert c.size() == (B, T_encoder, max_len)

    # Test greedy decoding: no decoder targets are supplied here. Only
    # checks that the call completes; the outputs are not inspected.
    a, b, c = model(x, input_lengths=input_lengths)