# File size: 1,388 Bytes
# 45ee559
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
import random

import numpy as np
import torch

from TTS.vocoder.configs import WavernnConfig
from TTS.vocoder.models.wavernn import Wavernn, WavernnArgs


def test_wavernn():
    """Check Wavernn output shapes for each output mode and for inference.

    A model is built for the "mold", "gauss" and 4-bit quantized modes in
    turn. Each one is run on the same random inputs and its forward output
    shape is checked. The quantized model is then run through
    ``inference`` on a mel input of random length and the length of the
    result is checked.
    """
    config = WavernnConfig()
    config.model_args = WavernnArgs(
        rnn_dims=512,
        fc_dims=512,
        mode="mold",
        mulaw=False,
        pad=2,
        use_aux_net=True,
        use_upsample_net=True,
        upsample_factors=[4, 8, 8],  # 4 * 8 * 8 == 256, the hop_length set below
        feat_dims=80,
        compute_dims=128,
        res_out_dims=128,
        num_res_blocks=10,
    )
    config.audio.hop_length = 256
    config.audio.sample_rate = 2048

    batch_size, num_samples = 2, 1280
    dummy_x = torch.rand((batch_size, num_samples))
    # NOTE(review): 9 frames with pad=2 on each side leaves 5 frames, and
    # 5 * 256 == 1280 == num_samples -- presumably intentional; confirm.
    dummy_m = torch.rand((batch_size, 80, 9))
    # Unseeded on purpose (as before); the value is reported on failure so a
    # failing run can be reproduced.
    y_size = random.randrange(20, 60)
    dummy_y = torch.rand((80, y_size))

    # (mode, expected size of the last output dimension)
    mode_to_channels = (
        ("mold", 30),
        ("gauss", 2),
        (4, 2**4),  # integer mode: quantized output with 2**bits classes
    )
    for mode, channels in mode_to_channels:
        config.model_args.mode = mode
        model = Wavernn(config)
        output = model(dummy_x, dummy_m)
        expected = (batch_size, num_samples, channels)
        # Element-wise comparison of the dimensions; also False on rank mismatch.
        assert np.array_equal(output.shape, expected), f"mode={mode!r}: {tuple(output.shape)}"

    # `model` is still the quantized-mode instance built by the last loop pass.
    output = model.inference(dummy_y, True, 5500, 550)
    # 256 matches the configured hop_length; assumes inference yields
    # hop_length samples for every input frame but one -- TODO confirm.
    expected = (256 * (y_size - 1),)
    assert np.array_equal(output.shape, expected), f"y_size={y_size}: {tuple(output.shape)}"