File size: 4,360 Bytes
46a75d7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
import math
import os
import unittest
from dataclasses import dataclass

import librosa
import numpy as np
from coqpit import Coqpit

from tests import get_tests_input_path, get_tests_output_path, get_tests_path
from TTS.utils.audio import numpy_transforms as np_transforms

# Path returned by the `tests` package helper `get_tests_path()`.
TESTS_PATH = get_tests_path()
# "audio_tests" sub-directory of the path returned by `get_tests_output_path()`.
OUT_PATH = os.path.join(get_tests_output_path(), "audio_tests")
# Sample audio file that the test cases in this module load.
WAV_FILE = os.path.join(get_tests_input_path(), "example_1.wav")

# Runs at import time; `exist_ok=True` makes an already existing directory a no-op.
os.makedirs(OUT_PATH, exist_ok=True)


# pylint: disable=no-self-use


class TestNumpyTransforms(unittest.TestCase):
    """Unit tests for the functions of ``TTS.utils.audio.numpy_transforms``.

    Each test runs against a fresh ``AudioConfig`` instance and the sample
    waveform loaded from ``WAV_FILE`` (both prepared in ``setUp``).
    """

    def setUp(self) -> None:
        """Create the audio config, load the sample waveform and seed the RNG."""

        @dataclass
        class AudioConfig(Coqpit):
            sample_rate: int = 22050
            fft_size: int = 1024
            num_mels: int = 256
            mel_fmax: int = 1800
            mel_fmin: int = 0
            hop_length: int = 256
            win_length: int = 1024
            pitch_fmax: int = 640
            pitch_fmin: int = 1
            trim_db: int = -1
            min_silence_sec: float = 0.01
            gain: float = 1.0
            base: float = 10.0

        self.config = AudioConfig()
        self.sample_wav, _ = librosa.load(WAV_FILE, sr=self.config.sample_rate)
        # Fixed seed: a failure on random input must be reproducible on re-run.
        self.rng = np.random.RandomState(0)

    def test_build_mel_basis(self):
        """Check if the mel basis is correctly built"""
        print(" > Testing mel basis building.")
        mel_basis = np_transforms.build_mel_basis(**self.config)
        self.assertEqual(mel_basis.shape, (self.config.num_mels, self.config.fft_size // 2 + 1))

    def test_millisec_to_length(self):
        """Check if the conversion from milliseconds to length is correct"""
        print(" > Testing millisec to length conversion.")
        win_len, hop_len = np_transforms.millisec_to_length(
            frame_length_ms=1000, frame_shift_ms=12.5, sample_rate=self.config.sample_rate
        )
        self.assertEqual(hop_len, int(12.5 / 1000.0 * self.config.sample_rate))
        self.assertEqual(win_len, self.config.sample_rate)

    def test_amplitude_db_conversion(self):
        """Check that ``db_to_amp`` inverts ``amp_to_db`` up to 5 decimals."""
        di = self.rng.rand(11)
        o1 = np_transforms.amp_to_db(x=di, gain=1.0, base=10)
        o2 = np_transforms.db_to_amp(x=o1, gain=1.0, base=10)
        np.testing.assert_almost_equal(di, o2, decimal=5)

    def test_preemphasis_deemphasis(self):
        """Check that ``deemphasis`` inverts ``preemphasis`` up to 5 decimals."""
        di = self.rng.rand(11)
        o1 = np_transforms.preemphasis(x=di, coeff=0.95)
        o2 = np_transforms.deemphasis(x=o1, coeff=0.95)
        np.testing.assert_almost_equal(di, o2, decimal=5)

    def test_spec_to_mel(self):
        """Check the output shape of ``spec_to_mel`` for a random spectrogram."""
        mel_basis = np_transforms.build_mel_basis(**self.config)
        spec = self.rng.rand(self.config.fft_size // 2 + 1, 20)  # [C, T]
        mel = np_transforms.spec_to_mel(spec=spec, mel_basis=mel_basis)
        self.assertEqual(mel.shape, (self.config.num_mels, 20))

    def test_mel_to_spec(self):
        """Check the output shape of ``mel_to_spec`` for a random mel spectrogram.

        This method used to be named ``mel_to_spec``; without the ``test``
        prefix the test loader never collected it, so the check never ran.
        """
        mel_basis = np_transforms.build_mel_basis(**self.config)
        mel = self.rng.rand(self.config.num_mels, 20)  # [C, T]
        spec = np_transforms.mel_to_spec(mel=mel, mel_basis=mel_basis)
        self.assertEqual(spec.shape, (self.config.fft_size // 2 + 1, 20))

    def test_wav_to_spec(self):
        """Check the output shape of ``wav_to_spec`` for the sample waveform."""
        spec = np_transforms.wav_to_spec(wav=self.sample_wav, **self.config)
        self.assertEqual(
            spec.shape, (self.config.fft_size // 2 + 1, math.ceil(self.sample_wav.shape[0] / self.config.hop_length))
        )

    def test_wav_to_mel(self):
        """Check the output shape of ``wav_to_mel`` for the sample waveform."""
        mel_basis = np_transforms.build_mel_basis(**self.config)
        mel = np_transforms.wav_to_mel(wav=self.sample_wav, mel_basis=mel_basis, **self.config)
        self.assertEqual(
            mel.shape, (self.config.num_mels, math.ceil(self.sample_wav.shape[0] / self.config.hop_length))
        )

    def test_compute_f0(self):
        """Check that ``compute_f0`` yields one value per mel spectrogram frame."""
        pitch = np_transforms.compute_f0(x=self.sample_wav, **self.config)
        mel_basis = np_transforms.build_mel_basis(**self.config)
        mel = np_transforms.wav_to_mel(wav=self.sample_wav, mel_basis=mel_basis, **self.config)
        # assertEqual instead of a bare assert: not stripped under ``python -O``
        # and reports both values on failure.
        self.assertEqual(pitch.shape[0], mel.shape[1])

    def test_load_wav(self):
        """Check ``load_wav`` with and without resampling.

        Without resampling the result must have as many samples as
        ``self.sample_wav``; resampling to 16 kHz must change that count.
        """
        # Use the configured rate so this stays in sync with ``self.sample_wav``.
        wav = np_transforms.load_wav(filename=WAV_FILE, resample=False, sample_rate=self.config.sample_rate)
        wav_resample = np_transforms.load_wav(filename=WAV_FILE, resample=True, sample_rate=16000)
        self.assertEqual(wav.shape, (self.sample_wav.shape[0],))
        self.assertNotEqual(wav_resample.shape, (self.sample_wav.shape[0],))