File size: 5,577 Bytes
9df2e22
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
import matplotlib.pyplot as plt
import numpy as np
import soundfile as sf
from librosa.filters import mel
from scipy import signal
from scipy.fftpack import fft


class Audio:
    """
    Audio clip: sample data, its sample rate, and note timestamps.

    Args:
        data: numpy array of samples, indexed ``(frames,)`` for mono or
            ``(frames, channels)`` for multi-channel audio.
        samplerate: sampling rate; timestamps are multiplied by it to obtain
            sample indices, so with a rate in Hz timestamps are in seconds.
        stereo: when ``False`` the data is mixed down to mono by averaging
            its first two channels; when ``True`` (default) it is stored
            exactly as given.

    Attributes:
        data: the (possibly down-mixed) sample array. ``synthesize`` modifies
            it in place.
        samplerate: the sampling rate passed to the constructor.
        timestamp: list of notes, empty after construction. Each entry is
            either a plain time or a ``(time, note_type)`` pair.

    Example::

        data, samplerate = sf.read(filename)
        song = Audio(data, samplerate, stereo=False)
        song.timestamp = [(0.5, 1), (1.0, 2)]
        song.synthesize()
        song.save("with_notes.wav")
    """

    # Note types that trigger the "don" sound / the "ka" sound in synthesize().
    _DON_TYPES = (1, 3, 5, 6, 7)
    _KA_TYPES = (2, 4)

    def __init__(self, data, samplerate, stereo=True):
        self.data = data
        self.samplerate = samplerate
        if stereo is False:
            self.data = self._to_mono(self.data)
        self.timestamp = []

    @staticmethod
    def _to_mono(samples):
        """Average the first two channels of ``samples``; pass mono through."""
        if np.ndim(samples) < 2:
            return samples
        if samples.shape[1] == 1:
            return samples[:, 0]
        return (samples[:, 0] + samples[:, 1]) / 2

    @classmethod
    def _load_mono(cls, source):
        """Return ``source`` (a file for ``sf.read`` or a loaded array) as mono."""
        if isinstance(source, np.ndarray):
            return cls._to_mono(source)
        return cls._to_mono(sf.read(source)[0])

    @staticmethod
    def _stamp_time(stamp):
        """Return the time of a timestamp entry (plain time or ``(time, type)``)."""
        return stamp[0] if isinstance(stamp, (tuple, list)) else stamp

    def _overlay(self, time, sound):
        """Add ``sound`` into ``self.data`` starting at ``time``; skip if it does not fit."""
        start = int(time * self.samplerate)
        stop = start + len(sound)
        # A sound that would start before the clip or run past its end is
        # dropped rather than truncated.
        if start < 0 or stop > self.data.shape[0]:
            return
        if np.ndim(self.data) > 1:
            # Multi-channel clip: mix the mono sound into every channel.
            self.data[start:stop] += sound[:, np.newaxis]
        else:
            self.data[start:stop] += sound

    def plotaudio(self, start_t, stop_t):
        """
        Plot the samples between sample indices ``start_t`` and ``stop_t``.

        For multi-channel data only the first channel is drawn.
        """
        segment = self.data[start_t:stop_t]
        if np.ndim(segment) > 1:
            segment = segment[:, 0]
        plt.plot(np.linspace(start_t, stop_t, stop_t - start_t), segment)
        plt.show()

    def save(self, filename, start_t=0, stop_t=None):
        """
        Write ``self.data[start_t:stop_t]`` to ``filename`` with ``sf.write``.

        Args:
            filename: output file passed to ``sf.write``.
            start_t: first sample index to write.
            stop_t: sample index to stop at; ``None`` means the end of the data.
        """
        if stop_t is None:
            stop_t = self.data.shape[0]
        sf.write(filename, self.data[start_t:stop_t], self.samplerate)

    def synthesize(self, diff=True, don="./asset/don.wav", ka="./asset/ka.wav"):
        """
        Mix hit sounds into ``self.data`` (in place) at every timestamp.

        Args:
            diff: ``True`` to choose the sound per note type (timestamps must
                be ``(time, note_type)`` pairs: types 1, 3, 5, 6, 7 play the
                don sound, types 2 and 4 the ka sound, others are ignored);
                ``"don"`` or ``"ka"`` to play that one sound at every
                timestamp. Any other value mixes nothing.
            don: file of the don sound (read with ``sf.read``), or an
                already-loaded sample array.
            ka: file of the ka sound, or an already-loaded sample array.

        Hit sounds are reduced to mono before mixing. Notes whose sound would
        not fit entirely inside the clip are skipped.
        """
        donsound = self._load_mono(don)
        kasound = self._load_mono(ka)

        if diff is True:
            for stamp in self.timestamp:
                if stamp[1] in self._DON_TYPES:
                    self._overlay(stamp[0], donsound)
                elif stamp[1] in self._KA_TYPES:
                    self._overlay(stamp[0], kasound)
        elif diff == "don":
            for stamp in self.timestamp:
                self._overlay(self._stamp_time(stamp), donsound)
        elif diff == "ka":
            for stamp in self.timestamp:
                self._overlay(self._stamp_time(stamp), kasound)


def make_frame(data, nhop, nfft):
    """
    Helper for fft_and_melscale: slice a 1-D signal into overlapping frames.

    Finely time-sliced pieces of the signal are used as training data, so
    this returns windows of ``nfft`` samples, each shifted by ``nhop``
    samples from the previous one.

    Args:
        data: 1-D sample array.
        nhop: hop size, i.e. the offset in samples between frame starts.
        nfft: number of samples per frame.

    Returns:
        A new array of shape ``(len(data) // nhop, nfft)``. The signal is
        zero-padded at the end so the last frames are always full length;
        an input shorter than ``nhop`` yields an empty ``(0, nfft)`` array.
    """

    n_frames = data.shape[0] // nhop
    padded = np.concatenate((data, np.zeros(nfft)))  # zero padding
    # Every nfft-long window of the padded signal, as a view; keep each
    # nhop-th one and copy so the caller gets an independent array.
    windows = np.lib.stride_tricks.sliding_window_view(padded, nfft)
    return np.array(windows[::nhop][:n_frames])


# @jit
def fft_and_melscale(
    song,
    nhop=512,
    nffts=(1024, 2048, 4096),
    mel_nband=80,
    mel_freqlo=27.5,
    mel_freqhi=16000.0,
    include_zero_cross=False,
):
    """
    Compute log mel-scaled spectra of ``song.data`` at several FFT sizes.

    fft: for each frame length in ``nffts`` the signal is cut into frames
    (see ``make_frame``), windowed with a Blackman-Harris window and passed
    through a fast Fourier transform.
    melscale: the frequency dimension is reduced with a mel filter bank and
    the result is put on a log10 scale.

    Args:
        song: object with ``data`` (1-D samples) and ``samplerate`` attributes.
        nhop: hop size in samples between consecutive frames.
        nffts: iterable of frame lengths; one feature channel is produced
            for each.
        mel_nband: number of mel bands.
        mel_freqlo: lowest frequency of the mel filter bank.
        mel_freqhi: highest frequency of the mel filter bank.
        include_zero_cross: when true, also store the indices at which the
            sign of ``song.data`` changes in ``song.zero_crossing`` and
            print them.

    Returns:
        ``np.array`` stacking one log-mel matrix per entry of ``nffts``.
    """

    feat_channels = []

    for nfft in nffts:
        window = signal.windows.blackmanharris(nfft)
        filt = mel(
            sr=song.samplerate,
            n_fft=nfft,
            n_mels=mel_nband,
            fmin=mel_freqlo,
            fmax=mel_freqhi,
        )

        # One row per frame, nfft samples each.
        frame = make_frame(song.data, nhop, nfft)

        # Keep only the non-negative-frequency half of the spectrum.
        spectrum = fft(window * frame)[:, : nfft // 2 + 1]
        # Project the power spectrum onto the mel bands.
        melspec = np.dot(filt, np.transpose(np.abs(spectrum) ** 2))
        # The 0.1 offset keeps log10 finite for silent frames.
        feat_channels.append(20 * np.log10(melspec + 0.1))

    if include_zero_cross:
        song.zero_crossing = np.where(np.diff(np.sign(song.data)))[0]
        print(song.zero_crossing)

    return np.array(feat_channels)