File size: 5,795 Bytes
5019931
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
import argparse
import os
import time
from concurrent.futures import ProcessPoolExecutor
from typing import NoReturn

import h5py
import librosa
import musdb
import numpy as np

from bytesep.utils import float32_to_int16

# Source types of the MUSDB18 dataset.
SOURCE_TYPES = ["vocals", "drums", "bass", "other", "accompaniment"]


def pack_audios_to_hdf5s(args) -> NoReturn:
    r"""Pack (resampled) audio files into hdf5 files to speed up loading.

    Args:
        dataset_dir: str
        subset: str, 'train' | 'test'
        split: str, '' | 'train' | 'valid'
        hdf5s_dir: str, directory to write out hdf5 files
        sample_rate: int
        channels_num: int
        mono: bool

    Returns:
        NoReturn
    """

    # arguments & parameters
    dataset_dir = args.dataset_dir
    subset = args.subset
    split = None if args.split == "" else args.split
    hdf5s_dir = args.hdf5s_dir
    sample_rate = args.sample_rate
    channels = args.channels

    mono = True if channels == 1 else False
    source_types = SOURCE_TYPES
    resample_type = "kaiser_fast"

    # Paths
    os.makedirs(hdf5s_dir, exist_ok=True)

    # Dataset of corresponding subset and split.
    mus = musdb.DB(root=dataset_dir, subsets=[subset], split=split)
    print("Subset: {}, Split: {}, Total pieces: {}".format(subset, split, len(mus)))

    params = []  # A list of params for multiple processing.

    for track_index in range(len(mus.tracks)):

        param = (
            dataset_dir,
            subset,
            split,
            track_index,
            source_types,
            mono,
            sample_rate,
            resample_type,
            hdf5s_dir,
        )

        params.append(param)

    # Uncomment for debug.
    # write_single_audio_to_hdf5(params[0])
    # os._exit(0)

    pack_hdf5s_time = time.time()

    with ProcessPoolExecutor(max_workers=None) as pool:
        # Maximum works on the machine
        pool.map(write_single_audio_to_hdf5, params)

    print("Pack hdf5 time: {:.3f} s".format(time.time() - pack_hdf5s_time))


def write_single_audio_to_hdf5(param) -> NoReturn:
    r"""Write single audio into hdf5 file."""
    (
        dataset_dir,
        subset,
        split,
        track_index,
        source_types,
        mono,
        sample_rate,
        resample_type,
        hdf5s_dir,
    ) = param

    # Dataset of corresponding subset and split.
    mus = musdb.DB(root=dataset_dir, subsets=[subset], split=split)
    track = mus.tracks[track_index]

    # Path to write out hdf5 file.
    hdf5_path = os.path.join(hdf5s_dir, "{}.h5".format(track.name))

    with h5py.File(hdf5_path, "w") as hf:

        hf.attrs.create("audio_name", data=track.name.encode(), dtype="S100")
        hf.attrs.create("sample_rate", data=sample_rate, dtype=np.int32)

        for source_type in source_types:

            audio = track.targets[source_type].audio.T
            # (channels_num, audio_samples)

            # Preprocess audio to mono / stereo, and resample.
            audio = preprocess_audio(
                audio, mono, track.rate, sample_rate, resample_type
            )
            # audio = load_audio(audio_path=audio_path, mono=mono, sample_rate=sample_rate)
            # (channels_num, audio_samples) | (audio_samples,)

            hf.create_dataset(
                name=source_type, data=float32_to_int16(audio), dtype=np.int16
            )

        # Mixture
        audio = track.audio.T
        # (channels_num, audio_samples)

        # Preprocess audio to mono / stereo, and resample.
        audio = preprocess_audio(audio, mono, track.rate, sample_rate, resample_type)
        # (channels_num, audio_samples)

        hf.create_dataset(name="mixture", data=float32_to_int16(audio), dtype=np.int16)

    print("{} Write to {}, {}".format(track_index, hdf5_path, audio.shape))


def preprocess_audio(audio, mono, origin_sr, sr, resample_type) -> np.array:
    r"""Preprocess audio to mono / stereo, and resample.

    Args:
        audio: (channels_num, audio_samples), input audio
        mono: bool
        origin_sr: float, original sample rate
        sr: float, target sample rate
        resample_type: str, e.g., 'kaiser_fast'

    Returns:
        output: ndarray, output audio
    """
    if mono:
        audio = np.mean(audio, axis=0)
        # (audio_samples,)

    output = librosa.core.resample(
        audio, orig_sr=origin_sr, target_sr=sr, res_type=resample_type
    )
    # (audio_samples,) | (channels_num, audio_samples)

    if output.ndim == 1:
        output = output[None, :]
        # (1, audio_samples,)

    return output


if __name__ == "__main__":
    parser = argparse.ArgumentParser()

    parser.add_argument(
        "--dataset_dir",
        type=str,
        required=True,
        help="Directory of the MUSDB18 dataset.",
    )
    parser.add_argument(
        "--subset",
        type=str,
        required=True,
        choices=["train", "test"],
        help="Train subset: 100 pieces; test subset: 50 pieces.",
    )
    parser.add_argument(
        "--split",
        type=str,
        required=True,
        choices=["", "train", "valid"],
        help="Use '' to use all 100 pieces to train. Use 'train' to use 86 \
            pieces for train, and use 'test' to use 14 pieces for valid.",
    )
    parser.add_argument(
        "--hdf5s_dir",
        type=str,
        required=True,
        help="Directory to write out hdf5 files.",
    )
    parser.add_argument("--sample_rate", type=int, required=True, help="Sample rate.")
    parser.add_argument(
        "--channels", type=int, required=True, help="Use 1 for mono, 2 for stereo."
    )

    # Parse arguments.
    args = parser.parse_args()

    # Pack audios into hdf5 files.
    pack_audios_to_hdf5s(args)