# Page-scrape residue, not part of the script: "Spaces: Runtime error".
import argparse | |
import os | |
import time | |
from concurrent.futures import ProcessPoolExecutor | |
from typing import NoReturn | |
import h5py | |
import librosa | |
import musdb | |
import numpy as np | |
from bytesep.utils import float32_to_int16 | |
# Names of the MUSDB18 targets packed for every track; each one becomes a
# dataset of the same name in the per-track hdf5 file.
SOURCE_TYPES = ["vocals", "drums", "bass", "other", "accompaniment"]
def pack_audios_to_hdf5s(args) -> None:
    r"""Pack (resampled) audio files into hdf5 files to speed up loading.

    One hdf5 file is written per track; tracks are processed in parallel
    worker processes.

    Args:
        args: parsed command-line namespace with the attributes
            dataset_dir: str, directory of the MUSDB18 dataset
            subset: str, 'train' | 'test'
            split: str, '' | 'train' | 'valid'
            hdf5s_dir: str, directory to write out hdf5 files
            sample_rate: int, target sample rate
            channels: int, 1 selects mono; any other value leaves the
                channels untouched

    Returns:
        None

    Raises:
        Exception: any exception raised by a worker while writing a track
            is re-raised here instead of being silently dropped.
    """
    # Arguments & parameters.
    dataset_dir = args.dataset_dir
    subset = args.subset
    # An empty string on the command line means "no split"; it is handed to
    # musdb as None.
    split = None if args.split == "" else args.split
    hdf5s_dir = args.hdf5s_dir
    sample_rate = args.sample_rate
    mono = args.channels == 1
    resample_type = "kaiser_fast"

    os.makedirs(hdf5s_dir, exist_ok=True)

    # Dataset of the corresponding subset and split. It is only used here to
    # count the tracks; each worker re-opens the dataset by itself.
    mus = musdb.DB(root=dataset_dir, subsets=[subset], split=split)

    print("Subset: {}, Split: {}, Total pieces: {}".format(subset, split, len(mus)))

    # One parameter tuple per track, consumed by write_single_audio_to_hdf5.
    params = [
        (
            dataset_dir,
            subset,
            split,
            track_index,
            SOURCE_TYPES,
            mono,
            sample_rate,
            resample_type,
            hdf5s_dir,
        )
        for track_index in range(len(mus.tracks))
    ]

    # For debugging, a single track can be processed in-process with
    # write_single_audio_to_hdf5(params[0]).

    pack_hdf5s_time = time.time()

    # max_workers=None lets the executor pick the worker count for the machine.
    with ProcessPoolExecutor(max_workers=None) as pool:
        # Executor.map only re-raises a worker's exception when its result is
        # retrieved, so the iterator must be consumed for failures to surface.
        list(pool.map(write_single_audio_to_hdf5, params))

    print("Pack hdf5 time: {:.3f} s".format(time.time() - pack_hdf5s_time))
def write_single_audio_to_hdf5(param) -> None:
    r"""Write all sources and the mixture of a single track into one hdf5 file.

    The file is named after the track and stored in hdf5s_dir. It carries the
    attributes "audio_name" and "sample_rate", one int16 dataset per source
    type, and an int16 dataset called "mixture".

    Args:
        param: tuple of (dataset_dir, subset, split, track_index,
            source_types, mono, sample_rate, resample_type, hdf5s_dir)

    Returns:
        None
    """
    (
        dataset_dir,
        subset,
        split,
        track_index,
        source_types,
        mono,
        sample_rate,
        resample_type,
        hdf5s_dir,
    ) = param

    # Dataset of the corresponding subset and split, re-opened here because
    # only the track index is passed to this function.
    mus = musdb.DB(root=dataset_dir, subsets=[subset], split=split)
    track = mus.tracks[track_index]

    # Path to write out the hdf5 file.
    hdf5_path = os.path.join(hdf5s_dir, "{}.h5".format(track.name))

    def _named_audios():
        # Yield (dataset name, raw audio) lazily: every source first, the
        # mixture last, so only one signal is handled at a time.
        for source_type in source_types:
            yield source_type, track.targets[source_type].audio
        yield "mixture", track.audio

    with h5py.File(hdf5_path, "w") as hf:
        # NOTE(review): "S100" is a fixed-width 100-byte string; longer track
        # names would presumably be truncated - confirm if that matters.
        hf.attrs.create("audio_name", data=track.name.encode(), dtype="S100")
        hf.attrs.create("sample_rate", data=sample_rate, dtype=np.int32)

        for dataset_name, raw_audio in _named_audios():
            # Transpose to (channels_num, audio_samples), then convert to
            # mono / stereo and resample.
            audio = preprocess_audio(
                raw_audio.T, mono, track.rate, sample_rate, resample_type
            )
            hf.create_dataset(
                name=dataset_name, data=float32_to_int16(audio), dtype=np.int16
            )

    # `audio` holds the mixture here, since it is written last.
    print("{} Write to {}, {}".format(track_index, hdf5_path, audio.shape))
def preprocess_audio(
    audio: np.ndarray,
    mono: bool,
    origin_sr: float,
    sr: float,
    resample_type: str,
) -> np.ndarray:
    r"""Preprocess audio to mono / stereo, and resample.

    Args:
        audio: (channels_num, audio_samples), input audio
        mono: bool, average all channels into a single one when True
        origin_sr: float, original sample rate
        sr: float, target sample rate
        resample_type: str, e.g., 'kaiser_fast'

    Returns:
        output: ndarray, (1, audio_samples) when mono is True (or the input
            was one-dimensional), otherwise with the channel axis kept.
    """
    if mono:
        audio = np.mean(audio, axis=0)
        # (audio_samples,)

    if origin_sr == sr:
        # Rates already match, so there is nothing to resample.
        output = audio
    else:
        output = librosa.core.resample(
            audio, orig_sr=origin_sr, target_sr=sr, res_type=resample_type
        )
    # (audio_samples,) | (channels_num, audio_samples)

    if output.ndim == 1:
        # Always hand back a 2-D array with an explicit channel axis.
        output = output[None, :]
        # (1, audio_samples)

    return output
# Command-line entry point: parse the packing options and write one hdf5 file
# per track of the selected MUSDB18 subset / split.
if __name__ == "__main__":
    parser = argparse.ArgumentParser()

    parser.add_argument(
        "--dataset_dir",
        type=str,
        required=True,
        help="Directory of the MUSDB18 dataset.",
    )
    parser.add_argument(
        "--subset",
        type=str,
        required=True,
        choices=["train", "test"],
        help="Train subset: 100 pieces; test subset: 50 pieces.",
    )
    parser.add_argument(
        "--split",
        type=str,
        required=True,
        choices=["", "train", "valid"],
        # Adjacent literals instead of a backslash continuation, so that no
        # indentation whitespace ends up inside the help text.
        help="Use '' to use all 100 pieces to train. Use 'train' to use 86 "
        "pieces for train, and use 'valid' to use 14 pieces for valid.",
    )
    parser.add_argument(
        "--hdf5s_dir",
        type=str,
        required=True,
        help="Directory to write out hdf5 files.",
    )
    parser.add_argument("--sample_rate", type=int, required=True, help="Sample rate.")
    parser.add_argument(
        "--channels", type=int, required=True, help="Use 1 for mono, 2 for stereo."
    )

    # Parse arguments.
    args = parser.parse_args()

    # Pack audios into hdf5 files.
    pack_audios_to_hdf5s(args)