Spaces:
Runtime error
Runtime error
import argparse | |
import os | |
import pathlib | |
import time | |
from concurrent.futures import ProcessPoolExecutor | |
from typing import NoReturn | |
from bytesep.dataset_creation.pack_audios_to_hdf5s.instruments_solo import ( | |
write_single_audio_to_hdf5, | |
) | |
def pack_audios_to_hdf5s(args) -> None:
    r"""Pack (resampled) audio files into hdf5 files to speed up loading.

    Every file found under ``<dataset_dir>/wav48/<split>/<speaker_id>/`` is
    handed to ``write_single_audio_to_hdf5`` in a worker process, with the
    output path ``<hdf5s_dir>/<audio file stem>.h5``.

    Args:
        args: namespace (e.g. produced by argparse) with the attributes:
            dataset_dir: str, directory of the dataset
            split: str, only 'train' is supported
            hdf5s_dir: str, directory to write out hdf5 files
            sample_rate: int, forwarded to the worker
            channels: int, 1 selects mono

    Returns:
        None

    Raises:
        ValueError: if ``args.split`` is not 'train'.
        RuntimeError: if packing failed for at least one audio file. All
            files are attempted before this is raised.
    """
    # arguments & parameters
    dataset_dir = args.dataset_dir
    split = args.split
    hdf5s_dir = args.hdf5s_dir
    sample_rate = args.sample_rate
    channels = args.channels

    mono = channels == 1
    source_type = "speech"

    # Only pack data for training data. Raise explicitly instead of using
    # ``assert`` so that the check is not stripped when running with -O.
    if split != "train":
        raise ValueError(
            "Only the 'train' split can be packed, got {!r}.".format(split)
        )

    audios_dir = os.path.join(dataset_dir, "wav48", split)
    os.makedirs(hdf5s_dir, exist_ok=True)

    # One parameter tuple per audio file; the running index is the position
    # of the file in the (speaker, file name) sorted traversal.
    params = []
    for speaker_id in sorted(os.listdir(audios_dir)):
        speaker_audios_dir = os.path.join(audios_dir, speaker_id)

        for audio_name in sorted(os.listdir(speaker_audios_dir)):
            audio_path = os.path.join(speaker_audios_dir, audio_name)
            hdf5_path = os.path.join(
                hdf5s_dir, "{}.h5".format(pathlib.Path(audio_name).stem)
            )
            params.append(
                (
                    len(params),
                    audio_name,
                    source_type,
                    audio_path,
                    mono,
                    sample_rate,
                    hdf5_path,
                )
            )

    # For debugging, process a single item in the current process:
    # write_single_audio_to_hdf5(params[0])

    pack_hdf5s_time = time.time()

    failures = []
    # max_workers=None lets the executor pick its default worker count.
    with ProcessPoolExecutor(max_workers=None) as pool:
        futures = [
            pool.submit(write_single_audio_to_hdf5, param) for param in params
        ]

        # Retrieve the outcome of every job. Discarding the futures would
        # silently drop any exception raised inside a worker.
        for param, future in zip(params, futures):
            error = future.exception()
            if error is not None:
                failures.append((param[3], error))

    print("Pack hdf5 time: {:.3f} s".format(time.time() - pack_hdf5s_time))

    if failures:
        for failed_path, error in failures:
            print("Failed to pack {}: {!r}".format(failed_path, error))
        raise RuntimeError(
            "{} of {} audio files failed to be packed.".format(
                len(failures), len(params)
            )
        )
if __name__ == "__main__":
    # Command-line entry point: every option is mandatory and is registered
    # in the order listed below.
    option_specs = (
        (
            "--dataset_dir",
            {"type": str, "required": True, "help": "Directory of the VCTK dataset."},
        ),
        (
            "--split",
            {"type": str, "required": True, "choices": ["train", "test"]},
        ),
        (
            "--hdf5s_dir",
            {
                "type": str,
                "required": True,
                "help": "Directory to write out hdf5 files.",
            },
        ),
        (
            "--sample_rate",
            {"type": int, "required": True, "help": "Sample rate."},
        ),
        (
            "--channels",
            {
                "type": int,
                "required": True,
                "help": "Use 1 for mono, 2 for stereo.",
            },
        ),
    )

    cli = argparse.ArgumentParser()
    for flag, options in option_specs:
        cli.add_argument(flag, **options)

    # Parse the command line and pack the audios into hdf5 files.
    pack_audios_to_hdf5s(cli.parse_args())