Spaces:
Runtime error
Runtime error
import argparse | |
import os | |
import soundfile | |
from typing import NoReturn | |
import musdb | |
import numpy as np | |
from bytesep.utils import load_audio | |
def create_evaluation(args) -> NoReturn:
    r"""Randomly mix VCTK speech with MUSDB18 music and write out audios for
    evaluation.

    For every segment index n, one speech utterance and one music excerpt of
    the same number of samples are selected with a fixed random seed. The
    speech, the music and their sum are written to
    <evaluation_audios_dir>/test/{speech,music,mixture}/{n:04d}.wav.

    Args:
        args: namespace with the following attributes
            vctk_dataset_dir: str, the directory of the VCTK dataset
            musdb18_dataset_dir: str, the directory of the MUSDB18 dataset
            evaluation_audios_dir: str, the directory to write out randomly
                selected and mixed audio segments
            sample_rate: int
            channels: int, e.g., 1 | 2
            evaluation_segments_num: int

    Returns:
        Nothing; all results are written to disk.
    """

    # arguments & parameters
    vctk_dataset_dir = args.vctk_dataset_dir
    musdb18_dataset_dir = args.musdb18_dataset_dir
    evaluation_audios_dir = args.evaluation_audios_dir
    sample_rate = args.sample_rate
    channels = args.channels
    evaluation_segments_num = args.evaluation_segments_num
    mono = channels == 1

    split = 'test'

    # Fixed seed, so that repeated runs draw the same random numbers.
    random_state = np.random.RandomState(1234)

    # paths
    audios_dir = os.path.join(vctk_dataset_dir, "wav48", split)

    output_dirs = {}
    for source_type in ['speech', 'music', 'mixture']:
        output_dir = os.path.join(evaluation_audios_dir, split, source_type)
        os.makedirs(output_dir, exist_ok=True)
        output_dirs[source_type] = output_dir

    # Get VCTK audio paths. Sorting keeps the candidate order, and therefore
    # the seeded random selection, independent of the directory listing order.
    speech_audio_paths = []

    for speaker_id in sorted(os.listdir(audios_dir)):
        speaker_audios_dir = os.path.join(audios_dir, speaker_id)

        for audio_name in sorted(os.listdir(speaker_audios_dir)):
            speech_audio_paths.append(os.path.join(speaker_audios_dir, audio_name))

    # Get Musdb18 tracks.
    mus = musdb.DB(root=musdb18_dataset_dir, subsets=[split])
    track_indexes = np.arange(len(mus.tracks))

    for n in range(evaluation_segments_num):

        print('{} / {}'.format(n, evaluation_segments_num))
        output_name = '{:04d}.wav'.format(n)

        # Randomly select and write out a clean speech segment.
        speech_audio_path = random_state.choice(speech_audio_paths)

        speech_audio = load_audio(
            audio_path=speech_audio_path, mono=mono, sample_rate=sample_rate
        )
        # (channels_num, audio_samples)

        if channels == 2:
            # Duplicate the speech channel to get a two-channel signal.
            speech_audio = np.tile(speech_audio, (2, 1))
            # (channels_num, audio_samples)

        _write_out_audio(
            os.path.join(output_dirs['speech'], output_name), speech_audio, sample_rate
        )

        # Randomly select and write out a clean music segment.
        track_index = random_state.choice(track_indexes)
        track = mus[track_index]

        # Read the track once and reuse it for both the length and the slice.
        # Assumes track.audio is (audio_samples, channels_num), as implied by
        # the slicing and transpose below.
        # NOTE(review): the music is not resampled here; presumably
        # sample_rate has to equal the sample rate of the MUSDB18 tracks --
        # confirm against the configs that call this script.
        track_audio = track.audio

        segment_samples = speech_audio.shape[1]

        # The excerpt may start anywhere that still leaves room for a full
        # segment. (The previous upper bound was always 0, so every excerpt
        # was taken from the very beginning of the track.)
        max_start_sample = max(track_audio.shape[0] - segment_samples, 0)
        start_sample = int(random_state.uniform(0.0, max_start_sample))

        music_audio = track_audio[start_sample : start_sample + segment_samples, :].T
        # (channels_num, audio_samples)

        if mono and music_audio.shape[0] > 1:
            # Downmix so that music and mixture have a single channel like the
            # speech; otherwise the sum below would broadcast to multi-channel.
            music_audio = np.mean(music_audio, axis=0, keepdims=True)
            # (1, audio_samples)

        _write_out_audio(
            os.path.join(output_dirs['music'], output_name), music_audio, sample_rate
        )

        # Mix speech and music segments and write out a mixture segment.
        mixture_audio = speech_audio + music_audio
        # (channels_num, audio_samples)

        _write_out_audio(
            os.path.join(output_dirs['mixture'], output_name), mixture_audio, sample_rate
        )


def _write_out_audio(output_path, audio, sample_rate):
    r"""Write a (channels_num, audio_samples) array to output_path and log it."""
    # The array is transposed so that samples run along the first axis.
    soundfile.write(file=output_path, data=audio.T, samplerate=sample_rate)
    print("Write out to {}".format(output_path))
if __name__ == "__main__":

    # (flag, value type, help text) of every command-line option; all of them
    # are required.
    cli_options = (
        ("--vctk_dataset_dir", str, "The directory of the VCTK dataset."),
        ("--musdb18_dataset_dir", str, "The directory of the MUSDB18 dataset."),
        (
            "--evaluation_audios_dir",
            str,
            "The directory to write out randomly selected and mixed audio segments.",
        ),
        ("--sample_rate", int, "Sample rate"),
        ("--channels", int, "Audio channels, e.g, 1 or 2."),
        (
            "--evaluation_segments_num",
            int,
            "The number of segments to create for evaluation.",
        ),
    )

    arg_parser = argparse.ArgumentParser()

    for flag, value_type, help_text in cli_options:
        arg_parser.add_argument(flag, type=value_type, required=True, help=help_text)

    # Parse the command line and create the evaluation audios.
    create_evaluation(arg_parser.parse_args())