JustinLin610
update
10b0761
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
import argparse
import logging
from pathlib import Path
import soundfile as sf
from examples.speech_to_text.prep_mustc_data import (
MUSTC
)
from tqdm import tqdm
# Module-level logger named after this module.
log = logging.getLogger(__name__)
def main(args):
    """Export one ``MUSTC`` dataset split to WAV files plus two index files.

    For every item yielded by the dataset this writes
    ``<output>/<utt_id>.wav`` and appends one line to each of:

    * ``<output>/<split>.<lang>`` -- the item's ``text`` field;
    * ``<output>/<split>.wav_list`` -- the path of the WAV file just written.

    Args:
        args: Parsed command-line namespace. The attributes read here are
            ``data_root``, ``lang``, ``split`` and ``output``.

    Raises:
        AssertionError: If ``<data_root>/en-<lang>`` is not a directory.
    """
    root = Path(args.data_root).absolute()
    lang = args.lang
    split = args.split

    cur_root = root / f"en-{lang}"
    # Checked before anything is created so a bad --data-root leaves no output.
    assert cur_root.is_dir(), (
        f"{cur_root.as_posix()} does not exist. Skipped."
    )

    dataset = MUSTC(root.as_posix(), lang, split)

    output = Path(args.output).absolute()
    # parents=True lets --output point at a not-yet-existing nested directory.
    output.mkdir(parents=True, exist_ok=True)

    text_path = output / f"{split}.{lang}"
    wav_list_path = output / f"{split}.wav_list"

    # Context managers guarantee both files are flushed and closed even if the
    # export loop raises part-way through. The encoding is pinned to UTF-8 so
    # the result does not depend on the locale of the machine running this.
    with open(text_path, "w", encoding="utf-8") as f_text, open(
        wav_list_path, "w", encoding="utf-8"
    ) as f_wav_list:
        for waveform, sample_rate, _, text, _, utt_id in tqdm(dataset):
            wav_path = output / f"{utt_id}.wav"
            # NOTE(review): waveform is presumably a tensor with a leading
            # channel axis of size 1 -- confirm against the MUSTC dataset.
            sf.write(
                wav_path,
                waveform.squeeze(0).numpy(),
                samplerate=int(sample_rate),
            )
            f_text.write(text + "\n")
            f_wav_list.write(str(wav_path) + "\n")
if __name__ == "__main__":
    # Command-line entry point: declare the options as a table, register them
    # in order, then hand the parsed namespace to main().
    cli = argparse.ArgumentParser()
    option_table = (
        (("--data-root", "-d"), {"required": True, "type": str}),
        (("--task",), {"required": True, "type": str, "choices": ["asr", "st"]}),
        (("--lang",), {"required": True, "type": str}),
        (("--output",), {"required": True, "type": str}),
        (("--split",), {"required": True, "choices": MUSTC.SPLITS}),
    )
    for flags, settings in option_table:
        cli.add_argument(*flags, **settings)
    main(cli.parse_args())