#!/usr/bin/env python3
"""Recipe for training a BPE tokenizer for Fluent Speech Commands.
The tokenizer coverts semantics into sub-word units that can
be used to train a language (LM) or an acoustic model (AM).
To run this recipe, do the following:
> python train.py hparams/tokenizer_bpe51.yaml
Authors
* Abdel Heba 2021
* Mirco Ravanelli 2021
* Loren Lugosch 2021
"""
import sys
import speechbrain as sb
from hyperpyyaml import load_hyperpyyaml
from speechbrain.utils.distributed import run_on_main

if __name__ == "__main__":
    # CLI:
    hparams_file, run_opts, overrides = sb.parse_arguments(sys.argv[1:])

    with open(hparams_file) as fin:
        hparams = load_hyperpyyaml(fin, overrides)
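    # Any top-level YAML key can also be overridden from the CLI, e.g.:
    # > python train.py hparams/tokenizer_bpe51.yaml --data_folder /path/to/FSC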

    # If distributed_launch=True then
    # create ddp_group with the right communication protocol
    sb.utils.distributed.ddp_init_group(run_opts)
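    # (A no-op for single-process runs; under a distributed launcher this
    # initializes the torch.distributed process group.)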

    # Create experiment directory
    sb.create_experiment_directory(
        experiment_directory=hparams["output_folder"],
        hyperparams_to_save=hparams_file,
        overrides=overrides,
    )

    # 1. Dataset prep
    from prepare import prepare_FSC  # noqa

    # Run data preparation on the main process only (safe for multi-GPU / DDP)
    run_on_main(
        prepare_FSC,
        kwargs={
            "data_folder": hparams["data_folder"],
            "save_folder": hparams["output_folder"],
            "skip_prep": hparams["skip_prep"],
        },
    )
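    # prepare_FSC is expected to write the train/valid/test annotation files
    # (with the semantics transcriptions) into save_folder; the tokenizer
    # below trains on those annotations.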

    # Train tokenizer
    hparams["tokenizer"]()