OFA-Image_Caption / fairseq /fairseq /tasks /frm_text_to_speech.py
JustinLin610
update
8437114
raw history blame
No virus
2.12 kB
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
import logging
from fairseq.data.audio.frm_text_to_speech_dataset import FrmTextToSpeechDatasetCreator
from fairseq.tasks import register_task
from fairseq.tasks.text_to_speech import TextToSpeechTask
logging.basicConfig(
format='%(asctime)s | %(levelname)s | %(name)s | %(message)s',
datefmt='%Y-%m-%d %H:%M:%S', level=logging.INFO
)
logger = logging.getLogger(__name__)
@register_task('frm_text_to_speech')
class FrmTextToSpeechTask(TextToSpeechTask):
@staticmethod
def add_args(parser):
TextToSpeechTask.add_args(parser)
parser.add_argument(
"--do_chunk", action="store_true", help="train on chunks"
)
parser.add_argument("--chunk_bound", default=-1, type=int)
parser.add_argument("--chunk_init", default=50, type=int)
parser.add_argument("--chunk_incr", default=5, type=int)
parser.add_argument("--add_eos", action="store_true")
parser.add_argument("--dedup", action="store_true")
parser.add_argument("--ref_fpu", default=-1, type=float)
def load_dataset(self, split, **unused_kwargs):
is_train_split = split.startswith("train")
pre_tokenizer = self.build_tokenizer(self.args)
bpe_tokenizer = self.build_bpe(self.args)
self.datasets[split] = FrmTextToSpeechDatasetCreator.from_tsv(
self.args.data,
self.data_cfg,
split,
self.src_dict,
pre_tokenizer,
bpe_tokenizer,
is_train_split=is_train_split,
n_frames_per_step=self.args.n_frames_per_step,
speaker_to_id=self.speaker_to_id,
do_chunk=self.args.do_chunk,
chunk_bound=self.args.chunk_bound,
chunk_init=self.args.chunk_init,
chunk_incr=self.args.chunk_incr,
add_eos=self.args.add_eos,
dedup=self.args.dedup,
ref_fpu=self.args.ref_fpu
)