# Copyright (c) 2023 Amphion.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import argparse

import torch

from models.tts.fastspeech2.fs2_trainer import FastSpeech2Trainer
from models.tts.vits.vits_trainer import VITSTrainer
from models.tts.valle.valle_trainer import VALLETrainer
from models.tts.naturalspeech2.ns2_trainer import NS2Trainer
from models.tts.valle_v2.valle_ar_trainer import ValleARTrainer as VALLE_V2_AR
from models.tts.valle_v2.valle_nar_trainer import ValleNARTrainer as VALLE_V2_NAR
from models.tts.jets.jets_trainer import JetsTrainer
from utils.util import load_config


def build_trainer(args, cfg):
    # Map the model_type declared in the config to its trainer class.
    supported_trainer = {
        "FastSpeech2": FastSpeech2Trainer,
        "VITS": VITSTrainer,
        "VALLE": VALLETrainer,
        "NaturalSpeech2": NS2Trainer,
        "VALLE_V2_AR": VALLE_V2_AR,
        "VALLE_V2_NAR": VALLE_V2_NAR,
        "Jets": JetsTrainer,
    }

    trainer_class = supported_trainer[cfg.model_type]
    trainer = trainer_class(args, cfg)
    return trainer
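

# For example, a config with model_type == "VITS" yields a VITSTrainer
# instance; an unlisted model_type raises a KeyError here.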


def cuda_relevant(deterministic=False):
    torch.cuda.empty_cache()
    # Allow TF32 matmul/cuDNN kernels on Ampere and newer GPUs.
    torch.backends.cuda.matmul.allow_tf32 = True
    torch.backends.cudnn.enabled = True
    torch.backends.cudnn.allow_tf32 = True
    # Determinism: cuDNN benchmarking autotunes kernels and can pick a
    # different algorithm on each run, so it is disabled in deterministic mode.
    torch.backends.cudnn.deterministic = deterministic
    torch.backends.cudnn.benchmark = not deterministic
    torch.use_deterministic_algorithms(deterministic)
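

# Note: cuda_relevant() only sets backend flags; it does not seed any RNGs.
# For full reproducibility one would also seed explicitly, e.g. (a minimal
# sketch, assuming the parsed --seed value is used):
#     torch.manual_seed(args.seed)
# The trainer classes may also seed internally.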


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--config",
        default="config.json",
        help="JSON file for configuration.",
        required=True,
    )
    parser.add_argument(
        "--seed",
        type=int,
        default=1234,
        help="Random seed.",
        required=False,
    )
    parser.add_argument(
        "--exp_name",
        type=str,
        default="exp_name",
        help="A specific name to note the experiment.",
        required=True,
    )
    parser.add_argument(
        "--resume", action="store_true", help="Resume training from a checkpoint."
    )
    parser.add_argument(
        "--test", action="store_true", default=False, help="Test the model."
    )
    parser.add_argument(
        "--log_level", default="warning", help="Logging level (debug, info, warning)."
    )
    parser.add_argument(
        "--resume_type",
        type=str,
        default="resume",
        help='Resume training ("resume") or finetune ("finetune").',
    )
    parser.add_argument(
        "--checkpoint_path",
        type=str,
        default=None,
        help="Checkpoint path for resuming training or finetuning.",
    )
    parser.add_argument(
        "--resume_from_ckpt_path",
        type=str,
        default="",
        help="Checkpoint path for resuming training or finetuning.",
    )
    # VALLETrainer.add_arguments(parser)
    args = parser.parse_args()
    cfg = load_config(args.config)
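
    # Illustrative config shape (field names as accessed below; the dataset
    # name and flag values are examples, not repo defaults):
    #     {"model_type": "VITS",
    #      "dataset": ["LJSpeech"],
    #      "preprocess": {"data_augment": ["LJSpeech"],
    #                     "use_pitch_shift": true, "use_formant_shift": false,
    #                     "use_equalizer": false, "use_time_stretch": false}}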
    # Data augmentation: for each enabled augmentation, append the derived
    # dataset names "<dataset>_<augmentation>" to cfg.dataset.
    if hasattr(cfg, "preprocess"):
        if hasattr(cfg.preprocess, "data_augment"):
            if (
                isinstance(cfg.preprocess.data_augment, list)
                and len(cfg.preprocess.data_augment) > 0
            ):
                new_datasets_list = []
                for dataset in cfg.preprocess.data_augment:
                    new_datasets = [
                        (
                            f"{dataset}_pitch_shift"
                            if cfg.preprocess.use_pitch_shift
                            else None
                        ),
                        (
                            f"{dataset}_formant_shift"
                            if cfg.preprocess.use_formant_shift
                            else None
                        ),
                        (
                            f"{dataset}_equalizer"
                            if cfg.preprocess.use_equalizer
                            else None
                        ),
                        (
                            f"{dataset}_time_stretch"
                            if cfg.preprocess.use_time_stretch
                            else None
                        ),
                    ]
                    new_datasets_list.extend(filter(None, new_datasets))
                cfg.dataset.extend(new_datasets_list)
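
    # E.g. with data_augment == ["LJSpeech"] (an illustrative name) and only
    # use_pitch_shift enabled, "LJSpeech_pitch_shift" is appended to cfg.dataset.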
print("experiment name: ", args.exp_name)
# # CUDA settings
cuda_relevant()
# Build trainer
print(f"Building {cfg.model_type} trainer")
trainer = build_trainer(args, cfg)
print(f"Start training {cfg.model_type} model")
if args.test:
trainer.test_loop()
else:
trainer.train_loop()
if __name__ == "__main__":
main()
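
# Example invocation (the script path, config path, and experiment name are
# illustrative):
#     python bins/tts/train.py --config egs/tts/VITS/exp_config.json \
#         --exp_name my_vits_run --log_level info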