Update train_ms.py
train_ms.py  CHANGED  (+28 -19)
@@ -4,6 +4,7 @@ import argparse
 import itertools
 import math
 import torch
+import shutil
 from torch import nn, optim
 from torch.nn import functional as F
 from torch.utils.data import DataLoader
@@ -38,12 +39,8 @@ from text.symbols import symbols
 
 torch.backends.cudnn.benchmark = True
 torch.backends.cuda.matmul.allow_tf32 = True
-torch.backends.cudnn.allow_tf32 = True
+torch.backends.cudnn.allow_tf32 = True
 torch.set_float32_matmul_precision('medium')
-torch.backends.cuda.sdp_kernel("flash")
-torch.backends.cuda.enable_flash_sdp(True)
-torch.backends.cuda.enable_mem_efficient_sdp(True)  # Not available if torch version is lower than 2.0
-torch.backends.cuda.enable_math_sdp(True)
 global_step = 0
 
 
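Note on the four deleted lines: they toggle PyTorch's scaled-dot-product-attention kernels and, as the removed in-code comment says, do not exist in torch builds older than 2.0 (and torch.backends.cuda.sdp_kernel appears to be a context manager in 2.x, so calling it bare as above likely had no lasting effect anyway). If the toggles were worth keeping, one version-tolerant sketch would guard on attribute presence; the guard below is an assumption, not part of this commit:

import torch

# Sketch: only touch the SDP toggles when this torch build exposes them,
# so the script still imports on pre-2.0 installs.
if hasattr(torch.backends.cuda, "enable_flash_sdp"):
    torch.backends.cuda.enable_flash_sdp(True)
    torch.backends.cuda.enable_mem_efficient_sdp(True)
    torch.backends.cuda.enable_math_sdp(True)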
@@ -56,6 +53,10 @@ def main():
     os.environ['MASTER_PORT'] = '65280'
 
     hps = utils.get_hparams()
+    if not hps.cont:
+        shutil.copy('./pretrained_models/D_0.pth', './logs/OUTPUT_MODEL/D_0.pth')
+        shutil.copy('./pretrained_models/G_0.pth', './logs/OUTPUT_MODEL/G_0.pth')
+        shutil.copy('./pretrained_models/DUR_0.pth', './logs/OUTPUT_MODEL/DUR_0.pth')
     mp.spawn(run, nprocs=n_gpus, args=(n_gpus, hps,))
 
 
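The new fresh-start branch assumes ./logs/OUTPUT_MODEL already exists and all three pretrained files are present; shutil.copy raises otherwise. A slightly hardened sketch (the paths mirror the commit, the existence handling is an assumption):

import os
import shutil

def seed_output_dir(src='./pretrained_models', dst='./logs/OUTPUT_MODEL'):
    # Create the run directory if needed and copy whichever of the three
    # seed checkpoints actually exist, rather than crashing on a missing one.
    os.makedirs(dst, exist_ok=True)
    for name in ('D_0.pth', 'G_0.pth', 'DUR_0.pth'):
        path = os.path.join(src, name)
        if os.path.isfile(path):
            shutil.copy(path, os.path.join(dst, name))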
@@ -68,7 +69,7 @@ def run(rank, n_gpus, hps):
         writer = SummaryWriter(log_dir=hps.model_dir)
         writer_eval = SummaryWriter(log_dir=os.path.join(hps.model_dir, "eval"))
 
-    dist.init_process_group(backend='nccl', init_method='env://', world_size=n_gpus, rank=rank)
+    dist.init_process_group(backend='gloo' if os.name == 'nt' else 'nccl', init_method='env://', world_size=n_gpus, rank=rank)
     torch.manual_seed(hps.train.seed)
     torch.cuda.set_device(rank)
 
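The 'gloo' if os.name == 'nt' else 'nccl' switch exists because NCCL is not shipped on Windows. An equivalent check, sketched here as an alternative rather than what the commit does, asks torch.distributed directly and also covers CPU-only builds:

import torch.distributed as dist

def init_distributed(rank: int, n_gpus: int) -> None:
    # Prefer NCCL when this torch build provides it; otherwise fall back
    # to Gloo (Windows, CPU-only builds).
    backend = 'nccl' if dist.is_nccl_available() else 'gloo'
    dist.init_process_group(backend=backend, init_method='env://',
                            world_size=n_gpus, rank=rank)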
@@ -81,9 +82,8 @@ def run(rank, n_gpus, hps):
         rank=rank,
         shuffle=True)
     collate_fn = TextAudioSpeakerCollate()
-    train_loader = DataLoader(train_dataset, num_workers=
-                              collate_fn=collate_fn, batch_sampler=train_sampler,
-                              persistent_workers=True, prefetch_factor=4)  # 256G-memory-suitable loader.
+    train_loader = DataLoader(train_dataset, num_workers=2, shuffle=False, pin_memory=True,
+                              collate_fn=collate_fn, batch_sampler=train_sampler)
     if rank == 0:
         eval_dataset = TextAudioSpeakerLoader(hps.data.validation_files, hps.data)
         eval_loader = DataLoader(eval_dataset, num_workers=0, shuffle=False,
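This hunk swaps the removed high-throughput loader (its comment sized it for a 256 GB-RAM host) for a conservative num_workers=2. If more throughput is wanted without hard-coding the worker count, one middle-ground sketch, assuming train_dataset, train_sampler, and collate_fn as in this file and a worker cap chosen arbitrarily:

import os
from torch.utils.data import DataLoader

workers = min(8, os.cpu_count() or 1)  # arbitrary cap; tune to available RAM
loader_kwargs = dict(num_workers=workers, pin_memory=True,
                     collate_fn=collate_fn, batch_sampler=train_sampler)
if workers > 0:
    # persistent_workers and prefetch_factor are only valid with workers > 0
    loader_kwargs.update(persistent_workers=True, prefetch_factor=2)
train_loader = DataLoader(train_dataset, **loader_kwargs)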
@@ -155,20 +155,29 @@ def run(rank, n_gpus, hps):
     net_d = DDP(net_d, device_ids=[rank], find_unused_parameters=True)
     if net_dur_disc is not None:
         net_dur_disc = DDP(net_dur_disc, device_ids=[rank], find_unused_parameters=True)
-    try:
-        if net_dur_disc is not None:
-            _, optim_dur_disc, _, epoch_str = utils.load_checkpoint(utils.latest_checkpoint_path(hps.model_dir, "DUR_*.pth"), net_dur_disc, optim_dur_disc, skip_optimizer=True)
-        _, optim_g, _, epoch_str = utils.load_checkpoint(utils.latest_checkpoint_path(hps.model_dir, "G_*.pth"), net_g,
-                                                         optim_g, skip_optimizer=True)
-        _, optim_d, _, epoch_str = utils.load_checkpoint(utils.latest_checkpoint_path(hps.model_dir, "D_*.pth"), net_d,
-                                                         optim_d, skip_optimizer=True)
+
+    pretrain_dir = None
+    if pretrain_dir is None:
+        try:
+            if net_dur_disc is not None:
+                _, optim_dur_disc, _, epoch_str = utils.load_checkpoint(utils.latest_checkpoint_path(hps.model_dir, "DUR_*.pth"), net_dur_disc, optim_dur_disc, skip_optimizer=not hps.cont)
+            _, optim_g, _, epoch_str = utils.load_checkpoint(utils.latest_checkpoint_path(hps.model_dir, "G_*.pth"), net_g,
+                                                             optim_g, skip_optimizer=not hps.cont)
+            _, optim_d, _, epoch_str = utils.load_checkpoint(utils.latest_checkpoint_path(hps.model_dir, "D_*.pth"), net_d,
+                                                             optim_d, skip_optimizer=not hps.cont)
 
-        epoch_str = max(epoch_str, 1)
-        global_step = (epoch_str - 1) * len(train_loader)
-    except Exception as e:
+            epoch_str = max(epoch_str, 1)
+            global_step = (epoch_str - 1) * len(train_loader)
+        except Exception as e:
             print(e)
             epoch_str = 1
             global_step = 0
+    else:
+        _, _, _, epoch_str = utils.load_checkpoint(utils.latest_checkpoint_path(pretrain_dir, "G_*.pth"), net_g,
+                                                   optim_g, True)
+        _, _, _, epoch_str = utils.load_checkpoint(utils.latest_checkpoint_path(pretrain_dir, "D_*.pth"), net_d,
+                                                   optim_d, True)
+
 
 
     scheduler_g = torch.optim.lr_scheduler.ExponentialLR(optim_g, gamma=hps.train.lr_decay, last_epoch=epoch_str - 2)
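Both new code paths hinge on hps.cont: when it is false, main() seeds the run directory with the pretrained checkpoints and the checkpoint loads discard optimizer state (skip_optimizer=not hps.cont); when true, training resumes in place. The flag itself is defined outside this diff (in utils.get_hparams()); a hypothetical minimal wiring, with the flag name matching the diff but the argparse plumbing assumed, could look like:

import argparse

parser = argparse.ArgumentParser()
# Hypothetical flag: resume from the latest checkpoint in the model dir,
# keeping optimizer state and skipping the pretrained-model copy.
parser.add_argument('--cont', action='store_true')
args = parser.parse_args()
# get_hparams() would then expose it on the returned hparams object:
# hps.cont = args.cont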