RVC_HF

Sleeping

App Files Files Community

RVC_HF / rvc /train /utils.py

r3gm

update

1397f77 over 1 year ago

raw

history blame contribute delete

9.95 kB

	import os
	import glob
	import json
	import torch
	import argparse
	import numpy as np
	from scipy.io.wavfile import read
	from collections import OrderedDict


	def replace_keys_in_dict(d, old_key_part, new_key_part):
	    """Return a copy of ``d`` with a substring swapped in every string key.

	    Values that are themselves ``dict`` instances are processed recursively.
	    The result is an ``OrderedDict`` when ``d`` is one and a plain ``dict``
	    otherwise. Non-string keys and non-dict values are carried over as-is,
	    and ``d`` itself is not modified.

	    Args:
	        d: Mapping whose keys should be rewritten.
	        old_key_part: Substring to look for in each string key.
	        new_key_part: Replacement for every occurrence of ``old_key_part``.

	    Returns:
	        A new mapping with the renamed keys, in the original iteration order.
	    """
	    renamed = OrderedDict() if isinstance(d, OrderedDict) else {}
	    for original_key, item in d.items():
	        fresh_key = (
	            original_key.replace(old_key_part, new_key_part)
	            if isinstance(original_key, str)
	            else original_key
	        )
	        renamed[fresh_key] = (
	            replace_keys_in_dict(item, old_key_part, new_key_part)
	            if isinstance(item, dict)
	            else item
	        )
	    return renamed


	def load_checkpoint(checkpoint_path, model, optimizer=None, load_opt=1):
	    """Load a training checkpoint into ``model`` and, optionally, ``optimizer``.

	    Before anything is restored, the checkpoint is converted: every key
	    containing ``.weight_v`` / ``.weight_g`` is renamed to
	    ``.parametrizations.weight.original1`` /
	    ``.parametrizations.weight.original0``. The converted checkpoint is
	    written next to the original and then moved over ``checkpoint_path``, so
	    the file on disk is rewritten as a side effect of loading.

	    Parameters of the model that are absent from the checkpoint, or whose
	    shape differs from the checkpoint's, keep their current values; a
	    message is printed for each of them.

	    Args:
	        checkpoint_path: Path of the checkpoint file to read (and rewrite).
	        model: Module to restore. If it has a ``module`` attribute, that
	            inner module's state dict is used instead.
	        optimizer: Optional optimizer whose state is restored.
	        load_opt: The optimizer state is restored only when this equals 1.

	    Returns:
	        Tuple ``(model, optimizer, learning_rate, iteration)`` where the last
	        two values are read from the checkpoint.

	    Raises:
	        AssertionError: If ``checkpoint_path`` is not an existing file.
	        KeyError: If the checkpoint lacks ``"model"``, ``"iteration"``,
	            ``"learning_rate"`` or (when requested) ``"optimizer"``.
	    """
	    assert os.path.isfile(
	        checkpoint_path
	    ), f"checkpoint file not found: {checkpoint_path}"

	    # NOTE(review): torch.load unpickles the file - only load checkpoints
	    # that come from a trusted source.
	    checkpoint_dict = replace_keys_in_dict(
	        replace_keys_in_dict(
	            torch.load(checkpoint_path, map_location="cpu"),
	            ".weight_v",
	            ".parametrizations.weight.original1",
	        ),
	        ".weight_g",
	        ".parametrizations.weight.original0",
	    )

	    # Persist the converted checkpoint. Writing to a sibling file and then
	    # using os.replace swaps it in with a single rename, so there is never a
	    # moment where no checkpoint exists at `checkpoint_path`.
	    checkpoint_new_version_path = os.path.join(
	        os.path.dirname(checkpoint_path),
	        f"{os.path.splitext(os.path.basename(checkpoint_path))[0]}_new_version.pth",
	    )
	    torch.save(checkpoint_dict, checkpoint_new_version_path)
	    os.replace(checkpoint_new_version_path, checkpoint_path)

	    # The converted dict was loaded onto the CPU already, so it is used
	    # directly instead of reading the file a second time.
	    saved_state_dict = checkpoint_dict["model"]
	    target = model.module if hasattr(model, "module") else model
	    state_dict = target.state_dict()

	    new_state_dict = {}
	    for k, v in state_dict.items():
	        if k not in saved_state_dict:
	            print(f"{k} is not in the checkpoint")
	            new_state_dict[k] = v
	            continue
	        saved = saved_state_dict[k]
	        # getattr keeps this best-effort even for entries without a shape.
	        saved_shape = getattr(saved, "shape", None)
	        if saved_shape != v.shape:
	            print(f"shape-{k}-mismatch|need-{v.shape}|get-{saved_shape}")
	            new_state_dict[k] = v
	        else:
	            new_state_dict[k] = saved
	    target.load_state_dict(new_state_dict, strict=False)

	    iteration = checkpoint_dict["iteration"]
	    learning_rate = checkpoint_dict["learning_rate"]
	    if optimizer is not None and load_opt == 1:
	        optimizer.load_state_dict(checkpoint_dict["optimizer"])
	    print(f"Loaded checkpoint '{checkpoint_path}' (epoch {iteration})")
	    return model, optimizer, learning_rate, iteration


	def save_checkpoint(model, optimizer, learning_rate, iteration, checkpoint_path):
	    """Write a training checkpoint to ``checkpoint_path``.

	    The saved dict has the keys ``"model"``, ``"iteration"``, ``"optimizer"``
	    and ``"learning_rate"``. Before the final write every key containing
	    ``.parametrizations.weight.original1`` /
	    ``.parametrizations.weight.original0`` is renamed to ``.weight_v`` /
	    ``.weight_g``.

	    All intermediate data goes to a sibling ``*_old_version.pth`` file, which
	    is moved over ``checkpoint_path`` only once it is complete. A failure
	    part-way through therefore leaves any previous checkpoint untouched.

	    Args:
	        model: Module to save. If it has a ``module`` attribute, that inner
	            module's state dict is saved instead.
	        optimizer: Optimizer whose ``state_dict()`` is stored.
	        learning_rate: Value stored under ``"learning_rate"``.
	        iteration: Value stored under ``"iteration"``.
	        checkpoint_path: Destination file.
	    """
	    checkpoint_old_version_path = os.path.join(
	        os.path.dirname(checkpoint_path),
	        f"{os.path.splitext(os.path.basename(checkpoint_path))[0]}_old_version.pth",
	    )
	    target = model.module if hasattr(model, "module") else model
	    torch.save(
	        {
	            "model": target.state_dict(),
	            "iteration": iteration,
	            "optimizer": optimizer.state_dict(),
	            "learning_rate": learning_rate,
	        },
	        checkpoint_old_version_path,
	    )
	    # Reload with map_location="cpu" so the tensors are remapped to the CPU
	    # before the final file is written.
	    checkpoint = torch.load(
	        checkpoint_old_version_path, map_location=torch.device("cpu")
	    )
	    torch.save(
	        replace_keys_in_dict(
	            replace_keys_in_dict(
	                checkpoint, ".parametrizations.weight.original1", ".weight_v"
	            ),
	            ".parametrizations.weight.original0",
	            ".weight_g",
	        ),
	        checkpoint_old_version_path,
	    )
	    # os.replace overwrites the destination in a single step on every
	    # platform, unlike a remove followed by a rename.
	    os.replace(checkpoint_old_version_path, checkpoint_path)
	    # Report success only after the file is actually in place.
	    print(f"Saved model '{checkpoint_path}' (epoch {iteration})")


	def summarize(
	    writer,
	    global_step,
	    scalars=None,
	    histograms=None,
	    images=None,
	    audios=None,
	    audio_sampling_rate=22050,
	):
	    """Forward named scalars, histograms, images and audio clips to ``writer``.

	    Args:
	        writer: Object providing ``add_scalar``, ``add_histogram``,
	            ``add_image`` and ``add_audio``.
	        global_step: Step value passed along with every entry.
	        scalars: Optional mapping ``tag -> value`` for ``add_scalar``.
	        histograms: Optional mapping ``tag -> values`` for ``add_histogram``.
	        images: Optional mapping ``tag -> image`` for ``add_image``; images
	            are logged with ``dataformats="HWC"``.
	        audios: Optional mapping ``tag -> audio`` for ``add_audio``.
	        audio_sampling_rate: Sampling rate passed to ``add_audio``.

	    Omitted or ``None`` mappings are treated as empty.
	    """
	    # None defaults replace the former shared mutable `{}` defaults.
	    for tag, value in (scalars or {}).items():
	        writer.add_scalar(tag, value, global_step)
	    for tag, value in (histograms or {}).items():
	        writer.add_histogram(tag, value, global_step)
	    for tag, value in (images or {}).items():
	        writer.add_image(tag, value, global_step, dataformats="HWC")
	    for tag, value in (audios or {}).items():
	        writer.add_audio(tag, value, global_step, audio_sampling_rate)

	def latest_checkpoint_path(dir_path, regex="G_*.pth"):
	    """Return the matching checkpoint whose file name holds the largest number.

	    Files are found with ``glob`` using ``regex`` (a glob pattern, despite
	    the parameter name) inside ``dir_path``. They are ordered by the integer
	    formed from the digits of their *file name*; digits in directory names
	    are ignored so they cannot distort the ordering. Names without any digit
	    sort before all numbered ones.

	    Args:
	        dir_path: Directory to search.
	        regex: Glob pattern, relative to ``dir_path``.

	    Returns:
	        Path of the selected checkpoint file.

	    Raises:
	        IndexError: If no file matches the pattern.
	    """

	    def _numeric_id(path):
	        # Only the base name is inspected: a digit in a parent directory
	        # (e.g. "logs/run1/") must not become part of the sort key.
	        digits = "".join(filter(str.isdigit, os.path.basename(path)))
	        return int(digits) if digits else -1

	    f_list = glob.glob(os.path.join(dir_path, regex))
	    if not f_list:
	        raise IndexError(
	            f"no checkpoint matching '{regex}' found in '{dir_path}'"
	        )
	    f_list.sort(key=_numeric_id)
	    return f_list[-1]


	def plot_spectrogram_to_numpy(spectrogram):
	    """Render ``spectrogram`` with matplotlib and return the picture as an array.

	    The spectrogram is drawn with ``imshow`` (origin at the bottom, no
	    interpolation) on a 10x2 inch figure with a colorbar and the axis labels
	    "Frames" and "Channels".

	    Args:
	        spectrogram: 2-D data accepted by ``Axes.imshow``.

	    Returns:
	        A ``uint8`` array of shape ``(height, width, 3)`` holding the
	        rendered figure's RGB pixels.
	    """
	    import matplotlib.pylab as plt
	    import numpy as np

	    fig, ax = plt.subplots(figsize=(10, 2))
	    try:
	        im = ax.imshow(
	            spectrogram, aspect="auto", origin="lower", interpolation="none"
	        )
	        fig.colorbar(im, ax=ax)
	        ax.set_xlabel("Frames")
	        ax.set_ylabel("Channels")
	        fig.tight_layout()

	        canvas = fig.canvas
	        canvas.draw()
	        width, height = canvas.get_width_height()
	        if hasattr(canvas, "buffer_rgba"):
	            # tostring_rgb() is gone from recent matplotlib releases; read
	            # the RGBA buffer and drop the alpha channel instead.
	            rgba = np.frombuffer(canvas.buffer_rgba(), dtype=np.uint8)
	            data = rgba.reshape(height, width, 4)[..., :3].copy()
	        else:
	            # Fallback for canvases without buffer_rgba. np.frombuffer
	            # replaces the deprecated binary mode of np.fromstring; copy()
	            # keeps the result writable as before.
	            rgb = np.frombuffer(canvas.tostring_rgb(), dtype=np.uint8)
	            data = rgb.reshape(height, width, 3).copy()
	    finally:
	        # Close this specific figure even if rendering failed.
	        plt.close(fig)
	    return data


	def load_wav_to_torch(full_path):
	    """Read a WAV file and return ``(samples, sampling_rate)``.

	    The samples returned by ``scipy.io.wavfile.read`` are cast to float32
	    (their values are not rescaled) and wrapped in a ``torch.FloatTensor``.
	    """
	    wav_contents = read(full_path)
	    samples_f32 = wav_contents[1].astype(np.float32)
	    return torch.FloatTensor(samples_f32), wav_contents[0]


	def load_filepaths_and_text(filename, split="|"):
	    """Read a UTF-8 text file and split every line into fields.

	    Args:
	        filename: Path of the file to read.
	        split: Field separator. Defaults to a single pipe character.

	    Returns:
	        One list of fields per line, in file order. Each line has leading
	        and trailing whitespace stripped before it is split.
	    """
	    # The default used to be the invalid escape "\|", i.e. the two-character
	    # string backslash + pipe, which never matches a plain "|" separator.
	    with open(filename, encoding="utf-8") as f:
	        return [line.strip().split(split) for line in f]


	def get_hparams():
	    """Build the training hyper-parameters from the CLI and ``config.json``.

	    Command-line arguments are parsed from ``sys.argv``. The JSON file
	    ``./logs/<experiment_dir>/config.json`` is loaded into an ``HParams``
	    object, and the command-line values are then stored on it.

	    Returns:
	        The populated ``HParams`` instance.
	    """
	    args = _build_hparams_parser().parse_args()
	    name = args.experiment_dir
	    experiment_dir = os.path.join("./logs", args.experiment_dir)
	    config_save_path = os.path.join(experiment_dir, "config.json")
	    # JSON is UTF-8 by specification; do not depend on the locale default.
	    with open(config_save_path, "r", encoding="utf-8") as f:
	        config = json.load(f)
	    hparams = HParams(**config)
	    hparams.model_dir = hparams.experiment_dir = experiment_dir
	    hparams.save_every_epoch = args.save_every_epoch
	    hparams.name = name
	    hparams.total_epoch = args.total_epoch
	    hparams.pretrainG = args.pretrainG
	    hparams.pretrainD = args.pretrainD
	    hparams.version = args.version
	    hparams.gpus = args.gpus
	    # `train` and `data` must already exist as sections of config.json.
	    hparams.train.batch_size = args.batch_size
	    hparams.sample_rate = args.sample_rate
	    hparams.if_f0 = args.if_f0
	    hparams.if_latest = args.if_latest
	    hparams.save_every_weights = args.save_every_weights
	    hparams.if_cache_data_in_gpu = args.if_cache_data_in_gpu
	    hparams.data.training_files = f"{experiment_dir}/filelist.txt"
	    hparams.overtraining_detector = args.overtraining_detector
	    hparams.overtraining_threshold = args.overtraining_threshold
	    # argparse exposes "--sync-graph" as the attribute `sync_graph`.
	    hparams.sync_graph = args.sync_graph
	    return hparams


	def _build_hparams_parser():
	    """Create the argument parser that defines the training command line."""
	    parser = argparse.ArgumentParser()
	    parser.add_argument(
	        "-se",
	        "--save_every_epoch",
	        type=int,
	        required=True,
	        help="checkpoint save frequency (epoch)",
	    )
	    parser.add_argument(
	        "-te", "--total_epoch", type=int, required=True, help="total_epoch"
	    )
	    # The help texts of the next two options used to be swapped.
	    parser.add_argument(
	        "-pg", "--pretrainG", type=str, default="", help="Pretrained Generator path"
	    )
	    parser.add_argument(
	        "-pd",
	        "--pretrainD",
	        type=str,
	        default="",
	        help="Pretrained Discriminator path",
	    )
	    parser.add_argument("-g", "--gpus", type=str, default="0", help="split by -")
	    parser.add_argument(
	        "-bs", "--batch_size", type=int, required=True, help="batch size"
	    )
	    parser.add_argument(
	        "-e", "--experiment_dir", type=str, required=True, help="experiment dir"
	    )
	    parser.add_argument(
	        "-sr", "--sample_rate", type=str, required=True, help="sample rate, 32k/40k/48k"
	    )
	    parser.add_argument(
	        "-sw",
	        "--save_every_weights",
	        type=str,
	        default="0",
	        help="save the extracted model in weights directory when saving checkpoints",
	    )
	    parser.add_argument(
	        "-v", "--version", type=str, required=True, help="model version"
	    )
	    parser.add_argument(
	        "-f0",
	        "--if_f0",
	        type=int,
	        required=True,
	        help="use f0 as one of the inputs of the model, 1 or 0",
	    )
	    parser.add_argument(
	        "-l",
	        "--if_latest",
	        type=int,
	        required=True,
	        help="if only save the latest G/D pth file, 1 or 0",
	    )
	    parser.add_argument(
	        "-c",
	        "--if_cache_data_in_gpu",
	        type=int,
	        required=True,
	        help="if caching the dataset in GPU memory, 1 or 0",
	    )
	    parser.add_argument(
	        "-od",
	        "--overtraining_detector",
	        type=int,
	        required=True,
	        help="Detect overtraining or not, 1 or 0",
	    )
	    parser.add_argument(
	        "-ot",
	        "--overtraining_threshold",
	        type=int,
	        default=50,
	        help="overtraining_threshold",
	    )
	    parser.add_argument(
	        "-sg",
	        "--sync-graph",
	        type=int,
	        required=True,
	        help="Sync graph or not, 1 or 0",
	    )
	    return parser


	class HParams:
	    """Attribute container that can also be used like a small mapping.

	    Every keyword argument becomes an instance attribute. A value whose type
	    is exactly ``dict`` is converted into a nested ``HParams`` first, so
	    nested configuration is reachable as ``hp.section.option`` as well as
	    ``hp["section"]["option"]``.
	    """

	    def __init__(self, **kwargs):
	        for name, entry in kwargs.items():
	            # Exact type match on purpose: dict subclasses are stored as-is.
	            if type(entry) == dict:
	                entry = HParams(**entry)
	            self[name] = entry

	    def keys(self):
	        """Return a view of the stored attribute names."""
	        return vars(self).keys()

	    def items(self):
	        """Return a view of the stored ``(name, value)`` pairs."""
	        return vars(self).items()

	    def values(self):
	        """Return a view of the stored values."""
	        return vars(self).values()

	    def __len__(self):
	        return len(vars(self))

	    def __getitem__(self, key):
	        # Item access is attribute access under another spelling.
	        return getattr(self, key)

	    def __setitem__(self, key, value):
	        return setattr(self, key, value)

	    def __contains__(self, key):
	        return key in vars(self)

	    def __repr__(self):
	        return repr(vars(self))