import torch
from torchaudio.models.wav2vec2.utils import import_fairseq_model
from fairseq import checkpoint_utils
from onnxexport.model_onnx_speaker_mix import SynthesizerTrn
import utils


def get_hubert_model():
    # Load the HuBERT/ContentVec content encoder checkpoint via fairseq.
    vec_path = "hubert/checkpoint_best_legacy_500.pt"
    print("load model(s) from {}".format(vec_path))
    models, saved_cfg, task = checkpoint_utils.load_model_ensemble_and_task(
        [vec_path],
        suffix="",
    )
    model = models[0]
    model.eval()
    return model
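

# Note: get_hubert_model() is not called by main(); below is a hedged sketch of
# how the returned fairseq model is typically used to extract content features.
# The layer index, final_proj usage, and output width are assumptions borrowed
# from common so-vits-svc preprocessing and may differ for your checkpoint.
'''hubert = get_hubert_model()
wav_16k = torch.randn(1, 16000)  # one second of mono audio at 16 kHz
with torch.no_grad():
    feats, _ = hubert.extract_features(source=wav_16k,
                                       padding_mask=torch.zeros_like(wav_16k, dtype=torch.bool),
                                       output_layer=9)
    units = hubert.final_proj(feats)  # content units, roughly (1, frames, 256)'''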


def main(HubertExport, NetExport):
    path = "SoVits4.0"
    # Optional HuBERT ONNX export, left disabled here as a reference.
    '''if HubertExport:
        device = torch.device("cpu")
        vec_path = "hubert/checkpoint_best_legacy_500.pt"
        models, saved_cfg, task = checkpoint_utils.load_model_ensemble_and_task(
            [vec_path],
            suffix="",
        )
        original = models[0]
        original.eval()
        model = original
        test_input = torch.rand(1, 1, 16000)
        model(test_input)
        torch.onnx.export(model,
                          test_input,
                          "hubert4.0.onnx",
                          export_params=True,
                          opset_version=16,
                          do_constant_folding=True,
                          input_names=['source'],
                          output_names=['embed'],
                          dynamic_axes={
                              'source':
                                  {
                                      2: "sample_length"
                                  },
                          }
                          )'''
    if NetExport:
        device = torch.device("cpu")
        hps = utils.get_hparams_from_file(f"checkpoints/{path}/config.json")
        SVCVITS = SynthesizerTrn(
            hps.data.filter_length // 2 + 1,
            hps.train.segment_size // hps.data.hop_length,
            **hps.model)
        _ = utils.load_checkpoint(f"checkpoints/{path}/model.pth", SVCVITS, None)
        _ = SVCVITS.eval().to(device)
        for i in SVCVITS.parameters():
            i.requires_grad = False

        # Dummy inputs (10 frames) used only to trace the graph for export.
        test_hidden_unit = torch.rand(1, 10, SVCVITS.gin_channels)
        test_pitch = torch.rand(1, 10)
        test_mel2ph = torch.LongTensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]).unsqueeze(0)
        test_uv = torch.ones(1, 10, dtype=torch.float32)
        test_noise = torch.randn(1, 192, 10)

        # Single-speaker export by default; set export_mix = True to export a
        # speaker-mix model whose "sid" input is a vector of speaker weights.
        export_mix = False
        test_sid = torch.LongTensor([0])
        spk_mix = []
        if export_mix:
            n_spk = len(hps.spk)
            for i in range(n_spk):
                spk_mix.append(1.0 / float(n_spk))
            test_sid = torch.tensor(spk_mix)
            SVCVITS.export_chara_mix(n_spk)

        input_names = ["c", "f0", "mel2ph", "uv", "noise", "sid"]
        output_names = ["audio", ]
        SVCVITS.eval()
        torch.onnx.export(SVCVITS,
                          (
                              test_hidden_unit.to(device),
                              test_pitch.to(device),
                              test_mel2ph.to(device),
                              test_uv.to(device),
                              test_noise.to(device),
                              test_sid.to(device)
                          ),
                          f"checkpoints/{path}/model.onnx",
                          dynamic_axes={
                              "c": [0, 1],
                              "f0": [1],
                              "mel2ph": [1],
                              "uv": [1],
                              "noise": [2],
                          },
                          do_constant_folding=False,
                          opset_version=16,
                          verbose=False,
                          input_names=input_names,
                          output_names=output_names)


if __name__ == '__main__':
    main(False, True)
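
# A minimal smoke test of the exported graph, kept disabled like the HuBERT
# block above. It assumes onnxruntime is installed and that the content
# feature width matches your model config (256 is used here purely for
# illustration; the dummy shapes mirror the tensors traced in main()).
'''import numpy as np
import onnxruntime as ort

sess = ort.InferenceSession("checkpoints/SoVits4.0/model.onnx",
                            providers=["CPUExecutionProvider"])
dummy = {
    "c": np.random.rand(1, 10, 256).astype(np.float32),
    "f0": np.random.rand(1, 10).astype(np.float32),
    "mel2ph": np.arange(10, dtype=np.int64)[None, :],
    "uv": np.ones((1, 10), dtype=np.float32),
    "noise": np.random.randn(1, 192, 10).astype(np.float32),
    "sid": np.array([0], dtype=np.int64),
}
audio, = sess.run(["audio"], dummy)
print("ONNX output shape:", audio.shape)'''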