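"""Generate ground-truth-aligned (GTA) mel spectrograms with a trained
Tacotron synthesizer. The GTA mels are written to disk for use as
vocoder training targets."""
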
from functools import partial
from pathlib import Path

import numpy as np
import torch
from torch.utils.data import DataLoader
from tqdm import tqdm

from synthesizer.hparams import hparams_debug_string
from synthesizer.models.tacotron import Tacotron
from synthesizer.synthesizer_dataset import SynthesizerDataset, collate_synthesizer
from synthesizer.utils import data_parallel_workaround
from synthesizer.utils.symbols import symbols


def run_synthesis(in_dir: Path, out_dir: Path, syn_model_fpath: Path, hparams):
    # This generates ground-truth-aligned mels for vocoder training
    train_in_dir = in_dir.joinpath("train")
    train_out_dir = out_dir.joinpath("train")
    dev_in_dir = in_dir.joinpath("dev")
    dev_out_dir = out_dir.joinpath("dev")
    train_synth_dir = train_out_dir / "mels_gta"
    train_synth_dir.mkdir(exist_ok=True, parents=True)
    dev_synth_dir = dev_out_dir / "mels_gta"
    dev_synth_dir.mkdir(exist_ok=True, parents=True)
    print(hparams_debug_string())

    # Check for GPU
    if torch.cuda.is_available():
        device = torch.device("cuda")
        if hparams.synthesis_batch_size % torch.cuda.device_count() != 0:
            raise ValueError("`hparams.synthesis_batch_size` must be evenly divisible by n_gpus!")
    else:
        device = torch.device("cpu")
    print("Synthesizer using device:", device)
# Instantiate Tacotron model
model = Tacotron(embed_dims=hparams.tts_embed_dims,
num_chars=len(symbols),
encoder_dims=hparams.tts_encoder_dims,
decoder_dims=hparams.tts_decoder_dims,
n_mels=hparams.num_mels,
fft_bins=hparams.num_mels,
postnet_dims=hparams.tts_postnet_dims,
encoder_K=hparams.tts_encoder_K,
lstm_dims=hparams.tts_lstm_dims,
postnet_K=hparams.tts_postnet_K,
num_highways=hparams.tts_num_highways,
dropout=0., # Use zero dropout for gta mels
stop_threshold=hparams.tts_stop_threshold,
speaker_embedding_size=hparams.speaker_embedding_size).to(device)

    # Load the weights
    print("\nLoading weights at %s" % syn_model_fpath)
    model.load(syn_model_fpath)
    print("Tacotron weights loaded from step %d" % model.step)

    # Synthesize using the same reduction factor the model is currently trained with
    r = np.int32(model.r)

    # Set the model to eval mode; this disables zoneout, while gradients are
    # turned off explicitly with torch.no_grad() in the loops below
    model.eval()
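    # Note: the decoder emits r mel frames per step, so the collate function
    # below pads each target mel so its length is a multiple of r; that is
    # why r is forwarded to collate_synthesizer.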

    # Initialize the datasets and data loaders
    train_metadata_fpath = train_in_dir.joinpath("train.txt")
    train_mel_dir = train_in_dir.joinpath("mels")
    train_embed_dir = train_in_dir.joinpath("embeds")
    dev_metadata_fpath = dev_in_dir.joinpath("dev.txt")
    dev_mel_dir = dev_in_dir.joinpath("mels")
    dev_embed_dir = dev_in_dir.joinpath("embeds")
    train_dataset = SynthesizerDataset(train_metadata_fpath, train_mel_dir, train_embed_dir, hparams)
    dev_dataset = SynthesizerDataset(dev_metadata_fpath, dev_mel_dir, dev_embed_dir, hparams)
    collate_fn = partial(collate_synthesizer, r=r, hparams=hparams)
    train_data_loader = DataLoader(train_dataset, hparams.synthesis_batch_size, collate_fn=collate_fn, num_workers=2)
    dev_data_loader = DataLoader(dev_dataset, hparams.synthesis_batch_size, collate_fn=collate_fn, num_workers=2)
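
    # Both loops below run teacher-forced inference: the ground-truth mels are
    # fed to the decoder alongside the text, so each generated ("GTA") mel
    # stays frame-aligned with its target.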

    # Generate train GTA mels
    train_meta_out_fpath = train_out_dir / "synthesized.txt"
    with train_meta_out_fpath.open("w") as file:
        for i, (texts, mels, embeds, idx) in tqdm(enumerate(train_data_loader), total=len(train_data_loader)):
            texts, mels, embeds = texts.to(device), mels.to(device), embeds.to(device)

            # Parallelize the model onto GPUs using a workaround due to a Python bug
            # if device.type == "cuda" and torch.cuda.device_count() > 1:
            #     _, mels_out, _, _ = data_parallel_workaround(model, texts, mels, embeds)
            # else:
            with torch.no_grad():
                _, mels_out, _, _ = model(texts, mels, embeds)

            for j, k in enumerate(idx):
                # Note: output mel files and their targets share file names, just in different folders
                mel_filename = train_synth_dir.joinpath(train_dataset.metadata[k][1])
                mel_out = mels_out[j].detach().cpu().numpy().T

                # Use the length of the ground-truth mel to remove padding from the generated mel
                mel_out = mel_out[:int(train_dataset.metadata[k][4])]

                # Write the spectrogram to disk
                np.save(mel_filename, mel_out, allow_pickle=False)

                # Write metadata into the synthesized file
                file.write("|".join(train_dataset.metadata[k]))

    # Generate dev GTA mels
    dev_meta_out_fpath = dev_out_dir / "synthesized.txt"
    with dev_meta_out_fpath.open("w") as file:
        for i, (texts, mels, embeds, idx) in tqdm(enumerate(dev_data_loader), total=len(dev_data_loader)):
            texts, mels, embeds = texts.to(device), mels.to(device), embeds.to(device)

            # Parallelize the model onto GPUs using a workaround due to a Python bug
            # if device.type == "cuda" and torch.cuda.device_count() > 1:
            #     _, mels_out, _, _ = data_parallel_workaround(model, texts, mels, embeds)
            # else:
            with torch.no_grad():
                _, mels_out, _, _ = model(texts, mels, embeds)

            for j, k in enumerate(idx):
                # Note: output mel files and their targets share file names, just in different folders
                mel_filename = dev_synth_dir.joinpath(dev_dataset.metadata[k][1])
                mel_out = mels_out[j].detach().cpu().numpy().T

                # Use the length of the ground-truth mel to remove padding from the generated mel
                mel_out = mel_out[:int(dev_dataset.metadata[k][4])]

                # Write the spectrogram to disk
                np.save(mel_filename, mel_out, allow_pickle=False)

                # Write metadata into the synthesized file
                file.write("|".join(dev_dataset.metadata[k]))
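

# Usage sketch: a minimal, hypothetical invocation assuming the standard RTVC
# dataset layout; in practice run_synthesis is driven by a CLI wrapper
# elsewhere in the repo. The paths and the `hparams` import are assumptions.
if __name__ == "__main__":
    from synthesizer.hparams import hparams  # assumed to expose the default hparams

    run_synthesis(in_dir=Path("datasets/SV2TTS/synthesizer"),            # placeholder path
                  out_dir=Path("datasets/SV2TTS/synthesizer"),           # placeholder path
                  syn_model_fpath=Path("saved_models/synthesizer.pt"),   # placeholder checkpoint
                  hparams=hparams)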