# Yue_tacotron2 / hparams.py
import torch
from text import symbols


class create_hparams:
    """Model hyperparameters for Tacotron 2 training, exposed as class attributes."""

    ################################
    # CUDA Enable                  #
    ################################
    # True when PyTorch can see a CUDA-capable GPU.
    cuda_enabled = torch.cuda.is_available()

    ################################
    # Experiment Parameters        #
    ################################
    epochs = 100
    iters_per_checkpoint = 500
    seed = 1234
    dynamic_loss_scaling = True
    fp16_run = False
    distributed_run = False
    dist_backend = "nccl"
    dist_url = "tcp://localhost:54321"
    cudnn_enabled = True
    cudnn_benchmark = False
    # Parameter names to skip when warm-starting from a pretrained checkpoint
    # (typically so the text embedding can be re-learned for a new symbol set).
    ignore_layers = ['embedding.weight']

    ################################
    # Data Parameters              #
    ################################
    load_mel_from_disk = False
    training_files = 'filelists/transcript_train.txt'
    validation_files = 'filelists/transcript_val.txt'
    text_cleaners = ['japanese_cleaners']
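    # Each filelist is expected to hold one utterance per line, pairing an
    # audio path with its transcript (commonly "path/to/file.wav|transcript");
    # transcripts are normalized by the cleaners named in text_cleaners.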

    ################################
    # Audio Parameters             #
    ################################
    max_wav_value = 32768.0
    sampling_rate = 22050
    filter_length = 1024
    hop_length = 256
    win_length = 1024
    n_mel_channels = 80
    mel_fmin = 0.0
    mel_fmax = 8000.0
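    # At 22,050 Hz these values give a 1024/22050 ≈ 46 ms analysis window with
    # a 256/22050 ≈ 11.6 ms hop (about 86 mel frames per second of audio), and
    # the 80 mel filters cover 0-8 kHz.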

    ################################
    # Model Parameters             #
    ################################
    n_symbols = len(symbols)
    symbols_embedding_dim = 512

    # Encoder parameters
    encoder_kernel_size = 5
    encoder_n_convolutions = 3
    encoder_embedding_dim = 512

    # Decoder parameters
    n_frames_per_step = 1  # currently only 1 is supported
    decoder_rnn_dim = 1024
    prenet_dim = 256
    max_decoder_steps = 1000
    gate_threshold = 0.5
    p_attention_dropout = 0.1
    p_decoder_dropout = 0.1

    # Attention parameters
    attention_rnn_dim = 1024
    attention_dim = 128

    # Location Layer parameters
    attention_location_n_filters = 32
    attention_location_kernel_size = 31

    # Mel-post processing network parameters
    postnet_embedding_dim = 512
    postnet_kernel_size = 5
    postnet_n_convolutions = 5

    ################################
    # Optimization Hyperparameters #
    ################################
    use_saved_learning_rate = False
    learning_rate = 1e-3
    weight_decay = 1e-6
    grad_clip_thresh = 1.0
    batch_size = 64
    mask_padding = True  # set model's padded outputs to padded values
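

# Usage sketch: one way a training or inference script might read these
# hyperparameters. The torchaudio MelSpectrogram below is only an illustration
# of what the audio settings describe, not necessarily how this project
# computes mel spectrograms; torchaudio and this __main__ demo are assumptions
# for illustration, not dependencies confirmed by the repository.
if __name__ == "__main__":
    hparams = create_hparams()

    # Hyperparameters are plain class attributes, readable from an instance.
    print("epochs:", hparams.epochs)
    print("batch size:", hparams.batch_size)
    print("device:", "cuda" if hparams.cuda_enabled else "cpu")

    try:
        import torchaudio

        # Build a mel-spectrogram transform matching the audio parameters above.
        mel_transform = torchaudio.transforms.MelSpectrogram(
            sample_rate=hparams.sampling_rate,
            n_fft=hparams.filter_length,
            win_length=hparams.win_length,
            hop_length=hparams.hop_length,
            f_min=hparams.mel_fmin,
            f_max=hparams.mel_fmax,
            n_mels=hparams.n_mel_channels,
        )
        one_second = torch.zeros(1, hparams.sampling_rate)
        # Expected shape: (1, n_mel_channels, ~sampling_rate / hop_length frames).
        print("mel shape for 1 s of silence:", mel_transform(one_second).shape)
    except ImportError:
        print("torchaudio not installed; skipping the mel-spectrogram example")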