# Source: TTS_Yui / hparams (1).py
# Uploaded by Ritori -- "Upload folder using huggingface_hub" (commit 2d7dc7b, 2.55 kB)
import torch
from text import symbols
class create_hparams():
    """Hyperparameter namespace: every setting is a plain class attribute.

    The values below are evaluated once, when this class body executes.
    Calling ``create_hparams()`` takes no arguments and returns an object
    that reads these class-level defaults; nothing is parsed from a string
    or overridden here.

    Note: ``ignore_layers`` and ``text_cleaners`` are list objects stored on
    the class, so all instances share the same list.
    """

    # ------------------------------------------------------------------
    # CUDA
    # ------------------------------------------------------------------
    # True exactly when torch reports an available CUDA device.
    cuda_enabled = torch.cuda.is_available()

    # ------------------------------------------------------------------
    # Experiment
    # ------------------------------------------------------------------
    epochs = 100
    iters_per_checkpoint = 500
    seed = 1234
    dynamic_loss_scaling = True
    fp16_run = False
    distributed_run = False
    dist_backend = "nccl"
    dist_url = "tcp://localhost:54321"
    cudnn_enabled = True
    cudnn_benchmark = False
    ignore_layers = ["embedding.weight"]

    # ------------------------------------------------------------------
    # Data
    # ------------------------------------------------------------------
    load_mel_from_disk = False
    training_files = "filelists/transcript_train.txt"
    validation_files = "filelists/transcript_val.txt"
    text_cleaners = ["japanese_cleaners"]

    # ------------------------------------------------------------------
    # Audio
    # ------------------------------------------------------------------
    max_wav_value = 32768.0
    sampling_rate = 22050
    filter_length = 1024
    hop_length = 256
    win_length = 1024
    n_mel_channels = 80
    mel_fmin = 0.0
    mel_fmax = 8000.0

    # ------------------------------------------------------------------
    # Model
    # ------------------------------------------------------------------
    # Size of the symbol table imported from the project's ``text`` module.
    n_symbols = len(symbols)
    symbols_embedding_dim = 512

    # Encoder
    encoder_kernel_size = 5
    encoder_n_convolutions = 3
    encoder_embedding_dim = 512

    # Decoder
    n_frames_per_step = 1  # only a value of 1 is supported at the moment
    decoder_rnn_dim = 1024
    prenet_dim = 256
    max_decoder_steps = 1000
    gate_threshold = 0.5
    p_attention_dropout = 0.1
    p_decoder_dropout = 0.1

    # Attention
    attention_rnn_dim = 1024
    attention_dim = 128

    # Location layer
    attention_location_n_filters = 32
    attention_location_kernel_size = 31

    # Mel post-processing network
    postnet_embedding_dim = 512
    postnet_kernel_size = 5
    postnet_n_convolutions = 5

    # ------------------------------------------------------------------
    # Optimization
    # ------------------------------------------------------------------
    use_saved_learning_rate = False
    learning_rate = 1e-3
    weight_decay = 1e-6
    grad_clip_thresh = 1.0
    batch_size = 64
    mask_padding = True  # set the model's padded outputs to padded values