import torch | |
from text import symbols | |
class create_hparams(): | |
"""Create model hyperparameters. Parse nondefault from given string.""" | |
################################ | |
# CUDA Enable # | |
################################ | |
if torch.cuda.is_available() : | |
cuda_enabled = True | |
else : | |
cuda_enabled = False | |
################################ | |
# Experiment Parameters # | |
################################ | |
epochs = 100 | |
iters_per_checkpoint = 500 | |
seed= 1234 | |
dynamic_loss_scaling = True | |
fp16_run = False | |
distributed_run = False | |
dist_backend = "nccl" | |
dist_url = "tcp://localhost:54321" | |
cudnn_enabled = True | |
cudnn_benchmark = False | |
ignore_layers = ['embedding.weight'] | |
################################ | |
# Data Parameters # | |
################################ | |
load_mel_from_disk = False | |
training_files = 'filelists/transcript_train.txt' | |
validation_files = 'filelists/transcript_val.txt' | |
text_cleaners = ['japanese_cleaners'] | |
################################ | |
# Audio Parameters # | |
################################ | |
max_wav_value = 32768.0 | |
sampling_rate = 22050 | |
filter_length = 1024 | |
hop_length = 256 | |
win_length = 1024 | |
n_mel_channels = 80 | |
mel_fmin = 0.0 | |
mel_fmax = 8000.0 | |
################################ | |
# Model Parameters # | |
################################ | |
n_symbols = len(symbols) | |
symbols_embedding_dim = 512 | |
# Encoder parameters | |
encoder_kernel_size = 5 | |
encoder_n_convolutions = 3 | |
encoder_embedding_dim = 512 | |
# Decoder parameters | |
n_frames_per_step = 1 # currently only 1 is supported | |
decoder_rnn_dim = 1024 | |
prenet_dim = 256 | |
max_decoder_steps = 1000 | |
gate_threshold = 0.5 | |
p_attention_dropout = 0.1 | |
p_decoder_dropout = 0.1 | |
# Attention parameters | |
attention_rnn_dim = 1024 | |
attention_dim = 128 | |
# Location Layer parameters | |
attention_location_n_filters = 32 | |
attention_location_kernel_size = 31 | |
# Mel-post processing network parameters | |
postnet_embedding_dim = 512 | |
postnet_kernel_size = 5 | |
postnet_n_convolutions = 5 | |
################################ | |
# Optimization Hyperparameters # | |
################################ | |
use_saved_learning_rate = False | |
learning_rate = 1e-3 | |
weight_decay = 1e-6 | |
grad_clip_thresh = 1.0 | |
batch_size = 64 | |
mask_padding = True # set model's padded outputs to padded values | |