File size: 2,546 Bytes
a722365
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
import torch
from text import symbols


class create_hparams():
    """Static namespace of model hyperparameters.

    Note: despite the function-like name (kept for backward compatibility
    with existing callers), this is a plain class used as a constant
    namespace — nothing is parsed or instantiated; all values are read as
    class attributes.
    """
    ################################
    #       CUDA Enable            #
    ################################
    # Direct boolean assignment; equivalent to the if/else form but idiomatic.
    cuda_enabled = torch.cuda.is_available()

    ################################
    # Experiment Parameters        #
    ################################
    epochs = 100
    iters_per_checkpoint = 500
    seed = 1234
    dynamic_loss_scaling = True
    fp16_run = False
    distributed_run = False
    dist_backend = "nccl"
    dist_url = "tcp://localhost:54321"
    cudnn_enabled = True
    cudnn_benchmark = False
    # Layers skipped when warm-starting from a checkpoint (e.g. when the
    # symbol set, and hence the embedding size, differs between runs).
    ignore_layers = ['embedding.weight']

    ################################
    # Data Parameters             #
    ################################
    load_mel_from_disk = False
    training_files = 'filelists/transcript_train.txt'
    validation_files = 'filelists/transcript_val.txt'
    text_cleaners = ['japanese_cleaners']

    ################################
    # Audio Parameters             #
    ################################
    max_wav_value = 32768.0
    sampling_rate = 22050
    filter_length = 1024
    hop_length = 256
    win_length = 1024
    n_mel_channels = 80
    mel_fmin = 0.0
    mel_fmax = 8000.0

    ################################
    # Model Parameters             #
    ################################
    # Vocabulary size is derived from the project-level symbol table.
    n_symbols = len(symbols)
    symbols_embedding_dim = 512

    # Encoder parameters
    encoder_kernel_size = 5
    encoder_n_convolutions = 3
    encoder_embedding_dim = 512

    # Decoder parameters
    n_frames_per_step = 1  # currently only 1 is supported
    decoder_rnn_dim = 1024
    prenet_dim = 256
    max_decoder_steps = 1000
    gate_threshold = 0.5
    p_attention_dropout = 0.1
    p_decoder_dropout = 0.1

    # Attention parameters
    attention_rnn_dim = 1024
    attention_dim = 128
    # Location Layer parameters
    attention_location_n_filters = 32
    attention_location_kernel_size = 31

    # Mel-post processing network parameters
    postnet_embedding_dim = 512
    postnet_kernel_size = 5
    postnet_n_convolutions = 5

    ################################
    # Optimization Hyperparameters #
    ################################
    use_saved_learning_rate = False
    learning_rate = 1e-3
    weight_decay = 1e-6
    grad_clip_thresh = 1.0
    batch_size = 64
    mask_padding = True  # set model's padded outputs to padded values