# NOTE(review): the three lines originally here were web-viewer residue, not
# configuration data: a reported file size (953 bytes), what appear to be
# revision hashes (feec0bf, 73736ed, feec0bf), and a 1-40 line-number gutter.
# Transformer settings: layer counts, head counts, hidden sizes, convolution
# filter/kernel sizes and dropout rates for the encoder and the decoder.
# NOTE(review): nesting restored from a flattened copy; the meaning of each
# value is inferred from its key name — confirm against the consuming code.
transformer:
  encoder_layer: 4
  encoder_head: 2
  encoder_hidden: 256
  decoder_layer: 6
  decoder_head: 2
  decoder_hidden: 256
  conv_filter_size: 1024
  conv_kernel_size: [9, 1]
  encoder_dropout: 0.2
  decoder_dropout: 0.2
# Variance predictor settings: filter size, kernel size and dropout rate.
# NOTE(review): nesting restored from a flattened copy — confirm that these
# three keys are read from under `variance_predictor`.
variance_predictor:
  filter_size: 256
  kernel_size: 3
  dropout: 0.5
# Variance embedding settings: how pitch and energy values are quantized, and
# the number of bins.
# NOTE(review): nesting restored from a flattened copy — confirm that these
# keys are read from under `variance_embedding`.
variance_embedding:
  # Supported values: 'linear' or 'log'. 'log' is allowed only if the pitch
  # values are not normalized during preprocessing.
  pitch_quantization: "linear"
  # Supported values: 'linear' or 'log'. 'log' is allowed only if the energy
  # values are not normalized during preprocessing.
  energy_quantization: "linear"
  n_bins: 256
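# Disabled `gst` section, kept for reference. Uncomment every line below
# (removing only the leading "# ") to restore it as a nested mapping.
# gst:
#   use_gst: False
#   conv_filters: [32, 32, 64, 64, 128, 128]
#   gru_hidden: 128
#   token_size: 128
#   n_style_token: 10
#   attn_head: 4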
# Global switches and limits.
# NOTE(review): assumed to be top-level keys rather than members of a
# preceding section — confirm against the code that loads this file.
# Booleans are written in canonical lowercase form.
multi_speaker: true
multi_emotion: true
max_seq_len: 1000
# Vocoder selection.
# NOTE(review): nesting restored from a flattened copy — confirm that `model`
# and `speaker` are read from under `vocoder`.
vocoder:
  # Supported values: 'HiFi-GAN', 'MelGAN'.
  model: "HiFi-GAN"
  # Supported values: 'LJSpeech', 'universal'.
  speaker: "universal"