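# Web-page residue captured together with this file; it is not part of the
# configuration and is kept here only as a comment:
#   Ionut-Bostan's picture
#   Upload 82 files
#   feec0bf
#   raw
#   history blame
#   975 Bytes
---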
# Encoder/decoder hyperparameters for the transformer section.
# These keys must be indented under `transformer`: at column 0 they parse as
# independent top-level keys and `transformer` itself loads as null.
transformer:
  encoder_layer: 4
  encoder_head: 2
  encoder_hidden: 256
  decoder_layer: 6
  decoder_head: 2
  decoder_hidden: 256
  conv_filter_size: 1024
  # NOTE(review): two-element list; presumably one kernel size per
  # convolution in the feed-forward block — confirm against the model code.
  conv_kernel_size: [9, 1]
  encoder_dropout: 0.2
  decoder_dropout: 0.2
# Settings for the variance predictor section.
# The three keys must be indented under `variance_predictor`; at column 0
# they become unrelated top-level keys and the section loads as null.
variance_predictor:
  filter_size: 256
  kernel_size: 3
  dropout: 0.5
# Quantization settings for the pitch/energy embeddings.
# The keys must be indented under `variance_embedding`; at column 0 they
# become unrelated top-level keys and the section loads as null.
variance_embedding:
  # Supports 'linear' or 'log'. 'log' is allowed only if the pitch values
  # are not normalized during preprocessing.
  pitch_quantization: "linear"
  # Supports 'linear' or 'log'. 'log' is allowed only if the energy values
  # are not normalized during preprocessing.
  energy_quantization: "linear"
  n_bins: 256
# Disabled (commented-out) `gst` section, kept for reference:
# gst:
#   use_gst: False
#   conv_filters: [32, 32, 64, 64, 128, 128]
#   gru_hidden: 128
#   token_size: 128
#   n_style_token: 10
#   attn_head: 4
# Top-level boolean switches (written in canonical lowercase form).
multi_speaker: true
multi_emotion: false
bert_emotion: false

# NOTE(review): the unit of this limit is not visible in this file —
# confirm against the code that reads it.
max_seq_len: 1000
# Vocoder selection.
# `model` and `speaker` must be indented under `vocoder`; at column 0 they
# become unrelated top-level keys and `vocoder` itself loads as null.
vocoder:
  # Supports 'HiFi-GAN' or 'MelGAN'.
  model: "HiFi-GAN"
  # Supports 'LJSpeech' or 'universal'.
  speaker: "universal"