# tts_en_de_zerovox_alpha1 / modelcfg.yaml
# goooofy's picture
# Upload modelcfg.yaml
# ebe350e verified
# Audio / mel-spectrogram analysis settings.
# NOTE(review): the nesting under `audio` was reconstructed from a copy whose
# indentation had been stripped; confirm against the code that loads this file.
audio:
  # Written with an explicit mantissa dot on purpose: YAML 1.1 loaders
  # (e.g. PyYAML) resolve a bare `1e-10` as a string rather than a float.
  eps: 1.0e-10
  fft_size: 2048
  # Same value as win_length below.
  filter_length: 1200
  # One quarter of win_length.
  hop_size: 300
  log_base: 10.0
  # presumably in Hz — TODO confirm
  mel_fmax: 7600
  mel_fmin: 80
  num_mels: 80
  # presumably in Hz — TODO confirm
  sampling_rate: 24000
  win_length: 1200
  window: hann
# Language codes listed for this model. Both entries are plain strings;
# quote any future code that YAML 1.1 would read as a boolean (e.g. "no").
lang: [de, en]
# Network architecture hyper-parameters, grouped per sub-module.
# NOTE(review): the nesting below was reconstructed from a copy whose
# indentation had been stripped. Keys are alphabetical within every mapping,
# which is consistent with this layout, but the placement of `punct_emb_dim`
# and `spkemb` (siblings of `postnet` / `resnet`) should be confirmed against
# the code that loads this file.
# Keeping the sections nested matters: `encoder.n_heads` and `gst.n_heads`
# are distinct settings that collide as duplicate keys when flattened.
model:
  decoder:
    conv_filter_size: 1024
    conv_kernel_size: [9, 1]
    dropout: 0.2
    # Spelled `n_head` here, while encoder and gst use `n_heads`; left as-is
    # because the consumer reads these exact key names.
    n_head: 2
    n_layers: 6
    scln: true
  emb_dim: 512
  emb_reduction: 1
  encoder:
    depth: 2
    expansion: 2
    fs2_dropout: 0.2
    fs2_head: 2
    fs2_layer: 4
    kernel_size: 5
    kind: fastspeech2
    n_heads: 2
    ve_energy_quantization: linear
    ve_n_bins: 256
    ve_pitch_quantization: linear
    vp_dropout: 0.5
    vp_filter_size: 256
    vp_kernel_size: 3
  gst:
    n_heads: 8
    n_style_tokens: 2000
    ref_enc_filters: [32, 32, 64, 64, 128, 128]
  max_seq_len: 1500
  postnet:
    # NOTE(review): a value of 0 may mean the postnet is disabled — confirm.
    postnet_embedding_dim: 0
    postnet_kernel_size: 5
    postnet_n_convolutions: 5
  punct_emb_dim: 16
  resnet:
    encoder_type: ASP
    # `layers` and `num_filters` both have four entries.
    layers: [3, 4, 6, 3]
    num_filters: [32, 64, 128, 256]
  spkemb:
    kind: ResNetSE34V2
# Minimum / maximum values for energy and pitch.
# NOTE(review): presumably dataset statistics used for normalisation or
# quantisation — confirm with the consumer. The nesting under `stats` was
# reconstructed from a copy whose indentation had been stripped.
stats:
  energy_max: 305.466064453125
  energy_min: -2.440225667951865
  pitch_max: 656.2979356469282
  pitch_min: -45.333167047555264