Upload modelcfg.yaml
Browse files- modelcfg.yaml +63 -0
modelcfg.yaml
ADDED
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
audio:
|
2 |
+
eps: 1e-10
|
3 |
+
fft_size: 2048
|
4 |
+
filter_length: 1200
|
5 |
+
hop_size: 300
|
6 |
+
log_base: 10.0
|
7 |
+
mel_fmax: 7600
|
8 |
+
mel_fmin: 80
|
9 |
+
num_mels: 80
|
10 |
+
sampling_rate: 24000
|
11 |
+
win_length: 1200
|
12 |
+
window: hann
|
13 |
+
lang:
|
14 |
+
- de
|
15 |
+
- en
|
16 |
+
model:
|
17 |
+
decoder:
|
18 |
+
conv_filter_size: 1024
|
19 |
+
conv_kernel_size:
|
20 |
+
- 9
|
21 |
+
- 1
|
22 |
+
dropout: 0.2
|
23 |
+
n_head: 2
|
24 |
+
n_layers: 6
|
25 |
+
scln: true
|
26 |
+
emb_dim: 384
|
27 |
+
emb_reduction: 1
|
28 |
+
encoder:
|
29 |
+
depth: 2
|
30 |
+
expansion: 2
|
31 |
+
fs2_dropout: 0.2
|
32 |
+
fs2_head: 2
|
33 |
+
fs2_layer: 4
|
34 |
+
kernel_size: 5
|
35 |
+
kind: fastspeech2
|
36 |
+
n_heads: 2
|
37 |
+
ve_energy_quantization: linear
|
38 |
+
ve_n_bins: 256
|
39 |
+
ve_pitch_quantization: linear
|
40 |
+
vp_dropout: 0.5
|
41 |
+
vp_filter_size: 256
|
42 |
+
vp_kernel_size: 3
|
43 |
+
gst:
|
44 |
+
n_heads: 8
|
45 |
+
n_style_tokens: 2000
|
46 |
+
ref_enc_filters:
|
47 |
+
- 32
|
48 |
+
- 32
|
49 |
+
- 64
|
50 |
+
- 64
|
51 |
+
- 128
|
52 |
+
- 128
|
53 |
+
max_seq_len: 1500
|
54 |
+
postnet:
|
55 |
+
postnet_embedding_dim: 0
|
56 |
+
postnet_kernel_size: 5
|
57 |
+
postnet_n_convolutions: 5
|
58 |
+
punct_emb_dim: 16
|
59 |
+
stats:
|
60 |
+
energy_max: 305.466064453125
|
61 |
+
energy_min: -2.440225667951865
|
62 |
+
pitch_max: 656.2979356469282
|
63 |
+
pitch_min: -45.333167047555264
|