---
# Training configuration for the run written to `outdir` (LJSpeech recipe
# paths; base recipe file named by `config`).
#
# NOTE(review): nesting was reconstructed from a flattened copy of this file.
# Keys are alphabetically sorted at every level, so each nested mapping ends
# where the top-level alphabetical sequence resumes. Confirm against the
# original dump if it is available.
allow_cache: true
batch_max_steps: 8192
batch_max_steps_valid: 81920
batch_size: 16
config: egs/ljspeech/conf/multi_stft_melgan.yaml
dev_dir: ./egs/ljspeech/dump/valid
discriminator_mixed_precision: false
discriminator_optimizer_params:
  beta_1: 0.5
  beta_2: 0.9
  decay_steps: 4000000
  # initial and end learning rates are equal here.
  end_learning_rate: 0.0001
  initial_learning_rate: 0.0001
  warmup_steps: 0
  weight_decay: 0.0
discriminator_params:
  downsample_pooling: AveragePooling1D
  downsample_pooling_params:
    pool_size: 4
    strides: 2
  downsample_scales:
    - 4
    - 4
    - 4
    - 4
  filters: 16
  is_weight_norm: false
  kernel_sizes:
    - 5
    - 3
  max_downsample_filters: 1024
  nonlinear_activation: LeakyReLU
  nonlinear_activation_params:
    alpha: 0.2
  out_channels: 1
  scales: 3
# Same step count as the checkpoint named in `resume` (ckpt-100000).
discriminator_train_start_steps: 100000
eval_interval_steps: 5000
format: npy
generator_mixed_precision: false
generator_optimizer_params:
  beta_1: 0.5
  beta_2: 0.9
  decay_steps: 100000
  end_learning_rate: 0.0001
  initial_learning_rate: 0.001
  warmup_steps: 0
  weight_decay: 0.0
generator_params:
  filters: 512
  is_weight_norm: false
  kernel_size: 7
  out_channels: 1
  stack_kernel_size: 3
  stacks: 3
  # Product of the scales (8 * 8 * 2 * 2 = 256) equals `hop_size`;
  # presumably the two must be kept in sync -- TODO confirm.
  upsample_scales:
    - 8
    - 8
    - 2
    - 2
hop_size: 256
is_shuffle: true
lambda_adv: 4.0
lambda_feat_match: 10.0
log_interval_steps: 200
num_save_intermediate_results: 1
outdir: ./egs/ljspeech/exp/melgan.stft.v2/
remove_short_samples: true
resume: egs/ljspeech/exp/melgan.stft.v2/checkpoints/ckpt-100000
sampling_rate: 22050
save_interval_steps: 20000
# Three STFT settings; the i-th entry of each list below presumably forms
# one resolution (e.g. fft 1024 / frame 600 / step 120) -- TODO confirm.
stft_loss_params:
  fft_lengths:
    - 1024
    - 2048
    - 512
  frame_lengths:
    - 600
    - 1200
    - 240
  frame_steps:
    - 120
    - 240
    - 50
train_dir: ./egs/ljspeech/dump/train/
train_max_steps: 4000000
use_norm: true
verbose: 1
# Quoted so it is unambiguously a string for every loader.
version: '0.3.4'