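# NOTE(review): the three lines below look like hosting-page residue captured
# together with this file rather than configuration. They are kept as comments
# so that nothing is lost and the document can be parsed as YAML.
# voidful's picture
# Upload 21 files
# 63e3df4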
# Run-level settings: checkpoint/config paths and batch dimensions.
analyzer: exp/autoencoder/symAD_vctk_48000_hop300/checkpoint-200000steps.pkl
batch_length: 9600
batch_size: 16
config: config/vocoder/AudioDec_v1_symAD_vctk_48000_hop300_clean.yaml

# Dataset root plus the directory name used for each split.
# `path` and `subset` belong to `data`; the split names belong to `subset`.
data:
  path: /mnt/home/yichiaowu/datasets/vctk_noisy/48000
  subset:
    test: clean_testset_wav
    train: clean_trainset_84spk_wav
    valid: clean_validset_84spk_wav

# NOTE(review): this is the quoted string 'False', not a YAML boolean.
# Presumably the consumer compares against the string; confirm before
# converting it to a real `false`.
disable_cudnn: 'False'
# Discriminator-side adversarial loss options.
discriminator_adv_loss_params:
  average_by_discriminators: false

# NOTE(review): -1 presumably disables gradient-norm clipping; confirm
# against the trainer.
discriminator_grad_norm: -1

# Optimizer hyper-parameters for the discriminator; the optimizer class is
# selected by `discriminator_optimizer_type`.
discriminator_optimizer_params:
  betas:
    - 0.5
    - 0.9
  lr: 0.0002
  weight_decay: 0.0
discriminator_optimizer_type: Adam
# Discriminator architecture. Two parameter groups are nested here:
# `period_discriminator_params` (used with `periods`) and
# `scale_discriminator_params` (used with `scales` and the
# `scale_downsample_pooling*` keys). Both groups define `bias`, `channels`,
# `kernel_sizes`, etc., so they must stay in separate mappings.
discriminator_params:
  follow_official_norm: true

  # Settings shared by every period discriminator.
  period_discriminator_params:
    bias: true
    channels: 32
    downsample_scales:
      - 3
      - 3
      - 3
      - 3
      - 1
    in_channels: 1
    kernel_sizes:
      - 5
      - 3
    max_downsample_channels: 1024
    nonlinear_activation: LeakyReLU
    nonlinear_activation_params:
      negative_slope: 0.1
    out_channels: 1
    use_spectral_norm: false
    use_weight_norm: true

  # One entry per period discriminator.
  periods:
    - 2
    - 3
    - 5
    - 7
    - 11

  # Settings shared by every scale discriminator.
  scale_discriminator_params:
    bias: true
    channels: 128
    downsample_scales:
      - 4
      - 4
      - 4
      - 4
      - 1
    in_channels: 1
    kernel_sizes:
      - 15
      - 41
      - 5
      - 3
    max_downsample_channels: 1024
    max_groups: 16
    nonlinear_activation: LeakyReLU
    nonlinear_activation_params:
      negative_slope: 0.1
    out_channels: 1

  # Pooling layer type and its arguments for the scale discriminators.
  scale_downsample_pooling: AvgPool1d
  scale_downsample_pooling_params:
    kernel_size: 4
    padding: 2
    stride: 2

  # Number of scale discriminators.
  scales: 3
# Learning-rate schedule for the discriminator: the rate is scaled by
# `gamma` at each listed step milestone (scheduler class chosen by
# `discriminator_scheduler_type`).
discriminator_scheduler_params:
  gamma: 0.5
  milestones:
    - 200000
    - 400000
    - 600000
    - 800000
discriminator_scheduler_type: MultiStepLR
discriminator_train_start_steps: 0

# Evaluation cadence (in steps) and the experiment root directory.
eval_interval_steps: 1000
exp_root: exp

# Feature-matching loss options.
feat_match_loss_params:
  average_by_discriminators: false
  average_by_layers: false
  include_final_outputs: false
# Generator-side adversarial loss options.
generator_adv_loss_params:
  average_by_discriminators: false

# NOTE(review): -1 presumably disables gradient-norm clipping; confirm
# against the trainer.
generator_grad_norm: -1

# Optimizer hyper-parameters for the generator; the optimizer class is
# selected by `generator_optimizer_type`.
generator_optimizer_params:
  betas:
    - 0.5
    - 0.9
  lr: 0.0002
  weight_decay: 0.0
generator_optimizer_type: Adam
# Generator architecture.
generator_params:
  bias: true
  channels: 512
  groups: 3
  in_channels: 64
  kernel_size: 7
  nonlinear_activation: LeakyReLU
  nonlinear_activation_params:
    negative_slope: 0.1
  out_channels: 1

  # A list of dilation lists: one inner list per entry of
  # `resblock_kernel_sizes`. Written in flow style so the nesting is explicit.
  resblock_dilations:
    - [1, 3, 5]
  resblock_kernel_sizes:
    - 11

  stats: stats/symAD_vctk_48000_hop300_clean.npy

  upsample_kernel_sizes:
    - 10
    - 10
    - 8
    - 6
  # 5 * 5 * 4 * 3 = 300. NOTE(review): presumably this must equal the hop
  # size of 300 used elsewhere in this file; confirm.
  upsample_scales:
    - 5
    - 5
    - 4
    - 3
  use_additional_convs: true
  use_weight_norm: true
# Learning-rate schedule for the generator: the rate is scaled by `gamma`
# at each listed step milestone (scheduler class chosen by
# `generator_scheduler_type`).
generator_scheduler_params:
  gamma: 0.5
  milestones:
    - 200000
    - 400000
    - 600000
    - 800000
generator_scheduler_type: MultiStepLR
generator_train_start_steps: 1

# Weights applied to the individual loss terms.
lambda_adv: 1.0
lambda_feat_match: 2.0
lambda_mel_loss: 45.0
lambda_shape_loss: 45.0
lambda_stft_loss: 45.0

# Logging cadence in steps.
log_interval_steps: 100
# Mel-spectrogram loss settings. The list-valued keys hold one entry per
# resolution; a single resolution is configured here.
mel_loss_params:
  fft_sizes:
    - 2048
  fmax: 24000  # half of `fs`
  fmin: 0
  fs: 48000
  hop_sizes:
    - 300
  log_base: null
  num_mels: 80
  win_lengths:
    - 2048
  window: hann_window

# Model selection, data-loader settings, output/resume paths and run seed.
model_type: HiFiGAN
num_workers: 2
outdir: exp/vocoder/AudioDec_v1_symAD_vctk_48000_hop300_clean
pin_memory: true
resume: exp/vocoder/AudioDec_v1_symAD_vctk_48000_hop300_clean/checkpoint-100000steps.pkl
sampling_rate: 48000
save_interval_steps: 100000
seed: 1337
# Waveform-shape loss settings.
shape_loss_params:
  winlen:
    - 300

# Multi-resolution STFT loss settings: the three lists are parallel, one
# entry per resolution.
stft_loss_params:
  fft_sizes:
    - 1024
    - 2048
    - 512
  hop_sizes:
    - 120
    - 240
    - 50
  win_lengths:
    - 600
    - 1200
    - 240
  window: hann_window

tag: vocoder/AudioDec_v1_symAD_vctk_48000_hop300_clean

# NOTE(review): the scheduler milestones in this file go up to 800000, which
# is beyond this step limit; confirm that is intended.
train_max_steps: 500000
train_mode: vocoder

# Loss switches. The shape and STFT losses are turned off here even though
# `lambda_shape_loss` / `lambda_stft_loss` weights are set in this file.
use_feat_match_loss: true
use_mel_loss: true
use_shape_loss: false
use_stft_loss: false