---
# Model / training hyperparameters.
# Components (backbone, feature_extractor, head) are declared as a
# `class_path` naming the class plus `init_args` holding its constructor
# keyword arguments.

# Backbone network.
backbone:
  class_path: vocos.models.VocosBackbone
  init_args:
    adanorm_num_embeddings: null
    dim: 1024
    # Equals feature_extractor.init_args.n_mels (128) in this config;
    # presumably the two must stay in sync -- TODO confirm.
    input_channels: 128
    intermediate_dim: 2048
    layer_scale_init_value: null
    num_layers: 8

decay_mel_coeff: false
enable_discriminator: true
# NOTE(review): key is spelled "periodicty" (sic). Left unchanged because the
# key name is read by the consumer at runtime -- confirm the expected spelling
# before renaming.
evaluate_periodicty: true
evaluate_pesq: true
evaluate_utmos: true

# Input feature extraction.
feature_extractor:
  class_path: vocos.feature_extractors.MelSpectrogramFeatures
  init_args:
    hop_length: 256
    n_fft: 2048
    n_mels: 128
    padding: center
    # Same value as the top-level `sample_rate` below.
    sample_rate: 48000

generator_period: 3
grad_acc: 1

# Output head. `hop_length`, `n_fft` and `padding` carry the same values as in
# feature_extractor.init_args, and `dim` equals backbone.init_args.dim;
# presumably these must match -- TODO confirm.
head:
  class_path: vocos.heads.ISTFTHead
  init_args:
    dim: 1024
    hop_length: 256
    n_fft: 2048
    padding: center

# Optimisation and loss weighting.
initial_learning_rate: 0.0003
mel_loss_coeff: 15.0
mrd_loss_coeff: 0.1
num_warmup_steps: 500

# Pre-training schedule (step counts) and optional starting checkpoint.
pretrain_decoupled_steps: 0
pretrain_disc_steps: 500
pretrain_mel_steps: 0
pretrained_ckpt: null

sample_rate: 48000