# Vocoder configuration: three components, each given as a `class_path`
# (dotted import path) plus the `init_args` for that class.

# Mel-spectrogram front end.
feature_extractor:
  class_path: vocos.feature_extractors.MelSpectrogramFeatures
  init_args:
    sample_rate: 44100
    n_fft: 2048
    hop_length: 512
    n_mels: 80
    padding: same
    f_min: 0
    # NOTE(review): f_max is 8000 while sample_rate is 44100 (Nyquist 22050);
    # confirm this band limit is intentional.
    f_max: 8000
    # NOTE(review): f_min / f_max / norm / mel_scale may not be accepted by the
    # upstream MelSpectrogramFeatures constructor; presumably this targets a
    # fork that forwards them to the mel transform - TODO confirm.
    norm: "slaney"
    mel_scale: "slaney"

# Backbone network.
backbone:
  class_path: vocos.models.VocosBackbone
  init_args:
    # Same value as feature_extractor.init_args.n_mels (80); presumably these
    # must stay in sync - verify before changing either.
    input_channels: 80
    dim: 512
    intermediate_dim: 1536
    num_layers: 8

# Inverse-STFT output head.
head:
  class_path: vocos.heads.ISTFTHead
  init_args:
    # Same value as backbone.init_args.dim (512).
    dim: 512
    # n_fft / hop_length / padding below equal the feature_extractor values;
    # presumably they must match - verify before changing.
    n_fft: 2048
    hop_length: 512
    padding: same