model:
  base_learning_rate: 1.0e-06
  target: specvqgan.models.vqgan.VQModel
  params:
    embed_dim: 256
    n_embed: 256
    ddconfig:
      double_z: false
      z_channels: 256
      resolution: 848
      in_channels: 1
      out_ch: 1
      ch: 128
      ch_mult:
      - 1
      - 1
      - 2
      - 2
      - 4
      num_res_blocks: 2
      attn_resolutions:
      - 53
      dropout: 0.0
    lossconfig:
      target: specvqgan.modules.losses.vqperceptual.VQLPAPSWithDiscriminator_without_per
      params:
        disc_conditional: false
        disc_in_channels: 1
        disc_start: 50001
        disc_weight: 0.8
        codebook_weight: 1.0
        min_adapt_weight: 1.0
        max_adapt_weight: 1.0
        perceptual_weight: 0.0

data:
  target: train.SpectrogramDataModuleFromConfig
  params:
    batch_size: 12
    num_workers: 8
    spec_dir_path: /apdcephfs_cq2/share_1297902/speech_user/shaunxliu/dongchao/code/VQ-VAE/audioset_pre
    sample_rate: 22050
    mel_num: 80
    spec_len: 860
    spec_crop_len: 848
    random_crop: false
    train:
      target: specvqgan.data.AllAudio.VASSpecsTrain
      params:
        specs_dataset_cfg:
          split_path: /apdcephfs_cq2/share_1297902/speech_user/shaunxliu/dongchao/code/VQ-VAE/audioset_pre
    validation:
      target: specvqgan.data.AllAudio.VASSpecsValidation
      params:
        specs_dataset_cfg:
          split_path: /apdcephfs_cq2/share_1297902/speech_user/shaunxliu/dongchao/code/VQ-VAE/audioset_pre
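For reference, a minimal sketch of how a config in this shape is typically consumed, assuming the taming-transformers-style convention (which SpecVQGAN follows) where each `target` is a dotted import path instantiated with its sibling `params`. The `instantiate_from_config` helper below mirrors the one defined in that codebase but is written out here as an assumption, and the config path `configs/vqgan_audioset.yaml` is hypothetical.

```python
# Sketch: load the YAML above and build the model / data module from it.
# Assumes OmegaConf and the `target`/`params` instantiation convention.
import importlib

from omegaconf import OmegaConf


def instantiate_from_config(config):
    """Import `config.target` and call it with `config.params`.

    Hypothetical helper mirroring the taming-transformers / SpecVQGAN pattern:
    the dotted path before the last '.' is the module, the rest is the class.
    """
    module_name, cls_name = config["target"].rsplit(".", 1)
    cls = getattr(importlib.import_module(module_name), cls_name)
    return cls(**config.get("params", dict()))


if __name__ == "__main__":
    cfg = OmegaConf.load("configs/vqgan_audioset.yaml")  # hypothetical path to this file
    model = instantiate_from_config(cfg.model)  # specvqgan.models.vqgan.VQModel
    data = instantiate_from_config(cfg.data)    # train.SpectrogramDataModuleFromConfig
    print(type(model).__name__, type(data).__name__)
```

Under this convention the nested `train` and `validation` blocks are resolved the same way by the data module itself, so swapping datasets only requires editing their `target` lines.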