# Diffsound / 2022-12-02T00-49-12-project.yaml
# Dongchao's picture
# Upload 3 files
# 60b9091
# raw
# history blame
# 1.49 kB
# Model section: names the class to build (`target`) and the keyword
# arguments handed to it (`params`). Nesting is significant: `target` and
# `params` appear again under `lossconfig` and in the `data` section, so they
# must stay inside their parent mapping or they collide as duplicate keys.
model:
  base_learning_rate: 1.0e-06
  target: specvqgan.models.vqgan.VQModel
  params:
    embed_dim: 256
    n_embed: 256

    # Encoder/decoder settings.
    ddconfig:
      double_z: false
      z_channels: 256
      # Same value as data.params.spec_crop_len (848).
      resolution: 848
      in_channels: 1
      out_ch: 1
      ch: 128
      # Five entries; presumably one channel multiplier per resolution level
      # -- TODO confirm against the encoder implementation.
      ch_mult:
        - 1
        - 1
        - 2
        - 2
        - 4
      num_res_blocks: 2
      # NOTE(review): 53 == 848 / 2**4, which looks like the resolution after
      # four downsampling steps -- confirm before changing `resolution` or
      # `ch_mult`.
      attn_resolutions:
        - 53
      dropout: 0.0

    # Loss settings; uses the same target/params layout as the model itself.
    lossconfig:
      target: specvqgan.modules.losses.vqperceptual.VQLPAPSWithDiscriminator_without_per
      params:
        disc_conditional: false
        disc_in_channels: 1
        # Presumably the training step at which the discriminator term is
        # switched on -- verify in the loss class.
        disc_start: 50001
        disc_weight: 0.8
        codebook_weight: 1.0
        # min and max are equal, so any value clamped to this range is 1.0.
        min_adapt_weight: 1.0
        max_adapt_weight: 1.0
        perceptual_weight: 0.0
# Data section: names the data-module class (`target`) and its arguments
# (`params`). The `train` and `validation` entries each carry their own
# target/params pair, so they must be nested under `data.params`; left at the
# top level their keys collide and only the last `target`/`params` survives.
data:
  target: train.SpectrogramDataModuleFromConfig
  params:
    batch_size: 12
    num_workers: 8
    # NOTE(review): absolute, machine-specific path -- presumably has to be
    # changed to run anywhere else.
    spec_dir_path: /apdcephfs_cq2/share_1297902/speech_user/shaunxliu/dongchao/code/VQ-VAE/audioset_pre
    sample_rate: 22050
    mel_num: 80
    spec_len: 860
    # Same value as model.params.ddconfig.resolution (848).
    spec_crop_len: 848
    random_crop: false

    train:
      target: specvqgan.data.AllAudio.VASSpecsTrain
      params:
        specs_dataset_cfg:
          # Identical to spec_dir_path above. NOTE(review): confirm whether
          # the dataset class expects a directory or a split-list file here.
          split_path: /apdcephfs_cq2/share_1297902/speech_user/shaunxliu/dongchao/code/VQ-VAE/audioset_pre

    validation:
      target: specvqgan.data.AllAudio.VASSpecsValidation
      params:
        specs_dataset_cfg:
          # Same path as the train split -- verify that train and validation
          # are separated inside the dataset classes.
          split_path: /apdcephfs_cq2/share_1297902/speech_user/shaunxliu/dongchao/code/VQ-VAE/audioset_pre