# SpecVQGAN-style VQ-VAE (VQGAN) training configuration for audio spectrograms.
---
# NOTE(review): indentation was reconstructed from a flattened original;
# verify nesting against the consuming loader (e.g. OmegaConf / instantiate_from_config).
model:
  base_learning_rate: 1.0e-06
  target: specvqgan.models.vqgan.VQModel
  params:
    embed_dim: 256  # presumably codebook vector dimensionality — confirm in VQModel
    n_embed: 256    # presumably number of codebook entries — confirm in VQModel
    # Encoder/decoder architecture settings.
    ddconfig:
      double_z: false
      z_channels: 256
      resolution: 848
      in_channels: 1  # single-channel input (matches disc_in_channels below)
      out_ch: 1
      ch: 128
      # Channel multipliers, one per resolution stage.
      ch_mult:
        - 1
        - 1
        - 2
        - 2
        - 4
      num_res_blocks: 2
      attn_resolutions:
        - 53
      dropout: 0.0
    lossconfig:
      target: specvqgan.modules.losses.vqperceptual.VQLPAPSWithDiscriminator_without_per
      params:
        disc_conditional: false
        disc_in_channels: 1
        disc_start: 50001  # presumably the step at which the discriminator loss activates — confirm in loss class
        disc_weight: 0.8
        codebook_weight: 1.0
        min_adapt_weight: 1.0
        max_adapt_weight: 1.0
        perceptual_weight: 0.0  # perceptual term weighted to zero

data:
  target: train.SpectrogramDataModuleFromConfig
  params:
    batch_size: 12
    num_workers: 8
    spec_dir_path: /apdcephfs_cq2/share_1297902/speech_user/shaunxliu/dongchao/code/VQ-VAE/audioset_pre
    sample_rate: 22050
    mel_num: 80
    spec_len: 860
    spec_crop_len: 848  # same value as model resolution above
    random_crop: false
    train:
      target: specvqgan.data.AllAudio.VASSpecsTrain
      params:
        specs_dataset_cfg:
          split_path: /apdcephfs_cq2/share_1297902/speech_user/shaunxliu/dongchao/code/VQ-VAE/audioset_pre
    validation:
      target: specvqgan.data.AllAudio.VASSpecsValidation
      params:
        specs_dataset_cfg:
          split_path: /apdcephfs_cq2/share_1297902/speech_user/shaunxliu/dongchao/code/VQ-VAE/audioset_pre