---
# Training configuration for a VQ model on spectrograms.
#
# Every `target` is a dotted import path and the sibling `params` mapping holds
# the arguments for it — presumably consumed by an instantiate-from-config
# helper; verify against the loader.
#
# NOTE(review): the nesting below was reconstructed from the target/params
# convention after the original indentation was lost. Keys and values are
# unchanged; confirm the hierarchy against the consuming code.

model:
  base_learning_rate: 1.0e-06
  target: specvqgan.models.vqgan.VQModel
  params:
    embed_dim: 256
    n_embed: 256

    # Encoder/decoder settings.
    ddconfig:
      double_z: false
      z_channels: 256
      # Same value as data.params.spec_crop_len below.
      resolution: 848
      in_channels: 1
      out_ch: 1
      ch: 128
      ch_mult:
        - 1
        - 1
        - 2
        - 2
        - 4
      num_res_blocks: 2
      # NOTE(review): presumably 848 / 2^4 = 53 (one halving per ch_mult
      # transition) — confirm against the model code.
      attn_resolutions:
        - 53
      dropout: 0.0

    # Loss settings.
    lossconfig:
      target: specvqgan.modules.losses.vqperceptual.VQLPAPSWithDiscriminator_without_per
      params:
        disc_conditional: false
        disc_in_channels: 1
        # NOTE(review): presumably the training step at which the
        # discriminator is switched on — confirm.
        disc_start: 50001
        disc_weight: 0.8
        codebook_weight: 1.0
        min_adapt_weight: 1.0
        max_adapt_weight: 1.0
        # Zero here, consistent with the `_without_per` suffix of the target.
        perceptual_weight: 0.0

data:
  target: train.SpectrogramDataModuleFromConfig
  params:
    batch_size: 12
    num_workers: 8
    # Absolute path — presumably specific to the original training cluster;
    # change it for your environment.
    spec_dir_path: /apdcephfs_cq2/share_1297902/speech_user/shaunxliu/dongchao/code/VQ-VAE/audioset_pre
    sample_rate: 22050
    mel_num: 80
    spec_len: 860
    spec_crop_len: 848
    random_crop: false

    train:
      target: specvqgan.data.AllAudio.VASSpecsTrain
      params:
        specs_dataset_cfg:
          # Same directory as spec_dir_path above; keep the two in sync.
          split_path: /apdcephfs_cq2/share_1297902/speech_user/shaunxliu/dongchao/code/VQ-VAE/audioset_pre

    validation:
      target: specvqgan.data.AllAudio.VASSpecsValidation
      params:
        specs_dataset_cfg:
          # Same directory as spec_dir_path above; keep the two in sync.
          split_path: /apdcephfs_cq2/share_1297902/speech_user/shaunxliu/dongchao/code/VQ-VAE/audioset_pre