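# VQGAN (first-stage VQ-VAE) training config for mel-spectrogram codebooks in
# the SpecVQGAN style. The annotations below assume the standard
# taming-transformers encoder/decoder used by specvqgan.models.vqgan.VQModel.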
model:
  base_learning_rate: 1.0e-06
  target: specvqgan.models.vqgan.VQModel
  params:
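    # Codebook: 256 entries (n_embed), each a 256-dimensional vector (embed_dim).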
    embed_dim: 256
    n_embed: 256
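    # Encoder/decoder for 1-channel spectrograms. With 5 ch_mult entries the
    # network downsamples 4 times, so the 848-frame input is compressed to
    # 848 / 2^4 = 53 latent frames, where attention is applied (attn_resolutions).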
    ddconfig:
      double_z: false
      z_channels: 256
      resolution: 848
      in_channels: 1
      out_ch: 1
      ch: 128
      ch_mult:
      - 1
      - 1
      - 2
      - 2
      - 4
      num_res_blocks: 2
      attn_resolutions:
      - 53
      dropout: 0.0
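    # VQ codebook loss plus a patch discriminator on 1-channel spectrograms.
    # The adversarial term is switched on at step 50001 (disc_start) and scaled
    # by disc_weight; min/max_adapt_weight pin the adaptive balancing weight to
    # 1.0, and perceptual_weight: 0.0 disables the perceptual (LPAPS) term,
    # consistent with the "_without_per" loss target.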
    lossconfig:
      target: specvqgan.modules.losses.vqperceptual.VQLPAPSWithDiscriminator_without_per
      params:
        disc_conditional: false
        disc_in_channels: 1
        disc_start: 50001
        disc_weight: 0.8
        codebook_weight: 1.0
        min_adapt_weight: 1.0
        max_adapt_weight: 1.0
        perceptual_weight: 0.0
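# Data: 22.05 kHz audio as 80-bin mel spectrograms; 860-frame spectrograms are
# cropped to 848 frames (random_crop: false, i.e. a deterministic crop) to match
# the model's ddconfig resolution.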
data:
  target: train.SpectrogramDataModuleFromConfig
  params:
    batch_size: 12
    num_workers: 8
    spec_dir_path: /apdcephfs_cq2/share_1297902/speech_user/shaunxliu/dongchao/code/VQ-VAE/audioset_pre
    sample_rate: 22050
    mel_num: 80
    spec_len: 860
    spec_crop_len: 848
    random_crop: false
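    # Train/validation datasets are configured through specs_dataset_cfg;
    # split_path points at the preprocessed spectrogram directory (the same
    # location as spec_dir_path above).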
    train:
      target: specvqgan.data.AllAudio.VASSpecsTrain
      params:
        specs_dataset_cfg:
          split_path: /apdcephfs_cq2/share_1297902/speech_user/shaunxliu/dongchao/code/VQ-VAE/audioset_pre
    validation:
      target: specvqgan.data.AllAudio.VASSpecsValidation
      params:
        specs_dataset_cfg:
          split_path: /apdcephfs_cq2/share_1297902/speech_user/shaunxliu/dongchao/code/VQ-VAE/audioset_pre