Dongchao committed on
Commit
60b9091
1 Parent(s): fa242d0

Upload 3 files

Browse files
2022-12-02T00-49-12-lightning.yaml ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ lightning:
2
+ callbacks:
3
+ image_logger:
4
+ target: train.ImageLogger
5
+ params:
6
+ for_specs: true
7
+ vocoder_cfg:
8
+ target: train.VocoderMelGan
9
+ params:
10
+ ckpt_vocoder: /apdcephfs_cq2/share_1297902/speech_user/shaunxliu/dongchao/code/VQ-VAE/vocoder/logs/vggsound/
11
+ trainer:
12
+ sync_batchnorm: true
13
+ distributed_backend: ddp
14
+ gpus: 0,1,2,3,4,5,6,7,
2022-12-02T00-49-12-project.yaml ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model:
2
+ base_learning_rate: 1.0e-06
3
+ target: specvqgan.models.vqgan.VQModel
4
+ params:
5
+ embed_dim: 256
6
+ n_embed: 256
7
+ ddconfig:
8
+ double_z: false
9
+ z_channels: 256
10
+ resolution: 848
11
+ in_channels: 1
12
+ out_ch: 1
13
+ ch: 128
14
+ ch_mult:
15
+ - 1
16
+ - 1
17
+ - 2
18
+ - 2
19
+ - 4
20
+ num_res_blocks: 2
21
+ attn_resolutions:
22
+ - 53
23
+ dropout: 0.0
24
+ lossconfig:
25
+ target: specvqgan.modules.losses.vqperceptual.VQLPAPSWithDiscriminator_without_per
26
+ params:
27
+ disc_conditional: false
28
+ disc_in_channels: 1
29
+ disc_start: 50001
30
+ disc_weight: 0.8
31
+ codebook_weight: 1.0
32
+ min_adapt_weight: 1.0
33
+ max_adapt_weight: 1.0
34
+ perceptual_weight: 0.0
35
+ data:
36
+ target: train.SpectrogramDataModuleFromConfig
37
+ params:
38
+ batch_size: 12
39
+ num_workers: 8
40
+ spec_dir_path: /apdcephfs_cq2/share_1297902/speech_user/shaunxliu/dongchao/code/VQ-VAE/audioset_pre
41
+ sample_rate: 22050
42
+ mel_num: 80
43
+ spec_len: 860
44
+ spec_crop_len: 848
45
+ random_crop: false
46
+ train:
47
+ target: specvqgan.data.AllAudio.VASSpecsTrain
48
+ params:
49
+ specs_dataset_cfg:
50
+ split_path: /apdcephfs_cq2/share_1297902/speech_user/shaunxliu/dongchao/code/VQ-VAE/audioset_pre
51
+ validation:
52
+ target: specvqgan.data.AllAudio.VASSpecsValidation
53
+ params:
54
+ specs_dataset_cfg:
55
+ split_path: /apdcephfs_cq2/share_1297902/speech_user/shaunxliu/dongchao/code/VQ-VAE/audioset_pre
VQ-VAE_trained_on_audioset_256.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b1224fe476ad9a7bed1c50c5d1738f14414c0d8b694561e6a8cf402cc710b53b
3
+ size 896826124