Dongchao
/

Diffsound

Model card · Files and versions · Community

Dongchao committed on Apr 4, 2023

Commit

60b9091

•

1 Parent(s): fa242d0

Upload 3 files

Browse files

Files changed (3) hide show

2022-12-02T00-49-12-lightning.yaml +14 -0
2022-12-02T00-49-12-project.yaml +55 -0
VQ-VAE_trained_on_audioset_256.ckpt +3 -0

2022-12-02T00-49-12-lightning.yaml ADDED Viewed

	@@ -0,0 +1,14 @@

+lightning:  # callback and trainer settings for the training run
+  callbacks:
+    image_logger:
+      target: train.ImageLogger  # dotted path; presumably the class that gets instantiated with `params` — confirm in train.py
+      params:
+        for_specs: true
+        vocoder_cfg:
+          target: train.VocoderMelGan
+          params:
+            ckpt_vocoder: /apdcephfs_cq2/share_1297902/speech_user/shaunxliu/dongchao/code/VQ-VAE/vocoder/logs/vggsound/  # absolute filesystem path; update it when running in a different environment
+  trainer:
+    sync_batchnorm: true
+    distributed_backend: ddp
+    gpus: 0,1,2,3,4,5,6,7,  # eight device indices (0-7); plain scalar, so it loads as a string and the trailing comma is part of the value

2022-12-02T00-49-12-project.yaml ADDED Viewed

	@@ -0,0 +1,55 @@

+model:  # model class (`target`) plus its ddconfig and lossconfig parameters
+  base_learning_rate: 1.0e-06
+  target: specvqgan.models.vqgan.VQModel
+  params:
+    embed_dim: 256  # same value as ddconfig.z_channels below
+    n_embed: 256  # presumably the number of codebook entries — confirm in the model code
+    ddconfig:
+      double_z: false
+      z_channels: 256
+      resolution: 848  # same value as data.params.spec_crop_len
+      in_channels: 1
+      out_ch: 1
+      ch: 128
+      ch_mult:  # five entries; presumably per-level multipliers applied to `ch` — confirm in the model code
+      - 1
+      - 1
+      - 2
+      - 2
+      - 4
+      num_res_blocks: 2
+      attn_resolutions:  # NOTE(review): 53 = 848 / 16; presumably the resolution after four halvings — confirm
+      - 53
+      dropout: 0.0
+    lossconfig:
+      target: specvqgan.modules.losses.vqperceptual.VQLPAPSWithDiscriminator_without_per
+      params:
+        disc_conditional: false
+        disc_in_channels: 1  # same value as ddconfig.in_channels
+        disc_start: 50001  # presumably the training step at which the discriminator is enabled — confirm
+        disc_weight: 0.8
+        codebook_weight: 1.0
+        min_adapt_weight: 1.0  # min and max adapt weights are both 1.0
+        max_adapt_weight: 1.0
+        perceptual_weight: 0.0  # zero weight; looks consistent with the "_without_per" suffix of the loss target — TODO confirm
+data:  # data module settings plus the train and validation dataset targets
+  target: train.SpectrogramDataModuleFromConfig
+  params:
+    batch_size: 12
+    num_workers: 8
+    spec_dir_path: /apdcephfs_cq2/share_1297902/speech_user/shaunxliu/dongchao/code/VQ-VAE/audioset_pre  # absolute path; identical to both split_path values below
+    sample_rate: 22050
+    mel_num: 80  # presumably the number of mel bins per spectrogram — confirm against the dataset code
+    spec_len: 860
+    spec_crop_len: 848  # smaller than spec_len (860); same value as model.params.ddconfig.resolution
+    random_crop: false
+    train:
+      target: specvqgan.data.AllAudio.VASSpecsTrain
+      params:
+        specs_dataset_cfg:
+          split_path: /apdcephfs_cq2/share_1297902/speech_user/shaunxliu/dongchao/code/VQ-VAE/audioset_pre  # same directory as spec_dir_path
+    validation:
+      target: specvqgan.data.AllAudio.VASSpecsValidation
+      params:
+        specs_dataset_cfg:
+          split_path: /apdcephfs_cq2/share_1297902/speech_user/shaunxliu/dongchao/code/VQ-VAE/audioset_pre  # same directory as the train split_path

VQ-VAE_trained_on_audioset_256.ckpt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b1224fe476ad9a7bed1c50c5d1738f14414c0d8b694561e6a8cf402cc710b53b
+size 896826124