BSC-LT
/

wavenext-mel

Model card Files Files and versions Community

wetdog committed on Jun 7, 2024

Commit

f5c97ea

·

verified ·

1 Parent(s): 5c7d9f6

Upload model

Files changed (2) hide show

config.yaml +107 -0
pytorch_model.bin +3 -0

config.yaml ADDED Viewed

	@@ -0,0 +1,107 @@

+# pytorch_lightning==1.8.6
+seed_everything: 4444
+data:
+  class_path: vocos.dataset.VocosDataModule
+  init_args:
+    train_params:
+      filelist_path: ???
+      sampling_rate: 22050
+      num_samples: 16384
+      batch_size: 16
+      num_workers: 8
+    val_params:
+      filelist_path: ???
+      sampling_rate: 22050
+      num_samples: 48384
+      batch_size: 16
+      num_workers: 8
+model:
+  class_path: vocos.experiment.VocosExp
+  init_args:
+    sample_rate: 22050
+    initial_learning_rate: 1e-3
+    mel_loss_coeff: 45
+    mrd_loss_coeff: 0.1 # original value 0.1
+    num_warmup_steps: 500 # Optimizer warmup steps
+    pretrain_mel_steps: 0  # 0 means GAN objective from the first iteration
+    # automatic evaluation
+    evaluate_utmos: true
+    evaluate_pesq: true
+    evaluate_periodicty: true
+feature_extractor:
+  class_path: vocos.feature_extractors.MelSpectrogramFeatures
+  init_args:
+    sample_rate: 22050
+    n_fft: 1024
+    hop_length: 256
+    n_mels: 80
+    padding: same
+    f_min: 0
+    f_max: 8000
+    norm: "slaney"
+    mel_scale: "slaney"
+    clip_val: 1e-5
+backbone:
+  class_path: vocos.models.VocosBackbone
+  init_args:
+    input_channels: 80
+    dim: 512
+    intermediate_dim: 1536
+    num_layers: 8
+head:
+  class_path: vocos.heads.WaveNextHead
+  init_args:
+    dim: 512
+    n_fft: 1024
+    hop_length: 256
+    padding: same
+melspec_loss:
+  class_path: vocos.loss.MelSpecReconstructionLoss
+  init_args:
+    sample_rate: 22050
+    n_fft: 1024
+    hop_length: 256
+    n_mels: 128
+    f_min: 0
+    f_max: 11000
+    norm: "slaney"
+    mel_scale: "slaney"
+    clip_val: 1e-5
+trainer:
+  logger:
+    class_path: pytorch_lightning.loggers.TensorBoardLogger
+    init_args:
+      save_dir: ???
+  callbacks:
+    - class_path: pytorch_lightning.callbacks.LearningRateMonitor
+    - class_path: pytorch_lightning.callbacks.ModelSummary
+      init_args:
+        max_depth: 2
+    - class_path: pytorch_lightning.callbacks.ModelCheckpoint
+      init_args:
+        monitor: val_loss
+        filename: vocos_checkpoint_{epoch}_{step}_{val_loss:.4f}
+        save_top_k: 3
+        save_last: true
+    - class_path: vocos.helpers.GradNormCallback
+  # Lightning counts max_steps across all optimizer steps (rather than the number of batches).
+  # This equals 1M steps for the generator and 1M for the discriminator.
+  max_steps: 2000000
+  # You might want to limit val batches when evaluating all the metrics, as they are time-consuming
+  limit_val_batches: 50
+  accelerator: gpu
+  strategy: ddp
+  devices: [0]
+  log_every_n_steps: 250

pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3349cbad46af135e27a5df03668b1faf980fd11e150285f8435c7641330d1803
+size 55097575