youngsheen
/

SimVQ

Model card Files Files and versions Community

youngsheen commited on Nov 18, 2024

Commit

a45d801

1 Parent(s): f9499bf

add vq_audio 4k

Browse files

Files changed (2) hide show

vq_audio_log/simvq_4k/1second/config.yaml +145 -0
vq_audio_log/simvq_4k/epoch=49-step=138600.ckpt +3 -0

vq_audio_log/simvq_4k/1second/config.yaml ADDED Viewed

	@@ -0,0 +1,145 @@

+# lightning.pytorch==2.2.5
+seed_everything: 0
+trainer:
+  accelerator: gpu
+  strategy: ddp_find_unused_parameters_true
+  devices: 2
+  num_nodes: 1
+  precision: 16-mixed
+  logger:
+    class_path: lightning.pytorch.loggers.TensorBoardLogger
+    init_args:
+      save_dir: vq_audio_log/simvq_4k
+      name: null
+      version: 1second
+      log_graph: false
+      default_hp_metric: true
+      prefix: ''
+      sub_dir: null
+      comment: ''
+      purge_step: null
+      max_queue: 10
+      flush_secs: 120
+      filename_suffix: ''
+  callbacks:
+  - class_path: lightning.pytorch.callbacks.ModelCheckpoint
+    init_args:
+      dirpath: vq_audio_log/simvq_4k
+      filename: null
+      monitor: null
+      verbose: false
+      save_last: null
+      save_top_k: -1
+      save_weights_only: false
+      mode: min
+      auto_insert_metric_name: true
+      every_n_train_steps: null
+      train_time_interval: null
+      every_n_epochs: null
+      save_on_train_epoch_end: null
+      enable_version_counter: true
+  - class_path: lightning.pytorch.callbacks.LearningRateMonitor
+    init_args:
+      logging_interval: step
+      log_momentum: false
+      log_weight_decay: false
+  fast_dev_run: false
+  max_epochs: 50
+  min_epochs: null
+  max_steps: -1
+  min_steps: null
+  max_time: null
+  limit_train_batches: null
+  limit_val_batches: null
+  limit_test_batches: null
+  limit_predict_batches: null
+  overfit_batches: 0.0
+  val_check_interval: null
+  check_val_every_n_epoch: 1
+  num_sanity_val_steps: 0
+  log_every_n_steps: 100
+  enable_checkpointing: null
+  enable_progress_bar: null
+  enable_model_summary: null
+  accumulate_grad_batches: 1
+  gradient_clip_val: null
+  gradient_clip_algorithm: null
+  deterministic: null
+  benchmark: null
+  inference_mode: true
+  use_distributed_sampler: true
+  profiler: null
+  detect_anomaly: false
+  barebones: false
+  plugins: null
+  sync_batchnorm: false
+  reload_dataloaders_every_n_epochs: 0
+  default_root_dir: null
+ckpt_path: null
+model:
+  class_path: taming.models.vq_audio.VQModel
+  init_args:
+    ddconfig:
+      causal: true
+      dimension: 512
+    lossconfig:
+      target: taming.modules.losses.stft.VQSTFTWithDiscriminator
+      params:
+        disc_conditional: false
+        disc_in_channels: 1
+        disc_start: 0
+        codebook_enlarge_ratio: 0
+        codebook_enlarge_steps: 2000
+        sample_rate: 24000
+        commit_weight: 1000.0
+        gen_loss_weight: 1.0
+        mel_loss_coeff: 45.0
+        mrd_loss_coeff: 1.0
+    quantconfig:
+      target: taming.modules.vqvae.quantize.SimVQ1D
+      params:
+        n_e: 4096
+        e_dim: 512
+        beta: 0.25
+        legacy: false
+    sample_rate: 24000
+    target_bandwidths: null
+    audio_normalize: false
+    segment: None
+    ckpt_path: null
+    ignore_keys: []
+    colorize_nlabels: null
+    monitor: null
+    learning_rate: 0.0001
+    warmup_epochs: 1.0
+    scheduler_type: None
+    min_learning_rate: 0
+    use_ema: true
+    stage: null
+data:
+  class_path: main.PadDataModuleFromConfig
+  init_args:
+    batch_size: 64
+    train:
+      target: taming.data.libritts.LibriTTSTrain
+      params:
+        config:
+          sample_rate: 24000
+          channels: 1
+          clip_seconds: 1
+    validation:
+      target: taming.data.libritts.LibriTTSDev
+      params:
+        config:
+          sample_rate: 24000
+          channels: 1
+          clip_seconds: 1
+    test:
+      target: taming.data.libritts.LibriTTSTest
+      params:
+        config:
+          sample_rate: 24000
+          channels: 1
+          clip_seconds: -1
+    wrap: false
+    num_workers: 8

vq_audio_log/simvq_4k/epoch=49-step=138600.ckpt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5cb338f1c12b0db56bd384ab4ac45d11c9070da7963d37c6dedcb872a1fe124f
+size 1329012174