youngsheen
/

SimVQ

Model card Files Files and versions Community

youngsheen commited on 12 days ago

Commit

f9499bf

•

1 Parent(s): 528f777

upload ckpt

Browse files

Files changed (4) hide show

vq_audio_log/simvq_262k/1second/config.yaml +145 -0
vq_audio_log/simvq_262k/epoch=49-step=138600.ckpt +3 -0
vq_log/simvq_262k/epoch=49-step=250250.ckpt +3 -0
vq_log/simvq_262k/size128/config.yaml +148 -0

vq_audio_log/simvq_262k/1second/config.yaml ADDED Viewed

	@@ -0,0 +1,145 @@

+# lightning.pytorch==2.2.5
+seed_everything: 0
+trainer:
+  accelerator: gpu
+  strategy: ddp_find_unused_parameters_true
+  devices: 2
+  num_nodes: 1
+  precision: 16-mixed
+  logger:
+    class_path: lightning.pytorch.loggers.TensorBoardLogger
+    init_args:
+      save_dir: vq_audio_log/simvq_262k
+      name: null
+      version: 1second
+      log_graph: false
+      default_hp_metric: true
+      prefix: ''
+      sub_dir: null
+      comment: ''
+      purge_step: null
+      max_queue: 10
+      flush_secs: 120
+      filename_suffix: ''
+  callbacks:
+  - class_path: lightning.pytorch.callbacks.ModelCheckpoint
+    init_args:
+      dirpath: vq_audio_log/simvq_262k
+      filename: null
+      monitor: null
+      verbose: false
+      save_last: null
+      save_top_k: -1
+      save_weights_only: false
+      mode: min
+      auto_insert_metric_name: true
+      every_n_train_steps: null
+      train_time_interval: null
+      every_n_epochs: null
+      save_on_train_epoch_end: null
+      enable_version_counter: true
+  - class_path: lightning.pytorch.callbacks.LearningRateMonitor
+    init_args:
+      logging_interval: step
+      log_momentum: false
+      log_weight_decay: false
+  fast_dev_run: false
+  max_epochs: 50
+  min_epochs: null
+  max_steps: -1
+  min_steps: null
+  max_time: null
+  limit_train_batches: null
+  limit_val_batches: null
+  limit_test_batches: null
+  limit_predict_batches: null
+  overfit_batches: 0.0
+  val_check_interval: null
+  check_val_every_n_epoch: 1
+  num_sanity_val_steps: 0
+  log_every_n_steps: 100
+  enable_checkpointing: null
+  enable_progress_bar: null
+  enable_model_summary: null
+  accumulate_grad_batches: 1
+  gradient_clip_val: null
+  gradient_clip_algorithm: null
+  deterministic: null
+  benchmark: null
+  inference_mode: true
+  use_distributed_sampler: true
+  profiler: null
+  detect_anomaly: false
+  barebones: false
+  plugins: null
+  sync_batchnorm: false
+  reload_dataloaders_every_n_epochs: 0
+  default_root_dir: null
+ckpt_path: null
+model:
+  class_path: taming.models.vq_audio.VQModel
+  init_args:
+    ddconfig:
+      causal: true
+      dimension: 512
+    lossconfig:
+      target: taming.modules.losses.stft.VQSTFTWithDiscriminator
+      params:
+        disc_conditional: false
+        disc_in_channels: 1
+        disc_start: 0
+        codebook_enlarge_ratio: 0
+        codebook_enlarge_steps: 2000
+        sample_rate: 24000
+        commit_weight: 1000.0
+        gen_loss_weight: 1.0
+        mel_loss_coeff: 45.0
+        mrd_loss_coeff: 1.0
+    quantconfig:
+      target: taming.modules.vqvae.quantize.SimVQ1D
+      params:
+        n_e: 262144
+        e_dim: 512
+        beta: 0.25
+        legacy: false
+    sample_rate: 24000
+    target_bandwidths: null
+    audio_normalize: false
+    segment: None
+    ckpt_path: null
+    ignore_keys: []
+    colorize_nlabels: null
+    monitor: null
+    learning_rate: 0.0001
+    warmup_epochs: 1.0
+    scheduler_type: None
+    min_learning_rate: 0
+    use_ema: true
+    stage: null
+data:
+  class_path: main.PadDataModuleFromConfig
+  init_args:
+    batch_size: 64
+    train:
+      target: taming.data.libritts.LibriTTSTrain
+      params:
+        config:
+          sample_rate: 24000
+          channels: 1
+          clip_seconds: 1
+    validation:
+      target: taming.data.libritts.LibriTTSDev
+      params:
+        config:
+          sample_rate: 24000
+          channels: 1
+          clip_seconds: 1
+    test:
+      target: taming.data.libritts.LibriTTSTest
+      params:
+        config:
+          sample_rate: 24000
+          channels: 1
+          clip_seconds: -1
+    wrap: false
+    num_workers: 8

vq_audio_log/simvq_262k/epoch=49-step=138600.ckpt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0626979f252eceb2f4e87264649a2aaf72ae5c9e300aec508695005b43f04617
+size 1857494478

vq_log/simvq_262k/epoch=49-step=250250.ckpt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2dff3d7d4b27b1a222fdb1b5453f336b681d0ab5c79d07245aa156a170ebb997
+size 728546835

vq_log/simvq_262k/size128/config.yaml ADDED Viewed

	@@ -0,0 +1,148 @@

+# lightning.pytorch==2.2.5
+seed_everything: 0
+trainer:
+  accelerator: gpu
+  strategy: ddp_find_unused_parameters_true
+  devices: 4
+  num_nodes: 1
+  precision: 16-mixed
+  logger:
+    class_path: lightning.pytorch.loggers.TensorBoardLogger
+    init_args:
+      save_dir: vq_log/simvq_262k
+      name: null
+      version: size128
+      log_graph: false
+      default_hp_metric: true
+      prefix: ''
+      sub_dir: null
+      comment: ''
+      purge_step: null
+      max_queue: 10
+      flush_secs: 120
+      filename_suffix: ''
+  callbacks:
+  - class_path: lightning.pytorch.callbacks.ModelCheckpoint
+    init_args:
+      dirpath: vq_log/simvq_262k
+      filename: null
+      monitor: null
+      verbose: false
+      save_last: null
+      save_top_k: -1
+      save_weights_only: false
+      mode: min
+      auto_insert_metric_name: true
+      every_n_train_steps: null
+      train_time_interval: null
+      every_n_epochs: null
+      save_on_train_epoch_end: null
+      enable_version_counter: true
+  - class_path: lightning.pytorch.callbacks.LearningRateMonitor
+    init_args:
+      logging_interval: step
+      log_momentum: false
+      log_weight_decay: false
+  fast_dev_run: false
+  max_epochs: 50
+  min_epochs: null
+  max_steps: -1
+  min_steps: null
+  max_time: null
+  limit_train_batches: null
+  limit_val_batches: null
+  limit_test_batches: null
+  limit_predict_batches: null
+  overfit_batches: 0.0
+  val_check_interval: null
+  check_val_every_n_epoch: 1
+  num_sanity_val_steps: 0
+  log_every_n_steps: 100
+  enable_checkpointing: null
+  enable_progress_bar: null
+  enable_model_summary: null
+  accumulate_grad_batches: 1
+  gradient_clip_val: null
+  gradient_clip_algorithm: null
+  deterministic: null
+  benchmark: null
+  inference_mode: true
+  use_distributed_sampler: true
+  profiler: null
+  detect_anomaly: false
+  barebones: false
+  plugins: null
+  sync_batchnorm: false
+  reload_dataloaders_every_n_epochs: 0
+  default_root_dir: null
+ckpt_path: null
+model:
+  class_path: taming.models.vq.VQModel
+  init_args:
+    ddconfig:
+      double_z: false
+      z_channels: 128
+      resolution: 128
+      in_channels: 3
+      out_ch: 3
+      ch: 128
+      ch_mult:
+      - 1
+      - 2
+      - 2
+      - 4
+      num_res_blocks: 2
+    lossconfig:
+      target: taming.modules.losses.vqperceptual.VQLPIPSWithDiscriminator
+      params:
+        disc_conditional: false
+        disc_in_channels: 3
+        disc_start: 0
+        disc_weight: 0.8
+        gen_loss_weight: 0.1
+        codebook_weight: 0.1
+        commit_weight: 1.0
+        codebook_enlarge_ratio: 0
+        codebook_enlarge_steps: 2000
+    quantconfig:
+      target: taming.modules.vqvae.quantize.SimVQ
+      params:
+        n_e: 262144
+        e_dim: 128
+        beta: 0.25
+        legacy: false
+    ckpt_path: null
+    ignore_keys: []
+    image_key: image
+    colorize_nlabels: null
+    monitor: null
+    learning_rate: 0.0001
+    warmup_epochs: 1.0
+    scheduler_type: None
+    min_learning_rate: 0
+    use_ema: true
+    stage: null
+data:
+  class_path: main.DataModuleFromConfig
+  init_args:
+    batch_size: 64
+    train:
+      target: taming.data.imagenet.ImageNetTrain
+      params:
+        config:
+          size: 128
+          subset: null
+    validation:
+      target: taming.data.imagenet.ImageNetValidation
+      params:
+        config:
+          size: 128
+          subset: null
+    test:
+      target: taming.data.imagenet.ImageNetValidation
+      params:
+        config:
+          size: 128
+          subset: null
+    wrap: false
+    num_workers: 8