| task_name: train |
| tags: |
| - audioset |
| - best-rq-2 |
| - cluster GPU |
| train: true |
| test: true |
| ckpt_path: null |
| seed: 21072023 |
| data: |
| _target_: src.data.audioset_datamodule.AudioSetDataModule |
| data_dir: ${paths.data_dir}/AudioSet |
| batch_size: 256 |
| num_workers: ${oc.decode:${oc.env:SLURM_CPUS_PER_TASK}} |
| pin_memory: true |
| train_h5: full_unbal_bal_train_wav.h5 |
| train_csv: silent_files_full_unbal_bal_train_wav.csv |
| val_h5: eval_soxrhq.h5 |
| val_csv: silent_files_eval_soxrhq.csv |
| max_audio_length_sec: 10.0 |
| target_sample_rate: 16000 |
| collate_mode: pad |
| model: |
| _target_: src.models.best_rq2_module.BestRQ2Module |
| optimizer: |
| _target_: torch.optim.AdamW |
| _partial_: true |
| lr: 0.0001 |
| weight_decay: 0.05 |
| warmup_pct: 0.05 |
| spectrogram_adjustment_mode: truncate |
| criterion: |
| _target_: torch.nn.CrossEntropyLoss |
| _partial_: true |
| reduction: mean |
| codebook_dim: 16 |
| vocab_size: 8192 |
| net: |
| spectrogram: |
| sample_rate: ${data.target_sample_rate} |
| n_fft: 2048 |
| win_length_ms: 128 |
| hop_length_ms: 39.0625 |
| n_mels: 128 |
| f_min: 0 |
| f_max: 8000 |
| power: 2.0 |
| patch_embed: |
| img_size: |
| - 128 |
| - 256 |
| patch_size: |
| - 16 |
| - 16 |
| in_chans: 1 |
| embed_dim: 768 |
| masking: |
| input_size: |
| - 128 |
| - 256 |
| patch_size: |
| - 16 |
| - 16 |
| mask_ratio: |
| - 0.4 |
| - 0.6 |
| encoder: |
| embed_dim: 768 |
| depth: 12 |
| num_heads: 12 |
| mlp_ratio: 4.0 |
| qkv_bias: true |
| drop_rate: 0.0 |
| attn_drop_rate: 0.0 |
| drop_path_rate: 0.1 |
| num_patches: 128 |
| pos_embed_type: sincos |
| predictor: |
| embed_dim: 768 |
| depth: 4 |
| num_heads: 12 |
| mlp_ratio: 4.0 |
| qkv_bias: true |
| drop_rate: 0.0 |
| attn_drop_rate: 0.0 |
| drop_path_rate: 0.0 |
| num_patches: 128 |
| pos_embed_type: sincos |
| callbacks: |
| model_checkpoint: null |
| model_summary: |
| _target_: lightning.pytorch.callbacks.RichModelSummary |
| max_depth: 1 |
| rich_progress_bar: null |
| safetensors: |
| _target_: src.callbacks.safetensors_callback.SafetensorsCallback |
| device_stats: |
| _target_: lightning.pytorch.callbacks.DeviceStatsMonitor |
| visualization: |
| _target_: src.callbacks.visualization_callback.VisualizationCallback |
| num_samples: 4 |
| wandb_offline_checkpoint: |
| _target_: src.callbacks.wandb_callbacks.WandbOfflineCheckpointCallback |
| logger: |
| wandb: |
| _target_: lightning.pytorch.loggers.wandb.WandbLogger |
| save_dir: ${paths.output_dir} |
| offline: true |
| id: null |
| anonymous: null |
| project: audio embeddings |
| log_model: false |
| prefix: "" |
| group: "" |
| tags: [] |
| job_type: "" |
| name: best_rq2-audioset-200k-256x1bs |
| trainer: |
| _target_: lightning.pytorch.trainer.Trainer |
| default_root_dir: ${paths.output_dir} |
| accelerator: gpu |
| devices: 1 |
| check_val_every_n_epoch: 1 |
| deterministic: false |
| max_steps: 200000 |
| strategy: auto |
| max_time: 00:19:50:00 |
| paths: |
| root_dir: ${oc.env:PROJECT_ROOT} |
| data_dir: ${paths.root_dir}/data/ |
| log_dir: ${paths.root_dir}/logs/ |
| output_dir: ${hydra:runtime.output_dir} |
| work_dir: ${hydra:runtime.cwd} |
| extras: |
| ignore_warnings: false |
| enforce_tags: true |
| print_config: true |
|
|