#### general settings
name: test_noisy_audio_clips_classifier
use_tb_logger: true
model: extensibletrainer
distortion: sr
scale: 1
gpu_ids: [0]
start_step: 0
checkpointing_enabled: true
fp16: false
wandb: true

datasets:
  test:
    name: clips_val
    n_workers: 1
    batch_size: 16
    mode: unsupervised_audio
    path: [Z:\split\garbage-2\podcast_dump0_garbage]
    cache_path: Z:\split\garbage-2\podcast_dump0_garbage_cache.pth
    sampling_rate: 22050
    do_augmentation: false
    pad_to_samples: 65536
    extra_samples: 0

networks:
  classifier:
    type: generator
    which_model_G: mini_audio_encoder_classifier
    kwargs:
      classes: 5
      spec_dim: 80
      embedding_dim: 1024
      base_channels: 128
      depth: 3
      resnet_blocks: 2
      attn_blocks: 8
      num_attn_heads: 4
      dropout: .1

#### path
path:
  pretrain_model_classifier: noisy_audio_clips_classifier.pth
  strict_load: true
  #resume_state: ../experiments/train_noisy_audio_clips_classifier/training_state/51000.state

steps:
  classifier:
    training: classifier

    optimizer: adamw
    optimizer_params:
      lr: !!float 3e-4
      weight_decay: !!float 1e-5
      beta1: 0.9
      beta2: 0.9999
    clip_grad_eps: 1.0

    injectors:
      to_mel:
        type: mel_spectrogram
        in: clip
        out: actual_mel
      pad:
        type: pad
        multiple: 16
        in: actual_mel
        out: inp_mel
      gen_inj_train:
        type: generator
        generator: classifier
        in: inp_mel
        out: logits

    losses:
      classification_loss:
        type: crossentropy
        weight: 1.0
        logits: logits
        labels: label

train:
  niter: 500000
  warmup_iter: -1
  mega_batch_factor: 1
  val_freq: 2000

  # Default LR scheduler options
  default_lr_scheme: MultiStepLR
  gen_lr_steps: [ 20000, 40000, 60000 ]
  lr_gamma: 0.2

eval:
  path_key: path
  classifier_logits_key: logits
  output_dir: D:\tmp\podcasts_split
  # Derived from audio_with_noise_dataset
  output_labels: [fine, env_noise, music, two_voices, reverb]

logger:
  print_freq: 30
  save_checkpoint_freq: 1000
  visuals: []
  is_mel_spectrogram: true
  visual_debug_rate: 500
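
# ---------------------------------------------------------------------------
# Note: the comments below are explanatory additions, not part of the original
# recipe. They sketch how ExtensibleTrainer consumes this file.
#
# On each step the injectors run in the order listed, passing tensors through
# a shared state dict by name:
#
#   clip -> [mel_spectrogram] -> actual_mel
#        -> [pad to a multiple of 16 frames] -> inp_mel
#        -> [classifier forward pass] -> logits
#
# classification_loss then computes crossentropy between `logits` and the
# batch's `label` key, and `eval.output_labels` maps the 5 logit indices
# (matching `kwargs.classes: 5`) back to readable class names for the results
# written under `eval.output_dir`.
#
# Assuming the MMSR-style entry points this trainer descends from (the script
# name and invocation are assumptions, not taken from this file), the config
# would be launched with something like:
#
#   python test.py -opt test_noisy_audio_clips_classifier.yml
# ---------------------------------------------------------------------------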