---
# Configuration for the `audioset` task: data index, sampling, the
# sound-event-detection / query / separation model choices, training
# hyper-parameters, and evaluation directories.
#
# NOTE(review): this file was recovered from a copy whose line breaks and
# indentation had been collapsed. `indexes_dict` under `data` and the
# optimizer keys under `train.optimizer` are forced by the syntax; the
# placement of the remaining keys is the most plausible reading and should
# be confirmed against the code that loads this file.
task_name: audioset

data:
  indexes_dict: "hdf5s/indexes/full_train.h5"

# NOTE(review): assumed top-level (not under `data`) — confirm with loader.
sampler_type: balanced_sampler

# Options: "max_area" | "random"
anchor_segment_detect_mode: max_area

sample_rate: 32000
frames_per_second: 100
segment_seconds: 2.0
classes_num: 527

augmentation:
  match_energy: true
  mix_num: 2

sound_event_detection:
  model_type: Cnn14_DecisionLevelMax
  freeze: true

query_net:
  # Options: "Cnn14_Wrapper" | "AdaptiveCnn14_Wrapper"
  model_type: Cnn14_Wrapper
  base_checkpoint_type: "Cnn14"
  freeze_base: true
  freeze_adaptor: false
  # Options: "embedding" | "at_soft"
  bottleneck_type: at_soft
  outputs_num: 527

ss_model:
  model_type: ResUNet30
  input_channels: 1
  output_channels: 1

train:
  num_workers: 16
  loss_type: l1_wav
  optimizer:
    optimizer_type: AdamW
    # Written with an explicit decimal point: YAML 1.1 loaders (e.g. PyYAML)
    # resolve a bare `1e-3` as a string, not a float.
    learning_rate: 1.0e-3
    # Options: "constant_warm_up" | "linear_warm_up"
    lr_lambda_type: constant_warm_up
    warm_up_steps: 10000
    reduce_lr_steps: 1000000
  # NOTE(review): the keys below are assumed to belong to `train`, not to
  # `train.optimizer` — confirm with loader.
  batch_size_per_device: 16
  precision: 32
  # Every 10000 steps is called an epoch.
  steps_per_epoch: 10000
  # Evaluate every `evaluate_step_frequency` steps.
  evaluate_step_frequency: 20000
  # Save every `save_step_frequency` steps.
  save_step_frequency: 100000
  early_stop_steps: 10000001
  random_seed: 1234
  resume_checkpoint_path: ""

evaluate:
  balanced_train_eval_dir: "evaluation/audioset/2s_segments_balanced_train"
  test_eval_dir: "evaluation/audioset/2s_segments_test"
  max_eval_per_class: 10