---
task_name: audioset

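# Data pipeline: balanced sampling over AudioSet, 2-second anchor segments,
# and mixing of mix_num (here 2) segments per training example.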
data:
    indexes_dict: "hdf5s/indexes/full_train.h5"
    sampler_type: balanced_sampler
    anchor_segment_detect_mode: max_area  # "max_area" | "random"
    sample_rate: 32000
    frames_per_second: 100
    segment_seconds: 2.0
    classes_num: 527
    augmentation:
        match_energy: True
    mix_num: 2

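# Sound event detection model (kept frozen), used to detect anchor segments
# within clips (see anchor_segment_detect_mode above).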
sound_event_detection:
    model_type: Cnn14_DecisionLevelMax
    freeze: True

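# Query net: audio tagging network that produces the condition for the
# separator; with bottleneck_type "at_soft" the 527-way tagging outputs act
# as the query, while "embedding" uses an embedding vector instead.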
query_net:
    model_type: Cnn14_Wrapper  # "Cnn14_Wrapper" | "AdaptiveCnn14_Wrapper"
    base_checkpoint_type: "Cnn14"
    freeze_base: True
    freeze_adaptor: False
    bottleneck_type: at_soft  # "embedding" | "at_soft"
    outputs_num: 527

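# Separation model: ResUNet30 with mono (single-channel) input and output.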
ss_model:
    model_type: ResUNet30
    input_channels: 1
    output_channels: 1
    
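# Training: L1 loss on the output waveform, AdamW optimizer with a
# constant warm-up learning-rate schedule.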
train:
    num_workers: 16
    loss_type: l1_wav
    optimizer:
        optimizer_type: AdamW
        learning_rate: 1e-3
        lr_lambda_type: constant_warm_up    # "constant_warm_up" | "linear_warm_up"
        warm_up_steps: 10000
        reduce_lr_steps: 1000000
    batch_size_per_device: 16
    precision: 32
    steps_per_epoch: 10000  # 10000 training steps count as one epoch
    evaluate_step_frequency: 20000     # Run evaluation every evaluate_step_frequency steps
    save_step_frequency: 100000  # Save a checkpoint every save_step_frequency steps
    early_stop_steps: 10000001
    random_seed: 1234
    resume_checkpoint_path: ""
    
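# Evaluation on 2-second segments from the AudioSet balanced training and
# test splits, using up to max_eval_per_class examples per class.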
evaluate:
    balanced_train_eval_dir: "evaluation/audioset/2s_segments_balanced_train"
    test_eval_dir: "evaluation/audioset/2s_segments_test"
    max_eval_per_class: 10