File size: 5,175 Bytes
9ace58a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
import numpy as np
import os
import datetime


def mk_dir(path):
    """Create directory *path*, including any missing parents.

    Does nothing when the directory already exists. Using ``exist_ok=True``
    instead of a separate ``os.path.isdir`` check avoids a race where another
    process creates the directory between the check and the creation.

    Raises:
        FileExistsError: if *path* exists but is not a directory.
    """
    os.makedirs(path, exist_ok=True)
    
    
def get_params(make_dirs=False, exps_dir='../../experiments/'):
    """Build the default parameter dictionary for one experiment run.

    Every call stamps the experiment with the current local time, so the
    experiment directory and the model file name are unique per call (to
    one-second resolution).

    Args:
        make_dirs: when true, print a short summary and create the experiment
            directory (plus the enabled output-image directories) on disk.
        exps_dir: root directory under which the time-stamped experiment
            directory is placed.

    Returns:
        dict mapping parameter names to their default values.
    """
    timestamp = datetime.datetime.now().strftime("%Y_%m_%d__%H_%M_%S")
    # trailing '' makes os.path.join end the path with a separator
    experiment_dir = os.path.join(exps_dir, timestamp, '')

    # model choice and experiment output locations
    params = {
        'model_name': 'Net2DFast',  # Net2DFast, Net2DSkip, Net2DSimple, Net2DSkipDS, Net2DRN
        'num_filters': 128,
        'experiment': experiment_dir,
        'model_file_name': os.path.join(experiment_dir, timestamp + '.pth.tar'),
        'op_im_dir': os.path.join(experiment_dir, 'op_ims', ''),
        'op_im_dir_test': os.path.join(experiment_dir, 'op_ims_test', ''),
    }

    # spectrogram generation
    params.update({
        'target_samp_rate': 256000,       # all audio is resampled to this rate
        # 512 samples at 256 kHz, i.e. 0.002. NOTE(review): the original
        # comment says milliseconds, but this arithmetic yields seconds -
        # confirm against the code that consumes it.
        'fft_win_length': 512 / 256000.0,
        'fft_overlap': 0.75,              # stft window overlap
        'max_freq': 120000,               # Hz, content above this is discarded
        'min_freq': 10000,                # Hz, content below this is discarded
        'resize_factor': 0.5,             # resize applied to the spectrogram fed to the network
        'spec_height': 256,               # number of frequency bins (before resizing)
        'spec_train_width': 512,          # number of time steps (before resizing)
        'spec_divide_factor': 32,         # spectrogram width and height should be divisible by this
    })

    # spectrogram post-processing
    params.update({
        'denoise_spec_avg': True,   # subtract the mean of each frequency band
        'scale_raw_audio': False,   # scale the raw audio to [-1, 1]
        'max_scale_spec': False,    # scale the spectrogram so its max is 1
        'spec_scale': 'pcen',       # 'log', 'pcen', 'none'
    })

    # detection
    params.update({
        'detection_overlap': 0.01,    # time tolerance for counting a detection (original note says ms - TODO confirm units)
        'ignore_start_end': 0.01,     # GT calls starting this close to the start/end of a file are ignored
        'detection_threshold': 0.01,  # lower values favour recall
        'nms_kernel_size': 9,
        'nms_top_k_per_sec': 200,     # keep the K highest predictions per second of audio
        'target_sigma': 2.0,
    })

    # augmentation
    params.update({
        'aug_prob': 0.20,                # probability of applying each augmentation
        'augment_at_train': True,
        'augment_at_train_combine': True,
        'echo_max_delay': 0.005,         # echo simulated by adding a copy of the raw audio
        'stretch_squeeze_delta': 0.04,   # stretch or squeeze the spectrogram
        'mask_max_time_perc': 0.05,      # max mask size, as a percentage (not ideal)
        'mask_max_freq_perc': 0.10,      # max mask size, as a percentage (not ideal)
        'spec_amp_scaling': 2.0,         # multiply the "volume" by 0:X times the current amount
        'aug_sampling_rates': [220500, 256000, 300000, 312500, 384000, 441000, 500000],
    })

    # loss
    params.update({
        'train_loss': 'focal',          # mse or focal
        'det_loss_weight': 1.0,         # weight of the detection term
        'size_loss_weight': 0.1,        # weight of the bbox size term
        'class_loss_weight': 2.0,       # weight of the classification term
        'individual_loss_weight': 0.0,  # not used
    })
    # the individual-id embedding gets no dimensions while its loss is switched off
    embedding_disabled = params['individual_loss_weight'] == 0.0
    params['emb_dim'] = 0 if embedding_disabled else 3

    # training
    params.update({
        'lr': 0.001,
        'batch_size': 8,
        'num_workers': 4,
        'num_epochs': 200,
        'num_eval_epochs': 5,  # evaluate every X epochs
        'device': 'cuda',
        'save_test_image_during_train': False,
        'save_test_image_after_train': True,
    })

    # class handling
    params.update({
        'convert_to_genus': False,
        'genus_mapping': [],
        'class_names': [],
        'classes_to_ignore': ['', ' ', 'Unknown', 'Not Bat'],
        'generic_class': ['Bat'],
        'events_of_interest': ['Echolocation'],  # all other event types (e.g. social calls) are ignored
        # classes listed here are standardized during training so that the
        # same low and high freq are used
        'standardize_classs_names': [],
    })

    if not make_dirs:
        return params

    # report where this run will be stored, then create the directories
    for label, key in (('Model name : ', 'model_name'),
                       ('Model file : ', 'model_file_name'),
                       ('Experiment : ', 'experiment')):
        print(label + params[key])

    mk_dir(params['experiment'])
    if params['save_test_image_during_train']:
        mk_dir(params['op_im_dir'])
    if params['save_test_image_after_train']:
        mk_dir(params['op_im_dir_test'])
    mk_dir(os.path.dirname(params['model_file_name']))

    return params