Oisin Mac Aodha
added bat code
9ace58a
import numpy as np
import os
import datetime
def mk_dir(path):
    """Create the directory ``path``, including any missing parents.

    Does nothing if the directory already exists.  Creation is delegated to
    ``os.makedirs(..., exist_ok=True)`` rather than a separate existence
    check, so a directory that appears between a check and the creation call
    (e.g. made by a concurrently starting run) cannot cause a failure.

    Args:
        path: directory path to create.

    Raises:
        OSError: if the directory cannot be created, e.g. ``path`` already
            exists but is not a directory.
    """
    os.makedirs(path, exist_ok=True)
def get_params(make_dirs=False, exps_dir='../../experiments/'):
    """Build the dictionary of default settings for one experiment.

    A timestamp taken at call time names both the experiment directory
    (created under ``exps_dir``) and the model checkpoint file inside it.

    Args:
        make_dirs: when true, print the model name, model file and experiment
            paths, then create the experiment output directories with
            ``mk_dir``.
        exps_dir: root directory under which the timestamped experiment
            directory is placed.

    Returns:
        A new ``dict`` mapping parameter names to their default values.
    """
    # Timestamped locations for everything this run writes out.
    stamp = datetime.datetime.now().strftime("%Y_%m_%d__%H_%M_%S")
    experiment_dir = os.path.join(exps_dir, stamp, '')
    checkpoint_path = os.path.join(experiment_dir, stamp + '.pth.tar')

    params = {}

    # Model and output locations.
    params.update({
        # one of: Net2DFast, Net2DSkip, Net2DSimple, Net2DSkipDS, Net2DRN
        'model_name': 'Net2DFast',
        'num_filters': 128,
        'experiment': experiment_dir,
        'model_file_name': checkpoint_path,
        'op_im_dir': os.path.join(experiment_dir, 'op_ims', ''),
        'op_im_dir_test': os.path.join(experiment_dir, 'op_ims_test', ''),
    })
    # params['notes'] = ''  # optional: free-form notes about an experiment

    # Spectrogram parameters.
    params.update({
        # all audio is resampled to this rate
        'target_samp_rate': 256000,
        # amount of time per stft time step
        # NOTE(review): the original comment said "in milliseconds", but
        # 512 / 256000.0 == 0.002, which is seconds if the divisor is a
        # sample rate in Hz - confirm the intended unit.
        'fft_win_length': 512 / 256000.0,
        # stft window overlap
        'fft_overlap': 0.75,
        # in Hz, everything above this is discarded
        'max_freq': 120000,
        # in Hz, everything below this is discarded
        'min_freq': 10000,
        # resize applied to the spectrogram at the input of the network
        'resize_factor': 0.5,
        # number of frequency bins (before resizing is performed)
        'spec_height': 256,
        # number of time steps (before resizing is performed)
        'spec_train_width': 512,
        # spectrogram width and height should be divisible by this amount
        'spec_divide_factor': 32,
    })

    # Spectrogram processing parameters.
    params.update({
        # removes the mean for each frequency band
        'denoise_spec_avg': True,
        # scales the raw audio to [-1, 1]
        'scale_raw_audio': False,
        # scales the spectrogram so that its max is 1
        'max_scale_spec': False,
        # one of: 'log', 'pcen', 'none'
        'spec_scale': 'pcen',
    })

    # Detection parameters.
    params.update({
        # a prediction has to be within this time of a call to count as a
        # detection (original comment gives the unit as ms - TODO confirm)
        'detection_overlap': 0.01,
        # ground truth calls starting within this time of the start/end of
        # a file are ignored
        'ignore_start_end': 0.01,
        # the smaller this is, the better the recall will be
        'detection_threshold': 0.01,
        'nms_kernel_size': 9,
        # keep the top K highest predictions per second of audio
        'nms_top_k_per_sec': 200,
        'target_sigma': 2.0,
    })

    # Augmentation parameters.
    params.update({
        # each augmentation is performed with this probability
        'aug_prob': 0.20,
        'augment_at_train': True,
        'augment_at_train_combine': True,
        # echo is simulated by adding a copy of the raw audio
        'echo_max_delay': 0.005,
        # stretch or squeeze the spectrogram
        'stretch_squeeze_delta': 0.04,
        # max mask sizes - expressed as percentages, which is not ideal
        'mask_max_time_perc': 0.05,
        'mask_max_freq_perc': 0.10,
        # multiply the "volume" by 0:X times the current amount
        'spec_amp_scaling': 2.0,
        'aug_sampling_rates': [220500, 256000, 300000, 312500, 384000,
                               441000, 500000],
    })

    # Loss parameters.
    individual_weight = 0.0  # not used
    params.update({
        # 'mse' or 'focal'
        'train_loss': 'focal',
        # weight for the detection part of the loss
        'det_loss_weight': 1.0,
        # weight for the bbox size loss
        'size_loss_weight': 0.1,
        # weight for the classification loss
        'class_loss_weight': 2.0,
        'individual_loss_weight': individual_weight,
        # number of dimensions used for the individual id embedding; zero
        # whenever the individual loss is switched off
        'emb_dim': 3 if individual_weight != 0.0 else 0,
    })

    # Training parameters.
    params.update({
        'lr': 0.001,
        'batch_size': 8,
        'num_workers': 4,
        'num_epochs': 200,
        # run evaluation every X epochs
        'num_eval_epochs': 5,
        'device': 'cuda',
        'save_test_image_during_train': False,
        'save_test_image_after_train': True,
    })

    # Class handling.
    params.update({
        'convert_to_genus': False,
        'genus_mapping': [],
        'class_names': [],
        'classes_to_ignore': ['', ' ', 'Unknown', 'Not Bat'],
        'generic_class': ['Bat'],
        # all other types of events, e.g. social calls, are ignored
        'events_of_interest': ['Echolocation'],
        # classes listed here are standardized during training so that the
        # same low and high freq are used
        'standardize_classs_names': [],
    })

    if not make_dirs:
        return params

    # Report where this run will live, then create its directories.
    for label, key in (('Model name : ', 'model_name'),
                       ('Model file : ', 'model_file_name'),
                       ('Experiment : ', 'experiment')):
        print(label + params[key])

    dirs_to_make = [params['experiment']]
    if params['save_test_image_during_train']:
        dirs_to_make.append(params['op_im_dir'])
    if params['save_test_image_after_train']:
        dirs_to_make.append(params['op_im_dir_test'])
    dirs_to_make.append(os.path.dirname(params['model_file_name']))
    for directory in dirs_to_make:
        mk_dir(directory)

    return params