Spaces:

macaodha
/

batdetect2

Running

batdetect2 / bat_detect /detector /parameters.py

Oisin Mac Aodha

added bat code

9ace58a over 2 years ago

5.18 kB

	import numpy as np
	import os
	import datetime


	def mk_dir(path):
	    """Create the directory ``path`` (and any missing parents) if needed.

	    Calling this for a directory that already exists is a no-op.

	    Args:
	        path: Directory path to create.

	    Raises:
	        OSError: If ``path`` cannot be created, e.g. it already exists
	            but is not a directory.
	    """
	    # exist_ok=True avoids the check-then-create race of testing
	    # os.path.isdir() first: another process may create the directory
	    # between the check and the makedirs call.
	    os.makedirs(path, exist_ok=True)


	def get_params(make_dirs=False, exps_dir='../../experiments/'):
	    """Build the default parameter dictionary for an experiment.

	    Each call stamps the experiment with the current local time, so the
	    experiment directory and model file name differ between calls made
	    in different seconds.

	    Args:
	        make_dirs: If True, print the model/experiment paths and create
	            the experiment directory plus whichever output-image
	            directories are enabled by the ``save_test_image_*`` flags.
	        exps_dir: Root directory under which the time-stamped experiment
	            directory is placed.

	    Returns:
	        dict: Parameter names mapped to their default values.
	    """
	    params = {}

	    params['model_name'] = 'Net2DFast'  # Net2DFast, Net2DSkip, Net2DSimple, Net2DSkipDS, Net2DRN
	    params['num_filters'] = 128

	    # the time stamp names both the experiment directory and the model file
	    now_str = datetime.datetime.now().strftime("%Y_%m_%d__%H_%M_%S")
	    model_name = now_str + '.pth.tar'
	    # joining with a trailing '' makes the directory paths end in a separator
	    params['experiment'] = os.path.join(exps_dir, now_str, '')
	    params['model_file_name'] = os.path.join(params['experiment'], model_name)
	    params['op_im_dir'] = os.path.join(params['experiment'], 'op_ims', '')
	    params['op_im_dir_test'] = os.path.join(params['experiment'], 'op_ims_test', '')
	    # params['notes'] = ''  # can save notes about an experiment here

	    # spec parameters
	    params['target_samp_rate'] = 256000  # resamples all audio so that it is at this rate
	    # in seconds: 512 samples at 256 kHz, i.e. 0.002 s (2 ms) per stft window
	    params['fft_win_length'] = 512 / 256000.0
	    params['fft_overlap'] = 0.75  # stft window overlap

	    params['max_freq'] = 120000  # in Hz, everything above this will be discarded
	    params['min_freq'] = 10000  # in Hz, everything below this will be discarded

	    params['resize_factor'] = 0.5  # resize so the spectrogram at the input of the network
	    params['spec_height'] = 256  # units are number of frequency bins (before resizing is performed)
	    params['spec_train_width'] = 512  # units are number of time steps (before resizing is performed)
	    params['spec_divide_factor'] = 32  # spectrogram should be divisible by this amount in width and height

	    # spec processing params
	    params['denoise_spec_avg'] = True  # removes the mean for each frequency band
	    params['scale_raw_audio'] = False  # scales the raw audio to [-1, 1]
	    params['max_scale_spec'] = False  # scales the spectrogram so that it is max 1
	    params['spec_scale'] = 'pcen'  # 'log', 'pcen', 'none'

	    # detection params
	    # NOTE(review): the time values below were described as "ms", but their
	    # magnitudes look like seconds (0.01 -> 10 ms) - confirm against the callers
	    params['detection_overlap'] = 0.01  # has to be within this amount of time to count as detection
	    params['ignore_start_end'] = 0.01  # if start of GT calls are within this time from the start/end of file ignore
	    params['detection_threshold'] = 0.01  # the smaller this is the better the recall will be
	    params['nms_kernel_size'] = 9
	    params['nms_top_k_per_sec'] = 200  # keep top K highest predictions per second of audio
	    params['target_sigma'] = 2.0

	    # augmentation params
	    params['aug_prob'] = 0.20  # augmentations will be performed with this probability
	    params['augment_at_train'] = True
	    params['augment_at_train_combine'] = True
	    params['echo_max_delay'] = 0.005  # simulate echo by adding copy of raw audio (presumably seconds - confirm)
	    params['stretch_squeeze_delta'] = 0.04  # stretch or squeeze spec
	    params['mask_max_time_perc'] = 0.05  # max mask size - here percentage, not ideal
	    params['mask_max_freq_perc'] = 0.10  # max mask size - here percentage, not ideal
	    params['spec_amp_scaling'] = 2.0  # multiply the "volume" by 0:X times current amount
	    params['aug_sampling_rates'] = [220500, 256000, 300000, 312500, 384000, 441000, 500000]

	    # loss params
	    params['train_loss'] = 'focal'  # mse or focal
	    params['det_loss_weight'] = 1.0  # weight for the detection part of the loss
	    params['size_loss_weight'] = 0.1  # weight for the bbox size loss
	    params['class_loss_weight'] = 2.0  # weight for the classification loss
	    params['individual_loss_weight'] = 0.0  # not used
	    # number of dimensions used for individual id embedding; 0 when the
	    # individual loss is switched off
	    params['emb_dim'] = 0 if params['individual_loss_weight'] == 0.0 else 3

	    # train params
	    params['lr'] = 0.001
	    params['batch_size'] = 8
	    params['num_workers'] = 4
	    params['num_epochs'] = 200
	    params['num_eval_epochs'] = 5  # run evaluation every X epochs
	    params['device'] = 'cuda'
	    params['save_test_image_during_train'] = False
	    params['save_test_image_after_train'] = True

	    params['convert_to_genus'] = False
	    params['genus_mapping'] = []
	    params['class_names'] = []
	    params['classes_to_ignore'] = ['', ' ', 'Unknown', 'Not Bat']
	    params['generic_class'] = ['Bat']
	    params['events_of_interest'] = ['Echolocation']  # will ignore all other types of events e.g. social calls

	    # the classes in this list are standardized during training so that the same low and high freq are used
	    # (the key spelling 'classs' is kept as-is: it is read at runtime under this name)
	    params['standardize_classs_names'] = []

	    # create directories
	    if make_dirs:
	        print('Model name : ' + params['model_name'])
	        print('Model file : ' + params['model_file_name'])
	        print('Experiment : ' + params['experiment'])

	        mk_dir(params['experiment'])
	        # image directories are only created when the matching flag is set
	        if params['save_test_image_during_train']:
	            mk_dir(params['op_im_dir'])
	        if params['save_test_image_after_train']:
	            mk_dir(params['op_im_dir_test'])
	        mk_dir(os.path.dirname(params['model_file_name']))

	    return params