# FlexSED/src/configs/.ipynb_checkpoints/model-checkpoint.yml
# OpenSound's picture
# Upload 544 files
# 3b6a091 verified
# Name of this model configuration (how it is consumed is not visible here).
model_name: TSED_AS_filter
# Encoder hyperparameters. The children are nested under `encoder` so that
# embed_dim, depth and num_heads do not collide with the same-named keys that
# also appear for the decoder.
encoder:
  target_length: 1008
  # patch_stride equals patch_size in both dimensions.
  patch_size: [64, 4]
  patch_stride: [64, 4]
  group_masking: true
  embed_dim: 768
  depth: 12
  num_heads: 12
# Decoder hyperparameters. The children are nested under `decoder` so that
# embed_dim, depth and num_heads stay distinct from the same-named keys that
# also appear for the encoder.
decoder:
  embed_dim: 768
  depth: 6
  num_heads: 12
  # NOTE(review): cls_dim and fusion are assumed to belong to the decoder;
  # confirm against the code that reads this config.
  cls_dim: 512
  fusion: adaln
# General model settings.
# NOTE(review): these four keys are assumed to sit at the top level rather
# than under `decoder`; confirm against the code that reads this config.
# NOTE(review): ft_blocks presumably lists the indices of the blocks to
# fine-tune; confirm which module they index into.
ft_blocks: [6, 7, 8, 9, 10, 11]
frozen_encoder: false
net_pooling: 4
# NOTE(review): presumably the audio sample rate in Hz; confirm.
sr: 16000
# Data-augmentation settings.
# NOTE(review): the nesting below is reconstructed. `transform` is assumed to
# be a child of `data_aug`, and the filter/mask/noise keys children of
# `transform`; confirm against the code that reads this config.
data_aug:
  time_mask_ratios: [5, 20]
  # Hyperparameters for data augmentations that do not alter the label
  # information.
  transform:
    # 0: none of the augmentations below is applied.
    # 1: the same augmentation is applied to the student and teacher inputs.
    # 2: different augmentations are applied to the student and teacher inputs.
    n_transform: 2
    # Which augmentations to apply: [FilterAugment, freq_mask, add_noise].
    choice: [1, 0, 0]
    # dB range of FilterAugment applied to each band.
    filter_db_range: [-4.5, 6]
    # Range of the number of frequency bands used by FilterAugment.
    filter_bands: [2, 5]
    filter_minimum_bandwidth: 4
    filter_type: step
    # Maximum ratio of the frequency-masking range: at most 1/16 of the total
    # frequency count is masked.
    freq_mask_ratio: 16
    # SNR of the original signal with respect to the added noise.
    noise_snrs: [35, 40]
# `!include` is an application-specific tag, not part of standard YAML: the
# loader must register a constructor for it, and a plain safe loader will
# reject these lines. Presumably each tag pulls in the named file's contents
# as the value of the key; confirm against the project's loader.
opt: !include opt_config.yml
data: !include data_config.yml