|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Seed value; the same number is passed to torch.manual_seed below.
seed: 11
# The python/object/apply tag calls torch.manual_seed(11) while the file is
# being loaded. NOTE(review): this tag executes code at load time, so only load
# this file from a trusted source.
__set_seed: !!python/object/apply:torch.manual_seed [11]
|
|
|
|
|
|
|
# Root folder of the dataset.
data_folder: /data2/ESC-50-master
# Previously the literal text "<data_folder>/RIRS" (placeholder without a !ref
# tag, so it was never substituted). Written out in full, like the other paths
# derived from data_folder in this file.
open_rir_folder: /data2/ESC-50-master/RIRS
# Audio sub-folder under data_folder.
audio_data_folder: /data2/ESC-50-master/audio
|
|
|
|
|
|
|
|
|
|
|
# Experiment identification and output locations. Every path below sits under
# ./results/<experiment_name>/<seed> (cnn14 / 11).
experiment_name: cnn14
output_folder: ./results/cnn14/11
save_folder: ./results/cnn14/11/save
train_log: ./results/cnn14/11/train_log.txt
|
|
|
# Run-mode switch (false here).
test_only: false

# Tensorboard logging switch and its log directory under the output folder.
use_tensorboard: false
tensorboard_logs_folder: ./results/cnn14/11/tb_logs/
|
|
|
|
|
# JSON manifest paths for the train / validation / test splits, all located in
# the manifest sub-folder of the dataset root.
train_annotation: /data2/ESC-50-master/manifest/train.json
valid_annotation: /data2/ESC-50-master/manifest/valid.json
test_annotation: /data2/ESC-50-master/manifest/test.json
|
|
|
|
|
|
|
# Fold numbers assigned to each split: folds 1-3 train, fold 4 validation,
# fold 5 test.
train_fold_nums: [1, 2, 3]
valid_fold_nums: [4]
test_fold_nums: [5]
# Manifest-creation switch (false here).
skip_manifest_creation: false
|
|
|
# Checkpoint interval, in minutes (per the key name).
ckpt_interval_minutes: 15

# Training schedule values. The same numbers are repeated further down in
# epoch_counter (limit), dataloader_options (batch_size), opt_class (lr) and
# lr_annealing (base_lr / max_lr / step_size).
number_of_epochs: 200
batch_size: 32
lr: 0.0002
base_lr: 0.00000001
max_lr: 0.0002
step_size: 65000
sample_rate: 44100

device: cpu
|
|
|
|
|
# Feature parameters. n_mels, left_frames, right_frames and deltas are repeated
# in the compute_features (Fbank) entry of this file.
n_mels: 80
left_frames: 0
right_frames: 0
deltas: false
amp_to_db: true
normalize: true

# Number of output classes; matches the classifier's out_neurons (50).
out_n_neurons: 50
|
|
|
|
|
|
|
|
|
|
|
|
|
shuffle: true
# Dataloader settings; batch_size and shuffle repeat the top-level values
# (32 and true).
dataloader_options:
    batch_size: 32
    shuffle: true
    num_workers: 0
|
|
|
|
|
# Fbank feature extractor, anchored as id003 so the `modules` mapping can alias
# it. The indented keys are its constructor arguments.
compute_features: &id003 !new:speechbrain.lobes.features.Fbank
    n_mels: 80
    left_frames: 0
    right_frames: 0
    deltas: false
    sample_rate: 44100
    n_fft: 1024
    # NOTE(review): these two differ from the top-level win_length (23.2199)
    # and hop_length (11.6099) that compute_stft uses -- confirm intended.
    win_length: 20
    hop_length: 10
|
|
|
use_pretrain: false
# Cnn14 embedding network, anchored as id009 (aliased by `modules` and by the
# checkpointer). mel_bins matches n_mels (80).
embedding_model: &id009 !new:speechbrain.lobes.models.Cnn14.Cnn14
    mel_bins: 80
    emb_dim: 2048
|
|
|
# Classification head, anchored as id010. input_size equals the embedding
# model's emb_dim (2048); out_neurons equals out_n_neurons (50).
classifier: &id010 !new:speechbrain.lobes.models.ECAPA_TDNN.Classifier
    input_size: 2048
    out_neurons: 50
    lin_blocks: 1
|
|
|
# Epoch counter, anchored as id012 (aliased by the checkpointer's recoverables).
# limit equals number_of_epochs (200).
epoch_counter: &id012 !new:speechbrain.utils.epoch_loop.EpochCounter
    limit: 200
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Augmentation settings; the pipeline list is empty in this configuration.
augment_pipeline: []
concat_augment: true
|
|
|
# Input normalization object, anchored as id011 (aliased by `modules` and, as
# `normalizer`, by the checkpointer).
mean_var_norm: &id011 !new:speechbrain.processing.features.InputNormalization
    norm_type: sentence
    std_norm: false
|
|
|
|
|
# Spectrogram parameters; n_fft, hop_length and win_length are repeated in the
# compute_stft entry.
# NOTE(review): hop/win are presumably milliseconds -- 1024 samples at 44100 Hz
# is 23.2199 ms and 512 samples is 11.6099 ms. Confirm against the STFT API.
n_fft: 1024
spec_mag_power: 0.5
hop_length: 11.6099
win_length: 23.2199
|
# STFT object, anchored as id001 so `modules` can alias it. Arguments repeat
# the top-level n_fft / hop_length / win_length / sample_rate values.
compute_stft: &id001 !new:speechbrain.processing.features.STFT
    n_fft: 1024
    hop_length: 11.6099
    win_length: 23.2199
    sample_rate: 44100
|
|
|
# Filterbank object, anchored as id002 so `modules` can alias it.
compute_fbank: &id002 !new:speechbrain.processing.features.Filterbank
    n_mels: 80
    n_fft: 1024
    sample_rate: 44100
|
|
|
# Name -> object mapping. Every value is a YAML alias to an object anchored
# earlier in this file, so no new instances are created here.
modules:
    compute_stft: *id001
    compute_fbank: *id002
    compute_features: *id003
    embedding_model: *id009
    classifier: *id010
    mean_var_norm: *id011
|
# Loss: a LogSoftmaxWrapper constructed around an AdditiveAngularMargin loss.
# margin and scale are arguments of the inner AdditiveAngularMargin object.
compute_cost: !new:speechbrain.nnet.losses.LogSoftmaxWrapper
    loss_fn: !new:speechbrain.nnet.losses.AdditiveAngularMargin
        margin: 0.2
        scale: 30
|
|
|
|
|
|
|
# Optimizer given via !name (a callable with preset keyword arguments, not an
# instance). lr equals the top-level lr (0.0002).
opt_class: !name:torch.optim.Adam
    lr: 0.0002
    weight_decay: 0.000002
|
|
|
# Cyclic learning-rate scheduler; arguments repeat the top-level base_lr,
# max_lr and step_size values.
lr_annealing: !new:speechbrain.nnet.schedulers.CyclicLRScheduler
    base_lr: 0.00000001
    max_lr: 0.0002
    step_size: 65000
|
|
|
|
|
# File-based training logger; save_file is the same path as train_log.
train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger
    save_file: ./results/cnn14/11/train_log.txt
|
|
|
# MetricStats given via !name, with classification_error as its metric.
# `reduction: batch` is a preset argument of the metric function itself.
error_stats: !name:speechbrain.utils.metric_stats.MetricStats
    metric: !name:speechbrain.nnet.losses.classification_error
        reduction: batch
|
|
|
# Checkpointer writing to the save folder. recoverables maps names to objects
# anchored earlier in this file (embedding model, classifier, input
# normalizer, epoch counter).
checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
    checkpoints_dir: ./results/cnn14/11/save
    recoverables:
        embedding_model: *id009
        classifier: *id010
        normalizer: *id011
        counter: *id012
|
|
|
# Pretrainer whose loadables refer to the top-level embedding_model and
# classifier entries. These use !ref lookups, whereas the rest of this file
# uses *id aliases for the same entries.
pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
    loadables:
        embedding_model: !ref <embedding_model>
        classifier: !ref <classifier>
|
|