# Data parameters:
# With data_parallel batch_size is split into N jobs.
# With DDP batch_size is multiplied by N jobs.
batch_size: 6
test_batch_size: 2

# We remove utterances longer than 90s in the train/dev/test sets, as
# longer utterances most likely correspond to "open microphones".
avoid_if_longer_than: 90.0
avoid_if_smaller_than: 0.0

dataloader_options:
    batch_size: 6
    num_workers: 6
    shuffle: true
test_dataloader_options:
    batch_size: 2
    num_workers: 3

# Feature parameters:
sample_rate: 16000
feats_dim: 1024

# Training parameters:
number_of_epochs: 80
lr: 1
lr_wav2vec: 0.0001
annealing_factor: 0.8
annealing_factor_wav2vec: 0.9
improvement_threshold: 0.0025
improvement_threshold_wav2vec: 0.0025
patient: 0
patient_wav2vec: 0
sorting: random

# Model parameters:
activation: &id001 !name:torch.nn.LeakyReLU
dropout: 0.15
cnn_blocks: 0
rnn_layers: 0
dnn_blocks: 1
rnn_neurons: 0
dnn_neurons: 1024

# Wav2Vec parameters:
freeze: false

# Decoding parameters:
blank_index: 0

# Outputs:
output_neurons: 113

# ------ Functions and classes
epoch_counter: &id008 !new:speechbrain.utils.epoch_loop.EpochCounter
    limit: 80

wav2vec: &id002 !new:speechbrain.lobes.models.huggingface_transformers.wav2vec2.Wav2Vec2
    source: microsoft/wavlm-large
    output_norm: true
    freeze: false
    save_path: results/TARIC_SLU_wav2vec_wavLM_with_intent_criterion_a100_copie/1212/save/wav2vec.pt

dec: &id003 !new:speechbrain.lobes.models.VanillaNN.VanillaNN
    input_shape: [null, null, 1024]
    activation: *id001
    dnn_blocks: 1
    dnn_neurons: 1024

output_lin: &id004 !new:speechbrain.nnet.linear.Linear
    input_size: 1024
    n_neurons: 113
    bias: true

softmax: !new:speechbrain.nnet.activations.Softmax
    apply_log: true

ctc_cost: !name:speechbrain.nnet.losses.ctc_loss
    blank_index: 0

modules:
    wav2vec: *id002
    dec: *id003
    output_lin: *id004

model: &id005 !new:torch.nn.ModuleList
    - [*id003, *id004]
model_wav2vec: !new:torch.nn.ModuleList
    - [*id002]

opt_class: !name:torch.optim.Adadelta
    lr: 1
    rho: 0.95
    eps: 1.e-8

opt_class_wav2vec: !name:torch.optim.Adam
    lr: 0.0001

lr_annealing: &id006 !new:speechbrain.nnet.schedulers.NewBobScheduler
    initial_value: 1
    improvement_threshold: 0.0025
    annealing_factor: 0.8
    patient: 0

lr_annealing_wav2vec: &id007 !new:speechbrain.nnet.schedulers.NewBobScheduler
    initial_value: 0.0001
    improvement_threshold: 0.0025
    annealing_factor: 0.9
    patient: 0

checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
    checkpoints_dir: results/TARIC_SLU_wav2vec_wavLM_with_intent_criterion_a100_copie/1212/save
    recoverables:
        model: *id005
        wav2vec: *id002
        lr_annealing: *id006
        lr_annealing_wav2vec: *id007
        counter: *id008

train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger
    save_file: results/TARIC_SLU_wav2vec_wavLM_with_intent_criterion_a100_copie/1212/train_log.txt

ctc_computer: !name:speechbrain.utils.metric_stats.MetricStats
    metric: !name:speechbrain.nnet.losses.ctc_loss
        blank_index: 0
        reduction: batch

error_rate_computer: !name:speechbrain.utils.metric_stats.ErrorRateStats

cer_computer: !name:speechbrain.utils.metric_stats.ErrorRateStats
    merge_tokens: true

coer_computer: !name:speechbrain.utils.metric_stats.ErrorRateStats
    extract_concepts_values: true
    keep_values: false
    tag_in: <
    tag_out: >

cver_computer: !name:speechbrain.utils.metric_stats.ErrorRateStats
    extract_concepts_values: true
    keep_values: true
    tag_in: <
    tag_out: >

tokenizer: !new:speechbrain.dataio.encoder.CTCTextEncoder

pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
    loadables:
        model: !ref <model>
        wav2vec: !ref <wav2vec>
        tokenizer: !ref <tokenizer>
    paths:
        model: !ref /content/sample_data/SLU/model.cpkt
        wav2vec: !ref /content/sample_data/SLU/wav2vec.cpkt
        tokenizer: !ref /content/sample_data/SLU/label_encoder.txt

decoding_function: !name:speechbrain.decoders.ctc_greedy_decode
    blank_id: 0

# Tag list:
tag_list: , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ,
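
# A minimal sketch of how a HyperPyYAML file like this one is typically loaded
# and its pretrained checkpoints fetched. It assumes the file is saved as
# hyperparams.yaml and that speechbrain / hyperpyyaml are installed; the
# filename is an assumption for illustration, not part of this config.
#
#     from hyperpyyaml import load_hyperpyyaml
#
#     with open("hyperparams.yaml") as fin:
#         hparams = load_hyperpyyaml(fin)
#
#     # Fetch the model, wav2vec and label-encoder files listed under `paths`,
#     # then load them into the instantiated modules / tokenizer.
#     hparams["pretrainer"].collect_files()
#     hparams["pretrainer"].load_collected()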