# Generated 2021-03-21 from:
# /home/mila/l/lugoschl/code/fork/speechbrain/recipes/timers-and-such/direct/hparams/train.yaml
# yamllint disable
# ############################################################################
# Model: Direct SLU
# Encoder: Pre-trained ASR encoder -> LSTM
# Decoder: GRU + beamsearch
# Tokens: BPE with unigram
# losses: NLL
# Training: Timers and Such
# Authors: Loren Lugosch, Mirco Ravanelli 2020
# ############################################################################
#
# NOTE(review): this is a generated dump. Repeated literals (512, 256, 51,
# the results/ paths, ...) look like resolved copies of the top-level
# parameters; presumably edits belong in the source train.yaml named above.
# TODO confirm before hand-editing values here.

# Seed needs to be set at top of yaml, before objects with parameters are made
seed: 4
__set_seed: !apply:torch.manual_seed [4]

# Experiment name and output locations (all under results/train-real-only/4)
experiment: train-real-only
output_folder: results/train-real-only/4
save_folder: results/train-real-only/4/save
train_log: results/train-real-only/4/train_log.txt

# Data files
data_folder: /localscratch/timers-and-such/  # e.g., /localscratch/timers-and-such
data_folder_rirs: /localscratch/timers-and-such/
train_splits: [train-real]
csv_train: results/train-real-only/4/train-type=direct.csv
csv_dev_real: results/train-real-only/4/dev-real-type=direct.csv
csv_dev_synth: results/train-real-only/4/dev-synth-type=direct.csv
csv_test_real: results/train-real-only/4/test-real-type=direct.csv
csv_test_synth: results/train-real-only/4/test-synth-type=direct.csv
csv_all_real: results/train-real-only/4/all-real-type=direct.csv
tokenizer_file: /home/mila/l/lugoschl/code/speechbrain/recipes/timers-and-such/Tokenizer/results/tokenizer_bpe51/51_unigram.model
skip_prep: false
ckpt_interval_minutes: 15  # save checkpoint every N min
test_on_all_real: false

# Training parameters
number_of_epochs: 50
batch_size: 16
lr: 0.0003
token_type: unigram  # ["unigram", "bpe", "char"]
sorting: random

# Model parameters
sample_rate: 16000
emb_size: 128
dec_neurons: 512
output_neurons: 51  # index(eos/bos) = 0
ASR_encoder_dim: 512
encoder_dim: 256

# Decoding parameters
bos_index: 0
eos_index: 0
min_decode_ratio: 0.0
max_decode_ratio: 10.0
slu_beam_size: 80
eos_threshold: 1.5
temperature: 1.25

# Dataloader options; batch_size equals the top-level batch_size (16).
dataloader_opts:
  batch_size: 16
  shuffle: true

# Epoch counter; limit equals number_of_epochs (50).
epoch_counter: &id009 !new:speechbrain.utils.epoch_loop.EpochCounter
  limit: 50

# Models

# Pre-trained ASR model built through EncoderDecoderASR.from_hparams.
# NOTE(review): run_opts hard-codes device cuda:0 — confirm this is intended
# for every machine the config is used on.
asr_model: !apply:speechbrain.pretrained.EncoderDecoderASR.from_hparams
  source: speechbrain/asr-crdnn-rnnlm-librispeech
  run_opts: {device: cuda:0}

# SLU encoder: a 2-layer bidirectional LSTM followed by a linear layer.
# The last entry of input_shape and the LSTM input_size equal ASR_encoder_dim
# (512); the linear layer's n_neurons equals encoder_dim (256).
slu_enc: &id001 !new:speechbrain.nnet.containers.Sequential
  input_shape: [null, null, 512]
  lstm: !new:speechbrain.nnet.RNN.LSTM
    input_size: 512
    bidirectional: true
    hidden_size: 256
    num_layers: 2
  linear: !new:speechbrain.nnet.linear.Linear
    input_size: 512
    n_neurons: 256

# Output-token embedding; num_embeddings equals output_neurons (51) and
# embedding_dim equals emb_size (128).
output_emb: &id002 !new:speechbrain.nnet.embedding.Embedding
  num_embeddings: 51
  embedding_dim: 128

# Attentional GRU decoder; enc_dim equals encoder_dim (256), input_size equals
# emb_size (128), hidden_size equals dec_neurons (512).
dec: &id003 !new:speechbrain.nnet.RNN.AttentionalRNNDecoder
  enc_dim: 256
  input_size: 128
  rnn_type: gru
  attn_type: keyvalue
  hidden_size: 512
  attn_dim: 512
  num_layers: 3
  scaling: 1.0
  dropout: 0.0

# Output linear layer; input_size equals dec_neurons (512) and n_neurons
# equals output_neurons (51).
seq_lin: &id004 !new:speechbrain.nnet.linear.Linear
  input_size: 512
  n_neurons: 51

# Environmental corruption: only noise is enabled (noise_prob 1.0, babble and
# reverb 0.0). openrir_folder equals data_folder_rirs.
env_corrupt: &id005 !new:speechbrain.lobes.augment.EnvCorrupt
  openrir_folder: /localscratch/timers-and-such/
  babble_prob: 0.0
  reverb_prob: 0.0
  noise_prob: 1.0
  noise_snr_low: 0
  noise_snr_high: 15

# Named modules; each alias points at the object anchored above.
modules:
  slu_enc: *id001
  output_emb: *id002
  dec: *id003
  seq_lin: *id004
  env_corrupt: *id005

# ModuleList grouping slu_enc, output_emb, dec and seq_lin (env_corrupt is not
# included). This is the object the checkpointer registers as `model`.
model: &id007 !new:torch.nn.ModuleList
  - [*id001, *id002, *id003, *id004]

# Tokenizer object, constructed without arguments here; the pretrainer below
# maps it to a model file.
tokenizer: &id006 !new:sentencepiece.SentencePieceProcessor

# Pretrainer: associates the `tokenizer` loadable with its model file (same
# path as tokenizer_file) and collects into save/TAS_tokenizer.
pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
  collect_in: results/train-real-only/4/save/TAS_tokenizer
  loadables:
    tokenizer: *id006
  paths:
    tokenizer: /home/mila/l/lugoschl/code/speechbrain/recipes/timers-and-such/Tokenizer/results/tokenizer_bpe51/51_unigram.model

# Beam search over output_emb / dec / seq_lin. The index, ratio, threshold and
# temperature values equal the top-level decoding parameters; beam_size equals
# slu_beam_size (80).
beam_searcher: !new:speechbrain.decoders.S2SRNNBeamSearcher
  embedding: *id002
  decoder: *id003
  linear: *id004
  bos_index: 0
  eos_index: 0
  min_decode_ratio: 0.0
  max_decode_ratio: 10.0
  beam_size: 80
  eos_threshold: 1.5
  temperature: 1.25
  using_max_attn_shift: false
  max_attn_shift: 30
  coverage_penalty: 0.

# Optimizer; lr equals the top-level lr (0.0003).
opt_class: !name:torch.optim.Adam
  lr: 0.0003

# Learning-rate scheduler; initial_value equals the top-level lr (0.0003).
lr_annealing: &id008 !new:speechbrain.nnet.schedulers.NewBobScheduler
  initial_value: 0.0003
  improvement_threshold: 0.0025
  annealing_factor: 0.8
  patient: 0

# Checkpointer; checkpoints_dir equals save_folder. Registers the model list,
# the LR scheduler and the epoch counter as recoverables.
checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
  checkpoints_dir: results/train-real-only/4/save
  recoverables:
    model: *id007
    scheduler: *id008
    counter: *id009

# Time-domain augmentation; sample_rate equals the top-level sample_rate.
augmentation: !new:speechbrain.lobes.augment.TimeDomainSpecAugment
  sample_rate: 16000
  speeds: [95, 100, 105]

# Softmax with apply_log enabled.
log_softmax: !new:speechbrain.nnet.activations.Softmax
  apply_log: true

# Sequence loss: nll_loss with label_smoothing 0.1.
seq_cost: !name:speechbrain.nnet.losses.nll_loss
  label_smoothing: 0.1

# Training logger; save_file equals train_log.
train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger
  save_file: results/train-real-only/4/train_log.txt

# Error-rate metrics: both use ErrorRateStats; cer_computer additionally sets
# split_tokens: true.
error_rate_computer: !name:speechbrain.utils.metric_stats.ErrorRateStats

cer_computer: !name:speechbrain.utils.metric_stats.ErrorRateStats
  split_tokens: true