speechbrain
/

noisy-whisper-rescuespeech

+# Generated 2023-06-24 from:
+# /netscratch/sagar/thesis/speechbrain/recipes/RescueSpeech/Enhancement/joint-training/transformers/hparams/robust_asr_16k.yaml
+# yamllint disable
+# Model: Whisper (HuggingFaceWhisper) with greedy-search decoding
+# Augmentation: SpecAugment
+# Authors: Sangeet Sagar 2023
+# ################################
+# Hugging Face hub identifier of the Whisper model to load
+# (openai/whisper-large-v2; this is not a Fairseq model).
+whisper_hub: openai/whisper-large-v2
+language: german
+## Model parameters
+sample_rate: 16000
+freeze_whisper: false
+freeze_encoder_only: false
+freeze_encoder: true
+# These values are only used by the searchers.
+# They need to be hardcoded and should not be changed when using Whisper.
+# They are used as part of the searching process.
+# The bos token of the searcher will be timestamp_index
+# and will be concatenated with the bos, language and task tokens.
+timestamp_index: 50363
+eos_index: 50257
+bos_index: 50258
+# ASR model
+whisper: &id003 !new:speechbrain.lobes.models.huggingface_whisper.HuggingFaceWhisper
+    source: !ref <whisper_hub>
+    freeze: !ref <freeze_whisper>
+    freeze_encoder: !ref <freeze_encoder>
+    save_path: whisper_checkpoints
+    encoder_only:  False
+decoder: &id006 !new:speechbrain.decoders.seq2seq.S2SWhisperGreedySearch
+  model: *id003
+  bos_index: 50363
+  eos_index: 50257
+  min_decode_ratio: 0.0
+  max_decode_ratio: 1.0
+# Change the path to use a local model instead of the remote one
+pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
+  loadables:
+    whisper: !ref <whisper>
+    decoder: !ref <decoder>
+modules:
+  whisper: *id003
+  decoder: *id006

enhance.yaml ADDED Viewed

	@@ -0,0 +1,72 @@

+# Generated 2023-06-24 from:
+# /netscratch/sagar/thesis/speechbrain/recipes/RescueSpeech/Enhancement/joint-training/transformers/hparams/robust_asr_16k.yaml
+# yamllint disable
+# Model: dual-path transformer enhancement network (Encoder + MaskNet + Decoder)
+# Augmentation: SpecAugment
+# Authors: Sangeet Sagar 2023
+# ################################
+## Model parameters
+sample_rate: 16000
+# Decoding parameters
+min_decode_ratio: 0.0
+max_decode_ratio: 1.0
+test_beam_size: 8
+num_spks: 1
+# Enhancement model
+Encoder: &id004 !new:speechbrain.lobes.models.dual_path.Encoder
+  kernel_size: 16
+  out_channels: 256
+SBtfintra: &id001 !new:speechbrain.lobes.models.dual_path.SBTransformerBlock
+  num_layers: 8
+  d_model: 256
+  nhead: 8
+  d_ffn: 1024
+  dropout: 0
+  use_positional_encoding: true
+  norm_before: true
+SBtfinter: &id002 !new:speechbrain.lobes.models.dual_path.SBTransformerBlock
+  num_layers: 8
+  d_model: 256
+  nhead: 8
+  d_ffn: 1024
+  dropout: 0
+  use_positional_encoding: true
+  norm_before: true
+MaskNet: &id005 !new:speechbrain.lobes.models.dual_path.Dual_Path_Model
+  num_spks: 1
+  in_channels: 256
+  out_channels: 256
+  num_layers: 2
+  K: 250
+  intra_model: *id001
+  inter_model: *id002
+  norm: ln
+  linear_layer_after_inter_intra: false
+  skip_around_intra: true
+# Decoder of the enhancement model (dual_path.Decoder)
+Decoder: &id006 !new:speechbrain.lobes.models.dual_path.Decoder
+  in_channels: 256
+  out_channels: 1
+  kernel_size: 16
+  stride: 8
+  bias: false
+# Change the path to use a local model instead of the remote one
+pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
+  loadables:
+    encoder: !ref <Encoder>
+    masknet: !ref <MaskNet>
+    decoder: !ref <Decoder>
+modules:
+  encoder: *id004
+  masknet: *id005
+  decoder: *id006