speechbrain
/

noisy-whisper-rescuespeech

@@ -6,17 +6,17 @@
 # Authors: Sangeet Sagar 2023
 # ################################
-# URL for the biggest whisper model.
 # Hugging Face Hub ID of the pretrained Whisper model (OpenAI large-v2).
 whisper_hub: openai/whisper-large-v2
 language: german
 normalized_transcripts: true
-## Model parameters
 sample_rate: 16000
-freeze_whisper: True
-freeze_encoder: True
 # These values are only used for the searchers.
 # They need to be hardcoded and should not be changed when using Whisper.
@@ -27,27 +27,38 @@ timestamp_index: 50363
 eos_index: 50257
 bos_index: 50258
-# ASR model
-whisper: &id003 !new:speechbrain.lobes.models.huggingface_whisper.HuggingFaceWhisper
     source: !ref <whisper_hub>
     freeze: !ref <freeze_whisper>
     freeze_encoder: !ref <freeze_encoder>
     save_path: whisper_checkpoints
     encoder_only:  False
-decoder: &id006 !new:speechbrain.decoders.seq2seq.S2SWhisperGreedySearch
-  model: *id003
-  bos_index: 50363
-  eos_index: 50257
-  min_decode_ratio: 0.0
-  max_decode_ratio: 1.0
-# Change the path to use a local model instead of the remote one
-pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
-  loadables:
-    whisper: !ref <whisper>
-    decoder: !ref <decoder>
 modules:
-  whisper: *id003

 # Authors: Sangeet Sagar 2023
 # ################################
 # Hugging Face Hub ID of the pretrained Whisper model (OpenAI large-v2).
 whisper_hub: openai/whisper-large-v2
 language: german
+# Normalize the english inputs with
+# the same normalization done in the paper
 normalized_transcripts: true
+test_only: false # Set it to True if you only want to do the evaluation
+auto_mix_prec: False
 sample_rate: 16000
 # These values are only used for the searchers.
 # They need to be hardcoded and should not be changed when using Whisper.
 eos_index: 50257
 bos_index: 50258
+# Decoding parameters
+min_decode_ratio: 0.0
+max_decode_ratio: 0.1
+test_beam_size: 8
+# Model parameters
+freeze_whisper: True
+freeze_encoder: True
+whisper: !new:speechbrain.lobes.models.huggingface_whisper.HuggingFaceWhisper
     source: !ref <whisper_hub>
     freeze: !ref <freeze_whisper>
     freeze_encoder: !ref <freeze_encoder>
     save_path: whisper_checkpoints
     encoder_only:  False
+decoder: !new:speechbrain.decoders.seq2seq.S2SWhisperGreedySearch
+    model: !ref <whisper>
+    bos_index: !ref <timestamp_index>
+    eos_index: !ref <eos_index>
+    min_decode_ratio: !ref <min_decode_ratio>
+    max_decode_ratio: !ref <max_decode_ratio>
 modules:
+    whisper: !ref <whisper>
+    decoder:  !ref <decoder>
+pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
+    loadables:
+        whisper: !ref <whisper>