chaanks
/

asr-whisper-large-v2-sb

Automatic Speech Recognition

hf-asr-leaderboard

Model card Files Files and versions Community

chaanks committed on Aug 1, 2023

Commit

e3c9f37

•

1 Parent(s): 0007f0e

Upload 2 files

Files changed (2) hide show

config.json +3 -0
hyperparams.yaml +65 -0

config.json ADDED Viewed

	@@ -0,0 +1,3 @@

+{
+    "speechbrain_interface": "WhisperASR"
+}

hyperparams.yaml ADDED Viewed

	@@ -0,0 +1,65 @@

+# Hugging Face Hub identifier of the pretrained Whisper checkpoint to load.
+whisper_hub: openai/whisper-tiny
+# Normalize the transcripts with the same text normalization
+# used in the Whisper paper. Refer to Appendix C of the paper for further information.
+normalized_transcripts: True
+language: english
+auto_mix_prec: False
+sample_rate: 16000
+# These values are only used for the searchers.
+# They need to be hardcoded and should not be changed with Whisper.
+# They are used as part of the searching process.
+# The bos token of the searcher will be timestamp_index
+# and will be concatenated with the bos, language and task tokens.
+timestamp_index: 50363
+eos_index: 50257
+bos_index: 50258
+# Decoding parameters
+min_decode_ratio: 0.0
+max_decode_ratio: 1.0
+test_beam_size: 8
+# Model parameters
+freeze_whisper: True
+freeze_encoder: True
+whisper: !new:speechbrain.lobes.models.huggingface_whisper.HuggingFaceWhisper
+    source: !ref <whisper_hub>
+    freeze: !ref <freeze_whisper>
+    freeze_encoder: !ref <freeze_encoder>
+    save_path: pretrained_models
+    encoder_only:  False
+decoder: !new:speechbrain.decoders.seq2seq.S2SWhisperGreedySearch
+    model: !ref <whisper>
+    bos_index: !ref <timestamp_index>
+    eos_index: !ref <eos_index>
+    min_decode_ratio: !ref <min_decode_ratio>
+    max_decode_ratio: !ref <max_decode_ratio>
+# test_beam_searcher: !new:speechbrain.decoders.seq2seq.S2SWhisperBeamSearch
+#     module: [!ref <whisper>]
+#     bos_index: !ref <timestamp_index>
+#     eos_index: !ref <eos_index>
+#     min_decode_ratio: !ref <min_decode_ratio>
+#     max_decode_ratio: !ref <max_decode_ratio>
+#     beam_size: !ref <test_beam_size>
+modules:
+    whisper: !ref <whisper>
+    decoder:  !ref <decoder>
+pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
+    loadables:
+        whisper: !ref <whisper>
+    paths:
+        whisper: !ref <whisper_hub>/pytorch_model.bin