# ################################
# Model: Whisper (Encoder-Decoder) + NLL
# Augmentation: TimeDomainSpecAugment
# Authors: Pooneh Mousavi 2022
# ################################

# HuggingFace hub ID of the pretrained OpenAI Whisper model
# (large-v2 is the largest multilingual checkpoint).
whisper_hub: openai/whisper-large-v2

# Normalize the transcripts with the same text normalization used in the
# Whisper paper. Refer to Appendix C of the paper for further information.
normalized_transcripts: True
language: mongolian
auto_mix_prec: False
sample_rate: 16000

# These values are only used for the searchers.
# They need to be hardcoded and should not be changed with Whisper.
# They are used as part of the searching process.
# The bos token of the searcher will be timestamp_index
# and will be concatenated with the bos, language and task tokens.
timestamp_index: 50363
eos_index: 50257
bos_index: 50258

# Decoding parameters
min_decode_ratio: 0.0
max_decode_ratio: 0.1
test_beam_size: 8

# Model parameters
freeze_whisper: True
freeze_encoder: True

whisper: !new:speechbrain.lobes.models.huggingface_transformers.whisper.Whisper
    source: !ref <whisper_hub>
    freeze: !ref <freeze_whisper>
    freeze_encoder: !ref <freeze_encoder>
    save_path: whisper_checkpoints
    encoder_only: False

decoder: !new:speechbrain.decoders.seq2seq.S2SWhisperGreedySearcher
    model: !ref <whisper>
    bos_index: !ref <timestamp_index>
    eos_index: !ref <eos_index>
    min_decode_ratio: !ref <min_decode_ratio>
    max_decode_ratio: !ref <max_decode_ratio>

# test_beam_searcher: !new:speechbrain.decoders.seq2seq.S2SWhisperBeamSearcher
#     module: [!ref <whisper>]
#     bos_index: !ref <timestamp_index>
#     eos_index: !ref <eos_index>
#     min_decode_ratio: !ref <min_decode_ratio>
#     max_decode_ratio: !ref <max_decode_ratio>
#     beam_size: !ref <test_beam_size>

modules:
    whisper: !ref <whisper>
    decoder: !ref <decoder>

pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
    loadables:
        whisper: !ref <whisper>
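
# ----------------------------------------------------------------
# Usage sketch (an illustration, not part of the recipe): a
# HyperPyYAML file like this one is typically consumed from a
# SpeechBrain script roughly as below. The file name used here is
# hypothetical.
#
#     from hyperpyyaml import load_hyperpyyaml
#
#     with open("train_mn_hf_whisper.yaml") as fin:
#         hparams = load_hyperpyyaml(fin)
#
#     # `!new:` entries come back as instantiated objects, and
#     # `!ref <...>` entries resolve to the referenced values.
#     whisper = hparams["whisper"]   # frozen Whisper lobe
#     decoder = hparams["decoder"]   # greedy searcher
#
#     # Fetch and load the pretrained weights declared under
#     # `pretrainer` above:
#     hparams["pretrainer"].collect_files()
#     hparams["pretrainer"].load_collected()
# ----------------------------------------------------------------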