# Generated 2023-06-24 from:
# /netscratch/sagar/thesis/speechbrain/recipes/RescueSpeech/Enhancement/joint-training/transformers/hparams/robust_asr_16k.yaml
# yamllint disable
# Model: Whisper (encoder-decoder) fine-tuning
# Augmentation: SpecAugment
# Authors: Sangeet Sagar 2023
# ################################

# HuggingFace hub ID of the Whisper model (largest multilingual checkpoint).
whisper_hub: openai/whisper-large-v2
language: german

## Model parameters
sample_rate: 16000
freeze_whisper: false
freeze_encoder_only: false
freeze_encoder: true

# These values are only used for the searchers.
# They need to be hardcoded and should not be changed with Whisper.
# They are used as part of the searching process.
# The bos token of the searcher will be timestamp_index
# and will be concatenated with the bos, language and task tokens.
timestamp_index: 50363
eos_index: 50257
bos_index: 50258

# ASR model
whisper: &id003 !new:speechbrain.lobes.models.huggingface_whisper.HuggingFaceWhisper
    source: !ref <whisper_hub>
    freeze: !ref <freeze_whisper>
    freeze_encoder: !ref <freeze_encoder>
    save_path: whisper_checkpoints
    encoder_only: False

decoder: &id006 !new:speechbrain.decoders.seq2seq.S2SWhisperGreedySearch
    model: *id003
    bos_index: 50363
    eos_index: 50257
    min_decode_ratio: 0.0
    max_decode_ratio: 1.0

# Change the path to use a local model instead of the remote one
pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
    loadables:
        whisper: !ref <whisper>
        decoder: !ref <decoder>

modules:
    whisper: *id003
    decoder: *id006
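
# ------------------------------------------------------------------
# Usage sketch (illustrative, not part of the config): SpeechBrain
# hparams files like this one are loaded with hyperpyyaml, which
# instantiates the !new: objects and resolves the !ref pointers.
# A minimal Python loading snippet, assuming the file name from the
# header above:
#
#   from hyperpyyaml import load_hyperpyyaml
#
#   with open("robust_asr_16k.yaml") as fin:
#       hparams = load_hyperpyyaml(fin)
#
#   # hparams["whisper"] is the instantiated HuggingFaceWhisper and
#   # hparams["decoder"] the greedy searcher bound to it; the
#   # pretrainer then fetches and loads the listed checkpoints.
#   hparams["pretrainer"].collect_files()
#   hparams["pretrainer"].load_collected()
#
# In the full recipe, the searcher is additionally primed with the
# bos/language/task token sequence (see the index comments above)
# via its set_decoder_input_tokens() method before decoding.
# ------------------------------------------------------------------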