# Generated 2023-06-24 from:
# /netscratch/sagar/thesis/speechbrain/recipes/RescueSpeech/ASR/transformer/hparams/train_hf_whisper.yaml
# yamllint disable
# ################################
# Model: Whisper (Encoder-Decoder) + NLL
# Augmentation: TimeDomainSpecAugment
# Authors: Sangeet Sagar 2022
# ################################

# HuggingFace hub id of the (multilingual) Whisper large-v2 model.
whisper_hub: openai/whisper-large-v2
language: german

# Normalize the input transcripts with the same
# normalization used in the Whisper paper.
normalized_transcripts: true
test_only: false  # Set it to True if you only want to run the evaluation
auto_mix_prec: false
sample_rate: 16000

# These values are only used by the searchers.
# They need to be hardcoded and should not be changed with Whisper.
# They are used as part of the searching process.
# The bos token of the searcher will be timestamp_index
# and will be concatenated with the bos, language and task tokens.
timestamp_index: 50363
eos_index: 50257
bos_index: 50258

# Decoding parameters
min_decode_ratio: 0.0
max_decode_ratio: 1.0
test_beam_size: 8

# Model parameters
freeze_whisper: false
freeze_encoder_only: false
freeze_encoder: true

#
# Functions and classes
#
whisper: &id001 !new:speechbrain.lobes.models.huggingface_whisper.HuggingFaceWhisper
    source: openai/whisper-large-v2
    freeze: false
    save_path: openai/whisper-large-v2/
    encoder_only: false
    freeze_encoder: true

modules:
    whisper: *id001

whisper_opt_class: !name:torch.optim.AdamW
    lr: 0.00003
    weight_decay: 0.01

decoder: !new:speechbrain.decoders.seq2seq.S2SWhisperGreedySearch
    model: *id001
    bos_index: 50363  # timestamp_index is used as bos, see comment above
    eos_index: 50257
    min_decode_ratio: 0.0
    max_decode_ratio: 1.0

pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
    loadables:
        whisper: !ref <whisper>
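
# For reference: the special token ids above (timestamp_index, eos_index,
# bos_index) correspond to the HuggingFace Whisper tokenizer's multilingual
# vocabulary. A minimal sketch to verify them, assuming the `transformers`
# package is installed (not part of the recipe itself):
#
#   from transformers import WhisperTokenizer
#
#   tok = WhisperTokenizer.from_pretrained("openai/whisper-large-v2")
#   assert tok.convert_tokens_to_ids("<|endoftext|>") == 50257          # eos_index
#   assert tok.convert_tokens_to_ids("<|startoftranscript|>") == 50258  # bos_index
#   assert tok.convert_tokens_to_ids("<|notimestamps|>") == 50363       # timestamp_index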
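
# Usage sketch (an illustration, not part of the recipe): a generated hparams
# file like this one is typically loaded with hyperpyyaml by the accompanying
# training script; the `!new:` entries are instantiated on load, e.g.:
#
#   from hyperpyyaml import load_hyperpyyaml
#
#   with open("train_hf_whisper.yaml") as fin:
#       hparams = load_hyperpyyaml(fin)
#   whisper = hparams["whisper"]  # instantiated HuggingFaceWhisper lobe
#   decoder = hparams["decoder"]  # greedy searcher sharing the same model (*id001)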