# Generated 2023-06-24 from:
# /netscratch/sagar/thesis/speechbrain/recipes/RescueSpeech/ASR/transformer/hparams/train_hf_whisper.yaml
# yamllint disable
# ################################
# Model: Whisper (Encoder-Decoder) + NLL
# Augmentation: TimeDomainSpecAugment
# Authors: Sangeet Sagar 2022
# ################################
# HuggingFace hub ID of the OpenAI Whisper model (largest multilingual checkpoint).
whisper_hub: openai/whisper-large-v2
language: german
# Normalize the transcripts with
# the same normalization done in the Whisper paper
normalized_transcripts: true
test_only: false  # Set it to True if you only want to do the evaluation
auto_mix_prec: false
sample_rate: 16000
# These values are only used for the searchers.
# They need to be hardcoded and should not be changed with Whisper.
# They are used as part of the searching process.
# The bos token of the searcher will be timestamp_index
# and will be concatenated with the bos, language and task tokens.
timestamp_index: 50363
eos_index: 50257
bos_index: 50258
# Decoding parameters
min_decode_ratio: 0.0
max_decode_ratio: 1.0
test_beam_size: 8
# Model parameters
freeze_whisper: false
freeze_encoder_only: false
freeze_encoder: true
#
# Functions and classes
#
whisper: &id001 !new:speechbrain.lobes.models.huggingface_whisper.HuggingFaceWhisper
  source: openai/whisper-large-v2/
  freeze: false
  save_path: openai/whisper-large-v2/
  encoder_only: false
  freeze_encoder: true
modules:
  # Alias to the anchored whisper model above; the bare key in the
  # extracted file parsed as null, which would break module registration.
  whisper: *id001
whisper_opt_class: !name:torch.optim.AdamW
  lr: 0.00003
  weight_decay: 0.01
decoder: !new:speechbrain.decoders.seq2seq.S2SWhisperGreedySearch
  # Alias to the anchored whisper model above; a bare `model:` parses as null.
  model: *id001
  bos_index: 50363
  eos_index: 50257
  min_decode_ratio: 0.0
  max_decode_ratio: 1.0
pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
  loadables:
    whisper: !ref <whisper>