# whisper_rescuespeech / hyperparams.yaml
# Hugging Face model-card page header (not YAML content), kept as comments:
# uploaded by sangeet2020, commit "push model files" (ef649a5),
# raw / history / blame view, file size 1.82 kB.
# Generated 2023-06-24 from:
# /netscratch/sagar/thesis/speechbrain/recipes/RescueSpeech/ASR/transformer/hparams/train_hf_whisper.yaml
# yamllint disable
# ################################
# Model: Whisper (Encoder-Decoder) + NLL
# Augmentation: TimeDomainSpecAugment
# Authors: Sangeet Sagar 2022
# ################################
# Hugging Face hub ID of the OpenAI Whisper checkpoint to fine-tune
# (the original comment said "Fairseq english" — this is an OpenAI model).
whisper_hub: openai/whisper-large-v2
# Target language of the transcripts for this recipe.
language: german
# Normalize the transcripts with the same
# text normalization done in the Whisper paper.
normalized_transcripts: true
test_only: false # Set it to true if you only want to do the evaluation
auto_mix_prec: false # automatic mixed precision (disabled)
sample_rate: 16000 # audio sampling rate in Hz
# These values are only used for the searchers.
# They need to be hardcoded and should not be changed with Whisper.
# They are used as part of the searching process.
# The bos token of the searcher will be timestamp_index
# and will be concatenated with the bos, language and task tokens.
timestamp_index: 50363
eos_index: 50257
bos_index: 50258
# Decoding parameters
min_decode_ratio: 0.0
max_decode_ratio: 1.0
test_beam_size: 8
# Model parameters
freeze_whisper: false # if true, no Whisper weights are updated
freeze_encoder_only: false
freeze_encoder: true # keep the Whisper encoder weights frozen
#
# Functions and classes
#
# NOTE(review): the nesting below has been restored — in the source every
# nested key had lost its indentation, which made e.g. `whisper`,
# `eos_index`, `freeze_encoder`, `min/max_decode_ratio` duplicate
# top-level keys and the file invalid YAML. All values are unchanged.

# Whisper encoder-decoder wrapper; anchored (&id001) so the exact same
# instance is shared by `modules` and the greedy searcher below.
whisper: &id001 !new:speechbrain.lobes.models.huggingface_whisper.HuggingFaceWhisper
  source: openai/whisper-large-v2/
  freeze: false
  save_path: openai/whisper-large-v2/
  encoder_only: false
  freeze_encoder: true

# Modules dict handed to the training Brain.
modules:
  whisper: *id001

# Optimizer class for fine-tuning (instantiated later by the recipe).
whisper_opt_class: !name:torch.optim.AdamW
  lr: 0.00003
  weight_decay: 0.01

# Greedy decoder over the shared Whisper model.
decoder: !new:speechbrain.decoders.seq2seq.S2SWhisperGreedySearch
  model: *id001
  bos_index: 50363 # timestamp token id used as the searcher's BOS
  eos_index: 50257
  min_decode_ratio: 0.0
  max_decode_ratio: 1.0

# Pretrainer: loads saved parameters back into the `whisper` module.
pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
  loadables:
    whisper: !ref <whisper>