# Generated 2023-06-24 from:
# /netscratch/sagar/thesis/speechbrain/recipes/RescueSpeech/Enhancement/joint-training/transformers/hparams/robust_asr_16k.yaml
# yamllint disable
# Model: Whisper (encoder-decoder) with greedy-search decoding
# Augmentation: SpecAugment
# Authors: Sangeet Sagar 2023
# ################################
# Hub identifier of the pretrained Whisper checkpoint (the biggest Whisper
# model). It is passed as `source` to the `whisper` object in this file.
whisper_hub: openai/whisper-large-v2
# NOTE(review): `language` and `normalized_transcripts` are not referenced by
# any object in this file; presumably they are read by the consuming script.
# Confirm against the caller.
language: german
normalized_transcripts: true
# Model parameters
# NOTE(review): `sample_rate` is not referenced by any object in this file;
# presumably it is read by the consuming script. Confirm against the caller.
sample_rate: 16000
# Passed as `freeze` to the `whisper` object.
freeze_whisper: false
# NOTE(review): `freeze_encoder_only` is not referenced by any object in this
# file. Confirm it is still needed.
freeze_encoder_only: false
# Passed as `freeze_encoder` to the `whisper` object.
freeze_encoder: true
# These values are only used by the searchers.
# They need to be hardcoded and should not be changed with Whisper.
# They are used as part of the search process.
# The bos token of the searcher will be timestamp_index
# and will be concatenated with the bos, language and task tokens.
timestamp_index: 50363
eos_index: 50257
bos_index: 50258
# ASR model
# The constructor arguments must be nested under `whisper`. At column 0 they
# become unrelated top-level keys, and `freeze_encoder` would then duplicate
# the top-level flag of the same name.
whisper: &id003 !new:speechbrain.lobes.models.huggingface_whisper.HuggingFaceWhisper
  source: !ref <whisper_hub>
  freeze: !ref <freeze_whisper>
  freeze_encoder: !ref <freeze_encoder>
  save_path: whisper_checkpoints
  encoder_only: false
# Greedy searcher that decodes with the Whisper model defined in this file.
decoder: &id006 !new:speechbrain.decoders.seq2seq.S2SWhisperGreedySearch
  # The searcher needs the Whisper model object; an empty value would be null.
  model: !ref <whisper>
  # Same value as the top-level `timestamp_index`: the searcher's bos token
  # is the timestamp token.
  bos_index: 50363
  # Same value as the top-level `eos_index`.
  eos_index: 50257
  min_decode_ratio: 0.0
  max_decode_ratio: 1.0
# Change the path to use a local model instead of the remote one
pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
  # Objects registered with the pretrainer, keyed by name.
  loadables:
    whisper: !ref <whisper>
    decoder: !ref <decoder>
# Named handles to the objects defined in this file. An empty value would be
# null, so each entry points at its object explicitly.
modules:
  whisper: !ref <whisper>
  decoder: !ref <decoder>