speechbrain
/

noisy-whisper-rescuespeech

Automatic Speech Recognition

Model card Files Files and versions Community

sangeet2020 committed on Jul 11, 2023

Commit

720f1ce

•

1 Parent(s): 602c4eb

remove old hyperparam file

Files changed (1) hide show

hyperparams.yaml +0 -105

hyperparams.yaml DELETED Viewed

@@ -1,105 +0,0 @@
-# Generated 2023-06-24 from:
-# /netscratch/sagar/thesis/speechbrain/recipes/RescueSpeech/Enhancement/joint-training/transformers/hparams/robust_asr_16k.yaml
-# yamllint disable
-# Model: dual-path transformer enhancement (Encoder + MaskNet + Decoder) + Whisper ASR
-# Augmentation: SpecAugment
-# Authors: Sangeet Sagar 2023
-# ################################
-# Hugging Face hub identifier of the Whisper model to load.
-# (This is an OpenAI Whisper checkpoint, not a Fairseq English-only model.)
-whisper_hub: openai/whisper-large-v2
-language: german
-## Model parameters
-sample_rate: 16000
-freeze_whisper: false
-freeze_encoder_only: false
-freeze_encoder: true
-# These values are only used by the searchers.
-# They need to be hardcoded and should not be changed when using Whisper.
-# They are used as part of the search (decoding) process.
-# The bos token of the searcher is set to timestamp_index;
-# it is concatenated with the bos, language and task tokens.
-timestamp_index: 50363
-eos_index: 50257
-bos_index: 50258
-# Decoding parameters
-min_decode_ratio: 0.0
-max_decode_ratio: 1.0
-test_beam_size: 8
-num_spks: 1
-# Enhancement model
-Encoder: &id004 !new:speechbrain.lobes.models.dual_path.Encoder
-  kernel_size: 16
-  out_channels: 256
-SBtfintra: &id001 !new:speechbrain.lobes.models.dual_path.SBTransformerBlock
-  num_layers: 8
-  d_model: 256
-  nhead: 8
-  d_ffn: 1024
-  dropout: 0
-  use_positional_encoding: true
-  norm_before: true
-SBtfinter: &id002 !new:speechbrain.lobes.models.dual_path.SBTransformerBlock
-  num_layers: 8
-  d_model: 256
-  nhead: 8
-  d_ffn: 1024
-  dropout: 0
-  use_positional_encoding: true
-  norm_before: true
-MaskNet: &id005 !new:speechbrain.lobes.models.dual_path.Dual_Path_Model
-  num_spks: 1
-  in_channels: 256
-  out_channels: 256
-  num_layers: 2
-  K: 250
-  intra_model: *id001
-  inter_model: *id002
-  norm: ln
-  linear_layer_after_inter_intra: false
-  skip_around_intra: true
-# Enhancement decoder (dual-path), followed by the Whisper ASR model and its searcher
-Decoder: &id006 !new:speechbrain.lobes.models.dual_path.Decoder
-  in_channels: 256
-  out_channels: 1
-  kernel_size: 16
-  stride: 8
-  bias: false
-whisper: &id003 !new:speechbrain.lobes.models.huggingface_whisper.HuggingFaceWhisper
-    source: !ref <whisper_hub>
-    freeze: !ref <freeze_whisper>
-    freeze_encoder: !ref <freeze_encoder>
-    save_path: whisper_checkpoints
-    encoder_only:  False
-decoder: !new:speechbrain.decoders.seq2seq.S2SWhisperGreedySearch
-  model: *id003
-  bos_index: 50363
-  eos_index: 50257
-  min_decode_ratio: 0.0
-  max_decode_ratio: 1.0
-# Change the path to use a local model instead of the remote one
-pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
-  loadables:
-    encoder: !ref <Encoder>
-    masknet: !ref <MaskNet>
-    decoder: !ref <Decoder>
-    whisper: !ref <whisper>
-modules:
-  encoder: *id004
-  masknet: *id005
-  decoder: *id006
-  whisper: *id003