name: &name "QuartzNet15x5" model: sample_rate: &sample_rate 16000 repeat: &repeat 5 dropout: &dropout 0.0 separable: &separable true labels: &labels [" ", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "á", "æ", "é", "í", "ð", "ó", "ö", "ú", "ý", "þ"] train_ds: manifest_filepath: ??? sample_rate: 16000 labels: *labels batch_size: 16 ########################## trim_silence: True max_duration: 16.7 shuffle: True num_workers: 8 pin_memory: true # tarred datasets is_tarred: false tarred_audio_filepaths: null shuffle_n: 2048 # bucketing params bucketing_strategy: "synced_randomized" bucketing_batch_size: null validation_ds: manifest_filepath: ??? sample_rate: 16000 labels: *labels batch_size: 16 ########################## shuffle: False num_workers: 8 pin_memory: true preprocessor: _target_: nemo.collections.asr.modules.AudioToMelSpectrogramPreprocessor normalize: "per_feature" window_size: 0.02 sample_rate: *sample_rate window_stride: 0.01 window: "hann" features: &n_mels 64 n_fft: 512 frame_splicing: 1 dither: 1.0e-05 spec_augment: _target_: nemo.collections.asr.modules.SpectrogramAugmentation rect_freq: 50 rect_masks: 5 rect_time: 120 encoder: _target_: nemo.collections.asr.modules.ConvASREncoder feat_in: *n_mels activation: relu conv_mask: true jasper: #1 - dilation: [1] dropout: *dropout filters: 256 kernel: [33] repeat: 1 residual: false separable: *separable stride: [2] #2 - dilation: [1] dropout: *dropout filters: 256 kernel: [33] repeat: *repeat residual: true separable: *separable stride: [1] #3 - dilation: [1] dropout: *dropout filters: 256 kernel: [33] repeat: *repeat residual: true separable: *separable stride: [1] #4 - dilation: [1] dropout: *dropout filters: 256 kernel: [33] repeat: *repeat residual: true separable: *separable stride: [1] #5 - dilation: [1] dropout: *dropout filters: 256 kernel: [39] repeat: *repeat residual: true separable: *separable stride: [1] #6 - dilation: [1] dropout: *dropout filters: 256 kernel: [39] repeat: *repeat residual: true separable: *separable stride: [1] #7 - dilation: [1] dropout: *dropout filters: 256 kernel: [39] repeat: *repeat residual: true separable: *separable stride: [1] #8 - dilation: [1] dropout: *dropout filters: 512 kernel: [51] repeat: *repeat residual: true separable: *separable stride: [1] #9 - dilation: [1] dropout: *dropout filters: 512 kernel: [51] repeat: *repeat residual: true separable: *separable stride: [1] #10 - dilation: [1] dropout: *dropout filters: 512 kernel: [51] repeat: *repeat residual: true separable: *separable stride: [1] #11 - dilation: [1] dropout: *dropout filters: 512 kernel: [63] repeat: *repeat residual: true separable: *separable stride: [1] #12 - dilation: [1] dropout: *dropout filters: 512 kernel: [63] repeat: *repeat residual: true separable: *separable stride: [1] #13 - dilation: [1] dropout: *dropout filters: 512 kernel: [63] repeat: *repeat residual: true separable: *separable stride: [1] #14 - dilation: [1] dropout: *dropout filters: 512 kernel: [75] repeat: *repeat residual: true separable: *separable stride: [1] #15 - dilation: [1] dropout: *dropout filters: 512 kernel: [75] repeat: *repeat residual: true separable: *separable stride: [1] #16 - dilation: [1] dropout: *dropout filters: 512 kernel: [75] repeat: *repeat residual: true separable: *separable stride: [1] #17 - dilation: [2] dropout: *dropout filters: 512 kernel: [87] repeat: 1 residual: false separable: *separable stride: [1] #18 - dilation: [1] 
dropout: *dropout filters: &enc_filters 1024 kernel: [1] repeat: 1 residual: false stride: [1] decoder: _target_: nemo.collections.asr.modules.ConvASRDecoder feat_in: *enc_filters num_classes: 37 vocabulary: *labels optim: name: novograd # _target_: nemo.core.optim.optimizers.Novograd lr: 0.0012 # optimizer arguments betas: [0.95, 0.25] weight_decay: 0.001 # scheduler setup sched: name: CosineAnnealing # pytorch lightning args # monitor: val_loss # reduce_on_plateau: false # Scheduler params warmup_steps: null warmup_ratio: null min_lr: 0.0 last_epoch: -1 trainer: devices: 1 # number of gpus max_epochs: 5 max_steps: -1 # computed at runtime if not set num_nodes: 1 accelerator: gpu strategy: ddp accumulate_grad_batches: 1 enable_checkpointing: False # Provided by exp_manager logger: False # Provided by exp_manager log_every_n_steps: 1 # Interval of logging. val_check_interval: 1.0 # Set to 0.25 to check 4 times per epoch, or an int for number of iterations benchmark: false # needs to be false for models with variable-length speech input as it slows down training exp_manager: exp_dir: null name: *name create_tensorboard_logger: True create_checkpoint_callback: True checkpoint_callback_params: monitor: "val_wer" mode: "min" create_wandb_logger: False wandb_logger_kwargs: name: null project: null
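# Usage sketch (kept as comments so the YAML stays valid). This is a minimal,
# assumed training driver following the standard NeMo pattern, not part of this
# config: the file name "QuartzNet15x5.yaml" and the manifest placeholder below
# are hypothetical, and both manifest_filepath values (??? above) must be set
# before training.
#
#   import pytorch_lightning as pl
#   from omegaconf import OmegaConf
#   from nemo.collections.asr.models import EncDecCTCModel
#   from nemo.utils.exp_manager import exp_manager
#
#   cfg = OmegaConf.load("QuartzNet15x5.yaml")
#   cfg.model.train_ds.manifest_filepath = "<train_manifest.json>"        # hypothetical path
#   cfg.model.validation_ds.manifest_filepath = "<val_manifest.json>"     # hypothetical path
#
#   trainer = pl.Trainer(**cfg.trainer)
#   exp_manager(trainer, cfg.get("exp_manager", None))
#   asr_model = EncDecCTCModel(cfg=cfg.model, trainer=trainer)
#   trainer.fit(asr_model)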