|
cfg: |
|
sample_rate: 16000 |
|
train_ds: |
|
manifest_filepath: /content/datasets/ta/yaygomii/Tamil-Speech-Dialect-Corpus-Shuffled-Split/train/train_yaygomii_Tamil-Speech-Dialect-Corpus-Shuffled-Split_manifest_processed.json,/content/datasets/ta/yaygomii/Tamil-Speech-Dialect-Corpus-Shuffled-Split/valid/valid_yaygomii_Tamil-Speech-Dialect-Corpus-Shuffled-Split_manifest_processed.json |
|
sample_rate: 16000 |
|
batch_size: 16 |
|
trim_silence: true |
|
max_duration: 16.7 |
|
shuffle: true |
|
is_tarred: false |
|
tarred_audio_filepaths: null |
|
num_workers: 8 |
|
pin_memory: true |
|
use_start_end_token: true |
|
validation_ds: |
|
manifest_filepath: /content/datasets/ta/yaygomii/Tamil-Speech-Dialect-Corpus-Shuffled-Split/test/test_yaygomii_Tamil-Speech-Dialect-Corpus-Shuffled-Split_manifest_processed.json |
|
sample_rate: 16000 |
|
batch_size: 8 |
|
shuffle: false |
|
num_workers: 8 |
|
pin_memory: true |
|
use_start_end_token: true |
|
trim_silence: true |
|
test_ds: |
|
manifest_filepath: /content/datasets/ta/yaygomii/Tamil-Speech-Dialect-Corpus-Shuffled-Split/test/test_yaygomii_Tamil-Speech-Dialect-Corpus-Shuffled-Split_manifest_processed.json |
|
sample_rate: 16000 |
|
batch_size: 8 |
|
shuffle: false |
|
num_workers: 8 |
|
pin_memory: true |
|
use_start_end_token: true |
|
trim_silence: true |
|
model_defaults: |
|
repeat: 5 |
|
dropout: 0.0 |
|
separable: true |
|
se: true |
|
se_context_size: -1 |
|
tokenizer: |
|
dir: tokenizers/ta/tokenizer_spe_bpe_v174/ |
|
type: bpe |
|
model_path: nemo:288fd8b4d3c64e75ac7a6b32b0f3586d_tokenizer.model |
|
vocab_path: nemo:6698e9428f25429e8ab2c5238438d52f_vocab.txt |
|
spe_tokenizer_vocab: nemo:344d9da9a52049caad7742ae1d994d19_tokenizer.vocab |
|
preprocessor: |
|
_target_: nemo.collections.asr.modules.AudioToMelSpectrogramPreprocessor |
|
sample_rate: 16000 |
|
normalize: per_feature |
|
window_size: 0.025 |
|
window_stride: 0.01 |
|
window: hann |
|
features: 80 |
|
n_fft: 512 |
|
frame_splicing: 1 |
|
dither: 1.0e-05 |
|
pad_to: 16 |
|
stft_conv: false |
|
spec_augment: |
|
_target_: nemo.collections.asr.modules.SpectrogramAugmentation |
|
freq_masks: 2 |
|
time_masks: 10 |
|
freq_width: 25 |
|
time_width: 0.05 |
|
encoder: |
|
_target_: nemo.collections.asr.modules.ConvASREncoder |
|
feat_in: 80 |
|
activation: relu |
|
conv_mask: true |
|
jasper: |
|
- filters: 512 |
|
repeat: 1 |
|
kernel: |
|
- 5 |
|
stride: |
|
- 1 |
|
dilation: |
|
- 1 |
|
dropout: 0.0 |
|
residual: false |
|
separable: true |
|
se: true |
|
se_context_size: -1 |
|
- filters: 512 |
|
repeat: 5 |
|
kernel: |
|
- 11 |
|
stride: |
|
- 2 |
|
dilation: |
|
- 1 |
|
dropout: 0.0 |
|
residual: true |
|
separable: true |
|
se: true |
|
se_context_size: -1 |
|
stride_last: true |
|
residual_mode: stride_add |
|
- filters: 512 |
|
repeat: 5 |
|
kernel: |
|
- 13 |
|
stride: |
|
- 1 |
|
dilation: |
|
- 1 |
|
dropout: 0.0 |
|
residual: true |
|
separable: true |
|
se: true |
|
se_context_size: -1 |
|
- filters: 512 |
|
repeat: 5 |
|
kernel: |
|
- 15 |
|
stride: |
|
- 1 |
|
dilation: |
|
- 1 |
|
dropout: 0.0 |
|
residual: true |
|
separable: true |
|
se: true |
|
se_context_size: -1 |
|
- filters: 512 |
|
repeat: 5 |
|
kernel: |
|
- 17 |
|
stride: |
|
- 1 |
|
dilation: |
|
- 1 |
|
dropout: 0.0 |
|
residual: true |
|
separable: true |
|
se: true |
|
se_context_size: -1 |
|
- filters: 512 |
|
repeat: 5 |
|
kernel: |
|
- 19 |
|
stride: |
|
- 1 |
|
dilation: |
|
- 1 |
|
dropout: 0.0 |
|
residual: true |
|
separable: true |
|
se: true |
|
se_context_size: -1 |
|
- filters: 512 |
|
repeat: 5 |
|
kernel: |
|
- 21 |
|
stride: |
|
- 1 |
|
dilation: |
|
- 1 |
|
dropout: 0.0 |
|
residual: true |
|
separable: true |
|
se: true |
|
se_context_size: -1 |
|
- filters: 512 |
|
repeat: 5 |
|
kernel: |
|
- 13 |
|
stride: |
|
- 2 |
|
dilation: |
|
- 1 |
|
dropout: 0.0 |
|
residual: true |
|
separable: true |
|
se: true |
|
se_context_size: -1 |
|
stride_last: true |
|
residual_mode: stride_add |
|
- filters: 512 |
|
repeat: 5 |
|
kernel: |
|
- 15 |
|
stride: |
|
- 1 |
|
dilation: |
|
- 1 |
|
dropout: 0.0 |
|
residual: true |
|
separable: true |
|
se: true |
|
se_context_size: -1 |
|
- filters: 512 |
|
repeat: 5 |
|
kernel: |
|
- 17 |
|
stride: |
|
- 1 |
|
dilation: |
|
- 1 |
|
dropout: 0.0 |
|
residual: true |
|
separable: true |
|
se: true |
|
se_context_size: -1 |
|
- filters: 512 |
|
repeat: 5 |
|
kernel: |
|
- 19 |
|
stride: |
|
- 1 |
|
dilation: |
|
- 1 |
|
dropout: 0.0 |
|
residual: true |
|
separable: true |
|
se: true |
|
se_context_size: -1 |
|
- filters: 512 |
|
repeat: 5 |
|
kernel: |
|
- 21 |
|
stride: |
|
- 1 |
|
dilation: |
|
- 1 |
|
dropout: 0.0 |
|
residual: true |
|
separable: true |
|
se: true |
|
se_context_size: -1 |
|
- filters: 512 |
|
repeat: 5 |
|
kernel: |
|
- 23 |
|
stride: |
|
- 1 |
|
dilation: |
|
- 1 |
|
dropout: 0.0 |
|
residual: true |
|
separable: true |
|
se: true |
|
se_context_size: -1 |
|
- filters: 512 |
|
repeat: 5 |
|
kernel: |
|
- 25 |
|
stride: |
|
- 1 |
|
dilation: |
|
- 1 |
|
dropout: 0.0 |
|
residual: true |
|
separable: true |
|
se: true |
|
se_context_size: -1 |
|
- filters: 512 |
|
repeat: 5 |
|
kernel: |
|
- 25 |
|
stride: |
|
- 2 |
|
dilation: |
|
- 1 |
|
dropout: 0.0 |
|
residual: true |
|
separable: true |
|
se: true |
|
se_context_size: -1 |
|
stride_last: true |
|
residual_mode: stride_add |
|
- filters: 512 |
|
repeat: 5 |
|
kernel: |
|
- 27 |
|
stride: |
|
- 1 |
|
dilation: |
|
- 1 |
|
dropout: 0.0 |
|
residual: true |
|
separable: true |
|
se: true |
|
se_context_size: -1 |
|
- filters: 512 |
|
repeat: 5 |
|
kernel: |
|
- 29 |
|
stride: |
|
- 1 |
|
dilation: |
|
- 1 |
|
dropout: 0.0 |
|
residual: true |
|
separable: true |
|
se: true |
|
se_context_size: -1 |
|
- filters: 512 |
|
repeat: 5 |
|
kernel: |
|
- 31 |
|
stride: |
|
- 1 |
|
dilation: |
|
- 1 |
|
dropout: 0.0 |
|
residual: true |
|
separable: true |
|
se: true |
|
se_context_size: -1 |
|
- filters: 512 |
|
repeat: 5 |
|
kernel: |
|
- 33 |
|
stride: |
|
- 1 |
|
dilation: |
|
- 1 |
|
dropout: 0.0 |
|
residual: true |
|
separable: true |
|
se: true |
|
se_context_size: -1 |
|
- filters: 512 |
|
repeat: 5 |
|
kernel: |
|
- 35 |
|
stride: |
|
- 1 |
|
dilation: |
|
- 1 |
|
dropout: 0.0 |
|
residual: true |
|
separable: true |
|
se: true |
|
se_context_size: -1 |
|
- filters: 512 |
|
repeat: 5 |
|
kernel: |
|
- 37 |
|
stride: |
|
- 1 |
|
dilation: |
|
- 1 |
|
dropout: 0.0 |
|
residual: true |
|
separable: true |
|
se: true |
|
se_context_size: -1 |
|
- filters: 512 |
|
repeat: 5 |
|
kernel: |
|
- 39 |
|
stride: |
|
- 1 |
|
dilation: |
|
- 1 |
|
dropout: 0.0 |
|
residual: true |
|
separable: true |
|
se: true |
|
se_context_size: -1 |
|
- filters: 640 |
|
repeat: 1 |
|
kernel: |
|
- 41 |
|
stride: |
|
- 1 |
|
dilation: |
|
- 1 |
|
dropout: 0.0 |
|
residual: false |
|
separable: true |
|
se: true |
|
se_context_size: -1 |
|
decoder: |
|
_target_: nemo.collections.asr.modules.ConvASRDecoder |
|
feat_in: 640 |
|
num_classes: 174 |
|
vocabulary: |
|
- <unk> |
|
- ்க |
|
- ம் |
|
- ▁ப |
|
- க்க |
|
- ்த |
|
- ன் |
|
- ்ட |
|
- ▁வ |
|
- ங்க |
|
- ரு |
|
- ▁இ |
|
- ▁க |
|
- ▁அ |
|
- ▁எ |
|
- க்கு |
|
- ▁ச |
|
- ல் |
|
- ந்த |
|
- ட்ட |
|
- ப் |
|
- ▁ந |
|
- த்த |
|
- து |
|
- ப்ப |
|
- ▁ம |
|
- ல்ல |
|
- ▁த |
|
- ்ச |
|
- ன்ன |
|
- ▁இரு |
|
- டி |
|
- டு |
|
- ▁போ |
|
- ும் |
|
- ந்து |
|
- ட்டு |
|
- ான் |
|
- ாங்க |
|
- ச்ச |
|
- ிய |
|
- ண் |
|
- மா |
|
- த்து |
|
- ▁வந்து |
|
- ர் |
|
- ▁பா |
|
- ண்ண |
|
- ▁ஒ |
|
- ல்லா |
|
- ண்ட |
|
- ▁ஆ |
|
- ம்ப |
|
- ேன் |
|
- னு |
|
- க்கா |
|
- னா |
|
- ைய |
|
- ▁மா |
|
- ▁இருக்கு |
|
- ▁கொ |
|
- ஸ் |
|
- ரி |
|
- ▁என்ன |
|
- ▁சொ |
|
- ▁சா |
|
- ச்சு |
|
- ள் |
|
- ▁ர |
|
- ▁பண்ண |
|
- ோம் |
|
- லா |
|
- ▁அப்ப |
|
- ட் |
|
- ஞ்ச |
|
- ▁கா |
|
- யி |
|
- ய் |
|
- ▁எங்க |
|
- ▁ஏ |
|
- ▁நா |
|
- ▁ஒரு |
|
- ▁அவ |
|
- ீங்க |
|
- ியா |
|
- ▁அது |
|
- ▁எல்லா |
|
- ▁கு |
|
- தி |
|
- ▁இல்ல |
|
- ▁வெ |
|
- ▁வே |
|
- ▁தான் |
|
- யா |
|
- ▁பே |
|
- றது |
|
- ▁செ |
|
- ுக்கு |
|
- ▁இருக்க |
|
- ண்டு |
|
- ில |
|
- ▁பி |
|
- ▁ட |
|
- ிரு |
|
- ளா |
|
- ் |
|
- ▁ |
|
- ு |
|
- க |
|
- ா |
|
- த |
|
- ட |
|
- ப |
|
- ம |
|
- ி |
|
- ன |
|
- ர |
|
- ல |
|
- வ |
|
- ச |
|
- ந |
|
- ங |
|
- ய |
|
- ே |
|
- ோ |
|
- ண |
|
- இ |
|
- . |
|
- அ |
|
- எ |
|
- ள |
|
- ை |
|
- ெ |
|
- ற |
|
- ொ |
|
- ீ |
|
- ஸ |
|
- ூ |
|
- ஒ |
|
- ஆ |
|
- ழ |
|
- ஞ |
|
- ஏ |
|
- ஷ |
|
- ஜ |
|
- ',' |
|
- உ |
|
- ஊ |
|
- ஓ |
|
- ஃ |
|
- ஹ |
|
- ஐ |
|
- ௌ |
|
- ஈ |
|
- '!' |
|
- '5' |
|
- '2' |
|
- '0' |
|
- '1' |
|
- _ |
|
- '3' |
|
- '6' |
|
- ஂ |
|
- g |
|
- k |
|
- m |
|
- p |
|
- s |
|
- '௫' |
|
- '7' |
|
- '8' |
|
- '9' |
|
- t |
|
- '௯' |
|
optim: |
|
name: novograd |
|
lr: 0.025 |
|
betas: |
|
- 0.8 |
|
- 0.25 |
|
weight_decay: 0.001 |
|
sched: |
|
name: CosineAnnealing |
|
warmup_steps: null |
|
warmup_ratio: 0.1 |
|
min_lr: 1.0e-09 |
|
last_epoch: -1 |
|
target: nemo.collections.asr.models.ctc_bpe_models.EncDecCTCModelBPE |
|
nemo_version: 1.22.0 |
|
decoding: |
|
strategy: greedy |
|
preserve_alignments: null |
|
compute_timestamps: null |
|
word_seperator: ' ' |
|
ctc_timestamp_type: all |
|
batch_dim_index: 0 |
|
greedy: |
|
preserve_alignments: false |
|
compute_timestamps: false |
|
preserve_frame_confidence: false |
|
confidence_method_cfg: |
|
name: entropy |
|
entropy_type: tsallis |
|
alpha: 0.33 |
|
entropy_norm: exp |
|
temperature: DEPRECATED |
|
beam: |
|
beam_size: 4 |
|
search_type: default |
|
preserve_alignments: false |
|
compute_timestamps: false |
|
return_best_hypothesis: true |
|
beam_alpha: 1.0 |
|
beam_beta: 0.0 |
|
kenlm_path: null |
|
flashlight_cfg: |
|
lexicon_path: null |
|
boost_path: null |
|
beam_size_token: 16 |
|
beam_threshold: 20.0 |
|
unk_weight: -.inf |
|
sil_weight: 0.0 |
|
pyctcdecode_cfg: |
|
beam_prune_logp: -10.0 |
|
token_min_logp: -5.0 |
|
prune_history: false |
|
hotwords: null |
|
hotword_weight: 10.0 |
|
confidence_cfg: |
|
preserve_frame_confidence: false |
|
preserve_token_confidence: false |
|
preserve_word_confidence: false |
|
exclude_blank: true |
|
aggregation: min |
|
method_cfg: |
|
name: entropy |
|
entropy_type: tsallis |
|
alpha: 0.33 |
|
entropy_norm: exp |
|
temperature: DEPRECATED |
|
temperature: 1.0 |
|
|