# nemo-vad-marblenet / model_config.yaml
# Uploaded by huseinzol05 in commit f6d1140 ("Upload 2 files").
# Top-level model settings.
# NOTE(review): units are not stated in this file; sample_rate is presumably
# in Hz — confirm against the consumer.
sample_rate: 16000
timesteps: 64
repeat: 2
dropout: 0.0
kernel_size_factor: 1.0
# Two class labels; presumably paired with decoder num_classes: 2 — confirm.
labels: [background, speech]
# Training dataset settings.
# Nesting is restored here: with every key at column 0, sample_rate, labels
# and prob were duplicate top-level keys and train_ds itself was null.
train_ds:
  vad_stream: false
  # NOTE(review): absolute path under a user home directory; presumably
  # specific to the original training machine — override before reuse.
  manifest_filepath: /home/fjia/code/manifest64/train.json
  sample_rate: 16000
  labels: [background, speech]
  batch_size: 128
  num_workers: 20
  shuffle: true
  # Two augmentations, each with its own application probability.
  augmentor:
    shift:
      prob: 0.8
      min_shift_ms: -5.0
      max_shift_ms: 5.0
    white_noise:
      prob: 0.8
      # NOTE(review): level units are not stated here (presumably dB) — confirm.
      min_level: -90
      max_level: -46
# Validation dataset settings (nesting restored so these keys no longer
# collide with same-named keys of other sections).
validation_ds:
  vad_stream: false
  # NOTE(review): absolute path under a user home directory; presumably
  # specific to the original training machine — override before reuse.
  manifest_filepath: /home/fjia/code/manifest64/validation.json
  sample_rate: 16000
  labels: [background, speech]
  batch_size: 128
  shuffle: false
  val_loss_idx: 0
  num_workers: 20
# Test dataset settings (nesting restored so these keys no longer collide
# with same-named keys of other sections).
test_ds:
  vad_stream: false
  # No test manifest is configured; set a path before running a test pass.
  manifest_filepath: null
  sample_rate: 16000
  labels: [background, speech]
  batch_size: 128
  shuffle: false
  test_loss_idx: 0
  num_workers: 20
# Audio feature extraction: class path plus its constructor parameters.
# Nesting restored so cls/params belong to this section only.
preprocessor:
  cls: nemo.collections.asr.modules.AudioToMFCCPreprocessor
  params:
    # NOTE(review): window_size / window_stride are presumably in seconds
    # (25 ms / 10 ms) — confirm against the preprocessor documentation.
    window_size: 0.025
    window_stride: 0.01
    window: hann
    n_mels: 64
    n_mfcc: 64
    n_fft: 512
# Spectrogram augmentation: class path plus its constructor parameters.
# Nesting restored so cls/params belong to this section only.
spec_augment:
  cls: nemo.collections.asr.modules.SpectrogramAugmentation
  params:
    freq_masks: 2
    time_masks: 2
    freq_width: 15
    time_width: 25
    rect_masks: 5
    rect_time: 25
    rect_freq: 15
# Convolutional encoder: class path plus its constructor parameters.
# Nesting restored: `jasper` is a list of six block definitions, each a
# mapping; kernel/stride/dilation are single-element lists.
encoder:
  cls: nemo.collections.asr.modules.ConvASREncoder
  params:
    # Same value as preprocessor n_mfcc (64); presumably these must agree.
    feat_in: 64
    activation: relu
    conv_mask: true
    jasper:
      - filters: 128
        repeat: 1
        kernel: [11]
        stride: [1]
        dilation: [1]
        dropout: 0.0
        residual: false
        separable: true
        kernel_size_factor: 1.0
      - filters: 64
        repeat: 2
        kernel: [13]
        stride: [1]
        dilation: [1]
        dropout: 0.0
        residual: true
        separable: true
        kernel_size_factor: 1.0
      - filters: 64
        repeat: 2
        kernel: [15]
        stride: [1]
        dilation: [1]
        dropout: 0.0
        residual: true
        separable: true
        kernel_size_factor: 1.0
      - filters: 64
        repeat: 2
        kernel: [17]
        stride: [1]
        dilation: [1]
        dropout: 0.0
        residual: true
        separable: true
        kernel_size_factor: 1.0
      # The only block with dilation other than 1.
      - filters: 128
        repeat: 1
        kernel: [29]
        stride: [1]
        dilation: [2]
        dropout: 0.0
        residual: false
        separable: true
        kernel_size_factor: 1.0
      # Final block: kernel size 1; sets neither separable nor
      # kernel_size_factor, so the consumer's defaults apply.
      - filters: 128
        repeat: 1
        kernel: [1]
        stride: [1]
        dilation: [1]
        dropout: 0.0
        residual: false
# Classification decoder: class path plus its constructor parameters.
# Nesting restored so cls/params belong to this section only.
decoder:
  cls: nemo.collections.asr.modules.ConvASRDecoderClassification
  params:
    # Same value as the last encoder block's filters (128); presumably these
    # must agree.
    feat_in: 128
    # Two classes, matching the two entries in `labels`.
    num_classes: 2
    return_logits: true
    pooling_type: avg
# Optimizer settings with a nested learning-rate schedule.
# Nesting restored: when flat, optimizer `name` and scheduler `name` were
# duplicate keys.
optim:
  name: sgd
  lr: 0.01
  weight_decay: 0.001
  sched:
    name: PolynomialHoldDecayAnnealing
    power: 2.0
    warmup_ratio: 0.05
    hold_ratio: 0.45
    min_lr: 0.001
    last_epoch: -1
  # NOTE(review): placed under optim (not sched) because momentum is an SGD
  # argument; the flattened source listed it after the sched keys — confirm.
  momentum: 0.9
# Dotted path of the model class this configuration belongs to.
# NOTE(review): presumably used by NeMo to pick the class when restoring the
# model — confirm.
target: nemo.collections.asr.models.classification_models.EncDecClassificationModel