|
sample_rate: 16000 |
|
timesteps: 64 |
|
repeat: 2 |
|
dropout: 0.0 |
|
kernel_size_factor: 1.0 |
|
labels: |
|
- background |
|
- speech |
|
train_ds: |
|
vad_stream: false |
|
manifest_filepath: /home/fjia/code/manifest64/train.json |
|
sample_rate: 16000 |
|
labels: |
|
- background |
|
- speech |
|
batch_size: 128 |
|
num_workers: 20 |
|
shuffle: true |
|
augmentor: |
|
shift: |
|
prob: 0.8 |
|
min_shift_ms: -5.0 |
|
max_shift_ms: 5.0 |
|
white_noise: |
|
prob: 0.8 |
|
min_level: -90 |
|
max_level: -46 |
|
validation_ds: |
|
vad_stream: false |
|
manifest_filepath: /home/fjia/code/manifest64/validation.json |
|
sample_rate: 16000 |
|
labels: |
|
- background |
|
- speech |
|
batch_size: 128 |
|
shuffle: false |
|
val_loss_idx: 0 |
|
num_workers: 20 |
|
test_ds: |
|
vad_stream: false |
|
manifest_filepath: null |
|
sample_rate: 16000 |
|
labels: |
|
- background |
|
- speech |
|
batch_size: 128 |
|
shuffle: false |
|
test_loss_idx: 0 |
|
num_workers: 20 |
|
preprocessor: |
|
cls: nemo.collections.asr.modules.AudioToMFCCPreprocessor |
|
params: |
|
window_size: 0.025 |
|
window_stride: 0.01 |
|
window: hann |
|
n_mels: 64 |
|
n_mfcc: 64 |
|
n_fft: 512 |
|
spec_augment: |
|
cls: nemo.collections.asr.modules.SpectrogramAugmentation |
|
params: |
|
freq_masks: 2 |
|
time_masks: 2 |
|
freq_width: 15 |
|
time_width: 25 |
|
rect_masks: 5 |
|
rect_time: 25 |
|
rect_freq: 15 |
|
encoder: |
|
cls: nemo.collections.asr.modules.ConvASREncoder |
|
params: |
|
feat_in: 64 |
|
activation: relu |
|
conv_mask: true |
|
jasper: |
|
- filters: 128 |
|
repeat: 1 |
|
kernel: |
|
- 11 |
|
stride: |
|
- 1 |
|
dilation: |
|
- 1 |
|
dropout: 0.0 |
|
residual: false |
|
separable: true |
|
kernel_size_factor: 1.0 |
|
- filters: 64 |
|
repeat: 2 |
|
kernel: |
|
- 13 |
|
stride: |
|
- 1 |
|
dilation: |
|
- 1 |
|
dropout: 0.0 |
|
residual: true |
|
separable: true |
|
kernel_size_factor: 1.0 |
|
- filters: 64 |
|
repeat: 2 |
|
kernel: |
|
- 15 |
|
stride: |
|
- 1 |
|
dilation: |
|
- 1 |
|
dropout: 0.0 |
|
residual: true |
|
separable: true |
|
kernel_size_factor: 1.0 |
|
- filters: 64 |
|
repeat: 2 |
|
kernel: |
|
- 17 |
|
stride: |
|
- 1 |
|
dilation: |
|
- 1 |
|
dropout: 0.0 |
|
residual: true |
|
separable: true |
|
kernel_size_factor: 1.0 |
|
- filters: 128 |
|
repeat: 1 |
|
kernel: |
|
- 29 |
|
stride: |
|
- 1 |
|
dilation: |
|
- 2 |
|
dropout: 0.0 |
|
residual: false |
|
separable: true |
|
kernel_size_factor: 1.0 |
|
- filters: 128 |
|
repeat: 1 |
|
kernel: |
|
- 1 |
|
stride: |
|
- 1 |
|
dilation: |
|
- 1 |
|
dropout: 0.0 |
|
residual: false |
|
decoder: |
|
cls: nemo.collections.asr.modules.ConvASRDecoderClassification |
|
params: |
|
feat_in: 128 |
|
num_classes: 2 |
|
return_logits: true |
|
pooling_type: avg |
|
optim: |
|
name: sgd |
|
lr: 0.01 |
|
weight_decay: 0.001 |
|
sched: |
|
name: PolynomialHoldDecayAnnealing |
|
power: 2.0 |
|
warmup_ratio: 0.05 |
|
hold_ratio: 0.45 |
|
min_lr: 0.001 |
|
last_epoch: -1 |
|
momentum: 0.9 |
|
target: nemo.collections.asr.models.classification_models.EncDecClassificationModel |
|
|