# File size: 3,048 Bytes
# f6d1140
# Model-level settings. The same repeat / dropout / kernel_size_factor values
# also appear literally inside the encoder blocks further down this file.
sample_rate: 16000
timesteps: 64
repeat: 2
dropout: 0.0
kernel_size_factor: 1.0
# Class labels (two entries).
labels:
  - background
  - speech
# Training dataset section. Every key below is nested under `train_ds`;
# left flat, these keys would collide with the top-level `sample_rate` /
# `labels` keys and `train_ds` itself would parse as null.
train_ds:
  vad_stream: false
  # NOTE(review): absolute, user-specific path; override for your environment.
  manifest_filepath: /home/fjia/code/manifest64/train.json
  sample_rate: 16000
  labels:
    - background
    - speech
  batch_size: 128
  num_workers: 20
  shuffle: true
  # Two augmentation entries, each carrying its own `prob`.
  augmentor:
    shift:
      prob: 0.8
      # presumably milliseconds, going by the key names; confirm with consumer
      min_shift_ms: -5.0
      max_shift_ms: 5.0
    white_noise:
      prob: 0.8
      min_level: -90
      max_level: -46
# Validation dataset section. Keys are nested under `validation_ds` so they
# do not collide with identically named keys in other sections.
validation_ds:
  vad_stream: false
  # NOTE(review): absolute, user-specific path; override for your environment.
  manifest_filepath: /home/fjia/code/manifest64/validation.json
  sample_rate: 16000
  labels:
    - background
    - speech
  batch_size: 128
  shuffle: false
  val_loss_idx: 0
  num_workers: 20
# Test dataset section. Keys are nested under `test_ds` so they do not
# collide with identically named keys in other sections.
test_ds:
  vad_stream: false
  # No test manifest is configured here.
  manifest_filepath: null
  sample_rate: 16000
  labels:
    - background
    - speech
  batch_size: 128
  shuffle: false
  test_loss_idx: 0
  num_workers: 20
# Feature extraction module: `cls` names the class, `params` holds its
# arguments.
preprocessor:
  cls: nemo.collections.asr.modules.AudioToMFCCPreprocessor
  params:
    # presumably seconds (25 ms window, 10 ms stride); confirm with the class
    window_size: 0.025
    window_stride: 0.01
    window: hann
    n_mels: 64
    # Same value (64) as the encoder's `feat_in` elsewhere in this file.
    n_mfcc: 64
    n_fft: 512
# Spectrogram augmentation module: `cls` names the class, `params` holds its
# arguments.
spec_augment:
  cls: nemo.collections.asr.modules.SpectrogramAugmentation
  params:
    freq_masks: 2
    time_masks: 2
    freq_width: 15
    time_width: 25
    rect_masks: 5
    rect_time: 25
    rect_freq: 15
# Convolutional encoder: `cls` names the class, `params` holds its arguments.
# `jasper` is a list of six block definitions; each item's keys (repeat,
# kernel, stride, ...) must be nested under that item, and kernel / stride /
# dilation are themselves one-element lists.
encoder:
  cls: nemo.collections.asr.modules.ConvASREncoder
  params:
    # Same value (64) as the preprocessor's `n_mfcc` elsewhere in this file.
    feat_in: 64
    activation: relu
    conv_mask: true
    jasper:
      # Block 1: 128 filters, kernel 11, no residual.
      - filters: 128
        repeat: 1
        kernel:
          - 11
        stride:
          - 1
        dilation:
          - 1
        dropout: 0.0
        residual: false
        separable: true
        kernel_size_factor: 1.0
      # Blocks 2-4: 64 filters, repeat 2, residual, kernels 13 / 15 / 17.
      - filters: 64
        repeat: 2
        kernel:
          - 13
        stride:
          - 1
        dilation:
          - 1
        dropout: 0.0
        residual: true
        separable: true
        kernel_size_factor: 1.0
      - filters: 64
        repeat: 2
        kernel:
          - 15
        stride:
          - 1
        dilation:
          - 1
        dropout: 0.0
        residual: true
        separable: true
        kernel_size_factor: 1.0
      - filters: 64
        repeat: 2
        kernel:
          - 17
        stride:
          - 1
        dilation:
          - 1
        dropout: 0.0
        residual: true
        separable: true
        kernel_size_factor: 1.0
      # Block 5: 128 filters, kernel 29; the only block with dilation 2.
      - filters: 128
        repeat: 1
        kernel:
          - 29
        stride:
          - 1
        dilation:
          - 2
        dropout: 0.0
        residual: false
        separable: true
        kernel_size_factor: 1.0
      # Block 6: 128 filters, kernel 1. This entry sets neither `separable`
      # nor `kernel_size_factor`, unlike the five blocks above.
      - filters: 128
        repeat: 1
        kernel:
          - 1
        stride:
          - 1
        dilation:
          - 1
        dropout: 0.0
        residual: false
# Classification decoder: `cls` names the class, `params` holds its arguments.
decoder:
  cls: nemo.collections.asr.modules.ConvASRDecoderClassification
  params:
    # Same value (128) as `filters` of the last encoder block in this file.
    feat_in: 128
    # Two classes, matching the two entries in `labels`.
    num_classes: 2
    return_logits: true
    pooling_type: avg
# Optimizer settings with a nested learning-rate schedule under `sched`.
optim:
  name: sgd
  lr: 0.01
  weight_decay: 0.001
  sched:
    name: PolynomialHoldDecayAnnealing
    power: 2.0
    warmup_ratio: 0.05
    hold_ratio: 0.45
    min_lr: 0.001
    last_epoch: -1
  # NOTE(review): nesting reconstructed; `momentum` is placed on the
  # optimizer rather than the scheduler. Confirm against the original file.
  momentum: 0.9
# Fully qualified class path of the model this configuration belongs to.
target: nemo.collections.asr.models.classification_models.EncDecClassificationModel