# nemo-is-clean-titanet_large / model_config.yaml
# huseinzol05's picture
# Upload model_config.yaml with huggingface_hub
# 7786141
# raw history blame
# No virus
# 2.87 kB
# Decoder settings. `_target_` holds a dotted class path — presumably the class
# the config loader instantiates (Hydra-style); confirm with the consumer.
decoder:
  _target_: nemo.collections.asr.modules.SpeakerDecoder
  angular: false
  emb_sizes: 192
  # Same value as `filters` of the last `encoder.jasper` entry (3072);
  # presumably the two must stay in sync — TODO confirm.
  feat_in: 3072
  num_classes: 2
  pool_mode: attention
# Encoder settings. `jasper` is a list of five block descriptions that share
# the same key set; they differ only in dropout, filters, kernel, repeat and
# residual.
encoder:
  _target_: nemo.collections.asr.modules.ConvASREncoder
  activation: relu
  conv_mask: true
  # Same value as `preprocessor.features` (80); presumably must match — confirm.
  feat_in: 80
  jasper:
    # Entry 1: kernel 3, single repeat, no residual, no dropout.
    - dilation:
        - 1
      dropout: 0.0
      filters: 1024
      kernel:
        - 3
      repeat: 1
      residual: false
      se: true
      se_context_size: -1
      separable: true
      stride:
        - 1
    # Entries 2-4: identical except for the kernel size (7, 11, 15).
    - dilation:
        - 1
      dropout: 0.1
      filters: 1024
      kernel:
        - 7
      repeat: 3
      residual: true
      se: true
      se_context_size: -1
      separable: true
      stride:
        - 1
    - dilation:
        - 1
      dropout: 0.1
      filters: 1024
      kernel:
        - 11
      repeat: 3
      residual: true
      se: true
      se_context_size: -1
      separable: true
      stride:
        - 1
    - dilation:
        - 1
      dropout: 0.1
      filters: 1024
      kernel:
        - 15
      repeat: 3
      residual: true
      se: true
      se_context_size: -1
      separable: true
      stride:
        - 1
    # Entry 5: kernel 1, 3072 filters — the same number as `decoder.feat_in`.
    - dilation:
        - 1
      dropout: 0.0
      filters: 3072
      kernel:
        - 1
      repeat: 1
      residual: false
      se: true
      se_context_size: -1
      separable: true
      stride:
        - 1
# Loss hyperparameters. The loss class that consumes them is not named in this
# file. NOTE(review): `decoder.angular` is false — confirm margin/scale are used.
loss:
  margin: 0.2
  scale: 30
# Shared default values. dropout, filters, repeat, se, se_context_size and
# separable equal the values written out in the middle `encoder.jasper` entries.
# NOTE(review): nothing visible in this file refers to enc_hidden, joint_hidden,
# pred_hidden or kernel_size_factor — confirm whether they are still needed.
model_defaults:
  dropout: 0.1
  enc_hidden: 640
  filters: 1024
  joint_hidden: 640
  kernel_size_factor: 1.0
  pred_hidden: 640
  repeat: 3
  se: true
  se_context_size: -1
  separable: true
# Optimizer settings. `name` appears twice on purpose: once for the optimizer
# (sgd) and once inside `sched` for the scheduler (CosineAnnealing).
optim:
  lr: 0.08
  momentum: 0.9
  name: sgd
  # Scheduler sub-mapping; `weight_decay` below is back at the optimizer level.
  sched:
    min_lr: 0.0
    name: CosineAnnealing
    warmup_ratio: 0.1
  weight_decay: 0.0002
# Audio preprocessor settings (mel-spectrogram module, per the `_target_` name).
preprocessor:
  _target_: nemo.collections.asr.modules.AudioToMelSpectrogramPreprocessor
  dither: 1.0e-05
  # Same value as `encoder.feat_in` (80); presumably must match — confirm.
  features: 80
  frame_splicing: 1
  n_fft: 512
  normalize: per_feature
  # Same value as `sample_rate` in train_ds and validation_ds (16000).
  sample_rate: 16000
  window: hann
  # Presumably expressed in seconds — TODO confirm against the module docs.
  window_size: 0.025
  window_stride: 0.01
# Spectrogram augmentation settings: mask counts and widths for the frequency
# and time axes.
spec_augment:
  _target_: nemo.collections.asr.modules.SpectrogramAugmentation
  freq_masks: 3
  freq_width: 4
  time_masks: 5
  # Fractional, unlike the integer `freq_width`; presumably a ratio of the
  # input length rather than a frame count — TODO confirm.
  time_width: 0.03
# Dotted class path (note: `target`, not `_target_` as used by the modules).
# Presumably read by the loader to choose the model class — confirm.
target: nemo.collections.asr.models.label_models.EncDecSpeakerLabelModel
# Training dataset settings.
train_ds:
  # Two augmentations, each with its own `prob` of 0.5.
  augmentor:
    noise:
      # Absolute path; must exist on the machine that loads this config.
      manifest_path: /manifests/noise/rir_noise_manifest.json
      max_snr_db: 15
      min_snr_db: 0
      prob: 0.5
    speed:
      max_speed_rate: 1.05
      min_speed_rate: 0.95
      prob: 0.5
      resample_type: kaiser_fast
      sr: 16000
  batch_size: 64
  is_tarred: false
  labels: null
  # Absolute path; must exist on the machine that loads this config.
  manifest_filepath: /manifests/combined_fisher_swbd_voxceleb12_librispeech/train.json
  num_workers: 15
  pin_memory: true
  sample_rate: 16000
  shuffle: true
  # Tarred-dataset keys; `is_tarred` is false above, so no archive paths are set.
  tarred_audio_filepaths: null
  tarred_shard_strategy: scatter
  # Presumably seconds of audio per training sample — TODO confirm.
  time_length: 3
# Validation dataset settings. Compared with train_ds: larger batch size,
# no shuffling, and no augmentor or tarred-dataset keys.
validation_ds:
  batch_size: 128
  labels: null
  # Absolute path; must exist on the machine that loads this config.
  manifest_filepath: /manifests/combined_fisher_swbd_voxceleb12_librispeech/dev.json
  num_workers: 15
  pin_memory: true
  sample_rate: 16000
  shuffle: false
  time_length: 3