emiyasstar
/

librispeech

Model card Files Files and versions Community

emiyasstar committed on Jul 28, 2022

Commit

ac85292

•

1 Parent(s): 649d904

Upload train_conformer_100h.yaml

Browse files

Files changed (1) hide show

train_conformer_100h.yaml +91 -0

train_conformer_100h.yaml ADDED Viewed

	@@ -0,0 +1,91 @@

+# network architecture
+# encoder related
+encoder: conformer
+encoder_conf:
+    output_size: 512    # dimension of attention
+    attention_heads: 8
+    linear_units: 2048  # the number of units of position-wise feed forward
+    num_blocks: 12      # the number of encoder blocks
+    dropout_rate: 0.1
+    positional_dropout_rate: 0.0
+    attention_dropout_rate: 0.0
+    input_layer: conv2d  # encoder input type, you can choose conv2d, conv2d6 or conv2d8
+    normalize_before: true
+    cnn_module_kernel: 31  # presumably the kernel size of the convolution module; confirm
+    use_cnn_module: true   # canonical lowercase boolean, matching normalize_before
+    activation_type: 'swish'
+    pos_enc_layer_type: 'rel_pos'
+    selfattention_layer_type: 'rel_selfattn'
+# decoder related
+decoder: transformer
+decoder_conf:
+    attention_heads: 2
+    linear_units: 512  # the number of units of position-wise feed forward
+    num_blocks: 1  # the number of decoder blocks
+    dropout_rate: 0.1
+    positional_dropout_rate: 0.0
+    self_attention_dropout_rate: 0.0
+    src_attention_dropout_rate: 0.0  # NOTE(review): presumably dropout on attention over encoder output; confirm
+# hybrid CTC/attention
+model_conf:
+    ctc_weight: 0.7  # weight of the CTC term; presumably the attention term gets 1 - ctc_weight, confirm
+    lsm_weight: 0.1     # label smoothing option
+    length_normalized_loss: false  # NOTE(review): normalisation basis is not shown here; confirm in the consumer
+# use raw_wav or kaldi feature
+raw_wav: true
+# dataset related
+dataset_conf:
+    filter_conf:
+        max_length: 2000  # NOTE(review): unit not stated; presumably feature frames, confirm
+        min_length: 50  # same unit as max_length
+        token_max_length: 400  # presumably upper bound on label tokens per utterance; confirm
+        token_min_length: 1
+    resample_conf:
+        resample_rate: 16000  # presumably Hz; confirm
+    speed_perturb: true
+    fbank_conf:
+        num_mel_bins: 80
+        frame_shift: 10  # presumably milliseconds; confirm
+        frame_length: 25  # presumably milliseconds; confirm
+        dither: 1.0
+    spec_aug: true
+    spec_aug_conf:
+        num_t_mask: 3  # presumably number of time masks; confirm
+        num_f_mask: 2  # presumably number of frequency masks; confirm
+        max_t: 50  # presumably maximum width of one time mask; confirm
+        max_f: 10  # presumably maximum width of one frequency mask; confirm
+    shuffle: true
+    shuffle_conf:
+        shuffle_size: 1500
+    sort: true
+    sort_conf:
+        sort_size: 500  # sort_size should be less than shuffle_size
+    batch_conf:
+        batch_type: 'static' # static or dynamic
+        batch_size: 10
+# pre-training related; booleans use canonical lowercase true/false,
+# matching the other booleans in this file
+# NOTE(review): `pretrain` appears both at top level and inside wav2vec_conf;
+# both are kept as in the original. Confirm which one the trainer reads.
+pretrain: false
+wav2vec_conf:
+    pretrain: false
+    quantize_targets: true
+    project_targets: true
+    latent_vars: 320
+    latent_dim: 512
+    latent_groups: 2
+    mask: false
+grad_clip: 5  # gradient clipping threshold; presumably a max gradient norm, confirm
+accum_grad: 1  # gradient accumulation count; 1 presumably means no accumulation, confirm
+max_epoch: 120
+log_interval: 100  # presumably counted in training batches; confirm
+optim: adam
+optim_conf:
+    lr: 0.001  # NOTE(review): likely the rate reached at the end of warmup; confirm against the scheduler
+scheduler: warmuplr     # pytorch v1.1.0+ required
+scheduler_conf:
+    warmup_steps: 15000