uer committed on
Commit
d395273
1 Parent(s): 0f8ca4c

Upload train_conformer_large_w2v.yaml

Browse files
Files changed (1) hide show
  1. train_conformer_large_w2v.yaml +119 -0
train_conformer_large_w2v.yaml ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # network architecture
2
+ # encoder related
3
+ encoder: conformer
4
+ encoder_conf:
5
+ output_size: 512 # dimension of attention
6
+ attention_heads: 8
7
+ linear_units: 2048 # the number of units of position-wise feed forward
8
+ num_blocks: 18 # the number of encoder blocks
9
+ dropout_rate: 0.1
10
+ positional_dropout_rate: 0.0
11
+ attention_dropout_rate: 0.0
12
+ input_layer: conv2d6 # encoder input type; you can choose conv2d, conv2d6 or conv2d8
13
+ normalize_before: true
14
+ cnn_module_kernel: 15
15
+ use_cnn_module: True
16
+ activation_type: 'swish'
17
+ macaron_style: True
18
+ pos_enc_layer_type: 'rel_pos'
19
+ selfattention_layer_type: 'abs_selfattn'
20
+ nonorm: False
21
+ cnn_prev: True
22
+ cnn_after: False
23
+
24
+ # decoder related
25
+ decoder: transformer
26
+ decoder_conf:
27
+ attention_heads: 4
28
+ linear_units: 2048
29
+ num_blocks: 1
30
+ dropout_rate: 0.0
31
+ positional_dropout_rate: 0.0
32
+ self_attention_dropout_rate: 0.0
33
+ src_attention_dropout_rate: 0.0
34
+
35
+ # hybrid CTC/attention
36
+ model_conf:
37
+ ctc_weight: 1.0
38
+ lsm_weight: 0.1 # label smoothing option
39
+ length_normalized_loss: false
40
+
41
+ raw_wav: False
42
+ data_save: True
43
+ use_gc: True
44
+
45
+ w2v_encoder: True
46
+ pretrain: True
47
+ random_pretrain: False
48
+ wav2vec: True
49
+ w2v_coef: 1.0
50
+
51
+ mpc_didi_ver: False
52
+ wav2mpc: False
53
+ wav2mpc_reduction: False
54
+ mpc_mask_loss: False
55
+ mpc_coef: 0.0
56
+
57
+ mask: True
58
+ quantize_targets: True
59
+ project_targets: True
60
+ latent_vars: 320
61
+ w2v_reduct: True
62
+ w2v_ext_loss: True
63
+ w2v_loss_weights: [0.1,0]
64
+
65
+ w2v_mask_prob: 0.65
66
+ mpc_prob: 0.5
67
+
68
+ remove_valbest: False
69
+
70
+ model:
71
+ method: 'npc' # Accepts npc/apc/vqapc
72
+ paras:
73
+ kernel_size: 15 # Receptive field size (R) = kernel_size + 2*(n_blocks)
74
+ mask_size: 5 # Desired input mask size (M_in) as described in NPC paper
75
+ n_blocks: 4 # Number of ConvBlocks stacked in NPC model
76
+ hidden_size: 512 # Dimension of feature of all layers
77
+ dropout: 0.1 # Dropout in ConvBlock
78
+ residual: True # Residual connection in ConvBlock
79
+ batch_norm: True # Apply BatchNorm in ConvBlock
80
+ activate: 'relu' # Activation function of ConvBlock
81
+ disable_cross_layer: False # Apply Masked ConvBlock at last layer only
82
+ vq:
83
+ codebook_size: [64,64,64,64] # Codebook size of each group in VQ-layer
84
+ code_dim: [128,128,128,128] # Dim of each group summing up to hidden_size
85
+ gumbel_temperature: 1.0 # Temperature of Gumbel Softmax in VQ-layer
86
+
87
+ collate_conf:
88
+ spec_aug: false
89
+
90
+ # SpecAugment related
91
+ spec_aug_conf:
92
+ num_time_mask: 2
93
+ num_freq_mask: 2
94
+ max_time_mask: 50
95
+ max_freq_mask: 10
96
+ max_time_warp: 80
97
+ gauss_mask_for_time: False
98
+ warp_for_time: False
99
+
100
+ # dataset related
101
+ dataset_conf:
102
+ max_length: 4500
103
+ min_length: 80
104
+ max_frames_in_batch: 16000
105
+ batch_type: 'dynamic' # static or dynamic
106
+ batch_size: 20
107
+ sort: true
108
+
109
+ grad_clip: 10
110
+ accum_grad: 2
111
+ max_epoch: 180
112
+ log_interval: 100
113
+
114
+ optim: adam
115
+ optim_conf:
116
+ lr: 0.001
117
+ scheduler: warmuplr # pytorch v1.1.0+ required
118
+ scheduler_conf:
119
+ warmup_steps: 10000