root committed on
Commit
6a974a7
1 Parent(s): 69d5f59

fix model avg

Browse files
conf/conformer_rnnt_large.yaml ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # network architecture
2
+ # encoder related
3
+ encoder: conformer
4
+ encoder_conf:
5
+ output_size: 512 # dimension of attention
6
+ attention_heads: 8
7
+ linear_units: 2048 # the number of units of position-wise feed forward
8
+ num_blocks: 12 # the number of encoder blocks
9
+ dropout_rate: 0.1
10
+ positional_dropout_rate: 0.1
11
+ attention_dropout_rate: 0.1
12
+ input_layer: conv2d # encoder input type, you can choose conv2d, conv2d6 and conv2d8
13
+ normalize_before: true
14
+ cnn_module_kernel: 31
15
+ use_cnn_module: true
16
+ activation_type: 'swish'
17
+ pos_enc_layer_type: 'rel_pos'
18
+ selfattention_layer_type: 'rel_selfattn'
19
+
20
+
21
+ joint_conf:
22
+ join_dim: 512
23
+ prejoin_linear: True
24
+ postjoin_linear: false
25
+ joint_mode: 'add'
26
+ activation: 'tanh'
27
+
28
+ predictor: rnn
29
+ predictor_conf:
30
+ embed_size: 512
31
+ output_size: 512
32
+ embed_dropout: 0.1
33
+ hidden_size: 512
34
+ num_layers: 2
35
+ bias: true
36
+ rnn_type: 'lstm'
37
+ dropout: 0.1
38
+
39
+ decoder: bitransformer
40
+ decoder_conf:
41
+ attention_heads: 8
42
+ dropout_rate: 0.1
43
+ linear_units: 2048
44
+ num_blocks: 3
45
+ positional_dropout_rate: 0.1
46
+ r_num_blocks: 3
47
+ self_attention_dropout_rate: 0.1
48
+ src_attention_dropout_rate: 0.1
49
+
50
+ # hybrid transducer+ctc+attention
51
+ model_conf:
52
+ transducer_weight: 0.75
53
+ ctc_weight: 0.1
54
+ attention_weight: 0.15
55
+ lsm_weight: 0.1 # label smoothing option
56
+ length_normalized_loss: false
57
+ reverse_weight: 0.3
58
+
59
+ dataset_conf:
60
+ filter_conf:
61
+ max_length: 1650
62
+ min_length: 10
63
+ token_max_length: 200
64
+ token_min_length: 1
65
+ resample_conf:
66
+ resample_rate: 16000
67
+ speed_perturb: true
68
+ fbank_conf:
69
+ num_mel_bins: 80
70
+ frame_shift: 10
71
+ frame_length: 25
72
+ dither: 0.1
73
+ spec_aug: true
74
+ spec_aug_conf:
75
+ num_t_mask: 2
76
+ num_f_mask: 2
77
+ max_t: 50
78
+ max_f: 10
79
+ shuffle: true
80
+ shuffle_conf:
81
+ shuffle_size: 1500
82
+ sort: true
83
+ sort_conf:
84
+ sort_size: 500 # sort_size should be less than shuffle_size
85
+ batch_conf:
86
+ batch_type: 'dynamic' # static or dynamic
87
+ max_frames_in_batch: 4000
88
+
89
+ grad_clip: 4
90
+ accum_grad: 1
91
+ max_epoch: 140
92
+ log_interval: 100
93
+
94
+ optim: adam
95
+ optim_conf:
96
+ lr: 0.001
97
+ scheduler: warmuplr # pytorch v1.1.0+ required
98
+ scheduler_conf:
99
+ warmup_steps: 25000
100
+
exp/conformer_transducer/avg_5.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:94c5d9396cda5646df79f3c77286bb0d8627a3e0a527d36cbe50d3349b42f1e9
3
  size 277103087
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3224c02c52e558395dd02f4489ac23a243094c94d446119558c6f86540fb347b
3
  size 277103087
exp/conformer_transducer/{avg_10.pt → avg_6.pt} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2f737fede01276bfe182dd099d1799987a7b30a8a911b7d79a798d0db24a16c9
3
  size 277103087
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d3fa1073140f9adfe46cbfc7a391ff7c4894a414f2a7cb4d28f0cc60ed1f98b
3
  size 277103087