ddwkim committed
Commit f94e4bc
Parent: 31c9b51

Update models

Files changed (5):
  1. asr.ckpt +2 -2
  2. hyperparams.yaml +16 -19
  3. lm.ckpt +2 -2
  4. normalizer.ckpt +2 -2
  5. tokenizer.ckpt +2 -2
asr.ckpt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:893a5fb84a67315a954d7645fd3b5f96cee806531f538e0073f6dcdf17dcf7c3
-size 183510489
+oid sha256:8e718dc29b403dfaa8d2604c43c3666be3fa99e958b77e3c6ff387e94d4a174c
+size 184546287
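
These .ckpt entries are Git LFS pointers: the repository itself stores only a spec version, a sha256 oid, and the byte size, while the actual weights live in LFS storage. A minimal sketch (my own, not part of the repo) for checking that a downloaded asr.ckpt matches the new pointer above:

import hashlib

# New oid taken from the pointer diff above.
EXPECTED = "8e718dc29b403dfaa8d2604c43c3666be3fa99e958b77e3c6ff387e94d4a174c"

h = hashlib.sha256()
with open("asr.ckpt", "rb") as f:  # path assumed: the locally fetched checkpoint
    for chunk in iter(lambda: f.read(1 << 20), b""):
        h.update(chunk)

assert h.hexdigest() == EXPECTED, "asr.ckpt does not match its LFS pointer"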
hyperparams.yaml CHANGED
@@ -29,7 +29,6 @@ vocab_size: 5000
 
 # Outputs
 blank_index: 0
-label_smoothing: 0.1
 pad_index: 0
 bos_index: 1
 eos_index: 2
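
The dropped label_smoothing: 0.1 is a training-loss option, so an inference-only hyperparams file has no use for it. For reference, a generic sketch of the smoothed target distribution the setting refers to (standard formulation, not SpeechBrain's exact loss code):

import torch

def smoothed_targets(labels: torch.Tensor, vocab_size: int, eps: float = 0.1):
    # Mix the one-hot target with a uniform distribution over the vocab.
    one_hot = torch.nn.functional.one_hot(labels, vocab_size).float()
    return (1.0 - eps) * one_hot + eps / vocab_size

print(smoothed_targets(torch.tensor([2]), vocab_size=5))
# tensor([[0.0200, 0.0200, 0.9200, 0.0200, 0.0200]])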
@@ -38,10 +37,8 @@ unk_index: 0
 # Decoding parameters
 min_decode_ratio: 0.0
 max_decode_ratio: 1.0
-valid_search_interval: 10
-valid_beam_size: 10
-test_beam_size: 60
-lm_weight: 0.20
+test_beam_size: 10
+lm_weight: 0.0
 ctc_weight_decode: 0.40
 
 ############################## models ################################
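
The net effect of this hunk: the test beam shrinks from 60 to 10 and lm_weight drops to 0.0, so the transformer LM no longer contributes to decoding. A schematic of how these weights typically enter joint CTC/attention beam search with shallow fusion (illustrative only; SpeechBrain's S2STransformerBeamSearch is the real implementation):

def hypothesis_score(attn_logp: float, ctc_logp: float, lm_logp: float,
                     ctc_weight: float = 0.40, lm_weight: float = 0.0) -> float:
    # Joint CTC/attention score plus LM shallow fusion. With lm_weight 0.0
    # the LM term vanishes and only the acoustic scores rank hypotheses.
    return (1.0 - ctc_weight) * attn_logp + ctc_weight * ctc_logp + lm_weight * lm_logp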
@@ -51,15 +48,15 @@ normalizer: !new:speechbrain.processing.features.InputNormalization
 
 CNN: !new:speechbrain.lobes.models.convolution.ConvolutionFrontEnd
     input_shape: (8, 10, 80)
-    num_blocks: 2
+    num_blocks: 3
     num_layers_per_block: 1
-    out_channels: (64, 32)
-    kernel_sizes: (3, 3)
-    strides: (2, 2)
-    residuals: (False, False)
-
+    out_channels: (64, 64, 64)
+    kernel_sizes: (5, 5, 1)
+    strides: (2, 2, 1)
+    residuals: (False, False, True)
+
 Transformer: !new:speechbrain.lobes.models.transformer.TransformerASR.TransformerASR # yamllint disable-line rule:line-length
-    input_size: 640
+    input_size: 1280
     tgt_vocab: !ref <output_neurons>
     d_model: !ref <d_model>
     nhead: !ref <nhead>
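
The input_size bump from 640 to 1280 follows from the new front-end shape: the Transformer consumes the CNN output flattened over channels and downsampled mel bins. A quick check, assuming each stride-s block reduces the 80 mel bins with ceiling division:

import math

def frontend_flat_dim(n_mels: int, strides, last_channels: int) -> int:
    bins = n_mels
    for s in strides:
        bins = math.ceil(bins / s)  # each block downsamples the frequency axis
    return bins * last_channels

print(frontend_flat_dim(80, (2, 2), 32))     # old: 20 bins * 32 ch = 640
print(frontend_flat_dim(80, (2, 2, 1), 64))  # new: 20 bins * 64 ch = 1280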
@@ -106,11 +103,14 @@ decoder: !new:speechbrain.decoders.S2STransformerBeamSearch
     ctc_weight: !ref <ctc_weight_decode>
     lm_weight: !ref <lm_weight>
     lm_modules: !ref <lm_model>
-    temperature: 1.25
-    temperature_lm: 1.25
+    temperature: 1.30
+    temperature_lm: 1.30
     using_eos_threshold: False
     length_normalization: True
 
+log_softmax: !new:torch.nn.LogSoftmax
+    dim: -1
+
 Tencoder: !new:speechbrain.lobes.models.transformer.TransformerASR.EncoderWrapper
     transformer: !ref <Transformer>
 
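Both decoding temperatures rise from 1.25 to 1.30, and the standalone log_softmax module moves up here (it is deleted from its old spot in the next hunk). Temperature divides the logits before normalization; values above 1 flatten the token distribution, letting beam search keep more diverse hypotheses. A minimal sketch of the operation:

import torch

def tempered_log_probs(logits: torch.Tensor, temperature: float = 1.30) -> torch.Tensor:
    # Equivalent to torch.nn.LogSoftmax(dim=-1) applied to scaled logits.
    return torch.log_softmax(logits / temperature, dim=-1)

print(tempered_log_probs(torch.tensor([2.0, 1.0, 0.5])))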
 
@@ -122,11 +122,7 @@ encoder: !new:speechbrain.nnet.containers.LengthsCapableSequential
     transformer_encoder: !ref <Tencoder>
 
 asr_model: !new:torch.nn.ModuleList
-    - [!ref <normalizer>, !ref <CNN>, !ref <Transformer>, !ref <seq_lin>, !ref <ctc_lin>]
-
-log_softmax: !new:torch.nn.LogSoftmax
-    dim: -1
-
+    - [!ref <CNN>, !ref <Transformer>, !ref <seq_lin>, !ref <ctc_lin>]
 
 compute_features: !new:speechbrain.lobes.features.Fbank
     sample_rate: !ref <sample_rate>
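
asr_model here is a parameter container: torch.nn.ModuleList gathers the sub-modules whose weights asr.ckpt bundles. Dropping <normalizer> from the list presumably leaves its statistics to the separate normalizer.ckpt, which also changed in this commit; the relocated log_softmax holds no parameters anyway. A small illustration of the container behaviour:

import torch

bundle = torch.nn.ModuleList([torch.nn.Linear(4, 4), torch.nn.Linear(4, 2)])
# ModuleList registers its children, so their parameters are saved together:
print(sum(p.numel() for p in bundle.parameters()))  # (16 + 4) + (8 + 2) = 30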
@@ -142,6 +138,7 @@ modules:
     lm_model: !ref <lm_model>
     encoder: !ref <encoder>
     decoder: !ref <decoder>
+
 # The pretrainer allows a mapping between pretrained files and instances that
 # are declared in the yaml.
 pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
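
A repository laid out like this, hyperparams.yaml plus Pretrainer-mapped checkpoints, is typically consumed through SpeechBrain's pretrained interface. A sketch, with the source string as a placeholder for this model's actual repo id (on SpeechBrain 1.0+ the import lives under speechbrain.inference.ASR instead):

from speechbrain.pretrained import EncoderDecoderASR

asr = EncoderDecoderASR.from_hparams(
    source="<namespace>/<this-model-repo>",  # placeholder, not the real repo id
    savedir="pretrained_model",
)
print(asr.transcribe_file("example.wav"))  # path to a local audio file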
 
lm.ckpt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ee4a5a5d9ce11e24dcea93f24a241528b9b376798be6478c70fb279736515110
-size 381074814
+oid sha256:7f0b49d5e1f9894c0c9f2ec21c8658da8e1a07f509b807e8624450ba19ea667c
+size 381072461
normalizer.ckpt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f4866d96b29f5c97526c7469aa6f58cd50aeb9865b457daf599f0f42e5827be9
-size 1783
+oid sha256:1da2ced935d955c014177591249e5db497d0c5dc7143e64378da0cb5590fe77a
+size 1703
tokenizer.ckpt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5e095c023a42b6bd25352512597a245db9bf9126ce6bf64082bd41d0a196b220
-size 313899
+oid sha256:d419e55734c26df6c5690671be2b887a7db389c1a7f63286111ce737508c6569
+size 313900