dave-rtzr committed on
Commit
48158be
1 Parent(s): 79a172a

Update hyperparams and citations

Browse files
Files changed (2) hide show
  1. README.md +29 -3
  2. hyperparams.yaml +5 -4
README.md CHANGED
@@ -27,9 +27,9 @@ SpeechBrain. For a better experience, we encourage you to learn more about
27
  [SpeechBrain](https://speechbrain.github.io).
28
  The performance of the model is the following:
29
 
30
- | Release | eval clean CER | eval other CER | GPUs |
31
- |:-------------:|:--------------:|:--------------:|:--------:|
32
- | 09-05-21 | 7.86 | 8.93 | 6xA100 80GB |
33
 
34
  ## Pipeline description
35
 
@@ -105,4 +105,30 @@ Please, cite SpeechBrain if you use it for your research or business.
105
  primaryClass={eess.AS},
106
  note={arXiv:2106.04624}
107
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
108
  ```
27
  [SpeechBrain](https://speechbrain.github.io).
28
  The performance of the model is the following:
29
 
30
+ | Release | eval clean CER | eval other CER | GPUs |
31
+ | :------: | :------------: | :------------: | :---------: |
32
+ | 09-05-21 | 7.48% | 8.38% | 6xA100 80GB |
33
 
34
  ## Pipeline description
35
 
105
  primaryClass={eess.AS},
106
  note={arXiv:2106.04624}
107
  }
108
+ ```
109
+
110
+ # Citing the model
111
+ ```bibtex
112
+ @misc{returnzero,
113
+ title = {ReturnZero Conformer Korean ASR model},
114
+ author = {Dongwon Kim and Dongwoo Kim and Roh Jeongkyu},
115
+ year = {2021},
116
+ howpublished = {\url{https://huggingface.co/ddwkim/asr-conformer-transformerlm-ksponspeech}},
117
+ }
118
+ ```
119
+
120
+ # Citing KsponSpeech dataset
121
+ ```bibtex
122
+ @Article{app10196936,
123
+ AUTHOR = {Bang, Jeong-Uk and Yun, Seung and Kim, Seung-Hi and Choi, Mu-Yeol and Lee, Min-Kyu and Kim, Yeo-Jeong and Kim, Dong-Hyun and Park, Jun and Lee, Young-Jik and Kim, Sang-Hun},
124
+ TITLE = {KsponSpeech: Korean Spontaneous Speech Corpus for Automatic Speech Recognition},
125
+ JOURNAL = {Applied Sciences},
126
+ VOLUME = {10},
127
+ YEAR = {2020},
128
+ NUMBER = {19},
129
+ ARTICLE-NUMBER = {6936},
130
+ URL = {https://www.mdpi.com/2076-3417/10/19/6936},
131
+ ISSN = {2076-3417},
132
+ DOI = {10.3390/app10196936}
133
+ }
134
  ```
hyperparams.yaml CHANGED
@@ -5,7 +5,8 @@
5
  # Tokens: unigram
6
  # losses: CTC + KLdiv (Label Smoothing loss)
7
  # Training: KsponSpeech 965.2h
8
- # Authors: Dongwon Kim, Dongwoo Kim
 
9
  # ############################################################################
10
  # Seed needs to be set at top of yaml, before objects with parameters are made
11
 
@@ -40,7 +41,7 @@ max_decode_ratio: 1.0
40
  valid_search_interval: 10
41
  valid_beam_size: 10
42
  test_beam_size: 60
43
- lm_weight: 0.60
44
  ctc_weight_decode: 0.40
45
 
46
  ############################## models ################################
@@ -105,8 +106,8 @@ decoder: !new:speechbrain.decoders.S2STransformerBeamSearch
105
  ctc_weight: !ref <ctc_weight_decode>
106
  lm_weight: !ref <lm_weight>
107
  lm_modules: !ref <lm_model>
108
- temperature: 1.15
109
- temperature_lm: 1.15
110
  using_eos_threshold: False
111
  length_normalization: True
112
 
5
  # Tokens: unigram
6
  # losses: CTC + KLdiv (Label Smoothing loss)
7
  # Training: KsponSpeech 965.2h
8
+ # Based on the works of: Jianyuan Zhong, Titouan Parcollet 2021
9
+ # Authors: Dongwon Kim, Dongwoo Kim 2021
10
  # ############################################################################
11
  # Seed needs to be set at top of yaml, before objects with parameters are made
12
 
41
  valid_search_interval: 10
42
  valid_beam_size: 10
43
  test_beam_size: 60
44
+ lm_weight: 0.20
45
  ctc_weight_decode: 0.40
46
 
47
  ############################## models ################################
106
  ctc_weight: !ref <ctc_weight_decode>
107
  lm_weight: !ref <lm_weight>
108
  lm_modules: !ref <lm_model>
109
+ temperature: 1.25
110
+ temperature_lm: 1.25
111
  using_eos_threshold: False
112
  length_normalization: True
113