gary109 committed on
Commit
d64c5b3
1 Parent(s): 55c27ba

Upload lm-boosted decoder new

Browse files
alphabet.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"labels": [" ", "c", "h", "k", "o", "s", "t", "\u2047", "", "<s>", "</s>"], "is_bpe": false}
language_model/5gram_correct.arpa ADDED
The diff for this file is too large to render. See raw diff
 
language_model/attrs.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"alpha": 0.5, "beta": 1.5, "unk_score_offset": -10.0, "score_boundary": true}
language_model/unigrams.txt ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ </s>
2
+ <s>
3
+ c
4
+ c.
5
+ ch
6
+ ck
7
+ ck.
8
+ cs
9
+ cs.
10
+ h
11
+ h.
12
+ k
13
+ k.
14
+ kh
15
+ kh.
16
+ o
17
+ o.
18
+ s
19
+ s.
20
+ sh
21
+ sh.
22
+ sk
23
+ skh
24
+ ss
25
+ st
26
+ t
27
+ t.
preprocessor_config.json CHANGED
@@ -4,7 +4,7 @@
4
  "feature_size": 1,
5
  "padding_side": "right",
6
  "padding_value": 0.0,
7
- "processor_class": "Wav2Vec2Processor",
8
  "return_attention_mask": false,
9
  "sampling_rate": 16000
10
  }
 
4
  "feature_size": 1,
5
  "padding_side": "right",
6
  "padding_value": 0.0,
7
+ "processor_class": "Wav2Vec2ProcessorWithLM",
8
  "return_attention_mask": false,
9
  "sampling_rate": 16000
10
  }
special_tokens_map.json CHANGED
@@ -21,6 +21,20 @@
21
  "rstrip": false,
22
  "single_word": false
23
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  {
25
  "content": "</s>",
26
  "lstrip": false,
 
21
  "rstrip": false,
22
  "single_word": false
23
  },
24
+ {
25
+ "content": "</s>",
26
+ "lstrip": false,
27
+ "normalized": true,
28
+ "rstrip": false,
29
+ "single_word": false
30
+ },
31
+ {
32
+ "content": "<s>",
33
+ "lstrip": false,
34
+ "normalized": true,
35
+ "rstrip": false,
36
+ "single_word": false
37
+ },
38
  {
39
  "content": "</s>",
40
  "lstrip": false,
tokenizer_config.json CHANGED
@@ -2,9 +2,9 @@
2
  "bos_token": "<s>",
3
  "do_lower_case": false,
4
  "eos_token": "</s>",
5
- "name_or_path": "ai-light-dance_drums_ft_pretrain_wav2vec2-base-new-v2",
6
  "pad_token": "[PAD]",
7
- "processor_class": "Wav2Vec2Processor",
8
  "replace_word_delimiter_char": " ",
9
  "special_tokens_map_file": null,
10
  "tokenizer_class": "Wav2Vec2CTCTokenizer",
 
2
  "bos_token": "<s>",
3
  "do_lower_case": false,
4
  "eos_token": "</s>",
5
+ "name_or_path": "gary109/ai-light-dance_drums_ft_pretrain_wav2vec2-base-new-v2",
6
  "pad_token": "[PAD]",
7
+ "processor_class": "Wav2Vec2ProcessorWithLM",
8
  "replace_word_delimiter_char": " ",
9
  "special_tokens_map_file": null,
10
  "tokenizer_class": "Wav2Vec2CTCTokenizer",