crossdelenna committed on
Commit
c944ca0
1 Parent(s): 98cf19e

Upload lm-boosted decoder

Browse files
added_tokens.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"<s>": 30, "</s>": 31}
alphabet.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"labels": ["s", "n", "y", "o", "t", "l", "r", "k", "w", "c", "v", "g", "h", "z", "'", "m", "a", "e", "u", "i", "q", " ", "b", "j", "p", "f", "x", "d", "\u2047", "", "<s>", "</s>"], "is_bpe": false}
language_model/3gram.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:761824bf3b24ecc641c50f1b43073e7cea0854cbe5c3bccf7c1ff4d0e7e51f0e
3
+ size 10419611
language_model/attrs.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"alpha": 0.5, "beta": 1.5, "unk_score_offset": -10.0, "score_boundary": true}
language_model/unigrams.txt ADDED
The diff for this file is too large to render. See raw diff
 
preprocessor_config.json CHANGED
@@ -4,6 +4,7 @@
4
  "feature_size": 1,
5
  "padding_side": "right",
6
  "padding_value": 0.0,
 
7
  "return_attention_mask": true,
8
  "sampling_rate": 16000
9
  }
 
4
  "feature_size": 1,
5
  "padding_side": "right",
6
  "padding_value": 0.0,
7
+ "processor_class": "Wav2Vec2ProcessorWithLM",
8
  "return_attention_mask": true,
9
  "sampling_rate": 16000
10
  }
special_tokens_map.json CHANGED
@@ -1,6 +1 @@
1
- {
2
- "bos_token": "<s>",
3
- "eos_token": "</s>",
4
- "pad_token": "[PAD]",
5
- "unk_token": "[UNK]"
6
- }
 
1
+ {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]", "additional_special_tokens": [{"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}]}
 
 
 
 
 
tokenizer_config.json CHANGED
@@ -1,10 +1 @@
1
- {
2
- "bos_token": "<s>",
3
- "do_lower_case": false,
4
- "eos_token": "</s>",
5
- "pad_token": "[PAD]",
6
- "replace_word_delimiter_char": " ",
7
- "tokenizer_class": "Wav2Vec2CTCTokenizer",
8
- "unk_token": "[UNK]",
9
- "word_delimiter_token": "|"
10
- }
 
1
+ {"unk_token": "[UNK]", "bos_token": "<s>", "eos_token": "</s>", "pad_token": "[PAD]", "do_lower_case": false, "word_delimiter_token": "|", "replace_word_delimiter_char": " ", "special_tokens_map_file": "/root/.cache/huggingface/transformers/f260852cdc917ea96f4c3e190c02d8e56bfc4b959b5160d503d135ab05754df1.fea372b8528a479b7415f13ca4e27a2f5f3782cbb3f15b4d19bb3cbe734e8137", "name_or_path": "crossdelenna/wav2vec2-base-eng-ind", "tokenizer_class": "Wav2Vec2CTCTokenizer", "processor_class": "Wav2Vec2ProcessorWithLM"}
 
 
 
 
 
 
 
 
 
vocab.json CHANGED
@@ -1,32 +1 @@
1
- {
2
- "'": 14,
3
- "[PAD]": 29,
4
- "[UNK]": 28,
5
- "a": 16,
6
- "b": 22,
7
- "c": 9,
8
- "d": 27,
9
- "e": 17,
10
- "f": 25,
11
- "g": 11,
12
- "h": 12,
13
- "i": 19,
14
- "j": 23,
15
- "k": 7,
16
- "l": 5,
17
- "m": 15,
18
- "n": 1,
19
- "o": 3,
20
- "p": 24,
21
- "q": 20,
22
- "r": 6,
23
- "s": 0,
24
- "t": 4,
25
- "u": 18,
26
- "v": 10,
27
- "w": 8,
28
- "x": 26,
29
- "y": 2,
30
- "z": 13,
31
- "|": 21
32
- }
 
1
+ {"'": 14, "[PAD]": 29, "[UNK]": 28, "a": 16, "b": 22, "c": 9, "d": 27, "e": 17, "f": 25, "g": 11, "h": 12, "i": 19, "j": 23, "k": 7, "l": 5, "m": 15, "n": 1, "o": 3, "p": 24, "q": 20, "r": 6, "s": 0, "t": 4, "u": 18, "v": 10, "w": 8, "x": 26, "y": 2, "z": 13, "|": 21}