Umong committed on
Commit
bbc84ad
1 Parent(s): 5908384

Upload lm-boosted decoder

Browse files
alphabet.json CHANGED
@@ -1 +1 @@
1
- {"labels": ["</s>", "", "<s>", "\u2047", " ", "\u0981", "\u0982", "\u0983", "\u0985", "\u0986", "\u0987", "\u0988", "\u0989", "\u098a", "\u098b", "\u098f", "\u0990", "\u0993", "\u0994", "\u0995", "\u0996", "\u0997", "\u0998", "\u0999", "\u099a", "\u099b", "\u099c", "\u099d", "\u099e", "\u099f", "\u09a0", "\u09a1", "\u09a2", "\u09a3", "\u09a4", "\u09a5", "\u09a6", "\u09a7", "\u09a8", "\u09aa", "\u09ab", "\u09ac", "\u09ad", "\u09ae", "\u09af", "\u09b0", "\u09b2", "\u09b6", "\u09b7", "\u09b8", "\u09b9", "\u09be", "\u09bf", "\u09c0", "\u09c1", "\u09c2", "\u09c3", "\u09c7", "\u09c8", "\u09cb", "\u09cc", "\u09cd", "\u09ce", "\u09dc", "\u09dd", "\u09df"], "is_bpe": false}
 
1
+ {"labels": [" ", "\u0981", "\u0982", "\u0983", "\u0985", "\u0986", "\u0987", "\u0988", "\u0989", "\u098a", "\u098b", "\u098f", "\u0990", "\u0993", "\u0994", "\u0995", "\u0996", "\u0997", "\u0998", "\u0999", "\u099a", "\u099b", "\u099c", "\u099d", "\u099e", "\u099f", "\u09a0", "\u09a1", "\u09a2", "\u09a3", "\u09a4", "\u09a5", "\u09a6", "\u09a7", "\u09a8", "\u09aa", "\u09ab", "\u09ac", "\u09ad", "\u09ae", "\u09af", "\u09b0", "\u09b2", "\u09b6", "\u09b7", "\u09b8", "\u09b9", "\u09be", "\u09bf", "\u09c0", "\u09c1", "\u09c2", "\u09c3", "\u09c7", "\u09c8", "\u09cb", "\u09cc", "\u09cd", "\u09ce", "\u09dc", "\u09dd", "\u09df", "\u2047", "", "<s>", "</s>"], "is_bpe": false}
special_tokens_map.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "bos_token": "<s>",
3
  "eos_token": "</s>",
4
- "pad_token": "[PAD]",
5
- "unk_token": "[UNK]"
6
  }
 
1
  {
2
  "bos_token": "<s>",
3
  "eos_token": "</s>",
4
+ "pad_token": "<pad>",
5
+ "unk_token": "<unk>"
6
  }
tokenizer_config.json CHANGED
@@ -4,11 +4,11 @@
4
  "do_lower_case": false,
5
  "eos_token": "</s>",
6
  "model_max_length": 1000000000000000019884624838656,
7
- "pad_token": "[PAD]",
8
  "processor_class": "Wav2Vec2ProcessorWithLM",
9
  "replace_word_delimiter_char": " ",
10
  "target_lang": null,
11
  "tokenizer_class": "Wav2Vec2CTCTokenizer",
12
- "unk_token": "[UNK]",
13
  "word_delimiter_token": "|"
14
  }
 
4
  "do_lower_case": false,
5
  "eos_token": "</s>",
6
  "model_max_length": 1000000000000000019884624838656,
7
+ "pad_token": "<pad>",
8
  "processor_class": "Wav2Vec2ProcessorWithLM",
9
  "replace_word_delimiter_char": " ",
10
  "target_lang": null,
11
  "tokenizer_class": "Wav2Vec2CTCTokenizer",
12
+ "unk_token": "<unk>",
13
  "word_delimiter_token": "|"
14
  }
vocab.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "</s>": 65,
3
- "[PAD]": 63,
4
  "<s>": 64,
5
- "[UNK]": 62,
6
  "|": 0,
7
  "ঁ": 1,
8
  "ং": 2,
 
1
  {
2
  "</s>": 65,
3
+ "<pad>": 63,
4
  "<s>": 64,
5
+ "<unk>": 62,
6
  "|": 0,
7
  "ঁ": 1,
8
  "ং": 2,