hrdipto committed on
Commit
fc0a63f
1 Parent(s): dcfd5ec

add tokenizer

Browse files
Files changed (3) hide show
  1. special_tokens_map.json +1 -0
  2. tokenizer_config.json +1 -0
  3. vocab.json +1 -0
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]"}
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"unk_token": "[UNK]", "bos_token": "<s>", "eos_token": "</s>", "pad_token": "[PAD]", "do_lower_case": false, "word_delimiter_token": "|", "tokenizer_class": "Wav2Vec2CTCTokenizer"}
vocab.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"a": 1, "b": 2, "c": 3, "d": 4, "e": 5, "f": 6, "g": 7, "h": 8, "i": 9, "j": 10, "k": 11, "l": 12, "m": 13, "n": 14, "o": 15, "p": 16, "r": 17, "s": 18, "t": 19, "u": 20, "v": 21, "w": 22, "x": 23, "y": 24, "z": 25, "“": 26, "”": 27, "।": 28, "ঁ": 29, "ং": 30, "ঃ": 31, "অ": 32, "আ": 33, "ই": 34, "ঈ": 35, "উ": 36, "ঊ": 37, "ঋ": 38, "এ": 39, "ঐ": 40, "ও": 41, "ঔ": 42, "ক": 43, "খ": 44, "গ": 45, "ঘ": 46, "ঙ": 47, "চ": 48, "ছ": 49, "জ": 50, "ঝ": 51, "ঞ": 52, "ট": 53, "ঠ": 54, "ড": 55, "ঢ": 56, "ণ": 57, "ত": 58, "থ": 59, "দ": 60, "ধ": 61, "ন": 62, "প": 63, "ফ": 64, "ব": 65, "ভ": 66, "ম": 67, "য": 68, "র": 69, "ল": 70, "শ": 71, "ষ": 72, "স": 73, "হ": 74, "়": 75, "া": 76, "ি": 77, "ী": 78, "ু": 79, "ূ": 80, "ৃ": 81, "ে": 82, "ৈ": 83, "ো": 84, "ৌ": 85, "্": 86, "ৎ": 87, "ৗ": 88, "ড়": 89, "ঢ়": 90, "য়": 91, "ৰ": 92, "‌": 93, "‍": 94, "‎": 95, "|": 0, "[UNK]": 96, "[PAD]": 97}