rafiulrumy committed on
Commit
f006f12
1 Parent(s): df7e655

add tokenizer

Browse files
added_tokens.json ADDED
@@ -0,0 +1 @@
 
1
+ {"<s>": 74, "</s>": 75}
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
1
+ {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]", "additional_special_tokens": [{"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}]}
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
1
+ {"unk_token": "[UNK]", "bos_token": "<s>", "eos_token": "</s>", "pad_token": "[PAD]", "do_lower_case": false, "word_delimiter_token": "|", "special_tokens_map_file": null, "tokenizer_file": null, "name_or_path": "./", "tokenizer_class": "Wav2Vec2CTCTokenizer"}
vocab.json ADDED
@@ -0,0 +1 @@
 
1
+ {"ू": 0, "ऑ": 1, "ल": 2, "श": 3, "ध": 4, "फ": 5, "ं": 6, "ै": 7, "ग": 8, "a": 9, "ब": 10, "आ": 11, "ख": 12, "p": 13, "म": 14, "े": 15, "औ": 16, "ष": 17, "ः": 18, "r": 19, "ो": 20, "न": 21, "भ": 22, "अ": 23, "ज": 24, "।": 25, "ठ": 26, "ड़": 27, "र": 28, "इ": 29, "l": 30, "ह": 31, "त": 32, "ई": 33, "m": 34, "य": 35, "ण": 36, "व": 37, "स": 38, "थ": 39, "ए": 40, "ँ": 41, "ौ": 42, "झ": 44, "ृ": 45, "w": 46, "ऐ": 47, "'": 48, "ी": 49, "च": 50, "घ": 51, "्": 52, "ड": 53, "ु": 54, "e": 55, "़": 56, "u": 57, "ट": 58, "ऊ": 59, "छ": 60, "f": 61, "क": 62, "ढ": 63, "ओ": 64, "द": 65, "उ": 66, "प": 67, "ा": 68, "ॉ": 69, "ि": 70, "i": 71, "|": 43, "[UNK]": 72, "[PAD]": 73}