nikhil6041
committed on
Commit
·
618dc99
1
Parent(s):
d868e72
add tokenizer
Browse files
- special_tokens_map.json +1 -0
- tokenizer_config.json +1 -0
- vocab.json +1 -0
special_tokens_map.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]"}
|
tokenizer_config.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"unk_token": "[UNK]", "bos_token": "<s>", "eos_token": "</s>", "pad_token": "[PAD]", "do_lower_case": false, "word_delimiter_token": "|", "tokenizer_class": "Wav2Vec2CTCTokenizer"}
|
vocab.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"्": 0, "स": 1, "ग": 2, "ऊ": 3, "व": 4, "W": 5, "़": 6, "ः": 7, "ष": 8, "ई": 9, "थ": 10, "ब": 11, "श": 12, "प": 13, "ऐ": 14, "l": 15, "ठ": 16, "ल": 17, "च": 18, "F": 19, ":": 20, "औ": 21, "ु": 22, "र": 23, "ू": 24, "आ": 25, "झ": 26, "य": 27, "ँ": 28, "ओ": 29, "-": 30, "i": 31, "ज": 32, "r": 33, "?": 34, "द": 35, "!": 36, "ध": 37, "फ": 38, "छ": 39, "इ": 40, "a": 41, "ो": 42, "घ": 43, "भ": 44, "m": 45, "p": 46, "ॉ": 47, "।": 48, ",": 49, "ण": 50, "त": 51, "न": 52, "अ": 53, "\"": 54, "ड": 55, "'": 56, "ृ": 57, "े": 58, "ं": 59, ".": 60, "उ": 61, "ी": 62, "क": 63, "ि": 64, "ा": 65, "ड़": 66, "ै": 67, "ह": 68, "e": 69, "म": 70, "u": 71, "ए": 72, "ढ": 73, "ऑ": 74, "M": 75, "ौ": 76, "ट": 77, "ख": 79, "|": 78, "[UNK]": 80, "[PAD]": 81}
|