jawaharreddy247 committed on
Commit
404be69
1 Parent(s): e1ff356

add tokenizer

Browse files
Files changed (3) hide show
  1. special_tokens_map.json +1 -0
  2. tokenizer_config.json +1 -0
  3. vocab.json +1 -0
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
1
+ {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]"}
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
1
+ {"unk_token": "[UNK]", "bos_token": "<s>", "eos_token": "</s>", "pad_token": "[PAD]", "do_lower_case": false, "word_delimiter_token": "|", "tokenizer_class": "Wav2Vec2CTCTokenizer"}
vocab.json ADDED
@@ -0,0 +1 @@
 
1
+ {"ख": 0, "ण": 1, "ह": 2, "ड़": 3, "ढ": 4, "ा": 5, "प": 6, "ं": 7, "ठ": 8, "r": 9, "ए": 10, ".": 11, "इ": 12, "ँ": 13, "भ": 14, "म": 15, "ो": 16, "m": 17, "ज": 18, "ल": 19, "ई": 20, "ब": 21, ":": 22, "छ": 23, "ड": 24, "i": 25, "ध": 26, "ृ": 27, "l": 28, "ऑ": 29, "F": 30, "उ": 31, "p": 32, "फ": 33, "े": 34, "घ": 35, "ु": 36, "ौ": 37, "आ": 38, "र": 39, "झ": 40, "व": 41, "?": 42, "द": 43, "क": 44, "M": 45, "्": 46, "ट": 47, "'": 48, ",": 49, "।": 50, "\"": 51, "औ": 52, "!": 53, "-": 54, "श": 55, "ै": 56, "अ": 57, "ॉ": 58, "़": 59, "स": 60, "थ": 61, "ग": 62, "य": 63, "ओ": 64, "ू": 65, "ः": 67, "ऊ": 68, "a": 69, "ि": 70, "e": 71, "त": 72, "W": 73, "ी": 74, "च": 75, "ऐ": 76, "ष": 77, "u": 78, "न": 79, "|": 66, "[UNK]": 80, "[PAD]": 81}