nvshubhsharma committed on
Commit
b45120e
1 Parent(s): 0520c58

add tokenizer

Browse files
Files changed (3) hide show
  1. special_tokens_map.json +1 -0
  2. tokenizer_config.json +1 -0
  3. vocab.json +1 -0
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]"}
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"unk_token": "[UNK]", "bos_token": "<s>", "eos_token": "</s>", "pad_token": "[PAD]", "do_lower_case": false, "word_delimiter_token": "|", "tokenizer_class": "Wav2Vec2CTCTokenizer"}
vocab.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"ई": 0, "थ": 1, "i": 2, "म": 3, "ॉ": 4, "r": 5, "ठ": 6, "ण": 7, "ी": 8, "त": 9, "प": 10, "ल": 11, "ै": 12, "य": 13, "ं": 14, "ख": 15, "घ": 16, "f": 17, "ट": 18, "l": 19, "u": 20, "ः": 21, "छ": 22, "आ": 23, "स": 24, "w": 25, "ौ": 26, "ग": 27, "ऑ": 28, "च": 29, "'": 30, "न": 31, "m": 32, "a": 33, "र": 34, "ए": 35, "द": 36, "ड़": 37, "क": 38, "e": 39, "ब": 40, "्": 41, "भ": 42, "p": 43, "़": 44, "झ": 45, "ह": 46, "ड": 47, "व": 48, "ओ": 49, "ढ": 50, "ँ": 52, "अ": 53, "औ": 54, "श": 55, "इ": 56, "ज": 57, "फ": 58, "उ": 59, "ध": 60, "ऊ": 61, "ो": 62, "ृ": 63, "ा": 64, "ु": 65, "ि": 66, "ू": 67, "े": 68, "ऐ": 69, "ष": 70, "|": 51, "[UNK]": 71, "[PAD]": 72}