infinitejoy committed on
Commit
6250973
1 Parent(s): ff43006

add tokenizer

Browse files
Files changed (2) hide show
  1. added_tokens.json +1 -1
  2. vocab.json +1 -1
added_tokens.json CHANGED
@@ -1 +1 @@
1
- {"<s>": 68, "</s>": 69}
 
1
+ {"<s>": 91, "</s>": 92}
vocab.json CHANGED
@@ -1 +1 @@
1
- {"e": 1, "m": 2, "p": 3, "r": 4, "u": 5, "w": 6, "": 7, "": 8, "": 9, "": 10, "": 11, "": 12, "": 13, "": 14, "": 15, "": 16, "": 17, "": 18, "": 19, "": 20, "": 21, "": 22, "ग": 23, "घ": 24, "च": 25, "छ": 26, "ज": 27, "झ": 28, "ट": 29, "ठ": 30, "ड": 31, "ढ": 32, "ण": 33, "त": 34, "थ": 35, "द": 36, "ध": 37, "न": 38, "प": 39, "फ": 40, "ब": 41, "भ": 42, "म": 43, "य": 44, "र": 45, "ल": 46, "व": 47, "श": 48, "ष": 49, "स": 50, "ह": 51, "़": 52, "ा": 53, "ि": 54, "ी": 55, "ु": 56, "ू": 57, "ृ": 58, "े": 59, "ै": 60, "ॉ": 61, "ो": 62, "ौ": 63, "्": 64, "ड़": 65, "|": 0, "[UNK]": 66, "[PAD]": 67}
 
1
+ {"&": 1, "b": 2, "c": 3, "d": 4, "e": 5, "g": 6, "h": 7, "j": 8, "k": 9, "m": 10, "n": 11, "o": 12, "p": 13, "r": 14, "s": 15, "t": 16, "u": 17, "v": 18, "w": 19, "x": 20, "y": 21, "z": 22, "|": 0, "ँ": 24, "ं": 25, "ः": 26, "अ": 27, "आ": 28, "इ": 29, "ई": 30, "उ": 31, "ऊ": 32, "ऋ": 33, "ए": 34, "ऐ": 35, "ऑ": 36, "ओ": 37, "औ": 38, "क": 39, "ख": 40, "ग": 41, "घ": 42, "च": 43, "छ": 44, "ज": 45, "झ": 46, "ञ": 47, "ट": 48, "ठ": 49, "ड": 50, "ढ": 51, "ण": 52, "त": 53, "थ": 54, "द": 55, "ध": 56, "न": 57, "प": 58, "फ": 59, "ब": 60, "भ": 61, "म": 62, "य": 63, "र": 64, "ल": 65, "व": 66, "श": 67, "ष": 68, "स": 69, "ह": 70, "़": 71, "ा": 72, "ि": 73, "ी": 74, "ु": 75, "ू": 76, "ृ": 77, "ॅ": 78, "े": 79, "ै": 80, "ॉ": 81, "ो": 82, "ौ": 83, "्": 84, "क़": 85, "ग़": 86, "ज़": 87, "ड़": 88, "": 89, "’": 90, "[UNK]": 90, "[PAD]": 91}