nikhil6041's picture
add tokenizer
4a848a7
raw
history blame
857 Bytes
{"द": 0, "l": 1, "।": 2, "!": 3, "p": 4, "प": 5, "ा": 6, "ी": 7, "ँ": 8, "झ": 9, "F": 10, "ग": 11, "े": 12, "य": 13, "ु": 14, "अ": 15, "आ": 16, "ऐ": 17, "ॉ": 18, "म": 19, "a": 20, "ब": 21, "उ": 22, "स": 23, "u": 24, "ष": 25, "ण": 26, ",": 27, "त": 28, "भ": 29, "ज": 30, "थ": 31, "़": 32, "ई": 33, "व": 34, ":": 35, "m": 36, "r": 37, "ठ": 38, ".": 39, "फ": 40, "ए": 41, "M": 42, "ओ": 43, "ऊ": 44, "न": 45, "ड़": 46, "क": 47, "्": 48, "ं": 49, "ऑ": 50, "ढ": 51, "इ": 52, "?": 53, "च": 54, "i": 55, "ै": 56, "ड": 57, "ह": 58, "e": 59, "घ": 60, "'": 61, "ल": 62, "औ": 63, "-": 64, "ि": 65, "ख": 66, "ू": 67, "\"": 68, "W": 69, "ध": 70, "ौ": 71, "ो": 72, "र": 73, "छ": 74, "ट": 75, "श": 76, "ृ": 77, "ः": 78, "|": 79, "[UNK]": 80, "[PAD]": 81}