riri-en-np / vocab.json
roviso's picture
Upload tokenizer
77acd54
raw
history blame
No virus
2.15 kB
{
"npi": {
"\n": 0,
"!": 2,
"\"": 3,
"%": 4,
"'": 5,
"(": 6,
")": 7,
",": 8,
"-": 9,
".": 10,
"/": 11,
"0": 12,
"1": 13,
"2": 14,
"3": 15,
"4": 16,
"5": 17,
"6": 18,
"7": 19,
"8": 20,
"9": 21,
":": 22,
";": 23,
"?": 24,
"A": 25,
"D": 26,
"M": 27,
"N": 28,
"R": 29,
"S": 30,
"[": 31,
"[PAD]": 146,
"[UNK]": 145,
"]": 32,
"_": 33,
"`": 34,
"a": 35,
"b": 36,
"c": 37,
"d": 38,
"e": 39,
"g": 40,
"h": 41,
"i": 42,
"j": 43,
"k": 44,
"l": 45,
"m": 46,
"n": 47,
"o": 48,
"p": 49,
"r": 50,
"s": 51,
"t": 52,
"v": 53,
"w": 54,
"x": 55,
"y": 56,
"|": 1,
"¥": 58,
"«": 59,
"®": 60,
"·": 61,
"Ï": 62,
"÷": 63,
"ँ": 64,
"ं": 65,
"ः": 66,
"अ": 67,
"आ": 68,
"इ": 69,
"ई": 70,
"उ": 71,
"ऊ": 72,
"ऋ": 73,
"ए": 74,
"ऐ": 75,
"ओ": 76,
"औ": 77,
"क": 78,
"ख": 79,
"ग": 80,
"घ": 81,
"ङ": 82,
"च": 83,
"छ": 84,
"ज": 85,
"झ": 86,
"ञ": 87,
"ट": 88,
"ठ": 89,
"ड": 90,
"ढ": 91,
"ण": 92,
"त": 93,
"थ": 94,
"द": 95,
"ध": 96,
"न": 97,
"प": 98,
"फ": 99,
"ब": 100,
"भ": 101,
"म": 102,
"य": 103,
"र": 104,
"ल": 105,
"व": 106,
"श": 107,
"ष": 108,
"स": 109,
"ह": 110,
"़": 111,
"ा": 112,
"ि": 113,
"ी": 114,
"ु": 115,
"ू": 116,
"ृ": 117,
"े": 118,
"ै": 119,
"ो": 120,
"ौ": 121,
"्": 122,
"ॐ": 123,
"।": 124,
"०": 125,
"१": 126,
"२": 127,
"३": 128,
"४": 129,
"५": 130,
"६": 131,
"७": 132,
"८": 133,
"९": 134,
"​": 135,
"‌": 136,
"‍": 137,
"–": 138,
"—": 139,
"‘": 140,
"’": 141,
"“": 142,
"”": 143,
"…": 144,
" ": 145
}
}