w2v2-vi-en-char-based / vocab.json
tuanio's picture
Upload tokenizer
faf4d9c
raw history blame
No virus
1.78 kB
{
"!": 1,
"#": 2,
"&": 3,
"'": 4,
"(": 5,
")": 6,
",": 7,
"-": 8,
".": 9,
"/": 10,
"0": 11,
"1": 12,
"2": 13,
"3": 14,
"4": 15,
"5": 16,
"6": 17,
"7": 18,
"8": 19,
"9": 20,
"<": 21,
"=": 22,
">": 23,
"?": 24,
"@": 25,
"_": 26,
"a": 27,
"b": 28,
"c": 29,
"d": 30,
"e": 31,
"f": 32,
"g": 33,
"h": 34,
"i": 35,
"j": 36,
"k": 37,
"l": 38,
"m": 39,
"n": 40,
"o": 41,
"p": 42,
"q": 43,
"r": 44,
"s": 45,
"t": 46,
"u": 47,
"v": 48,
"w": 49,
"x": 50,
"y": 51,
"z": 52,
"|": 0,
"£": 53,
"à": 54,
"á": 55,
"â": 56,
"ã": 57,
"è": 58,
"é": 59,
"ê": 60,
"ì": 61,
"í": 62,
"ð": 63,
"ò": 64,
"ó": 65,
"ô": 66,
"õ": 67,
"ö": 68,
"ù": 69,
"ú": 70,
"ý": 71,
"ă": 72,
"ć": 73,
"đ": 74,
"ġ": 75,
"ĩ": 76,
"ij": 77,
"ũ": 78,
"ơ": 79,
"ư": 80,
"ǎ": 81,
"ǡ": 82,
"̀": 83,
"́": 84,
"̃": 85,
"̉": 86,
"̣": 87,
"ۃ": 88,
"ۙ": 89,
"۟": 90,
"ۣ": 91,
"ạ": 92,
"ả": 93,
"ấ": 94,
"ầ": 95,
"ẩ": 96,
"ẫ": 97,
"ậ": 98,
"ắ": 99,
"ằ": 100,
"ẳ": 101,
"ẵ": 102,
"ặ": 103,
"ẹ": 104,
"ẻ": 105,
"ẽ": 106,
"ế": 107,
"ề": 108,
"ể": 109,
"ễ": 110,
"ệ": 111,
"ỉ": 112,
"ị": 113,
"ọ": 114,
"ỏ": 115,
"ố": 116,
"ồ": 117,
"ổ": 118,
"ỗ": 119,
"ộ": 120,
"ớ": 121,
"ờ": 122,
"ở": 123,
"ỡ": 124,
"ợ": 125,
"ụ": 126,
"ủ": 127,
"ứ": 128,
"ừ": 129,
"ử": 130,
"ữ": 131,
"ự": 132,
"ỳ": 133,
"ỵ": 134,
"ỷ": 135,
"ỹ": 136,
"‎": 137,
"–": 138,
"‘": 139,
"’": 140,
"“": 141,
"”": 142,
"…": 143,
"": 144
}