1ucky40nc3's picture
add tokenizer
a169124
{
"$": 1,
"&": 2,
"(": 3,
")": 4,
"+": 5,
"/": 6,
"=": 7,
"@": 8,
"[": 9,
"[PAD]": 180,
"[UNK]": 179,
"]": 10,
"_": 11,
"`": 12,
"a": 13,
"b": 14,
"c": 15,
"d": 16,
"e": 17,
"f": 18,
"g": 19,
"h": 20,
"i": 21,
"j": 22,
"k": 23,
"l": 24,
"m": 25,
"n": 26,
"o": 27,
"p": 28,
"q": 29,
"r": 30,
"s": 31,
"t": 32,
"u": 33,
"v": 34,
"w": 35,
"x": 36,
"y": 37,
"z": 38,
"|": 0,
"¡": 39,
"«": 40,
"°": 41,
"´": 42,
"µ": 43,
"·": 44,
"»": 45,
"ß": 46,
"à": 47,
"á": 48,
"ã": 49,
"ä": 50,
"å": 51,
"æ": 52,
"ç": 53,
"é": 54,
"ê": 55,
"ë": 56,
"ì": 57,
"í": 58,
"ï": 59,
"ð": 60,
"ñ": 61,
"ò": 62,
"ó": 63,
"õ": 64,
"ö": 65,
"ø": 66,
"ù": 67,
"ú": 68,
"ü": 69,
"ý": 70,
"þ": 71,
"ā": 72,
"ă": 73,
"ą": 74,
"ć": 75,
"č": 76,
"ď": 77,
"đ": 78,
"ē": 79,
"ė": 80,
"ę": 81,
"ě": 82,
"ğ": 83,
"ġ": 84,
"ħ": 85,
"ī": 86,
"ı": 87,
"ł": 88,
"ń": 89,
"ņ": 90,
"ň": 91,
"ō": 92,
"ŏ": 93,
"ő": 94,
"œ": 95,
"ř": 96,
"ś": 97,
"ş": 98,
"š": 99,
"ť": 100,
"ū": 101,
"ů": 102,
"ź": 103,
"ż": 104,
"ž": 105,
"ǐ": 106,
"ǔ": 107,
"ș": 108,
"ț": 109,
"ə": 110,
"ʻ": 111,
"ʿ": 112,
"̆": 113,
"̇": 114,
"а": 115,
"в": 116,
"е": 117,
"и": 118,
"к": 119,
"м": 120,
"о": 121,
"р": 122,
"с": 123,
"ф": 124,
"ч": 125,
"ш": 126,
"ѹ": 127,
"א": 128,
"ב": 129,
"נ": 130,
"ע": 131,
"ש": 132,
"་": 133,
"ན": 134,
"ḫ": 135,
"ṟ": 136,
"ṣ": 137,
"ṭ": 138,
"ạ": 139,
"ả": 140,
"ắ": 141,
"ằ": 142,
"ế": 143,
"ễ": 144,
"ọ": 145,
"ồ": 146,
"ộ": 147,
"ứ": 148,
"‑": 149,
"–": 150,
"—": 151,
"’": 152,
"‚": 153,
"„": 154,
"‟": 155,
"…": 156,
"′": 157,
"″": 158,
"‹": 159,
"›": 160,
"→": 161,
"−": 162,
"≡": 163,
"⟨": 164,
"⟩": 165,
"カ": 166,
"临": 167,
"乡": 168,
"孙": 169,
"尣": 170,
"幺": 171,
"支": 172,
"比": 173,
"無": 174,
"生": 175,
"臣": 176,
"辶": 177,
"道": 178
}