wav2vec2_xlsr / vocab.json
diallomama's picture
Upload tokenizer
5891ad3
{
"$": 1,
"&": 2,
"-": 3,
"=": 4,
"[PAD]": 219,
"[UNK]": 218,
"_": 5,
"`": 6,
"a": 7,
"b": 8,
"c": 9,
"d": 10,
"e": 11,
"f": 12,
"g": 13,
"h": 14,
"i": 15,
"j": 16,
"k": 17,
"l": 18,
"m": 19,
"n": 20,
"o": 21,
"p": 22,
"q": 23,
"r": 24,
"s": 25,
"t": 26,
"u": 27,
"v": 28,
"w": 29,
"x": 30,
"y": 31,
"z": 32,
"{": 33,
"|": 0,
"}": 35,
"~": 36,
"¨": 37,
"ª": 38,
"«": 39,
"®": 40,
"°": 41,
"´": 42,
"·": 43,
"»": 44,
"ß": 45,
"à": 46,
"á": 47,
"â": 48,
"ã": 49,
"ä": 50,
"å": 51,
"æ": 52,
"é": 53,
"ê": 54,
"ë": 55,
"ì": 56,
"í": 57,
"î": 58,
"ï": 59,
"ð": 60,
"ñ": 61,
"ò": 62,
"ó": 63,
"ô": 64,
"õ": 65,
"ö": 66,
"ø": 67,
"ù": 68,
"ú": 69,
"û": 70,
"ü": 71,
"ý": 72,
"þ": 73,
"ā": 74,
"ă": 75,
"ć": 76,
"č": 77,
"đ": 78,
"ė": 79,
"ę": 80,
"ě": 81,
"ğ": 82,
"ī": 83,
"ı": 84,
"ł": 85,
"ń": 86,
"ō": 87,
"ő": 88,
"œ": 89,
"ř": 90,
"ś": 91,
"ş": 92,
"š": 93,
"ū": 94,
"ź": 95,
"ž": 96,
"ș": 97,
"ț": 98,
"ə": 99,
"ʷ": 100,
"ʻ": 101,
"ʽ": 102,
"ʿ": 103,
"ː": 104,
"́": 105,
"̇": 106,
"ϙ": 107,
"а": 108,
"б": 109,
"в": 110,
"г": 111,
"д": 112,
"е": 113,
"и": 114,
"й": 115,
"к": 116,
"л": 117,
"н": 118,
"о": 119,
"п": 120,
"р": 121,
"с": 122,
"т": 123,
"ч": 124,
"ш": 125,
"ы": 126,
"ь": 127,
"ю": 128,
"я": 129,
"ё": 130,
"ү": 131,
"ө": 132,
"ְ": 133,
"ִ": 134,
"ֵ": 135,
"ָ": 136,
"ֹ": 137,
"ּ": 138,
"ב": 139,
"ה": 140,
"ו": 141,
"י": 142,
"כ": 143,
"ל": 144,
"ם": 145,
"מ": 146,
"נ": 147,
"ס": 148,
"ק": 149,
"ר": 150,
"ש": 151,
"ת": 152,
"ب": 153,
"ة": 154,
"ذ": 155,
"ه": 156,
"ي": 157,
"ਆ": 158,
"ਘ": 159,
"ਤ": 160,
"ਨ": 161,
"ਮ": 162,
"ਸ": 163,
"ਾ": 164,
"ਿ": 165,
"ੰ": 166,
"ṁ": 167,
"ṃ": 168,
"ṇ": 169,
"ồ": 170,
"‐": 171,
"‑": 172,
"–": 173,
"—": 174,
"―": 175,
"’": 176,
"„": 177,
"…": 178,
"‧": 179,
"‹": 180,
"›": 181,
"→": 182,
"≪": 183,
"≫": 184,
"し": 185,
"の": 186,
"ひ": 187,
"ら": 188,
"ゴ": 189,
"ヒ": 190,
"ミ": 191,
"ム": 192,
"ラ": 193,
"㓁": 194,
"口": 195,
"周": 196,
"山": 197,
"戌": 198,
"日": 199,
"本": 200,
"比": 201,
"毵": 202,
"消": 203,
"生": 204,
"申": 205,
"真": 206,
"箱": 207,
"网": 208,
"罒": 209,
"罓": 210,
"肋": 211,
"肌": 212,
"背": 213,
"良": 214,
"鮓": 215,
"鮨": 216,
"fi": 217,
"": 218
}