Stxlla committed on
Commit
8b1460b
1 Parent(s): 49180e9

add tokenizer

Browse files
Files changed (3) hide show
  1. special_tokens_map.json +1 -0
  2. tokenizer_config.json +1 -0
  3. vocab.json +1 -0
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]"}
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"unk_token": "[UNK]", "bos_token": "<s>", "eos_token": "</s>", "pad_token": "[PAD]", "do_lower_case": false, "word_delimiter_token": "|", "tokenizer_class": "Wav2Vec2CTCTokenizer"}
vocab.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"œ": 0, "́": 1, "ө": 2, "ʽ": 3, "_": 4, "ã": 5, "и": 6, "ы": 7, "«": 8, "č": 9, "ồ": 10, "ž": 11, "в": 12, "i": 13, "¡": 14, "ר": 15, "": 16, "ʿ": 17, "ń": 18, "ק": 19, "û": 20, "ó": 21, "ب": 22, "ੰ": 23, "蝦": 24, "¿": 25, "б": 26, "夷": 27, "å": 28, "ਮ": 29, "д": 30, "ő": 31, "й": 32, "ر": 33, "c": 34, "н": 35, "ם": 36, "æ": 37, "…": 38, "ь": 39, "肋": 40, "鮓": 41, "→": 42, "ひ": 43, "e": 44, "j": 45, "ذ": 46, "ס": 47, "ֹ": 48, "消": 49, "ò": 50, "~": 51, "ć": 52, "ō": 53, "ב": 54, "し": 55, "̇": 56, "ô": 57, "ṁ": 58, "申": 59, "n": 60, "р": 61, "ל": 62, "п": 63, "õ": 64, "q": 65, "ş": 66, "ę": 67, "ਾ": 68, "=": 69, "ָ": 70, "‹": 71, "¨": 72, "ミ": 73, "ù": 74, "戌": 75, "ヒ": 76, "ש": 77, "ਨ": 78, "ִ": 79, "y": 80, "ė": 81, "ي": 82, "{": 83, "م": 84, "ł": 85, "ë": 86, "–": 87, "т": 88, "$": 89, "ě": 90, "‧": 91, "ś": 92, "㓁": 93, "u": 94, "毵": 95, "d": 96, "г": 97, "ֵ": 98, "ة": 99, "ਤ": 100, "t": 101, "ム": 102, "ə": 104, "x": 105, "é": 106, "י": 107, "ð": 108, "´": 109, "ו": 110, "ּ": 111, "b": 112, "'": 113, "î": 114, "®": 115, "}": 116, "o": 117, "―": 118, "ਸ": 119, "ਿ": 120, "ý": 121, "&": 122, "背": 123, "罓": 124, "ṃ": 125, "ш": 126, "‑": 127, "p": 128, "מ": 129, "ř": 130, "ː": 131, "比": 132, "ה": 133, "»": 134, "日": 135, "≪": 136, "—": 137, "本": 138, "の": 139, "á": 140, "ö": 141, "е": 142, "â": 143, "ת": 144, "đ": 145, "л": 146, "|": 103, "о": 148, "ā": 149, "°": 150, "ș": 151, "ī": 152, "í": 153, "ل": 154, "ź": 155, "ì": 156, "ਆ": 157, "ª": 158, "к": 159, "ä": 160, "ı": 161, "l": 162, "ğ": 163, "ਘ": 164, "ё": 165, "ŏ": 166, "و": 167, "ñ": 168, "a": 169, "周": 170, "ら": 171, "真": 172, "ū": 173, "f": 174, "h": 175, "ï": 176, "`": 177, "·": 178, "k": 179, "ă": 180, "à": 181, "良": 182, "ü": 183, "口": 184, "箱": 185, "z": 186, "›": 187, "m": 188, "„": 189, "ү": 190, "肌": 191, "ê": 192, "ʻ": 193, "ゴ": 194, "r": 195, "š": 196, "w": 197, "‐": 198, "ț": 199, "ч": 200, "v": 201, "ø": 202, "ṇ": 203, "а": 204, "ا": 205, "生": 206, "ß": 207, "я": 208, "נ": 209, "’": 210, "ю": 
211, "ラ": 212, "网": 213, "د": 214, "fi": 215, "山": 216, "ż": 217, "ú": 218, "ʷ": 219, "ه": 220, "ϙ": 221, "≫": 222, "g": 223, "ְ": 224, "s": 225, "כ": 226, "罒": 227, "с": 228, "鮨": 229, "ç": 230, "þ": 231, "[UNK]": 147, "[PAD]": 232}