Upload tokenizer
6f325cf
verified
|
{ |
|
"(cmn)": 1, |
|
"(de)": 2, |
|
"(en)": 3, |
|
"(es)": 4, |
|
"(fr)": 5, |
|
"(it)": 6, |
|
"(ko)": 7, |
|
"1": 8, |
|
"??": 9, |
|
"[PAD]": 246, |
|
"[UNK]": 245, |
|
"a": 10, |
|
"a1": 11, |
|
"a2": 12, |
|
"a5": 13, |
|
"ai1": 14, |
|
"ai2": 15, |
|
"ai5": 16, |
|
"aiɜ": 17, |
|
"aɜ": 18, |
|
"aɪ": 19, |
|
"aɪə": 20, |
|
"aɪɚ": 21, |
|
"aʊ": 22, |
|
"aː": 23, |
|
"b": 24, |
|
"bː": 25, |
|
"d": 26, |
|
"dz": 27, |
|
"dzː": 28, |
|
"dʑ": 29, |
|
"dʒ": 30, |
|
"dʒː": 31, |
|
"dː": 32, |
|
"d̪": 33, |
|
"e": 34, |
|
"ei1": 35, |
|
"ei2": 36, |
|
"ei5": 37, |
|
"eiɜ": 38, |
|
"eə": 39, |
|
"eɪ": 40, |
|
"eʊ": 41, |
|
"eː": 42, |
|
"f": 43, |
|
"h": 44, |
|
"hʲ": 45, |
|
"i": 46, |
|
"i.1": 47, |
|
"i.2": 48, |
|
"i.5": 49, |
|
"i.ɜ": 50, |
|
"i1": 51, |
|
"i2": 52, |
|
"i5": 53, |
|
"iou1": 54, |
|
"iou2": 55, |
|
"iou5": 56, |
|
"iouɜ": 57, |
|
"iɑ1": 58, |
|
"iɑ2": 59, |
|
"iɑ5": 60, |
|
"iɑɜ": 61, |
|
"iə": 62, |
|
"iɛ1": 63, |
|
"iɛ2": 64, |
|
"iɛ5": 65, |
|
"iɛɜ": 66, |
|
"iɜ": 67, |
|
"iː": 68, |
|
"i̪1": 69, |
|
"i̪2": 70, |
|
"i̪5": 71, |
|
"i̪ɜ": 72, |
|
"j": 73, |
|
"k": 74, |
|
"kh": 75, |
|
"kː": 76, |
|
"l": 77, |
|
"m": 78, |
|
"n": 79, |
|
"n̩": 80, |
|
"o": 81, |
|
"o1": 82, |
|
"o2": 83, |
|
"o5": 84, |
|
"onɡ1": 85, |
|
"onɡ2": 86, |
|
"onɡ5": 87, |
|
"onɡɜ": 88, |
|
"ou1": 89, |
|
"ou2": 90, |
|
"ou5": 91, |
|
"ouɜ": 92, |
|
"oɜ": 93, |
|
"oɪ": 94, |
|
"oʊ": 95, |
|
"oː": 96, |
|
"oːɹ": 97, |
|
"p": 98, |
|
"pf": 99, |
|
"ph": 100, |
|
"pː": 101, |
|
"q": 102, |
|
"r": 103, |
|
"s": 104, |
|
"s.": 105, |
|
"ss": 106, |
|
"t": 107, |
|
"th": 108, |
|
"ts": 109, |
|
"ts.": 110, |
|
"ts.h": 111, |
|
"tsh": 112, |
|
"tsː": 113, |
|
"tɕ": 114, |
|
"tɕh": 115, |
|
"tʃ": 116, |
|
"tʃː": 117, |
|
"tː": 118, |
|
"t̪": 119, |
|
"u": 120, |
|
"u1": 121, |
|
"u2": 122, |
|
"u5": 123, |
|
"ua1": 124, |
|
"ua2": 125, |
|
"ua5": 126, |
|
"uai2": 127, |
|
"uai5": 128, |
|
"uaiɜ": 129, |
|
"uaɜ": 130, |
|
"uei1": 131, |
|
"uei2": 132, |
|
"uei5": 133, |
|
"ueiɜ": 134, |
|
"uo1": 135, |
|
"uo2": 136, |
|
"uo5": 137, |
|
"uoɜ": 138, |
|
"uə1": 139, |
|
"uə2": 140, |
|
"uə5": 141, |
|
"uəɜ": 142, |
|
"uɜ": 143, |
|
"uɪ": 144, |
|
"uː": 145, |
|
"v": 146, |
|
"w": 147, |
|
"x": 148, |
|
"y": 149, |
|
"y1": 150, |
|
"y2": 151, |
|
"y5": 152, |
|
"yiɜ": 153, |
|
"yæ1": 154, |
|
"yæ2": 155, |
|
"yæ5": 156, |
|
"yæɜ": 157, |
|
"yə2": 158, |
|
"yə5": 159, |
|
"yəɜ": 160, |
|
"yɛ1": 161, |
|
"yɛ2": 162, |
|
"yɛ5": 163, |
|
"yɛɜ": 164, |
|
"yɜ": 165, |
|
"yː": 166, |
|
"z": 167, |
|
"|": 0, |
|
"æ": 168, |
|
"ç": 169, |
|
"ð": 170, |
|
"ø": 171, |
|
"øː": 172, |
|
"ŋ": 173, |
|
"œ": 174, |
|
"œ̃": 175, |
|
"ɐ": 176, |
|
"ɑ": 177, |
|
"ɑ1": 178, |
|
"ɑ2": 179, |
|
"ɑ5": 180, |
|
"ɑu1": 181, |
|
"ɑu2": 182, |
|
"ɑu5": 183, |
|
"ɑuɜ": 184, |
|
"ɑɜ": 185, |
|
"ɑː": 186, |
|
"ɑːɹ": 187, |
|
"ɑ̃": 188, |
|
"ɒ": 189, |
|
"ɔ": 190, |
|
"ɔø": 191, |
|
"ɔɪ": 192, |
|
"ɔː": 193, |
|
"ɔːɹ": 194, |
|
"ɔ̃": 195, |
|
"ɕ": 196, |
|
"ə": 197, |
|
"ə1": 198, |
|
"ə2": 199, |
|
"ə5": 200, |
|
"əl": 201, |
|
"ər2": 202, |
|
"ər5": 203, |
|
"ərɜ": 204, |
|
"əɜ": 205, |
|
"əʊ": 206, |
|
"əː": 207, |
|
"ɚ": 208, |
|
"ɛ": 209, |
|
"ɛɪ": 210, |
|
"ɛɹ": 211, |
|
"ɛː": 212, |
|
"ɛ̃": 213, |
|
"ɜ": 214, |
|
"ɜː": 215, |
|
"ɟ": 216, |
|
"ɡ": 217, |
|
"ɡː": 218, |
|
"ɣ": 219, |
|
"ɪ": 220, |
|
"ɪɹ": 221, |
|
"ɪː": 222, |
|
"ɫ": 223, |
|
"ɬ": 224, |
|
"ɯ": 225, |
|
"ɲ": 226, |
|
"ɹ": 227, |
|
"ɾ": 228, |
|
"ʁ": 229, |
|
"ʃ": 230, |
|
"ʊ": 231, |
|
"ʊə": 232, |
|
"ʊɹ": 233, |
|
"ʊː": 234, |
|
"ʌ": 235, |
|
"ʎ": 236, |
|
"ʐ": 237, |
|
"ʒ": 238, |
|
"ʔ": 239, |
|
"ʝ": 240, |
|
"β": 241, |
|
"θ": 242, |
|
"χ": 243, |
|
"ᵻ": 244 |
|
} |
|
|